From 02aaed0d42f601f02fdab763f3f1b35499814151 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Fri, 12 Apr 2024 09:28:57 +0900
Subject: [PATCH 01/31] add kotlin and kotlin-test

---
 build.gradle | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/build.gradle b/build.gradle
index 352a283f8b..4a57f048c9 100644
--- a/build.gradle
+++ b/build.gradle
@@ -10,15 +10,20 @@ buildscript {
         classpath 'gradle.plugin.org.kt3k.gradle.plugin:coveralls-gradle-plugin:2.12.0'
         classpath "gradle.plugin.com.github.jengelman.gradle.plugins:shadow:7.0.0"
         classpath 'com.adarshr:gradle-test-logger-plugin:2.0.0'
+        classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:1.8.21"
     }
 }
 
 repositories {
     mavenLocal()
     mavenCentral()
+    maven {
+        url = uri("https://plugins.gradle.org/m2/")
+    }
 }
 
 apply plugin: 'jacoco'
+apply plugin: 'org.jetbrains.kotlin.jvm'
 
 jacoco {
     toolVersion = '0.8.8'
@@ -55,8 +60,18 @@ subprojects {
         }
     }
 
-    sourceCompatibility = 1.11
-    targetCompatibility = 1.11
+//    sourceCompatibility = 1.11
+//    targetCompatibility = 1.11
+
+    kotlin {
+        jvmToolchain(17)
+    }
+
+    java {
+        toolchain {
+            languageVersion.set(JavaLanguageVersion.of(17))
+        }
+    }
 
     repositories {
         mavenCentral()
@@ -86,14 +101,20 @@ subprojects {
         // packaging local libs inside grobid-core.jar
         implementation fileTree(dir: new File(rootProject.rootDir, 'grobid-core/localLibs'), include: localLibs)
 
-        testRuntimeOnly 'org.junit.vintage:junit-vintage-engine:5.9.3'
-        testImplementation(platform('org.junit:junit-bom:5.9.3'))
+        testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine"
+        testRuntimeOnly "org.junit.vintage:junit-vintage-engine"
+        testImplementation(platform('org.junit:junit-bom:5.10.2'))
+        testRuntimeOnly("org.junit.platform:junit-platform-launcher") {
+            because("Only needed to run tests in a version of IntelliJ IDEA that bundles older versions")
+        }
         testImplementation('org.junit.jupiter:junit-jupiter')
         testImplementation 'org.easymock:easymock:5.1.0'
         testImplementation "org.powermock:powermock-api-easymock:2.0.7"
         testImplementation "org.powermock:powermock-module-junit4:2.0.7"
         testImplementation "xmlunit:xmlunit:1.6"
         testImplementation "org.hamcrest:hamcrest-all:1.3"
+        testImplementation 'org.jetbrains.kotlin:kotlin-test'
+        testImplementation "io.mockk:mockk:1.13.9"
 
         implementation "com.cybozu.labs:langdetect:1.1-20120112"
         implementation "com.rockymadden.stringmetric:stringmetric-core_2.11:0.27.4"

From 84ef8020a6d382fa769e8fe68a5dc75830f1d9e5 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 09:00:33 +0800
Subject: [PATCH 02/31] fix build

---
 build.gradle | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.gradle b/build.gradle
index 4a57f048c9..421879fb6c 100644
--- a/build.gradle
+++ b/build.gradle
@@ -23,7 +23,6 @@ repositories {
 }
 
 apply plugin: 'jacoco'
-apply plugin: 'org.jetbrains.kotlin.jvm'
 
 jacoco {
     toolVersion = '0.8.8'
@@ -34,6 +33,7 @@ allprojects {
     apply plugin: 'base'
     apply plugin: 'com.github.kt3k.coveralls'
     apply plugin: 'com.adarshr.test-logger'
+    apply plugin: 'org.jetbrains.kotlin.jvm'
 
     group = "org.grobid"
 

From 1ebcf3a59abd932640c698830a6943e456eeb38d Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 09:01:22 +0800
Subject: [PATCH 03/31] add kotlin test

---
 .../src/test/kotlin/org/grobid/core/test.kt       | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 grobid-core/src/test/kotlin/org/grobid/core/test.kt

diff --git a/grobid-core/src/test/kotlin/org/grobid/core/test.kt b/grobid-core/src/test/kotlin/org/grobid/core/test.kt
new file mode 100644
index 0000000000..282420c3be
--- /dev/null
+++ b/grobid-core/src/test/kotlin/org/grobid/core/test.kt
@@ -0,0 +1,15 @@
+package org.grobid.core
+
+import org.junit.Test
+import java.nio.charset.StandardCharsets
+
+class TextParserTest {
+
+    @Test
+    fun testConvertFractions6Numeric() {
+        val byteArray = byteArrayOf(-3, -1, -73, 0, 103, 0, 47, 0, 109, 0, 108, 0);
+        val input = String(byteArray, StandardCharsets.UTF_16LE)
+
+        print("toto")
+    }
+}

From 8443e6d04bcb174a491568304522802aa36d6537 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 09:02:14 +0800
Subject: [PATCH 04/31] update action's component version

---
 .github/workflows/ci-build-unstable.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml
index cdfa981049..9d56158542 100644
--- a/.github/workflows/ci-build-unstable.yml
+++ b/.github/workflows/ci-build-unstable.yml
@@ -12,9 +12,9 @@ jobs:
     runs-on: ubuntu-latest
     
     steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v4
       - name: Set up JDK 17
-        uses: actions/setup-java@v1
+        uses: actions/setup-java@v4
         with:
           java-version: 1.17
       - name: Build with Gradle
@@ -38,10 +38,10 @@ jobs:
     steps:
       - name: Create more disk space
         run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Build and push
         id: docker_build
-        uses: mr-smithers-excellent/docker-build-push@v5
+        uses: mr-smithers-excellent/docker-build-push@v6
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME_LFOPPIANO }}
           password: ${{ secrets.DOCKERHUB_TOKEN_LFOPPIANO }}

From 7fd419470641b34b491f3b57e7b0bfd4150cc5bc Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 09:06:54 +0800
Subject: [PATCH 05/31] fix actions

---
 .github/workflows/ci-build-unstable.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml
index 9d56158542..cf69c53314 100644
--- a/.github/workflows/ci-build-unstable.yml
+++ b/.github/workflows/ci-build-unstable.yml
@@ -16,7 +16,9 @@ jobs:
       - name: Set up JDK 17
         uses: actions/setup-java@v4
         with:
-          java-version: 1.17
+          java-version: '17.0.10+7'
+          distribution: 'temurin'
+          cache: 'gradle'
       - name: Build with Gradle
         run: ./gradlew clean assemble --info --stacktrace --no-daemon
 

From 2dc07a8d8ff68f127d4989c71a8741c569529785 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 09:36:06 +0800
Subject: [PATCH 06/31] cleanup

---
 .../src/test/kotlin/org/grobid/core/test.kt   | 15 ---------
 .../core/utilities/SentenceUtilitiesKTest.kt  | 32 +++++++++++++++++++
 2 files changed, 32 insertions(+), 15 deletions(-)
 delete mode 100644 grobid-core/src/test/kotlin/org/grobid/core/test.kt
 create mode 100644 grobid-core/src/test/kotlin/org/grobid/core/utilities/SentenceUtilitiesKTest.kt

diff --git a/grobid-core/src/test/kotlin/org/grobid/core/test.kt b/grobid-core/src/test/kotlin/org/grobid/core/test.kt
deleted file mode 100644
index 282420c3be..0000000000
--- a/grobid-core/src/test/kotlin/org/grobid/core/test.kt
+++ /dev/null
@@ -1,15 +0,0 @@
-package org.grobid.core
-
-import org.junit.Test
-import java.nio.charset.StandardCharsets
-
-class TextParserTest {
-
-    @Test
-    fun testConvertFractions6Numeric() {
-        val byteArray = byteArrayOf(-3, -1, -73, 0, 103, 0, 47, 0, 109, 0, 108, 0);
-        val input = String(byteArray, StandardCharsets.UTF_16LE)
-
-        print("toto")
-    }
-}
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/utilities/SentenceUtilitiesKTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/utilities/SentenceUtilitiesKTest.kt
new file mode 100644
index 0000000000..e26efb79a2
--- /dev/null
+++ b/grobid-core/src/test/kotlin/org/grobid/core/utilities/SentenceUtilitiesKTest.kt
@@ -0,0 +1,32 @@
+package org.grobid.core.utilities
+
+import org.junit.Test
+import kotlin.test.assertFalse
+import kotlin.test.assertTrue
+
+class SentenceUtilitiesKTest {
+
+    @Test
+    fun testToSkipToken_shouldReturnTrue() {
+        val tokens = arrayOf("-", " ", "\n", "\t")
+
+        tokens.forEach { token ->
+            assertTrue(SentenceUtilities.toSkipToken(token))
+        }
+
+    }
+
+    @Test // whitespace tokens must be reported as skippable, a lone "-" must not
+    fun testToSkipTokenNoHyphen_shouldReturnTrue() {
+        val tokens = arrayOf(" ", "\n", "\t")
+
+        tokens.forEach { token ->
+            assertTrue(SentenceUtilities.toSkipToken(token))
+        }
+
+        assertFalse { SentenceUtilities.toSkipToken("-") }
+
+    }
+
+
+}

From 9db86676a132542b0dda29e7c7651d52960529a3 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 09:37:18 +0800
Subject: [PATCH 07/31] cleanup and fix test

---
 .../java/org/grobid/core/utilities/SentenceUtilities.java     | 2 +-
 .../org/grobid/core/utilities/SentenceUtilitiesKTest.kt       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/SentenceUtilities.java b/grobid-core/src/main/java/org/grobid/core/utilities/SentenceUtilities.java
index a424e5e808..c0b4498835 100644
--- a/grobid-core/src/main/java/org/grobid/core/utilities/SentenceUtilities.java
+++ b/grobid-core/src/main/java/org/grobid/core/utilities/SentenceUtilities.java
@@ -297,7 +297,7 @@ public static boolean toSkipToken(String tok) {
             return false;
     }
 
-    private static boolean toSkipTokenNoHyphen(String tok) {
+    static boolean toSkipTokenNoHyphen(String tok) {
         if (tok.equals(" ") || tok.equals("\n") || tok.equals("\t"))
             return true;
         else
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/utilities/SentenceUtilitiesKTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/utilities/SentenceUtilitiesKTest.kt
index e26efb79a2..a9fdeca5e6 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/utilities/SentenceUtilitiesKTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/utilities/SentenceUtilitiesKTest.kt
@@ -21,10 +21,10 @@ class SentenceUtilitiesKTest {
         val tokens = arrayOf(" ", "\n", "\t")
 
         tokens.forEach { token ->
-            assertTrue(SentenceUtilities.toSkipToken(token))
+            assertTrue(SentenceUtilities.toSkipTokenNoHyphen(token))
         }
 
-        assertFalse { SentenceUtilities.toSkipToken("-") }
+        assertFalse { SentenceUtilities.toSkipTokenNoHyphen("-") }
 
     }
 

From d4a82614b3da94d4dfef7d937d2fc42de8713de0 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 09:52:39 +0800
Subject: [PATCH 08/31] add tests on the current code

---
 .../engines/FundingAcknowledgementParser.java |  2 +-
 .../FundingAcknowledgementParserTest.kt       | 29 +++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index c92b270ff1..4db6af52d2 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -102,7 +102,7 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
     /**
      * For convenience, a processing method taking an TEI XML segment as input - only paragraphs (Element p) 
      * will be processed in this segment and paragraph element will be replaced with the processed content.
-     * Resulting entities are relative to the whole procssed XML segment.
+     * Resulting entities are relative to the whole processed XML segment.
      * 
      * Tokenization is done with the default Grobid analyzer triggered by the identified language. 
      **/
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
new file mode 100644
index 0000000000..01ee60cdf9
--- /dev/null
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
@@ -0,0 +1,29 @@
+package org.grobid.core.engines
+
+import org.grobid.core.GrobidModels
+import org.grobid.core.lexicon.Lexicon
+import org.grobid.core.utilities.GrobidConfig
+import org.grobid.core.utilities.GrobidProperties
+import org.junit.Before
+import org.junit.Test
+import org.powermock.api.easymock.PowerMock
+
+class FundingAcknowledgementParserTest {
+
+    private lateinit var target: DateParser
+
+    @Before
+    @Throws(Exception::class)
+    fun setUp() {
+        PowerMock.mockStatic(Lexicon::class.java)
+        val modelParameters = GrobidConfig.ModelParameters()
+        modelParameters.name = "bao"
+        GrobidProperties.addModel(modelParameters)
+        target = DateParser(GrobidModels.DUMMY)
+    }
+
+    @Test
+    fun testGetExtractionResult() {
+
+    }
+}
\ No newline at end of file

From e0fd3b405197179414f32f25d27c20d75be457d6 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 10:35:07 +0800
Subject: [PATCH 09/31] fix omission of the last person in the acknowledgment /
 funding

---
 .../engines/FundingAcknowledgementParser.java | 17 +++-
 .../FundingAcknowledgementParserTest.kt       | 88 ++++++++++++++++++-
 2 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 4db6af52d2..0c11294c28 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -69,6 +69,10 @@ protected FundingAcknowledgementParser() {
         super(GrobidModels.FUNDING_ACKNOWLEDGEMENT);
     }
 
+    FundingAcknowledgementParser(GrobidModel model) {
+        super(model);
+    }
+
     private MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>>
         processing(List<LayoutToken> tokenizationFunding, GrobidAnalysisConfig config) {
         if (tokenizationFunding == null || tokenizationFunding.size() == 0)
@@ -189,7 +193,7 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
      * extracted normalized entities. These entities are referenced by the inline 
      * annotations with the usual @target attribute pointing to xml:id. 
      */
-    private MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>>
+    protected MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>>
             getExtractionResult(List<LayoutToken> tokenizations, String result) {
         List<Funding> fundings = new ArrayList<>();
         List<Person> persons = new ArrayList<>();
@@ -451,8 +455,14 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
         }
 
         // last funding, person, institution/affiliation
-        if (funding.isValid())
+        if (person.isValid()) {
+            persons.add(person);
+        }
+
+        if (funding.isValid()) {
             fundings.add(funding);
+        }
+
 
         if (institution.isNotNull()) 
             institutions.add(institution);
@@ -460,8 +470,9 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
         if (affiliation.isNotNull()) 
             affiliations.add(affiliation);
 
-        if (institutions != null && institutions.size() > 0)
+        if (CollectionUtils.isNotEmpty(institutions)) {
             affiliations.addAll(institutions);
+        }
 
         for(Funding localFunding : fundings) {
             localFunding.inferAcronyms();
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
index 01ee60cdf9..5cd4693c78 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
@@ -1,16 +1,21 @@
 package org.grobid.core.engines
 
 import org.grobid.core.GrobidModels
+import org.grobid.core.analyzers.GrobidAnalyzer
+import org.grobid.core.layout.LayoutToken
 import org.grobid.core.lexicon.Lexicon
 import org.grobid.core.utilities.GrobidConfig
 import org.grobid.core.utilities.GrobidProperties
+import org.hamcrest.CoreMatchers.`is`
+import org.hamcrest.MatcherAssert.assertThat
+import org.hamcrest.Matchers.hasSize
 import org.junit.Before
 import org.junit.Test
 import org.powermock.api.easymock.PowerMock
 
 class FundingAcknowledgementParserTest {
 
-    private lateinit var target: DateParser
+    private lateinit var target: FundingAcknowledgementParser
 
     @Before
     @Throws(Exception::class)
@@ -19,11 +24,90 @@ class FundingAcknowledgementParserTest {
         val modelParameters = GrobidConfig.ModelParameters()
         modelParameters.name = "bao"
         GrobidProperties.addModel(modelParameters)
-        target = DateParser(GrobidModels.DUMMY)
+        target = FundingAcknowledgementParser(GrobidModels.DUMMY)
     }
 
     @Test
     fun testGetExtractionResult() {
 
+        val input: String = "Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript.";
+
+        val results: String = "Our\tour\tO\tOu\tOur\tOur\tr\tur\tOur\tOur\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+                "warmest\twarmest\tw\twa\twar\twarm\tt\tst\test\tmest\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "thanks\tthanks\tt\tth\ttha\tthan\ts\tks\tnks\tanks\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "to\tto\tt\tto\tto\tto\to\tto\tto\tto\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "Patrice\tpatrice\tP\tPa\tPat\tPatr\te\tce\tice\trice\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+                "Lopez\tlopez\tL\tLo\tLop\tLope\tz\tez\tpez\topez\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+                ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\tI-<other>\n" +
+                "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "author\tauthor\ta\tau\taut\tauth\tr\tor\thor\tthor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "Grobid\tgrobid\tG\tGr\tGro\tGrob\td\tid\tbid\tobid\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "[\t[\t[\t[\t[\t[\t[\t[\t[\t[\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<other>\n" +
+                "22\t22\t2\t22\t22\t22\t2\t22\t22\t22\tLINEIN\tNOCAPS\tALLDIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "]\t]\t]\t]\t]\t]\t]\t]\t]\t]\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\t<other>\n" +
+                ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\t<other>\n" +
+                "DeLFT\tdelft\tD\tDe\tDeL\tDeLF\tT\tFT\tLFT\teLFT\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "[\t[\t[\t[\t[\t[\t[\t[\t[\t[\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<other>\n" +
+                "20\t20\t2\t20\t20\t20\t0\t20\t20\t20\tLINEIN\tNOCAPS\tALLDIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "]\t]\t]\t]\t]\t]\t]\t]\t]\t]\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\t<other>\n" +
+                ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\t<other>\n" +
+                "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "other\tother\to\tot\toth\tothe\tr\ter\ther\tther\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "open\topen\to\top\tope\topen\tn\ten\tpen\topen\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tHYPHEN\t0\t<other>\n" +
+                "source\tsource\ts\tso\tsou\tsour\te\tce\trce\turce\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "projects\tprojects\tp\tpr\tpro\tproj\ts\tts\tcts\tects\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "his\this\th\thi\this\this\ts\tis\this\this\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "continuous\tcontinuous\tc\tco\tcon\tcont\ts\tus\tous\tuous\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "support\tsupport\ts\tsu\tsup\tsupp\tt\trt\tort\tport\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "inspiration\tinspiration\ti\tin\tins\tinsp\tn\ton\tion\ttion\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "with\twith\tw\twi\twit\twith\th\tth\tith\twith\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "ideas\tideas\ti\tid\tide\tidea\ts\tas\teas\tdeas\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\t<other>\n" +
+                "suggestions\tsuggestions\ts\tsu\tsug\tsugg\ts\tns\tons\tions\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\t<other>\n" +
+                "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "fruitful\tfruitful\tf\tfr\tfru\tfrui\tl\tul\tful\ttful\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "discussions\tdiscussions\td\tdi\tdis\tdisc\ts\tns\tons\tions\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>\n" +
+                "We\twe\tW\tWe\tWe\tWe\te\tWe\tWe\tWe\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "thank\tthank\tt\tth\ttha\tthan\tk\tnk\tank\thank\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "Pedro\tpedro\tP\tPe\tPed\tPedr\to\tro\tdro\tedro\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+                "Baptista\tbaptista\tB\tBa\tBap\tBapt\ta\tta\tsta\tista\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+                "de\tde\td\tde\tde\tde\te\tde\tde\tde\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+                "Castro\tcastro\tC\tCa\tCas\tCast\to\tro\ttro\tstro\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+                "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+                "his\this\th\thi\this\this\ts\tis\this\this\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "support\tsupport\ts\tsu\tsup\tsupp\tt\trt\tort\tport\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "during\tduring\td\tdu\tdur\tduri\tg\tng\ting\tring\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "this\tthis\tt\tth\tthi\tthis\ts\tis\this\tthis\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "work\twork\tw\two\twor\twork\tk\trk\tork\twork\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>\n" +
+                "Special\tspecial\tS\tSp\tSpe\tSpec\tl\tal\tial\tcial\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "thanks\tthanks\tt\tth\ttha\tthan\ts\tks\tnks\tanks\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "to\tto\tt\tto\tto\tto\to\tto\tto\tto\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "Erina\terina\tE\tEr\tEri\tErin\ta\tna\tina\trina\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+                "Fujita\tfujita\tF\tFu\tFuj\tFuji\ta\tta\tita\tjita\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+                "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+                "useful\tuseful\tu\tus\tuse\tusef\tl\tul\tful\teful\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "tips\ttips\tt\tti\ttip\ttips\ts\tps\tips\ttips\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "on\ton\to\ton\ton\ton\tn\ton\ton\ton\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                "manuscript\tmanuscript\tm\tma\tman\tmanu\tt\tpt\tipt\tript\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+                ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>";
+
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+
+        val (element, mutableTriple) = target.getExtractionResult(tokens, results)
+
+        assertThat(mutableTriple.left, hasSize(0))
+        assertThat(mutableTriple.middle, hasSize(3))
+        assertThat(mutableTriple.middle.get(0).rawName, `is`("Patrice Lopez"))
+        assertThat(mutableTriple.middle.get(1).rawName, `is`("Pedro Baptista de Castro"))
+        assertThat(mutableTriple.middle.get(2).rawName, `is`("Erina Fujita"))
+        assertThat(mutableTriple.right, hasSize(0))
     }
 }
\ No newline at end of file

From ea1245a26a5cfc33f942b2413b609d8b0792c6d2 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 28 Apr 2024 10:58:05 +0800
Subject: [PATCH 10/31] add more tests and add MEXT abbreviation

---
 .../java/org/grobid/core/data/Funder.java     |  2 +
 .../FundingAcknowledgementParserTest.kt       | 59 ++++++++++++++++++-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/data/Funder.java b/grobid-core/src/main/java/org/grobid/core/data/Funder.java
index e4e847ea45..465b05a5da 100644
--- a/grobid-core/src/main/java/org/grobid/core/data/Funder.java
+++ b/grobid-core/src/main/java/org/grobid/core/data/Funder.java
@@ -60,6 +60,8 @@ public class Funder {
         prefixFounders.put("NSF", "National Science Foundation");
         prefixFounders.put("NIH", "National Institutes of Health");
         prefixFounders.put("ERC", "European Research Council");
+        //Japanese government
+        prefixFounders.put("MEXT", "Ministry of Education, Culture, Sports, Science and Technology");
     }
 
     public Funder() {
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
index 5cd4693c78..565fb78594 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
@@ -2,6 +2,8 @@ package org.grobid.core.engines
 
 import org.grobid.core.GrobidModels
 import org.grobid.core.analyzers.GrobidAnalyzer
+import org.grobid.core.data.Funder
+import org.grobid.core.data.Funding
 import org.grobid.core.layout.LayoutToken
 import org.grobid.core.lexicon.Lexicon
 import org.grobid.core.utilities.GrobidConfig
@@ -30,7 +32,7 @@ class FundingAcknowledgementParserTest {
     @Test
     fun testGetExtractionResult() {
 
-        val input: String = "Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript.";
+        val input = "Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript.";
 
         val results: String = "Our\tour\tO\tOu\tOur\tOur\tr\tur\tOur\tOur\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
                 "warmest\twarmest\tw\twa\twar\twarm\tt\tst\test\tmest\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
@@ -110,4 +112,59 @@ class FundingAcknowledgementParserTest {
         assertThat(mutableTriple.middle.get(2).rawName, `is`("Erina Fujita"))
         assertThat(mutableTriple.right, hasSize(0))
     }
+
+    @Test
+    fun testGetExtractionResult2() {
+
+        val input = "This work was partly supported by MEXT Program: Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials) Grant Number [JPMXP1122715503].";
+
+        val results: String = "This\tthis\tT\tTh\tThi\tThis\ts\tis\this\tThis\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            "work\twork\tw\two\twor\twork\tk\trk\tork\twork\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "partly\tpartly\tp\tpa\tpar\tpart\ty\tly\ttly\trtly\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "supported\tsupported\ts\tsu\tsup\tsupp\td\ted\tted\trted\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "by\tby\tb\tby\tby\tby\ty\tby\tby\tby\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "MEXT\tmext\tM\tME\tMEX\tMEXT\tT\tXT\tEXT\tMEXT\tLINEIN\tALLCAP\tNODIGIT\t0\t1\t0\tNOPUNCT\t0\tI-<funderName>\n" +
+            "Program\tprogram\tP\tPr\tPro\tProg\tm\tam\tram\tgram\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            ":\t:\t:\t:\t:\t:\t:\t:\t:\t:\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tPUNCT\t0\t<other>\n" +
+            "Data\tdata\tD\tDa\tDat\tData\ta\tta\tata\tData\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<programName>\n" +
+            "Creation\tcreation\tC\tCr\tCre\tCrea\tn\ton\tion\ttion\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Utilization\tutilization\tU\tUt\tUti\tUtil\tn\ton\tion\ttion\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tHYPHEN\t0\t<programName>\n" +
+            "Type\ttype\tT\tTy\tTyp\tType\te\tpe\type\tType\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Material\tmaterial\tM\tMa\tMat\tMate\tl\tal\tial\trial\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Research\tresearch\tR\tRe\tRes\tRese\th\tch\trch\tarch\tLINEIN\tINITCAP\tNODIGIT\t0\t1\t0\tNOPUNCT\t0\t<programName>\n" +
+            "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t1\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Development\tdevelopment\tD\tDe\tDev\tDeve\tt\tnt\tent\tment\tLINEIN\tINITCAP\tNODIGIT\t0\t1\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Project\tproject\tP\tPr\tPro\tProj\tt\tct\tect\tject\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "(\t(\t(\t(\t(\t(\t(\t(\t(\t(\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<programName>\n" +
+            "Digital\tdigital\tD\tDi\tDig\tDigi\tl\tal\ttal\tital\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Transformation\ttransformation\tT\tTr\tTra\tTran\tn\ton\tion\ttion\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Initiative\tinitiative\tI\tIn\tIni\tInit\te\tve\tive\ttive\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Center\tcenter\tC\tCe\tCen\tCent\tr\ter\tter\tnter\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Magnetic\tmagnetic\tM\tMa\tMag\tMagn\tc\tic\ttic\tetic\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Materials\tmaterials\tM\tMa\tMat\tMate\ts\tls\tals\tials\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            ")\t)\t)\t)\t)\t)\t)\t)\t)\t)\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\t<programName>\n" +
+            "Grant\tgrant\tG\tGr\tGra\tGran\tt\tnt\tant\trant\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            "Number\tnumber\tN\tNu\tNum\tNumb\tr\ter\tber\tmber\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "[\t[\t[\t[\t[\t[\t[\t[\t[\t[\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<other>\n" +
+            "JPMXP1122715503\tjpmxp1122715503\tJ\tJP\tJPM\tJPMX\t3\t03\t503\t5503\tLINEIN\tALLCAP\tCONTAINSDIGITS\t0\t0\t0\tNOPUNCT\t0\tI-<grantNumber>\n" +
+            "]\t]\t]\t]\t]\t]\t]\t]\t]\t]\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\tI-<other>\n" +
+            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>";
+
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+
+        val (element, mutableTriple) = target.getExtractionResult(tokens, results)
+
+        assertThat(mutableTriple.left, hasSize(1))
+        val funding1: Funding = mutableTriple.left.get(0)
+        val funder1: Funder = funding1.funder
+//        assertThat(funder1.fullName, `is`("MEXT"))
+        assertThat(funding1.programFullName, `is`("Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials)"))
+        assertThat(funder1.fullName, `is`("Ministry of Education, Culture, Sports, Science and Technology"))
+        assertThat(mutableTriple.middle, hasSize(0))
+        assertThat(mutableTriple.right, hasSize(0))
+    }
 }
\ No newline at end of file

From f74466ebe89d9ea4e2febedaa947ff73e177f4c6 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 17:07:02 +0900
Subject: [PATCH 11/31] cosmetics

---
 .../java/org/grobid/core/engines/Engine.java  | 21 +++++++------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/Engine.java b/grobid-core/src/main/java/org/grobid/core/engines/Engine.java
index 949e2d63a5..296b685114 100755
--- a/grobid-core/src/main/java/org/grobid/core/engines/Engine.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/Engine.java
@@ -1,18 +1,10 @@
 package org.grobid.core.engines;
 
-import org.apache.commons.lang3.tuple.Pair;
+import nu.xom.Element;
 import org.apache.commons.lang3.tuple.MutablePair;
 import org.apache.commons.lang3.tuple.MutableTriple;
-
-import nu.xom.Element;
-
-import org.grobid.core.data.Affiliation;
-import org.grobid.core.data.BibDataSet;
-import org.grobid.core.data.BiblioItem;
-import org.grobid.core.data.ChemicalEntity;
-import org.grobid.core.data.PatentItem;
-import org.grobid.core.data.Person;
-import org.grobid.core.data.Funding;
+import org.apache.commons.lang3.tuple.Pair;
+import org.grobid.core.data.*;
 import org.grobid.core.document.Document;
 import org.grobid.core.document.DocumentSource;
 import org.grobid.core.engines.config.GrobidAnalysisConfig;
@@ -24,14 +16,15 @@
 import org.grobid.core.utilities.Utilities;
 import org.grobid.core.utilities.counters.CntManager;
 import org.grobid.core.utilities.counters.impl.CntManagerFactory;
-
 import org.grobid.core.utilities.crossref.CrossrefClient;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.*;
-import java.util.*;
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
 
 /**
  * Class for managing the extraction of bibliographical information from PDF
@@ -1184,7 +1177,7 @@ public String processFundingAcknowledgement(String text, GrobidAnalysisConfig co
                 result.append(localResult.getLeft().toXML()); 
 
         } catch (final Exception exp) {
-            throw new GrobidException("An exception occured while running Grobid funding-acknowledgement model.", exp);
+            throw new GrobidException("An exception occurred while running Grobid funding-acknowledgement model.", exp);
         }
 
         return result.toString();

From 047af5bdd0a3c4c0ff61e02866805d058323a1eb Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 17:07:30 +0900
Subject: [PATCH 12/31] add transformation from token to character position

---
 .../grobid/core/utilities/TextUtilities.java  | 49 ++++++++++++++
 .../core/utilities/TextUtilitiesTest.java     | 64 +++++++++++++++++++
 2 files changed, 113 insertions(+)

diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
index 06f69bcdee..73ec73b352 100755
--- a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
+++ b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
@@ -1556,4 +1556,53 @@ public static org.apache.commons.lang3.tuple.Pair<OffsetPosition, OffsetPosition
         else
             return null;
     }
+
+    public static List<OffsetPosition> matchTokenAndString(List<LayoutToken> layoutTokens, String text, List<OffsetPosition> positions) {
+        List<OffsetPosition> newPositions = new ArrayList<>();
+        StringBuilder accumulator = new StringBuilder();
+        int pos = 0;
+
+        for (OffsetPosition position : positions) {
+            List<LayoutToken> urlTokens = layoutTokens.subList(position.start, position.end);
+            boolean first = true;
+            accumulator = new StringBuilder();
+            for (int i = 0; i < urlTokens.size(); i++) {
+                LayoutToken token = urlTokens.get(i);
+                if (StringUtils.isEmpty(token.getText()))
+                    continue;
+                int newPos = text.indexOf(token.getText(), pos);
+                if (newPos != -1) {
+                    //We update pos only at the first token of the annotation positions
+                    if (first) {
+                        pos = newPos;
+                        first = false;
+                    }
+                    accumulator.append(token);
+                } else {
+                    if (SentenceUtilities.toSkipToken(token.getText())) {
+                        continue;
+                    }
+                    if (StringUtils.isNotEmpty(accumulator)) {
+                        int start = text.indexOf(accumulator.toString(), pos);
+                        newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
+                        pos = newPos;
+                        break;
+                    }
+                    pos = newPos;
+                }
+            }
+            if (StringUtils.isNotEmpty(accumulator)) {
+                int start = text.indexOf(accumulator.toString(), pos);
+                newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
+                accumulator = new StringBuilder();
+            }
+
+        }
+        if (StringUtils.isNotEmpty(accumulator)) {
+            int start = text.indexOf(accumulator.toString(), pos);
+            newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
+        }
+
+        return newPositions;
+    }
 }
diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
index ff5ac7467b..4df8704ae9 100644
--- a/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
+++ b/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
@@ -8,12 +8,14 @@
 import org.junit.Test;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Matcher;
 
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.CoreMatchers.startsWith;
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.hasSize;
 import static org.junit.Assert.*;
 
 
@@ -407,4 +409,66 @@ public void testOrcidPattern() {
             }
         }
     }
+
+    @Test
+    public void testMatchTokenAndString() throws Exception {
+        final String input = "This work is available at https://github.com/lfoppiano/ \n" +
+            "supercon2. The repository contains the code of the \n" +
+            "SuperCon 2 interface, the curation workflow, and the \n" +
+            "\n" +
+            "Table 2. Data support, the number of entities for each label in \n" +
+            "each of the datasets used for evaluating the ML models. The \n" +
+            "base dataset is the original dataset described in [18], and the \n" +
+            "curation dataset is automatically collected based on the data-\n" +
+            "base corrections by the interface and manually corrected. \n" +
+            "\n";
+
+        List<LayoutToken> tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        final String inputReal = "This work is available at https://github.com/lfoppiano/ supercon2. The repository contains the code of the SuperCon 2 interface, the curation workflow, and the Table 2. Data support, the number of entities for each label in each of the datasets used for evaluating the ML models. The base dataset is the original dataset described in [18], and the curation dataset is automatically collected based on the database corrections by the interface and manually corrected. ";
+        List<OffsetPosition> urlTokens = Arrays.asList(new OffsetPosition(10, 23));
+
+        List<OffsetPosition> offsetPositions = TextUtilities.matchTokenAndString(tokenisedInput, inputReal, urlTokens);
+
+        assertThat(offsetPositions, hasSize(1));
+        OffsetPosition url1 = offsetPositions.get(0);
+        assertThat(url1.start, is(26));
+        assertThat(url1.end, is(65));
+        assertThat(inputReal.substring(url1.start, url1.end), is("https://github.com/lfoppiano/ supercon2"));
+
+    }
+
+
+    @Test
+    public void testMatchTokenAndString_twoElements() throws Exception {
+        final String input = "This work is available at https://github.com/lfoppiano/ \n" +
+            "supercon2. The repository contains the code of the \n" +
+            "SuperCon 2 interface, the curation workflow, and the \n" +
+            "\n" +
+            "Table 2. Data support, the number of entities for each label in \n" +
+            "each of the datasets used for evaluating the ML models. The \n" +
+            "base dataset is the original dataset described in [18], and the \n" +
+            "curation dataset is automatically collected based on the data-\n" +
+            "base corrections by the interface and manually corrected. \n" +
+            "\n";
+
+        List<LayoutToken> tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        final String inputReal = "This work is available at https://github.com/lfoppiano/ supercon2. The repository contains the code of the SuperCon 2 interface, the curation workflow, and the Table 2. Data support, the number of entities for each label in each of the datasets used for evaluating the ML models. The base dataset is the original dataset described in [18], and the curation dataset is automatically collected based on the database corrections by the interface and manually corrected. ";
+        List<OffsetPosition> urlTokens = Arrays.asList(new OffsetPosition(0, 3), new OffsetPosition(10, 23));
+
+        List<OffsetPosition> offsetPositions = TextUtilities.matchTokenAndString(tokenisedInput, inputReal, urlTokens);
+
+        assertThat(offsetPositions, hasSize(2));
+        OffsetPosition url0 = offsetPositions.get(0);
+        assertThat(url0.start, is(0));
+        assertThat(url0.end, is(9));
+
+        assertThat(inputReal.substring(url0.start, url0.end), is("This work"));
+
+        OffsetPosition url1 = offsetPositions.get(1);
+        assertThat(url1.start, is(26));
+        assertThat(url1.end, is(65));
+
+        assertThat(inputReal.substring(url1.start, url1.end), is("https://github.com/lfoppiano/ supercon2"));
+
+    }
 }

From 9f2edb6aeae31bf2a25f07813bc5c16117ff511a Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 17:07:54 +0900
Subject: [PATCH 13/31] add class to represent the parse of a funding and
 acknowledgement statement

---
 .../core/data/FundingAcknowledgmentParse.java | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 grobid-core/src/main/java/org/grobid/core/data/FundingAcknowledgmentParse.java

diff --git a/grobid-core/src/main/java/org/grobid/core/data/FundingAcknowledgmentParse.java b/grobid-core/src/main/java/org/grobid/core/data/FundingAcknowledgmentParse.java
new file mode 100644
index 0000000000..a44e930922
--- /dev/null
+++ b/grobid-core/src/main/java/org/grobid/core/data/FundingAcknowledgmentParse.java
@@ -0,0 +1,46 @@
+package org.grobid.core.data;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class represents the parse of a funding / acknowledgement statement (fundings, persons, affiliations)
+ */
+public class FundingAcknowledgmentParse {
+    List<Funding> fundingList = new ArrayList<>();
+    List<Person> personList = new ArrayList<>();
+    List<Affiliation> affiliations = new ArrayList<>();
+//    List<Pair<OffsetPosition, Element> statementAnnotations = new ArrayList<>();
+
+    public List<Funding> getFundings() {
+        return fundingList;
+    }
+
+    public void setFundings(List<Funding> fundingList) {
+        this.fundingList = fundingList;
+    }
+
+    public List<Person> getPersons() {
+        return personList;
+    }
+
+    public void setPersons(List<Person> personList) {
+        this.personList = personList;
+    }
+
+    public List<Affiliation> getAffiliations() {
+        return affiliations;
+    }
+
+    public void setAffiliations(List<Affiliation> fundingBodies) {
+        this.affiliations = fundingBodies;
+    }
+
+//    public List<GrobidAnnotation> getStatementAnnotations() {
+//        return statementAnnotations;
+//    }
+
+//    public void setStatementAnnotations(List<GrobidAnnotation> statementAnnotations) {
+//        this.statementAnnotations = statementAnnotations;
+//    }
+}

From 4b3a763269df5fccb11ba82282edbebcac2d66bd Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 17:18:08 +0900
Subject: [PATCH 14/31] fix the funding and acknowledgement parser to preserve
 the sentence segmentation and the reference markers

---
 .../engines/FundingAcknowledgementParser.java | 475 +++++++++++++-----
 ...ingAcknowledgementParserIntegrationTest.kt | 117 +++++
 .../FundingAcknowledgementParserTest.kt       | 237 ++++++++-
 3 files changed, 675 insertions(+), 154 deletions(-)
 create mode 100644 grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 0c11294c28..160e84854f 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -1,65 +1,38 @@
 package org.grobid.core.engines;
 
+import com.google.common.collect.Iterables;
+import nu.xom.*;
 import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.MutablePair;
+import org.apache.commons.lang3.tuple.MutableTriple;
+import org.apache.commons.lang3.tuple.Pair;
 import org.grobid.core.GrobidModel;
 import org.grobid.core.GrobidModels;
 import org.grobid.core.analyzers.GrobidAnalyzer;
-import org.grobid.core.data.Funding;
-import org.grobid.core.data.Funder;
-import org.grobid.core.data.Person;
-import org.grobid.core.data.Affiliation;
+import org.grobid.core.data.*;
+import org.grobid.core.engines.config.GrobidAnalysisConfig;
 import org.grobid.core.engines.label.TaggingLabel;
-import org.grobid.core.engines.label.TaggingLabels;
 import org.grobid.core.engines.tagging.GenericTaggerUtils;
 import org.grobid.core.exceptions.GrobidException;
 import org.grobid.core.features.FeaturesVectorFunding;
-import org.grobid.core.features.FeatureFactory;
-import org.grobid.core.lang.Language;
 import org.grobid.core.layout.LayoutToken;
-import org.grobid.core.lexicon.Lexicon;
 import org.grobid.core.tokenization.TaggingTokenCluster;
 import org.grobid.core.tokenization.TaggingTokenClusteror;
 import org.grobid.core.utilities.LayoutTokensUtil;
-import org.grobid.core.utilities.TextUtilities;
 import org.grobid.core.utilities.OffsetPosition;
+import org.grobid.core.utilities.TextUtilities;
 import org.grobid.core.utilities.UnicodeUtil;
-import org.grobid.core.engines.config.GrobidAnalysisConfig;
-
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.List;
-import java.util.StringTokenizer;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-
+import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import nu.xom.Attribute;
-import nu.xom.Element;
-import nu.xom.Elements;
-import nu.xom.Node;
-import nu.xom.Nodes;
-import nu.xom.Text;
-import nu.xom.Document;
-import nu.xom.ParsingException;
-import nu.xom.ValidityException;
-import nu.xom.Builder;
-
 import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.commons.lang3.tuple.MutablePair;
-import org.apache.commons.lang3.tuple.Pair;
-import org.apache.commons.lang3.tuple.MutableTriple;
+import java.util.ArrayList;
+import java.util.List;
 
-import static org.apache.commons.lang3.StringUtils.isNotBlank;
-import static org.grobid.core.engines.label.TaggingLabels.*;
 import static org.grobid.core.document.xml.XmlBuilderUtils.teiElement;
-import static org.grobid.core.document.xml.XmlBuilderUtils.addXmlId;
-import static org.grobid.core.document.xml.XmlBuilderUtils.textNode;
+import static org.grobid.core.engines.label.TaggingLabels.*;
 
 public class FundingAcknowledgementParser extends AbstractParser {
 
@@ -73,15 +46,16 @@ protected FundingAcknowledgementParser() {
         super(model);
     }
 
-    private MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>>
+    private MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse>
         processing(List<LayoutToken> tokenizationFunding, GrobidAnalysisConfig config) {
-        if (tokenizationFunding == null || tokenizationFunding.size() == 0)
+        if (CollectionUtils.isEmpty(tokenizationFunding)) {
             return null;
+        }
         String res;
         try {
             String featureVector = FeaturesVectorFunding.addFeatures(tokenizationFunding, null);
             res = label(featureVector);
-//System.out.println(res);
+
         } catch (Exception e) {
             throw new GrobidException("CRF labeling with table model fails.", e);
         }
@@ -93,14 +67,59 @@ protected FundingAcknowledgementParser() {
     }
 
     /**
-     * For convenience, a processing method taking a raw string as input. 
-     * Tokenization is done with the default Grobid analyzer triggered by the identified language. 
+     * For convenience, a processing method taking a raw string as input.
+     * Tokenization is done with the default Grobid analyzer triggered by the identified language.
+     *
+     * TODO: implement the sentence segmentation
      **/
     public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>> processing(String text,
-                               GrobidAnalysisConfig config) {
+                                                                                                   GrobidAnalysisConfig config) {
         text = UnicodeUtil.normaliseText(text);
         List<LayoutToken> tokenizationFunding = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
-        return processing(tokenizationFunding, config);
+        MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> results = processing(tokenizationFunding, config);
+        MutableTriple<List<Funding>, List<Person>, List<Affiliation>> entities = MutableTriple.of(results.getRight().getFundings(), results.getRight().getPersons(), results.getRight().getAffiliations());
+        List<Pair<OffsetPosition, Element>> annotations = results.getLeft();
+
+        Element outputParagraph = injectedAnnotationsInNode(tokenizationFunding, annotations, teiElement("p"));
+
+        return MutablePair.of(outputParagraph, entities);
+    }
+
+    /**
+     * This method takes as input a tokenized text, a list of annotations and a root element, and appends to the root
+     * the nodes obtained by combining the text with the annotations. The root element is returned.
+     */
+    protected static Element injectedAnnotationsInNode(List<LayoutToken> tokenizationFunding, List<Pair<OffsetPosition, Element>> annotations, Element rootElement) {
+
+        int pos = 0;
+        for(Pair<OffsetPosition, Element> annotation: annotations) {
+            OffsetPosition annotationPosition = annotation.getLeft();
+            Element annotationContentElement = annotation.getRight();
+
+            List<LayoutToken> before = tokenizationFunding.subList(pos, annotationPosition.start);
+            String clusterContentBefore = LayoutTokensUtil.toText(before);
+
+            if (CollectionUtils.isNotEmpty(before) && before.get(0).getText().equals(" ")) {
+                rootElement.appendChild(new Text(" "));
+            }
+
+            rootElement.appendChild(clusterContentBefore);
+
+            pos = annotationPosition.end;
+            rootElement.appendChild(annotationContentElement);
+        }
+
+        // add the last chunk of the paragraph (or the whole paragraph if there were no annotations)
+        List<LayoutToken> remaining = tokenizationFunding.subList(pos, tokenizationFunding.size());
+        String remainingClusterContent = LayoutTokensUtil.normalizeDehyphenizeText(remaining);
+
+        if (CollectionUtils.isNotEmpty(remaining) && remaining.get(0).getText().equals(" ")) {
+            rootElement.appendChild(new Text(" "));
+        }
+
+        rootElement.appendChild(remainingClusterContent);
+
+        return rootElement;
     }
 
     /**
@@ -121,49 +140,129 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
             Document localDoc = parser.build(tei, null);
 
             // get the paragraphs
-            Element root = localDoc.getRootElement();
-            Nodes paragraphs = root.query("//p");
+            Element rootElementStatement = localDoc.getRootElement();
+            Nodes paragraphs = rootElementStatement.query("//p");
+
+            boolean sentenceSegmentation = config.isWithSentenceSegmentation();
 
             for(Node paragraph : paragraphs) {
                 String paragraphText = paragraph.getValue();
-                List<LayoutToken> tokenizationFunding = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(paragraphText);
-
-                MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>> localResult = processing(tokenizationFunding, config);
-                
-                // replace paragraph content
-                if (localResult.getLeft() != null && localResult.getLeft().getChildCount()>0) {
-                    ((Element) paragraph).removeChildren();
-                    for (int i = localResult.getLeft().getChildCount()-1; i >=0; i--) {
-                        Node localNode = localResult.getLeft().getChild(i);
-                        localNode.detach();
-                        ((Element) paragraph).insertChild(localNode, 0);
+                GrobidAnalyzer analyzer = GrobidAnalyzer.getInstance();
+                List<LayoutToken> tokenizationFunding = analyzer.tokenizeWithLayoutToken(paragraphText);
+
+                StringBuilder sb = new StringBuilder();
+
+                MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> localResult = processing(tokenizationFunding, config);
+
+                List<Pair<OffsetPosition, Element>> annotations = localResult.left;
+                FundingAcknowledgmentParse localEntities = localResult.right;
+
+                if (CollectionUtils.isEmpty(annotations)) {
+                    continue;
+                }
+
+                List<OffsetPosition> list = annotations.stream().map(a -> a.getLeft()).toList();
+                List<OffsetPosition> annotationsPositionText = TextUtilities.matchTokenAndString(tokenizationFunding, paragraphText, list);
+                List<Pair<OffsetPosition, Element>> annotationsWithPosRefToText = new ArrayList<>();
+                for (int i = 0; i < annotationsPositionText.size(); i++) {
+                    annotationsWithPosRefToText.add(Pair.of(annotationsPositionText.get(i), annotations.get(i).getRight()));
+                }
+
+                annotations = annotationsWithPosRefToText;
+
+                if (sentenceSegmentation) {
+//                    Pair<List<String>, List<OffsetPosition>> sentenceInformation = extractSentencesAndPositionsFromParagraphElement(rootElementStatement);
+//
+//                    List<String> sentencesList = sentenceInformation.getLeft();
+//                    List<OffsetPosition> offsetPositionList = sentenceInformation.getRight();
+//
+//                    List<List<LayoutToken>> sentenceLayoutTokens = sentencesList.stream()
+//                        .map(analyzer::tokenizeWithLayoutToken)
+//                        .toList();
+//
+//                    List<OffsetPosition> sentenceTokenPositions = new ArrayList<>();
+//                    int pos = 0;
+//                    for (List<LayoutToken> sentenceLayoutToken : sentenceLayoutTokens) {
+//                        offsetPositionList.add(new OffsetPosition(pos, pos + sentenceLayoutToken.size()));
+//                        pos += sentenceLayoutToken.size();
+//                    }
+                    int pos = 0;
+                    int sentenceStartOffset = 0;
+                    Nodes sentences = paragraph.query("//s");
+
+                    if(sentences.size() == 0) {
+                        // Overly careful - we should never end up here.
+                        LOGGER.warn("While the configuration claim that paragraphs must be segmented, we did not find any sentence. ");
+
+                        List<Node> nodes = getNodesAnnotationsInTextNode(paragraph, annotations);
+
+                        for (int i = 0; i < paragraph.getChildCount(); i++) {
+                            paragraph.getChild(i).detach();
+                        }
+                        for (Node node: nodes) {
+                            node.detach();
+                            ((Element) paragraph).appendChild(node);
+                        }
+                    }
+
+                    for (Node sentence : sentences) {
+                        String sentenceText = sentence.getValue();
+                        List<Node> newChildren = new ArrayList<>();
+                        for (int i = 0; i < sentence.getChildCount(); i++) {
+                            // Assumption: the sentence structure is flat, at most one level deep
+                            Node currentNode = sentence.getChild(i);
+                            if (currentNode instanceof Text) {
+                                String text = currentNode.getValue();
+                                int finalPos = pos;
+                                List<Pair<OffsetPosition, Element>> annotationsInThisChunk = annotations.stream()
+                                    .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end < finalPos + text.length())
+                                    .toList();
+
+                                if (CollectionUtils.isNotEmpty(annotationsInThisChunk)) {
+                                    List<Node> nodes = getNodesAnnotationsInTextNode(currentNode, annotationsInThisChunk, pos);
+                                    newChildren.addAll(nodes);
+                                } else {
+                                    newChildren.add(currentNode);
+                                }
+                                pos += text.length();
+                            } else if (currentNode instanceof Element) {
+                                newChildren.add(currentNode);
+                                pos += currentNode.getValue().length();
+                            } /*else {
+                                System.out.println(currentNode);
+                            }*/
+                        }
+
+                        for (int i = 0; i < sentence.getChildCount(); i++) {
+                            sentence.getChild(i).detach();
+                        }
+                        for (Node node: newChildren) {
+                            node.detach();
+                            ((Element) sentence).appendChild(node);
+                        }
+
+                        sentenceStartOffset += sentenceText.length();
+                    }
+                } else {
+                    List<Node> nodes = getNodesAnnotationsInTextNode(paragraph, annotations);
+
+                    for (int i = 0; i < paragraph.getChildCount(); i++) {
+                        paragraph.getChild(i).detach();
+                    }
+                    for (Node node: nodes) {
+                        node.detach();
+                        ((Element) paragraph).appendChild(node);
                     }
                 }
+
                 // update extracted entities
                 if (globalResult == null) {
-                    globalResult = MutablePair.of(root, localResult.getRight());
+                    globalResult = MutablePair.of(rootElementStatement, MutableTriple.of(localEntities.getFundings(), localEntities.getPersons(), localEntities.getAffiliations()));
                 } else {
                     // concatenate members of the local results to the global ones
-                    MutableTriple<List<Funding>,List<Person>,List<Affiliation>> localEntities = localResult.getRight();
-                    MutableTriple<List<Funding>,List<Person>,List<Affiliation>> globalEntities = globalResult.getRight();
-
-                    List<Funding> localFundings = localEntities.getLeft();
-                    List<Funding> globalFundings = globalEntities.getLeft();
-                    globalFundings.addAll(localFundings);
-                    globalEntities.setLeft(globalFundings);
-
-                    List<Person> localPersons = localEntities.getMiddle();
-                    List<Person> globalPersons = globalEntities.getMiddle();
-                    globalPersons.addAll(localPersons);
-                    globalEntities.setMiddle(globalPersons);
-
-                    List<Affiliation> localAffiliation = localEntities.getRight();
-                    List<Affiliation> globalAffiliations = globalEntities.getRight();
-                    globalAffiliations.addAll(localAffiliation);
-                    globalEntities.setRight(globalAffiliations);
-
-                    globalResult.setRight(globalEntities);
+                    globalResult = aggregateResults(MutableTriple.of(localEntities.getFundings(), localEntities.getPersons(), localEntities.getAffiliations()), globalResult);
                 }
+
             }
 
             //System.out.println(globalResult.getLeft().toXML());
@@ -173,11 +272,95 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
             LOGGER.warn("Parsing error of the TEI fragment from funding/acknowledgement section", exp);
         } catch(IOException exp) {
             LOGGER.warn("Input TEI fragment invalid from funding/acknowledgement section", exp);
-        } 
-        
+        }
+
+        return globalResult;
+    }
+
+    /**
+     * Returns the text content of the target node as a list of nodes: text nodes interleaved with the
+     * annotation elements, placed at their offsets in that text (delegates with a sentence offset of 0).
+     * If the node is empty, injectedAnnotationsInNode should be used instead, as this method will fail.
+     */
+    protected static List<Node> getNodesAnnotationsInTextNode(Node targetNode, List<Pair<OffsetPosition, Element>> annotations) {
+        return getNodesAnnotationsInTextNode(targetNode, annotations, 0);
+    }
+
+    /**
+     * Same as the two-argument variant, for annotations whose positions were computed relative to
+     * the paragraph: {@code sentenceOffset} is subtracted from every start/end so that the positions
+     * index into the text of {@code targetNode}. Annotations are consumed in list order.
+     */
+    protected static List<Node> getNodesAnnotationsInTextNode(Node targetNode, List<Pair<OffsetPosition, Element>> annotations, int sentenceOffset) {
+        final String nodeText = targetNode.getValue();
+        final List<Node> result = new ArrayList<>();
+
+        // index in nodeText of the first character that has not been emitted yet
+        int cursor = 0;
+
+        for (Pair<OffsetPosition, Element> entry : annotations) {
+            final OffsetPosition span = entry.getLeft();
+            final Element annotationElement = entry.getRight();
+
+            // shift the paragraph-relative boundaries to node-relative ones
+            final int localStart = span.start - sentenceOffset;
+            final int localEnd = span.end - sentenceOffset;
+
+            // text preceding the annotation; a Text node is added even when it is empty
+            final String leadingText = nodeText.substring(cursor, localStart);
+            result.add(new Text(leadingText));
+
+            // the annotation element itself, then move the cursor past the annotated span
+            result.add(annotationElement);
+            cursor = localEnd;
+        }
+
+        // whatever follows the last annotation; again added even when empty
+        final String trailingText = nodeText.substring(cursor);
+        result.add(new Text(trailingText));
+
+        return result;
+    }
+
+    /**
+     * Merges the entities of one processed statement into the running global result.
+     * The three lists held by globalResult are extended in place with the content of the matching
+     * localEntities lists, and the very same globalResult instance is returned.
+     */
+    private static @NotNull MutablePair<Element, MutableTriple<List<Funding>, List<Person>, List<Affiliation>>> aggregateResults(MutableTriple<List<Funding>, List<Person>, List<Affiliation>> localEntities, MutablePair<Element, MutableTriple<List<Funding>, List<Person>, List<Affiliation>>> globalResult) {
+        final MutableTriple<List<Funding>, List<Person>, List<Affiliation>> accumulated = globalResult.getRight();
+
+        // left = fundings
+        final List<Funding> newFundings = localEntities.getLeft();
+        accumulated.getLeft().addAll(newFundings);
+
+        // middle = persons
+        final List<Person> newPersons = localEntities.getMiddle();
+        accumulated.getMiddle().addAll(newPersons);
+
+        // right = affiliations
+        final List<Affiliation> newAffiliations = localEntities.getRight();
+        accumulated.getRight().addAll(newAffiliations);
+
         return globalResult;
     }
 
+    /** Texts of the {@code <s>} descendants of the paragraph, each with its start/end offsets in the concatenated sentence text. */
+    protected static Pair<List<String>, List<OffsetPosition>> extractSentencesAndPositionsFromParagraphElement(Element paragraphElement) {
+        List<String> sentencesAsString = new ArrayList<>();
+        List<OffsetPosition> sentenceOffsetPositions = new ArrayList<>();
+        int offset = 0;
+
+        // relative path: the former "//s" was evaluated from the document root and matched every <s> of the document
+        for (Node sentence : paragraphElement.query(".//s")) {
+            String sentenceText = sentence.getValue();
+            sentencesAsString.add(sentenceText);
+            sentenceOffsetPositions.add(new OffsetPosition(offset, offset + sentenceText.length()));
+            offset += sentenceText.length();
+        }
+        return Pair.of(sentencesAsString, sentenceOffsetPositions);
+    }
+
 
     /**
      * The processing here is called from the header and/or full text parser in cascade
@@ -188,35 +371,44 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
      * of layout tokens in a context free manner. 
      * 
      * The expected input here is a paragraph.
+     *
+     *     // This returns the Element of each annotation and the position where it should be injected, relative to the paragraph.
+     *     // TODO: make new data objects for the annotations
      * 
      * Return an XML fragment with inline annotations of the input text, together with 
      * extracted normalized entities. These entities are referenced by the inline 
      * annotations with the usual @target attribute pointing to xml:id. 
      */
-    protected MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>>
-            getExtractionResult(List<LayoutToken> tokenizations, String result) {
+    protected MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> getExtractionResult(List<LayoutToken> tokensParagraph, String labellingResult) {
         List<Funding> fundings = new ArrayList<>();
         List<Person> persons = new ArrayList<>();
         List<Affiliation> affiliations = new ArrayList<>();
         List<Affiliation> institutions = new ArrayList<>();
 
+        FundingAcknowledgmentParse parsedStatement = new FundingAcknowledgmentParse();
+        parsedStatement.setFundings(fundings);
+        parsedStatement.setPersons(persons);
+        parsedStatement.setAffiliations(affiliations);
+
         // current funding
         Funding funding = new Funding();
 
         // current person
         Person person = new Person();
-        
+
         // current organization
         Affiliation affiliation = new Affiliation();
         Affiliation institution = new Affiliation();
 
-        TaggingTokenClusteror clusteror = new TaggingTokenClusteror(GrobidModels.FUNDING_ACKNOWLEDGEMENT, result, tokenizations);
+        TaggingTokenClusteror clusteror = new TaggingTokenClusteror(GrobidModels.FUNDING_ACKNOWLEDGEMENT, labellingResult, tokensParagraph);
         List<TaggingTokenCluster> clusters = clusteror.cluster();
         TaggingLabel previousLabel = null;
 
-        Element curParagraph = teiElement("p");
-        List<Node> curParagraphNodes = new ArrayList<>();
+        List<Element> elements = new ArrayList<>();
+        List<OffsetPosition> positions = new ArrayList<>();
+
         int posTokenization = 0;
+        int posCharacters = 0;
 
         for (TaggingTokenCluster cluster : clusters) {
             if (cluster == null) {
@@ -224,7 +416,9 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
             }
 
             boolean spaceBefore = false;
-            if (posTokenization > 0 && tokenizations.size()>=posTokenization && tokenizations.get(posTokenization-1).getText().equals(" ")) {
+            if (posTokenization > 0
+                && tokensParagraph.size()>=posTokenization
+                && tokensParagraph.get(posTokenization-1).getText().equals(" ")) {
                 spaceBefore = true;
             }
 
@@ -232,7 +426,24 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
             Engine.getCntManager().i(clusterLabel);
 
             List<LayoutToken> tokens = cluster.concatTokens();
-            String clusterContent = LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(tokens));   
+            String clusterContent = LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(tokens));
+
+            if (clusterLabel.equals(FUNDING_OTHER)) {
+                posTokenization += tokens.size();
+                posCharacters += clusterContent.length();
+                continue;
+            }
+
+            // We adjust the end position when the entity ends with a space
+            int endPosTokenization = posTokenization + tokens.size();
+            if (Iterables.getLast(tokens).getText().equals(" ")) {
+                endPosTokenization -= 1;
+            }
+
+            int endPosCharacters = posCharacters + clusterContent.length();
+            if (Iterables.getLast(tokens).getText().equals(" ")) {
+                endPosCharacters -= 1;
+            }
 
             if (clusterLabel.equals(FUNDING_FUNDER_NAME)) {
                 Funder localFunder = funding.getFunder();
@@ -259,11 +470,9 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 Element entity = teiElement("rs");
                 entity.addAttribute(new Attribute("type", "funder"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
-
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
             } else if (clusterLabel.equals(FUNDING_GRANT_NAME)) {
                 if (StringUtils.isNotBlank(funding.getGrantName())) {
                     if (funding.isValid()) {
@@ -280,10 +489,9 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 Element entity = teiElement("rs");
                 entity.addAttribute(new Attribute("type", "grantName"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
 
             } else if (clusterLabel.equals(FUNDING_PERSON)) {
                 if (StringUtils.isNotBlank(person.getRawName())) {
@@ -300,10 +508,9 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 Element entity = teiElement("rs");
                 entity.addAttribute(new Attribute("type", "person"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
 
             } else if (clusterLabel.equals(FUNDING_AFFILIATION)) {
                 if (StringUtils.isNotBlank(affiliation.getAffiliationString())) {
@@ -320,17 +527,16 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 Element entity = teiElement("rs");
                 entity.addAttribute(new Attribute("type", "affiliation"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
 
             } else if (clusterLabel.equals(FUNDING_INSTITUTION)) {
                 if (StringUtils.isNotBlank(institution.getAffiliationString())) {
                     //if (institution.isNotNull()) {
-                        institutions.add(institution);
-                        // next funding object
-                        institution = new Affiliation();
+                    institutions.add(institution);
+                    // next funding object
+                    institution = new Affiliation();
                     //}
                 }
 
@@ -340,17 +546,16 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 Element entity = teiElement("rs");
                 entity.addAttribute(new Attribute("type", "institution"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
 
             } else if (clusterLabel.equals(FUNDING_INFRASTRUCTURE)) {
                 if (StringUtils.isNotBlank(institution.getAffiliationString())) {
                     //if (institution.isNotNull()) {
-                        institutions.add(institution);
-                        // next funding object
-                        institution = new Affiliation();
+                    institutions.add(institution);
+                    // next funding object
+                    institution = new Affiliation();
                     //}
                 }
                 institution.setAffiliationString(clusterContent);
@@ -361,10 +566,9 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 entity.addAttribute(new Attribute("type", "institution"));
                 entity.addAttribute(new Attribute("subtype", "infrastructure"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
 
             } else if (clusterLabel.equals(FUNDING_GRANT_NUMBER)) {
                 Funding previousFounding = null;
@@ -382,8 +586,8 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 funding.addLayoutTokens(tokens);
 
                 // possibly copy funder from previous funding object (case of "factorization" of grant numbers)
-                if (previousFounding != null && 
-                    previousFounding.getGrantNumber() != null && 
+                if (previousFounding != null &&
+                    previousFounding.getGrantNumber() != null &&
                     clusterContent.length() == previousFounding.getGrantNumber().length()) {
                     funding.setFunder(previousFounding.getFunder());
                 }
@@ -391,10 +595,9 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 Element entity = teiElement("rs");
                 entity.addAttribute(new Attribute("type", "grantNumber"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
 
             } else if (clusterLabel.equals(FUNDING_PROGRAM_NAME)) {
                 if (StringUtils.isNotBlank(funding.getProgramFullName())) {
@@ -412,10 +615,9 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 Element entity = teiElement("rs");
                 entity.addAttribute(new Attribute("type", "programName"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
 
             } else if (clusterLabel.equals(FUNDING_PROJECT_NAME)) {
                 if (StringUtils.isNotBlank(funding.getProjectFullName())) {
@@ -433,25 +635,17 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 Element entity = teiElement("rs");
                 entity.addAttribute(new Attribute("type", "projectName"));
                 entity.appendChild(clusterContent);
+                elements.add(entity);
 
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(entity);
+                positions.add(new OffsetPosition(posTokenization, endPosTokenization));
 
-            } else if (clusterLabel.equals(FUNDING_OTHER)) {
-                if (spaceBefore)
-                    curParagraphNodes.add(textNode(" "));
-                curParagraphNodes.add(textNode(clusterContent));
             } else {
                 LOGGER.warn("Unexpected funding model label - " + clusterLabel.getLabel() + " for " + clusterContent);
             }
 
             previousLabel = clusterLabel;
-            posTokenization += tokens.size(); 
-        }
-
-        for (Node n : curParagraphNodes) {
-            curParagraph.appendChild(n);
+            posTokenization += tokens.size();
+            posCharacters += clusterContent.length();
         }
 
         // last funding, person, institution/affiliation
@@ -463,11 +657,10 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
             fundings.add(funding);
         }
 
-
-        if (institution.isNotNull()) 
+        if (institution.isNotNull())
             institutions.add(institution);
 
-        if (affiliation.isNotNull()) 
+        if (affiliation.isNotNull())
             affiliations.add(affiliation);
 
         if (CollectionUtils.isNotEmpty(institutions)) {
@@ -478,9 +671,13 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
             localFunding.inferAcronyms();
         }
 
-        MutableTriple<List<Funding>,List<Person>,List<Affiliation>> entities = MutableTriple.of(fundings, persons, affiliations);
+        List<Pair<OffsetPosition, Element>> annotations = new ArrayList<>();
+
+        for (int i = 0; i < elements.size(); i++) {
+            annotations.add(Pair.of(positions.get(i), elements.get(i)));
+        }
 
-        return MutablePair.of(curParagraph, entities);
+        return MutablePair.of(annotations, parsedStatement);
     }
 
     /**
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
new file mode 100644
index 0000000000..72011fcdc2
--- /dev/null
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
@@ -0,0 +1,117 @@
+package org.grobid.core.engines
+
+import org.grobid.core.engines.config.GrobidAnalysisConfig
+import org.grobid.core.factory.AbstractEngineFactory
+import org.grobid.core.utilities.GrobidConfig
+import org.grobid.core.utilities.GrobidProperties
+import org.hamcrest.CoreMatchers.`is`
+import org.hamcrest.MatcherAssert.assertThat
+import org.hamcrest.Matchers.hasSize
+import org.junit.Before
+import org.junit.BeforeClass
+import org.junit.Test
+
+class FundingAcknowledgementParserIntegrationTest {
+    // End-to-end tests of processingXmlFragment; the engine is initialised once in setInitialContext().
+    private lateinit var target: FundingAcknowledgementParser
+
+    @Before
+    @Throws(Exception::class)
+    fun setUp() {
+        val modelParameters = GrobidConfig.ModelParameters()
+        modelParameters.name = "bao"
+        GrobidProperties.addModel(modelParameters)
+        target = FundingAcknowledgementParser()
+    }
+
+    @Test
+    fun testXmlFragmentProcessing_withoutSentenceSegmentation_shouldReturnSameXML() {
+
+        val input = "\n\t\t\t<div type=\"acknowledgement\">\n<div><head>Acknowledgments</head><p>This research was " +
+            "funded by the NASA Land-Cover and Land-Use Change Program (Grant Number: 80NSSC18K0315), the NASA " +
+            "Carbon Monitoring System (Grant Number: 80NSSC20K0022), and </p></div>\n\t\t\t</div>\n\n"
+
+
+        // Expected
+//        val output = "\n\t\t\t<div type=\"acknowledgement\">\n<div><head>Acknowledgments</head><p>This research was " +
+//            "funded by the <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"funder\">NASA</rs> " +
+//            "<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"programName\">Land-Cover and Land-Use Change Program</rs> " +
+//            "(Grant Number: <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">80NSSC18K0315</rs>), " +
+//            "the <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"funder\">NASA Carbon Monitoring System</rs> " +
+//            "(Grant Number: <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">80NSSC20K0022</rs>), " +
+//            "and </p></div>\n\t\t\t</div>\n\n"
+
+        // Current version output
+        val output = "<div type=\"acknowledgement\">\n<div><head>Acknowledgments</head><p>This research was " +
+            "funded by the <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"funder\">NASA</rs> " +
+            "<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"programName\">Land-Cover and Land-Use Change Program</rs> " +
+            "(Grant Number: <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">80NSSC18K0315</rs>), " +
+            "the <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"funder\">NASA Carbon Monitoring System</rs> " +
+            "(Grant Number: <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">80NSSC20K0022</rs>), " +
+            "and</p></div>\n\t\t\t</div>"
+
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(false)
+            .build()
+
+        val (element, mutableTriple) = target.processingXmlFragment(input, config)
+
+        assertThat(element.toXML(), `is`(output))
+        assertThat(mutableTriple.left, hasSize(2))
+    }
+
+    @Test
+    fun testXmlFragmentProcessing2_withoutSentenceSegmentation_shouldReturnSameXML() {
+        val input ="\n" +
+            "\t\t\t<div type=\"acknowledgement\">\n" +
+            "<div xmlns=\"http://www.tei-c.org/ns/1.0\"><head>Acknowledgements</head><p>Our warmest thanks to Patrice Lopez, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript.</p></div>\n" +
+            "\t\t\t</div>\n\n"
+
+        // Expected
+//        val output = "\n\t\t\t<div type=\"acknowledgement\">\n" +
+//            "<div><head>Acknowledgements</head><p>Our warmest thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs> for his support during this work. Special thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs> for useful tips on the manuscript.</p></div>\n" +
+//            "\t\t\t</div>\n\n"
+
+        // Current version output
+        val output = "<div type=\"acknowledgement\">\n" +
+            "<div><head>Acknowledgements</head><p>Our warmest thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs> for his support during this work. Special thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs> for useful tips on the manuscript.</p></div>\n" +
+            "\t\t\t</div>"
+
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(false)
+            .build()
+
+        val (element, _) = target.processingXmlFragment(input, config)
+
+        assertThat(element.toXML(), `is`(output))
+    }
+
+    @Test
+    fun testXmlFragmentProcessing2_withSentenceSegmentation_shouldWork() {
+        val input ="\n" +
+            "\t\t\t<div type=\"acknowledgement\">\n" +
+            "<div xmlns=\"http://www.tei-c.org/ns/1.0\"><head>Acknowledgements</head><p><s>Our warmest thanks to Patrice Lopez, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank Pedro Baptista de Castro for his support during this work.</s><s>Special thanks to Erina Fujita for useful tips on the manuscript.</s></p></div>\n" +
+            "\t\t\t</div>\n\n"
+        // toXML() serialises only the root element: well-formed markup, no whitespace outside the outer <div>
+        val output = "<div type=\"acknowledgement\">\n" +
+            "<div><head>Acknowledgements</head><p><s>Our warmest thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs> for his support during this work.</s><s>Special thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs> for useful tips on the manuscript.</s></p></div>\n" +
+            "\t\t\t</div>"
+
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(true)
+            .build()
+
+        val (element, _) = target.processingXmlFragment(input, config)
+
+        assertThat(element.toXML(), `is`(output))
+    }
+
+    companion object {
+        @JvmStatic
+        @BeforeClass
+        @Throws(Exception::class)
+        fun setInitialContext() {
+            AbstractEngineFactory.init()
+        }
+    }
+}
\ No newline at end of file
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
index 565fb78594..cc636b4aa7 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
@@ -1,19 +1,21 @@
 package org.grobid.core.engines
 
+import nu.xom.Builder
+import nu.xom.Document
+import nu.xom.Element
 import org.grobid.core.GrobidModels
 import org.grobid.core.analyzers.GrobidAnalyzer
 import org.grobid.core.data.Funder
 import org.grobid.core.data.Funding
 import org.grobid.core.layout.LayoutToken
-import org.grobid.core.lexicon.Lexicon
 import org.grobid.core.utilities.GrobidConfig
 import org.grobid.core.utilities.GrobidProperties
+import org.grobid.core.utilities.LayoutTokensUtil
 import org.hamcrest.CoreMatchers.`is`
 import org.hamcrest.MatcherAssert.assertThat
 import org.hamcrest.Matchers.hasSize
 import org.junit.Before
 import org.junit.Test
-import org.powermock.api.easymock.PowerMock
 
 class FundingAcknowledgementParserTest {
 
@@ -22,7 +24,6 @@ class FundingAcknowledgementParserTest {
     @Before
     @Throws(Exception::class)
     fun setUp() {
-        PowerMock.mockStatic(Lexicon::class.java)
         val modelParameters = GrobidConfig.ModelParameters()
         modelParameters.name = "bao"
         GrobidProperties.addModel(modelParameters)
@@ -103,14 +104,14 @@ class FundingAcknowledgementParserTest {
 
         val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
 
-        val (element, mutableTriple) = target.getExtractionResult(tokens, results)
+        val (element, fundingAcknowledgmentParse) = target.getExtractionResult(tokens, results)
 
-        assertThat(mutableTriple.left, hasSize(0))
-        assertThat(mutableTriple.middle, hasSize(3))
-        assertThat(mutableTriple.middle.get(0).rawName, `is`("Patrice Lopez"))
-        assertThat(mutableTriple.middle.get(1).rawName, `is`("Pedro Baptista de Castro"))
-        assertThat(mutableTriple.middle.get(2).rawName, `is`("Erina Fujita"))
-        assertThat(mutableTriple.right, hasSize(0))
+        assertThat(fundingAcknowledgmentParse.fundings, hasSize(0))
+        assertThat(fundingAcknowledgmentParse.persons, hasSize(3))
+        assertThat(fundingAcknowledgmentParse.persons.get(0).rawName, `is`("Patrice Lopez"))
+        assertThat(fundingAcknowledgmentParse.persons.get(1).rawName, `is`("Pedro Baptista de Castro"))
+        assertThat(fundingAcknowledgmentParse.persons.get(2).rawName, `is`("Erina Fujita"))
+        assertThat(fundingAcknowledgmentParse.affiliations, hasSize(0))
     }
 
     @Test
@@ -156,15 +157,221 @@ class FundingAcknowledgementParserTest {
 
         val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
 
-        val (element, mutableTriple) = target.getExtractionResult(tokens, results)
+        val (element, fundingAcknowledgmentParse) = target.getExtractionResult(tokens, results)
 
-        assertThat(mutableTriple.left, hasSize(1))
-        val funding1: Funding = mutableTriple.left.get(0)
+        assertThat(fundingAcknowledgmentParse.fundings, hasSize(1))
+        val funding1: Funding = fundingAcknowledgmentParse.fundings.get(0)
         val funder1: Funder = funding1.funder
 //        assertThat(funder1.fullName, `is`("MEXT"))
         assertThat(funding1.programFullName, `is`("Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials)"))
         assertThat(funder1.fullName, `is`("Ministry of Education, Culture, Sports, Science and Technology"))
-        assertThat(mutableTriple.middle, hasSize(0))
-        assertThat(mutableTriple.right, hasSize(0))
+        assertThat(fundingAcknowledgmentParse.persons, hasSize(0))
+        assertThat(fundingAcknowledgmentParse.affiliations, hasSize(0))
+    }
+
+    @Test
+    fun extractSentencesAndPositionsFromParagraphElement_shouldReturnValidIntervals() {
+        // The input carries no TEI namespace: it must already be stripped by the time a node reaches the method under test
+        val input ="\n" +
+            "\t\t\t<div type=\"acknowledgement\">\n" +
+            "<div><head>Acknowledgements</head><p><s>Our warmest thanks to Patrice Lopez, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank Pedro Baptista de Castro for his support during this work.</s><s>Special thanks to Erina Fujita for useful tips on the manuscript.</s></p></div>\n" +
+            "\t\t\t</div>\n\n"
+        // Parse the fragment and select its <p> nodes; only the first paragraph is exercised below
+        val parser = Builder()
+        val localDoc: Document = parser.build(input, null)
+        val root = localDoc.rootElement
+        val paragraphs = root.query("//p")
+
+        val firstParagraphText = paragraphs[0].value
+
+        val (strings, offsetPositions) = FundingAcknowledgementParser.extractSentencesAndPositionsFromParagraphElement(
+            paragraphs[0] as Element?
+        )
+        // Each (start, end) pair must slice the paragraph text value back to exactly one sentence, markup excluded
+        assertThat(strings, hasSize(3))
+        assertThat(offsetPositions, hasSize(3))
+        assertThat(firstParagraphText.substring(offsetPositions[0].start, offsetPositions[0].end),
+            `is`("Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions."))
+        assertThat(firstParagraphText.substring(offsetPositions[1].start, offsetPositions[1].end),
+            `is`("We thank Pedro Baptista de Castro for his support during this work."))
+        assertThat(firstParagraphText.substring(offsetPositions[2].start, offsetPositions[2].end),
+            `is`("Special thanks to Erina Fujita for useful tips on the manuscript."))
+    }
+
+    @Test
+    fun testGetExtractionResultNew1_ShouldReturnCorrectElementsAndPositions() {
+        // Three sentences, each thanking one person; the labelled rows below tag exactly those three names as <person>
+        val input = "Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript.";
+        // One row per token: tab-separated features with the label in the last column ("I-" marks the first token of a labelled segment)
+        val results: String = "Our\tour\tO\tOu\tOur\tOur\tr\tur\tOur\tOur\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            "warmest\twarmest\tw\twa\twar\twarm\tt\tst\test\tmest\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "thanks\tthanks\tt\tth\ttha\tthan\ts\tks\tnks\tanks\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "to\tto\tt\tto\tto\tto\to\tto\tto\tto\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "Patrice\tpatrice\tP\tPa\tPat\tPatr\te\tce\tice\trice\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+            "Lopez\tlopez\tL\tLo\tLop\tLope\tz\tez\tpez\topez\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\tI-<other>\n" +
+            "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "author\tauthor\ta\tau\taut\tauth\tr\tor\thor\tthor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "of\tof\to\tof\tof\tof\tf\tof\tof\tof\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "Grobid\tgrobid\tG\tGr\tGro\tGrob\td\tid\tbid\tobid\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "[\t[\t[\t[\t[\t[\t[\t[\t[\t[\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<other>\n" +
+            "22\t22\t2\t22\t22\t22\t2\t22\t22\t22\tLINEIN\tNOCAPS\tALLDIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "]\t]\t]\t]\t]\t]\t]\t]\t]\t]\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\t<other>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\t<other>\n" +
+            "DeLFT\tdelft\tD\tDe\tDeL\tDeLF\tT\tFT\tLFT\teLFT\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "[\t[\t[\t[\t[\t[\t[\t[\t[\t[\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<other>\n" +
+            "20\t20\t2\t20\t20\t20\t0\t20\t20\t20\tLINEIN\tNOCAPS\tALLDIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "]\t]\t]\t]\t]\t]\t]\t]\t]\t]\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\t<other>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\t<other>\n" +
+            "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "other\tother\to\tot\toth\tothe\tr\ter\ther\tther\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "open\topen\to\top\tope\topen\tn\ten\tpen\topen\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tHYPHEN\t0\t<other>\n" +
+            "source\tsource\ts\tso\tsou\tsour\te\tce\trce\turce\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "projects\tprojects\tp\tpr\tpro\tproj\ts\tts\tcts\tects\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "his\this\th\thi\this\this\ts\tis\this\this\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "continuous\tcontinuous\tc\tco\tcon\tcont\ts\tus\tous\tuous\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "support\tsupport\ts\tsu\tsup\tsupp\tt\trt\tort\tport\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "inspiration\tinspiration\ti\tin\tins\tinsp\tn\ton\tion\ttion\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "with\twith\tw\twi\twit\twith\th\tth\tith\twith\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "ideas\tideas\ti\tid\tide\tidea\ts\tas\teas\tdeas\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\t<other>\n" +
+            "suggestions\tsuggestions\ts\tsu\tsug\tsugg\ts\tns\tons\tions\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\t<other>\n" +
+            "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "fruitful\tfruitful\tf\tfr\tfru\tfrui\tl\tul\tful\ttful\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "discussions\tdiscussions\td\tdi\tdis\tdisc\ts\tns\tons\tions\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>\n" +
+            "We\twe\tW\tWe\tWe\tWe\te\tWe\tWe\tWe\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "thank\tthank\tt\tth\ttha\tthan\tk\tnk\tank\thank\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "Pedro\tpedro\tP\tPe\tPed\tPedr\to\tro\tdro\tedro\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+            "Baptista\tbaptista\tB\tBa\tBap\tBapt\ta\tta\tsta\tista\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            "de\tde\td\tde\tde\tde\te\tde\tde\tde\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            "Castro\tcastro\tC\tCa\tCas\tCast\to\tro\ttro\tstro\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            "his\this\th\thi\this\this\ts\tis\this\this\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "support\tsupport\ts\tsu\tsup\tsupp\tt\trt\tort\tport\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "during\tduring\td\tdu\tdur\tduri\tg\tng\ting\tring\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "this\tthis\tt\tth\tthi\tthis\ts\tis\this\tthis\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "work\twork\tw\two\twor\twork\tk\trk\tork\twork\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>\n" +
+            "Special\tspecial\tS\tSp\tSpe\tSpec\tl\tal\tial\tcial\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "thanks\tthanks\tt\tth\ttha\tthan\ts\tks\tnks\tanks\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "to\tto\tt\tto\tto\tto\to\tto\tto\tto\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "Erina\terina\tE\tEr\tEri\tErin\ta\tna\tina\trina\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+            "Fujita\tfujita\tF\tFu\tFuj\tFuji\ta\tta\tita\tjita\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            "useful\tuseful\tu\tus\tuse\tusef\tl\tul\tful\teful\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "tips\ttips\tt\tti\ttip\ttips\ts\tps\tips\ttips\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "on\ton\to\ton\ton\ton\tn\ton\ton\ton\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "manuscript\tmanuscript\tm\tma\tman\tmanu\tt\tpt\tipt\tript\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>";
+
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+
+        val (spans, statement) = target.getExtractionResult(tokens, results)
+
+        assertThat(statement.fundings, hasSize(0))
+        assertThat(statement.persons, hasSize(3))
+        assertThat(statement.persons[0].rawName, `is`("Patrice Lopez"))
+        assertThat(statement.persons[1].rawName, `is`("Pedro Baptista de Castro"))
+        assertThat(statement.persons[2].rawName, `is`("Erina Fujita"))
+        assertThat(statement.affiliations, hasSize(0))
+        // Each span pairs an interval of indexes into `tokens` (left) with the TEI <rs> element built for that entity (right)
+        assertThat(spans, hasSize(3))
+        val span0 = spans[0]
+        val offsetPosition0 = span0.left
+        val element0 = span0.right
+
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition0.start, offsetPosition0.end)), `is`("Patrice Lopez"))
+        assertThat(element0.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>"))
+
+        val span1 = spans[1]
+        val offsetPosition1 = span1.left
+        val element1 = span1.right
+
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition1.start, offsetPosition1.end)), `is`("Pedro Baptista de Castro"))
+        assertThat(element1.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs>"))
+
+        val span2 = spans[2]
+        val offsetPosition2 = span2.left
+        val element2 = span2.right
+
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition2.start, offsetPosition2.end)), `is`("Erina Fujita"))
+        assertThat(element2.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs>"))
+    }
+
+    @Test
+    fun testGetExtractionResultNew2_ShouldReturnCorrectElementsAndPositions() {
+        val input = "This work was partly supported by MEXT Program: Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials) Grant Number [JPMXP1122715503].";
+        // One row per token: tab-separated features with the label in the last column; the rows tag a funder name, a program name and a grant number
+        val results: String = "This\tthis\tT\tTh\tThi\tThis\ts\tis\this\tThis\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            "work\twork\tw\two\twor\twork\tk\trk\tork\twork\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "was\twas\tw\twa\twas\twas\ts\tas\twas\twas\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "partly\tpartly\tp\tpa\tpar\tpart\ty\tly\ttly\trtly\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "supported\tsupported\ts\tsu\tsup\tsupp\td\ted\tted\trted\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "by\tby\tb\tby\tby\tby\ty\tby\tby\tby\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "MEXT\tmext\tM\tME\tMEX\tMEXT\tT\tXT\tEXT\tMEXT\tLINEIN\tALLCAP\tNODIGIT\t0\t1\t0\tNOPUNCT\t0\tI-<funderName>\n" +
+            "Program\tprogram\tP\tPr\tPro\tProg\tm\tam\tram\tgram\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            ":\t:\t:\t:\t:\t:\t:\t:\t:\t:\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tPUNCT\t0\t<other>\n" +
+            "Data\tdata\tD\tDa\tDat\tData\ta\tta\tata\tData\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<programName>\n" +
+            "Creation\tcreation\tC\tCr\tCre\tCrea\tn\ton\tion\ttion\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Utilization\tutilization\tU\tUt\tUti\tUtil\tn\ton\tion\ttion\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tHYPHEN\t0\t<programName>\n" +
+            "Type\ttype\tT\tTy\tTyp\tType\te\tpe\type\tType\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Material\tmaterial\tM\tMa\tMat\tMate\tl\tal\tial\trial\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Research\tresearch\tR\tRe\tRes\tRese\th\tch\trch\tarch\tLINEIN\tINITCAP\tNODIGIT\t0\t1\t0\tNOPUNCT\t0\t<programName>\n" +
+            "and\tand\ta\tan\tand\tand\td\tnd\tand\tand\tLINEIN\tNOCAPS\tNODIGIT\t0\t1\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Development\tdevelopment\tD\tDe\tDev\tDeve\tt\tnt\tent\tment\tLINEIN\tINITCAP\tNODIGIT\t0\t1\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Project\tproject\tP\tPr\tPro\tProj\tt\tct\tect\tject\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "(\t(\t(\t(\t(\t(\t(\t(\t(\t(\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<programName>\n" +
+            "Digital\tdigital\tD\tDi\tDig\tDigi\tl\tal\ttal\tital\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Transformation\ttransformation\tT\tTr\tTra\tTran\tn\ton\tion\ttion\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Initiative\tinitiative\tI\tIn\tIni\tInit\te\tve\tive\ttive\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Center\tcenter\tC\tCe\tCen\tCent\tr\ter\tter\tnter\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "for\tfor\tf\tfo\tfor\tfor\tr\tor\tfor\tfor\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Magnetic\tmagnetic\tM\tMa\tMag\tMagn\tc\tic\ttic\tetic\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            "Materials\tmaterials\tM\tMa\tMat\tMate\ts\tls\tals\tials\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<programName>\n" +
+            ")\t)\t)\t)\t)\t)\t)\t)\t)\t)\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\t<programName>\n" +
+            "Grant\tgrant\tG\tGr\tGra\tGran\tt\tnt\tant\trant\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
+            "Number\tnumber\tN\tNu\tNum\tNumb\tr\ter\tber\tmber\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
+            "[\t[\t[\t[\t[\t[\t[\t[\t[\t[\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<other>\n" +
+            "JPMXP1122715503\tjpmxp1122715503\tJ\tJP\tJPM\tJPMX\t3\t03\t503\t5503\tLINEIN\tALLCAP\tCONTAINSDIGITS\t0\t0\t0\tNOPUNCT\t0\tI-<grantNumber>\n" +
+            "]\t]\t]\t]\t]\t]\t]\t]\t]\t]\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\tI-<other>\n" +
+            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>";
+
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+
+        val (spans, statement) = target.getExtractionResult(tokens, results)
+
+        assertThat(statement.fundings, hasSize(1))
+        assertThat(statement.persons, hasSize(0))
+        assertThat(statement.affiliations, hasSize(0))
+        // Each span pairs an interval of indexes into `tokens` (left) with the TEI <rs> element built for that entity (right)
+        assertThat(spans, hasSize(3))
+        val span0 = spans[0]
+        val offsetPosition0 = span0.left
+        val element0 = span0.right
+
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition0.start, offsetPosition0.end)), `is`("MEXT"))
+        assertThat(element0.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"funder\">MEXT</rs>"))
+
+        val span1 = spans[1]
+        val offsetPosition1 = span1.left
+        val element1 = span1.right
+
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition1.start, offsetPosition1.end)), `is`("Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials)"))
+        assertThat(element1.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"programName\">Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials)</rs>"))
+
+        val span2 = spans[2]
+        val offsetPosition2 = span2.left
+        val element2 = span2.right
+
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition2.start, offsetPosition2.end)), `is`("JPMXP1122715503"))
+        assertThat(element2.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">JPMXP1122715503</rs>"))
     }
 }
\ No newline at end of file

From 7628f4099e305551cb784c5c2d72f4d785ae1d76 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 17:34:14 +0900
Subject: [PATCH 15/31] publish test results on GitHub Actions

---
 .github/workflows/ci-build-unstable.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml
index cf69c53314..444c527e00 100644
--- a/.github/workflows/ci-build-unstable.yml
+++ b/.github/workflows/ci-build-unstable.yml
@@ -25,6 +25,15 @@ jobs:
       - name: Test with Gradle Jacoco and Coveralls
         run: ./gradlew test jacocoTestReport coveralls --no-daemon
 
+      - name: Publish Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          files: |
+            test-results/**/*.xml
+            test-results/**/*.trx
+            test-results/**/*.json
+
       - name: Coveralls GitHub Action
         uses: coverallsapp/github-action@v2
         with:

From 83416a92c9b26ea13bad590e83c1f37769bdd2e3 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 17:44:27 +0900
Subject: [PATCH 16/31] fix test path

---
 .github/workflows/ci-build-unstable.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml
index 444c527e00..19cb5afcda 100644
--- a/.github/workflows/ci-build-unstable.yml
+++ b/.github/workflows/ci-build-unstable.yml
@@ -30,9 +30,9 @@ jobs:
         if: always()
         with:
           files: |
-            test-results/**/*.xml
-            test-results/**/*.trx
-            test-results/**/*.json
+            build/test-results/**/*.xml
+            build/test-results/**/*.trx
+            build/test-results/**/*.json
 
       - name: Coveralls GitHub Action
         uses: coverallsapp/github-action@v2

From 364176da2b5b71568f0211cc7d9cd7b0a9c8cf47 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 18:12:27 +0900
Subject: [PATCH 17/31] Fix incorrect offsets when processing paragraphs and
 update tests

---
 .../engines/FundingAcknowledgementParser.java | 142 ++++++++++--------
 ...ingAcknowledgementParserIntegrationTest.kt |  10 +-
 2 files changed, 86 insertions(+), 66 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 160e84854f..6be01ec4b5 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -150,8 +150,6 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 GrobidAnalyzer analyzer = GrobidAnalyzer.getInstance();
                 List<LayoutToken> tokenizationFunding = analyzer.tokenizeWithLayoutToken(paragraphText);
 
-                StringBuilder sb = new StringBuilder();
-
                 MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> localResult = processing(tokenizationFunding, config);
 
                 List<Pair<OffsetPosition, Element>> annotations = localResult.left;
@@ -186,73 +184,17 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
 //                        offsetPositionList.add(new OffsetPosition(pos, pos + sentenceLayoutToken.size()));
 //                        pos += sentenceLayoutToken.size();
 //                    }
-                    int pos = 0;
-                    int sentenceStartOffset = 0;
                     Nodes sentences = paragraph.query("//s");
 
                     if(sentences.size() == 0) {
                         // Overly careful - we should never end up here.
                         LOGGER.warn("While the configuration claim that paragraphs must be segmented, we did not find any sentence. ");
-
-                        List<Node> nodes = getNodesAnnotationsInTextNode(paragraph, annotations);
-
-                        for (int i = 0; i < paragraph.getChildCount(); i++) {
-                            paragraph.getChild(i).detach();
-                        }
-                        for (Node node: nodes) {
-                            node.detach();
-                            ((Element) paragraph).appendChild(node);
-                        }
+                        updateParagraphNodeWithAnnotations(paragraph, annotations);
                     }
 
-                    for (Node sentence : sentences) {
-                        String sentenceText = sentence.getValue();
-                        List<Node> newChildren = new ArrayList<>();
-                        for (int i = 0; i < sentence.getChildCount(); i++) {
-                            //Assumption here is that the structure is flat to maximum one level down
-                            Node currentNode = sentence.getChild(i);
-                            if (currentNode instanceof Text) {
-                                String text = currentNode.getValue();
-                                int finalPos = pos;
-                                List<Pair<OffsetPosition, Element>> annotationsInThisChunk = annotations.stream()
-                                    .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end < finalPos + text.length())
-                                    .toList();
-
-                                if (CollectionUtils.isNotEmpty(annotationsInThisChunk)) {
-                                    List<Node> nodes = getNodesAnnotationsInTextNode(currentNode, annotationsInThisChunk, pos);
-                                    newChildren.addAll(nodes);
-                                } else {
-                                    newChildren.add(currentNode);
-                                }
-                                pos += text.length();
-                            } else if (currentNode instanceof Element) {
-                                newChildren.add(currentNode);
-                                pos += currentNode.getValue().length();
-                            } /*else {
-                                System.out.println(currentNode);
-                            }*/
-                        }
-
-                        for (int i = 0; i < sentence.getChildCount(); i++) {
-                            sentence.getChild(i).detach();
-                        }
-                        for (Node node: newChildren) {
-                            node.detach();
-                            ((Element) sentence).appendChild(node);
-                        }
-
-                        sentenceStartOffset += sentenceText.length();
-                    }
+                    updateNodes(sentences, annotations);
                 } else {
-                    List<Node> nodes = getNodesAnnotationsInTextNode(paragraph, annotations);
-
-                    for (int i = 0; i < paragraph.getChildCount(); i++) {
-                        paragraph.getChild(i).detach();
-                    }
-                    for (Node node: nodes) {
-                        node.detach();
-                        ((Element) paragraph).appendChild(node);
-                    }
+                    updateParagraphNodeWithAnnotations(paragraph, annotations);
                 }
 
                 // update extracted entities
@@ -277,6 +219,84 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
         return globalResult;
     }
 
+    /**
+     * Rebuilds the children of {@code paragraph}: each Text child containing annotations is replaced by the nodes from
+     * getNodesAnnotationsInTextNode, other Text/Element children are kept. Assumes a flat structure (max one level down).
+     */
+    private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair<OffsetPosition, Element>> annotations) {
+        int pos = 0; // offset of the current child within the concatenated values of the children seen so far
+        List<Node> newChildren = new ArrayList<>();
+        for (int i = 0; i < paragraph.getChildCount(); i++) {
+            Node currentNode = paragraph.getChild(i);
+            if (currentNode instanceof Text) {
+                String text = currentNode.getValue();
+                int finalPos = pos;
+                List<Pair<OffsetPosition, Element>> annotationsInThisChunk = annotations.stream()
+                    .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end < finalPos + text.length())
+                    .toList(); // NOTE(review): strict "end <" skips an annotation ending exactly at the chunk end; confirm
+                if (CollectionUtils.isNotEmpty(annotationsInThisChunk)) {
+                    newChildren.addAll(getNodesAnnotationsInTextNode(currentNode, annotationsInThisChunk, pos));
+                } else {
+                    newChildren.add(currentNode);
+                }
+                pos += text.length();
+            } else if (currentNode instanceof Element) {
+                newChildren.add(currentNode);
+                pos += currentNode.getValue().length();
+            }
+        }
+        for (int i = paragraph.getChildCount() - 1; i >= 0; i--) { // backwards: a forward loop skips every other child
+            paragraph.getChild(i).detach();
+        }
+        for (Node node : newChildren) {
+            node.detach();
+            ((Element) paragraph).appendChild(node);
+        }
+    }
+
+    /**
+     * Injects the annotations into the given sentence nodes: in each sentence, every Text child containing
+     * annotations is replaced by the nodes returned by getNodesAnnotationsInTextNode, while other Text/Element
+     * children are kept. Assumes a flat structure (max one level down). The running offset is never reset, so
+     * annotation offsets are matched against the concatenated values of the sentences, in iteration order.
+     * NOTE(review): the strict "end <" filter skips an annotation ending exactly at a chunk end; confirm intended.
+     */
+    private static void updateNodes(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations) {
+        int pos = 0;
+        for (Node sentence : sentences) {
+            List<Node> newChildren = new ArrayList<>();
+            for (int i = 0; i < sentence.getChildCount(); i++) {
+                Node currentNode = sentence.getChild(i);
+                if (currentNode instanceof Text) {
+                    String text = currentNode.getValue();
+                    int finalPos = pos;
+                    List<Pair<OffsetPosition, Element>> annotationsInThisChunk = annotations.stream()
+                        .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end < finalPos + text.length())
+                        .toList();
+                    if (CollectionUtils.isNotEmpty(annotationsInThisChunk)) {
+                        newChildren.addAll(getNodesAnnotationsInTextNode(currentNode, annotationsInThisChunk, pos));
+                    } else {
+                        newChildren.add(currentNode);
+                    }
+                    pos += text.length();
+                } else if (currentNode instanceof Element) {
+                    newChildren.add(currentNode);
+                    pos += currentNode.getValue().length();
+                }
+            }
+
+            // Detach from the last child to the first: detaching shifts the following siblings down by one,
+            // so a forward index loop would skip every other child and leave stale nodes attached.
+            for (int i = sentence.getChildCount() - 1; i >= 0; i--) {
+                sentence.getChild(i).detach();
+            }
+            for (Node node : newChildren) {
+                node.detach();
+                ((Element) sentence).appendChild(node);
+            }
+        }
+    }
+
     /**
      * This method return a list of nodes corresponding to the annotations as they are positioned in
      * the text content of the target node. If the node is empty, should be used @see injectedAnnotationsInNode
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
index 72011fcdc2..c7413c3f24 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
@@ -48,7 +48,7 @@ class FundingAcknowledgementParserIntegrationTest {
             "(Grant Number: <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">80NSSC18K0315</rs>), " +
             "the <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"funder\">NASA Carbon Monitoring System</rs> " +
             "(Grant Number: <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">80NSSC20K0022</rs>), " +
-            "and</p></div>\n\t\t\t</div>"
+            "and </p></div>\n\t\t\t</div>"
 
         val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
             .withSentenceSegmentation(false)
@@ -74,7 +74,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
         // Current version output
         val output = "<div type=\"acknowledgement\">\n" +
-            "<div><head>Acknowledgements</head><p>Our warmest thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs> for his support during this work. Special thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs> for useful tips on the manuscript.</p></div>\n" +
+            "<div><head>Acknowledgements</head><p>Our warmest thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs> for his support during this work. Special thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs> for useful tips on the manuscript.</p></div>\n" +
             "\t\t\t</div>"
 
         val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
@@ -93,9 +93,9 @@ class FundingAcknowledgementParserIntegrationTest {
             "<div xmlns=\"http://www.tei-c.org/ns/1.0\"><head>Acknowledgements</head><p><s>Our warmest thanks to Patrice Lopez, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank Pedro Baptista de Castro for his support during this work.</s><s>Special thanks to Erina Fujita for useful tips on the manuscript.</s></p></div>\n" +
             "\t\t\t</div>\n\n"
 
-        val output = "\n\t\t\t<div type=\"acknowledgement\">\n" +
-            "<div><head>Acknowledgements</head><p><s>Our warmest thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs> for his support during this work.</s><s>Special thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs> for useful tips on the manuscript.</p></div>\n" +
-            "\t\t\t</div>\n\n"
+        val output = "<div type=\"acknowledgement\">\n" +
+            "<div><head>Acknowledgements</head><p><s>Our warmest thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs> for his support during this work.</s><s>Special thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs> for useful tips on the manuscript.</s></p></div>\n" +
+            "\t\t\t</div>"
 
         val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
             .withSentenceSegmentation(true)

From 9dc767f3224ffaaed2d332aaf51a194529ef119f Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 18:17:44 +0900
Subject: [PATCH 18/31] report on test failure/success

---
 .github/workflows/ci-build-unstable.yml | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml
index 19cb5afcda..f5d7721f60 100644
--- a/.github/workflows/ci-build-unstable.yml
+++ b/.github/workflows/ci-build-unstable.yml
@@ -25,14 +25,13 @@ jobs:
       - name: Test with Gradle Jacoco and Coveralls
         run: ./gradlew test jacocoTestReport coveralls --no-daemon
 
-      - name: Publish Test Results
-        uses: EnricoMi/publish-unit-test-result-action@v2
-        if: always()
+      - name: Test Results
+        uses: dorny/test-reporter@v1
         with:
-          files: |
-            build/test-results/**/*.xml
-            build/test-results/**/*.trx
-            build/test-results/**/*.json
+          artifact: test-results
+          name: JUNIT Tests
+          path: '*.xml'
+          reporter: java-junit
 
       - name: Coveralls GitHub Action
         uses: coverallsapp/github-action@v2

From 753a73ecbe20a591050f0accb58e9d9607e162d2 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 18:42:03 +0900
Subject: [PATCH 19/31] report on test failure/success

---
 .github/workflows/ci-build-unstable.yml | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml
index f5d7721f60..8ed302e156 100644
--- a/.github/workflows/ci-build-unstable.yml
+++ b/.github/workflows/ci-build-unstable.yml
@@ -25,13 +25,11 @@ jobs:
       - name: Test with Gradle Jacoco and Coveralls
         run: ./gradlew test jacocoTestReport coveralls --no-daemon
 
-      - name: Test Results
-        uses: dorny/test-reporter@v1
+      - name: Publish Test Report
+        uses: mikepenz/action-junit-report@v4
+        if: success() || failure() # always run even if the previous step fails
         with:
-          artifact: test-results
-          name: JUNIT Tests
-          path: '*.xml'
-          reporter: java-junit
+          report_paths: '**/build/test-results/test/TEST-*.xml'
 
       - name: Coveralls GitHub Action
         uses: coverallsapp/github-action@v2

From b2873bd473a6fdc121816d1c7698becf72771500 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 1 May 2024 19:06:30 +0900
Subject: [PATCH 20/31] enable sentence segmentation in the processing of a
 text chunk

---
 .../engines/FundingAcknowledgementParser.java | 45 ++++++++++++++++---
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 6be01ec4b5..3b20d704fa 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -11,6 +11,7 @@
 import org.grobid.core.GrobidModels;
 import org.grobid.core.analyzers.GrobidAnalyzer;
 import org.grobid.core.data.*;
+import org.grobid.core.document.xml.XmlBuilderUtils;
 import org.grobid.core.engines.config.GrobidAnalysisConfig;
 import org.grobid.core.engines.label.TaggingLabel;
 import org.grobid.core.engines.tagging.GenericTaggerUtils;
@@ -19,10 +20,8 @@
 import org.grobid.core.layout.LayoutToken;
 import org.grobid.core.tokenization.TaggingTokenCluster;
 import org.grobid.core.tokenization.TaggingTokenClusteror;
-import org.grobid.core.utilities.LayoutTokensUtil;
-import org.grobid.core.utilities.OffsetPosition;
-import org.grobid.core.utilities.TextUtilities;
 import org.grobid.core.utilities.UnicodeUtil;
+import org.grobid.core.utilities.*;
 import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -70,9 +69,8 @@ protected FundingAcknowledgementParser() {
      * For convenience, a processing method taking a raw string as input.
      * Tokenization is done with the default Grobid analyzer triggered by the identified language.
      *
-     * TODO: implement the sentence segmentation
      **/
-    public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>> processing(String text,
+    public MutablePair<Element, MutableTriple<List<Funding>, List<Person>, List<Affiliation>>> processing(String text,
                                                                                                    GrobidAnalysisConfig config) {
         text = UnicodeUtil.normaliseText(text);
         List<LayoutToken> tokenizationFunding = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
@@ -80,9 +78,42 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
         MutableTriple<List<Funding>, List<Person>, List<Affiliation>> entities = MutableTriple.of(results.getRight().getFundings(), results.getRight().getPersons(), results.getRight().getAffiliations());
         List<Pair<OffsetPosition, Element>> annotations = results.getLeft();
 
-        Element outputParagraph = injectedAnnotationsInNode(tokenizationFunding, annotations, teiElement("p"));
+        // Wrap the input in a TEI <p> element; its serialised form is handed to
+        // processingXmlFragment at the end of this method. Text is sanitised with
+        // stripNonValidXMLCharacters before it is appended, in both modes, so the raw
+        // string is never added to the XML tree.
+        Element outputParagraph = teiElement("p");
+
+        if (config.isWithSentenceSegmentation()) {
+            // One <s> child per detected sentence. The paragraph gets no direct text node
+            // in this mode, so no clean-up pass over its children is needed afterwards.
+            List<OffsetPosition> theSentences =
+                SentenceUtilities.getInstance().runSentenceDetection(text);
+
+            for (OffsetPosition sentence : theSentences) {
+                if (sentence.start > sentence.end) {
+                    // Defensive: skip an inverted interval instead of failing in substring().
+                    continue;
+                }
+
+                String localTextChunk = text.substring(sentence.start, sentence.end);
+                localTextChunk = XmlBuilderUtils.stripNonValidXMLCharacters(localTextChunk);
+
+                Element sentenceElement = teiElement("s");
+                sentenceElement.appendChild(localTextChunk);
+                outputParagraph.appendChild(sentenceElement);
+            }
+        } else {
+            // No segmentation requested: the paragraph holds the whole text as a single node.
+            outputParagraph.appendChild(XmlBuilderUtils.stripNonValidXMLCharacters(text));
+        }
+
+        // NOTE(review): the `entities` and `annotations` locals computed above are not used
+        // by the return statement below, which re-processes the serialised paragraph.
+        // Confirm whether that earlier extraction is still needed, or whether it is
+        // duplicated work that can be removed.
 
-        return MutablePair.of(outputParagraph, entities);
+        return processingXmlFragment(outputParagraph.toXML(), config);
     }
 
     /**

From 097ca9371790d5887cc02399d197a93b2d21880b Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sat, 4 May 2024 08:09:38 +0900
Subject: [PATCH 21/31] update xmlunit library

---
 build.gradle | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/build.gradle b/build.gradle
index 421879fb6c..89546fb8ac 100644
--- a/build.gradle
+++ b/build.gradle
@@ -111,7 +111,8 @@ subprojects {
         testImplementation 'org.easymock:easymock:5.1.0'
         testImplementation "org.powermock:powermock-api-easymock:2.0.7"
         testImplementation "org.powermock:powermock-module-junit4:2.0.7"
-        testImplementation "xmlunit:xmlunit:1.6"
+        testImplementation "org.xmlunit:xmlunit-matchers:2.10.0"
+        testImplementation "org.xmlunit:xmlunit-legacy:2.10.0"
         testImplementation "org.hamcrest:hamcrest-all:1.3"
         testImplementation 'org.jetbrains.kotlin:kotlin-test'
         testImplementation "io.mockk:mockk:1.13.9"

From cedee649c9abc624507b80a5b8bcea29ce8cef6f Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sat, 4 May 2024 08:10:10 +0900
Subject: [PATCH 22/31] Fix bug in the transformation of the intervals from
 token-based to character-based when the same tokens occur consecutively

---
 .../grobid/core/utilities/TextUtilities.java  | 18 +++---
 .../core/utilities/TextUtilitiesTest.java     | 42 +++++++++++++
 ...ingAcknowledgementParserIntegrationTest.kt | 32 +++++++++-
 .../FundingAcknowledgementParserTest.kt       | 63 +++++++++++++++++++
 4 files changed, 144 insertions(+), 11 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
index 73ec73b352..26d520fe73 100755
--- a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
+++ b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
@@ -1561,20 +1561,21 @@ public static List<OffsetPosition> matchTokenAndString(List<LayoutToken> layoutT
         List<OffsetPosition> newPositions = new ArrayList<>();
         StringBuilder accumulator = new StringBuilder();
         int pos = 0;
+        int textPositionOfToken = 0;
 
         for (OffsetPosition position : positions) {
-            List<LayoutToken> urlTokens = layoutTokens.subList(position.start, position.end);
+            List<LayoutToken> annotationTokens = layoutTokens.subList(position.start, position.end);
             boolean first = true;
             accumulator = new StringBuilder();
-            for (int i = 0; i < urlTokens.size(); i++) {
-                LayoutToken token = urlTokens.get(i);
+            for (int i = 0; i < annotationTokens.size(); i++) {
+                LayoutToken token = annotationTokens.get(i);
                 if (StringUtils.isEmpty(token.getText()))
                     continue;
-                int newPos = text.indexOf(token.getText(), pos);
-                if (newPos != -1) {
+                textPositionOfToken = text.indexOf(token.getText(), pos);
+                if (textPositionOfToken != -1) {
                     //We update pos only at the first token of the annotation positions
                     if (first) {
-                        pos = newPos;
+                        pos = textPositionOfToken;
                         first = false;
                     }
                     accumulator.append(token);
@@ -1585,16 +1586,17 @@ public static List<OffsetPosition> matchTokenAndString(List<LayoutToken> layoutT
                     if (StringUtils.isNotEmpty(accumulator)) {
                         int start = text.indexOf(accumulator.toString(), pos);
                         newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
-                        pos = newPos;
+                        pos = textPositionOfToken;
                         break;
                     }
-                    pos = newPos;
+                    pos = textPositionOfToken;
                 }
             }
             if (StringUtils.isNotEmpty(accumulator)) {
                 int start = text.indexOf(accumulator.toString(), pos);
                 newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
                 accumulator = new StringBuilder();
+                pos = textPositionOfToken;
             }
 
         }
diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
index 4df8704ae9..4db3914aca 100644
--- a/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
+++ b/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
@@ -471,4 +471,46 @@ public void testMatchTokenAndString_twoElements() throws Exception {
         assertThat(inputReal.substring(url1.start, url1.end), is("https://github.com/lfoppiano/ supercon2"));
 
     }
+
+    /**
+     * Two identical consecutive names ("Claudie Marec") must each be mapped to their own
+     * character interval when token-based positions are converted to character offsets.
+     */
+    @Test
+    public void testMatchTokenAndString_twoElementsWithEqualValue() throws Exception {
+        final String input = "Christophe Castagne, Claudie Marec, Claudie Marec, Claudio Stalder,";
+        List<LayoutToken> tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+
+        // Token-based intervals, one per name in the input
+        List<OffsetPosition> nameTokenPositions = Arrays.asList(
+            new OffsetPosition(0, 3),
+            new OffsetPosition(5, 8),
+            new OffsetPosition(10, 13),
+            new OffsetPosition(15, 18)
+        );
+
+        List<OffsetPosition> characterPositions =
+            TextUtilities.matchTokenAndString(tokenisedInput, input, nameTokenPositions);
+        assertThat(characterPositions, hasSize(4));
+
+        OffsetPosition first = characterPositions.get(0);
+        assertThat(first.start, is(0));
+        assertThat(first.end, is(19));
+        assertThat(input.substring(first.start, first.end), is("Christophe Castagne"));
+
+        OffsetPosition second = characterPositions.get(1);
+        assertThat(second.start, is(21));
+        assertThat(second.end, is(34));
+        assertThat(input.substring(second.start, second.end), is("Claudie Marec"));
+
+        OffsetPosition third = characterPositions.get(2);
+        assertThat(third.start, is(36));
+        assertThat(third.end, is(49));
+        assertThat(input.substring(third.start, third.end), is("Claudie Marec"));
+
+        OffsetPosition fourth = characterPositions.get(3);
+        assertThat(fourth.start, is(51));
+        assertThat(fourth.end, is(66));
+        assertThat(input.substring(fourth.start, fourth.end), is("Claudio Stalder"));
+    }
 }
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
index c7413c3f24..eead71bbd4 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
@@ -10,6 +10,7 @@ import org.hamcrest.Matchers.hasSize
 import org.junit.Before
 import org.junit.BeforeClass
 import org.junit.Test
+import org.xmlunit.matchers.CompareMatcher
 
 class FundingAcknowledgementParserIntegrationTest {
 
@@ -56,7 +57,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
         val (element, mutableTriple) = target.processingXmlFragment(input, config)
 
-        assertThat(element.toXML(), `is`(output))
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
         assertThat(mutableTriple.left, hasSize(2))
     }
 
@@ -83,7 +84,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
         val (element, mutableTriple) = target.processingXmlFragment(input, config)
 
-        assertThat(element.toXML(), `is`(output))
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
     }
 
     @Test
@@ -103,7 +104,32 @@ class FundingAcknowledgementParserIntegrationTest {
 
         val (element, mutableTriple) = target.processingXmlFragment(input, config)
 
-        assertThat(element.toXML(), `is`(output))
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
+    }
+
+    @Test
+    fun testXmlFragmentProcessing_ErrorCase_withSentenceSegmentation_shouldWork() {
+        val input ="""
+			<div type="funding">
+<div><p><s>Florentina Münzner, Lucy Schlicht, Adrian Tanara, Sany Tchanra and Marie-Jeanne Pesant for the manual curation of logsheets and archiving data at PANGAEA.</s><s>We also acknowledge the work of Andree Behnken who developed the dds-fdp web service.</s><s>All authors approved the final manuscript.</s><s>This article is contribution number 26 of the Tara Oceans Consortium.</s><s>The collection of Tara Oceans data was made possible by those who contributed to sampling and to logistics during the Tara Oceans Expedition: Alain Giese, Alan Deidun, Alban Lazar, Aldine Amiel, Ali Chase, Aline Tribollet, Ameer Abdullah, Amélie Betus, André Abreu, Andres Peyrot, Andrew Baker, Anna Deniaud, Anne Doye, Anne Ghuysen Watrin, Anne Royer, Anne Thompson, Annie McGrother, Antoine Sciandra, Antoine Triller, Aurélie Chambouvet, Baptiste Bernard, Baptiste Regnier, Beatriz Fernandez, Benedetto Barone, Bertrand Manzano, Bianca Silva, Brett Grant, Brigitte Sabard, Bruno Dunckel, Camille Clérissi, Catarina Marcolin, Cédric Guigand, Céline Bachelier, Céline Blanchard, Céline Dimier-Hugueney, Céline Rottier, Chris Bowler, Christian Rouvière, Christian Sardet, Christophe Boutte, Christophe Castagne, Claudie Marec, Claudie Marec, Claudio Stalder, Colomban De Vargas, Cornelia Maier, Cyril Tricot, Dana Sardet, Daniel Bayley, Daniel Cron, Daniele Iudicone, David Mountain, David Obura, David Sauveur, Defne Arslan, Denis Dausse, Denis de La Broise, Diana Ruiz Pino, Didier Zoccola, Édouard Leymarie, Éloïse Fontaine, Émilie Sauvage, Emilie Villar, Emmanuel Boss, Emmanuel G. 
Reynaud, Éric Béraud, Eric Karsenti, Eric Pelletier, Éric Roettinger, Erica Goetz, Fabien Perault, Fabiola Canard, Fabrice Not, Fabrizio D'Ortenzio, Fabrizio Limena, Floriane Desprez, Franck Prejger, François Aurat, François Noël, Franscisco Cornejo, Gabriel Gorsky, Gabriele Procaccini, Gabriella Gilkes, Gipsi Lima-Mendez, Grigor Obolensky, Guillaume Bracq, Guillem Salazar, Halldor Stefansson, Hélène Santener, Hervé Bourmaud, Hervé Le Goff, Hiroyuki Ogata, Hubert Gautier, Hugo Sarmento, Ian Probert, Isabel Ferrera, Isabelle Taupier-Letage, Jan Wengers, Jarred Swalwell, Javier del Campo, Jean-Baptiste Romagnan, Jean-Claude Gascard, Jean-Jacques Kerdraon, Jean-Louis Jamet, Jean-Michel Grisoni, Jennifer Gillette, Jérémie Capoulade, Jérôme Bastion, Jérôme Teigné, Joannie Ferland, Johan Decelle, Judith Prihoda, Julie Poulain, Julien Daniel, Julien Girardot, Juliette Chatelin, Lars Stemmann, Laurence Garczarek, Laurent Beguery, Lee Karp-Boss, Leila Tirichine, Linda Mollestan, Lionel Bigot, Loïc Vallette, Lucie Bittner, Lucie Subirana, Luis Gutiérrez, Lydiane Mattio, Magali Puiseux, Marc Domingos, Marc Picheral, Marc Wessner, Marcela Cornejo, Margaux Carmichael, Marion Lauters, Martin Hertau, Martina Sailerova, Mathilde Ménard, Matthieu Labaste, Matthieu Oriot, Matthieu Bretaud, Mattias Ormestad, Maya Dolan, Melissa Duhaime, Michael Pitiot, Mike Lunn, Mike Sieracki, Montse Coll, Myriam Thomas, Nadine Lebois, Nicole Poulton, Nigel Grimsley, Noan Le Bescot, Oleg Simakov, Olivier Broutin, Olivier Desprez, Olivier Jaillon, Olivier Marien, Olivier Poirot, Olivier Quesnel, Pamela Labbe-Ibanez, Pascal Hingamp, Pascal Morin, Pascale Joannot, Patrick Chang, Patrick Wincker, Paul Muir, Philippe Clais, Philippe Koubbi, Pierre Testor, Rachel Moreau, Raphaël Morard, Roland Heilig, Romain Troublé, Roxana Di Mauro, Roxanne Boonstra, Ruby Pillay, Sabrina Speich, Sacha Bollet, Samuel Audrain, Sandra Da Costa, Sarah Searson, Sasha Tozzi, Sébastien Colin, Sergey Pisarev, Shirley Falcone, 
Sibylle Le Barrois d'Orgeval, Silvia G. Acinas, Simon Morisset, Sophie Marinesque, Sophie Nicaud, Stefanie Kandels-Lewis, Stéphane Audic, Stephane Pesant, Stéphanie Reynaud, Thierry Mansir, Thomas Lefort, Uros Krzic, Valérian Morzadec, Vincent Hilaire, Vincent Le Pennec, Vincent Taillandier, Xavier Bailly, Xavier Bougeard, Xavier Durrieu de Madron, Yann Chavance, Yann Depays, Yohann Mucherie.</s></p></div>
+			</div>
+
+"""
+
+        val output = """
+			<div type="funding">
+<div><p><s><rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Florentina Münzner</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lucy Schlicht</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Adrian Tanara</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sany Tchanra</rs> and <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Marie-Jeanne Pesant</rs> for the manual curation of logsheets and archiving data at PANGAEA.</s><s>We also acknowledge the work of <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Andree Behnken</rs> who developed the dds-fdp web service.</s><s>All authors approved the final manuscript.</s><s>This article is contribution number <rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">26</rs> of the <rs xmlns="http://www.tei-c.org/ns/1.0" type="institution">Tara Oceans Consortium</rs>.</s><s>The collection of Tara Oceans data was made possible by those who contributed to sampling and to logistics during the Tara Oceans Expedition: <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Alain Giese</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Alan Deidun</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Alban Lazar</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Aldine Amiel</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Ali Chase</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Aline Tribollet</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Ameer Abdullah</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Amélie Betus</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">André Abreu</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Andres Peyrot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Andrew Baker</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Anna Deniaud</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Anne Doye</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" 
type="person">Anne Ghuysen Watrin</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Anne Royer</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Anne Thompson</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Annie McGrother</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Antoine Sciandra</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Antoine Triller</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Aurélie Chambouvet</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Baptiste Bernard</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Baptiste Regnier</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Beatriz Fernandez</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Benedetto Barone</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Bertrand Manzano</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Bianca Silva</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Brett Grant</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Brigitte Sabard</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Bruno Dunckel</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Camille Clérissi</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Catarina Marcolin</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Cédric Guigand</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Céline Bachelier</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Céline Blanchard</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Céline Dimier-Hugueney</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Céline Rottier</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Chris Bowler</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Christian Rouvière</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Christian Sardet</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" 
type="person">Christophe Boutte</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Christophe Castagne</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Claudie Marec</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Claudie Marec</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Claudio Stalder</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Colomban De Vargas</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Cornelia Maier</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Cyril Tricot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Dana Sardet</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Daniel Bayley</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Daniel Cron</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Daniele Iudicone</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">David Mountain</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">David Obura</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">David Sauveur</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Defne Arslan</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Denis Dausse</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Denis de La Broise</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Diana Ruiz Pino</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Didier Zoccola</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Édouard Leymarie</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Éloïse Fontaine</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Émilie Sauvage</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Emilie Villar</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Emmanuel Boss</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Emmanuel G. 
Reynaud</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Éric Béraud</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Eric Karsenti</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Eric Pelletier</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Éric Roettinger</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Erica Goetz</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Fabien Perault</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Fabiola Canard</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Fabrice Not</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Fabrizio D'Ortenzio</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Fabrizio Limena</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Floriane Desprez</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Franck Prejger</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">François Aurat</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">François Noël</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Franscisco Cornejo</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Gabriel Gorsky</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Gabriele Procaccini</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Gabriella Gilkes</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Gipsi Lima-Mendez</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Grigor Obolensky</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Guillaume Bracq</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Guillem Salazar</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Halldor Stefansson</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Hélène Santener</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Hervé Bourmaud</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Hervé Le Goff</rs>, <rs 
xmlns="http://www.tei-c.org/ns/1.0" type="person">Hiroyuki Ogata</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Hubert Gautier</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Hugo Sarmento</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Ian Probert</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Isabel Ferrera</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Isabelle Taupier-Letage</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jan Wengers</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jarred Swalwell</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Javier del Campo</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jean-Baptiste Romagnan</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jean-Claude Gascard</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jean-Jacques Kerdraon</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jean-Louis Jamet</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jean-Michel Grisoni</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jennifer Gillette</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jérémie Capoulade</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jérôme Bastion</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Jérôme Teigné</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Joannie Ferland</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Johan Decelle</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Judith Prihoda</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Julie Poulain</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Julien Daniel</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Julien Girardot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Juliette Chatelin</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lars Stemmann</rs>, <rs 
xmlns="http://www.tei-c.org/ns/1.0" type="person">Laurence Garczarek</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Laurent Beguery</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lee Karp-Boss</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Leila Tirichine</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Linda Mollestan</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lionel Bigot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Loïc Vallette</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lucie Bittner</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lucie Subirana</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Luis Gutiérrez</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lydiane Mattio</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Magali Puiseux</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Marc Domingos</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Marc Picheral</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Marc Wessner</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Marcela Cornejo</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Margaux Carmichael</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Marion Lauters</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Martin Hertau</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Martina Sailerova</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Mathilde Ménard</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Matthieu Labaste</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Matthieu Oriot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Matthieu Bretaud</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Mattias Ormestad</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Maya Dolan</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" 
type="person">Melissa Duhaime</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Michael Pitiot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Mike Lunn</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Mike Sieracki</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Montse Coll</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Myriam Thomas</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Nadine Lebois</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Nicole Poulton</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Nigel Grimsley</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Noan Le Bescot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Oleg Simakov</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Olivier Broutin</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Olivier Desprez</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Olivier Jaillon</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Olivier Marien</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Olivier Poirot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Olivier Quesnel</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="institution">Pamela Labbe-Ibanez, Pascal Hingamp, Pascal Morin</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Pascale Joannot</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Patrick Chang</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Patrick Wincker</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Paul Muir</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Philippe Clais</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Philippe Koubbi</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Pierre Testor</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Rachel Moreau</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Raphaël 
Morard</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Roland Heilig</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Romain Troublé</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Roxana Di Mauro</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Roxanne Boonstra</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Ruby Pillay</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sabrina Speich</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sacha Bollet</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Samuel Audrain</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sandra Da Costa</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sarah Searson</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sasha Tozzi</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sébastien Colin</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sergey Pisarev</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Shirley Falcone</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sibylle Le Barrois d'Orgeval</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Silvia G. 
Acinas</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Simon Morisset</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sophie Marinesque</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Sophie Nicaud</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Stefanie Kandels-Lewis</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Stéphane Audic</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Stephane Pesant</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Stéphanie Reynaud</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Thierry Mansir</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Thomas Lefort</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Uros Krzic</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Valérian Morzadec</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Vincent Hilaire</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Vincent Le Pennec</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Vincent Taillandier</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Xavier Bailly</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Xavier Bougeard</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Xavier Durrieu de Madron</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Yann Chavance</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Yann Depays</rs>, <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Yohann Mucherie</rs>.</s></p></div>
+			</div>
+
+"""
+
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(true)
+            .build()
+
+        val (element, mutableTriple) = target.processingXmlFragment(input, config)
+
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
     }
 
     companion object {
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
index cc636b4aa7..b3aa7227ec 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
@@ -12,6 +12,7 @@ import org.grobid.core.utilities.GrobidConfig
 import org.grobid.core.utilities.GrobidProperties
 import org.grobid.core.utilities.LayoutTokensUtil
 import org.hamcrest.CoreMatchers.`is`
+import org.hamcrest.CoreMatchers.not
 import org.hamcrest.MatcherAssert.assertThat
 import org.hamcrest.Matchers.hasSize
 import org.junit.Before
@@ -374,4 +375,66 @@ class FundingAcknowledgementParserTest {
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition2.start, offsetPosition2.end)), `is`("JPMXP1122715503"))
         assertThat(element2.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">JPMXP1122715503</rs>"))
     }
+
+    /**
+     * Regression test for repeated mentions: the input lists "Claudie Marec" twice in a row.
+     *
+     * Both occurrences must be returned as separate person spans whose token offsets differ,
+     * i.e. the second mention must not be resolved onto the position of the first one.
+     */
+    @Test
+    fun testGetExtractionResult_ErrorCase_ShouldReturnCorrectElementsAndPositions() {
+        val input = "Christophe Castagne, Claudie Marec, Claudie Marec, Claudio Stalder,"
+
+        // Tab-separated feature rows, one per labelled token; the last column is the label.
+        val results: String = "Christophe\tchristophe\tC\tCh\tChr\tChri\te\the\tphe\tophe\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+            "Castagne\tcastagne\tC\tCa\tCas\tCast\te\tne\tgne\tagne\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\tI-<other>\n" +
+            "Claudie\tclaudie\tC\tCl\tCla\tClau\te\tie\tdie\tudie\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+            "Marec\tmarec\tM\tMa\tMar\tMare\tc\tec\trec\tarec\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\tI-<other>\n" +
+            "Claudie\tclaudie\tC\tCl\tCla\tClau\te\tie\tdie\tudie\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+            "Marec\tmarec\tM\tMa\tMar\tMare\tc\tec\trec\tarec\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\tI-<other>\n" +
+            "Claudio\tclaudio\tC\tCl\tCla\tClau\to\tio\tdio\tudio\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
+            "Stalder\tstalder\tS\tSt\tSta\tStal\tr\ter\tder\tlder\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
+            ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\tI-<other>\n"
+
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input)
+        val (spans, statement) = target.getExtractionResult(tokens, results)
+
+        // Only person entities are labelled in the rows above.
+        assertThat(statement.fundings, hasSize(0))
+        assertThat(statement.persons, hasSize(4))
+        assertThat(statement.affiliations, hasSize(0))
+
+        assertThat(spans, hasSize(4))
+
+        // First person.
+        val (offsetPosition0, element0) = spans[0]
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition0.start, offsetPosition0.end)), `is`("Christophe Castagne"))
+        assertThat(element0.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Christophe Castagne</rs>"))
+
+        // First occurrence of the repeated name.
+        val (offsetPosition1, element1) = spans[1]
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition1.start, offsetPosition1.end)), `is`("Claudie Marec"))
+        assertThat(element1.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Claudie Marec</rs>"))
+
+        // Second occurrence of the repeated name.
+        val (offsetPosition2, element2) = spans[2]
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition2.start, offsetPosition2.end)), `is`("Claudie Marec"))
+        assertThat(element2.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Claudie Marec</rs>"))
+
+        // The name is the same, but the offset should be different
+        assertThat(offsetPosition2.start, `is`(not(offsetPosition1.start)))
+        assertThat(offsetPosition2.end, `is`(not(offsetPosition1.end)))
+
+        // Last person.
+        val (offsetPosition3, element3) = spans[3]
+        assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition3.start, offsetPosition3.end)), `is`("Claudio Stalder"))
+        assertThat(element3.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Claudio Stalder</rs>"))
+    }
+
+
+
 }
\ No newline at end of file

From 83c7a1015814a69ac4e46d5a6919dded950ec8b8 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sat, 4 May 2024 10:04:57 +0900
Subject: [PATCH 23/31] Fix bug in the transformation of the intervals from
 token-based to character-based when the same tokens occur again later in the
 text and the annotation consists of a single token

---
 .../grobid/core/utilities/TextUtilities.java  |  4 +--
 .../core/utilities/TextUtilitiesTest.java     | 36 ++++++++++++++++++-
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
index 26d520fe73..163c296046 100755
--- a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
+++ b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
@@ -1586,7 +1586,7 @@ public static List<OffsetPosition> matchTokenAndString(List<LayoutToken> layoutT
                     if (StringUtils.isNotEmpty(accumulator)) {
                         int start = text.indexOf(accumulator.toString(), pos);
                         newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
-                        pos = textPositionOfToken;
+                        pos = textPositionOfToken + 1;
                         break;
                     }
                     pos = textPositionOfToken;
@@ -1596,7 +1596,7 @@ public static List<OffsetPosition> matchTokenAndString(List<LayoutToken> layoutT
                 int start = text.indexOf(accumulator.toString(), pos);
                 newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
                 accumulator = new StringBuilder();
-                pos = textPositionOfToken;
+                pos = textPositionOfToken + 1;
             }
 
         }
diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
index 4db3914aca..f0eaaa2887 100644
--- a/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
+++ b/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
@@ -13,7 +13,6 @@
 import java.util.regex.Matcher;
 
 import static org.hamcrest.CoreMatchers.is;
-import static org.hamcrest.CoreMatchers.startsWith;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.hasSize;
 import static org.junit.Assert.*;
@@ -513,4 +512,39 @@ public void testMatchTokenAndString_twoElementsWithEqualValue() throws Exception
         assertThat(input.substring(url3.start, url3.end), is("Claudio Stalder"));
 
     }
+
+    /**
+     * Two single-token annotations with the same text ("IGC") must each be mapped to the
+     * character offsets of their own occurrence in the input.
+     */
+    @Test
+    public void testMatchTokenAndString_twoElementsWithEqualValue2() throws Exception {
+        final String input = "We thank Felix Randow, Shigeki Higashiyama and Feng Zhang for plasmids.We thank Florian Steinberg for discussions and disclosure of unpublished results.We thank Matthew Freeman for helpful discussions.We express our deep gratitude to Moises Mallo for advice concerning CRISPR plus CRISPR reagents.We are grateful for the assistance of Ana Nóvoa and IGC's transgenics and mouse facilities.We thank IGC's cell sorting/flow cytometry, sequencing, and histopathology facilities.";
+
+        List<LayoutToken> tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        List<OffsetPosition> annotationTokenPositions = Arrays.asList(
+            new OffsetPosition(4, 7),
+            new OffsetPosition(9, 12),
+            new OffsetPosition(15, 18),
+            new OffsetPosition(27, 30),
+            new OffsetPosition(49, 52),
+            new OffsetPosition(71, 74),
+            new OffsetPosition(103, 106),
+            new OffsetPosition(109, 110),
+            new OffsetPosition(125, 126)
+        );
+
+        List<OffsetPosition> offsetPositions = TextUtilities.matchTokenAndString(tokenisedInput, input, annotationTokenPositions);
+        assertThat(offsetPositions, hasSize(9));
+
+        OffsetPosition firstIgc = offsetPositions.get(7);
+        assertThat(firstIgc.start, is(349));
+        assertThat(firstIgc.end, is(352));
+        assertThat(input.substring(firstIgc.start, firstIgc.end), is("IGC"));
+
+        OffsetPosition secondIgc = offsetPositions.get(8);
+        assertThat(secondIgc.start, is(397));
+        assertThat(secondIgc.end, is(400));
+        assertThat(input.substring(secondIgc.start, secondIgc.end), is("IGC"));
+    }
 }

From 39892ff5f636143bd331c06c05e07ef37e18a4e4 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sat, 4 May 2024 11:18:34 +0900
Subject: [PATCH 24/31] Fix wrong Xpath expression

---
 .../engines/FundingAcknowledgementParser.java |  6 ++---
 .../grobid/core/utilities/TextUtilities.java  |  2 +-
 ...ingAcknowledgementParserIntegrationTest.kt | 26 +++++++++++++++++++
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 3b20d704fa..768855af7b 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -215,7 +215,7 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
 //                        offsetPositionList.add(new OffsetPosition(pos, pos + sentenceLayoutToken.size()));
 //                        pos += sentenceLayoutToken.size();
 //                    }
-                    Nodes sentences = paragraph.query("//s");
+                    Nodes sentences = paragraph.query(".//s");
 
                     if(sentences.size() == 0) {
                         // Overly careful - we should never end up here.
@@ -223,7 +223,7 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                         updateParagraphNodeWithAnnotations(paragraph, annotations);
                     }
 
-                    updateNodes(sentences, annotations);
+                    updateSentencesNodes(sentences, annotations);
                 } else {
                     updateParagraphNodeWithAnnotations(paragraph, annotations);
                 }
@@ -285,7 +285,7 @@ private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair
         }
     }
 
-    private static void updateNodes(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations) {
+    private static void updateSentencesNodes(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations) {
         int pos = 0;
         int sentenceStartOffset = 0;
         for (Node sentence : sentences) {
diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
index 163c296046..a8ea6a7c3e 100755
--- a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
+++ b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
@@ -1586,7 +1586,7 @@ public static List<OffsetPosition> matchTokenAndString(List<LayoutToken> layoutT
                     if (StringUtils.isNotEmpty(accumulator)) {
                         int start = text.indexOf(accumulator.toString(), pos);
                         newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
-                        pos = textPositionOfToken + 1;
+                        pos = textPositionOfToken;
                         break;
                     }
                     pos = textPositionOfToken;
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
index eead71bbd4..32c96f868c 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
@@ -132,6 +132,32 @@ class FundingAcknowledgementParserIntegrationTest {
         assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
     }
 
+    /** Sentence-segmented fragment with two paragraphs: every entity must be wrapped in place inside the sentence that contains it. */
+    @Test
+    fun testXmlFragmentProcessing_ErrorCase2_withSentenceSegmentation_shouldWork() {
+        val input = """
+			<div type="acknowledgement">
+<div><head>Acknowledgements</head><p><s>The authors would like to acknowledge Lucy Popplewell in the preparation of EMR notes for this study.</s></p></div>
+<div><head>The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology</head><p><s>Research Group who have given us permission to utilise the morbidity definitions (©2014).</s><s>The copyright of the morbidity definitions/categorization lists (©2014) used in this publication is owned by Keele University, the development of which was supported by the Primary Care Research Consortium; For access/details relating to the morbidity definitions/categorisation lists (©2014) please go to www.keele.ac.uk/mrr.</s></p></div>
+			</div>
+
+"""
+
+        val output = """
+			<div type="acknowledgement">
+<div><head>Acknowledgements</head><p><s>The authors would like to acknowledge <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lucy Popplewell</rs> in the preparation of EMR notes for this study.</s></p></div>
+<div><head>The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology</head><p><s>Research Group who have given us permission to utilise the morbidity definitions (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>).</s><s>The copyright of the morbidity definitions/categorization lists (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>) used in this publication is owned by <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Keele University</rs>, the development of which was supported by the <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Primary Care Research Consortium</rs>; For access/details relating to the morbidity definitions/categorisation lists (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>) please go to www.keele.ac.uk/mrr.</s></p></div>
+			</div>
+
+"""
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(true)
+            .build()
+
+        // Only the rewritten XML is checked; the extracted entity lists are not needed here.
+        val (element, _) = target.processingXmlFragment(input, config)
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
+    }
+
     companion object {
         @JvmStatic
         @BeforeClass

From 48779a2c1e22952aded0ef0a81986af180c7d861 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sat, 4 May 2024 12:50:26 +0900
Subject: [PATCH 25/31] Fix another corner case

---
 .../grobid/core/utilities/TextUtilities.java  | 12 ++++++----
 .../core/utilities/TextUtilitiesTest.java     | 23 +++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
index a8ea6a7c3e..f0e6cf03af 100755
--- a/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
+++ b/grobid-core/src/main/java/org/grobid/core/utilities/TextUtilities.java
@@ -1584,19 +1584,23 @@ public static List<OffsetPosition> matchTokenAndString(List<LayoutToken> layoutT
                         continue;
                     }
                     if (StringUtils.isNotEmpty(accumulator)) {
+                        int accumulatorTextLength = accumulator.toString().length();
                         int start = text.indexOf(accumulator.toString(), pos);
-                        newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
-                        pos = textPositionOfToken;
+                        int end = start + accumulatorTextLength;
+                        newPositions.add(new OffsetPosition(start, end));
+                        pos = end;
                         break;
                     }
                     pos = textPositionOfToken;
                 }
             }
             if (StringUtils.isNotEmpty(accumulator)) {
+                int annotationTextLength = accumulator.toString().length();
                 int start = text.indexOf(accumulator.toString(), pos);
-                newPositions.add(new OffsetPosition(start, start + accumulator.toString().length()));
+                int end = start + annotationTextLength;
+                newPositions.add(new OffsetPosition(start, end));
+                pos = end;
                 accumulator = new StringBuilder();
-                pos = textPositionOfToken + 1;
             }
 
         }
diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
index f0eaaa2887..8b53cc263e 100644
--- a/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
+++ b/grobid-core/src/test/java/org/grobid/core/utilities/TextUtilitiesTest.java
@@ -547,4 +547,27 @@ public void testMatchTokenAndString_twoElementsWithEqualValue2() throws Exceptio
         assertThat(input.substring(url8.start, url8.end), is("IGC"));
 
     }
+
+    /**
+     * "ERC" is also the tail of the earlier annotation "NERC": it must be matched at its own, later occurrence.
+     */
+    @Test
+    public void testMatchTokenAndString_twoElementsWithEqualValue3() throws Exception {
+        final String input = "We thank Benoit Demars for providing reaeration data and comments that signficantly improved the manuscript.This study was supported a NERC Case studentship awarded to DP, GYD and SJ, an ERC starting grant awarded to GYD, and the University of Exeter.";
+        List<LayoutToken> tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        List<OffsetPosition> annotationTokenPositions = Arrays.asList(
+            new OffsetPosition(4, 7),
+            new OffsetPosition(40, 41),
+            new OffsetPosition(62, 63),
+            new OffsetPosition(79, 84)
+        );
+        List<OffsetPosition> offsetPositions = TextUtilities.matchTokenAndString(tokenisedInput, input, annotationTokenPositions);
+
+        assertThat(offsetPositions, hasSize(4));
+        OffsetPosition nerc = offsetPositions.get(1);
+        assertThat(input.substring(nerc.start, nerc.end), is("NERC"));
+        OffsetPosition erc = offsetPositions.get(2);
+        assertThat(input.substring(erc.start, erc.end), is("ERC"));
+        assertThat(erc.start, is(input.indexOf(" ERC ") + 1)); // the standalone "ERC", not the one inside "NERC"
+    }
 }

From e15416733f72a948267815197d29b0447407632f Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sat, 4 May 2024 13:25:08 +0900
Subject: [PATCH 26/31] cleanup

---
 .../engines/FundingAcknowledgementParser.java | 28 ++++---------------
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 768855af7b..72be438603 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -183,14 +183,13 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
 
                 MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> localResult = processing(tokenizationFunding, config);
 
-                List<Pair<OffsetPosition, Element>> annotations = localResult.left;
-                FundingAcknowledgmentParse localEntities = localResult.right;
-
-                if (CollectionUtils.isEmpty(annotations)) {
+                if (localResult == null || CollectionUtils.isEmpty(localResult.left)) {
                     continue;
                 }
+                List<Pair<OffsetPosition, Element>> annotations = localResult.left;
+                FundingAcknowledgmentParse localEntities = localResult.right;
 
-                List<OffsetPosition> list = annotations.stream().map(a -> a.getLeft()).toList();
+                List<OffsetPosition> list = annotations.stream().map(Pair::getLeft).toList();
                 List<OffsetPosition> annotationsPositionText = TextUtilities.matchTokenAndString(tokenizationFunding, paragraphText, list);
                 List<Pair<OffsetPosition, Element>> annotationsWithPosRefToText = new ArrayList<>();
                 for (int i = 0; i < annotationsPositionText.size(); i++) {
@@ -200,21 +199,6 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 annotations = annotationsWithPosRefToText;
 
                 if (sentenceSegmentation) {
-//                    Pair<List<String>, List<OffsetPosition>> sentenceInformation = extractSentencesAndPositionsFromParagraphElement(rootElementStatement);
-//
-//                    List<String> sentencesList = sentenceInformation.getLeft();
-//                    List<OffsetPosition> offsetPositionList = sentenceInformation.getRight();
-//
-//                    List<List<LayoutToken>> sentenceLayoutTokens = sentencesList.stream()
-//                        .map(analyzer::tokenizeWithLayoutToken)
-//                        .toList();
-//
-//                    List<OffsetPosition> sentenceTokenPositions = new ArrayList<>();
-//                    int pos = 0;
-//                    for (List<LayoutToken> sentenceLayoutToken : sentenceLayoutTokens) {
-//                        offsetPositionList.add(new OffsetPosition(pos, pos + sentenceLayoutToken.size()));
-//                        pos += sentenceLayoutToken.size();
-//                    }
                     Nodes sentences = paragraph.query(".//s");
 
                     if(sentences.size() == 0) {
@@ -223,7 +207,7 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                         updateParagraphNodeWithAnnotations(paragraph, annotations);
                     }
 
-                    updateSentencesNodes(sentences, annotations);
+                    updateSentencesNodesWithAnnotations(sentences, annotations);
                 } else {
                     updateParagraphNodeWithAnnotations(paragraph, annotations);
                 }
@@ -285,7 +269,7 @@ private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair
         }
     }
 
-    private static void updateSentencesNodes(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations) {
+    private static void updateSentencesNodesWithAnnotations(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations) {
         int pos = 0;
         int sentenceStartOffset = 0;
         for (Node sentence : sentences) {

From 21a0cdd7d50faa24704cf55375dc37b212e08729 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sat, 4 May 2024 13:25:58 +0900
Subject: [PATCH 27/31] add --add-opens for java.base/java.io and sun.nio.ch
 (warning from Hugging Face Spaces)

---
 Dockerfile.crf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile.crf b/Dockerfile.crf
index 55d8fdca21..6b2383f362 100644
--- a/Dockerfile.crf
+++ b/Dockerfile.crf
@@ -78,7 +78,7 @@ WORKDIR /opt/grobid
 
 COPY --from=builder /opt/grobid .
 
-ENV GROBID_SERVICE_OPTS "-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED"
+ENV GROBID_SERVICE_OPTS "-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED"
 
 CMD ["./grobid-service/bin/grobid-service"]
 

From fb17eece22f25f27184a6cbcfa66542f71483fab Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 5 May 2024 14:08:41 +0900
Subject: [PATCH 28/31] fix loss of the last entity that was sharing a boundary
 with the sentence

---
 .../engines/FundingAcknowledgementParser.java |  7 +++---
 ...ingAcknowledgementParserIntegrationTest.kt | 24 +++++++++++++++++++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 72be438603..2b202ea4cb 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -22,7 +22,6 @@
 import org.grobid.core.tokenization.TaggingTokenClusteror;
 import org.grobid.core.utilities.UnicodeUtil;
 import org.grobid.core.utilities.*;
-import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -244,7 +243,7 @@ private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair
                 String text = currentNode.getValue();
                 int finalPos = pos;
                 List<Pair<OffsetPosition, Element>> annotationsInThisChunk = annotations.stream()
-                    .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end < finalPos + text.length())
+                    .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end <= finalPos + text.length())
                     .toList();
 
                 if (CollectionUtils.isNotEmpty(annotationsInThisChunk)) {
@@ -282,7 +281,7 @@ private static void updateSentencesNodesWithAnnotations(Nodes sentences, List<Pa
                     String text = currentNode.getValue();
                     int finalPos = pos;
                     List<Pair<OffsetPosition, Element>> annotationsInThisChunk = annotations.stream()
-                        .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end < finalPos + text.length())
+                        .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end <= finalPos + text.length())
                         .toList();
 
                     if (CollectionUtils.isNotEmpty(annotationsInThisChunk)) {
@@ -357,7 +356,7 @@ protected static List<Node> getNodesAnnotationsInTextNode(Node targetNode, List<
         return outputNodes;
     }
 
-    private static @NotNull MutablePair<Element, MutableTriple<List<Funding>, List<Person>, List<Affiliation>>> aggregateResults(MutableTriple<List<Funding>, List<Person>, List<Affiliation>> localEntities, MutablePair<Element, MutableTriple<List<Funding>, List<Person>, List<Affiliation>>> globalResult) {
+    private static MutablePair<Element, MutableTriple<List<Funding>, List<Person>, List<Affiliation>>> aggregateResults(MutableTriple<List<Funding>, List<Person>, List<Affiliation>> localEntities, MutablePair<Element, MutableTriple<List<Funding>, List<Person>, List<Affiliation>>> globalResult) {
         MutableTriple<List<Funding>,List<Person>,List<Affiliation>> globalEntities = globalResult.getRight();
 
         List<Funding> localFundings = localEntities.getLeft();
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
index 32c96f868c..04fefa973b 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
@@ -148,6 +148,30 @@ class FundingAcknowledgementParserIntegrationTest {
 <div><head>The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology</head><p><s>Research Group who have given us permission to utilise the morbidity definitions (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>).</s><s>The copyright of the morbidity definitions/categorization lists (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>) used in this publication is owned by <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Keele University</rs>, the development of which was supported by the <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Primary Care Research Consortium</rs>; For access/details relating to the morbidity definitions/categorisation lists (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>) please go to www.keele.ac.uk/mrr.</s></p></div>
 			</div>
 
+"""
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(true)
+            .build()
+
+        val (element, mutableTriple) = target.processingXmlFragment(input, config)
+
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
+    }
+
+    @Test
+    fun testXmlFragmentProcessing_ErrorCase3_withSentenceSegmentation_shouldWork() {
+        val input ="""
+			<div type="funding">
+<div><head>Funding</head><p><s>This work was supported by European Molecular Biology Laboratory, the NSF award "BIGDATA: Mid-Scale: DA: ESCE: Collaborative Research: Scalable Statistical Computing for Emerging Omics Data Streams" and Genentech Inc.</s></p></div>
+			</div>
+
+"""
+
+        val output ="""
+			<div type="funding">
+<div><head>Funding</head><p><s>This work was supported by <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">European Molecular Biology Laboratory</rs>, the <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">NSF</rs> award "<rs xmlns="http://www.tei-c.org/ns/1.0" type="projectName">BIGDATA: Mid-Scale: DA: ESCE: Collaborative Research: Scalable Statistical Computing for Emerging Omics Data Streams</rs>" and <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Genentech Inc.</rs></s></p></div>
+			</div>
+
 """
         val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
             .withSentenceSegmentation(true)

From 633651209b7bf69509b100fb6c7cbb1e33948938 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Sun, 5 May 2024 20:57:05 +0900
Subject: [PATCH 29/31] merge sentences whose boundaries are clashing with the
 annotations from the funding-acknowledgment

---
 .../engines/FundingAcknowledgementParser.java | 148 ++++++++++++++++--
 .../core/utilities/SentenceUtilities.java     |  47 +++---
 ...ingAcknowledgementParserIntegrationTest.kt |  20 +++
 3 files changed, 180 insertions(+), 35 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 2b202ea4cb..d1199fbbb8 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -17,20 +17,26 @@
 import org.grobid.core.engines.tagging.GenericTaggerUtils;
 import org.grobid.core.exceptions.GrobidException;
 import org.grobid.core.features.FeaturesVectorFunding;
+import org.grobid.core.layout.BoundingBox;
 import org.grobid.core.layout.LayoutToken;
 import org.grobid.core.tokenization.TaggingTokenCluster;
 import org.grobid.core.tokenization.TaggingTokenClusteror;
 import org.grobid.core.utilities.UnicodeUtil;
 import org.grobid.core.utilities.*;
+import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.List;
+import java.util.stream.Collectors;
 
 import static org.grobid.core.document.xml.XmlBuilderUtils.teiElement;
 import static org.grobid.core.engines.label.TaggingLabels.*;
+import static org.grobid.core.layout.VectorGraphicBoxCalculator.mergeBoxes;
 
 public class FundingAcknowledgementParser extends AbstractParser {
 
@@ -153,11 +159,11 @@ protected static Element injectedAnnotationsInNode(List<LayoutToken> tokenizatio
     }
 
     /**
-     * For convenience, a processing method taking an TEI XML segment as input - only paragraphs (Element p) 
+     * For convenience, a processing method taking an TEI XML segment as input - only paragraphs (Element p)
      * will be processed in this segment and paragraph element will be replaced with the processed content.
      * Resulting entities are relative to the whole processed XML segment.
-     * 
-     * Tokenization is done with the default Grobid analyzer triggered by the identified language. 
+     *
+     * Tokenization is done with the default Grobid analyzer triggered by the identified language.
      **/
     public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affiliation>>> processingXmlFragment(String tei,
                                GrobidAnalysisConfig config) {
@@ -188,8 +194,8 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 List<Pair<OffsetPosition, Element>> annotations = localResult.left;
                 FundingAcknowledgmentParse localEntities = localResult.right;
 
-                List<OffsetPosition> list = annotations.stream().map(Pair::getLeft).toList();
-                List<OffsetPosition> annotationsPositionText = TextUtilities.matchTokenAndString(tokenizationFunding, paragraphText, list);
+                List<OffsetPosition> annotationsPositionTokens = annotations.stream().map(Pair::getLeft).toList();
+                List<OffsetPosition> annotationsPositionText = TextUtilities.matchTokenAndString(tokenizationFunding, paragraphText, annotationsPositionTokens);
                 List<Pair<OffsetPosition, Element>> annotationsWithPosRefToText = new ArrayList<>();
                 for (int i = 0; i < annotationsPositionText.size(); i++) {
                     annotationsWithPosRefToText.add(Pair.of(annotationsPositionText.get(i), annotations.get(i).getRight()));
@@ -205,7 +211,7 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                         LOGGER.warn("While the configuration claim that paragraphs must be segmented, we did not find any sentence. ");
                         updateParagraphNodeWithAnnotations(paragraph, annotations);
                     }
-
+                    mergeSentencesFallingOnAnnotations(sentences, annotations, config);
                     updateSentencesNodesWithAnnotations(sentences, annotations);
                 } else {
                     updateParagraphNodeWithAnnotations(paragraph, annotations);
@@ -233,6 +239,120 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
         return globalResult;
     }
 
+    /**
+     * This method identify the sentences that should be merged because the annotations are falling on their boundaries.
+     * This is necessary when the annotations are extracted from the paragraphs they need to be applied to sentences
+     * calculated from the plain text.
+     * <b>This method modify the sentences in input</b>
+     */
+    private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations, GrobidAnalysisConfig config) {
+        // We merge the sentences (including their coordinates) for which the annotations
+        // are falling in between two of them or they will be lost later.
+
+        List<OffsetPosition> sentencePositions = getOffsetPositionsFromNodes(sentences);
+
+        // We obtain the corrected coordinates that don't fall over the annotations
+        List<OffsetPosition> correctedOffsetPositions = SentenceUtilities.correctSentencePositions(sentencePositions, annotations
+            .stream()
+            .map(Pair::getLeft).toList());
+
+        List<Integer> toRemove = new ArrayList<>();
+        for (OffsetPosition correctedOffsetPosition : correctedOffsetPositions) {
+            List<OffsetPosition> originalSentences = sentencePositions.stream()
+                .filter(a -> a.start >= correctedOffsetPosition.start && a.end <= correctedOffsetPosition.end)
+                .toList();
+
+            // if for each "corrected sentences offset" there are more than one original sentence that
+            // falls into it, it means we need to merge
+            if (originalSentences.size() > 1) {
+                List<Integer> toMerge = originalSentences.stream()
+                    .map(sentencePositions::indexOf)
+                    .toList();
+
+                Element destination = (Element) sentences.get(toMerge.get(0));
+                boolean needToMergeCoordinates = config.isGenerateTeiCoordinates("s");
+                List<BoundingBox> boundingBoxes = new ArrayList<>();
+                Attribute destCoordinates = null;
+
+                if (needToMergeCoordinates) {
+                    destCoordinates = destination.getAttribute("coords");
+                    String coordinates = destCoordinates.getValue();
+                    boundingBoxes = Arrays.stream(coordinates.split(";"))
+                        .map(BoundingBox::fromString)
+                        .collect(Collectors.toList());
+                }
+
+                for (int i = 1; i < toMerge.size(); i++) {
+                    Integer sentenceToMergeIndex = toMerge.get(i);
+                    Node sentenceToMerge = sentences.get(sentenceToMergeIndex);
+
+                    // Merge coordinates
+                    if (needToMergeCoordinates) {
+                        Attribute coords = destination.getAttribute("coords");
+                        String coordinates = coords.getValue();
+                        boundingBoxes.addAll(Arrays.stream(coordinates.split(";"))
+                            .map(BoundingBox::fromString)
+                            .toList());
+
+                        List<BoundingBox> mergedBoundingBoxes = mergeBoxes(boundingBoxes);
+                        String coordsAsString = String.join(";", mergedBoundingBoxes.stream().map(BoundingBox::toString).toList());
+                        Attribute newCoords = new Attribute("coords", coordsAsString);
+                        destination.removeAttribute(coords);
+                        destination.addAttribute(newCoords);
+                    }
+
+                    // Merge content
+                    boolean first = true;
+                    Node previous = null;
+                    for (int c = 0; c < sentenceToMerge.getChildCount(); c++) {
+                        Node child = sentenceToMerge.getChild(c);
+
+                        if (first) {
+                            first = false;
+                            Node lastNodeDestination = destination.getChild(destination.getChildCount() - 1);
+                            previous = lastNodeDestination;
+//                                        if (lastNodeDestination instanceof Text) {
+//                                            ((Text) lastNodeDestination).setValue(((Text) lastNodeDestination).getValue() + " ");
+//                                            previous = lastNodeDestination;
+//                                        } else {
+//                                            Text newSpace = new Text(" ");
+//                                            destination.appendChild(newSpace);
+//                                            previous = newSpace;
+//                                        }
+                        }
+
+                        if (previous instanceof Text && child instanceof Text) {
+                            ((Text) previous).setValue(previous.getValue() + child.getValue());
+                        } else {
+                            ((Element) sentenceToMerge).replaceChild(child, new Text("placeholder"));
+                            child.detach();
+                            destination.appendChild(child);
+                            previous = child;
+                        }
+                    }
+                    sentenceToMerge.detach();
+                    toRemove.add(sentenceToMergeIndex);
+                }
+            }
+        }
+        toRemove.stream()
+            .sorted(Comparator.reverseOrder())
+            .forEach(sentences::remove);
+
+        return sentences;
+    }
+
+    private static @NotNull List<OffsetPosition> getOffsetPositionsFromNodes(Nodes sentences) {
+        List<OffsetPosition> sentencePositions = new ArrayList<>();
+        int start = 0;
+        for (Node sentence : sentences) {
+            int end = start + sentence.getValue().length();
+            sentencePositions.add(new OffsetPosition(start, end));
+            start = end;
+        }
+        return sentencePositions;
+    }
+
     private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair<OffsetPosition, Element>> annotations) {
         int pos = 0;
         List<Node> newChildren = new ArrayList<>();
@@ -400,18 +520,18 @@ protected static Pair<List<String>, List<OffsetPosition>> extractSentencesAndPos
      * The processing here is called from the header and/or full text parser in cascade
      * when one of these higher-level model detect a "funding" section, or in case
      * no funding section is found, when a acknolwedgements section is detected.
-     * 
-     * Independently from the place this parser is called, it process the input sequence 
-     * of layout tokens in a context free manner. 
-     * 
+     *
+     * Independently from the place this parser is called, it process the input sequence
+     * of layout tokens in a context free manner.
+     *
      * The expected input here is a paragraph.
      *
      *     // This returns a Element of the annotation and the position where should be injected, relative to the paragraph.
      *     // TODO: make new data objects for the annotations
-     * 
-     * Return an XML fragment with inline annotations of the input text, together with 
-     * extracted normalized entities. These entities are referenced by the inline 
-     * annotations with the usual @target attribute pointing to xml:id. 
+     *
+     * Return an XML fragment with inline annotations of the input text, together with
+     * extracted normalized entities. These entities are referenced by the inline
+     * annotations with the usual @target attribute pointing to xml:id.
      */
     protected MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> getExtractionResult(List<LayoutToken> tokensParagraph, String labellingResult) {
         List<Funding> fundings = new ArrayList<>();
diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/SentenceUtilities.java b/grobid-core/src/main/java/org/grobid/core/utilities/SentenceUtilities.java
index c0b4498835..7446f26bc5 100644
--- a/grobid-core/src/main/java/org/grobid/core/utilities/SentenceUtilities.java
+++ b/grobid-core/src/main/java/org/grobid/core/utilities/SentenceUtilities.java
@@ -141,27 +141,7 @@ public List<OffsetPosition> runSentenceDetection(String text, List<OffsetPositio
             Collections.sort(forbidden);
 
             // cancel sentence boundaries within the forbidden spans
-            List<OffsetPosition> finalSentencePositions = new ArrayList<>();
-            int forbiddenIndex = 0;
-            for(int j=0; j < sentencePositions.size(); j++) {
-                OffsetPosition position = sentencePositions.get(j);
-                for(int i=forbiddenIndex; i < forbidden.size(); i++) {
-                    OffsetPosition forbiddenPos = forbidden.get(i);
-                    if (forbiddenPos.end < position.end) 
-                        continue;
-                    if (forbiddenPos.start > position.end) 
-                        break;
-                    while ( (forbiddenPos.start < position.end && position.end < forbiddenPos.end) ) {
-                        if (j+1 < sentencePositions.size()) {
-                            position.end = sentencePositions.get(j+1).end;
-                            j++;
-                            forbiddenIndex = i;
-                        } else
-                            break;
-                    }
-                }
-                finalSentencePositions.add(position);
-            }
+            List<OffsetPosition> finalSentencePositions = correctSentencePositions(sentencePositions, forbidden);
 
             // as a heuristics for all implementations, because they clearly all fail for this case, we 
             // attached to the right sentence the numerical bibliographical references markers expressed 
@@ -286,6 +266,31 @@ public List<OffsetPosition> runSentenceDetection(String text, List<OffsetPositio
         }
     }
 
+    public static List<OffsetPosition> correctSentencePositions(List<OffsetPosition> sentencePositions, List<OffsetPosition> forbiddenPositions) { // returns sentence spans extended so that no sentence end falls strictly inside a forbidden span
+        List<OffsetPosition> finalSentencePositions = new ArrayList<>();
+        int forbiddenIndex = 0; // index from which the forbidden spans are scanned for each sentence
+        for(int j = 0; j < sentencePositions.size(); j++) {
+            OffsetPosition position = new OffsetPosition(sentencePositions.get(j).start, sentencePositions.get(j).end); // work on a copy: the input positions are left unmodified
+            for(int i = forbiddenIndex; i < forbiddenPositions.size(); i++) {
+                OffsetPosition forbiddenPos = forbiddenPositions.get(i);
+                if (forbiddenPos.end < position.end) // forbidden span ends before the sentence end: it cannot contain this boundary
+                    continue;
+                if (forbiddenPos.start > position.end) // forbidden span starts after the sentence end: stop scanning (presumably forbiddenPositions is sorted - confirm with callers)
+                    break;
+                while ( (forbiddenPos.start < position.end && position.end < forbiddenPos.end) ) { // the sentence end falls strictly inside the forbidden span
+                    if (j+1 < sentencePositions.size()) {
+                        position.end = sentencePositions.get(j+1).end; // absorb the next sentence into the current one
+                        j++; // the absorbed sentence is skipped by the outer loop
+                        forbiddenIndex = i; // the next sentence starts scanning from this forbidden span
+                    } else
+                        break; // no following sentence left to absorb
+                }
+            }
+            finalSentencePositions.add(position);
+        }
+        return finalSentencePositions;
+    }
+
     /**
      * Return true if the token should be skipped when considering sentence content. 
      */
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
index 04fefa973b..17bf78d85b 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
@@ -182,6 +182,26 @@ class FundingAcknowledgementParserIntegrationTest {
         assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
     }
 
+    @Test
+    fun testXmlFragmentProcessing_mergingSentences_shouldMergeCorrectly() {
+        val input ="\n" +
+            "\t\t\t<div type=\"acknowledgement\">\n" +
+            "<div xmlns=\"http://www.tei-c.org/ns/1.0\"><head>Acknowledgements</head><p><s>Our warmest thanks to Patrice</s><s>Lopez, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank Pedro Baptista</s><s>de</s><s>Castro for his support during this work.</s><s>Special thanks to Erina Fujita for useful tips on the manuscript.</s></p></div>\n" +
+            "\t\t\t</div>\n\n"
+
+        val output = "<div type=\"acknowledgement\">\n" +
+            "<div><head>Acknowledgements</head><p><s>Our warmest thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">PatriceLopez</rs>, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro BaptistadeCastro</rs> for his support during this work.</s><s>Special thanks to <rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs> for useful tips on the manuscript.</s></p></div>\n" +
+            "\t\t\t</div>"
+
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(true)
+            .build()
+
+        val (element, mutableTriple) = target.processingXmlFragment(input, config)
+
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
+    }
+
     companion object {
         @JvmStatic
         @BeforeClass

From 91991706c556c15f57d2a99c687a1f696cb0c628 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Mon, 6 May 2024 07:56:02 +0900
Subject: [PATCH 30/31] fix coordinates merge

---
 .../grobid/core/engines/FundingAcknowledgementParser.java   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index d1199fbbb8..0a5994b02f 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -278,8 +278,10 @@ private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<Pa
                     destCoordinates = destination.getAttribute("coords");
                     String coordinates = destCoordinates.getValue();
                     boundingBoxes = Arrays.stream(coordinates.split(";"))
+                        .filter(StringUtils::isNotBlank)
                         .map(BoundingBox::fromString)
                         .collect(Collectors.toList());
+                    destination.removeAttribute(destCoordinates);
                 }
 
                 for (int i = 1; i < toMerge.size(); i++) {
@@ -288,16 +290,16 @@ private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<Pa
 
                     // Merge coordinates
                     if (needToMergeCoordinates) {
-                        Attribute coords = destination.getAttribute("coords");
+                        Attribute coords = ((Element) sentenceToMerge).getAttribute("coords");
                         String coordinates = coords.getValue();
                         boundingBoxes.addAll(Arrays.stream(coordinates.split(";"))
+                            .filter(StringUtils::isNotBlank)
                             .map(BoundingBox::fromString)
                             .toList());
 
                         List<BoundingBox> mergedBoundingBoxes = mergeBoxes(boundingBoxes);
                         String coordsAsString = String.join(";", mergedBoundingBoxes.stream().map(BoundingBox::toString).toList());
                         Attribute newCoords = new Attribute("coords", coordsAsString);
-                        destination.removeAttribute(coords);
                         destination.addAttribute(newCoords);
                     }
 

From c70d6d3a04e9604c32ad5c0fe51c8cdbc8d98308 Mon Sep 17 00:00:00 2001
From: Luca Foppiano <Foppiano.Luca@nims.go.jp>
Date: Wed, 8 May 2024 09:39:59 +0900
Subject: [PATCH 31/31] Fix merging of coordinates to avoid merge when on
 different pages, add object for annotations with xml nodes

---
 .../grobid/core/data/AnnotatedXMLElement.java | 35 +++++++++
 .../engines/FundingAcknowledgementParser.java | 73 ++++++++---------
 ...ingAcknowledgementParserIntegrationTest.kt | 78 ++++++++++++++++---
 .../FundingAcknowledgementParserTest.kt       | 68 ++++++++--------
 4 files changed, 176 insertions(+), 78 deletions(-)
 create mode 100644 grobid-core/src/main/java/org/grobid/core/data/AnnotatedXMLElement.java

diff --git a/grobid-core/src/main/java/org/grobid/core/data/AnnotatedXMLElement.java b/grobid-core/src/main/java/org/grobid/core/data/AnnotatedXMLElement.java
new file mode 100644
index 0000000000..8a9b28e02a
--- /dev/null
+++ b/grobid-core/src/main/java/org/grobid/core/data/AnnotatedXMLElement.java
@@ -0,0 +1,35 @@
+package org.grobid.core.data;
+
+import nu.xom.Element;
+import org.grobid.core.utilities.OffsetPosition;
+
+/**
+ * This class represents an annotation attached to an XML node.
+ * The annotation is composed of two pieces of information: the XML Element node and the offset position.
+ */
+public class AnnotatedXMLElement {
+
+    private OffsetPosition offsetPosition; // offset position of the annotation (unit, tokens or characters, is not specified here - TODO confirm)
+    private Element annotationNode; // XML element carrying the annotation
+
+    public AnnotatedXMLElement(Element annotationNode, OffsetPosition offsetPosition) {
+        this.annotationNode = annotationNode;
+        this.offsetPosition = offsetPosition;
+    }
+
+    public OffsetPosition getOffsetPosition() {
+        return offsetPosition;
+    }
+
+    public void setOffsetPosition(OffsetPosition offsetPosition) {
+        this.offsetPosition = offsetPosition;
+    }
+
+    public Element getAnnotationNode() {
+        return annotationNode;
+    }
+
+    public void setAnnotationNode(Element annotationNode) {
+        this.annotationNode = annotationNode;
+    }
+}
diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
index 0a5994b02f..cfeef3637f 100644
--- a/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
+++ b/grobid-core/src/main/java/org/grobid/core/engines/FundingAcknowledgementParser.java
@@ -9,6 +9,7 @@
 import org.apache.commons.lang3.tuple.Pair;
 import org.grobid.core.GrobidModel;
 import org.grobid.core.GrobidModels;
+import org.grobid.core.data.AnnotatedXMLElement;
 import org.grobid.core.analyzers.GrobidAnalyzer;
 import org.grobid.core.data.*;
 import org.grobid.core.document.xml.XmlBuilderUtils;
@@ -23,15 +24,11 @@
 import org.grobid.core.tokenization.TaggingTokenClusteror;
 import org.grobid.core.utilities.UnicodeUtil;
 import org.grobid.core.utilities.*;
-import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.List;
+import java.util.*;
 import java.util.stream.Collectors;
 
 import static org.grobid.core.document.xml.XmlBuilderUtils.teiElement;
@@ -50,8 +47,7 @@ protected FundingAcknowledgementParser() {
         super(model);
     }
 
-    private MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse>
-        processing(List<LayoutToken> tokenizationFunding, GrobidAnalysisConfig config) {
+    private MutablePair<List<AnnotatedXMLElement>, FundingAcknowledgmentParse> processing(List<LayoutToken> tokenizationFunding, GrobidAnalysisConfig config) {
         if (CollectionUtils.isEmpty(tokenizationFunding)) {
             return null;
         }
@@ -78,10 +74,10 @@ protected FundingAcknowledgementParser() {
     public MutablePair<Element, MutableTriple<List<Funding>, List<Person>, List<Affiliation>>> processing(String text,
                                                                                                    GrobidAnalysisConfig config) {
         text = UnicodeUtil.normaliseText(text);
-        List<LayoutToken> tokenizationFunding = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
-        MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> results = processing(tokenizationFunding, config);
-        MutableTriple<List<Funding>, List<Person>, List<Affiliation>> entities = MutableTriple.of(results.getRight().getFundings(), results.getRight().getPersons(), results.getRight().getAffiliations());
-        List<Pair<OffsetPosition, Element>> annotations = results.getLeft();
+//        List<LayoutToken> tokenizationFunding = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
+//        MutablePair<List<AnnotatedXMLElement>, FundingAcknowledgmentParse> results = processing(tokenizationFunding, config);
+//        MutableTriple<List<Funding>, List<Person>, List<Affiliation>> entities = MutableTriple.of(results.getRight().getFundings(), results.getRight().getPersons(), results.getRight().getAffiliations());
+//        List<AnnotatedXMLElement> annotations = results.getLeft();
 
         Element outputParagraph = teiElement("p");
         outputParagraph.appendChild(text);
@@ -186,19 +182,19 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
                 GrobidAnalyzer analyzer = GrobidAnalyzer.getInstance();
                 List<LayoutToken> tokenizationFunding = analyzer.tokenizeWithLayoutToken(paragraphText);
 
-                MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> localResult = processing(tokenizationFunding, config);
+                MutablePair<List<AnnotatedXMLElement>, FundingAcknowledgmentParse> localResult = processing(tokenizationFunding, config);
 
                 if (localResult == null || CollectionUtils.isEmpty(localResult.left)) {
                     continue;
                 }
-                List<Pair<OffsetPosition, Element>> annotations = localResult.left;
+                List<AnnotatedXMLElement> annotations = localResult.left;
                 FundingAcknowledgmentParse localEntities = localResult.right;
 
-                List<OffsetPosition> annotationsPositionTokens = annotations.stream().map(Pair::getLeft).toList();
+                List<OffsetPosition> annotationsPositionTokens = annotations.stream().map(AnnotatedXMLElement::getOffsetPosition).toList();
                 List<OffsetPosition> annotationsPositionText = TextUtilities.matchTokenAndString(tokenizationFunding, paragraphText, annotationsPositionTokens);
-                List<Pair<OffsetPosition, Element>> annotationsWithPosRefToText = new ArrayList<>();
+                List<AnnotatedXMLElement> annotationsWithPosRefToText = new ArrayList<>();
                 for (int i = 0; i < annotationsPositionText.size(); i++) {
-                    annotationsWithPosRefToText.add(Pair.of(annotationsPositionText.get(i), annotations.get(i).getRight()));
+                    annotationsWithPosRefToText.add(new AnnotatedXMLElement(annotations.get(i).getAnnotationNode(), annotationsPositionText.get(i)));
                 }
 
                 annotations = annotationsWithPosRefToText;
@@ -245,7 +241,7 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
      * calculated from the plain text.
      * <b>This method modify the sentences in input</b>
      */
-    private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations, GrobidAnalysisConfig config) {
+    private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<AnnotatedXMLElement> annotations, GrobidAnalysisConfig config) {
         // We merge the sentences (including their coordinates) for which the annotations
         // are falling in between two of them or they will be lost later.
 
@@ -254,7 +250,7 @@ private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<Pa
         // We obtain the corrected coordinates that don't fall over the annotations
         List<OffsetPosition> correctedOffsetPositions = SentenceUtilities.correctSentencePositions(sentencePositions, annotations
             .stream()
-            .map(Pair::getLeft).toList());
+            .map(AnnotatedXMLElement::getOffsetPosition).toList());
 
         List<Integer> toRemove = new ArrayList<>();
         for (OffsetPosition correctedOffsetPosition : correctedOffsetPositions) {
@@ -297,8 +293,15 @@ private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<Pa
                             .map(BoundingBox::fromString)
                             .toList());
 
-                        List<BoundingBox> mergedBoundingBoxes = mergeBoxes(boundingBoxes);
-                        String coordsAsString = String.join(";", mergedBoundingBoxes.stream().map(BoundingBox::toString).toList());
+                        // Group by page, then merge
+                        List<BoundingBox> postMergeBoxes = new ArrayList<>();
+                        Map<Integer, List<BoundingBox>> boundingBoxesByPage = boundingBoxes.stream().collect(Collectors.groupingBy(BoundingBox::getPage));
+                        for(Map.Entry<Integer, List<BoundingBox>> boxesByPages : boundingBoxesByPage.entrySet()) {
+                            List<BoundingBox> mergedBoundingBoxes = mergeBoxes(boxesByPages.getValue());
+                            postMergeBoxes.addAll(mergedBoundingBoxes);
+                        }
+
+                        String coordsAsString = String.join(";", postMergeBoxes.stream().map(BoundingBox::toString).toList());
                         Attribute newCoords = new Attribute("coords", coordsAsString);
                         destination.addAttribute(newCoords);
                     }
@@ -344,7 +347,7 @@ private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<Pa
         return sentences;
     }
 
-    private static @NotNull List<OffsetPosition> getOffsetPositionsFromNodes(Nodes sentences) {
+    private static List<OffsetPosition> getOffsetPositionsFromNodes(Nodes sentences) {
         List<OffsetPosition> sentencePositions = new ArrayList<>();
         int start = 0;
         for (Node sentence : sentences) {
@@ -355,7 +358,7 @@ private static Nodes mergeSentencesFallingOnAnnotations(Nodes sentences, List<Pa
         return sentencePositions;
     }
 
-    private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair<OffsetPosition, Element>> annotations) {
+    private static void updateParagraphNodeWithAnnotations(Node paragraph, List<AnnotatedXMLElement> annotations) {
         int pos = 0;
         List<Node> newChildren = new ArrayList<>();
         for (int i = 0; i < paragraph.getChildCount(); i++) {
@@ -364,8 +367,8 @@ private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair
             if (currentNode instanceof Text) {
                 String text = currentNode.getValue();
                 int finalPos = pos;
-                List<Pair<OffsetPosition, Element>> annotationsInThisChunk = annotations.stream()
-                    .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end <= finalPos + text.length())
+                List<AnnotatedXMLElement> annotationsInThisChunk = annotations.stream()
+                    .filter(a -> a.getOffsetPosition().start >= finalPos && a.getOffsetPosition().end <= finalPos + text.length())
                     .toList();
 
                 if (CollectionUtils.isNotEmpty(annotationsInThisChunk)) {
@@ -390,7 +393,7 @@ private static void updateParagraphNodeWithAnnotations(Node paragraph, List<Pair
         }
     }
 
-    private static void updateSentencesNodesWithAnnotations(Nodes sentences, List<Pair<OffsetPosition, Element>> annotations) {
+    private static void updateSentencesNodesWithAnnotations(Nodes sentences, List<AnnotatedXMLElement> annotations) {
         int pos = 0;
         int sentenceStartOffset = 0;
         for (Node sentence : sentences) {
@@ -402,8 +405,8 @@ private static void updateSentencesNodesWithAnnotations(Nodes sentences, List<Pa
                 if (currentNode instanceof Text) {
                     String text = currentNode.getValue();
                     int finalPos = pos;
-                    List<Pair<OffsetPosition, Element>> annotationsInThisChunk = annotations.stream()
-                        .filter(a -> a.getLeft().start >= finalPos && a.getLeft().end <= finalPos + text.length())
+                    List<AnnotatedXMLElement> annotationsInThisChunk = annotations.stream()
+                        .filter(a -> a.getOffsetPosition().start >= finalPos && a.getOffsetPosition().end <= finalPos + text.length())
                         .toList();
 
                     if (CollectionUtils.isNotEmpty(annotationsInThisChunk)) {
@@ -438,7 +441,7 @@ private static void updateSentencesNodesWithAnnotations(Nodes sentences, List<Pa
      * the text content of the target node. If the node is empty, should be used @see injectedAnnotationsInNode
      * as this method will fail
      */
-    protected static List<Node> getNodesAnnotationsInTextNode(Node targetNode, List<Pair<OffsetPosition, Element>> annotations) {
+    protected static List<Node> getNodesAnnotationsInTextNode(Node targetNode, List<AnnotatedXMLElement> annotations) {
         return getNodesAnnotationsInTextNode(targetNode, annotations, 0);
     }
 
@@ -446,15 +449,15 @@ protected static List<Node> getNodesAnnotationsInTextNode(Node targetNode, List<
      * The sentence offset allow to calculate the position relative to the sentence of annotations that
      * have been calculated in relation with the paragraph.
      */
-    protected static List<Node> getNodesAnnotationsInTextNode(Node targetNode, List<Pair<OffsetPosition, Element>> annotations, int sentenceOffset) {
+    protected static List<Node> getNodesAnnotationsInTextNode(Node targetNode, List<AnnotatedXMLElement> annotations, int sentenceOffset) {
         String text = targetNode.getValue();
 
         List<Node> outputNodes = new ArrayList<>();
 
         int pos = 0;
-        for (Pair<OffsetPosition, Element> annotation : annotations) {
-            OffsetPosition annotationPosition = annotation.getLeft();
-            Element annotationContentElement = annotation.getRight();
+        for (AnnotatedXMLElement annotation : annotations) {
+            OffsetPosition annotationPosition = annotation.getOffsetPosition();
+            Element annotationContentElement = annotation.getAnnotationNode();
 
             String before = text.substring(pos, annotationPosition.start - sentenceOffset);
 
@@ -535,7 +538,7 @@ protected static Pair<List<String>, List<OffsetPosition>> extractSentencesAndPos
      * extracted normalized entities. These entities are referenced by the inline
      * annotations with the usual @target attribute pointing to xml:id.
      */
-    protected MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgmentParse> getExtractionResult(List<LayoutToken> tokensParagraph, String labellingResult) {
+    protected MutablePair<List<AnnotatedXMLElement>, FundingAcknowledgmentParse> getExtractionResult(List<LayoutToken> tokensParagraph, String labellingResult) {
         List<Funding> fundings = new ArrayList<>();
         List<Person> persons = new ArrayList<>();
         List<Affiliation> affiliations = new ArrayList<>();
@@ -827,10 +830,10 @@ protected MutablePair<List<Pair<OffsetPosition, Element>>, FundingAcknowledgment
             localFunding.inferAcronyms();
         }
 
-        List<Pair<OffsetPosition, Element>> annotations = new ArrayList<>();
+        List<AnnotatedXMLElement> annotations = new ArrayList<>();
 
         for (int i = 0; i < elements.size(); i++) {
-            annotations.add(Pair.of(positions.get(i), elements.get(i)));
+            annotations.add(new AnnotatedXMLElement(elements.get(i), positions.get(i)));
         }
 
         return MutablePair.of(annotations, parsedStatement);
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
index 17bf78d85b..4ae2422567 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt
@@ -4,13 +4,13 @@ import org.grobid.core.engines.config.GrobidAnalysisConfig
 import org.grobid.core.factory.AbstractEngineFactory
 import org.grobid.core.utilities.GrobidConfig
 import org.grobid.core.utilities.GrobidProperties
-import org.hamcrest.CoreMatchers.`is`
 import org.hamcrest.MatcherAssert.assertThat
 import org.hamcrest.Matchers.hasSize
 import org.junit.Before
 import org.junit.BeforeClass
 import org.junit.Test
 import org.xmlunit.matchers.CompareMatcher
+import java.util.*
 
 class FundingAcknowledgementParserIntegrationTest {
 
@@ -63,7 +63,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
     @Test
     fun testXmlFragmentProcessing2_withoutSentenceSegmentation_shouldReturnSameXML() {
-        val input ="\n" +
+        val input = "\n" +
             "\t\t\t<div type=\"acknowledgement\">\n" +
             "<div xmlns=\"http://www.tei-c.org/ns/1.0\"><head>Acknowledgements</head><p>Our warmest thanks to Patrice Lopez, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript.</p></div>\n" +
             "\t\t\t</div>\n\n"
@@ -89,7 +89,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
     @Test
     fun testXmlFragmentProcessing2_withSentenceSegmentation_shouldWork() {
-        val input ="\n" +
+        val input = "\n" +
             "\t\t\t<div type=\"acknowledgement\">\n" +
             "<div xmlns=\"http://www.tei-c.org/ns/1.0\"><head>Acknowledgements</head><p><s>Our warmest thanks to Patrice Lopez, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank Pedro Baptista de Castro for his support during this work.</s><s>Special thanks to Erina Fujita for useful tips on the manuscript.</s></p></div>\n" +
             "\t\t\t</div>\n\n"
@@ -109,7 +109,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
     @Test
     fun testXmlFragmentProcessing_ErrorCase_withSentenceSegmentation_shouldWork() {
-        val input ="""
+        val input = """
 			<div type="funding">
 <div><p><s>Florentina Münzner, Lucy Schlicht, Adrian Tanara, Sany Tchanra and Marie-Jeanne Pesant for the manual curation of logsheets and archiving data at PANGAEA.</s><s>We also acknowledge the work of Andree Behnken who developed the dds-fdp web service.</s><s>All authors approved the final manuscript.</s><s>This article is contribution number 26 of the Tara Oceans Consortium.</s><s>The collection of Tara Oceans data was made possible by those who contributed to sampling and to logistics during the Tara Oceans Expedition: Alain Giese, Alan Deidun, Alban Lazar, Aldine Amiel, Ali Chase, Aline Tribollet, Ameer Abdullah, Amélie Betus, André Abreu, Andres Peyrot, Andrew Baker, Anna Deniaud, Anne Doye, Anne Ghuysen Watrin, Anne Royer, Anne Thompson, Annie McGrother, Antoine Sciandra, Antoine Triller, Aurélie Chambouvet, Baptiste Bernard, Baptiste Regnier, Beatriz Fernandez, Benedetto Barone, Bertrand Manzano, Bianca Silva, Brett Grant, Brigitte Sabard, Bruno Dunckel, Camille Clérissi, Catarina Marcolin, Cédric Guigand, Céline Bachelier, Céline Blanchard, Céline Dimier-Hugueney, Céline Rottier, Chris Bowler, Christian Rouvière, Christian Sardet, Christophe Boutte, Christophe Castagne, Claudie Marec, Claudie Marec, Claudio Stalder, Colomban De Vargas, Cornelia Maier, Cyril Tricot, Dana Sardet, Daniel Bayley, Daniel Cron, Daniele Iudicone, David Mountain, David Obura, David Sauveur, Defne Arslan, Denis Dausse, Denis de La Broise, Diana Ruiz Pino, Didier Zoccola, Édouard Leymarie, Éloïse Fontaine, Émilie Sauvage, Emilie Villar, Emmanuel Boss, Emmanuel G. 
Reynaud, Éric Béraud, Eric Karsenti, Eric Pelletier, Éric Roettinger, Erica Goetz, Fabien Perault, Fabiola Canard, Fabrice Not, Fabrizio D'Ortenzio, Fabrizio Limena, Floriane Desprez, Franck Prejger, François Aurat, François Noël, Franscisco Cornejo, Gabriel Gorsky, Gabriele Procaccini, Gabriella Gilkes, Gipsi Lima-Mendez, Grigor Obolensky, Guillaume Bracq, Guillem Salazar, Halldor Stefansson, Hélène Santener, Hervé Bourmaud, Hervé Le Goff, Hiroyuki Ogata, Hubert Gautier, Hugo Sarmento, Ian Probert, Isabel Ferrera, Isabelle Taupier-Letage, Jan Wengers, Jarred Swalwell, Javier del Campo, Jean-Baptiste Romagnan, Jean-Claude Gascard, Jean-Jacques Kerdraon, Jean-Louis Jamet, Jean-Michel Grisoni, Jennifer Gillette, Jérémie Capoulade, Jérôme Bastion, Jérôme Teigné, Joannie Ferland, Johan Decelle, Judith Prihoda, Julie Poulain, Julien Daniel, Julien Girardot, Juliette Chatelin, Lars Stemmann, Laurence Garczarek, Laurent Beguery, Lee Karp-Boss, Leila Tirichine, Linda Mollestan, Lionel Bigot, Loïc Vallette, Lucie Bittner, Lucie Subirana, Luis Gutiérrez, Lydiane Mattio, Magali Puiseux, Marc Domingos, Marc Picheral, Marc Wessner, Marcela Cornejo, Margaux Carmichael, Marion Lauters, Martin Hertau, Martina Sailerova, Mathilde Ménard, Matthieu Labaste, Matthieu Oriot, Matthieu Bretaud, Mattias Ormestad, Maya Dolan, Melissa Duhaime, Michael Pitiot, Mike Lunn, Mike Sieracki, Montse Coll, Myriam Thomas, Nadine Lebois, Nicole Poulton, Nigel Grimsley, Noan Le Bescot, Oleg Simakov, Olivier Broutin, Olivier Desprez, Olivier Jaillon, Olivier Marien, Olivier Poirot, Olivier Quesnel, Pamela Labbe-Ibanez, Pascal Hingamp, Pascal Morin, Pascale Joannot, Patrick Chang, Patrick Wincker, Paul Muir, Philippe Clais, Philippe Koubbi, Pierre Testor, Rachel Moreau, Raphaël Morard, Roland Heilig, Romain Troublé, Roxana Di Mauro, Roxanne Boonstra, Ruby Pillay, Sabrina Speich, Sacha Bollet, Samuel Audrain, Sandra Da Costa, Sarah Searson, Sasha Tozzi, Sébastien Colin, Sergey Pisarev, Shirley Falcone, 
Sibylle Le Barrois d'Orgeval, Silvia G. Acinas, Simon Morisset, Sophie Marinesque, Sophie Nicaud, Stefanie Kandels-Lewis, Stéphane Audic, Stephane Pesant, Stéphanie Reynaud, Thierry Mansir, Thomas Lefort, Uros Krzic, Valérian Morzadec, Vincent Hilaire, Vincent Le Pennec, Vincent Taillandier, Xavier Bailly, Xavier Bougeard, Xavier Durrieu de Madron, Yann Chavance, Yann Depays, Yohann Mucherie.</s></p></div>
 			</div>
@@ -134,7 +134,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
     @Test
     fun testXmlFragmentProcessing_ErrorCase2_withSentenceSegmentation_shouldWork() {
-        val input ="""
+        val input = """
 			<div type="acknowledgement">
 <div><head>Acknowledgements</head><p><s>The authors would like to acknowledge Lucy Popplewell in the preparation of EMR notes for this study.</s></p></div>
 <div><head>The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology</head><p><s>Research Group who have given us permission to utilise the morbidity definitions (©2014).</s><s>The copyright of the morbidity definitions/categorization lists (©2014) used in this publication is owned by Keele University, the development of which was supported by the Primary Care Research Consortium; For access/details relating to the morbidity definitions/categorisation lists (©2014) please go to www.keele.ac.uk/mrr.</s></p></div>
@@ -142,7 +142,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
 """
 
-        val output ="""
+        val output = """
 			<div type="acknowledgement">
 <div><head>Acknowledgements</head><p><s>The authors would like to acknowledge <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">Lucy Popplewell</rs> in the preparation of EMR notes for this study.</s></p></div>
 <div><head>The authors would like to acknowledge Keele University's Prognosis and Consultation Epidemiology</head><p><s>Research Group who have given us permission to utilise the morbidity definitions (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>).</s><s>The copyright of the morbidity definitions/categorization lists (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>) used in this publication is owned by <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Keele University</rs>, the development of which was supported by the <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Primary Care Research Consortium</rs>; For access/details relating to the morbidity definitions/categorisation lists (<rs xmlns="http://www.tei-c.org/ns/1.0" type="grantNumber">©2014</rs>) please go to www.keele.ac.uk/mrr.</s></p></div>
@@ -160,14 +160,14 @@ class FundingAcknowledgementParserIntegrationTest {
 
     @Test
     fun testXmlFragmentProcessing_ErrorCase3_withSentenceSegmentation_shouldWork() {
-        val input ="""
+        val input = """
 			<div type="funding">
 <div><head>Funding</head><p><s>This work was supported by European Molecular Biology Laboratory, the NSF award "BIGDATA: Mid-Scale: DA: ESCE: Collaborative Research: Scalable Statistical Computing for Emerging Omics Data Streams" and Genentech Inc.</s></p></div>
 			</div>
 
 """
 
-        val output ="""
+        val output = """
 			<div type="funding">
 <div><head>Funding</head><p><s>This work was supported by <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">European Molecular Biology Laboratory</rs>, the <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">NSF</rs> award "<rs xmlns="http://www.tei-c.org/ns/1.0" type="projectName">BIGDATA: Mid-Scale: DA: ESCE: Collaborative Research: Scalable Statistical Computing for Emerging Omics Data Streams</rs>" and <rs xmlns="http://www.tei-c.org/ns/1.0" type="funder">Genentech Inc.</rs></s></p></div>
 			</div>
@@ -184,7 +184,7 @@ class FundingAcknowledgementParserIntegrationTest {
 
     @Test
     fun testXmlFragmentProcessing_mergingSentences_shouldMergeCorrectly() {
-        val input ="\n" +
+        val input = "\n" +
             "\t\t\t<div type=\"acknowledgement\">\n" +
             "<div xmlns=\"http://www.tei-c.org/ns/1.0\"><head>Acknowledgements</head><p><s>Our warmest thanks to Patrice</s><s>Lopez, the author of Grobid <ref type=\"bibr\" target=\"#b21\">[22]</ref>, DeLFT <ref type=\"bibr\" target=\"#b19\">[20]</ref>, and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions.</s><s>We thank Pedro Baptista</s><s>de</s><s>Castro for his support during this work.</s><s>Special thanks to Erina Fujita for useful tips on the manuscript.</s></p></div>\n" +
             "\t\t\t</div>\n\n"
@@ -202,6 +202,66 @@ class FundingAcknowledgementParserIntegrationTest {
         assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
     }
 
+    // Sentences split inside the name "Patrice Lopez" are merged into one <s>; the two page-1 boxes
+    // (same origin and width, heights 26.53 / 26.57) are expected to collapse into a single coords entry.
+    @Test
+    fun testXmlFragmentProcessing_mergingSentencesAndCoordinatesInTheSamePage_shouldMergeCoordinates() {
+        val input = """<div type="acknowledgement">
+<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgements</head><p><s coords="1,56.80,41.48,432.74,26.53">This is sentence 1 in page 1 where we thanks Patrice</s><s coords="1,56.80,41.48,432.74,26.57">Lopez, who is also overlapping in sentence 2, page 2, with annotations <ref type="bibr" target="#b21">[22]</ref>, DeLFT <ref type="bibr" target="#b19">[20]</ref>, and more text.</s></p></div>
+</div>"""
+
+        val output = """<div type="acknowledgement">
+<div><head>Acknowledgements</head><p><s coords="1,56.80,41.48,432.74,26.57">This is sentence 1 in page 1 where we thanks <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">PatriceLopez</rs>, who is also overlapping in sentence 2, page 2, with annotations <ref type="bibr" target="#b21">[22]</ref>, DeLFT <ref type="bibr" target="#b19">[20]</ref>, and more text.</s></p></div>
+</div>"""
+
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(true)
+            .generateTeiCoordinates(listOf("s"))
+            .build()
+        val (element, _) = target.processingXmlFragment(input, config)
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
+    }
+
+    // Sentences split inside the name "Patrice Lopez" are merged into one <s>; the two page-1 boxes sit
+    // at different positions, so the expected coords attribute lists both boxes joined by ';'.
+    @Test
+    fun testXmlFragmentProcessing_mergingSentencesAndCoordinatesInTheSamePage2_shouldMergeCoordinates() {
+        val input = """<div type="acknowledgement">
+<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgements</head><p><s coords="1,56.80,41.48,432.74,26.53">This is sentence 1 in page 1 where we thanks Patrice</s><s coords="1,86.80,141.48,532.74,26.57">Lopez, who is also overlapping in sentence 2, page 2, with annotations <ref type="bibr" target="#b21">[22]</ref>, DeLFT <ref type="bibr" target="#b19">[20]</ref>, and more text.</s></p></div>
+</div>"""
+
+        val output = """<div type="acknowledgement">
+<div><head>Acknowledgements</head><p><s coords="1,56.80,41.48,432.74,26.53;1,86.80,141.48,532.74,26.57">This is sentence 1 in page 1 where we thanks <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">PatriceLopez</rs>, who is also overlapping in sentence 2, page 2, with annotations <ref type="bibr" target="#b21">[22]</ref>, DeLFT <ref type="bibr" target="#b19">[20]</ref>, and more text.</s></p></div>
+</div>"""
+
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(true)
+            .generateTeiCoordinates(listOf("s"))
+            .build()
+        val (element, _) = target.processingXmlFragment(input, config)
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
+    }
+
+    // Sentences split inside the name "Patrice Lopez" are merged into one <s>; the boxes are on pages 1
+    // and 2, so the expected coords attribute keeps one box per page, joined by ';'.
+    @Test
+    fun testXmlFragmentProcessing_mergingSentencesAndCoordinatesInDifferentPages_shouldNotMergeCoordinates() {
+        val input = """<div type="acknowledgement">
+<div xmlns="http://www.tei-c.org/ns/1.0"><head>Acknowledgements</head><p><s coords="1,56.80,41.48,432.74,26.57">This is sentence 1 in page 1 where we thanks Patrice</s><s coords="2,56.80,41.48,432.74,26.57">Lopez, who is also overlapping in sentence 2, page 2, with annotations <ref type="bibr" target="#b21">[22]</ref>, DeLFT <ref type="bibr" target="#b19">[20]</ref>, and more text.</s></p></div>
+</div>"""
+
+        val output = """<div type="acknowledgement">
+<div><head>Acknowledgements</head><p><s coords="1,56.80,41.48,432.74,26.57;2,56.80,41.48,432.74,26.57">This is sentence 1 in page 1 where we thanks <rs xmlns="http://www.tei-c.org/ns/1.0" type="person">PatriceLopez</rs>, who is also overlapping in sentence 2, page 2, with annotations <ref type="bibr" target="#b21">[22]</ref>, DeLFT <ref type="bibr" target="#b19">[20]</ref>, and more text.</s></p></div>
+</div>"""
+
+        val config = GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
+            .withSentenceSegmentation(true)
+            .generateTeiCoordinates(listOf("s"))
+            .build()
+        val (element, _) = target.processingXmlFragment(input, config)
+        assertThat(element.toXML(), CompareMatcher.isIdenticalTo(output))
+    }
+
     companion object {
         @JvmStatic
         @BeforeClass
diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
index b3aa7227ec..006a1e3ebf 100644
--- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
+++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserTest.kt
@@ -34,7 +34,7 @@ class FundingAcknowledgementParserTest {
     @Test
     fun testGetExtractionResult() {
 
-        val input = "Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript.";
+        val input = "Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript."
 
         val results: String = "Our\tour\tO\tOu\tOur\tOur\tr\tur\tOur\tOur\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
                 "warmest\twarmest\tw\twa\twar\twarm\tt\tst\test\tmest\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
@@ -101,9 +101,9 @@ class FundingAcknowledgementParserTest {
                 "on\ton\to\ton\ton\ton\tn\ton\ton\ton\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
                 "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
                 "manuscript\tmanuscript\tm\tma\tman\tmanu\tt\tpt\tipt\tript\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
-                ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>";
+                ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>"
 
-        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input)
 
         val (element, fundingAcknowledgmentParse) = target.getExtractionResult(tokens, results)
 
@@ -118,7 +118,7 @@ class FundingAcknowledgementParserTest {
     @Test
     fun testGetExtractionResult2() {
 
-        val input = "This work was partly supported by MEXT Program: Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials) Grant Number [JPMXP1122715503].";
+        val input = "This work was partly supported by MEXT Program: Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials) Grant Number [JPMXP1122715503]."
 
         val results: String = "This\tthis\tT\tTh\tThi\tThis\ts\tis\this\tThis\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
             "work\twork\tw\two\twor\twork\tk\trk\tork\twork\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
@@ -154,9 +154,9 @@ class FundingAcknowledgementParserTest {
             "[\t[\t[\t[\t[\t[\t[\t[\t[\t[\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<other>\n" +
             "JPMXP1122715503\tjpmxp1122715503\tJ\tJP\tJPM\tJPMX\t3\t03\t503\t5503\tLINEIN\tALLCAP\tCONTAINSDIGITS\t0\t0\t0\tNOPUNCT\t0\tI-<grantNumber>\n" +
             "]\t]\t]\t]\t]\t]\t]\t]\t]\t]\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\tI-<other>\n" +
-            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>";
+            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>"
 
-        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input)
 
         val (element, fundingAcknowledgmentParse) = target.getExtractionResult(tokens, results)
 
@@ -202,7 +202,7 @@ class FundingAcknowledgementParserTest {
     @Test
     fun testGetExtractionResultNew1_ShouldReturnCorrectElementsAndPositions() {
 
-        val input = "Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript.";
+        val input = "Our warmest thanks to Patrice Lopez, the author of Grobid [22], DeLFT [20], and other open-source projects for his continuous support and inspiration with ideas, suggestions, and fruitful discussions. We thank Pedro Baptista de Castro for his support during this work. Special thanks to Erina Fujita for useful tips on the manuscript."
 
         val results: String = "Our\tour\tO\tOu\tOur\tOur\tr\tur\tOur\tOur\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
             "warmest\twarmest\tw\twa\twar\twarm\tt\tst\test\tmest\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
@@ -269,9 +269,9 @@ class FundingAcknowledgementParserTest {
             "on\ton\to\ton\ton\ton\tn\ton\ton\ton\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
             "the\tthe\tt\tth\tthe\tthe\te\the\tthe\tthe\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
             "manuscript\tmanuscript\tm\tma\tman\tmanu\tt\tpt\tipt\tript\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
-            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>";
+            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>"
 
-        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input)
 
         val (spans, statement) = target.getExtractionResult(tokens, results)
 
@@ -284,22 +284,22 @@ class FundingAcknowledgementParserTest {
 
         assertThat(spans, hasSize(3))
         val span0 = spans[0]
-        val offsetPosition0 = span0.left
-        val element0 = span0.right
+        val offsetPosition0 = span0.offsetPosition
+        val element0 = span0.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition0.start, offsetPosition0.end)), `is`("Patrice Lopez"))
         assertThat(element0.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Patrice Lopez</rs>"))
 
         val span1 = spans[1]
-        val offsetPosition1 = span1.left
-        val element1 = span1.right
+        val offsetPosition1 = span1.offsetPosition
+        val element1 = span1.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition1.start, offsetPosition1.end)), `is`("Pedro Baptista de Castro"))
         assertThat(element1.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Pedro Baptista de Castro</rs>"))
 
         val span2 = spans[2]
-        val offsetPosition2 = span2.left
-        val element2 = span2.right
+        val offsetPosition2 = span2.offsetPosition
+        val element2 = span2.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition2.start, offsetPosition2.end)), `is`("Erina Fujita"))
         assertThat(element2.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Erina Fujita</rs>"))
@@ -307,7 +307,7 @@ class FundingAcknowledgementParserTest {
 
     @Test
     fun testGetExtractionResultNew2_ShouldReturnCorrectElementsAndPositions() {
-        val input = "This work was partly supported by MEXT Program: Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials) Grant Number [JPMXP1122715503].";
+        val input = "This work was partly supported by MEXT Program: Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials) Grant Number [JPMXP1122715503]."
 
         val results: String = "This\tthis\tT\tTh\tThi\tThis\ts\tis\this\tThis\tLINESTART\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<other>\n" +
             "work\twork\tw\two\twor\twork\tk\trk\tork\twork\tLINEIN\tNOCAPS\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<other>\n" +
@@ -343,9 +343,9 @@ class FundingAcknowledgementParserTest {
             "[\t[\t[\t[\t[\t[\t[\t[\t[\t[\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tOPENBRACKET\t0\t<other>\n" +
             "JPMXP1122715503\tjpmxp1122715503\tJ\tJP\tJPM\tJPMX\t3\t03\t503\t5503\tLINEIN\tALLCAP\tCONTAINSDIGITS\t0\t0\t0\tNOPUNCT\t0\tI-<grantNumber>\n" +
             "]\t]\t]\t]\t]\t]\t]\t]\t]\t]\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tENDBRACKET\t0\tI-<other>\n" +
-            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>";
+            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\tLINEEND\tALLCAP\tNODIGIT\t1\t0\t0\tDOT\t0\t<other>"
 
-        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input)
 
         val (spans, statement) = target.getExtractionResult(tokens, results)
 
@@ -355,22 +355,22 @@ class FundingAcknowledgementParserTest {
 
         assertThat(spans, hasSize(3))
         val span0 = spans[0]
-        val offsetPosition0 = span0.left
-        val element0 = span0.right
+        val offsetPosition0 = span0.offsetPosition
+        val element0 = span0.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition0.start, offsetPosition0.end)), `is`("MEXT"))
         assertThat(element0.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"funder\">MEXT</rs>"))
 
         val span1 = spans[1]
-        val offsetPosition1 = span1.left
-        val element1 = span1.right
+        val offsetPosition1 = span1.offsetPosition
+        val element1 = span1.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition1.start, offsetPosition1.end)), `is`("Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials)"))
         assertThat(element1.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"programName\">Data Creation and Utilization-Type Material Research and Development Project (Digital Transformation Initiative Center for Magnetic Materials)</rs>"))
 
         val span2 = spans[2]
-        val offsetPosition2 = span2.left
-        val element2 = span2.right
+        val offsetPosition2 = span2.offsetPosition
+        val element2 = span2.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition2.start, offsetPosition2.end)), `is`("JPMXP1122715503"))
         assertThat(element2.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"grantNumber\">JPMXP1122715503</rs>"))
@@ -378,7 +378,7 @@ class FundingAcknowledgementParserTest {
 
     @Test
     fun testGetExtractionResult_ErrorCase_ShouldReturnCorrectElementsAndPositions() {
-        val input = "Christophe Castagne, Claudie Marec, Claudie Marec, Claudio Stalder,";
+        val input = "Christophe Castagne, Claudie Marec, Claudie Marec, Claudio Stalder,"
 
         val results: String = "Christophe\tchristophe\tC\tCh\tChr\tChri\te\the\tphe\tophe\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\tI-<person>\n" +
             "Castagne\tcastagne\tC\tCa\tCas\tCast\te\tne\tgne\tagne\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
@@ -393,7 +393,7 @@ class FundingAcknowledgementParserTest {
             "Stalder\tstalder\tS\tSt\tSta\tStal\tr\ter\tder\tlder\tLINEIN\tINITCAP\tNODIGIT\t0\t0\t0\tNOPUNCT\t0\t<person>\n" +
             ",\t,\t,\t,\t,\t,\t,\t,\t,\t,\tLINEIN\tALLCAP\tNODIGIT\t1\t0\t0\tCOMMA\t0\tI-<other>\n"
 
-        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);
+        val tokens: List<LayoutToken> = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input)
 
         val (spans, statement) = target.getExtractionResult(tokens, results)
 
@@ -403,22 +403,22 @@ class FundingAcknowledgementParserTest {
 
         assertThat(spans, hasSize(4))
         val span0 = spans[0]
-        val offsetPosition0 = span0.left
-        val element0 = span0.right
+        val offsetPosition0 = span0.offsetPosition
+        val element0 = span0.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition0.start, offsetPosition0.end)), `is`("Christophe Castagne"))
         assertThat(element0.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Christophe Castagne</rs>"))
 
         val span1 = spans[1]
-        val offsetPosition1 = span1.left
-        val element1 = span1.right
+        val offsetPosition1 = span1.offsetPosition
+        val element1 = span1.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition1.start, offsetPosition1.end)), `is`("Claudie Marec"))
         assertThat(element1.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Claudie Marec</rs>"))
 
         val span2 = spans[2]
-        val offsetPosition2 = span2.left
-        val element2 = span2.right
+        val offsetPosition2 = span2.offsetPosition
+        val element2 = span2.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition2.start, offsetPosition2.end)), `is`("Claudie Marec"))
         assertThat(element2.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Claudie Marec</rs>"))
@@ -428,8 +428,8 @@ class FundingAcknowledgementParserTest {
         assertThat(offsetPosition2.end, `is`(not(offsetPosition1.end)))
 
         val span3 = spans[3]
-        val offsetPosition3 = span3.left
-        val element3 = span3.right
+        val offsetPosition3 = span3.offsetPosition
+        val element3 = span3.annotationNode
 
         assertThat(LayoutTokensUtil.toText(tokens.subList(offsetPosition3.start, offsetPosition3.end)), `is`("Claudio Stalder"))
         assertThat(element3.toXML(), `is`("<rs xmlns=\"http://www.tei-c.org/ns/1.0\" type=\"person\">Claudio Stalder</rs>"))