Skip to content

Commit

Permalink
Merge pull request #1106 from kermitt2/bugfix/sent-seg-ack-fund
Browse files Browse the repository at this point in the history
Add missing sentence segmentation in funding and acknowledgement
  • Loading branch information
lfoppiano authored Jun 9, 2024
2 parents cb7118d + bbca7dd commit 694f0ed
Show file tree
Hide file tree
Showing 11 changed files with 1,361 additions and 193 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/ci-build-unstable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ jobs:
- name: Test with Gradle Jacoco and Coveralls
run: ./gradlew test jacocoTestReport coveralls --no-daemon

- name: Publish Test Report
uses: mikepenz/action-junit-report@v4
if: success() || failure() # always run even if the previous step fails
with:
report_paths: '**/build/test-results/test/TEST-*.xml'

- name: Coveralls GitHub Action
uses: coverallsapp/github-action@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.crf
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ WORKDIR /opt/grobid

COPY --from=builder /opt/grobid .

ENV GROBID_SERVICE_OPTS "-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED"
ENV GROBID_SERVICE_OPTS "-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED"

CMD ["./grobid-service/bin/grobid-service"]

Expand Down
3 changes: 2 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ subprojects {
testImplementation 'org.easymock:easymock:5.1.0'
testImplementation "org.powermock:powermock-api-easymock:2.0.7"
testImplementation "org.powermock:powermock-module-junit4:2.0.7"
testImplementation "xmlunit:xmlunit:1.6"
testImplementation "org.xmlunit:xmlunit-matchers:2.10.0"
testImplementation "org.xmlunit:xmlunit-legacy:2.10.0"
testImplementation "org.hamcrest:hamcrest-all:1.3"
testImplementation 'org.jetbrains.kotlin:kotlin-test'
testImplementation "io.mockk:mockk:1.13.9"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.grobid.core.data;

import nu.xom.Element;
import org.grobid.core.utilities.OffsetPosition;

/**
* This class represents an annotation in an XML node.
* The annotation is composed of two pieces of information: the XML Element node and the offset position.
*/
public class AnnotatedXMLElement {

    /** XML element carrying the annotation. */
    private Element annotationNode;

    /** Offset position associated with the annotation. */
    private OffsetPosition offsetPosition;

    /**
     * Builds an annotation from its two parts.
     *
     * @param annotationNode the XML element of the annotation
     * @param offsetPosition the offset position of the annotation
     */
    public AnnotatedXMLElement(Element annotationNode, OffsetPosition offsetPosition) {
        this.offsetPosition = offsetPosition;
        this.annotationNode = annotationNode;
    }

    /**
     * @return the XML element of this annotation
     */
    public Element getAnnotationNode() {
        return this.annotationNode;
    }

    /**
     * @param annotationNode the XML element to store for this annotation
     */
    public void setAnnotationNode(Element annotationNode) {
        this.annotationNode = annotationNode;
    }

    /**
     * @return the offset position of this annotation
     */
    public OffsetPosition getOffsetPosition() {
        return this.offsetPosition;
    }

    /**
     * @param offsetPosition the offset position to store for this annotation
     */
    public void setOffsetPosition(OffsetPosition offsetPosition) {
        this.offsetPosition = offsetPosition;
    }
}
2 changes: 2 additions & 0 deletions grobid-core/src/main/java/org/grobid/core/data/Funder.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ public class Funder {
prefixFounders.put("NSF", "National Science Foundation");
prefixFounders.put("NIH", "National Institutes of Health");
prefixFounders.put("ERC", "European Research Council");
//Japanese government
prefixFounders.put("MEXT", "Ministry of Education, Culture, Sports, Science and Technology");
}

public Funder() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package org.grobid.core.data;

import java.util.ArrayList;
import java.util.List;

/**
* This class represents the funding / acknowledgement statement and holds its lists of fundings, persons and affiliations.
*/
public class FundingAcknowledgmentParse {

    // NOTE: a "statementAnnotations" list and its getter/setter previously existed here
    // only as disabled (commented-out) code; they are not part of this class.

    /** Fundings attached to the statement; starts as an empty list. */
    List<Funding> fundingList = new ArrayList<>();

    /** Persons attached to the statement; starts as an empty list. */
    List<Person> personList = new ArrayList<>();

    /** Affiliations attached to the statement; starts as an empty list. */
    List<Affiliation> affiliations = new ArrayList<>();

    /**
     * @return the list of fundings currently held (the internal list itself, not a copy)
     */
    public List<Funding> getFundings() {
        return this.fundingList;
    }

    /**
     * @param fundingList the list of fundings that replaces the current one
     */
    public void setFundings(List<Funding> fundingList) {
        this.fundingList = fundingList;
    }

    /**
     * @return the list of persons currently held (the internal list itself, not a copy)
     */
    public List<Person> getPersons() {
        return this.personList;
    }

    /**
     * @param personList the list of persons that replaces the current one
     */
    public void setPersons(List<Person> personList) {
        this.personList = personList;
    }

    /**
     * @return the list of affiliations currently held (the internal list itself, not a copy)
     */
    public List<Affiliation> getAffiliations() {
        return this.affiliations;
    }

    /**
     * @param fundingBodies the list of affiliations that replaces the current one
     */
    public void setAffiliations(List<Affiliation> fundingBodies) {
        this.affiliations = fundingBodies;
    }
}
21 changes: 7 additions & 14 deletions grobid-core/src/main/java/org/grobid/core/engines/Engine.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
package org.grobid.core.engines;

import org.apache.commons.lang3.tuple.Pair;
import nu.xom.Element;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.MutableTriple;

import nu.xom.Element;

import org.grobid.core.data.Affiliation;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.ChemicalEntity;
import org.grobid.core.data.PatentItem;
import org.grobid.core.data.Person;
import org.grobid.core.data.Funding;
import org.apache.commons.lang3.tuple.Pair;
import org.grobid.core.data.*;
import org.grobid.core.document.Document;
import org.grobid.core.document.DocumentSource;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
Expand All @@ -24,14 +16,15 @@
import org.grobid.core.utilities.Utilities;
import org.grobid.core.utilities.counters.CntManager;
import org.grobid.core.utilities.counters.impl.CntManagerFactory;

import org.grobid.core.utilities.crossref.CrossrefClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
* Class for managing the extraction of bibliographical information from PDF
Expand Down Expand Up @@ -1184,7 +1177,7 @@ public String processFundingAcknowledgement(String text, GrobidAnalysisConfig co
result.append(localResult.getLeft().toXML());

} catch (final Exception exp) {
throw new GrobidException("An exception occured while running Grobid funding-acknowledgement model.", exp);
throw new GrobidException("An exception occurred while running Grobid funding-acknowledgement model.", exp);
}

return result.toString();
Expand Down
Loading

0 comments on commit 694f0ed

Please sign in to comment.