Skip to content

Commit

Permalink
training cases
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Aug 20, 2023
1 parent 8163bba commit d49b439
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 2 deletions.
14 changes: 13 additions & 1 deletion grobid-core/src/main/java/org/grobid/core/data/Funder.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,43 @@

import org.grobid.core.utilities.TextUtilities;
import org.grobid.core.utilities.OffsetPosition;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.lang.Language;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.HashMap;

/**
* Class for representing a funding organization.
* Optionally the funder is identified by its DOI at CrossRef funder registry.
*/

public class Funder {
// preferred full name
private String fullName = null;
private List<LayoutToken> fullNameLayoutTokens = new ArrayList<>();

// full names by languages
private Map<Language, List<String>> fullNameByLanguage = new HashMap<>();

private String abbreviatedName = null;
private List<LayoutToken> abbreviatedNameLayoutTokens = new ArrayList<>();

// abbreviated names by languages
private Map<Language, List<String>> abbreviatedNameByLanguage = new HashMap<>();

private String doi = null;

// country or regional area (e.g. EU)
private String country = null;
private String countryCode = null;
private String address = null;
private String region = null;

private Date startActiveDate = null;
private Date endActiveDate = null;
Expand All @@ -37,6 +47,8 @@ public class Funder {

private String url = null;

private String crossrefFunderType = null;

private List<LayoutToken> layoutTokens = new ArrayList<>();

static public Funder EMPTY = new Funder("unknown");
Expand Down
20 changes: 20 additions & 0 deletions grobid-core/src/main/java/org/grobid/core/data/Funding.java
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,19 @@ public void inferAcronyms() {
}
}
}

// check if full name is an acronym
if (funder.getAbbreviatedName() == null && funder.getFullName() != null) {
for (Map.Entry<String,String> entry : Funder.prefixFounders.entrySet()) {
if (funder.getFullName().equals(entry.getKey())) {
this.funder.setAbbreviatedName(entry.getKey());
this.funder.setAbbreviatedNameLayoutTokens(this.funder.getFullNameLayoutTokens());
this.funder.setFullName(entry.getValue());
this.funder.setFullNameLayoutTokens(null);
break;
}
}
}
}

public String toString() {
Expand Down Expand Up @@ -352,6 +365,13 @@ public String toTEI(int nbIndent) {
tei.append("\t");
tei.append("<idno type=\"grant-number\">"+TextUtilities.HTMLEncode(grantNumber)+"</idno>\n");
}

if (grantName != null) {
for(int i=0; i<nbIndent+1; i++)
tei.append("\t");
tei.append("<orgName type=\"grant-name\">"+TextUtilities.HTMLEncode(grantName)+"</orgName>\n");
}

if (projectFullName != null) {
for(int i=0; i<nbIndent+1; i++)
tei.append("\t");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,8 @@
<acknowledgment>This work was supported by the <funder>Université de Strasbourg</funder> and <funder>Centre National de la Recherche Scientifique (CNRS)</funder>, by the <funder>French Agence Nationale de la Recherche</funder> (<grantNumber>ANR-18-CE12-0021-01</grantNumber> '<projectName>Polyglot</projectName>') and by the <programName>French National Program 'Investissement d’Avenir'</programName> (<grantNumber>ANR-11-LABX-0057</grantNumber> <projectName>'MitoCross' LabEx</projectName>). MH has a fellowship from <funder>MitoCross</funder> and JR has a fellowship from <funder>Polyglot</funder>. This work of the <projectName>Interdisciplinary Thematic Institute IMCBio</projectName>, conducted as part of the <programName>ITI 2021–2028 program</programName> of the <institution>University of Strasbourg</institution>, <institution>CNRS</institution> and <institution>Inserm</institution>, was supported by <projectName>IdEx Unistra</projectName> (<grantNumber>ANR-10-IDEX-0002</grantNumber>), <projectName>STRAT’US</projectName> (<grantNumber>ANR 20-SFRI-0012</grantNumber>) and <projectName>EUR IMCBio</projectName> (<grantNumber>ANR-17-EURE-0023</grantNumber>) under the framework of the <programName>French Investments for the Future Program</programName>.</acknowledgment>
<acknowledgment>Most of this work took place during the first author master internship at <institution>IRIT</institution>. His PhD at the <institution>University of Genova</institution>, is supported by the ITN-ETN project <projectName>TraDE-OPT</projectName> funded by the <funder>European Union</funder>'s <programName>Horizon 2020 research and innovation programme</programName> under the <grantName>Marie Skłodowska-Curie</grantName> grant agreement No <grantNumber>861137</grantNumber>. The second author would like to acknowledge the support of <funder>ANR</funder>-<projectName>3IA Artificial and Natural Intelligence Toulouse Institute</projectName>, <funder>Air Force Office of Scientific Research, Air Force Material Command, USAF</funder>, under grant numbers <grantNumber>FA9550-19-1-7026</grantNumber>, <grantNumber>FA9550-18-1-0226</grantNumber>, and <funder>ANR</funder> <projectName>MaSDOL</projectName> - <grantNumber>19-CE23-0017-01</grantNumber>. We warmly thank the anonymous referee for careful reading and relevant suggestions which improved the quality of the manuscript.</acknowledgment>
<acknowledgment>The <institution>laboratory of Human Evolutionary Genetics</institution> is supported by the <funder>Institut Pasteur</funder>, the <funder>Collège de France</funder>, the <funder>Centre Nationale de la Recherche Scientifique (CNRS)</funder>, the <funder>Agence Nationale de la Recherche (ANR)</funder> grants <projectName>LIFECHANGE</projectName> (<grantNumber>ANR-17- CE12-0018-02</grantNumber>), <projectName>CNSVIRGEN</projectName> (<grantNumber>ANR-19-CE15-0009-02</grantNumber>) and <projectName>COVID-19-POPCELL</projectName> (<grantNumber>ANR-21-CO14-003-01</grantNumber>), the <funder>French Government</funder>'s <programName>Investissement d’Avenir program</programName>, <projectName>Laboratoires d’Excellence 'Integrative Biology of Emerging Infectious Diseases'</projectName> (<grantNumber>ANR-10- LABX-62-IBEID</grantNumber>) and '<projectName>Milieu Intérieur</projectName>' (<grantNumber>ANR-10-LABX-69-01</grantNumber>), the <funder>Fondation pour la Recherche Médicale</funder> (Equipe FRM <grantNumber>DEQ20180339214</grantNumber>), the <funder>Fondation Allianz-Institut de France</funder>, and the <funder>Fondation de France</funder> (n°<grantNumber>00106080</grantNumber>)</acknowledgment>

<acknowledgment>Acknowledgments S.D.M.S. is supported by an <grantName>MRC career development award</grantName>. P.B. is supported by the <funder>Human Frontier Science Program</funder> (<grantNumber>CDA00069/2013 C</grantNumber>).-A.S. and U.P. are supported by the <funder>NIH NIGMS</funder> grant <grantNumber>U54 GM074945</grantNumber>. We thank the <institution>Xenopus laevis genome project consortium</institution> to provide gene annotation information from unpublished RNA-seq data. Especially, for the RNA-seq based gene model we used in this project, we thank <individual>Shuji Takahashi</individual>, <individual>Atsushi Toyoda</individual>, <individual>Yutaka Suzuki</individual>, <individual>Sumio Sugano</individual>, <individual>Asao Fujiyama</individual>, and <individual>Masanori Taira</individual> for sharing their unpublished RNA-seq data (the construction of RNAseq data sets was supported in part by <institution>KAKENHI</institution> (<grantName>Grant-in-Aid for Scientific Research</grantName>) on <programName>Innovative Areas "Genome Science"</programName> from the <funder>Ministry of Education, Culture, Sports, Science and Technology of Japan</funder>), and <individual>Taejoon Kwon</individual>, <individual>Shuji Takahashi</individual>, <individual>Toshiaki Tanaka</individual>, <individual>Edward Marcotte</individual> for gene model construction and validation.</acknowledgment>
<acknowledgment>Acknowledgements This work was partially funded by the <funder>National Institutes of Health (NIH)</funder>, <grantNumber>R01MH096906</grantNumber> [TY], <funder>NSF</funder> <grantNumber>OCI1131441</grantNumber> [RP], <funder>International Neuroinformatics Coordinating Facility (INCF)</funder> and the <funder>Max Planck Society</funder> [KJG, DSM]. We thank the INCF Neuroimaging Data Sharing task force members for their input during several discussions.</acknowledgment>
<acknowledgment>We thank <individual>Dr. Petter Holme</individual> for sharing the internet dating community dataset, and <individual>Dr. Gerald F. Davis</individual> for the American company director network dataset. This work was supported by grants from the <funder>National Research Foundation of Korea</funder> (<grantNumber>2010-0017649</grantNumber>, <grantNumber>2012M3A9B4028641</grantNumber>, <grantNumber>2012M3A9C7050151</grantNumber>) to I.L, and from the <funder>N.S.F.</funder>, <funder>N.I.H.</funder>, <funder>U. S. Army</funder> (<grantNumber>58343-MA</grantNumber>) and <funder>Welch Foundation</funder> (<grantNumber>F-1515</grantNumber>) to E.M.M.</acknowledgment>
</acknowledgments>
</TEI>
Original file line number Diff line number Diff line change
Expand Up @@ -157,5 +157,7 @@
<funding>supported by the <funder>ANR</funder> project <projectName>ASSK</projectName> (<grantNumber>ANR-18-CE40-0025-01</grantNumber>).</funding>
<funding>Work on thiol redox regulation in our group is supported by the <funder>Centre National de la Recherche Scientifique</funder>, by the <funder>University of Perpignan</funder> Via Domitia and by the <funder>Agence Nationale de la Recherche</funder> (<grantNumber>ANR-REPHARE19-CE12-0027</grantNumber> and <grantNumber>ANR-RoxRNase20-CE12-0025-01</grantNumber>).</funding>
<funding>This work was supported by the <funder>ANR</funder> project <projectName>NORMA</projectName> under grant <grantNumber>ANR-19-CE40-0020-01</grantNumber>.</funding>
<funding>FINANCIAL DISCLOSURE This research was partially supported by a <funder>Royal Society</funder> <grantName>Research Grant</grantName> awarded to JFM and <funder>Wellcome Trust</funder> funding to DWL (grant number <grantNumber>098051</grantNumber>). JFM, MJH and MTS are supported by the <funder>Biosciences, Environment and Agriculture Alliance (BEAA) between Bangor University and Aberystwyth University</funder> and ADH is funded by a <funder>Bangor University</funder> <grantName>125th Anniversary Studentship</grantName>.</funding>
<funding>Funding <individual>Mesut Erzurumluoglu</individual> is a PhD student funded by the <funder>Medical Research Council (MRC UK)</funder>.</funding>

</fundings>

0 comments on commit d49b439

Please sign in to comment.