Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/1856 support regex based recommendations #2000

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified inception-active-learning/src/media/active_learning_icon.pptx
Binary file not shown.
35 changes: 35 additions & 0 deletions inception-app-webapp/hs_err_pid15048.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#
# A fatal error has been detected by the Java Runtime Environment:
#
# SIGSEGV (0xb) at pc=0x00007f92b74b8401, pid=15048, tid=15064
#
# JRE version: OpenJDK Runtime Environment (11.0.9.1+1) (build 11.0.9.1+1-Ubuntu-0ubuntu1.18.04)
# Java VM: OpenJDK 64-Bit Server VM (11.0.9.1+1-Ubuntu-0ubuntu1.18.04, mixed mode, sharing, tiered, compressed oops, g1 gc, linux-amd64)
# Problematic frame:
# C [libjimage.so+0x2401]
#
# Core dump will be written. Default location: Core dumps may be processed with "/usr/share/apport/apport %p %s %c %d %P %E" (or dumping to /home/christoph/Desktop/Arbeit/inception/inception-app-webapp/core.15048)
#
# If you would like to submit a bug report, please visit:
# https://bugs.launchpad.net/ubuntu/+source/openjdk-lts
#

--------------- S U M M A R Y ------------

Command Line: -agentlib:jdwp=transport=dt_socket,suspend=y,address=localhost:46701 -javaagent:/home/christoph/.eclipse/360744286_linux_gtk_x86_64/configuration/org.eclipse.osgi/222/0/.cp/lib/javaagent-shaded.jar -Dfile.encoding=UTF-8 de.tudarmstadt.ukp.inception.INCEpTION

Host: Intel(R) Core(TM) i3-4005U CPU @ 1.70GHz, 4 cores, 3G, Ubuntu 18.04.5 LTS
Time: Tue Jan 26 13:39:43 2021 CET elapsed time: 212.402363 seconds (0d 0h 3m 32s)

--------------- T H R E A D ---------------

Current thread (0x00007f92b013c800): JavaThread "Signal Dispatcher" daemon [_thread_in_vm, id=15064, stack(0x00007f9290bbe000,0x00007f9290cbf000)]

Stack: [0x00007f9290bbe000,0x00007f9290cbf000], sp=0x00007f9290cbc1e0, free space=1016k
Native frames: (J=compiled Java code, A=aot compiled Java code, j=interpreted, Vv=VM code, C=native code)
C [libjimage.so+0x2401]
C [libjimage.so+0x30ea]
C [libjimage.so+0x35c6] JIMAGE_FindResource+0xd6
V [libjvm.so+0x5bf44e]
V [libjvm.so+0x5c21b0]
V [libjvm.so+0xe427d5]
1,709 changes: 1,709 additions & 0 deletions inception-app-webapp/hs_err_pid16755.log

Large diffs are not rendered by default.

1,566 changes: 1,566 additions & 0 deletions inception-app-webapp/hs_err_pid19667.log

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion inception-app-webapp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,10 @@
<groupId>de.tudarmstadt.ukp.inception.app</groupId>
<artifactId>inception-sharing</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.inception.app</groupId>
<artifactId>inception-regex-recommender</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
Expand Down Expand Up @@ -790,6 +794,7 @@
<usedDependency>de.tudarmstadt.ukp.inception.app:inception-ui-curation</usedDependency>
<usedDependency>de.tudarmstadt.ukp.inception.app:inception-ui-monitoring</usedDependency>
<usedDependency>de.tudarmstadt.ukp.inception.app:inception-sharing</usedDependency>
<usedDependency>de.tudarmstadt.ukp.inception.app:inception-regex-recommender</usedDependency>
<!-- INCEpTION annotation editor modules - used via Spring -->
<usedDependency>de.tudarmstadt.ukp.inception.app:inception-brat-editor</usedDependency>
<usedDependency>de.tudarmstadt.ukp.inception.app:inception-html-editor</usedDependency>
Expand Down Expand Up @@ -1097,4 +1102,4 @@
</properties>
</profile>
</profiles>
</project>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationException;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommenderContext;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommenderContext.Key;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.gazeteer.GazeteerService;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.gazeteer.GazeteerServiceImpl;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.Gazeteer;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.GazeteerEntry;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.GazeteerEntryImpl;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.trie.Trie;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.trie.WhitespaceNormalizingSanitizer;

Expand All @@ -70,7 +70,7 @@ public class StringMatchingRecommender
private final Logger log = LoggerFactory.getLogger(getClass());
private final StringMatchingRecommenderTraits traits;

private final GazeteerService gazeteerService;
private final GazeteerServiceImpl gazeteerService;

public StringMatchingRecommender(Recommender aRecommender,
StringMatchingRecommenderTraits aTraits)
Expand All @@ -79,10 +79,9 @@ public StringMatchingRecommender(Recommender aRecommender,
}

public StringMatchingRecommender(Recommender aRecommender,
StringMatchingRecommenderTraits aTraits, GazeteerService aGazeteerService)
StringMatchingRecommenderTraits aTraits, GazeteerServiceImpl aGazeteerService)
{
super(aRecommender);

traits = aTraits;
gazeteerService = aGazeteerService;
}
Expand All @@ -92,13 +91,13 @@ public boolean isReadyForPrediction(RecommenderContext aContext)
{
return aContext.get(KEY_MODEL).map(Objects::nonNull).orElse(false);
}

public void pretrain(List<GazeteerEntry> aData, RecommenderContext aContext)
public void pretrain(List<GazeteerEntryImpl> aData, RecommenderContext aContext)
{
Trie<DictEntry> dict = aContext.get(KEY_MODEL).orElseGet(this::createTrie);

if (aData != null) {
for (GazeteerEntry entry : aData) {
for (GazeteerEntryImpl entry : aData) {
learn(dict, entry.text, entry.label);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngine;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngineFactoryImplBase;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.config.StringMatchingRecommenderAutoConfiguration;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.gazeteer.GazeteerService;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.gazeteer.GazeteerServiceImpl;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.settings.StringMatchingRecommenderTraitsEditor;

/**
Expand All @@ -48,9 +48,9 @@ public class StringMatchingRecommenderFactory
// and without the database starting to refer to non-existing recommendation tools.
public static final String ID = "de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.StringMatchingRecommender";

private final GazeteerService gazeteerService;

public StringMatchingRecommenderFactory(GazeteerService aGazeteerService)
private final GazeteerServiceImpl gazeteerService;
public StringMatchingRecommenderFactory(GazeteerServiceImpl aGazeteerService)
{
gazeteerService = aGazeteerService;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import de.tudarmstadt.ukp.inception.recommendation.api.RecommendationService;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.StringMatchingRecommenderFactory;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.exporter.GazeteerExporter;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.gazeteer.GazeteerService;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.gazeteer.GazeteerServiceImpl;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.ner.StringMatchingNerClassificationToolFactory;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.pos.StringMatchingPosClassificationToolFactory;
Expand All @@ -47,14 +46,14 @@ public class StringMatchingRecommenderAutoConfiguration
@Bean
@Autowired
public GazeteerExporter gazeteerExporter(RecommendationService aRecommendationService,
GazeteerService aGazeteerService)
GazeteerServiceImpl aGazeteerService)
{
return new GazeteerExporter(aRecommendationService, aGazeteerService);
}

@Bean
@Autowired
public GazeteerService gazeteerService(RepositoryProperties aRepositoryProperties)
public GazeteerServiceImpl gazeteerService(RepositoryProperties aRepositoryProperties)
{
return new GazeteerServiceImpl(aRepositoryProperties, entityManager);
}
Expand All @@ -74,8 +73,8 @@ public StringMatchingPosClassificationToolFactory stringMatchingPosClassificatio
@Bean
@Autowired
public StringMatchingRecommenderFactory stringMatchingRecommenderFactory(
GazeteerService aGazeteerService)
{
GazeteerServiceImpl aGazeteerService)
{
return new StringMatchingRecommenderFactory(aGazeteerService);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public interface GazeteerService
/**
* Loads the gazeteer.
*/
List<GazeteerEntry> readGazeteerFile(Gazeteer aGaz) throws IOException;
List<? extends GazeteerEntry> readGazeteerFile(Gazeteer aGaz) throws IOException;

boolean existsGazeteer(Recommender aRecommender, String aName);
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,15 @@
import de.tudarmstadt.ukp.inception.recommendation.api.model.Recommender;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.config.StringMatchingRecommenderAutoConfiguration;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.Gazeteer;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.GazeteerEntry;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.GazeteerEntryImpl;

/**
* <p>
* This class is exposed as a Spring Component via
* {@link StringMatchingRecommenderAutoConfiguration#gazeteerService}.
* </p>
*/

public class GazeteerServiceImpl
implements GazeteerService
{
Expand Down Expand Up @@ -163,20 +164,22 @@ public void deleteGazeteers(Gazeteer aGazeteer) throws IOException
}

@Override
public List<GazeteerEntry> readGazeteerFile(Gazeteer aGaz) throws IOException
public List<GazeteerEntryImpl> readGazeteerFile(Gazeteer aGaz)
throws IOException
{
File file = getGazeteerFile(aGaz);

List<GazeteerEntry> data = new ArrayList<>();

List<GazeteerEntryImpl> data = new ArrayList<>();
try (InputStream is = new FileInputStream(file)) {
parseGazeteer(aGaz, is, data);
}

return data;
}

public void parseGazeteer(Gazeteer aGaz, InputStream aStream, List<GazeteerEntry> aTarget)


public void parseGazeteer(Gazeteer aGaz, InputStream aStream, List<GazeteerEntryImpl> aTarget)
throws IOException
{
int lineNumber = 0;
Expand All @@ -195,7 +198,7 @@ public void parseGazeteer(Gazeteer aGaz, InputStream aStream, List<GazeteerEntry
String text = trimToNull(fields[0]);
String label = trimToNull(fields[1]);
if (label != null && text != null) {
aTarget.add(new GazeteerEntry(text, label));
aTarget.add(new GazeteerEntryImpl(text, label));
}
}
else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,44 +19,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model;

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
package de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model;

public class GazeteerEntry
public interface GazeteerEntry
{
public final String text;
public final String label;

public GazeteerEntry(String aText, String aLabel)
{
super();
text = aText;
label = aLabel;
}
public boolean equals(final Object other);

@Override
public boolean equals(final Object other)
{
if (!(other instanceof GazeteerEntry)) {
return false;
}
GazeteerEntry castOther = (GazeteerEntry) other;
return new EqualsBuilder().append(text, castOther.text).append(label, castOther.label)
.isEquals();
}
public int hashCode();

@Override
public int hashCode()
{
return new HashCodeBuilder().append(text).append(label).toHashCode();
}
public String toString();

@Override
public String toString()
{
return new ToStringBuilder(this).append("text", text).append("label", label).toString();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright 2019
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model;

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;

/**
 * Default {@link GazeteerEntry} implementation: a pair of a text and the label associated with
 * it. Both values are fixed at construction time and exposed as public final fields.
 * <p>
 * Two entries are considered equal when both their text and their label are equal.
 * </p>
 */
public class GazeteerEntryImpl
    implements GazeteerEntry
{
    /** The text of this entry. */
    public final String text;

    /** The label assigned to {@link #text}. */
    public final String label;

    /**
     * Creates a new entry.
     *
     * @param aText
     *            the text of the entry.
     * @param aLabel
     *            the label belonging to the text.
     */
    public GazeteerEntryImpl(String aText, String aLabel)
    {
        this.text = aText;
        this.label = aLabel;
    }

    /**
     * Compares this entry with another object.
     *
     * @param other
     *            the object to compare with.
     * @return {@code true} if {@code other} is a {@link GazeteerEntryImpl} with the same text and
     *         label, {@code false} otherwise (including when {@code other} is {@code null}).
     */
    @Override
    public boolean equals(final Object other)
    {
        if (other instanceof GazeteerEntryImpl) {
            final GazeteerEntryImpl that = (GazeteerEntryImpl) other;
            final EqualsBuilder comparison = new EqualsBuilder();
            comparison.append(this.text, that.text);
            comparison.append(this.label, that.label);
            return comparison.isEquals();
        }
        return false;
    }

    /**
     * Computes the hash code from the same fields that {@link #equals(Object)} compares, namely
     * text followed by label.
     *
     * @return the hash code of this entry.
     */
    @Override
    public int hashCode()
    {
        final HashCodeBuilder hash = new HashCodeBuilder();
        hash.append(this.text);
        hash.append(this.label);
        return hash.toHashCode();
    }

    /**
     * Renders this entry via {@link ToStringBuilder}, listing the {@code text} and {@code label}
     * fields.
     *
     * @return a string representation of this entry.
     */
    @Override
    public String toString()
    {
        final ToStringBuilder rendering = new ToStringBuilder(this);
        rendering.append("text", this.text);
        rendering.append("label", this.label);
        return rendering.toString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
import de.tudarmstadt.ukp.inception.recommendation.api.evaluation.PercentageBasedSplitter;
import de.tudarmstadt.ukp.inception.recommendation.api.model.Recommender;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommenderContext;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.GazeteerEntry;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.GazeteerEntryImpl;
import de.tudarmstadt.ukp.inception.support.test.recommendation.RecommenderTestHelper;

public class StringMatchingRecommenderTest
Expand Down Expand Up @@ -164,9 +164,9 @@ public void thatPredictionForCharacterLevelLayerWorks() throws Exception

RecommenderTestHelper.addScoreFeature(cas, NamedEntity.class, "value");

List<GazeteerEntry> gazeteer = new ArrayList<>();
gazeteer.add(new GazeteerEntry("John Smith", "ORG"));
gazeteer.add(new GazeteerEntry("Peter John", "LOC"));
List<GazeteerEntryImpl> gazeteer = new ArrayList<>();
gazeteer.add(new GazeteerEntryImpl("John Smith", "ORG"));
gazeteer.add(new GazeteerEntryImpl("Peter John", "LOC"));

sut.pretrain(gazeteer, context);

Expand All @@ -193,8 +193,8 @@ public void thatPredictionForCrossSentenceLayerWorks() throws Exception

RecommenderTestHelper.addScoreFeature(cas, NamedEntity.class, "value");

List<GazeteerEntry> gazeteer = new ArrayList<>();
gazeteer.add(new GazeteerEntry("Smith . Peter", "ORG"));
List<GazeteerEntryImpl> gazeteer = new ArrayList<>();
gazeteer.add(new GazeteerEntryImpl("Smith . Peter", "ORG"));

sut.pretrain(gazeteer, context);

Expand Down Expand Up @@ -233,10 +233,10 @@ public void thatPredictionWithPretrainigWorks() throws Exception
CAS cas = casList.get(0);
RecommenderTestHelper.addScoreFeature(cas, NamedEntity.class, "value");

List<GazeteerEntry> gazeteer = new ArrayList<>();
gazeteer.add(new GazeteerEntry("Toyota", "ORG"));
gazeteer.add(new GazeteerEntry("Deutschland", "LOC"));
gazeteer.add(new GazeteerEntry("Deutschland", "GPE"));
List<GazeteerEntryImpl> gazeteer = new ArrayList<>();
gazeteer.add(new GazeteerEntryImpl("Toyota", "ORG"));
gazeteer.add(new GazeteerEntryImpl("Deutschland", "LOC"));
gazeteer.add(new GazeteerEntryImpl("Deutschland", "GPE"));

sut.pretrain(gazeteer, context);

Expand Down
Loading