Skip to content

Commit

Permalink
fix: issue with tokens that match prefixes (#106)
Browse files: browse the repository at this point in the history
  • Loading branch information
juanjoDiaz committed Jun 15, 2023
1 parent fdfff3d commit 8c06597
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 0 deletions.
3 changes: 3 additions & 0 deletions simplemma/strategies/prefix_decomposition.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -85,6 +85,9 @@ def get_lemma(self, token: str, lang: str) -> Optional[str]:
return None
prefix = prefix_match[1]

if prefix == token:
return None

subword = self._dictionary_lookup.get_lemma(token[len(prefix) :], lang)
if subword is None:
return None
Expand Down
3 changes: 3 additions & 0 deletions tests/strategies/test_strategies.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@
DefaultStrategy,
GreedyDictionaryLookupStrategy,
AffixDecompositionStrategy,
PrefixDecompositionStrategy,
)


Expand Down Expand Up @@ -51,3 +52,5 @@ def test_search() -> None:
)

assert AffixDecompositionStrategy(greedy=True).get_lemma("ccc", "de") is None

assert PrefixDecompositionStrategy().get_lemma("auf", "de") is None

0 comments on commit 8c06597

Please sign in to comment.