From 90faaa78756098762c68c4a5d69451513041ed79 Mon Sep 17 00:00:00 2001 From: GeraldJansen Date: Sat, 13 Jan 2024 16:37:27 +0100 Subject: [PATCH] Require whitespace before tags in description field As of v3.0.3, hamster harvests single-word #hash tags from the description field. This PR tweaks the regex used to extract the tags so that it requires whitespace before the # character. This prevents harvesting a tag from a pattern like example.com/page#anchor, as mentioned here: https://github.com/projecthamster/hamster/issues/753#issuecomment-1884597243. This does not affect any of the test cases in test_stuff.py or the example from the input.page documentation. --- src/hamster/lib/parsing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hamster/lib/parsing.py b/src/hamster/lib/parsing.py index aacd200d8..9820b0d58 100644 --- a/src/hamster/lib/parsing.py +++ b/src/hamster/lib/parsing.py @@ -19,6 +19,7 @@ """, flags=re.VERBOSE) tags_in_description = re.compile(r""" + \s \# (?P [a-zA-Z] # Starts with an alphabetic character (digits excluded)