diff --git a/TUTORIAL.md b/TUTORIAL.md index 9b3ac22c..f1852898 100644 --- a/TUTORIAL.md +++ b/TUTORIAL.md @@ -301,6 +301,21 @@ options and their defaults are: different fields, you can supply a list here. The extraction will fail if not all fields are matched. +### Priority + +If multiple templates match a single invoice, the one with the +highest priority will be used. The default `priority` value (assigned if +missing) is 5. + +This property only needs to be specified when designing generic or +very specific templates. + +Suggested values: + +- 0-4: accounting/invoice software specific template +- 5: company specific template +- 6-10: company department/unit specific template + ### Example of template using most options issuer: Free Mobile diff --git a/src/invoice2data/extract/loader.py b/src/invoice2data/extract/loader.py index 95418b49..f8ad030d 100644 --- a/src/invoice2data/extract/loader.py +++ b/src/invoice2data/extract/loader.py @@ -107,6 +107,9 @@ def read_templates(folder=None): elif type(tpl["exclude_keywords"]) is not list: tpl["exclude_keywords"] = [tpl["exclude_keywords"]] + if 'priority' not in tpl.keys(): + tpl['priority'] = 5 + output.append(InvoiceTemplate(tpl)) logger.info("Loaded %d templates from %s", len(output), folder) diff --git a/src/invoice2data/main.py b/src/invoice2data/main.py index c322b179..f750975a 100644 --- a/src/invoice2data/main.py +++ b/src/invoice2data/main.py @@ -79,10 +79,6 @@ def extract_data(invoicefile, templates=None, input_module=None): 'currency': 'INR', 'desc': 'Invoice IBZY2087 from OYO'} """ - if templates is None: - templates = read_templates() - - # print(templates[0]) if input_module is None: if invoicefile.lower().endswith('.txt'): @@ -98,18 +94,18 @@ def extract_data(invoicefile, templates=None, input_module=None): logger.debug("START pdftotext result ===========================\n" + extracted_str) logger.debug("END pdftotext result =============================") - for t in templates: - optimized_str 
= t.prepare_input(extracted_str) - - if t.matches_input(optimized_str): - logger.info("Using %s template", t["template_name"]) - # Call extract with entire text and the invoicefile path - # The path is used if an area is called as a field option - return t.extract(optimized_str, invoicefile, input_module) - - logger.error("No template for %s", invoicefile) - return False + if templates is None: + templates = read_templates() + templates = filter(lambda t: t.matches_input(t.prepare_input(extracted_str)), templates) + templates = sorted(templates, key=lambda k: k['priority'], reverse=True) + if not templates: + logger.error("No template for %s", invoicefile) + return False + t = templates[0] + logger.info("Using %s template", t["template_name"]) + optimized_str = t.prepare_input(extracted_str) + return t.extract(optimized_str, invoicefile, input_module) def create_parser(): """Returns argument parser """