From 7bed8412760043656a48377da2d5e6f01c92650d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 1 Oct 2022 19:18:35 +0200 Subject: [PATCH] templates: pl: add templates for InsERT's software issued invoices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit InsERT is one of the most popular Polish accounting software companies. They have two very common products: 1. Subiekt nexo 2. Subiekt GT Add 2 templates to parse invoices generated by the above products. Those templates are software-specific so they have priority set to 3. This allows parsing a lot of invoices issued in Poland. Signed-off-by: Rafał Miłecki --- .../templates/pl/pl.insert.subiekt-gt.yml | 43 +++++++++++++++++++ .../templates/pl/pl.insert.subiekt-nexo.yml | 33 ++++++++++++++ src/invoice2data/main.py | 1 + 3 files changed, 77 insertions(+) create mode 100644 src/invoice2data/extract/templates/pl/pl.insert.subiekt-gt.yml create mode 100644 src/invoice2data/extract/templates/pl/pl.insert.subiekt-nexo.yml diff --git a/src/invoice2data/extract/templates/pl/pl.insert.subiekt-gt.yml b/src/invoice2data/extract/templates/pl/pl.insert.subiekt-gt.yml new file mode 100644 index 00000000..2c7fa0ce --- /dev/null +++ b/src/invoice2data/extract/templates/pl/pl.insert.subiekt-gt.yml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: MIT +keywords: + - 'Miejsce wystawienia:' + - 'Data wystawienia:' + - 'Sprzedawca:' + - 'Nabywca:' + - 'według stawki VAT' + - 'Razem do zapłaty:' + - 'Wystawił\(a\)' + - 'Odebrał\(a\)' + - 'Podpis osoby upoważnionej' +fields: + issuer: + parser: regex + regex: Sprzedawca:.*\n(.*?)\s{3,} + vatin: + parser: regex + regex: NIP:\s+(\d{10}) + type: int + group: first + date: + parser: regex + regex: + - Data wystawienia:\n.*(\d{2}\.\d{2}\.\d{4}) + - Data wystawienia:\n.*(\d{4}-\d{2}-\d{2}) + type: date + invoice_number: + parser: regex + regex: Faktura VAT\s+(.*?)\s+oryginał + amount: + parser: regex + regex: Razem do 
zapłaty:\s+([\d\s]+,[\d][\d]) + type: float + nrb: + parser: regex + regex: PLN:\s+([0-9]{2}(?:\s?[0-9]{4}){6}) +options: + currency: PLN + date_formats: + - '%d.%m.%Y' + - '%Y-%m-%d' + decimal_separator: ',' +priority: 3 diff --git a/src/invoice2data/extract/templates/pl/pl.insert.subiekt-nexo.yml b/src/invoice2data/extract/templates/pl/pl.insert.subiekt-nexo.yml new file mode 100644 index 00000000..25877f25 --- /dev/null +++ b/src/invoice2data/extract/templates/pl/pl.insert.subiekt-nexo.yml @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: MIT +keywords: + - 'InsERT nexo' +fields: + issuer: + parser: regex + regex: Sprzedawca.*\n(.*?)\s{3,} + vatin: + parser: regex + regex: NIP:\s+(\d{10}) + type: int + group: first + date: + parser: regex + regex: Data wystawienia\s+(\d{2}-\d{2}-\d{4}) + type: date + invoice_number: + parser: regex + regex: Faktura VAT sprzedaży\s+(.*) + group: first + amount: + parser: regex + regex: Razem do zapłaty:\s+([\d\s]+,[\d][\d]) + type: float + nrb: + parser: regex + regex: PL\s+([0-9]{2}(?:\s?[0-9]{4}){6}) +options: + currency: PLN + date_formats: + - '%d-%m-%Y' + decimal_separator: ',' +priority: 3 diff --git a/src/invoice2data/main.py b/src/invoice2data/main.py index f750975a..d0858967 100644 --- a/src/invoice2data/main.py +++ b/src/invoice2data/main.py @@ -107,6 +107,7 @@ def extract_data(invoicefile, templates=None, input_module=None): optimized_str = t.prepare_input(extracted_str) return t.extract(optimized_str, invoicefile, input_module) + def create_parser(): """Returns argument parser """