From 7c38f9889cb849fe25cda31d60a258a84c5830fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 8 Oct 2022 22:19:20 +0200 Subject: [PATCH] templates: de: improve parsing QualityHosting lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single QualityHosting invoice position spans across multiple lines. For that reason it uses a very generic RegEx for middle lines: line: '^\s+(?P<desc>.+)$' That doesn't work well with multi-page invoices. It's because above RegEx matches page footer lines. That results in footer content getting extracted as invoice line "desc". Improve that situation by adding "last_line" RegEx matching position last line. That prevents parsing lines between last and first lines (e.g. footer content). Signed-off-by: Rafał Miłecki --- src/invoice2data/extract/templates/de/de.qualityhosting.yml | 1 + tests/compare/QualityHosting.json | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/invoice2data/extract/templates/de/de.qualityhosting.yml b/src/invoice2data/extract/templates/de/de.qualityhosting.yml index c9aa57cd..716bcd2d 100644 --- a/src/invoice2data/extract/templates/de/de.qualityhosting.yml +++ b/src/invoice2data/extract/templates/de/de.qualityhosting.yml @@ -12,6 +12,7 @@ lines: end: 'Total EUR' first_line: '\s+(?P<pos>\d+)\s+(?P<qty>\d+)\s+(?P<desc>.{,70})\s+(?P<price>\d+,\d+)' line: '^\s+(?P<desc>.+)$' + last_line: '^\s+(?P<desc>\d\d\.\d\d\.\d\d-\d\d\.\d\d\.\d\d)$' types: qty: float price: float diff --git a/tests/compare/QualityHosting.json b/tests/compare/QualityHosting.json index 26ac4517..fc939b02 100644 --- a/tests/compare/QualityHosting.json +++ b/tests/compare/QualityHosting.json @@ -41,16 +41,16 @@ { "pos": "6", "qty": 1.0, - "desc": "Small Business QualityExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_jauernik\n01.05.14-31.05.14\nQualityHosting AG - Uferweg 40-42 - D-63571 Gelnhausen\niViveLabs Ltd.\n93B Sai Yu Chung\nYuen Long, N.T.\nHong Kong\nPos. 
Menge Beschreibung Rabatt % VK-Preis Zeilenbetrag\nOhne Ohne MwSt.\nMwSt.", + "desc": "Small Business QualityExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_jauernik\n01.05.14-31.05.14", "price": 5.39 }, { "pos": "7", "qty": 1.0, - "desc": "Small Business StandardExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_office\n01.05.14-31.05.14\n", + "desc": "Small Business StandardExchange 2010\nGrundgebühr pro Einheit\nDienst: OUDJQ_office\n01.05.14-31.05.14", "price": 3.89 } ], "desc": "Invoice from QualityHosting AG" } -] \ No newline at end of file +]