Skip to content

Commit

Permalink
Update Templates
Browse files (browse the repository at this point in the history)
  • Loading branch information
bosd committed Nov 23, 2023
1 parent 374a767 commit fcea9d3
Show file tree
Hide file tree
Showing 18 changed files with 1,791 additions and 279 deletions.
60 changes: 60 additions & 0 deletions src/invoice2data/extract/templates/com/com.eur.aliexpress.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"issuer": "Alibaba",
"fields": {
"amount": "Amount paid\\s+\\d{1,4}.\\d{2}\\s+(\\d{1,4}.\\d{2})",
"amount_untaxed": "\\s{4}Total\\s+(\\d+.\\d{2})",
"date": {
"parser": "regex",
"regex": "Invoice Date . (\\d{4}-\\d.-\\d.)",
"type": "date"
},
"invoice_number": "Invoice No.? . ([A-Z]{2}\\d+)",
"static_vat": "IM5280002556"
},
"lines": {
"start": "Amount.In.+[)]",
"end": "Grant Total",
"first_line": [
"(?P<name>(\\w+(?:\\S|[ ]\\w\\w+|\\n)*))\\s+(?P<qty>\\S)\\s+(?P<price_unit>\\d+.\\d{2})\\s+(?P<discount>\\d+.\\d{2})\\s+(?P<line_tax_percent>\\d{2}).\\s+(?P<line_tax_amount>\\d+.\\d{2})\\s+\\s+(?P<amounttxcurrency>\\d+.\\d{2})\\s+(?P<amountcurrency>\\d+.\\d{2})",
"(?P<sectionheader>Order Number.\\s+(\\d+))"
],
"line": "^(?P<name>\\w+(?:\\S|[ ]\\w\\w+|\\n)*)$",
"types": {
"qty": "float",
"price_unit": "float",
"discount": "float",
"line_tax_percent": "float",
"line_tax_amount": "float",
"amounttxcurrency": "float",
"amountcurrency": "float"
}
},
"keywords": [
"Alibaba.com Singapore E-Commerce Private Limited"
],
"options": {
"currency": "EUR",
"languages": [
"en"
],
"decimal_separator": ".",
"replace": [
[
"\\s-\\s",
"1"
],
[
"/",
"_"
],
[
"\\n\\n",
"\\n"
],
[
"\\n\\s\\s\\s",
""
]
]
}
}
179 changes: 179 additions & 0 deletions src/invoice2data/extract/templates/nl/nl.accor.rhine.opco hotels.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
{
"issuer": "Rhine Opco Hotel",
"fields": {
"amount": {
"parser": "regex",
"regex": [
"\\s{37}(?:\\s+\\w+){0,4}\\s\\d*[%]?\\s+\\d*[.,]?\\d+[,.]\\d+\\s{11,13}\\d*[.,]?\\d+[,.]\\d+?\\s{2,30}([.,]?\\d+[,.]\\d+)"
],
"type": "float",
"group": "sum"
},
"amount_tax": {
"parser": "regex",
"regex": [
"\\s{37}BTW verrekenbaar(?:\\s+\\w+){0,4}\\s\\d*[%]?\\s+\\d*[.,]?\\d+[,.]\\d+\\s{11,13}(\\d*[.,]?\\d*[,.]?\\d*)\\s{2,30}[.,]?\\d+[,.]\\d+"
],
"type": "float",
"group": "sum"
},
"amount_untaxed": {
"parser": "regex",
"regex": [
"\\s{37}(?:\\s+\\w+){0,4}\\s\\d*[%]?\\s+(\\d*[.,]?\\d+[,.]\\d+)\\s{11,13}\\d*[.,]?\\d+[,.]\\d+?\\s{2,30}[.,]?\\d+[,.]\\d+"
],
"type": "float",
"group": "sum"
},
"date": {
"parser": "regex",
"regex": ",\\s(\\d{1,2}[-]\\S{1,4}[-]\\d{2}\\s\\d{2}[:]\\d{2})\\n",
"type": "date",
"group": "first"
},
"invoice_number": {
"parser": "regex",
"regex": "REKENING\\s+[:]\\s+(.*)"
},
"iban": {
"parser": "regex",
"regex": "[A-Z]{2}\\d{2}?\\s?\\w{4}?\\d?\\s?\\d{3,4}\\s?\\d{4}?\\s\\d{0,2}"
},
"bic": {
"parser": "regex",
"regex": "(?i)BIC[:]\\s+(\\w{8,11})",
"group": "first"
},
"vat": {
"parser": "regex",
"regex": "\\s(NL\\d{4}[.]?\\d{2}[.]?\\d{3}[.]?B[.]?\\d{2})\\s"
},
"partner_website": {
"parser": "static",
"value": "all.accor.com"
},
"partner_name": {
"parser": "regex",
"regex": "(Rhine.+Opco\\sBV)\\s"
},
"country_code": {
"parser": "regex",
"area": {
"f": 1,
"l": 1,
"x": 0,
"y": 0,
"r": 100,
"W": 270,
"H": 240
},
"regex": "nl"
},
"partner_zip": {
"parser": "regex",
"area": {
"f": 1,
"l": 1,
"x": 0,
"y": 0,
"r": 100,
"W": 270,
"H": 240
},
"regex": "\\s(\\d{4}\\s?[A-Z]{2})\\s"
},
"partner_city": {
"parser": "regex",
"area": {
"f": 1,
"l": 1,
"x": 0,
"y": 0,
"r": 100,
"W": 270,
"H": 240
},
"regex": "\\d{4}\\s?[A-Z]{2}\\s(\\w+(?:\\s\\w+)*)\\s"
},
"partner_street": {
"parser": "regex",
"area": {
"f": 1,
"l": 1,
"x": 0,
"y": 0,
"r": 100,
"W": 270,
"H": 240
},
"regex": "^(\\w+.*\\s\\d+)\\n"
},
"telephone": {
"parser": "regex",
"area": {
"f": 1,
"l": 1,
"x": 0,
"y": 0,
"r": 100,
"W": 270,
"H": 240
},
"regex": "Tel[.][:]\\s+(.+)"
},
"partner_email": {
"parser": "regex",
"area": {
"f": 1,
"l": 1,
"x": 0,
"y": 0,
"r": 100,
"W": 270,
"H": 240
},
"regex": "\\s+(\\w+[@]\\w+[.]nl)\\s"
},
"partner_coc": {
"parser": "regex",
"regex": "C[.]o[.]C[.]\\snr[.](\\d{8})"
},
"lines": {
"parser": "lines",
"start": "Datum\\s+Omschrijving",
"end": "\\s+Totaal",
"line": "^\\s{37}(?P<date_start>\\d{2}[-]\\d{2}[-]\\d{2})\\s{13}(?P<name>((?:\\w+\\s){0,2}))\\s{80,94}(?P<price_subtotal>-?\\d*[.]?\\d+[.,]\\d{2})",
"types": {
"price_unit": "float",
"price_subtotal": "float"
}
},
"payment_method": [
"(?i)(American Express)",
"(VISA)",
"(MCARD)",
"(CONTANT)",
"(KAS):\\s.\\s\\d+\\.\\d+"
],
"payment_reference": {
"parser": "regex",
"regex": "Transaction\\sID\\s+(\\d{9})"
}
},
"keywords": [
"(?i)Novotel",
"REKENING",
"Rhine"
],
"options": {
"currency": "EUR",
"date_formats": [
"%d/%m/%Y"
],
"languages": [
"nl"
],
"priority": 4,
"decimal_separator": "."
}
}
115 changes: 115 additions & 0 deletions src/invoice2data/extract/templates/nl/nl.agrisneltank.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"issuer": "Agrisneltank B.V.",
"fields": {
"amount": "TOTAAL\\s+.?€\\s+(\\d{1,3}.\\d{2})",
"amount_untaxed": "Netto\\s+€\\s+(\\d{1,3}.\\d{2})",
"date": "Datum\\s+(\\d{1,2}-\\d{2}-\\d{4})",
"invoice_number": "ketnummer\\s+(\\d+)",
"static_vat": "NL862414489B01",
"partner_name": "(?i)(Agrisneltank B.V.)",
"country_code": {
"parser": "regex",
"regex": "(?i)nl",
"group": "first"
},
"partner_website": {
"parser": "regex",
"regex": "agrisneltank.nl",
"group": "first"
},
"payment_method": [
"(MASTERCARD)",
"(VISA)",
"(MAESTRO)",
"(CONTANT)",
"(KAS):\\s.\\s\\d+\\.\\d+"
]
},
"lines": {
"start": "(?i)BON",
"end": "(Netto\\s|\\Z)",
"first_line": "(?i)(?P<name>POMP\\s+\\d+)\\s+(?P<product>\\w+)",
"line": [
"(?i)Volume.*\\s+(?P<qty>\\d+[,.]\\d+)\\s?(?P<uom>[l|ℓ|L])?",
"(?i)Prijs\\s+[E|€] (?P<price_unit>\\d.\\d{2,3})\\s*[/ ]?(?P<uom>[l|ℓ|L])?",
"(?i)B.W\\s+(?P<line_tax_percent>\\d{2}[,.]\\d{2})\\s+[%]\\s+[E|€]?\\s+(?P<line_tax_amount>\\d+[,.]\\d{2})?"
],
"types": {
"qty": "float",
"price_unit": "float",
"line_tax_percent": "float",
"line_tax_amount": "float"
}
},
"keywords": [
"(?i)Agri",
"(?i)nl",
"NL862414489B01"
],
"options": {
"currency": "EUR",
"languages": [
"nl"
],
"decimal_separator": ",",
"replace": [
[
"é",
""
],
[
"L.p.9.",
"LPG"
],
[
"L.P.G.",
"LPG"
],
[
"L.P.9.",
"LPG"
],
[
"LPLG",
"LPG"
],
[
"Contant",
"CONTANT"
],
[
"\\s[l|&|@]",
""
],
[
"([0-9]{2,3})[ /][2]",
"\\1 /ℓ"
],
[
"B.W",
"BTW"
],
[
" - ",
"-"
],
[
"agrisneltank\\s+[.]?nl",
"agrisneltank.nl"
],
[
"Kuwait\\s+Petroleum\\s+Ned",
"Kuwait Petroleum Nederland B.V."
],
[
"€ ([0-9]+) ([0-9]{2})(\\s)",
"\\1,\\2\\3"
],
[
"(\\s)([0-9]+)\\.([0-9]{2,3})",
"\\1\\2,\\3"
]
]
}
}
Loading

0 comments on commit fcea9d3

Please sign in to comment.