From 1ae80ab93bd6b055b7f2438e9757232af8e90fa5 Mon Sep 17 00:00:00 2001 From: bosd Date: Thu, 23 Feb 2023 19:32:44 +0100 Subject: [PATCH] pdfplumber raise importerror --- src/invoice2data/input/pdfplumber.py | 3 ++- src/invoice2data/main.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/invoice2data/input/pdfplumber.py b/src/invoice2data/input/pdfplumber.py index a2e53500..f48e32e7 100644 --- a/src/invoice2data/input/pdfplumber.py +++ b/src/invoice2data/input/pdfplumber.py @@ -19,7 +19,8 @@ def to_text(path): try: import pdfplumber except ImportError: - logger.debug("Cannot import pdfplumber") + logger.error("Cannot import pdfplumber") + raise ImportError("Cannot import pdfplumber") raw_text = "" raw_text = raw_text.encode(encoding='UTF-8') diff --git a/src/invoice2data/main.py b/src/invoice2data/main.py index cea046ce..24f5536a 100644 --- a/src/invoice2data/main.py +++ b/src/invoice2data/main.py @@ -86,7 +86,11 @@ def extract_data(invoicefile, templates=None, input_module=None): else: input_module = pdftotext - extracted_str = input_module.to_text(invoicefile) + try: + extracted_str = input_module.to_text(invoicefile) + except Exception as e: + logger.error("Error has occured %s", e) + return False if not isinstance(extracted_str, str) or not extracted_str.strip(): logger.error("Failed to extract text from %s using %s", invoicefile, input_module.__name__) return False @@ -218,7 +222,11 @@ def main(args=None): templates += read_templates() output = [] for f in args.input_files: - res = extract_data(f.name, templates=templates, input_module=input_module) + try: + res = extract_data(f.name, templates=templates, input_module=input_module) + except Exception as e: + logger.error("Error has occured %s", e) + continue if res: logger.info(res) output.append(res)