diff --git a/README.rst b/README.rst index 078aa1e1..3cbd8585 100644 --- a/README.rst +++ b/README.rst @@ -67,7 +67,7 @@ Choose any of the following input readers: - tesseract ``invoice2data --input-reader tesseract invoice.pdf`` - pdf miner ``invoice2data --input-reader pdfminer invoice.pdf`` - tesseract4 ``invoice2data --input-reader tesseract4 invoice.pdf`` - - gvision ``invoice2data --input-reader gvision invoice.pdf`` (needs ``GOOGLE_APPLICATION_CREDENTIALS`` env var) + - gvision ``invoice2data --input-reader gvision invoice.pdf`` (needs the ``GOOGLE_APPLICATION_CREDENTIALS`` env var and a Google Cloud bucket name. The bucket name can be passed as the ``bucket_name`` argument of ``to_text`` or set in the ``GOOGLE_CLOUD_BUCKET_NAME`` environment variable) Choose any of the following output formats: diff --git a/src/invoice2data/input/gvision.py b/src/invoice2data/input/gvision.py index cfb48b49..cb75d5c7 100644 --- a/src/invoice2data/input/gvision.py +++ b/src/invoice2data/input/gvision.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -def to_text(path, bucket_name='cloud-vision-84893', language='fr'): +def to_text(path, bucket_name=None, language='fr'): """Sends PDF files to Google Cloud Vision for OCR. 
Before using invoice2data, make sure you have the auth json path set as @@ -28,6 +28,14 @@ def to_text(path, bucket_name='cloud-vision-84893', language='fr'): # Supported mime_types are: 'application/pdf' and 'image/tiff' mime_type = 'application/pdf' + if bucket_name is None: + bucket_name = os.getenv('GOOGLE_CLOUD_BUCKET_NAME', None) + + if bucket_name is None: + raise EnvironmentError( + 'No Google Cloud Bucket name set.\n Set it as an input variable or as an environment variable named GOOGLE_CLOUD_BUCKET_NAME' + ) + path_dir, filename = os.path.split(path) result_blob_basename = filename.replace('.pdf', '').replace('.PDF', '') result_blob_name = result_blob_basename + '/output-1-to-1.json' diff --git a/tests/test_cli.py b/tests/test_cli.py index 40f908f7..0741185f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -133,7 +133,7 @@ def test_copy(self): i += 1 shutil.rmtree('tests/copy_test/', ignore_errors=True) - self.assertEqual(i, len(get_sample_files('.json'))) + self.assertEqual(i, len(get_sample_files('.pdf'))) ''' if i != len(self._get_test_file_json_path()): print(i)