-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
84 lines (71 loc) · 2.54 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/python3
# SPDX-FileCopyrightText: 2020 Nicolás Alvarez <[email protected]>
#
# SPDX-License-Identifier: MIT
import tempfile
import json
import logging
import os
import request
import pdfconvert
import textparser
logging.getLogger().setLevel(logging.INFO)

# The presence of LAMBDA_TASK_ROOT in the environment selects the S3-backed
# helpers below; without it the helpers read and write local files.
aws = os.getenv('LAMBDA_TASK_ROOT')

if aws:
    import boto3

    s3 = boto3.client('s3')
    # Single definition of the bucket both S3 helpers talk to.
    _S3_BUCKET = 'nicolas17'

    def get_file(path):
        """Return the body of the S3 object stored under key *path*."""
        logging.debug('reading from S3 file %s', path)
        obj = s3.get_object(Bucket=_S3_BUCKET, Key=path)
        return obj['Body'].read()

    def put_file(path, data, public=False):
        """Upload *data* to S3 key *path* with a JSON content type.

        The object ACL is 'public-read' when *public* is true and 'private'
        otherwise.
        """
        acl = 'public-read' if public else 'private'
        logging.debug('uploading to S3 key %s with ACL %s', path, acl)
        s3.put_object(
            Bucket=_S3_BUCKET,
            Key=path,
            ACL=acl,
            ContentType='application/json',
            Body=data,
        )
else:
    def get_file(path):
        """Return the contents of the local file *path* as bytes."""
        logging.debug('reading from local file %s', path)
        with open(path, 'rb') as f:
            return f.read()

    def put_file(path, data, public=False):
        """Write the bytes *data* to the local file *path*.

        *public* is accepted so the signature matches the S3 variant; it is
        not used for local files.
        """
        logging.debug('writing to local file %s', path)
        with open(path, 'wb') as f:
            f.write(data)
def handler(event, context):
    """Download the first listed PDF, parse it and store the figures as JSON.

    The function has an ``(event, context)`` signature; neither argument is
    read by the body.

    Returns:
        A dict with a 'result' key holding the extracted figures. A 'failed'
        key with a short reason is added when no PDF was listed (in which
        case 'result' is None) or when the parsed report lacks cases or
        deaths; in both situations nothing is stored.
    """
    logging.info("Downloading list")
    pdf_urls = list(request.get_pdfs())
    if not pdf_urls:
        # Report an empty listing like the other soft failure below instead
        # of crashing with an IndexError on pdf_urls[0].
        logging.warning("No PDFs were listed")
        return {'result': None, 'failed': 'no PDFs were found'}

    pdf_url, pdf_date = pdf_urls[0]
    logging.info("PDF date: %s", pdf_date)
    logging.info("PDF URL: %s", pdf_url)

    with tempfile.NamedTemporaryFile(suffix='.pdf') as f:
        request.download_file(pdf_url, f)
        # The converter is given the file's path rather than this handle, so
        # push any buffered bytes to disk before it looks at the file.
        f.flush()
        logging.info("File downloaded into %s", f.name)
        text = pdfconvert.text_from_pdf(f.name)
    logging.info("Converted into %d bytes of text", len(text))

    report = textparser.parse(text)
    output = {
        'cases': report.cases,
        'deaths': report.deaths,
        'new_cases_today': report.new_cases,
        'date': pdf_date.strftime('%Y-%m-%d'),
        'source_url': pdf_url
    }

    if report.cases is None or report.deaths is None:
        logging.warning("Couldn't get main data: %s", report)
        return {'result': output, 'failed': 'main data was missing'}

    output_json = json.dumps(output).encode('utf8')

    logging.info("Getting current file")
    try:
        current_data = get_file('covid-ar.json')
    except FileNotFoundError:
        # Local storage on a first run: there is nothing to compare against.
        # NOTE(review): a missing S3 key raises a botocore error instead and
        # is not handled here - confirm whether that case needs covering.
        current_data = None

    if current_data == output_json:
        logging.info("File is already up to date")
    else:
        logging.info("Storing new file")
        put_file('covid-ar.json', data=output_json, public=True)

    logging.info("Done!")
    return {'result': output}
if __name__ == '__main__':
    # Direct execution: call the handler with empty event/context
    # placeholders and show whatever it returns.
    outcome = handler({}, {})
    print(outcome)