-
Notifications
You must be signed in to change notification settings - Fork 303
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ecbc660
commit e270e16
Showing 4 changed files with 132 additions and 115 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""External utility functions.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
"""Google Drive utils.""" | ||
|
||
import io | ||
import json | ||
import os | ||
import pathlib | ||
import tempfile | ||
from functools import lru_cache | ||
|
||
import pandas as pd | ||
import yaml | ||
from pydrive.auth import GoogleAuth | ||
from pydrive.drive import GoogleDrive | ||
|
||
# Name of the environment variable that ``_get_drive_client`` reads with
# ``os.getenv``. When it is set, its value is parsed as JSON and must provide
# the ``client_id`` and ``client_secret`` keys.
PYDRIVE_CREDENTIALS = 'PYDRIVE_CREDENTIALS' | ||
|
||
|
||
def _get_drive_client():
    """Build a ``GoogleDrive`` client from a ``GoogleAuth`` object.

    When the environment variable named by ``PYDRIVE_CREDENTIALS`` is unset or
    empty, a default ``GoogleAuth`` object is used. Otherwise the variable's
    value is treated as a JSON document: it is written verbatim to a temporary
    ``credentials.json`` file, and its ``client_id`` and ``client_secret``
    entries are copied into a temporary ``settings.yaml`` file that is handed
    to ``GoogleAuth``. In both cases ``LocalWebserverAuth`` is called before
    the client is created.

    Returns:
        GoogleDrive:
            Client wrapping the ``GoogleAuth`` object built above.

    Raises:
        json.JSONDecodeError:
            If the environment variable is set but does not hold valid JSON.
        KeyError:
            If the JSON lacks ``client_id`` or ``client_secret``.
    """
    env_credentials = os.getenv(PYDRIVE_CREDENTIALS)

    if env_credentials:
        with tempfile.TemporaryDirectory() as workdir:
            workdir_path = pathlib.Path(workdir)

            # The raw value is stored on disk first; the settings below point
            # ``save_credentials_file`` at this path.
            saved_credentials = workdir_path / 'credentials.json'
            saved_credentials.write_text(env_credentials)

            parsed = json.loads(env_credentials)
            client_config = {
                'client_id': parsed['client_id'],
                'client_secret': parsed['client_secret'],
            }
            pydrive_settings = dict(
                client_config_backend='settings',
                client_config=client_config,
                save_credentials=True,
                save_credentials_backend='file',
                save_credentials_file=str(saved_credentials),
                get_refresh_token=True,
            )

            settings_path = workdir_path / 'settings.yaml'
            settings_path.write_text(yaml.safe_dump(pydrive_settings))

            # Authenticate inside the ``with`` block, while the temporary
            # settings and credentials files still exist.
            auth = GoogleAuth(str(settings_path))
            auth.LocalWebserverAuth()
    else:
        auth = GoogleAuth()
        auth.LocalWebserverAuth()

    return GoogleDrive(auth)
|
||
|
||
@lru_cache()
def read_excel(file_id):
    """Fetch a Google Drive file as XLSX and parse all of its sheets.

    The function is wrapped in ``lru_cache``, so a repeated call with the same
    ``file_id`` returns the previously built object instead of fetching the
    file again.

    Args:
        file_id (str):
            The ID of the file to load.

    Returns:
        dict[str, pd.DataFrame]:
            ``pd.read_excel`` is called with ``sheet_name=None``, which loads
            every worksheet and returns a dict mapping each sheet name to its
            DataFrame, including when the file has a single sheet.
    """
    xlsx_mimetype = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    remote_file = _get_drive_client().CreateFile({'id': file_id})
    remote_file.FetchContent(mimetype=xlsx_mimetype)
    return pd.read_excel(remote_file.content, sheet_name=None)
|
||
|
||
def _set_column_width(writer, results, sheet_name):
    """Widen each worksheet column to fit its header and its longest cell.

    For every column of ``results`` the width is the larger of the header
    length and the longest stringified cell, plus 2 characters of padding.

    Args:
        writer (pd.ExcelWriter):
            Writer whose ``sheets`` mapping contains ``sheet_name``; the
            worksheet must offer ``set_column(first_col, last_col, width)``.
        results (pd.DataFrame):
            Data that was written to the sheet.
        sheet_name (str):
            Name of the worksheet to adjust.
    """
    # Columns are addressed by position so duplicate labels are handled, and
    # the header is stringified so non-string labels (e.g. ints) are measured
    # instead of raising ``TypeError``.
    for col_idx, column in enumerate(results.columns):
        cell_lengths = results.iloc[:, col_idx].astype(str).map(len)

        # ``max()`` of an empty Series is NaN, which would propagate into the
        # width; a column without rows is sized by its header alone.
        widest_cell = 0 if cell_lengths.empty else int(cell_lengths.max())

        column_width = max(widest_cell, len(str(column)))
        writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width + 2)
|
||
|
||
def save_to_gdrive(output_folder, results, output_filename=None):
    """Save a ``DataFrame`` to a Google Drive folder as ``xlsx`` (spreadsheet).

    The data is written to an in-memory workbook, one sheet per entry of
    ``results``, with column widths fitted to the contents. The workbook is
    then uploaded to the given folder with ``convert`` enabled.

    Args:
        output_folder (str):
            String representing a Google Drive folder id.
        results (pd.DataFrame or dict[str, pd.DataFrame]):
            Dataframe to be stored as ``xlsx``, or dictionary mapping sheet
            names to dataframes for storage in one ``xlsx`` file. A single
            dataframe is stored in a sheet named ``Sheet1``.
        output_filename (str, optional):
            Title to give the uploaded file. Defaults to None, in which case
            ``None`` is forwarded as the title.
            NOTE(review): an earlier docstring promised a "current date and
            commit" default name, but no such fallback is implemented here.

    Returns:
        str:
            Google Drive file id of the uploaded file.
    """
    if isinstance(results, pd.DataFrame):
        # ``DataFrame.items()`` yields (column name, Series) pairs, so looping
        # over a bare frame would write one sheet per column. Wrap it so it is
        # handled as a single sheet.
        results = {'Sheet1': results}

    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:  # pylint: disable=E0110
        for sheet_name, data in results.items():
            data.to_excel(writer, sheet_name=sheet_name, index=False)
            _set_column_width(writer, data, sheet_name)

    file_config = {'title': output_filename, 'parents': [{'id': output_folder}]}
    drive = _get_drive_client()
    drive_file = drive.CreateFile(file_config)
    drive_file.content = output
    drive_file.Upload({'convert': True})
    return drive_file['id']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters