Fix lint
pvk-developer committed Sep 17, 2024
1 parent e94a4bf commit dae1d47
Showing 6 changed files with 41 additions and 50 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -185,6 +185,7 @@ exclude = [
     ".tox",
     ".git",
     "__pycache__",
+    "*.ipynb",
     ".ipynb_checkpoints",
     "tasks.py",
 ]
2 changes: 1 addition & 1 deletion tests/_external/gdrive_utils.py
@@ -67,7 +67,7 @@ def get_latest_file(folder_id):
     drive_query = drive.ListFile({
         'q': f"'{folder_id}' in parents and trashed=False",
         'orderBy': 'modifiedDate desc',
-        'maxResults': 1
+        'maxResults': 1,
     })
     file_list = drive_query.GetList()
     if len(file_list) > 0:
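
For context, the ListFile/GetList calls in this file follow the PyDrive query API. Below is a minimal usage sketch, assuming PyDrive2 and a hypothetical FOLDER_ID placeholder (the authentication flow is not part of this diff):

from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive

# Hypothetical placeholder -- the ID of the Drive folder holding the results.
FOLDER_ID = 'your-folder-id'

gauth = GoogleAuth()
gauth.LocalWebserverAuth()  # opens a browser window for the OAuth consent screen
drive = GoogleDrive(gauth)

# Same query shape as get_latest_file: newest non-trashed file in the folder.
drive_query = drive.ListFile({
    'q': f"'{FOLDER_ID}' in parents and trashed=False",
    'orderBy': 'modifiedDate desc',
    'maxResults': 1,
})
file_list = drive_query.GetList()
latest_file = file_list[0] if file_list else None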
4 changes: 1 addition & 3 deletions tests/_external/slack_utils.py
@@ -34,9 +34,7 @@ def post_slack_message(channel, text):
     response = client.chat_postMessage(channel=channel, text=text)
     if not response['ok']:
         error = response.get('error', 'unknown_error')
-        msg = (
-            f'{error} occured trying to post message to {channel}'
-        )
+        msg = f'{error} occured trying to post message to {channel}'
         raise RuntimeError(msg)
 
     return response
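
The chat_postMessage call above comes from Slack's Python SDK. A small sketch of how post_slack_message is typically exercised, assuming slack_sdk's WebClient and hypothetical token and channel values that are not taken from the repository:

import os

from slack_sdk import WebClient

# Hypothetical setup: the token variable and the channel name are placeholders.
client = WebClient(token=os.environ['SLACK_TOKEN'])
response = client.chat_postMessage(channel='#benchmark-results', text='Benchmark run finished.')
if not response['ok']:
    # slack_sdk normally raises SlackApiError on failure, so this mirrors the
    # defensive check in post_slack_message rather than the common path.
    error = response.get('error', 'unknown_error')
    raise RuntimeError(f'{error} occurred trying to post the message')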
62 changes: 30 additions & 32 deletions tests/benchmark/pyarrow_dtypes.py
@@ -44,77 +44,75 @@
         'pa.utf8': pd.Series(['A', 'B', None, 'C'], dtype=pd.ArrowDtype(pa.utf8()))
     }),
     'pa.binary': pd.DataFrame({
-        'pa.binary': pd.Series([
-            b'binary1', b'binary2', None, b'binary3'], dtype=pd.ArrowDtype(pa.binary()))
+        'pa.binary': pd.Series(
+            [b'binary1', b'binary2', None, b'binary3'], dtype=pd.ArrowDtype(pa.binary())
+        )
     }),
     'pa.large_binary': pd.DataFrame({
-        'pa.large_binary': pd.Series([
-            b'large_binary1',
-            b'large_binary2',
-            None,
-            b'large_binary3'
-        ], dtype=pd.ArrowDtype(pa.large_binary()))
+        'pa.large_binary': pd.Series(
+            [b'large_binary1', b'large_binary2', None, b'large_binary3'],
+            dtype=pd.ArrowDtype(pa.large_binary()),
+        )
     }),
     'pa.large_string': pd.DataFrame({
         'pa.large_string': pd.Series(['A', 'B', None, 'C'], dtype=pd.ArrowDtype(pa.large_string()))
     }),
     'pa.date32': pd.DataFrame({
         'pa.date32': pd.Series(
             [pd.Timestamp('2023-01-01'), pd.Timestamp('2024-01-01'), None],
-            dtype=pd.ArrowDtype(pa.date32())
+            dtype=pd.ArrowDtype(pa.date32()),
         )
     }),
     'pa.date64': pd.DataFrame({
         'pa.date64': pd.Series(
             [pd.Timestamp('2023-01-01'), pd.Timestamp('2024-01-01'), None],
-            dtype=pd.ArrowDtype(pa.date64())
+            dtype=pd.ArrowDtype(pa.date64()),
         )
     }),
     'pa.timestamp': pd.DataFrame({
         'pa.timestamp': pd.Series(
-            [
-                pd.Timestamp('2023-01-01T00:00:00'),
-                pd.Timestamp('2024-01-01T00:00:00'),
-                None
-            ],
-            dtype=pd.ArrowDtype(pa.timestamp('ms'))
+            [pd.Timestamp('2023-01-01T00:00:00'), pd.Timestamp('2024-01-01T00:00:00'), None],
+            dtype=pd.ArrowDtype(pa.timestamp('ms')),
         )
     }),
     'pa.duration': pd.DataFrame({
         'pa.duration': pd.Series(
             [pd.Timedelta(days=1), pd.Timedelta(hours=2), None],
-            dtype=pd.ArrowDtype(pa.duration('s'))
+            dtype=pd.ArrowDtype(pa.duration('s')),
        )
     }),
     'pa.time32': pd.DataFrame({
-        'pa.time32': pd.Series([
-            pd.Timestamp('2023-01-01T01:00:00').time(),
-            pd.Timestamp('2023-01-01T02:00:00').time(),
-            None
-        ], dtype=pd.ArrowDtype(pa.time32('s'))
+        'pa.time32': pd.Series(
+            [
+                pd.Timestamp('2023-01-01T01:00:00').time(),
+                pd.Timestamp('2023-01-01T02:00:00').time(),
+                None,
+            ],
+            dtype=pd.ArrowDtype(pa.time32('s')),
         )
     }),
     'pa.time64': pd.DataFrame({
-        'pa.time64': pd.Series([
-            pd.Timestamp('2023-01-01T01:00:00').time(),
-            pd.Timestamp('2023-01-01T02:00:00').time(),
-            None
-        ], dtype=pd.ArrowDtype(pa.time64('ns')))
+        'pa.time64': pd.Series(
+            [
+                pd.Timestamp('2023-01-01T01:00:00').time(),
+                pd.Timestamp('2023-01-01T02:00:00').time(),
+                None,
+            ],
+            dtype=pd.ArrowDtype(pa.time64('ns')),
+        )
     }),
     'pa.binary_view': pd.DataFrame({
         'pa.binary_view': pd.Series(
             [b'view1', b'view2', None, b'view3'], dtype=pd.ArrowDtype(pa.binary())
         )
     }),
     'pa.string_view': pd.DataFrame({
-        'pa.string_view': pd.Series(
-            ['A', 'B', None, 'C'], dtype=pd.ArrowDtype(pa.string())
-        )
+        'pa.string_view': pd.Series(['A', 'B', None, 'C'], dtype=pd.ArrowDtype(pa.string()))
     }),
     'pa.decimal128': pd.DataFrame({
         'pa.decimal128': pd.Series(
             [decimal.Decimal('123.45'), decimal.Decimal('678.90'), None],
-            dtype=pd.ArrowDtype(pa.decimal128(precision=10, scale=2))
+            dtype=pd.ArrowDtype(pa.decimal128(precision=10, scale=2)),
         )
-    })
+    }),
 }
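
These fixtures all rely on pandas' Arrow-backed extension dtypes. A short illustration of how pd.ArrowDtype behaves (assuming pandas >= 2.0 with pyarrow installed; the column name is arbitrary):

import pandas as pd
import pyarrow as pa

# An Arrow-backed Series: None is stored as an Arrow null rather than NaN.
series = pd.Series(['A', 'B', None, 'C'], dtype=pd.ArrowDtype(pa.large_string()))
print(series.dtype)            # large_string[pyarrow]
print(series.isna().tolist())  # [False, False, True, False]

# Wrapping it in a single-column DataFrame, as the benchmark fixtures above do.
df = pd.DataFrame({'pa.large_string': series})
print(df.dtypes)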
8 changes: 3 additions & 5 deletions tests/benchmark/supported_dtypes_benchmark.py
@@ -97,7 +97,6 @@
     'pd.timedelta64': 'datetime',
     'pd.Period': 'datetime',
     'pd.Complex': 'numerical',
-
     # NumPy
     'np.int8': 'numerical',
     'np.int16': 'numerical',
@@ -118,7 +117,6 @@
     'np.bool': 'categorical',
     'np.string': 'categorical',
     'np.unicode': 'categorical',
-
     # PyArrow
     'pa.int8': 'numerical',
     'pa.int16': 'numerical',
@@ -294,7 +292,7 @@ def test_fit_and_sample_synthesizer(dtype, data):
     save_results_to_json({
         'dtype': dtype,
         'SYNTHESIZER_FIT': fit_result,
-        'SYNTHESIZER_SAMPLE': sample_result
+        'SYNTHESIZER_SAMPLE': sample_result,
     })
     fit_assertion_message = f"{dtype} is no longer supported by 'SYNTHESIZER_FIT'."
     if fit_result is False:
@@ -475,7 +473,7 @@ def test_fit_and_sample_single_column_constraints(constraint_name, constraint, d
     save_results_to_json({
         'dtype': dtype,
         f'{constraint_name}_FIT': fit_result,
-        f'{constraint_name}_SAMPLE': sample_result
+        f'{constraint_name}_SAMPLE': sample_result,
     })
     if fit_result is False:
         fit_assertion_message = f"{dtype} is no longer supported by '{constraint_name}_FIT''."
@@ -553,7 +551,7 @@ def test_fit_and_sample_multi_column_constraints(constraint_name, constraint, dt
     save_results_to_json({
         'dtype': dtype,
         f'{constraint_name}_FIT': fit_result,
-        f'{constraint_name}_SAMPLE': sample_result
+        f'{constraint_name}_SAMPLE': sample_result,
     })
     if fit_result is False:
         assert fit_result == previous_fit_result, f"{dtype} failed during '{constraint_name}_FIT'."
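
The dictionary touched by the first two hunks maps each benchmarked dtype to the sdtype it is expected to be modeled as. A tiny sketch of how such a mapping can be grouped for reporting, using only keys that appear in the diff (the grouping itself is illustrative, not part of the repository):

from collections import defaultdict

# Subset of the expected-sdtype mapping shown above.
EXPECTED_SDTYPES = {
    'pd.Period': 'datetime',
    'pd.Complex': 'numerical',
    'np.int8': 'numerical',
    'np.bool': 'categorical',
    'pa.int8': 'numerical',
}

by_sdtype = defaultdict(list)
for dtype_name, sdtype in EXPECTED_SDTYPES.items():
    by_sdtype[sdtype].append(dtype_name)

print(dict(by_sdtype))
# {'datetime': ['pd.Period'], 'numerical': ['pd.Complex', 'np.int8', 'pa.int8'], 'categorical': ['np.bool']}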
14 changes: 5 additions & 9 deletions tests/benchmark/utils.py
@@ -58,7 +58,7 @@ def _get_output_filename():
 
 def compare_previous_result_with_current(args):
     """Compare the previous result with the current and post a message on slack."""
-    output_filename = output_filename or TEMPRESULTS
+    output_filename = TEMPRESULTS
     current_results = _load_temp_results(output_filename)
     output_filename = _get_output_filename()
     csv_output = Path(f'results/{PYTHON_VERSION}.csv')
@@ -127,19 +127,15 @@ def _get_parser():
     action.required = True
 
     # Compare with previous results
-    compare = action.add_parser(
-        'compare',
-        help='Compare previous results to the current one.'
-    )
+    compare = action.add_parser('compare', help='Compare previous results to the current one.')
     compare.set_defaults(action=compare_previous_result_with_current)
 
     # Command Line package creation
-    upload = action.add_parser(
-        'upload',
-        help='Upload a new spreadsheet with the results.'
-    )
+    upload = action.add_parser('upload', help='Upload a new spreadsheet with the results.')
 
     upload.set_defaults(action=store_results_in_gdrive)
     return parser
 
 
 if __name__ == '__main__':
     parser = _get_parser()
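
The parser in this file wires argparse subcommands that bind a callable through set_defaults(action=...). A self-contained sketch of that pattern with placeholder handlers (the real handlers are compare_previous_result_with_current and store_results_in_gdrive):

import argparse


def _compare(args):
    print('comparing previous results with the current run')


def _upload(args):
    print('uploading a new results spreadsheet')


parser = argparse.ArgumentParser(description='Benchmark results utility.')
subparsers = parser.add_subparsers(title='action', dest='command')
subparsers.required = True

compare = subparsers.add_parser('compare', help='Compare previous results to the current one.')
compare.set_defaults(action=_compare)

upload = subparsers.add_parser('upload', help='Upload a new spreadsheet with the results.')
upload.set_defaults(action=_upload)

# Dispatch through the bound callable; the repository's __main__ block
# (truncated in this diff) presumably does something similar.
args = parser.parse_args(['compare'])
args.action(args)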
