Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add warnings that were suggested from metadata bughunt #2203

Merged
merged 3 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sdv/metadata/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def _set_metadata_dict(self, metadata, single_table_name=None):
else:
if single_table_name is None:
single_table_name = self.DEFAULT_SINGLE_TABLE_NAME
warnings.warn(
'Did not assign a table name to single table. '
lajohn4747 marked this conversation as resolved.
Show resolved Hide resolved
f'Assigning name: {single_table_name}'
)
self.tables[single_table_name] = SingleTableMetadata.load_from_dict(metadata)

def _get_single_table_name(self):
Expand Down
7 changes: 6 additions & 1 deletion sdv/metadata/multi_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,8 @@ def update_columns(self, table_name, column_names, **kwargs):
**kwargs:
Any key word arguments that describe metadata for the columns.
"""
if not isinstance(column_names, list):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wouldn't work for tuples and other list-like objects, something like pandas.api.types is_list_like could handle those.

If that's expected then you can leave as is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's expected. Don't imagine a user putting in something more complicated than a list of strings and it's easy enough to convert.

raise InvalidMetadataError('Please pass in a list to column_names arg.')
self._validate_table_exists(table_name)
table = self.tables.get(table_name)
table.update_columns(column_names, **kwargs)
Expand Down Expand Up @@ -843,6 +845,9 @@ def validate_data(self, data):
A warning is being raised if ``datetime_format`` is missing from a column represented
as ``object`` in the dataframe and its sdtype is ``datetime``.
"""
if not isinstance(data, dict):
raise InvalidMetadataError('Please pass in a dictionary mapping tables to dataframes.')
lajohn4747 marked this conversation as resolved.
Show resolved Hide resolved

errors = []
errors += self._validate_missing_tables(data)
errors += self._validate_all_tables(data)
Expand Down Expand Up @@ -880,7 +885,7 @@ def get_column_names(self, table_name, **kwargs):

Args:
table_name (str):
The name of the table to get column names for.s
The name of the table to get column names for.
**kwargs:
Metadata keywords to filter on, for example sdtype='id' or pii=True.

Expand Down
3 changes: 2 additions & 1 deletion sdv/multi_table/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def _initialize_models(self):
with disable_single_table_logger():
for table_name, table_metadata in self.metadata.tables.items():
synthesizer_parameters = self._table_parameters.get(table_name, {})
metadata = Metadata.load_from_dict(table_metadata.to_dict())
metadata_dict = {'tables': {table_name: table_metadata.to_dict()}}
metadata = Metadata.load_from_dict(metadata_dict)
Comment on lines +80 to +81
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was this crashing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No it was causing the single table metadata name to be missing since it gave it a default table (it was in a SingleTableMetadata format)

self._table_synthesizers[table_name] = self._synthesizer(
metadata=metadata, locales=self.locales, **synthesizer_parameters
)
Expand Down
37 changes: 37 additions & 0 deletions tests/unit/metadata/test_multi_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3064,3 +3064,40 @@ def test_anonymize(self, mock_load):
'parent_primary_key': 'col1',
'child_foreign_key': 'col2',
}

def test_update_columns_no_list_error(self):
# Setup
lajohn4747 marked this conversation as resolved.
Show resolved Hide resolved
metadata = MultiTableMetadata()
metadata.add_table('table')
metadata.add_column('table', 'col1', sdtype='numerical')

error_msg = re.escape('Please pass in a list to column_names arg.')
# Run and Assert
with pytest.raises(InvalidMetadataError, match=error_msg):
metadata.update_columns('table', 'col1', sdtype='categorical')

def test_validate_data_without_dict(self):
# Setup
metadata = MultiTableMetadata.load_from_dict({
'tables': {
'table_1': {
'columns': {
'col_1': {'sdtype': 'numerical'},
'col_2': {'sdtype': 'categorical'},
'latitude': {'sdtype': 'latitude'},
'longitude': {'sdtype': 'longitude'},
}
}
}
})
data = pd.DataFrame({
'col_1': [1, 2, 3],
'col_2': ['a', 'b', 'c'],
'latitude': [1, 2, 3],
'longitude': [1, 2, 3],
})
error_msg = re.escape('Please pass in a dictionary mapping tables to dataframes.')

# Run and Assert
with pytest.raises(InvalidMetadataError, match=error_msg):
metadata.validate_data(data)
Loading