Skip to content

Commit

Permalink
Style changes
Browse files Browse the repository at this point in the history
  • Loading branch information
us8945 committed Sep 20, 2024
1 parent 8805ff6 commit 43742d0
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
7 changes: 3 additions & 4 deletions llm_studio/app_utils/sections/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,12 +706,11 @@ async def dataset_import(
sanity_check(cfg)

except AssertionError as exception:
logger.error(
f"Error while validating data: {exception}", exc_info=True
)
logger.error(f"Error while validating data: {exception}", exc_info=True)
text = (
"# Error while validating data\n"
"Please go back and verify whether the problem type and other settings were set properly.\n"
"Please go back and verify whether the problem type and other "
"settings were set properly.\n"
"\n"
"**Details of the Validation Error**:\n"
f"```\n{exception}\n```"
Expand Down
14 changes: 9 additions & 5 deletions llm_studio/src/datasets/text_causal_language_modeling_ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,18 +336,22 @@ def sanity_check(cls, df: pd.DataFrame, cfg: Any, mode: str = "train"):
and cfg.dataset.parent_id_column in df.columns
and "id" in df.columns
):
assert (
df[cfg.dataset.parent_id_column] != df["id"]
).all(), f"Parent id column:{cfg.dataset.parent_id_column} is the same as id column for some rows"
assert (df[cfg.dataset.parent_id_column] != df["id"]).all(), (
f"Parent id column:{cfg.dataset.parent_id_column} "
"is the same as id column for some rows"
)
assert (df[cfg.dataset.parent_id_column].fillna("") == "").sum() > 0, (
"Did not find any conversation chain. "
"Please ensure that some parent ids are empty."
"\n"
"Conversations are chained using parent id, start conversation record should not have parent id populated"
"Conversations are chained using parent id, "
"start conversation record should "
"not have parent id populated"
"\n"
f"Parent id column checked:{cfg.dataset.parent_id_column}"
"\n"
f"Number of records with empty <{cfg.dataset.parent_id_column}> column:{(df[cfg.dataset.parent_id_column].fillna('') == '').sum()}"
f"Number of records with empty <{cfg.dataset.parent_id_column}> "
f"column:{(df[cfg.dataset.parent_id_column].fillna('') == '').sum()}"
)

assert cfg.dataset.answer_column in df.columns, (
Expand Down

0 comments on commit 43742d0

Please sign in to comment.