Skip to content

Commit

Permalink
Fix pytest errors
Browse files (browse the repository at this point in the history)
  • Loading branch information
us8945 committed Sep 21, 2024
1 parent 43742d0 commit dc50e3a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -338,7 +338,7 @@ def sanity_check(cls, df: pd.DataFrame, cfg: Any, mode: str = "train"):
):
assert (df[cfg.dataset.parent_id_column] != df["id"]).all(), (
f"Parent id column:{cfg.dataset.parent_id_column}"
"is the same as id column for some rows"
" is the same as id column for some rows"
)
assert (df[cfg.dataset.parent_id_column].fillna("") == "").sum() > 0, (
"Did not find any conversation chain. "
Expand Down
19 changes: 16 additions & 3 deletions tests/src/datasets/test_text_causal_language_modeling_ds.py
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
import re
from unittest import mock
from unittest.mock import MagicMock, patch

Expand Down Expand Up @@ -89,7 +90,8 @@ def test_sanity_check_raises_error():
}
)
with pytest.raises(
AssertionError, match="Parent id column is the same as id column for some rows"
AssertionError,
match=r"Parent id column:.* is the same as id column for some rows",
):
CustomDataset.sanity_check(invalid_df_1, mock_config)

Expand All @@ -102,8 +104,19 @@ def test_sanity_check_raises_error():
)
with pytest.raises(
AssertionError,
match="Did not find any conversation start. "
"Please ensure that some parent ids are empty.",
match=re.escape(
"Did not find any conversation chain. "
"Please ensure that some parent ids are empty."
"\n"
"Conversations are chained using parent id, "
"start conversation record should "
"not have parent id populated"
"\n"
"Parent id column checked:parent_id"
"\n"
"Number of records with empty <parent_id>"
"column:{(df[cfg.dataset.parent_id_column].fillna('') == '').sum()}"
),
):
CustomDataset.sanity_check(invalid_df_2, mock_config)

Expand Down

0 comments on commit dc50e3a

Please sign in to comment.