From c1285c3f9be10d8a326b3f46605fdb924ac94ff8 Mon Sep 17 00:00:00 2001 From: Reinier Koops Date: Thu, 28 Mar 2024 06:26:59 +0100 Subject: [PATCH] Add explicit state setting (#242) Set the random states explicitly. Tasks: - [x] Adjust the code where "sample" does not use random_state - [x] Adjust the test code for it - [x] Make sure the tests use it consistently. - [x] Look if we can remove some unnecessary checks as mentioned in this issue: https://github.com/ing-bank/probatus/issues/221 --- .../feature_elimination.py | 30 +- probatus/interpret/model_interpret.py | 16 +- probatus/interpret/shap_dependence.py | 21 +- .../sample_similarity/resemblance_model.py | 24 +- probatus/utils/__init__.py | 2 +- probatus/utils/arrayfuncs.py | 11 +- .../{interface.py => base_class_interface.py} | 0 probatus/utils/shap_helpers.py | 16 +- pyproject.toml | 1 + tests/conftest.py | 158 +++++++++- .../test_feature_elimination.py | 276 ++++++++---------- tests/interpret/test_model_interpret.py | 94 +----- tests/interpret/test_shap_dependence.py | 46 +-- .../test_resemblance_model.py | 91 ++---- tests/utils/test_base_class.py | 32 ++ tests/utils/test_utils_array_funcs.py | 12 +- 16 files changed, 428 insertions(+), 402 deletions(-) rename probatus/utils/{interface.py => base_class_interface.py} (100%) create mode 100644 tests/utils/test_base_class.py diff --git a/probatus/feature_elimination/feature_elimination.py b/probatus/feature_elimination/feature_elimination.py index c83416ed..5812c5d4 100644 --- a/probatus/feature_elimination/feature_elimination.py +++ b/probatus/feature_elimination/feature_elimination.py @@ -7,6 +7,7 @@ from sklearn.base import clone, is_classifier, is_regressor from sklearn.model_selection import check_cv from sklearn.model_selection._search import BaseSearchCV +from loguru import logger from probatus.utils import ( BaseFitComputePlotClass, @@ -156,9 +157,8 @@ def __init__( Controls verbosity of the output: - 0 - neither prints nor warnings are shown - - 1 - 50 - only most important warnings - - 51 - 100 - shows other warnings and prints - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 1 - only most important warnings + - 2 - shows all prints and all warnings. random_state (int, optional): Random state set at each round of feature elimination. If it is None, the results will not be @@ -395,7 +395,7 @@ def _get_feature_shap_values_per_fold( score_val = self.scorer.scorer(clf, X_val, y_val) # Compute SHAP values - shap_values = shap_calc(clf, X_val, verbose=self.verbose, **shap_kwargs) + shap_values = shap_calc(clf, X_val, verbose=self.verbose, random_state=self.random_state, **shap_kwargs) return shap_values, score_train, score_val def fit( @@ -537,7 +537,7 @@ def fit( self.min_features_to_select = 0 # This ensures that, if columns_to_keep is provided , # the last features remaining are only the columns_to_keep. 
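The hunks above and below replace the old 0-100 verbosity scale with three levels (0: silent, 1: important warnings only, 2: all prints and warnings) and thread `random_state` into every `shap_calc` call. A minimal sketch of the resulting caller-side contract, assuming this patched version of probatus (the toy dataset and seed are illustrative, not part of the patch):

    import pandas as pd
    from sklearn.datasets import make_classification
    from sklearn.tree import DecisionTreeClassifier

    from probatus.feature_elimination import ShapRFECV

    # Illustrative data; any binary-classification frame works.
    X, y = make_classification(n_samples=200, n_features=10, random_state=0)
    X = pd.DataFrame(X, columns=[f"f{i}" for i in range(10)])

    shap_elimination = ShapRFECV(
        DecisionTreeClassifier(max_depth=3, random_state=0),
        step=0.2,
        cv=5,
        scoring="roc_auc",
        verbose=1,       # 0: silent, 1: key warnings only, 2: everything
        random_state=0,  # now also seeds the SHAP sampling mask each round
    )
    report = shap_elimination.fit_compute(X, y)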
- if self.verbose > 50: + if self.verbose > 1: warnings.warn(f"Minimum features to select : {stopping_criteria}") while len(current_features_set) > stopping_criteria: @@ -615,8 +615,8 @@ def fit( val_metric_mean=np.mean(scores_val), val_metric_std=np.std(scores_val), ) - if self.verbose > 50: - print( + if self.verbose > 1: + logger.info( f"Round: {round_number}, Current number of features: {len(current_features_set)}, " f'Current performance: Train {self.report_df.loc[round_number]["train_metric_mean"]} ' f'+/- {self.report_df.loc[round_number]["train_metric_std"]}, CV Validation ' @@ -841,8 +841,8 @@ def _get_best_num_features(self, best_method, standard_error_threshold=1.0): ) # Log shap_report for users who want to inspect / debug - if self.verbose > 50: - print(shap_report) + if self.verbose > 1: + logger.info(shap_report) return best_num_features @@ -1110,10 +1110,9 @@ def __init__( verbose (int, optional): Controls verbosity of the output: - - 0 - nether prints nor warnings are shown - - 1 - 50 - only most important warnings - - 51 - 100 - shows other warnings and prints - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 0 - neither prints nor warnings are shown + - 1 - only most important warnings + - 2 - shows all prints and all warnings. random_state (int, optional): Random state set at each round of feature elimination. If it is None, the results will not be @@ -1210,7 +1209,8 @@ def _get_fit_params_lightGBM( "eval_set": [(X_val, y_val)], "callbacks": [early_stopping(self.early_stopping_rounds, first_metric_only=True)], } - if self.verbose >= 100: + + if self.verbose >= 2: fit_params["callbacks"].append(log_evaluation(1)) else: fit_params["callbacks"].append(log_evaluation(0)) @@ -1505,5 +1505,5 @@ def _get_feature_shap_values_per_fold( score_val = self.scorer.scorer(clf, X_val, y_val) # Compute SHAP values - shap_values = shap_calc(clf, X_val, verbose=self.verbose, **shap_kwargs) + shap_values = shap_calc(clf, X_val, verbose=self.verbose, random_state=self.random_state, **shap_kwargs) return shap_values, score_train, score_val diff --git a/probatus/interpret/model_interpret.py b/probatus/interpret/model_interpret.py index 50cacf59..c1d8a4a1 100644 --- a/probatus/interpret/model_interpret.py +++ b/probatus/interpret/model_interpret.py @@ -80,7 +80,7 @@ class ShapModelInterpreter(BaseFitComputePlotClass): """ - def __init__(self, clf, scoring="roc_auc", verbose=0): + def __init__(self, clf, scoring="roc_auc", verbose=0, random_state=None): """ Initializes the class. @@ -98,13 +98,17 @@ def __init__(self, clf, scoring="roc_auc", verbose=0): Controls verbosity of the output: - 0 - neither prints nor warnings are shown - - 1 - 50 - only most important warnings - - 51 - 100 - shows other warnings and prints - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 1 - only most important warnings + - 2 - shows all prints and all warnings. + + random_state (int, optional): + Random state set for the nr of samples. If it is None, the results will not be reproducible. For + reproducible results set it to an integer. 
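A usage sketch of the extended `ShapModelInterpreter` signature (the fitted model and the train/test split are placeholders; any fitted binary classifier works):

    from probatus.interpret import ShapModelInterpreter

    # `clf` must already be fitted on X_train / y_train.
    shap_interpreter = ShapModelInterpreter(
        clf, scoring="roc_auc", verbose=1, random_state=0
    )
    importance_df = shap_interpreter.fit_compute(X_train, X_test, y_train, y_test)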
""" self.clf = clf self.scorer = get_single_scorer(scoring) self.verbose = verbose + self.random_state = random_state def fit( self, @@ -186,6 +190,7 @@ def fit( column_names=self.column_names, class_names=self.class_names, verbose=self.verbose, + random_state=self.random_state, **shap_kwargs, ) @@ -200,6 +205,7 @@ def fit( column_names=self.column_names, class_names=self.class_names, verbose=self.verbose, + random_state=self.random_state, **shap_kwargs, ) @@ -212,6 +218,7 @@ def _prep_shap_related_variables( y, approximate=False, verbose=0, + random_state=None, column_names=None, class_names=None, **shap_kwargs, @@ -228,6 +235,7 @@ def _prep_shap_related_variables( X, approximate=approximate, verbose=verbose, + random_state=random_state, return_explainer=True, **shap_kwargs, ) diff --git a/probatus/interpret/shap_dependence.py b/probatus/interpret/shap_dependence.py index e809f89f..5e947e2b 100644 --- a/probatus/interpret/shap_dependence.py +++ b/probatus/interpret/shap_dependence.py @@ -52,7 +52,7 @@ class DependencePlotter(BaseFitComputePlotClass): """ - def __init__(self, clf, verbose=0): + def __init__(self, clf, verbose=0, random_state=None): """ Initializes the class. @@ -64,12 +64,16 @@ def __init__(self, clf, verbose=0): Controls verbosity of the output: - 0 - neither prints nor warnings are shown - - 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings) - - 51 - 100 - shows most important warnings, prints of the feature removal process - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 1 - only most important warnings + - 2 - shows all prints and all warnings. + + random_state (int, optional): + Random state set for the nr of samples. If it is None, the results will not be reproducible. For + reproducible results set it to an integer. """ self.clf = clf self.verbose = verbose + self.random_state = random_state def __repr__(self): """ @@ -113,7 +117,14 @@ def fit(self, X, y, column_names=None, class_names=None, precalc_shap=None, **sh if self.class_names is None: self.class_names = ["Negative Class", "Positive Class"] - self.shap_vals_df = shap_to_df(self.clf, self.X, precalc_shap=precalc_shap, verbose=self.verbose, **shap_kwargs) + self.shap_vals_df = shap_to_df( + self.clf, + self.X, + precalc_shap=precalc_shap, + verbose=self.verbose, + random_state=self.random_state, + **shap_kwargs, + ) self.fitted = True return self diff --git a/probatus/sample_similarity/resemblance_model.py b/probatus/sample_similarity/resemblance_model.py index 2c605a68..dcc268e1 100644 --- a/probatus/sample_similarity/resemblance_model.py +++ b/probatus/sample_similarity/resemblance_model.py @@ -21,6 +21,7 @@ import warnings import matplotlib.pyplot as plt +from loguru import logger import numpy as np import pandas as pd from shap import summary_plot @@ -76,9 +77,8 @@ class is 'roc_auc'. Controls verbosity of the output: - 0 - neither prints nor warnings are shown - - 1 - 50 - only most important warnings - - 51 - 100 - shows other warnings and prints - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 1 - only most important warnings + - 2 - shows all prints and all warnings. random_state (int, optional): Random state set at each round of feature elimination. 
If it is None, the results will not be @@ -178,8 +178,8 @@ def fit(self, X1, X2, column_names=None, class_names=None): f"Train {self.scorer.metric_name}: {np.round(self.train_score, 3)},\n" f"Test {self.scorer.metric_name}: {np.round(self.test_score, 3)}." ) - if self.verbose > 50: - print(f"Finished model training: \n{self.results_text}") + if self.verbose > 1: + logger.info(f"Finished model training: \n{self.results_text}") if self.verbose > 0: if self.train_score > self.test_score: @@ -343,9 +343,8 @@ class is 'roc_auc'. Controls verbosity of the output: - 0 - neither prints nor warnings are shown - - 1 - 50 - only most important warnings - - 51 - 100 - shows other warnings and prints - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 1 - only most important warnings + - 2 - shows all prints and all warnings. random_state (int, optional): Random state set at each round of feature elimination. If it is None, the results will not be @@ -572,9 +571,8 @@ class is 'roc_auc'. Controls verbosity of the output: - 0 - neither prints nor warnings are shown - - 1 - 50 - only most important warnings - - 51 - 100 - shows other warnings and prints - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 1 - only most important warnings + - 2 - shows all prints and all warnings. random_state (int, optional): Random state set at each round of feature elimination. If it is None, the results will not be @@ -630,7 +628,9 @@ def fit(self, X1, X2, column_names=None, class_names=None, **shap_kwargs): """ super().fit(X1=X1, X2=X2, column_names=column_names, class_names=class_names) - self.shap_values_test = shap_calc(self.clf, self.X_test, verbose=self.verbose, **shap_kwargs) + self.shap_values_test = shap_calc( + self.clf, self.X_test, verbose=self.verbose, random_state=self.random_state, **shap_kwargs + ) self.report = calculate_shap_importance(self.shap_values_test, self.column_names) return self diff --git a/probatus/utils/__init__.py b/probatus/utils/__init__.py index e8db672b..18c31ba1 100644 --- a/probatus/utils/__init__.py +++ b/probatus/utils/__init__.py @@ -36,7 +36,7 @@ assure_list_values_allowed, ) from .plots import plot_distributions_of_feature -from .interface import BaseFitComputeClass, BaseFitComputePlotClass +from .base_class_interface import BaseFitComputeClass, BaseFitComputePlotClass __all__ = [ "NotFittedError", diff --git a/probatus/utils/arrayfuncs.py b/probatus/utils/arrayfuncs.py index 768e4569..fd3377a6 100644 --- a/probatus/utils/arrayfuncs.py +++ b/probatus/utils/arrayfuncs.py @@ -189,9 +189,9 @@ def preprocess_data(X, X_name=None, column_names=None, verbose=0): Controls verbosity of the output: - 0 - neither prints nor warnings are shown - - 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings) - - 51 - 100 - shows most important warnings, prints of the feature removal process - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 1 - only most important warnings + - 2 - shows all prints and all warnings. 
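A sketch of both preprocessing helpers under the new scale (the frame is illustrative; pre-patch callers passed values like verbose=150 for the same effect):

    import numpy as np
    import pandas as pd

    from probatus.utils import preprocess_data, preprocess_labels

    X = pd.DataFrame({"a": ["x", "y", "z"], "b": [1.0, np.nan, 2.0]})
    # verbose=2 surfaces every print and warning.
    X_clean, column_names = preprocess_data(X, X_name="X", verbose=2)
    y_clean = preprocess_labels([0, 1, 0], y_name="y", index=X_clean.index, verbose=2)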
+ Returns: (pd.DataFrame): @@ -255,9 +255,8 @@ def preprocess_labels(y, y_name=None, index=None, verbose=0): Controls verbosity of the output: - 0 - neither prints nor warnings are shown - - 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings) - - 51 - 100 - shows most important warnings, prints of the feature removal process - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 1 - only most important warnings + - 2 - shows all prints and all warnings. Returns: (pd.Series): diff --git a/probatus/utils/interface.py b/probatus/utils/base_class_interface.py similarity index 100% rename from probatus/utils/interface.py rename to probatus/utils/base_class_interface.py diff --git a/probatus/utils/shap_helpers.py b/probatus/utils/shap_helpers.py index 88585224..c14f16fa 100644 --- a/probatus/utils/shap_helpers.py +++ b/probatus/utils/shap_helpers.py @@ -33,6 +33,7 @@ def shap_calc( X, return_explainer=False, verbose=0, + random_state=None, sample_size=100, approximate=False, check_additivity=True, @@ -54,10 +55,13 @@ def shap_calc( verbose (int, optional): Controls verbosity of the output: - - 0 - nether prints nor warnings are shown - - 1 - 50 - only most important warnings - - 51 - 100 - shows other warnings and prints - - above 100 - presents all prints and all warnings (including SHAP warnings). + - 0 - neither prints nor warnings are shown + - 1 - only most important warnings + - 2 - shows all prints and all warnings. + + random_state (int, optional): + Random state set for the nr of samples. If it is None, the results will not be reproducible. For + reproducible results set it to an integer. approximate (boolean): if True uses shap approximations - less accurate, but very fast. It applies to tree-based explainers only. @@ -82,7 +86,7 @@ def shap_calc( ) # Suppress warnings regarding XGboost and Lightgbm models. with warnings.catch_warnings(): - if verbose <= 100: + if verbose <= 1: warnings.simplefilter("ignore") # For tree explainers, do not pass masker when feature_perturbation is @@ -100,7 +104,7 @@ def shap_calc( sample_size = int(np.ceil(X.shape[0] * 0.2)) else: pass - mask = sample(X, sample_size) + mask = sample(X, sample_size, random_state=random_state) explainer = Explainer(model, masker=mask, **shap_kwargs) # For tree-explainers allow for using check_additivity and approximate arguments diff --git a/pyproject.toml b/pyproject.toml index 51eda8bd..630f7f55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ "shap>=0.43.0 ; python_version != '3.8'", "numpy>=1.23.2", "numba>=0.57.0", + "loguru>=0.7.2", ] [project.urls] diff --git a/tests/conftest.py b/tests/conftest.py index 0d54856a..ab3501fc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,51 @@ import pytest from sklearn.datasets import make_classification from sklearn.model_selection import train_test_split +from sklearn.tree import DecisionTreeClassifier +from catboost import CatBoostClassifier +from lightgbm import LGBMClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import RandomizedSearchCV + + +@pytest.fixture(scope="function") +def random_state(): + """ + Fixture to automatically provide a random state. + """ + RANDOM_STATE = 0 + + return RANDOM_STATE + + +@pytest.fixture(scope="function") +def random_state_42(): + """ + Fixture to automatically provide a random state. 
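The `shap_calc` hunk above is the core of this patch: the background mask drawn via `sample(X, sample_size)` was previously unseeded, so repeated calls could produce different maskers and therefore different SHAP values. A sketch of the behaviour the new `random_state` pass-through pins down, assuming shap's public `shap.utils.sample` (the frame is illustrative):

    import pandas as pd
    from shap.utils import sample

    X = pd.DataFrame({"a": range(1000), "b": range(1000)})

    # Same seed -> identical background mask -> reproducible SHAP values.
    mask_1 = sample(X, 100, random_state=0)
    mask_2 = sample(X, 100, random_state=0)
    assert mask_1.equals(mask_2)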
+ """ + RANDOM_STATE = 42 + + return RANDOM_STATE + + +@pytest.fixture(scope="function") +def random_state_1234(): + """ + Fixture to automatically provide a random state. + """ + RANDOM_STATE = 1234 + + return RANDOM_STATE + + +@pytest.fixture(scope="function") +def random_state_1(): + """ + Fixture to automatically provide a random state. + """ + RANDOM_STATE = 1 + + return RANDOM_STATE @pytest.fixture(scope="function") @@ -16,7 +61,7 @@ def mock_model(): @pytest.fixture(scope="function") -def complex_data(): +def complex_data(random_state): """ Fixture. """ @@ -29,42 +74,131 @@ def complex_data(): class_sep=0.05, n_informative=2, n_features=5, - random_state=0, + random_state=random_state, n_redundant=2, n_clusters_per_class=1, ) X = pd.DataFrame(X, columns=feature_names) - X["f1_categorical"] = X["f1_categorical"].astype("category") X.loc[0:10, "f2_missing"] = np.nan return X, y @pytest.fixture(scope="function") -def complex_data_split(complex_data): +def complex_data_with_categorical(complex_data): + X, y = complex_data + X["f1_categorical"] = X["f1_categorical"].astype(str).astype("category") + + return X, y + + +@pytest.fixture(scope="function") +def complex_data_split(complex_data, random_state_42): """ Fixture. """ X, y = complex_data - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state_42) + return X_train, X_test, y_train, y_test + + +@pytest.fixture(scope="function") +def complex_data_split_with_categorical(complex_data_split): + X_train, X_test, y_train, y_test = complex_data_split + X_train["f1_categorical"] = X_train["f1_categorical"].astype(str).astype("category") + X_test["f1_categorical"] = X_test["f1_categorical"].astype(str).astype("category") + return X_train, X_test, y_train, y_test @pytest.fixture(scope="function") -def complex_lightgbm(): +def complex_lightgbm(random_state_42): + """This fixture allows to reuse the import of the LGBMClassifier class across different tests.""" + model = LGBMClassifier(max_depth=5, num_leaves=11, class_weight="balanced", random_state=random_state_42) + return model + + +@pytest.fixture(scope="function") +def complex_fitted_lightgbm(complex_data_split_with_categorical, complex_lightgbm): """ Fixture. 
""" - import lightgbm + X_train, _, y_train, _ = complex_data_split_with_categorical - return lightgbm.LGBMClassifier(max_depth=5, num_leaves=11, class_weight="balanced", random_state=42) + return complex_lightgbm.fit(X_train, y_train) @pytest.fixture(scope="function") -def complex_fitted_lightgbm(complex_data_split, complex_lightgbm): +def catboost_classifier(random_state): + """This fixture allows to reuse the import of the CatboostClassifier class across different tests.""" + model = CatBoostClassifier(random_seed=random_state) + return model + + +@pytest.fixture(scope="function") +def decision_tree_classifier(random_state): + """This fixture allows to reuse the import of the DecisionTreeClassifier class across different tests.""" + model = DecisionTreeClassifier(max_depth=1, random_state=random_state) + return model + + +@pytest.fixture(scope="function") +def randomized_search_decision_tree_classifier(decision_tree_classifier, random_state): + """This fixture allows to reuse the import of the DecisionTreeClassifier in combination with a new CV class across different tests.""" + param_grid = {"criterion": ["gini"], "min_samples_split": [1, 2]} + cv = RandomizedSearchCV(decision_tree_classifier, param_grid, cv=2, n_iter=2, random_state=random_state) + return cv + + +@pytest.fixture(scope="function") +def logistic_regression(random_state): + """This fixture allows to reuse the import of the DecisionTreeClassifier class across different tests.""" + model = LogisticRegression(random_state=random_state) + return model + + +@pytest.fixture(scope="function") +def X_train(): """ Fixture. """ - X_train, _, y_train, _ = complex_data_split - X_train["f1_categorical"] = X_train["f1_categorical"].astype("category") + return pd.DataFrame({"col_1": [1, 1, 1, 1], "col_2": [0, 0, 0, 0], "col_3": [1, 0, 1, 0]}, index=[1, 2, 3, 4]) - return complex_lightgbm.fit(X_train, y_train) + +@pytest.fixture(scope="function") +def y_train(): + """ + Fixture. + """ + return pd.Series([1, 0, 1, 0], index=[1, 2, 3, 4]) + + +@pytest.fixture(scope="function") +def X_test(): + """ + Fixture. + """ + return pd.DataFrame({"col_1": [1, 1, 1, 1], "col_2": [0, 0, 0, 0], "col_3": [1, 0, 1, 0]}, index=[5, 6, 7, 8]) + + +@pytest.fixture(scope="function") +def y_test(): + """ + Fixture. + """ + return pd.Series([0, 0, 1, 0], index=[5, 6, 7, 8]) + + +@pytest.fixture(scope="function") +def fitted_logistic_regression(X_train, y_train, logistic_regression): + """ + Fixture. + """ + return logistic_regression.fit(X_train, y_train) + + +@pytest.fixture(scope="function") +def fitted_tree(X_train, y_train, decision_tree_classifier): + """ + Fixture. 
+ """ + return decision_tree_classifier.fit(X_train, y_train) diff --git a/tests/feature_elimination/test_feature_elimination.py b/tests/feature_elimination/test_feature_elimination.py index a304feaf..0e2cab5b 100644 --- a/tests/feature_elimination/test_feature_elimination.py +++ b/tests/feature_elimination/test_feature_elimination.py @@ -2,6 +2,7 @@ import pandas as pd import pytest +from lightgbm import LGBMClassifier from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import LogisticRegression @@ -9,7 +10,7 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC -from sklearn.tree import DecisionTreeClassifier +from xgboost import XGBClassifier from probatus.feature_elimination import EarlyStoppingShapRFECV, ShapRFECV from probatus.utils import preprocess_labels @@ -30,20 +31,6 @@ def X(): ) -@pytest.fixture(scope="session") -def catboost_classifier_class(): - """This fixture allows to reuse the import of the CatboostClassifier class across different tests. - - It is equivalent to importing the package at the beginning of the file. - - Importing catboost multiple times results in a ValueError: I/O operation on closed file. - - """ - from catboost import CatBoostClassifier - - return CatBoostClassifier - - @pytest.fixture(scope="function") def y(): """ @@ -52,37 +39,6 @@ def y(): return pd.Series([1, 0, 1, 0, 1, 0, 1, 0], index=[1, 2, 3, 4, 5, 6, 7, 8]) -@pytest.fixture(scope="function") -def X_multi(): - """ - Fixture for multi-class X. - """ - return pd.DataFrame( - { - "col_1": [1, 1, 1, 1, 0, 1, 0, 0], - "col_2": [0, 0, 0, 0, 1, 0, 1, 1], - "col_3": [1, 0, 2, 0, 2, 0, 1, 0], - }, - index=[1, 2, 3, 4, 5, 6, 7, 8], - ) - - -@pytest.fixture(scope="function") -def y_multi(): - """ - Fixture for multi-class y. - """ - return pd.Series([1, 0, 2, 0, 2, 0, 1, 0], index=[1, 2, 3, 4, 5, 6, 7, 8]) - - -@pytest.fixture(scope="function") -def y_reg(): - """ - Fixture for y. - """ - return pd.Series([100, 1, 101, 2, 99, -1, 102, 1], index=[1, 2, 3, 4, 5, 6, 7, 8]) - - @pytest.fixture(scope="function") def sample_weight(): """ @@ -99,14 +55,19 @@ def groups(): return pd.Series(["grp1", "grp1", "grp1", "grp1", "grp2", "grp2", "grp2", "grp2"], index=[1, 2, 3, 4, 5, 6, 7, 8]) -def test_shap_rfe_randomized_search(X, y): +@pytest.fixture(scope="function") +def XGBoost_classifier(random_state): + """This fixture allows to reuse the import of the XGBClassifier class across different tests.""" + model = XGBClassifier(n_estimators=200, max_depth=3, random_state=random_state) + return model + + +def test_shap_rfe_randomized_search(X, y, randomized_search_decision_tree_classifier, random_state): """ Test with RandomizedSearchCV. 
""" - clf = DecisionTreeClassifier(max_depth=1) - param_grid = {"criterion": ["gini"], "min_samples_split": [1, 2]} - search = RandomizedSearchCV(clf, param_grid, cv=2, n_iter=2) - shap_elimination = ShapRFECV(search, step=0.8, cv=2, scoring="roc_auc", n_jobs=4, random_state=1) + search = randomized_search_decision_tree_classifier + shap_elimination = ShapRFECV(search, step=0.8, cv=2, scoring="roc_auc", n_jobs=4, random_state=random_state) report = shap_elimination.fit_compute(X, y) assert report.shape[0] == 2 @@ -115,14 +76,12 @@ def test_shap_rfe_randomized_search(X, y): _ = shap_elimination.plot(show=False) -def test_shap_rfe_multi_class(X, y): - clf = DecisionTreeClassifier(max_depth=1, random_state=1) - +def test_shap_rfe_multi_class(X, y, decision_tree_classifier, random_state): shap_elimination = ShapRFECV( - clf, + decision_tree_classifier, cv=2, scoring="roc_auc_ovr", - random_state=1, + random_state=random_state, ) report = shap_elimination.fit_compute(X, y, approximate=False, check_additivity=False) @@ -131,14 +90,13 @@ def test_shap_rfe_multi_class(X, y): assert shap_elimination.get_reduced_features_set(1) == ["col_3"] -def test_shap_rfe(X, y, sample_weight): +def test_shap_rfe(X, y, sample_weight, decision_tree_classifier, random_state): """ Test with ShapRFECV. """ - clf = DecisionTreeClassifier(max_depth=1, random_state=1) shap_elimination = ShapRFECV( - clf, - random_state=1, + decision_tree_classifier, + random_state=random_state, step=1, cv=2, scoring="roc_auc", @@ -150,15 +108,14 @@ def test_shap_rfe(X, y, sample_weight): assert shap_elimination.get_reduced_features_set(1) == ["col_3"] -def test_shap_rfe_group_cv(X, y, groups, sample_weight): +def test_shap_rfe_group_cv(X, y, groups, sample_weight, decision_tree_classifier, random_state): """ Test ShapRFECV with StratifiedGroupKFold. """ - clf = DecisionTreeClassifier(max_depth=1, random_state=1) - cv = StratifiedGroupKFold(n_splits=2, shuffle=True, random_state=1) + cv = StratifiedGroupKFold(n_splits=2, shuffle=True, random_state=random_state) shap_elimination = ShapRFECV( - clf, - random_state=1, + decision_tree_classifier, + random_state=random_state, step=1, cv=cv, scoring="roc_auc", @@ -172,20 +129,20 @@ def test_shap_rfe_group_cv(X, y, groups, sample_weight): assert shap_elimination.get_reduced_features_set(1) == ["col_3"] -def test_shap_pipeline_error(X, y): +def test_shap_pipeline_error(X, y, decision_tree_classifier, random_state): """ Test with ShapRFECV for pipelines. """ clf = Pipeline( [ ("scaler", StandardScaler()), - ("dt", DecisionTreeClassifier(max_depth=1, random_state=1)), + ("dt", decision_tree_classifier), ] ) with pytest.raises(TypeError): shap_elimination = ShapRFECV( clf, - random_state=1, + random_state=random_state, step=1, cv=2, scoring="roc_auc", @@ -194,24 +151,24 @@ def test_shap_pipeline_error(X, y): shap_elimination = shap_elimination.fit(X, y, approximate=True, check_additivity=False) -def test_shap_rfe_linear_model(X, y): +def test_shap_rfe_linear_model(X, y, random_state): """ Test ShapRFECV with linear model. 
""" - clf = LogisticRegression(C=1, random_state=1) - shap_elimination = ShapRFECV(clf, random_state=1, step=1, cv=2, scoring="roc_auc", n_jobs=4) + clf = LogisticRegression(C=1, random_state=random_state) + shap_elimination = ShapRFECV(clf, random_state=random_state, step=1, cv=2, scoring="roc_auc", n_jobs=4) report = shap_elimination.fit_compute(X, y) assert report.shape[0] == 3 assert shap_elimination.get_reduced_features_set(1) == ["col_3"] -def test_shap_rfe_svm(X, y): +def test_shap_rfe_svm(X, y, random_state): """ Test with ShapRFECV with SVM. """ - clf = SVC(C=1, kernel="linear", probability=True) - shap_elimination = ShapRFECV(clf, random_state=1, step=1, cv=2, scoring="roc_auc", n_jobs=4) + clf = SVC(C=1, kernel="linear", probability=True, random_state=random_state) + shap_elimination = ShapRFECV(clf, random_state=random_state, step=1, cv=2, scoring="roc_auc", n_jobs=4) shap_elimination = shap_elimination.fit(X, y) report = shap_elimination.compute() @@ -219,14 +176,13 @@ def test_shap_rfe_svm(X, y): assert shap_elimination.get_reduced_features_set(1) == ["col_3"] -def test_shap_rfe_cols_to_keep(X, y): +def test_shap_rfe_cols_to_keep(X, y, decision_tree_classifier, random_state): """ Test for shap_rfe_cv with features to keep parameter. """ - clf = DecisionTreeClassifier(max_depth=1, random_state=1) shap_elimination = ShapRFECV( - clf, - random_state=1, + decision_tree_classifier, + random_state=random_state, step=2, cv=2, scoring="roc_auc", @@ -240,14 +196,12 @@ def test_shap_rfe_cols_to_keep(X, y): assert reduced_feature_set == {"col_2", "col_3"} -def test_shap_rfe_randomized_search_cols_to_keep(X, y): +def test_shap_rfe_randomized_search_cols_to_keep(X, y, randomized_search_decision_tree_classifier, random_state): """ Test with ShapRFECV with column to keep param. """ - clf = DecisionTreeClassifier(max_depth=1) - param_grid = {"criterion": ["gini"], "min_samples_split": [1, 2]} - search = RandomizedSearchCV(clf, param_grid, cv=2, n_iter=2) - shap_elimination = ShapRFECV(search, step=0.8, cv=2, scoring="roc_auc", n_jobs=4, random_state=1) + search = randomized_search_decision_tree_classifier + shap_elimination = ShapRFECV(search, step=0.8, cv=2, scoring="roc_auc", n_jobs=4, random_state=random_state) report = shap_elimination.fit_compute(X, y, columns_to_keep=["col_2", "col_3"]) assert report.shape[0] == 2 @@ -273,7 +227,7 @@ def test_calculate_number_of_features_to_remove(): ) -def test_shap_automatic_num_feature_selection(): +def test_shap_automatic_num_feature_selection(decision_tree_classifier, random_state): """ Test automatic num feature selection methods """ @@ -286,10 +240,9 @@ def test_shap_automatic_num_feature_selection(): ) y = pd.Series([0, 0, 0, 0, 1, 1, 1, 1]) - clf = DecisionTreeClassifier(max_depth=1, random_state=1) shap_elimination = ShapRFECV( - clf, - random_state=1, + decision_tree_classifier, + random_state=random_state, step=1, cv=2, scoring="roc_auc", @@ -308,12 +261,11 @@ def test_shap_automatic_num_feature_selection(): assert best_parsimonious_features == ["col_2"] -def test_get_feature_shap_values_per_fold(X, y): +def test_get_feature_shap_values_per_fold(X, y, decision_tree_classifier, random_state): """ Test with ShapRFECV with features per fold. 
""" - clf = DecisionTreeClassifier(max_depth=1) - shap_elimination = ShapRFECV(clf, scoring="roc_auc") + shap_elimination = ShapRFECV(decision_tree_classifier, scoring="roc_auc", random_state=random_state) ( shap_values, train_score, @@ -321,7 +273,7 @@ def test_get_feature_shap_values_per_fold(X, y): ) = shap_elimination._get_feature_shap_values_per_fold( X, y, - clf, + decision_tree_classifier, train_index=[2, 3, 4, 5, 6, 7], val_index=[0, 1], ) @@ -330,15 +282,13 @@ def test_get_feature_shap_values_per_fold(X, y): assert shap_values.shape == (2, 3) -def test_shap_rfe_same_features_are_kept_after_each_run(): +def test_shap_rfe_same_features_are_kept_after_each_run(random_state_1234): """ Test a use case which appears to be flickering with Probatus 1.8.9 and lower. Expected result: every run the same outcome. Probatus <= 1.8.9: A different order every time. """ - SEED = 1234 - feature_names = [(f"f{num}") for num in range(1, 21)] # Code from tutorial on probatus documentation @@ -347,14 +297,14 @@ def test_shap_rfe_same_features_are_kept_after_each_run(): class_sep=0.05, n_informative=6, n_features=20, - random_state=SEED, + random_state=random_state_1234, n_redundant=10, n_clusters_per_class=1, ) X = pd.DataFrame(X, columns=feature_names) random_forest = RandomForestClassifier( - random_state=SEED, + random_state=random_state_1234, n_estimators=70, max_features="log2", criterion="entropy", @@ -367,26 +317,42 @@ def test_shap_rfe_same_features_are_kept_after_each_run(): cv=5, scoring="f1_macro", n_jobs=1, - random_state=SEED, + random_state=random_state_1234, ) - report = shap_elimination.fit_compute(X, y, check_additivity=True, seed=SEED) + report = shap_elimination.fit_compute(X, y, check_additivity=True) # Return the set of features with the best validation accuracy kept_features = list(report.iloc[[report["val_metric_mean"].idxmax() - 1]]["features_set"].to_list()[0]) # Results from the first run - assert ["f2", "f3", "f6", "f10", "f11", "f12", "f13", "f14", "f15", "f17", "f18", "f19", "f20"] == kept_features - - -def test_shap_rfe_penalty_factor(X, y): + assert [ + "f1", + "f2", + "f3", + "f5", + "f6", + "f10", + "f11", + "f12", + "f13", + "f14", + "f15", + "f16", + "f17", + "f18", + "f19", + "f20", + ] == kept_features + + +def test_shap_rfe_penalty_factor(X, y, decision_tree_classifier, random_state): """ Test ShapRFECV with shap_variance_penalty_factor """ - clf = DecisionTreeClassifier(max_depth=1, random_state=1) shap_elimination = ShapRFECV( - clf, - random_state=1, + decision_tree_classifier, + random_state=random_state, step=1, cv=2, scoring="roc_auc", @@ -401,7 +367,7 @@ def test_shap_rfe_penalty_factor(X, y): @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_complex_dataset(complex_data, complex_lightgbm): +def test_complex_dataset(complex_data, complex_lightgbm, random_state_1): """ Test on complex dataset. 
""" @@ -411,9 +377,11 @@ def test_complex_dataset(complex_data, complex_lightgbm): "n_estimators": [5, 7, 10], "num_leaves": [3, 5, 7, 10], } - search = RandomizedSearchCV(complex_lightgbm, param_grid, n_iter=1) + search = RandomizedSearchCV(complex_lightgbm, param_grid, n_iter=1, random_state=random_state_1) - shap_elimination = ShapRFECV(clf=search, step=1, cv=10, scoring="roc_auc", n_jobs=3, verbose=50) + shap_elimination = ShapRFECV( + clf=search, step=1, cv=10, scoring="roc_auc", n_jobs=3, verbose=1, random_state=random_state_1 + ) report = shap_elimination.fit_compute(X, y) @@ -421,18 +389,16 @@ def test_complex_dataset(complex_data, complex_lightgbm): @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_shap_rfe_early_stopping_lightGBM(complex_data): +def test_shap_rfe_early_stopping_lightGBM(complex_data, random_state): """ Test EarlyStoppingShapRFECV with a LGBMClassifier. """ - from lightgbm import LGBMClassifier - - clf = LGBMClassifier(n_estimators=200, max_depth=3) + clf = LGBMClassifier(n_estimators=200, max_depth=3, random_state=random_state) X, y = complex_data shap_elimination = EarlyStoppingShapRFECV( clf, - random_state=1, + random_state=random_state, step=1, cv=10, scoring="roc_auc", @@ -447,19 +413,16 @@ def test_shap_rfe_early_stopping_lightGBM(complex_data): @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_shap_rfe_early_stopping_XGBoost(complex_data): +def test_shap_rfe_early_stopping_XGBoost(XGBoost_classifier, complex_data, random_state): """ Test EarlyStoppingShapRFECV with a LGBMClassifier. """ - from xgboost import XGBClassifier - - clf = XGBClassifier(n_estimators=200, max_depth=3, random_state=42) X, y = complex_data X["f1_categorical"] = X["f1_categorical"].astype(float) shap_elimination = EarlyStoppingShapRFECV( - clf, - random_state=1, + XGBoost_classifier, + random_state=random_state, step=1, cv=10, scoring="roc_auc", @@ -473,19 +436,16 @@ def test_shap_rfe_early_stopping_XGBoost(complex_data): assert shap_elimination.get_reduced_features_set(1) == ["f4"] -# For now this test fails, catboost has issues with categorical variables and -@pytest.mark.xfail @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_shap_rfe_early_stopping_CatBoost(complex_data, catboost_classifier_class): +def test_shap_rfe_early_stopping_CatBoost(complex_data_with_categorical, catboost_classifier, random_state): """ Test EarlyStoppingShapRFECV with a CatBoostClassifier. """ - clf = catboost_classifier_class(random_seed=42) - X, y = complex_data + X, y = complex_data_with_categorical shap_elimination = EarlyStoppingShapRFECV( - clf, - random_state=1, + catboost_classifier, + random_state=random_state, step=1, cv=10, scoring="roc_auc", @@ -500,19 +460,17 @@ def test_shap_rfe_early_stopping_CatBoost(complex_data, catboost_classifier_clas @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_shap_rfe_randomized_search_early_stopping_lightGBM(complex_data): +def test_shap_rfe_randomized_search_early_stopping_lightGBM(complex_data, random_state): """ Test EarlyStoppingShapRFECV with RandomizedSearchCV and a LGBMClassifier on complex dataset. 
""" - from lightgbm import LGBMClassifier - - clf = LGBMClassifier(n_estimators=200) + clf = LGBMClassifier(n_estimators=200, random_state=random_state) X, y = complex_data param_grid = { "max_depth": [3, 4, 5], } - search = RandomizedSearchCV(clf, param_grid, cv=2, n_iter=2) + search = RandomizedSearchCV(clf, param_grid, cv=2, n_iter=2, random_state=random_state) shap_elimination = EarlyStoppingShapRFECV( search, step=1, @@ -521,8 +479,8 @@ def test_shap_rfe_randomized_search_early_stopping_lightGBM(complex_data): early_stopping_rounds=5, eval_metric="auc", n_jobs=4, - verbose=50, - random_state=1, + verbose=1, + random_state=random_state, ) report = shap_elimination.fit_compute(X, y) @@ -533,17 +491,17 @@ def test_shap_rfe_randomized_search_early_stopping_lightGBM(complex_data): @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_get_feature_shap_values_per_fold_early_stopping_lightGBM(complex_data): +def test_get_feature_shap_values_per_fold_early_stopping_lightGBM(complex_data, random_state): """ Test with ShapRFECV with features per fold. """ - from lightgbm import LGBMClassifier - - clf = LGBMClassifier(n_estimators=200, max_depth=3) + clf = LGBMClassifier(n_estimators=200, max_depth=3, random_state=random_state) X, y = complex_data y = preprocess_labels(y, y_name="y", index=X.index) - shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5, scoring="roc_auc") + shap_elimination = EarlyStoppingShapRFECV( + clf, early_stopping_rounds=5, scoring="roc_auc", random_state=random_state + ) ( shap_values, train_score, @@ -561,16 +519,18 @@ def test_get_feature_shap_values_per_fold_early_stopping_lightGBM(complex_data): @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_get_feature_shap_values_per_fold_early_stopping_CatBoost(complex_data, catboost_classifier_class): +def test_get_feature_shap_values_per_fold_early_stopping_CatBoost( + complex_data_with_categorical, catboost_classifier, random_state +): """ Test with ShapRFECV with features per fold. """ - clf = catboost_classifier_class(random_seed=42) - X, y = complex_data - X["f1_categorical"] = X["f1_categorical"].astype(str).astype("category") + X, y = complex_data_with_categorical y = preprocess_labels(y, y_name="y", index=X.index) - shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5, scoring="roc_auc") + shap_elimination = EarlyStoppingShapRFECV( + catboost_classifier, early_stopping_rounds=5, scoring="roc_auc", random_state=random_state + ) ( shap_values, train_score, @@ -578,7 +538,7 @@ def test_get_feature_shap_values_per_fold_early_stopping_CatBoost(complex_data, ) = shap_elimination._get_feature_shap_values_per_fold( X, y, - clf, + catboost_classifier, train_index=list(range(5, 50)), val_index=[0, 1, 2, 3, 4], ) @@ -588,18 +548,16 @@ def test_get_feature_shap_values_per_fold_early_stopping_CatBoost(complex_data, @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_get_feature_shap_values_per_fold_early_stopping_XGBoost(complex_data): +def test_get_feature_shap_values_per_fold_early_stopping_XGBoost(XGBoost_classifier, complex_data, random_state): """ Test with ShapRFECV with features per fold. 
""" - from xgboost import XGBClassifier - - clf = XGBClassifier(n_estimators=200, max_depth=3, random_state=42) X, y = complex_data - X["f1_categorical"] = X["f1_categorical"].astype(float) y = preprocess_labels(y, y_name="y", index=X.index) - shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5, scoring="roc_auc") + shap_elimination = EarlyStoppingShapRFECV( + XGBoost_classifier, early_stopping_rounds=5, scoring="roc_auc", random_state=random_state + ) ( shap_values, train_score, @@ -607,7 +565,7 @@ def test_get_feature_shap_values_per_fold_early_stopping_XGBoost(complex_data): ) = shap_elimination._get_feature_shap_values_per_fold( X, y, - clf, + XGBoost_classifier, train_index=list(range(5, 50)), val_index=[0, 1, 2, 3, 4], ) @@ -617,11 +575,9 @@ def test_get_feature_shap_values_per_fold_early_stopping_XGBoost(complex_data): @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_EarlyStoppingShapRFECV_no_categorical(complex_data): +def test_EarlyStoppingShapRFECV_no_categorical(complex_data, random_state): """Test EarlyStoppingShapRFECV when no categorical features are present.""" - from lightgbm import LGBMClassifier - - clf = LGBMClassifier(n_estimators=50, max_depth=3, num_leaves=3) + clf = LGBMClassifier(n_estimators=50, max_depth=3, num_leaves=3, random_state=random_state) shap_elimination = EarlyStoppingShapRFECV( clf=clf, @@ -630,6 +586,7 @@ def test_EarlyStoppingShapRFECV_no_categorical(complex_data): scoring="accuracy", eval_metric="logloss", early_stopping_rounds=5, + random_state=random_state, ) X, y = complex_data X = X.drop(columns=["f1_categorical"]) @@ -642,12 +599,10 @@ def test_EarlyStoppingShapRFECV_no_categorical(complex_data): @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_LightGBM_stratified_kfold(): +def test_LightGBM_stratified_kfold(random_state): """ Test added to check for https://github.com/ing-bank/probatus/issues/170. """ - from lightgbm import LGBMClassifier - X = pd.DataFrame( [ [1, 2, 3, 4, 5, 101, 102, 103, 104, 105], @@ -660,12 +615,12 @@ def test_LightGBM_stratified_kfold(): X[0] = X[0].astype("float") y = [0] * 5 + [1] * 5 - clf = LGBMClassifier() + clf = LGBMClassifier(random_state=random_state) n_iter = 2 n_folds = 3 for _ in range(n_iter): - skf = StratifiedKFold(n_folds, shuffle=True, random_state=42) + skf = StratifiedKFold(n_folds, shuffle=True, random_state=random_state) shap_elimination = EarlyStoppingShapRFECV( clf=clf, step=1 / (n_iter + 1), @@ -673,6 +628,7 @@ def test_LightGBM_stratified_kfold(): scoring="accuracy", eval_metric="logloss", early_stopping_rounds=5, + random_state=random_state, ) report = shap_elimination.fit_compute(X, y, feature_perturbation="tree_path_dependent") diff --git a/tests/interpret/test_model_interpret.py b/tests/interpret/test_model_interpret.py index dd318500..a42f5725 100644 --- a/tests/interpret/test_model_interpret.py +++ b/tests/interpret/test_model_interpret.py @@ -3,60 +3,10 @@ import numpy as np import pandas as pd import pytest -from sklearn.linear_model import LogisticRegression -from sklearn.tree import DecisionTreeClassifier from probatus.interpret import ShapModelInterpreter -@pytest.fixture(scope="function") -def X_train(): - """ - Fixture. - """ - return pd.DataFrame({"col_1": [1, 1, 1, 1], "col_2": [0, 0, 0, 0], "col_3": [1, 0, 1, 0]}, index=[1, 2, 3, 4]) - - -@pytest.fixture(scope="function") -def y_train(): - """ - Fixture. 
- """ - return pd.Series([1, 0, 1, 0], index=[1, 2, 3, 4]) - - -@pytest.fixture(scope="function") -def X_test(): - """ - Fixture. - """ - return pd.DataFrame({"col_1": [1, 1, 1, 1], "col_2": [0, 0, 0, 0], "col_3": [1, 0, 1, 0]}, index=[5, 6, 7, 8]) - - -@pytest.fixture(scope="function") -def y_test(): - """ - Fixture. - """ - return pd.Series([0, 0, 1, 0], index=[5, 6, 7, 8]) - - -@pytest.fixture(scope="function") -def fitted_tree(X_train, y_train): - """ - Fixture. - """ - return DecisionTreeClassifier(max_depth=1, random_state=1).fit(X_train, y_train) - - -@pytest.fixture(scope="function") -def fitted_lin(X_train, y_train): - """ - Fixture. - """ - return LogisticRegression(random_state=1).fit(X_train, y_train) - - @pytest.fixture(scope="function") def expected_feature_importance(): """ @@ -89,19 +39,15 @@ def expected_feature_importance_lin_models(): ) -def test_shap_interpret(fitted_tree, X_train, y_train, X_test, y_test, expected_feature_importance): +def test_shap_interpret(fitted_tree, X_train, y_train, X_test, y_test, expected_feature_importance, random_state): """ Test. """ class_names = ["neg", "pos"] - shap_interpret = ShapModelInterpreter(fitted_tree) + shap_interpret = ShapModelInterpreter(fitted_tree, random_state=random_state) shap_interpret.fit(X_train, X_test, y_train, y_test, class_names=class_names) - # Check parameters - assert shap_interpret.fitted - shap_interpret._check_if_fitted - assert shap_interpret.class_names == class_names assert shap_interpret.train_score == 1 assert shap_interpret.test_score == pytest.approx(0.833, 0.01) @@ -136,20 +82,16 @@ def test_shap_interpret(fitted_tree, X_train, y_train, X_test, y_test, expected_ def test_shap_interpret_lin_models( - fitted_lin, X_train, y_train, X_test, y_test, expected_feature_importance_lin_models + fitted_logistic_regression, X_train, y_train, X_test, y_test, expected_feature_importance_lin_models, random_state ): """ Test. """ class_names = ["neg", "pos"] - shap_interpret = ShapModelInterpreter(fitted_lin) + shap_interpret = ShapModelInterpreter(fitted_logistic_regression, random_state=random_state) shap_interpret.fit(X_train, X_test, y_train, y_test, class_names=class_names) - # Check parameters - assert shap_interpret.fitted - shap_interpret._check_if_fitted - assert shap_interpret.class_names == class_names assert shap_interpret.train_score == 1 assert shap_interpret.test_score == pytest.approx(0.833, 0.01) @@ -185,21 +127,17 @@ def test_shap_interpret_lin_models( def test_shap_interpret_fit_compute_lin_models( - fitted_lin, X_train, y_train, X_test, y_test, expected_feature_importance_lin_models + fitted_logistic_regression, X_train, y_train, X_test, y_test, expected_feature_importance_lin_models, random_state ): """ Test. 
""" class_names = ["neg", "pos"] - shap_interpret = ShapModelInterpreter(fitted_lin) + shap_interpret = ShapModelInterpreter(fitted_logistic_regression, random_state=random_state) importance_df = shap_interpret.fit_compute(X_train, X_test, y_train, y_test, class_names=class_names) importance_df = importance_df.round(2) - # Check parameters - assert shap_interpret.fitted - shap_interpret._check_if_fitted - assert shap_interpret.class_names == class_names assert shap_interpret.train_score == 1 @@ -212,19 +150,17 @@ def test_shap_interpret_fit_compute_lin_models( pd.testing.assert_frame_equal(expected_feature_importance_lin_models, importance_df) -def test_shap_interpret_fit_compute(fitted_tree, X_train, y_train, X_test, y_test, expected_feature_importance): +def test_shap_interpret_fit_compute( + fitted_tree, X_train, y_train, X_test, y_test, expected_feature_importance, random_state +): """ Test. """ class_names = ["neg", "pos"] - shap_interpret = ShapModelInterpreter(fitted_tree) + shap_interpret = ShapModelInterpreter(fitted_tree, random_state=random_state) importance_df = shap_interpret.fit_compute(X_train, X_test, y_train, y_test, class_names=class_names) - # Check parameters - assert shap_interpret.fitted - shap_interpret._check_if_fitted - assert shap_interpret.class_names == class_names assert shap_interpret.train_score == 1 assert shap_interpret.test_score == pytest.approx(0.833, 0.01) @@ -237,22 +173,18 @@ def test_shap_interpret_fit_compute(fitted_tree, X_train, y_train, X_test, y_tes @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_shap_interpret_complex_data(complex_data_split, complex_fitted_lightgbm): +def test_shap_interpret_complex_data(complex_data_split_with_categorical, complex_fitted_lightgbm, random_state): """ Test lightgbm. """ class_names = ["neg", "pos"] - X_train, X_test, y_train, y_test = complex_data_split + X_train, X_test, y_train, y_test = complex_data_split_with_categorical - shap_interpret = ShapModelInterpreter(complex_fitted_lightgbm, verbose=50) + shap_interpret = ShapModelInterpreter(complex_fitted_lightgbm, verbose=1, random_state=random_state) importance_df = shap_interpret.fit_compute( X_train, X_test, y_train, y_test, class_names=class_names, approximate=False, check_additivity=False ) - # Check parameters - assert shap_interpret.fitted - shap_interpret._check_if_fitted - assert shap_interpret.class_names == class_names assert importance_df.shape[0] == X_train.shape[1] diff --git a/tests/interpret/test_shap_dependence.py b/tests/interpret/test_shap_dependence.py index 5e0e6fa3..2717705c 100644 --- a/tests/interpret/test_shap_dependence.py +++ b/tests/interpret/test_shap_dependence.py @@ -71,13 +71,13 @@ def expected_shap_vals(): @pytest.fixture(scope="function") -def clf(X_y): +def clf(X_y, random_state): """ Fixture. """ X, y = X_y - model = RandomForestClassifier(random_state=42, n_estimators=10, max_depth=5) + model = RandomForestClassifier(random_state=random_state, n_estimators=10, max_depth=5) model.fit(X, y) return model @@ -97,22 +97,22 @@ def expected_feat_importances(): ) -def test_not_fitted(clf): +def test_not_fitted(clf, random_state): """ Test. 
""" - plotter = DependencePlotter(clf) + plotter = DependencePlotter(clf, random_state) assert plotter.fitted is False @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_fit_complex(complex_data_split, complex_fitted_lightgbm): +def test_fit_complex(complex_data_split, complex_fitted_lightgbm, random_state): """ Test. """ - X_train, X_test, y_train, y_test = complex_data_split + _, X_test, _, y_test = complex_data_split - plotter = DependencePlotter(complex_fitted_lightgbm) + plotter = DependencePlotter(complex_fitted_lightgbm, random_state=random_state) plotter.fit(X_test, y_test) @@ -124,23 +124,23 @@ def test_fit_complex(complex_data_split, complex_fitted_lightgbm): _ = plotter.plot(feature="f2_missing", show=False) -def test_get_X_y_shap_with_q_cut_normal(X_y, clf): +def test_get_X_y_shap_with_q_cut_normal(X_y, clf, random_state): """ Test. """ X, y = X_y - plotter = DependencePlotter(clf).fit(X, y) + plotter = DependencePlotter(clf, random_state).fit(X, y) plotter.min_q, plotter.max_q = 0, 1 - X_cut, y_cut, shap_val = plotter._get_X_y_shap_with_q_cut(0) + X_cut, y_cut, _ = plotter._get_X_y_shap_with_q_cut(0) assert np.isclose(X[0], X_cut).all() assert y.equals(y_cut) plotter.min_q = 0.2 plotter.max_q = 0.8 - X_cut, y_cut, shap_val = plotter._get_X_y_shap_with_q_cut(0) + X_cut, y_cut, _ = plotter._get_X_y_shap_with_q_cut(0) assert np.isclose( X_cut, [ @@ -158,46 +158,46 @@ def test_get_X_y_shap_with_q_cut_normal(X_y, clf): assert np.equal(y_cut.values, [1, 0, 0, 1, 1, 0, 0, 0, 0]).all() -def test_get_X_y_shap_with_q_cut_unfitted(clf): +def test_get_X_y_shap_with_q_cut_unfitted(clf, random_state): """ Test. """ - plotter = DependencePlotter(clf) + plotter = DependencePlotter(clf, random_state) with pytest.raises(NotFittedError): plotter._get_X_y_shap_with_q_cut(0) -def test_get_X_y_shap_with_q_cut_input(X_y, clf): +def test_get_X_y_shap_with_q_cut_input(X_y, clf, random_state): """ Test. """ - plotter = DependencePlotter(clf).fit(X_y[0], X_y[1]) + plotter = DependencePlotter(clf, random_state).fit(X_y[0], X_y[1]) with pytest.raises(ValueError): plotter._get_X_y_shap_with_q_cut("not a feature") -def test_plot_normal(X_y, clf): +def test_plot_normal(X_y, clf, random_state): """ Test. """ - plotter = DependencePlotter(clf).fit(X_y[0], X_y[1]) + plotter = DependencePlotter(clf, random_state).fit(X_y[0], X_y[1]) _ = plotter.plot(feature=0) -def test_plot_class_names(X_y, clf): +def test_plot_class_names(X_y, clf, random_state): """ Test. """ - plotter = DependencePlotter(clf).fit(X_y[0], X_y[1], class_names=["a", "b"]) + plotter = DependencePlotter(clf, random_state).fit(X_y[0], X_y[1], class_names=["a", "b"]) _ = plotter.plot(feature=0) assert plotter.class_names == ["a", "b"] -def test_plot_input(X_y, clf): +def test_plot_input(X_y, clf, random_state): """ Test. """ - plotter = DependencePlotter(clf).fit(X_y[0], X_y[1]) + plotter = DependencePlotter(clf, random_state).fit(X_y[0], X_y[1]) with pytest.raises(ValueError): plotter.plot(feature="not a feature") with pytest.raises(TypeError): @@ -206,9 +206,9 @@ def test_plot_input(X_y, clf): plotter.plot(feature=0, min_q=1, max_q=0) -def test__repr__(clf): +def test__repr__(clf, random_state): """ Test string representation. 
""" - plotter = DependencePlotter(clf) + plotter = DependencePlotter(clf, random_state) assert str(plotter) == "Shap dependence plotter for RandomForestClassifier" diff --git a/tests/sample_similarity/test_resemblance_model.py b/tests/sample_similarity/test_resemblance_model.py index 6e11d2e4..7d81fb47 100644 --- a/tests/sample_similarity/test_resemblance_model.py +++ b/tests/sample_similarity/test_resemblance_model.py @@ -6,11 +6,8 @@ import pandas as pd import pytest from pandas.api.types import is_numeric_dtype -from sklearn.linear_model import LogisticRegression -from sklearn.tree import DecisionTreeClassifier from probatus.sample_similarity import BaseResemblanceModel, PermutationImportanceResemblance, SHAPImportanceResemblance -from probatus.utils import NotFittedError # Turn off interactive mode in plots plt.ioff() @@ -33,22 +30,14 @@ def X2(): return pd.DataFrame({"col_1": [0, 0, 0, 0], "col_2": [0, 0, 0, 0], "col_3": [0, 0, 0, 0]}, index=[1, 2, 3, 4]) -def test_base_class(X1, X2): +def test_base_class(X1, X2, decision_tree_classifier, random_state): """ Test. """ - clf = DecisionTreeClassifier(max_depth=1, random_state=1) - rm = BaseResemblanceModel(clf, test_prc=0.5, n_jobs=1, random_state=42) - - # Before fit it should raise an exception - with pytest.raises(NotFittedError) as _: - rm._check_if_fitted() + rm = BaseResemblanceModel(decision_tree_classifier, test_prc=0.5, n_jobs=1, random_state=random_state) actual_report, train_score, test_score = rm.fit_compute(X1, X2, return_scores=True) - # After the fit this should not raise any error - rm._check_if_fitted() - assert train_score == 1 assert test_score == 1 assert actual_report is None @@ -73,23 +62,15 @@ def test_base_class(X1, X2): rm.plot() -def test_base_class_lin_models(X1, X2): +def test_base_class_lin_models(X1, X2, logistic_regression, random_state): """ Test. """ # Test class BaseResemblanceModel for linear models. - clf = LogisticRegression() - rm = BaseResemblanceModel(clf, test_prc=0.5, n_jobs=1, random_state=42) - - # Before fit it should raise an exception - with pytest.raises(NotFittedError) as _: - rm._check_if_fitted() + rm = BaseResemblanceModel(logistic_regression, test_prc=0.5, n_jobs=1, random_state=random_state) actual_report, train_score, test_score = rm.fit_compute(X1, X2, return_scores=True) - # After the fit this should not raise any error - rm._check_if_fitted() - assert train_score == 1 assert test_score == 1 assert actual_report is None @@ -114,22 +95,14 @@ def test_base_class_lin_models(X1, X2): rm.plot() -def test_shap_resemblance_class(X1, X2): +def test_shap_resemblance_class(X1, X2, decision_tree_classifier, random_state): """ Test. 
""" - clf = DecisionTreeClassifier(max_depth=1, random_state=1) - rm = SHAPImportanceResemblance(clf, test_prc=0.5, n_jobs=1, random_state=42) - - # Before fit it should raise an exception - with pytest.raises(NotFittedError) as _: - rm._check_if_fitted() + rm = SHAPImportanceResemblance(decision_tree_classifier, test_prc=0.5, n_jobs=1, random_state=random_state) actual_report, train_score, test_score = rm.fit_compute(X1, X2, return_scores=True) - # After the fit this should not raise any error - rm._check_if_fitted() - assert train_score == 1 assert test_score == 1 @@ -139,8 +112,7 @@ def test_shap_resemblance_class(X1, X2): assert actual_report.iloc[0].name == "col_1" # Check report values assert actual_report.loc["col_1"]["mean_abs_shap_value"] > 0 - # see https://github.com/ing-bank/probatus/issues/225 - # assert actual_report.loc["col_1"]["mean_shap_value"] >= 0 + assert actual_report.loc["col_1"]["mean_shap_value"] < 0 assert actual_report.loc["col_2"]["mean_abs_shap_value"] == 0 assert actual_report.loc["col_2"]["mean_shap_value"] == 0 assert actual_report.loc["col_3"]["mean_abs_shap_value"] == 0 @@ -154,25 +126,17 @@ def test_shap_resemblance_class(X1, X2): rm.plot(plot_type="dot") -def test_shap_resemblance_class_lin_models(X1, X2): +def test_shap_resemblance_class_lin_models(X1, X2, logistic_regression, random_state): """ Test. """ # Test SHAP Resemblance Model for linear models. - clf = LogisticRegression() - rm = SHAPImportanceResemblance(clf, test_prc=0.5, n_jobs=1, random_state=42) - - # Before fit it should raise an exception - with pytest.raises(NotFittedError) as _: - rm._check_if_fitted() + rm = SHAPImportanceResemblance(logistic_regression, test_prc=0.5, n_jobs=1, random_state=random_state) actual_report, train_score, test_score = rm.fit_compute( X1, X2, return_scores=True, approximate=True, check_additivity=False ) - # After the fit this should not raise any error - rm._check_if_fitted() - assert train_score == 1 assert test_score == 1 @@ -182,8 +146,7 @@ def test_shap_resemblance_class_lin_models(X1, X2): assert actual_report.iloc[0].name == "col_1" # Check report values assert actual_report.loc["col_1"]["mean_abs_shap_value"] > 0 - # see https://github.com/ing-bank/probatus/issues/225 - # assert actual_report.loc["col_1"]["mean_shap_value"] > 0 + assert actual_report.loc["col_1"]["mean_shap_value"] < 0 assert actual_report.loc["col_2"]["mean_abs_shap_value"] == 0 assert actual_report.loc["col_2"]["mean_shap_value"] == 0 assert actual_report.loc["col_3"]["mean_abs_shap_value"] == 0 @@ -198,19 +161,17 @@ def test_shap_resemblance_class_lin_models(X1, X2): @pytest.mark.skipif(os.environ.get("SKIP_LIGHTGBM") == "true", reason="LightGBM tests disabled") -def test_shap_resemblance_class2(complex_data, complex_lightgbm): +def test_shap_resemblance_class2(complex_data_with_categorical, complex_lightgbm, random_state): """ Test. 
""" - X1, _ = complex_data + X1, _ = complex_data_with_categorical X2 = X1.copy() X2["f4"] = X2["f4"] + 100 - rm = SHAPImportanceResemblance(complex_lightgbm, scoring="accuracy", test_prc=0.5, n_jobs=1, random_state=42) - - # Before fit it should raise an exception - with pytest.raises(NotFittedError) as _: - rm._check_if_fitted() + rm = SHAPImportanceResemblance( + complex_lightgbm, scoring="accuracy", test_prc=0.5, n_jobs=1, random_state=random_state + ) actual_report, train_score, test_score = rm.fit_compute(X1, X2, return_scores=True, class_names=["a", "b"]) @@ -219,9 +180,6 @@ def test_shap_resemblance_class2(complex_data, complex_lightgbm): for num_column in ["f2_missing", "f3_static", "f4", "f5"]: assert is_numeric_dtype(rm.X[num_column]) - # After the fit this should not raise any error - rm._check_if_fitted() - assert train_score == pytest.approx(1, 0.05) assert test_score == pytest.approx(1, 0.05) @@ -242,22 +200,16 @@ def test_shap_resemblance_class2(complex_data, complex_lightgbm): rm.plot(plot_type="dot", show=False) -def test_permutation_resemblance_class(X1, X2): +def test_permutation_resemblance_class(X1, X2, decision_tree_classifier, random_state): """ Test. """ - clf = DecisionTreeClassifier(max_depth=1, random_state=1) - rm = PermutationImportanceResemblance(clf, test_prc=0.5, n_jobs=1, random_state=42, iterations=20) - - # Before fit it should raise an exception - with pytest.raises(NotFittedError) as _: - rm._check_if_fitted() + rm = PermutationImportanceResemblance( + decision_tree_classifier, test_prc=0.5, n_jobs=1, random_state=random_state, iterations=20 + ) actual_report, train_score, test_score = rm.fit_compute(X1, X2, return_scores=True) - # After the fit this should not raise any error - rm._check_if_fitted() - assert train_score == 1 assert test_score == 1 @@ -280,12 +232,11 @@ def test_permutation_resemblance_class(X1, X2): assert size[0] == 10 and size[1] == 10 -def test_base_class_same_data(X1): +def test_base_class_same_data(X1, decision_tree_classifier, random_state): """ Test. """ - clf = DecisionTreeClassifier(max_depth=1, random_state=1) - rm = BaseResemblanceModel(clf, test_prc=0.5, n_jobs=1, random_state=42) + rm = BaseResemblanceModel(decision_tree_classifier, test_prc=0.5, n_jobs=1, random_state=random_state) actual_report, train_score, test_score = rm.fit_compute(X1, X1, return_scores=True) diff --git a/tests/utils/test_base_class.py b/tests/utils/test_base_class.py new file mode 100644 index 00000000..892dc5aa --- /dev/null +++ b/tests/utils/test_base_class.py @@ -0,0 +1,32 @@ +from probatus.interpret import ShapModelInterpreter +import pytest +from probatus.utils import NotFittedError + + +def test_fitted_exception(fitted_tree, X_train, y_train, X_test, y_test, random_state): + """ + Test if fitted works.. + """ + class_names = ["neg", "pos"] + + shap_interpret = ShapModelInterpreter(fitted_tree, random_state=random_state) + + # Before fit it should raise an exception + with pytest.raises(NotFittedError) as _: + shap_interpret._check_if_fitted() + + shap_interpret.fit(X_train, X_test, y_train, y_test, class_names=class_names) + + # Check parameters + assert shap_interpret.fitted + shap_interpret._check_if_fitted + + +@pytest.mark.xfail +def test_fitted_exception_is_raised(fitted_tree, random_state): + """ + Test if fitted works fails when not fitted. 
+ """ + shap_interpret = ShapModelInterpreter(fitted_tree, random_state=random_state) + + shap_interpret._check_if_fitted diff --git a/tests/utils/test_utils_array_funcs.py b/tests/utils/test_utils_array_funcs.py index ee8b83da..1cf0232a 100644 --- a/tests/utils/test_utils_array_funcs.py +++ b/tests/utils/test_utils_array_funcs.py @@ -225,15 +225,15 @@ def test_preprocess_labels(): y1 = pd.Series([1, 0, 1, 0, 1]) index_1 = np.array([5, 4, 3, 2, 1]) - y1_output = preprocess_labels(y1, y_name="y1", index=index_1, verbose=150) + y1_output = preprocess_labels(y1, y_name="y1", index=index_1, verbose=2) pd.testing.assert_series_equal(y1_output, pd.Series([1, 0, 1, 0, 1], index=index_1)) y2 = [False, False, False, False, False] - y2_output = preprocess_labels(y2, y_name="y2", verbose=150) + y2_output = preprocess_labels(y2, y_name="y2", verbose=2) pd.testing.assert_series_equal(y2_output, pd.Series(y2)) y3 = np.array([0, 1, 2, 3, 4]) - y3_output = preprocess_labels(y3, y_name="y3", verbose=150) + y3_output = preprocess_labels(y3, y_name="y3", verbose=2) pd.testing.assert_series_equal(y3_output, pd.Series(y3)) y4 = pd.Series(["2", "1", "3", "2", "1"]) @@ -252,9 +252,7 @@ def test_preprocess_data(): X1_expected_output = pd.DataFrame({"1": ["a", "b", "c"], "2": [1, np.nan, 2], "3": [1, 2, 3]}) X1_expected_output["1"] = X1_expected_output["1"].astype("category") - X1_output, output_column_names_X1 = preprocess_data( - X1, X_name="X1", column_names=target_column_names_X1, verbose=150 - ) + X1_output, output_column_names_X1 = preprocess_data(X1, X_name="X1", column_names=target_column_names_X1, verbose=2) assert target_column_names_X1 == output_column_names_X1 pd.testing.assert_frame_equal(X1_output, X1_expected_output) @@ -262,7 +260,7 @@ def test_preprocess_data(): target_column_names_X1 = [0, 1, 2] X2_expected_output = pd.DataFrame(X2, columns=target_column_names_X1) - X2_output, output_column_names_X2 = preprocess_data(X2, X_name="X2", column_names=None, verbose=150) + X2_output, output_column_names_X2 = preprocess_data(X2, X_name="X2", column_names=None, verbose=2) assert target_column_names_X1 == output_column_names_X2 pd.testing.assert_frame_equal(X2_output, X2_expected_output)