From 46fbd9391149a2d8d929d31d5766a112386e41cc Mon Sep 17 00:00:00 2001 From: PurvalBhude Date: Sun, 15 Sep 2024 15:26:05 +0530 Subject: [PATCH 1/5] Update linear_regression.py add error handling function. it can handle errors and still run the linear regression algorithm. --- machine_learning/linear_regression.py | 134 +++++++++++++++++--------- 1 file changed, 87 insertions(+), 47 deletions(-) diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py index 839a5366d1cc..e84ef505b62f 100644 --- a/machine_learning/linear_regression.py +++ b/machine_learning/linear_regression.py @@ -17,19 +17,27 @@ def collect_dataset(): The dataset contains ADR vs Rating of a Player :return : dataset obtained from the link, as matrix """ - response = requests.get( - "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/" - "master/Week1/ADRvsRating.csv", - timeout=10, - ) - lines = response.text.splitlines() - data = [] - for item in lines: - item = item.split(",") - data.append(item) - data.pop(0) # This is for removing the labels from the list - dataset = np.matrix(data) - return dataset + try: + response = requests.get( + "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/" + "master/Week1/ADRvsRating.csv", + timeout=10, + ) + response.raise_for_status() # Check for HTTP errors + lines = response.text.splitlines() + data = [] + for item in lines: + item = item.split(",") + data.append(item) + data.pop(0) # This is for removing the labels from the list + dataset = np.matrix(data) + return dataset + except requests.exceptions.RequestException as e: + print(f"Error in fetching dataset: {e}") + return None + except Exception as e: + print(f"Unexpected error in processing dataset: {e}") + return None def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta): @@ -42,13 +50,16 @@ def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta): ;param return : Updated Feature's, using curr_features - alpha_ * gradient(w.r.t. feature) """ - n = len_data - - prod = np.dot(theta, data_x.transpose()) - prod -= data_y.transpose() - sum_grad = np.dot(prod, data_x) - theta = theta - (alpha / n) * sum_grad - return theta + try: + n = len_data + prod = np.dot(theta, data_x.transpose()) + prod -= data_y.transpose() + sum_grad = np.dot(prod, data_x) + theta = theta - (alpha / n) * sum_grad + return theta + except Exception as e: + print(f"Error during gradient descent: {e}") + return None def sum_of_square_error(data_x, data_y, len_data, theta): @@ -59,11 +70,15 @@ def sum_of_square_error(data_x, data_y, len_data, theta): :param theta : contains the feature vector :return : sum of square error computed from given feature's """ - prod = np.dot(theta, data_x.transpose()) - prod -= data_y.transpose() - sum_elem = np.sum(np.square(prod)) - error = sum_elem / (2 * len_data) - return error + try: + prod = np.dot(theta, data_x.transpose()) + prod -= data_y.transpose() + sum_elem = np.sum(np.square(prod)) + error = sum_elem / (2 * len_data) + return error + except Exception as e: + print(f"Error in calculating sum of square error: {e}") + return None def run_linear_regression(data_x, data_y): @@ -72,20 +87,31 @@ def run_linear_regression(data_x, data_y): :param data_y : contains the output (result vector) :return : feature for line of best fit (Feature vector) """ - iterations = 100000 - alpha = 0.0001550 + try: + iterations = 100000 + alpha = 0.0001550 - no_features = data_x.shape[1] - len_data = data_x.shape[0] - 1 + no_features = data_x.shape[1] + len_data = data_x.shape[0] - theta = np.zeros((1, no_features)) + theta = np.zeros((1, no_features)) - for i in range(iterations): - theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta) - error = sum_of_square_error(data_x, data_y, len_data, theta) - print(f"At Iteration {i + 1} - Error is {error:.5f}") + for i in range(iterations): + theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta) + if theta is None: # If gradient descent fails, exit + print("Gradient descent failed. Exiting.") + return None + error = sum_of_square_error(data_x, data_y, len_data, theta) + if error is None: # If error calculation fails, exit + print("Error calculation failed. Exiting.") + return None + if i % 1000 == 0: # Print every 1000 iterations + print(f"At Iteration {i + 1} - Error is {error:.5f}") - return theta + return theta + except Exception as e: + print(f"Error in linear regression: {e}") + return None def mean_absolute_error(predicted_y, original_y): @@ -94,23 +120,37 @@ def mean_absolute_error(predicted_y, original_y): :param original_y : contains values of expected outcome :return : mean absolute error computed from given feature's """ - total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y)) - return total / len(original_y) + try: + total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y)) + return total / len(original_y) + except Exception as e: + print(f"Error in calculating mean absolute error: {e}") + return None def main(): """Driver function""" data = collect_dataset() - - len_data = data.shape[0] - data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float) - data_y = data[:, -1].astype(float) - - theta = run_linear_regression(data_x, data_y) - len_result = theta.shape[1] - print("Resultant Feature vector : ") - for i in range(len_result): - print(f"{theta[0, i]:.5f}") + if data is None: + print("Failed to collect or process the dataset. Exiting.") + return + + try: + len_data = data.shape[0] + data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float) + data_y = data[:, -1].astype(float) + + theta = run_linear_regression(data_x, data_y) + if theta is None: + print("Linear regression failed. Exiting.") + return + + len_result = theta.shape[1] + print("Resultant Feature vector:") + for i in range(len_result): + print(f"{theta[0, i]:.5f}") + except Exception as e: + print(f"Unexpected error in main: {e}") if __name__ == "__main__": From 1a93fd5fe7d7f907e1973d3f6900443325f44701 Mon Sep 17 00:00:00 2001 From: PurvalBhude Date: Sun, 15 Sep 2024 15:41:03 +0530 Subject: [PATCH 2/5] update linear_regression.py --- machine_learning/linear_regression.py | 74 +++++++++------------------ 1 file changed, 25 insertions(+), 49 deletions(-) diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py index e84ef505b62f..f0be2c8c6b59 100644 --- a/machine_learning/linear_regression.py +++ b/machine_learning/linear_regression.py @@ -23,21 +23,15 @@ def collect_dataset(): "master/Week1/ADRvsRating.csv", timeout=10, ) - response.raise_for_status() # Check for HTTP errors + response.raise_for_status() # Raise an error for failed HTTP requests lines = response.text.splitlines() - data = [] - for item in lines: - item = item.split(",") - data.append(item) - data.pop(0) # This is for removing the labels from the list + data = [line.split(",") for line in lines] + data.pop(0) # Remove the labels from the list dataset = np.matrix(data) return dataset except requests.exceptions.RequestException as e: - print(f"Error in fetching dataset: {e}") - return None - except Exception as e: - print(f"Unexpected error in processing dataset: {e}") - return None + print(f"Error fetching dataset: {e}") + return None # Return None if dataset fetching fails def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta): @@ -51,15 +45,13 @@ def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta): curr_features - alpha_ * gradient(w.r.t. feature) """ try: - n = len_data - prod = np.dot(theta, data_x.transpose()) - prod -= data_y.transpose() + prod = np.dot(theta, data_x.transpose()) - data_y.transpose() sum_grad = np.dot(prod, data_x) - theta = theta - (alpha / n) * sum_grad + theta = theta - (alpha / len_data) * sum_grad return theta except Exception as e: - print(f"Error during gradient descent: {e}") - return None + print(f"Error in gradient descent: {e}") + return theta # Return current theta even if an error occurs def sum_of_square_error(data_x, data_y, len_data, theta): @@ -71,14 +63,13 @@ def sum_of_square_error(data_x, data_y, len_data, theta): :return : sum of square error computed from given feature's """ try: - prod = np.dot(theta, data_x.transpose()) - prod -= data_y.transpose() + prod = np.dot(theta, data_x.transpose()) - data_y.transpose() sum_elem = np.sum(np.square(prod)) error = sum_elem / (2 * len_data) return error except Exception as e: print(f"Error in calculating sum of square error: {e}") - return None + return float('inf') # Return infinity in case of an error def run_linear_regression(data_x, data_y): @@ -87,31 +78,20 @@ def run_linear_regression(data_x, data_y): :param data_y : contains the output (result vector) :return : feature for line of best fit (Feature vector) """ - try: - iterations = 100000 - alpha = 0.0001550 - - no_features = data_x.shape[1] - len_data = data_x.shape[0] - - theta = np.zeros((1, no_features)) + iterations = 100000 + alpha = 0.0001550 + len_data = data_x.shape[0] - 1 + no_features = data_x.shape[1] + theta = np.zeros((1, no_features)) + try: for i in range(iterations): theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta) - if theta is None: # If gradient descent fails, exit - print("Gradient descent failed. Exiting.") - return None error = sum_of_square_error(data_x, data_y, len_data, theta) - if error is None: # If error calculation fails, exit - print("Error calculation failed. Exiting.") - return None - if i % 1000 == 0: # Print every 1000 iterations - print(f"At Iteration {i + 1} - Error is {error:.5f}") - - return theta + print(f"At Iteration {i + 1} - Error is {error:.5f}") except Exception as e: - print(f"Error in linear regression: {e}") - return None + print(f"Error during linear regression: {e}") + return theta def mean_absolute_error(predicted_y, original_y): @@ -125,14 +105,14 @@ def mean_absolute_error(predicted_y, original_y): return total / len(original_y) except Exception as e: print(f"Error in calculating mean absolute error: {e}") - return None + return float('inf') def main(): - """Driver function""" + """Driver function.""" data = collect_dataset() if data is None: - print("Failed to collect or process the dataset. Exiting.") + print("Failed to retrieve dataset. Exiting.") return try: @@ -141,16 +121,12 @@ def main(): data_y = data[:, -1].astype(float) theta = run_linear_regression(data_x, data_y) - if theta is None: - print("Linear regression failed. Exiting.") - return - len_result = theta.shape[1] - print("Resultant Feature vector:") + print("Resultant Feature vector : ") for i in range(len_result): print(f"{theta[0, i]:.5f}") except Exception as e: - print(f"Unexpected error in main: {e}") + print(f"Error in main execution: {e}") if __name__ == "__main__": From b8b341c3caffa2b669b7cea1e1a70b9329c9362c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:11:28 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/linear_regression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py index f0be2c8c6b59..824a26008804 100644 --- a/machine_learning/linear_regression.py +++ b/machine_learning/linear_regression.py @@ -69,7 +69,7 @@ def sum_of_square_error(data_x, data_y, len_data, theta): return error except Exception as e: print(f"Error in calculating sum of square error: {e}") - return float('inf') # Return infinity in case of an error + return float("inf") # Return infinity in case of an error def run_linear_regression(data_x, data_y): @@ -105,7 +105,7 @@ def mean_absolute_error(predicted_y, original_y): return total / len(original_y) except Exception as e: print(f"Error in calculating mean absolute error: {e}") - return float('inf') + return float("inf") def main(): From 93c1efb09e1d4c7244f2f54ddb1d5b7201ff8739 Mon Sep 17 00:00:00 2001 From: PurvalBhude Date: Sun, 15 Sep 2024 15:48:54 +0530 Subject: [PATCH 4/5] update linear_regression.py file --- machine_learning/linear_regression.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py index f0be2c8c6b59..5558b73ff2b1 100644 --- a/machine_learning/linear_regression.py +++ b/machine_learning/linear_regression.py @@ -49,9 +49,10 @@ def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta): sum_grad = np.dot(prod, data_x) theta = theta - (alpha / len_data) * sum_grad return theta - except Exception as e: + except (TypeError, ValueError) as e: print(f"Error in gradient descent: {e}") - return theta # Return current theta even if an error occurs + return theta + def sum_of_square_error(data_x, data_y, len_data, theta): @@ -67,9 +68,10 @@ def sum_of_square_error(data_x, data_y, len_data, theta): sum_elem = np.sum(np.square(prod)) error = sum_elem / (2 * len_data) return error - except Exception as e: + except (TypeError, ValueError) as e: print(f"Error in calculating sum of square error: {e}") - return float('inf') # Return infinity in case of an error + return float("inf") + def run_linear_regression(data_x, data_y): @@ -89,11 +91,12 @@ def run_linear_regression(data_x, data_y): theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta) error = sum_of_square_error(data_x, data_y, len_data, theta) print(f"At Iteration {i + 1} - Error is {error:.5f}") - except Exception as e: + except (OverflowError, ValueError) as e: print(f"Error during linear regression: {e}") return theta + def mean_absolute_error(predicted_y, original_y): """Return sum of square error for error calculation :param predicted_y : contains the output of prediction (result vector) @@ -103,9 +106,10 @@ def mean_absolute_error(predicted_y, original_y): try: total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y)) return total / len(original_y) - except Exception as e: + except (TypeError, ZeroDivisionError) as e: print(f"Error in calculating mean absolute error: {e}") - return float('inf') + return float("inf") + def main(): @@ -125,9 +129,10 @@ def main(): print("Resultant Feature vector : ") for i in range(len_result): print(f"{theta[0, i]:.5f}") - except Exception as e: + except (IndexError, TypeError) as e: print(f"Error in main execution: {e}") + if __name__ == "__main__": main() From 713d94e10cf1a9fafd586325dd1d3e77d8366928 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:21:41 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/linear_regression.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py index 5fa122bc8805..d0b47359c93f 100644 --- a/machine_learning/linear_regression.py +++ b/machine_learning/linear_regression.py @@ -54,7 +54,6 @@ def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta): return theta - def sum_of_square_error(data_x, data_y, len_data, theta): """Return sum of square error for error calculation :param data_x : contains our dataset @@ -95,7 +94,6 @@ def run_linear_regression(data_x, data_y): return theta - def mean_absolute_error(predicted_y, original_y): """Return sum of square error for error calculation :param predicted_y : contains the output of prediction (result vector) @@ -131,6 +129,5 @@ def main(): print(f"Error in main execution: {e}") - if __name__ == "__main__": main()