Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update linear_regression.py #11567

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 62 additions & 46 deletions machine_learning/linear_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,21 @@ def collect_dataset():
The dataset contains ADR vs Rating of a Player
:return : dataset obtained from the link, as matrix
"""
response = requests.get(
"https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
"master/Week1/ADRvsRating.csv",
timeout=10,
)
lines = response.text.splitlines()
data = []
for item in lines:
item = item.split(",")
data.append(item)
data.pop(0) # This is for removing the labels from the list
dataset = np.matrix(data)
return dataset
try:
response = requests.get(
"https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
"master/Week1/ADRvsRating.csv",
timeout=10,
)
response.raise_for_status() # Raise an error for failed HTTP requests
lines = response.text.splitlines()
data = [line.split(",") for line in lines]
data.pop(0) # Remove the labels from the list
dataset = np.matrix(data)
return dataset
except requests.exceptions.RequestException as e:
print(f"Error fetching dataset: {e}")
return None # Return None if dataset fetching fails


def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
Expand All @@ -42,13 +44,14 @@ def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
:return : Updated features, using
curr_features - alpha_ * gradient(w.r.t. feature)
"""
n = len_data

prod = np.dot(theta, data_x.transpose())
prod -= data_y.transpose()
sum_grad = np.dot(prod, data_x)
theta = theta - (alpha / n) * sum_grad
return theta
try:
prod = np.dot(theta, data_x.transpose()) - data_y.transpose()
sum_grad = np.dot(prod, data_x)
theta = theta - (alpha / len_data) * sum_grad
return theta
except (TypeError, ValueError) as e:
print(f"Error in gradient descent: {e}")
return theta


def sum_of_square_error(data_x, data_y, len_data, theta):
Expand All @@ -59,11 +62,14 @@ def sum_of_square_error(data_x, data_y, len_data, theta):
:param theta : contains the feature vector
:return : sum of square error computed from the given features
"""
prod = np.dot(theta, data_x.transpose())
prod -= data_y.transpose()
sum_elem = np.sum(np.square(prod))
error = sum_elem / (2 * len_data)
return error
try:
prod = np.dot(theta, data_x.transpose()) - data_y.transpose()
sum_elem = np.sum(np.square(prod))
error = sum_elem / (2 * len_data)
return error
except (TypeError, ValueError) as e:
print(f"Error in calculating sum of square error: {e}")
return float("inf")


def run_linear_regression(data_x, data_y):
Expand All @@ -74,17 +80,17 @@ def run_linear_regression(data_x, data_y):
"""
iterations = 100000
alpha = 0.0001550

no_features = data_x.shape[1]
len_data = data_x.shape[0] - 1

no_features = data_x.shape[1]
theta = np.zeros((1, no_features))

for i in range(iterations):
theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
error = sum_of_square_error(data_x, data_y, len_data, theta)
print(f"At Iteration {i + 1} - Error is {error:.5f}")

try:
for i in range(iterations):
theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
error = sum_of_square_error(data_x, data_y, len_data, theta)
print(f"At Iteration {i + 1} - Error is {error:.5f}")
except (OverflowError, ValueError) as e:
print(f"Error during linear regression: {e}")
return theta


Expand All @@ -94,23 +100,33 @@ def mean_absolute_error(predicted_y, original_y):
:param original_y : contains values of expected outcome
:return : mean absolute error computed from the predicted and original values
"""
total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
return total / len(original_y)
try:
total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
return total / len(original_y)
except (TypeError, ZeroDivisionError) as e:
print(f"Error in calculating mean absolute error: {e}")
return float("inf")


def main():
"""Driver function"""
"""Driver function."""
data = collect_dataset()

len_data = data.shape[0]
data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
data_y = data[:, -1].astype(float)

theta = run_linear_regression(data_x, data_y)
len_result = theta.shape[1]
print("Resultant Feature vector : ")
for i in range(len_result):
print(f"{theta[0, i]:.5f}")
if data is None:
print("Failed to retrieve dataset. Exiting.")
return

try:
len_data = data.shape[0]
data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
data_y = data[:, -1].astype(float)

theta = run_linear_regression(data_x, data_y)
len_result = theta.shape[1]
print("Resultant Feature vector : ")
for i in range(len_result):
print(f"{theta[0, i]:.5f}")
except (IndexError, TypeError) as e:
print(f"Error in main execution: {e}")


if __name__ == "__main__":
Expand Down