Avik-Jain · liyuan1062 · Aug 26, 2018 · Aug 26, 2018 · Sep 3, 2018
diff --git a/Code/1Days.py b/Code/1Days.py
@@ -0,0 +1,38 @@
+import pandas as pd
+from sklearn.preprocessing import Imputer, LabelEncoder, OneHotEncoder, StandardScaler
+from sklearn.cross_validation import train_test_split
+from sklearn.linear_model import LinearRegression
+import matplotlib.pyplot as plt
+
+dataset = pd.read_csv('../datasets/Data.csv')
+
+X = dataset.iloc[:, : -1].values
+Y = dataset.iloc[:, 3].values
+
+imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
+imputer = imputer.fit(X[:, 1:3])
+X[:, 1:3] = imputer.transform(X[:, 1:3])
+
+labelencoder_X = LabelEncoder()
+X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
+onehotencoder = OneHotEncoder(categorical_features=[0])
+X = onehotencoder.fit_transform(X).toarray()
+labelencoder_Y = LabelEncoder()
+Y = labelencoder_Y.fit_transform(Y)
+
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
+
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.fit_transform(X_test)
+
+regressor = LinearRegression()
+regressor = regressor.fit(X_train, Y_train)
+
+Y_pred = regressor.predict(X_test)
+
+plt.scatter(X_train, Y_train, color='red')
+plt.plot(X_train, regressor.predict(X_train), color='blue')
+
+plt.scatter(X_test, Y_test, color='red')
+plt.plot(X_test, regressor.predict(X_test), color='blue')
diff --git a/Code/2Days.py b/Code/2Days.py
@@ -0,0 +1,28 @@
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import Imputer, LabelEncoder, OneHotEncoder, StandardScaler
+from sklearn.cross_validation import train_test_split
+from sklearn.linear_model import LinearRegression
+import matplotlib.pyplot as plt
+
+dataset = pd.read_csv('../datasets/studentscores.csv')
+
+X = dataset.iloc[:, : -1].values
+Y = dataset.iloc[:, 1].values
+
+
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
+
+regressor = LinearRegression()
+regressor = regressor.fit(X_train, Y_train)
+
+Y_pred = regressor.predict(X_test)
+
+fig_train = plt.figure(1)
+plt.scatter(X_train, Y_train, color='red')
+plt.plot(X_train, regressor.predict(X_train), color='blue')
+
+fig_test = plt.figure(2)
+plt.scatter(X_test, Y_test, color='red')
+plt.plot(X_test, regressor.predict(X_test), color='blue')
+plt.show()
diff --git a/Code/3Days.py b/Code/3Days.py
@@ -0,0 +1,26 @@
+import numpy as np
+import pandas as pd
+
+dataset = pd.read_csv("../datasets/50_Startups.csv")
+X = dataset.iloc[:, :-1].values
+Y = dataset.iloc[:, -1]
+
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+labelencoder = LabelEncoder()
+X[:, 3] = labelencoder.fit_transform(X[:, 3])
+onehotencoder = OneHotEncoder(categorical_features=[3])
+X = onehotencoder.fit_transform(X).toarray()
+
+
+X = X[:, 1:]
+
+from sklearn.cross_validation import train_test_split
+
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
+
+from sklearn.linear_model import LinearRegression
+regressor = LinearRegression()
+regressor = regressor.fit(X_train, Y_train)
+
+y_pred = regressor.predict(X_test)
+print(y_pred)
diff --git a/Code/Day 6 Logistic Regression.md b/Code/Day 6 Logistic Regression.md
@@ -81,6 +81,42 @@ cm = confusion_matrix(y_test, y_pred)
 
 ## Visualization
 
+```python
+import matplotlib.pyplot as plt
+
+theta = classifier.coef_
+b = classifier.intercept_
+# line equation: age * theta_1 + salary * theta2 + b = 0
+age_plot = [i/10 for i in range(-20, 20)]
+salary_plot = -1 * (theta[0, 0] * np.array(age_plot) + b)/theta[0, 1]
+
+
+def plot_result(x, y, type='train'):
+    xlim = [-3, 3]
+    ylim = [-2.5, 3.5]
+    x_positive = x[np.where(y == 1)]
+    x_negative = x[np.where(y == 0)]
+    fig_train = plt.figure()
+    ax = fig_train.add_subplot(111)
+    plt.xlabel('Age')
+    plt.ylabel('Salary')
+    plt.title('Logistic Regresstion (%s set)' % type)
+
+    ax.plot(age_plot, salary_plot, c='r')
+    plt.fill_between(age_plot, salary_plot, ylim[-1], color='lawngreen')
+    plt.fill_between(age_plot, ylim[0], salary_plot, color='hotpink')
+
+    ax.scatter(x_negative[:, 0], x_negative[:, 1], c='r', label='0')
+    ax.scatter(x_positive[:, 0], x_positive[:, 1], c='g', label='1')
+    ax.set_xlim(xlim)
+    ax.set_ylim(ylim)
+    plt.legend()
+    plt.show()
+
+
+plot_result(x_train, y_train, type="Train")
+plot_result(x_test, y_test, type="Test")
+```
 <p align="center">
   <img src="https://github.com/Avik-Jain/100-Days-Of-ML-Code/blob/master/Other%20Docs/training.png">
 </p> 

diff --git a/Code/Day6.py b/Code/Day6.py
@@ -0,0 +1,79 @@
+import numpy as np
+import pandas as pd
+
+dataset = pd.read_csv('..\datasets\Social_Network_Ads.csv')
+
+x = dataset.iloc[:, [2, 3]].values
+y = dataset.iloc[:, 4].values
+
+from sklearn.cross_validation import train_test_split
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)
+
+from sklearn.preprocessing import StandardScaler
+
+sc = StandardScaler()
+x_train = sc.fit_transform(x_train)
+x_test = sc.fit_transform(x_test)
+
+def sigmod(theta):
+    import math
+    y = 1/(1 + math.e**(-theta))
+    return y
+
+
+from sklearn.linear_model import LogisticRegression
+classifier = LogisticRegression()
+classifier.fit(x_train, y_train)
+
+y_pred = classifier.predict(x_test)
+
+
+# out = sigmod(yy_train)
+from sklearn.metrics import confusion_matrix
+cm = confusion_matrix(y_test, y_pred)
+
+# plot data set and boundary
+import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
+
+theta = classifier.coef_
+b = classifier.intercept_
+# line equation: age * theta_0 + salary * theta1 + b = 0
+age_plot = [i/10 for i in range(-40, 40)]
+salary_plot = -1 * (theta[0, 0] * np.array(age_plot) + b)/theta[0, 1]
+
+
+def plot_result(x, y, type='train'):
+    xlim = [-3, 3]
+    ylim = [-2.5, 3.5]
+    x_positive = x[np.where(y == 1)]
+    x_negative = x[np.where(y == 0)]
+    fig_train = plt.figure()
+    ax = fig_train.add_subplot(111)
+    plt.xlabel('Age')
+    plt.ylabel('Salary')
+    plt.title('Logistic Regresstion (%s set)' % type)
+
+    X_set, y_set = x_test, y_test
+    X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
+                         np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
+    plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
+                 alpha=0.55, cmap=ListedColormap(('red', 'green')))
+
+    # ax.plot(age_plot, salary_plot, c='r')
+    # plt.fill_between(age_plot, salary_plot, ylim[-1], color='lawngreen')
+    # plt.fill_between(age_plot, ylim[0], salary_plot, color='hotpink')
+    #
+    ax.scatter(x_negative[:, 0], x_negative[:, 1], c='r', label='0')
+    ax.scatter(x_positive[:, 0], x_positive[:, 1], c='g', label='1')
+    ax.set_xlim((X1.min(), X1.max()))
+    ax.set_ylim((X2.min(), X2.max()))
+    plt.legend()
+    plt.show()
+
+
+plot_result(x_train, y_train, type="Train")
+plot_result(x_test, y_test, type="Test")
+
+
+