# multivariate_gaussian.py — Gaussian Bayes classifier with a full
# per-class covariance matrix (83 lines / 68 loc in the original file).
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.stats import multivariate_normal
class GaussNB:
    """Gaussian Bayes classifier with a full (non-diagonal) covariance
    matrix per class.

    Each class-conditional likelihood P(x | class) is modelled as a
    multivariate normal whose mean vector and covariance matrix are
    estimated from the training rows of that class; prediction applies
    Bayes' rule and returns the maximum-posterior class.
    """

    def __init__(self):
        """No params are needed for basic functionality."""
        pass

    def _mean(self, X):
        """Return ``{class_label: per-feature mean vector}`` computed over
        the rows of ``X`` belonging to each class (uses ``self.y`` and
        ``self.classes_`` set by :meth:`fit`)."""
        mu = dict()
        for c in self.classes_:
            idx = np.argwhere(self.y == c).flatten()
            # Vectorized column mean replaces the original per-feature loop.
            mu[c] = X[idx, :].mean(axis=0)
        return mu

    def _cov(self, X):
        """Return ``{class_label: covariance matrix}`` estimated from the
        rows of ``X`` belonging to each class.

        NOTE(review): ``np.cov`` needs at least 2 samples per class and may
        produce a singular matrix for tiny classes — verify against callers.
        """
        cov = dict()
        for c in self.classes_:
            idx = np.argwhere(self.y == c).flatten()
            # np.cov expects variables as rows, hence the transpose.
            cov[c] = np.cov(X[idx, :].T)
        return cov

    def _prior(self):
        """Return ``{class_label: empirical prior}`` = class frequency in y.

        Fix: the original hard-coded 0.5 for every class, which is only a
        valid probability distribution for two balanced classes; empirical
        frequencies are correct for any number of classes and any imbalance
        (and coincide with 0.5 in the balanced binary case).
        """
        n = self.y.shape[0]
        return {c: np.sum(self.y == c) / n for c in self.classes_}

    def P_E_H(self, x, h):
        """Likelihood P(x | class h) under the fitted multivariate normal."""
        return multivariate_normal.pdf(x, mean=self.means_[h], cov=self.cov_[h])

    def fit(self, X, y):
        """Estimate per-class means, covariances and priors from (X, y).

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
        y : array-like of class labels, shape (n_samples,)
        """
        self.n_samples, self.n_feats = X.shape
        self.classes_ = np.unique(y)
        self.n_classes = self.classes_.shape[0]
        self.y = np.asarray(y)
        self.means_ = self._mean(X)
        self.priors_ = self._prior()
        self.cov_ = self._cov(X)

    def predict(self, X):
        """Return a list with the most probable class label for each row of X.

        Raises
        ------
        ValueError
            If X's feature count differs from the training data. (Fix: the
            original only printed a warning and then crashed later inside
            the density evaluation.)
        """
        samples, feats = X.shape
        if feats != self.n_feats:
            raise ValueError("No dimension match with training data!")
        result = []
        for i in range(samples):
            # Unnormalized posteriors P(x|h) * P(h). The marginal P(x) is a
            # shared positive denominator, so dividing by it cannot change
            # the argmax — the normalization step is skipped.
            posteriors = [self.P_E_H(X[i], h) * self.priors_[h]
                          for h in self.classes_]
            result.append(self.classes_[np.argmax(posteriors)])
        return result