-
Notifications
You must be signed in to change notification settings - Fork 0
/
(6-7) Cluster Analysis.py
90 lines (75 loc) · 2.39 KB
/
(6-7) Cluster Analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Standardization utility (scikit-learn)
from sklearn.preprocessing import StandardScaler

# Read the data; the 'name' column is used as the row index
beer = pd.read_csv(
    "c:/data/mva/beerbrand.csv",
    index_col="name",
)
# Preview the first rows (displayed only in an interactive session)
beer.head()
# Descriptive statistics (displayed only in an interactive session)
beer.describe()

# Standardize every column; the result is what the clustering steps use
scaler = StandardScaler()
zbeer = scaler.fit_transform(beer)
# Py 4.2
# Load the hierarchical clustering package
import scipy.cluster.hierarchy as sch

# Hierarchical cluster analysis: single linkage
slink = sch.linkage(zbeer, method='single')
# other options: method = 'single', 'complete', 'average', 'median', 'ward'

# Draw the dendrogram.
# The row labels are passed as a plain list rather than a pandas Index:
# a list is accepted by every SciPy release, whereas an Index cannot be
# evaluated in a boolean context (presumably what older releases did when
# validating `labels` -- confirm against the SciPy version in use).
plt.figure(figsize=(7, 5))
sch.dendrogram(
    slink,
    leaf_rotation=80,
    leaf_font_size=10,
    labels=beer.index.tolist(),
)
plt.title("Dendrogram of Single linkage")
plt.show()
# Py 4.3
# Hierarchical cluster analysis: Ward's method
wlink = sch.linkage(zbeer, method='ward')

# Draw the dendrogram.
# Labels are handed over as a plain list instead of a pandas Index so the
# call does not depend on how the installed SciPy validates `labels`.
plt.figure(figsize=(7, 5))
sch.dendrogram(
    wlink,
    leaf_rotation=80,
    leaf_font_size=10,
    labels=beer.index.tolist(),
)
plt.title("Dendrogram of Ward's method")
plt.show()
# Py 4.4
# Hierarchical cluster analysis: centroid linkage
clink = sch.linkage(zbeer, method='centroid')

# Draw the dendrogram.
# Labels are handed over as a plain list instead of a pandas Index so the
# call does not depend on how the installed SciPy validates `labels`.
plt.figure(figsize=(7, 5))
sch.dendrogram(
    clink,
    leaf_rotation=80,
    leaf_font_size=10,
    labels=beer.index.tolist(),
)
plt.title("Dendrogram of Centroid linkage")
plt.show()
# Py 4.5
from sklearn.cluster import AgglomerativeClustering

# Agglomerative clustering with Ward linkage, cut at 4 clusters.
# The distance argument is intentionally omitted: Ward linkage only works
# with Euclidean distance, which is the default, and the `affinity` keyword
# was deprecated in scikit-learn 1.2 and removed in 1.4 (renamed `metric`),
# so passing it makes the script fail on current releases.
wcluster = AgglomerativeClustering(n_clusters=4, linkage='ward')

# Cluster membership of each row of the standardized data
member = wcluster.fit_predict(zbeer)
member  # displayed only in an interactive session

# Per-cluster means of the original (unstandardized) variables:
# attach the labels to the data by index, then average within each cluster.
member1 = pd.DataFrame(member, columns=['cluster'], index=beer.index)
data_combined = beer.join(member1)
data_combined.groupby('cluster').mean()
# Py 4.6
# K-means cluster analysis
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Standardize the variables
zbeer = StandardScaler().fit_transform(beer)

# k-means clustering with 2 clusters.
# (The original listing repeated this whole section twice, standardizing and
# fitting a second time for no effect; it is kept once.)
# random_state fixes the centroid initialization so the labels -- and the
# colours in the scatter plot that uses them -- are reproducible between runs.
# n_init is spelled out because its default differs between scikit-learn
# versions (10 in older releases, 'auto' from 1.4).
kmc = KMeans(n_clusters=2, n_init=10, random_state=0)
kmc.fit(zbeer)

# Cluster centers (in standardized units; displayed only interactively)
kmc.cluster_centers_
# Cluster membership of each row (displayed only interactively)
kmc.labels_
# Py 4.7
# Scatter plot of cluster membership: the 'calories' column against the
# 'sodium' column of the original data, coloured by the k-means label.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(beer['calories'], beer['sodium'], c=kmc.labels_)
plt.show()