-
Notifications
You must be signed in to change notification settings - Fork 2
/
UCI_data_getter.py
60 lines (51 loc) · 1.63 KB
/
UCI_data_getter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
'''
Differentially private Bayesian learning on distributed data
Mikko Heikkilä 2016-17
UCI data
Script for reading UCI datasets. Returns a dataset with target as the last col.
'''
import numpy as np
import data_reader
# Relative directory prefix that every loader below prepends to its dataset file name.
data_folder = 'data/'
#abalone dataset, predict abalone age
def get_abalone():
    """Load the abalone dataset into a float array.

    Reads ``abalone/abalone.data`` under ``data_folder`` through
    ``data_reader.read_data``. Column 0 of each row (the sex label) is
    replaced by a numeric code: ``'M'`` becomes 0 and every other value
    becomes 1. The remaining columns are copied over as floats.

    Returns:
        np.ndarray with one row per row read and as many columns as the
        first row has fields.
    """
    path = data_folder + 'abalone/abalone.data'
    rows = data_reader.read_data(path)
    n_rows, n_cols = len(rows), len(rows[0])
    data = np.zeros((n_rows, n_cols))
    for idx, row in enumerate(rows):
        # Numeric fields are copied as-is (NumPy converts them to float).
        data[idx, 1:] = row[1:]
        # Code categorical sex as 0 = male, 1 = anything else.
        # NOTE(review): the UCI file presumably also contains an 'I'
        # (infant) label, which ends up coded as 1 here - confirm intended.
        data[idx, 0] = 0 if row[0] == 'M' else 1
    return data
#predict concrete compressive strength
def get_concrete():
    """Load the concrete compressive strength dataset into a float array.

    Reads ``concrete/Concrete_Data.txt`` under ``data_folder`` through
    ``data_reader.read_data``. Row 0 of the file holds the column names
    and is skipped; every following row is copied into the result.
    Per the original note, a row consists of 8 measurements followed by
    the target.

    Returns:
        np.ndarray of shape ``(rows_read - 1, fields_in_first_row)``.
    """
    filename = data_folder + 'concrete/Concrete_Data.txt'
    rows = data_reader.read_data(filename)
    # Row 0 = column names, so the array has one row fewer than the file.
    data = np.zeros((len(rows) - 1, len(rows[0])))
    # File row k goes to array row k - 1. Iterating up to len(rows) keeps
    # the last record and avoids leaving array row 0 filled with zeros.
    for k_row in range(1, len(rows)):
        data[k_row - 1, :] = rows[k_row]
    return data
#predict wine quality, red & white separately
def get_red_wine():
    """Load the red wine quality dataset into a float array.

    Reads ``wine/winequality-red.csv`` under ``data_folder`` through
    ``data_reader.read_data``. Only element 0 of each row is used; it is
    split on ``';'`` to obtain the individual fields. Row 0 holds the
    column names and is used only to count the columns.

    Returns:
        np.ndarray of shape ``(rows_read - 1, number_of_header_fields)``.
    """
    filename = data_folder + 'wine/winequality-red.csv'
    rows = data_reader.read_data(filename)
    n_cols = len(rows[0][0].split(";"))
    # 0th row = names, so the array has one row fewer than the file.
    data = np.zeros((len(rows) - 1, n_cols))
    # File row k goes to array row k - 1. The loop must run up to
    # len(rows); stopping at the array's row count would leave the last
    # array row as zeros.
    for k_row in range(1, len(rows)):
        data[k_row - 1, :] = rows[k_row][0].split(";")
    return data
def get_white_wine():
    """Load the white wine quality dataset into a float array.

    Reads ``wine/winequality-white.csv`` under ``data_folder`` through
    ``data_reader.read_data``. Only element 0 of each row is used; it is
    split on ``';'`` to obtain the individual fields. Row 0 holds the
    column names and is used only to count the columns.

    Returns:
        np.ndarray of shape ``(rows_read - 1, number_of_header_fields)``.
    """
    filename = data_folder + 'wine/winequality-white.csv'
    rows = data_reader.read_data(filename)
    n_cols = len(rows[0][0].split(";"))
    # 0th row = names, so the array has one row fewer than the file.
    data = np.zeros((len(rows) - 1, n_cols))
    # File row k goes to array row k - 1. The loop must run up to
    # len(rows); stopping at the array's row count would leave the last
    # array row as zeros.
    for k_row in range(1, len(rows)):
        data[k_row - 1, :] = rows[k_row][0].split(";")
    return data
# When executed as a script, load the white wine data once; the result is discarded.
if __name__ == "__main__":
    get_white_wine()