-
Notifications
You must be signed in to change notification settings - Fork 1
/
final.py
151 lines (138 loc) · 4.18 KB
/
final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
'''
CMPT 383 (Fall 2010): Final Assignment - Travesty in Python
File: final.py
Naoya Makino
301117541
Dec 3rd, 2010
http://www.cs.sfu.ca/CC/383/ted/383-10-3/Final.html
'''
import sys
import random
class FinalProject:
    '''
    Build random "travesty" sentences from a collocation corpus file.

    The corpus is read as whitespace-separated tokens grouped into
    8-token records.  Within a record the code reads the lemma (token 1),
    the lemma's part-of-speech tag (token 2), a collocate (token 3), the
    collocate's part-of-speech tag (token 4) and a percentage-like value
    (token 7).  NOTE(review): the meaning of the remaining tokens is not
    visible from this file.

    Every kept record is stored as a row
    ``[lemma, collocate, collocate PoS, percentage]`` in one of adjList,
    nounList, adverbList or verbList according to the lemma's tag
    ('j', 'n', 'r' or 'v').  The words chosen for the sentence currently
    being built are collected in the dict ``final``.
    '''

    # Positions of the fields inside each stored row.
    lemma, coll, collPoss, colPercent = 0, 1, 2, 3

    _numOfCol = 8       # tokens per corpus record
    _lemPoS = 2         # offset of the lemma's PoS tag inside a record
    _headerLines = 40   # leading lines of the corpus file that are skipped
    _maxPercent = 0.5   # a collocate is usable only at or below this value

    def __init__(self, fname):
        '''
        Read the corpus file *fname* and sort its records into word lists.

        Exits the program (through getCorpus) when the file cannot be
        opened.
        '''
        self.adjList, self.nounList = [], []
        self.adverbList, self.verbList = [], []
        self.final = {}
        for line in self.getCorpus(fname):
            self._addRecords(line)

    def _addRecords(self, line):
        '''File every complete record of one tokenised corpus line.'''
        # A record is read from token i - 1 up to token i + 5, where i is
        # the position of the lemma's PoS tag.  Stopping at len(line) - 5
        # skips a truncated trailing record instead of raising IndexError.
        for i in range(self._lemPoS, len(line) - 5, self._numOfCol):
            tag = line[i]
            collTag = line[i + 2]
            percent = line[i + 5]
            # A '-' placeholder is stored as the float 0.00; every other
            # value is kept as the string read from the file.
            row = [line[i - 1], line[i + 1], collTag,
                   0.00 if percent == '-' else percent]
            if tag == 'j':
                # Adjectives are kept only when they collocate with a noun.
                if collTag == 'n':
                    self.adjList.append(row)
            elif tag == 'n':
                self.nounList.append(row)
            elif tag == 'r':
                self.adverbList.append(row)
            elif tag == 'v':
                # Verbs are kept only with a noun or adjective collocate.
                if collTag == 'n' or collTag == 'j':
                    self.verbList.append(row)

    def getCorpus(self, fname):
        '''
        Return the corpus in *fname* as a list of token lists.

        The first 40 lines of the file are skipped; every later line is
        split on whitespace.  If the file cannot be opened an error
        message is printed and the program exits.
        '''
        try:
            f = open(fname)
        except (IOError, OSError):
            print('error reading ' + fname)
            sys.exit()
        # The context manager closes the file even if reading fails.
        with f:
            return [text.split() for number, text in enumerate(f)
                    if number >= self._headerLines]

    def genSubject(self):
        '''
        Pick a random noun lemma and store it as final['subject'].

        Raises ValueError when no nouns were loaded.
        '''
        if not self.nounList:
            raise ValueError('no nouns were loaded from the corpus')
        r = random.randrange(0, len(self.nounList))
        self.final['subject'] = self.nounList[r][self.lemma]

    def genVerb(self, typeOfSentence):
        '''
        Pick a random verb lemma and try to complete a sentence around it.

        For typeOfSentence == 1 an object noun and adverb are looked up,
        otherwise an adjective and object noun.  On success the verb is
        stored as final['verb'] and True is returned; otherwise False.
        Raises ValueError when no verbs were loaded.
        '''
        if not self.verbList:
            raise ValueError('no verbs were loaded from the corpus')
        r = random.randrange(0, len(self.verbList))
        chosen = self.verbList[r][self.lemma]
        # All corpus rows that belong to the chosen verb lemma.
        verb = [v for v in self.verbList if v[self.lemma] == chosen]
        if typeOfSentence == 1:
            completed = self.genObjectNoun(verb)
        else:
            completed = self.genAdjective(verb)
        if completed:
            self.final['verb'] = chosen
        return completed

    def _pickCollocate(self, rows, pos, accept=None):
        '''
        Return the collocate of the first usable row in *rows*, or None.

        A row is usable when its collocate PoS tag equals *pos*, its
        percentage is at most 0.5 and, if *accept* is given, accept()
        called with the collocate returns a true value.
        '''
        for row in rows:
            if row[self.collPoss] != pos:
                continue
            if float(row[self.colPercent]) <= self._maxPercent:
                candidate = row[self.coll]
                if accept is None or accept(candidate):
                    return candidate
        return None

    def genObjectNoun(self, verb):
        '''
        Choose a noun collocate of the verb rows *verb* that also has an
        adverb (see genAdverb) and store it as final['objectNoun'].

        Returns True when such a noun was found, False otherwise.
        '''
        objNoun = self._pickCollocate(verb, 'n', self.genAdverb)
        if objNoun is None:
            return False
        self.final['objectNoun'] = objNoun
        return True

    def genAdjective(self, verb):
        '''
        Choose an adjective collocate of the verb rows *verb* that also
        has an object noun (see genObjNounType2) and store it as
        final['adjective'].

        Returns True when such an adjective was found, False otherwise.
        '''
        adjective = self._pickCollocate(verb, 'j', self.genObjNounType2)
        if adjective is None:
            return False
        self.final['adjective'] = adjective
        return True

    def genAdverb(self, objNoun):
        '''
        Choose an adverb collocate of the noun lemma *objNoun* and store
        it as final['adverb'].

        Returns True when such an adverb was found, False otherwise.
        '''
        nList = [n for n in self.nounList if n[self.lemma] == objNoun]
        adverb = self._pickCollocate(nList, 'r')
        if adverb is None:
            return False
        self.final['adverb'] = adverb
        return True

    def genObjNounType2(self, adj):
        '''
        Choose a noun collocate of the adjective lemma *adj* and store it
        as final['objectNounType2'].

        Returns True when such a noun was found, False otherwise.
        '''
        aList = [a for a in self.adjList if a[self.lemma] == adj]
        noun = self._pickCollocate(aList, 'n')
        if noun is None:
            return False
        self.final['objectNounType2'] = noun
        return True

    def genSentence(self, typeOfSentence):
        '''
        Print the sentence assembled in ``final``.

        Type 1 prints "Subject verb objectNoun adverb."; any other type
        prints "Subject verb adjective objectNounType2.".  Only the first
        letter of the subject is upper-cased.
        '''
        if typeOfSentence == 1:
            keys = ('verb', 'objectNoun', 'adverb')
        else:
            keys = ('verb', 'adjective', 'objectNounType2')
        subject = self.final['subject']
        words = [subject[0].upper() + subject[1:]]
        words.extend(self.final[key] for key in keys)
        # A single parenthesised expression prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(' '.join(words) + '.')
_USAGE = (
    "Your program should take three command-line arguments. \n" +
    "The first argument should be the name of the corpus file, \n" +
    "the second argument should be the number of travesty sentences to generate, \n" +
    "and the third argument indicates which type of sentence that you should generate\n" +
    "(a value of '1' specifies Type 1 sentences and '2' specifies Type 2 sentences):"
)


def _parseArgs(argv):
    '''
    Return (corpus file name, sentence count, sentence type) taken from
    *argv*, or None when the arguments are not usable.
    '''
    # The length is checked first so a missing argument produces the usage
    # text instead of an IndexError.
    if len(argv) != 4:
        return None
    try:
        count, kind = int(argv[2]), int(argv[3])
    except ValueError:
        return None
    if count <= 0 or kind not in (1, 2):
        return None
    return argv[1], count, kind


def main(argv):
    '''
    Print the requested number of travesty sentences.

    *argv* has the layout of sys.argv: program name, corpus file name,
    number of sentences (a positive integer) and sentence type (1 or 2).
    When the arguments are unusable the usage text is printed and the
    program exits with status 1.
    '''
    parsed = _parseArgs(argv)
    if parsed is None:
        print(_USAGE)
        sys.exit(1)
    fname, count, kind = parsed
    final = FinalProject(fname)
    for _ in range(count):
        final.genSubject()
        # Keep drawing random verbs until one can be completed.
        # NOTE(review): there is no upper bound on the retries, so this
        # never returns if the corpus cannot yield a sentence of this type.
        while not final.genVerb(kind):
            pass
        final.genSentence(kind)


if __name__ == '__main__':
    main(sys.argv)