-
Notifications
You must be signed in to change notification settings - Fork 0
/
splicer.py
101 lines (91 loc) · 4.21 KB
/
splicer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import sys
import cPickle
import re
from random import randint
import glob
from pybloomfilter import BloomFilter
# The first command-line argument names the directory to work in; both
# directory names are taken from that same argument.
book_dir = bloom_dir = sys.argv[1]

# Collect the paths of every candidate book (*.txt) and every bloom filter
# (*.bf) found directly inside that directory.
candidatebook_paths = glob.glob(book_dir + '/*.txt')
bloom_paths = glob.glob(book_dir + '/*.bf')

# Echo what was discovered before any quote is generated.
print(candidatebook_paths)
print(bloom_paths)
def mutate(first_sentence, bloom_paths, bias=3):
    """Splice first_sentence with another sentence at a randomly chosen word.

    The splice word is chosen with two nested random draws: a random lower
    bound is drawn first, then the word index is drawn between that bound and
    the last word, which skews the choice toward the end of the sentence.
    Double quotes surrounding the chosen word are stripped before it is
    handed to mutateAtWord().

    first_sentence -- the sentence to mutate.
    bloom_paths    -- bloom filter paths, passed through to mutateAtWord().
    bias           -- accepted for backward compatibility; the selection
                      logic does not currently read it.

    Returns whatever mutateAtWord() returns, or first_sentence unchanged when
    it contains no words at all.
    """
    first_sentence_words = first_sentence.split()
    if not first_sentence_words:
        # An empty or whitespace-only sentence has no splice point, and
        # randint(0, -1) would raise ValueError, so hand it back untouched.
        return first_sentence

    # Pick a random word in the target sentence as the splice point.
    last_index = len(first_sentence_words) - 1
    word_index = randint(randint(0, last_index), last_index)
    target_word = first_sentence_words[word_index].strip('"')
    return mutateAtWord(first_sentence, target_word, bloom_paths)
def makeQuote(splices=1):
    """Build a quote by repeatedly splicing a randomly chosen book sentence.

    A random book from candidatebook_paths is read and split into sentences
    on runs of '.', '?' or '!'. One sentence is picked at random and then
    passed through mutate() once per splice; each splice is retried up to ten
    times while mutate() keeps returning the sentence unchanged. After every
    splice the intermediate result is printed as '<splice number>:<sentence>'.

    splices -- number of splice passes to apply (default 1).

    Returns the final sentence.
    """
    # Pick a random book to grab the first sentence piece from.
    firstbook_index = randint(0, len(candidatebook_paths) - 1)
    # Read the book through a context manager so the handle is closed promptly.
    with open(candidatebook_paths[firstbook_index]) as firstbook_file:
        firstbook = firstbook_file.read()

    # Split the book into sentences. Splitting leaves empty / whitespace-only
    # fragments behind (e.g. after the final full stop); those have no words
    # to splice on, so prefer the non-blank fragments when there are any.
    pieces = re.split(r'\.+|\?+|\!+', firstbook)
    sentences = [piece for piece in pieces if piece.strip()] or pieces

    # Pick a random sentence in the book to start with.
    sentence = sentences[randint(0, len(sentences) - 1)]
    lastsentence = sentence

    for i in range(splices):
        # Retry until the sentence actually changes, giving up after 10 tries.
        mutate_count = 0
        while sentence == lastsentence and mutate_count < 10:
            sentence = mutate(sentence, bloom_paths)
            mutate_count += 1
        lastsentence = sentence
        print(str(i) + ':' + sentence)
    return sentence
def mutateAtWord(first_sentence, word, bloom_paths):
    """Splice first_sentence with another sentence that contains word.

    The result is the part of first_sentence that precedes word followed by
    the part of the second sentence that starts at word. The second sentence
    is looked up in the books that sit behind the bloom filters listed in
    bloom_paths.

    first_sentence -- the sentence whose beginning is kept.
    word           -- the whitespace-delimited word to splice at.
    bloom_paths    -- paths of bloom filter files; the matching book path is
                      the filter path with its last three characters removed.

    Returns the spliced sentence, or first_sentence unchanged (after printing
    a notice) when no other sentence containing word could be found.
    """
    second_sentence = _find_sentence_with_word(first_sentence, word, bloom_paths)
    if second_sentence == '':
        # We should probably just try a new word if we couldn't find a match.
        print('couldn\'t generate')
        return first_sentence
    return _splice_at_word(first_sentence, second_sentence, word)


def _find_sentence_with_word(first_sentence, word, bloom_paths):
    """Return a sentence other than first_sentence containing word, or ''."""
    # Visit every bloom filter at most once, in random order.
    untried = list(range(len(bloom_paths)))
    while untried:
        bloom_path = bloom_paths[untried.pop(randint(0, len(untried) - 1))]
        bf = BloomFilter.open(bloom_path)
        # Check if the bloom filter we picked has seen the word we want.
        if word not in bf:
            continue
        # A hit is only a hint: open the book and look for a real sentence.
        # The book path is the filter path minus its last three characters.
        with open(bloom_path[:-3]) as book_file:
            target_book = book_file.read()
        target_book_sentences = re.split(r'\.+|\?+|\!+', target_book)
        total = len(target_book_sentences)
        # Start scanning at a random sentence and wrap around, so sentences
        # located before the starting point are considered as well.
        start = randint(0, total - 1)
        for offset in range(total):
            candidate = target_book_sentences[(start + offset) % total]
            if candidate != first_sentence and word in candidate.split():
                return candidate
    return ''


def _splice_at_word(first_sentence, second_sentence, word):
    """Join the head of first_sentence to the tail of second_sentence at word."""
    token = re.escape(word)
    # Match the word as a whole whitespace-delimited token. This also works
    # when it is the first or last token or is bounded by a newline or tab.
    # In the first sentence the token may still carry surrounding double
    # quotes, so optional quotes are allowed around it there.
    head_match = re.search(r'(?<!\S)"*' + token + r'"*(?!\S)', first_sentence)
    tail_match = re.search(r'(?<!\S)' + token + r'(?!\S)', second_sentence)
    # Without a splice point in the first sentence nothing of it is kept.
    head = first_sentence[:head_match.start()] if head_match else ''
    tail = second_sentence[tail_match.start():] if tail_match else second_sentence
    new_sentence = head + tail
    # Close a dangling double quote so the quotes in the result are balanced.
    if new_sentence.count('"') % 2 != 0:
        new_sentence += '"'
    return new_sentence
# Generate one hundred quotes. Each quote gets three splice passes and is
# framed by a '{' line before and a '}' line after its progress output.
for _ in range(100):
    print('{')
    makeQuote(3)
    print('}')