Skip to content

Commit

Permalink
🚨 Major Refactoring to Examples and Names
Browse files Browse the repository at this point in the history
  • Loading branch information
EssamWisam committed Jul 24, 2023
1 parent 0842e3f commit 67cc7ab
Show file tree
Hide file tree
Showing 44 changed files with 1,066 additions and 4,318 deletions.
2 changes: 0 additions & 2 deletions botiverse/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
"""Entry point for the botiverse package."""
from botiverse.gui.gui import chat_gui
#from botiverse.TODS.DNN_DST.DNN_DST import DNNDST
#from botiverse.TODS.DNN_TODS import DNNTODS
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
stemmer = PorterStemmer()


class basic_chatbot:
class BasicBot:
'''
An interface for a basic chatbot model suitable for small datasets such as FAQs. Note that the
underlying model is not sequential (either an NN or an SVM).
Expand Down Expand Up @@ -42,7 +42,6 @@ def __init__(self, machine='nn', repr='tf-idf'):
self.transformer = repr
else:
raise Exception('Representation must either be one of those the basic chatbot support or a custom one that implement the transform API. Found was ' + repr)


self.tf = None
self.idf = None
Expand Down Expand Up @@ -84,13 +83,22 @@ def setup_data(self):
y[i] = classes.index(tag)
y = np.array(y)
return X, y

def read_data(self, path):
"""
Read the data from a JSON file for the chatbot to train on later.
:param data: A stringfied JSON object containing the training data
:type number: string
"""
with open(path, 'r') as f:
self.raw_data = json.load(f)

def train(self, path, max_epochs=None, early_stop=False, **kwargs):
self.X, self.y = self.setup_data()

def train(self, max_epochs=None, early_stop=False, **kwargs):
"""
Train the chatbot model with the given JSON data.
:param data: A stringfied JSON object containing the training data
:type number: string
:param early_stop: Whether to use early stopping or not
:type early_stop: bool
:param provided_model: A model to use instead of the default one
Expand All @@ -101,11 +109,7 @@ def train(self, path, max_epochs=None, early_stop=False, **kwargs):
:return: None
:rtype: NoneType
"""
with open(path, 'r') as f:
self.raw_data = json.load(f)

X, y = self.setup_data()

X, y = self.X, self.y
if self.machine == 'nn':
self.model = NeuralNet(structure=[X.shape[1], 12, len(self.classes)], activation='sigmoid')
max_epochs = max_epochs if max_epochs is not None else 30 * len(self.classes)
Expand All @@ -119,7 +123,7 @@ def train(self, path, max_epochs=None, early_stop=False, **kwargs):
self.model = SVM(kernel='linear', C=700)
self.model.fit(X, y, eval_train=True)
elif type(self.machine) != str:
self.model.fit(X, y, **kwargs)
self.machine.fit(X, y, **kwargs)
else:
raise Exception('Machine must either be one of those the basic chatbot support or a custom one that implement the fit API. Found was ' + self.machine)

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,82 +1,3 @@
import numpy as np
import json
from gtts import gTTS
import tempfile
import os
from botiverse.models import TTS
from playsound import playsound

from botiverse.models import LSTMClassifier
from botiverse.preprocessors import Vocalize, Wav2Vec, Wav2Text, BertEmbedder, Frequency, BertSentenceEmbedder
from botiverse.bots.Vocalizer.utils import voice_input



class Vocalizer():
    '''An interface for the vocalizer chatbot which simulates a call with a customer service bot.'''
    def __init__(self, call_json_path, repr='BERT-Sentence'):
        '''
        Load the call state machine from a json file and set up the speech/text helpers.

        :param call_json_path: The path to the json file containing the call state machine.
        :param repr: The text representation to load, either 'BERT' or 'BERT-Sentence'.
        :raises Exception: If ``repr`` is neither 'BERT' nor 'BERT-Sentence'.
        '''
        # JSON interchange files are UTF-8, so do not depend on the platform default encoding.
        with open(call_json_path, 'r', encoding='utf-8') as file:
            self.call_data = json.load(file)
        self.current_node = 'A'
        self.wav2text = Wav2Text()
        # Define both embedder attributes up front so later code can test for None
        # instead of tripping over a missing attribute.
        self.bert_embeddings = None
        self.bert_sentence_embeddings = None
        if repr == 'BERT':
            self.bert_embeddings = BertEmbedder()
        elif repr == 'BERT-Sentence':
            self.bert_sentence_embeddings = BertSentenceEmbedder()
        else:
            raise Exception(f"Invalid representation {repr}. Expected BERT or BERT-Sentence.")

    def generate_speech(self, text, offline=False):
        '''Use Google's TTS or offline FastSpeech 1.0 to play speech from the given text.

        :param text: The text to be converted into speech.
        :param offline: Whether to use offline FastSpeech 1.0 to generate speech.
        '''
        if offline:
            tts = TTS()
            tts.speak(text)
            return

        tts = gTTS(text=text, lang='en', tld="us", slow=False)
        # Only reserve a unique path here; the handle is closed before gTTS writes to it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_filename = temp_audio.name
        try:
            tts.save(temp_filename)
            # NOTE(review): assumes playsound blocks until playback ends - confirm.
            playsound(temp_filename)
        finally:
            # delete=False means nobody else cleans this file up; without the removal
            # every spoken sentence leaves an mp3 behind in the temp directory.
            try:
                os.remove(temp_filename)
            except OSError:
                # Best-effort cleanup: a file still held by the player must not abort the call.
                pass

    def simulate_call(self):
        '''
        Simulate a call with a customer service bot as driven by the call state machine.

        Starting from ``self.current_node``, the bot speaks the node's message, records and
        transcribes the user's answer, picks the closest expected intent, speaks the matching
        reply and moves to that option's next node, until node 'Z' is reached.

        :raises RuntimeError: If a non-final node is reached but no sentence embedder is
            available (e.g. the bot was created with ``repr='BERT'``).
        '''
        while True:
            if self.current_node == 'Z':
                # the final state has a different structure, bot only speaks and then the call ends
                bot_message = self.call_data[self.current_node]['Bot']
                self.generate_speech(bot_message)
                break

            # Fail before speaking/recording rather than with an AttributeError afterwards.
            sentence_embedder = getattr(self, 'bert_sentence_embeddings', None)
            if sentence_embedder is None:
                raise RuntimeError(
                    "simulate_call needs the sentence embedder; create the bot with repr='BERT-Sentence'."
                )

            # 1 - get the current node's data and from that get the message the bot should speak
            node_data = self.call_data[self.current_node]
            bot_message = node_data['Bot']
            self.generate_speech(bot_message)

            # 2 - get the intent options that the bot expects from the user and classify the user's response
            options = node_data['Options']
            intents = [option['Intent'] for option in options]
            max_dur = node_data['max_duration']
            human_resp = voice_input(record_time=int(max_dur))
            human_resp = self.wav2text.transcribe(human_resp)
            # Keyword spelling kept exactly as the original call used it;
            # presumably it matches the embedder's signature - verify against BertSentenceEmbedder.
            selected_ind, score = sentence_embedder.closest_sentence(human_resp, intents, retun_ind=True)
            print(f"you said: {human_resp} and the bot decided that you meant {intents[selected_ind]} with a score of {score}")

            # 3 - speak according to the chosen option
            speak_message = options[selected_ind]['Speak']
            self.generate_speech(speak_message)

            # 4 - go to the next state
            self.current_node = options[selected_ind]['Next']


class SpeechClassifier():
Expand Down
79 changes: 79 additions & 0 deletions botiverse/bots/VoiceBot/VoiceBot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import numpy as np
import json
from gtts import gTTS
import tempfile
import os
from botiverse.models import TTS
from playsound import playsound

from botiverse.models import LSTMClassifier
from botiverse.preprocessors import Vocalize, Wav2Vec, Wav2Text, BertEmbedder, Frequency, BertSentenceEmbedder
from botiverse.bots.VoiceBot.utils import voice_input


class VoiceBot():
    '''An interface for the vocalizer chatbot which simulates a call with a customer service bot.'''
    def __init__(self, call_json_path, repr='BERT-Sentence'):
        '''
        Load the call state machine from a json file and set up the speech/text helpers.

        :param call_json_path: The path to the json file containing the call state machine.
        :param repr: The text representation to load, either 'BERT' or 'BERT-Sentence'.
        :raises Exception: If ``repr`` is neither 'BERT' nor 'BERT-Sentence'.
        '''
        # JSON interchange files are UTF-8, so do not depend on the platform default encoding.
        with open(call_json_path, 'r', encoding='utf-8') as file:
            self.call_data = json.load(file)
        self.current_node = 'A'
        self.wav2text = Wav2Text()
        # Define both embedder attributes up front so later code can test for None
        # instead of tripping over a missing attribute.
        self.bert_embeddings = None
        self.bert_sentence_embeddings = None
        if repr == 'BERT':
            self.bert_embeddings = BertEmbedder()
        elif repr == 'BERT-Sentence':
            self.bert_sentence_embeddings = BertSentenceEmbedder()
        else:
            raise Exception(f"Invalid representation {repr}. Expected BERT or BERT-Sentence.")

    def generate_speech(self, text, offline=False):
        '''Use Google's TTS or offline FastSpeech 1.0 to play speech from the given text.

        :param text: The text to be converted into speech.
        :param offline: Whether to use offline FastSpeech 1.0 to generate speech.
        '''
        if offline:
            tts = TTS()
            tts.speak(text)
            return

        tts = gTTS(text=text, lang='en', tld="us", slow=False)
        # Only reserve a unique path here; the handle is closed before gTTS writes to it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_filename = temp_audio.name
        try:
            tts.save(temp_filename)
            # NOTE(review): assumes playsound blocks until playback ends - confirm.
            playsound(temp_filename)
        finally:
            # delete=False means nobody else cleans this file up; without the removal
            # every spoken sentence leaves an mp3 behind in the temp directory.
            try:
                os.remove(temp_filename)
            except OSError:
                # Best-effort cleanup: a file still held by the player must not abort the call.
                pass

    def simulate_call(self):
        '''
        Simulate a call with a customer service bot as driven by the call state machine.

        Starting from ``self.current_node``, the bot speaks the node's message, records and
        transcribes the user's answer, picks the closest expected intent, speaks the matching
        reply and moves to that option's next node, until node 'Z' is reached.

        :raises RuntimeError: If a non-final node is reached but no sentence embedder is
            available (e.g. the bot was created with ``repr='BERT'``).
        '''
        while True:
            if self.current_node == 'Z':
                # the final state has a different structure, bot only speaks and then the call ends
                bot_message = self.call_data[self.current_node]['Bot']
                self.generate_speech(bot_message)
                break

            # Fail before speaking/recording rather than with an AttributeError afterwards.
            sentence_embedder = getattr(self, 'bert_sentence_embeddings', None)
            if sentence_embedder is None:
                raise RuntimeError(
                    "simulate_call needs the sentence embedder; create the bot with repr='BERT-Sentence'."
                )

            # 1 - get the current node's data and from that get the message the bot should speak
            node_data = self.call_data[self.current_node]
            bot_message = node_data['Bot']
            self.generate_speech(bot_message)

            # 2 - get the intent options that the bot expects from the user and classify the user's response
            options = node_data['Options']
            intents = [option['Intent'] for option in options]
            max_dur = node_data['max_duration']
            human_resp = voice_input(record_time=int(max_dur))
            human_resp = self.wav2text.transcribe(human_resp)
            # Keyword spelling kept exactly as the original call used it;
            # presumably it matches the embedder's signature - verify against BertSentenceEmbedder.
            selected_ind, score = sentence_embedder.closest_sentence(human_resp, intents, retun_ind=True)
            print(f"you said: {human_resp} and the bot decided that you meant {intents[selected_ind]} with a score of {score}")

            # 3 - speak according to the chosen option
            speak_message = options[selected_ind]['Speak']
            self.generate_speech(speak_message)

            # 4 - go to the next state
            self.current_node = options[selected_ind]['Next']

File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# import pyaudio
import pyaudio
import wave
from array import array
from tqdm import tqdm
Expand All @@ -11,8 +11,7 @@ def voice_input(record_time=3, voice_threshold=900, save_path='sample.wav'):
:param save_path: The path to save the audio file to.
:return: The path to the audio file.
'''

"""
# """"
#instantiate the pyaudio
audio = pyaudio.PyAudio()

Expand Down Expand Up @@ -46,5 +45,5 @@ def voice_input(record_time=3, voice_threshold=900, save_path='sample.wav'):
wavfile.close()

return save_path
"""
pass
# """"
#pass
90 changes: 90 additions & 0 deletions botiverse/bots/WhizBot/WhizBot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from botiverse.bots.WhizBot.WhizBot_GRU import WhizBot_GRU
from botiverse.bots.WhizBot.WhizBot_BERT import WhizBot_BERT

class WhizBot:
    '''
    A facade over the WhizBot-BERT and WhizBot-GRU models.

    The concrete model is chosen once at construction time and stored in ``self.bot``;
    every other method forwards its arguments to that object.
    '''
    def __init__(self, repr='BERT'):
        """
        Create the underlying model for the requested representation.

        :param repr: The representation type of the WhizBot model. Either "BERT" or "GRU".
        :type repr: str
        :raises ValueError: If ``repr`` is neither "BERT" nor "GRU".
        """
        # Reject unknown representations first so the happy path stays flat.
        if repr not in ('BERT', 'GRU'):
            raise ValueError('Invalid representation type for WhizBot. Please choose either "BERT" or "GRU".')
        backend_cls = WhizBot_BERT if repr == 'BERT' else WhizBot_GRU
        self.bot = backend_cls()

    def read_data(self, file_path):
        """
        Hand the dataset file over to the underlying model's ``read_data``.

        :param file_path: The path to the file that contains the dataset.
        :type file_path: str
        :returns: None
        """
        backend = self.bot
        backend.read_data(file_path)

    def train(self, epochs=10, batch_size=32):
        """
        Train the underlying model.

        :param epochs: The number of training epochs.
        :type epochs: int
        :param batch_size: The number of training examples used to make one parameter update.
        :type batch_size: int
        :returns: None
        """
        backend = self.bot
        backend.train(epochs, batch_size)

    def validation(self, batch_size=32):
        """
        Run the underlying model's validation step.

        :param batch_size: The batch size forwarded to the underlying model.
        :type batch_size: int
        :returns: None
        """
        backend = self.bot
        backend.validation(batch_size)

    def infer(self, string):
        """
        Perform inference with the underlying model.

        :param string: The input string to perform inference on.
        :type string: str
        :returns: Whatever the underlying model's ``infer`` returns for ``string``.
        """
        reply = self.bot.infer(string)
        return reply

    def save(self, path):
        """
        Ask the underlying model to save itself to ``path``.

        :param path: The path where the model parameters will be saved.
        :type path: str
        :returns: None
        """
        backend = self.bot
        backend.save(path)

    def load(self, path):
        """
        Ask the underlying model to load itself from ``path``.

        :param path: The path from where the model parameters will be loaded.
        :type path: str
        :returns: None
        """
        backend = self.bot
        backend.load(path)
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,18 @@ def train(self, epochs=10, batch_size=32):
:returns: None
"""
self.model.train()
for epoch in range(epochs):
for i in tqdm(range(0, len(self.train_data), batch_size)):
pbar = tqdm(range(epochs), leave=True)
for epoch in pbar:
for i in range(0, len(self.train_data), batch_size):
self.model.zero_grad()
batch_texts = torch.cat(self.train_data['text'][i:i+batch_size].tolist()).to(self.device)
batch_labels = torch.cat(self.train_data['label'][i:i+batch_size].tolist()).to(self.device)
output = self.model(batch_texts)
loss = self.criterion(output, batch_labels)
loss.backward()
self.optimizer.step()
print("Epoch: " + str(epoch) + " Loss: " + str(loss.item()))
pbar.set_description("Epoch: " + str(epoch) + " Loss: " + str(loss.item()))


def validation(self, batch_size=32):
"""
Expand All @@ -75,7 +77,7 @@ def validation(self, batch_size=32):
total = 0
self.model.eval()
with torch.no_grad():
for i in tqdm(range(0, len(self.validation_data), batch_size)):
for i in tqdm(range(0, len(self.validation_data), batch_size), leave=True):
batch_texts = torch.cat(self.validation_data['text'][i:i+batch_size].tolist()).to(self.device)
batch_labels = torch.cat(self.validation_data['label'][i:i+batch_size].tolist()).to(self.device)
outputs = self.model(batch_texts)
Expand Down
File renamed without changes.
File renamed without changes.
6 changes: 4 additions & 2 deletions botiverse/bots/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from botiverse.bots.basic_chatbot.basic_chatbot import basic_chatbot
from botiverse.bots.BasicBot.BasicBot import BasicBot
from botiverse.bots.WhizBot.WhizBot import WhizBot
from botiverse.bots.basic_TODS.basic_TODS import BasicTODS
from botiverse.bots.deep_TODS.deep_TODS import DeepTODS
from botiverse.bots.Vocalizer.Vocalizer import SpeechClassifier, Vocalizer
from botiverse.bots.VoiceBot.SpeechClassifier import SpeechClassifier
from botiverse.bots.VoiceBot.VoiceBot import VoiceBot
Empty file.
Loading

0 comments on commit 67cc7ab

Please sign in to comment.