🚨 Major Refactoring to Examples and Names

TheBotiverse · Jul 24, 2023 · 67cc7ab · 67cc7ab
1 parent 0842e3f
commit 67cc7ab
Show file tree

Hide file tree

Showing 44 changed files with 1,066 additions and 4,318 deletions.
diff --git a/botiverse/__init__.py b/botiverse/__init__.py
@@ -1,4 +1,2 @@
 """Entry point for the botiverse package."""
 from botiverse.gui.gui import chat_gui
-#from botiverse.TODS.DNN_DST.DNN_DST import DNNDST
-#from botiverse.TODS.DNN_TODS import DNNTODS
diff --git a/...verse/bots/basic_chatbot/basic_chatbot.py → botiverse/bots/BasicBot/BasicBot.py b/...verse/bots/basic_chatbot/basic_chatbot.py → botiverse/bots/BasicBot/BasicBot.py
@@ -8,7 +8,7 @@
 stemmer = PorterStemmer()
 
 
-class basic_chatbot:
+class BasicBot:
     '''
     An interface for a basic chatbot model suitable for small datasets such as FAQs. Note that the
     underlying model is not sequential (either an NN or an SVM).
@@ -42,7 +42,6 @@ def __init__(self, machine='nn', repr='tf-idf'):
             self.transformer = repr
         else:
             raise Exception('Representation must either be one of those the basic chatbot support or a custom one that implement the transform API. Found was ' + repr)
-
 
         self.tf = None
         self.idf = None
@@ -84,13 +83,22 @@ def setup_data(self):
             y[i] = classes.index(tag)
         y = np.array(y)
         return X, y
+
+    def read_data(self, path):
+        """
+        Read the data from a JSON file for the chatbot to train on later.
+        :param path: Path to the JSON file containing the training data
+        :type path: str
+        """
+        with open(path, 'r') as f:
+            self.raw_data = json.load(f) 
 
-    def train(self, path, max_epochs=None, early_stop=False, **kwargs):
+        self.X, self.y = self.setup_data()
+
+    def train(self, max_epochs=None, early_stop=False, **kwargs):
         """
         Train the chatbot model with the given JSON data.
         
-        :param data: A stringfied JSON object containing the training data 
-        :type number: string
         :param early_stop: Whether to use early stopping or not
         :type early_stop: bool
         :param provided_model: A model to use instead of the default one
@@ -101,11 +109,7 @@ def train(self, path, max_epochs=None, early_stop=False, **kwargs):
         :return: None
         :rtype: NoneType
         """
-        with open(path, 'r') as f:
-            self.raw_data = json.load(f) 
-
-        X, y = self.setup_data()
-
+        X, y = self.X, self.y
         if self.machine == 'nn':
             self.model = NeuralNet(structure=[X.shape[1], 12, len(self.classes)], activation='sigmoid')
             max_epochs = max_epochs if max_epochs is not None else 30 * len(self.classes)
@@ -119,7 +123,7 @@ def train(self, path, max_epochs=None, early_stop=False, **kwargs):
             self.model = SVM(kernel='linear', C=700)
             self.model.fit(X, y, eval_train=True)
         elif type(self.machine) != str:
-                self.model.fit(X, y, **kwargs)
+                self.machine.fit(X, y, **kwargs)
         else:
             raise Exception('Machine must either be one of those the basic chatbot support or a custom one that implement the fit API. Found was ' + self.machine)
 

diff --git a/botiverse/bots/Vocalizer/__init__.py → botiverse/bots/BasicBot/__init__.py b/botiverse/bots/Vocalizer/__init__.py → botiverse/bots/BasicBot/__init__.py
diff --git a/botiverse/bots/Vocalizer/Vocalizer.py → botiverse/bots/VoiceBot/SpeechClassifier.py b/botiverse/bots/Vocalizer/Vocalizer.py → botiverse/bots/VoiceBot/SpeechClassifier.py
@@ -1,82 +1,3 @@
-import numpy as np
-import json
-from gtts import gTTS
-import tempfile
-import os
-from botiverse.models import TTS
-from playsound import playsound
-
-from botiverse.models import LSTMClassifier
-from botiverse.preprocessors import Vocalize, Wav2Vec, Wav2Text, BertEmbedder, Frequency, BertSentenceEmbedder
-from botiverse.bots.Vocalizer.utils import voice_input
-
-
-
-class Vocalizer():
-    '''An interface for the vocalizer chatbot which simulates a call with a customer service bot.'''
-    def __init__(self,  call_json_path, repr='BERT-Sentence'):
-        ''' 
-        Load the call data from a json file.
-        :param call_json_path: The path to the json file containing the call state machine.
-        '''
-        with open(call_json_path, 'r') as file:
-            call_json = file.read()
-        self.call_data = json.loads(call_json)
-        self.current_node = 'A'
-        self.wav2text = Wav2Text()
-        if repr == 'BERT':
-            self.bert_embeddings = BertEmbedder()
-        elif repr == 'BERT-Sentence':
-            self.bert_sentence_embeddings = BertSentenceEmbedder()
-        else:
-            raise Exception(f"Invalid representation {repr}. Expected BERT or BERT-Sentence.")        
-
-    def generate_speech(self, text, offline=False):
-        '''Use Google's TTS or offline FastSpeech 1.0 to play speech from the given text.
-        :param text: The text to be converted into speech.
-        :param offline: Whether to use offline FastSpeech 1.0 to generate speech.
-        '''
-        if offline:
-            tts = TTS()
-            tts.speak(text)
-        else:
-            tts = gTTS(text=text, lang='en', tld="us", slow=False)
-            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
-                temp_filename = temp_audio.name
-                tts.save(temp_filename)
-                playsound(temp_filename)
-
-    def simulate_call(self):
-        '''
-        Simulate a call with a customer service bot as driven by the call state machine.
-        '''
-        while True:
-            if self.current_node == 'Z':
-                # the final state has a different structure, bot only speaks and then the call ends
-                bot_message = self.call_data[self.current_node]['Bot']
-                self.generate_speech(bot_message)
-                break
-
-            # 1 - get the current node's data and from that get the message the bot should speak
-            node_data = self.call_data[self.current_node]
-            bot_message = node_data['Bot']
-            self.generate_speech(bot_message)
-
-            # 2 - get the intent options that the bot expects from the user and classify the user's response
-            options = node_data['Options']
-            intents = [option['Intent'] for option in options]
-            max_dur = node_data['max_duration']
-            human_resp = voice_input(record_time=int(max_dur))
-            human_resp = self.wav2text.transcribe(human_resp)
-            selected_ind, score = self.bert_sentence_embeddings.closest_sentence(human_resp, intents, retun_ind=True)
-            print(f"you said: {human_resp} and the bot decided that you meant {intents[selected_ind]} with a score of {score}")
-
-            # 3 - speak according to the chosen option
-            speak_message = options[selected_ind]['Speak']
-            self.generate_speech(speak_message)
-
-            # 4 - go to the next state
-            self.current_node = options[selected_ind]['Next']
 
 
 class SpeechClassifier():

diff --git a/botiverse/bots/VoiceBot/VoiceBot.py b/botiverse/bots/VoiceBot/VoiceBot.py
@@ -0,0 +1,79 @@
+import numpy as np
+import json
+from gtts import gTTS
+import tempfile
+import os
+from botiverse.models import TTS
+from playsound import playsound
+
+from botiverse.models import LSTMClassifier
+from botiverse.preprocessors import Vocalize, Wav2Vec, Wav2Text, BertEmbedder, Frequency, BertSentenceEmbedder
+from botiverse.bots.VoiceBot.utils import voice_input
+
+
+class VoiceBot():
+    '''An interface for the vocalizer chatbot which simulates a call with a customer service bot.'''
+    def __init__(self,  call_json_path, repr='BERT-Sentence'):
+        ''' 
+        Load the call data from a json file.
+        :param call_json_path: The path to the json file containing the call state machine.
+        '''
+        with open(call_json_path, 'r') as file:
+            call_json = file.read()
+        self.call_data = json.loads(call_json)
+        self.current_node = 'A'
+        self.wav2text = Wav2Text()
+        if repr == 'BERT':
+            self.bert_embeddings = BertEmbedder()
+        elif repr == 'BERT-Sentence':
+            self.bert_sentence_embeddings = BertSentenceEmbedder()
+        else:
+            raise Exception(f"Invalid representation {repr}. Expected BERT or BERT-Sentence.")        
+
+    def generate_speech(self, text, offline=False):
+        '''Use Google's TTS or offline FastSpeech 1.0 to play speech from the given text.
+        :param text: The text to be converted into speech.
+        :param offline: Whether to use offline FastSpeech 1.0 to generate speech.
+        '''
+        if offline:
+            tts = TTS()
+            tts.speak(text)
+        else:
+            tts = gTTS(text=text, lang='en', tld="us", slow=False)
+            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
+                temp_filename = temp_audio.name
+                tts.save(temp_filename)
+                playsound(temp_filename)
+
+    def simulate_call(self):
+        '''
+        Simulate a call with a customer service bot as driven by the call state machine.
+        '''
+        while True:
+            if self.current_node == 'Z':
+                # the final state has a different structure, bot only speaks and then the call ends
+                bot_message = self.call_data[self.current_node]['Bot']
+                self.generate_speech(bot_message)
+                break
+
+            # 1 - get the current node's data and from that get the message the bot should speak
+            node_data = self.call_data[self.current_node]
+            bot_message = node_data['Bot']
+            self.generate_speech(bot_message)
+
+            # 2 - get the intent options that the bot expects from the user and classify the user's response
+            options = node_data['Options']
+            intents = [option['Intent'] for option in options]
+            max_dur = node_data['max_duration']
+            human_resp = voice_input(record_time=int(max_dur))
+            human_resp = self.wav2text.transcribe(human_resp)
+            selected_ind, score = self.bert_sentence_embeddings.closest_sentence(human_resp, intents, retun_ind=True)
+            print(f"you said: {human_resp} and the bot decided that you meant {intents[selected_ind]} with a score of {score}")
+
+            # 3 - speak according to the chosen option
+            speak_message = options[selected_ind]['Speak']
+            self.generate_speech(speak_message)
+
+            # 4 - go to the next state
+            self.current_node = options[selected_ind]['Next']
+
diff --git a/botiverse/bots/WhizBot_BERT/__init__.py → botiverse/bots/VoiceBot/__init__.py b/botiverse/bots/WhizBot_BERT/__init__.py → botiverse/bots/VoiceBot/__init__.py
diff --git a/botiverse/bots/Vocalizer/utils.py → botiverse/bots/VoiceBot/utils.py b/botiverse/bots/Vocalizer/utils.py → botiverse/bots/VoiceBot/utils.py
@@ -1,4 +1,4 @@
-# import pyaudio
+import pyaudio
 import wave
 from array import array
 from tqdm import tqdm
@@ -11,8 +11,7 @@ def voice_input(record_time=3, voice_threshold=900, save_path='sample.wav'):
     :param save_path: The path to save the audio file to.
     :return: The path to the audio file.
     '''
-
-    """
+    # """"
     #instantiate the pyaudio
     audio = pyaudio.PyAudio()
 
@@ -46,5 +45,5 @@ def voice_input(record_time=3, voice_threshold=900, save_path='sample.wav'):
     wavfile.close()
 
     return save_path
-    """
-    pass
+    # """"
+    #pass
diff --git a/botiverse/bots/WhizBot/WhizBot.py b/botiverse/bots/WhizBot/WhizBot.py
@@ -0,0 +1,90 @@
+from botiverse.bots.WhizBot.WhizBot_GRU import WhizBot_GRU
+from botiverse.bots.WhizBot.WhizBot_BERT import WhizBot_BERT
+
+class WhizBot:
+    '''
+    A class that provides an interface for the WhizBot-BERT and WhizBot-GRU models.
+    '''
+    def __init__(self, repr='BERT'):
+        """
+        Initializes WhizBot and sets its representation type.
+        :param repr: The representation type of the WhizBot model. Either "BERT" or "GRU".
+        :type repr: str
+        """
+        if repr == 'BERT':
+            self.bot = WhizBot_BERT()
+        elif repr == 'GRU':
+            self.bot = WhizBot_GRU()
+        else:
+            raise ValueError('Invalid representation type for WhizBot. Please choose either "BERT" or "GRU".')
+
+
+
+    def read_data(self, file_path):
+        """
+        Reads and pre-processes the data, sets up the model based on the data and prepares the train-validation split.
+
+        :param file_path: The path to the file that contains the dataset.
+        :type file_path: str
+
+        :returns: None
+        """
+        self.bot.read_data(file_path)
+
+    def train(self, epochs=10, batch_size=32):
+        """
+        Trains the model using the training dataset.
+
+        :param epochs: The number of training epochs.
+        :type epochs: int
+
+        :param batch_size: The number of training examples used to make one parameter update.
+        :type batch_size: int
+
+        :returns: None
+        """
+        self.bot.train(epochs, batch_size)
+
+    def validation(self, batch_size=32):
+        """
+        Tests the model performance using the validation dataset and calculates the accuracy.
+        
+        :param batch_size: The number of validation examples processed in each batch.
+        :type batch_size: int
+
+        :returns: None
+        """
+        self.bot.validation(batch_size)
+
+    def infer(self, string):
+        """
+        Performs inference using the model.
+
+        :param string: The input string to perform inference on.
+        :type string: str
+
+        :returns: A random response from the response list of the predicted label.
+        """
+        return self.bot.infer(string)
+
+    def save(self, path):
+        """
+        Saves the model parameters to the given path.
+
+        :param path: The path where the model parameters will be saved.
+        :type path: str
+
+        :returns: None
+        """
+        self.bot.save(path)
+
+    def load(self, path):
+        """
+        Loads the model parameters from the given path.
+
+        :param path: The path from where the model parameters will be loaded.
+        :type path: str
+
+        :returns: None
+        """
+        self.bot.load(path)
diff --git a/botiverse/bots/WhizBot_BERT/WhizBot_BERT.py → botiverse/bots/WhizBot/WhizBot_BERT.py b/botiverse/bots/WhizBot_BERT/WhizBot_BERT.py → botiverse/bots/WhizBot/WhizBot_BERT.py
@@ -51,16 +51,18 @@ def train(self, epochs=10, batch_size=32):
         :returns: None
         """
         self.model.train()
-        for epoch in range(epochs):
-            for i in tqdm(range(0, len(self.train_data), batch_size)):
+        pbar = tqdm(range(epochs), leave=True)
+        for epoch in pbar:
+            for i in range(0, len(self.train_data), batch_size):
                 self.model.zero_grad()
                 batch_texts = torch.cat(self.train_data['text'][i:i+batch_size].tolist()).to(self.device)
                 batch_labels = torch.cat(self.train_data['label'][i:i+batch_size].tolist()).to(self.device)
                 output = self.model(batch_texts)
                 loss = self.criterion(output, batch_labels)
                 loss.backward()
                 self.optimizer.step()
-            print("Epoch: " + str(epoch) + " Loss: " + str(loss.item()))
+            pbar.set_description("Epoch: " + str(epoch) + " Loss: " + str(loss.item()))
+
 
     def validation(self, batch_size=32):
         """
@@ -75,7 +77,7 @@ def validation(self, batch_size=32):
         total = 0
         self.model.eval()
         with torch.no_grad():
-            for i in tqdm(range(0, len(self.validation_data), batch_size)):
+            for i in tqdm(range(0, len(self.validation_data), batch_size), leave=True):
                 batch_texts = torch.cat(self.validation_data['text'][i:i+batch_size].tolist()).to(self.device)
                 batch_labels = torch.cat(self.validation_data['label'][i:i+batch_size].tolist()).to(self.device)
                 outputs = self.model(batch_texts)

diff --git a/botiverse/bots/WhizBot_GRU/WhizBot_GRU.py → botiverse/bots/WhizBot/WhizBot_GRU.py b/botiverse/bots/WhizBot_GRU/WhizBot_GRU.py → botiverse/bots/WhizBot/WhizBot_GRU.py
diff --git a/botiverse/bots/WhizBot_GRU/__init__.py → botiverse/bots/WhizBot/__init__.py b/botiverse/bots/WhizBot_GRU/__init__.py → botiverse/bots/WhizBot/__init__.py
diff --git a/botiverse/bots/__init__.py b/botiverse/bots/__init__.py
@@ -1,4 +1,6 @@
-from botiverse.bots.basic_chatbot.basic_chatbot import basic_chatbot
+from botiverse.bots.BasicBot.BasicBot import BasicBot
+from botiverse.bots.WhizBot.WhizBot import WhizBot
 from botiverse.bots.basic_TODS.basic_TODS import BasicTODS
 from botiverse.bots.deep_TODS.deep_TODS import DeepTODS
-from botiverse.bots.Vocalizer.Vocalizer import SpeechClassifier, Vocalizer
+from botiverse.bots.VoiceBot.SpeechClassifier import SpeechClassifier
+from botiverse.bots.VoiceBot.VoiceBot import VoiceBot
diff --git a/botiverse/bots/basic_chatbot/__init__.py b/botiverse/bots/basic_chatbot/__init__.py