diff --git a/cc b/cc
new file mode 100644
index 000000000..e69de29bb
diff --git a/llm-server/Dockerfile b/llm-server/Dockerfile
index 0e92a13e4..fe7cf7fe9 100644
--- a/llm-server/Dockerfile
+++ b/llm-server/Dockerfile
@@ -14,4 +14,4 @@ CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "--wait-for-client",
 # Production stage
 FROM common AS production
 EXPOSE 8002
-CMD ["python", "-m", "flask", "run", "--host=0.0.0.0", "--port=8002"]
+CMD ["python", "-m", "flask", "run", "--host=0.0.0.0", "--port=8002", "--reload"]
diff --git a/llm-server/custom_types/bot_message.py b/llm-server/custom_types/bot_message.py
index e07a83069..b77b6c0e2 100644
--- a/llm-server/custom_types/bot_message.py
+++ b/llm-server/custom_types/bot_message.py
@@ -1,10 +1,11 @@
-from typing import List
+from typing import List, Optional
 from langchain.pydantic_v1 import BaseModel, Field
 from langchain.output_parsers import PydanticOutputParser
 
 
 class BotMessage(BaseModel):
     bot_message: str = Field(description="Message from the bot")
     ids: List[str] = Field(description="List of IDs")
+    missing_information: Optional[str] = Field(description="In case of ambiguity, ask the user a follow-up question")
 
 
 # Set up a parser + inject instructions into the prompt template.
diff --git a/llm-server/requirements.txt b/llm-server/requirements.txt
index ab5e3c691..6b4d8aeca 100644
--- a/llm-server/requirements.txt
+++ b/llm-server/requirements.txt
@@ -114,6 +114,7 @@ pydantic==2.4.2
 pydantic_core==2.10.1
 pymongo==4.5.0
 PyMySQL==1.1.0
+pypdfium2==4.24.0
 PyPika==0.48.9
 PySocks==1.7.1
 python-dateutil==2.8.2
diff --git a/llm-server/routes/chat/chat_controller.py b/llm-server/routes/chat/chat_controller.py
index 4e5fdacff..2cdfaee18 100644
--- a/llm-server/routes/chat/chat_controller.py
+++ b/llm-server/routes/chat/chat_controller.py
@@ -159,13 +159,14 @@ async def send_chat():
         app=app_name,
     )
 
-    create_chat_history(str(bot.id), session_id, True, message)
-    create_chat_history(
-        str(bot.id),
-        session_id,
-        False,
-        response_data["response"] or response_data["error"],
-    )
+    if response_data["response"]:
+        create_chat_history(str(bot.id), session_id, True, message)
+        create_chat_history(
+            str(bot.id),
+            session_id,
+            False,
+            response_data["response"] or response_data["error"],
+        )
 
     return jsonify(
         {"type": "text", "response": {"text": response_data["response"]}}
diff --git a/llm-server/routes/root_service.py b/llm-server/routes/root_service.py
index fc8985410..eae88e92d 100644
--- a/llm-server/routes/root_service.py
+++ b/llm-server/routes/root_service.py
@@ -47,6 +47,34 @@ chat = get_chat_model(CHAT_MODELS.gpt_3_5_turbo_16k)
 
 
+def validate_steps(steps: List[str], swagger_doc: ResolvingParser):
+    try:
+        paths = swagger_doc.specification.get("paths", {})
+        operationIds: List[str] = []
+
+        for path in paths:
+            operations = paths[path]
+            for method in operations:
+                operation = operations[method]
+                operationId = operation.get("operationId")
+                if operationId:
+                    operationIds.append(operationId)
+
+        if not operationIds:
+            logger.warn("No operationIds found in the Swagger document.")
+            return False
+
+        if all(x in operationIds for x in steps):
+            return True
+        else:
+            logger.warn("Model has hallucinated a made-up operation id", steps=steps, operationIds=operationIds)
+            return False
+
+    except Exception as e:
+        logger.error(f"An error occurred: {str(e)}")
+        return False
+
+
 async def handle_request(
     text: str,
@@ -84,12 +112,25 @@ async def handle_request(
         prev_conversations=prev_conversations,
         flows=flows,
         bot_id=bot_id,
+        base_prompt=base_prompt
     )
+
+    if step.missing_information is not None and len(step.missing_information) >= 10:
+        return {
+            "error": None,
+            "response": step.missing_information
+        }
 
     if len(step.ids) > 0:
+        swagger_doc = get_swagger_doc(swagger_url)
+        fl = validate_steps(step.ids, swagger_doc)
+
+        if fl is False:
+            return {"error": None, "response": step.bot_message}
+
         response = await handle_api_calls(
             ids=step.ids,
-            swagger_doc=get_swagger_doc(swagger_url),
+            swagger_doc=swagger_doc,
             app=app,
             bot_id=bot_id,
             headers=headers,
diff --git a/llm-server/routes/workflow/extractors/convert_json_to_text.py b/llm-server/routes/workflow/extractors/convert_json_to_text.py
index 5e6928d76..36cf19439 100644
--- a/llm-server/routes/workflow/extractors/convert_json_to_text.py
+++ b/llm-server/routes/workflow/extractors/convert_json_to_text.py
@@ -14,7 +14,7 @@
 def convert_json_to_text(
     user_input: str,
-    api_response: str,
+    api_response: Dict[str, Any],
     api_request_data: Dict[str, Any],
     bot_id: str,
 ) -> str:
 
@@ -22,7 +22,7 @@ def convert_json_to_text(
     api_summarizer_template = None
     system_message = SystemMessage(
-        content="You are a chatbot that can understand API responses"
+        content="You are an AI assistant that can summarize API responses"
     )
 
     prompt_templates = load_prompts(bot_id)
     api_summarizer_template = (
@@ -35,15 +35,15 @@ def convert_json_to_text(
     messages = [
         system_message,
         HumanMessage(
-            content="You'll receive user input and server responses obtained by making calls to various APIs. You will also recieve a dictionary that specifies, the body, param and query param used to make those api calls. Your task is to transform the JSON response into a response that in an answer to the user input. You should inform the user about the filters that were used to make these api calls"
+            content="You'll receive user input and server responses obtained by making calls to various APIs. Your task is to summarize the API response so that it answers the user input. Be concise and accurate, and include references if present."
         ),
-        HumanMessage(content="Here is the user input: {}.".format(user_input)),
+        HumanMessage(content=user_input),
         HumanMessage(
             content="Here is the response from the apis: {}".format(api_response)
         ),
-        HumanMessage(
-            content="Here is the api_request_data: {}".format(api_request_data)
-        ),
+        # HumanMessage(
+        #     content="Here is the api_request_data: {}".format(api_request_data)
+        # ),
     ]
 
     result = chat(messages)
diff --git a/llm-server/routes/workflow/utils/process_conversation_step.py b/llm-server/routes/workflow/utils/process_conversation_step.py
index d66cacf5d..398da0416 100644
--- a/llm-server/routes/workflow/utils/process_conversation_step.py
+++ b/llm-server/routes/workflow/utils/process_conversation_step.py
@@ -28,25 +28,15 @@ def process_conversation_step(
     prev_conversations: List[BaseMessage],
     flows: List[WorkflowFlowType],
     bot_id: str,
+    base_prompt: str
 ):
+    logger.info("planner data", context=context, api_summaries=api_summaries, prev_conversations=prev_conversations, flows=flows)
     if not session_id:
         raise ValueError("Session id must be defined for chat conversations")
-    prompt_templates = load_prompts(bot_id)
-    system_message_classifier = SystemMessage(
-        content="You are a helpful ai assistant. User will give you two things, a list of api's and some useful information, called context."
-    )
-    if app and prompt_templates.system_message is not None:
-        system_message_classifier = SystemMessage(
-            content=prompt_templates.system_message
-        )
-        logger.debug(
-            "System message classification",
-            incident="system_message_classifier",
-            app=app,
-            context=context,
-        )
     messages: List[BaseMessage] = []
-    messages.append(system_message_classifier)
+    messages.append(SystemMessage(content=base_prompt))
+
+    messages.append(SystemMessage(content="You will have access to a list of APIs and some useful information, called context."))
 
     if len(prev_conversations) > 0:
         messages.extend(prev_conversations)
@@ -81,10 +71,11 @@
 
     messages.append(
         HumanMessage(
-            content="""Based on the information provided to you I want you to answer the questions that follow. Your should respond with a json that looks like the following - 
+            content="""Based on the information provided to you, I want you to answer the questions that follow. You should respond with a JSON that looks like the following; you must always use the operationIds provided in the API summaries. Do not make up an operation id -
             {{
                 "ids": ["list", "of", "operationIds", "for apis to be called"],
-                "bot_message": "your response based on the instructions provided at the beginning"
+                "bot_message": "your response based on the instructions provided at the beginning",
+                "missing_information": "Optional field; in case of ambiguity where the user input is not sufficient to make the API call, ask a follow-up question. A follow-up question should only be asked once per user input"
             }}
             """
         )
@@ -94,6 +85,9 @@
     )
 
     messages.append(HumanMessage(content=user_requirement))
+
+
+    logger.info("messages array", messages=messages)
 
     content = cast(str, chat(messages=messages).content)
@@ -110,7 +104,7 @@
     except OutputParserException as e:
         logger.error("Failed to parse json", data=content)
         logger.error("Failed to parse json", err=str(e))
-        return BotMessage(bot_message=content, ids=[])
+        return BotMessage(bot_message=content, ids=[], missing_information=None)
     except Exception as e:
         logger.error("unexpected error occured", err=str(e))
-        return BotMessage(ids=[], bot_message=str(e))
+        return BotMessage(ids=[], bot_message=str(e), missing_information=None)
diff --git a/llm-server/routes/workflow/utils/run_openapi_ops.py b/llm-server/routes/workflow/utils/run_openapi_ops.py
index c857fa47b..8e4563bbf 100644
--- a/llm-server/routes/workflow/utils/run_openapi_ops.py
+++ b/llm-server/routes/workflow/utils/run_openapi_ops.py
@@ -47,7 +47,7 @@ async def run_openapi_operations(
             api_request_data[operation_id] = api_payload.__dict__
             api_response = None
             try:
-                logger.info("Making API call", incident="make_api_call", payload=json.dumps(api_payload.body_schema))
+                logger.info("Making API call", incident="make_api_call", body=json.dumps(api_payload.body_schema), params=api_payload.query_params)
                 api_response = make_api_request(
                     headers=headers, **api_payload.__dict__
@@ -60,28 +60,34 @@
             except Exception as e:
                 logger.error("Error occurred while making API call", incident="make_api_call_failed", error=str(e))
-                return {}
+                raise e
 
+            logger.info("Got the following api response", text=api_response.text)
             # if a custom transformer function is defined for this operationId use that, otherwise forward it to the llm
             # so we don't necessarily have to defined mappers for all api endpoints
             partial_json = load_json_config(app, operation_id)
-            logger.info("Loading JSON configuration", incident="load_json_config", json_config=json.dumps(partial_json))
 
             if not partial_json:
-                logger.error(
-                    "Failed to find a config map. Consider adding a config map for this operation id",
-                    incident="load_json_config",
-                    error="Failed to find a config map, consider adding a config map for this operation id",
+                logger.warn(
+                    "Config map is not defined for this operationId",
+                    incident="config_map_undefined",
                     operation_id=operation_id,
+                    app=app
                 )
-                record_info[operation_id] = transform_api_response_from_schema(
-                    api_payload.endpoint or "", api_response.text
-                )
+                record_info[operation_id] = api_response.text
+
+                # Removed this because it slows down the bot response instead of speeding it up
+                # record_info[operation_id] = transform_api_response_from_schema(
+                #     api_payload.endpoint or "", api_response.text
+                # )
+
+                pass
             else:
                 logger.info(
                     "API Response",
-                    incident="api_response",
-                    text=api_response.text,
-                    action="Truncate unnecessary info using json_config provided",
+                    incident="log_api_response",
+                    api_response=api_response.text,
+                    json_config_used=partial_json,
+                    next_action="summarize_with_partial_json",
                 )
                 api_json = json.loads(api_response.text)
                 record_info[operation_id] = json.dumps(
@@ -91,19 +97,13 @@
 
     except Exception as e:
-        payload = json.dumps(
-            {
-                "text": text,
-                "headers": headers,
-                "server_base_url": server_base_url,
-                "app": app,
-            }
-        )
-
         logger.error(
             "Error occurred during workflow check in store",
-            incident="check_workflow_in_store",
-            payload=payload,
+            incident="check_workflow_in_store",
+            text=text,
+            headers=headers,
+            server_base_url=server_base_url,
+            app=app,
             error=str(e),
         )
 
     return convert_json_to_text(text, record_info, api_request_data, bot_id=bot_id)
diff --git a/llm-server/shared/utils/opencopilot_utils/embedding_type.py b/llm-server/shared/utils/opencopilot_utils/embedding_type.py
index ee06ca72f..09eefbd6e 100644
--- a/llm-server/shared/utils/opencopilot_utils/embedding_type.py
+++ b/llm-server/shared/utils/opencopilot_utils/embedding_type.py
@@ -6,4 +6,5 @@ class EmbeddingProvider(Enum):
     BARD = "bard"
     azure = "azure"
     llama2 = "llama2"
+    openchat = "openchat"
 
diff --git a/llm-server/shared/utils/opencopilot_utils/get_embeddings.py b/llm-server/shared/utils/opencopilot_utils/get_embeddings.py
index 37062a267..4ba84031b 100644
--- a/llm-server/shared/utils/opencopilot_utils/get_embeddings.py
+++ b/llm-server/shared/utils/opencopilot_utils/get_embeddings.py
@@ -1,11 +1,17 @@
+from functools import lru_cache
 from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.embeddings.ollama import OllamaEmbeddings
 from .embedding_type import EmbeddingProvider
 import os
 from langchain.embeddings.base import Embeddings
-
+from utils.get_logger import CustomLogger
 import os, warnings
 
+logger = CustomLogger(module_name=__name__)
+
+LOCAL_IP = os.getenv("LOCAL_IP", "host.docker.internal")
+
 def get_embedding_provider():
     """Gets the chosen embedding provider from environment variables."""
     return os.environ.get("EMBEDDING_PROVIDER")
 
@@ -31,7 +37,7 @@ def get_openai_embedding():
     """Gets embeddings using the OpenAI embedding provider."""
     openai_api_key = os.environ.get("OPENAI_API_KEY")
-    return OpenAIEmbeddings(openai_api_key=openai_api_key, chunk_size=1)
+    return OpenAIEmbeddings(openai_api_key=openai_api_key)
 
 
 def choose_embedding_provider():
     """Chooses and returns the appropriate embedding provider instance."""
@@ -40,6 +46,10 @@ def choose_embedding_provider():
     if embedding_provider == EmbeddingProvider.azure.value:
         return get_azure_embedding()
 
+    elif embedding_provider == EmbeddingProvider.openchat.value:
+        logger.info("Got ollama embedding provider", provider=embedding_provider)
+        return OllamaEmbeddings(base_url=f"{LOCAL_IP}:11434", model="openchat")
+
     elif embedding_provider == EmbeddingProvider.OPENAI.value or embedding_provider is None:
         if embedding_provider is None:
             warnings.warn("No embedding provider specified. Defaulting to OpenAI.")
@@ -53,6 +63,7 @@ def choose_embedding_provider():
     )
 
 # Main function to get embeddings
+@lru_cache(maxsize=1)
 def get_embeddings() -> Embeddings:
     """Gets embeddings using the chosen embedding provider."""
     return choose_embedding_provider()
\ No newline at end of file
diff --git a/llm-server/shared/utils/opencopilot_utils/get_vector_store.py b/llm-server/shared/utils/opencopilot_utils/get_vector_store.py
index 0e263dd37..264b48484 100644
--- a/llm-server/shared/utils/opencopilot_utils/get_vector_store.py
+++ b/llm-server/shared/utils/opencopilot_utils/get_vector_store.py
@@ -27,10 +27,10 @@ def get_vector_store(options: StoreOptions) -> VectorStore:
         vector_store = Qdrant(
             client, collection_name=options.namespace, embeddings=embedding
         )
-
+        # vector_store = Qdrant.from_documents([], embedding, url='http://localhost:6333', collection=options.namespace)
     else:
         raise ValueError("Invalid STORE environment variable value")
 
-    return vector_store
\ No newline at end of file
+    return vector_store
diff --git a/llm-server/utils/chat_models.py b/llm-server/utils/chat_models.py
index d1c8777ca..3b6c37351 100644
--- a/llm-server/utils/chat_models.py
+++ b/llm-server/utils/chat_models.py
@@ -9,6 +9,7 @@ class ChatModels(NamedTuple):
     nous_hermes = "nous-hermes"
     llama2: str = "llama2"
     xwinlm = "xwinlm"
+    openchat = "openchat"
 
 
 CHAT_MODELS: ChatModels = ChatModels()
diff --git a/llm-server/utils/get_chat_model.py b/llm-server/utils/get_chat_model.py
index 7d26ac47f..da07ac8c9 100644
--- a/llm-server/utils/get_chat_model.py
+++ b/llm-server/utils/get_chat_model.py
@@ -1,3 +1,4 @@
+from typing import Dict
 from langchain.chat_models import ChatOpenAI
 from langchain.chat_models.base import BaseChatModel
 from langchain.chat_models import ChatOllama, ChatAnthropic
@@ -8,45 +9,55 @@
 localip = os.getenv("LOCAL_IP", "localhost")
 
+
+# Create a dictionary to store cached instances
+model_cache: Dict[str, BaseChatModel] = {}
+
+
 def get_chat_model(prop: str) -> BaseChatModel:
+    # Check if the model is already cached
+    if prop in model_cache:
+        return model_cache[prop]
+
     if prop == CHAT_MODELS.gpt_3_5_turbo:
-        return ChatOpenAI(
+        model = ChatOpenAI(
             openai_api_key=os.getenv("OPENAI_API_KEY"),
             model=CHAT_MODELS.gpt_3_5_turbo,
             temperature=0,
         )
-
     elif prop == CHAT_MODELS.gpt_3_5_turbo_16k:
-        return ChatOpenAI(
+        model = ChatOpenAI(
             openai_api_key=os.getenv("OPENAI_API_KEY"),
             model=CHAT_MODELS.gpt_3_5_turbo_16k,
             temperature=0,
         )
     elif prop == CHAT_MODELS.claude_2_0:
-        return ChatAnthropic(
-            anthropic_api_key=os.getenv("CLAUDE_API_KEY", "CLAUDE_API_KEY")  # type: ignore
+        model = ChatAnthropic(
+            anthropic_api_key=os.getenv("CLAUDE_API_KEY", "CLAUDE_API_KEY")
         )
     elif prop == CHAT_MODELS.mistral_openorca:
-        return ChatOllama(
+        model = ChatOllama(
             base_url=f"{localip}:11434",
             model=CHAT_MODELS.mistral_openorca,
             temperature=0,
         )
     elif prop == CHAT_MODELS.nous_hermes:
-        return ChatOllama(
+        model = ChatOllama(
             base_url=f"{localip}:11434",
             model=CHAT_MODELS.nous_hermes,
             temperature=0,
         )
     elif prop == CHAT_MODELS.xwinlm:
-        return ChatOllama(
+        model = ChatOllama(
base_url=f"{localip}:11434", model=CHAT_MODELS.xwinlm, temperature=0, ) elif prop == "llama2": - return ChatOpenAI(model="llama2", temperature=0) + model = ChatOpenAI(model="llama2", temperature=0) else: - raise ValueError( - "Couldn't match one of the supported models, please refer to llm-server/readme.md" - ) + raise ValueError("Couldn't match one of the supported models.") + + # Cache the initialized model + model_cache[prop] = model + + return model diff --git a/llm-server/utils/vector_db/add_workflow.py b/llm-server/utils/vector_db/add_workflow.py index bd9e4f976..937218619 100644 --- a/llm-server/utils/vector_db/add_workflow.py +++ b/llm-server/utils/vector_db/add_workflow.py @@ -1,12 +1,7 @@ from routes.workflow.dto.workflow_dto import Workflow from typing import Any , List from opencopilot_types.workflow_type import WorkflowDataType -from shared.utils.opencopilot_utils import ( - StoreOptions, - get_embeddings, - init_vector_store, - get_vector_store -) +from shared.utils.opencopilot_utils import (StoreOptions,get_vector_store) from langchain.docstore.document import Document @@ -27,3 +22,4 @@ def add_workflow_data_to_qdrant( vector_ids = vector_store.add_documents(docs) return vector_ids + diff --git a/llm-server/workers/tasks/process_pdfs.py b/llm-server/workers/tasks/process_pdfs.py index 556a5607c..7d3898bf1 100644 --- a/llm-server/workers/tasks/process_pdfs.py +++ b/llm-server/workers/tasks/process_pdfs.py @@ -4,20 +4,31 @@ from langchain.document_loaders import PyPDFium2Loader from shared.models.opencopilot_db.pdf_data_sources import insert_pdf_data_source, update_pdf_data_source_status from shared.utils.opencopilot_utils import get_embeddings, init_vector_store, StoreOptions, get_file_path +from shared.utils.opencopilot_utils import get_vector_store +from utils.get_logger import CustomLogger +logger = CustomLogger(module_name=__name__) + +embeddings = get_embeddings() +kb_vector_store = get_vector_store(StoreOptions("knowledgebase")) @shared_task def process_pdf(file_name: str, bot_id: str): try: + logger.info("Pdf task picked up", file_name=file_name, bot_id=bot_id) insert_pdf_data_source(chatbot_id=bot_id, file_name=file_name, status="PENDING") loader = PyPDFium2Loader(get_file_path(file_name)) raw_docs = loader.load() + + # clean the data received from pdf document before passing it text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200, length_function=len ) docs = text_splitter.split_documents(raw_docs) - embeddings = get_embeddings() - init_vector_store(docs, embeddings, StoreOptions(namespace="knowledgebase", metadata={"bot_id": bot_id})) - + + for doc in docs: + doc.metadata["bot_id"] = bot_id + + kb_vector_store.add_documents(docs) update_pdf_data_source_status(chatbot_id=bot_id, file_name=file_name, status="COMPLETED") except Exception as e: update_pdf_data_source_status(chatbot_id=bot_id, file_name=file_name, status="FAILED")