diff --git a/cc b/cc
new file mode 100644
index 000000000..e69de29bb
diff --git a/llm-server/Dockerfile b/llm-server/Dockerfile
index 0e92a13e4..fe7cf7fe9 100644
--- a/llm-server/Dockerfile
+++ b/llm-server/Dockerfile
@@ -14,4 +14,4 @@ CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "--wait-for-client",
 # Production stage
 FROM common AS production
 EXPOSE 8002
-CMD ["python", "-m", "flask", "run", "--host=0.0.0.0", "--port=8002"]
+CMD ["python", "-m", "flask", "run", "--host=0.0.0.0", "--port=8002", "--reload"]
diff --git a/llm-server/custom_types/bot_message.py b/llm-server/custom_types/bot_message.py
index e07a83069..b77b6c0e2 100644
--- a/llm-server/custom_types/bot_message.py
+++ b/llm-server/custom_types/bot_message.py
@@ -1,10 +1,11 @@
-from typing import List
+from typing import List, Optional
 from langchain.pydantic_v1 import BaseModel, Field
 from langchain.output_parsers import PydanticOutputParser
 
 
 class BotMessage(BaseModel):
     bot_message: str = Field(description="Message from the bot")
     ids: List[str] = Field(description="List of IDs")
+    missing_information: Optional[str] = Field(description="In case of ambiguity, ask the user a follow-up question")
 
 
 # Set up a parser + inject instructions into the prompt template.
diff --git a/llm-server/requirements.txt b/llm-server/requirements.txt
index ab5e3c691..6b4d8aeca 100644
--- a/llm-server/requirements.txt
+++ b/llm-server/requirements.txt
@@ -114,6 +114,7 @@ pydantic==2.4.2
 pydantic_core==2.10.1
 pymongo==4.5.0
 PyMySQL==1.1.0
+pypdfium2==4.24.0
 PyPika==0.48.9
 PySocks==1.7.1
 python-dateutil==2.8.2
diff --git a/llm-server/routes/chat/chat_controller.py b/llm-server/routes/chat/chat_controller.py
index 4e5fdacff..2cdfaee18 100644
--- a/llm-server/routes/chat/chat_controller.py
+++ b/llm-server/routes/chat/chat_controller.py
@@ -159,13 +159,14 @@ async def send_chat():
         app=app_name,
     )
 
-    create_chat_history(str(bot.id), session_id, True, message)
-    create_chat_history(
-        str(bot.id),
-        session_id,
-        False,
-        response_data["response"] or response_data["error"],
-    )
+    if response_data["response"]:
+        create_chat_history(str(bot.id), session_id, True, message)
+        create_chat_history(
+            str(bot.id),
+            session_id,
+            False,
+            response_data["response"] or response_data["error"],
+        )
 
     return jsonify(
         {"type": "text", "response": {"text": response_data["response"]}}
diff --git a/llm-server/routes/root_service.py b/llm-server/routes/root_service.py
index fc8985410..eae88e92d 100644
--- a/llm-server/routes/root_service.py
+++ b/llm-server/routes/root_service.py
@@ -47,6 +47,34 @@ chat = get_chat_model(CHAT_MODELS.gpt_3_5_turbo_16k)
 
 
+def validate_steps(steps: List[str], swagger_doc: ResolvingParser):
+    try:
+        paths = swagger_doc.specification.get("paths", {})
+        operationIds: List[str] = []
+
+        for path in paths:
+            operations = paths[path]
+            for method in operations:
+                operation = operations[method]
+                operationId = operation.get("operationId")
+                if operationId:
+                    operationIds.append(operationId)
+
+        if not operationIds:
+            logger.warn("No operationIds found in the Swagger document.")
+            return False
+
+        if all(x in operationIds for x in steps):
+            return True
+        else:
+            logger.warn("Model has hallucinated a made-up operation id", steps=steps, operationIds=operationIds)
+            return False
+
+    except Exception as e:
+        logger.error(f"An error occurred: {str(e)}")
+        return False
+
+
 async def handle_request(
     text: str,
@@ -84,12 +112,25 @@ async def handle_request(
         prev_conversations=prev_conversations,
         flows=flows,
         bot_id=bot_id,
+        base_prompt=base_prompt
     )
+
+    if step.missing_information is not None and len(step.missing_information) >= 10:
+        return {
+            "error": None,
+            "response": step.missing_information
+        }
 
     if len(step.ids) > 0:
+        swagger_doc = get_swagger_doc(swagger_url)
+        fl = validate_steps(step.ids, swagger_doc)
+
+        if fl is False:
+            return {"error": None, "response": step.bot_message}
+
         response = await handle_api_calls(
             ids=step.ids,
-            swagger_doc=get_swagger_doc(swagger_url),
+            swagger_doc=swagger_doc,
             app=app,
             bot_id=bot_id,
             headers=headers,
diff --git a/llm-server/routes/workflow/extractors/convert_json_to_text.py b/llm-server/routes/workflow/extractors/convert_json_to_text.py
index 5e6928d76..36cf19439 100644
--- a/llm-server/routes/workflow/extractors/convert_json_to_text.py
+++ b/llm-server/routes/workflow/extractors/convert_json_to_text.py
@@ -14,7 +14,7 @@
 def convert_json_to_text(
     user_input: str,
-    api_response: str,
+    api_response: Dict[str, Any],
     api_request_data: Dict[str, Any],
     bot_id: str,
 ) -> str:
 
@@ -22,7 +22,7 @@ def convert_json_to_text(
     api_summarizer_template = None
     system_message = SystemMessage(
-        content="You are a chatbot that can understand API responses"
+        content="You are an AI assistant that can summarize API responses"
     )
 
     prompt_templates = load_prompts(bot_id)
     api_summarizer_template = (
@@ -35,15 +35,15 @@ def convert_json_to_text(
     messages = [
         system_message,
         HumanMessage(
-            content="You'll receive user input and server responses obtained by making calls to various APIs. You will also recieve a dictionary that specifies, the body, param and query param used to make those api calls. Your task is to transform the JSON response into a response that in an answer to the user input. You should inform the user about the filters that were used to make these api calls"
+            content="You'll receive user input and server responses obtained by making calls to various APIs. Your task is to summarize the API response so that it answers the user input. Be concise and accurate, and include references if present."
         ),
-        HumanMessage(content="Here is the user input: {}.".format(user_input)),
+        HumanMessage(content=user_input),
         HumanMessage(
             content="Here is the response from the apis: {}".format(api_response)
         ),
-        HumanMessage(
-            content="Here is the api_request_data: {}".format(api_request_data)
-        ),
+        # HumanMessage(
+        #     content="Here is the api_request_data: {}".format(api_request_data)
+        # ),
     ]
 
     result = chat(messages)
diff --git a/llm-server/routes/workflow/utils/process_conversation_step.py b/llm-server/routes/workflow/utils/process_conversation_step.py
index d66cacf5d..398da0416 100644
--- a/llm-server/routes/workflow/utils/process_conversation_step.py
+++ b/llm-server/routes/workflow/utils/process_conversation_step.py
@@ -28,25 +28,15 @@ def process_conversation_step(
     prev_conversations: List[BaseMessage],
     flows: List[WorkflowFlowType],
     bot_id: str,
+    base_prompt: str
 ):
+    logger.info("planner data", context=context, api_summaries=api_summaries, prev_conversations=prev_conversations, flows=flows)
     if not session_id:
         raise ValueError("Session id must be defined for chat conversations")
-    prompt_templates = load_prompts(bot_id)
-    system_message_classifier = SystemMessage(
-        content="You are a helpful ai assistant. User will give you two things, a list of api's and some useful information, called context."
-    )
-    if app and prompt_templates.system_message is not None:
-        system_message_classifier = SystemMessage(
-            content=prompt_templates.system_message
-        )
-        logger.debug(
-            "System message classification",
-            incident="system_message_classifier",
-            app=app,
-            context=context,
-        )
     messages: List[BaseMessage] = []
-    messages.append(system_message_classifier)
+    messages.append(SystemMessage(content=base_prompt))
+
+    messages.append(SystemMessage(content="You will have access to a list of APIs and some useful information, called context."))
 
     if len(prev_conversations) > 0:
         messages.extend(prev_conversations)
@@ -81,10 +71,11 @@
 
     messages.append(
         HumanMessage(
-            content="""Based on the information provided to you I want you to answer the questions that follow. Your should respond with a json that looks like the following - 
+            content="""Based on the information provided to you, I want you to answer the questions that follow. You should respond with a JSON that looks like the following; you must always use the operationIds provided in the API summaries. Do not make up an operation id -
             {{
                 "ids": ["list", "of", "operationIds", "for apis to be called"],
-                "bot_message": "your response based on the instructions provided at the beginning"
+                "bot_message": "your response based on the instructions provided at the beginning",
+                "missing_information": "Optional field; in case of ambiguity where the user input is not sufficient to make the API call, ask a follow-up question. A follow-up question should only be asked once per user input"
             }}
             """
         )
@@ -94,6 +85,9 @@
     )
 
     messages.append(HumanMessage(content=user_requirement))
+
+
+    logger.info("messages array", messages=messages)
 
     content = cast(str, chat(messages=messages).content)
@@ -110,7 +104,7 @@
     except OutputParserException as e:
         logger.error("Failed to parse json", data=content)
         logger.error("Failed to parse json", err=str(e))
-        return BotMessage(bot_message=content, ids=[])
+        return BotMessage(bot_message=content, ids=[], missing_information=None)
     except Exception as e:
         logger.error("unexpected error occured", err=str(e))
-        return BotMessage(ids=[], bot_message=str(e))
+        return BotMessage(ids=[], bot_message=str(e), missing_information=None)
diff --git a/llm-server/routes/workflow/utils/run_openapi_ops.py b/llm-server/routes/workflow/utils/run_openapi_ops.py
index c857fa47b..8e4563bbf 100644
--- a/llm-server/routes/workflow/utils/run_openapi_ops.py
+++ b/llm-server/routes/workflow/utils/run_openapi_ops.py
@@ -47,7 +47,7 @@ async def run_openapi_operations(
             api_request_data[operation_id] = api_payload.__dict__
             api_response = None
             try:
-                logger.info("Making API call", incident="make_api_call", payload=json.dumps(api_payload.body_schema))
+                logger.info("Making API call", incident="make_api_call", body=json.dumps(api_payload.body_schema), params=api_payload.query_params)
                 api_response = make_api_request(
                     headers=headers, **api_payload.__dict__
@@ -60,28 +60,34 @@
             except Exception as e:
                 logger.error("Error occurred while making API call", incident="make_api_call_failed", error=str(e))
-                return {}
+                raise e
 
+            logger.info("Got the following api response", text=api_response.text)
             # if a custom transformer function is defined for this operationId use that, otherwise forward it to the llm
             # so we don't necessarily have to defined mappers for all api endpoints
             partial_json = load_json_config(app, operation_id)
-            logger.info("Loading JSON configuration", incident="load_json_config", json_config=json.dumps(partial_json))
 
             if not partial_json:
-                logger.error(
-                    "Failed to find a config map. Consider adding a config map for this operation id",
-                    incident="load_json_config",
-                    error="Failed to find a config map, consider adding a config map for this operation id",
+                logger.warn(
+                    "Config map is not defined for this operationId",
+                    incident="config_map_undefined",
                     operation_id=operation_id,
+                    app=app
                 )
-                record_info[operation_id] = transform_api_response_from_schema(
-                    api_payload.endpoint or "", api_response.text
-                )
+                record_info[operation_id] = api_response.text
+
+                # Removed this because it slows down the bot response instead of speeding it up
+                # record_info[operation_id] = transform_api_response_from_schema(
+                #     api_payload.endpoint or "", api_response.text
+                # )
+
+                pass
             else:
                 logger.info(
                     "API Response",
-                    incident="api_response",
-                    text=api_response.text,
-                    action="Truncate unnecessary info using json_config provided",
+                    incident="log_api_response",
+                    api_response=api_response.text,
+                    json_config_used=partial_json,
+                    next_action="summarize_with_partial_json",
                 )
                 api_json = json.loads(api_response.text)
                 record_info[operation_id] = json.dumps(
@@ -91,19 +97,13 @@
 
     except Exception as e:
-        payload = json.dumps(
-            {
-                "text": text,
-                "headers": headers,
-                "server_base_url": server_base_url,
-                "app": app,
-            }
-        )
-
         logger.error(
             "Error occurred during workflow check in store",
-            incident="check_workflow_in_store",
-            payload=payload,
+            incident="check_workflow_in_store",
+            text=text,
+            headers=headers,
+            server_base_url=server_base_url,
+            app=app,
             error=str(e),
         )
 
     return convert_json_to_text(text, record_info, api_request_data, bot_id=bot_id)
diff --git a/llm-server/shared/utils/opencopilot_utils/embedding_type.py b/llm-server/shared/utils/opencopilot_utils/embedding_type.py
index ee06ca72f..09eefbd6e 100644
--- a/llm-server/shared/utils/opencopilot_utils/embedding_type.py
+++ b/llm-server/shared/utils/opencopilot_utils/embedding_type.py
@@ -6,4 +6,5 @@ class EmbeddingProvider(Enum):
     BARD = "bard"
     azure = "azure"
     llama2 = "llama2"
+    openchat = "openchat"
 
diff --git a/llm-server/shared/utils/opencopilot_utils/get_embeddings.py b/llm-server/shared/utils/opencopilot_utils/get_embeddings.py
index 37062a267..4ba84031b 100644
--- a/llm-server/shared/utils/opencopilot_utils/get_embeddings.py
+++ b/llm-server/shared/utils/opencopilot_utils/get_embeddings.py
@@ -1,11 +1,17 @@
+from functools import lru_cache
 from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.embeddings.ollama import OllamaEmbeddings
 from .embedding_type import EmbeddingProvider
 import os
 from langchain.embeddings.base import Embeddings
-
+from utils.get_logger import CustomLogger
 import os, warnings
 
+logger = CustomLogger(module_name=__name__)
+
+LOCAL_IP = os.getenv("LOCAL_IP", "host.docker.internal")
+
 def get_embedding_provider():
     """Gets the chosen embedding provider from environment variables."""
     return os.environ.get("EMBEDDING_PROVIDER")
 
@@ -31,7 +37,7 @@ def get_openai_embedding():
     """Gets embeddings using the OpenAI embedding provider."""
     openai_api_key = os.environ.get("OPENAI_API_KEY")
-    return OpenAIEmbeddings(openai_api_key=openai_api_key, chunk_size=1)
+    return OpenAIEmbeddings(openai_api_key=openai_api_key)
 
 
 def choose_embedding_provider():
     """Chooses and returns the appropriate embedding provider instance."""
@@ -40,6 +46,10 @@ def choose_embedding_provider():
     if embedding_provider == EmbeddingProvider.azure.value:
         return get_azure_embedding()
 
+    elif embedding_provider == EmbeddingProvider.openchat.value:
+        logger.info("Got ollama embedding provider", provider=embedding_provider)
+        return OllamaEmbeddings(base_url=f"{LOCAL_IP}:11434", model="openchat")
+
     elif embedding_provider == EmbeddingProvider.OPENAI.value or embedding_provider is None:
         if embedding_provider is None:
             warnings.warn("No embedding provider specified. Defaulting to OpenAI.")
@@ -53,6 +63,7 @@ def choose_embedding_provider():
     )
 
 # Main function to get embeddings
+@lru_cache(maxsize=1)
 def get_embeddings() -> Embeddings:
     """Gets embeddings using the chosen embedding provider."""
     return choose_embedding_provider()
\ No newline at end of file
diff --git a/llm-server/shared/utils/opencopilot_utils/get_vector_store.py b/llm-server/shared/utils/opencopilot_utils/get_vector_store.py
index 0e263dd37..264b48484 100644
--- a/llm-server/shared/utils/opencopilot_utils/get_vector_store.py
+++ b/llm-server/shared/utils/opencopilot_utils/get_vector_store.py
@@ -27,10 +27,10 @@ def get_vector_store(options: StoreOptions) -> VectorStore:
         vector_store = Qdrant(
             client, collection_name=options.namespace, embeddings=embedding
         )
-
+        # vector_store = Qdrant.from_documents([], embedding, url='http://localhost:6333', collection=options.namespace)
     else:
         raise ValueError("Invalid STORE environment variable value")
 
-    return vector_store
\ No newline at end of file
+    return vector_store
diff --git a/llm-server/utils/chat_models.py b/llm-server/utils/chat_models.py
index d1c8777ca..3b6c37351 100644
--- a/llm-server/utils/chat_models.py
+++ b/llm-server/utils/chat_models.py
@@ -9,6 +9,7 @@ class ChatModels(NamedTuple):
     nous_hermes = "nous-hermes"
     llama2: str = "llama2"
     xwinlm = "xwinlm"
+    openchat = "openchat"
 
 
 CHAT_MODELS: ChatModels = ChatModels()
diff --git a/llm-server/utils/get_chat_model.py b/llm-server/utils/get_chat_model.py
index 7d26ac47f..da07ac8c9 100644
--- a/llm-server/utils/get_chat_model.py
+++ b/llm-server/utils/get_chat_model.py
@@ -1,3 +1,4 @@
+from typing import Dict
 from langchain.chat_models import ChatOpenAI
 from langchain.chat_models.base import BaseChatModel
 from langchain.chat_models import ChatOllama, ChatAnthropic
@@ -8,45 +9,55 @@
 localip = os.getenv("LOCAL_IP", "localhost")
 
+
+# Create a dictionary to store cached instances
+model_cache: Dict[str, BaseChatModel] = {}
+
+
 def get_chat_model(prop: str) -> BaseChatModel:
+    # Check if the model is already cached
+    if prop in model_cache:
+        return model_cache[prop]
+
     if prop == CHAT_MODELS.gpt_3_5_turbo:
-        return ChatOpenAI(
+        model = ChatOpenAI(
             openai_api_key=os.getenv("OPENAI_API_KEY"),
             model=CHAT_MODELS.gpt_3_5_turbo,
             temperature=0,
         )
-
     elif prop == CHAT_MODELS.gpt_3_5_turbo_16k:
-        return ChatOpenAI(
+        model = ChatOpenAI(
             openai_api_key=os.getenv("OPENAI_API_KEY"),
             model=CHAT_MODELS.gpt_3_5_turbo_16k,
             temperature=0,
         )
     elif prop == CHAT_MODELS.claude_2_0:
-        return ChatAnthropic(
-            anthropic_api_key=os.getenv("CLAUDE_API_KEY", "CLAUDE_API_KEY")  # type: ignore
+        model = ChatAnthropic(
+            anthropic_api_key=os.getenv("CLAUDE_API_KEY", "CLAUDE_API_KEY")
         )
     elif prop == CHAT_MODELS.mistral_openorca:
-        return ChatOllama(
+        model = ChatOllama(
             base_url=f"{localip}:11434",
             model=CHAT_MODELS.mistral_openorca,
             temperature=0,
         )
     elif prop == CHAT_MODELS.nous_hermes:
-        return ChatOllama(
+        model = ChatOllama(
             base_url=f"{localip}:11434",
             model=CHAT_MODELS.nous_hermes,
             temperature=0,
         )
     elif prop == CHAT_MODELS.xwinlm:
-        return ChatOllama(
+        model = ChatOllama(
base_url=f"{localip}:11434", model=CHAT_MODELS.xwinlm, temperature=0, ) elif prop == "llama2": - return ChatOpenAI(model="llama2", temperature=0) + model = ChatOpenAI(model="llama2", temperature=0) else: - raise ValueError( - "Couldn't match one of the supported models, please refer to llm-server/readme.md" - ) + raise ValueError("Couldn't match one of the supported models.") + + # Cache the initialized model + model_cache[prop] = model + + return model diff --git a/llm-server/utils/vector_db/add_workflow.py b/llm-server/utils/vector_db/add_workflow.py index bd9e4f976..937218619 100644 --- a/llm-server/utils/vector_db/add_workflow.py +++ b/llm-server/utils/vector_db/add_workflow.py @@ -1,12 +1,7 @@ from routes.workflow.dto.workflow_dto import Workflow from typing import Any , List from opencopilot_types.workflow_type import WorkflowDataType -from shared.utils.opencopilot_utils import ( - StoreOptions, - get_embeddings, - init_vector_store, - get_vector_store -) +from shared.utils.opencopilot_utils import (StoreOptions,get_vector_store) from langchain.docstore.document import Document @@ -27,3 +22,4 @@ def add_workflow_data_to_qdrant( vector_ids = vector_store.add_documents(docs) return vector_ids + diff --git a/llm-server/workers/tasks/process_pdfs.py b/llm-server/workers/tasks/process_pdfs.py index 556a5607c..7d3898bf1 100644 --- a/llm-server/workers/tasks/process_pdfs.py +++ b/llm-server/workers/tasks/process_pdfs.py @@ -4,20 +4,31 @@ from langchain.document_loaders import PyPDFium2Loader from shared.models.opencopilot_db.pdf_data_sources import insert_pdf_data_source, update_pdf_data_source_status from shared.utils.opencopilot_utils import get_embeddings, init_vector_store, StoreOptions, get_file_path +from shared.utils.opencopilot_utils import get_vector_store +from utils.get_logger import CustomLogger +logger = CustomLogger(module_name=__name__) + +embeddings = get_embeddings() +kb_vector_store = get_vector_store(StoreOptions("knowledgebase")) @shared_task def process_pdf(file_name: str, bot_id: str): try: + logger.info("Pdf task picked up", file_name=file_name, bot_id=bot_id) insert_pdf_data_source(chatbot_id=bot_id, file_name=file_name, status="PENDING") loader = PyPDFium2Loader(get_file_path(file_name)) raw_docs = loader.load() + + # clean the data received from pdf document before passing it text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200, length_function=len ) docs = text_splitter.split_documents(raw_docs) - embeddings = get_embeddings() - init_vector_store(docs, embeddings, StoreOptions(namespace="knowledgebase", metadata={"bot_id": bot_id})) - + + for doc in docs: + doc.metadata["bot_id"] = bot_id + + kb_vector_store.add_documents(docs) update_pdf_data_source_status(chatbot_id=bot_id, file_name=file_name, status="COMPLETED") except Exception as e: update_pdf_data_source_status(chatbot_id=bot_id, file_name=file_name, status="FAILED")