Python: fix for streaming openai responses, and first parts of fixes for Chat With Your Data (#5387)

### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users by providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->

### Description

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help reviewers understand how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [ ] The code builds clean without any errors or warnings
- [ ] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [ ] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄
eavanvalkenburg committed Mar 8, 2024
1 parent bf5d21c commit 555a7c8
Showing 31 changed files with 523 additions and 354 deletions.
33 changes: 11 additions & 22 deletions python/samples/kernel-syntax-examples/azure_chat_gpt_api.py
@@ -3,17 +3,12 @@
import asyncio
import logging

from dotenv import load_dotenv

import semantic_kernel as sk
import semantic_kernel.connectors.ai.open_ai as sk_oai
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.prompt_template.input_variable import InputVariable
from semantic_kernel.utils.settings import azure_openai_settings_from_dot_env_as_dict

logging.basicConfig(level=logging.INFO)

load_dotenv()
logging.basicConfig(level=logging.WARNING)

system_message = """
You are a chat bot. Your name is Mosscap and
@@ -45,31 +40,25 @@

## The second method is useful when you are using a single service, and you want to have type checking on the request settings or when you are using multiple instances of the same type of service, for instance gpt-35-turbo and gpt-4, both in openai and both for chat. # noqa: E501 E266
## 3. create the request settings from the kernel based on the registered service class: # noqa: E266
req_settings = kernel.get_service(service_id).get_prompt_execution_settings_class()(service_id=service_id)
req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id)
req_settings.max_tokens = 2000
req_settings.temperature = 0.7
req_settings.top_p = 0.8
req_settings.auto_invoke_kernel_functions = True
## The third method is the most specific as the returned request settings class is the one that is registered for the service and has some fields already filled in, like the service_id and ai_model_id. # noqa: E501 E266

prompt_template_config = sk.PromptTemplateConfig(
template=system_message
+ """ Summarize the on-going chat history: {{$chat_history}} and respond to this statement: {{$request}}""",
name="chat",
input_variables=[
InputVariable(name="request", description="The user input", is_required=True),
InputVariable(name="chat_history", description="The history of the conversation", is_required=True),
],
execution_settings=req_settings,

chat_function = kernel.create_function_from_prompt(
prompt=system_message + """{{$chat_history}}{{$user_input}}""",
function_name="chat",
plugin_name="chat",
prompt_execution_settings=req_settings,
)

history = ChatHistory()
history.add_user_message("Hi there, who are you?")
history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")

chat_function = kernel.create_function_from_prompt(
function_name="chat", plugin_name="chat", prompt_template_config=prompt_template_config
)


async def chat() -> bool:
try:
@@ -89,7 +78,7 @@ async def chat() -> bool:
if stream:
answer = kernel.invoke_stream(
chat_function,
request=user_input,
user_input=user_input,
chat_history=history,
)
print("Mosscap:> ", end="")
@@ -99,7 +88,7 @@ async def chat() -> bool:
return True
answer = await kernel.invoke(
chat_function,
request=user_input,
user_input=user_input,
chat_history=history,
)
print(f"Mosscap:> {answer}")
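The net effect of this sample change is easier to see in one place. Below is a minimal sketch (not the full sample), assuming the Azure OpenAI credentials come from a .env file as in the diff above: the execution settings are obtained via the new `kernel.get_prompt_execution_settings_from_service_id`, the chat function is built directly with `create_function_from_prompt`, and the invoke call now passes `user_input` (matching the template variable) instead of `request`.

```python
# Minimal sketch of the updated azure_chat_gpt_api.py flow, not the full sample.
# Assumes Azure OpenAI credentials are available via .env, as in the diff above.
import asyncio

import semantic_kernel as sk
import semantic_kernel.connectors.ai.open_ai as sk_oai
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.utils.settings import azure_openai_settings_from_dot_env_as_dict

kernel = sk.Kernel()
service_id = "chat-gpt"
kernel.add_service(
    sk_oai.AzureChatCompletion(service_id=service_id, **azure_openai_settings_from_dot_env_as_dict())
)

# New settings lookup: ask the kernel for the execution settings registered for this service.
req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id)
req_settings.max_tokens = 2000
req_settings.temperature = 0.7

# Simplified prompt; the real sample uses the longer Mosscap system message.
chat_function = kernel.create_function_from_prompt(
    prompt="You are a chat bot named Mosscap.{{$chat_history}}{{$user_input}}",
    function_name="chat",
    plugin_name="chat",
    prompt_execution_settings=req_settings,
)

history = ChatHistory()
history.add_user_message("Hi there, who are you?")


async def main() -> None:
    # The keyword is now `user_input`, matching {{$user_input}}; the old sample passed `request=`.
    answer = await kernel.invoke(chat_function, user_input="What can you do?", chat_history=history)
    print(f"Mosscap:> {answer}")


if __name__ == "__main__":
    asyncio.run(main())
```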
Next changed file:
@@ -4,32 +4,37 @@

import semantic_kernel as sk
import semantic_kernel.connectors.ai.open_ai as sk_oai
from semantic_kernel.connectors.ai.open_ai.contents.azure_streaming_chat_message_content import (
AzureStreamingChatMessageContent,
)
from semantic_kernel.connectors.ai.open_ai.contents.azure_chat_message_content import AzureChatMessageContent
from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall
from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall
from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
AzureAISearchDataSources,
AzureChatPromptExecutionSettings,
AzureDataSources,
ExtraBody,
)
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.contents.chat_role import ChatRole
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.prompt_template.input_variable import InputVariable
from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig
from semantic_kernel.utils.settings import (
azure_aisearch_settings_from_dot_env_as_dict,
azure_openai_settings_from_dot_env_as_dict,
)

kernel = sk.Kernel()

# Load Azure OpenAI Settings
deployment, api_key, endpoint = sk.azure_openai_settings_from_dot_env()
aoai_settings = azure_openai_settings_from_dot_env_as_dict()

# For example, AI Search index may contain the following document:

# Emily and David, two passionate scientists, met during a research expedition to Antarctica.
# Bonded by their love for the natural world and shared curiosity, they uncovered a
# groundbreaking phenomenon in glaciology that could potentially reshape our understanding of climate change.

azure_ai_search_settings = sk.azure_aisearch_settings_from_dot_env_as_dict()
azure_ai_search_settings = azure_aisearch_settings_from_dot_env_as_dict()

# Our example index has fields "source_title", "source_text", "source_url", and "source_file".
# Add fields mapping to the settings to indicate which fields to use for the title, content, URL, and file path.
@@ -49,35 +54,28 @@
# When using data, set use_extensions=True and use the 2023-12-01-preview API version.
chat_service = sk_oai.AzureChatCompletion(
service_id="chat-gpt",
deployment_name=deployment,
api_key=api_key,
endpoint=endpoint,
api_version="2023-12-01-preview",
use_extensions=True,
**aoai_settings,
)
kernel.add_service(chat_service)

prompt_template_config = PromptTemplateConfig(
template="{{$user_input}}",
template="{{$chat_history}}{{$user_input}}",
name="chat",
template_format="semantic-kernel",
input_variables=[
InputVariable(name="chat_history", description="The chat history", is_required=True),
InputVariable(name="request", description="The user input", is_required=True),
],
execution_settings={"default": req_settings},
)

chat = ChatHistory()

chat.add_user_message("Hi there, who are you?")
chat.add_assistant_message("I am an AI assistant here to answer your questions.")

arguments = KernelArguments()

chat_function = kernel.create_function_from_prompt(
plugin_name="ChatBot", function_name="Chat", prompt_template_config=prompt_template_config
)

chat_history = ChatHistory()
chat_history.add_system_message("I am an AI assistant here to answer your questions.")


async def chat() -> bool:
try:
@@ -96,20 +94,34 @@ async def chat() -> bool:
# Non streaming
# answer = await kernel.run(chat_function, input_vars=context_vars)
# print(f"Assistant:> {answer}")
arguments = KernelArguments(user_input=user_input, execution_settings=req_settings)
arguments = KernelArguments(chat_history=chat_history, user_input=user_input, execution_settings=req_settings)

full_message = None
print("Assistant:> ", end="")
async for message in kernel.invoke_stream(chat_function, arguments=arguments):
print(str(message[0]), end="")
full_message = message[0] if not full_message else full_message + message[0]
chat.add_assistant_message(str(full_message))
print("\n")

# The tool message containing cited sources is available in the context
if isinstance(full_message, AzureStreamingChatMessageContent):
chat.add_function_response_message(name="tool", content=full_message.tool_message)
print(f"Tool:> {full_message.tool_message}")
if full_message:
chat_history.add_user_message(user_input)
if hasattr(full_message, "tool_message"):
chat_history.add_message(
AzureChatMessageContent(
role="assistant",
tool_calls=[
ToolCall(
id="chat_with_your_data",
function=FunctionCall(name="chat_with_your_data", arguments=""),
)
],
)
)
chat_history.add_tool_message(full_message.tool_message, {"tool_call_id": "chat_with_your_data"})
if full_message.role is None:
full_message.role = ChatRole.ASSISTANT
chat_history.add_message(full_message)
return True


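The more subtle part of this change is how the "Chat With Your Data" citations are kept in the conversation. Below is a hedged sketch of just that bookkeeping, using the classes imported in this sample: the tool response is recorded as an assistant message carrying a synthetic tool call, followed by a tool message with the cited sources, so later turns can reference them. The `tool_message` value here is a placeholder; in the sample it comes from the streamed `full_message.tool_message`.

```python
# Hedged sketch of the history bookkeeping added in this sample: record the tool
# response from the data source as an assistant tool call plus a linked tool message.
from semantic_kernel.connectors.ai.open_ai.contents.azure_chat_message_content import AzureChatMessageContent
from semantic_kernel.connectors.ai.open_ai.contents.function_call import FunctionCall
from semantic_kernel.connectors.ai.open_ai.contents.tool_calls import ToolCall
from semantic_kernel.contents.chat_history import ChatHistory

chat_history = ChatHistory()
chat_history.add_system_message("I am an AI assistant here to answer your questions.")
chat_history.add_user_message("Who were Emily and David?")

tool_message = '{"citations": []}'  # placeholder for full_message.tool_message

# An assistant message that "called" the data source, so the tool message below has a parent call.
chat_history.add_message(
    AzureChatMessageContent(
        role="assistant",
        tool_calls=[
            ToolCall(
                id="chat_with_your_data",
                function=FunctionCall(name="chat_with_your_data", arguments=""),
            )
        ],
    )
)
# The tool message itself, linked to the synthetic call id above.
chat_history.add_tool_message(tool_message, {"tool_call_id": "chat_with_your_data"})
```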
Next changed file:
@@ -68,11 +68,11 @@
# the format for that is 'PluginName-FunctionName', (i.e. 'math-Add').
# if the model or api version do not support this you will get an error.
prompt_template_config = PromptTemplateConfig(
template="{{$user_input}}",
template="{{$chat_history}}{{$user_input}}",
name="chat",
template_format="semantic-kernel",
input_variables=[
InputVariable(name="history", description="The history of the conversation", is_required=True),
InputVariable(name="chat_history", description="The history of the conversation", is_required=True),
InputVariable(name="user_input", description="The user input", is_required=True),
],
)
@@ -110,6 +110,7 @@ async def chat() -> bool:
print("\n\nExiting chat...")
return False

arguments["chat_history"] = history
arguments["user_input"] = user_input
answer = await kernel.invoke(
functions=chat_function,
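In this sample the fix is mostly plumbing: the prompt template now declares `chat_history` alongside `user_input`, and both are passed through `KernelArguments`. A short sketch of that wiring follows; the chat function is assumed to be created from a `{{$chat_history}}{{$user_input}}` prompt as in the sample above.

```python
# Sketch of the argument wiring after this change; chat_function is assumed to be
# built from a "{{$chat_history}}{{$user_input}}" prompt as in the sample.
import semantic_kernel as sk
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.functions.kernel_arguments import KernelArguments


async def ask(kernel: sk.Kernel, chat_function, history: ChatHistory, user_input: str) -> str:
    arguments = KernelArguments()
    arguments["chat_history"] = history  # rendered via {{$chat_history}}
    arguments["user_input"] = user_input  # rendered via {{$user_input}}
    answer = await kernel.invoke(functions=chat_function, arguments=arguments)
    return str(answer)
```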
Next changed file:
@@ -14,13 +14,10 @@
from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
OpenAIPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.open_ai.utils import (
get_tool_call_object,
)
from semantic_kernel.connectors.ai.open_ai.utils import get_tool_call_object
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.core_plugins import MathPlugin, TimePlugin
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.prompt_template.input_variable import InputVariable

if TYPE_CHECKING:
from semantic_kernel.functions.kernel_function import KernelFunction
@@ -58,6 +55,11 @@
kernel.import_plugin_from_object(MathPlugin(), plugin_name="math")
kernel.import_plugin_from_object(TimePlugin(), plugin_name="time")

chat_function = kernel.create_function_from_prompt(
prompt="{{$chat_history}}{{$user_input}}",
plugin_name="ChatBot",
function_name="Chat",
)
# enabling or disabling function calling is done by setting the function_call parameter for the completion.
# when the function_call parameter is set to "auto" the model will decide which function to use, if any.
# if you only want to use a specific function, set the name of that function in this parameter,
@@ -68,6 +70,7 @@
# If configured to be greater than one, this value will be overridden to 1.
execution_settings = sk_oai.OpenAIChatPromptExecutionSettings(
service_id="chat",
ai_model_id="gpt-3.5-turbo-1106",
max_tokens=2000,
temperature=0.7,
top_p=0.8,
@@ -77,30 +80,13 @@
max_auto_invoke_attempts=3,
)

prompt_template_config = sk.PromptTemplateConfig(
template="{{$user_input}}",
name="chat",
template_format="semantic-kernel",
input_variables=[
InputVariable(name="user_input", description="The user input", is_required=True),
InputVariable(name="chat_history", description="The history of the conversation", is_required=True),
],
execution_settings={"chat": execution_settings},
)

history = ChatHistory()

history.add_system_message(system_message)
history.add_user_message("Hi there, who are you?")
history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")

arguments = KernelArguments()

chat_function = kernel.create_function_from_prompt(
prompt_template_config=prompt_template_config,
plugin_name="ChatBot",
function_name="Chat",
)
arguments = KernelArguments(settings=execution_settings)


def print_tool_calls(message: Union[OpenAIChatMessageContent, OpenAIStreamingChatMessageContent]) -> None:
@@ -138,7 +124,7 @@ async def handle_streaming(

print("Mosscap:> ", end="")
streamed_chunks: List[OpenAIStreamingChatMessageContent] = []
tool_call_ids_by_index: Dict[int, Any] = {}
tool_call_ids_by_index: Dict[str, Any] = {}

async for message in response:
if not execution_settings.auto_invoke_kernel_functions and isinstance(
@@ -147,11 +133,11 @@
streamed_chunks.append(message[0])
if message[0].tool_calls is not None:
for tc in message[0].tool_calls:
if tc.index not in tool_call_ids_by_index:
tool_call_ids_by_index[tc.index] = tc
if tc.id not in tool_call_ids_by_index:
tool_call_ids_by_index[tc.id] = tc
else:
for tc in message[0].tool_calls:
tool_call_ids_by_index[tc.index] += tc
tool_call_ids_by_index[tc.id] += tc
else:
print(str(message[0]), end="")

@@ -178,7 +164,7 @@ async def chat() -> bool:
print("\n\nExiting chat...")
return False

stream = False
stream = True
if stream:
await handle_streaming(kernel, chat_function, user_input, history, execution_settings)
else:
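The streaming fix in this sample changes how tool-call chunks are aggregated: chunks are now keyed by the tool call id rather than its stream index, and later chunks for the same id are merged with `+=` as shown in the diff above. A hedged sketch of that accumulation, with `response` standing in for the stream returned by `kernel.invoke_stream`:

```python
# Hedged sketch of the tool-call accumulation used in handle_streaming after this fix.
# `response` stands for the async stream returned by kernel.invoke_stream(...);
# each item is a list whose first element is a streaming chat message chunk.
from typing import Any, AsyncIterable, Dict, List


async def collect_tool_calls(response: AsyncIterable[List[Any]]) -> Dict[str, Any]:
    tool_call_ids_by_index: Dict[str, Any] = {}  # keyed by tool call id, not stream index
    async for message in response:
        chunk = message[0]
        if chunk.tool_calls is None:
            continue
        for tc in chunk.tool_calls:
            if tc.id not in tool_call_ids_by_index:
                tool_call_ids_by_index[tc.id] = tc
            else:
                # Later chunks for the same call id are merged (argument deltas concatenated).
                tool_call_ids_by_index[tc.id] += tc
    return tool_call_ids_by_index
```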
Next changed file:
@@ -22,7 +22,7 @@ async def complete_chat(
self,
chat_history: "ChatHistory",
settings: "PromptExecutionSettings",
**kwargs: Dict[str, Any],
**kwargs: Any,
) -> List["ChatMessageContent"]:
"""
This is the method that is called from the kernel to get a response from a chat-optimized LLM.
@@ -43,7 +43,7 @@ async def complete_chat_stream(
self,
chat_history: "ChatHistory",
settings: "PromptExecutionSettings",
**kwargs: Dict[str, Any],
**kwargs: Any,
) -> AsyncIterable[List["StreamingChatMessageContent"]]:
"""
This is the method that is called from the kernel to get a stream response from a chat-optimized LLM.
@@ -82,5 +82,5 @@ def _prepare_chat_history_for_request(

def _chat_message_content_to_dict(self, message: ChatMessageContent) -> Dict[str, Optional[str]]:
"""can be overridden to customize the serialization of the chat message content"""
msg = message.model_dump(exclude_none=True, include=["role", "content"])
msg = message.model_dump(include=["role", "content"])
return msg
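Two small correctness fixes land in this base class: the variadic keyword arguments are annotated `**kwargs: Any` (with `**kwargs`, the annotation describes each value, so the old `Dict[str, Any]` would have required every value to itself be a dict), and the message serialization no longer passes `exclude_none=True`, so `role` and `content` are always included in the dump. A tiny, self-contained illustration of the annotation point:

```python
# Illustration of the annotation change: with **kwargs the annotation applies to each
# value individually, so `**kwargs: Any` is the conventional "values of any type" form,
# while `**kwargs: Dict[str, Any]` would mean every value must itself be a dict.
from typing import Any


def complete_chat_example(**kwargs: Any) -> None:
    # Inside the function, kwargs is already a dict[str, Any].
    for key, value in kwargs.items():
        print(f"{key}={value!r}")


complete_chat_example(temperature=0.7, user="alice")
```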
Next changed file:
@@ -63,7 +63,7 @@ async def complete_chat(
self,
chat_history: ChatHistory,
settings: GooglePalmPromptExecutionSettings,
**kwargs: Dict[str, Any],
**kwargs: Any,
) -> List[ChatMessageContent]:
"""
This is the method that is called from the kernel to get a response from a chat-optimized LLM.
@@ -114,7 +114,7 @@ async def complete_chat_stream(
self,
messages: List[Tuple[str, str]],
settings: GooglePalmPromptExecutionSettings,
**kwargs: Dict[str, Any],
**kwargs: Any,
):
raise NotImplementedError("Google Palm API does not currently support streaming")

(remaining changed files not shown)
