Merge branch 'main' into fix_azure_api_key_check
moonbox3 committed Sep 19, 2024
2 parents 86fb934 + 0ae0e52 commit d45f31c
Showing 3 changed files with 256 additions and 0 deletions.
110 changes: 110 additions & 0 deletions dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
@@ -4,12 +4,16 @@
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Ollama;
using OllamaSharp.Models.Chat;

namespace ChatCompletion;

// The following example shows how to use Semantic Kernel with the Ollama Chat Completion API
public class Ollama_ChatCompletion(ITestOutputHelper output) : BaseTest(output)
{
/// <summary>
/// Demonstrates how you can use the chat completion service directly.
/// </summary>
[Fact]
public async Task ServicePromptAsync()
{
@@ -45,6 +49,46 @@ public async Task ServicePromptAsync()
this.OutputLastMessage(chatHistory);
}

/// <summary>
/// Demonstrates how you can get extra information from the service response, using the underlying inner content.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
[Fact]
public async Task ServicePromptWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine("======== Ollama - Chat Completion ========");

var chatService = new OllamaChatCompletionService(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
modelId: TestConfiguration.Ollama.ModelId);

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

var chatHistory = new ChatHistory("You are a librarian, expert about books");

// First user message
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
this.OutputLastMessage(chatHistory);

// First assistant message
var reply = await chatService.GetChatMessageContentAsync(chatHistory);

// Assistant message details
// OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content is always a list of chunks.
var replyInnerContent = reply.InnerContent as List<ChatResponseStream>;

OutputInnerContent(replyInnerContent!);
}
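
/// <summary>
/// A minimal sketch, assuming the OllamaSharp chunk types used above (the helper name is illustrative):
/// extracts the final metadata chunk from a non-streaming reply by pattern matching on the inner content,
/// avoiding the null-forgiving casts used in the samples.
/// </summary>
private static ChatDoneResponseStream? GetDoneChunk(ChatMessageContent reply)
    => reply.InnerContent is List<ChatResponseStream> { Count: > 0 } chunks
        ? chunks[^1] as ChatDoneResponseStream
        : null;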

/// <summary>
/// Demonstrates how you can template a chat history call using the kernel for invocation.
/// </summary>
[Fact]
public async Task ChatPromptAsync()
{
@@ -70,4 +114,70 @@ public async Task ChatPromptAsync()

Console.WriteLine(reply);
}

/// <summary>
/// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
[Fact]
public async Task ChatPromptWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

StringBuilder chatPrompt = new("""
<message role="system">You are a librarian, expert about books</message>
<message role="user">Hi, I'm looking for book suggestions</message>
""");

var kernel = Kernel.CreateBuilder()
.AddOllamaChatCompletion(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint ?? "http://localhost:11434"),
modelId: TestConfiguration.Ollama.ModelId)
.Build();

var functionResult = await kernel.InvokePromptAsync(chatPrompt.ToString());

// OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content of a non-streaming result is a list of the generated chunks.
var messageContent = functionResult.GetValue<ChatMessageContent>(); // Retrieves underlying chat message content from FunctionResult.
var replyInnerContent = messageContent!.InnerContent as List<ChatResponseStream>; // Retrieves inner content from ChatMessageContent.

OutputInnerContent(replyInnerContent!);
}

/// <summary>
/// Retrieve extra information from each streaming chunk response in a list of chunks.
/// </summary>
/// <param name="innerContent">List of streaming chunks provided as inner content of a chat message</param>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
private void OutputInnerContent(List<ChatResponseStream> innerContent)
{
Console.WriteLine($"Model: {innerContent![0].Model}"); // Model doesn't change per chunk, so we can get it from the first chunk only
Console.WriteLine(" -- Chunk changing data -- ");

innerContent.ForEach(streamChunk =>
{
Console.WriteLine($"Message role: {streamChunk.Message.Role}");
Console.WriteLine($"Message content: {streamChunk.Message.Content}");
Console.WriteLine($"Created at: {streamChunk.CreatedAt}");
Console.WriteLine($"Done: {streamChunk.Done}");
// The last message in the list of chunks is a ChatDoneResponseStream type with additional metadata.
if (streamChunk is ChatDoneResponseStream doneStreamChunk)
{
Console.WriteLine($"Done Reason: {doneStreamChunk.DoneReason}");
Console.WriteLine($"Eval count: {doneStreamChunk.EvalCount}");
Console.WriteLine($"Eval duration: {doneStreamChunk.EvalDuration}");
Console.WriteLine($"Load duration: {doneStreamChunk.LoadDuration}");
Console.WriteLine($"Total duration: {doneStreamChunk.TotalDuration}");
Console.WriteLine($"Prompt eval count: {doneStreamChunk.PromptEvalCount}");
Console.WriteLine($"Prompt eval duration: {doneStreamChunk.PromptEvalDuration}");
}
Console.WriteLine("------------------------");
});
}
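
/// <summary>
/// A small follow-up sketch (the helper name is illustrative): derives tokens per second from the
/// done chunk's metadata, assuming Ollama reports durations in nanoseconds, as its API documentation states.
/// </summary>
private static double TokensPerSecond(ChatDoneResponseStream doneChunk)
    => doneChunk.EvalDuration > 0
        ? doneChunk.EvalCount / (doneChunk.EvalDuration / 1_000_000_000d)
        : 0d;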
}
104 changes: 104 additions & 0 deletions dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs
@@ -4,6 +4,7 @@
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Ollama;
using OllamaSharp.Models.Chat;

namespace ChatCompletion;

@@ -29,6 +30,44 @@ public Task StreamChatAsync()
return this.StartStreamingChatAsync(chatService);
}

/// <summary>
/// This example demonstrates retrieving extra information from a streaming chat completion response using Ollama.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
[Fact]
public async Task StreamChatWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine("======== Ollama - Chat Completion Streaming ========");

var chatService = new OllamaChatCompletionService(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
modelId: TestConfiguration.Ollama.ModelId);

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

var chatHistory = new ChatHistory("You are a librarian, expert about books");
this.OutputLastMessage(chatHistory);

// First user message
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
this.OutputLastMessage(chatHistory);

await foreach (var chatUpdate in chatService.GetStreamingChatMessageContentsAsync(chatHistory))
{
var innerContent = chatUpdate.InnerContent as ChatResponseStream;
OutputInnerContent(innerContent!);
}
}

/// <summary>
/// Demonstrates how you can template a chat history call while using the kernel for invocation.
/// </summary>
[Fact]
public async Task StreamChatPromptAsync()
{
@@ -55,6 +94,41 @@ public async Task StreamChatPromptAsync()
Console.WriteLine(reply);
}

/// <summary>
/// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
[Fact]
public async Task StreamChatPromptWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

StringBuilder chatPrompt = new("""
<message role="system">You are a librarian, expert about books</message>
<message role="user">Hi, I'm looking for book suggestions</message>
""");

var kernel = Kernel.CreateBuilder()
.AddOllamaChatCompletion(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
modelId: TestConfiguration.Ollama.ModelId)
.Build();

var reply = await StreamMessageOutputFromKernelAsync(kernel, chatPrompt.ToString());

chatPrompt.AppendLine($"<message role=\"assistant\"><![CDATA[{reply}]]></message>");
chatPrompt.AppendLine("<message role=\"user\">I love history and philosophy, I'd like to learn something new about Greece, any suggestion</message>");

await foreach (var chatUpdate in kernel.InvokePromptStreamingAsync<StreamingChatMessageContent>(chatPrompt.ToString()))
{
var innerContent = chatUpdate.InnerContent as ChatResponseStream;
OutputInnerContent(innerContent!);
}
}

/// <summary>
/// This example demonstrates how the chat completion service streams text content.
/// It shows how to access the response update via StreamingChatMessageContent.Content property
@@ -158,4 +232,34 @@ private async Task<string> StreamMessageOutputFromKernelAsync(Kernel kernel, str
Console.WriteLine("\n------------------------");
return fullMessage;
}

/// <summary>
/// Retrieve extra information from each streaming chunk response.
/// </summary>
/// <param name="streamChunk">Streaming chunk provided as inner content of a streaming chat message</param>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
private void OutputInnerContent(ChatResponseStream streamChunk)
{
Console.WriteLine($"Model: {streamChunk.Model}");
Console.WriteLine($"Message role: {streamChunk.Message.Role}");
Console.WriteLine($"Message content: {streamChunk.Message.Content}");
Console.WriteLine($"Created at: {streamChunk.CreatedAt}");
Console.WriteLine($"Done: {streamChunk.Done}");

// The last chunk in the stream is a ChatDoneResponseStream type with additional metadata.
if (streamChunk is ChatDoneResponseStream doneStream)
{
Console.WriteLine($"Done Reason: {doneStream.DoneReason}");
Console.WriteLine($"Eval count: {doneStream.EvalCount}");
Console.WriteLine($"Eval duration: {doneStream.EvalDuration}");
Console.WriteLine($"Load duration: {doneStream.LoadDuration}");
Console.WriteLine($"Total duration: {doneStream.TotalDuration}");
Console.WriteLine($"Prompt eval count: {doneStream.PromptEvalCount}");
Console.WriteLine($"Prompt eval duration: {doneStream.PromptEvalDuration}");
}
Console.WriteLine("------------------------");
}
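
/// <summary>
/// A minimal sketch (helper and parameter names are illustrative): streams a reply but surfaces only
/// the final metadata chunk, assuming the same OllamaSharp chunk types used above.
/// </summary>
private static async Task OutputDoneChunkOnlyAsync(IChatCompletionService chatService, ChatHistory chatHistory)
{
    await foreach (var update in chatService.GetStreamingChatMessageContentsAsync(chatHistory))
    {
        // Intermediate updates are plain ChatResponseStream chunks; only the last one carries the metadata.
        if (update.InnerContent is ChatDoneResponseStream done)
        {
            Console.WriteLine($"Done Reason: {done.DoneReason}, Total duration: {done.TotalDuration}");
        }
    }
}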
}
42 changes: 42 additions & 0 deletions
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
Expand Down Expand Up @@ -102,6 +103,19 @@ public async Task GetChatMessageContentsShouldHaveModelAndInnerContentAsync()

Assert.NotNull(message.ModelId);
Assert.Equal("phi3", message.ModelId);

// OllamaSharp always performs streaming, even for non-streaming calls;
// the inner content in this case is the full list of chunks returned by the Ollama client.
Assert.NotNull(message.InnerContent);
Assert.IsType<List<ChatResponseStream>>(message.InnerContent);
var innerContentList = message.InnerContent as List<ChatResponseStream>;
Assert.NotNull(innerContentList);
Assert.NotEmpty(innerContentList);
var lastMessage = innerContentList.Last();
var doneMessageChunk = lastMessage as ChatDoneResponseStream;
Assert.NotNull(doneMessageChunk);
Assert.True(doneMessageChunk.Done);
Assert.Equal("stop", doneMessageChunk.DoneReason);
}

[Fact]
Expand Down Expand Up @@ -142,6 +156,34 @@ public async Task GetStreamingChatMessageContentsShouldHaveModelAndInnerContentA
Assert.True(innerContent.Done);
}

[Fact]
public async Task GetStreamingChatMessageContentsShouldHaveDoneReasonAsync()
{
// Arrange
var expectedModel = "phi3";
var sut = new OllamaChatCompletionService(
expectedModel,
httpClient: this._httpClient);

var chat = new ChatHistory();
chat.AddMessage(AuthorRole.User, "fake-text");

// Act
StreamingChatMessageContent? lastMessage = null;
await foreach (var message in sut.GetStreamingChatMessageContentsAsync(chat))
{
lastMessage = message;
}

// Assert
Assert.NotNull(lastMessage);
Assert.IsType<ChatDoneResponseStream>(lastMessage.InnerContent);
var innerContent = lastMessage.InnerContent as ChatDoneResponseStream;
Assert.NotNull(innerContent);
Assert.True(innerContent.Done);
Assert.Equal("stop", innerContent.DoneReason);
}

[Fact]
public async Task GetStreamingChatMessageContentsExecutionSettingsMustBeSentAsync()
{
