diff --git a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
index b76b4fff88a1..7768ff24ba36 100644
--- a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
+++ b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
@@ -4,12 +4,16 @@
 using Microsoft.SemanticKernel;
 using Microsoft.SemanticKernel.ChatCompletion;
 using Microsoft.SemanticKernel.Connectors.Ollama;
+using OllamaSharp.Models.Chat;
 
 namespace ChatCompletion;
 
 // The following example shows how to use Semantic Kernel with Ollama Chat Completion API
 public class Ollama_ChatCompletion(ITestOutputHelper output) : BaseTest(output)
 {
+    /// <summary>
+    /// Demonstrates how you can use the chat completion service directly.
+    /// </summary>
     [Fact]
     public async Task ServicePromptAsync()
     {
@@ -45,6 +49,46 @@ public async Task ServicePromptAsync()
         this.OutputLastMessage(chatHistory);
     }
 
+    /// <summary>
+    /// Demonstrates how you can get extra information from the service response, using the underlying inner content.
+    /// </summary>
+    /// <remarks>
+    /// This is a breaking glass scenario: running against a different version of the OllamaSharp library that introduces breaking changes
+    /// may break the code below.
+    /// </remarks>
+    [Fact]
+    public async Task ServicePromptWithInnerContentAsync()
+    {
+        Assert.NotNull(TestConfiguration.Ollama.ModelId);
+
+        Console.WriteLine("======== Ollama - Chat Completion ========");
+
+        var chatService = new OllamaChatCompletionService(
+            endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
+            modelId: TestConfiguration.Ollama.ModelId);
+
+        Console.WriteLine("Chat content:");
+        Console.WriteLine("------------------------");
+
+        var chatHistory = new ChatHistory("You are a librarian, expert about books");
+
+        // First user message
+        chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+        this.OutputLastMessage(chatHistory);
+
+        // First assistant message
+        var reply = await chatService.GetChatMessageContentAsync(chatHistory);
+
+        // Assistant message details
+        // OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content is always a list of chunks.
+        var replyInnerContent = reply.InnerContent as List<ChatResponseStream>;
+
+        OutputInnerContent(replyInnerContent!);
+    }
+
+    /// <summary>
+    /// Demonstrates how you can template a chat history call using the kernel for invocation.
+    /// </summary>
     [Fact]
    public async Task ChatPromptAsync()
     {
@@ -70,4 +114,70 @@ public async Task ChatPromptAsync()
 
         Console.WriteLine(reply);
     }
+
+    /// <summary>
+    /// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation.
+    /// </summary>
+    /// <remarks>
+    /// This is a breaking glass scenario: running against a different version of the OllamaSharp library that introduces breaking changes
+    /// may break the code below.
+    /// </remarks>
+    [Fact]
+    public async Task ChatPromptWithInnerContentAsync()
+    {
+        Assert.NotNull(TestConfiguration.Ollama.ModelId);
+
+        StringBuilder chatPrompt = new("""
+                                       <message role="system">You are a librarian, expert about books</message>
+                                       <message role="user">Hi, I'm looking for book suggestions</message>
+                                       """);
+
+        var kernel = Kernel.CreateBuilder()
+            .AddOllamaChatCompletion(
+                endpoint: new Uri(TestConfiguration.Ollama.Endpoint ?? "http://localhost:11434"),
+                modelId: TestConfiguration.Ollama.ModelId)
+            .Build();
+
+        var functionResult = await kernel.InvokePromptAsync(chatPrompt.ToString());
+
+        // OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content of a non-streaming result is a list of the generated chunks.
+        var messageContent = functionResult.GetValue<ChatMessageContent>(); // Retrieves underlying chat message content from FunctionResult.
+        var replyInnerContent = messageContent!.InnerContent as List<ChatResponseStream>; // Retrieves inner content from ChatMessageContent.
+
+        OutputInnerContent(replyInnerContent!);
+    }
+
+    /// <summary>
+    /// Retrieve extra information from each streaming chunk response in a list of chunks.
+    /// </summary>
+    /// <param name="innerContent">List of streaming chunks provided as inner content of a chat message</param>
+    /// <remarks>
+    /// This is a breaking glass scenario: running against a different version of the OllamaSharp library that introduces breaking changes
+    /// may break the code below.
+    /// </remarks>
+    private void OutputInnerContent(List<ChatResponseStream> innerContent)
+    {
+        Console.WriteLine($"Model: {innerContent![0].Model}"); // The model does not change per chunk, so we only need to read it from the first chunk.
+        Console.WriteLine(" -- Chunk changing data -- ");
+
+        innerContent.ForEach(streamChunk =>
+        {
+            Console.WriteLine($"Message role: {streamChunk.Message.Role}");
+            Console.WriteLine($"Message content: {streamChunk.Message.Content}");
+            Console.WriteLine($"Created at: {streamChunk.CreatedAt}");
+            Console.WriteLine($"Done: {streamChunk.Done}");
+            // The last message in the chunk list is a ChatDoneResponseStream type with additional metadata.
+            if (streamChunk is ChatDoneResponseStream doneStreamChunk)
+            {
+                Console.WriteLine($"Done Reason: {doneStreamChunk.DoneReason}");
+                Console.WriteLine($"Eval count: {doneStreamChunk.EvalCount}");
+                Console.WriteLine($"Eval duration: {doneStreamChunk.EvalDuration}");
+                Console.WriteLine($"Load duration: {doneStreamChunk.LoadDuration}");
+                Console.WriteLine($"Total duration: {doneStreamChunk.TotalDuration}");
+                Console.WriteLine($"Prompt eval count: {doneStreamChunk.PromptEvalCount}");
+                Console.WriteLine($"Prompt eval duration: {doneStreamChunk.PromptEvalDuration}");
+            }
+            Console.WriteLine("------------------------");
+        });
+    }
 }
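
For reference (illustrative, not part of the patch): the sample above exposes the non-streaming reply's InnerContent as the full list of OllamaSharp chunks. A caller could fold that list back into a single message plus its completion metadata along these lines; this is a minimal sketch assuming the same OllamaSharp ChatResponseStream/ChatDoneResponseStream types used in the sample, and the helper name is made up.

using System.Collections.Generic;
using System.Text;
using OllamaSharp.Models.Chat;

internal static class OllamaInnerContentHelpers
{
    /// <summary>Folds the chunk list exposed via InnerContent into one message string plus the done reason.</summary>
    public static (string Text, string? DoneReason) Aggregate(IEnumerable<ChatResponseStream> chunks)
    {
        var text = new StringBuilder();
        string? doneReason = null;

        foreach (var chunk in chunks)
        {
            // Each chunk carries a partial assistant message; concatenating them yields the full reply.
            text.Append(chunk.Message?.Content);

            // The final chunk is a ChatDoneResponseStream carrying the completion metadata.
            if (chunk is ChatDoneResponseStream done)
            {
                doneReason = done.DoneReason;
            }
        }

        return (text.ToString(), doneReason);
    }
}
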
"http://localhost:11434"), + modelId: TestConfiguration.Ollama.ModelId) + .Build(); + + var functionResult = await kernel.InvokePromptAsync(chatPrompt.ToString()); + + // Ollama Sharp does not support non-streaming and always perform streaming calls, for this reason, the inner content of a non-streaming result is a list of the generated chunks. + var messageContent = functionResult.GetValue(); // Retrieves underlying chat message content from FunctionResult. + var replyInnerContent = messageContent!.InnerContent as List; // Retrieves inner content from ChatMessageContent. + + OutputInnerContent(replyInnerContent!); + } + + /// + /// Retrieve extra information from each streaming chunk response in a list of chunks. + /// + /// List of streaming chunks provided as inner content of a chat message + /// + /// This is a breaking glass scenario, any attempt on running with different versions of OllamaSharp library that introduces breaking changes + /// may cause breaking changes in the code below. + /// + private void OutputInnerContent(List innerContent) + { + Console.WriteLine($"Model: {innerContent![0].Model}"); // Model doesn't change per chunk, so we can get it from the first chunk only + Console.WriteLine(" -- Chunk changing data -- "); + + innerContent.ForEach(streamChunk => + { + Console.WriteLine($"Message role: {streamChunk.Message.Role}"); + Console.WriteLine($"Message content: {streamChunk.Message.Content}"); + Console.WriteLine($"Created at: {streamChunk.CreatedAt}"); + Console.WriteLine($"Done: {streamChunk.Done}"); + /// The last message in the chunk is a type with additional metadata. + if (streamChunk is ChatDoneResponseStream doneStreamChunk) + { + Console.WriteLine($"Done Reason: {doneStreamChunk.DoneReason}"); + Console.WriteLine($"Eval count: {doneStreamChunk.EvalCount}"); + Console.WriteLine($"Eval duration: {doneStreamChunk.EvalDuration}"); + Console.WriteLine($"Load duration: {doneStreamChunk.LoadDuration}"); + Console.WriteLine($"Total duration: {doneStreamChunk.TotalDuration}"); + Console.WriteLine($"Prompt eval count: {doneStreamChunk.PromptEvalCount}"); + Console.WriteLine($"Prompt eval duration: {doneStreamChunk.PromptEvalDuration}"); + } + Console.WriteLine("------------------------"); + }); + } } diff --git a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs index d83aac04e9bf..45424cd3f87e 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs @@ -4,6 +4,7 @@ using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.Ollama; +using OllamaSharp.Models.Chat; namespace ChatCompletion; @@ -29,6 +30,44 @@ public Task StreamChatAsync() return this.StartStreamingChatAsync(chatService); } + /// + /// This example demonstrates retrieving extra information chat completion streaming using Ollama. + /// + /// + /// This is a breaking glass scenario, any attempt on running with different versions of OllamaSharp library that introduces breaking changes + /// may cause breaking changes in the code below. 
@@ -55,6 +94,41 @@ public async Task StreamChatPromptAsync()
 
         Console.WriteLine(reply);
     }
 
+    /// <summary>
+    /// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation.
+    /// </summary>
+    /// <remarks>
+    /// This is a breaking glass scenario: running against a different version of the OllamaSharp library that introduces breaking changes
+    /// may break the code below.
+    /// </remarks>
+    [Fact]
+    public async Task StreamChatPromptWithInnerContentAsync()
+    {
+        Assert.NotNull(TestConfiguration.Ollama.ModelId);
+
+        StringBuilder chatPrompt = new("""
+                                       <message role="system">You are a librarian, expert about books</message>
+                                       <message role="user">Hi, I'm looking for book suggestions</message>
+                                       """);
+
+        var kernel = Kernel.CreateBuilder()
+            .AddOllamaChatCompletion(
+                endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
+                modelId: TestConfiguration.Ollama.ModelId)
+            .Build();
+
+        var reply = await StreamMessageOutputFromKernelAsync(kernel, chatPrompt.ToString());
+
+        chatPrompt.AppendLine($"<message role=\"assistant\"><![CDATA[{reply}]]></message>");
+        chatPrompt.AppendLine("<message role=\"user\">I love history and philosophy, I'd like to learn something new about Greece, any suggestion</message>");
+
+        await foreach (var chatUpdate in kernel.InvokePromptStreamingAsync(chatPrompt.ToString()))
+        {
+            var innerContent = chatUpdate.InnerContent as ChatResponseStream;
+            OutputInnerContent(innerContent!);
+        }
+    }
+
     /// <summary>
     /// This example demonstrates how the chat completion service streams text content.
     /// It shows how to access the response update via StreamingChatMessageContent.Content property
@@ -158,4 +232,34 @@ private async Task<string> StreamMessageOutputFromKernelAsync(Kernel kernel, str
         Console.WriteLine("\n------------------------");
         return fullMessage;
     }
+
+    /// <summary>
+    /// Retrieve extra information from each streaming chunk response.
+    /// </summary>
+    /// <param name="streamChunk">Streaming chunk provided as inner content of a streaming chat message</param>
+    /// <remarks>
+    /// This is a breaking glass scenario: running against a different version of the OllamaSharp library that introduces breaking changes
+    /// may break the code below.
+    /// </remarks>
+    private void OutputInnerContent(ChatResponseStream streamChunk)
+    {
+        Console.WriteLine($"Model: {streamChunk.Model}");
+        Console.WriteLine($"Message role: {streamChunk.Message.Role}");
+        Console.WriteLine($"Message content: {streamChunk.Message.Content}");
+        Console.WriteLine($"Created at: {streamChunk.CreatedAt}");
+        Console.WriteLine($"Done: {streamChunk.Done}");
+
+        // The last message in the stream is a ChatDoneResponseStream type with additional metadata.
+        if (streamChunk is ChatDoneResponseStream doneStream)
+        {
+            Console.WriteLine($"Done Reason: {doneStream.DoneReason}");
+            Console.WriteLine($"Eval count: {doneStream.EvalCount}");
+            Console.WriteLine($"Eval duration: {doneStream.EvalDuration}");
+            Console.WriteLine($"Load duration: {doneStream.LoadDuration}");
+            Console.WriteLine($"Total duration: {doneStream.TotalDuration}");
+            Console.WriteLine($"Prompt eval count: {doneStream.PromptEvalCount}");
+            Console.WriteLine($"Prompt eval duration: {doneStream.PromptEvalDuration}");
+        }
+        Console.WriteLine("------------------------");
+    }
 }
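
For reference (illustrative, not part of the patch): the same breaking-glass pattern works outside the xunit samples. The sketch below streams a reply, captures the final ChatDoneResponseStream, and reports rough throughput; the endpoint and model are placeholders, and it assumes the duration properties carry the raw nanosecond values Ollama reports.

using System;
using System.Text;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Ollama;
using OllamaSharp.Models.Chat;

var chatService = new OllamaChatCompletionService(
    endpoint: new Uri("http://localhost:11434"), // placeholder endpoint
    modelId: "phi3");                            // placeholder model

var chatHistory = new ChatHistory("You are a librarian, expert about books");
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");

var reply = new StringBuilder();
ChatDoneResponseStream? doneChunk = null;

await foreach (var update in chatService.GetStreamingChatMessageContentsAsync(chatHistory))
{
    reply.Append(update.Content);

    // Only the last streamed update carries the ChatDoneResponseStream metadata.
    if (update.InnerContent is ChatDoneResponseStream done)
    {
        doneChunk = done;
    }
}

Console.WriteLine(reply);

if (doneChunk is not null && doneChunk.EvalDuration > 0)
{
    // Assumes eval_duration is the raw nanosecond value Ollama reports; convert before computing tokens/sec.
    var seconds = doneChunk.EvalDuration / 1_000_000_000.0;
    Console.WriteLine($"Done reason: {doneChunk.DoneReason}, ~{doneChunk.EvalCount / seconds:F1} tokens/sec");
}
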
diff --git a/dotnet/src/Connectors/Connectors.Ollama.UnitTests/Services/OllamaChatCompletionTests.cs b/dotnet/src/Connectors/Connectors.Ollama.UnitTests/Services/OllamaChatCompletionTests.cs
index 40e1b840beaf..09fff4ab5d95 100644
--- a/dotnet/src/Connectors/Connectors.Ollama.UnitTests/Services/OllamaChatCompletionTests.cs
+++ b/dotnet/src/Connectors/Connectors.Ollama.UnitTests/Services/OllamaChatCompletionTests.cs
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft. All rights reserved.
 
 using System;
+using System.Collections.Generic;
 using System.IO;
 using System.Linq;
 using System.Net.Http;
@@ -102,6 +103,19 @@ public async Task GetChatMessageContentsShouldHaveModelAndInnerContentAsync()
 
         Assert.NotNull(message.ModelId);
         Assert.Equal("phi3", message.ModelId);
+
+        // OllamaSharp always performs streaming, even for non-streaming calls.
+        // The inner content in this case is the full list of chunks returned by the Ollama client.
+        Assert.NotNull(message.InnerContent);
+        Assert.IsType<List<ChatResponseStream>>(message.InnerContent);
+        var innerContentList = message.InnerContent as List<ChatResponseStream>;
+        Assert.NotNull(innerContentList);
+        Assert.NotEmpty(innerContentList);
+        var lastMessage = innerContentList.Last();
+        var doneMessageChunk = lastMessage as ChatDoneResponseStream;
+        Assert.NotNull(doneMessageChunk);
+        Assert.True(doneMessageChunk.Done);
+        Assert.Equal("stop", doneMessageChunk.DoneReason);
     }
 
     [Fact]
@@ -142,6 +156,34 @@ public async Task GetStreamingChatMessageContentsShouldHaveModelAndInnerContentA
         Assert.True(innerContent.Done);
     }
 
+    [Fact]
+    public async Task GetStreamingChatMessageContentsShouldHaveDoneReasonAsync()
+    {
+        // Arrange
+        var expectedModel = "phi3";
+        var sut = new OllamaChatCompletionService(
+            expectedModel,
+            httpClient: this._httpClient);
+
+        var chat = new ChatHistory();
+        chat.AddMessage(AuthorRole.User, "fake-text");
+
+        // Act
+        StreamingChatMessageContent? lastMessage = null;
+        await foreach (var message in sut.GetStreamingChatMessageContentsAsync(chat))
+        {
+            lastMessage = message;
+        }
+
+        // Assert
+        Assert.NotNull(lastMessage);
+        Assert.IsType<ChatDoneResponseStream>(lastMessage.InnerContent);
+        var innerContent = lastMessage.InnerContent as ChatDoneResponseStream;
+        Assert.NotNull(innerContent);
+        Assert.True(innerContent.Done);
+        Assert.Equal("stop", innerContent.DoneReason);
+    }
+
     [Fact]
     public async Task GetStreamingChatMessageContentsExecutionSettingsMustBeSentAsync()
     {
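
For reference (illustrative, not part of the patch): the new assertions rely on the stubbed HTTP responses following Ollama's streaming chat format, one JSON object per line, with the final line carrying done and done_reason. The snippet below reconstructs that shape with made-up values; it is this final line that the OllamaSharp client surfaces as a ChatDoneResponseStream with DoneReason "stop".

// Illustrative stub payload for a streamed /api/chat response (all field values are made up).
const string fakeStreamedChatResponse =
    """
    {"model":"phi3","created_at":"2024-01-01T00:00:00Z","message":{"role":"assistant","content":"Hello "},"done":false}
    {"model":"phi3","created_at":"2024-01-01T00:00:01Z","message":{"role":"assistant","content":"world"},"done":false}
    {"model":"phi3","created_at":"2024-01-01T00:00:02Z","message":{"role":"assistant","content":""},"done":true,"done_reason":"stop","total_duration":5000000000,"load_duration":100000000,"prompt_eval_count":10,"prompt_eval_duration":200000000,"eval_count":25,"eval_duration":900000000}
    """;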