From 0ae0e52de10e213306fba6b2fc3cace7d5cde4c2 Mon Sep 17 00:00:00 2001
From: Artur Kordowski <9746197+akordowski@users.noreply.github.com>
Date: Thu, 19 Sep 2024 17:25:34 +0200
Subject: [PATCH] .Net: Add Examples to retrieve FinishReason/DoneReason and
other details with the Ollama connector (#8889)
### Motivation and Context
Other connectors provide `FinishReason = "STOP"` metadata in the last
`StreamingChatMessageContent` message, so I adjusted the
`GetStreamingChatMessageContentsAsync()` method in
`OllamaChatCompletionService` to provide the same metadata.
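For reference, a minimal consumer-side sketch of what this enables. This is hedged: `chatService` and `chatHistory` stand in for the setup shown in the samples below, and `"FinishReason"` is assumed to be the metadata key, matching other connectors:

```csharp
using System;
using System.Threading.Tasks;
using Microsoft.SemanticKernel.ChatCompletion;

// Sketch only: read the FinishReason metadata from the last streaming update.
static async Task PrintFinishReasonAsync(IChatCompletionService chatService, ChatHistory chatHistory)
{
    await foreach (var update in chatService.GetStreamingChatMessageContentsAsync(chatHistory))
    {
        // Only the final update is expected to carry the FinishReason entry.
        if (update.Metadata?.TryGetValue("FinishReason", out var finishReason) == true)
        {
            Console.WriteLine($"Finish reason: {finishReason}");
        }
    }
}
```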
### Description
The `ChatResponseStream` message from the **OllamaSharp** package
provides a `Done` property, which indicates whether the stream has
finished. This property is used to determine when to set the
`FinishReason = "STOP"` metadata.
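Roughly, the mapping looks like the sketch below. This is illustrative only, not the actual connector code, and the helper name is made up:

```csharp
using System.Collections.Generic;
using OllamaSharp.Models.Chat;

// Illustrative helper: when the OllamaSharp chunk reports Done, surface the
// FinishReason metadata; intermediate chunks get no metadata.
static IReadOnlyDictionary<string, object?>? GetFinishMetadata(ChatResponseStream chunk) =>
    chunk.Done
        ? new Dictionary<string, object?> { ["FinishReason"] = "STOP" }
        : null;
```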
### Contribution Checklist
- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone :smile:
---------
Co-authored-by: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com>
---
.../ChatCompletion/Ollama_ChatCompletion.cs | 110 ++++++++++++++++++
.../Ollama_ChatCompletionStreaming.cs | 104 +++++++++++++++++
.../Services/OllamaChatCompletionTests.cs | 42 +++++++
3 files changed, 256 insertions(+)
diff --git a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
index b76b4fff88a1..7768ff24ba36 100644
--- a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
+++ b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
@@ -4,12 +4,16 @@
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Ollama;
+using OllamaSharp.Models.Chat;
namespace ChatCompletion;
// The following example shows how to use Semantic Kernel with Ollama Chat Completion API
public class Ollama_ChatCompletion(ITestOutputHelper output) : BaseTest(output)
{
+ /// <summary>
+ /// Demonstrates how you can use the chat completion service directly.
+ /// </summary>
[Fact]
public async Task ServicePromptAsync()
{
@@ -45,6 +49,46 @@ public async Task ServicePromptAsync()
this.OutputLastMessage(chatHistory);
}
+ /// <summary>
+ /// Demonstrates how you can get extra information from the service response, using the underlying inner content.
+ /// </summary>
+ /// <remarks>
+ /// This is a breaking glass scenario: running with a different version of the OllamaSharp library that introduces
+ /// breaking changes may break the code below.
+ /// </remarks>
+ [Fact]
+ public async Task ServicePromptWithInnerContentAsync()
+ {
+ Assert.NotNull(TestConfiguration.Ollama.ModelId);
+
+ Console.WriteLine("======== Ollama - Chat Completion ========");
+
+ var chatService = new OllamaChatCompletionService(
+ endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
+ modelId: TestConfiguration.Ollama.ModelId);
+
+ Console.WriteLine("Chat content:");
+ Console.WriteLine("------------------------");
+
+ var chatHistory = new ChatHistory("You are a librarian, expert about books");
+
+ // First user message
+ chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+ this.OutputLastMessage(chatHistory);
+
+ // First assistant message
+ var reply = await chatService.GetChatMessageContentAsync(chatHistory);
+
+ // Assistant message details
+ // OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content is always a list of chunks.
+ var replyInnerContent = reply.InnerContent as List<ChatResponseStream>;
+
+ OutputInnerContent(replyInnerContent!);
+ }
+
+ /// <summary>
+ /// Demonstrates how you can template a chat history call using the kernel for invocation.
+ /// </summary>
[Fact]
public async Task ChatPromptAsync()
{
@@ -70,4 +114,70 @@ public async Task ChatPromptAsync()
Console.WriteLine(reply);
}
+
+ /// <summary>
+ /// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation.
+ /// </summary>
+ /// <remarks>
+ /// This is a breaking glass scenario: running with a different version of the OllamaSharp library that introduces
+ /// breaking changes may break the code below.
+ /// </remarks>
+ [Fact]
+ public async Task ChatPromptWithInnerContentAsync()
+ {
+ Assert.NotNull(TestConfiguration.Ollama.ModelId);
+
+ StringBuilder chatPrompt = new("""
+ <message role="system">You are a librarian, expert about books</message>
+ <message role="user">Hi, I'm looking for book suggestions</message>
+ """);
+
+ var kernel = Kernel.CreateBuilder()
+ .AddOllamaChatCompletion(
+ endpoint: new Uri(TestConfiguration.Ollama.Endpoint ?? "http://localhost:11434"),
+ modelId: TestConfiguration.Ollama.ModelId)
+ .Build();
+
+ var functionResult = await kernel.InvokePromptAsync(chatPrompt.ToString());
+
+ // OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content of a non-streaming result is a list of the generated chunks.
+ var messageContent = functionResult.GetValue<ChatMessageContent>(); // Retrieves underlying chat message content from FunctionResult.
+ var replyInnerContent = messageContent!.InnerContent as List<ChatResponseStream>; // Retrieves inner content from ChatMessageContent.
+
+ OutputInnerContent(replyInnerContent!);
+ }
+
+ /// <summary>
+ /// Retrieve extra information from each streaming chunk response in a list of chunks.
+ /// </summary>
+ /// <param name="innerContent">List of streaming chunks provided as inner content of a chat message</param>
+ /// <remarks>
+ /// This is a breaking glass scenario: running with a different version of the OllamaSharp library that introduces
+ /// breaking changes may break the code below.
+ /// </remarks>
+ private void OutputInnerContent(List<ChatResponseStream> innerContent)
+ {
+ Console.WriteLine($"Model: {innerContent![0].Model}"); // Model doesn't change per chunk, so we can get it from the first chunk only
+ Console.WriteLine(" -- Chunk changing data -- ");
+
+ innerContent.ForEach(streamChunk =>
+ {
+ Console.WriteLine($"Message role: {streamChunk.Message.Role}");
+ Console.WriteLine($"Message content: {streamChunk.Message.Content}");
+ Console.WriteLine($"Created at: {streamChunk.CreatedAt}");
+ Console.WriteLine($"Done: {streamChunk.Done}");
+ // The last message in the chunk list is a ChatDoneResponseStream type with additional metadata.
+ if (streamChunk is ChatDoneResponseStream doneStreamChunk)
+ {
+ Console.WriteLine($"Done Reason: {doneStreamChunk.DoneReason}");
+ Console.WriteLine($"Eval count: {doneStreamChunk.EvalCount}");
+ Console.WriteLine($"Eval duration: {doneStreamChunk.EvalDuration}");
+ Console.WriteLine($"Load duration: {doneStreamChunk.LoadDuration}");
+ Console.WriteLine($"Total duration: {doneStreamChunk.TotalDuration}");
+ Console.WriteLine($"Prompt eval count: {doneStreamChunk.PromptEvalCount}");
+ Console.WriteLine($"Prompt eval duration: {doneStreamChunk.PromptEvalDuration}");
+ }
+ Console.WriteLine("------------------------");
+ });
+ }
}
diff --git a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs
index d83aac04e9bf..45424cd3f87e 100644
--- a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs
+++ b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs
@@ -4,6 +4,7 @@
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Ollama;
+using OllamaSharp.Models.Chat;
namespace ChatCompletion;
@@ -29,6 +30,44 @@ public Task StreamChatAsync()
return this.StartStreamingChatAsync(chatService);
}
+ /// <summary>
+ /// This example demonstrates retrieving extra information from chat completion streaming using Ollama.
+ /// </summary>
+ /// <remarks>
+ /// This is a breaking glass scenario: running with a different version of the OllamaSharp library that introduces
+ /// breaking changes may break the code below.
+ /// </remarks>
+ [Fact]
+ public async Task StreamChatWithInnerContentAsync()
+ {
+ Assert.NotNull(TestConfiguration.Ollama.ModelId);
+
+ Console.WriteLine("======== Ollama - Chat Completion Streaming ========");
+
+ var chatService = new OllamaChatCompletionService(
+ endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
+ modelId: TestConfiguration.Ollama.ModelId);
+
+ Console.WriteLine("Chat content:");
+ Console.WriteLine("------------------------");
+
+ var chatHistory = new ChatHistory("You are a librarian, expert about books");
+ this.OutputLastMessage(chatHistory);
+
+ // First user message
+ chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+ this.OutputLastMessage(chatHistory);
+
+ await foreach (var chatUpdate in chatService.GetStreamingChatMessageContentsAsync(chatHistory))
+ {
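+ // Breaking glass: each streaming update exposes the raw OllamaSharp chunk via its InnerContent property.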
+ var innerContent = chatUpdate.InnerContent as ChatResponseStream;
+ OutputInnerContent(innerContent!);
+ }
+ }
+
+ /// <summary>
+ /// Demonstrates how you can template a chat history call while using the kernel for invocation.
+ /// </summary>
[Fact]
public async Task StreamChatPromptAsync()
{
@@ -55,6 +94,41 @@ public async Task StreamChatPromptAsync()
Console.WriteLine(reply);
}
+ /// <summary>
+ /// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation.
+ /// </summary>
+ /// <remarks>
+ /// This is a breaking glass scenario: running with a different version of the OllamaSharp library that introduces
+ /// breaking changes may break the code below.
+ /// </remarks>
+ [Fact]
+ public async Task StreamChatPromptWithInnerContentAsync()
+ {
+ Assert.NotNull(TestConfiguration.Ollama.ModelId);
+
+ StringBuilder chatPrompt = new("""
+ <message role="system">You are a librarian, expert about books</message>
+ <message role="user">Hi, I'm looking for book suggestions</message>
+ """);
+
+ var kernel = Kernel.CreateBuilder()
+ .AddOllamaChatCompletion(
+ endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
+ modelId: TestConfiguration.Ollama.ModelId)
+ .Build();
+
+ var reply = await StreamMessageOutputFromKernelAsync(kernel, chatPrompt.ToString());
+
+ chatPrompt.AppendLine($"");
+ chatPrompt.AppendLine("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
+
+ await foreach (var chatUpdate in kernel.InvokePromptStreamingAsync(chatPrompt.ToString()))
+ {
+ var innerContent = chatUpdate.InnerContent as ChatResponseStream;
+ OutputInnerContent(innerContent!);
+ }
+ }
+
/// <summary>
/// This example demonstrates how the chat completion service streams text content.
/// It shows how to access the response update via StreamingChatMessageContent.Content property
@@ -158,4 +232,34 @@ private async Task<string> StreamMessageOutputFromKernelAsync(Kernel kernel, str
Console.WriteLine("\n------------------------");
return fullMessage;
}
+
+ /// <summary>
+ /// Retrieve extra information from each streaming chunk response.
+ /// </summary>
+ /// <param name="streamChunk">Streaming chunk provided as inner content of a streaming chat message</param>
+ /// <remarks>
+ /// This is a breaking glass scenario: running with a different version of the OllamaSharp library that introduces
+ /// breaking changes may break the code below.
+ /// </remarks>
+ private void OutputInnerContent(ChatResponseStream streamChunk)
+ {
+ Console.WriteLine($"Model: {streamChunk.Model}");
+ Console.WriteLine($"Message role: {streamChunk.Message.Role}");
+ Console.WriteLine($"Message content: {streamChunk.Message.Content}");
+ Console.WriteLine($"Created at: {streamChunk.CreatedAt}");
+ Console.WriteLine($"Done: {streamChunk.Done}");
+
// The last message in the stream is a ChatDoneResponseStream type with additional metadata.
+ if (streamChunk is ChatDoneResponseStream doneStream)
+ {
+ Console.WriteLine($"Done Reason: {doneStream.DoneReason}");
+ Console.WriteLine($"Eval count: {doneStream.EvalCount}");
+ Console.WriteLine($"Eval duration: {doneStream.EvalDuration}");
+ Console.WriteLine($"Load duration: {doneStream.LoadDuration}");
+ Console.WriteLine($"Total duration: {doneStream.TotalDuration}");
+ Console.WriteLine($"Prompt eval count: {doneStream.PromptEvalCount}");
+ Console.WriteLine($"Prompt eval duration: {doneStream.PromptEvalDuration}");
+ }
+ Console.WriteLine("------------------------");
+ }
}
diff --git a/dotnet/src/Connectors/Connectors.Ollama.UnitTests/Services/OllamaChatCompletionTests.cs b/dotnet/src/Connectors/Connectors.Ollama.UnitTests/Services/OllamaChatCompletionTests.cs
index 40e1b840beaf..09fff4ab5d95 100644
--- a/dotnet/src/Connectors/Connectors.Ollama.UnitTests/Services/OllamaChatCompletionTests.cs
+++ b/dotnet/src/Connectors/Connectors.Ollama.UnitTests/Services/OllamaChatCompletionTests.cs
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
+using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
@@ -102,6 +103,19 @@ public async Task GetChatMessageContentsShouldHaveModelAndInnerContentAsync()
Assert.NotNull(message.ModelId);
Assert.Equal("phi3", message.ModelId);
+
+ // OllamaSharp always performs streaming, even for non-streaming calls.
+ // The inner content in this case is the full list of chunks returned by the Ollama client.
+ Assert.NotNull(message.InnerContent);
+ Assert.IsType<List<ChatResponseStream>>(message.InnerContent);
+ var innerContentList = message.InnerContent as List<ChatResponseStream>;
+ Assert.NotNull(innerContentList);
+ Assert.NotEmpty(innerContentList);
+ var lastMessage = innerContentList.Last();
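+ // Only the final chunk should be a ChatDoneResponseStream, which carries Done and DoneReason.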
+ var doneMessageChunk = lastMessage as ChatDoneResponseStream;
+ Assert.NotNull(doneMessageChunk);
+ Assert.True(doneMessageChunk.Done);
+ Assert.Equal("stop", doneMessageChunk.DoneReason);
}
[Fact]
@@ -142,6 +156,34 @@ public async Task GetStreamingChatMessageContentsShouldHaveModelAndInnerContentA
Assert.True(innerContent.Done);
}
+ [Fact]
+ public async Task GetStreamingChatMessageContentsShouldHaveDoneReasonAsync()
+ {
+ // Arrange
+ var expectedModel = "phi3";
+ var sut = new OllamaChatCompletionService(
+ expectedModel,
+ httpClient: this._httpClient);
+
+ var chat = new ChatHistory();
+ chat.AddMessage(AuthorRole.User, "fake-text");
+
+ // Act
+ StreamingChatMessageContent? lastMessage = null;
+ await foreach (var message in sut.GetStreamingChatMessageContentsAsync(chat))
+ {
+ lastMessage = message;
+ }
+
+ // Assert
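+ // The final streaming update should wrap the ChatDoneResponseStream chunk with Done set and DoneReason "stop".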
+ Assert.NotNull(lastMessage);
+ Assert.IsType<ChatDoneResponseStream>(lastMessage.InnerContent);
+ var innerContent = lastMessage.InnerContent as ChatDoneResponseStream;
+ Assert.NotNull(innerContent);
+ Assert.True(innerContent.Done);
+ Assert.Equal("stop", innerContent.DoneReason);
+ }
+
[Fact]
public async Task GetStreamingChatMessageContentsExecutionSettingsMustBeSentAsync()
{