Merge branch 'main' into fix_azure_api_key_check
moonbox3 committed Sep 19, 2024
2 parents 86fb934 + 0ae0e52 commit d45f31c
Showing 3 changed files with 256 additions and 0 deletions.
110 changes: 110 additions & 0 deletions dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
@@ -4,12 +4,16 @@
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Ollama;
using OllamaSharp.Models.Chat;

namespace ChatCompletion;

// The following example shows how to use Semantic Kernel with the Ollama Chat Completion API
public class Ollama_ChatCompletion(ITestOutputHelper output) : BaseTest(output)
{
/// <summary>
/// Demonstrates how you can use the chat completion service directly.
/// </summary>
[Fact]
public async Task ServicePromptAsync()
{
@@ -45,6 +49,46 @@ public async Task ServicePromptAsync()
this.OutputLastMessage(chatHistory);
}

/// <summary>
/// Demonstrates how you can get extra information from the service response, using the underlying inner content.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
[Fact]
public async Task ServicePromptWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine("======== Ollama - Chat Completion ========");

var chatService = new OllamaChatCompletionService(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
modelId: TestConfiguration.Ollama.ModelId);

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

var chatHistory = new ChatHistory("You are a librarian, expert about books");

// First user message
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
this.OutputLastMessage(chatHistory);

// First assistant message
var reply = await chatService.GetChatMessageContentAsync(chatHistory);

// Assistant message details
// OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content is always a list of chunks.
var replyInnerContent = reply.InnerContent as List<ChatResponseStream>;

OutputInnerContent(replyInnerContent!);
}
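
/// <summary>
/// A minimal sketch, assuming the OllamaSharp chunk types used above (the helper name is illustrative):
/// extracts the final metadata chunk from a non-streaming reply by pattern matching on the inner content,
/// avoiding the null-forgiving casts used in the samples.
/// </summary>
private static ChatDoneResponseStream? GetDoneChunk(ChatMessageContent reply)
    => reply.InnerContent is List<ChatResponseStream> { Count: > 0 } chunks
        ? chunks[^1] as ChatDoneResponseStream
        : null;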

/// <summary>
/// Demonstrates how you can template a chat history call using the kernel for invocation.
/// </summary>
[Fact]
public async Task ChatPromptAsync()
{
@@ -70,4 +114,70 @@ public async Task ChatPromptAsync()

Console.WriteLine(reply);
}

/// <summary>
/// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
[Fact]
public async Task ChatPromptWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

StringBuilder chatPrompt = new("""
<message role="system">You are a librarian, expert about books</message>
<message role="user">Hi, I'm looking for book suggestions</message>
""");

var kernel = Kernel.CreateBuilder()
.AddOllamaChatCompletion(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint ?? "http://localhost:11434"),
modelId: TestConfiguration.Ollama.ModelId)
.Build();

var functionResult = await kernel.InvokePromptAsync(chatPrompt.ToString());

// OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content of a non-streaming result is a list of the generated chunks.
var messageContent = functionResult.GetValue<ChatMessageContent>(); // Retrieves underlying chat message content from FunctionResult.
var replyInnerContent = messageContent!.InnerContent as List<ChatResponseStream>; // Retrieves inner content from ChatMessageContent.

OutputInnerContent(replyInnerContent!);
}

/// <summary>
/// Retrieve extra information from each streaming chunk response in a list of chunks.
/// </summary>
/// <param name="innerContent">List of streaming chunks provided as inner content of a chat message</param>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
private void OutputInnerContent(List<ChatResponseStream> innerContent)
{
Console.WriteLine($"Model: {innerContent![0].Model}"); // Model doesn't change per chunk, so we can get it from the first chunk only
Console.WriteLine(" -- Chunk changing data -- ");

innerContent.ForEach(streamChunk =>
{
Console.WriteLine($"Message role: {streamChunk.Message.Role}");
Console.WriteLine($"Message content: {streamChunk.Message.Content}");
Console.WriteLine($"Created at: {streamChunk.CreatedAt}");
Console.WriteLine($"Done: {streamChunk.Done}");
// The last message in the list of chunks is a ChatDoneResponseStream type with additional metadata.
if (streamChunk is ChatDoneResponseStream doneStreamChunk)
{
Console.WriteLine($"Done Reason: {doneStreamChunk.DoneReason}");
Console.WriteLine($"Eval count: {doneStreamChunk.EvalCount}");
Console.WriteLine($"Eval duration: {doneStreamChunk.EvalDuration}");
Console.WriteLine($"Load duration: {doneStreamChunk.LoadDuration}");
Console.WriteLine($"Total duration: {doneStreamChunk.TotalDuration}");
Console.WriteLine($"Prompt eval count: {doneStreamChunk.PromptEvalCount}");
Console.WriteLine($"Prompt eval duration: {doneStreamChunk.PromptEvalDuration}");
}
Console.WriteLine("------------------------");
});
}
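
/// <summary>
/// A small follow-up sketch (the helper name is illustrative): derives tokens per second from the
/// done chunk's metadata, assuming Ollama reports durations in nanoseconds, as its API documentation states.
/// </summary>
private static double TokensPerSecond(ChatDoneResponseStream doneChunk)
    => doneChunk.EvalDuration > 0
        ? doneChunk.EvalCount / (doneChunk.EvalDuration / 1_000_000_000d)
        : 0d;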
}
104 changes: 104 additions & 0 deletions dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs
@@ -4,6 +4,7 @@
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.Ollama;
using OllamaSharp.Models.Chat;

namespace ChatCompletion;

@@ -29,6 +30,44 @@ public Task StreamChatAsync()
return this.StartStreamingChatAsync(chatService);
}

/// <summary>
/// This example demonstrates retrieving extra information from a streaming chat completion response using Ollama.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
[Fact]
public async Task StreamChatWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine("======== Ollama - Chat Completion Streaming ========");

var chatService = new OllamaChatCompletionService(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
modelId: TestConfiguration.Ollama.ModelId);

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

var chatHistory = new ChatHistory("You are a librarian, expert about books");
this.OutputLastMessage(chatHistory);

// First user message
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
this.OutputLastMessage(chatHistory);

await foreach (var chatUpdate in chatService.GetStreamingChatMessageContentsAsync(chatHistory))
{
var innerContent = chatUpdate.InnerContent as ChatResponseStream;
OutputInnerContent(innerContent!);
}
}

/// <summary>
/// Demonstrates how you can template a chat history call while using the kernel for invocation.
/// </summary>
[Fact]
public async Task StreamChatPromptAsync()
{
@@ -55,6 +94,41 @@ public async Task StreamChatPromptAsync()
Console.WriteLine(reply);
}

/// <summary>
/// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
[Fact]
public async Task StreamChatPromptWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

StringBuilder chatPrompt = new("""
<message role="system">You are a librarian, expert about books</message>
<message role="user">Hi, I'm looking for book suggestions</message>
""");

var kernel = Kernel.CreateBuilder()
.AddOllamaChatCompletion(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
modelId: TestConfiguration.Ollama.ModelId)
.Build();

var reply = await StreamMessageOutputFromKernelAsync(kernel, chatPrompt.ToString());

chatPrompt.AppendLine($"<message role=\"assistant\"><![CDATA[{reply}]]></message>");
chatPrompt.AppendLine("<message role=\"user\">I love history and philosophy, I'd like to learn something new about Greece, any suggestion</message>");

await foreach (var chatUpdate in kernel.InvokePromptStreamingAsync<StreamingChatMessageContent>(chatPrompt.ToString()))
{
var innerContent = chatUpdate.InnerContent as ChatResponseStream;
OutputInnerContent(innerContent!);
}
}

/// <summary>
/// This example demonstrates how the chat completion service streams text content.
/// It shows how to access the response update via StreamingChatMessageContent.Content property
@@ -158,4 +232,34 @@ private async Task<string> StreamMessageOutputFromKernelAsync(Kernel kernel, str
Console.WriteLine("\n------------------------");
return fullMessage;
}

/// <summary>
/// Retrieve extra information from each streaming chunk response.
/// </summary>
/// <param name="streamChunk">Streaming chunk provided as inner content of a streaming chat message</param>
/// <remarks>
/// This is a breaking glass scenario: running against a different version of the OllamaSharp library
/// that introduces breaking changes may break the code below.
/// </remarks>
private void OutputInnerContent(ChatResponseStream streamChunk)
{
Console.WriteLine($"Model: {streamChunk.Model}");
Console.WriteLine($"Message role: {streamChunk.Message.Role}");
Console.WriteLine($"Message content: {streamChunk.Message.Content}");
Console.WriteLine($"Created at: {streamChunk.CreatedAt}");
Console.WriteLine($"Done: {streamChunk.Done}");

// The last chunk in the stream is a ChatDoneResponseStream type with additional metadata.
if (streamChunk is ChatDoneResponseStream doneStream)
{
Console.WriteLine($"Done Reason: {doneStream.DoneReason}");
Console.WriteLine($"Eval count: {doneStream.EvalCount}");
Console.WriteLine($"Eval duration: {doneStream.EvalDuration}");
Console.WriteLine($"Load duration: {doneStream.LoadDuration}");
Console.WriteLine($"Total duration: {doneStream.TotalDuration}");
Console.WriteLine($"Prompt eval count: {doneStream.PromptEvalCount}");
Console.WriteLine($"Prompt eval duration: {doneStream.PromptEvalDuration}");
}
Console.WriteLine("------------------------");
}
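
/// <summary>
/// A minimal sketch (helper and parameter names are illustrative): streams a reply but surfaces only
/// the final metadata chunk, assuming the same OllamaSharp chunk types used above.
/// </summary>
private static async Task OutputDoneChunkOnlyAsync(IChatCompletionService chatService, ChatHistory chatHistory)
{
    await foreach (var update in chatService.GetStreamingChatMessageContentsAsync(chatHistory))
    {
        // Intermediate updates are plain ChatResponseStream chunks; only the last one carries the metadata.
        if (update.InnerContent is ChatDoneResponseStream done)
        {
            Console.WriteLine($"Done Reason: {done.DoneReason}, Total duration: {done.TotalDuration}");
        }
    }
}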
}
42 changes: 42 additions & 0 deletions
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
Expand Down Expand Up @@ -102,6 +103,19 @@ public async Task GetChatMessageContentsShouldHaveModelAndInnerContentAsync()

Assert.NotNull(message.ModelId);
Assert.Equal("phi3", message.ModelId);

// OllamaSharp always performs streaming, even for non-streaming calls;
// the inner content in this case is the full list of chunks returned by the Ollama client.
Assert.NotNull(message.InnerContent);
Assert.IsType<List<ChatResponseStream>>(message.InnerContent);
var innerContentList = message.InnerContent as List<ChatResponseStream>;
Assert.NotNull(innerContentList);
Assert.NotEmpty(innerContentList);
var lastMessage = innerContentList.Last();
var doneMessageChunk = lastMessage as ChatDoneResponseStream;
Assert.NotNull(doneMessageChunk);
Assert.True(doneMessageChunk.Done);
Assert.Equal("stop", doneMessageChunk.DoneReason);
}

[Fact]
Expand Down Expand Up @@ -142,6 +156,34 @@ public async Task GetStreamingChatMessageContentsShouldHaveModelAndInnerContentA
Assert.True(innerContent.Done);
}

[Fact]
public async Task GetStreamingChatMessageContentsShouldHaveDoneReasonAsync()
{
// Arrange
var expectedModel = "phi3";
var sut = new OllamaChatCompletionService(
expectedModel,
httpClient: this._httpClient);

var chat = new ChatHistory();
chat.AddMessage(AuthorRole.User, "fake-text");

// Act
StreamingChatMessageContent? lastMessage = null;
await foreach (var message in sut.GetStreamingChatMessageContentsAsync(chat))
{
lastMessage = message;
}

// Assert
Assert.NotNull(lastMessage);
Assert.IsType<ChatDoneResponseStream>(lastMessage.InnerContent);
var innerContent = lastMessage.InnerContent as ChatDoneResponseStream;
Assert.NotNull(innerContent);
Assert.True(innerContent.Done);
Assert.Equal("stop", innerContent.DoneReason);
}

[Fact]
public async Task GetStreamingChatMessageContentsExecutionSettingsMustBeSentAsync()
{
