From 710a1e19c2292232e510c5ac3981bf17b72e3003 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Mon, 12 May 2025 15:26:05 -0500 Subject: [PATCH 01/12] integrate --- .../Controllers/ConversationController.cs | 59 +++++++------------ .../View/ConversationViewModel.cs | 4 ++ .../Models/Realtime/RealtimeSessionBody.cs | 2 +- 3 files changed, 26 insertions(+), 39 deletions(-) diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs index 5e180f22b..a5a96a123 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs @@ -143,55 +143,29 @@ public async Task> GetDialogs([FromRoute] string { var service = _services.GetRequiredService(); var userService = _services.GetRequiredService(); + var settings = _services.GetRequiredService(); + var (isAdmin, user) = await userService.IsAdminUser(_user.Id); - if (user == null) - { - return null; - } var filter = new ConversationFilter { Id = conversationId, - UserId = !isAdmin ? user.Id : null, + UserId = !isAdmin ? user?.Id : null, IsLoadLatestStates = isLoadStates }; - var conversations = await service.GetConversations(filter); - if (conversations.Items.IsNullOrEmpty()) - { - return null; - } - var result = ConversationViewModel.FromSession(conversations.Items.First()); - var state = _services.GetRequiredService(); - user = await userService.GetUser(result.User.Id); - result.User = UserViewModel.FromUser(user); - - return result; - } - - [HttpPost("/conversation/summary")] - public async Task GetConversationSummary([FromBody] ConversationSummaryModel input) - { - var service = _services.GetRequiredService(); - return await service.GetConversationSummary(input.ConversationIds); - } + var conversations = await service.GetConversations(filter); + var conv = !conversations.Items.IsNullOrEmpty() + ? ConversationViewModel.FromSession(conversations.Items.First()) + : new(); - [HttpGet("/conversation/{conversationId}/user")] - public async Task GetConversationUser([FromRoute] string conversationId) - { - var service = _services.GetRequiredService(); - var conversations = await service.GetConversations(new ConversationFilter - { - Id = conversationId - }); + user = !string.IsNullOrEmpty(conv?.User?.Id) + ? await userService.GetUser(conv.User.Id) + : null; - var userService = _services.GetRequiredService(); - var conversation = conversations?.Items?.FirstOrDefault(); - var userId = conversation == null ? _user.Id : conversation.UserId; - var user = await userService.GetUser(userId); if (user == null) { - return new UserViewModel + user = new User { Id = _user.Id, UserName = _user.UserName, @@ -202,7 +176,16 @@ public async Task GetConversationUser([FromRoute] string conversa }; } - return UserViewModel.FromUser(user); + conv.User = UserViewModel.FromUser(user); + conv.IsRealtimeEnabled = settings?.Assemblies?.Contains("BotSharp.Core.Realtime") ?? false; + return conv; + } + + [HttpPost("/conversation/summary")] + public async Task GetConversationSummary([FromBody] ConversationSummaryModel input) + { + var service = _services.GetRequiredService(); + return await service.GetConversationSummary(input.ConversationIds); } [HttpPut("/conversation/{conversationId}/update-title")] diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/View/ConversationViewModel.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/View/ConversationViewModel.cs index 6300f6612..e7110f44d 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/View/ConversationViewModel.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/View/ConversationViewModel.cs @@ -1,9 +1,13 @@ using BotSharp.Abstraction.Conversations.Dtos; +using System.Text.Json.Serialization; namespace BotSharp.OpenAPI.ViewModels.Conversations; public class ConversationViewModel : ConversationDto { + [JsonPropertyName("is_realtime_enabled")] + public bool IsRealtimeEnabled { get; set; } + public static ConversationViewModel FromSession(Conversation sess) { return new ConversationViewModel diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Models/Realtime/RealtimeSessionBody.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Models/Realtime/RealtimeSessionBody.cs index a5ed764e0..f767a6f16 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Models/Realtime/RealtimeSessionBody.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Models/Realtime/RealtimeSessionBody.cs @@ -76,7 +76,7 @@ public class RealtimeSessionTurnDetection public string Type { get; set; } = "semantic_vad"; [JsonPropertyName("eagerness")] - public string eagerness { get;set; } = "auto"; + public string Eagerness { get;set; } = "auto"; } public class InputAudioTranscription From c837dd6611025edfaf82c601b0076c371805e789 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 13 May 2025 11:45:19 -0500 Subject: [PATCH 02/12] temp save --- .../BotSharp.Core/BotSharp.Core.csproj | 10 + .../BotSharp.Core/Functions/GetLocationFn.cs | 25 +++ .../BotSharp.Core/Functions/GetWeatherFn.cs | 56 +++++ .../AsyncWebsocketDataResultEnumerator.cs | 1 + .../Session/LlmRealtimeSession.cs | 9 +- .../functions/get_location.json | 20 ++ .../functions/get_weather.json | 19 ++ .../Realtime/RealTimeCompletionProvider.cs | 208 +++++++++++++++--- .../Realtime/RealTimeCompletionProvider.cs | 2 +- 9 files changed, 316 insertions(+), 34 deletions(-) create mode 100644 src/Infrastructure/BotSharp.Core/Functions/GetLocationFn.cs create mode 100644 src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs create mode 100644 src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_location.json create mode 100644 src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json diff --git a/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj b/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj index 89619484e..de2c8909e 100644 --- a/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj +++ b/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj @@ -96,6 +96,8 @@ + + @@ -204,6 +206,14 @@ PreserveNewest + + + + PreserveNewest + + + PreserveNewest + diff --git a/src/Infrastructure/BotSharp.Core/Functions/GetLocationFn.cs b/src/Infrastructure/BotSharp.Core/Functions/GetLocationFn.cs new file mode 100644 index 000000000..cabfbdbb4 --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/Functions/GetLocationFn.cs @@ -0,0 +1,25 @@ +using BotSharp.Abstraction.Functions; +using BotSharp.Abstraction.Options; + +namespace BotSharp.Core.Functions; + +public class GetLocationFn : IFunctionCallback +{ + private readonly IServiceProvider _services; + + public GetLocationFn(IServiceProvider services) + { + _services = services; + } + + public string Name => "get_location"; + public string Indication => "Finding location"; + + public async Task Execute(RoleDialogModel message) + { + var args = JsonSerializer.Deserialize(message.FunctionArgs, BotSharpOptions.defaultJsonOptions); + + message.Content = $"There are a lot of fun events here in {args.City}"; + return true; + } +} diff --git a/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs b/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs new file mode 100644 index 000000000..759bc68c8 --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs @@ -0,0 +1,56 @@ +using BotSharp.Abstraction.Functions; +using BotSharp.Abstraction.Models; +using BotSharp.Abstraction.Options; +using BotSharp.Abstraction.SideCar; +using System.Text.Json.Serialization; + +namespace BotSharp.Core.Functions; + +public class GetWeatherFn : IFunctionCallback +{ + private readonly IServiceProvider _services; + + public GetWeatherFn(IServiceProvider services) + { + _services = services; + } + + public string Name => "get_weather"; + public string Indication => "Querying weather"; + + public async Task Execute(RoleDialogModel message) + { + var args = JsonSerializer.Deserialize(message.FunctionArgs, BotSharpOptions.defaultJsonOptions); + + var sidecar = _services.GetService(); + var states = GetSideCarStates(); + + var userMessage = $"Please find the information at location {args.City}, {args.State}"; + var response = await sidecar.SendMessage(BuiltInAgentId.Chatbot, userMessage, states: states); + message.Content = $"It is a sunny day {response.Content}."; + return true; + } + + private List GetSideCarStates() + { + var sideCarStates = new List() + { + new("channel", "email") + }; + return sideCarStates; + } +} + +class Location +{ + [JsonPropertyName("city")] + public string? City { get; set; } + + [JsonPropertyName("state")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? State { get; set; } + + [JsonPropertyName("county")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? County { get; set; } +} \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs b/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs index 692e72204..af2abf8fb 100644 --- a/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs +++ b/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs @@ -44,6 +44,7 @@ public async ValueTask MoveNextAsync() if (receivedResult.CloseStatus.HasValue) { + Console.WriteLine($"Web socket close status: {receivedResult.CloseStatus}"); Current = null; return false; } diff --git a/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs b/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs index 70b8ba096..d799480ed 100644 --- a/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs +++ b/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs @@ -22,14 +22,17 @@ public LlmRealtimeSession( _sessionOptions = sessionOptions; } - public async Task ConnectAsync(Uri uri, Dictionary headers, CancellationToken cancellationToken = default) + public async Task ConnectAsync(Uri uri, Dictionary? headers = null, CancellationToken cancellationToken = default) { _webSocket?.Dispose(); _webSocket = new ClientWebSocket(); - foreach (var header in headers) + if (!headers.IsNullOrEmpty()) { - _webSocket.Options.SetRequestHeader(header.Key, header.Value); + foreach (var header in headers) + { + _webSocket.Options.SetRequestHeader(header.Key, header.Value); + } } await _webSocket.ConnectAsync(uri, cancellationToken); diff --git a/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_location.json b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_location.json new file mode 100644 index 000000000..b0716218e --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_location.json @@ -0,0 +1,20 @@ +{ + "name": "get_location", + "description": "Get location information for user.", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "visibility_expression": "{% if states.channel == 'email' %}visible{% endif %}", + "description": "The location city that user wants to know about." + }, + "county": { + "type": "string", + "visibility_expression": "{% if states.channel != 'email' %}visible{% endif %}", + "description": "The location county that user wants to know about." + } + }, + "required": [ "city", "county" ] + } +} \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json new file mode 100644 index 000000000..bdb679a25 --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json @@ -0,0 +1,19 @@ +{ + "name": "get_weather", + "description": "Get weather information for user.", + "visibility_expression": "{% if states.channel != 'email' %}visible{% endif %}", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city where the user wants to get weather information." + }, + "state": { + "type": "string", + "description": "The state where the user wants to get weather information." + } + }, + "required": [ "city", "state" ] + } +} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index 98896ac3c..d901f9816 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -1,8 +1,15 @@ +using BotSharp.Abstraction.Options; +using BotSharp.Abstraction.Realtime.Models.Session; +using BotSharp.Core.Session; using GenerativeAI; using GenerativeAI.Core; using GenerativeAI.Live; using GenerativeAI.Live.Extensions; using GenerativeAI.Types; +using GenerativeAI.Types.Converters; +using Google.Ai.Generativelanguage.V1Beta2; +using Google.Api; +using System.Threading; namespace BotSharp.Plugin.GoogleAi.Providers.Realtime; @@ -18,14 +25,18 @@ public class GoogleRealTimeProvider : IRealTimeCompletion private readonly ILogger _logger; private List renderedInstructions = []; + private LlmRealtimeSession _session; + private readonly BotSharpOptions _botsharpOptions; private readonly GoogleAiSettings _settings; public GoogleRealTimeProvider( IServiceProvider services, GoogleAiSettings settings, + BotSharpOptions botSharpOptions, ILogger logger) { _settings = settings; + _botsharpOptions = botSharpOptions; _services = services; _logger = logger; } @@ -66,8 +77,48 @@ public async Task Connect(RealtimeHubConnection conn, _onInputAudioTranscriptionCompleted = onInputAudioTranscriptionCompleted; _onUserInterrupted = onUserInterrupted; + var settingsService = _services.GetRequiredService(); var realtimeModelSettings = _services.GetRequiredService(); + _model = realtimeModelSettings.Model; + var modelSettings = settingsService.GetSetting(Provider, _model); + + //if (_session != null) + //{ + // _session.Dispose(); + //} + + //_session = new LlmRealtimeSession(_services, new ChatSessionOptions + //{ + // JsonOptions = new JsonSerializerOptions + // { + // PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + // PropertyNameCaseInsensitive = true, + // Converters = { new JsonStringEnumConverter(), new DateOnlyJsonConverter(), new TimeOnlyJsonConverter() }, + // DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, + // TypeInfoResolver = TypesSerializerContext.Default, + // UnknownTypeHandling = JsonUnknownTypeHandling.JsonElement, + + // } + //}); + + //await _session.ConnectAsync( + // uri: new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key={modelSettings.ApiKey}"), + // cancellationToken: CancellationToken.None); + + ////await UpdateSession(conn, true); + + //_ = ReceiveMessage( + // conn, + // onModelReady, + // onModelAudioDeltaReceived, + // onModelAudioResponseDone, + // onModelAudioTranscriptDone, + // onModelResponseDone, + // onConversationItemCreated, + // onInputAudioTranscriptionCompleted, + // onUserInterrupted); + var client = ProviderHelper.GetGeminiClient(Provider, _model, _services); _chatClient = client.CreateGenerativeModel(_model); @@ -75,7 +126,7 @@ public async Task Connect(RealtimeHubConnection conn, config: new GenerationConfig { ResponseModalities = [Modality.AUDIO], - }, + }, systemInstruction: "You are a helpful assistant.", logger: _logger); @@ -84,37 +135,107 @@ public async Task Connect(RealtimeHubConnection conn, await _client.ConnectAsync(false); } + + private async Task ReceiveMessage( + RealtimeHubConnection conn, + Action onModelReady, + Action onModelAudioDeltaReceived, + Action onModelAudioResponseDone, + Action onModelAudioTranscriptDone, + Action> onModelResponseDone, + Action onConversationItemCreated, + Action onUserAudioTranscriptionCompleted, + Action onInterruptionDetected) + { + await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) + { + var receivedText = update?.RawResponse; + Console.WriteLine($"Received text: {receivedText}"); + + if (string.IsNullOrEmpty(receivedText)) + { + continue; + } + + + } + + _session.Dispose(); + } + + public async Task Disconnect() { + //if (_session != null) + //{ + // await _session.Disconnect(); + //} + if (_client != null) + { await _client.DisconnectAsync(); + } } public async Task AppenAudioBuffer(string message) { await _client.SendAudioAsync(Convert.FromBase64String(message)); + + //await SendEventToModel(new BidiClientPayload + //{ + // RealtimeInput = new() + // { + // MediaChunks = [ new() { Data = message, MimeType = "audio/pcm; rate=16000" } ] + // } + //}); } public async Task AppenAudioBuffer(ArraySegment data, int length) { var buffer = data.AsSpan(0, length).ToArray(); - await _client.SendAudioAsync(buffer,"audio/pcm;rate=16000"); + await _client.SendAudioAsync(buffer, "audio/pcm; rate=16000"); + + //await SendEventToModel(new BidiClientPayload + //{ + // RealtimeInput = new() + // { + // MediaChunks = [new() { Data = Convert.ToBase64String(buffer), MimeType = "audio/pcm; rate=16000" }] + // } + //}); } public async Task TriggerModelInference(string? instructions = null) { + var content = !string.IsNullOrWhiteSpace(instructions) + ? new Content(instructions, AgentRole.User) + : null; + await _client.SendClientContentAsync(new BidiGenerateContentClientContent() { + Turns = content != null ? [content] : null, TurnComplete = true, }); + + + + //await SendEventToModel(new BidiClientPayload + //{ + // ClientContent = new() + // { + // Turns = content != null ? [content] : null, + // TurnComplete = true + // } + //}); } public async Task CancelModelResponse() { + } public async Task RemoveConversationItem(string itemId) { + } private Task AttachEvents(MultiModalLiveClient client) @@ -236,6 +357,10 @@ await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, "response.don public async Task SendEventToModel(object message) { //todo Send Audio Chunks to Model, Botsharp RealTime Implementation seems to be incomplete + + //if (_session == null) return; + + //await _session.SendEventToModel(message); } public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) @@ -246,13 +371,13 @@ public async Task UpdateSession(RealtimeHubConnection conn, bool isInit var agentService = _services.GetRequiredService(); var agent = await agentService.LoadAgent(conn.CurrentAgentId); - var (prompt, request) = PrepareOptions(_chatClient, agent, new List()); + var (prompt, request) = PrepareOptions(agent, []); var config = request.GenerationConfig; //Output Modality can either be text or audio if (config != null) { - config.ResponseModalities = new List([Modality.AUDIO]); + config.ResponseModalities = [Modality.AUDIO]; var words = new List(); HookEmitter.Emit(_services, hook => words.AddRange(hook.OnModelTranscriptPrompt(agent))); @@ -270,10 +395,10 @@ public async Task UpdateSession(RealtimeHubConnection conn, bool isInit { Name = x.Name ?? string.Empty, Description = x.Description ?? string.Empty, + Parameters = x.Parameters != null + ? JsonSerializer.Deserialize(JsonSerializer.Serialize(x.Parameters)) + : null }; - fn.Parameters = x.Parameters != null - ? JsonSerializer.Deserialize(JsonSerializer.Serialize(x.Parameters)) - : null; return fn; }).ToArray(); @@ -282,33 +407,39 @@ await HookEmitter.Emit(_services, if (_settings.Gemini.UseGoogleSearch) { - if (request.Tools == null) - request.Tools = new List(); + request.Tools ??= []; request.Tools.Add(new Tool() { GoogleSearch = new GoogleSearchTool() }); } - // if(request.Tools.Count == 0) - // request.Tools = null; - // config.MaxOutputTokens = null; - await _client.SendSetupAsync(new BidiGenerateContentSetup() { GenerationConfig = config, Model = Model.ToModelId(), SystemInstruction = request.SystemInstruction, - Tools = request.Tools?.ToArray(), + //Tools = request.Tools?.ToArray(), }); + //await SendEventToModel(new BidiClientPayload + //{ + // Setup = new BidiGenerateContentSetup() + // { + // GenerationConfig = config, + // Model = $"models/{_model}", + // SystemInstruction = new Content(agent.Instruction, AgentRole.System), + // //Tools = request.Tools?.ToArray(), + // } + //}); + return prompt; } public async Task InsertConversationItem(RoleDialogModel message) { - if (_client == null) - throw new Exception("Client is not initialized"); + //if (_client == null) + // throw new Exception("Client is not initialized"); if (message.Role == AgentRole.Function) { var function = new FunctionResponse() @@ -321,13 +452,38 @@ await _client.SendToolResponseAsync(new BidiGenerateContentToolResponse() { FunctionResponses = [function] }); + + //await SendEventToModel(new BidiClientPayload + //{ + // ToolResponse = new() + // { + // FunctionResponses = [function] + // } + //}); } else if (message.Role == AgentRole.Assistant) { + //await SendEventToModel(new BidiClientPayload + //{ + // ClientContent = new() + // { + // Turns = [new Content(message.Content, AgentRole.Model)], + // TurnComplete = true + // } + //}); } else if (message.Role == AgentRole.User) { await _client.SentTextAsync(message.Content); + + //await SendEventToModel(new BidiClientPayload + //{ + // ClientContent = new() + // { + // Turns = [new Content(message.Content, AgentRole.User)], + // TurnComplete = true + // } + //}); } else { @@ -335,33 +491,24 @@ await _client.SendToolResponseAsync(new BidiGenerateContentToolResponse() } } - public Task> OnResponsedDone(RealtimeHubConnection conn, string response) + public async Task> OnResponsedDone(RealtimeHubConnection conn, string response) { - throw new NotImplementedException(""); + return []; } - public Task OnConversationItemCreated(RealtimeHubConnection conn, string response) + public async Task OnConversationItemCreated(RealtimeHubConnection conn, string response) { - return Task.FromResult(new RoleDialogModel(AgentRole.User, response)); + return await Task.FromResult(new RoleDialogModel(AgentRole.User, response)); } - private (string, GenerateContentRequest) PrepareOptions(GenerativeModel aiModel, Agent agent, + private (string, GenerateContentRequest) PrepareOptions(Agent agent, List conversations) { var agentService = _services.GetRequiredService(); var googleSettings = _settings; renderedInstructions = []; - // Add settings - aiModel.UseGoogleSearch = googleSettings.Gemini.UseGoogleSearch; - aiModel.UseGrounding = googleSettings.Gemini.UseGrounding; - - aiModel.FunctionCallingBehaviour = new FunctionCallingBehaviour() - { - AutoCallFunction = false - }; - // Assembly messages var contents = new List(); var tools = new List(); @@ -458,6 +605,7 @@ public Task OnConversationItemCreated(RealtimeHubConnection con var maxTokens = int.TryParse(state.GetState("max_tokens"), out var tokens) ? tokens : agent.LlmConfig?.MaxOutputTokens ?? LlmConstant.DEFAULT_MAX_OUTPUT_TOKEN; + var request = new GenerateContentRequest { SystemInstruction = !systemPrompts.IsNullOrEmpty() diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs index b32c0e1c1..3257e9bf8 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -671,7 +671,7 @@ await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) return outputs; } - public async Task OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string response) + private async Task OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string response) { var data = JsonSerializer.Deserialize(response); return new RoleDialogModel(AgentRole.User, data.Transcript) From da9c3c6ab0273a52564f2b85380b1455aa1c97a9 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 13 May 2025 15:43:08 -0500 Subject: [PATCH 03/12] temp save --- .../MLTasks/IRealTimeCompletion.cs | 18 +- .../Services/RealtimeHub.cs | 6 +- .../AsyncWebsocketDataResultEnumerator.cs | 4 +- .../BotSharp.Plugin.GoogleAI.csproj | 2 +- .../Models/Realtime/RealtimeServerResponse.cs | 61 +++ .../Realtime/RealTimeCompletionProvider.cs | 357 ++++++++++-------- .../Realtime/RealTimeCompletionProvider.cs | 52 +-- .../BotSharp.LLM.Tests/GoogleRealTimeTests.cs | 15 +- 8 files changed, 315 insertions(+), 200 deletions(-) create mode 100644 src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs b/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs index 11174356e..315f4f5a3 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs @@ -1,4 +1,6 @@ using BotSharp.Abstraction.Realtime.Models; +using System; +using static System.Runtime.InteropServices.JavaScript.JSType; namespace BotSharp.Abstraction.MLTasks; @@ -10,14 +12,14 @@ public interface IRealTimeCompletion Task Connect( RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onInputAudioTranscriptionCompleted, - Action onInterruptionDetected); + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionDone, + Func onInterruptionDetected); Task AppenAudioBuffer(string message); Task AppenAudioBuffer(ArraySegment data, int length); diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs b/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs index 607297582..93363878d 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs @@ -76,8 +76,8 @@ await _completer.Connect( { var data = _conn.OnModelAudioResponseDone(); await (responseToUser?.Invoke(data) ?? Task.CompletedTask); - }, - onAudioTranscriptDone: async transcript => + }, + onModelAudioTranscriptDone: async transcript => { }, @@ -117,7 +117,7 @@ await _completer.Connect( { }, - onInputAudioTranscriptionCompleted: async message => + onInputAudioTranscriptionDone: async message => { // append input audio transcript to conversation dialogs.Add(message); diff --git a/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs b/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs index af2abf8fb..548850e93 100644 --- a/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs +++ b/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs @@ -44,7 +44,9 @@ public async ValueTask MoveNextAsync() if (receivedResult.CloseStatus.HasValue) { - Console.WriteLine($"Web socket close status: {receivedResult.CloseStatus}"); +#if DEBUG + Console.WriteLine($"Websocket close: {receivedResult.CloseStatus} {receivedResult.CloseStatusDescription}"); +#endif Current = null; return false; } diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/BotSharp.Plugin.GoogleAI.csproj b/src/Plugins/BotSharp.Plugin.GoogleAI/BotSharp.Plugin.GoogleAI.csproj index c4c28822e..ac68607cc 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/BotSharp.Plugin.GoogleAI.csproj +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/BotSharp.Plugin.GoogleAI.csproj @@ -1,4 +1,4 @@ - + $(TargetFramework) diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs new file mode 100644 index 000000000..25cf59e43 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs @@ -0,0 +1,61 @@ +using GenerativeAI.Types; + +namespace BotSharp.Plugin.GoogleAI.Models.Realtime; + +internal class RealtimeServerResponse +{ + [JsonPropertyName("setupComplete")] + public RealtimeGenerateContentSetupComplete? SetupComplete { get; set; } + + [JsonPropertyName("serverContent")] + public RealtimeGenerateContentServerContent? ServerContent { get; set; } + + [JsonPropertyName("usageMetadata")] + public RealtimeUsageMetaData? UsageMetaData { get; set; } +} + + +internal class RealtimeGenerateContentSetupComplete { } + +internal class RealtimeGenerateContentServerContent +{ + [JsonPropertyName("turnComplete")] + public bool? TurnComplete { get; set; } + + [JsonPropertyName("generationComplete")] + public bool? GenerationComplete { get; set; } + + [JsonPropertyName("interrupted")] + public bool? Interrupted { get; set; } + + [JsonPropertyName("modelTurn")] + public Content? ModelTurn { get; set; } +} + +internal class RealtimeUsageMetaData +{ + [JsonPropertyName("promptTokenCount")] + public int? PromptTokenCount { get; set; } + + [JsonPropertyName("responseTokenCount")] + public int? ResponseTokenCount { get; set; } + + [JsonPropertyName("totalTokenCount")] + public int? TotalTokenCount { get; set; } + + [JsonPropertyName("promptTokensDetails")] + public List? PromptTokensDetails { get; set; } + + [JsonPropertyName("responseTokensDetails")] + public List? ResponseTokensDetails { get; set; } +} + + +internal class RealtimeTokenDetail +{ + [JsonPropertyName("modality")] + public string? Modality { get; set; } + + [JsonPropertyName("tokenCount")] + public int? TokenCount { get; set; } +} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index d901f9816..fe3480c76 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -1,6 +1,7 @@ using BotSharp.Abstraction.Options; using BotSharp.Abstraction.Realtime.Models.Session; using BotSharp.Core.Session; +using BotSharp.Plugin.GoogleAI.Models.Realtime; using GenerativeAI; using GenerativeAI.Core; using GenerativeAI.Live; @@ -9,6 +10,7 @@ using GenerativeAI.Types.Converters; using Google.Ai.Generativelanguage.V1Beta2; using Google.Api; +using System; using System.Threading; namespace BotSharp.Plugin.GoogleAi.Providers.Realtime; @@ -29,6 +31,17 @@ public class GoogleRealTimeProvider : IRealTimeCompletion private readonly BotSharpOptions _botsharpOptions; private readonly GoogleAiSettings _settings; + private const string DEFAULT_MIME_TYPE = "audio/pcm;rate=16000"; + + private readonly JsonSerializerOptions _jsonOptions = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true, + Converters = { new JsonStringEnumConverter(), new DateOnlyJsonConverter(), new TimeOnlyJsonConverter() }, + DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, + UnknownTypeHandling = JsonUnknownTypeHandling.JsonElement + }; + public GoogleRealTimeProvider( IServiceProvider services, GoogleAiSettings settings, @@ -47,25 +60,26 @@ public void SetModelName(string model) } private RealtimeHubConnection _conn; - private Action _onModelReady; - private Action _onModelAudioDeltaReceived; - private Action _onModelAudioResponseDone; - private Action _onModelAudioTranscriptDone; - private Action> _onModelResponseDone; - private Action _onConversationItemCreated; - private Action _onInputAudioTranscriptionCompleted; - private Action _onUserInterrupted; + private Func _onModelReady; + private Func _onModelAudioDeltaReceived; + private Func _onModelAudioResponseDone; + private Func _onModelAudioTranscriptDone; + private Func, Task> _onModelResponseDone; + private Func _onConversationItemCreated; + private Func _onInputAudioTranscriptionDone; + private Func _onUserInterrupted; - public async Task Connect(RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onModelAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onInputAudioTranscriptionCompleted, - Action onUserInterrupted) + public async Task Connect( + RealtimeHubConnection conn, + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionDone, + Func onInterruptionDetected) { _conn = conn; _onModelReady = onModelReady; @@ -74,8 +88,8 @@ public async Task Connect(RealtimeHubConnection conn, _onModelAudioTranscriptDone = onModelAudioTranscriptDone; _onModelResponseDone = onModelResponseDone; _onConversationItemCreated = onConversationItemCreated; - _onInputAudioTranscriptionCompleted = onInputAudioTranscriptionCompleted; - _onUserInterrupted = onUserInterrupted; + _onInputAudioTranscriptionDone = onInputAudioTranscriptionDone; + _onUserInterrupted = onInterruptionDetected; var settingsService = _services.GetRequiredService(); var realtimeModelSettings = _services.GetRequiredService(); @@ -83,81 +97,114 @@ public async Task Connect(RealtimeHubConnection conn, _model = realtimeModelSettings.Model; var modelSettings = settingsService.GetSetting(Provider, _model); - //if (_session != null) - //{ - // _session.Dispose(); - //} + if (_session != null) + { + _session.Dispose(); + } - //_session = new LlmRealtimeSession(_services, new ChatSessionOptions - //{ - // JsonOptions = new JsonSerializerOptions - // { - // PropertyNamingPolicy = JsonNamingPolicy.CamelCase, - // PropertyNameCaseInsensitive = true, - // Converters = { new JsonStringEnumConverter(), new DateOnlyJsonConverter(), new TimeOnlyJsonConverter() }, - // DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, - // TypeInfoResolver = TypesSerializerContext.Default, - // UnknownTypeHandling = JsonUnknownTypeHandling.JsonElement, - - // } - //}); + _session = new LlmRealtimeSession(_services, new ChatSessionOptions + { + JsonOptions = _jsonOptions + }); - //await _session.ConnectAsync( - // uri: new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key={modelSettings.ApiKey}"), - // cancellationToken: CancellationToken.None); + await _session.ConnectAsync( + uri: new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key={modelSettings.ApiKey}"), + cancellationToken: CancellationToken.None); - ////await UpdateSession(conn, true); + await onModelReady(); - //_ = ReceiveMessage( - // conn, - // onModelReady, - // onModelAudioDeltaReceived, - // onModelAudioResponseDone, - // onModelAudioTranscriptDone, - // onModelResponseDone, - // onConversationItemCreated, - // onInputAudioTranscriptionCompleted, - // onUserInterrupted); + _ = ReceiveMessage( + conn, + onModelReady, + onModelAudioDeltaReceived, + onModelAudioResponseDone, + onModelAudioTranscriptDone, + onModelResponseDone, + onConversationItemCreated, + onInputAudioTranscriptionDone, + onInterruptionDetected); - var client = ProviderHelper.GetGeminiClient(Provider, _model, _services); - _chatClient = client.CreateGenerativeModel(_model); - _client = _chatClient.CreateMultiModalLiveClient( - config: new GenerationConfig - { - ResponseModalities = [Modality.AUDIO], - }, - systemInstruction: "You are a helpful assistant.", - logger: _logger); + //var client = ProviderHelper.GetGeminiClient(Provider, _model, _services); + //_chatClient = client.CreateGenerativeModel(_model); + //_client = _chatClient.CreateMultiModalLiveClient( + // config: new GenerationConfig + // { + // ResponseModalities = [Modality.AUDIO], + // }, + // systemInstruction: "You are a helpful assistant.", + // logger: _logger); - await AttachEvents(_client); + //await AttachEvents(_client); - await _client.ConnectAsync(false); + //await _client.ConnectAsync(false); } - private async Task ReceiveMessage( RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onModelAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onUserAudioTranscriptionCompleted, - Action onInterruptionDetected) + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionCompleted, + Func onInterruptionDetected) { await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) { var receivedText = update?.RawResponse; - Console.WriteLine($"Received text: {receivedText}"); - if (string.IsNullOrEmpty(receivedText)) { continue; } - + Console.WriteLine($"Received text: {receivedText}"); + try + { + var response = JsonSerializer.Deserialize(receivedText, _jsonOptions); + + if (response == null) + { + continue; + } + + if (response.SetupComplete != null) + { + _logger.LogInformation($"Session setup completed."); + } + else if (response.ServerContent != null) + { + if (response.ServerContent.ModelTurn != null) + { + _logger.LogInformation($"Model audio delta received."); + var parts = response.ServerContent.ModelTurn.Parts; + if (!parts.IsNullOrEmpty()) + { + foreach (var part in parts) + { + if (!string.IsNullOrEmpty(part.InlineData?.Data)) + { + await onModelAudioDeltaReceived(part.InlineData.Data, string.Empty); + } + } + } + } + else if (response.ServerContent.GenerationComplete == true) + { + _logger.LogInformation($"Model generation completed."); + } + else if (response.ServerContent.TurnComplete == true) + { + _logger.LogInformation($"Model turn completed."); + } + } + } + catch (Exception ex) + { + _logger.LogError(ex, $"Error when deserializing server response."); + continue; + } } _session.Dispose(); @@ -166,42 +213,42 @@ private async Task ReceiveMessage( public async Task Disconnect() { - //if (_session != null) - //{ - // await _session.Disconnect(); - //} - - if (_client != null) + if (_session != null) { - await _client.DisconnectAsync(); + await _session.Disconnect(); } + + //if (_client != null) + //{ + // await _client.DisconnectAsync(); + //} } public async Task AppenAudioBuffer(string message) { - await _client.SendAudioAsync(Convert.FromBase64String(message)); + //await _client.SendAudioAsync(Convert.FromBase64String(message)); - //await SendEventToModel(new BidiClientPayload - //{ - // RealtimeInput = new() - // { - // MediaChunks = [ new() { Data = message, MimeType = "audio/pcm; rate=16000" } ] - // } - //}); + await SendEventToModel(new BidiClientPayload + { + RealtimeInput = new() + { + MediaChunks = [new() { Data = message, MimeType = DEFAULT_MIME_TYPE }] + } + }); } public async Task AppenAudioBuffer(ArraySegment data, int length) { var buffer = data.AsSpan(0, length).ToArray(); - await _client.SendAudioAsync(buffer, "audio/pcm; rate=16000"); + //await _client.SendAudioAsync(buffer, "audio/pcm;rate=16000"); - //await SendEventToModel(new BidiClientPayload - //{ - // RealtimeInput = new() - // { - // MediaChunks = [new() { Data = Convert.ToBase64String(buffer), MimeType = "audio/pcm; rate=16000" }] - // } - //}); + await SendEventToModel(new BidiClientPayload + { + RealtimeInput = new() + { + MediaChunks = [new() { Data = Convert.ToBase64String(buffer), MimeType = DEFAULT_MIME_TYPE }] + } + }); } public async Task TriggerModelInference(string? instructions = null) @@ -210,22 +257,20 @@ public async Task TriggerModelInference(string? instructions = null) ? new Content(instructions, AgentRole.User) : null; - await _client.SendClientContentAsync(new BidiGenerateContentClientContent() - { - Turns = content != null ? [content] : null, - TurnComplete = true, - }); - - - - //await SendEventToModel(new BidiClientPayload + //await _client.SendClientContentAsync(new BidiGenerateContentClientContent() //{ - // ClientContent = new() - // { - // Turns = content != null ? [content] : null, - // TurnComplete = true - // } + // Turns = content != null ? [content] : null, + // TurnComplete = true, //}); + + await SendEventToModel(new BidiClientPayload + { + ClientContent = new() + { + Turns = content != null ? [content] : null, + TurnComplete = true + } + }); } public async Task CancelModelResponse() @@ -276,7 +321,7 @@ private Task AttachEvents(MultiModalLiveClient client) client.TextChunkReceived += (sender, e) => { - _onInputAudioTranscriptionCompleted(new RoleDialogModel(AgentRole.Assistant, e.Text)); + _onInputAudioTranscriptionDone(new RoleDialogModel(AgentRole.Assistant, e.Text)); }; client.GenerationInterrupted += (sender, e) => @@ -358,9 +403,9 @@ public async Task SendEventToModel(object message) { //todo Send Audio Chunks to Model, Botsharp RealTime Implementation seems to be incomplete - //if (_session == null) return; + if (_session == null) return; - //await _session.SendEventToModel(message); + await _session.SendEventToModel(message); } public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) @@ -414,25 +459,25 @@ await HookEmitter.Emit(_services, }); } - await _client.SendSetupAsync(new BidiGenerateContentSetup() - { - GenerationConfig = config, - Model = Model.ToModelId(), - SystemInstruction = request.SystemInstruction, - //Tools = request.Tools?.ToArray(), - }); - - //await SendEventToModel(new BidiClientPayload + //await _client.SendSetupAsync(new BidiGenerateContentSetup() //{ - // Setup = new BidiGenerateContentSetup() - // { - // GenerationConfig = config, - // Model = $"models/{_model}", - // SystemInstruction = new Content(agent.Instruction, AgentRole.System), - // //Tools = request.Tools?.ToArray(), - // } + // GenerationConfig = config, + // Model = Model.ToModelId(), + // SystemInstruction = request.SystemInstruction, + // //Tools = request.Tools?.ToArray(), //}); + await SendEventToModel(new BidiClientPayload + { + Setup = new BidiGenerateContentSetup() + { + GenerationConfig = config, + Model = Model.ToModelId(), + SystemInstruction = request.SystemInstruction, + Tools = [] + } + }); + return prompt; } @@ -448,42 +493,42 @@ public async Task InsertConversationItem(RoleDialogModel message) Response = JsonNode.Parse(message.Content ?? "{}") }; - await _client.SendToolResponseAsync(new BidiGenerateContentToolResponse() - { - FunctionResponses = [function] - }); - - //await SendEventToModel(new BidiClientPayload + //await _client.SendToolResponseAsync(new BidiGenerateContentToolResponse() //{ - // ToolResponse = new() - // { - // FunctionResponses = [function] - // } + // FunctionResponses = [function] //}); + + await SendEventToModel(new BidiClientPayload + { + ToolResponse = new() + { + FunctionResponses = [function] + } + }); } else if (message.Role == AgentRole.Assistant) { - //await SendEventToModel(new BidiClientPayload - //{ - // ClientContent = new() - // { - // Turns = [new Content(message.Content, AgentRole.Model)], - // TurnComplete = true - // } - //}); + await SendEventToModel(new BidiClientPayload + { + ClientContent = new() + { + Turns = [new Content(message.Content, AgentRole.Model)], + TurnComplete = true + } + }); } else if (message.Role == AgentRole.User) { - await _client.SentTextAsync(message.Content); + //await _client.SentTextAsync(message.Content); - //await SendEventToModel(new BidiClientPayload - //{ - // ClientContent = new() - // { - // Turns = [new Content(message.Content, AgentRole.User)], - // TurnComplete = true - // } - //}); + await SendEventToModel(new BidiClientPayload + { + ClientContent = new() + { + Turns = [new Content(message.Content, AgentRole.User)], + TurnComplete = true + } + }); } else { diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs index 3257e9bf8..dcb82164e 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -30,14 +30,14 @@ public RealTimeCompletionProvider( public async Task Connect( RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onModelAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onInputAudioTranscriptionCompleted, - Action onInterruptionDetected) + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionDone, + Func onInterruptionDetected) { var settingsService = _services.GetRequiredService(); var realtimeModelSettings = _services.GetRequiredService(); @@ -72,7 +72,7 @@ await _session.ConnectAsync( onModelAudioTranscriptDone, onModelResponseDone, onConversationItemCreated, - onInputAudioTranscriptionCompleted, + onInputAudioTranscriptionDone, onInterruptionDetected); } @@ -144,14 +144,14 @@ await SendEventToModel(new private async Task ReceiveMessage( RealtimeHubConnection conn, - Action onModelReady, - Action onModelAudioDeltaReceived, - Action onModelAudioResponseDone, - Action onModelAudioTranscriptDone, - Action> onModelResponseDone, - Action onConversationItemCreated, - Action onUserAudioTranscriptionCompleted, - Action onInterruptionDetected) + Func onModelReady, + Func onModelAudioDeltaReceived, + Func onModelAudioResponseDone, + Func onModelAudioTranscriptDone, + Func, Task> onModelResponseDone, + Func onConversationItemCreated, + Func onInputAudioTranscriptionDone, + Func onInterruptionDetected) { await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) { @@ -175,7 +175,7 @@ private async Task ReceiveMessage( else if (response.Type == "session.created") { _logger.LogInformation($"{response.Type}: {receivedText}"); - onModelReady(); + await onModelReady(); } else if (response.Type == "session.updated") { @@ -189,7 +189,7 @@ private async Task ReceiveMessage( { _logger.LogInformation($"{response.Type}: {receivedText}"); var data = JsonSerializer.Deserialize(receivedText); - onModelAudioTranscriptDone(data.Transcript); + await onModelAudioTranscriptDone(data.Transcript); } else if (response.Type == "response.audio.delta") { @@ -197,13 +197,13 @@ private async Task ReceiveMessage( if (audio?.Delta != null) { _logger.LogDebug($"{response.Type}: {receivedText}"); - onModelAudioDeltaReceived(audio.Delta, audio.ItemId); + await onModelAudioDeltaReceived(audio.Delta, audio.ItemId); } } else if (response.Type == "response.audio.done") { _logger.LogInformation($"{response.Type}: {receivedText}"); - onModelAudioResponseDone(); + await onModelAudioResponseDone(); } else if (response.Type == "response.done") { @@ -213,14 +213,14 @@ private async Task ReceiveMessage( { if (data.StatusDetails.Type == "incomplete" && data.StatusDetails.Reason == "max_output_tokens") { - onInterruptionDetected(); + await onInterruptionDetected(); await TriggerModelInference("Response user concisely"); } } else { var messages = await OnResponsedDone(conn, receivedText); - onModelResponseDone(messages); + await onModelResponseDone(messages); } } else if (response.Type == "conversation.item.created") @@ -228,7 +228,7 @@ private async Task ReceiveMessage( _logger.LogInformation($"{response.Type}: {receivedText}"); var data = JsonSerializer.Deserialize(receivedText); - onConversationItemCreated(receivedText); + await onConversationItemCreated(receivedText); } else if (response.Type == "conversation.item.input_audio_transcription.completed") { @@ -237,14 +237,14 @@ private async Task ReceiveMessage( var message = await OnUserAudioTranscriptionCompleted(conn, receivedText); if (!string.IsNullOrEmpty(message.Content)) { - onUserAudioTranscriptionCompleted(message); + await onInputAudioTranscriptionDone(message); } } else if (response.Type == "input_audio_buffer.speech_started") { _logger.LogInformation($"{response.Type}: {receivedText}"); // Handle user interuption - onInterruptionDetected(); + await onInterruptionDetected(); } else if (response.Type == "input_audio_buffer.speech_stopped") { diff --git a/tests/BotSharp.LLM.Tests/GoogleRealTimeTests.cs b/tests/BotSharp.LLM.Tests/GoogleRealTimeTests.cs index 8c9eac4cf..061f05454 100644 --- a/tests/BotSharp.LLM.Tests/GoogleRealTimeTests.cs +++ b/tests/BotSharp.LLM.Tests/GoogleRealTimeTests.cs @@ -40,11 +40,16 @@ public async Task ShouldConnect_Tests() var realTimeCompleter = services.BuildServiceProvider().GetService(); realTimeCompleter.SetModelName(GoogleAIModels.Gemini2FlashExp); bool modelReady = false; - await realTimeCompleter.Connect(new RealtimeHubConnection(), () => { modelReady = true; }, - (s, s1) => { Console.WriteLine(s); }, () => { }, (s) => { Console.WriteLine(s); }, - (list => { Console.WriteLine(list); }), - (s => { Console.WriteLine(s); }), - (model => { Console.WriteLine(model); }), (() => { Console.WriteLine("UserInterrupted"); })); + await realTimeCompleter.Connect( + new RealtimeHubConnection(), + async () => { modelReady = true; }, + async (s, s1) => { Console.WriteLine(s); }, + async () => { }, + async (s) => { Console.WriteLine(s); }, + async list => { Console.WriteLine(list); }, + async s => { Console.WriteLine(s); }, + async model => { Console.WriteLine(model); }, + async () => { Console.WriteLine("UserInterrupted"); }); Thread.Sleep(1000); modelReady.ShouldBeTrue(); From d9fe6da0990917fbb31564b6edba9a207b658a79 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 13 May 2025 17:33:23 -0500 Subject: [PATCH 04/12] temp save --- .../Models/Realtime/RealtimeClientPayload.cs | 18 +++ .../Realtime/RealtimeGenerateContentSetup.cs | 26 ++++ .../Models/Realtime/RealtimeServerResponse.cs | 12 ++ .../Realtime/RealTimeCompletionProvider.cs | 125 +++++++++++++++--- .../Realtime/RealTimeCompletionProvider.cs | 2 +- 5 files changed, 161 insertions(+), 22 deletions(-) create mode 100644 src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeClientPayload.cs create mode 100644 src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeClientPayload.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeClientPayload.cs new file mode 100644 index 000000000..579215952 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeClientPayload.cs @@ -0,0 +1,18 @@ +using GenerativeAI.Types; + +namespace BotSharp.Plugin.GoogleAI.Models.Realtime; + +internal class RealtimeClientPayload +{ + [JsonPropertyName("setup")] + public RealtimeGenerateContentSetup? Setup { get; set; } + + [JsonPropertyName("clientContent")] + public BidiGenerateContentClientContent? ClientContent { get; set; } + + [JsonPropertyName("realtimeInput")] + public BidiGenerateContentRealtimeInput? RealtimeInput { get; set; } + + [JsonPropertyName("toolResponse")] + public BidiGenerateContentToolResponse? ToolResponse { get; set; } +} diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs new file mode 100644 index 000000000..c72e8cb6d --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs @@ -0,0 +1,26 @@ +using GenerativeAI.Types; + +namespace BotSharp.Plugin.GoogleAI.Models.Realtime; + +internal class RealtimeGenerateContentSetup +{ + [JsonPropertyName("model")] + public string? Model { get; set; } + + [JsonPropertyName("generationConfig")] + public GenerationConfig? GenerationConfig { get; set; } + + [JsonPropertyName("systemInstruction")] + public Content? SystemInstruction { get; set; } + + [JsonPropertyName("tools")] + public Tool[]? Tools { get; set; } + + [JsonPropertyName("inputAudioTranscription")] + public AudioTranscriptionConfig? InputAudioTranscription { get; set; } + + [JsonPropertyName("outputAudioTranscription")] + public AudioTranscriptionConfig? OutputAudioTranscription { get; set; } +} + +internal class AudioTranscriptionConfig { } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs index 25cf59e43..354a572f2 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs @@ -30,6 +30,12 @@ internal class RealtimeGenerateContentServerContent [JsonPropertyName("modelTurn")] public Content? ModelTurn { get; set; } + + [JsonPropertyName("inputTranscription")] + public RealtimeGenerateContentTranscription? InputTranscription { get; set; } + + [JsonPropertyName("outputTranscription")] + public RealtimeGenerateContentTranscription? OutputTranscription { get; set; } } internal class RealtimeUsageMetaData @@ -58,4 +64,10 @@ internal class RealtimeTokenDetail [JsonPropertyName("tokenCount")] public int? TokenCount { get; set; } +} + +internal class RealtimeGenerateContentTranscription +{ + [JsonPropertyName("text")] + public string? Text { get; set; } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index fe3480c76..b267821cc 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -107,8 +107,9 @@ public async Task Connect( JsonOptions = _jsonOptions }); + var uri = BuildWebsocketUri(modelSettings.ApiKey, "v1beta"); await _session.ConnectAsync( - uri: new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key={modelSettings.ApiKey}"), + uri: uri, cancellationToken: CancellationToken.None); await onModelReady(); @@ -148,9 +149,12 @@ private async Task ReceiveMessage( Func onModelAudioTranscriptDone, Func, Task> onModelResponseDone, Func onConversationItemCreated, - Func onInputAudioTranscriptionCompleted, + Func onInputAudioTranscriptionDone, Func onInterruptionDetected) { + var inputTranscription = string.Empty; + var outputTranscription = string.Empty; + await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) { var receivedText = update?.RawResponse; @@ -163,7 +167,6 @@ private async Task ReceiveMessage( try { var response = JsonSerializer.Deserialize(receivedText, _jsonOptions); - if (response == null) { continue; @@ -175,10 +178,29 @@ private async Task ReceiveMessage( } else if (response.ServerContent != null) { + if (response.ServerContent.InputTranscription?.Text != null) + { + outputTranscription = string.Empty; + inputTranscription += response.ServerContent.InputTranscription.Text; + } + + if (response.ServerContent.OutputTranscription?.Text != null) + { + outputTranscription += response.ServerContent.OutputTranscription.Text; + } + if (response.ServerContent.ModelTurn != null) { _logger.LogInformation($"Model audio delta received."); var parts = response.ServerContent.ModelTurn.Parts; + + if (!string.IsNullOrEmpty(inputTranscription)) + { + var message = await OnUserAudioTranscriptionCompleted(conn, inputTranscription); + await onInputAudioTranscriptionDone(message); + inputTranscription = string.Empty; + } + if (!parts.IsNullOrEmpty()) { foreach (var part in parts) @@ -197,13 +219,23 @@ private async Task ReceiveMessage( else if (response.ServerContent.TurnComplete == true) { _logger.LogInformation($"Model turn completed."); + + if (!string.IsNullOrEmpty(outputTranscription)) + { + var messages = await OnResponseDone(conn, outputTranscription, response.UsageMetaData); + await onModelResponseDone(messages); + + // Reset input/output transcription + inputTranscription = string.Empty; + outputTranscription = string.Empty; + } } } } catch (Exception ex) { - _logger.LogError(ex, $"Error when deserializing server response."); - continue; + _logger.LogError(ex, $"Error when deserializing server response. {ex.Message}"); + break; } } @@ -288,7 +320,7 @@ private Task AttachEvents(MultiModalLiveClient client) client.Connected += (sender, e) => { _logger.LogInformation("Google Realtime Client connected."); - _onModelReady(); + _onModelReady().ConfigureAwait(false).GetAwaiter().GetResult(); }; client.Disconnected += (sender, e) => @@ -301,7 +333,7 @@ private Task AttachEvents(MultiModalLiveClient client) _logger.LogInformation("User message received."); if (e.Payload.SetupComplete != null) { - _onConversationItemCreated(_client.ConnectionId.ToString()); + _onConversationItemCreated(_client.ConnectionId.ToString()).ConfigureAwait(false).GetAwaiter().GetResult(); } if (e.Payload.ServerContent != null) @@ -309,31 +341,31 @@ private Task AttachEvents(MultiModalLiveClient client) if (e.Payload.ServerContent.TurnComplete == true) { var responseDone = await ResponseDone(_conn, e.Payload.ServerContent); - _onModelResponseDone(responseDone); + _onModelResponseDone(responseDone).ConfigureAwait(false).GetAwaiter().GetResult(); } } }; client.AudioChunkReceived += (sender, e) => { - _onModelAudioDeltaReceived(Convert.ToBase64String(e.Buffer), Guid.NewGuid().ToString()); + _onModelAudioDeltaReceived(Convert.ToBase64String(e.Buffer), Guid.NewGuid().ToString()).ConfigureAwait(false).GetAwaiter().GetResult(); }; client.TextChunkReceived += (sender, e) => { - _onInputAudioTranscriptionDone(new RoleDialogModel(AgentRole.Assistant, e.Text)); + _onInputAudioTranscriptionDone(new RoleDialogModel(AgentRole.Assistant, e.Text)).ConfigureAwait(false).GetAwaiter().GetResult(); }; client.GenerationInterrupted += (sender, e) => { _logger.LogInformation("Audio generation interrupted."); - _onUserInterrupted(); + _onUserInterrupted().ConfigureAwait(false).GetAwaiter().GetResult(); }; client.AudioReceiveCompleted += (sender, e) => { _logger.LogInformation("Audio receive completed."); - _onModelAudioResponseDone(); + _onModelAudioResponseDone().ConfigureAwait(false).GetAwaiter().GetResult(); }; client.ErrorOccurred += (sender, e) => @@ -345,6 +377,43 @@ private Task AttachEvents(MultiModalLiveClient client) return Task.CompletedTask; } + private async Task> OnResponseDone(RealtimeHubConnection conn, string text, RealtimeUsageMetaData? useage) + { + var outputs = new List + { + new(AgentRole.Assistant, text) + { + CurrentAgentId = conn.CurrentAgentId, + MessageId = Guid.NewGuid().ToString(), + MessageType = MessageTypeName.Plain + } + }; + + if (useage != null) + { + var contentHooks = _services.GetServices(); + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) + { + CurrentAgentId = conn.CurrentAgentId + }, + new TokenStatsModel + { + Provider = Provider, + Model = _model, + Prompt = text, + TextInputTokens = useage.PromptTokensDetails?.FirstOrDefault(x => x.Modality == Modality.TEXT.ToString())?.TokenCount ?? 0, + AudioInputTokens = useage.PromptTokensDetails?.FirstOrDefault(x => x.Modality == Modality.AUDIO.ToString())?.TokenCount ?? 0, + TextOutputTokens = useage.ResponseTokensDetails?.FirstOrDefault(x => x.Modality == Modality.TEXT.ToString())?.TokenCount ?? 0, + AudioOutputTokens = useage.ResponseTokensDetails?.FirstOrDefault(x => x.Modality == Modality.AUDIO.ToString())?.TokenCount ?? 0 + }); + } + } + + return outputs; + } + private async Task> ResponseDone(RealtimeHubConnection conn, BidiGenerateContentServerContent serverContent) { @@ -401,8 +470,6 @@ await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, "response.don public async Task SendEventToModel(object message) { - //todo Send Audio Chunks to Model, Botsharp RealTime Implementation seems to be incomplete - if (_session == null) return; await _session.SendEventToModel(message); @@ -419,9 +486,9 @@ public async Task UpdateSession(RealtimeHubConnection conn, bool isInit var (prompt, request) = PrepareOptions(agent, []); var config = request.GenerationConfig; - //Output Modality can either be text or audio if (config != null) { + //Output Modality can either be text or audio config.ResponseModalities = [Modality.AUDIO]; var words = new List(); @@ -467,14 +534,16 @@ await HookEmitter.Emit(_services, // //Tools = request.Tools?.ToArray(), //}); - await SendEventToModel(new BidiClientPayload + await SendEventToModel(new RealtimeClientPayload { - Setup = new BidiGenerateContentSetup() + Setup = new RealtimeGenerateContentSetup() { GenerationConfig = config, Model = Model.ToModelId(), SystemInstruction = request.SystemInstruction, - Tools = [] + Tools = [], + InputAudioTranscription = new(), + OutputAudioTranscription = new() } }); @@ -532,7 +601,7 @@ await SendEventToModel(new BidiClientPayload } else { - throw new NotImplementedException(""); + throw new NotImplementedException($"Unrecognized role {message.Role}."); } } @@ -542,9 +611,9 @@ public async Task> OnResponsedDone(RealtimeHubConnection c } - public async Task OnConversationItemCreated(RealtimeHubConnection conn, string response) + public async Task OnConversationItemCreated(RealtimeHubConnection conn, string text) { - return await Task.FromResult(new RoleDialogModel(AgentRole.User, response)); + return await Task.FromResult(new RoleDialogModel(AgentRole.User, text)); } private (string, GenerateContentRequest) PrepareOptions(Agent agent, @@ -688,4 +757,18 @@ private string GetPrompt(IEnumerable systemPrompts, IEnumerable return prompt; } + + + private async Task OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string text) + { + return new RoleDialogModel(AgentRole.User, text) + { + CurrentAgentId = conn.CurrentAgentId + }; + } + + private Uri BuildWebsocketUri(string apiKey, string version = "v1alpha") + { + return new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.{version}.GenerativeService.BidiGenerateContent?key={apiKey}"); + } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs index dcb82164e..f45074774 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -402,7 +402,7 @@ public async Task InsertConversationItem(RoleDialogModel message) } else { - throw new NotImplementedException(""); + throw new NotImplementedException($"Unrecognized role {message.Role}."); } } From 04eafedb7837330f442a811d06c0a007d882deac Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Wed, 14 May 2025 18:14:01 -0500 Subject: [PATCH 05/12] refine transcription --- .../MLTasks/IRealTimeCompletion.cs | 5 +- .../Realtime/Models/RealtimeModelSettings.cs | 7 +- .../Hooks/RealtimeConversationHook.cs | 1 + .../BotSharp.Core/BotSharp.Core.csproj | 3 - .../BotSharp.Core/Functions/GetWeatherFn.cs | 12 +- .../AsyncWebsocketDataResultEnumerator.cs | 1 - .../Session/BotSharpRealtimeSession.cs | 4 +- .../Session/LlmRealtimeSession.cs | 4 +- .../functions/get_location.json | 20 - .../functions/get_weather.json | 7 +- .../ChatStreamMiddleware.cs | 5 +- .../Models/Realtime/RealtimeServerResponse.cs | 21 ++ .../Realtime/RealtimeTranscriptionResponse.cs | 53 +++ .../Realtime/RealTimeCompletionProvider.cs | 356 +++++------------- src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs | 7 +- .../Realtime/RealTimeCompletionProvider.cs | 225 ++++++----- .../appsettings.json | 24 +- 17 files changed, 328 insertions(+), 427 deletions(-) delete mode 100644 src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_location.json create mode 100644 src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs b/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs index 315f4f5a3..d6057859c 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MLTasks/IRealTimeCompletion.cs @@ -1,6 +1,4 @@ using BotSharp.Abstraction.Realtime.Models; -using System; -using static System.Runtime.InteropServices.JavaScript.JSType; namespace BotSharp.Abstraction.MLTasks; @@ -20,6 +18,7 @@ Task Connect( Func onConversationItemCreated, Func onInputAudioTranscriptionDone, Func onInterruptionDetected); + Task AppenAudioBuffer(string message); Task AppenAudioBuffer(ArraySegment data, int length); @@ -31,6 +30,4 @@ Task Connect( Task RemoveConversationItem(string itemId); Task TriggerModelInference(string? instructions = null); Task CancelModelResponse(); - Task> OnResponsedDone(RealtimeHubConnection conn, string response); - Task OnConversationItemCreated(RealtimeHubConnection conn, string response); } diff --git a/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeModelSettings.cs b/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeModelSettings.cs index daf8714a8..14f5923fb 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeModelSettings.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeModelSettings.cs @@ -12,7 +12,12 @@ public class RealtimeModelSettings public string Voice { get; set; } = "alloy"; public float Temperature { get; set; } = 0.8f; public int MaxResponseOutputTokens { get; set; } = 512; - public int ModelResponseTimeout { get; set; } = 30; + public int ModelResponseTimeoutSeconds { get; set; } = 30; + + /// + /// Whether the target event arrives after ModelResponseTimeoutSeconds, e.g., "response.done" + /// + public string? ModelResponseTimeoutEndEvent { get; set; } public AudioTranscription InputAudioTranscription { get; set; } = new(); public ModelTurnDetection TurnDetection { get; set; } = new(); } diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs index 5e1fcfee1..aabac1869 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs @@ -42,6 +42,7 @@ public async Task OnFunctionExecuted(RoleDialogModel message) var routing = _services.GetRequiredService(); message.Role = AgentRole.Function; + //message.Role = AgentRole.Assistant; if (message.FunctionName == "route_to_agent") { diff --git a/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj b/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj index de2c8909e..b3e29b3f5 100644 --- a/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj +++ b/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj @@ -211,9 +211,6 @@ PreserveNewest - - PreserveNewest - diff --git a/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs b/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs index 759bc68c8..09f6e6615 100644 --- a/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs +++ b/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs @@ -20,14 +20,14 @@ public GetWeatherFn(IServiceProvider services) public async Task Execute(RoleDialogModel message) { - var args = JsonSerializer.Deserialize(message.FunctionArgs, BotSharpOptions.defaultJsonOptions); + //var args = JsonSerializer.Deserialize(message.FunctionArgs, BotSharpOptions.defaultJsonOptions); - var sidecar = _services.GetService(); - var states = GetSideCarStates(); + //var sidecar = _services.GetService(); + //var states = GetSideCarStates(); - var userMessage = $"Please find the information at location {args.City}, {args.State}"; - var response = await sidecar.SendMessage(BuiltInAgentId.Chatbot, userMessage, states: states); - message.Content = $"It is a sunny day {response.Content}."; + //var userMessage = $"Please find the information at location {args.City}, {args.State}"; + //var response = await sidecar.SendMessage(BuiltInAgentId.Chatbot, userMessage, states: states); + message.Content = $"It is a sunny day."; return true; } diff --git a/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs b/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs index 548850e93..f89127e27 100644 --- a/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs +++ b/src/Infrastructure/BotSharp.Core/Infrastructures/Websocket/AsyncWebsocketDataResultEnumerator.cs @@ -1,4 +1,3 @@ -using BotSharp.Abstraction.Realtime.Models.Session; using System.Buffers; using System.ClientModel; using System.Net.WebSockets; diff --git a/src/Infrastructure/BotSharp.Core/Session/BotSharpRealtimeSession.cs b/src/Infrastructure/BotSharp.Core/Session/BotSharpRealtimeSession.cs index 0c863b7b7..7f5f6c159 100644 --- a/src/Infrastructure/BotSharp.Core/Session/BotSharpRealtimeSession.cs +++ b/src/Infrastructure/BotSharp.Core/Session/BotSharpRealtimeSession.cs @@ -55,7 +55,7 @@ private ChatSessionUpdate HandleSessionResult(ClientResult result) }; } - public async Task SendEvent(string message) + public async Task SendEventAsync(string message) { if (_websocket.State == WebSocketState.Open) { @@ -64,7 +64,7 @@ public async Task SendEvent(string message) } } - public async Task Disconnect() + public async Task DisconnectAsync() { if (_websocket.State == WebSocketState.Open) { diff --git a/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs b/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs index d799480ed..2f3259cf4 100644 --- a/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs +++ b/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs @@ -71,7 +71,7 @@ private ChatSessionUpdate HandleSessionResult(ClientResult result) }; } - public async Task SendEventToModel(object message) + public async Task SendEventToModelAsync(object message) { if (_webSocket.State != WebSocketState.Open) { @@ -96,7 +96,7 @@ public async Task SendEventToModel(object message) } } - public async Task Disconnect() + public async Task DisconnectAsync() { if (_webSocket.State == WebSocketState.Open) { diff --git a/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_location.json b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_location.json deleted file mode 100644 index b0716218e..000000000 --- a/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_location.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "name": "get_location", - "description": "Get location information for user.", - "parameters": { - "type": "object", - "properties": { - "city": { - "type": "string", - "visibility_expression": "{% if states.channel == 'email' %}visible{% endif %}", - "description": "The location city that user wants to know about." - }, - "county": { - "type": "string", - "visibility_expression": "{% if states.channel != 'email' %}visible{% endif %}", - "description": "The location county that user wants to know about." - } - }, - "required": [ "city", "county" ] - } -} \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json index bdb679a25..0fd0a459b 100644 --- a/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json +++ b/src/Infrastructure/BotSharp.Core/data/agents/01e2fc5c-2c89-4ec7-8470-7688608b496c/functions/get_weather.json @@ -1,19 +1,14 @@ { "name": "get_weather", "description": "Get weather information for user.", - "visibility_expression": "{% if states.channel != 'email' %}visible{% endif %}", "parameters": { "type": "object", "properties": { "city": { "type": "string", "description": "The city where the user wants to get weather information." - }, - "state": { - "type": "string", - "description": "The state where the user wants to get weather information." } }, - "required": [ "city", "state" ] + "required": [ "city" ] } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.ChatHub/ChatStreamMiddleware.cs b/src/Plugins/BotSharp.Plugin.ChatHub/ChatStreamMiddleware.cs index b7d1b21a2..ece767f9a 100644 --- a/src/Plugins/BotSharp.Plugin.ChatHub/ChatStreamMiddleware.cs +++ b/src/Plugins/BotSharp.Plugin.ChatHub/ChatStreamMiddleware.cs @@ -94,8 +94,7 @@ private async Task HandleWebSocket(IServiceProvider services, string agentId, st } } - - await _session.Disconnect(); + await _session.DisconnectAsync(); _session.Dispose(); } @@ -105,7 +104,7 @@ await hub.ConnectToModel(async data => { if (_session != null) { - await _session.SendEvent(data); + await _session.SendEventAsync(data); } }); } diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs index 354a572f2..4b65bdf81 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs @@ -12,6 +12,9 @@ internal class RealtimeServerResponse [JsonPropertyName("usageMetadata")] public RealtimeUsageMetaData? UsageMetaData { get; set; } + + [JsonPropertyName("toolCall")] + public RealtimeToolCall? ToolCall { get; set; } } @@ -70,4 +73,22 @@ internal class RealtimeGenerateContentTranscription { [JsonPropertyName("text")] public string? Text { get; set; } +} + +internal class RealtimeToolCall +{ + [JsonPropertyName("functionCalls")] + public List? FunctionCalls { get; set; } +} + +internal class RealtimeFunctionCall +{ + [JsonPropertyName("id")] + public string Id { get; set; } + + [JsonPropertyName("name")] + public string Name { get; set; } + + [JsonPropertyName("args")] + public JsonNode? Args { get; set; } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs new file mode 100644 index 000000000..b14c1bde3 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs @@ -0,0 +1,53 @@ +using System.IO; + +namespace BotSharp.Plugin.GoogleAI.Models.Realtime; + +internal class RealtimeTranscriptionResponse : IDisposable +{ + public RealtimeTranscriptionResponse() + { + + } + + private MemoryStream _contentStream = new(); + public Stream? ContentStream + { + get + { + return _contentStream != null ? _contentStream : new MemoryStream(); + } + } + + public void Collect(string text) + { + var binary = BinaryData.FromString(text); + var bytes = binary.ToArray(); + + _contentStream.Position = _contentStream.Length; + _contentStream.Write(bytes, 0, bytes.Length); + _contentStream.Position = 0; + } + + public string GetString() + { + if (_contentStream.Length == 0) + { + return string.Empty; + } + + var bytes = _contentStream.ToArray(); + var text = Encoding.UTF8.GetString(bytes, 0, bytes.Length); + return text; + } + + public void Clear() + { + _contentStream.SetLength(0); + _contentStream.Position = 0; + } + + public void Dispose() + { + _contentStream?.Dispose(); + } +} diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index b267821cc..f628a2811 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -1,17 +1,10 @@ -using BotSharp.Abstraction.Options; +using System.Threading; using BotSharp.Abstraction.Realtime.Models.Session; using BotSharp.Core.Session; using BotSharp.Plugin.GoogleAI.Models.Realtime; using GenerativeAI; -using GenerativeAI.Core; -using GenerativeAI.Live; -using GenerativeAI.Live.Extensions; using GenerativeAI.Types; using GenerativeAI.Types.Converters; -using Google.Ai.Generativelanguage.V1Beta2; -using Google.Api; -using System; -using System.Threading; namespace BotSharp.Plugin.GoogleAi.Providers.Realtime; @@ -21,18 +14,15 @@ public class GoogleRealTimeProvider : IRealTimeCompletion public string Model => _model; private string _model = GoogleAIModels.Gemini2FlashExp; - private MultiModalLiveClient _client; - private GenerativeModel _chatClient; + private readonly IServiceProvider _services; private readonly ILogger _logger; private List renderedInstructions = []; private LlmRealtimeSession _session; - private readonly BotSharpOptions _botsharpOptions; private readonly GoogleAiSettings _settings; private const string DEFAULT_MIME_TYPE = "audio/pcm;rate=16000"; - private readonly JsonSerializerOptions _jsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase, @@ -45,11 +35,9 @@ public class GoogleRealTimeProvider : IRealTimeCompletion public GoogleRealTimeProvider( IServiceProvider services, GoogleAiSettings settings, - BotSharpOptions botSharpOptions, ILogger logger) { _settings = settings; - _botsharpOptions = botSharpOptions; _services = services; _logger = logger; } @@ -59,17 +47,6 @@ public void SetModelName(string model) _model = model; } - private RealtimeHubConnection _conn; - private Func _onModelReady; - private Func _onModelAudioDeltaReceived; - private Func _onModelAudioResponseDone; - private Func _onModelAudioTranscriptDone; - private Func, Task> _onModelResponseDone; - private Func _onConversationItemCreated; - private Func _onInputAudioTranscriptionDone; - private Func _onUserInterrupted; - - public async Task Connect( RealtimeHubConnection conn, Func onModelReady, @@ -81,16 +58,6 @@ public async Task Connect( Func onInputAudioTranscriptionDone, Func onInterruptionDetected) { - _conn = conn; - _onModelReady = onModelReady; - _onModelAudioDeltaReceived = onModelAudioDeltaReceived; - _onModelAudioResponseDone = onModelAudioResponseDone; - _onModelAudioTranscriptDone = onModelAudioTranscriptDone; - _onModelResponseDone = onModelResponseDone; - _onConversationItemCreated = onConversationItemCreated; - _onInputAudioTranscriptionDone = onInputAudioTranscriptionDone; - _onUserInterrupted = onInterruptionDetected; - var settingsService = _services.GetRequiredService(); var realtimeModelSettings = _services.GetRequiredService(); @@ -108,9 +75,7 @@ public async Task Connect( }); var uri = BuildWebsocketUri(modelSettings.ApiKey, "v1beta"); - await _session.ConnectAsync( - uri: uri, - cancellationToken: CancellationToken.None); + await _session.ConnectAsync(uri: uri, cancellationToken: CancellationToken.None); await onModelReady(); @@ -124,21 +89,6 @@ await _session.ConnectAsync( onConversationItemCreated, onInputAudioTranscriptionDone, onInterruptionDetected); - - - //var client = ProviderHelper.GetGeminiClient(Provider, _model, _services); - //_chatClient = client.CreateGenerativeModel(_model); - //_client = _chatClient.CreateMultiModalLiveClient( - // config: new GenerationConfig - // { - // ResponseModalities = [Modality.AUDIO], - // }, - // systemInstruction: "You are a helpful assistant.", - // logger: _logger); - - //await AttachEvents(_client); - - //await _client.ConnectAsync(false); } private async Task ReceiveMessage( @@ -152,8 +102,8 @@ private async Task ReceiveMessage( Func onInputAudioTranscriptionDone, Func onInterruptionDetected) { - var inputTranscription = string.Empty; - var outputTranscription = string.Empty; + using var inputStream = new RealtimeTranscriptionResponse(); + using var outputStream = new RealtimeTranscriptionResponse(); await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) { @@ -176,31 +126,43 @@ private async Task ReceiveMessage( { _logger.LogInformation($"Session setup completed."); } + else if (response.ToolCall != null && !response.ToolCall.FunctionCalls.IsNullOrEmpty()) + { + var functionCall = response.ToolCall.FunctionCalls.First(); + _logger.LogInformation($"Tool call received {functionCall.Name}({functionCall.Args?.ToJsonString(_jsonOptions) ?? string.Empty})."); + + if (functionCall != null) + { + var messages = OnFunctionCall(conn, functionCall); + await onModelResponseDone(messages); + } + } else if (response.ServerContent != null) { if (response.ServerContent.InputTranscription?.Text != null) { - outputTranscription = string.Empty; - inputTranscription += response.ServerContent.InputTranscription.Text; + inputStream.Collect(response.ServerContent.InputTranscription.Text); } if (response.ServerContent.OutputTranscription?.Text != null) { - outputTranscription += response.ServerContent.OutputTranscription.Text; + outputStream.Collect(response.ServerContent.OutputTranscription.Text); } if (response.ServerContent.ModelTurn != null) { _logger.LogInformation($"Model audio delta received."); - var parts = response.ServerContent.ModelTurn.Parts; + // Handle input transcription + var inputTranscription = inputStream.GetString(); if (!string.IsNullOrEmpty(inputTranscription)) { - var message = await OnUserAudioTranscriptionCompleted(conn, inputTranscription); + var message = OnUserAudioTranscriptionCompleted(conn, inputTranscription); await onInputAudioTranscriptionDone(message); - inputTranscription = string.Empty; } + inputStream.Clear(); + var parts = response.ServerContent.ModelTurn.Parts; if (!parts.IsNullOrEmpty()) { foreach (var part in parts) @@ -220,15 +182,14 @@ private async Task ReceiveMessage( { _logger.LogInformation($"Model turn completed."); + var outputTranscription = outputStream.GetString(); if (!string.IsNullOrEmpty(outputTranscription)) { var messages = await OnResponseDone(conn, outputTranscription, response.UsageMetaData); await onModelResponseDone(messages); - - // Reset input/output transcription - inputTranscription = string.Empty; - outputTranscription = string.Empty; } + inputStream.Clear(); + outputStream.Clear(); } } } @@ -247,19 +208,12 @@ public async Task Disconnect() { if (_session != null) { - await _session.Disconnect(); + await _session.DisconnectAsync(); } - - //if (_client != null) - //{ - // await _client.DisconnectAsync(); - //} } public async Task AppenAudioBuffer(string message) { - //await _client.SendAudioAsync(Convert.FromBase64String(message)); - await SendEventToModel(new BidiClientPayload { RealtimeInput = new() @@ -272,8 +226,6 @@ await SendEventToModel(new BidiClientPayload public async Task AppenAudioBuffer(ArraySegment data, int length) { var buffer = data.AsSpan(0, length).ToArray(); - //await _client.SendAudioAsync(buffer, "audio/pcm;rate=16000"); - await SendEventToModel(new BidiClientPayload { RealtimeInput = new() @@ -285,21 +237,13 @@ await SendEventToModel(new BidiClientPayload public async Task TriggerModelInference(string? instructions = null) { - var content = !string.IsNullOrWhiteSpace(instructions) - ? new Content(instructions, AgentRole.User) - : null; - - //await _client.SendClientContentAsync(new BidiGenerateContentClientContent() - //{ - // Turns = content != null ? [content] : null, - // TurnComplete = true, - //}); + var content = new Content("Please respond to me.", AgentRole.User); await SendEventToModel(new BidiClientPayload { ClientContent = new() { - Turns = content != null ? [content] : null, + Turns = null, TurnComplete = true } }); @@ -315,164 +259,11 @@ public async Task RemoveConversationItem(string itemId) } - private Task AttachEvents(MultiModalLiveClient client) - { - client.Connected += (sender, e) => - { - _logger.LogInformation("Google Realtime Client connected."); - _onModelReady().ConfigureAwait(false).GetAwaiter().GetResult(); - }; - - client.Disconnected += (sender, e) => - { - _logger.LogInformation("Google Realtime Client disconnected."); - }; - - client.MessageReceived += async (sender, e) => - { - _logger.LogInformation("User message received."); - if (e.Payload.SetupComplete != null) - { - _onConversationItemCreated(_client.ConnectionId.ToString()).ConfigureAwait(false).GetAwaiter().GetResult(); - } - - if (e.Payload.ServerContent != null) - { - if (e.Payload.ServerContent.TurnComplete == true) - { - var responseDone = await ResponseDone(_conn, e.Payload.ServerContent); - _onModelResponseDone(responseDone).ConfigureAwait(false).GetAwaiter().GetResult(); - } - } - }; - - client.AudioChunkReceived += (sender, e) => - { - _onModelAudioDeltaReceived(Convert.ToBase64String(e.Buffer), Guid.NewGuid().ToString()).ConfigureAwait(false).GetAwaiter().GetResult(); - }; - - client.TextChunkReceived += (sender, e) => - { - _onInputAudioTranscriptionDone(new RoleDialogModel(AgentRole.Assistant, e.Text)).ConfigureAwait(false).GetAwaiter().GetResult(); - }; - - client.GenerationInterrupted += (sender, e) => - { - _logger.LogInformation("Audio generation interrupted."); - _onUserInterrupted().ConfigureAwait(false).GetAwaiter().GetResult(); - }; - - client.AudioReceiveCompleted += (sender, e) => - { - _logger.LogInformation("Audio receive completed."); - _onModelAudioResponseDone().ConfigureAwait(false).GetAwaiter().GetResult(); - }; - - client.ErrorOccurred += (sender, e) => - { - var ex = e.GetException(); - _logger.LogError(ex, "Error occurred in Google Realtime Client"); - }; - - return Task.CompletedTask; - } - - private async Task> OnResponseDone(RealtimeHubConnection conn, string text, RealtimeUsageMetaData? useage) - { - var outputs = new List - { - new(AgentRole.Assistant, text) - { - CurrentAgentId = conn.CurrentAgentId, - MessageId = Guid.NewGuid().ToString(), - MessageType = MessageTypeName.Plain - } - }; - - if (useage != null) - { - var contentHooks = _services.GetServices(); - foreach (var hook in contentHooks) - { - await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) - { - CurrentAgentId = conn.CurrentAgentId - }, - new TokenStatsModel - { - Provider = Provider, - Model = _model, - Prompt = text, - TextInputTokens = useage.PromptTokensDetails?.FirstOrDefault(x => x.Modality == Modality.TEXT.ToString())?.TokenCount ?? 0, - AudioInputTokens = useage.PromptTokensDetails?.FirstOrDefault(x => x.Modality == Modality.AUDIO.ToString())?.TokenCount ?? 0, - TextOutputTokens = useage.ResponseTokensDetails?.FirstOrDefault(x => x.Modality == Modality.TEXT.ToString())?.TokenCount ?? 0, - AudioOutputTokens = useage.ResponseTokensDetails?.FirstOrDefault(x => x.Modality == Modality.AUDIO.ToString())?.TokenCount ?? 0 - }); - } - } - - return outputs; - } - - private async Task> ResponseDone(RealtimeHubConnection conn, - BidiGenerateContentServerContent serverContent) - { - var outputs = new List(); - - var parts = serverContent.ModelTurn?.Parts; - if (parts != null) - { - foreach (var part in parts) - { - var call = part.FunctionCall; - if (call != null) - { - var item = new RoleDialogModel(AgentRole.Assistant, part.Text) - { - CurrentAgentId = conn.CurrentAgentId, - MessageId = call.Id ?? String.Empty, - MessageType = MessageTypeName.FunctionCall - }; - outputs.Add(item); - } - else - { - var item = new RoleDialogModel(AgentRole.Assistant, call.Args?.ToJsonString() ?? string.Empty) - { - CurrentAgentId = conn.CurrentAgentId, - FunctionName = call.Name, - FunctionArgs = call.Args?.ToJsonString() ?? string.Empty, - ToolCallId = call.Id ?? String.Empty, - MessageId = call.Id ?? String.Empty, - MessageType = MessageTypeName.FunctionCall - }; - outputs.Add(item); - } - } - } - - var contentHooks = _services.GetServices().ToList(); - // After chat completion hook - foreach (var hook in contentHooks) - { - await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, "response.done") - { - CurrentAgentId = conn.CurrentAgentId - }, new TokenStatsModel - { - Provider = Provider, - Model = _model, - }); - } - - return outputs; - } - public async Task SendEventToModel(object message) { if (_session == null) return; - await _session.SendEventToModel(message); + await _session.SendEventToModelAsync(message); } public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) @@ -500,7 +291,6 @@ public async Task UpdateSession(RealtimeHubConnection conn, bool isInit config.MaxOutputTokens = realtimeModelSettings.MaxResponseOutputTokens; } - var functions = request.Tools?.SelectMany(s => s.FunctionDeclarations).Select(x => { var fn = new FunctionDef @@ -526,14 +316,7 @@ await HookEmitter.Emit(_services, }); } - //await _client.SendSetupAsync(new BidiGenerateContentSetup() - //{ - // GenerationConfig = config, - // Model = Model.ToModelId(), - // SystemInstruction = request.SystemInstruction, - // //Tools = request.Tools?.ToArray(), - //}); - + var realtimeSetting = _services.GetRequiredService(); await SendEventToModel(new RealtimeClientPayload { Setup = new RealtimeGenerateContentSetup() @@ -541,9 +324,9 @@ await SendEventToModel(new RealtimeClientPayload GenerationConfig = config, Model = Model.ToModelId(), SystemInstruction = request.SystemInstruction, - Tools = [], - InputAudioTranscription = new(), - OutputAudioTranscription = new() + Tools = request.Tools?.ToArray(), + InputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null, + OutputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null } }); @@ -552,21 +335,17 @@ await SendEventToModel(new RealtimeClientPayload public async Task InsertConversationItem(RoleDialogModel message) { - //if (_client == null) - // throw new Exception("Client is not initialized"); if (message.Role == AgentRole.Function) { var function = new FunctionResponse() { Name = message.FunctionName ?? string.Empty, - Response = JsonNode.Parse(message.Content ?? "{}") + Response = new JsonObject() + { + ["result"] = message.Content ?? string.Empty + } }; - //await _client.SendToolResponseAsync(new BidiGenerateContentToolResponse() - //{ - // FunctionResponses = [function] - //}); - await SendEventToModel(new BidiClientPayload { ToolResponse = new() @@ -588,8 +367,6 @@ await SendEventToModel(new BidiClientPayload } else if (message.Role == AgentRole.User) { - //await _client.SentTextAsync(message.Content); - await SendEventToModel(new BidiClientPayload { ClientContent = new() @@ -605,17 +382,63 @@ await SendEventToModel(new BidiClientPayload } } - public async Task> OnResponsedDone(RealtimeHubConnection conn, string response) + #region Private methods + private List OnFunctionCall(RealtimeHubConnection conn, RealtimeFunctionCall functionCall) { - return []; + var outputs = new List + { + new(AgentRole.Assistant, string.Empty) + { + CurrentAgentId = conn.CurrentAgentId, + FunctionName = functionCall.Name, + FunctionArgs = functionCall.Args?.ToJsonString(_jsonOptions), + ToolCallId = functionCall.Id, + MessageType = MessageTypeName.FunctionCall + } + }; + + return outputs; } - public async Task OnConversationItemCreated(RealtimeHubConnection conn, string text) + private async Task> OnResponseDone(RealtimeHubConnection conn, string text, RealtimeUsageMetaData? usage) { - return await Task.FromResult(new RoleDialogModel(AgentRole.User, text)); + var outputs = new List + { + new(AgentRole.Assistant, text) + { + CurrentAgentId = conn.CurrentAgentId, + MessageId = Guid.NewGuid().ToString(), + MessageType = MessageTypeName.Plain + } + }; + + if (usage != null) + { + var contentHooks = _services.GetServices(); + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) + { + CurrentAgentId = conn.CurrentAgentId + }, + new TokenStatsModel + { + Provider = Provider, + Model = _model, + Prompt = text, + TextInputTokens = usage.PromptTokensDetails?.FirstOrDefault(x => x.Modality == Modality.TEXT.ToString())?.TokenCount ?? 0, + AudioInputTokens = usage.PromptTokensDetails?.FirstOrDefault(x => x.Modality == Modality.AUDIO.ToString())?.TokenCount ?? 0, + TextOutputTokens = usage.ResponseTokensDetails?.FirstOrDefault(x => x.Modality == Modality.TEXT.ToString())?.TokenCount ?? 0, + AudioOutputTokens = usage.ResponseTokensDetails?.FirstOrDefault(x => x.Modality == Modality.AUDIO.ToString())?.TokenCount ?? 0 + }); + } + } + + return outputs; } + private (string, GenerateContentRequest) PrepareOptions(Agent agent, List conversations) { @@ -759,7 +582,7 @@ private string GetPrompt(IEnumerable systemPrompts, IEnumerable } - private async Task OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string text) + private RoleDialogModel OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string text) { return new RoleDialogModel(AgentRole.User, text) { @@ -771,4 +594,5 @@ private Uri BuildWebsocketUri(string apiKey, string version = "v1alpha") { return new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.{version}.GenerativeService.BidiGenerateContent?key={apiKey}"); } + #endregion } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs index a4e6606e0..daff1e0b0 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Using.cs @@ -16,14 +16,15 @@ global using BotSharp.Abstraction.Agents.Models; global using BotSharp.Abstraction.MLTasks; global using BotSharp.Abstraction.Utilities; -global using BotSharp.Plugin.GoogleAi.Settings; global using BotSharp.Abstraction.Realtime; global using BotSharp.Abstraction.Realtime.Models; global using BotSharp.Core.Infrastructures; -global using BotSharp.Plugin.GoogleAi.Providers.Chat; global using BotSharp.Abstraction.Agents; global using BotSharp.Abstraction.Agents.Enums; global using BotSharp.Abstraction.Conversations; global using BotSharp.Abstraction.Conversations.Enums; global using BotSharp.Abstraction.Functions.Models; -global using BotSharp.Abstraction.Loggers; \ No newline at end of file +global using BotSharp.Abstraction.Loggers; + +global using BotSharp.Plugin.GoogleAi.Settings; +global using BotSharp.Plugin.GoogleAi.Providers.Chat; \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs index f45074774..fa1bd6230 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -40,9 +40,9 @@ public async Task Connect( Func onInterruptionDetected) { var settingsService = _services.GetRequiredService(); - var realtimeModelSettings = _services.GetRequiredService(); + var realtimeSettings = _services.GetRequiredService(); - _model = realtimeModelSettings.Model; + _model = realtimeSettings.Model; var settings = settingsService.GetSetting(Provider, _model); if (_session != null) @@ -65,6 +65,7 @@ await _session.ConnectAsync( cancellationToken: CancellationToken.None); _ = ReceiveMessage( + _services, conn, onModelReady, onModelAudioDeltaReceived, @@ -80,7 +81,7 @@ public async Task Disconnect() { if (_session != null) { - await _session.Disconnect(); + await _session.DisconnectAsync(); _session.Dispose(); } } @@ -143,6 +144,7 @@ await SendEventToModel(new } private async Task ReceiveMessage( + IServiceProvider services, RealtimeHubConnection conn, Func onModelReady, Func onModelAudioDeltaReceived, @@ -153,6 +155,9 @@ private async Task ReceiveMessage( Func onInputAudioTranscriptionDone, Func onInterruptionDetected) { + DateTime? startTime = null; + var realtimeSettings = _services.GetRequiredService(); + await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) { var receivedText = update?.RawResponse; @@ -163,6 +168,17 @@ private async Task ReceiveMessage( var response = JsonSerializer.Deserialize(receivedText); + if (realtimeSettings?.ModelResponseTimeoutSeconds > 0 + && !string.IsNullOrWhiteSpace(realtimeSettings?.ModelResponseTimeoutEndEvent) + && startTime.HasValue + && (DateTime.UtcNow - startTime.Value).TotalSeconds >= realtimeSettings.ModelResponseTimeoutSeconds + && response.Type != realtimeSettings.ModelResponseTimeoutEndEvent) + { + startTime = null; + await TriggerModelInference("Responsd to user immediately"); + continue; + } + if (response.Type == "error") { _logger.LogError($"{response.Type}: {receivedText}"); @@ -228,6 +244,11 @@ private async Task ReceiveMessage( _logger.LogInformation($"{response.Type}: {receivedText}"); var data = JsonSerializer.Deserialize(receivedText); + if (data?.Item?.Role == "user") + { + startTime = DateTime.UtcNow; + } + await onConversationItemCreated(receivedText); } else if (response.Type == "conversation.item.input_audio_transcription.completed") @@ -263,7 +284,7 @@ public async Task SendEventToModel(object message) { if (_session == null) return; - await _session.SendEventToModel(message); + await _session.SendEventToModelAsync(message); } public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) @@ -406,7 +427,101 @@ public async Task InsertConversationItem(RoleDialogModel message) } } - protected (string, IEnumerable, ChatCompletionOptions) PrepareOptions(Agent agent, List conversations) + + public void SetModelName(string model) + { + _model = model; + } + + #region Private methods + private async Task> OnResponsedDone(RealtimeHubConnection conn, string response) + { + var outputs = new List(); + + var data = JsonSerializer.Deserialize(response).Body; + if (data.Status != "completed") + { + _logger.LogError(data.StatusDetails.ToString()); + /*if (data.StatusDetails.Type == "incomplete" && data.StatusDetails.Reason == "max_output_tokens") + { + await TriggerModelInference("Response user concisely"); + }*/ + return []; + } + + var prompts = new List(); + var inputTokenDetails = data.Usage?.InputTokenDetails; + var outputTokenDetails = data.Usage?.OutputTokenDetails; + + foreach (var output in data.Outputs) + { + if (output.Type == "function_call") + { + outputs.Add(new RoleDialogModel(AgentRole.Assistant, output.Arguments) + { + CurrentAgentId = conn.CurrentAgentId, + FunctionName = output.Name, + FunctionArgs = output.Arguments, + ToolCallId = output.CallId, + MessageId = output.Id, + MessageType = MessageTypeName.FunctionCall + }); + + prompts.Add($"{output.Name}({output.Arguments})"); + } + else if (output.Type == "message") + { + var content = output.Content.FirstOrDefault()?.Transcript ?? string.Empty; + + outputs.Add(new RoleDialogModel(output.Role, content) + { + CurrentAgentId = conn.CurrentAgentId, + MessageId = output.Id, + MessageType = MessageTypeName.Plain + }); + + prompts.Add(content); + } + } + + + // After chat completion hook + var text = string.Join("\r\n", prompts); + var contentHooks = _services.GetServices(); + + foreach (var hook in contentHooks) + { + await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) + { + CurrentAgentId = conn.CurrentAgentId + }, + new TokenStatsModel + { + Provider = Provider, + Model = _model, + Prompt = text, + TextInputTokens = inputTokenDetails?.TextTokens ?? 0 - inputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, + CachedTextInputTokens = data.Usage?.InputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, + AudioInputTokens = inputTokenDetails?.AudioTokens ?? 0 - inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, + CachedAudioInputTokens = inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, + TextOutputTokens = outputTokenDetails?.TextTokens ?? 0, + AudioOutputTokens = outputTokenDetails?.AudioTokens ?? 0 + }); + } + + return outputs; + } + + private async Task OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string response) + { + var data = JsonSerializer.Deserialize(response); + return new RoleDialogModel(AgentRole.User, data.Transcript) + { + CurrentAgentId = conn.CurrentAgentId + }; + } + + private (string, IEnumerable, ChatCompletionOptions) PrepareOptions(Agent agent, List conversations) { var agentService = _services.GetRequiredService(); var state = _services.GetRequiredService(); @@ -588,103 +703,5 @@ private string GetPrompt(IEnumerable messages, ChatCompletionOption return prompt; } - - public void SetModelName(string model) - { - _model = model; - } - - public async Task> OnResponsedDone(RealtimeHubConnection conn, string response) - { - var outputs = new List(); - - var data = JsonSerializer.Deserialize(response).Body; - if (data.Status != "completed") - { - _logger.LogError(data.StatusDetails.ToString()); - /*if (data.StatusDetails.Type == "incomplete" && data.StatusDetails.Reason == "max_output_tokens") - { - await TriggerModelInference("Response user concisely"); - }*/ - return []; - } - - var contentHooks = _services.GetServices().ToList(); - - var prompts = new List(); - var inputTokenDetails = data.Usage?.InputTokenDetails; - var outputTokenDetails = data.Usage?.OutputTokenDetails; - - foreach (var output in data.Outputs) - { - if (output.Type == "function_call") - { - outputs.Add(new RoleDialogModel(AgentRole.Assistant, output.Arguments) - { - CurrentAgentId = conn.CurrentAgentId, - FunctionName = output.Name, - FunctionArgs = output.Arguments, - ToolCallId = output.CallId, - MessageId = output.Id, - MessageType = MessageTypeName.FunctionCall - }); - - prompts.Add($"{output.Name}({output.Arguments})"); - } - else if (output.Type == "message") - { - var content = output.Content.FirstOrDefault()?.Transcript ?? string.Empty; - - outputs.Add(new RoleDialogModel(output.Role, content) - { - CurrentAgentId = conn.CurrentAgentId, - MessageId = output.Id, - MessageType = MessageTypeName.Plain - }); - - prompts.Add(content); - } - } - - var text = string.Join("\r\n", prompts); - // After chat completion hook - foreach (var hook in contentHooks) - { - await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) - { - CurrentAgentId = conn.CurrentAgentId - }, - new TokenStatsModel - { - Provider = Provider, - Model = _model, - Prompt = text, - TextInputTokens = inputTokenDetails?.TextTokens ?? 0 - inputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, - CachedTextInputTokens = data.Usage?.InputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, - AudioInputTokens = inputTokenDetails?.AudioTokens ?? 0 - inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, - CachedAudioInputTokens = inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, - TextOutputTokens = outputTokenDetails?.TextTokens ?? 0, - AudioOutputTokens = outputTokenDetails?.AudioTokens ?? 0 - }); - } - - return outputs; - } - - private async Task OnUserAudioTranscriptionCompleted(RealtimeHubConnection conn, string response) - { - var data = JsonSerializer.Deserialize(response); - return new RoleDialogModel(AgentRole.User, data.Transcript) - { - CurrentAgentId = conn.CurrentAgentId - }; - } - - public async Task OnConversationItemCreated(RealtimeHubConnection conn, string response) - { - var item = response.JsonContent().Item; - var message = new RoleDialogModel(item.Role, item.Content.FirstOrDefault()?.Transcript); - - return message; - } + #endregion } \ No newline at end of file diff --git a/tests/BotSharp.Test.RealtimeVoice/appsettings.json b/tests/BotSharp.Test.RealtimeVoice/appsettings.json index e0ffcb8c4..38a4b2c1f 100644 --- a/tests/BotSharp.Test.RealtimeVoice/appsettings.json +++ b/tests/BotSharp.Test.RealtimeVoice/appsettings.json @@ -16,9 +16,15 @@ "Version": "2024-12-17", "ApiKey": "", "Type": "realtime", - "MultiModal": true, - "PromptCost": 0.0025, - "CompletionCost": 0.01 + "RealTime": true, + "Cost": { + "TextInputCost": 0.0006, + "CachedTextInputCost": 0.0003, + "AudioInputCost": 0.01, + "CachedAudioInputCost": 0.0003, + "TextOutputCost": 0.0024, + "AudioOutputCost": 0.02 + } } ] }, @@ -31,9 +37,15 @@ "Version": "20240620", "ApiKey": "", "Type": "realtime", - "MultiModal": true, - "PromptCost": 0.003, - "CompletionCost": 0.015 + "RealTime": true, + "Cost": { + "TextInputCost": 0.0006, + "CachedTextInputCost": 0.0003, + "AudioInputCost": 0.01, + "CachedAudioInputCost": 0.0003, + "TextOutputCost": 0.0024, + "AudioOutputCost": 0.02 + } } ] } From bc40deed5405088fa3e90851e62e8c0293e00bc6 Mon Sep 17 00:00:00 2001 From: Jicheng Lu Date: Wed, 14 May 2025 23:49:36 -0500 Subject: [PATCH 06/12] tool call done; to do session restart --- .../Hooks/RealtimeConversationHook.cs | 22 ++++++------ .../Conversations/ConversationPlugin.cs | 1 + .../Session/LlmRealtimeSession.cs | 1 + .../Services/BotSharpStatsService.cs | 1 - .../BotSharp.Plugin.ChatHub/ChatHubPlugin.cs | 1 - .../Realtime/RealtimeGenerateContentSetup.cs | 11 +++++- .../Models/Realtime/RealtimeServerResponse.cs | 12 +++++++ .../Realtime/RealtimeTranscriptionResponse.cs | 2 +- .../Realtime/RealTimeCompletionProvider.cs | 35 +++++++++++-------- .../Realtime/RealTimeCompletionProvider.cs | 5 ++- .../appsettings.json | 2 -- 11 files changed, 59 insertions(+), 34 deletions(-) diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs index aabac1869..2f8a03ee9 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs @@ -42,7 +42,6 @@ public async Task OnFunctionExecuted(RoleDialogModel message) var routing = _services.GetRequiredService(); message.Role = AgentRole.Function; - //message.Role = AgentRole.Assistant; if (message.FunctionName == "route_to_agent") { @@ -66,21 +65,24 @@ public async Task OnFunctionExecuted(RoleDialogModel message) else { // Update session for changed states - var instruction = await hub.Completer.UpdateSession(hub.HubConn); + + // TO DO + //var instruction = await hub.Completer.UpdateSession(hub.HubConn); await hub.Completer.InsertConversationItem(message); if (string.IsNullOrEmpty(message.Content)) { return; } - else if (message.StopCompletion) - { - await hub.Completer.TriggerModelInference($"Say to user: \"{message.Content}\""); - } - else - { - await hub.Completer.TriggerModelInference(instruction); - } + + //if (message.StopCompletion) + //{ + // await hub.Completer.TriggerModelInference($"Say to user: \"{message.Content}\""); + //} + //else + //{ + // await hub.Completer.TriggerModelInference(); + //} } } } diff --git a/src/Infrastructure/BotSharp.Core/Conversations/ConversationPlugin.cs b/src/Infrastructure/BotSharp.Core/Conversations/ConversationPlugin.cs index 13ee1de62..bf694b71f 100644 --- a/src/Infrastructure/BotSharp.Core/Conversations/ConversationPlugin.cs +++ b/src/Infrastructure/BotSharp.Core/Conversations/ConversationPlugin.cs @@ -41,6 +41,7 @@ public void RegisterDI(IServiceCollection services, IConfiguration config) return settingService.Bind("GoogleApi"); }); + services.AddScoped(); services.AddScoped(); services.AddScoped(); services.AddScoped(); diff --git a/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs b/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs index 2f3259cf4..60ecee044 100644 --- a/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs +++ b/src/Infrastructure/BotSharp.Core/Session/LlmRealtimeSession.cs @@ -106,6 +106,7 @@ public async Task DisconnectAsync() public void Dispose() { + _clientEventSemaphore?.Dispose(); _webSocket?.Dispose(); } } diff --git a/src/Infrastructure/BotSharp.Core/Statistics/Services/BotSharpStatsService.cs b/src/Infrastructure/BotSharp.Core/Statistics/Services/BotSharpStatsService.cs index b5320c45a..8cbf974f8 100644 --- a/src/Infrastructure/BotSharp.Core/Statistics/Services/BotSharpStatsService.cs +++ b/src/Infrastructure/BotSharp.Core/Statistics/Services/BotSharpStatsService.cs @@ -1,4 +1,3 @@ -using BotSharp.Abstraction.Infrastructures; using BotSharp.Abstraction.Statistics.Settings; namespace BotSharp.Core.Statistics.Services; diff --git a/src/Plugins/BotSharp.Plugin.ChatHub/ChatHubPlugin.cs b/src/Plugins/BotSharp.Plugin.ChatHub/ChatHubPlugin.cs index 7275a2730..725655fce 100644 --- a/src/Plugins/BotSharp.Plugin.ChatHub/ChatHubPlugin.cs +++ b/src/Plugins/BotSharp.Plugin.ChatHub/ChatHubPlugin.cs @@ -24,7 +24,6 @@ public void RegisterDI(IServiceCollection services, IConfiguration config) services.AddScoped(); services.AddScoped(); services.AddScoped(); - services.AddScoped(); services.AddScoped(); services.AddScoped(); services.AddScoped(); diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs index c72e8cb6d..c334c7ff8 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeGenerateContentSetup.cs @@ -21,6 +21,15 @@ internal class RealtimeGenerateContentSetup [JsonPropertyName("outputAudioTranscription")] public AudioTranscriptionConfig? OutputAudioTranscription { get; set; } + + [JsonPropertyName("sessionResumption")] + public SessionResumptionConfig? SessionResumption { get; set; } } -internal class AudioTranscriptionConfig { } \ No newline at end of file +internal class AudioTranscriptionConfig { } + +internal class SessionResumptionConfig +{ + [JsonPropertyName("handle")] + public string? Handle { get; set; } +} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs index 4b65bdf81..dd6ff508a 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeServerResponse.cs @@ -15,6 +15,9 @@ internal class RealtimeServerResponse [JsonPropertyName("toolCall")] public RealtimeToolCall? ToolCall { get; set; } + + [JsonPropertyName("sessionResumptionUpdate")] + public RealtimeSessionResumptionUpdate? SessionResumptionUpdate { get; set; } } @@ -91,4 +94,13 @@ internal class RealtimeFunctionCall [JsonPropertyName("args")] public JsonNode? Args { get; set; } +} + +internal class RealtimeSessionResumptionUpdate +{ + [JsonPropertyName("newHandle")] + public string? NewHandle { get; set; } + + [JsonPropertyName("resumable")] + public bool? Resumable { get; set; } } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs index b14c1bde3..5d75ca97a 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs @@ -28,7 +28,7 @@ public void Collect(string text) _contentStream.Position = 0; } - public string GetString() + public string GetText() { if (_contentStream.Length == 0) { diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index f628a2811..f8f551fe2 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -113,7 +113,6 @@ private async Task ReceiveMessage( continue; } - Console.WriteLine($"Received text: {receivedText}"); try { var response = JsonSerializer.Deserialize(receivedText, _jsonOptions); @@ -126,10 +125,15 @@ private async Task ReceiveMessage( { _logger.LogInformation($"Session setup completed."); } + else if (response.SessionResumptionUpdate != null) + { + _logger.LogInformation($"Session resumption update => New handle: {response.SessionResumptionUpdate.NewHandle}, Resumable: {response.SessionResumptionUpdate.Resumable}"); + } else if (response.ToolCall != null && !response.ToolCall.FunctionCalls.IsNullOrEmpty()) { - var functionCall = response.ToolCall.FunctionCalls.First(); - _logger.LogInformation($"Tool call received {functionCall.Name}({functionCall.Args?.ToJsonString(_jsonOptions) ?? string.Empty})."); + var functionCall = response.ToolCall.FunctionCalls!.First(); + + _logger.LogInformation($"Tool call received: {functionCall.Name}({functionCall.Args?.ToJsonString(_jsonOptions) ?? string.Empty})."); if (functionCall != null) { @@ -154,7 +158,7 @@ private async Task ReceiveMessage( _logger.LogInformation($"Model audio delta received."); // Handle input transcription - var inputTranscription = inputStream.GetString(); + var inputTranscription = inputStream.GetText(); if (!string.IsNullOrEmpty(inputTranscription)) { var message = OnUserAudioTranscriptionCompleted(conn, inputTranscription); @@ -182,7 +186,8 @@ private async Task ReceiveMessage( { _logger.LogInformation($"Model turn completed."); - var outputTranscription = outputStream.GetString(); + // Handle output transcription + var outputTranscription = outputStream.GetText(); if (!string.IsNullOrEmpty(outputTranscription)) { var messages = await OnResponseDone(conn, outputTranscription, response.UsageMetaData); @@ -237,13 +242,13 @@ await SendEventToModel(new BidiClientPayload public async Task TriggerModelInference(string? instructions = null) { - var content = new Content("Please respond to me.", AgentRole.User); + var content = new Content(instructions ?? "Please respond to user.", AgentRole.User); await SendEventToModel(new BidiClientPayload { ClientContent = new() { - Turns = null, + Turns = [content], TurnComplete = true } }); @@ -269,9 +274,10 @@ public async Task SendEventToModel(object message) public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) { var convService = _services.GetRequiredService(); - var conv = await convService.GetConversation(conn.ConversationId); - var agentService = _services.GetRequiredService(); + var realtimeSetting = _services.GetRequiredService(); + + var conv = await convService.GetConversation(conn.ConversationId); var agent = await agentService.LoadAgent(conn.CurrentAgentId); var (prompt, request) = PrepareOptions(agent, []); @@ -285,10 +291,8 @@ public async Task UpdateSession(RealtimeHubConnection conn, bool isInit var words = new List(); HookEmitter.Emit(_services, hook => words.AddRange(hook.OnModelTranscriptPrompt(agent))); - var realtimeModelSettings = _services.GetRequiredService(); - - config.Temperature = Math.Max(realtimeModelSettings.Temperature, 0.6f); - config.MaxOutputTokens = realtimeModelSettings.MaxResponseOutputTokens; + config.Temperature = Math.Max(realtimeSetting.Temperature, 0.6f); + config.MaxOutputTokens = realtimeSetting.MaxResponseOutputTokens; } var functions = request.Tools?.SelectMany(s => s.FunctionDeclarations).Select(x => @@ -316,7 +320,6 @@ await HookEmitter.Emit(_services, }); } - var realtimeSetting = _services.GetRequiredService(); await SendEventToModel(new RealtimeClientPayload { Setup = new RealtimeGenerateContentSetup() @@ -326,7 +329,8 @@ await SendEventToModel(new RealtimeClientPayload SystemInstruction = request.SystemInstruction, Tools = request.Tools?.ToArray(), InputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null, - OutputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null + OutputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null, + SessionResumption = new() } }); @@ -339,6 +343,7 @@ public async Task InsertConversationItem(RoleDialogModel message) { var function = new FunctionResponse() { + Id = message.ToolCallId, Name = message.FunctionName ?? string.Empty, Response = new JsonObject() { diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs index fa1bd6230..29e495ecb 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -65,7 +65,7 @@ await _session.ConnectAsync( cancellationToken: CancellationToken.None); _ = ReceiveMessage( - _services, + realtimeSettings, conn, onModelReady, onModelAudioDeltaReceived, @@ -144,7 +144,7 @@ await SendEventToModel(new } private async Task ReceiveMessage( - IServiceProvider services, + RealtimeModelSettings realtimeSettings, RealtimeHubConnection conn, Func onModelReady, Func onModelAudioDeltaReceived, @@ -156,7 +156,6 @@ private async Task ReceiveMessage( Func onInterruptionDetected) { DateTime? startTime = null; - var realtimeSettings = _services.GetRequiredService(); await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) { diff --git a/tests/BotSharp.Test.RealtimeVoice/appsettings.json b/tests/BotSharp.Test.RealtimeVoice/appsettings.json index 38a4b2c1f..24b852668 100644 --- a/tests/BotSharp.Test.RealtimeVoice/appsettings.json +++ b/tests/BotSharp.Test.RealtimeVoice/appsettings.json @@ -16,7 +16,6 @@ "Version": "2024-12-17", "ApiKey": "", "Type": "realtime", - "RealTime": true, "Cost": { "TextInputCost": 0.0006, "CachedTextInputCost": 0.0003, @@ -37,7 +36,6 @@ "Version": "20240620", "ApiKey": "", "Type": "realtime", - "RealTime": true, "Cost": { "TextInputCost": 0.0006, "CachedTextInputCost": 0.0003, From 4c4dce852d99d0a1742443eb2b7142bab40cb8b2 Mon Sep 17 00:00:00 2001 From: Jicheng Lu Date: Wed, 14 May 2025 23:54:18 -0500 Subject: [PATCH 07/12] minor change --- .../BotSharp.Core/Functions/GetLocationFn.cs | 25 ------------------- .../BotSharp.Core/Functions/GetWeatherFn.cs | 18 ------------- .../Realtime/RealTimeCompletionProvider.cs | 3 --- 3 files changed, 46 deletions(-) delete mode 100644 src/Infrastructure/BotSharp.Core/Functions/GetLocationFn.cs diff --git a/src/Infrastructure/BotSharp.Core/Functions/GetLocationFn.cs b/src/Infrastructure/BotSharp.Core/Functions/GetLocationFn.cs deleted file mode 100644 index cabfbdbb4..000000000 --- a/src/Infrastructure/BotSharp.Core/Functions/GetLocationFn.cs +++ /dev/null @@ -1,25 +0,0 @@ -using BotSharp.Abstraction.Functions; -using BotSharp.Abstraction.Options; - -namespace BotSharp.Core.Functions; - -public class GetLocationFn : IFunctionCallback -{ - private readonly IServiceProvider _services; - - public GetLocationFn(IServiceProvider services) - { - _services = services; - } - - public string Name => "get_location"; - public string Indication => "Finding location"; - - public async Task Execute(RoleDialogModel message) - { - var args = JsonSerializer.Deserialize(message.FunctionArgs, BotSharpOptions.defaultJsonOptions); - - message.Content = $"There are a lot of fun events here in {args.City}"; - return true; - } -} diff --git a/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs b/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs index 09f6e6615..d203ac8c2 100644 --- a/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs +++ b/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs @@ -1,7 +1,4 @@ using BotSharp.Abstraction.Functions; -using BotSharp.Abstraction.Models; -using BotSharp.Abstraction.Options; -using BotSharp.Abstraction.SideCar; using System.Text.Json.Serialization; namespace BotSharp.Core.Functions; @@ -21,24 +18,9 @@ public GetWeatherFn(IServiceProvider services) public async Task Execute(RoleDialogModel message) { //var args = JsonSerializer.Deserialize(message.FunctionArgs, BotSharpOptions.defaultJsonOptions); - - //var sidecar = _services.GetService(); - //var states = GetSideCarStates(); - - //var userMessage = $"Please find the information at location {args.City}, {args.State}"; - //var response = await sidecar.SendMessage(BuiltInAgentId.Chatbot, userMessage, states: states); message.Content = $"It is a sunny day."; return true; } - - private List GetSideCarStates() - { - var sideCarStates = new List() - { - new("channel", "email") - }; - return sideCarStates; - } } class Location diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index f8f551fe2..eb6f4e765 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -273,13 +273,10 @@ public async Task SendEventToModel(object message) public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) { - var convService = _services.GetRequiredService(); var agentService = _services.GetRequiredService(); var realtimeSetting = _services.GetRequiredService(); - var conv = await convService.GetConversation(conn.ConversationId); var agent = await agentService.LoadAgent(conn.CurrentAgentId); - var (prompt, request) = PrepareOptions(agent, []); var config = request.GenerationConfig; From b64f22ef8edb5a5877caebcb79426301dd7e6f30 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Thu, 15 May 2025 15:46:42 -0500 Subject: [PATCH 08/12] temp save --- .../Conversations/Models/Conversation.cs | 13 ++ .../Realtime/Models/RealtimeHubConnection.cs | 1 + .../Hooks/RealtimeConversationHook.cs | 6 +- .../Services/RealtimeHub.cs | 2 + .../Services/ConversationStorage.cs | 7 +- .../Realtime/RealTimeCompletionProvider.cs | 121 +++++++++++------- .../Models/DialogMongoElement.cs | 6 + .../MongoRepository.Conversation.cs | 1 - 8 files changed, 105 insertions(+), 52 deletions(-) diff --git a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/Conversation.cs b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/Conversation.cs index ac1efb04e..5ffdf1877 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/Conversation.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/Conversation.cs @@ -47,15 +47,19 @@ public class DialogElement public string Content { get; set; } = default!; [JsonPropertyName("secondary_content")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? SecondaryContent { get; set; } [JsonPropertyName("rich_content")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? RichContent { get; set; } [JsonPropertyName("secondary_rich_content")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? SecondaryRichContent { get; set; } [JsonPropertyName("payload")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? Payload { get; set; } public DialogElement() @@ -95,8 +99,17 @@ public class DialogMetaData public string MessageType { get; set; } = default!; [JsonPropertyName("function_name")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public string? FunctionName { get; set; } + [JsonPropertyName("function_args")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? FunctionArgs { get; set; } + + [JsonPropertyName("tool_call_id")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? ToolCallId { get; set; } + [JsonPropertyName("sender_id")] public string? SenderId { get; set; } diff --git a/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeHubConnection.cs b/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeHubConnection.cs index c0dac6d5b..4d66ac5b8 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeHubConnection.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeHubConnection.cs @@ -10,6 +10,7 @@ public class RealtimeHubConnection public string KeypadInputBuffer { get; set; } = string.Empty; public string CurrentAgentId { get; set; } = null!; public string ConversationId { get; set; } = null!; + public string? PrevSessionId { get; set; } public Func OnModelReady { get; set; } = () => string.Empty; public Func OnModelMessageReceived { get; set; } = null!; public Func OnModelAudioResponseDone { get; set; } = null!; diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs index 2f8a03ee9..32cdc1135 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs @@ -65,9 +65,7 @@ public async Task OnFunctionExecuted(RoleDialogModel message) else { // Update session for changed states - - // TO DO - //var instruction = await hub.Completer.UpdateSession(hub.HubConn); + var instruction = await hub.Completer.UpdateSession(hub.HubConn); await hub.Completer.InsertConversationItem(message); if (string.IsNullOrEmpty(message.Content)) @@ -81,7 +79,7 @@ public async Task OnFunctionExecuted(RoleDialogModel message) //} //else //{ - // await hub.Completer.TriggerModelInference(); + // await hub.Completer.TriggerModelInference(instruction); //} } } diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs b/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs index 93363878d..104cd22c0 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs @@ -96,6 +96,8 @@ await _completer.Connect( } await routing.InvokeFunction(message.FunctionName, message); + dialogs.Add(message); + storage.Append(_conn.ConversationId, message); } else { diff --git a/src/Infrastructure/BotSharp.Core/Conversations/Services/ConversationStorage.cs b/src/Infrastructure/BotSharp.Core/Conversations/Services/ConversationStorage.cs index 49852eb22..eeceeb091 100644 --- a/src/Infrastructure/BotSharp.Core/Conversations/Services/ConversationStorage.cs +++ b/src/Infrastructure/BotSharp.Core/Conversations/Services/ConversationStorage.cs @@ -40,6 +40,8 @@ public void Append(string conversationId, IEnumerable dialogs) MessageId = dialog.MessageId, MessageType = dialog.MessageType, FunctionName = dialog.FunctionName, + FunctionArgs = dialog.FunctionArgs, + ToolCallId = dialog.ToolCallId, CreatedTime = dialog.CreatedAt }; @@ -109,7 +111,6 @@ public List GetDialogs(string conversationId) var currentAgentId = meta.AgentId; var messageId = meta.MessageId; var messageType = meta.MessageType; - var function = meta.FunctionName; var senderId = role == AgentRole.Function ? currentAgentId : meta.SenderId; var createdAt = meta.CreatedTime; var richContent = !string.IsNullOrEmpty(dialog.RichContent) ? @@ -124,7 +125,9 @@ public List GetDialogs(string conversationId) MessageType = messageType, CreatedAt = createdAt, SenderId = senderId, - FunctionName = function, + FunctionName = meta.FunctionName, + FunctionArgs = meta.FunctionArgs, + ToolCallId = meta.ToolCallId, RichContent = richContent, SecondaryContent = secondaryContent, SecondaryRichContent = secondaryRichContent, diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index eb6f4e765..1cd322159 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -32,6 +32,20 @@ public class GoogleRealTimeProvider : IRealTimeCompletion UnknownTypeHandling = JsonUnknownTypeHandling.JsonElement }; + private RealtimeTranscriptionResponse _inputStream = new(); + private RealtimeTranscriptionResponse _outputStream = new(); + + + private RealtimeHubConnection _conn; + private Func _onModelReady; + private Func _onModelAudioDeltaReceived; + private Func _onModelAudioResponseDone; + private Func _onModelAudioTranscriptDone; + private Func, Task> _onModelResponseDone; + private Func _onConversationItemCreated; + private Func _onInputAudioTranscriptionDone; + private Func _onInterruptionDetected; + public GoogleRealTimeProvider( IServiceProvider services, GoogleAiSettings settings, @@ -58,17 +72,26 @@ public async Task Connect( Func onInputAudioTranscriptionDone, Func onInterruptionDetected) { + _conn = conn; + _onModelReady = onModelReady; + _onModelAudioDeltaReceived = onModelAudioDeltaReceived; + _onModelAudioResponseDone = onModelAudioResponseDone; + _onModelAudioTranscriptDone = onModelAudioTranscriptDone; + _onModelResponseDone = onModelResponseDone; + _onConversationItemCreated = onConversationItemCreated; + _onInputAudioTranscriptionDone = onInputAudioTranscriptionDone; + _onInterruptionDetected = onInterruptionDetected; + var settingsService = _services.GetRequiredService(); var realtimeModelSettings = _services.GetRequiredService(); _model = realtimeModelSettings.Model; var modelSettings = settingsService.GetSetting(Provider, _model); - if (_session != null) - { - _session.Dispose(); - } + Reset(); + _inputStream = new(); + _outputStream = new(); _session = new LlmRealtimeSession(_services, new ChatSessionOptions { JsonOptions = _jsonOptions @@ -79,32 +102,11 @@ public async Task Connect( await onModelReady(); - _ = ReceiveMessage( - conn, - onModelReady, - onModelAudioDeltaReceived, - onModelAudioResponseDone, - onModelAudioTranscriptDone, - onModelResponseDone, - onConversationItemCreated, - onInputAudioTranscriptionDone, - onInterruptionDetected); + _ = ReceiveMessage(); } - private async Task ReceiveMessage( - RealtimeHubConnection conn, - Func onModelReady, - Func onModelAudioDeltaReceived, - Func onModelAudioResponseDone, - Func onModelAudioTranscriptDone, - Func, Task> onModelResponseDone, - Func onConversationItemCreated, - Func onInputAudioTranscriptionDone, - Func onInterruptionDetected) + private async Task ReceiveMessage() { - using var inputStream = new RealtimeTranscriptionResponse(); - using var outputStream = new RealtimeTranscriptionResponse(); - await foreach (ChatSessionUpdate update in _session.ReceiveUpdatesAsync(CancellationToken.None)) { var receivedText = update?.RawResponse; @@ -128,6 +130,7 @@ private async Task ReceiveMessage( else if (response.SessionResumptionUpdate != null) { _logger.LogInformation($"Session resumption update => New handle: {response.SessionResumptionUpdate.NewHandle}, Resumable: {response.SessionResumptionUpdate.Resumable}"); + _conn.PrevSessionId = response.SessionResumptionUpdate?.NewHandle; } else if (response.ToolCall != null && !response.ToolCall.FunctionCalls.IsNullOrEmpty()) { @@ -137,20 +140,20 @@ private async Task ReceiveMessage( if (functionCall != null) { - var messages = OnFunctionCall(conn, functionCall); - await onModelResponseDone(messages); + var messages = OnFunctionCall(_conn, functionCall); + await _onModelResponseDone(messages); } } else if (response.ServerContent != null) { if (response.ServerContent.InputTranscription?.Text != null) { - inputStream.Collect(response.ServerContent.InputTranscription.Text); + _inputStream.Collect(response.ServerContent.InputTranscription.Text); } if (response.ServerContent.OutputTranscription?.Text != null) { - outputStream.Collect(response.ServerContent.OutputTranscription.Text); + _outputStream.Collect(response.ServerContent.OutputTranscription.Text); } if (response.ServerContent.ModelTurn != null) @@ -158,13 +161,13 @@ private async Task ReceiveMessage( _logger.LogInformation($"Model audio delta received."); // Handle input transcription - var inputTranscription = inputStream.GetText(); + var inputTranscription = _inputStream.GetText(); if (!string.IsNullOrEmpty(inputTranscription)) { - var message = OnUserAudioTranscriptionCompleted(conn, inputTranscription); - await onInputAudioTranscriptionDone(message); + var message = OnUserAudioTranscriptionCompleted(_conn, inputTranscription); + await _onInputAudioTranscriptionDone(message); } - inputStream.Clear(); + _inputStream.Clear(); var parts = response.ServerContent.ModelTurn.Parts; if (!parts.IsNullOrEmpty()) @@ -173,7 +176,7 @@ private async Task ReceiveMessage( { if (!string.IsNullOrEmpty(part.InlineData?.Data)) { - await onModelAudioDeltaReceived(part.InlineData.Data, string.Empty); + await _onModelAudioDeltaReceived(part.InlineData.Data, string.Empty); } } } @@ -187,14 +190,14 @@ private async Task ReceiveMessage( _logger.LogInformation($"Model turn completed."); // Handle output transcription - var outputTranscription = outputStream.GetText(); + var outputTranscription = _outputStream.GetText(); if (!string.IsNullOrEmpty(outputTranscription)) { - var messages = await OnResponseDone(conn, outputTranscription, response.UsageMetaData); - await onModelResponseDone(messages); + var messages = await OnResponseDone(_conn, outputTranscription, response.UsageMetaData); + await _onModelResponseDone(messages); } - inputStream.Clear(); - outputStream.Clear(); + _inputStream.Clear(); + _outputStream.Clear(); } } } @@ -205,6 +208,8 @@ private async Task ReceiveMessage( } } + _inputStream.Dispose(); + _outputStream.Dispose(); _session.Dispose(); } @@ -213,7 +218,10 @@ public async Task Disconnect() { if (_session != null) { + _inputStream?.Dispose(); + _outputStream?.Dispose(); await _session.DisconnectAsync(); + _session.Dispose(); } } @@ -242,8 +250,9 @@ await SendEventToModel(new BidiClientPayload public async Task TriggerModelInference(string? instructions = null) { - var content = new Content(instructions ?? "Please respond to user.", AgentRole.User); + if (string.IsNullOrWhiteSpace(instructions)) return; + var content = new Content(instructions, AgentRole.User); await SendEventToModel(new BidiClientPayload { ClientContent = new() @@ -273,13 +282,18 @@ public async Task SendEventToModel(object message) public async Task UpdateSession(RealtimeHubConnection conn, bool isInit = false) { + if (!isInit) + { + return null; + } + var agentService = _services.GetRequiredService(); var realtimeSetting = _services.GetRequiredService(); var agent = await agentService.LoadAgent(conn.CurrentAgentId); var (prompt, request) = PrepareOptions(agent, []); - var config = request.GenerationConfig; + var config = request.GenerationConfig ?? new(); if (config != null) { //Output Modality can either be text or audio @@ -317,7 +331,8 @@ await HookEmitter.Emit(_services, }); } - await SendEventToModel(new RealtimeClientPayload + + var payload = new RealtimeClientPayload { Setup = new RealtimeGenerateContentSetup() { @@ -328,8 +343,14 @@ await SendEventToModel(new RealtimeClientPayload InputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null, OutputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null, SessionResumption = new() + { + Handle = _conn.PrevSessionId + } } - }); + }; + + Console.WriteLine($"Setup payload: {JsonSerializer.Serialize(payload, _jsonOptions)}"); + await SendEventToModel(payload); return prompt; } @@ -596,5 +617,15 @@ private Uri BuildWebsocketUri(string apiKey, string version = "v1alpha") { return new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.{version}.GenerativeService.BidiGenerateContent?key={apiKey}"); } + + private void Reset() + { + _inputStream?.Clear(); + _outputStream?.Clear(); + + _inputStream?.Dispose(); + _outputStream?.Dispose(); + _session?.Dispose(); + } #endregion } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Models/DialogMongoElement.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/DialogMongoElement.cs index 030a93abb..b67fc53cb 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Models/DialogMongoElement.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/DialogMongoElement.cs @@ -47,6 +47,8 @@ public class DialogMetaDataMongoElement public string MessageId { get; set; } = default!; public string MessageType { get; set; } = default!; public string? FunctionName { get; set; } + public string? FunctionArgs { get; set; } + public string? ToolCallId { get; set; } public string? SenderId { get; set; } public DateTime CreateTime { get; set; } @@ -59,6 +61,8 @@ public static DialogMetaData ToDomainElement(DialogMetaDataMongoElement meta) MessageId = meta.MessageId, MessageType = meta.MessageType, FunctionName = meta.FunctionName, + FunctionArgs = meta.FunctionArgs, + ToolCallId = meta.ToolCallId, SenderId = meta.SenderId, CreatedTime = meta.CreateTime, }; @@ -73,6 +77,8 @@ public static DialogMetaDataMongoElement ToMongoElement(DialogMetaData meta) MessageId = meta.MessageId, MessageType = meta.MessageType, FunctionName = meta.FunctionName, + FunctionArgs = meta.FunctionArgs, + ToolCallId = meta.ToolCallId, SenderId = meta.SenderId, CreateTime = meta.CreatedTime, }; diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Conversation.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Conversation.cs index 722c77303..02f5a6951 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Conversation.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Conversation.cs @@ -1,6 +1,5 @@ using BotSharp.Abstraction.Conversations.Models; using BotSharp.Abstraction.Repositories.Filters; -using MongoDB.Driver; using System.Text.Json; namespace BotSharp.Plugin.MongoStorage.Repository; From 350c479486d052af4d2c4f36d06edc34f257e708 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Thu, 15 May 2025 17:46:36 -0500 Subject: [PATCH 09/12] sync and refine --- .../MCP/Services/IMcpService.cs | 2 +- .../Routing/Executor/IFunctionExecutor.cs | 3 +- .../MCP/BotSharpMCPExtensions.cs | 8 +- .../MCP/Helpers/AiFunctionHelper.cs | 20 +++-- .../MCP/Hooks/MCPToolAgentHook.cs | 22 ++++-- .../MCP/Managers/McpClientManager.cs | 74 ++++++++++++------- .../BotSharp.Core/MCP/Services/McpService.cs | 19 +++-- .../BotSharp.Core/MCP/Settings/MCPSettings.cs | 1 - .../Routing/Executor/DummyFunctionExecutor.cs | 10 +-- .../Executor/FunctionCallbackExecutor.cs | 9 ++- .../Executor/FunctionExecutorFactory.cs | 38 ++++------ .../Routing/Executor/MCPToolExecutor.cs | 24 +++--- .../Routing/RoutingService.InvokeFunction.cs | 14 +--- .../Controllers/McpController.cs | 4 +- src/WebStarter/appsettings.json | 15 ++-- 15 files changed, 135 insertions(+), 128 deletions(-) rename src/Infrastructure/{BotSharp.Core => BotSharp.Abstraction}/Routing/Executor/IFunctionExecutor.cs (77%) diff --git a/src/Infrastructure/BotSharp.Abstraction/MCP/Services/IMcpService.cs b/src/Infrastructure/BotSharp.Abstraction/MCP/Services/IMcpService.cs index 71952dacf..32564a3f5 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MCP/Services/IMcpService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MCP/Services/IMcpService.cs @@ -2,5 +2,5 @@ namespace BotSharp.Abstraction.MCP.Services; public interface IMcpService { - IEnumerable GetServerConfigs() => []; + Task> GetServerConfigsAsync() => Task.FromResult>([]); } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/IFunctionExecutor.cs b/src/Infrastructure/BotSharp.Abstraction/Routing/Executor/IFunctionExecutor.cs similarity index 77% rename from src/Infrastructure/BotSharp.Core/Routing/Executor/IFunctionExecutor.cs rename to src/Infrastructure/BotSharp.Abstraction/Routing/Executor/IFunctionExecutor.cs index 4ba2e69f7..e1c604adf 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/IFunctionExecutor.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Routing/Executor/IFunctionExecutor.cs @@ -1,8 +1,7 @@ -namespace BotSharp.Core.Routing.Executor; +namespace BotSharp.Abstraction.Routing.Executor; public interface IFunctionExecutor { public Task ExecuteAsync(RoleDialogModel message); - public Task GetIndicatorAsync(RoleDialogModel message); } diff --git a/src/Infrastructure/BotSharp.Core/MCP/BotSharpMCPExtensions.cs b/src/Infrastructure/BotSharp.Core/MCP/BotSharpMCPExtensions.cs index b3c4e6de6..8eeee7b35 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/BotSharpMCPExtensions.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/BotSharpMCPExtensions.cs @@ -18,15 +18,11 @@ public static IServiceCollection AddBotSharpMCP(this IServiceCollection services { var settings = config.GetSection("MCP").Get(); services.AddScoped(provider => settings); + services.AddScoped(); if (settings != null && settings.Enabled && !settings.McpServerConfigs.IsNullOrEmpty()) { - services.AddScoped(); - - var clientManager = new McpClientManager(settings); - services.AddScoped(provider => clientManager); - - // Register hooks + services.AddScoped(); services.AddScoped(); } return services; diff --git a/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs b/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs index d6f1cb5b1..b9db3ce74 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs @@ -1,19 +1,25 @@ -using System.Text.Json; using ModelContextProtocol.Client; namespace BotSharp.Core.MCP.Helpers; internal static class AiFunctionHelper { - public static FunctionDef MapToFunctionDef(McpClientTool tool) + public static FunctionDef? MapToFunctionDef(McpClientTool tool) { if (tool == null) { - throw new ArgumentNullException(nameof(tool)); + return null; } - var properties = tool.JsonSchema.GetProperty("properties"); - var required = tool.JsonSchema.GetProperty("required"); + if (!tool.JsonSchema.TryGetProperty("properties", out var properties)) + { + properties = JsonDocument.Parse("{}").RootElement; + } + + if (!tool.JsonSchema.TryGetProperty("required", out var required)) + { + required = JsonDocument.Parse("[]").RootElement; + } var funDef = new FunctionDef { @@ -23,8 +29,8 @@ public static FunctionDef MapToFunctionDef(McpClientTool tool) Parameters = new FunctionParametersDef { Type = "object", - Properties = JsonDocument.Parse(properties.GetRawText()), - Required = JsonSerializer.Deserialize>(required.GetRawText()) + Properties = JsonDocument.Parse(properties.GetRawText() ?? "{}"), + Required = JsonSerializer.Deserialize>(required.GetRawText() ?? "[]") ?? [] } }; diff --git a/src/Infrastructure/BotSharp.Core/MCP/Hooks/MCPToolAgentHook.cs b/src/Infrastructure/BotSharp.Core/MCP/Hooks/MCPToolAgentHook.cs index 08c38b6bc..743bf4c06 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Hooks/MCPToolAgentHook.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Hooks/MCPToolAgentHook.cs @@ -41,18 +41,26 @@ private async Task> GetMcpContent(Agent agent) return functionDefs; } - var mcpClientManager = _services.GetRequiredService(); - var mcps = agent.McpTools.Where(x => !x.Disabled); + var mcpClientManager = _services.GetService(); + if (mcpClientManager == null) + { + return functionDefs; + } + + var mcps = agent.McpTools?.Where(x => !x.Disabled) ?? []; foreach (var item in mcps) { var mcpClient = await mcpClientManager.GetMcpClientAsync(item.ServerId); - if (mcpClient != null) + if (mcpClient == null) continue; + + var tools = await mcpClient.ListToolsAsync(); + var toolNames = item.Functions.Select(x => x.Name).ToList(); + var targetTools = tools.Where(x => toolNames.Contains(x.Name, StringComparer.OrdinalIgnoreCase)); + foreach (var tool in targetTools) { - var tools = await mcpClient.ListToolsAsync(); - var toolnames = item.Functions.Select(x => x.Name).ToList(); - foreach (var tool in tools.Where(x => toolnames.Contains(x.Name, StringComparer.OrdinalIgnoreCase))) + var funDef = AiFunctionHelper.MapToFunctionDef(tool); + if (funDef != null) { - var funDef = AiFunctionHelper.MapToFunctionDef(tool); functionDefs.Add(funDef); } } diff --git a/src/Infrastructure/BotSharp.Core/MCP/Managers/McpClientManager.cs b/src/Infrastructure/BotSharp.Core/MCP/Managers/McpClientManager.cs index a4a89d895..50b798eb4 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Managers/McpClientManager.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Managers/McpClientManager.cs @@ -6,45 +6,63 @@ namespace BotSharp.Core.MCP.Managers; public class McpClientManager : IDisposable { - private readonly McpSettings _mcpSettings; + private readonly IServiceProvider _services; + private readonly ILogger _logger; - public McpClientManager(McpSettings mcpSettings) + public McpClientManager( + IServiceProvider services, + ILogger logger) { - _mcpSettings = mcpSettings; + _services = services; + _logger = logger; } - public async Task GetMcpClientAsync(string serverId) + public async Task GetMcpClientAsync(string serverId) { - var config = _mcpSettings.McpServerConfigs.Where(x => x.Id == serverId).FirstOrDefault(); - - IClientTransport transport; - if (config.SseConfig != null) + try { - transport = new SseClientTransport(new SseClientTransportOptions + var settings = _services.GetRequiredService(); + var config = settings.McpServerConfigs.Where(x => x.Id == serverId).FirstOrDefault(); + if (config == null) { - Name = config.Name, - Endpoint = new Uri(config.SseConfig.EndPoint), - AdditionalHeaders = config.SseConfig.AdditionalHeaders, - ConnectionTimeout = config.SseConfig.ConnectionTimeout - }); - } - else if (config.StdioConfig != null) - { - transport = new StdioClientTransport(new StdioClientTransportOptions + return null; + } + + IClientTransport? transport = null; + if (config.SseConfig != null) + { + transport = new SseClientTransport(new SseClientTransportOptions + { + Name = config.Name, + Endpoint = new Uri(config.SseConfig.EndPoint), + AdditionalHeaders = config.SseConfig.AdditionalHeaders, + ConnectionTimeout = config.SseConfig.ConnectionTimeout + }); + } + else if (config.StdioConfig != null) { - Name = config.Name, - Command = config.StdioConfig.Command, - Arguments = config.StdioConfig.Arguments, - EnvironmentVariables = config.StdioConfig.EnvironmentVariables, - ShutdownTimeout = config.StdioConfig.ShutdownTimeout - }); + transport = new StdioClientTransport(new StdioClientTransportOptions + { + Name = config.Name, + Command = config.StdioConfig.Command, + Arguments = config.StdioConfig.Arguments, + EnvironmentVariables = config.StdioConfig.EnvironmentVariables, + ShutdownTimeout = config.StdioConfig.ShutdownTimeout + }); + } + + if (transport == null) + { + return null; + } + + return await McpClientFactory.CreateAsync(transport, settings.McpClientOptions); } - else + catch (Exception ex) { - throw new ArgumentNullException("Invalid MCP server configuration!"); + _logger.LogWarning(ex, $"Error when loading mcp client {serverId}"); + return null; } - - return await McpClientFactory.CreateAsync(transport, _mcpSettings.McpClientOptions); } public void Dispose() diff --git a/src/Infrastructure/BotSharp.Core/MCP/Services/McpService.cs b/src/Infrastructure/BotSharp.Core/MCP/Services/McpService.cs index 3bff4442e..27f5326c7 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Services/McpService.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Services/McpService.cs @@ -1,6 +1,5 @@ using BotSharp.Core.MCP.Managers; using BotSharp.Core.MCP.Settings; -using Microsoft.Extensions.Logging; using ModelContextProtocol.Client; namespace BotSharp.Core.MCP.Services; @@ -9,35 +8,35 @@ public class McpService : IMcpService { private readonly IServiceProvider _services; private readonly ILogger _logger; - private readonly McpClientManager _mcpClientManager; public McpService( IServiceProvider services, - ILogger logger, - McpClientManager mcpClient) + ILogger logger) { _services = services; _logger = logger; - _mcpClientManager = mcpClient; } - public IEnumerable GetServerConfigs() + public async Task> GetServerConfigsAsync() { + var clientManager = _services.GetService(); + if (clientManager == null) return []; + var options = new List(); var settings = _services.GetRequiredService(); var configs = settings?.McpServerConfigs ?? []; foreach (var config in configs) { - var tools = _mcpClientManager.GetMcpClientAsync(config.Id) - .Result.ListToolsAsync() - .Result.Select(x=> x.Name); + var client = await clientManager.GetMcpClientAsync(config.Id); + if (client == null) continue; + var tools = await client.ListToolsAsync(); options.Add(new McpServerOptionModel { Id = config.Id, Name = config.Name, - Tools = tools + Tools = tools.Select(x => x.Name) }); } diff --git a/src/Infrastructure/BotSharp.Core/MCP/Settings/MCPSettings.cs b/src/Infrastructure/BotSharp.Core/MCP/Settings/MCPSettings.cs index 2867712f9..337230576 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Settings/MCPSettings.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Settings/MCPSettings.cs @@ -7,5 +7,4 @@ public class McpSettings public bool Enabled { get; set; } = true; public McpClientOptions McpClientOptions { get; set; } public List McpServerConfigs { get; set; } = []; - } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/DummyFunctionExecutor.cs b/src/Infrastructure/BotSharp.Core/Routing/Executor/DummyFunctionExecutor.cs index d075e1851..91e0bc97e 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/DummyFunctionExecutor.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/Executor/DummyFunctionExecutor.cs @@ -1,19 +1,19 @@ +using BotSharp.Abstraction.Routing.Executor; using BotSharp.Abstraction.Templating; namespace BotSharp.Core.Routing.Executor; public class DummyFunctionExecutor: IFunctionExecutor { - private FunctionDef functionDef; private readonly IServiceProvider _services; + private readonly FunctionDef _functionDef; - public DummyFunctionExecutor(FunctionDef function, IServiceProvider services) + public DummyFunctionExecutor(IServiceProvider services, FunctionDef functionDef) { - functionDef = function; _services = services; + _functionDef = functionDef; } - public async Task ExecuteAsync(RoleDialogModel message) { var render = _services.GetRequiredService(); @@ -25,7 +25,7 @@ public async Task ExecuteAsync(RoleDialogModel message) dict[item.Key] = item.Value; } - var text = render.Render(functionDef.Output, dict); + var text = render.Render(_functionDef.Output!, dict); message.Content = text; return true; } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionCallbackExecutor.cs b/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionCallbackExecutor.cs index 939f4fb35..4b208374f 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionCallbackExecutor.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionCallbackExecutor.cs @@ -1,23 +1,24 @@ +using BotSharp.Abstraction.Routing.Executor; using BotSharp.Abstraction.Functions; namespace BotSharp.Core.Routing.Executor; public class FunctionCallbackExecutor : IFunctionExecutor { - IFunctionCallback functionCallback; + private readonly IFunctionCallback _functionCallback; public FunctionCallbackExecutor(IFunctionCallback functionCallback) { - this.functionCallback = functionCallback; + _functionCallback = functionCallback; } public async Task ExecuteAsync(RoleDialogModel message) { - return await functionCallback.Execute(message); + return await _functionCallback.Execute(message); } public async Task GetIndicatorAsync(RoleDialogModel message) { - return await functionCallback.GetIndication(message); + return await _functionCallback.GetIndication(message); } } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionExecutorFactory.cs b/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionExecutorFactory.cs index 3fb895094..8a4a54865 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionExecutorFactory.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/Executor/FunctionExecutorFactory.cs @@ -1,41 +1,31 @@ using BotSharp.Abstraction.Functions; +using BotSharp.Abstraction.Routing.Executor; namespace BotSharp.Core.Routing.Executor; internal class FunctionExecutorFactory { - public static IFunctionExecutor Create(string functionName, Agent agent, IFunctionCallback functioncall, IServiceProvider serviceProvider) + public static IFunctionExecutor? Create(IServiceProvider services, string functionName, Agent agent) { - if(functioncall != null) + var functionCall = services.GetServices().FirstOrDefault(x => x.Name == functionName); + if (functionCall != null) { - return new FunctionCallbackExecutor(functioncall); + return new FunctionCallbackExecutor(functionCall); } - var funDef = agent?.Functions?.FirstOrDefault(x => x.Name == functionName); - if (funDef != null) + var functions = (agent?.Functions ?? []).Concat(agent?.SecondaryFunctions ?? []); + var funcDef = functions.FirstOrDefault(x => x.Name == functionName); + if (!string.IsNullOrWhiteSpace(funcDef?.Output)) { - if (!string.IsNullOrWhiteSpace(funDef?.Output)) - { - return new DummyFunctionExecutor(funDef,serviceProvider); - } + return new DummyFunctionExecutor(services, funcDef); } - else + + var mcpServerId = agent?.McpTools?.Where(x => x.Functions.Any(y => y.Name == funcDef?.Name))?.FirstOrDefault()?.ServerId; + if (!string.IsNullOrWhiteSpace(mcpServerId)) { - funDef = agent?.SecondaryFunctions?.FirstOrDefault(x => x.Name == functionName); - if (funDef != null) - { - if (!string.IsNullOrWhiteSpace(funDef?.Output)) - { - return new DummyFunctionExecutor(funDef, serviceProvider); - } - else - { - var mcpServerId = agent?.McpTools?.Where(x => x.Functions.Any(y => y.Name == funDef.Name)) - .FirstOrDefault().ServerId; - return new MCPToolExecutor(mcpServerId, functionName, serviceProvider); - } - } + return new McpToolExecutor(services, mcpServerId, functionName); } + return null; } } diff --git a/src/Infrastructure/BotSharp.Core/Routing/Executor/MCPToolExecutor.cs b/src/Infrastructure/BotSharp.Core/Routing/Executor/MCPToolExecutor.cs index f7625b485..c452e8066 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/Executor/MCPToolExecutor.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/Executor/MCPToolExecutor.cs @@ -1,21 +1,20 @@ +using BotSharp.Abstraction.Routing.Executor; using BotSharp.Core.MCP.Managers; using ModelContextProtocol.Client; namespace BotSharp.Core.Routing.Executor; -public class MCPToolExecutor: IFunctionExecutor +public class McpToolExecutor: IFunctionExecutor { - private readonly McpClientManager _clientManager; - private string mcpServer; - private string funcName; private readonly IServiceProvider _services; + private readonly string _mcpServerId; + private readonly string _functionName; - public MCPToolExecutor(string mcpserver, string functionName, IServiceProvider services) + public McpToolExecutor(IServiceProvider services, string mcpServerId, string functionName) { _services = services; - this.mcpServer = mcpserver; - this.funcName = functionName; - _clientManager = services.GetRequiredService(); + _mcpServerId = mcpServerId; + _functionName = functionName; } public async Task ExecuteAsync(RoleDialogModel message) @@ -23,12 +22,13 @@ public async Task ExecuteAsync(RoleDialogModel message) try { // Convert arguments to dictionary format expected by mcpdotnet - Dictionary argDict = JsonToDictionary(message.FunctionArgs); + Dictionary argDict = JsonToDictionary(message.FunctionArgs); - var client = await _clientManager.GetMcpClientAsync(mcpServer); + var clientManager = _services.GetRequiredService(); + var client = await clientManager.GetMcpClientAsync(_mcpServerId); // Call the tool through mcpdotnet - var result = await client.CallToolAsync(funcName, !argDict.IsNullOrEmpty() ? argDict : []); + var result = await client.CallToolAsync(_functionName, !argDict.IsNullOrEmpty() ? argDict : []); // Extract the text content from the result var json = string.Join("\n", result.Content.Where(c => c.Type == "text").Select(c => c.Text)); @@ -39,7 +39,7 @@ public async Task ExecuteAsync(RoleDialogModel message) } catch (Exception ex) { - message.Content = $"Error when calling tool {funcName} of MCP server {mcpServer}. {ex.Message}"; + message.Content = $"Error when calling tool {_functionName} of MCP server {_mcpServerId}. {ex.Message}"; return false; } } diff --git a/src/Infrastructure/BotSharp.Core/Routing/RoutingService.InvokeFunction.cs b/src/Infrastructure/BotSharp.Core/Routing/RoutingService.InvokeFunction.cs index 126453e88..63bd3b20a 100644 --- a/src/Infrastructure/BotSharp.Core/Routing/RoutingService.InvokeFunction.cs +++ b/src/Infrastructure/BotSharp.Core/Routing/RoutingService.InvokeFunction.cs @@ -1,5 +1,3 @@ -using BotSharp.Abstraction.Functions; -using BotSharp.Abstraction.Templating; using BotSharp.Core.Routing.Executor; namespace BotSharp.Core.Routing; @@ -8,14 +6,11 @@ public partial class RoutingService { public async Task InvokeFunction(string name, RoleDialogModel message) { - var function = _services.GetServices().FirstOrDefault(x => x.Name == name); - var currentAgentId = message.CurrentAgentId; var agentService = _services.GetRequiredService(); var agent = await agentService.GetAgent(currentAgentId); - IFunctionExecutor funcExecutor = FunctionExecutorFactory.Create(name, agent, function, _services); - + var funcExecutor = FunctionExecutorFactory.Create(_services, name, agent); if (funcExecutor == null) { message.StopCompletion = true; @@ -24,17 +19,14 @@ public async Task InvokeFunction(string name, RoleDialogModel message) return false; } - // Clone message var clonedMessage = RoleDialogModel.From(message); clonedMessage.FunctionName = name; - var hooks = _services - .GetRequiredService() - .HooksOrderByPriority; + var hooks = _services.GetRequiredService() + .HooksOrderByPriority; var progressService = _services.GetService(); - clonedMessage.Indication = await funcExecutor.GetIndicatorAsync(message); if (progressService?.OnFunctionExecuting != null) diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/McpController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/McpController.cs index 7b74a37ec..6519d0109 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/McpController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/McpController.cs @@ -13,9 +13,9 @@ public McpController(IServiceProvider services) } [HttpGet("/mcp/server-configs")] - public IEnumerable GetMcpServerConfigs() + public async Task> GetMcpServerConfigs() { var mcp = _services.GetRequiredService(); - return mcp.GetServerConfigs(); + return await mcp.GetServerConfigsAsync(); } } diff --git a/src/WebStarter/appsettings.json b/src/WebStarter/appsettings.json index b48d2963a..a1d4ed53f 100644 --- a/src/WebStarter/appsettings.json +++ b/src/WebStarter/appsettings.json @@ -268,13 +268,13 @@ } }, "McpServerConfigs": [ - { - "Id": "PizzaServer", - "Name": "PizzaServer", - "SseConfig": { - "Endpoint": "http://localhost:58905/sse" - } - } + //{ + // "Id": "PizzaServer", + // "Name": "PizzaServer", + // "SseConfig": { + // "Endpoint": "http://localhost:58905/sse" + // } + //} ] }, @@ -502,7 +502,6 @@ "BotSharp.Core.SideCar", "BotSharp.Core.Crontab", "BotSharp.Core.Realtime", - "BotSharp.Core.MCP", "BotSharp.Logger", "BotSharp.Plugin.MongoStorage", "BotSharp.Plugin.Dashboard", From 0c2ba423d8c243487a5cd43739283aa75dcc38bb Mon Sep 17 00:00:00 2001 From: Jicheng Lu Date: Thu, 15 May 2025 22:59:57 -0500 Subject: [PATCH 10/12] refine --- .../Realtime/Models/RealtimeHubConnection.cs | 1 - .../Hooks/RealtimeConversationHook.cs | 16 +- .../{ => Demo}/Functions/GetWeatherFn.cs | 11 +- .../MCP/Helpers/AiFunctionHelper.cs | 15 +- .../Realtime/RealtimeTranscriptionResponse.cs | 2 +- .../Chat/PalmChatCompletionProvider.cs | 5 +- .../Realtime/RealTimeCompletionProvider.cs | 45 ++---- .../Text/PalmTextCompletionProvider.cs | 5 +- .../Realtime/RealTimeCompletionProvider.cs | 138 +++++++++--------- 9 files changed, 105 insertions(+), 133 deletions(-) rename src/Infrastructure/BotSharp.Core/{ => Demo}/Functions/GetWeatherFn.cs (69%) diff --git a/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeHubConnection.cs b/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeHubConnection.cs index 4d66ac5b8..c0dac6d5b 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeHubConnection.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Realtime/Models/RealtimeHubConnection.cs @@ -10,7 +10,6 @@ public class RealtimeHubConnection public string KeypadInputBuffer { get; set; } = string.Empty; public string CurrentAgentId { get; set; } = null!; public string ConversationId { get; set; } = null!; - public string? PrevSessionId { get; set; } public Func OnModelReady { get; set; } = () => string.Empty; public Func OnModelMessageReceived { get; set; } = null!; public Func OnModelAudioResponseDone { get; set; } = null!; diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs index 32cdc1135..4eb4c64e9 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Hooks/RealtimeConversationHook.cs @@ -73,14 +73,14 @@ public async Task OnFunctionExecuted(RoleDialogModel message) return; } - //if (message.StopCompletion) - //{ - // await hub.Completer.TriggerModelInference($"Say to user: \"{message.Content}\""); - //} - //else - //{ - // await hub.Completer.TriggerModelInference(instruction); - //} + if (message.StopCompletion) + { + await hub.Completer.TriggerModelInference($"Say to user: \"{message.Content}\""); + } + else + { + await hub.Completer.TriggerModelInference(instruction); + } } } } diff --git a/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs b/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs similarity index 69% rename from src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs rename to src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs index d203ac8c2..8dbdbfafb 100644 --- a/src/Infrastructure/BotSharp.Core/Functions/GetWeatherFn.cs +++ b/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs @@ -1,7 +1,7 @@ using BotSharp.Abstraction.Functions; using System.Text.Json.Serialization; -namespace BotSharp.Core.Functions; +namespace BotSharp.Core.Demo.Functions; public class GetWeatherFn : IFunctionCallback { @@ -19,6 +19,7 @@ public async Task Execute(RoleDialogModel message) { //var args = JsonSerializer.Deserialize(message.FunctionArgs, BotSharpOptions.defaultJsonOptions); message.Content = $"It is a sunny day."; + //message.StopCompletion = true; return true; } } @@ -27,12 +28,4 @@ class Location { [JsonPropertyName("city")] public string? City { get; set; } - - [JsonPropertyName("state")] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - public string? State { get; set; } - - [JsonPropertyName("county")] - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - public string? County { get; set; } } \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs b/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs index b9db3ce74..3e15cd64b 100644 --- a/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs +++ b/src/Infrastructure/BotSharp.Core/MCP/Helpers/AiFunctionHelper.cs @@ -11,14 +11,17 @@ internal static class AiFunctionHelper return null; } - if (!tool.JsonSchema.TryGetProperty("properties", out var properties)) + var properties = "{}"; + var required = "[]"; + + if (tool.JsonSchema.TryGetProperty("properties", out var p)) { - properties = JsonDocument.Parse("{}").RootElement; + properties = p.GetRawText(); } - if (!tool.JsonSchema.TryGetProperty("required", out var required)) + if (tool.JsonSchema.TryGetProperty("required", out var r)) { - required = JsonDocument.Parse("[]").RootElement; + required = r.GetRawText(); } var funDef = new FunctionDef @@ -29,8 +32,8 @@ internal static class AiFunctionHelper Parameters = new FunctionParametersDef { Type = "object", - Properties = JsonDocument.Parse(properties.GetRawText() ?? "{}"), - Required = JsonSerializer.Deserialize>(required.GetRawText() ?? "[]") ?? [] + Properties = JsonDocument.Parse(properties), + Required = JsonSerializer.Deserialize>(required) ?? [] } }; diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs index 5d75ca97a..189252fa8 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Models/Realtime/RealtimeTranscriptionResponse.cs @@ -42,8 +42,8 @@ public string GetText() public void Clear() { - _contentStream.SetLength(0); _contentStream.Position = 0; + _contentStream.SetLength(0); } public void Dispose() diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/PalmChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/PalmChatCompletionProvider.cs index de7e316fe..7ec9268b9 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/PalmChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/PalmChatCompletionProvider.cs @@ -1,13 +1,10 @@ -using BotSharp.Abstraction.Agents; -using BotSharp.Abstraction.Agents.Enums; -using BotSharp.Abstraction.Loggers; -using BotSharp.Abstraction.Functions.Models; using BotSharp.Abstraction.Routing; using LLMSharp.Google.Palm; using LLMSharp.Google.Palm.DiscussService; namespace BotSharp.Plugin.GoogleAi.Providers.Chat; +[Obsolete] public class PalmChatCompletionProvider : IChatCompletion { private readonly IServiceProvider _services; diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index b3182b9a0..b8d33d909 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -89,7 +89,6 @@ public async Task Connect( var modelSettings = settingsService.GetSetting(Provider, _model); Reset(); - _inputStream = new(); _outputStream = new(); _session = new LlmRealtimeSession(_services, new ChatSessionOptions @@ -99,9 +98,7 @@ public async Task Connect( var uri = BuildWebsocketUri(modelSettings.ApiKey, "v1beta"); await _session.ConnectAsync(uri: uri, cancellationToken: CancellationToken.None); - await onModelReady(); - _ = ReceiveMessage(); } @@ -130,7 +127,6 @@ private async Task ReceiveMessage() else if (response.SessionResumptionUpdate != null) { _logger.LogInformation($"Session resumption update => New handle: {response.SessionResumptionUpdate.NewHandle}, Resumable: {response.SessionResumptionUpdate.Resumable}"); - _conn.PrevSessionId = response.SessionResumptionUpdate?.NewHandle; } else if (response.ToolCall != null && !response.ToolCall.FunctionCalls.IsNullOrEmpty()) { @@ -227,7 +223,7 @@ public async Task Disconnect() public async Task AppenAudioBuffer(string message) { - await SendEventToModel(new BidiClientPayload + await SendEventToModel(new RealtimeClientPayload { RealtimeInput = new() { @@ -239,7 +235,7 @@ await SendEventToModel(new BidiClientPayload public async Task AppenAudioBuffer(ArraySegment data, int length) { var buffer = data.AsSpan(0, length).ToArray(); - await SendEventToModel(new BidiClientPayload + await SendEventToModel(new RealtimeClientPayload { RealtimeInput = new() { @@ -253,7 +249,7 @@ public async Task TriggerModelInference(string? instructions = null) if (string.IsNullOrWhiteSpace(instructions)) return; var content = new Content(instructions, AgentRole.User); - await SendEventToModel(new BidiClientPayload + await SendEventToModel(new RealtimeClientPayload { ClientContent = new() { @@ -284,7 +280,7 @@ public async Task UpdateSession(RealtimeHubConnection conn, bool isInit { if (!isInit) { - return null; + return string.Empty; } var agentService = _services.GetRequiredService(); @@ -294,18 +290,14 @@ public async Task UpdateSession(RealtimeHubConnection conn, bool isInit var (prompt, request) = PrepareOptions(agent, []); var config = request.GenerationConfig ?? new(); - if (config != null) - { - //Output Modality can either be text or audio - config.ResponseModalities = [Modality.AUDIO]; + //Output Modality can either be text or audio + config.ResponseModalities = [Modality.AUDIO]; + config.Temperature = Math.Max(realtimeSetting.Temperature, 0.6f); + config.MaxOutputTokens = realtimeSetting.MaxResponseOutputTokens; - var words = new List(); - HookEmitter.Emit(_services, hook => words.AddRange(hook.OnModelTranscriptPrompt(agent)), agent.Id); + var words = new List(); + HookEmitter.Emit(_services, hook => words.AddRange(hook.OnModelTranscriptPrompt(agent)), agent.Id); - config.Temperature = Math.Max(realtimeSetting.Temperature, 0.6f); - config.MaxOutputTokens = realtimeSetting.MaxResponseOutputTokens; - } - var functions = request.Tools?.SelectMany(s => s.FunctionDeclarations).Select(x => { var fn = new FunctionDef @@ -331,7 +323,6 @@ await HookEmitter.Emit(_services, }); } - var payload = new RealtimeClientPayload { Setup = new RealtimeGenerateContentSetup() @@ -341,15 +332,11 @@ await HookEmitter.Emit(_services, SystemInstruction = request.SystemInstruction, Tools = request.Tools?.ToArray(), InputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null, - OutputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null, - SessionResumption = new() - { - Handle = _conn.PrevSessionId - } + OutputAudioTranscription = realtimeSetting.InputAudioTranscribe ? new() : null } }; - Console.WriteLine($"Setup payload: {JsonSerializer.Serialize(payload, _jsonOptions)}"); + _logger.LogInformation($"Setup payload: {JsonSerializer.Serialize(payload, _jsonOptions)}"); await SendEventToModel(payload); return prompt; @@ -369,7 +356,7 @@ public async Task InsertConversationItem(RoleDialogModel message) } }; - await SendEventToModel(new BidiClientPayload + await SendEventToModel(new RealtimeClientPayload { ToolResponse = new() { @@ -379,7 +366,7 @@ await SendEventToModel(new BidiClientPayload } else if (message.Role == AgentRole.Assistant) { - await SendEventToModel(new BidiClientPayload + await SendEventToModel(new RealtimeClientPayload { ClientContent = new() { @@ -390,7 +377,7 @@ await SendEventToModel(new BidiClientPayload } else if (message.Role == AgentRole.User) { - await SendEventToModel(new BidiClientPayload + await SendEventToModel(new RealtimeClientPayload { ClientContent = new() { @@ -613,7 +600,7 @@ private RoleDialogModel OnUserAudioTranscriptionCompleted(RealtimeHubConnection }; } - private Uri BuildWebsocketUri(string apiKey, string version = "v1alpha") + private Uri BuildWebsocketUri(string apiKey, string version = "v1beta") { return new Uri($"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.{version}.GenerativeService.BidiGenerateContent?key={apiKey}"); } diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Text/PalmTextCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Text/PalmTextCompletionProvider.cs index ea8ecd5e1..ba206a737 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Text/PalmTextCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Text/PalmTextCompletionProvider.cs @@ -1,9 +1,6 @@ -using BotSharp.Abstraction.Agents.Enums; -using BotSharp.Abstraction.Conversations; -using BotSharp.Abstraction.Loggers; - namespace BotSharp.Plugin.GoogleAi.Providers.Text; +[Obsolete] public class PalmTextCompletionProvider : ITextCompletion { private readonly IServiceProvider _services; diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs index 22daf3fff..9b0b14c48 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -45,11 +45,7 @@ public async Task Connect( _model = realtimeSettings.Model; var settings = settingsService.GetSetting(Provider, _model); - if (_session != null) - { - _session.Dispose(); - } - + _session?.Dispose(); _session = new LlmRealtimeSession(_services, new ChatSessionOptions { JsonOptions = _botsharpOptions.JsonSerializerOptions @@ -77,72 +73,6 @@ await _session.ConnectAsync( onInterruptionDetected); } - public async Task Disconnect() - { - if (_session != null) - { - await _session.DisconnectAsync(); - _session.Dispose(); - } - } - - public async Task AppenAudioBuffer(string message) - { - var audioAppend = new - { - type = "input_audio_buffer.append", - audio = message - }; - - await SendEventToModel(audioAppend); - } - - public async Task AppenAudioBuffer(ArraySegment data, int length) - { - var message = Convert.ToBase64String(data.AsSpan(0, length).ToArray()); - await AppenAudioBuffer(message); - } - - public async Task TriggerModelInference(string? instructions = null) - { - // Triggering model inference - if (!string.IsNullOrEmpty(instructions)) - { - await SendEventToModel(new - { - type = "response.create", - response = new - { - instructions - } - }); - } - else - { - await SendEventToModel(new - { - type = "response.create" - }); - } - } - - public async Task CancelModelResponse() - { - await SendEventToModel(new - { - type = "response.cancel" - }); - } - - public async Task RemoveConversationItem(string itemId) - { - await SendEventToModel(new - { - type = "conversation.item.delete", - item_id = itemId - }); - } - private async Task ReceiveMessage( RealtimeModelSettings realtimeSettings, RealtimeHubConnection conn, @@ -279,6 +209,72 @@ private async Task ReceiveMessage( _session.Dispose(); } + public async Task Disconnect() + { + if (_session != null) + { + await _session.DisconnectAsync(); + _session.Dispose(); + } + } + + public async Task AppenAudioBuffer(string message) + { + var audioAppend = new + { + type = "input_audio_buffer.append", + audio = message + }; + + await SendEventToModel(audioAppend); + } + + public async Task AppenAudioBuffer(ArraySegment data, int length) + { + var message = Convert.ToBase64String(data.AsSpan(0, length).ToArray()); + await AppenAudioBuffer(message); + } + + public async Task TriggerModelInference(string? instructions = null) + { + // Triggering model inference + if (!string.IsNullOrEmpty(instructions)) + { + await SendEventToModel(new + { + type = "response.create", + response = new + { + instructions + } + }); + } + else + { + await SendEventToModel(new + { + type = "response.create" + }); + } + } + + public async Task CancelModelResponse() + { + await SendEventToModel(new + { + type = "response.cancel" + }); + } + + public async Task RemoveConversationItem(string itemId) + { + await SendEventToModel(new + { + type = "conversation.item.delete", + item_id = itemId + }); + } + public async Task SendEventToModel(object message) { if (_session == null) return; From d5d03834f1c1018029181a5bcead96fd42ab1e6d Mon Sep 17 00:00:00 2001 From: Jicheng Lu Date: Thu, 15 May 2025 23:02:38 -0500 Subject: [PATCH 11/12] minor change --- .../BotSharp.Core/Demo/Functions/GetWeatherFn.cs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs b/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs index 8dbdbfafb..a78ad4ec7 100644 --- a/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs +++ b/src/Infrastructure/BotSharp.Core/Demo/Functions/GetWeatherFn.cs @@ -1,5 +1,4 @@ using BotSharp.Abstraction.Functions; -using System.Text.Json.Serialization; namespace BotSharp.Core.Demo.Functions; @@ -17,15 +16,8 @@ public GetWeatherFn(IServiceProvider services) public async Task Execute(RoleDialogModel message) { - //var args = JsonSerializer.Deserialize(message.FunctionArgs, BotSharpOptions.defaultJsonOptions); - message.Content = $"It is a sunny day."; + message.Content = $"It is a sunny day!"; //message.StopCompletion = true; return true; } -} - -class Location -{ - [JsonPropertyName("city")] - public string? City { get; set; } } \ No newline at end of file From d91a55232bea7352818a28a4e0ddc637c2b31f6d Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Fri, 16 May 2025 10:49:09 -0500 Subject: [PATCH 12/12] refine tool call and response --- .../Providers/Chat/ChatCompletionProvider.cs | 4 ++-- .../Providers/Chat/GeminiChatCompletionProvider.cs | 2 ++ .../Providers/Realtime/RealTimeCompletionProvider.cs | 2 ++ .../Providers/Chat/ChatCompletionProvider.cs | 5 ++--- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs index 92b1a6b5f..e2d9a1613 100644 --- a/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Chat/ChatCompletionProvider.cs @@ -305,10 +305,10 @@ await onMessageReceived(new RoleDialogModel(choice.Role?.ToString() ?? ChatMessa { messages.Add(new AssistantChatMessage(new List { - ChatToolCall.CreateFunctionToolCall(message.FunctionName, message.FunctionName, BinaryData.FromString(message.FunctionArgs ?? string.Empty)) + ChatToolCall.CreateFunctionToolCall(message.ToolCallId ?? message.FunctionName, message.FunctionName, BinaryData.FromString(message.FunctionArgs ?? "{}")) })); - messages.Add(new ToolChatMessage(message.FunctionName, message.Content)); + messages.Add(new ToolChatMessage(message.ToolCallId ?? message.FunctionName, message.Content)); } else if (message.Role == AgentRole.User) { diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs index 95dda6c1b..608533623 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs @@ -271,6 +271,7 @@ public void SetModelName(string model) { FunctionCall = new FunctionCall { + Id = message.ToolCallId, Name = message.FunctionName, Args = JsonNode.Parse(message.FunctionArgs ?? "{}") } @@ -282,6 +283,7 @@ public void SetModelName(string model) { FunctionResponse = new FunctionResponse { + Id = message.ToolCallId, Name = message.FunctionName, Response = new JsonObject() { diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index f3fb11d7b..5df8ee13a 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -512,6 +512,7 @@ await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) { FunctionCall = new FunctionCall { + Id = message.ToolCallId, Name = message.FunctionName, Args = JsonNode.Parse(message.FunctionArgs ?? "{}") } @@ -523,6 +524,7 @@ await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, text) { FunctionResponse = new FunctionResponse { + Id = message.ToolCallId, Name = message.FunctionName ?? string.Empty, Response = new JsonObject() { diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs index 95d4b909f..c0a9c0d43 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs @@ -1,4 +1,3 @@ -using BotSharp.Abstraction.Agents.Models; using BotSharp.Abstraction.Hooks; using OpenAI.Chat; @@ -273,10 +272,10 @@ await onMessageReceived(new RoleDialogModel(choice.Role?.ToString() ?? ChatMessa { messages.Add(new AssistantChatMessage(new List { - ChatToolCall.CreateFunctionToolCall(message.FunctionName, message.FunctionName, BinaryData.FromString(message.FunctionArgs ?? string.Empty)) + ChatToolCall.CreateFunctionToolCall(message.ToolCallId ?? message.FunctionName, message.FunctionName, BinaryData.FromString(message.FunctionArgs ?? "{}")) })); - messages.Add(new ToolChatMessage(message.FunctionName, message.Content)); + messages.Add(new ToolChatMessage(message.ToolCallId ?? message.FunctionName, message.Content)); } else if (message.Role == AgentRole.User) {