import logging
import sys
- from collections.abc import AsyncGenerator, AsyncIterator, Mapping
+ from collections.abc import AsyncGenerator, AsyncIterator, Callable, Mapping
from typing import TYPE_CHECKING, Any, ClassVar

if sys.version_info >= (3, 12):
    from typing import override  # pragma: no cover
else:
    from typing_extensions import override  # pragma: no cover

import httpx
from ollama import AsyncClient
+ from ollama._types import Message
from pydantic import ValidationError

from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+ from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
+ from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
+ from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaChatPromptExecutionSettings
from semantic_kernel.connectors.ai.ollama.ollama_settings import OllamaSettings
from semantic_kernel.connectors.ai.ollama.services.ollama_base import OllamaBase
+ from semantic_kernel.connectors.ai.ollama.services.utils import (
+     MESSAGE_CONVERTERS,
+     update_settings_from_function_choice_configuration,
+ )
from semantic_kernel.contents import AuthorRole
from semantic_kernel.contents.chat_history import ChatHistory
- from semantic_kernel.contents.chat_message_content import ChatMessageContent
+ from semantic_kernel.contents.chat_message_content import ITEM_TYPES, ChatMessageContent
+ from semantic_kernel.contents.function_call_content import FunctionCallContent
+ from semantic_kernel.contents.streaming_chat_message_content import ITEM_TYPES as STREAMING_ITEM_TYPES
from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
- from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceInvalidResponseError
+ from semantic_kernel.contents.streaming_text_content import StreamingTextContent
+ from semantic_kernel.contents.text_content import TextContent
+ from semantic_kernel.exceptions.service_exceptions import (
+     ServiceInitializationError,
+     ServiceInvalidExecutionSettingsError,
+     ServiceInvalidResponseError,
+ )
from semantic_kernel.utils.telemetry.model_diagnostics.decorators import (
    trace_chat_completion,
    trace_streaming_chat_completion,

@@ -40,7 +56,7 @@ class OllamaChatCompletion(OllamaBase, ChatCompletionClientBase):
    Make sure to have the ollama service running either locally or remotely.
    """

-     SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False
+     SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = True

    def __init__(
        self,

@@ -97,6 +113,36 @@ def service_url(self) -> str | None:
            return str(self.client._client.base_url)
        return None

+     @override
+     def _prepare_chat_history_for_request(
+         self,
+         chat_history: ChatHistory,
+         role_key: str = "role",
+         content_key: str = "content",
+     ) -> list[Message]:
+         return [MESSAGE_CONVERTERS[message.role](message) for message in chat_history.messages]
+
+     @override
+     def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") -> None:
+         if settings.function_choice_behavior and settings.function_choice_behavior.type_ in [
+             FunctionChoiceType.REQUIRED,
+             FunctionChoiceType.NONE,
+         ]:
+             raise ServiceInvalidExecutionSettingsError(
+                 "Ollama does not support function choice behavior of type 'required' or 'none' yet."
+             )
+
+     @override
+     def _update_function_choice_settings_callback(
+         self,
+     ) -> Callable[[FunctionCallChoiceConfiguration, "PromptExecutionSettings", FunctionChoiceType], None]:
+         return update_settings_from_function_choice_configuration
+
+     @override
+     def _reset_function_choice_settings(self, settings: "PromptExecutionSettings") -> None:
+         if hasattr(settings, "tools"):
+             settings.tools = None
+
    @override
    @trace_chat_completion(OllamaBase.MODEL_PROVIDER_NAME)
    async def _inner_get_chat_message_contents(

@@ -124,11 +170,9 @@ async def _inner_get_chat_message_contents(
        )

        return [
-             ChatMessageContent(
-                 inner_content=response_object,
-                 ai_model_id=self.ai_model_id,
-                 role=AuthorRole.ASSISTANT,
-                 content=response_object.get("message", {"content": None}).get("content", None),
+             self._create_chat_message_content(
+                 response_object,
+                 self._get_metadata_from_response(response_object),
            )
        ]

@@ -143,6 +187,11 @@ async def _inner_get_streaming_chat_message_contents(
        settings = self.get_prompt_execution_settings_from_settings(settings)
        assert isinstance(settings, OllamaChatPromptExecutionSettings)  # nosec

+         if settings.tools:
+             raise ServiceInvalidExecutionSettingsError(
+                 "Ollama does not support tool calling in streaming chat completion."
+             )
+
        prepared_chat_history = self._prepare_chat_history_for_request(chat_history)

        response_object = await self.client.chat(

@@ -160,13 +209,81 @@ async def _inner_get_streaming_chat_message_contents(

        async for part in response_object:
            yield [
-                 StreamingChatMessageContent(
-                     role=AuthorRole.ASSISTANT,
-                     choice_index=0,
-                     inner_content=part,
-                     ai_model_id=self.ai_model_id,
-                     content=part.get("message", {"content": None}).get("content", None),
+                 self._create_streaming_chat_message_content(
+                     part,
+                     self._get_metadata_from_response(part),
                )
            ]

    # endregion
+
+     def _create_chat_message_content(self, response: Mapping[str, Any], metadata: dict[str, Any]) -> ChatMessageContent:
+         """Create a chat message content from the response."""
+         items: list[ITEM_TYPES] = []
+         if not (message := response.get("message", None)):
+             raise ServiceInvalidResponseError("No message content found in response.")
+
+         if content := message.get("content", None):
+             items.append(
+                 TextContent(
+                     text=content,
+                     inner_content=message,
+                 )
+             )
+         if tool_calls := message.get("tool_calls", None):
+             for tool_call in tool_calls:
+                 items.append(
+                     FunctionCallContent(
+                         inner_content=tool_call,
+                         ai_model_id=self.ai_model_id,
+                         name=tool_call.get("function").get("name"),
+                         arguments=tool_call.get("function").get("arguments"),
+                     )
+                 )
+
+         return ChatMessageContent(
+             role=AuthorRole.ASSISTANT,
+             items=items,
+             inner_content=response,
+             metadata=metadata,
+         )
+
+     def _create_streaming_chat_message_content(
+         self, part: Mapping[str, Any], metadata: dict[str, Any]
+     ) -> StreamingChatMessageContent:
+         """Create a streaming chat message content from the response part."""
+         items: list[STREAMING_ITEM_TYPES] = []
+         if not (message := part.get("message", None)):
+             raise ServiceInvalidResponseError("No message content found in response part.")
+
+         if content := message.get("content", None):
+             items.append(
+                 StreamingTextContent(
+                     choice_index=0,
+                     text=content,
+                     inner_content=message,
+                 )
+             )
+
+         return StreamingChatMessageContent(
+             role=AuthorRole.ASSISTANT,
+             choice_index=0,
+             items=items,
+             inner_content=part,
+             ai_model_id=self.ai_model_id,
+             metadata=metadata,
+         )
+
+     def _get_metadata_from_response(self, response: Mapping[str, Any]) -> dict[str, Any]:
+         """Get metadata from the response."""
+         metadata = {
+             "model": response.get("model"),
+         }
+
+         if "prompt_eval_count" in response and "eval_count" in response:
+             metadata["usage"] = CompletionUsage(
+                 prompt_tokens=response.get("prompt_eval_count"),
+                 completion_tokens=response.get("eval_count"),
+             )
+
+         return metadata
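
For context, a minimal sketch of how the function-calling support enabled by this diff might be exercised through Semantic Kernel's public API. This is not part of the commit: the model name, the sample plugin, and the `services.ollama_chat_completion` module path are assumptions for illustration.

# Hypothetical usage sketch, not part of this commit.
import asyncio

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaChatPromptExecutionSettings
from semantic_kernel.connectors.ai.ollama.services.ollama_chat_completion import OllamaChatCompletion
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.functions import kernel_function


class TimePlugin:
    """A toy plugin the model can call as a tool."""

    @kernel_function(name="get_time", description="Get the current UTC time.")
    def get_time(self) -> str:
        from datetime import datetime, timezone

        return datetime.now(timezone.utc).isoformat()


async def main() -> None:
    kernel = Kernel()
    service = OllamaChatCompletion(ai_model_id="llama3.1")  # model name is an assumption
    kernel.add_service(service)
    kernel.add_plugin(TimePlugin(), plugin_name="time")

    # Only AUTO is usable here: per _verify_function_choice_settings above,
    # REQUIRED and NONE raise ServiceInvalidExecutionSettingsError.
    settings = OllamaChatPromptExecutionSettings(
        function_choice_behavior=FunctionChoiceBehavior.Auto()
    )

    chat_history = ChatHistory()
    chat_history.add_user_message("What time is it in UTC?")

    # Non-streaming only: the diff rejects tools in streaming chat completion.
    result = await service.get_chat_message_content(chat_history, settings, kernel=kernel)
    print(result)


asyncio.run(main())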