3
3
import logging
4
4
import os
5
5
import json
6
+ import time
6
7
from typing import Collection
7
8
from opentelemetry .instrumentation .ollama .config import Config
8
9
from opentelemetry .instrumentation .ollama .utils import dont_throw
9
10
from wrapt import wrap_function_wrapper
10
11
11
12
from opentelemetry import context as context_api
12
- from opentelemetry .trace import get_tracer , SpanKind
13
+ from opentelemetry .trace import get_tracer , SpanKind , Tracer
13
14
from opentelemetry .trace .status import Status , StatusCode
15
+ from opentelemetry .metrics import Histogram , Meter , get_meter
14
16
15
17
from opentelemetry .instrumentation .instrumentor import BaseInstrumentor
16
18
from opentelemetry .instrumentation .utils import (
22
24
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY ,
23
25
SpanAttributes ,
24
26
LLMRequestTypeValues ,
27
+ Meters
25
28
)
26
29
from opentelemetry .instrumentation .ollama .version import __version__
27
30
@@ -145,7 +148,7 @@ def _set_input_attributes(span, llm_request_type, kwargs):
145
148
146
149
147
150
@dont_throw
148
- def _set_response_attributes (span , llm_request_type , response ):
151
+ def _set_response_attributes (span , token_histogram , llm_request_type , response ):
149
152
if should_send_prompts ():
150
153
if llm_request_type == LLMRequestTypeValues .COMPLETION :
151
154
_set_span_attribute (
@@ -189,9 +192,42 @@ def _set_response_attributes(span, llm_request_type, response):
189
192
SpanAttributes .LLM_USAGE_PROMPT_TOKENS ,
190
193
input_tokens ,
191
194
)
195
+ _set_span_attribute (
196
+ span ,
197
+ SpanAttributes .LLM_SYSTEM ,
198
+ "Ollama"
199
+ )
200
+
201
+ if (
202
+ token_histogram is not None
203
+ and isinstance (input_tokens , int )
204
+ and input_tokens >= 0
205
+ ):
206
+ token_histogram .record (
207
+ input_tokens ,
208
+ attributes = {
209
+ SpanAttributes .LLM_SYSTEM : "Ollama" ,
210
+ SpanAttributes .LLM_TOKEN_TYPE : "input" ,
211
+ SpanAttributes .LLM_RESPONSE_MODEL : response .get ("model" ),
212
+ },
213
+ )
214
+
215
+ if (
216
+ token_histogram is not None
217
+ and isinstance (output_tokens , int )
218
+ and output_tokens >= 0
219
+ ):
220
+ token_histogram .record (
221
+ output_tokens ,
222
+ attributes = {
223
+ SpanAttributes .LLM_SYSTEM : "Ollama" ,
224
+ SpanAttributes .LLM_TOKEN_TYPE : "output" ,
225
+ SpanAttributes .LLM_RESPONSE_MODEL : response .get ("model" ),
226
+ },
227
+ )
192
228
193
229
194
- def _accumulate_streaming_response (span , llm_request_type , response ):
230
+ def _accumulate_streaming_response (span , token_histogram , llm_request_type , response ):
195
231
if llm_request_type == LLMRequestTypeValues .CHAT :
196
232
accumulated_response = {"message" : {"content" : "" , "role" : "" }}
197
233
elif llm_request_type == LLMRequestTypeValues .COMPLETION :
@@ -206,11 +242,11 @@ def _accumulate_streaming_response(span, llm_request_type, response):
206
242
elif llm_request_type == LLMRequestTypeValues .COMPLETION :
207
243
accumulated_response ["response" ] += res ["response" ]
208
244
209
- _set_response_attributes (span , llm_request_type , res | accumulated_response )
245
+ _set_response_attributes (span , token_histogram , llm_request_type , res | accumulated_response )
210
246
span .end ()
211
247
212
248
213
- async def _aaccumulate_streaming_response (span , llm_request_type , response ):
249
+ async def _aaccumulate_streaming_response (span , token_histogram , llm_request_type , response ):
214
250
if llm_request_type == LLMRequestTypeValues .CHAT :
215
251
accumulated_response = {"message" : {"content" : "" , "role" : "" }}
216
252
elif llm_request_type == LLMRequestTypeValues .COMPLETION :
@@ -225,16 +261,25 @@ async def _aaccumulate_streaming_response(span, llm_request_type, response):
225
261
elif llm_request_type == LLMRequestTypeValues .COMPLETION :
226
262
accumulated_response ["response" ] += res ["response" ]
227
263
228
- _set_response_attributes (span , llm_request_type , res | accumulated_response )
264
+ _set_response_attributes (span , token_histogram , llm_request_type , res | accumulated_response )
229
265
span .end ()
230
266
231
267
232
268
def _with_tracer_wrapper (func ):
233
269
"""Helper for providing tracer for wrapper functions."""
234
270
235
- def _with_tracer (tracer , to_wrap ):
271
+ def _with_tracer (tracer , token_histogram , duration_histogram , to_wrap ):
236
272
def wrapper (wrapped , instance , args , kwargs ):
237
- return func (tracer , to_wrap , wrapped , instance , args , kwargs )
273
+ return func (
274
+ tracer ,
275
+ token_histogram ,
276
+ duration_histogram ,
277
+ to_wrap ,
278
+ wrapped ,
279
+ instance ,
280
+ args ,
281
+ kwargs ,
282
+ )
238
283
239
284
return wrapper
240
285
@@ -253,7 +298,16 @@ def _llm_request_type_by_method(method_name):
253
298
254
299
255
300
@_with_tracer_wrapper
256
- def _wrap (tracer , to_wrap , wrapped , instance , args , kwargs ):
301
+ def _wrap (
302
+ tracer : Tracer ,
303
+ token_histogram : Histogram ,
304
+ duration_histogram : Histogram ,
305
+ to_wrap ,
306
+ wrapped ,
307
+ instance ,
308
+ args ,
309
+ kwargs ,
310
+ ):
257
311
"""Instruments and calls every function defined in TO_WRAP."""
258
312
if context_api .get_value (_SUPPRESS_INSTRUMENTATION_KEY ) or context_api .get_value (
259
313
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
@@ -273,22 +327,43 @@ def _wrap(tracer, to_wrap, wrapped, instance, args, kwargs):
273
327
if span .is_recording ():
274
328
_set_input_attributes (span , llm_request_type , kwargs )
275
329
330
+ start_time = time .perf_counter ()
276
331
response = wrapped (* args , ** kwargs )
332
+ end_time = time .perf_counter ()
277
333
278
334
if response :
335
+ if duration_histogram :
336
+ duration = end_time - start_time
337
+ duration_histogram .record (
338
+ duration ,
339
+ attributes = {
340
+ SpanAttributes .LLM_SYSTEM : "Ollama" ,
341
+ SpanAttributes .LLM_RESPONSE_MODEL : kwargs .get ("model" ),
342
+ },
343
+ )
344
+
279
345
if span .is_recording ():
280
346
if kwargs .get ("stream" ):
281
- return _accumulate_streaming_response (span , llm_request_type , response )
347
+ return _accumulate_streaming_response (span , token_histogram , llm_request_type , response )
282
348
283
- _set_response_attributes (span , llm_request_type , response )
349
+ _set_response_attributes (span , token_histogram , llm_request_type , response )
284
350
span .set_status (Status (StatusCode .OK ))
285
351
286
352
span .end ()
287
353
return response
288
354
289
355
290
356
@_with_tracer_wrapper
291
- async def _awrap (tracer , to_wrap , wrapped , instance , args , kwargs ):
357
+ async def _awrap (
358
+ tracer : Tracer ,
359
+ token_histogram : Histogram ,
360
+ duration_histogram : Histogram ,
361
+ to_wrap ,
362
+ wrapped ,
363
+ instance ,
364
+ args ,
365
+ kwargs ,
366
+ ):
292
367
"""Instruments and calls every function defined in TO_WRAP."""
293
368
if context_api .get_value (_SUPPRESS_INSTRUMENTATION_KEY ) or context_api .get_value (
294
369
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
@@ -309,20 +384,51 @@ async def _awrap(tracer, to_wrap, wrapped, instance, args, kwargs):
309
384
if span .is_recording ():
310
385
_set_input_attributes (span , llm_request_type , kwargs )
311
386
387
+ start_time = time .perf_counter ()
312
388
response = await wrapped (* args , ** kwargs )
313
-
389
+ end_time = time . perf_counter ()
314
390
if response :
391
+ if duration_histogram :
392
+ duration = end_time - start_time
393
+ duration_histogram .record (
394
+ duration ,
395
+ attributes = {
396
+ SpanAttributes .LLM_SYSTEM : "Ollama" ,
397
+ SpanAttributes .LLM_RESPONSE_MODEL : kwargs .get ("model" ),
398
+ },
399
+ )
400
+
315
401
if span .is_recording ():
316
402
if kwargs .get ("stream" ):
317
- return _aaccumulate_streaming_response (span , llm_request_type , response )
403
+ return _aaccumulate_streaming_response (span , token_histogram , llm_request_type , response )
318
404
319
- _set_response_attributes (span , llm_request_type , response )
405
+ _set_response_attributes (span , token_histogram , llm_request_type , response )
320
406
span .set_status (Status (StatusCode .OK ))
321
407
322
408
span .end ()
323
409
return response
324
410
325
411
412
def _build_metrics(meter: Meter):
    """Create the GenAI metric instruments on *meter*.

    Returns a ``(token_histogram, duration_histogram)`` pair:
    the first records input/output token counts, the second records
    operation duration in seconds.
    """
    return (
        meter.create_histogram(
            name=Meters.LLM_TOKEN_USAGE,
            unit="token",
            description="Measures number of input and output tokens used",
        ),
        meter.create_histogram(
            name=Meters.LLM_OPERATION_DURATION,
            unit="s",
            description="GenAI operation duration",
        ),
    )
426
+
427
+
428
def is_metrics_collection_enabled() -> bool:
    """Return True unless TRACELOOP_METRICS_ENABLED is set to a non-"true" value.

    Unset or empty values count as enabled (case-insensitive match on "true").
    """
    value = os.getenv("TRACELOOP_METRICS_ENABLED")
    if not value:
        # Unset *or* empty string falls back to the default "true",
        # matching the original `or "true"` semantics.
        value = "true"
    return value.lower() == "true"
430
+
431
+
326
432
class OllamaInstrumentor (BaseInstrumentor ):
327
433
"""An instrumentor for Ollama's client library."""
328
434
@@ -336,22 +442,37 @@ def instrumentation_dependencies(self) -> Collection[str]:
336
442
def _instrument(self, **kwargs):
    """Wrap the sync/async Ollama client methods with tracing and metrics.

    Builds a tracer (and, when metrics collection is enabled, the token and
    duration histograms) and installs wrappers around every method listed in
    WRAPPED_METHODS on ``Client``, ``AsyncClient``, and the module-level API.
    """
    tracer = get_tracer(__name__, __version__, kwargs.get("tracer_provider"))
    meter = get_meter(__name__, __version__, kwargs.get("meter_provider"))

    if is_metrics_collection_enabled():
        token_histogram, duration_histogram = _build_metrics(meter)
    else:
        # Metrics disabled: wrappers receive None and skip all .record() calls.
        token_histogram = duration_histogram = None

    for wrapped_method in WRAPPED_METHODS:
        method_name = wrapped_method.get("method")
        # Synchronous client method.
        wrap_function_wrapper(
            "ollama._client",
            f"Client.{method_name}",
            _wrap(tracer, token_histogram, duration_histogram, wrapped_method),
        )
        # Asynchronous client method.
        wrap_function_wrapper(
            "ollama._client",
            f"AsyncClient.{method_name}",
            _awrap(tracer, token_histogram, duration_histogram, wrapped_method),
        )
        # Module-level convenience function (delegates to the sync client).
        wrap_function_wrapper(
            "ollama",
            f"{method_name}",
            _wrap(tracer, token_histogram, duration_histogram, wrapped_method),
        )
356
477
357
478
def _uninstrument (self , ** kwargs ):
0 commit comments