
Commit b1fde54

fix(ollama): Implemented meter in the instrumentation (#2741)
1 parent 66f8373 · commit b1fde54
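
For orientation, a minimal sketch of how the metrics added by this commit can be consumed, assuming the standard OpenTelemetry SDK (the in-memory reader below is illustrative only; any configured metric reader or exporter works):

```python
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader

from opentelemetry.instrumentation.ollama import OllamaInstrumentor

reader = InMemoryMetricReader()
meter_provider = MeterProvider(metric_readers=[reader])

# _instrument() reads "meter_provider" from its kwargs (see the diff below).
OllamaInstrumentor().instrument(meter_provider=meter_provider)

# ... make ollama.chat() / ollama.generate() calls here ...

# The token-usage and operation-duration histograms are then available
# from the reader.
metrics_data = reader.get_metrics_data()
```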

File tree: 4 files changed (+343 −89 lines)

packages/opentelemetry-instrumentation-ollama/opentelemetry/instrumentation/ollama/__init__.py

+139 −18
@@ -3,14 +3,16 @@
 import logging
 import os
 import json
+import time
 from typing import Collection
 from opentelemetry.instrumentation.ollama.config import Config
 from opentelemetry.instrumentation.ollama.utils import dont_throw
 from wrapt import wrap_function_wrapper

 from opentelemetry import context as context_api
-from opentelemetry.trace import get_tracer, SpanKind
+from opentelemetry.trace import get_tracer, SpanKind, Tracer
 from opentelemetry.trace.status import Status, StatusCode
+from opentelemetry.metrics import Histogram, Meter, get_meter

 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import (
@@ -22,6 +24,7 @@
     SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
     SpanAttributes,
     LLMRequestTypeValues,
+    Meters
 )
 from opentelemetry.instrumentation.ollama.version import __version__

@@ -145,7 +148,7 @@ def _set_input_attributes(span, llm_request_type, kwargs):


 @dont_throw
-def _set_response_attributes(span, llm_request_type, response):
+def _set_response_attributes(span, token_histogram, llm_request_type, response):
     if should_send_prompts():
         if llm_request_type == LLMRequestTypeValues.COMPLETION:
             _set_span_attribute(
@@ -189,9 +192,42 @@ def _set_response_attributes(span, llm_request_type, response):
         SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
         input_tokens,
     )
+    _set_span_attribute(
+        span,
+        SpanAttributes.LLM_SYSTEM,
+        "Ollama"
+    )
+
+    if (
+        token_histogram is not None
+        and isinstance(input_tokens, int)
+        and input_tokens >= 0
+    ):
+        token_histogram.record(
+            input_tokens,
+            attributes={
+                SpanAttributes.LLM_SYSTEM: "Ollama",
+                SpanAttributes.LLM_TOKEN_TYPE: "input",
+                SpanAttributes.LLM_RESPONSE_MODEL: response.get("model"),
+            },
+        )
+
+    if (
+        token_histogram is not None
+        and isinstance(output_tokens, int)
+        and output_tokens >= 0
+    ):
+        token_histogram.record(
+            output_tokens,
+            attributes={
+                SpanAttributes.LLM_SYSTEM: "Ollama",
+                SpanAttributes.LLM_TOKEN_TYPE: "output",
+                SpanAttributes.LLM_RESPONSE_MODEL: response.get("model"),
+            },
+        )


-def _accumulate_streaming_response(span, llm_request_type, response):
+def _accumulate_streaming_response(span, token_histogram, llm_request_type, response):
     if llm_request_type == LLMRequestTypeValues.CHAT:
         accumulated_response = {"message": {"content": "", "role": ""}}
     elif llm_request_type == LLMRequestTypeValues.COMPLETION:
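
The `input_tokens`/`output_tokens` values guarded above are derived earlier in `_set_response_attributes`, outside this hunk. For context, a minimal sketch of where they come from, using the usage fields Ollama returns (the same field names appear in the test fixture added by this commit):

```python
# Illustrative only: Ollama responses report token usage via
# "prompt_eval_count" (prompt side) and "eval_count" (completion side).
response = {"model": "llama3", "prompt_eval_count": 7, "eval_count": 10}

input_tokens = response.get("prompt_eval_count")   # recorded with token type "input"
output_tokens = response.get("eval_count")         # recorded with token type "output"
```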
@@ -206,11 +242,11 @@ def _accumulate_streaming_response(span, llm_request_type, response):
         elif llm_request_type == LLMRequestTypeValues.COMPLETION:
             accumulated_response["response"] += res["response"]

-    _set_response_attributes(span, llm_request_type, res | accumulated_response)
+    _set_response_attributes(span, token_histogram, llm_request_type, res | accumulated_response)
     span.end()


-async def _aaccumulate_streaming_response(span, llm_request_type, response):
+async def _aaccumulate_streaming_response(span, token_histogram, llm_request_type, response):
     if llm_request_type == LLMRequestTypeValues.CHAT:
         accumulated_response = {"message": {"content": "", "role": ""}}
     elif llm_request_type == LLMRequestTypeValues.COMPLETION:
@@ -225,16 +261,25 @@ async def _aaccumulate_streaming_response(span, llm_request_type, response):
         elif llm_request_type == LLMRequestTypeValues.COMPLETION:
             accumulated_response["response"] += res["response"]

-    _set_response_attributes(span, llm_request_type, res | accumulated_response)
+    _set_response_attributes(span, token_histogram, llm_request_type, res | accumulated_response)
     span.end()


 def _with_tracer_wrapper(func):
     """Helper for providing tracer for wrapper functions."""

-    def _with_tracer(tracer, to_wrap):
+    def _with_tracer(tracer, token_histogram, duration_histogram, to_wrap):
         def wrapper(wrapped, instance, args, kwargs):
-            return func(tracer, to_wrap, wrapped, instance, args, kwargs)
+            return func(
+                tracer,
+                token_histogram,
+                duration_histogram,
+                to_wrap,
+                wrapped,
+                instance,
+                args,
+                kwargs,
+            )

         return wrapper

@@ -253,7 +298,16 @@ def _llm_request_type_by_method(method_name):


 @_with_tracer_wrapper
-def _wrap(tracer, to_wrap, wrapped, instance, args, kwargs):
+def _wrap(
+    tracer: Tracer,
+    token_histogram: Histogram,
+    duration_histogram: Histogram,
+    to_wrap,
+    wrapped,
+    instance,
+    args,
+    kwargs,
+):
     """Instruments and calls every function defined in TO_WRAP."""
     if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
         SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
@@ -273,22 +327,43 @@ def _wrap(tracer, to_wrap, wrapped, instance, args, kwargs):
     if span.is_recording():
         _set_input_attributes(span, llm_request_type, kwargs)

+    start_time = time.perf_counter()
     response = wrapped(*args, **kwargs)
+    end_time = time.perf_counter()

     if response:
+        if duration_histogram:
+            duration = end_time - start_time
+            duration_histogram.record(
+                duration,
+                attributes={
+                    SpanAttributes.LLM_SYSTEM: "Ollama",
+                    SpanAttributes.LLM_RESPONSE_MODEL: kwargs.get("model"),
+                },
+            )
+
         if span.is_recording():
             if kwargs.get("stream"):
-                return _accumulate_streaming_response(span, llm_request_type, response)
+                return _accumulate_streaming_response(span, token_histogram, llm_request_type, response)

-            _set_response_attributes(span, llm_request_type, response)
+            _set_response_attributes(span, token_histogram, llm_request_type, response)
             span.set_status(Status(StatusCode.OK))

     span.end()
     return response


 @_with_tracer_wrapper
-async def _awrap(tracer, to_wrap, wrapped, instance, args, kwargs):
+async def _awrap(
+    tracer: Tracer,
+    token_histogram: Histogram,
+    duration_histogram: Histogram,
+    to_wrap,
+    wrapped,
+    instance,
+    args,
+    kwargs,
+):
     """Instruments and calls every function defined in TO_WRAP."""
     if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
         SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
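
The duration measurement above is a plain `time.perf_counter()` bracket around the client call, recorded in seconds to match the histogram's `unit="s"`. A self-contained sketch of the same pattern (`fake_ollama_call` is a hypothetical stand-in for `wrapped(*args, **kwargs)`):

```python
import time

def fake_ollama_call():
    # Hypothetical stand-in for wrapped(*args, **kwargs).
    time.sleep(0.01)
    return {"model": "llama3"}

start_time = time.perf_counter()  # monotonic clock, safe for elapsed time
response = fake_ollama_call()
end_time = time.perf_counter()

duration = end_time - start_time  # seconds, matching the histogram's "s" unit
```

Note that for streaming calls this measures only the initial client call, since the wrapper returns the accumulator before the stream is consumed.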
@@ -309,20 +384,51 @@ async def _awrap(tracer, to_wrap, wrapped, instance, args, kwargs):
     if span.is_recording():
         _set_input_attributes(span, llm_request_type, kwargs)

+    start_time = time.perf_counter()
     response = await wrapped(*args, **kwargs)
-
+    end_time = time.perf_counter()
     if response:
+        if duration_histogram:
+            duration = end_time - start_time
+            duration_histogram.record(
+                duration,
+                attributes={
+                    SpanAttributes.LLM_SYSTEM: "Ollama",
+                    SpanAttributes.LLM_RESPONSE_MODEL: kwargs.get("model"),
+                },
+            )
+
         if span.is_recording():
             if kwargs.get("stream"):
-                return _aaccumulate_streaming_response(span, llm_request_type, response)
+                return _aaccumulate_streaming_response(span, token_histogram, llm_request_type, response)

-            _set_response_attributes(span, llm_request_type, response)
+            _set_response_attributes(span, token_histogram, llm_request_type, response)
             span.set_status(Status(StatusCode.OK))

     span.end()
     return response


+def _build_metrics(meter: Meter):
+    token_histogram = meter.create_histogram(
+        name=Meters.LLM_TOKEN_USAGE,
+        unit="token",
+        description="Measures number of input and output tokens used",
+    )
+
+    duration_histogram = meter.create_histogram(
+        name=Meters.LLM_OPERATION_DURATION,
+        unit="s",
+        description="GenAI operation duration",
+    )
+
+    return token_histogram, duration_histogram
+
+
+def is_metrics_collection_enabled() -> bool:
+    return (os.getenv("TRACELOOP_METRICS_ENABLED") or "true").lower() == "true"
+
+
 class OllamaInstrumentor(BaseInstrumentor):
     """An instrumentor for Ollama's client library."""

@@ -336,22 +442,37 @@ def instrumentation_dependencies(self) -> Collection[str]:
     def _instrument(self, **kwargs):
         tracer_provider = kwargs.get("tracer_provider")
         tracer = get_tracer(__name__, __version__, tracer_provider)
+
+        meter_provider = kwargs.get("meter_provider")
+        meter = get_meter(__name__, __version__, meter_provider)
+
+        if is_metrics_collection_enabled():
+            (
+                token_histogram,
+                duration_histogram,
+            ) = _build_metrics(meter)
+        else:
+            (
+                token_histogram,
+                duration_histogram,
+            ) = (None, None)
+
         for wrapped_method in WRAPPED_METHODS:
             wrap_method = wrapped_method.get("method")
             wrap_function_wrapper(
                 "ollama._client",
                 f"Client.{wrap_method}",
-                _wrap(tracer, wrapped_method),
+                _wrap(tracer, token_histogram, duration_histogram, wrapped_method),
             )
             wrap_function_wrapper(
                 "ollama._client",
                 f"AsyncClient.{wrap_method}",
-                _awrap(tracer, wrapped_method),
+                _awrap(tracer, token_histogram, duration_histogram, wrapped_method),
             )
             wrap_function_wrapper(
                 "ollama",
                 f"{wrap_method}",
-                _wrap(tracer, wrapped_method),
+                _wrap(tracer, token_histogram, duration_histogram, wrapped_method),
             )

     def _uninstrument(self, **kwargs):
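
The new `TRACELOOP_METRICS_ENABLED` gate defaults to on: metrics are disabled only when the variable is set to something other than "true" (case-insensitively). A quick sketch of that behavior, using the function as committed:

```python
import os

def is_metrics_collection_enabled() -> bool:
    return (os.getenv("TRACELOOP_METRICS_ENABLED") or "true").lower() == "true"

os.environ.pop("TRACELOOP_METRICS_ENABLED", None)
assert is_metrics_collection_enabled()        # unset -> enabled by default

os.environ["TRACELOOP_METRICS_ENABLED"] = "FALSE"
assert not is_metrics_collection_enabled()    # opt-out is case-insensitive
```

When the gate is off, `_instrument` passes `(None, None)` for the histograms and the wrappers skip recording, since every `record` call is guarded by a `None` check.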

packages/opentelemetry-instrumentation-ollama/poetry.lock

+8 −8
Some generated files are not rendered by default.

packages/opentelemetry-instrumentation-ollama/tests/test_chat.py

+32
@@ -1,6 +1,9 @@
 import pytest
 import ollama
 from opentelemetry.semconv_ai import SpanAttributes
+from unittest.mock import MagicMock
+from opentelemetry.instrumentation.ollama import _set_response_attributes
+from opentelemetry.semconv_ai import LLMRequestTypeValues


 @pytest.mark.vcr
@@ -212,3 +215,32 @@ async def test_ollama_async_streaming_chat(exporter):
     ) + ollama_span.attributes.get(
         SpanAttributes.LLM_USAGE_PROMPT_TOKENS
     )
+
+
+@pytest.mark.vcr
+def test_token_histogram_recording():
+    span = MagicMock()
+    token_histogram = MagicMock()
+    llm_request_type = LLMRequestTypeValues.COMPLETION
+    response = {
+        "model": "llama3",
+        "prompt_eval_count": 7,
+        "eval_count": 10,
+    }
+    _set_response_attributes(span, token_histogram, llm_request_type, response)
+    token_histogram.record.assert_any_call(
+        7,
+        attributes={
+            SpanAttributes.LLM_SYSTEM: "Ollama",
+            SpanAttributes.LLM_TOKEN_TYPE: "input",
+            SpanAttributes.LLM_RESPONSE_MODEL: "llama3",
+        },
+    )
+    token_histogram.record.assert_any_call(
+        10,
+        attributes={
+            SpanAttributes.LLM_SYSTEM: "Ollama",
+            SpanAttributes.LLM_TOKEN_TYPE: "output",
+            SpanAttributes.LLM_RESPONSE_MODEL: "llama3",
+        },
+    )
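
For readers unfamiliar with `unittest.mock`, `assert_any_call` matches any single call recorded on the mock, which is what lets the test check the input and output recordings independently even though `record` is called twice. A minimal illustration (the attribute key here is a placeholder, not the semconv constant):

```python
from unittest.mock import MagicMock

histogram = MagicMock()
histogram.record(7, attributes={"token.type": "input"})    # placeholder key
histogram.record(10, attributes={"token.type": "output"})

# Each assertion matches one of the two recorded calls.
histogram.record.assert_any_call(7, attributes={"token.type": "input"})
histogram.record.assert_any_call(10, attributes={"token.type": "output"})
assert histogram.record.call_count == 2
```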
