Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions eslint.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export default [
console: "readonly",
process: "readonly",
fetch: "readonly",
performance: "readonly",
},
},
plugins: {
Expand Down
23 changes: 23 additions & 0 deletions src/lib/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { metrics } from "@opentelemetry/api";

/** Meter, obtained under the name "responses.js", from which every instrument below is created. */
const meter = metrics.getMeter("responses.js");

/** Builds the options object for an instrument that only carries a description. */
const describedAs = (description: string) => ({ description });

/** Builds the options object for an instrument whose values are reported with unit "s" (seconds). */
const describedInSeconds = (description: string) => ({ description, unit: "s" });

// ---------------------------------------------------------------------------
// Model (LLM) call instruments
// ---------------------------------------------------------------------------

/** Counter for model (LLM) calls. */
export const modelCallCounter = meter.createCounter(
  "responses_model_calls_total",
  describedAs("Total number of model (LLM) calls")
);

/** Histogram of model (LLM) call durations; values are expected in seconds. */
export const modelCallDuration = meter.createHistogram(
  "responses_model_call_duration_seconds",
  describedInSeconds("Duration of model (LLM) calls in seconds")
);

// ---------------------------------------------------------------------------
// MCP tool call instruments
// ---------------------------------------------------------------------------

/** Counter for MCP tool calls. */
export const mcpToolCallCounter = meter.createCounter(
  "responses_mcp_tool_calls_total",
  describedAs("Total number of MCP tool calls")
);

/** Histogram of MCP tool call durations; values are expected in seconds. */
export const mcpToolCallDuration = meter.createHistogram(
  "responses_mcp_tool_call_duration_seconds",
  describedInSeconds("Duration of MCP tool calls in seconds")
);
9 changes: 9 additions & 0 deletions src/mcp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import type { Logger } from "pino";

import type { McpServerParams } from "./schemas";
import { McpResultFormatter } from "./lib/McpResultFormatter";
import { mcpToolCallCounter, mcpToolCallDuration } from "./lib/metrics.js";

export async function connectMcpServer(mcpServer: McpServerParams, log: Logger): Promise<Client> {
const mcp = new Client({ name: "@huggingface/responses.js", version: packageVersion });
Expand Down Expand Up @@ -39,6 +40,8 @@ export async function callMcpTool(
argumentsString: string,
log: Logger
): Promise<{ error: string; output?: undefined } | { error?: undefined; output: string }> {
const start = performance.now();
let statusCode = 200;
try {
const client = await connectMcpServer(mcpServer, log);
const toolArgs: Record<string, unknown> = argumentsString === "" ? {} : JSON.parse(argumentsString);
Expand All @@ -49,10 +52,16 @@ export async function callMcpTool(
output: formattedResult,
};
} catch (error) {
statusCode = 500;
const errorMessage =
error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error);
return {
error: errorMessage,
};
} finally {
const durationSeconds = (performance.now() - start) / 1000;
const metricAttrs = { status_code: statusCode, tool_name: toolName, server_label: mcpServer.server_label };
mcpToolCallCounter.add(1, metricAttrs);
mcpToolCallDuration.record(durationSeconds, metricAttrs);
}
}
8 changes: 8 additions & 0 deletions src/routes/responses/handleOneTurn.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,20 @@ vi.mock("@opentelemetry/api", () => {
setStatus: vi.fn(),
end: vi.fn(),
};
const mockCounter = { add: vi.fn() };
const mockHistogram = { record: vi.fn() };
return {
trace: {
getTracer: vi.fn().mockReturnValue({
startSpan: vi.fn().mockReturnValue(mockSpan),
}),
},
metrics: {
getMeter: vi.fn().mockReturnValue({
createCounter: vi.fn().mockReturnValue(mockCounter),
createHistogram: vi.fn().mockReturnValue(mockHistogram),
}),
},
Comment thread
frac marked this conversation as resolved.
context: { active: vi.fn() },
propagation: { extract: vi.fn() },
SpanStatusCode: { ERROR: 2 },
Expand Down
12 changes: 12 additions & 0 deletions src/routes/responses/handleOneTurn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import type { Logger } from "pino";
import { type IncompleteResponse, StreamingError, SEQUENCE_NUMBER_PLACEHOLDER, tracer } from "./types.js";
import { recordError, requiresApproval } from "./utils.js";
import { closeLastOutputItem } from "./closeOutputItem.js";
import { modelCallCounter, modelCallDuration } from "../../lib/metrics.js";

/*
* Call LLM and stream the response.
Expand Down Expand Up @@ -56,6 +57,8 @@ export async function* handleOneTurnStream(
dispatcher: new Agent({ allowH2: true }),
},
});
const modelCallStart = performance.now();
let modelCallStatusCode = 200;
const stream = await client.chat.completions.create(payload);
let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
Comment thread
frac marked this conversation as resolved.
let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
Expand Down Expand Up @@ -314,9 +317,18 @@ export async function* handleOneTurnStream(
yield event;
}
} catch (error) {
if (error instanceof OpenAI.APIError) {
modelCallStatusCode = error.status ?? 500;
} else {
modelCallStatusCode = 500;
}
recordError(llmSpan, error);
throw error;
} finally {
const modelCallDurationSeconds = (performance.now() - modelCallStart) / 1000;
const metricAttrs = { status_code: modelCallStatusCode, model_name: payload.model };
modelCallCounter.add(1, metricAttrs);
modelCallDuration.record(modelCallDurationSeconds, metricAttrs);
if (responseObject.usage) {
llmSpan.setAttributes({
"gen_ai.usage.input_tokens": responseObject.usage.input_tokens,
Expand Down
Loading