diff --git a/README.md b/README.md
index a61b52db44d8..6fd4a5f53b72 100644
--- a/README.md
+++ b/README.md
@@ -123,6 +123,7 @@ A/B testing, canary release, blue-green deployment, limit rate, defense against
 
 - **OPS friendly**
   - Zipkin tracing: [Zipkin](docs/en/latest/plugins/zipkin.md)
+  - OpenTelemetry tracing: [OpenTelemetry](docs/en/latest/plugins/opentelemetry.md) with plugin execution tracing
   - Open source APM: support [Apache SkyWalking](docs/en/latest/plugins/skywalking.md)
   - Works with external service discovery: In addition to the built-in etcd, it also supports [Consul](docs/en/latest/discovery/consul.md), [Consul_kv](docs/en/latest/discovery/consul_kv.md), [Nacos](docs/en/latest/discovery/nacos.md), [Eureka](docs/en/latest/discovery/eureka.md) and [Zookeeper (CP)](https://github.com/api7/apisix-seed/blob/main/docs/en/latest/zookeeper.md).
   - Monitoring And Metrics: [Prometheus](docs/en/latest/plugins/prometheus.md)
diff --git a/apisix/plugin.lua b/apisix/plugin.lua
index 789eb528d546..258628b5d6a7 100644
--- a/apisix/plugin.lua
+++ b/apisix/plugin.lua
@@ -1169,6 +1169,9 @@ function _M.run_plugin(phase, plugins, api_ctx)
         return api_ctx
     end
 
+    -- Get OpenTelemetry plugin for tracing
+    local otel_plugin = _M.get("opentelemetry")
+
     if phase ~= "log"
         and phase ~= "header_filter"
         and phase ~= "body_filter"
@@ -1188,11 +1191,26 @@ function _M.run_plugin(phase, plugins, api_ctx)
                     goto CONTINUE
                 end
 
+                -- Start OpenTelemetry plugin span
+                if otel_plugin and otel_plugin.start_plugin_span then
+                    otel_plugin.start_plugin_span(api_ctx, plugins[i]["name"], phase)
+                end
+
                 run_meta_pre_function(conf, api_ctx, plugins[i]["name"])
                 plugin_run = true
                 api_ctx._plugin_name = plugins[i]["name"]
                 local code, body = phase_func(conf, api_ctx)
                 api_ctx._plugin_name = nil
+
+                -- Finish OpenTelemetry plugin span; only a numeric code >= 400 marks it failed
+                if otel_plugin and otel_plugin.finish_plugin_span then
+                    local error_msg = nil
+                    if type(code) == "number" and code >= 400 then
+                        error_msg = "plugin returned error code: " .. tostring(code)
+                    end
+                    otel_plugin.finish_plugin_span(api_ctx, plugins[i]["name"], phase, error_msg)
+                end
+
                 if code or body then
                     if is_http then
                         if code >= 400 then
@@ -1216,7 +1234,6 @@ function _M.run_plugin(phase, plugins, api_ctx)
                     end
                 end
             end
-
             ::CONTINUE::
         end
         return api_ctx, plugin_run
@@ -1226,11 +1243,26 @@ function _M.run_plugin(phase, plugins, api_ctx)
         local phase_func = plugins[i][phase]
         local conf = plugins[i + 1]
         if phase_func and meta_filter(api_ctx, plugins[i]["name"], conf) then
+            -- Start OpenTelemetry plugin span
+            if otel_plugin and otel_plugin.start_plugin_span then
+                otel_plugin.start_plugin_span(api_ctx, plugins[i]["name"], phase)
+            end
+
             plugin_run = true
             run_meta_pre_function(conf, api_ctx, plugins[i]["name"])
             api_ctx._plugin_name = plugins[i]["name"]
-            phase_func(conf, api_ctx)
+
+            local code = phase_func(conf, api_ctx)
             api_ctx._plugin_name = nil
+
+            -- Finish OpenTelemetry plugin span; return values of these phases were ignored so far
+            if otel_plugin and otel_plugin.finish_plugin_span then
+                local error_msg = nil
+                if type(code) == "number" and code >= 400 then
+                    error_msg = "plugin returned error code: " .. tostring(code)
+                end
+                otel_plugin.finish_plugin_span(api_ctx, plugins[i]["name"], phase, error_msg)
+            end
         end
     end
 
diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua
index d98ac44ae69d..59f20b04c057 100644
--- a/apisix/plugins/opentelemetry.lua
+++ b/apisix/plugins/opentelemetry.lua
@@ -182,6 +182,42 @@ local schema = {
                 type = "string",
                 minLength = 1,
             }
+        },
+        trace_plugins = {
+            type = "object",
+            description = "configuration for plugin execution tracing",
+            properties = {
+                enabled = {
+                    type = "boolean",
+                    description = "whether to trace individual plugin execution",
+                    default = false
+                },
+                plugin_span_kind = {
+                    type = "string",
+                    enum = {"internal", "server"},
+                    description = "span kind for plugin execution spans. "
+                                  .. "Some observability providers may exclude internal "
+                                  .. "spans from metrics and dashboards. Use 'server' "
+                                  .. "if you need plugin spans included in "
+                                  .. "service-level metrics.",
+                    default = "internal"
+                },
+                excluded_plugins = {
+                    type = "array",
+                    description = "plugins to exclude from tracing "
+                                  .. "(e.g., opentelemetry, prometheus)",
+                    items = {
+                        type = "string",
+                        minLength = 1,
+                    },
+                    default = {"opentelemetry", "prometheus"}
+                }
+            },
+            default = {
+                enabled = false,
+                plugin_span_kind = "internal",
+                excluded_plugins = {"opentelemetry", "prometheus"}
+            }
         }
     }
 
@@ -306,6 +342,238 @@ local function inject_attributes(attributes, wanted_attributes, source, with_pre
 end
 
 
+-- Plugin span management functions
+-- =================================
+
+-- Map the trace_plugins.plugin_span_kind setting to a span_kind constant
+local plugin_span_kinds = {
+    internal = span_kind.internal,
+    server = span_kind.server,
+}
+
+
+-- Build a consistent key for identifying a plugin phase span
+-- @return string "<plugin_name>:<phase>"
+local function build_plugin_phase_key(plugin_name, phase)
+    return plugin_name .. ":" .. phase
+end
+
+
+-- Create the span of one plugin phase, or return the one that is already open.
+-- The span is named "<plugin_name> <phase>" and is remembered in
+-- api_ctx.otel_plugin_spans until finish_phase_span() closes it.
+-- @return span context, or nil when api_ctx.otel is not set
+local function create_phase_span(api_ctx, plugin_name, phase)
+    if not api_ctx.otel then
+        return nil
+    end
+
+    if not api_ctx.otel_plugin_spans then
+        api_ctx.otel_plugin_spans = {}
+    end
+
+    -- Create unique key for plugin+phase combination
+    local span_key = build_plugin_phase_key(plugin_name, phase)
+    local phase_span_ctx = api_ctx.otel_plugin_spans[span_key]
+    if phase_span_ctx then
+        return phase_span_ctx
+    end
+
+    -- A plugin that aborted its phase leaves a stale marker behind. Drop it so
+    -- that the new span is created directly under the main request span.
+    api_ctx._current_plugin_phase = nil
+
+    phase_span_ctx = api_ctx.otel.start_span({
+        name = plugin_name .. " " .. phase,
+        kind = api_ctx.otel_plugin_span_kind,
+        attributes = {
+            attr.string("apisix.plugin_name", plugin_name),
+            attr.string("apisix.plugin_phase", phase),
+        }
+    })
+
+    api_ctx.otel_plugin_spans[span_key] = phase_span_ctx
+    -- Store current plugin context for child spans
+    api_ctx._current_plugin_phase = span_key
+    -- t/plugin/opentelemetry-plugin-tracing.t looks for this message
+    core.log.debug("plugin execution span created: ", span_key)
+
+    return phase_span_ctx
+end
+
+
+-- Finish the span of one plugin phase if it is still open.
+-- @param error_msg optional; when set the span status becomes ERROR
+local function finish_phase_span(api_ctx, plugin_name, phase, error_msg)
+    if not api_ctx.otel_plugin_spans then
+        return
+    end
+
+    local span_key = build_plugin_phase_key(plugin_name, phase)
+    local phase_span_ctx = api_ctx.otel_plugin_spans[span_key]
+    if not phase_span_ctx then
+        return
+    end
+
+    api_ctx.otel.stop_span(phase_span_ctx, error_msg)
+    api_ctx.otel_plugin_spans[span_key] = nil
+
+    -- Clear current plugin phase context when span is finished
+    if api_ctx._current_plugin_phase == span_key then
+        api_ctx._current_plugin_phase = nil
+    end
+end
+
+
+-- Finish every plugin span that is still open and drop the bookkeeping
+local function cleanup_plugin_spans(api_ctx)
+    if not api_ctx.otel_plugin_spans then
+        return
+    end
+
+    for _, phase_span_ctx in pairs(api_ctx.otel_plugin_spans) do
+        api_ctx.otel.stop_span(phase_span_ctx)
+    end
+
+    api_ctx.otel_plugin_spans = nil
+    api_ctx._current_plugin_phase = nil
+end
+
+
+-- OpenTelemetry API for plugins
+-- =============================
+
+-- Drop the pcall status and return every other value, nil values included.
+-- Packing the results into a table and unpacking them again is not safe
+-- because the length of a table with nil holes is unspecified.
+local function drop_pcall_status(_, ...)
+    return ...
+end
+
+
+-- Stop the span and pass the pcall results of a with_span callback through.
+-- The value after the pcall status is the Lua error when the callback raised,
+-- otherwise the err returned by the callback; it becomes the span error.
+local function stop_and_return(stop_span, span_ctx, _, ...)
+    local err = ...
+    if span_ctx then
+        stop_span(span_ctx, err)
+    end
+    return ...
+end
+
+
+-- No-op API when tracing is disabled
+local noop_api = setmetatable({
+    with_span = function(span_info, fn)
+        if not fn then
+            return nil, "with_span: the function parameter is required"
+        end
+        -- Execute function without tracing, passing nil as span_ctx (no actual span)
+        return drop_pcall_status(pcall(fn, nil))
+    end
+}, {
+    __index = function(_, _)
+        return function() return nil end
+    end
+})
+
+
+-- Create simple OpenTelemetry API for plugins (stored as api_ctx.otel)
+-- @param tracer tracer used to start spans
+-- @param main_context context of the main request span, the default parent
+local function create_otel_api(api_ctx, tracer, main_context)
+    -- Initialize span stack for tracking current spans
+    if not api_ctx._otel_span_stack then
+        api_ctx._otel_span_stack = {}
+    end
+
+    local api = {
+        start_span = function(span_info)
+            if not (span_info and span_info.name) then
+                return nil
+            end
+
+            -- Get parent context (prioritize explicit parent, then current phase span, then main)
+            local current_phase_span = api_ctx._current_plugin_phase and
+                                       api_ctx.otel_plugin_spans and
+                                       api_ctx.otel_plugin_spans[api_ctx._current_plugin_phase]
+
+            local parent_context = span_info.parent or current_phase_span or main_context
+
+            -- Use the provided kind directly (users should pass span_kind constants)
+            local span_ctx = tracer:start(parent_context, span_info.name, {
+                kind = span_info.kind or span_kind.internal,
+                attributes = span_info.attributes or {},
+            })
+
+            -- Track this span as current (push to stack)
+            core.table.insert(api_ctx._otel_span_stack, span_ctx)
+
+            return span_ctx
+        end,
+
+        stop_span = function(span_ctx, error_msg)
+            if not span_ctx then
+                return
+            end
+
+            local span = span_ctx:span()
+            if not span then
+                return
+            end
+
+            if error_msg then
+                -- the error can be a non-string value raised by error()
+                span:set_status(span_status.ERROR, tostring(error_msg))
+            end
+
+            span:finish()
+
+            -- Remove the span from the stack wherever it sits, so that a span
+            -- finished out of order is never returned by current_span()
+            local stack = api_ctx._otel_span_stack
+            for i = stack and #stack or 0, 1, -1 do
+                if stack[i] == span_ctx then
+                    core.table.remove(stack, i)
+                    break
+                end
+            end
+        end,
+
+        current_span = function()
+            -- Return the most recently started span (top of stack)
+            local stack = api_ctx._otel_span_stack
+            return stack and stack[#stack] or nil
+        end,
+
+        get_plugin_context = function(plugin_name, phase)
+            if not (api_ctx.otel_plugin_spans and plugin_name and phase) then
+                return nil
+            end
+            return api_ctx.otel_plugin_spans[build_plugin_phase_key(plugin_name, phase)]
+        end,
+    }
+
+    -- Run fn(span_ctx) inside a new span and always stop the span afterwards.
+    -- The span status is ERROR when fn raises or returns a truthy first value.
+    -- @return the Lua error, or the values returned by fn (err, ...values)
+    function api.with_span(span_info, fn)
+        if not fn then
+            return nil, "with_span: the function parameter is required"
+        end
+
+        -- Start the span (this applies the initial attributes)
+        local span_ctx = api.start_span(span_info)
+
+        -- Execute function with pcall for error protection, passing span_ctx to callback
+        return stop_and_return(api.stop_span, span_ctx, pcall(fn, span_ctx))
+    end
+
+    return api
+end
+
+
 function _M.rewrite(conf, api_ctx)
     local metadata = plugin.plugin_metadata(plugin_name)
     if metadata == nil then
@@ -323,7 +591,7 @@ function _M.rewrite(conf, api_ctx)
         return
     end
 
-    local span_name = vars.method
+    local span_name = string_format("http.%s", vars.method)
 
     local attributes = {
         attr.string("net.host.name", vars.host),
@@ -337,7 +605,7 @@ function _M.rewrite(conf, api_ctx)
         table.insert(attributes, attr.string("apisix.route_id", api_ctx.route_id))
         table.insert(attributes, attr.string("apisix.route_name", api_ctx.route_name))
         table.insert(attributes, attr.string("http.route", api_ctx.curr_req_matched._path))
-        span_name = span_name .. " " .. api_ctx.curr_req_matched._path
+        span_name = string_format("http.%s %s", vars.method, api_ctx.curr_req_matched._path)
     end
 
     if api_ctx.service_id then
@@ -378,11 +646,57 @@ function _M.rewrite(conf, api_ctx)
 
     api_ctx.otel_context_token = ctx:attach()
 
+    -- Store tracer and configuration for plugin tracing
+    local trace_plugins = conf.trace_plugins
+    if trace_plugins and trace_plugins.enabled then
+        -- Map string span kind to span_kind constant
+        api_ctx.otel_plugin_span_kind = plugin_span_kinds[trace_plugins.plugin_span_kind]
+                                        or span_kind.internal
+
+        -- Store excluded plugins configuration
+        api_ctx.otel_excluded_plugins = {}
+        if trace_plugins.excluded_plugins then
+            for _, excluded_name in ipairs(trace_plugins.excluded_plugins) do
+                api_ctx.otel_excluded_plugins[excluded_name] = true
+            end
+        end
+
+        -- Create OpenTelemetry API for plugins
+        api_ctx.otel = create_otel_api(api_ctx, tracer, ctx)
+    else
+        -- Always provide API - no-op when tracing disabled
+        api_ctx.otel = noop_api
+    end
+
     -- inject trace context into the headers of upstream HTTP request
     trace_context_propagator:inject(ctx, ngx.req)
 end
 
 
+-- Add the address of the picked upstream node to the current span
+function _M.before_proxy(conf, api_ctx)
+    -- Only add upstream attributes if we have an active trace context
+    local server = api_ctx.picked_server
+    if not (api_ctx.otel_context_token and server) then
+        return
+    end
+
+    local current_ctx = context:current()
+    local span = current_ctx and current_ctx:span()
+    if not span then
+        return
+    end
+
+    -- Add upstream attributes to the main span
+    span:set_attributes(
+        attr.string("apisix.upstream.addr", string_format("%s:%s", server.host, server.port)),
+        attr.string("apisix.upstream.host", server.upstream_host or server.host),
+        attr.string("apisix.upstream.ip", server.host),
+        attr.int("apisix.upstream.port", server.port)
+    )
+end
+
+
 function _M.delayed_body_filter(conf, api_ctx)
     if api_ctx.otel_context_token and ngx.arg[2] then
         local ctx = context:current()
@@ -418,8 +732,56 @@ function _M.log(conf, api_ctx)
                             "upstream response status: " .. upstream_status)
         end
 
+        -- Finish plugin spans that are still open before their parent span ends
+        cleanup_plugin_spans(api_ctx)
+
+        -- NOTE(review): upstream_status is assumed to be nil when no upstream
+        -- response exists; confirm against the code that assigns it
+        if upstream_status then
+            span:set_attributes(attr.int("http.status_code", upstream_status))
+        end
         span:finish()
+        -- Clear the context token to prevent double finishing
+        api_ctx.otel_context_token = nil
+    end
+end
+
+
+-- Public functions for plugin tracing integration
+-- ===============================================
+
+-- Start plugin phase span
+-- Safe to call even if OpenTelemetry plugin is not enabled (will be no-op)
+function _M.start_plugin_span(api_ctx, plugin_name, phase)
+    -- Check if plugin tracing is enabled by checking for otel_plugin_span_kind
+    -- only set when trace_plugins.enabled is true
+    if not api_ctx.otel_plugin_span_kind then
+        return nil
+    end
+
+    -- Check if plugin is excluded from tracing
+    if api_ctx.otel_excluded_plugins and api_ctx.otel_excluded_plugins[plugin_name] then
+        return nil
+    end
+
+    return create_phase_span(api_ctx, plugin_name, phase)
+end
+
+
+-- Finish plugin phase span
+-- Safe to call even if OpenTelemetry plugin is not enabled (will be no-op)
+function _M.finish_plugin_span(api_ctx, plugin_name, phase, error_msg)
+    -- If tracing disabled, api_ctx.otel_plugin_spans won't be initialized
+    if not api_ctx.otel_plugin_spans then
+        return
     end
+
+    -- Check if plugin is excluded from tracing
+    if api_ctx.otel_excluded_plugins and api_ctx.otel_excluded_plugins[plugin_name] then
+        return
+    end
+
+    finish_phase_span(api_ctx, plugin_name, phase, error_msg)
 end
 
 
diff --git a/docs/en/latest/plugins/opentelemetry.md b/docs/en/latest/plugins/opentelemetry.md
index 061c26212dd5..a369cd7e7e69 100644
--- a/docs/en/latest/plugins/opentelemetry.md
+++ b/docs/en/latest/plugins/opentelemetry.md
@@ -90,6 +90,10 @@ curl
http://127.0.0.1:9180/apisix/admin/plugin_metadata/opentelemetry -H "X-API- | sampler.options.root.options.fraction | number | False | 0 | [0, 1] | Root sampling ratio when the sampling strategy is `trace_id_ratio`. | | additional_attributes | array[string] | False | - | - | Additional attributes appended to the trace span. Support [built-in variables](https://apisix.apache.org/docs/apisix/apisix-variable/) in values. | | additional_header_prefix_attributes | array[string] | False | - | - | Headers or header prefixes appended to the trace span's attributes. For example, use `x-my-header"` or `x-my-headers-*` to include all headers with the prefix `x-my-headers-`. | +| trace_plugins | object | False | `{"enabled": false, "plugin_span_kind": "internal", "excluded_plugins": ["opentelemetry", "prometheus"]}` | - | Configuration for plugin execution tracing. | +| trace_plugins.enabled | boolean | False | `false` | - | Whether to trace individual plugin execution phases. When enabled, creates a child span named `<plugin_name> <phase>` for each executed plugin phase (rewrite, access, header_filter, body_filter, log). Each span carries the `apisix.plugin_name` and `apisix.plugin_phase` attributes. | +| trace_plugins.plugin_span_kind | string | False | `internal` | ["internal", "server"] | Span kind for plugin execution spans. Some observability providers may exclude internal spans from metrics and dashboards. Use 'server' if you need plugin spans included in service-level metrics. | +| trace_plugins.excluded_plugins | array[string] | False | `["opentelemetry", "prometheus"]` | - | List of plugin names to exclude from tracing. Useful for excluding plugins like `opentelemetry` or `prometheus` that may add unnecessary overhead when traced. 
| ## Examples @@ -222,3 +226,336 @@ You should see access log entries similar to the following when you generate req ```text {"time": "18/Feb/2024:15:09:00 +0000","opentelemetry_context_traceparent": "00-fbd0a38d4ea4a128ff1a688197bc58b0-8f4b9d9970a02629-01","opentelemetry_trace_id": "fbd0a38d4ea4a128ff1a688197bc58b0","opentelemetry_span_id": "af3dc7642104748a","remote_addr": "172.10.0.1"} ``` + +### Enable Plugin Execution Tracing + +The `trace_plugins` object allows you to trace individual plugin execution phases. When enabled (`trace_plugins.enabled: true`), the OpenTelemetry plugin creates a child span named `<plugin_name> <phase>` for each executed plugin phase (rewrite, access, header_filter, body_filter, log), carrying the `apisix.plugin_name` and `apisix.plugin_phase` attributes. + +**Note**: Plugin tracing is **disabled by default** (`trace_plugins.enabled: false`). You must explicitly enable it to see plugin execution spans. + +#### Configuration Options + +The `trace_plugins` object supports the following properties: + +- **`enabled`** (boolean, default: `false`): Whether to trace plugin execution phases. +- **`plugin_span_kind`** (string, default: `"internal"`): Span kind for plugin execution spans. Use `"server"` if your observability provider excludes internal spans from metrics. +- **`excluded_plugins`** (array of strings, default: `["opentelemetry", "prometheus"]`): List of plugin names to exclude from tracing. 
+ +Create a Route with plugin tracing enabled: + +```shell +curl "http://127.0.0.1:9180/apisix/admin/routes/1" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -H "Content-Type: application/json" \ + -d '{ + "uri": "/hello", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": true + } + }, + "proxy-rewrite": { + "uri": "/get" + }, + "response-rewrite": { + "headers": { + "X-Response-Time": "$time_iso8601" + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }' +``` + +When you make requests to this route, you will see: + +1. **Main request span**: `http.GET /hello` with request context +2. **Plugin execution spans** + + +#### Example with Custom Span Kind + +For observability providers that exclude internal spans from metrics, configure plugin spans as `server` type: + +```shell +curl "http://127.0.0.1:9180/apisix/admin/routes/1" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -H "Content-Type: application/json" \ + -d '{ + "uri": "/hello", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": true, + "plugin_span_kind": "server" + } + }, + "proxy-rewrite": { + "uri": "/get" + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }' +``` + +Plugin tracing is disabled by default. 
If you don't need plugin tracing, you can omit the `trace_plugins` attribute or set `enabled: false`: + +```shell +curl "http://127.0.0.1:9180/apisix/admin/routes/1" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -H "Content-Type: application/json" \ + -d '{ + "uri": "/hello", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": false + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }' +``` + +#### Excluding Specific Plugins from Tracing + +You can exclude specific plugins from tracing using the `excluded_plugins` option. This is useful for plugins like `opentelemetry` or `prometheus` that may add unnecessary overhead when traced: + +```shell +curl "http://127.0.0.1:9180/apisix/admin/routes/1" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -H "Content-Type: application/json" \ + -d '{ + "uri": "/hello", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": true, + "excluded_plugins": ["opentelemetry", "prometheus", "proxy-rewrite"] + } + }, + "proxy-rewrite": { + "uri": "/get" + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }' +``` + +### Custom Span Creation API for Plugins + +When the OpenTelemetry plugin is enabled with `trace_plugins.enabled: true`, other plugins can create custom spans using the `api_ctx.otel` API. 
+ +#### API Functions + +- **`api_ctx.otel.start_span(span_info)`**: Creates a new span with simplified parent context resolution +- **`api_ctx.otel.stop_span(span_ctx, error_msg)`**: Finishes a span (error_msg sets error status if provided) +- **`api_ctx.otel.current_span()`**: Gets the current span context (most recently started span) +- **`api_ctx.otel.get_plugin_context(plugin_name, phase)`**: Gets the span context for a specific plugin phase +- **`api_ctx.otel.with_span(span_info, fn)`**: Creates a span, executes a function, and automatically finishes the span with error handling + +#### Parameters + +- `span_info`: Object containing span configuration + - `name`: Name of the span (required) + - `kind`: Span kind constant (optional, defaults to span_kind.internal) + - `parent`: Parent span context (optional, defaults to current plugin phase span or main request context) + - `attributes`: Array of OpenTelemetry attribute objects (optional) +- `span_ctx`: Context returned by start_span +- `error_msg`: Error message (optional, if provided sets span status to ERROR) +- `plugin_name`: Name of the plugin (required for get_plugin_context) +- `phase`: Plugin phase name (required for get_plugin_context): `"rewrite"`, `"access"`, `"header_filter"`, `"body_filter"`, or `"log"` +- `fn`: Function to execute within the span (required for with_span). 
The function receives `span_ctx` as its first parameter, allowing you to access the span and set attributes using `span_ctx:span():set_attributes(...)` + +#### Supported Span Kinds + +Use OpenTelemetry span kind constants directly: + +```lua +local span_kind = require("opentelemetry.trace.span_kind") + +-- Available span kinds: +span_kind.internal -- Internal operation (default) +span_kind.server -- Server-side handling of a remote request +span_kind.client -- Request to a remote service +span_kind.producer -- Initiation of an operation (e.g., message publishing) +span_kind.consumer -- Processing of an operation (e.g., message consumption) +``` + +#### Examples + +```lua +local attr = require("opentelemetry.attribute") +local span_kind = require("opentelemetry.trace.span_kind") + +-- Simple span (default: internal, nested under current plugin phase) +local span_ctx = api_ctx.otel.start_span({ + name = "operation-name" +}) + +-- With attributes +local span_ctx = api_ctx.otel.start_span({ + name = "db-query", + -- span_info only reads name, kind, parent and attributes + attributes = { + attr.string("db.operation", "SELECT"), + attr.int("user_id", 123) + } +}) + +-- With span kind +local span_ctx = api_ctx.otel.start_span({ + name = "api-call", + -- kind defaults to span_kind.internal when omitted + kind = span_kind.client, + attributes = { + attr.string("http.method", "GET"), + attr.string("http.url", "https://api.example.com") + } +}) + +-- With custom parent context (get plugin phase context) +local parent_ctx = api_ctx.otel.get_plugin_context("some-plugin", "rewrite") +local span_ctx = api_ctx.otel.start_span({ + name = "child-operation", + parent = parent_ctx, + kind = span_kind.internal +}) + +-- Or use current span as parent +local current_ctx = api_ctx.otel.current_span() +if current_ctx then + local span_ctx = api_ctx.otel.start_span({ + name = "child-operation", + parent = current_ctx, + kind = span_kind.internal + }) +end + +-- Finish span (success) +api_ctx.otel.stop_span(api_ctx.otel.current_span()) + 
+-- Finish span with error +api_ctx.otel.stop_span(api_ctx.otel.current_span(), "operation failed") +``` + +#### Using `with_span` for Automatic Span Management + +The `with_span` function is a convenience method that automatically creates a span, executes your function, and finishes the span with proper error handling. + +**Function Signature:** +```lua +err, ...values = api_ctx.otel.with_span(span_info, fn) +``` + +The function `fn` receives the `span_ctx` as its first parameter, allowing you to access the span and set attributes during execution: +```lua +function(span_ctx) + -- Access the span and set attributes + local span = span_ctx:span() + span:set_attributes(attr.string("key", "value")) + -- Your code here + return nil, "foo" +end +``` + +**Behavior:** +- Creates a span based on `span_info` +- Executes the function `fn` with error protection, passing `span_ctx` as the first parameter +- Automatically finishes the span after execution +- Sets span status to ERROR if the function throws a Lua error or returns an error +- Returns function results in error-first pattern (err, ...values) + +**Examples:** + +```lua +local attr = require("opentelemetry.attribute") +local span_kind = require("opentelemetry.trace.span_kind") + +-- Simple usage +local err, result = api_ctx.otel.with_span({ + name = "my-operation" +}, function(span_ctx) + return nil, "foo" +end) +-- err is nil, result is "foo" + +-- Setting attributes during execution +local err, result = api_ctx.otel.with_span({ + name = "my-operation" +}, function(span_ctx) + local span = span_ctx:span() + span:set_attributes( + attr.string("operation.type", "example"), + attr.int("items.processed", 42) + ) + return nil, "foo" +end) + +-- With span kind +local err, result = api_ctx.otel.with_span({ + name = "my-operation", + kind = span_kind.client +}, function(span_ctx) + return nil, "foo" +end) +``` + +#### Advanced Usage + +The API supports creating spans with custom parent contexts and rich attributes: + 
+```lua +-- Get context from another plugin phase +local auth_ctx = api_ctx.otel.get_plugin_context("auth-plugin", "access") +if auth_ctx then + local span_ctx = api_ctx.otel.start_span({ + name = "auth-verification", + parent = auth_ctx, + kind = span_kind.internal, + attributes = { + attr.string("auth.method", "jwt"), + attr.string("user.id", user_id) + } + }) + + -- Perform authentication logic + local success = verify_token(token) + + -- Finish with appropriate status + api_ctx.otel.stop_span(span_ctx, success and nil or "authentication failed") +end +``` diff --git a/t/plugin/opentelemetry-plugin-tracing.t b/t/plugin/opentelemetry-plugin-tracing.t new file mode 100644 index 000000000000..7dda2ec45cf1 --- /dev/null +++ b/t/plugin/opentelemetry-plugin-tracing.t @@ -0,0 +1,427 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +use t::APISIX 'no_plan'; +add_block_preprocessor(sub { + my ($block) = @_; + + if (!$block->extra_yaml_config) { + my $extra_yaml_config = <<_EOC_; +plugins: + - opentelemetry + - proxy-rewrite + - response-rewrite +_EOC_ + $block->set_value("extra_yaml_config", $extra_yaml_config); + } + + if (!$block->request) { + $block->set_value("request", "GET /t"); + } + + if (!defined $block->response_body) { + $block->set_value("response_body", "passed\n"); + } + $block; +}); +repeat_each(1); +no_long_string(); +no_root_location(); +log_level("debug"); + +run_tests; + +__DATA__ + +=== TEST 1: add plugin metadata with plugin tracing enabled +--- config + location /t { + content_by_lua_block { + local core = require("apisix.core") + local plugin = require("apisix.plugin") + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/plugin_metadata/opentelemetry', + ngx.HTTP_PUT, + [[{ + "collector": { + "address": "127.0.0.1:4318" + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 2: create route with opentelemetry plugin and trace_plugins enabled +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/hello", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": true + } + }, + "proxy-rewrite": { + "uri": "/get" + }, + "response-rewrite": { + "headers": { + "X-Response-Time": "$time_iso8601" + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 3: test route with plugin tracing +--- request +GET /hello +--- response_body +passed +--- error_log +plugin execution span created +--- 
wait: 0.1 + + + +=== TEST 4: create route with opentelemetry plugin and trace_plugins disabled +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/2', + ngx.HTTP_PUT, + [[{ + "uri": "/hello2", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": false + } + }, + "proxy-rewrite": { + "uri": "/get" + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 5: test route without plugin tracing +--- request +GET /hello2 +--- response_body +passed +--- no_error_log +plugin execution span created +--- wait: 0.1 + + + +=== TEST 6: test schema validation for trace_plugins +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/3', + ngx.HTTP_PUT, + [[{ + "uri": "/hello3", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": "invalid_value" + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + ngx.say(body) + } + } +--- error_code: 400 +--- response_body +{"error_msg":"invalid configuration: property \"trace_plugins\" validation failed: wrong type: expected object, got string"} + + + +=== TEST 7: test default value for trace_plugins +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/4', + ngx.HTTP_PUT, + [[{ + "uri": "/hello4", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }]] + ) + if code >= 
300 then + ngx.status = code + ngx.say(body) + return + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 8: test route with default trace_plugins (should be false) +--- request +GET /hello4 +--- response_body +passed +--- no_error_log +plugin execution span created +--- wait: 0.1 + + + +=== TEST 9: create route with excluded_plugins configuration +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/5', + ngx.HTTP_PUT, + [[{ + "uri": "/hello5", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": true, + "excluded_plugins": ["proxy-rewrite"] + } + }, + "proxy-rewrite": { + "uri": "/get" + }, + "response-rewrite": { + "headers": { + "X-Test": "value" + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 10: test route with excluded_plugins (proxy-rewrite should not be traced) +--- request +GET /hello5 +--- response_body +passed +--- wait: 0.1 + + + +=== TEST 11: create route with plugin_span_kind set to server +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/6', + ngx.HTTP_PUT, + [[{ + "uri": "/hello6", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": true, + "plugin_span_kind": "server" + } + }, + "proxy-rewrite": { + "uri": "/get" + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + ngx.say(body) + } + } +--- response_body +passed + + + +=== TEST 12: test route with server span kind for plugin tracing +--- request +GET /hello6 +--- 
response_body +passed +--- wait: 0.1 + + + +=== TEST 13: test schema validation for plugin_span_kind enum +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/7', + ngx.HTTP_PUT, + [[{ + "uri": "/hello7", + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + }, + "trace_plugins": { + "enabled": true, + "plugin_span_kind": "invalid_kind" + } + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "127.0.0.1:1980": 1 + } + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + ngx.say(body) + } + } +--- error_code: 400 +--- response_body +{"error_msg":"invalid configuration: property \"trace_plugins\" validation failed: property \"plugin_span_kind\" validation failed: matches none of the enum values"}