diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ecdc6d396..f5d31ac5ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#3464](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3464)) - `opentelemetry-instrumentation-redis` Add support for redis client-specific instrumentation. ([#3143](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3143)) +- `opentelemetry-util-http` Added support for redacting specific url query string values and url credentials in instrumentations + ([#3508](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3508)) ### Fixed diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py b/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py index 08122d5b59..cd2bc477a5 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py @@ -119,7 +119,7 @@ def response_hook(span: Span, params: typing.Union[ from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE from opentelemetry.trace import Span, SpanKind, TracerProvider, get_tracer from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import remove_url_credentials, sanitize_method +from opentelemetry.util.http import remove_url_credentials, sanitize_method, redact_query_parameters _UrlFilterT = typing.Optional[typing.Callable[[yarl.URL], str]] _RequestHookT = typing.Optional[ @@ -240,9 +240,9 @@ async def on_request_start( method = params.method request_span_name = _get_span_name(method) request_url = ( - remove_url_credentials(trace_config_ctx.url_filter(params.url)) + redact_query_parameters(remove_url_credentials(trace_config_ctx.url_filter(params.url))) if callable(trace_config_ctx.url_filter) - else remove_url_credentials(str(params.url)) + else redact_query_parameters(remove_url_credentials(str(params.url))) ) span_attributes = {} diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py b/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py index d445123dc5..2c48c79505 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py @@ -57,7 +57,7 @@ async def hello(request): from opentelemetry.semconv.metrics import MetricInstruments from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import get_excluded_urls, remove_url_credentials +from opentelemetry.util.http import get_excluded_urls, remove_url_credentials, redact_query_parameters _duration_attrs = [ SpanAttributes.HTTP_METHOD, @@ -146,7 +146,7 @@ def collect_request_attributes(request: web.Request) -> Dict: SpanAttributes.HTTP_ROUTE: _get_view_func(request), SpanAttributes.HTTP_FLAVOR: f"{request.version.major}.{request.version.minor}", SpanAttributes.HTTP_TARGET: request.path, - SpanAttributes.HTTP_URL: remove_url_credentials(http_url), + SpanAttributes.HTTP_URL: redact_query_parameters(remove_url_credentials(http_url)), } http_method = request.method diff --git a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py index 9770d259a7..11797ade57 100644 --- a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py @@ -260,6 +260,7 @@ def client_response_hook(span: Span, scope: dict[str, Any], message: dict[str, A normalise_response_header_name, remove_url_credentials, sanitize_method, + redact_query_parameters, ) @@ -355,7 +356,7 @@ def collect_request_attributes( if _report_old(sem_conv_opt_in_mode): _set_http_url( result, - remove_url_credentials(http_url), + redact_query_parameters(remove_url_credentials(http_url)), _StabilityMode.DEFAULT, ) http_method = scope.get("method", "") diff --git a/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py b/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py index ef70c244e7..0a9347a46e 100644 --- a/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py @@ -244,7 +244,7 @@ async def async_response_hook(span, request, response): from opentelemetry.trace import SpanKind, Tracer, TracerProvider, get_tracer from opentelemetry.trace.span import Span from opentelemetry.trace.status import StatusCode -from opentelemetry.util.http import remove_url_credentials, sanitize_method +from opentelemetry.util.http import remove_url_credentials, sanitize_method, redact_query_parameters _logger = logging.getLogger(__name__) @@ -298,7 +298,7 @@ def _extract_parameters( # In httpx >= 0.20.0, handle_request receives a Request object request: httpx.Request = args[0] method = request.method.encode() - url = httpx.URL(remove_url_credentials(str(request.url))) + url = httpx.URL(redact_query_parameters(remove_url_credentials(str(request.url)))) headers = request.headers stream = request.stream extensions = request.extensions diff --git a/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py b/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py index 9557805f9f..ac69d222a6 100644 --- a/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py @@ -149,6 +149,7 @@ def response_hook(span, request_obj, response): parse_excluded_urls, remove_url_credentials, sanitize_method, + redact_query_parameters, ) from opentelemetry.util.http.httplib import set_ip_on_next_http_connection @@ -232,7 +233,8 @@ def get_or_create_headers(): method = request.method span_name = get_default_span_name(method) - url = remove_url_credentials(request.url) + url = redact_query_parameters(remove_url_credentials(request.url)) + span_attributes = {} _set_http_method( diff --git a/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py b/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py index fa0e53bf95..d041661dde 100644 --- a/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py +++ b/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py @@ -22,7 +22,7 @@ from opentelemetry.propagate import inject from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import remove_url_credentials +from opentelemetry.util.http import remove_url_credentials, redact_query_parameters def _normalize_request(args, kwargs): @@ -75,7 +75,7 @@ def fetch_async( if span.is_recording(): attributes = { - SpanAttributes.HTTP_URL: remove_url_credentials(request.url), + SpanAttributes.HTTP_URL: redact_query_parameters(remove_url_credentials(request.url)), SpanAttributes.HTTP_METHOD: request.method, } for key, value in attributes.items(): @@ -161,7 +161,7 @@ def _finish_tracing_callback( def _create_metric_attributes(response): metric_attributes = { SpanAttributes.HTTP_STATUS_CODE: response.code, - SpanAttributes.HTTP_URL: remove_url_credentials(response.request.url), + SpanAttributes.HTTP_URL: redact_query_parameters(remove_url_credentials(response.request.url)), SpanAttributes.HTTP_METHOD: response.request.method, } diff --git a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py index a80e6d0700..0ebf5240e2 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py @@ -137,6 +137,7 @@ def response_hook(span: Span, request: Request, response: HTTPResponse): parse_excluded_urls, remove_url_credentials, sanitize_method, + redact_query_parameters, ) from opentelemetry.util.types import Attributes @@ -257,7 +258,7 @@ def _instrumented_open_call( span_name = _get_span_name(method) - url = remove_url_credentials(url) + url = redact_query_parameters(remove_url_credentials(url)) data = getattr(request, "data", None) request_size = 0 if data is None else len(data) diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py index a0a2ce9a35..d2f0e75a8e 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py @@ -270,6 +270,7 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he normalise_response_header_name, remove_url_credentials, sanitize_method, + redact_query_parameters ) if TYPE_CHECKING: @@ -365,9 +366,9 @@ def collect_request_attributes( else: # old semconv v1.20.0 if _report_old(sem_conv_opt_in_mode): - result[SpanAttributes.HTTP_URL] = remove_url_credentials( + result[SpanAttributes.HTTP_URL] = redact_query_parameters(remove_url_credentials( wsgiref_util.request_uri(environ) - ) + )) remote_addr = environ.get("REMOTE_ADDR") if remote_addr: diff --git a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py index 71a6403a7d..9f9f91e5b8 100644 --- a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py +++ b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py @@ -58,6 +58,7 @@ SpanAttributes.HTTP_SERVER_NAME, } +PARAMS_TO_REDACT = ["AWSAccessKeyId", "Signature", "sig", "X-Goog-Signature"] class ExcludeList: """Class to exclude certain paths (given as a list of regexes) from tracing requests""" @@ -159,23 +160,23 @@ def parse_excluded_urls(excluded_urls: str) -> ExcludeList: def remove_url_credentials(url: str) -> str: - """Given a string url, remove the username and password only if it is a valid url""" - + """ Given a string url, replace the username and password with the keyword "REDACTED "only if it is a valid url""" try: parsed = urlparse(url) if all([parsed.scheme, parsed.netloc]): # checks for valid url - parsed_url = urlparse(url) - _, _, netloc = parsed.netloc.rpartition("@") - return urlunparse( - ( - parsed_url.scheme, - netloc, - parsed_url.path, - parsed_url.params, - parsed_url.query, - parsed_url.fragment, + if '@' in parsed.netloc: + _, _, host = parsed.netloc.rpartition("@") + new_netloc = "REDACTED:REDACTED@" + host + return urlunparse( + ( + parsed.scheme, + new_netloc, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ) ) - ) except ValueError: # an unparsable url was passed pass return url @@ -255,3 +256,43 @@ def _parse_url_query(url: str): path = parsed_url.path query_params = parsed_url.query return path, query_params + +def redact_query_parameters(url: str) -> str: + """Given a string url, redact sensitive query parameter values""" + try: + parsed = urlparse(url) + if not parsed.query: # No query parameters to redact + return url + + # Check if any of the sensitive parameters are in the query + has_sensitive_params = any(param + "=" in parsed.query for param in PARAMS_TO_REDACT) + if not has_sensitive_params: + return url + + # Process query parameters + query_parts: list[str] = [] + for query_part in parsed.query.split("&"): + if "=" in query_part: + param_name, _ = query_part.split("=", 1) # Parameter name and value + if param_name in PARAMS_TO_REDACT: + query_parts.append(f"{param_name}=REDACTED") + else: + query_parts.append(query_part) + else: + query_parts.append(query_part) # Handle params with no value + + # Reconstruct the URL with redacted query parameters + redacted_query = "&".join(query_parts) + return urlunparse( + ( + parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + redacted_query, + parsed.fragment, + ) + ) + except ValueError: # an unparsable url was passed + pass + return url \ No newline at end of file diff --git a/util/opentelemetry-util-http/tests/test_redact_query_parameters.py b/util/opentelemetry-util-http/tests/test_redact_query_parameters.py new file mode 100644 index 0000000000..ae0a873a62 --- /dev/null +++ b/util/opentelemetry-util-http/tests/test_redact_query_parameters.py @@ -0,0 +1,51 @@ +import unittest +from opentelemetry.util.http import redact_query_parameters + +class TestRedactSensitiveInfo(unittest.TestCase): + def test_redact_goog_signature(self): + url = "https://www.example.com/path?color=blue&X-Goog-Signature=secret" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&X-Goog-Signature=REDACTED") + + def test_no_redaction_needed(self): + url = "https://www.example.com/path?color=blue&query=secret" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&query=secret") + + def test_no_query_parameters(self): + url = "https://www.example.com/path" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path") + + def test_empty_query_string(self): + url = "https://www.example.com/path?" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?") + + def test_empty_url(self): + url = "" + self.assertEqual(redact_query_parameters(url), "") + + def test_redact_aws_access_key_id(self): + url = "https://www.example.com/path?color=blue&AWSAccessKeyId=secrets" + self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&AWSAccessKeyId=REDACTED") + + def test_api_key_not_in_redact_list(self): + url = "https://www.example.com/path?api_key=secret%20key&user=john" + self.assertNotEqual(redact_query_parameters(url), "https://www.example.com/path?api_key=REDACTED&user=john") + + def test_password_key_not_in_redact_list(self): + url = "https://api.example.com?key=abc&password=123&user=admin" + self.assertNotEqual(redact_query_parameters(url), "https://api.example.com?key=REDACTED&password=REDACTED&user=admin") + + def test_url_with_at_symbol_in_path_and_query(self): + url = "https://github.com/p@th?foo=b@r" + self.assertEqual(redact_query_parameters(url), "https://github.com/p@th?foo=b@r") + + def test_aws_access_key_with_real_format(self): + url = "https://microsoft.com?AWSAccessKeyId=AKIAIOSFODNN7" + self.assertEqual(redact_query_parameters(url), "https://microsoft.com?AWSAccessKeyId=REDACTED") + + def test_signature_parameter(self): + url = "https://service.com?sig=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0" + self.assertEqual(redact_query_parameters(url), "https://service.com?sig=REDACTED") + + def test_signature_with_url_encoding(self): + url = "https://service.com?Signature=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0%3A377" + self.assertEqual(redact_query_parameters(url), "https://service.com?Signature=REDACTED") \ No newline at end of file diff --git a/util/opentelemetry-util-http/tests/test_remove_credentials.py b/util/opentelemetry-util-http/tests/test_remove_credentials.py index 2e50f1495f..c874ab820d 100644 --- a/util/opentelemetry-util-http/tests/test_remove_credentials.py +++ b/util/opentelemetry-util-http/tests/test_remove_credentials.py @@ -10,22 +10,28 @@ def test_remove_no_credentials(self): self.assertEqual(cleaned_url, url) def test_remove_credentials(self): - url = "http://someuser:somepass@opentelemetry.io:8080/test/path?query=value" + url = "http://someuser:somepass@opentelemetry.io:8080/test/path?sig=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://opentelemetry.io:8080/test/path?query=value" + cleaned_url, "http://REDACTED:REDACTED@opentelemetry.io:8080/test/path?sig=value" ) def test_remove_credentials_ipv4_literal(self): url = "http://someuser:somepass@127.0.0.1:8080/test/path?query=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://127.0.0.1:8080/test/path?query=value" + cleaned_url, "http://REDACTED:REDACTED@127.0.0.1:8080/test/path?query=value" ) def test_remove_credentials_ipv6_literal(self): url = "http://someuser:somepass@[::1]:8080/test/path?query=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://[::1]:8080/test/path?query=value" + cleaned_url, "http://REDACTED:REDACTED@[::1]:8080/test/path?query=value" ) + + def test_empty_url(self): + url = "" + cleaned_url = remove_url_credentials(url) + self.assertEqual(cleaned_url, url) + \ No newline at end of file