Skip to content

WIP - Redact specific url query string values and url credentials in instrumentations #3508

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#3464](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3464))
- `opentelemetry-instrumentation-redis` Add support for redis client-specific instrumentation.
([#3143](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3143))
- `opentelemetry-util-http` Added support for redacting specific url query string values and url credentials in instrumentations
([#3508](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3508))

### Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def response_hook(span: Span, params: typing.Union[
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
from opentelemetry.trace import Span, SpanKind, TracerProvider, get_tracer
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.http import remove_url_credentials, sanitize_method
from opentelemetry.util.http import remove_url_credentials, sanitize_method, redact_query_parameters

_UrlFilterT = typing.Optional[typing.Callable[[yarl.URL], str]]
_RequestHookT = typing.Optional[
Expand Down Expand Up @@ -240,9 +240,9 @@ async def on_request_start(
method = params.method
request_span_name = _get_span_name(method)
request_url = (
remove_url_credentials(trace_config_ctx.url_filter(params.url))
redact_query_parameters(remove_url_credentials(trace_config_ctx.url_filter(params.url)))
if callable(trace_config_ctx.url_filter)
else remove_url_credentials(str(params.url))
else redact_query_parameters(remove_url_credentials(str(params.url)))
)

span_attributes = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async def hello(request):
from opentelemetry.semconv.metrics import MetricInstruments
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.http import get_excluded_urls, remove_url_credentials
from opentelemetry.util.http import get_excluded_urls, remove_url_credentials, redact_query_parameters

_duration_attrs = [
SpanAttributes.HTTP_METHOD,
Expand Down Expand Up @@ -146,7 +146,7 @@ def collect_request_attributes(request: web.Request) -> Dict:
SpanAttributes.HTTP_ROUTE: _get_view_func(request),
SpanAttributes.HTTP_FLAVOR: f"{request.version.major}.{request.version.minor}",
SpanAttributes.HTTP_TARGET: request.path,
SpanAttributes.HTTP_URL: remove_url_credentials(http_url),
SpanAttributes.HTTP_URL: redact_query_parameters(remove_url_credentials(http_url)),
}

http_method = request.method
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ def client_response_hook(span: Span, scope: dict[str, Any], message: dict[str, A
normalise_response_header_name,
remove_url_credentials,
sanitize_method,
redact_query_parameters,
)


Expand Down Expand Up @@ -355,7 +356,7 @@ def collect_request_attributes(
if _report_old(sem_conv_opt_in_mode):
_set_http_url(
result,
remove_url_credentials(http_url),
redact_query_parameters(remove_url_credentials(http_url)),
_StabilityMode.DEFAULT,
)
http_method = scope.get("method", "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ async def async_response_hook(span, request, response):
from opentelemetry.trace import SpanKind, Tracer, TracerProvider, get_tracer
from opentelemetry.trace.span import Span
from opentelemetry.trace.status import StatusCode
from opentelemetry.util.http import remove_url_credentials, sanitize_method
from opentelemetry.util.http import remove_url_credentials, sanitize_method, redact_query_parameters

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -298,7 +298,7 @@ def _extract_parameters(
# In httpx >= 0.20.0, handle_request receives a Request object
request: httpx.Request = args[0]
method = request.method.encode()
url = httpx.URL(remove_url_credentials(str(request.url)))
url = httpx.URL(redact_query_parameters(remove_url_credentials(str(request.url))))
headers = request.headers
stream = request.stream
extensions = request.extensions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def response_hook(span, request_obj, response):
parse_excluded_urls,
remove_url_credentials,
sanitize_method,
redact_query_parameters,
)
from opentelemetry.util.http.httplib import set_ip_on_next_http_connection

Expand Down Expand Up @@ -232,7 +233,8 @@ def get_or_create_headers():
method = request.method
span_name = get_default_span_name(method)

url = remove_url_credentials(request.url)
url = redact_query_parameters(remove_url_credentials(request.url))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think on creating a redact_url function that calls both instead?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that does make sense. I will make changes for it.



span_attributes = {}
_set_http_method(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from opentelemetry.propagate import inject
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.http import remove_url_credentials
from opentelemetry.util.http import remove_url_credentials, redact_query_parameters


def _normalize_request(args, kwargs):
Expand Down Expand Up @@ -75,7 +75,7 @@ def fetch_async(

if span.is_recording():
attributes = {
SpanAttributes.HTTP_URL: remove_url_credentials(request.url),
SpanAttributes.HTTP_URL: redact_query_parameters(remove_url_credentials(request.url)),
SpanAttributes.HTTP_METHOD: request.method,
}
for key, value in attributes.items():
Expand Down Expand Up @@ -161,7 +161,7 @@ def _finish_tracing_callback(
def _create_metric_attributes(response):
metric_attributes = {
SpanAttributes.HTTP_STATUS_CODE: response.code,
SpanAttributes.HTTP_URL: remove_url_credentials(response.request.url),
SpanAttributes.HTTP_URL: redact_query_parameters(remove_url_credentials(response.request.url)),
SpanAttributes.HTTP_METHOD: response.request.method,
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def response_hook(span: Span, request: Request, response: HTTPResponse):
parse_excluded_urls,
remove_url_credentials,
sanitize_method,
redact_query_parameters,
)
from opentelemetry.util.types import Attributes

Expand Down Expand Up @@ -257,7 +258,7 @@ def _instrumented_open_call(

span_name = _get_span_name(method)

url = remove_url_credentials(url)
url = redact_query_parameters(remove_url_credentials(url))

data = getattr(request, "data", None)
request_size = 0 if data is None else len(data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he
normalise_response_header_name,
remove_url_credentials,
sanitize_method,
redact_query_parameters
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -365,9 +366,9 @@ def collect_request_attributes(
else:
# old semconv v1.20.0
if _report_old(sem_conv_opt_in_mode):
result[SpanAttributes.HTTP_URL] = remove_url_credentials(
result[SpanAttributes.HTTP_URL] = redact_query_parameters(remove_url_credentials(
wsgiref_util.request_uri(environ)
)
))

remote_addr = environ.get("REMOTE_ADDR")
if remote_addr:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
SpanAttributes.HTTP_SERVER_NAME,
}

PARAMS_TO_REDACT = ["AWSAccessKeyId", "Signature", "sig", "X-Goog-Signature"]

class ExcludeList:
"""Class to exclude certain paths (given as a list of regexes) from tracing requests"""
Expand Down Expand Up @@ -159,23 +160,23 @@ def parse_excluded_urls(excluded_urls: str) -> ExcludeList:


def remove_url_credentials(url: str) -> str:
"""Given a string url, remove the username and password only if it is a valid url"""

""" Given a string url, replace the username and password with the keyword "REDACTED "only if it is a valid url"""
try:
parsed = urlparse(url)
if all([parsed.scheme, parsed.netloc]): # checks for valid url
parsed_url = urlparse(url)
_, _, netloc = parsed.netloc.rpartition("@")
return urlunparse(
(
parsed_url.scheme,
netloc,
parsed_url.path,
parsed_url.params,
parsed_url.query,
parsed_url.fragment,
if '@' in parsed.netloc:
_, _, host = parsed.netloc.rpartition("@")
new_netloc = "REDACTED:REDACTED@" + host
return urlunparse(
(
parsed.scheme,
new_netloc,
parsed.path,
parsed.params,
parsed.query,
parsed.fragment,
)
)
)
except ValueError: # an unparsable url was passed
pass
return url
Expand Down Expand Up @@ -255,3 +256,43 @@ def _parse_url_query(url: str):
path = parsed_url.path
query_params = parsed_url.query
return path, query_params

def redact_query_parameters(url: str) -> str:
"""Given a string url, redact sensitive query parameter values"""
try:
parsed = urlparse(url)
if not parsed.query: # No query parameters to redact
return url

# Check if any of the sensitive parameters are in the query
has_sensitive_params = any(param + "=" in parsed.query for param in PARAMS_TO_REDACT)
if not has_sensitive_params:
return url

# Process query parameters
query_parts: list[str] = []
for query_part in parsed.query.split("&"):
if "=" in query_part:
param_name, _ = query_part.split("=", 1) # Parameter name and value
if param_name in PARAMS_TO_REDACT:
query_parts.append(f"{param_name}=REDACTED")
else:
query_parts.append(query_part)
else:
query_parts.append(query_part) # Handle params with no value

# Reconstruct the URL with redacted query parameters
redacted_query = "&".join(query_parts)
return urlunparse(
(
parsed.scheme,
parsed.netloc,
parsed.path,
parsed.params,
redacted_query,
parsed.fragment,
)
)
except ValueError: # an unparsable url was passed
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is just for urlparse, maybe do an early return instead so we save one indentation level?

pass
return url
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import unittest
from opentelemetry.util.http import redact_query_parameters

class TestRedactSensitiveInfo(unittest.TestCase):
def test_redact_goog_signature(self):
url = "https://www.example.com/path?color=blue&X-Goog-Signature=secret"
self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&X-Goog-Signature=REDACTED")

def test_no_redaction_needed(self):
url = "https://www.example.com/path?color=blue&query=secret"
self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&query=secret")

def test_no_query_parameters(self):
url = "https://www.example.com/path"
self.assertEqual(redact_query_parameters(url), "https://www.example.com/path")

def test_empty_query_string(self):
url = "https://www.example.com/path?"
self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?")

def test_empty_url(self):
url = ""
self.assertEqual(redact_query_parameters(url), "")

def test_redact_aws_access_key_id(self):
url = "https://www.example.com/path?color=blue&AWSAccessKeyId=secrets"
self.assertEqual(redact_query_parameters(url), "https://www.example.com/path?color=blue&AWSAccessKeyId=REDACTED")

def test_api_key_not_in_redact_list(self):
url = "https://www.example.com/path?api_key=secret%20key&user=john"
self.assertNotEqual(redact_query_parameters(url), "https://www.example.com/path?api_key=REDACTED&user=john")

def test_password_key_not_in_redact_list(self):
url = "https://api.example.com?key=abc&password=123&user=admin"
self.assertNotEqual(redact_query_parameters(url), "https://api.example.com?key=REDACTED&password=REDACTED&user=admin")

def test_url_with_at_symbol_in_path_and_query(self):
url = "https://github.com/p@th?foo=b@r"
self.assertEqual(redact_query_parameters(url), "https://github.com/p@th?foo=b@r")

def test_aws_access_key_with_real_format(self):
url = "https://microsoft.com?AWSAccessKeyId=AKIAIOSFODNN7"
self.assertEqual(redact_query_parameters(url), "https://microsoft.com?AWSAccessKeyId=REDACTED")

def test_signature_parameter(self):
url = "https://service.com?sig=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0"
self.assertEqual(redact_query_parameters(url), "https://service.com?sig=REDACTED")

def test_signature_with_url_encoding(self):
url = "https://service.com?Signature=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0%3A377"
self.assertEqual(redact_query_parameters(url), "https://service.com?Signature=REDACTED")
14 changes: 10 additions & 4 deletions util/opentelemetry-util-http/tests/test_remove_credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,28 @@ def test_remove_no_credentials(self):
self.assertEqual(cleaned_url, url)

def test_remove_credentials(self):
url = "http://someuser:[email protected]:8080/test/path?query=value"
url = "http://someuser:[email protected]:8080/test/path?sig=value"
cleaned_url = remove_url_credentials(url)
self.assertEqual(
cleaned_url, "http://opentelemetry.io:8080/test/path?query=value"
cleaned_url, "http://REDACTED:REDACTED@opentelemetry.io:8080/test/path?sig=value"
)

def test_remove_credentials_ipv4_literal(self):
url = "http://someuser:[email protected]:8080/test/path?query=value"
cleaned_url = remove_url_credentials(url)
self.assertEqual(
cleaned_url, "http://127.0.0.1:8080/test/path?query=value"
cleaned_url, "http://REDACTED:REDACTED@127.0.0.1:8080/test/path?query=value"
)

def test_remove_credentials_ipv6_literal(self):
url = "http://someuser:somepass@[::1]:8080/test/path?query=value"
cleaned_url = remove_url_credentials(url)
self.assertEqual(
cleaned_url, "http://[::1]:8080/test/path?query=value"
cleaned_url, "http://REDACTED:REDACTED@[::1]:8080/test/path?query=value"
)

def test_empty_url(self):
url = ""
cleaned_url = remove_url_credentials(url)
self.assertEqual(cleaned_url, url)