diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 1bdb47f5d..aac71d85d 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -27,7 +27,7 @@ from datadog.dogstatsd.route import get_default_route from datadog.dogstatsd.container import ContainerID from datadog.util.compat import is_p3k, text -from datadog.util.format import normalize_tags +from datadog.util.format import normalize_tags, normalize_metric_name from datadog.version import __version__ # Logging @@ -318,7 +318,7 @@ def __init__( constant_tags = [] self.constant_tags = constant_tags + env_tags if namespace is not None: - namespace = text(namespace) + namespace = normalize_metric_name(text(namespace)) self.namespace = namespace self.use_ms = use_ms self.default_sample_rate = default_sample_rate @@ -763,7 +763,7 @@ def _serialize_metric(self, metric, metric_type, value, tags, sample_rate=1): # Create/format the metric packet return "%s%s:%s|%s%s%s%s" % ( (self.namespace + ".") if self.namespace else "", - metric, + normalize_metric_name(metric), value, metric_type, ("|@" + text(sample_rate)) if sample_rate != 1 else "", diff --git a/datadog/util/format.py b/datadog/util/format.py index f6b1e96af..554ec2d71 100644 --- a/datadog/util/format.py +++ b/datadog/util/format.py @@ -10,7 +10,8 @@ from datadog.util.compat import conditional_lru_cache TAG_INVALID_CHARS_RE = re.compile(r"[^\w\d_\-:/\.]", re.UNICODE) -TAG_INVALID_CHARS_SUBS = "_" +METRIC_NAME_INVALID_CHARS_RE = re.compile(r"[^\w\d_\.]", re.UNICODE) +INVALID_CHARS_SUBS = "_" def pretty_json(obj): @@ -33,10 +34,15 @@ def force_to_epoch_seconds(epoch_sec_or_dt): @conditional_lru_cache def _normalize_tags_with_cache(tag_list): - return [TAG_INVALID_CHARS_RE.sub(TAG_INVALID_CHARS_SUBS, tag) for tag in tag_list] + return [TAG_INVALID_CHARS_RE.sub(INVALID_CHARS_SUBS, tag) for tag in tag_list] def normalize_tags(tag_list): # We have to turn our input tag list into a non-mutable tuple for it to # be hashable (and thus usable) by the @lru_cache decorator. return _normalize_tags_with_cache(tuple(tag_list)) + + +@conditional_lru_cache +def normalize_metric_name(metric_name): + return METRIC_NAME_INVALID_CHARS_RE.sub(INVALID_CHARS_SUBS, metric_name) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index 658804a19..48c4e32a1 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -294,6 +294,10 @@ def test_gauge(self): self.statsd.gauge('gauge', 123.4) self.assert_equal_telemetry('gauge:123.4|g\n', self.recv(2)) + def test_gauge_with_unescaped_name(self): + self.statsd.gauge('my|new.p3rfect#_gauge', 123.4) + self.assert_equal_telemetry('my_new.p3rfect__gauge:123.4|g\n', self.recv(2)) + def test_counter(self): self.statsd.increment('page.views') self.statsd.flush() diff --git a/tests/unit/util/test_format.py b/tests/unit/util/test_format.py index dc4d6b62b..9ab5d9a1d 100644 --- a/tests/unit/util/test_format.py +++ b/tests/unit/util/test_format.py @@ -6,7 +6,7 @@ import pytest -from datadog.util.format import construct_url, normalize_tags +from datadog.util.format import construct_url, normalize_tags, normalize_metric_name class TestConstructURL: @@ -52,3 +52,19 @@ class TestNormalizeTags: @pytest.mark.parametrize("original_tags,expected_tags", test_data) def test_normalize_tags(self, original_tags, expected_tags): assert normalize_tags(original_tags) == expected_tags + +class TestNormalizeMetricName: + """ + Test of the format's `normalize_metric_name` functionality + """ + test_data = [ + ('', ''), + ('just a metric name', 'just_a_metric_name'), + ('xyz.abc!@#$%^&*()0987654321{}}{', 'xyz.abc__________0987654321____'), + ('xyz.abc_123', 'xyz.abc_123'), + ('абśжż西アطر', 'абśжż西アطر'), + ('a😃😃b', 'a__b'), + ] + @pytest.mark.parametrize("original_metric_name,expected_metric_name", test_data) + def test_normalize_metric_name(self, original_metric_name, expected_metric_name): + assert normalize_metric_name(original_metric_name) == expected_metric_name