Skip to content

Commit 1b75672

Browse files
authored
Don't create Elasticsearch span names containing document IDs (open-telemetry#705)
* Fix typo: _DEFALT_OP_NAME * Extract ES document ID from URL, put in attributes Elasticsearch creates URLs for index() and delete() before they hit perform_request(). This means there would be many unique span names containing unique document IDs, of the form 'Elasticsearch/indexname/_doc/documentid'. This extracts the document ID from the URL and replaces it with ':id', then puts it in the span's attributes. * Add TODO comment with link to issue * Add CHANGELOG entry * Don't use custom doc types, deprecated in ES 7 * Update tests to match instrumentation
1 parent 4046fdb commit 1b75672

File tree

3 files changed

+43
-20
lines changed

3 files changed

+43
-20
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3030
([679](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/679))
3131
- `opentelemetry-exporter-richconsole` Initial release
3232
([#686](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/686))
33+
- `opentelemetry-instrumentation-elasticsearch` no longer creates unique span names by including document IDs, replaces them with `:id` and puts the value in attribute `elasticsearch.id`
34+
([#705](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/705))
3335
- `opentelemetry-instrumentation-tornado` now sets `http.client_ip` and `tornado.handler` attributes
3436
([#706](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/706))
3537
- `opentelemetry-instrumentation-requests` added exclude urls functionality

instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def response_hook(span, response):
8181
es.get(index='my-index', doc_type='my-type', id=1)
8282
"""
8383

84+
import re
8485
from logging import getLogger
8586
from os import environ
8687
from typing import Collection
@@ -107,7 +108,7 @@ def response_hook(span, response):
107108
"took",
108109
]
109110

110-
_DEFALT_OP_NAME = "request"
111+
_DEFAULT_OP_NAME = "request"
111112

112113

113114
class ElasticsearchInstrumentor(BaseInstrumentor):
@@ -146,6 +147,9 @@ def _uninstrument(self, **kwargs):
146147
unwrap(elasticsearch.Transport, "perform_request")
147148

148149

150+
_regex_doc_url = re.compile(r"/_doc/([^/]+)")
151+
152+
149153
def _wrap_perform_request(
150154
tracer, span_name_prefix, request_hook=None, response_hook=None
151155
):
@@ -161,7 +165,24 @@ def wrapper(wrapped, _, args, kwargs):
161165
len(args),
162166
)
163167

164-
op_name = span_name_prefix + (url or method or _DEFALT_OP_NAME)
168+
op_name = span_name_prefix + (url or method or _DEFAULT_OP_NAME)
169+
doc_id = None
170+
if url:
171+
# TODO: This regex-based solution avoids creating an unbounded number of span names, but should be replaced by instrumenting individual Elasticsearch methods instead of Transport.perform_request()
172+
# A limitation of the regex is that only the '_doc' mapping type is supported. Mapping types are deprecated since Elasticsearch 7
173+
# https://github.com/open-telemetry/opentelemetry-python-contrib/issues/708
174+
match = _regex_doc_url.search(url)
175+
if match is not None:
176+
# Remove the full document ID from the URL
177+
doc_span = match.span()
178+
op_name = (
179+
span_name_prefix
180+
+ url[: doc_span[0]]
181+
+ "/_doc/:id"
182+
+ url[doc_span[1] :]
183+
)
184+
# Put the document ID in attributes
185+
doc_id = match.group(1)
165186
params = kwargs.get("params", {})
166187
body = kwargs.get("body", None)
167188

@@ -184,6 +205,8 @@ def wrapper(wrapped, _, args, kwargs):
184205
attributes[SpanAttributes.DB_STATEMENT] = str(body)
185206
if params:
186207
attributes["elasticsearch.params"] = str(params)
208+
if doc_id:
209+
attributes["elasticsearch.id"] = doc_id
187210
for key, value in attributes.items():
188211
span.set_attribute(key, value)
189212

instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py

+16-18
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def test_instrumentor(self, request_mock):
6464
request_mock.return_value = (1, {}, {})
6565

6666
es = Elasticsearch()
67-
es.index(index="sw", doc_type="people", id=1, body={"name": "adam"})
67+
es.index(index="sw", doc_type="_doc", id=1, body={"name": "adam"})
6868

6969
spans_list = self.get_finished_spans()
7070
self.assertEqual(len(spans_list), 1)
@@ -79,7 +79,7 @@ def test_instrumentor(self, request_mock):
7979
# check that no spans are generated after uninstrument
8080
ElasticsearchInstrumentor().uninstrument()
8181

82-
es.index(index="sw", doc_type="people", id=1, body={"name": "adam"})
82+
es.index(index="sw", doc_type="_doc", id=1, body={"name": "adam"})
8383

8484
spans_list = self.get_finished_spans()
8585
self.assertEqual(len(spans_list), 1)
@@ -119,7 +119,7 @@ def test_prefix_env(self, request_mock):
119119

120120
def _test_prefix(self, prefix):
121121
es = Elasticsearch()
122-
es.index(index="sw", doc_type="people", id=1, body={"name": "adam"})
122+
es.index(index="sw", doc_type="_doc", id=1, body={"name": "adam"})
123123

124124
spans_list = self.get_finished_spans()
125125
self.assertEqual(len(spans_list), 1)
@@ -133,11 +133,12 @@ def test_result_values(self, request_mock):
133133
'{"found": false, "timed_out": true, "took": 7}',
134134
)
135135
es = Elasticsearch()
136-
es.get(index="test-index", doc_type="tweet", id=1)
136+
es.get(index="test-index", doc_type="_doc", id=1)
137137

138138
spans = self.get_finished_spans()
139139

140140
self.assertEqual(1, len(spans))
141+
self.assertEqual(spans[0].name, "Elasticsearch/test-index/_doc/:id")
141142
self.assertEqual("False", spans[0].attributes["elasticsearch.found"])
142143
self.assertEqual(
143144
"True", spans[0].attributes["elasticsearch.timed_out"]
@@ -159,7 +160,7 @@ def test_trace_error_not_found(self, request_mock):
159160
def _test_trace_error(self, code, exc):
160161
es = Elasticsearch()
161162
try:
162-
es.get(index="test-index", doc_type="tweet", id=1)
163+
es.get(index="test-index", doc_type="_doc", id=1)
163164
except Exception: # pylint: disable=broad-except
164165
pass
165166

@@ -176,15 +177,13 @@ def test_parent(self, request_mock):
176177
request_mock.return_value = (1, {}, {})
177178
es = Elasticsearch()
178179
with self.tracer.start_as_current_span("parent"):
179-
es.index(
180-
index="sw", doc_type="people", id=1, body={"name": "adam"}
181-
)
180+
es.index(index="sw", doc_type="_doc", id=1, body={"name": "adam"})
182181

183182
spans = self.get_finished_spans()
184183
self.assertEqual(len(spans), 2)
185184

186185
parent = spans.by_name("parent")
187-
child = spans.by_name("Elasticsearch/sw/people/1")
186+
child = spans.by_name("Elasticsearch/sw/_doc/:id")
188187
self.assertIsNotNone(child.parent)
189188
self.assertEqual(child.parent.span_id, parent.context.span_id)
190189

@@ -198,13 +197,13 @@ def test_multithread(self, request_mock):
198197
# 3. Check the spans got different parents, and are in the expected order.
199198
def target1(parent_span):
200199
with trace.use_span(parent_span):
201-
es.get(index="test-index", doc_type="tweet", id=1)
200+
es.get(index="test-index", doc_type="_doc", id=1)
202201
ev.set()
203202
ev.wait()
204203

205204
def target2():
206205
ev.wait()
207-
es.get(index="test-index", doc_type="tweet", id=2)
206+
es.get(index="test-index", doc_type="_doc", id=2)
208207
ev.set()
209208

210209
with self.tracer.start_as_current_span("parent") as span:
@@ -220,8 +219,8 @@ def target2():
220219
self.assertEqual(3, len(spans))
221220

222221
s1 = spans.by_name("parent")
223-
s2 = spans.by_name("Elasticsearch/test-index/tweet/1")
224-
s3 = spans.by_name("Elasticsearch/test-index/tweet/2")
222+
s2 = spans.by_attr("elasticsearch.id", "1")
223+
s3 = spans.by_attr("elasticsearch.id", "2")
225224

226225
self.assertIsNotNone(s2.parent)
227226
self.assertEqual(s2.parent.span_id, s1.context.span_id)
@@ -343,10 +342,9 @@ def request_hook(span, method, url, kwargs):
343342
)
344343
es = Elasticsearch()
345344
index = "test-index"
346-
doc_type = "tweet"
347345
doc_id = 1
348346
kwargs = {"params": {"test": True}}
349-
es.get(index=index, doc_type=doc_type, id=doc_id, **kwargs)
347+
es.get(index=index, doc_type="_doc", id=doc_id, **kwargs)
350348

351349
spans = self.get_finished_spans()
352350

@@ -355,7 +353,7 @@ def request_hook(span, method, url, kwargs):
355353
"GET", spans[0].attributes[request_hook_method_attribute]
356354
)
357355
self.assertEqual(
358-
f"/{index}/{doc_type}/{doc_id}",
356+
f"/{index}/_doc/{doc_id}",
359357
spans[0].attributes[request_hook_url_attribute],
360358
)
361359
self.assertEqual(
@@ -390,7 +388,7 @@ def response_hook(span, response):
390388
"hits": [
391389
{
392390
"_index": "test-index",
393-
"_type": "tweet",
391+
"_type": "_doc",
394392
"_id": "1",
395393
"_score": 0.18232156,
396394
"_source": {"name": "tester"},
@@ -405,7 +403,7 @@ def response_hook(span, response):
405403
json.dumps(response_payload),
406404
)
407405
es = Elasticsearch()
408-
es.get(index="test-index", doc_type="tweet", id=1)
406+
es.get(index="test-index", doc_type="_doc", id=1)
409407

410408
spans = self.get_finished_spans()
411409

0 commit comments

Comments
 (0)