-
-
Notifications
You must be signed in to change notification settings - Fork 61
fix(metrics): gracefully handle DogStatsd client creation failures #7756
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
| import threading | ||
| from typing import Callable, Mapping, Optional, Sequence, Union | ||
|
|
||
|
|
@@ -8,6 +9,8 @@ | |
| from snuba.utils.metrics.backends.abstract import MetricsBackend | ||
| from snuba.utils.metrics.types import Tags | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class DatadogMetricsBackend(MetricsBackend): | ||
| """ | ||
|
|
@@ -33,11 +36,24 @@ def __init__( | |
| self.__thread_state = threading.local() | ||
|
|
||
| @property | ||
| def __client(self) -> DogStatsd: | ||
| def __client(self) -> Optional[DogStatsd]: | ||
| try: | ||
| client = self.__thread_state.client | ||
| except AttributeError: | ||
| client = self.__thread_state.client = self.__client_factory() | ||
| try: | ||
| client = self.__thread_state.client = self.__client_factory() | ||
| except Exception as e: | ||
| # During thread cleanup (e.g., ThreadPoolExecutor shutdown), the client | ||
| # factory may fail or set an exception while returning a value, causing | ||
| # a SystemError. We catch this and log it, returning None to allow | ||
| # metrics calls to fail silently rather than crashing the application. | ||
| logger.warning( | ||
| "Failed to create DogStatsd client in thread %s: %s", | ||
| threading.current_thread().name, | ||
| e, | ||
| exc_info=True, | ||
| ) | ||
| return None | ||
|
Comment on lines
+46
to
+56
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: If Suggested Fix In the Prompt for AI Agent Did we get this right? 👍 / 👎 to inform future reviews. |
||
| return client | ||
|
|
||
| def __normalize_tags(self, tags: Optional[Tags]) -> Optional[Sequence[str]]: | ||
|
|
@@ -53,7 +69,10 @@ def increment( | |
| tags: Optional[Tags] = None, | ||
| unit: Optional[str] = None, | ||
| ) -> None: | ||
| self.__client.increment( | ||
| client = self.__client | ||
| if client is None: | ||
| return | ||
| client.increment( | ||
| name, | ||
| value, | ||
| tags=self.__normalize_tags(tags), | ||
|
|
@@ -67,7 +86,10 @@ def gauge( | |
| tags: Optional[Tags] = None, | ||
| unit: Optional[str] = None, | ||
| ) -> None: | ||
| self.__client.gauge( | ||
| client = self.__client | ||
| if client is None: | ||
| return | ||
| client.gauge( | ||
| name, | ||
| value, | ||
| tags=self.__normalize_tags(tags), | ||
|
|
@@ -81,7 +103,10 @@ def timing( | |
| tags: Optional[Tags] = None, | ||
| unit: Optional[str] = None, | ||
| ) -> None: | ||
| self.__client.timing( | ||
| client = self.__client | ||
| if client is None: | ||
| return | ||
| client.timing( | ||
| name, | ||
| value, | ||
| tags=self.__normalize_tags(tags), | ||
|
|
@@ -95,7 +120,10 @@ def distribution( | |
| tags: Optional[Tags] = None, | ||
| unit: Optional[str] = None, | ||
| ) -> None: | ||
| self.__client.distribution( | ||
| client = self.__client | ||
| if client is None: | ||
| return | ||
| client.distribution( | ||
| name, | ||
| value, | ||
| tags=self.__normalize_tags(tags), | ||
|
|
@@ -110,7 +138,10 @@ def events( | |
| priority: str, | ||
| tags: Optional[Tags] = None, | ||
| ) -> None: | ||
| self.__client.event( | ||
| client = self.__client | ||
| if client is None: | ||
| return | ||
| client.event( | ||
| title=title, | ||
| text=text, | ||
| alert_type=alert_type, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Outer except only catches AttributeError, not SystemError
High Severity
The PR description states the issue is that
`threading.local()` client access during `ThreadPoolExecutor` shutdown causes a C-level `SystemError`. However, the outer `try/except` on lines 40-42 only catches `AttributeError`. If the `SystemError` occurs when reading `self.__thread_state.client` (line 41) — which is exactly the scenario described — it won't be caught, and the crash the PR intends to fix will still happen. The inner `except Exception` only protects the factory call path, not the attribute access path. The outer `except` needs to also catch `Exception` (or at least `SystemError`) to fully address the described issue.