From 5104baab87d03e6dc5cd7d00b0278aca8bed47a0 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sat, 25 Oct 2025 21:19:13 +0530 Subject: [PATCH 1/5] Lint fix --- .../Drivers/Databricks/RetryHttpHandler.cs | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 0f7f0b39e4..725d3fe400 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -16,11 +16,13 @@ */ using System; +using System.Diagnostics; using System.Net; using System.Net.Http; using System.Threading; using System.Threading.Tasks; using System.IO; +using Apache.Arrow.Adbc.Tracing; namespace Apache.Arrow.Adbc.Drivers.Databricks { @@ -52,6 +54,8 @@ protected override async Task SendAsync( HttpRequestMessage request, CancellationToken cancellationToken) { + Activity? activity = Activity.Current; + // Clone the request content if it's not null so we can reuse it for retries var requestContentClone = request.Content != null ? await CloneHttpContentAsync(request.Content) @@ -77,11 +81,35 @@ protected override async Task SendAsync( // If it's not a retryable status code, return immediately if (!IsRetryableStatusCode(response.StatusCode)) { - return response; + // Only log retry summary if retries occurred + if (attemptCount > 0) + { + activity?.SetTag("http.retry.total_attempts", attemptCount); + activity?.SetTag("http.retry.total_wait_seconds", totalRetrySeconds); + activity?.SetTag("http.retry.total_elapsed_seconds", + (DateTime.UtcNow - startTime).TotalSeconds); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); + } + return response; // Clean exit - no logging for normal success + } + + // Retry path - log details when we first enter retry path + if (attemptCount == 0) + { + activity?.SetTag("http.retry.max_timeout_seconds", _retryTimeoutSeconds); + activity?.SetTag("http.request.uri", SanitizeUri(request.RequestUri)); + activity?.AddEvent("http.retry.start", [ + new("status_code", (int)response.StatusCode) + ]); } attemptCount++; + activity?.AddEvent("http.retry.attempt_failed", [ + new("attempt_number", attemptCount), + new("status_code", (int)response.StatusCode) + ]); + // Check if we've exceeded the timeout TimeSpan elapsedTime = DateTime.UtcNow - startTime; if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) @@ -101,12 +129,26 @@ protected override async Task SendAsync( { // Use the Retry-After value waitSeconds = retryAfterSeconds; + activity?.AddEvent("http.retry.using_server_retry_after", [ + new("wait_seconds", waitSeconds), + new("attempt_number", attemptCount) + ]); + activity?.SetTag("http.retry.backoff_strategy", "server_retry_after"); lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using server-specified retry after {waitSeconds} seconds. Attempt {attemptCount}."; } else { // Invalid Retry-After value, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); + activity?.AddEvent("http.retry.invalid_retry_after_header", [ + new("retry_after_value", retryAfterValue), + new("fallback_strategy", "exponential_backoff") + ]); + activity?.AddEvent("http.retry.using_exponential_backoff", [ + new("wait_seconds", waitSeconds), + new("attempt_number", attemptCount) + ]); + activity?.SetTag("http.retry.backoff_strategy", "exponential"); lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Invalid Retry-After header, using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } } @@ -114,6 +156,11 @@ protected override async Task SendAsync( { // No Retry-After header, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); + activity?.AddEvent("http.retry.using_exponential_backoff", [ + new("wait_seconds", waitSeconds), + new("attempt_number", attemptCount) + ]); + activity?.SetTag("http.retry.backoff_strategy", "exponential"); lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } @@ -131,19 +178,44 @@ protected override async Task SendAsync( break; } + activity?.AddEvent("http.retry.waiting", [ + new("wait_seconds", waitSeconds), + new("attempt_number", attemptCount) + ]); + // Wait for the calculated time await Task.Delay(TimeSpan.FromSeconds(waitSeconds), cancellationToken); + // Update metrics tags + activity?.SetTag("http.retry.attempt_count", attemptCount); + activity?.SetTag("http.retry.total_wait_seconds", totalRetrySeconds); + // Increase backoff for next attempt (exponential backoff) currentBackoffSeconds = Math.Min(currentBackoffSeconds * 2, _maxBackoffSeconds); } while (!cancellationToken.IsCancellationRequested); + // Set final summary tags (only reached if retries exhausted) + activity?.SetTag("http.retry.total_attempts", attemptCount); + activity?.SetTag("http.retry.total_elapsed_seconds", + (DateTime.UtcNow - startTime).TotalSeconds); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); + // If we get here, we've either exceeded the timeout or been cancelled if (cancellationToken.IsCancellationRequested) { + activity?.AddEvent("http.retry.cancelled", [ + new("total_attempts", attemptCount) + ]); + activity?.SetTag("http.retry.outcome", "cancelled"); throw new OperationCanceledException("Request cancelled during retry wait", cancellationToken); } + // Timeout exceeded + activity?.AddEvent("http.retry.timeout_exceeded", [ + new("total_attempts", attemptCount), + new("configured_timeout_seconds", _retryTimeoutSeconds) + ]); + activity?.SetTag("http.retry.outcome", "timeout_exceeded"); throw new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError) .SetSqlState("08001"); } @@ -191,5 +263,15 @@ private static async Task CloneHttpContentAsync(HttpContent content } return clone; } + + /// + /// Sanitizes a URI for logging by removing query parameters and sensitive information. + /// + private static string SanitizeUri(Uri? uri) + { + if (uri == null) return "(null)"; + // Return only scheme, host, port, and path + return $"{uri.Scheme}://{uri.Host}{(uri.IsDefaultPort ? "" : $":{uri.Port}")}{uri.AbsolutePath}"; + } } } From c77d024f8fef5bf5ebf4db09f64d4134ba7f1fec Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Sun, 26 Oct 2025 00:57:29 +0530 Subject: [PATCH 2/5] Minimised logging for the retry handlers --- .../Drivers/Databricks/RetryHttpHandler.cs | 79 ++----------------- 1 file changed, 6 insertions(+), 73 deletions(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 725d3fe400..35a715cae4 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -63,7 +63,6 @@ protected override async Task SendAsync( HttpResponseMessage response; string? lastErrorMessage = null; - DateTime startTime = DateTime.UtcNow; int attemptCount = 0; int currentBackoffSeconds = _initialBackoffSeconds; int totalRetrySeconds = 0; @@ -85,34 +84,15 @@ protected override async Task SendAsync( if (attemptCount > 0) { activity?.SetTag("http.retry.total_attempts", attemptCount); - activity?.SetTag("http.retry.total_wait_seconds", totalRetrySeconds); - activity?.SetTag("http.retry.total_elapsed_seconds", - (DateTime.UtcNow - startTime).TotalSeconds); activity?.SetTag("http.response.status_code", (int)response.StatusCode); } - return response; // Clean exit - no logging for normal success - } - - // Retry path - log details when we first enter retry path - if (attemptCount == 0) - { - activity?.SetTag("http.retry.max_timeout_seconds", _retryTimeoutSeconds); - activity?.SetTag("http.request.uri", SanitizeUri(request.RequestUri)); - activity?.AddEvent("http.retry.start", [ - new("status_code", (int)response.StatusCode) - ]); + return response; } attemptCount++; - activity?.AddEvent("http.retry.attempt_failed", [ - new("attempt_number", attemptCount), - new("status_code", (int)response.StatusCode) - ]); - - // Check if we've exceeded the timeout - TimeSpan elapsedTime = DateTime.UtcNow - startTime; - if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) + // Check if we've exceeded the total wait time + if (_retryTimeoutSeconds > 0 && totalRetrySeconds > _retryTimeoutSeconds) { // We've exceeded the timeout, so break out of the loop break; @@ -129,39 +109,20 @@ protected override async Task SendAsync( { // Use the Retry-After value waitSeconds = retryAfterSeconds; - activity?.AddEvent("http.retry.using_server_retry_after", [ - new("wait_seconds", waitSeconds), - new("attempt_number", attemptCount) - ]); - activity?.SetTag("http.retry.backoff_strategy", "server_retry_after"); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using server-specified retry after {waitSeconds} seconds. Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; } else { // Invalid Retry-After value, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - activity?.AddEvent("http.retry.invalid_retry_after_header", [ - new("retry_after_value", retryAfterValue), - new("fallback_strategy", "exponential_backoff") - ]); - activity?.AddEvent("http.retry.using_exponential_backoff", [ - new("wait_seconds", waitSeconds), - new("attempt_number", attemptCount) - ]); - activity?.SetTag("http.retry.backoff_strategy", "exponential"); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Invalid Retry-After header, using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; } } else { // No Retry-After header, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - activity?.AddEvent("http.retry.using_exponential_backoff", [ - new("wait_seconds", waitSeconds), - new("attempt_number", attemptCount) - ]); - activity?.SetTag("http.retry.backoff_strategy", "exponential"); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; } // Dispose the response before retrying @@ -178,43 +139,24 @@ protected override async Task SendAsync( break; } - activity?.AddEvent("http.retry.waiting", [ - new("wait_seconds", waitSeconds), - new("attempt_number", attemptCount) - ]); - // Wait for the calculated time await Task.Delay(TimeSpan.FromSeconds(waitSeconds), cancellationToken); - // Update metrics tags - activity?.SetTag("http.retry.attempt_count", attemptCount); - activity?.SetTag("http.retry.total_wait_seconds", totalRetrySeconds); - // Increase backoff for next attempt (exponential backoff) currentBackoffSeconds = Math.Min(currentBackoffSeconds * 2, _maxBackoffSeconds); } while (!cancellationToken.IsCancellationRequested); - // Set final summary tags (only reached if retries exhausted) activity?.SetTag("http.retry.total_attempts", attemptCount); - activity?.SetTag("http.retry.total_elapsed_seconds", - (DateTime.UtcNow - startTime).TotalSeconds); activity?.SetTag("http.response.status_code", (int)response.StatusCode); // If we get here, we've either exceeded the timeout or been cancelled if (cancellationToken.IsCancellationRequested) { - activity?.AddEvent("http.retry.cancelled", [ - new("total_attempts", attemptCount) - ]); activity?.SetTag("http.retry.outcome", "cancelled"); throw new OperationCanceledException("Request cancelled during retry wait", cancellationToken); } // Timeout exceeded - activity?.AddEvent("http.retry.timeout_exceeded", [ - new("total_attempts", attemptCount), - new("configured_timeout_seconds", _retryTimeoutSeconds) - ]); activity?.SetTag("http.retry.outcome", "timeout_exceeded"); throw new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError) .SetSqlState("08001"); @@ -264,14 +206,5 @@ private static async Task CloneHttpContentAsync(HttpContent content return clone; } - /// - /// Sanitizes a URI for logging by removing query parameters and sensitive information. - /// - private static string SanitizeUri(Uri? uri) - { - if (uri == null) return "(null)"; - // Return only scheme, host, port, and path - return $"{uri.Scheme}://{uri.Host}{(uri.IsDefaultPort ? "" : $":{uri.Port}")}{uri.AbsolutePath}"; - } } } From 165bb89e81856db98c2ffa13a2e90e22c3494773 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 27 Oct 2025 01:20:17 +0530 Subject: [PATCH 3/5] Corrected datetime logic --- csharp/src/Drivers/Databricks/RetryHttpHandler.cs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 35a715cae4..16dbbeb0f4 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -22,7 +22,7 @@ using System.Threading; using System.Threading.Tasks; using System.IO; -using Apache.Arrow.Adbc.Tracing; + namespace Apache.Arrow.Adbc.Drivers.Databricks { @@ -63,6 +63,7 @@ protected override async Task SendAsync( HttpResponseMessage response; string? lastErrorMessage = null; + DateTime startTime = DateTime.UtcNow; int attemptCount = 0; int currentBackoffSeconds = _initialBackoffSeconds; int totalRetrySeconds = 0; @@ -91,8 +92,9 @@ protected override async Task SendAsync( attemptCount++; - // Check if we've exceeded the total wait time - if (_retryTimeoutSeconds > 0 && totalRetrySeconds > _retryTimeoutSeconds) + // Check if we've exceeded the timeout + TimeSpan elapsedTime = DateTime.UtcNow - startTime; + if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) { // We've exceeded the timeout, so break out of the loop break; @@ -109,20 +111,20 @@ protected override async Task SendAsync( { // Use the Retry-After value waitSeconds = retryAfterSeconds; - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using server-specified retry after {waitSeconds} seconds. Attempt {attemptCount}."; } else { // Invalid Retry-After value, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Invalid Retry-After header, using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } } else { // No Retry-After header, use exponential backoff waitSeconds = CalculateBackoffWithJitter(currentBackoffSeconds); - lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Attempt {attemptCount}."; + lastErrorMessage = $"Service temporarily unavailable (HTTP {(int)response.StatusCode}). Using exponential backoff of {waitSeconds} seconds. Attempt {attemptCount}."; } // Dispose the response before retrying From df2f92d43ad0383a02855d5276c2dcf22757074b Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 27 Oct 2025 01:22:10 +0530 Subject: [PATCH 4/5] Lint fix --- csharp/src/Drivers/Databricks/RetryHttpHandler.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 16dbbeb0f4..4abc9c4ba2 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -23,7 +23,6 @@ using System.Threading.Tasks; using System.IO; - namespace Apache.Arrow.Adbc.Drivers.Databricks { /// @@ -207,6 +206,5 @@ private static async Task CloneHttpContentAsync(HttpContent content } return clone; } - } } From 0268e15f8e82a11455a8779131725051e1a679d2 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Mon, 27 Oct 2025 10:52:55 +0530 Subject: [PATCH 5/5] Added logs for each retry attempt --- .../Drivers/Databricks/RetryHttpHandler.cs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs index 4abc9c4ba2..a7aa0f8c21 100644 --- a/csharp/src/Drivers/Databricks/RetryHttpHandler.cs +++ b/csharp/src/Drivers/Databricks/RetryHttpHandler.cs @@ -91,6 +91,9 @@ protected override async Task SendAsync( attemptCount++; + activity?.SetTag("http.retry.attempt", attemptCount); + activity?.SetTag("http.response.status_code", (int)response.StatusCode); + // Check if we've exceeded the timeout TimeSpan elapsedTime = DateTime.UtcNow - startTime; if (_retryTimeoutSeconds > 0 && elapsedTime.TotalSeconds > _retryTimeoutSeconds) @@ -154,13 +157,23 @@ protected override async Task SendAsync( if (cancellationToken.IsCancellationRequested) { activity?.SetTag("http.retry.outcome", "cancelled"); - throw new OperationCanceledException("Request cancelled during retry wait", cancellationToken); + var cancelEx = new OperationCanceledException("Request cancelled during retry wait", cancellationToken); + activity?.AddException(cancelEx, [ + new("error.context", "http.retry.cancelled"), + new("attempts", attemptCount) + ]); } // Timeout exceeded activity?.SetTag("http.retry.outcome", "timeout_exceeded"); - throw new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError) - .SetSqlState("08001"); + var exception = new DatabricksException(lastErrorMessage ?? "Service temporarily unavailable and retry timeout exceeded", AdbcStatusCode.IOError).SetSqlState("08001"); + activity?.AddException(exception, [ + new("error.context", "http.retry.timeout_exceeded"), + new("attempts", attemptCount), + new("total_retry_seconds", totalRetrySeconds), + new("timeout_seconds", _retryTimeoutSeconds) + ]); + throw exception; } ///