Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
00b8d4b
Add design doc: Result Set Heartbeat / Keep-Alive
gopalldb Apr 22, 2026
6130304
Implement result set heartbeat / keep-alive (PECOBLR-2321)
gopalldb Apr 22, 2026
d3f0c47
Fix: don't exclude metadata results from heartbeat eligibility
gopalldb Apr 22, 2026
cedf144
Skip heartbeat for direct results (CLOSED state)
gopalldb Apr 22, 2026
5d9aadc
Document async execution heartbeat policy + update eligibility table
gopalldb Apr 22, 2026
fa92347
Add changelog entry for result set heartbeat feature
gopalldb Apr 22, 2026
291be6c
Add heartbeat eligibility tests + skip async PENDING/RUNNING
gopalldb Apr 22, 2026
fe84cc4
Fix thread-safety and robustness issues in heartbeat
gopalldb Apr 22, 2026
53db645
Address should-fix review feedback
gopalldb Apr 22, 2026
994dbc2
Address test gaps + add DEBUG log on heartbeat start
gopalldb Apr 22, 2026
a51bccc
Add e2e integration test for heartbeat against real warehouse
gopalldb Apr 22, 2026
723ce06
Fix all critical and high-severity heartbeat review findings
gopalldb May 11, 2026
7534523
Resolve merge conflict
gopalldb May 11, 2026
d24d62a
Address additional heartbeat review feedback
gopalldb May 11, 2026
7631ee3
Add missing heartbeat tests for concurrency, sentinel flag, and cance…
gopalldb May 11, 2026
469a459
Merge branch 'main' into design/heartbeat-keep-alive
gopalldb May 11, 2026
a037ae1
Proactive heartbeat stop when all data fetched from server
gopalldb May 12, 2026
b881d4c
Add tests for isAllDataFetched() across all implementations
gopalldb May 12, 2026
1910332
Merge branch 'main' into design/heartbeat-keep-alive
gopalldb May 12, 2026
a859117
Merge branch 'main' into design/heartbeat-keep-alive
gopalldb May 12, 2026
458aa5e
Address review feedback on isAllDataFetched and NPE guards
gopalldb May 12, 2026
a40466a
Remove isAllDataFetched — heartbeat stops when next() returns false
gopalldb May 12, 2026
832a1c8
Add coverage tests for heartbeat config and checkStatementAlive
gopalldb May 12, 2026
fc46126
Add more coverage tests to push past 85% threshold
gopalldb May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
3. **For DBSQL warehouses, metadata operations are now powered by SHOW SQL commands.** SQL Exec API mode already was powered by SHOW commands, now the same is true for Thrift server mode as well. To revert to native Thrift metadata RPCs, set `UseQueryForMetadata` to `0`.

### Added
- Added result set heartbeat / keep-alive to prevent server-side result expiry during slow consumption. When enabled via `EnableHeartbeat=1`, the driver periodically polls `GetStatementStatus` (SEA) or `GetOperationStatus` (Thrift) to keep the operation alive while the client reads results. Configurable interval via `HeartbeatIntervalSeconds` (default 60s). Heartbeat automatically stops when results are fully consumed, ResultSet is closed, or the server returns a terminal state. Disabled by default due to cost implications (heartbeats keep the warehouse running).

### Updated
- `EnableGeoSpatialSupport` no longer requires `EnableComplexDatatypeSupport=1`. Geospatial types (GEOMETRY, GEOGRAPHY) can now be enabled independently of complex type support (ARRAY, MAP, STRUCT).
Expand Down
535 changes: 535 additions & 0 deletions docs/design/HEARTBEAT_KEEP_ALIVE.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class DatabricksConnection implements IDatabricksConnection, IDatabricksC
private final Set<IDatabricksStatementInternal> statementSet = ConcurrentHashMap.newKeySet();
private SQLWarning warnings = null;
private final IDatabricksConnectionContext connectionContext;
private final ResultHeartbeatManager heartbeatManager;

/**
* Creates an instance of Databricks connection for given connection context.
Expand All @@ -49,6 +50,7 @@ public DatabricksConnection(IDatabricksConnectionContext connectionContext)
this.connectionContext = connectionContext;
DatabricksThreadContextHolder.setConnectionContext(connectionContext);
this.session = new DatabricksSession(connectionContext);
this.heartbeatManager = createHeartbeatManager(connectionContext);
}

@VisibleForTesting
Expand All @@ -58,10 +60,26 @@ public DatabricksConnection(
this.connectionContext = connectionContext;
DatabricksThreadContextHolder.setConnectionContext(connectionContext);
this.session = new DatabricksSession(connectionContext, testDatabricksClient);
this.heartbeatManager = createHeartbeatManager(connectionContext);
UserAgentManager.setUserAgent(connectionContext);
TelemetryHelper.updateTelemetryAppName(connectionContext, null);
}

/**
 * Builds the connection-scoped heartbeat manager from the connection configuration.
 *
 * <p>H6 fix: relies solely on {@link IDatabricksConnectionContext} interface methods (no
 * instanceof check on a concrete context type) so mocks and alternate implementations can also
 * enable heartbeat.
 *
 * @param connectionContext source of the heartbeat enable flag and interval
 * @return a new {@link ResultHeartbeatManager} when heartbeat is enabled, otherwise {@code null}
 */
private static ResultHeartbeatManager createHeartbeatManager(
    IDatabricksConnectionContext connectionContext) {
  return connectionContext.isHeartbeatEnabled()
      ? new ResultHeartbeatManager(connectionContext.getHeartbeatIntervalSeconds())
      : null;
}

/**
 * Returns this connection's heartbeat manager. Package-visible for result sets and statements.
 *
 * @return the manager, or {@code null} when heartbeat is disabled for this connection
 */
ResultHeartbeatManager getHeartbeatManager() {
  return this.heartbeatManager;
}

@Override
public void open() throws SQLException {
this.session.open();
Expand Down Expand Up @@ -416,6 +434,11 @@ public void rollback() throws SQLException {
@Override
public void close() throws SQLException {
LOGGER.debug("public void close()");
// H5 fix: Shutdown heartbeat FIRST — prevents RPCs on closing connections and
// ensures shutdown runs even if statement.close() throws
if (heartbeatManager != null) {
heartbeatManager.shutdown();
}
for (IDatabricksStatementInternal statement : statementSet) {
statement.close(false);
statementSet.remove(statement);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,26 @@ public boolean isTelemetryEnabled() {
return getParameter(DatabricksJdbcUrlParams.ENABLE_TELEMETRY).equals("1");
}

/**
 * Whether result-set heartbeat / keep-alive is enabled ({@code EnableHeartbeat=1}).
 *
 * <p>Uses a constant-first comparison so a {@code null} parameter value reads as disabled
 * rather than throwing {@link NullPointerException}.
 *
 * @return {@code true} when the {@code EnableHeartbeat} URL parameter equals {@code "1"}
 */
public boolean isHeartbeatEnabled() {
  return "1".equals(getParameter(DatabricksJdbcUrlParams.ENABLE_HEARTBEAT));
}

/**
 * Returns the heartbeat polling interval in seconds ({@code HeartbeatIntervalSeconds}).
 *
 * <p>Falls back to the 60-second default when the configured value is non-numeric or
 * non-positive, and warns when the interval exceeds one hour (the server may expire results
 * between ticks at such a coarse cadence).
 *
 * @return a positive interval in seconds (default 60)
 */
public int getHeartbeatIntervalSeconds() {
  final int defaultIntervalSeconds = 60;
  final int warnThresholdSeconds = 3600; // 1 hour
  String rawValue = getParameter(DatabricksJdbcUrlParams.HEARTBEAT_INTERVAL_SECONDS);
  int interval;
  try {
    interval = Integer.parseInt(rawValue);
  } catch (NumberFormatException e) {
    // A malformed URL parameter should degrade to the default, not propagate an
    // unchecked exception out of connection configuration.
    LOGGER.warn(
        "HeartbeatIntervalSeconds is not a valid integer: {}. Using default 60.", rawValue);
    return defaultIntervalSeconds;
  }
  if (interval <= 0) {
    LOGGER.warn("HeartbeatIntervalSeconds must be positive, got {}. Using default 60.", interval);
    return defaultIntervalSeconds;
  }
  if (interval > warnThresholdSeconds) {
    LOGGER.warn(
        "HeartbeatIntervalSeconds {} is very large (> 1 hour). "
            + "Heartbeat may not keep the operation alive.",
        interval);
  }
  return interval;
}

@Override
public String getVolumeOperationAllowedPaths() {
return getParameter(
Expand Down
180 changes: 179 additions & 1 deletion src/main/java/com/databricks/jdbc/api/impl/DatabricksResultSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.databricks.jdbc.common.Nullable;
import com.databricks.jdbc.common.StatementType;
import com.databricks.jdbc.common.util.WarningUtil;
import com.databricks.jdbc.dbclient.IDatabricksClient;
import com.databricks.jdbc.dbclient.impl.common.StatementId;
import com.databricks.jdbc.exception.DatabricksParsingException;
import com.databricks.jdbc.exception.DatabricksSQLException;
Expand Down Expand Up @@ -123,6 +124,7 @@ public DatabricksResultSet(
this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement);
this.isClosed = false;
this.wasNull = false;
startHeartbeatIfEnabled();
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[CRITICAL] Heartbeat never starts on Thrift result sets — feature is dead-on-arrival on the Thrift path

The Thrift constructor (this method, lines 153-196) does not call startHeartbeatIfEnabled(). Only the SEA constructor at line 127 does.

All Thrift result sets are constructed via DatabricksThriftAccessor (executeStatement, getStatementResult, etc.) using this constructor — so on a transportMode=thrift connection with EnableHeartbeat=1, the manager is created and the eligibility logic correctly returns true for THRIFT_INLINE / THRIFT_ARROW_ENABLED, but no heartbeat ever starts.

Per the design doc's eligibility table, Thrift inline (data only on cluster, server-evictable) is one of the most critical scenarios this feature is meant to cover. It's silently broken.

The eligibility tests in ResultSetHeartbeatEligibilityTest.testThriftInlineIsEligible / testThriftArrowIsEligible mock the instance via reflection and bypass the constructor entirely, so they pass while production reality is broken.

Fix: Add startHeartbeatIfEnabled(); at the end of this constructor (line 196). Add a real-constructor smoke test that builds a Thrift DatabricksResultSet via the production constructor and asserts mgr.getActiveHeartbeatCount() == 1.

}

@VisibleForTesting
Expand Down Expand Up @@ -191,6 +193,7 @@ public DatabricksResultSet(
this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement);
this.isClosed = false;
this.wasNull = false;
startHeartbeatIfEnabled(); // C4 fix: Thrift result sets also need heartbeat
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[High] Heartbeat starts on already-closed operations — 10 ticks of failed RPCs per tiny query

The eligibility check at construction time evaluates Thrift FINISHED_STATE → SUCCEEDED (and SEA's analogue for inline results) as eligible. But for tiny inline queries, the server has already closed the operation by the time the heartbeat first ticks.

Empirically verified against pecotestingworkspace (interval=5s for fast turnaround):

Path Query Active heartbeats observed Self-stop at
Thrift HTTP SELECT 1 AS x 1 at t=0,5,10,…,50s t=55s (11 ticks)
SEA (UseThriftClient=0) SELECT 1 AS x 1 at t=0,5,10,…,50s t=55s (11 ticks)

The self-stop after exactly 11 ticks at the 5s interval is the precise signature of the 10-strike-failure path firing. mitmproxy captured the wire-level evidence:

POST 200  /sql/...  (execute)
POST 404 135b  /sql/...   ← heartbeat tick 1
POST 404 135b  /sql/...   ← heartbeat tick 2
...                       ← repeating 10×
POST 404 135b  /sql/...   ← heartbeat tick 10
POST 200 (close)

At default HeartbeatIntervalSeconds=60, that's 10 minutes of failed-RPC log noise per tiny query.

Important refinement of the prior reviewer's finding: the bug is not Thrift-direct-results-specific. It affects any inline/small-result query on either protocol. The markDirectResultsReceived ordering is one trigger; SEA's state=SUCCEEDED after a small inline result is another.

Fix options:

  1. Have isHeartbeatEligible() also consider whether the server-side operation is expected to persist (e.g., executionResult.getRowCount() > 0 && hasMoreChunks).
  2. For Thrift: detect direct-results-with-close in DatabricksThriftAccessor and pass state=CLOSED to the constructor (mirror SEA's CLOSED-state mapping for closed ops).
  3. For SEA: examine response.getResult() — if chunk_index == 0 && next_chunk_index == null, the result is fully delivered and the op is closed; mark ineligible.

}

/* Constructing results for getUDTs, getTypeInfo, getProcedures metadata calls */
Expand Down Expand Up @@ -278,23 +281,198 @@ public DatabricksResultSet(
/**
 * Advances the cursor to the next row of the current execution result.
 *
 * <p>When iteration is exhausted, the keep-alive heartbeat for this statement is stopped —
 * there is nothing left server-side worth keeping alive.
 *
 * @return {@code true} if a row is available, {@code false} once results are exhausted
 * @throws SQLException if the result set is closed, or no result data is available yet (e.g.
 *     async execution before {@code getExecutionResult()})
 */
@Override
public boolean next() throws SQLException {
  checkIfClosed();
  if (executionResult == null) {
    throw new DatabricksSQLException(
        "Cannot iterate: no result data available. "
            + "For async execution, call getExecutionResult() first.",
        DatabricksDriverErrorCode.INVALID_STATE);
  }
  final boolean advanced = executionResult.next();
  if (cachedTelemetryCollector != null) {
    cachedTelemetryCollector.recordResultSetIteration(
        statementId.toSQLExecStatementId(), resultSetMetaData.getChunkCount(), advanced);
  }
  if (!advanced) {
    // Fully consumed — proactively release the server-side keep-alive.
    stopHeartbeat();
  }
  return advanced;
}

/**
 * Closes this result set.
 *
 * <p>The heartbeat is stopped before any teardown so no keep-alive RPC races the close, then the
 * underlying execution result is released and the parent statement is notified.
 *
 * @throws DatabricksSQLException if releasing the underlying execution result fails
 */
@Override
public void close() throws DatabricksSQLException {
  stopHeartbeat();
  this.isClosed = true;
  if (this.executionResult != null) {
    this.executionResult.close();
  }
  if (this.parentStatement != null) {
    this.parentStatement.handleResultSetClose(this);
  }
}

/** Starts heartbeat polling if enabled on the connection and this result set is eligible. */
private void startHeartbeatIfEnabled() {
if (parentStatement == null || statementId == null) {
return;
}
if (!isHeartbeatEligible()) {
return;
}

try {
// C3 fix: Use JDBC unwrap() to handle pooled connection wrappers (HikariCP, DBCP)
java.sql.Connection rawConn = parentStatement.getStatement().getConnection();
DatabricksConnection conn;
if (rawConn instanceof DatabricksConnection) {
conn = (DatabricksConnection) rawConn;
} else if (rawConn.isWrapperFor(DatabricksConnection.class)) {
conn = rawConn.unwrap(DatabricksConnection.class);
} else {
LOGGER.debug("Cannot start heartbeat: connection is not a DatabricksConnection");
return;
}

ResultHeartbeatManager mgr = conn.getHeartbeatManager();
if (mgr == null) {
return; // heartbeat not enabled
}

// C2 fix: Capture only what the lambda needs — avoid capturing 'this' to prevent
// abandoned ResultSets from keeping the warehouse alive via heartbeat.
// Note: capturing 'client' retains a reference to the session/connection. If the
// connection is GC'd without close(), heartbeat RPCs will fail and self-stop after
// maxConsecutiveFailures (10 ticks, ~10 min at 60s interval). Acceptable tradeoff.
final IDatabricksClient client = conn.getSession().getDatabricksClient();
final StatementId capturedStatementId = this.statementId;
final int maxConsecutiveFailures = 10;
final java.util.concurrent.atomic.AtomicInteger consecutiveFailures =
new java.util.concurrent.atomic.AtomicInteger(0);
// C1 fix: Read the stopped flag from the manager on each tick instead of pre-capturing.
// Pre-capturing caused an orphan-flag bug: startHeartbeat() internally calls
// stopHeartbeat() which removes and replaces the flag, leaving the lambda with a
// permanently-true reference. Reading from the manager each tick always gets the
// current flag.
final ResultHeartbeatManager capturedMgr = mgr;

Runnable heartbeatTask =
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[CRITICAL] Lambda strong-captures this — abandoned ResultSet keeps warehouse alive forever

This lambda invokes stopHeartbeat() (instance method, line 342, 373) and reads statementId (instance field, lines 336/340/352/353/358/367/369). Both implicitly capture this — the entire DatabricksResultSet, including executionResult (Arrow buffers, chunk providers, potentially MB of cached row data).

The future is held in ResultHeartbeatManager.activeHeartbeats for the connection's lifetime. So:

  • A user that does stmt.executeQuery(...).next() once and abandons the ResultSet reference (a real-world bug, but a JDBC driver shouldn't amplify it) will:
    • Never trigger next()→false or close() (the only auto-stop paths)
    • Have the entire ResultSet and its data retained until Connection.close() — typically hours in pooled environments
    • Have the heartbeat poll forever, holding the warehouse open and accumulating cost
  • This is the exact "cost forever" failure mode the design doc Requirements §3 explicitly tries to prevent.
  • It is also a denial-of-service amplifier: an app opening 10k orphaned result sets per hour holds 10k Arrow batches in heap until Connection.close().

The C# ADBC reference avoids this: its poller is per-statement with linked cancellation, so even GC of the statement helps. The Java implementation here is connection-scoped, so GC of the ResultSet alone won't help — the future keeps a hard reference back to the ResultSet.

Fix: Don't capture this. Pull statementId and mgr (or just Runnable stopFn = () -> mgr.stopHeartbeat(localStatementId)) into locals so the lambda has no implicit this reference. Verify with javap -p -c (no synthetic this$0 field on the lambda class) or a simple unit test that holds a WeakReference<DatabricksResultSet> and asserts it's collectable after the strong reference is dropped.

() -> {
// C1 fix: read current flag each tick
java.util.concurrent.atomic.AtomicBoolean stopped =
capturedMgr.getStoppedFlag(capturedStatementId);
if (stopped.get()) {
return; // client/session may be closed, skip RPC
}
try {
boolean alive = client.checkStatementAlive(capturedStatementId);
consecutiveFailures.set(0); // reset on success
if (!alive) {
LOGGER.info(
"Heartbeat detected terminal state for statement {}, stopping",
capturedStatementId);
capturedMgr.stopHeartbeat(capturedStatementId);
}
} catch (Exception e) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Medium] catch (Exception) misses Error — silent heartbeat death without log or cleanup

The heartbeat lambda at line 374 catches Exception, not Throwable. Per the ScheduledExecutorService.scheduleWithFixedDelay javadoc:

If any execution of the task encounters an exception, subsequent executions are suppressed.

Error subclasses (OutOfMemoryError, NoClassDefFoundError, etc.) are not Exception subclasses, so they escape the catch — the scheduler then suppresses the recurring task.

Empirically demonstrated: a JUnit test wires a task that throws Error to ResultHeartbeatManager.startHeartbeat. After 3.5s with a 1s interval:

  • ticks = 1 (only one execution; the rest suppressed)
  • manager.getActiveHeartbeatCount() = 1 (entry leaked in activeHeartbeats)

The consequence is worse than swallowing exceptions: there's no consecutiveFailures increment, no max-failures WARN, no mgr.stopHeartbeat() cleanup. The heartbeat silently dies and the user has no idea their results may expire.

Fix:

} catch (Throwable t) {
  if (capturedMgr.getStoppedFlag(capturedStatementId).get()) return;
  // ... same failure-counter logic ...
  if (t instanceof Error && !(t instanceof VirtualMachineError)) {
    // log + stop cleanly; VirtualMachineError should still propagate
    capturedMgr.stopHeartbeat(capturedStatementId);
  }
  if (t instanceof VirtualMachineError) throw (VirtualMachineError) t;
}

(Or at minimum, change the catch to Throwable so the existing 10-strike path handles Error like any other failure.)

// Re-read flag — may have been set during the RPC (connection closing)
if (capturedMgr.getStoppedFlag(capturedStatementId).get()) {
return;
}
int failures = consecutiveFailures.incrementAndGet();
if (failures == 1) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Medium] Default SQLFeatureNotSupportedException retries 10× and emits misleading "results may expire" WARN

The default checkStatementAlive here throws SQLFeatureNotSupportedException("Heartbeat not supported by this client"). The heartbeat lambda in DatabricksResultSet.java:374-403 catches Exception (no instanceof short-circuit for the unsupported case) — so it counts each call as a transient failure.

Empirically verified with a JUnit test that builds a no-override IDatabricksClient via InvocationHandler.invokeDefault:

  • First call throws SQLFeatureNotSupportedException: "Heartbeat not supported by this client".
  • The heartbeat lambda body (grep on SQLFeatureNotSupportedException / instanceof SQL inside the lambda block) contains zero short-circuit — confirmed absent.

User-visible consequence: if anyone wires a custom IDatabricksClient impl without overriding checkStatementAlive, they get ~10 INFO log lines + 1 misleading WARN over ~10 min:

INFO  Heartbeat failed for statement <id> (first failure): Heartbeat not supported by this client
DEBUG Heartbeat failed for statement <id> (failure 2/10): Heartbeat not supported by this client
...
WARN  Heartbeat stopped for statement <id> after 10 consecutive failures.
      Server-side results may expire. Last error: Heartbeat not supported by this client

The WARN says "results may expire" — but the actual cause is a missing client-side override.

Fix options:

  1. Short-circuit in the lambda: treat SQLFeatureNotSupportedException as permanent → call mgr.stopHeartbeat(...) immediately, log a single WARN naming the offending class:
    catch (Exception e) {
      if (e instanceof SQLFeatureNotSupportedException) {
        LOGGER.warn("Heartbeat permanently disabled for statement {} — "
            + "client {} does not implement checkStatementAlive. "
            + "Set EnableHeartbeat=0 to silence.", capturedStatementId, client.getClass().getName());
        capturedMgr.stopHeartbeat(capturedStatementId);
        return;
      }
      // ... existing transient-failure logic ...
    }
  2. Improve the exception message: include this.getClass().getName() and a remediation hint pointing at EnableHeartbeat=0.

LOGGER.info(
"Heartbeat failed for statement {} (first failure): {}",
capturedStatementId,
e.getMessage());
} else {
LOGGER.debug(
"Heartbeat failed for statement {} (failure {}/{}): {}",
capturedStatementId,
failures,
maxConsecutiveFailures,
e.getMessage());
}
if (failures >= maxConsecutiveFailures) {
LOGGER.warn(
"Heartbeat stopped for statement {} after {} consecutive failures. "
+ "Server-side results may expire. Last error: {}",
capturedStatementId,
failures,
e.getMessage());
capturedMgr.stopHeartbeat(capturedStatementId);
}
}
};

mgr.startHeartbeat(capturedStatementId, heartbeatTask);
LOGGER.debug(
"Heartbeat started for statement {} (resultType={}, interval={}s)",
capturedStatementId,
resultSetType,
mgr.getIntervalSeconds());
} catch (Exception e) {
LOGGER.debug("Failed to start heartbeat: {}", e.getMessage());
}
}

/** Stops the heartbeat for this result set's statement. Idempotent. */
private void stopHeartbeat() {
if (parentStatement == null || statementId == null) {
return;
}
try {
DatabricksConnection conn =
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[High] stopHeartbeat() uses raw (DatabricksConnection) cast — pooled connections silently leak heartbeats

Asymmetric with the C3 fix on the start path. startHeartbeatIfEnabled at lines 322-333 correctly uses instanceof + unwrap(DatabricksConnection.class) to handle HikariCP / DBCP / DatabricksPooledConnection proxies. But stopHeartbeat at lines 421-423 still has:

DatabricksConnection conn =
    (DatabricksConnection) parentStatement.getStatement().getConnection();

On any pooled connection this throws ClassCastException, swallowed silently by the surrounding catch (Exception) { LOGGER.debug(...) }.

Empirical verification:

  • JUnit test inspects this method body — confirms raw cast with no unwrap() fallback.
  • Heartbeats started successfully under a pool (via the start-path unwrap) are never stopped via next() returns false or ResultSet.close(). They only terminate when the physical connection's heartbeatManager.shutdown() runs at pool eviction — which in pooled environments can be hours.

Fix: Extract a private DatabricksConnection resolveDatabricksConnection() helper that mirrors the start-path unwrap logic and call it from both startHeartbeatIfEnabled and stopHeartbeat.

(DatabricksConnection) parentStatement.getStatement().getConnection();
ResultHeartbeatManager mgr = conn.getHeartbeatManager();
if (mgr != null) {
mgr.stopHeartbeat(statementId);
}
} catch (Exception e) {
LOGGER.debug("Failed to stop heartbeat: {}", e.getMessage());
}
}

/**
 * Determines whether this result set is eligible for heartbeat polling. Package-visible for
 * testing.
 *
 * <p>Heartbeat is NOT needed when:
 *
 * <ul>
 *   <li>No execution result (nothing to fetch, also covers async PENDING/RUNNING with no data)
 *   <li>SEA inline (InlineJsonResult): all rows loaded in memory at construction
 *   <li>Update count (DML): no result rows to keep alive
 *   <li>Direct results (CLOSED state): server already closed, data fully delivered
 *   <li>Async execution (PENDING/RUNNING): user controls polling via getExecutionResult()
 * </ul>
 *
 * @return {@code true} when server-side results could expire while the client reads them
 */
boolean isHeartbeatEligible() {
  // Nothing to fetch, fully-inline data, or DML: no server-side rows to keep alive.
  if (executionResult == null
      || resultSetType == ResultSetType.SEA_INLINE
      || statementType == StatementType.UPDATE) {
    return false;
  }
  if (executionStatus == null) {
    return true;
  }
  com.databricks.jdbc.api.ExecutionState state = executionStatus.getExecutionState();
  switch (state) {
    case CLOSED: // direct results — server already closed the operation
    case PENDING: // async execution — user controls polling
    case RUNNING:
      return false;
    default:
      return true;
  }
}

private static TelemetryCollector resolveTelemetryCollector(
IDatabricksStatementInternal parentStatement) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,13 @@ public void close(boolean removeFromSession) throws DatabricksSQLException {
this.connection.closeStatement(this);
}
DatabricksThreadContextHolder.clearStatementInfo();
// Safety net: stop any heartbeat for this statement
if (statementId != null) {
ResultHeartbeatManager mgr = connection.getHeartbeatManager();
if (mgr != null) {
mgr.stopHeartbeat(statementId);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[HIGH] Statement.cancel() does not stop the heartbeat

This cancel() calls cancelStatement on the server but does not call mgr.stopHeartbeat(statementId). Only close() (line 175-181) and resetForNewExecution() (line 982-988) clear the heartbeat.

After cancel() returns, the heartbeat keeps polling against a cancelled operation. In the happy path the server returns CANCELED_STATE and the heartbeat task self-stops on the terminal-state check — fine. But if there's a race or "operation not found" before the server registers the cancel, those errors count as transient failures, churning the 10-strike counter and emitting WARN/INFO log noise for up to ~10 minutes after a successful cancel.

Fix: Add a heartbeat stop to cancel(), mirroring the pattern in close():

public void cancel() throws SQLException {
  ...
  if (statementId != null) {
    ResultHeartbeatManager mgr = connection.getHeartbeatManager();
    if (mgr != null) {
      mgr.stopHeartbeat(statementId);
    }
  }
  this.connection.getSession().getDatabricksClient().cancelStatement(statementId);
  ...
}

}
}
shutDownExecutor();
this.updateCount = -1;
this.isClosed = true;
Expand Down Expand Up @@ -246,6 +253,15 @@ public void cancel() throws SQLException {
LOGGER.debug("public void cancel()");
checkIfClosed();

// H11 fix: Stop heartbeat on cancel — server operation is being cancelled,
// no point continuing to poll it
if (statementId != null) {
ResultHeartbeatManager mgr = connection.getHeartbeatManager();
if (mgr != null) {
mgr.stopHeartbeat(statementId);
}
}

if (statementId != null && !directResultsReceived) {
this.connection.getSession().getDatabricksClient().cancelStatement(statementId);
DatabricksThreadContextHolder.clearStatementInfo();
Expand Down Expand Up @@ -672,6 +688,8 @@ public ResultSet executeAsync(String sql) throws SQLException {
LOGGER.debug("ResultSet executeAsync() for statement {%s}", sql);
checkIfClosed();

// No heartbeat during async wait — the user controls polling via getExecutionResult().
// Heartbeat starts later when the ResultSet is constructed (after getExecutionResult()).
resetForNewExecution();

IDatabricksClient client = connection.getSession().getDatabricksClient();
Expand Down Expand Up @@ -969,6 +987,16 @@ private void resetForNewExecution() {
// when the server returns unexpected responses (e.g., WireMock 404 in tests).
// For direct results, the server already closed the handle.

// Stop heartbeat for the previous execution before clearing state.
// Without this, the old heartbeat (keyed by old statementId) would fail and self-terminate
// after 10 consecutive failures — wasteful and noisy in logs.
if (statementId != null) {
ResultHeartbeatManager mgr = connection.getHeartbeatManager();
if (mgr != null) {
mgr.stopHeartbeat(statementId);
}
}

directResultsReceived = false;

// Per JDBC spec, re-executing a Statement implicitly closes the current ResultSet.
Expand Down
Loading
Loading