databricks · gopalldb · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
@@ -11,6 +11,7 @@
 3. **For DBSQL warehouses, metadata operations are now powered by SHOW SQL commands.** SQL Exec API mode already was powered by SHOW commands, now the same is true for Thrift server mode as well. To revert to native Thrift metadata RPCs, set `UseQueryForMetadata` to `0`.
 
 ### Added
+- Added result set heartbeat / keep-alive to prevent server-side result expiry during slow consumption. When enabled via `EnableHeartbeat=1`, the driver periodically polls `GetStatementStatus` (SEA) or `GetOperationStatus` (Thrift) to keep the operation alive while the client reads results. The polling interval is configurable via `HeartbeatIntervalSeconds` (default 60s). The heartbeat stops automatically when results are fully consumed, the ResultSet is closed, or the server returns a terminal state. Disabled by default due to cost implications (heartbeats keep the warehouse running).
 
 ### Updated
 - `EnableGeoSpatialSupport` no longer requires `EnableComplexDatatypeSupport=1`. Geospatial types (GEOMETRY, GEOGRAPHY) can now be enabled independently of complex type support (ARRAY, MAP, STRUCT).

diff --git a/docs/design/HEARTBEAT_KEEP_ALIVE.md b/docs/design/HEARTBEAT_KEEP_ALIVE.md
diff --git a/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnection.java b/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnection.java
@@ -38,6 +38,7 @@ public class DatabricksConnection implements IDatabricksConnection, IDatabricksC
   private final Set<IDatabricksStatementInternal> statementSet = ConcurrentHashMap.newKeySet();
   private SQLWarning warnings = null;
   private final IDatabricksConnectionContext connectionContext;
+  private final ResultHeartbeatManager heartbeatManager;
 
   /**
    * Creates an instance of Databricks connection for given connection context.
@@ -49,6 +50,7 @@ public DatabricksConnection(IDatabricksConnectionContext connectionContext)
     this.connectionContext = connectionContext;
     DatabricksThreadContextHolder.setConnectionContext(connectionContext);
     this.session = new DatabricksSession(connectionContext);
+    this.heartbeatManager = createHeartbeatManager(connectionContext);
   }
 
   @VisibleForTesting
@@ -58,10 +60,26 @@ public DatabricksConnection(
     this.connectionContext = connectionContext;
     DatabricksThreadContextHolder.setConnectionContext(connectionContext);
     this.session = new DatabricksSession(connectionContext, testDatabricksClient);
+    this.heartbeatManager = createHeartbeatManager(connectionContext);
     UserAgentManager.setUserAgent(connectionContext);
     TelemetryHelper.updateTelemetryAppName(connectionContext, null);
   }
 
+  /**
+   * Builds the per-connection heartbeat manager from the connection context.
+   *
+   * @param connectionContext context queried for the heartbeat switch and polling interval
+   * @return a new manager when heartbeat is enabled, otherwise {@code null}
+   */
+  private static ResultHeartbeatManager createHeartbeatManager(
+      IDatabricksConnectionContext connectionContext) {
+    // Query the context through its interface (no instanceof check) so that mocks and
+    // alternate implementations can enable heartbeat as well.
+    if (!connectionContext.isHeartbeatEnabled()) {
+      return null;
+    }
+    final int intervalSeconds = connectionContext.getHeartbeatIntervalSeconds();
+    return new ResultHeartbeatManager(intervalSeconds);
+  }
+
+  /**
+   * Exposes the heartbeat manager owned by this connection.
+   *
+   * @return the manager created when the connection was constructed, or {@code null} when
+   *     heartbeat is disabled
+   */
+  ResultHeartbeatManager getHeartbeatManager() {
+    return this.heartbeatManager;
+  }
+
   @Override
   public void open() throws SQLException {
     this.session.open();
@@ -416,6 +434,11 @@ public void rollback() throws SQLException {
   @Override
   public void close() throws SQLException {
     LOGGER.debug("public void close()");
+    // H5 fix: Shutdown heartbeat FIRST — prevents RPCs on closing connections and
+    // ensures shutdown runs even if statement.close() throws
+    if (heartbeatManager != null) {
+      heartbeatManager.shutdown();
+    }
     for (IDatabricksStatementInternal statement : statementSet) {
       statement.close(false);
       statementSet.remove(statement);

diff --git a/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java b/src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java
@@ -943,6 +943,26 @@ public boolean isTelemetryEnabled() {
     return getParameter(DatabricksJdbcUrlParams.ENABLE_TELEMETRY).equals("1");
   }
 
+  /**
+   * Reports whether result-set heartbeat polling was requested for this connection.
+   *
+   * @return {@code true} only when the {@code ENABLE_HEARTBEAT} URL parameter equals "1"
+   */
+  public boolean isHeartbeatEnabled() {
+    final String heartbeatFlag = getParameter(DatabricksJdbcUrlParams.ENABLE_HEARTBEAT);
+    return heartbeatFlag.equals("1");
+  }
+
+  /**
+   * Resolves the heartbeat polling interval from the {@code HEARTBEAT_INTERVAL_SECONDS} URL
+   * parameter.
+   *
+   * <p>A value that is missing, is not a valid integer, or is zero or negative is replaced by
+   * the 60 second default and a warning is logged. A malformed value therefore no longer
+   * escapes as a {@link NumberFormatException}. Values above one hour are accepted, but a
+   * warning is logged because such a long interval may not keep the operation alive.
+   *
+   * @return the interval in seconds; always positive
+   */
+  public int getHeartbeatIntervalSeconds() {
+    final int defaultIntervalSeconds = 60;
+    final String rawInterval = getParameter(DatabricksJdbcUrlParams.HEARTBEAT_INTERVAL_SECONDS);
+    int interval;
+    try {
+      // trim() tolerates stray whitespace around the number; an absent value is parsed as ""
+      // so that it takes the same fallback path as any other unparsable input.
+      interval = Integer.parseInt(rawInterval == null ? "" : rawInterval.trim());
+    } catch (NumberFormatException e) {
+      LOGGER.warn(
+          "HeartbeatIntervalSeconds must be an integer, got '{}'. Using default 60.",
+          rawInterval);
+      return defaultIntervalSeconds;
+    }
+    if (interval <= 0) {
+      LOGGER.warn("HeartbeatIntervalSeconds must be positive, got {}. Using default 60.", interval);
+      return defaultIntervalSeconds;
+    }
+    if (interval > 3600) {
+      LOGGER.warn(
+          "HeartbeatIntervalSeconds {} is very large (> 1 hour). "
+              + "Heartbeat may not keep the operation alive.",
+          interval);
+    }
+    return interval;
+  }
+
   @Override
   public String getVolumeOperationAllowedPaths() {
     return getParameter(

diff --git a/src/main/java/com/databricks/jdbc/api/impl/DatabricksResultSet.java b/src/main/java/com/databricks/jdbc/api/impl/DatabricksResultSet.java
@@ -21,6 +21,7 @@
 import com.databricks.jdbc.common.Nullable;
 import com.databricks.jdbc.common.StatementType;
 import com.databricks.jdbc.common.util.WarningUtil;
+import com.databricks.jdbc.dbclient.IDatabricksClient;
 import com.databricks.jdbc.dbclient.impl.common.StatementId;
 import com.databricks.jdbc.exception.DatabricksParsingException;
 import com.databricks.jdbc.exception.DatabricksSQLException;
@@ -123,6 +124,7 @@ public DatabricksResultSet(
     this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement);
     this.isClosed = false;
     this.wasNull = false;
+    startHeartbeatIfEnabled();
   }
 
   @VisibleForTesting
@@ -191,6 +193,7 @@ public DatabricksResultSet(
     this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement);
     this.isClosed = false;
     this.wasNull = false;
+    startHeartbeatIfEnabled(); // C4 fix: Thrift result sets also need heartbeat
   }
 
   /* Constructing results for getUDTs, getTypeInfo, getProcedures metadata calls */
@@ -278,23 +281,198 @@ public DatabricksResultSet(
   @Override
   public boolean next() throws SQLException {
     checkIfClosed();
+    // Without an execution result there are no rows to iterate; fail with an explicit
+    // INVALID_STATE error instead of dereferencing a null executionResult below.
+    if (executionResult == null) {
+      throw new DatabricksSQLException(
+          "Cannot iterate: no result data available. "
+              + "For async execution, call getExecutionResult() first.",
+          DatabricksDriverErrorCode.INVALID_STATE);
+    }
     boolean hasNext = this.executionResult.next();
+    // Telemetry is optional: the iteration is recorded only when a collector was resolved.
     if (cachedTelemetryCollector != null) {
       cachedTelemetryCollector.recordResultSetIteration(
           statementId.toSQLExecStatementId(), resultSetMetaData.getChunkCount(), hasNext);
     }
+    // The underlying result reported no further rows, so stop the heartbeat for this
+    // result set's statement.
+    if (!hasNext) {
+      stopHeartbeat();
+    }
     return hasNext;
   }
 
   @Override
   public void close() throws DatabricksSQLException {
+    // Stop heartbeat polling before the underlying result is released.
+    stopHeartbeat();
     isClosed = true;
-    this.executionResult.close();
+    // executionResult may be null (next() guards against the same case), so close it only
+    // when it exists.
+    if (executionResult != null) {
+      executionResult.close();
+    }
+    // Tell the parent statement, when there is one, that this result set was closed.
     if (parentStatement != null) {
       parentStatement.handleResultSetClose(this);
     }
   }
 
+  /**
+   * Starts heartbeat polling if enabled on the connection and this result set is eligible.
+   *
+   * <p>Invoked from the result-set constructors. The method returns without doing anything when
+   * there is no parent statement or statement id, when {@link #isHeartbeatEligible()} is false,
+   * when the owning connection cannot be resolved to a {@code DatabricksConnection}, or when the
+   * connection has no heartbeat manager.
+   *
+   * <p>The registered task calls {@code checkStatementAlive} on the client and stops its own
+   * heartbeat when the server reports a terminal state or after 10 consecutive failed calls.
+   *
+   * <p>Any exception raised while setting up the heartbeat is logged at debug level and
+   * otherwise ignored, so a heartbeat problem never fails construction of the result set.
+   */
+  private void startHeartbeatIfEnabled() {
+    if (parentStatement == null || statementId == null) {
+      return;
+    }
+    if (!isHeartbeatEligible()) {
+      return;
+    }
+
+    try {
+      // C3 fix: Use JDBC unwrap() to handle pooled connection wrappers (HikariCP, DBCP)
+      java.sql.Connection rawConn = parentStatement.getStatement().getConnection();
+      DatabricksConnection conn;
+      if (rawConn instanceof DatabricksConnection) {
+        conn = (DatabricksConnection) rawConn;
+      } else if (rawConn.isWrapperFor(DatabricksConnection.class)) {
+        conn = rawConn.unwrap(DatabricksConnection.class);
+      } else {
+        LOGGER.debug("Cannot start heartbeat: connection is not a DatabricksConnection");
+        return;
+      }
+
+      ResultHeartbeatManager mgr = conn.getHeartbeatManager();
+      if (mgr == null) {
+        return; // heartbeat not enabled
+      }
+
+      // C2 fix: Capture only what the lambda needs — avoid capturing 'this' to prevent
+      // abandoned ResultSets from keeping the warehouse alive via heartbeat.
+      // Note: capturing 'client' retains a reference to the session/connection. If the
+      // connection is GC'd without close(), heartbeat RPCs will fail and self-stop after
+      // maxConsecutiveFailures (10 ticks, ~10 min at 60s interval). Acceptable tradeoff.
+      final IDatabricksClient client = conn.getSession().getDatabricksClient();
+      final StatementId capturedStatementId = this.statementId;
+      final int maxConsecutiveFailures = 10;
+      // Counter lives outside the lambda body so it persists across ticks; it is reset to 0
+      // after every successful checkStatementAlive call.
+      final java.util.concurrent.atomic.AtomicInteger consecutiveFailures =
+          new java.util.concurrent.atomic.AtomicInteger(0);
+      // C1 fix: Read the stopped flag from the manager on each tick instead of pre-capturing.
+      // Pre-capturing caused an orphan-flag bug: startHeartbeat() internally calls
+      // stopHeartbeat() which removes and replaces the flag, leaving the lambda with a
+      // permanently-true reference. Reading from the manager each tick always gets the
+      // current flag.
+      final ResultHeartbeatManager capturedMgr = mgr;
+
+      // NOTE(review): presumably the manager runs this task every getIntervalSeconds()
+      // seconds; the scheduling itself is in ResultHeartbeatManager — confirm there.
+      Runnable heartbeatTask =
+          () -> {
+            // C1 fix: read current flag each tick
+            // NOTE(review): assumes getStoppedFlag() never returns null for this statement;
+            // this read sits outside the try block below, so a null would escape the task —
+            // confirm against ResultHeartbeatManager.
+            java.util.concurrent.atomic.AtomicBoolean stopped =
+                capturedMgr.getStoppedFlag(capturedStatementId);
+            if (stopped.get()) {
+              return; // client/session may be closed, skip RPC
+            }
+            try {
+              boolean alive = client.checkStatementAlive(capturedStatementId);
+              consecutiveFailures.set(0); // reset on success
+              if (!alive) {
+                LOGGER.info(
+                    "Heartbeat detected terminal state for statement {}, stopping",
+                    capturedStatementId);
+                capturedMgr.stopHeartbeat(capturedStatementId);
+              }
+            } catch (Exception e) {
+              // Re-read flag — may have been set during the RPC (connection closing)
+              if (capturedMgr.getStoppedFlag(capturedStatementId).get()) {
+                return;
+              }
+              int failures = consecutiveFailures.incrementAndGet();
+              // Only the first failure of a streak is logged at info; repeats go to debug
+              // to keep the log quiet.
+              if (failures == 1) {
+                LOGGER.info(
+                    "Heartbeat failed for statement {} (first failure): {}",
+                    capturedStatementId,
+                    e.getMessage());
+              } else {
+                LOGGER.debug(
+                    "Heartbeat failed for statement {} (failure {}/{}): {}",
+                    capturedStatementId,
+                    failures,
+                    maxConsecutiveFailures,
+                    e.getMessage());
+              }
+              // Give up once the streak reaches the limit and stop this statement's heartbeat.
+              if (failures >= maxConsecutiveFailures) {
+                LOGGER.warn(
+                    "Heartbeat stopped for statement {} after {} consecutive failures. "
+                        + "Server-side results may expire. Last error: {}",
+                    capturedStatementId,
+                    failures,
+                    e.getMessage());
+                capturedMgr.stopHeartbeat(capturedStatementId);
+              }
+            }
+          };
+
+      mgr.startHeartbeat(capturedStatementId, heartbeatTask);
+      LOGGER.debug(
+          "Heartbeat started for statement {} (resultType={}, interval={}s)",
+          capturedStatementId,
+          resultSetType,
+          mgr.getIntervalSeconds());
+    } catch (Exception e) {
+      // Best effort: this runs inside the constructors, so a setup failure is only logged.
+      LOGGER.debug("Failed to start heartbeat: {}", e.getMessage());
+    }
+  }
+
+  /**
+   * Stops the heartbeat for this result set's statement. Idempotent.
+   *
+   * <p>The owning connection is resolved the same way as in {@code startHeartbeatIfEnabled()}:
+   * a direct {@code DatabricksConnection} is used as is, and a pooled wrapper (HikariCP, DBCP)
+   * is unwrapped through the JDBC {@code isWrapperFor}/{@code unwrap} contract. A plain cast
+   * would throw {@link ClassCastException} on a wrapper and leave the heartbeat running.
+   *
+   * <p>Does nothing when there is no parent statement or statement id, when the connection is
+   * not a {@code DatabricksConnection}, or when heartbeat is disabled on the connection. Any
+   * exception is logged at debug level and otherwise ignored, because this method runs on the
+   * {@code next()} and {@code close()} paths and must not fail them.
+   */
+  private void stopHeartbeat() {
+    if (parentStatement == null || statementId == null) {
+      return;
+    }
+    try {
+      java.sql.Connection rawConn = parentStatement.getStatement().getConnection();
+      DatabricksConnection conn;
+      if (rawConn instanceof DatabricksConnection) {
+        conn = (DatabricksConnection) rawConn;
+      } else if (rawConn != null && rawConn.isWrapperFor(DatabricksConnection.class)) {
+        conn = rawConn.unwrap(DatabricksConnection.class);
+      } else {
+        LOGGER.debug("Cannot stop heartbeat: connection is not a DatabricksConnection");
+        return;
+      }
+      ResultHeartbeatManager mgr = conn.getHeartbeatManager();
+      if (mgr != null) {
+        mgr.stopHeartbeat(statementId);
+      }
+    } catch (Exception e) {
+      LOGGER.debug("Failed to stop heartbeat: {}", e.getMessage());
+    }
+  }
+
+  /**
+   * Decides whether heartbeat polling makes sense for this result set. Package-visible so tests
+   * can call it directly.
+   *
+   * <p>The result set is NOT eligible in any of these situations:
+   *
+   * <ul>
+   *   <li>there is no execution result (nothing to fetch; this also covers async
+   *       PENDING/RUNNING with no data)
+   *   <li>the result is SEA inline (InlineJsonResult): all rows loaded in memory at construction
+   *   <li>the statement is an update (DML): no result rows to keep alive
+   *   <li>the execution state is CLOSED (direct results): server already closed, data delivered
+   *   <li>the execution state is PENDING or RUNNING (async execution): the user controls
+   *       polling via getExecutionResult()
+   * </ul>
+   *
+   * @return {@code true} when none of the exclusions above applies
+   */
+  boolean isHeartbeatEligible() {
+    // Static exclusions: no result at all, rows already in memory, or no rows by nature.
+    final boolean nothingToKeepAlive =
+        executionResult == null
+            || resultSetType == ResultSetType.SEA_INLINE
+            || statementType == StatementType.UPDATE;
+    if (nothingToKeepAlive) {
+      return false;
+    }
+    if (executionStatus == null) {
+      // No execution status to inspect, so no state-based exclusion can apply.
+      return true;
+    }
+    final com.databricks.jdbc.api.ExecutionState currentState =
+        executionStatus.getExecutionState();
+    // Direct results: the server has already closed the operation.
+    final boolean serverClosed = currentState == com.databricks.jdbc.api.ExecutionState.CLOSED;
+    // Async execution: the user drives polling.
+    final boolean stillExecuting =
+        currentState == com.databricks.jdbc.api.ExecutionState.PENDING
+            || currentState == com.databricks.jdbc.api.ExecutionState.RUNNING;
+    return !(serverClosed || stillExecuting);
+  }
+
   private static TelemetryCollector resolveTelemetryCollector(
       IDatabricksStatementInternal parentStatement) {
     try {

diff --git a/src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java b/src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java
@@ -172,6 +172,13 @@ public void close(boolean removeFromSession) throws DatabricksSQLException {
         this.connection.closeStatement(this);
       }
       DatabricksThreadContextHolder.clearStatementInfo();
+      // Safety net: stop any heartbeat for this statement
+      if (statementId != null) {
+        ResultHeartbeatManager mgr = connection.getHeartbeatManager();
+        if (mgr != null) {
+          mgr.stopHeartbeat(statementId);
+        }
+      }
       shutDownExecutor();
       this.updateCount = -1;
       this.isClosed = true;
@@ -246,6 +253,15 @@ public void cancel() throws SQLException {
     LOGGER.debug("public void cancel()");
     checkIfClosed();
 
+    // H11 fix: Stop heartbeat on cancel — server operation is being cancelled,
+    // no point continuing to poll it
+    if (statementId != null) {
+      ResultHeartbeatManager mgr = connection.getHeartbeatManager();
+      if (mgr != null) {
+        mgr.stopHeartbeat(statementId);
+      }
+    }
+
     if (statementId != null && !directResultsReceived) {
       this.connection.getSession().getDatabricksClient().cancelStatement(statementId);
       DatabricksThreadContextHolder.clearStatementInfo();
@@ -672,6 +688,8 @@ public ResultSet executeAsync(String sql) throws SQLException {
     LOGGER.debug("ResultSet executeAsync() for statement {%s}", sql);
     checkIfClosed();
 
+    // No heartbeat during async wait — the user controls polling via getExecutionResult().
+    // Heartbeat starts later when the ResultSet is constructed (after getExecutionResult()).
     resetForNewExecution();
 
     IDatabricksClient client = connection.getSession().getDatabricksClient();
@@ -969,6 +987,16 @@ private void resetForNewExecution() {
     // when the server returns unexpected responses (e.g., WireMock 404 in tests).
     // For direct results, the server already closed the handle.
 
+    // Stop heartbeat for the previous execution before clearing state.
+    // Without this, the old heartbeat (keyed by old statementId) would fail and self-terminate
+    // after 10 consecutive failures — wasteful and noisy in logs.
+    if (statementId != null) {
+      ResultHeartbeatManager mgr = connection.getHeartbeatManager();
+      if (mgr != null) {
+        mgr.stopHeartbeat(statementId);
+      }
+    }
+
     directResultsReceived = false;
 
     // Per JDBC spec, re-executing a Statement implicitly closes the current ResultSet.