authzed
diff --git a/‎internal/dispatch/remote/cluster.go
+167-15 b/‎internal/dispatch/remote/cluster.go
+167-15
@@ -14,15 +14,20 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/rs/zerolog"
 	"go.uber.org/atomic"
+	"google.golang.org/genproto/googleapis/rpc/errdetails"
 	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/connectivity"
+	"google.golang.org/grpc/status"
 	"google.golang.org/protobuf/proto"
 
 	"github.com/authzed/spicedb/internal/dispatch"
 	"github.com/authzed/spicedb/internal/dispatch/keys"
 	log "github.com/authzed/spicedb/internal/logging"
+	corev1 "github.com/authzed/spicedb/pkg/proto/core/v1"
 	v1 "github.com/authzed/spicedb/pkg/proto/dispatch/v1"
 	"github.com/authzed/spicedb/pkg/spiceerrors"
+	"github.com/authzed/spicedb/pkg/tuple"
 )
 
 var dispatchCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
@@ -130,6 +135,7 @@ func NewClusterDispatcher(client ClusterClient, conn *grpc.ClientConn, config Cl
 		secondaryDispatch:               secondaryDispatch,
 		secondaryDispatchExprs:          secondaryDispatchExprs,
 		secondaryInitialResponseDigests: secondaryInitialResponseDigests,
+		supportedResourceSubjectTracker: newSupportedResourceSubjectTracker(),
 	}, nil
 }
 
@@ -142,6 +148,7 @@ type clusterDispatcher struct {
 	secondaryDispatch               map[string]SecondaryDispatch
 	secondaryDispatchExprs          map[string]*DispatchExpr
 	secondaryInitialResponseDigests map[string]*digestAndLock
+	supportedResourceSubjectTracker *supportedResourceSubjectTracker
 }
 
 // digestAndLock is a struct that holds a TDigest and a lock to protect it.
@@ -188,15 +195,22 @@ func (cr *clusterDispatcher) DispatchCheck(ctx context.Context, req *v1.Dispatch
 
 	ctx = context.WithValue(ctx, consistent.CtxKey, requestKey)
 
-	resp, err := dispatchSyncRequest(ctx, cr, "check", req, func(ctx context.Context, client ClusterClient) (*v1.DispatchCheckResponse, error) {
-		resp, err := client.DispatchCheck(ctx, req)
-		if err != nil {
-			return resp, err
-		}
+	resp, err := dispatchSyncRequest(
+		ctx,
+		cr,
+		"check",
+		req,
+		tuple.FromCoreRelationReference(req.ResourceRelation),
+		tuple.RR(req.Subject.Namespace, req.Subject.Relation),
+		func(ctx context.Context, client ClusterClient) (*v1.DispatchCheckResponse, error) {
+			resp, err := client.DispatchCheck(ctx, req)
+			if err != nil {
+				return resp, err
+			}
 
-		err = adjustMetadataForDispatch(resp.Metadata)
-		return resp, err
-	})
+			err = adjustMetadataForDispatch(resp.Metadata)
+			return resp, err
+		})
 	if err != nil {
 		return &v1.DispatchCheckResponse{Metadata: requestFailureMetadata}, err
 	}
@@ -216,6 +230,13 @@ type responseMessage interface {
 	GetMetadata() *v1.ResponseMeta
 }
 
+type streamingRequestMessage interface {
+	requestMessage
+
+	GetResourceRelation() *corev1.RelationReference
+	GetSubjectRelation() *corev1.RelationReference
+}
+
 type respTuple[S responseMessage] struct {
 	resp S
 	err  error
@@ -226,7 +247,15 @@ type secondaryRespTuple[S responseMessage] struct {
 	resp        S
 }
 
-func dispatchSyncRequest[Q requestMessage, S responseMessage](ctx context.Context, cr *clusterDispatcher, reqKey string, req Q, handler func(context.Context, ClusterClient) (S, error)) (S, error) {
+func dispatchSyncRequest[Q requestMessage, S responseMessage](
+	ctx context.Context,
+	cr *clusterDispatcher,
+	reqKey string,
+	req Q,
+	resourceTypeAndRelation tuple.RelationReference,
+	subjectTypeAndRelation tuple.RelationReference,
+	handler func(context.Context, ClusterClient) (S, error),
+) (S, error) {
 	withTimeout, cancelFn := context.WithTimeout(ctx, cr.dispatchOverallTimeout)
 	defer cancelFn()
 
@@ -248,9 +277,11 @@ func dispatchSyncRequest[Q requestMessage, S responseMessage](ctx context.Contex
 	go func() {
 		// Have the main dispatch wait some time before returning, to allow the secondaries to
 		// potentially return first.
-		computedWait := cr.secondaryInitialResponseDigests[reqKey].getWaitTime()
+		computedWait := cr.getPrimaryWaitTime(reqKey, resourceTypeAndRelation, subjectTypeAndRelation)
 		log.Trace().Stringer("computed-wait", computedWait).Msg("primary dispatch started; sleeping for computed wait time")
-		time.Sleep(computedWait)
+		if computedWait > 0 {
+			time.Sleep(computedWait)
+		}
 
 		log.Trace().Msg("running primary dispatch after wait")
 		select {
@@ -296,10 +327,12 @@ func dispatchSyncRequest[Q requestMessage, S responseMessage](ctx context.Contex
 						// For secondary dispatches, ignore any errors, as only the primary will be handled in
 						// that scenario.
 						log.Trace().Stringer("duration", handlerDuration).Str("secondary", secondary.Name).Err(err).Msg("got ignored secondary dispatch error")
+						cr.supportedResourceSubjectTracker.updateForError(err)
 						return
 					}
 
 					log.Trace().Stringer("duration", handlerDuration).Str("secondary", secondary.Name).Msg("secondary dispatch completed")
+					go cr.supportedResourceSubjectTracker.updateForSuccess(resourceTypeAndRelation, subjectTypeAndRelation)
 					cr.secondaryInitialResponseDigests[reqKey].addResultTime(handlerDuration)
 					secondaryResultChan <- secondaryRespTuple[S]{resp: resp, handlerName: secondary.Name}
 				}
@@ -397,7 +430,7 @@ type ctxAndCancel struct {
 // secondary dispatchers. Unlike the non-streaming version, this will first attempt to dispatch
 // from the allowed secondary dispatchers before falling back to the primary, rather than running
 // them in parallel.
-func dispatchStreamingRequest[Q requestMessage, R responseMessage](
+func dispatchStreamingRequest[Q streamingRequestMessage, R responseMessage](
 	ctx context.Context,
 	cr *clusterDispatcher,
 	reqKey string,
@@ -495,9 +528,14 @@ func dispatchStreamingRequest[Q requestMessage, R responseMessage](
 		isPrimary := name == primaryDispatcher
 		if isPrimary {
 			// Have the primary wait a bit to ensure the secondaries have a chance to return first.
-			computedWait := cr.secondaryInitialResponseDigests[reqKey].getWaitTime()
-			time.Sleep(computedWait)
-			hedgeWaitHistogram.WithLabelValues(reqKey).Observe(computedWait.Seconds())
+			computedWait := cr.getPrimaryWaitTime(reqKey,
+				tuple.FromCoreRelationReference(req.GetResourceRelation()),
+				tuple.FromCoreRelationReference(req.GetSubjectRelation()),
+			)
+			if computedWait > 0 {
+				time.Sleep(computedWait)
+				hedgeWaitHistogram.WithLabelValues(reqKey).Observe(computedWait.Seconds())
+			}
 		} else {
 			startTime = time.Now()
 		}
@@ -521,6 +559,10 @@ func dispatchStreamingRequest[Q requestMessage, R responseMessage](
 			primaryDispatch.WithLabelValues("false", reqKey).Inc()
 		}
 		if err != nil {
+			if !isPrimary {
+				cr.supportedResourceSubjectTracker.updateForError(err)
+			}
+
 			log.Warn().Err(err).Str("dispatcher", name).Msg("error when trying to run secondary dispatcher")
 			errorsLock.Lock()
 			errorsByDispatcherName[name] = err
@@ -549,6 +591,10 @@ func dispatchStreamingRequest[Q requestMessage, R responseMessage](
 						finishTime := time.Now()
 						duration := finishTime.Sub(startTime)
 						cr.secondaryInitialResponseDigests[reqKey].addResultTime(duration)
+						go cr.supportedResourceSubjectTracker.updateForSuccess(
+							tuple.FromCoreRelationReference(req.GetResourceRelation()),
+							tuple.FromCoreRelationReference(req.GetSubjectRelation()),
+						)
 					}
 
 					// If a valid result, and we have not yet returned any results, try take a "lock" on
@@ -576,6 +622,10 @@ func dispatchStreamingRequest[Q requestMessage, R responseMessage](
 				}
 
 				if err != nil {
+					if !isPrimary {
+						cr.supportedResourceSubjectTracker.updateForError(err)
+					}
+
 					errorsLock.Lock()
 					errorsByDispatcherName[name] = err
 					errorsLock.Unlock()
@@ -728,6 +778,108 @@ func (cr *clusterDispatcher) ReadyState() dispatch.ReadyState {
 	}
 }
 
+// getPrimaryWaitTime returns the wait time for the primary dispatch, based on the request key and the
+// resource and subject types and relations. If the resource or subject is unsupported, the wait time
+// will be zero.
+func (cr *clusterDispatcher) getPrimaryWaitTime(
+	reqKey string,
+	resourceTypeAndRelation tuple.RelationReference,
+	subjectTypeAndRelation tuple.RelationReference,
+) time.Duration {
+	// If the resource or subject is unsupported, return zero time.
+	if cr.supportedResourceSubjectTracker.isUnsupported(
+		resourceTypeAndRelation,
+		subjectTypeAndRelation,
+	) {
+		return 0
+	}
+
+	return cr.secondaryInitialResponseDigests[reqKey].getWaitTime()
+}
+
+// supportedResourceSubjectTracker is a struct that tracks the resources and subjects that are
+// unsupported by the secondary dispatcher(s). If a resource or subject is unsupported, the primary
+// dispatcher will not wait for the secondary dispatchers to return results, as it is extremely
+// likely that hedging will not be beneficial.
+type supportedResourceSubjectTracker struct {
+	unsupportedResources sync.Map
+	unsupportedSubjects  sync.Map
+}
+
+func newSupportedResourceSubjectTracker() *supportedResourceSubjectTracker {
+	return &supportedResourceSubjectTracker{
+		unsupportedResources: sync.Map{},
+		unsupportedSubjects:  sync.Map{},
+	}
+}
+
+// isUnsupported returns whether the resource or subject is unsupported by the secondary dispatcher(s).
+func (srst *supportedResourceSubjectTracker) isUnsupported(
+	resourceTypeAndRelation tuple.RelationReference,
+	subjectTypeAndRelation tuple.RelationReference,
+) bool {
+	isUnsupportedResource, found := srst.unsupportedResources.Load(resourceTypeAndRelation)
+	if found && isUnsupportedResource.(bool) {
+		return true
+	}
+
+	isUnsupportedSubject, found := srst.unsupportedSubjects.Load(subjectTypeAndRelation)
+	return found && isUnsupportedSubject.(bool)
+}
+
+// updateForSuccess updates the tracker for a successful dispatch, removing the resource and subject
+// from the unsupported set.
+func (srst *supportedResourceSubjectTracker) updateForSuccess(
+	resourceTypeAndRelation tuple.RelationReference,
+	subjectTypeAndRelation tuple.RelationReference,
+) {
+	srst.unsupportedResources.CompareAndSwap(resourceTypeAndRelation, true, false)
+	srst.unsupportedSubjects.CompareAndSwap(subjectTypeAndRelation, true, false)
+}
+
+// updateForError updates the tracker for an error, adding the resource or subject to the unsupported set
+// if the error indicates that the resource or subject is unsupported.
+func (srst *supportedResourceSubjectTracker) updateForError(err error) {
+	// Check for a FAILED_PRECONDITION with error details that indicate an unsupported
+	// resource or subject.
+	st, ok := status.FromError(err)
+	if !ok {
+		return
+	}
+
+	if st.Code() != codes.FailedPrecondition {
+		return
+	}
+
+	for _, detail := range st.Details() {
+		errDetail, ok := detail.(*errdetails.ErrorInfo)
+		if !ok {
+			continue
+		}
+
+		// If the error is an unsupported resource or subject, add it to the tracker.
+		if errDetail.Reason == "UNSUPPORTED_RESOURCE_RELATION" {
+			definitionName := errDetail.Metadata["definition_name"]
+			relationName := errDetail.Metadata["relation_name"]
+			rr := tuple.RR(definitionName, relationName)
+			existing, loaded := srst.unsupportedResources.LoadOrStore(rr, true)
+			if !loaded || !existing.(bool) {
+				srst.unsupportedResources.Store(rr, true)
+			}
+		}
+
+		if errDetail.Reason == "UNSUPPORTED_SUBJECT_RELATION" {
+			definitionName := errDetail.Metadata["definition_name"]
+			relationName := errDetail.Metadata["relation_name"]
+			rr := tuple.RR(definitionName, relationName)
+			existing, loaded := srst.unsupportedSubjects.LoadOrStore(rr, true)
+			if !loaded || !existing.(bool) {
+				srst.unsupportedSubjects.Store(rr, true)
+			}
+		}
+	}
+}
+
 // Always verify that we implement the interface
 var _ dispatch.Dispatcher = &clusterDispatcher{}