Skip to content

Commit

Permalink
fix(consumer): add recovery from no leader partitions
Browse files Browse the repository at this point in the history
When some topic partitions have no leader due to Kafka broker failures,
the Sarama consumer group should be able to continue consuming
partitions that do have leaders and resume consuming the partitions that
previously had no leader once they return to normal.

Signed-off-by: liutao366 <[email protected]>
  • Loading branch information
liutao365 committed Feb 17, 2025
1 parent 9ae475a commit 5dc4e24
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
11 changes: 11 additions & 0 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ type Client interface {
// LeastLoadedBroker retrieves broker that has the least responses pending
LeastLoadedBroker() *Broker

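// PartitionNotReadable reports whether the given partition currently cannot be read: it returns true when no metadata is cached for the partition or when the partition has no leader.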
PartitionNotReadable(topic string, partition int32) bool

// Close shuts down all broker connections managed by this client. It is required
// to call this function before a client object passes out of scope, as it will
// otherwise leak memory. You must close any Producers or Consumers using a client
Expand Down Expand Up @@ -1283,3 +1286,11 @@ type nopCloserClient struct {
// Close is a no-op: it performs no work and always returns a nil error.
func (ncc *nopCloserClient) Close() error {
return nil
}

// PartitionNotReadable reports whether the given topic partition currently
// cannot be consumed. It returns true when the client holds no cached metadata
// for the partition, or when the cached metadata records no leader
// (Leader == -1). It only inspects the cached metadata map.
func (client *client) PartitionNotReadable(topic string, partition int32) bool {
	// This method is polled from background goroutines, so the shared metadata
	// map must not be read unsynchronized.
	// NOTE(review): assumes client.lock is the RWMutex guarding client.metadata,
	// as used by the client's other cached-metadata accessors — confirm.
	client.lock.RLock()
	defer client.lock.RUnlock()

	// Indexing an unknown topic yields a nil inner map, and looking up a
	// partition in a nil map yields a nil entry, so one nil check covers both.
	pm := client.metadata[topic][partition]
	return pm == nil || pm.Leader == -1
}
22 changes: 22 additions & 0 deletions consumer_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,28 @@ func newConsumerGroupSession(ctx context.Context, parent *consumerGroup, claims
// start consuming
for topic, partitions := range claims {
for _, partition := range partitions {
if parent.client.PartitionNotReadable(topic, partition) {
// partition not readable, wait for it to become readable
go func(topic string, partition int32) {
timer := time.NewTimer(5 * time.Second)
for parent.client.PartitionNotReadable(topic, partition) {
select {
case <-ctx.Done():
return
case <-parent.closed:
return
case <-timer.C:
timer.Reset(5 * time.Second)
}
}
timer.Stop()
sess.waitGroup.Add(1)
defer sess.waitGroup.Done()
defer sess.cancel()
sess.consume(topic, partition)
}(topic, partition)
continue
}
sess.waitGroup.Add(1)

go func(topic string, partition int32) {
Expand Down

0 comments on commit 5dc4e24

Please sign in to comment.