From 373ecb8537e5eeaa4420a48b01b24a0b52b23979 Mon Sep 17 00:00:00 2001 From: Apoorv Mittal Date: Wed, 13 Nov 2024 13:32:06 +0000 Subject: [PATCH 1/2] KAFKA-17783: Adding listeners to remove share partition on partition changes --- .../server/share/SharePartitionManager.java | 83 +++++++++++++++++-- .../main/scala/kafka/cluster/Partition.scala | 16 ++++ 2 files changed, 94 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/kafka/server/share/SharePartitionManager.java b/core/src/main/java/kafka/server/share/SharePartitionManager.java index 4288dd55703d..773bfeb210bb 100644 --- a/core/src/main/java/kafka/server/share/SharePartitionManager.java +++ b/core/src/main/java/kafka/server/share/SharePartitionManager.java @@ -16,6 +16,7 @@ */ package kafka.server.share; +import kafka.cluster.PartitionListener; import kafka.server.ReplicaManager; import org.apache.kafka.clients.consumer.AcknowledgeType; @@ -639,6 +640,12 @@ private SharePartition getOrCreateSharePartition(SharePartitionKey sharePartitio k -> { long start = time.hiResClockMs(); int leaderEpoch = ShareFetchUtils.leaderEpoch(replicaManager, sharePartitionKey.topicIdPartition().topicPartition()); + // Attach listener to Partition which shall invoke partition changes handlers. + // However, as there could be multiple share partitions (per group name) for a single topic-partition, + // hence create separate listener per share group which holds the share partition key + // to identify the share partition. + replicaManager.maybeAddListener(sharePartitionKey.topicIdPartition().topicPartition(), + new SharePartitionListener(sharePartitionKey)); SharePartition partition = new SharePartition( sharePartitionKey.groupId(), sharePartitionKey.topicIdPartition(), @@ -670,7 +677,7 @@ private void maybeCompleteInitializationWithException( } // Remove the partition from the cache as it's failed to initialize. 
- partitionCacheMap.remove(sharePartitionKey); + removeSharePartitionFromCache(sharePartitionKey); // The partition initialization failed, so complete the request with the exception. // The server should not be in this state, so log the error on broker and surface the same // to the client. The broker should not be in this state, investigate the root cause of the error. @@ -688,10 +695,7 @@ private void handleFencedSharePartitionException( // The share partition is fenced hence remove the partition from map and let the client retry. // But surface the error to the client so client might take some action i.e. re-fetch // the metadata and retry the fetch on new leader. - SharePartition sharePartition = partitionCacheMap.remove(sharePartitionKey); - if (sharePartition != null) { - sharePartition.markFenced(); - } + removeSharePartitionFromCache(sharePartitionKey); } } @@ -716,6 +720,75 @@ private SharePartitionKey sharePartitionKey(String groupId, TopicIdPartition top return new SharePartitionKey(groupId, topicIdPartition); } + private void removeSharePartitionFromCache(SharePartitionKey sharePartitionKey) { + SharePartition sharePartition = partitionCacheMap.remove(sharePartitionKey); + if (sharePartition != null) { + sharePartition.markFenced(); + } + } + + /** + * The SharePartitionListener is used to listen for partition events. The share partition is associated with + * the topic-partition, we need to handle the partition events for the share partition. + *
<p>
+ * The partition cache map stores share partitions against share partition key which comprises + * group and topic-partition. Instead of maintaining a separate map for topic-partition to share partitions, + * we can maintain the share partition key in the listener and create a new listener for each share partition. + */ + private class SharePartitionListener implements PartitionListener { + + private final SharePartitionKey sharePartitionKey; + + private SharePartitionListener(SharePartitionKey sharePartitionKey) { + this.sharePartitionKey = sharePartitionKey; + } + + /** + * The onFailed method is called when a Partition is marked offline. + * + * @param topicPartition The topic-partition that has been marked offline. + */ + @Override + public void onFailed(TopicPartition topicPartition) { + log.info("The share partition failed listener is invoked for the topic-partition: {}, share-partition: {}", + topicPartition, sharePartitionKey); + onUpdate(topicPartition); + } + + /** + * The onDeleted method is called when a Partition is deleted. + * + * @param topicPartition The topic-partition that has been deleted. + */ + @Override + public void onDeleted(TopicPartition topicPartition) { + log.info("The share partition delete listener is invoked for the topic-partition: {}, share-partition: {}", + topicPartition, sharePartitionKey); + onUpdate(topicPartition); + } + + /** + * The onFollower method is called when a Partition is marked follower. + * + * @param topicPartition The topic-partition that has been marked as follower. 
+ */ + @Override + public void onFollower(TopicPartition topicPartition) { + log.info("The share partition leader change listener is invoked for the topic-partition: {}, share-partition: {}", + topicPartition, sharePartitionKey); + onUpdate(topicPartition); + } + + private void onUpdate(TopicPartition topicPartition) { + if (!sharePartitionKey.topicIdPartition().topicPartition().equals(topicPartition)) { + log.error("The share partition listener is invoked for the wrong topic-partition: {}, share-partition: {}", + topicPartition, sharePartitionKey); + return; + } + removeSharePartitionFromCache(sharePartitionKey); + } + } + static class ShareGroupMetrics { /** * share-acknowledgement (share-acknowledgement-rate and share-acknowledgement-count) - The total number of offsets acknowledged for share groups (requests to be ack). diff --git a/core/src/main/scala/kafka/cluster/Partition.scala b/core/src/main/scala/kafka/cluster/Partition.scala index e432ead8edb2..1892cc9f61df 100755 --- a/core/src/main/scala/kafka/cluster/Partition.scala +++ b/core/src/main/scala/kafka/cluster/Partition.scala @@ -82,6 +82,11 @@ trait PartitionListener { * that the partition was deleted but only that this broker does not host a replica of it any more. */ def onDeleted(partition: TopicPartition): Unit = {} + + /** + * Called when the Partition on this broker is marked as follower. + */ + def onFollower(partition: TopicPartition): Unit = {} } trait AlterPartitionListener { @@ -701,6 +706,15 @@ class Partition(val topicPartition: TopicPartition, } } + /** + * Invoke the partition listeners when the partition has been marked as follower. 
+ */ + def invokeFollowerListeners(): Unit = { + listeners.forEach { listener => + listener.onFollower(topicPartition) + } + } + private def clear(): Unit = { remoteReplicasMap.clear() assignmentState = SimpleAssignmentState(Seq.empty) @@ -891,6 +905,8 @@ class Partition(val topicPartition: TopicPartition, s"and partition state $partitionState since it is already a follower with leader epoch $leaderEpoch.") } + // Invoke the follower transition listeners for the partition. + invokeFollowerListeners() // We must restart the fetchers when the leader epoch changed regardless of // whether the leader changed as well. isNewLeaderEpoch From 9e0efbb445cef7fa4cebd02980cd94f5d473cc65 Mon Sep 17 00:00:00 2001 From: Apoorv Mittal Date: Wed, 13 Nov 2024 17:03:33 +0000 Subject: [PATCH 2/2] Correcting comments --- .../main/java/kafka/server/share/SharePartitionManager.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/kafka/server/share/SharePartitionManager.java b/core/src/main/java/kafka/server/share/SharePartitionManager.java index 773bfeb210bb..95c93df26c21 100644 --- a/core/src/main/java/kafka/server/share/SharePartitionManager.java +++ b/core/src/main/java/kafka/server/share/SharePartitionManager.java @@ -640,10 +640,10 @@ private SharePartition getOrCreateSharePartition(SharePartitionKey sharePartitio k -> { long start = time.hiResClockMs(); int leaderEpoch = ShareFetchUtils.leaderEpoch(replicaManager, sharePartitionKey.topicIdPartition().topicPartition()); - // Attach listener to Partition which shall invoke partition changes handlers. + // Attach listener to Partition which shall invoke partition change handlers. // However, as there could be multiple share partitions (per group name) for a single topic-partition, - // hence create separate listener per share group which holds the share partition key - // to identify the share partition. 
+ // hence create separate listeners per share partition which holds the share partition key + // to identify the respective share partition. replicaManager.maybeAddListener(sharePartitionKey.topicIdPartition().topicPartition(), new SharePartitionListener(sharePartitionKey)); SharePartition partition = new SharePartition(