diff --git a/bertopic/_bertopic.py b/bertopic/_bertopic.py
index 66a9dcf5..276129ba 100644
--- a/bertopic/_bertopic.py
+++ b/bertopic/_bertopic.py
@@ -505,6 +505,9 @@ def fit_transform(
             documents = assigned_documents
             embeddings = assigned_embeddings
 
+            # Update topic_sizes_ when all documents are assigned to zero-shot topics
+            self._update_topic_size(documents)
+
         # Sort and Map Topic IDs by their frequency
         if not self.nr_topics:
             documents = self._sort_mappings_by_frequency(documents)
diff --git a/docs/changelog.md b/docs/changelog.md
index f2ff9850..eaf0cd5c 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -5,6 +5,12 @@ hide:
# Changelog
+## **Unreleased**
+
+Fixes:
+
+* Fix `topic_sizes_` not being updated in zero-shot topic modeling when using the `nr_topics` parameter ([#2384](https://github.com/MaartenGr/BERTopic/issues/2384))
+
## **Version 0.17.3**
*Release date: 8 July, 2025*
diff --git a/tests/test_variations/test_zeroshot.py b/tests/test_variations/test_zeroshot.py
new file mode 100644
index 00000000..05356191
--- /dev/null
+++ b/tests/test_variations/test_zeroshot.py
@@ -0,0 +1,185 @@
+"""
+Tests for zero-shot topic modeling functionality.
+
+This module tests various aspects of zero-shot topic modeling, including
+edge cases with the nr_topics parameter and topic_sizes_ consistency.
+"""
+
+from bertopic import BERTopic
+from umap import UMAP
+
+
+def test_zeroshot_with_nr_topics():
+    """Test zero-shot topic modeling with the nr_topics parameter."""
+    docs = [
+        "This is about machine learning and artificial intelligence",
+        "Deep learning neural networks are powerful",
+        "Python programming for data science",
+        "Machine learning algorithms and models",
+        "Artificial intelligence and deep learning",
+        "Data science with Python programming",
+        "Neural networks and machine learning",
+        "Programming in Python for AI",
+        "Deep learning models and algorithms",
+        "Artificial intelligence programming",
+    ]
+
+    zeroshot_topics = ["Technology and Programming"]
+
+    topic_model = BERTopic(
+        zeroshot_topic_list=zeroshot_topics, zeroshot_min_similarity=0.1, nr_topics=2, min_topic_size=2
+    )
+
+    topics, probs = topic_model.fit_transform(docs)
+
+    # Verify topic_sizes_ is properly populated
+    assert topic_model.topic_sizes_ is not None
+    assert len(topic_model.topic_sizes_) > 0
+
+    # Verify total document count matches
+    total_in_sizes = sum(topic_model.topic_sizes_.values())
+    assert total_in_sizes == len(docs)
+
+    # Verify all topics are accounted for
+    for topic in set(topics):
+        assert topic in topic_model.topic_sizes_
+
+
+def test_zeroshot_all_documents_assigned():
+    """Test edge case where all documents are assigned to zero-shot topics."""
+    docs = [
+        "Technology is advancing rapidly",
+        "Software development is important",
+        "Programming languages are evolving",
+        "Computer science research continues",
+        "Digital transformation is happening",
+        "Innovation in technology sector",
+        "Software engineering best practices",
+        "Modern programming techniques",
+        "Computer systems and architecture",
+        "Digital solutions and platforms",
+        "Technology trends and developments",
+        "Software design patterns",
+        "Programming paradigms evolution",
+        "Computing infrastructure advances",
+        "Digital innovation strategies",
+    ]
+
+    zeroshot_topics = ["Technology"]
+    umap_model = UMAP(n_neighbors=5, n_components=2, min_dist=0.0, metric="cosine", random_state=42)
+
+    topic_model = BERTopic(
+        zeroshot_topic_list=zeroshot_topics,
+        zeroshot_min_similarity=0.05,
+        nr_topics=2,
+        min_topic_size=1,
+        umap_model=umap_model,
+    )
+
+    topics, probs = topic_model.fit_transform(docs)
+
+    # Verify all documents are accounted for
+    assert topic_model.topic_sizes_ is not None
+    total_in_sizes = sum(topic_model.topic_sizes_.values())
+    assert total_in_sizes == len(docs)
+
+
+def test_zeroshot_topic_info_consistency():
+    """Test consistency between topic_sizes_ and get_topic_info()."""
+    docs = [
+        "AI and machine learning research",
+        "Deep learning neural networks",
+        "Neural network architectures",
+        "Machine learning algorithms",
+        "Artificial intelligence systems",
+        "Deep learning models training",
+        "Neural network optimization",
+        "Machine learning applications",
+        "AI research and development",
+        "Deep learning frameworks",
+    ]
+    zeroshot_topics = ["Artificial Intelligence"]
+    umap_model = UMAP(n_neighbors=5, n_components=2, min_dist=0.0, metric="cosine", random_state=42)
+
+    topic_model = BERTopic(
+        zeroshot_topic_list=zeroshot_topics,
+        zeroshot_min_similarity=0.1,
+        nr_topics=2,
+        min_topic_size=1,
+        umap_model=umap_model,
+    )
+
+    topics, probs = topic_model.fit_transform(docs)
+
+    # Verify topic info consistency
+    topic_info = topic_model.get_topic_info()
+    assert not topic_info.empty
+    assert topic_info.shape[0] > 0
+
+    # Verify topic_sizes_ and topic_info are consistent
+    topic_info_counts = dict(zip(topic_info.Topic, topic_info.Count))
+    for topic_id, count in topic_model.topic_sizes_.items():
+        assert topic_id in topic_info_counts
+        assert topic_info_counts[topic_id] == count
+
+
+def test_github_issue_2384_reproduction():
+    """Test exact reproduction case from GitHub issue #2384."""
+    # Exact reproduction case from GitHub issue #2384
+    docs = ["I need help with my voucher", "Gift card not working", "Customer service was poor"] * 50
+    zeroshot_topics = ["Voucher inquiries", "Gift card issues", "Customer service feedback"]
+
+    model = BERTopic(
+        zeroshot_topic_list=zeroshot_topics,
+        zeroshot_min_similarity=-1,
+        nr_topics=4,
+    )
+
+    topics, _ = model.fit_transform(docs)
+
+    # Verify the fix
+    assert model.topic_sizes_ is not None
+    assert len(model.topic_sizes_) > 0
+
+    # Verify get_topic_info() works
+    topic_info = model.get_topic_info()
+    assert not topic_info.empty
+    assert topic_info.shape[0] > 0
+
+    # Verify total document count matches
+    total_docs_in_sizes = sum(model.topic_sizes_.values())
+    assert total_docs_in_sizes == len(docs)
+
+    # Verify topic_representations_ still works (no regression)
+    assert model.topic_representations_ is not None
+    assert len(model.topic_representations_) > 0
+
+
+def test_zeroshot_nr_topics_consistency():
+    """Test consistency between using nr_topics and not using it."""
+    docs = ["I need help with my voucher", "Gift card not working", "Customer service was poor"] * 20
+    zeroshot_topics = ["Voucher inquiries", "Gift card issues", "Customer service feedback"]
+
+    # Test without nr_topics
+    model_without = BERTopic(zeroshot_topic_list=zeroshot_topics, zeroshot_min_similarity=-1)
+    topics_without, _ = model_without.fit_transform(docs)
+
+    # Test with nr_topics
+    model_with = BERTopic(zeroshot_topic_list=zeroshot_topics, zeroshot_min_similarity=-1, nr_topics=4)
+    topics_with, _ = model_with.fit_transform(docs)
+
+    # Both should have properly populated topic_sizes_
+    assert model_without.topic_sizes_ is not None
+    assert model_with.topic_sizes_ is not None
+
+    # Both should have same total document count
+    total_without = sum(model_without.topic_sizes_.values())
+    total_with = sum(model_with.topic_sizes_.values())
+    assert total_without == len(docs)
+    assert total_with == len(docs)
+
+    # Both should have working get_topic_info()
+    info_without = model_without.get_topic_info()
+    info_with = model_with.get_topic_info()
+    assert not info_without.empty
+    assert not info_with.empty