TransformerLensOrg · jlarson4 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/transformer_lens/model_bridge/bridge.py b/transformer_lens/model_bridge/bridge.py
@@ -1288,6 +1288,12 @@ def forward(
                 else:
                     kwargs["decoder_input_ids"] = input_ids
 
+            # Ensure the pos_embed hook captures the full batch dimension.
+            # HF models may generate position_ids with batch=1 as an optimization;
+            # PosEmbedBridge reads _current_batch_size to expand its output to match.
+            if hasattr(self, "pos_embed"):
+                self.pos_embed._current_batch_size = input_ids.shape[0]
+
             original_tl_cache = past_kv_cache
             output = self.original_model(input_ids, **kwargs)
             if (

diff --git a/transformer_lens/model_bridge/generalized_components/pos_embed.py b/transformer_lens/model_bridge/generalized_components/pos_embed.py
@@ -69,5 +69,23 @@ def forward(self, *args: Any, **kwargs: Any) -> torch.Tensor:
             first_arg = self.hook_in(args[0])
             args = (first_arg,) + args[1:]
         output = self.original_component(*args, **kwargs)
+
+        # HF models may return pos embeddings with batch=1 as an optimization.
+        # Expand to match the actual batch size so hooks capture the correct shape.
+        batch_size = getattr(self, "_current_batch_size", None)
+
+        # Read-and-clear: only expand for the forward pass that set the batch size
+        # (prevents stale values from affecting HF generate() steps).
+        if batch_size is not None:
+            self._current_batch_size = None
+        if (
+            batch_size is not None
+            and batch_size > 1
+            and isinstance(output, torch.Tensor)
+            and output.ndim >= 1
+            and output.shape[0] == 1
+        ):
+            output = output.expand(batch_size, *[-1] * (output.ndim - 1))
+
         output = self.hook_out(output)
         return output