diff --git a/examples/models/lfm2/short_conv.py b/examples/models/lfm2/short_conv.py
index ae04580d6c6..08c00a1f414 100644
--- a/examples/models/lfm2/short_conv.py
+++ b/examples/models/lfm2/short_conv.py
@@ -74,7 +74,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         with torch.no_grad():
             self.conv_state.copy_(new_conv_state)
 
-        conv_out = self.conv(Bx)[..., : x.size(-1)]  # (batch_size, dim, seq_len)
+        # Manual depthwise conv: Triton has no template for nn.Conv1d with
+        # groups=dim and dynamic seq_len.  kernel_size is always 3.
+        w = self.conv.weight[:, 0, :]  # (dim, 3)
+        conv_out = (
+            Bx[..., :-2] * w[:, 0:1]
+            + Bx[..., 1:-1] * w[:, 1:2]
+            + Bx[..., 2:] * w[:, 2:3]
+        )  # (batch_size, dim, seq_len)
         y = C * conv_out  # (batch_size, dim, seq_len)
 
         y = y.transpose(-1, -2)  # (batch_size, seq_len, dim)
diff --git a/examples/models/lfm2_5_vl/__init__.py b/examples/models/lfm2_5_vl/__init__.py
new file mode 100644
index 00000000000..f1fe2afba26
--- /dev/null
+++ b/examples/models/lfm2_5_vl/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.examples.models.lfm2_5_vl.convert_weights import convert_weights
+from executorch.examples.models.lfm2_5_vl.model import Lfm2p5VlModel
+
+__all__ = [
+    "convert_weights",
+    "Lfm2p5VlModel",
+]
diff --git a/examples/models/lfm2_5_vl/config/lfm2_5_vl_1_6b_config.json b/examples/models/lfm2_5_vl/config/lfm2_5_vl_1_6b_config.json
new file mode 100644
index 00000000000..396f7bb7a8a
--- /dev/null
+++ b/examples/models/lfm2_5_vl/config/lfm2_5_vl_1_6b_config.json
@@ -0,0 +1,33 @@
+{
+  "dim": 2048,
+  "ffn_dim_multiplier": 1,
+  "hidden_dim": 8192,
+  "n_heads": 32,
+  "n_kv_heads": 8,
+  "n_layers": 16,
+  "norm_eps": 1e-5,
+  "rope_theta": 1000000.0,
+  "use_scaled_rope": false,
+  "vocab_size": 65536,
+  "use_hf_rope": true,
+  "use_qk_norm": true,
+  "qk_norm_before_rope": true,
+  "layer_types": [
+    "conv",
+    "conv",
+    "full_attention",
+    "conv",
+    "conv",
+    "full_attention",
+    "conv",
+    "conv",
+    "full_attention",
+    "conv",
+    "full_attention",
+    "conv",
+    "full_attention",
+    "conv",
+    "full_attention",
+    "conv"
+  ]
+}
diff --git a/examples/models/lfm2_5_vl/config/lfm2_5_vl_450m_config.json b/examples/models/lfm2_5_vl/config/lfm2_5_vl_450m_config.json
new file mode 100644
index 00000000000..975ccbccca7
--- /dev/null
+++ b/examples/models/lfm2_5_vl/config/lfm2_5_vl_450m_config.json
@@ -0,0 +1,33 @@
+{
+  "dim": 1024,
+  "ffn_dim_multiplier": 1,
+  "hidden_dim": 4608,
+  "n_heads": 16,
+  "n_kv_heads": 8,
+  "n_layers": 16,
+  "norm_eps": 1e-5,
+  "rope_theta": 1000000.0,
+  "use_scaled_rope": false,
+  "vocab_size": 65536,
+  "use_hf_rope": true,
+  "use_qk_norm": true,
+  "qk_norm_before_rope": true,
+  "layer_types": [
+    "conv",
+    "conv",
+    "full_attention",
+    "conv",
+    "conv",
+    "full_attention",
+    "conv",
+    "conv",
+    "full_attention",
+    "conv",
+    "full_attention",
+    "conv",
+    "full_attention",
+    "conv",
+    "full_attention",
+    "conv"
+  ]
+}
diff --git a/examples/models/lfm2_5_vl/convert_weights.py b/examples/models/lfm2_5_vl/convert_weights.py
new file mode 100644
index 00000000000..82ccba110ee
--- /dev/null
+++ b/examples/models/lfm2_5_vl/convert_weights.py
@@ -0,0 +1,81 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Convert LFM2.5-VL text decoder weights from HuggingFace to ET format."""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import torch
+from executorch.examples.models.checkpoint import get_mapped_key
+from safetensors.torch import load_file
+
+_LFM2_5_VL_TO_META: dict[str, str] = {  # HF key template -> ET key template; "{}" stands in for the layer index
+    "model.language_model.embed_tokens.weight": "tok_embeddings.weight",
+    "model.language_model.embedding_norm.weight": "norm.weight",
+    "model.language_model.layers.{}.self_attn.q_proj.weight": "layers.{}.attention.wq.weight",
+    "model.language_model.layers.{}.self_attn.k_proj.weight": "layers.{}.attention.wk.weight",
+    "model.language_model.layers.{}.self_attn.v_proj.weight": "layers.{}.attention.wv.weight",
+    "model.language_model.layers.{}.self_attn.out_proj.weight": "layers.{}.attention.wo.weight",
+    "model.language_model.layers.{}.self_attn.q_layernorm.weight": "layers.{}.attention.q_norm_fn.weight",
+    "model.language_model.layers.{}.self_attn.k_layernorm.weight": "layers.{}.attention.k_norm_fn.weight",
+    "model.language_model.layers.{}.operator_norm.weight": "layers.{}.attention_norm.weight",
+    "model.language_model.layers.{}.ffn_norm.weight": "layers.{}.ffn_norm.weight",
+    "model.language_model.layers.{}.feed_forward.w1.weight": "layers.{}.feed_forward.w1.weight",
+    "model.language_model.layers.{}.feed_forward.w2.weight": "layers.{}.feed_forward.w2.weight",
+    "model.language_model.layers.{}.feed_forward.w3.weight": "layers.{}.feed_forward.w3.weight",
+    "model.language_model.layers.{}.conv.conv.weight": "layers.{}.conv.conv.weight",
+    "model.language_model.layers.{}.conv.out_proj.weight": "layers.{}.conv.out_proj.weight",
+    "model.language_model.lm_head.weight": "output.weight",
+}
+
+_IN_PROJ_SPLITS = ("B_proj", "C_proj", "x_proj")  # names given, in order, to the three dim-0 chunks of conv.in_proj.weight
+
+
+def lfm2_5_vl_to_meta(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+    """Return the ``model.language_model.*`` weights of a VL state dict under ET names."""
+    prefix = "model.language_model."
+    remapped: dict[str, torch.Tensor] = {}
+    for hf_key, tensor in state_dict.items():
+        if not hf_key.startswith(prefix):
+            continue
+        try:
+            et_key = get_mapped_key(hf_key, _LFM2_5_VL_TO_META)
+        except Exception:
+            # get_mapped_key raised (presumably an unmapped key): keep the key minus the prefix.
+            et_key = hf_key.removeprefix(prefix)
+
+        if not et_key.endswith(".conv.in_proj.weight"):
+            remapped[et_key] = tensor
+            continue
+        # Split the fused in_proj weight along dim 0 into its three projections.
+        for part, piece in zip(_IN_PROJ_SPLITS, torch.chunk(tensor, 3, dim=0)):
+            remapped[et_key.replace("in_proj", part)] = piece
+
+    if "output.weight" not in remapped:
+        remapped["output.weight"] = remapped["tok_embeddings.weight"]
+    return remapped
+
+
+def convert_weights(input_dir: str, output_file: str) -> None:
+    """Convert ``<input_dir>/model.safetensors`` and save it to ``output_file``."""
+    converted = lfm2_5_vl_to_meta(load_file(str(Path(input_dir) / "model.safetensors")))
+    torch.save(converted, output_file)
+    print(f"Saved {len(converted)} tensors to {output_file}")
+
+
+def main() -> None:  # CLI entry point: two positional args, then convert
+    cli = argparse.ArgumentParser(description="Convert LFM2.5-VL weights to ET format.")
+    cli.add_argument("input_dir", help="Directory containing model.safetensors.")
+    cli.add_argument("output", help="Output .pt checkpoint path.")
+    opts = cli.parse_args()
+    convert_weights(opts.input_dir, opts.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/models/lfm2_5_vl/export_lfm2_5_vl.py b/examples/models/lfm2_5_vl/export_lfm2_5_vl.py
new file mode 100644
index 00000000000..b00bbf732ed
--- /dev/null
+++ b/examples/models/lfm2_5_vl/export_lfm2_5_vl.py
@@ -0,0 +1,243 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Export LFM2.5-VL as a multi-method PTE for ExecuTorch with CUDA/AOTI backend.
+
+All three methods are delegated to the CUDA backend.  KV cache and conv state
+buffers are marked with torch._dynamo.mark_static_address so that AOTI can
+trace through their in-place mutations.
+
+Methods (D = text hidden dim):
+  vision_encoder  : [1, 3, 512, 512] f32 -> [1, 256, D] f32
+  token_embedding : [1, seq_len] i64     -> [1, seq_len, D] f32
+  text_decoder    : ([1, seq_len, D], [seq_len] i64) -> [1, vocab] f32
+
+Usage:
+    python examples/models/lfm2_5_vl/export_lfm2_5_vl.py \\
+        --model_dir LiquidAI/LFM2.5-VL-450M --dtype bf16
+"""
+
+from __future__ import annotations
+
+import logging
+from argparse import ArgumentParser
+from pathlib import Path
+from typing import Optional
+
+import torch
+from torch.export import Dim, ExportedProgram
+from torch.nn.attention import SDPBackend
+
+from executorch.backends.cuda.cuda_backend import CudaBackend
+from executorch.backends.cuda.cuda_partitioner import CudaPartitioner
+from executorch.exir import (
+    EdgeCompileConfig,
+    ExecutorchBackendConfig,
+    to_edge_transform_and_lower,
+)
+from executorch.exir.passes import MemoryPlanningPass
+from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
+
+from executorch.examples.models.lfm2_5_vl.model import (
+    IMAGE_SIZE,
+    MAX_SEQ_LEN,
+    Lfm2p5VlModel,
+)
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s",
+)
+
+# ---------------------------------------------------------------------------
+# Blackwell (sm_103) workaround: torch._inductor maps arch 103 -> "100f" but
+# Triton generates PTX targeting sm_103a.  Patch to match.
+# TODO: Remove once PyTorch bump includes the upstream fix in
+# torch/_inductor/codegen/cuda/compile_utils.py
+# ---------------------------------------------------------------------------
+try:
+    from torch._inductor.codecache import cuda_compile_utils
+
+    _orig_nvcc_arch = cuda_compile_utils._nvcc_arch_as_compile_option  # original helper, used for every arch except "103"
+
+    def _patched_nvcc_arch() -> str:
+        arch = cuda_compile_utils.cuda_env.get_cuda_arch()
+        return "103a" if arch == "103" else _orig_nvcc_arch()
+
+    cuda_compile_utils._nvcc_arch_as_compile_option = _patched_nvcc_arch  # module-level monkeypatch, applied at import time
+except (ImportError, AttributeError):
+    pass  # best effort: leave Inductor unpatched when these private names are missing
+
+_CONFIG_DIR = Path(__file__).parent / "config"  # directory of the bundled per-size JSON configs
+
+_DTYPE_MAP: dict[str, torch.dtype] = {  # accepted --dtype choices
+    "fp32": torch.float32,
+    "fp16": torch.float16,
+    "bf16": torch.bfloat16,
+}
+
+
+def _resolve_params_path(model_dir: str, params: str | None) -> str | None:
+    """Return ``params`` if given, else a bundled config guessed from ``model_dir``."""
+    if params is not None:
+        return params
+    lowered = model_dir.lower()
+    if "450m" in lowered:
+        return str(_CONFIG_DIR / "lfm2_5_vl_450m_config.json")
+    is_1_6b = any(tag in lowered for tag in ("1.6b", "1_6b"))
+    return str(_CONFIG_DIR / "lfm2_5_vl_1_6b_config.json") if is_1_6b else None
+
+
+# ---------------------------------------------------------------------------
+# Per-method export
+# ---------------------------------------------------------------------------
+
+
+def _export_image_encoder(lfm2: torch.nn.Module, *, device: str) -> ExportedProgram:
+    class _Encoder(torch.nn.Module):  # thin wrapper: forward() -> lfm2.image_embedding()
+        def __init__(self, lfm2: torch.nn.Module) -> None:
+            super().__init__()
+            self.lfm2 = lfm2
+
+        def forward(self, images: torch.Tensor) -> torch.Tensor:
+            return self.lfm2.image_embedding(images)
+
+    example = torch.randint(0, 256, (1, 3, IMAGE_SIZE, IMAGE_SIZE), dtype=torch.float32, device=device)
+    with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
+        return torch.export.export(_Encoder(lfm2), (example,), strict=False)  # no dynamic_shapes: input shape is fixed
+
+
+def _export_text_decoder(lfm2: torch.nn.Module, *, dtype: torch.dtype, device: str) -> ExportedProgram:
+    """Export the text decoder (embeddings, input_pos) -> output with a dynamic sequence length."""
+    args = lfm2.text_model_args
+
+    class _Decoder(torch.nn.Module):
+        def __init__(self, text_model: torch.nn.Module) -> None:
+            super().__init__()
+            self.text_model = text_model
+
+        def forward(self, embeddings: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
+            out = self.text_model(None, {"input_pos": input_pos}, embeddings)
+            return (out[0] if isinstance(out, tuple) else out).contiguous()
+
+    seq = min(8, args.max_seq_len - 1)  # example length must fit inside the bound below
+    # Bound by the model's own max_seq_len (not the module default) so --max_seq_len is honored.
+    token_dim = Dim("token_dim", min=1, max=args.max_seq_len - 1)
+    example_emb = torch.randn(1, seq, args.dim, dtype=dtype, device=device)
+    example_pos = torch.arange(seq, dtype=torch.int64, device=device)
+
+    with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
+        return torch.export._trace._export(
+            _Decoder(lfm2.text_model),
+            (example_emb, example_pos),
+            dynamic_shapes=({1: token_dim}, {0: token_dim}),
+            strict=False,
+            prefer_deferred_runtime_asserts_over_guards=True,
+        )
+
+
+def _export_token_embedding(lfm2: torch.nn.Module, *, device: str) -> ExportedProgram:
+    embed = lfm2.model_.model.language_model.get_input_embeddings()  # the HF input-embedding module is exported directly
+    token_dim = Dim("token_dim_1", min=1, max=lfm2.text_model_args.max_seq_len)  # model's bound, so --max_seq_len is honored
+    example = torch.zeros(1, lfm2.text_model_args.max_seq_len, dtype=torch.int64, device=device)
+    with torch.no_grad():
+        return torch.export.export(embed, (example,), dynamic_shapes=[{1: token_dim}], strict=False)
+
+
+# ---------------------------------------------------------------------------
+# Pipeline
+# ---------------------------------------------------------------------------
+
+
+def export_all(
+    model_dir: str,
+    output: str,
+    *,
+    dtype: torch.dtype = torch.bfloat16,
+    max_seq_len: int = MAX_SEQ_LEN,
+    params_path: str | None = None,
+) -> None:  # load -> export three methods -> lower to CUDA -> write the .pte
+    logging.info("Loading %s...", model_dir)
+    lfm2_model = Lfm2p5VlModel(
+        model_dir=model_dir,
+        max_seq_len=max_seq_len,
+        max_context_len=max_seq_len,  # context length is set equal to max_seq_len
+        params_path=params_path,
+        use_sdpa_with_kv_cache_op=False,
+    )
+    lfm2 = lfm2_model.get_eager_model().to(dtype=dtype, device="cuda")
+
+    # Mark KV cache and conv state buffers as static addresses so AOTI can
+    # trace through in-place mutations. Must be after .to("cuda") because
+    # marking a CPU buffer that later gets replaced is a no-op.
+    for module in lfm2.text_model.modules():
+        for name, buf in module.named_buffers(recurse=False):
+            if name in ("k_cache", "v_cache", "conv_state"):
+                torch._dynamo.mark_static_address(buf)
+
+    logging.info("[1/3] Vision encoder")
+    vision_ep = _export_image_encoder(lfm2, device="cuda")
+    logging.info("[2/3] Text decoder")
+    decoder_ep = _export_text_decoder(lfm2, dtype=dtype, device="cuda")
+    logging.info("[3/3] Token embedding")
+    token_ep = _export_token_embedding(lfm2, device="cuda")
+
+    programs = {"vision_encoder": vision_ep, "token_embedding": token_ep, "text_decoder": decoder_ep}
+    partitioners = {  # one CudaPartitioner per method, keyed by method name
+        k: [CudaPartitioner([CudaBackend.generate_method_name_compile_spec(k)])]
+        for k in programs
+    }
+    metadata = {  # passed as constant_methods below
+        "get_max_seq_len": lfm2.text_model_args.max_seq_len,
+        "get_vocab_size": lfm2.text_model_args.vocab_size,
+        "use_kv_cache": lfm2.text_model_args.use_kv_cache,
+        "get_eos_ids": [7],  # NOTE(review): hard-coded EOS id — confirm against the tokenizer
+    }
+
+    logging.info("Lowering to Edge IR + CUDA")
+    et_prog = to_edge_transform_and_lower(
+        programs,
+        partitioner=partitioners,
+        compile_config=EdgeCompileConfig(_check_ir_validity=False, _skip_dim_order=True),
+        constant_methods=metadata,
+    )
+
+    logging.info("Finalizing ExecuTorch program")
+    et_program = et_prog.to_executorch(
+        ExecutorchBackendConfig(
+            memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
+            sym_shape_eval_pass={k: ConstraintBasedSymShapeEvalPass() for k in programs},
+        )
+    )
+
+    output_path = Path(output)
+    output_dir = output_path.parent or Path(".")  # NOTE(review): Path objects are always truthy, so the fallback never fires
+    logging.info("Saving %s", output_path)
+    with open(output_path, "wb") as f:
+        et_program.write_to_file(f)
+    et_program.write_tensor_data_to_file(str(output_dir))  # tensor data goes to the .pte's directory
+    logging.info("Done — methods: %s", et_program.methods)
+
+
+def main() -> None:
+    """Parse CLI flags, fill in dtype/config/output defaults, and run the export."""
+    cli = ArgumentParser(description="Export LFM2.5-VL to ExecuTorch (CUDA)")
+    cli.add_argument("--model_dir", default="LiquidAI/LFM2.5-VL-450M")
+    cli.add_argument("--dtype", default="bf16", choices=list(_DTYPE_MAP))
+    cli.add_argument("--max_seq_len", type=int, default=MAX_SEQ_LEN)
+    cli.add_argument("--params", default=None)
+    cli.add_argument("--output", default=None)
+    opts = cli.parse_args()
+
+    precision = _DTYPE_MAP[opts.dtype]
+    config = _resolve_params_path(opts.model_dir, opts.params)
+    target = opts.output or f"lfm2_5_vl_{opts.dtype}_cuda.pte"
+    export_all(opts.model_dir, target, dtype=precision, max_seq_len=opts.max_seq_len, params_path=config)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/models/lfm2_5_vl/model.py b/examples/models/lfm2_5_vl/model.py
new file mode 100644
index 00000000000..a952f3f7062
--- /dev/null
+++ b/examples/models/lfm2_5_vl/model.py
@@ -0,0 +1,141 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""ExecuTorch-friendly LFM2.5-VL model. Mirrors examples/models/llava/model.py."""
+
+from __future__ import annotations
+
+import json
+import math
+from pathlib import Path
+
+import torch
+import torch.nn.functional as F
+from executorch.examples.models.lfm2_5_vl.convert_weights import lfm2_5_vl_to_meta
+from executorch.examples.models.llama.llama_transformer import construct_transformer
+from executorch.examples.models.llama.model_args import ModelArgs
+from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
+    replace_kv_cache_with_custom_kv_cache,
+)
+from executorch.examples.models.llama.source_transformation.sdpa import (
+    replace_sdpa_with_custom_op,
+)
+from executorch.examples.models.model_base import EagerModelBase
+from torch.export import Dim
+from transformers import AutoModelForImageTextToText, AutoProcessor
+
+MAX_SEQ_LEN = 2048  # default for max_seq_len / max_context_len
+IMAGE_SIZE = 512  # side length of the square example image
+PATCH_SIZE = 16  # unfold window and stride used in image_embedding
+FIXED_H, FIXED_W = 32, 32  # patch grid; equals IMAGE_SIZE // PATCH_SIZE per side
+
+_DEFAULT_PARAMS = Path(__file__).parent / "config" / "lfm2_5_vl_1_6b_config.json"  # used when no params_path is given
+
+
+class Lfm2p5Vl(torch.nn.Module):  # wraps the loaded HF model plus a text transformer built from ModelArgs
+    def __init__(self, hf_model: AutoModelForImageTextToText, params: ModelArgs) -> None:
+        super().__init__()
+        self.model_ = hf_model
+        self.text_model_args = params
+        self.text_model = construct_transformer(params)
+
+        if params.use_sdpa_with_kv_cache_op:  # optional swap to the custom KV-cache and SDPA ops
+            self.text_model = replace_kv_cache_with_custom_kv_cache(self.text_model)
+            self.text_model = replace_sdpa_with_custom_op(self.text_model)
+
+        self.text_model.load_state_dict(  # weights come from hf_model, renamed by _translate_weights
+            state_dict=self._translate_weights(), strict=False, assign=True
+        )
+        self._patch_positional_embeddings()
+
+    def _patch_positional_embeddings(self) -> None:
+        embeddings = self.model_.model.vision_tower.vision_model.embeddings
+        orig = embeddings.position_embedding.weight.data
+        sqrt_n = int(math.sqrt(orig.shape[0]))  # assumes the table is a square grid — TODO confirm
+
+        grid = orig.reshape(sqrt_n, sqrt_n, -1).permute(2, 0, 1).unsqueeze(0)  # -> (1, C, sqrt_n, sqrt_n)
+        resized = F.interpolate(
+            grid, size=(FIXED_H, FIXED_W), mode="bilinear", align_corners=False, antialias=True
+        )
+        pe = resized.squeeze(0).permute(1, 2, 0).reshape(FIXED_H * FIXED_W, -1).contiguous()  # -> (FIXED_H * FIXED_W, C)
+        embeddings.register_buffer("_precomputed_pe", pe, persistent=False)
+        embeddings.resize_positional_embeddings = lambda *_args, **_kw: embeddings._precomputed_pe  # ignores its arguments
+
+    def _translate_weights(self) -> dict[str, torch.Tensor]:
+        raw: dict[str, torch.Tensor] = {}
+        for k, v in self.model_.model.language_model.state_dict().items():
+            raw[f"model.language_model.{k}"] = v  # re-add the prefix lfm2_5_vl_to_meta filters on
+        for k, v in self.model_.lm_head.state_dict().items():
+            raw[f"model.language_model.lm_head.{k}"] = v
+        return lfm2_5_vl_to_meta(raw)
+
+    def embed_tokens(self, tokens: torch.Tensor) -> torch.Tensor:
+        return self.model_.model.language_model.get_input_embeddings()(tokens)
+
+    def image_embedding(self, nchw_pixels: torch.Tensor) -> torch.Tensor:
+        """[1, 3, 512, 512] float32 pixels in [0, 255] -> [1, 256, D]; the reshapes below fix batch to 1."""
+        x = (nchw_pixels / 255.0 - 0.5) / 0.5  # maps [0, 255] to [-1, 1]
+
+        x = x.unfold(2, PATCH_SIZE, PATCH_SIZE).unfold(3, PATCH_SIZE, PATCH_SIZE)  # non-overlapping patches over H, then W
+        x = x.permute(0, 2, 3, 4, 5, 1).reshape(1, FIXED_H * FIXED_W, PATCH_SIZE * PATCH_SIZE * 3)  # channels last, then flatten each patch
+
+        out = self.model_.model.vision_tower(
+            pixel_values=x,
+            pixel_attention_mask=None,
+            spatial_shapes=torch.tensor([[FIXED_H, FIXED_W]], dtype=torch.int64, device=x.device),
+            return_dict=True,
+        )
+        feats = out.last_hidden_state.reshape(-1, FIXED_H, FIXED_W, out.last_hidden_state.shape[-1])
+        projected = self.model_.model.multi_modal_projector(feats)
+        return projected.reshape(1, -1, projected.shape[-1])
+
+    def forward(self, images: torch.Tensor) -> torch.Tensor:
+        return self.image_embedding(images)
+
+
+class Lfm2p5VlModel(EagerModelBase):  # loads the HF checkpoint + processor and builds ModelArgs from a JSON config
+    def __init__(
+        self,
+        *,
+        use_sdpa_with_kv_cache_op: bool = True,
+        use_kv_cache: bool = True,
+        max_seq_len: int = MAX_SEQ_LEN,
+        max_context_len: int = MAX_SEQ_LEN,
+        model_dir: str = "LiquidAI/LFM2.5-VL-1.6B",
+        params_path: str | None = None,
+    ) -> None:
+        self.use_sdpa_with_kv_cache_op = use_sdpa_with_kv_cache_op
+        self.max_context_len = max_context_len
+        self.max_seq_len = max_seq_len
+        self.model_dir = model_dir
+
+        resolved = Path(params_path) if params_path else _DEFAULT_PARAMS  # falsy params_path -> bundled 1.6B config
+        params = json.loads(resolved.read_text())
+
+        self.text_model_args = ModelArgs(
+            max_batch_size=1,
+            max_seq_len=max_seq_len,
+            max_context_len=max_context_len,
+            use_kv_cache=use_kv_cache,
+            use_sdpa_with_kv_cache_op=use_sdpa_with_kv_cache_op,
+            enable_dynamic_shape=False,
+            **params,  # remaining fields come from the JSON config
+        )
+
+        self.hf_model = AutoModelForImageTextToText.from_pretrained(
+            model_dir, device_map="cpu", torch_dtype=torch.float32
+        )
+        self.processor = AutoProcessor.from_pretrained(model_dir)
+        self.tokenizer = self.processor.tokenizer
+
+    def get_eager_model(self) -> torch.nn.Module:
+        return Lfm2p5Vl(self.hf_model, self.text_model_args).to(dtype=torch.float32)  # builds a new wrapper on every call
+
+    def get_example_inputs(self) -> tuple[torch.Tensor, ...]:
+        return (torch.randint(0, 256, (1, 3, IMAGE_SIZE, IMAGE_SIZE), dtype=torch.float32),)
+
+    def get_dynamic_shapes(self) -> None:
+        return None  # no dynamic-shape spec is provided
diff --git a/exir/emit/_emitter.py b/exir/emit/_emitter.py
index a48d88fa224..440c6dd9b4c 100644
--- a/exir/emit/_emitter.py
+++ b/exir/emit/_emitter.py
@@ -456,10 +456,13 @@ def _tensor_spec_to_evalue(
                 ctypes.c_char * typing.cast(torch.UntypedStorage, spec.storage).nbytes()
             )
 
+            storage = typing.cast(torch.UntypedStorage, spec.storage)
+            if spec.allocated_memory != 0 and storage.device.type != "cpu":
+                storage = storage.cpu()
             buffer_data = (
                 bytes(
                     ctypes.cast(
-                        typing.cast(torch.UntypedStorage, spec.storage).data_ptr(),
+                        storage.data_ptr(),
                         ctypes.POINTER(spec_array_type),
                     ).contents
                 )