diff --git a/book/src/puzzle_04/intro.mojo b/book/src/puzzle_04/intro.mojo
index 4751240d..d88eed07 100644
--- a/book/src/puzzle_04/intro.mojo
+++ b/book/src/puzzle_04/intro.mojo
@@ -6,7 +6,7 @@ alias WIDTH = 3
alias dtype = DType.float32
alias layout = Layout.row_major(HEIGHT, WIDTH)
-fn kernel[dtype: DType, layout: Layout](tensor: LayoutTensor[mut=True, dtype, layout]):
+fn kernel[dtype: DType, layout: Layout](tensor: LayoutTensor[dtype, layout, MutAnyOrigin]):
print("Before:")
print(tensor)
tensor[0, 0] += 1
@@ -17,8 +17,8 @@ def main():
ctx = DeviceContext()
a = ctx.enqueue_create_buffer[dtype](HEIGHT * WIDTH).enqueue_fill(0)
- tensor = LayoutTensor[mut=True, dtype, layout](a.unsafe_ptr())
+ tensor = LayoutTensor[dtype, layout, MutAnyOrigin](a)
# Note: since `tensor` is a device tensor we can't print it without the kernel wrapper
- ctx.enqueue_function[kernel[dtype, layout]](tensor, grid_dim=1, block_dim=1)
+ ctx.enqueue_function_checked[kernel[dtype, layout], kernel[dtype, layout]](tensor, grid_dim=1, block_dim=1)
ctx.synchronize()
diff --git a/book/src/puzzle_08/layout_tensor.md b/book/src/puzzle_08/layout_tensor.md
index ee441dfb..3a7fcec1 100644
--- a/book/src/puzzle_08/layout_tensor.md
+++ b/book/src/puzzle_08/layout_tensor.md
@@ -30,7 +30,7 @@ The key insight is how LayoutTensor simplifies shared memory management while ma
shared = stack_allocation[TPB, Scalar[dtype]]()
# LayoutTensor approach
- shared = LayoutTensor[dtype, Layout.row_major(TPB), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+ shared = LayoutTensor[dtype, Layout.row_major(TPB), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
```
2. **Memory access**: Same syntax
@@ -168,7 +168,7 @@ This solution demonstrates how LayoutTensor simplifies shared memory usage while
```txt
# Clean LayoutTensor API with address_space
- shared = LayoutTensor[dtype, Layout.row_major(TPB), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+ shared = LayoutTensor[dtype, Layout.row_major(TPB), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
```
- Natural indexing for both global and shared:
diff --git a/book/src/puzzle_11/layout_tensor.md b/book/src/puzzle_11/layout_tensor.md
index 8c4d6c36..a97dceea 100644
--- a/book/src/puzzle_11/layout_tensor.md
+++ b/book/src/puzzle_11/layout_tensor.md
@@ -24,7 +24,7 @@ The key insight is how LayoutTensor simplifies shared memory management while ma
Notes:
-- **LayoutTensor allocation**: Use `LayoutTensor[dtype, Layout.row_major(TPB), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()`
+- **LayoutTensor allocation**: Use `LayoutTensor[dtype, Layout.row_major(TPB), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()`
- **Window access**: Natural indexing for 3-element windows
- **Edge handling**: Special cases for first two positions
- **Memory pattern**: One shared memory load per thread
@@ -116,7 +116,7 @@ The solution implements a sliding window sum using LayoutTensor with these key s
- LayoutTensor creates block-local storage with address_space:
```txt
- shared = LayoutTensor[dtype, Layout.row_major(TPB), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+ shared = LayoutTensor[dtype, Layout.row_major(TPB), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
```
- Each thread loads one element:
diff --git a/book/src/puzzle_12/layout_tensor.md b/book/src/puzzle_12/layout_tensor.md
index e544a319..3038bef7 100644
--- a/book/src/puzzle_12/layout_tensor.md
+++ b/book/src/puzzle_12/layout_tensor.md
@@ -25,7 +25,7 @@ The key insight is how LayoutTensor simplifies memory management while maintaini
Notes:
-- **LayoutTensor allocation**: Use `LayoutTensor[dtype, Layout.row_major(TPB), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()`
+- **LayoutTensor allocation**: Use `LayoutTensor[dtype, Layout.row_major(TPB), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()`
- **Element access**: Natural indexing with bounds checking
- **Layout handling**: Separate layouts for input and output
- **Thread coordination**: Same synchronization patterns with `barrier()`
diff --git a/book/src/puzzle_13/block_boundary.md b/book/src/puzzle_13/block_boundary.md
index 45c8adb5..115740b8 100644
--- a/book/src/puzzle_13/block_boundary.md
+++ b/book/src/puzzle_13/block_boundary.md
@@ -32,7 +32,7 @@ Notes:
-1. Use `LayoutTensor[dtype, Layout.row_major(TPB + CONV_2 - 1), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()` for shared memory
+1. Use `LayoutTensor[dtype, Layout.row_major(TPB + CONV_2 - 1), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()` for shared memory
2. Load main data: `shared_a[local_i] = a[global_i]`
3. Load boundary: `if local_i < CONV_2 - 1` handle next block data
4. Load kernel: `shared_b[local_i] = b[local_i]`
@@ -125,8 +125,8 @@ Size calculation:
```mojo
# First: account for padding needed for convolution window
- shared_a = LayoutTensor[dtype, Layout.row_major(TPB + CONV_2 - 1), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
- shared_b = LayoutTensor[dtype, Layout.row_major(CONV_2), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+ shared_a = LayoutTensor[dtype, Layout.row_major(TPB + CONV_2 - 1), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+ shared_b = LayoutTensor[dtype, Layout.row_major(CONV_2), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
```
This allocation pattern ensures we have enough space for both the block's data and the overlap region.
diff --git a/book/src/puzzle_14/complete.md b/book/src/puzzle_14/complete.md
index 393d95d2..73b6dbea 100644
--- a/book/src/puzzle_14/complete.md
+++ b/book/src/puzzle_14/complete.md
@@ -343,10 +343,10 @@ The two kernel phases execute sequentially **without any explicit synchronizatio
```mojo
# Phase 1: Local prefix sums
-ctx.enqueue_function[prefix_sum_local_phase[...]](...)
+ctx.enqueue_function_checked[prefix_sum_local_phase[...], prefix_sum_local_phase[...]](...)
# Phase 2: Add block sums (automatically waits for Phase 1)
-ctx.enqueue_function[prefix_sum_block_sum_phase[...]](...)
+ctx.enqueue_function_checked[prefix_sum_block_sum_phase[...], prefix_sum_block_sum_phase[...]](...)
```
**Key insight**: Mojo's `DeviceContext` uses a single execution stream (CUDA stream on NVIDIA GPUs, HIP stream on AMD ROCm GPUs), which guarantees that kernel launches execute in the exact order they are enqueued. No explicit synchronization is needed between kernels.
diff --git a/book/src/puzzle_16/shared_memory.md b/book/src/puzzle_16/shared_memory.md
index 662f802a..27a70216 100644
--- a/book/src/puzzle_16/shared_memory.md
+++ b/book/src/puzzle_16/shared_memory.md
@@ -131,8 +131,8 @@ Matrix B: b_shared: (similar layout)
```mojo
# Create 2D shared memory tensors using LayoutTensor with address_space
- a_shared = LayoutTensor[dtype, Layout.row_major(TPB, TPB), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
- b_shared = LayoutTensor[dtype, Layout.row_major(TPB, TPB), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+ a_shared = LayoutTensor[dtype, Layout.row_major(TPB, TPB), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+ b_shared = LayoutTensor[dtype, Layout.row_major(TPB, TPB), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
```
2. **Thread Indexing**:
diff --git a/book/src/puzzle_17/puzzle_17.md b/book/src/puzzle_17/puzzle_17.md
index 913c3f60..4be1682b 100644
--- a/book/src/puzzle_17/puzzle_17.md
+++ b/book/src/puzzle_17/puzzle_17.md
@@ -181,7 +181,7 @@ Let's break down how this works in the larger context:
```mojo
gpu_ctx = ctx.get_device_context()
gpu_ctx.enqueue_memset(...) # Zero output buffer
- gpu_ctx.enqueue_function[...](...) # Schedule kernel
+ gpu_ctx.enqueue_function_checked[..., ...](...) # Schedule kernel
```
- Device context manages GPU resources
diff --git a/book/src/puzzle_18/puzzle_18.md b/book/src/puzzle_18/puzzle_18.md
index 5294cdb3..3a2427b4 100644
--- a/book/src/puzzle_18/puzzle_18.md
+++ b/book/src/puzzle_18/puzzle_18.md
@@ -265,8 +265,8 @@ The kernel is parameterized with:
#### Shared memory allocation
```mojo
-shared_max = LayoutTensor[dtype, Layout.row_major(BLOCK_DIM_X), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
-shared_sum = LayoutTensor[dtype, Layout.row_major(BLOCK_DIM_X), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+shared_max = LayoutTensor[dtype, Layout.row_major(BLOCK_DIM_X), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
+shared_sum = LayoutTensor[dtype, Layout.row_major(BLOCK_DIM_X), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()
```
The kernel allocates two shared memory buffers:
diff --git a/book/src/puzzle_19/puzzle_19.md b/book/src/puzzle_19/puzzle_19.md
index d56f0fde..3edac292 100644
--- a/book/src/puzzle_19/puzzle_19.md
+++ b/book/src/puzzle_19/puzzle_19.md
@@ -121,7 +121,7 @@ To complete this puzzle, we'll leverage the tiled matmul kernel from [Puzzle 16]
**Transpose Kernel Implementation Guide:**
-1. **Shared Memory Setup**: Use `LayoutTensor[dtype, Layout.row_major(TRANSPOSE_BLOCK_DIM_XY, TRANSPOSE_BLOCK_DIM_XY), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()` to create a square `TRANSPOSE_BLOCK_DIM_XY` × `TRANSPOSE_BLOCK_DIM_XY` shared memory tile for efficient data exchange between threads
+1. **Shared Memory Setup**: Use `LayoutTensor[dtype, Layout.row_major(TRANSPOSE_BLOCK_DIM_XY, TRANSPOSE_BLOCK_DIM_XY), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()` to create a square `TRANSPOSE_BLOCK_DIM_XY` × `TRANSPOSE_BLOCK_DIM_XY` shared memory tile for efficient data exchange between threads
2. **Thread Indexing**: Map threads to matrix elements:
- `local_row = thread_idx.y`, `local_col = thread_idx.x` (position within the block)
diff --git a/book/src/puzzle_32/conflict_free_patterns.md b/book/src/puzzle_32/conflict_free_patterns.md
index 0e717114..750ca61d 100644
--- a/book/src/puzzle_32/conflict_free_patterns.md
+++ b/book/src/puzzle_32/conflict_free_patterns.md
@@ -354,7 +354,7 @@ constant = shared[0] # All threads read same address - hardware optimized
**3. Padding techniques:**
```mojo
-shared = LayoutTensor[dtype, Layout.row_major(TPB + 1), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation() # Shift access patterns
+shared = LayoutTensor[dtype, Layout.row_major(TPB + 1), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation() # Shift access patterns
```
**4. Access pattern analysis:**
diff --git a/book/src/puzzle_34/cluster_coordination_basics.md b/book/src/puzzle_34/cluster_coordination_basics.md
index fd83c630..58d84a24 100644
--- a/book/src/puzzle_34/cluster_coordination_basics.md
+++ b/book/src/puzzle_34/cluster_coordination_basics.md
@@ -65,7 +65,7 @@ Traditional single-block algorithms like those in [Puzzle 27](../puzzle_27/puzzl
### **Shared memory coordination**
-- Allocate shared memory using `LayoutTensor[dtype, Layout.row_major(tpb), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()` (see [shared memory basics from Puzzle 8](../puzzle_08/puzzle_08.md))
+- Allocate shared memory using `LayoutTensor[dtype, Layout.row_major(tpb), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()` (see [shared memory basics from Puzzle 8](../puzzle_08/puzzle_08.md))
- Process input data scaled by `block_id + 1` to create distinct scaling per block
- Use bounds checking when accessing input data (pattern from [guards in Puzzle 3](../puzzle_03/puzzle_03.md))
@@ -153,7 +153,7 @@ block_id = Int(block_idx.x) # Block index for reliable
**Shared memory allocation and data processing:**
-- Each block allocates its own shared memory workspace: `LayoutTensor[dtype, Layout.row_major(tpb), MutableAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()`
+- Each block allocates its own shared memory workspace: `LayoutTensor[dtype, Layout.row_major(tpb), MutAnyOrigin, address_space = AddressSpace.SHARED].stack_allocation()`
- **Scaling strategy**: `data_scale = Float32(block_id + 1)` ensures each block processes data differently
- Block 0: multiplies by 1.0, Block 1: by 2.0, Block 2: by 3.0, Block 3: by 4.0
- **Bounds checking**: `if global_i < size:` prevents out-of-bounds memory access
diff --git a/problems/p01/p01.mojo b/problems/p01/p01.mojo
index bda50bdc..0fa69032 100644
--- a/problems/p01/p01.mojo
+++ b/problems/p01/p01.mojo
@@ -30,9 +30,9 @@ def main():
for i in range(SIZE):
a_host[i] = i
- ctx.enqueue_function[add_10](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10, add_10](
+ out,
+ a,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
diff --git a/problems/p02/p02.mojo b/problems/p02/p02.mojo
index be6b8353..b27bca85 100644
--- a/problems/p02/p02.mojo
+++ b/problems/p02/p02.mojo
@@ -34,10 +34,10 @@ def main():
b_host[i] = i
expected[i] = a_host[i] + b_host[i]
- ctx.enqueue_function[add](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
- b.unsafe_ptr(),
+ ctx.enqueue_function_checked[add, add](
+ out,
+ a,
+ b,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
diff --git a/problems/p03/p03.mojo b/problems/p03/p03.mojo
index 474489c5..6421a168 100644
--- a/problems/p03/p03.mojo
+++ b/problems/p03/p03.mojo
@@ -30,9 +30,9 @@ def main():
for i in range(SIZE):
a_host[i] = i
- ctx.enqueue_function[add_10_guard](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_guard, add_10_guard](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/problems/p04/p04.mojo b/problems/p04/p04.mojo
index 2a954400..d1aa2d78 100644
--- a/problems/p04/p04.mojo
+++ b/problems/p04/p04.mojo
@@ -37,9 +37,9 @@ def main():
a_host[i * SIZE + j] = i * SIZE + j
expected[i * SIZE + j] = a_host[i * SIZE + j] + 10
- ctx.enqueue_function[add_10_2d](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_2d, add_10_2d](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/problems/p04/p04_layout_tensor.mojo b/problems/p04/p04_layout_tensor.mojo
index 01e4b3f3..89bf9d7e 100644
--- a/problems/p04/p04_layout_tensor.mojo
+++ b/problems/p04/p04_layout_tensor.mojo
@@ -12,8 +12,8 @@ alias layout = Layout.row_major(SIZE, SIZE)
fn add_10_2d(
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=True, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, MutAnyOrigin],
size: Int,
):
row = thread_idx.y
@@ -27,7 +27,7 @@ fn add_10_2d(
def main():
with DeviceContext() as ctx:
out_buf = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(0)
- out_tensor = LayoutTensor[mut=True, dtype, layout](out_buf.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out_buf)
print("out shape:", out_tensor.shape[0](), "x", out_tensor.shape[1]())
expected = ctx.enqueue_create_host_buffer[dtype](
@@ -40,9 +40,9 @@ def main():
a_host[i] = i
expected[i] = a_host[i] + 10
- a_tensor = LayoutTensor[mut=True, dtype, layout](a.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](a)
- ctx.enqueue_function[add_10_2d](
+ ctx.enqueue_function_checked[add_10_2d, add_10_2d](
out_tensor,
a_tensor,
SIZE,
diff --git a/problems/p05/p05.mojo b/problems/p05/p05.mojo
index 37e8aa83..003a5565 100644
--- a/problems/p05/p05.mojo
+++ b/problems/p05/p05.mojo
@@ -39,10 +39,10 @@ def main():
for j in range(SIZE):
expected[i * SIZE + j] = a_host[j] + b_host[i]
- ctx.enqueue_function[broadcast_add](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
- b.unsafe_ptr(),
+ ctx.enqueue_function_checked[broadcast_add, broadcast_add](
+ out,
+ a,
+ b,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/problems/p05/p05_layout_tensor.mojo b/problems/p05/p05_layout_tensor.mojo
index 42fee181..dd3a5f25 100644
--- a/problems/p05/p05_layout_tensor.mojo
+++ b/problems/p05/p05_layout_tensor.mojo
@@ -18,9 +18,9 @@ fn broadcast_add[
a_layout: Layout,
b_layout: Layout,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, a_layout],
- b: LayoutTensor[mut=False, dtype, b_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, a_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, b_layout, ImmutAnyOrigin],
size: Int,
):
row = thread_idx.y
@@ -32,16 +32,14 @@ fn broadcast_add[
def main():
with DeviceContext() as ctx:
out_buf = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(0)
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](
- out_buf.unsafe_ptr()
- )
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out_buf)
print("out shape:", out_tensor.shape[0](), "x", out_tensor.shape[1]())
expected_buf = ctx.enqueue_create_host_buffer[dtype](
SIZE * SIZE
).enqueue_fill(0)
- expected_tensor = LayoutTensor[mut=True, dtype, out_layout](
- expected_buf.unsafe_ptr()
+ expected_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](
+ expected_buf
)
a = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)
@@ -55,10 +53,11 @@ def main():
for j in range(SIZE):
expected_tensor[i, j] = a_host[j] + b_host[i]
- a_tensor = LayoutTensor[dtype, a_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[dtype, b_layout](b.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, a_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, b_layout, ImmutAnyOrigin](b)
- ctx.enqueue_function[broadcast_add[out_layout, a_layout, b_layout]](
+ alias kernel = broadcast_add[out_layout, a_layout, b_layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
diff --git a/problems/p06/p06.mojo b/problems/p06/p06.mojo
index c679b21a..6a56d9ce 100644
--- a/problems/p06/p06.mojo
+++ b/problems/p06/p06.mojo
@@ -30,9 +30,9 @@ def main():
for i in range(SIZE):
a_host[i] = i
- ctx.enqueue_function[add_10_blocks](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_blocks, add_10_blocks](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/problems/p07/p07.mojo b/problems/p07/p07.mojo
index 09db5cd1..623000e1 100644
--- a/problems/p07/p07.mojo
+++ b/problems/p07/p07.mojo
@@ -38,9 +38,9 @@ def main():
a_host[k] = k
expected[k] = k + 10
- ctx.enqueue_function[add_10_blocks_2d](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_blocks_2d, add_10_blocks_2d](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/problems/p07/p07_layout_tensor.mojo b/problems/p07/p07_layout_tensor.mojo
index 8f939fe4..02ca5153 100644
--- a/problems/p07/p07_layout_tensor.mojo
+++ b/problems/p07/p07_layout_tensor.mojo
@@ -16,8 +16,8 @@ fn add_10_blocks_2d[
out_layout: Layout,
a_layout: Layout,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, a_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, a_layout, ImmutAnyOrigin],
size: Int,
):
row = block_dim.y * block_idx.y + thread_idx.y
@@ -31,9 +31,7 @@ fn add_10_blocks_2d[
def main():
with DeviceContext() as ctx:
out_buf = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(0)
- out_tensor = LayoutTensor[dtype, out_layout, MutableAnyOrigin](
- out_buf.unsafe_ptr()
- )
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out_buf)
expected_buf = ctx.enqueue_create_host_buffer[dtype](
SIZE * SIZE
@@ -48,11 +46,10 @@ def main():
a_host[k] = k
expected_buf[k] = k + 10
- a_tensor = LayoutTensor[dtype, a_layout, MutableAnyOrigin](
- a.unsafe_ptr()
- )
+ a_tensor = LayoutTensor[dtype, a_layout, ImmutAnyOrigin](a)
- ctx.enqueue_function[add_10_blocks_2d[out_layout, a_layout]](
+ alias kernel = add_10_blocks_2d[out_layout, a_layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
SIZE,
@@ -62,16 +59,14 @@ def main():
ctx.synchronize()
- expected_tensor = LayoutTensor[dtype, out_layout, MutableAnyOrigin](
- expected_buf.unsafe_ptr()
+ expected_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](
+ expected_buf
)
with out_buf.map_to_host() as out_buf_host:
print(
"out:",
- LayoutTensor[dtype, out_layout, MutableAnyOrigin](
- out_buf_host.unsafe_ptr()
- ),
+ LayoutTensor[dtype, out_layout, MutAnyOrigin](out_buf_host),
)
print("expected:", expected_tensor)
for i in range(SIZE):
diff --git a/problems/p08/p08.mojo b/problems/p08/p08.mojo
index dd74f555..1afaa281 100644
--- a/problems/p08/p08.mojo
+++ b/problems/p08/p08.mojo
@@ -43,9 +43,9 @@ def main():
with DeviceContext() as ctx:
out = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)
a = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(1)
- ctx.enqueue_function[add_10_shared](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_shared, add_10_shared](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/problems/p08/p08_layout_tensor.mojo b/problems/p08/p08_layout_tensor.mojo
index a6fce741..2203d0eb 100644
--- a/problems/p08/p08_layout_tensor.mojo
+++ b/problems/p08/p08_layout_tensor.mojo
@@ -17,15 +17,15 @@ alias layout = Layout.row_major(SIZE)
fn add_10_shared_layout_tensor[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=True, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
# Allocate shared memory using LayoutTensor with explicit address_space
shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -48,10 +48,11 @@ def main():
out = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)
a = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(1)
- out_tensor = LayoutTensor[dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[dtype, layout](a.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
- ctx.enqueue_function[add_10_shared_layout_tensor[layout]](
+ alias kernel = add_10_shared_layout_tensor[layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
SIZE,
diff --git a/problems/p09/p09.mojo b/problems/p09/p09.mojo
index 5df6563d..da7d208f 100644
--- a/problems/p09/p09.mojo
+++ b/problems/p09/p09.mojo
@@ -60,7 +60,7 @@ fn collaborative_filter(
shared_workspace = LayoutTensor[
dtype,
Layout.row_major(SIZE - 1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/problems/p10/p10.mojo b/problems/p10/p10.mojo
index 1f9fdea4..94627aa1 100644
--- a/problems/p10/p10.mojo
+++ b/problems/p10/p10.mojo
@@ -25,7 +25,7 @@ fn shared_memory_race(
shared_sum = LayoutTensor[
dtype,
Layout.row_major(1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/problems/p11/p11.mojo b/problems/p11/p11.mojo
index 62d3c6d1..cc42c861 100644
--- a/problems/p11/p11.mojo
+++ b/problems/p11/p11.mojo
@@ -39,9 +39,9 @@ def main():
for i in range(SIZE):
a_host[i] = i
- ctx.enqueue_function[pooling](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[pooling, pooling](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
@@ -52,7 +52,7 @@ def main():
ctx.synchronize()
with a.map_to_host() as a_host:
- ptr = a_host.unsafe_ptr()
+ ptr = a_host
for i in range(SIZE):
s = Scalar[dtype](0)
for j in range(max(i - 2, 0), i + 1):
diff --git a/problems/p11/p11_layout_tensor.mojo b/problems/p11/p11_layout_tensor.mojo
index b24d2f81..941275f1 100644
--- a/problems/p11/p11_layout_tensor.mojo
+++ b/problems/p11/p11_layout_tensor.mojo
@@ -16,15 +16,15 @@ alias layout = Layout.row_major(SIZE)
fn pooling[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=True, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
# Allocate shared memory using tensor builder
shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -45,10 +45,10 @@ def main():
for i in range(SIZE):
a_host[i] = i
- out_tensor = LayoutTensor[dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[dtype, layout](a.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
- ctx.enqueue_function[pooling[layout]](
+ ctx.enqueue_function_checked[pooling[layout], pooling[layout]](
out_tensor,
a_tensor,
SIZE,
@@ -60,7 +60,7 @@ def main():
ctx.synchronize()
with a.map_to_host() as a_host:
- ptr = a_host.unsafe_ptr()
+ ptr = a_host
for i in range(SIZE):
s = Scalar[dtype](0)
for j in range(max(i - 2, 0), i + 1):
diff --git a/problems/p12/p12.mojo b/problems/p12/p12.mojo
index 690230cc..383eff4c 100644
--- a/problems/p12/p12.mojo
+++ b/problems/p12/p12.mojo
@@ -36,10 +36,10 @@ def main():
a_host[i] = i
b_host[i] = i
- ctx.enqueue_function[dot_product](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
- b.unsafe_ptr(),
+ ctx.enqueue_function_checked[dot_product, dot_product](
+ out,
+ a,
+ b,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/problems/p12/p12_layout_tensor.mojo b/problems/p12/p12_layout_tensor.mojo
index e94c32ee..1636a180 100644
--- a/problems/p12/p12_layout_tensor.mojo
+++ b/problems/p12/p12_layout_tensor.mojo
@@ -18,9 +18,9 @@ alias out_layout = Layout.row_major(1)
fn dot_product[
in_layout: Layout, out_layout: Layout
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=True, dtype, in_layout],
- b: LayoutTensor[mut=True, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
):
# FILL ME IN (roughly 13 lines)
@@ -41,11 +41,12 @@ def main():
a_host[i] = i
b_host[i] = i
- out_tensor = LayoutTensor[dtype, out_layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[dtype, layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[dtype, layout](b.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](b)
- ctx.enqueue_function[dot_product[layout, out_layout]](
+ alias kernel = dot_product[layout, out_layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
diff --git a/problems/p17/op/conv1d.mojo b/problems/p17/op/conv1d.mojo
index f0bcd5c9..1272a31e 100644
--- a/problems/p17/op/conv1d.mojo
+++ b/problems/p17/op/conv1d.mojo
@@ -16,9 +16,9 @@ fn conv1d_kernel[
conv_size: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- input: LayoutTensor[mut=True, dtype, in_layout],
- kernel: LayoutTensor[mut=True, dtype, conv_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, in_layout, MutAnyOrigin],
+ kernel: LayoutTensor[dtype, conv_layout, MutAnyOrigin],
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
@@ -26,13 +26,13 @@ fn conv1d_kernel[
shared_a = LayoutTensor[
dtype,
Layout.row_major(TPB + conv_size - 1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_b = LayoutTensor[
dtype,
Layout.row_major(conv_size),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
if global_i < input_size:
diff --git a/problems/p18/op/softmax.mojo b/problems/p18/op/softmax.mojo
index d4d5621e..5876df2b 100644
--- a/problems/p18/op/softmax.mojo
+++ b/problems/p18/op/softmax.mojo
@@ -38,8 +38,8 @@ fn softmax_cpu_kernel[
input_size: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[dtype, layout, MutableAnyOrigin],
- input: LayoutTensor[dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, layout, MutAnyOrigin],
):
# FILL IN (roughly 10 lines)
...
@@ -65,12 +65,12 @@ struct SoftmaxCustomOp:
ctx: DeviceContextPtr,
) raises:
# Note: rebind is necessary now but it shouldn't be!
- var output_tensor = rebind[
- LayoutTensor[dtype, layout, MutableAnyOrigin]
- ](output.to_layout_tensor())
- var input_tensor = rebind[
- LayoutTensor[dtype, layout, MutableAnyOrigin]
- ](input.to_layout_tensor())
+ var output_tensor = rebind[LayoutTensor[dtype, layout, MutAnyOrigin]](
+ output.to_layout_tensor()
+ )
+ var input_tensor = rebind[LayoutTensor[dtype, layout, MutAnyOrigin]](
+ input.to_layout_tensor()
+ )
@parameter
if target == "gpu":
diff --git a/problems/p19/op/attention.mojo b/problems/p19/op/attention.mojo
index ceffc72c..eddb45d1 100644
--- a/problems/p19/op/attention.mojo
+++ b/problems/p19/op/attention.mojo
@@ -37,9 +37,9 @@ fn matmul_idiomatic_tiled[
inner: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, a_layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, b_layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, a_layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, b_layout, MutAnyOrigin],
):
"""Updated idiomatic tiled matrix multiplication from p16."""
local_row = thread_idx.y
@@ -54,13 +54,13 @@ fn matmul_idiomatic_tiled[
a_shared = LayoutTensor[
dtype,
Layout.row_major(MATMUL_BLOCK_DIM_XY, MATMUL_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(MATMUL_BLOCK_DIM_XY, MATMUL_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
var acc: output.element_type = 0
@@ -124,8 +124,8 @@ fn transpose_kernel[
cols: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, layout_out, MutableAnyOrigin],
- inp: LayoutTensor[mut=False, dtype, layout_in, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout_out, MutAnyOrigin],
+ inp: LayoutTensor[mut=False, dtype, layout_in, MutAnyOrigin],
):
# FILL ME IN (roughly 18 lines)
...
@@ -146,13 +146,13 @@ fn softmax_gpu_kernel[
shared_max = LayoutTensor[
dtype,
Layout.row_major(SOFTMAX_BLOCK_DIM_X),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_sum = LayoutTensor[
dtype,
Layout.row_major(SOFTMAX_BLOCK_DIM_X),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
global_i = thread_idx.x
@@ -213,10 +213,10 @@ fn attention_cpu_kernel[
d: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[dtype, layout_out, MutableAnyOrigin],
- q: LayoutTensor[dtype, layout_q, MutableAnyOrigin],
- k: LayoutTensor[dtype, layout_k, MutableAnyOrigin],
- v: LayoutTensor[dtype, layout_v, MutableAnyOrigin],
+ output: LayoutTensor[dtype, layout_out, MutAnyOrigin],
+ q: LayoutTensor[dtype, layout_q, MutAnyOrigin],
+ k: LayoutTensor[dtype, layout_k, MutAnyOrigin],
+ v: LayoutTensor[dtype, layout_v, MutAnyOrigin],
):
"""CPU implementation of vector attention."""
var scores = List[Float32]()
@@ -278,15 +278,15 @@ struct AttentionCustomOp:
# Convert to layout tensors
var output_tensor = rebind[
- LayoutTensor[dtype, layout_out, MutableAnyOrigin]
+ LayoutTensor[dtype, layout_out, MutAnyOrigin]
](output.to_layout_tensor())
- var q_tensor = rebind[LayoutTensor[dtype, layout_q, MutableAnyOrigin]](
+ var q_tensor = rebind[LayoutTensor[dtype, layout_q, MutAnyOrigin]](
q.to_layout_tensor()
)
- var k_tensor = rebind[LayoutTensor[dtype, layout_k, MutableAnyOrigin]](
+ var k_tensor = rebind[LayoutTensor[dtype, layout_k, MutAnyOrigin]](
k.to_layout_tensor()
)
- var v_tensor = rebind[LayoutTensor[dtype, layout_v, MutableAnyOrigin]](
+ var v_tensor = rebind[LayoutTensor[dtype, layout_v, MutAnyOrigin]](
v.to_layout_tensor()
)
@@ -342,7 +342,7 @@ struct AttentionCustomOp:
seq_len
) # Reused for scores and weights
- k_t = LayoutTensor[mut=True, dtype, layout_k_t, MutableAnyOrigin](
+ k_t = LayoutTensor[mut=True, dtype, layout_k_t, MutAnyOrigin](
k_t_buf.unsafe_ptr()
)
diff --git a/problems/p20/op/conv1d.mojo b/problems/p20/op/conv1d.mojo
index b03d972a..e89d81b3 100644
--- a/problems/p20/op/conv1d.mojo
+++ b/problems/p20/op/conv1d.mojo
@@ -19,9 +19,9 @@ fn conv1d_kernel[
conv_size: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- input: LayoutTensor[mut=True, dtype, in_layout],
- kernel: LayoutTensor[mut=True, dtype, conv_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, in_layout, MutAnyOrigin],
+ kernel: LayoutTensor[dtype, conv_layout, MutAnyOrigin],
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
@@ -29,13 +29,13 @@ fn conv1d_kernel[
shared_a = LayoutTensor[
dtype,
Layout.row_major(TPB + conv_size - 1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_b = LayoutTensor[
dtype,
Layout.row_major(conv_size),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
if global_i < input_size:
@@ -113,11 +113,10 @@ struct Conv1DCustomOp:
),
0,
)
- gpu_ctx.enqueue_function[
- conv1d_kernel[
- in_layout, out_layout, conv_layout, input_size, conv_size
- ]
- ](
+ alias kernel = conv1d_kernel[
+ in_layout, out_layout, conv_layout, input_size, conv_size
+ ]
+ gpu_ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
input_tensor,
kernel_tensor,
diff --git a/problems/p21/op/embedding.mojo b/problems/p21/op/embedding.mojo
index bc650460..88d535cf 100644
--- a/problems/p21/op/embedding.mojo
+++ b/problems/p21/op/embedding.mojo
@@ -19,9 +19,9 @@ fn embedding_kernel_coalesced[
embed_dim: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- indices: LayoutTensor[mut=True, DType.int32, indices_layout],
- weights: LayoutTensor[mut=True, dtype, weights_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ indices: LayoutTensor[DType.int32, indices_layout, MutAnyOrigin],
+ weights: LayoutTensor[dtype, weights_layout, MutAnyOrigin],
):
"""
Memory-coalescing focused embedding kernel.
@@ -63,9 +63,9 @@ fn embedding_kernel_2d[
embed_dim: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- indices: LayoutTensor[mut=True, DType.int32, indices_layout],
- weights: LayoutTensor[mut=True, dtype, weights_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ indices: LayoutTensor[DType.int32, indices_layout, MutAnyOrigin],
+ weights: LayoutTensor[dtype, weights_layout, MutAnyOrigin],
):
"""
2D grid non-coalesced embedding kernel.
@@ -154,20 +154,19 @@ struct EmbeddingCustomOp:
blocks = max(1, ceildiv(total_elements, THREADS_PER_BLOCK))
# Compile and launch optimized kernel
- compiled_kernel = gpu_ctx.compile_function[
- embedding_kernel_coalesced[
- indices_layout,
- weights_layout,
- out_layout,
- batch_size,
- seq_len,
- vocab_size,
- embed_dim,
- output.dtype,
- ]
- ]()
-
- gpu_ctx.enqueue_function(
+ alias kernel = embedding_kernel_coalesced[
+ indices_layout,
+ weights_layout,
+ out_layout,
+ batch_size,
+ seq_len,
+ vocab_size,
+ embed_dim,
+ output.dtype,
+ ]
+ compiled_kernel = gpu_ctx.compile_function_checked[kernel, kernel]()
+
+ gpu_ctx.enqueue_function_checked(
compiled_kernel,
output_tensor,
indices_tensor,
diff --git a/problems/p22/op/layernorm_linear.mojo b/problems/p22/op/layernorm_linear.mojo
index 3d0fedda..ae28d462 100644
--- a/problems/p22/op/layernorm_linear.mojo
+++ b/problems/p22/op/layernorm_linear.mojo
@@ -28,9 +28,9 @@ fn matmul_idiomatic_tiled[
inner: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, a_layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, b_layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, a_layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, b_layout, MutAnyOrigin],
):
"""Idiomatic tiled matrix multiplication from p19."""
local_row = thread_idx.y
@@ -45,13 +45,13 @@ fn matmul_idiomatic_tiled[
a_shared = LayoutTensor[
dtype,
Layout.row_major(MATMUL_BLOCK_DIM_XY, MATMUL_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(MATMUL_BLOCK_DIM_XY, MATMUL_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
var acc: output.element_type = 0
@@ -153,8 +153,8 @@ fn transpose_kernel[
cols: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, layout_out, MutableAnyOrigin],
- inp: LayoutTensor[mut=False, dtype, layout_in, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout_out, MutAnyOrigin],
+ inp: LayoutTensor[mut=False, dtype, layout_in, MutAnyOrigin],
):
"""Transpose matrix using shared memory tiling for coalesced access.
We will learn more about coalesced access in the next part.
@@ -162,7 +162,7 @@ fn transpose_kernel[
shared_tile = LayoutTensor[
dtype,
Layout.row_major(TRANSPOSE_BLOCK_DIM_XY, TRANSPOSE_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/problems/p23/p23.mojo b/problems/p23/p23.mojo
index 67b36029..91421916 100644
--- a/problems/p23/p23.mojo
+++ b/problems/p23/p23.mojo
@@ -20,9 +20,9 @@ alias SIMD_WIDTH = simd_width_of[dtype, target = get_gpu_target()]()
fn elementwise_add[
layout: Layout, dtype: DType, simd_width: Int, rank: Int, size: Int
](
- output: LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
@parameter
@@ -52,9 +52,9 @@ fn tiled_elementwise_add[
size: Int,
tile_size: Int,
](
- output: LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
@parameter
@@ -87,9 +87,9 @@ fn manual_vectorized_tiled_elementwise_add[
size: Int,
tile_size: Int,
](
- output: LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
# Each tile contains tile_size groups of simd_width elements
@@ -128,9 +128,9 @@ fn vectorize_within_tiles_elementwise_add[
size: Int,
tile_size: Int,
](
- output: LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
# Each tile contains tile_size elements (not SIMD groups)
@@ -181,13 +181,13 @@ fn benchmark_elementwise_parameterized[
a_host[i] = 2 * i
b_host[i] = 2 * i + 1
- a_tensor = LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin](
+ a_tensor = LayoutTensor[mut=False, dtype, layout, MutAnyOrigin](
a.unsafe_ptr()
)
- b_tensor = LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin](
+ b_tensor = LayoutTensor[mut=False, dtype, layout, MutAnyOrigin](
b_buf.unsafe_ptr()
)
- out_tensor = LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin](
+ out_tensor = LayoutTensor[mut=True, dtype, layout, MutAnyOrigin](
out.unsafe_ptr()
)
diff --git a/problems/p24/p24.mojo b/problems/p24/p24.mojo
index a0531f05..21f16679 100644
--- a/problems/p24/p24.mojo
+++ b/problems/p24/p24.mojo
@@ -34,9 +34,9 @@ alias out_layout = Layout.row_major(1)
fn traditional_dot_product_p12_style[
in_layout: Layout, out_layout: Layout, size: Int
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, in_layout],
- b: LayoutTensor[mut=False, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
):
"""
This is the complex approach from p12_layout_tensor.mojo - kept for comparison.
@@ -44,7 +44,7 @@ fn traditional_dot_product_p12_style[
shared = LayoutTensor[
dtype,
Layout.row_major(WARP_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
global_i = block_dim.x * block_idx.x + thread_idx.x
@@ -75,9 +75,9 @@ fn traditional_dot_product_p12_style[
fn simple_warp_dot_product[
in_layout: Layout, out_layout: Layout, size: Int
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, in_layout],
- b: LayoutTensor[mut=False, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
):
global_i = block_dim.x * block_idx.x + thread_idx.x
# FILL IN (6 lines at most)
@@ -95,9 +95,9 @@ fn functional_warp_dot_product[
rank: Int,
size: Int,
](
- output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
@parameter
@@ -178,16 +178,15 @@ fn benchmark_simple_warp_parameterized[
rand_int[dtype, test_size](b)
expected_output[dtype, n_warps](expected, a, b)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](b.unsafe_ptr())
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](out.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
@parameter
@always_inline
fn traditional_workflow(ctx: DeviceContext) raises:
- ctx.enqueue_function[
- simple_warp_dot_product[in_layout, out_layout, test_size]
- ](
+ alias kernel = simple_warp_dot_product[in_layout, out_layout, test_size]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
@@ -225,9 +224,9 @@ fn benchmark_functional_warp_parameterized[
rand_int[dtype, test_size](b)
expected_output[dtype, n_warps](expected, a, b)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](b.unsafe_ptr())
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](out.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
@parameter
@always_inline
@@ -267,15 +266,16 @@ fn benchmark_traditional_parameterized[
rand_int[dtype, test_size](b)
expected_output[dtype, n_warps](expected, a, b)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](b.unsafe_ptr())
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](out.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
@parameter
@always_inline
fn traditional_workflow(ctx: DeviceContext) raises:
- ctx.enqueue_function[
- traditional_dot_product_p12_style[in_layout, out_layout, test_size]
+ ctx.enqueue_function_checked[
+ traditional_dot_product_p12_style[in_layout, out_layout, test_size],
+ traditional_dot_product_p12_style[in_layout, out_layout, test_size],
](
out_tensor,
a_tensor,
@@ -306,11 +306,9 @@ def main():
n_warps
).enqueue_fill(0)
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](
- out.unsafe_ptr()
- )
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](b.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b)
with a.map_to_host() as a_host, b.map_to_host() as b_host:
for i in range(SIZE):
@@ -318,10 +316,13 @@ def main():
b_host[i] = i
if argv()[1] == "--traditional":
- ctx.enqueue_function[
+ ctx.enqueue_function_checked[
+ traditional_dot_product_p12_style[
+ in_layout, out_layout, SIZE
+ ],
traditional_dot_product_p12_style[
in_layout, out_layout, SIZE
- ]
+ ],
](
out_tensor,
a_tensor,
@@ -330,8 +331,9 @@ def main():
block_dim=THREADS_PER_BLOCK,
)
elif argv()[1] == "--kernel":
- ctx.enqueue_function[
- simple_warp_dot_product[in_layout, out_layout, SIZE]
+ ctx.enqueue_function_checked[
+ simple_warp_dot_product[in_layout, out_layout, SIZE],
+ simple_warp_dot_product[in_layout, out_layout, SIZE],
](
out_tensor,
a_tensor,
diff --git a/problems/p27/p27.mojo b/problems/p27/p27.mojo
index b21efd7e..211d5a3c 100644
--- a/problems/p27/p27.mojo
+++ b/problems/p27/p27.mojo
@@ -14,9 +14,9 @@ from math import floor
fn traditional_dot_product[
in_layout: Layout, out_layout: Layout, tpb: Int
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, in_layout],
- b: LayoutTensor[mut=False, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
):
"""Traditional dot product using shared memory + barriers + tree reduction.
@@ -25,7 +25,7 @@ fn traditional_dot_product[
shared = LayoutTensor[
dtype,
Layout.row_major(tpb),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
global_i = block_dim.x * block_idx.x + thread_idx.x
@@ -66,9 +66,9 @@ alias dtype = DType.float32
fn block_sum_dot_product[
in_layout: Layout, out_layout: Layout, tpb: Int
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, in_layout],
- b: LayoutTensor[mut=False, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
):
"""Dot product using block.sum() - convenience function like warp.sum()!
@@ -89,9 +89,9 @@ alias bin_layout = Layout.row_major(SIZE) # Max SIZE elements per bin
fn block_histogram_bin_extract[
in_layout: Layout, bin_layout: Layout, out_layout: Layout, tpb: Int
](
- input_data: LayoutTensor[mut=False, dtype, in_layout],
- bin_output: LayoutTensor[mut=True, dtype, bin_layout],
- count_output: LayoutTensor[mut=True, DType.int32, out_layout],
+ input_data: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ bin_output: LayoutTensor[dtype, bin_layout, MutAnyOrigin],
+ count_output: LayoutTensor[DType.int32, out_layout, MutAnyOrigin],
size: Int,
target_bin: Int,
num_bins: Int,
@@ -139,8 +139,8 @@ alias vector_layout = Layout.row_major(SIZE)
fn block_normalize_vector[
in_layout: Layout, out_layout: Layout, tpb: Int
](
- input_data: LayoutTensor[mut=False, dtype, in_layout],
- output_data: LayoutTensor[mut=True, dtype, out_layout],
+ input_data: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ output_data: LayoutTensor[dtype, out_layout, MutAnyOrigin],
size: Int,
):
"""Vector mean normalization using block.sum() + block.broadcast() combination.
@@ -205,18 +205,13 @@ def main():
print("TPB:", TPB)
print("Expected result:", expected)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](
- b_buf.unsafe_ptr()
- )
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](
- out.unsafe_ptr()
- )
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b_buf)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
# Traditional approach: works perfectly when size == TPB
- ctx.enqueue_function[
- traditional_dot_product[in_layout, out_layout, TPB]
- ](
+ alias kernel = traditional_dot_product[in_layout, out_layout, TPB]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
@@ -249,18 +244,13 @@ def main():
print("TPB:", TPB)
print("Expected result:", expected)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](
- b_buf.unsafe_ptr()
- )
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](
- out.unsafe_ptr()
- )
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b_buf)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
# Block.sum(): Same result with dramatically simpler code!
- ctx.enqueue_function[
- block_sum_dot_product[in_layout, out_layout, TPB]
- ](
+ alias kernel = block_sum_dot_product[in_layout, out_layout, TPB]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
@@ -306,8 +296,8 @@ def main():
print("...")
print()
- input_tensor = LayoutTensor[mut=False, dtype, in_layout](
- input_buf.unsafe_ptr()
+ input_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](
+ input_buf
)
# Demonstrate histogram for each bin using block.prefix_sum()
@@ -330,19 +320,18 @@ def main():
1
).enqueue_fill(0)
- bin_tensor = LayoutTensor[mut=True, dtype, bin_layout](
- bin_data.unsafe_ptr()
- )
- count_tensor = LayoutTensor[mut=True, DType.int32, out_layout](
- bin_count.unsafe_ptr()
+ bin_tensor = LayoutTensor[dtype, bin_layout, MutAnyOrigin](
+ bin_data
)
+ count_tensor = LayoutTensor[
+ DType.int32, out_layout, MutAnyOrigin
+ ](bin_count)
# Execute histogram kernel for this specific bin
- ctx.enqueue_function[
- block_histogram_bin_extract[
- in_layout, bin_layout, out_layout, TPB
- ]
- ](
+ alias kernel = block_histogram_bin_extract[
+ in_layout, bin_layout, out_layout, TPB
+ ]
+ ctx.enqueue_function_checked[kernel, kernel](
input_tensor,
bin_tensor,
count_tensor,
@@ -405,17 +394,18 @@ def main():
print("Mean value:", mean_value)
print()
- input_tensor = LayoutTensor[mut=False, dtype, in_layout](
- input_buf.unsafe_ptr()
+ input_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](
+ input_buf
)
- output_tensor = LayoutTensor[mut=True, dtype, vector_layout](
- output_buf.unsafe_ptr()
+ output_tensor = LayoutTensor[dtype, vector_layout, MutAnyOrigin](
+ output_buf
)
# Execute vector normalization kernel
- ctx.enqueue_function[
- block_normalize_vector[in_layout, vector_layout, TPB]
- ](
+ alias kernel = block_normalize_vector[
+ in_layout, vector_layout, TPB
+ ]
+ ctx.enqueue_function_checked[kernel, kernel](
input_tensor,
output_tensor,
SIZE,
diff --git a/problems/p28/p28.mojo b/problems/p28/p28.mojo
index b9e8e0ae..7965fa4d 100644
--- a/problems/p28/p28.mojo
+++ b/problems/p28/p28.mojo
@@ -23,9 +23,9 @@ alias layout_async = Layout.row_major(VECTOR_SIZE)
fn async_copy_overlap_convolution[
dtype: DType, layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- input: LayoutTensor[mut=False, dtype, layout],
- kernel: LayoutTensor[mut=False, dtype, Layout.row_major(KERNEL_SIZE)],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, layout, ImmutAnyOrigin],
+ kernel: LayoutTensor[dtype, Layout.row_major(KERNEL_SIZE), ImmutAnyOrigin],
):
"""Demonstrates async copy operations building on p14 patterns.
@@ -37,13 +37,13 @@ fn async_copy_overlap_convolution[
input_shared = LayoutTensor[
dtype,
Layout.row_major(CONV_TILE_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
kernel_shared = LayoutTensor[
dtype,
Layout.row_major(KERNEL_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -76,19 +76,18 @@ def test_async_copy_overlap_convolution():
for i in range(KERNEL_SIZE):
kernel_host[i] = Float32(i + 1)
- input_tensor = LayoutTensor[mut=False, dtype, layout_async](
- input_buf.unsafe_ptr()
+ input_tensor = LayoutTensor[dtype, layout_async, ImmutAnyOrigin](
+ input_buf
)
- output_tensor = LayoutTensor[mut=True, dtype, layout_async](
- output_buf.unsafe_ptr()
+ output_tensor = LayoutTensor[dtype, layout_async, MutAnyOrigin](
+ output_buf
)
- kernel_tensor = LayoutTensor[
- mut=False, dtype, Layout.row_major(KERNEL_SIZE)
- ](kernel_buf.unsafe_ptr())
+ kernel_tensor = LayoutTensor[
+ dtype, Layout.row_major(KERNEL_SIZE), ImmutAnyOrigin
+ ](kernel_buf)
- ctx.enqueue_function[
- async_copy_overlap_convolution[dtype, layout_async]
- ](
+ alias kernel = async_copy_overlap_convolution[dtype, layout_async]
+ ctx.enqueue_function_checked[kernel, kernel](
output_tensor,
input_tensor,
kernel_tensor,
diff --git a/problems/p29/p29.mojo b/problems/p29/p29.mojo
index acc89e8e..cee58e6c 100644
--- a/problems/p29/p29.mojo
+++ b/problems/p29/p29.mojo
@@ -32,8 +32,8 @@ alias BLUR_RADIUS = 2
fn multi_stage_image_blur_pipeline[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- input: LayoutTensor[mut=False, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
"""Multi-stage image blur pipeline with barrier coordination.
@@ -47,13 +47,13 @@ fn multi_stage_image_blur_pipeline[
input_shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
blur_shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -91,8 +91,8 @@ alias BUFFER_COUNT = 2
fn double_buffered_stencil_computation[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- input: LayoutTensor[mut=False, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
"""Double-buffered stencil computation with memory barrier coordination.
@@ -105,13 +105,13 @@ fn double_buffered_stencil_computation[
buffer_A = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
buffer_B = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -119,19 +119,19 @@ fn double_buffered_stencil_computation[
init_barrier = LayoutTensor[
DType.uint64,
Layout.row_major(1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
iter_barrier = LayoutTensor[
DType.uint64,
Layout.row_major(1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
final_barrier = LayoutTensor[
DType.uint64,
Layout.row_major(1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -209,10 +209,11 @@ def test_multi_stage_pipeline():
inp_host[i] = Float32(i % 10) + Float32(i / 100.0)
# Create LayoutTensors
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- inp_tensor = LayoutTensor[mut=False, dtype, layout](inp.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ inp_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](inp)
- ctx.enqueue_function[multi_stage_image_blur_pipeline[layout]](
+ alias kernel = multi_stage_image_blur_pipeline[layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
inp_tensor,
SIZE,
@@ -268,10 +269,11 @@ def test_double_buffered_stencil():
inp_host[i] = Float32(1.0 if i % 20 < 10 else 0.0)
# Create LayoutTensors for Puzzle 29B
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- inp_tensor = LayoutTensor[mut=False, dtype, layout](inp.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ inp_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](inp)
- ctx.enqueue_function[double_buffered_stencil_computation[layout]](
+ alias kernel = double_buffered_stencil_computation[layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
inp_tensor,
SIZE,
diff --git a/problems/p30/p30.mojo b/problems/p30/p30.mojo
index 1f708bba..c6cd7b14 100644
--- a/problems/p30/p30.mojo
+++ b/problems/p30/p30.mojo
@@ -16,9 +16,9 @@ alias layout = Layout.row_major(SIZE)
fn kernel1[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=False, dtype, layout],
- b: LayoutTensor[mut=False, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
i = block_dim.x * block_idx.x + thread_idx.x
@@ -33,9 +33,9 @@ fn kernel1[
fn kernel2[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=False, dtype, layout],
- b: LayoutTensor[mut=False, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
tid = block_idx.x * block_dim.x + thread_idx.x
@@ -54,9 +54,9 @@ fn kernel2[
fn kernel3[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=False, dtype, layout],
- b: LayoutTensor[mut=False, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
tid = block_idx.x * block_dim.x + thread_idx.x
@@ -88,11 +88,11 @@ fn benchmark_kernel1_parameterized[test_size: Int](mut b: Bencher) raises:
a_host[i] = Float32(i + 1)
b_host[i] = Float32(i + 2)
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[mut=False, dtype, layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, layout](b_buf.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](b_buf)
- ctx.enqueue_function[kernel1[layout]](
+ ctx.enqueue_function_checked[kernel1[layout], kernel1[layout]](
out_tensor,
a_tensor,
b_tensor,
@@ -100,7 +100,7 @@ fn benchmark_kernel1_parameterized[test_size: Int](mut b: Bencher) raises:
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
- keep(out.unsafe_ptr())
+ keep(out)
ctx.synchronize()
bench_ctx = DeviceContext()
@@ -123,11 +123,11 @@ fn benchmark_kernel2_parameterized[test_size: Int](mut b: Bencher) raises:
a_host[i] = Float32(i + 1)
b_host[i] = Float32(i + 2)
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[mut=False, dtype, layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, layout](b_buf.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](b_buf)
- ctx.enqueue_function[kernel2[layout]](
+ ctx.enqueue_function_checked[kernel2[layout], kernel2[layout]](
out_tensor,
a_tensor,
b_tensor,
@@ -135,7 +135,7 @@ fn benchmark_kernel2_parameterized[test_size: Int](mut b: Bencher) raises:
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
- keep(out.unsafe_ptr())
+ keep(out)
ctx.synchronize()
bench_ctx = DeviceContext()
@@ -158,11 +158,11 @@ fn benchmark_kernel3_parameterized[test_size: Int](mut b: Bencher) raises:
a_host[i] = Float32(i + 1)
b_host[i] = Float32(i + 2)
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[mut=False, dtype, layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, layout](b_buf.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](b_buf)
- ctx.enqueue_function[kernel3[layout]](
+ ctx.enqueue_function_checked[kernel3[layout], kernel3[layout]](
out_tensor,
a_tensor,
b_tensor,
@@ -170,7 +170,7 @@ fn benchmark_kernel3_parameterized[test_size: Int](mut b: Bencher) raises:
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
- keep(out.unsafe_ptr())
+ keep(out)
ctx.synchronize()
bench_ctx = DeviceContext()
@@ -192,11 +192,11 @@ def test_kernel1():
b_host[i] = Float32(i + 2)
# Create LayoutTensors
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[mut=False, dtype, layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, layout](b.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](b)
- ctx.enqueue_function[kernel1[layout]](
+ ctx.enqueue_function_checked[kernel1[layout], kernel1[layout]](
out_tensor,
a_tensor,
b_tensor,
@@ -232,11 +232,11 @@ def test_kernel2():
b_host[i] = Float32(i + 2)
# Create LayoutTensors
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[mut=False, dtype, layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, layout](b.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](b)
- ctx.enqueue_function[kernel2[layout]](
+ ctx.enqueue_function_checked[kernel2[layout], kernel2[layout]](
out_tensor,
a_tensor,
b_tensor,
@@ -275,11 +275,11 @@ def test_kernel3():
b_host[i] = Float32(i + 2)
# Create LayoutTensors
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[mut=False, dtype, layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, layout](b.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](b)
- ctx.enqueue_function[kernel3[layout]](
+ ctx.enqueue_function_checked[kernel3[layout], kernel3[layout]](
out_tensor,
a_tensor,
b_tensor,
diff --git a/problems/p31/p31.mojo b/problems/p31/p31.mojo
index 62bed3f8..373a38ad 100644
--- a/problems/p31/p31.mojo
+++ b/problems/p31/p31.mojo
@@ -49,7 +49,7 @@ fn sophisticated_kernel[
shared_cache = LayoutTensor[
dtype,
Layout.row_major(1024 * 12),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation() # 48KB
@@ -146,7 +146,7 @@ fn balanced_kernel[
shared_cache = LayoutTensor[
dtype,
Layout.row_major(1024 * 4),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation() # 16KB total
diff --git a/problems/p32/p32.mojo b/problems/p32/p32.mojo
index 71e35fe5..e6cf50cd 100644
--- a/problems/p32/p32.mojo
+++ b/problems/p32/p32.mojo
@@ -32,7 +32,7 @@ fn no_conflict_kernel[
shared_buf = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -75,7 +75,7 @@ fn two_way_conflict_kernel[
shared_buf = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/problems/p33/p33.mojo b/problems/p33/p33.mojo
index 4cf1b28d..b8788dc1 100644
--- a/problems/p33/p33.mojo
+++ b/problems/p33/p33.mojo
@@ -43,13 +43,13 @@ fn matmul_idiomatic_tiled[
a_shared = LayoutTensor[
dtype,
Layout.row_major(TILE_SIZE, TILE_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(TILE_SIZE, TILE_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -158,13 +158,13 @@ fn tensor_core_matrix_multiplication[
A_sram_tile = LayoutTensor[
A.dtype,
Layout.row_major(BM, BK),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
B_sram_tile = LayoutTensor[
B.dtype,
Layout.row_major(BK, BN),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -172,7 +172,7 @@ fn tensor_core_matrix_multiplication[
C_warp_accum = LayoutTensor[
C.dtype,
Layout.row_major(WM, WN),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.GENERIC,
].stack_allocation()
diff --git a/problems/p34/p34.mojo b/problems/p34/p34.mojo
index 6c9bf308..d064d9c6 100644
--- a/problems/p34/p34.mojo
+++ b/problems/p34/p34.mojo
@@ -39,7 +39,7 @@ fn cluster_coordination_basics[
shared_data = LayoutTensor[
dtype,
Layout.row_major(tpb),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/solutions/p01/p01.mojo b/solutions/p01/p01.mojo
index a769de40..2ec97047 100644
--- a/solutions/p01/p01.mojo
+++ b/solutions/p01/p01.mojo
@@ -30,9 +30,9 @@ def main():
for i in range(SIZE):
a_host[i] = i
- ctx.enqueue_function[add_10](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10, add_10](
+ out,
+ a,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
diff --git a/solutions/p02/p02.mojo b/solutions/p02/p02.mojo
index 02d1a858..6628e174 100644
--- a/solutions/p02/p02.mojo
+++ b/solutions/p02/p02.mojo
@@ -34,10 +34,10 @@ def main():
b_host[i] = i
expected[i] = a_host[i] + b_host[i]
- ctx.enqueue_function[add](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
- b.unsafe_ptr(),
+ ctx.enqueue_function_checked[add, add](
+ out,
+ a,
+ b,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
)
diff --git a/solutions/p03/p03.mojo b/solutions/p03/p03.mojo
index 1282610f..fbd818b5 100644
--- a/solutions/p03/p03.mojo
+++ b/solutions/p03/p03.mojo
@@ -31,9 +31,9 @@ def main():
for i in range(SIZE):
a_host[i] = i
- ctx.enqueue_function[add_10_guard](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_guard, add_10_guard](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/solutions/p04/p04.mojo b/solutions/p04/p04.mojo
index cb71c88a..ddd9ad82 100644
--- a/solutions/p04/p04.mojo
+++ b/solutions/p04/p04.mojo
@@ -38,9 +38,9 @@ def main():
a_host[y * SIZE + x] = y * SIZE + x
expected[y * SIZE + x] = a_host[y * SIZE + x] + 10
- ctx.enqueue_function[add_10_2d](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_2d, add_10_2d](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/solutions/p04/p04_layout_tensor.mojo b/solutions/p04/p04_layout_tensor.mojo
index 09965f57..9eb656e6 100644
--- a/solutions/p04/p04_layout_tensor.mojo
+++ b/solutions/p04/p04_layout_tensor.mojo
@@ -12,8 +12,8 @@ alias layout = Layout.row_major(SIZE, SIZE)
# ANCHOR: add_10_2d_layout_tensor_solution
fn add_10_2d(
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=True, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, MutAnyOrigin],
size: Int,
):
row = thread_idx.y
@@ -28,9 +28,9 @@ fn add_10_2d(
def main():
with DeviceContext() as ctx:
out_buf = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(0)
- out_tensor = LayoutTensor[mut=True, dtype, layout](
- out_buf.unsafe_ptr()
- ).reshape[layout]()
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out_buf).reshape[
+ layout
+ ]()
print("out shape:", out_tensor.shape[0](), "x", out_tensor.shape[1]())
expected = ctx.enqueue_create_host_buffer[dtype](
@@ -43,11 +43,11 @@ def main():
a_host[i] = i
expected[i] = a_host[i] + 10
- a_tensor = LayoutTensor[mut=True, dtype, layout](
- a.unsafe_ptr()
- ).reshape[layout]()
+ a_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](a).reshape[
+ layout
+ ]()
- ctx.enqueue_function[add_10_2d](
+ ctx.enqueue_function_checked[add_10_2d, add_10_2d](
out_tensor,
a_tensor,
SIZE,
diff --git a/solutions/p05/p05.mojo b/solutions/p05/p05.mojo
index aa59180d..c9ca2411 100644
--- a/solutions/p05/p05.mojo
+++ b/solutions/p05/p05.mojo
@@ -42,10 +42,10 @@ def main():
for x in range(SIZE):
expected[y * SIZE + x] = a_host[x] + b_host[y]
- ctx.enqueue_function[broadcast_add](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
- b.unsafe_ptr(),
+ ctx.enqueue_function_checked[broadcast_add, broadcast_add](
+ out,
+ a,
+ b,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/solutions/p05/p05_layout_tensor.mojo b/solutions/p05/p05_layout_tensor.mojo
index 10c1b8f5..ab864d8e 100644
--- a/solutions/p05/p05_layout_tensor.mojo
+++ b/solutions/p05/p05_layout_tensor.mojo
@@ -18,9 +18,9 @@ fn broadcast_add[
a_layout: Layout,
b_layout: Layout,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, a_layout],
- b: LayoutTensor[mut=False, dtype, b_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, a_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, b_layout, ImmutAnyOrigin],
size: Int,
):
row = thread_idx.y
@@ -35,16 +35,14 @@ fn broadcast_add[
def main():
with DeviceContext() as ctx:
out_buf = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(0)
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](
- out_buf.unsafe_ptr()
- )
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out_buf)
print("out shape:", out_tensor.shape[0](), "x", out_tensor.shape[1]())
expected_buf = ctx.enqueue_create_host_buffer[dtype](
SIZE * SIZE
).enqueue_fill(0)
- expected_tensor = LayoutTensor[mut=True, dtype, out_layout](
- expected_buf.unsafe_ptr()
+ expected_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](
+ expected_buf
)
a = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)
@@ -58,10 +56,11 @@ def main():
for j in range(SIZE):
expected_tensor[i, j] = a_host[j] + b_host[i]
- a_tensor = LayoutTensor[dtype, a_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[dtype, b_layout](b.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, a_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, b_layout, ImmutAnyOrigin](b)
- ctx.enqueue_function[broadcast_add[out_layout, a_layout, b_layout]](
+ alias kernel = broadcast_add[out_layout, a_layout, b_layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
diff --git a/solutions/p06/p06.mojo b/solutions/p06/p06.mojo
index 84823c3b..209141a2 100644
--- a/solutions/p06/p06.mojo
+++ b/solutions/p06/p06.mojo
@@ -31,9 +31,9 @@ def main():
for i in range(SIZE):
a_host[i] = i
- ctx.enqueue_function[add_10_blocks](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_blocks, add_10_blocks](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/solutions/p07/p07.mojo b/solutions/p07/p07.mojo
index 0c86ec6e..31bc4d96 100644
--- a/solutions/p07/p07.mojo
+++ b/solutions/p07/p07.mojo
@@ -39,9 +39,9 @@ def main():
a_host[k] = k
expected[k] = k + 10
- ctx.enqueue_function[add_10_blocks_2d](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_blocks_2d, add_10_blocks_2d](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/solutions/p07/p07_layout_tensor.mojo b/solutions/p07/p07_layout_tensor.mojo
index 7a81832c..ecb06c8e 100644
--- a/solutions/p07/p07_layout_tensor.mojo
+++ b/solutions/p07/p07_layout_tensor.mojo
@@ -16,8 +16,8 @@ fn add_10_blocks_2d[
out_layout: Layout,
a_layout: Layout,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, a_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, a_layout, ImmutAnyOrigin],
size: Int,
):
row = block_dim.y * block_idx.y + thread_idx.y
@@ -32,9 +32,7 @@ fn add_10_blocks_2d[
def main():
with DeviceContext() as ctx:
out_buf = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(0)
- out_tensor = LayoutTensor[dtype, out_layout, MutableAnyOrigin](
- out_buf.unsafe_ptr()
- )
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out_buf)
expected_buf = ctx.enqueue_create_host_buffer[dtype](
SIZE * SIZE
@@ -49,11 +47,10 @@ def main():
a_host[k] = k
expected_buf[k] = k + 10
- a_tensor = LayoutTensor[dtype, a_layout, MutableAnyOrigin](
- a.unsafe_ptr()
- )
+ a_tensor = LayoutTensor[dtype, a_layout, ImmutAnyOrigin](a)
- ctx.enqueue_function[add_10_blocks_2d[out_layout, a_layout]](
+ alias kernel = add_10_blocks_2d[out_layout, a_layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
SIZE,
@@ -63,16 +60,14 @@ def main():
ctx.synchronize()
- expected_tensor = LayoutTensor[dtype, out_layout, MutableAnyOrigin](
- expected_buf.unsafe_ptr()
+ expected_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](
+ expected_buf
)
with out_buf.map_to_host() as out_buf_host:
print(
"out:",
- LayoutTensor[dtype, out_layout, MutableAnyOrigin](
- out_buf_host.unsafe_ptr()
- ),
+ LayoutTensor[dtype, out_layout, MutAnyOrigin](out_buf_host),
)
print("expected:", expected_tensor)
for i in range(SIZE):
diff --git a/solutions/p08/p08.mojo b/solutions/p08/p08.mojo
index cd62ffd0..fe26e8bc 100644
--- a/solutions/p08/p08.mojo
+++ b/solutions/p08/p08.mojo
@@ -49,9 +49,9 @@ def main():
with DeviceContext() as ctx:
out = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)
a = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(1)
- ctx.enqueue_function[add_10_shared](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[add_10_shared, add_10_shared](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/solutions/p08/p08_layout_tensor.mojo b/solutions/p08/p08_layout_tensor.mojo
index 8b46e8c0..edcf3b3b 100644
--- a/solutions/p08/p08_layout_tensor.mojo
+++ b/solutions/p08/p08_layout_tensor.mojo
@@ -17,15 +17,15 @@ alias layout = Layout.row_major(SIZE)
fn add_10_shared_layout_tensor[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=True, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
# Allocate shared memory using tensor builder
shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -53,10 +53,11 @@ def main():
out = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)
a = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(1)
- out_tensor = LayoutTensor[dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[dtype, layout](a.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
- ctx.enqueue_function[add_10_shared_layout_tensor[layout]](
+ alias kernel = add_10_shared_layout_tensor[layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
SIZE,
diff --git a/solutions/p10/p10.mojo b/solutions/p10/p10.mojo
index b68cb0ed..d1afb2da 100644
--- a/solutions/p10/p10.mojo
+++ b/solutions/p10/p10.mojo
@@ -27,7 +27,7 @@ fn shared_memory_race(
shared_sum = LayoutTensor[
dtype,
Layout.row_major(1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/solutions/p11/p11.mojo b/solutions/p11/p11.mojo
index ad84d2fd..c726363a 100644
--- a/solutions/p11/p11.mojo
+++ b/solutions/p11/p11.mojo
@@ -51,9 +51,9 @@ def main():
for i in range(SIZE):
a_host[i] = i
- ctx.enqueue_function[pooling](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
+ ctx.enqueue_function_checked[pooling, pooling](
+ out,
+ a,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
@@ -64,7 +64,7 @@ def main():
ctx.synchronize()
with a.map_to_host() as a_host:
- ptr = a_host.unsafe_ptr()
+ ptr = a_host
for i in range(SIZE):
s = Scalar[dtype](0)
for j in range(max(i - 2, 0), i + 1):
diff --git a/solutions/p11/p11_layout_tensor.mojo b/solutions/p11/p11_layout_tensor.mojo
index 509b93b7..6b7e0a40 100644
--- a/solutions/p11/p11_layout_tensor.mojo
+++ b/solutions/p11/p11_layout_tensor.mojo
@@ -16,15 +16,15 @@ alias layout = Layout.row_major(SIZE)
fn pooling[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- a: LayoutTensor[mut=True, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
# Allocate shared memory using tensor builder
shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -62,10 +62,10 @@ def main():
for i in range(SIZE):
a_host[i] = i
- out_tensor = LayoutTensor[dtype, layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[dtype, layout](a.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
- ctx.enqueue_function[pooling[layout]](
+ ctx.enqueue_function_checked[pooling[layout], pooling[layout]](
out_tensor,
a_tensor,
SIZE,
@@ -77,7 +77,7 @@ def main():
ctx.synchronize()
with a.map_to_host() as a_host:
- ptr = a_host.unsafe_ptr()
+ ptr = a_host
for i in range(SIZE):
s = Scalar[dtype](0)
for j in range(max(i - 2, 0), i + 1):
diff --git a/solutions/p12/p12.mojo b/solutions/p12/p12.mojo
index b8fa0d28..58de6394 100644
--- a/solutions/p12/p12.mojo
+++ b/solutions/p12/p12.mojo
@@ -67,10 +67,10 @@ def main():
a_host[i] = i
b_host[i] = i
- ctx.enqueue_function[dot_product](
- out.unsafe_ptr(),
- a.unsafe_ptr(),
- b.unsafe_ptr(),
+ ctx.enqueue_function_checked[dot_product, dot_product](
+ out,
+ a,
+ b,
SIZE,
grid_dim=BLOCKS_PER_GRID,
block_dim=THREADS_PER_BLOCK,
diff --git a/solutions/p12/p12_layout_tensor.mojo b/solutions/p12/p12_layout_tensor.mojo
index 2e4f2fa9..0368b86e 100644
--- a/solutions/p12/p12_layout_tensor.mojo
+++ b/solutions/p12/p12_layout_tensor.mojo
@@ -17,15 +17,15 @@ alias out_layout = Layout.row_major(1)
fn dot_product[
in_layout: Layout, out_layout: Layout
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=True, dtype, in_layout],
- b: LayoutTensor[mut=True, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
):
shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
global_i = block_dim.x * block_idx.x + thread_idx.x
@@ -66,11 +66,12 @@ def main():
a_host[i] = i
b_host[i] = i
- out_tensor = LayoutTensor[dtype, out_layout](out.unsafe_ptr())
- a_tensor = LayoutTensor[dtype, layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[dtype, layout](b.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](b)
- ctx.enqueue_function[dot_product[layout, out_layout]](
+ alias kernel = dot_product[layout, out_layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
diff --git a/solutions/p13/p13.mojo b/solutions/p13/p13.mojo
index d973b72a..8569a367 100644
--- a/solutions/p13/p13.mojo
+++ b/solutions/p13/p13.mojo
@@ -29,13 +29,13 @@ fn conv_1d_simple[
shared_a = LayoutTensor[
dtype,
Layout.row_major(SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_b = LayoutTensor[
dtype,
Layout.row_major(CONV),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
if global_i < SIZE:
@@ -97,13 +97,13 @@ fn conv_1d_block_boundary[
shared_a = LayoutTensor[
dtype,
Layout.row_major(TPB + CONV_2 - 1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_b = LayoutTensor[
dtype,
Layout.row_major(CONV_2),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
if global_i < SIZE_2:
diff --git a/solutions/p14/p14.mojo b/solutions/p14/p14.mojo
index 4fe26559..6aeb7fae 100644
--- a/solutions/p14/p14.mojo
+++ b/solutions/p14/p14.mojo
@@ -27,7 +27,7 @@ fn prefix_sum_simple[
shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
if global_i < size:
@@ -77,7 +77,7 @@ fn prefix_sum_local_phase[
shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/solutions/p15/p15.mojo b/solutions/p15/p15.mojo
index 20ca2d33..cac1dcbd 100644
--- a/solutions/p15/p15.mojo
+++ b/solutions/p15/p15.mojo
@@ -29,7 +29,7 @@ fn axis_sum[
cache = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/solutions/p16/p16.mojo b/solutions/p16/p16.mojo
index d91dcde3..8fe16e59 100644
--- a/solutions/p16/p16.mojo
+++ b/solutions/p16/p16.mojo
@@ -53,13 +53,13 @@ fn single_block_matmul[
a_shared = LayoutTensor[
dtype,
Layout.row_major(TPB, TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(TPB, TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -104,13 +104,13 @@ fn matmul_tiled[
a_shared = LayoutTensor[
dtype,
Layout.row_major(TPB, TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(TPB, TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -174,13 +174,13 @@ fn matmul_idiomatic_tiled[
a_shared = LayoutTensor[
dtype,
Layout.row_major(TPB, TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(TPB, TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/solutions/p17/op/conv1d.mojo b/solutions/p17/op/conv1d.mojo
index 2c0ec755..c6882db6 100644
--- a/solutions/p17/op/conv1d.mojo
+++ b/solutions/p17/op/conv1d.mojo
@@ -17,9 +17,9 @@ fn conv1d_kernel[
conv_size: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- input: LayoutTensor[mut=True, dtype, in_layout],
- kernel: LayoutTensor[mut=True, dtype, conv_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, in_layout, MutAnyOrigin],
+ kernel: LayoutTensor[dtype, conv_layout, MutAnyOrigin],
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
@@ -27,13 +27,13 @@ fn conv1d_kernel[
shared_a = LayoutTensor[
dtype,
Layout.row_major(TPB + conv_size - 1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_b = LayoutTensor[
dtype,
Layout.row_major(conv_size),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
if global_i < input_size:
@@ -112,11 +112,10 @@ struct Conv1DCustomOp:
0,
)
# ANCHOR: conv1d_custom_op_solution
- gpu_ctx.enqueue_function[
- conv1d_kernel[
- in_layout, out_layout, conv_layout, input_size, conv_size
- ]
- ](
+ alias kernel = conv1d_kernel[
+ in_layout, out_layout, conv_layout, input_size, conv_size
+ ]
+ gpu_ctx.enqueue_function_checked[kernel, kernel](
output_tensor,
input_tensor,
kernel_tensor,
diff --git a/solutions/p18/op/softmax.mojo b/solutions/p18/op/softmax.mojo
index 3341d899..02843522 100644
--- a/solutions/p18/op/softmax.mojo
+++ b/solutions/p18/op/softmax.mojo
@@ -27,13 +27,13 @@ fn softmax_gpu_kernel[
shared_max = LayoutTensor[
dtype,
Layout.row_major(BLOCK_DIM_X),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_sum = LayoutTensor[
dtype,
Layout.row_major(BLOCK_DIM_X),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
global_i = thread_idx.x
@@ -93,8 +93,8 @@ fn softmax_cpu_kernel[
input_size: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[dtype, layout, MutableAnyOrigin],
- input: LayoutTensor[dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, layout, MutAnyOrigin],
):
var max_val: Scalar[dtype] = min_finite[dtype]()
for i in range(input_size):
@@ -130,12 +130,12 @@ struct SoftmaxCustomOp:
ctx: DeviceContextPtr,
) raises:
# Note: rebind is necessary now but it shouldn't be!
- var output_tensor = rebind[
- LayoutTensor[dtype, layout, MutableAnyOrigin]
- ](output.to_layout_tensor())
- var input_tensor = rebind[
- LayoutTensor[dtype, layout, MutableAnyOrigin]
- ](input.to_layout_tensor())
+ var output_tensor = rebind[LayoutTensor[dtype, layout, MutAnyOrigin]](
+ output.to_layout_tensor()
+ )
+ var input_tensor = rebind[LayoutTensor[dtype, layout, MutAnyOrigin]](
+ input.to_layout_tensor()
+ )
@parameter
if target == "gpu":
diff --git a/solutions/p19/op/attention.mojo b/solutions/p19/op/attention.mojo
index 80c79c5a..903fc4fa 100644
--- a/solutions/p19/op/attention.mojo
+++ b/solutions/p19/op/attention.mojo
@@ -39,9 +39,9 @@ fn matmul_idiomatic_tiled[
inner: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, a_layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, b_layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, a_layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, b_layout, MutAnyOrigin],
):
"""Updated idiomatic tiled matrix multiplication from p16."""
local_row = thread_idx.y
@@ -56,13 +56,13 @@ fn matmul_idiomatic_tiled[
a_shared = LayoutTensor[
dtype,
Layout.row_major(MATMUL_BLOCK_DIM_XY, MATMUL_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(MATMUL_BLOCK_DIM_XY, MATMUL_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
var acc: output.element_type = 0
@@ -126,14 +126,14 @@ fn transpose_kernel[
cols: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, layout_out, MutableAnyOrigin],
- inp: LayoutTensor[mut=False, dtype, layout_in, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout_out, MutAnyOrigin],
+ inp: LayoutTensor[mut=False, dtype, layout_in, MutAnyOrigin],
):
"""Transpose matrix using shared memory tiling for coalesced access."""
shared_tile = LayoutTensor[
dtype,
Layout.row_major(TRANSPOSE_BLOCK_DIM_XY, TRANSPOSE_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -172,13 +172,13 @@ fn softmax_gpu_kernel[
shared_max = LayoutTensor[
dtype,
Layout.row_major(SOFTMAX_BLOCK_DIM_X),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_sum = LayoutTensor[
dtype,
Layout.row_major(SOFTMAX_BLOCK_DIM_X),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
global_i = thread_idx.x
@@ -239,10 +239,10 @@ fn attention_cpu_kernel[
d: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[dtype, layout_out, MutableAnyOrigin],
- q: LayoutTensor[dtype, layout_q, MutableAnyOrigin],
- k: LayoutTensor[dtype, layout_k, MutableAnyOrigin],
- v: LayoutTensor[dtype, layout_v, MutableAnyOrigin],
+ output: LayoutTensor[dtype, layout_out, MutAnyOrigin],
+ q: LayoutTensor[dtype, layout_q, MutAnyOrigin],
+ k: LayoutTensor[dtype, layout_k, MutAnyOrigin],
+ v: LayoutTensor[dtype, layout_v, MutAnyOrigin],
):
"""CPU implementation of vector attention."""
var scores = List[Float32]()
@@ -304,15 +304,15 @@ struct AttentionCustomOp:
# Convert to layout tensors
var output_tensor = rebind[
- LayoutTensor[dtype, layout_out, MutableAnyOrigin]
+ LayoutTensor[dtype, layout_out, MutAnyOrigin]
](output.to_layout_tensor())
- var q_tensor = rebind[LayoutTensor[dtype, layout_q, MutableAnyOrigin]](
+ var q_tensor = rebind[LayoutTensor[dtype, layout_q, MutAnyOrigin]](
q.to_layout_tensor()
)
- var k_tensor = rebind[LayoutTensor[dtype, layout_k, MutableAnyOrigin]](
+ var k_tensor = rebind[LayoutTensor[dtype, layout_k, MutAnyOrigin]](
k.to_layout_tensor()
)
- var v_tensor = rebind[LayoutTensor[dtype, layout_v, MutableAnyOrigin]](
+ var v_tensor = rebind[LayoutTensor[dtype, layout_v, MutAnyOrigin]](
v.to_layout_tensor()
)
@@ -367,7 +367,7 @@ struct AttentionCustomOp:
seq_len
) # Reused for scores and weights
- k_t = LayoutTensor[mut=True, dtype, layout_k_t, MutableAnyOrigin](
+ k_t = LayoutTensor[mut=True, dtype, layout_k_t, MutAnyOrigin](
k_t_buf.unsafe_ptr()
)
@@ -390,7 +390,7 @@ struct AttentionCustomOp:
# This computes Q · K^T[i] = Q · K[i] for each column i of K^T (which is row i of K)
# Reuse scores_weights_buf as (1, seq_len) for scores
scores_2d = LayoutTensor[
- mut=True, dtype, layout_scores_2d, MutableAnyOrigin
+ mut=True, dtype, layout_scores_2d, MutAnyOrigin
](scores_weights_buf.unsafe_ptr())
gpu_ctx.enqueue_function[
matmul_idiomatic_tiled[
diff --git a/solutions/p20/op/conv1d.mojo b/solutions/p20/op/conv1d.mojo
index 21a2f075..ef974087 100644
--- a/solutions/p20/op/conv1d.mojo
+++ b/solutions/p20/op/conv1d.mojo
@@ -18,9 +18,9 @@ fn conv1d_kernel[
conv_size: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- input: LayoutTensor[mut=True, dtype, in_layout],
- kernel: LayoutTensor[mut=True, dtype, conv_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, in_layout, MutAnyOrigin],
+ kernel: LayoutTensor[dtype, conv_layout, MutAnyOrigin],
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
@@ -28,13 +28,13 @@ fn conv1d_kernel[
shared_a = LayoutTensor[
dtype,
Layout.row_major(TPB + conv_size - 1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_b = LayoutTensor[
dtype,
Layout.row_major(conv_size),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
if global_i < input_size:
@@ -111,11 +111,10 @@ struct Conv1DCustomOp:
0,
)
# ANCHOR: conv1d_custom_op_solution
- gpu_ctx.enqueue_function[
- conv1d_kernel[
- in_layout, out_layout, conv_layout, input_size, conv_size
- ]
- ](
+ alias kernel = conv1d_kernel[
+ in_layout, out_layout, conv_layout, input_size, conv_size
+ ]
+ gpu_ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
input_tensor,
kernel_tensor,
diff --git a/solutions/p21/op/embedding.mojo b/solutions/p21/op/embedding.mojo
index 95ed5582..284da92c 100644
--- a/solutions/p21/op/embedding.mojo
+++ b/solutions/p21/op/embedding.mojo
@@ -19,9 +19,9 @@ fn embedding_kernel_coalesced[
embed_dim: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- indices: LayoutTensor[mut=True, DType.int32, indices_layout],
- weights: LayoutTensor[mut=True, dtype, weights_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ indices: LayoutTensor[DType.int32, indices_layout, MutAnyOrigin],
+ weights: LayoutTensor[dtype, weights_layout, MutAnyOrigin],
):
"""
Memory-coalescing focused embedding kernel.
@@ -71,9 +71,9 @@ fn embedding_kernel_2d[
embed_dim: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- indices: LayoutTensor[mut=True, DType.int32, indices_layout],
- weights: LayoutTensor[mut=True, dtype, weights_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ indices: LayoutTensor[DType.int32, indices_layout, MutAnyOrigin],
+ weights: LayoutTensor[dtype, weights_layout, MutAnyOrigin],
):
"""
2D grid non-coalesced embedding kernel.
@@ -171,20 +171,19 @@ struct EmbeddingCustomOp:
blocks = max(1, ceildiv(total_elements, THREADS_PER_BLOCK))
# Compile and launch optimized kernel
- compiled_kernel = gpu_ctx.compile_function[
- embedding_kernel_coalesced[
- indices_layout,
- weights_layout,
- out_layout,
- batch_size,
- seq_len,
- vocab_size,
- embed_dim,
- output.dtype,
- ]
- ]()
-
- gpu_ctx.enqueue_function(
+ alias kernel = embedding_kernel_coalesced[
+ indices_layout,
+ weights_layout,
+ out_layout,
+ batch_size,
+ seq_len,
+ vocab_size,
+ embed_dim,
+ output.dtype,
+ ]
+ compiled_kernel = gpu_ctx.compile_function_checked[kernel, kernel]()
+
+ gpu_ctx.enqueue_function_checked(
compiled_kernel,
output_tensor,
indices_tensor,
diff --git a/solutions/p22/op/layernorm_linear.mojo b/solutions/p22/op/layernorm_linear.mojo
index dc055250..50856061 100644
--- a/solutions/p22/op/layernorm_linear.mojo
+++ b/solutions/p22/op/layernorm_linear.mojo
@@ -26,9 +26,9 @@ fn matmul_idiomatic_tiled[
inner: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, a_layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, b_layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, a_layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, b_layout, MutAnyOrigin],
):
"""Idiomatic tiled matrix multiplication from p19."""
local_row = thread_idx.y
@@ -43,13 +43,13 @@ fn matmul_idiomatic_tiled[
a_shared = LayoutTensor[
dtype,
Layout.row_major(MATMUL_BLOCK_DIM_XY, MATMUL_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(MATMUL_BLOCK_DIM_XY, MATMUL_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
var acc: output.element_type = 0
@@ -167,8 +167,8 @@ fn transpose_kernel[
cols: Int,
dtype: DType = DType.float32,
](
- output: LayoutTensor[mut=True, dtype, layout_out, MutableAnyOrigin],
- inp: LayoutTensor[mut=False, dtype, layout_in, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout_out, MutAnyOrigin],
+ inp: LayoutTensor[mut=False, dtype, layout_in, MutAnyOrigin],
):
"""Transpose matrix using shared memory tiling for coalesced access.
We will learn more about coalesced access in the next part.
@@ -176,7 +176,7 @@ fn transpose_kernel[
shared_tile = LayoutTensor[
dtype,
Layout.row_major(TRANSPOSE_BLOCK_DIM_XY, TRANSPOSE_BLOCK_DIM_XY),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
diff --git a/solutions/p23/p23.mojo b/solutions/p23/p23.mojo
index c30683af..a654ed4a 100644
--- a/solutions/p23/p23.mojo
+++ b/solutions/p23/p23.mojo
@@ -20,9 +20,9 @@ alias SIMD_WIDTH = simd_width_of[dtype, target = get_gpu_target()]()
fn elementwise_add[
layout: Layout, dtype: DType, simd_width: Int, rank: Int, size: Int
](
- output: LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
@parameter
@@ -60,9 +60,9 @@ fn tiled_elementwise_add[
size: Int,
tile_size: Int,
](
- output: LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
@parameter
@@ -100,9 +100,9 @@ fn manual_vectorized_tiled_elementwise_add[
size: Int,
tile_size: Int,
](
- output: LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
# Each tile contains tile_size groups of simd_width elements
@@ -150,9 +150,9 @@ fn vectorize_within_tiles_elementwise_add[
size: Int,
tile_size: Int,
](
- output: LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
# Each tile contains tile_size elements (not SIMD groups)
@@ -203,13 +203,13 @@ fn benchmark_elementwise_parameterized[
a_host[i] = 2 * i
b_host[i] = 2 * i + 1
- a_tensor = LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin](
+ a_tensor = LayoutTensor[mut=False, dtype, layout, MutAnyOrigin](
a.unsafe_ptr()
)
- b_tensor = LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin](
+ b_tensor = LayoutTensor[mut=False, dtype, layout, MutAnyOrigin](
b_buf.unsafe_ptr()
)
- out_tensor = LayoutTensor[mut=True, dtype, layout, MutableAnyOrigin](
+ out_tensor = LayoutTensor[mut=True, dtype, layout, MutAnyOrigin](
out.unsafe_ptr()
)
diff --git a/solutions/p24/p24.mojo b/solutions/p24/p24.mojo
index 17d7d243..c4fd95c5 100644
--- a/solutions/p24/p24.mojo
+++ b/solutions/p24/p24.mojo
@@ -34,9 +34,9 @@ alias out_layout = Layout.row_major(1)
fn traditional_dot_product_p12_style[
in_layout: Layout, out_layout: Layout, size: Int
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, in_layout],
- b: LayoutTensor[mut=False, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
):
"""
This is the complex approach from p12_layout_tensor.mojo - kept for comparison.
@@ -44,7 +44,7 @@ fn traditional_dot_product_p12_style[
shared = LayoutTensor[
dtype,
Layout.row_major(WARP_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
global_i = block_dim.x * block_idx.x + thread_idx.x
@@ -75,9 +75,9 @@ fn traditional_dot_product_p12_style[
fn simple_warp_dot_product[
in_layout: Layout, out_layout: Layout, size: Int
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, in_layout],
- b: LayoutTensor[mut=False, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
):
global_i = block_dim.x * block_idx.x + thread_idx.x
@@ -106,9 +106,9 @@ fn functional_warp_dot_product[
rank: Int,
size: Int,
](
- output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
- a: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
- b: LayoutTensor[mut=False, dtype, layout, MutableAnyOrigin],
+ output: LayoutTensor[mut=True, dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
+ b: LayoutTensor[mut=False, dtype, layout, MutAnyOrigin],
ctx: DeviceContext,
) raises:
@parameter
@@ -203,16 +203,15 @@ fn benchmark_simple_warp_parameterized[
rand_int[dtype, test_size](b)
expected_output[dtype, n_warps](expected, a, b)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](b.unsafe_ptr())
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](out.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
@parameter
@always_inline
fn traditional_workflow(ctx: DeviceContext) raises:
- ctx.enqueue_function[
- simple_warp_dot_product[in_layout, out_layout, test_size]
- ](
+ alias kernel = simple_warp_dot_product[in_layout, out_layout, test_size]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
@@ -250,9 +249,9 @@ fn benchmark_functional_warp_parameterized[
rand_int[dtype, test_size](b)
expected_output[dtype, n_warps](expected, a, b)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](b.unsafe_ptr())
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](out.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
@parameter
@always_inline
@@ -292,15 +291,16 @@ fn benchmark_traditional_parameterized[
rand_int[dtype, test_size](b)
expected_output[dtype, n_warps](expected, a, b)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](b.unsafe_ptr())
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](out.unsafe_ptr())
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
@parameter
@always_inline
fn traditional_workflow(ctx: DeviceContext) raises:
- ctx.enqueue_function[
- traditional_dot_product_p12_style[in_layout, out_layout, test_size]
+ ctx.enqueue_function_checked[
+ traditional_dot_product_p12_style[in_layout, out_layout, test_size],
+ traditional_dot_product_p12_style[in_layout, out_layout, test_size],
](
out_tensor,
a_tensor,
@@ -331,11 +331,9 @@ def main():
n_warps
).enqueue_fill(0)
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](
- out.unsafe_ptr()
- )
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](b.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b)
with a.map_to_host() as a_host, b.map_to_host() as b_host:
for i in range(SIZE):
@@ -343,10 +341,13 @@ def main():
b_host[i] = i
if argv()[1] == "--traditional":
- ctx.enqueue_function[
+ ctx.enqueue_function_checked[
+ traditional_dot_product_p12_style[
+ in_layout, out_layout, SIZE
+ ],
traditional_dot_product_p12_style[
in_layout, out_layout, SIZE
- ]
+ ],
](
out_tensor,
a_tensor,
@@ -355,8 +356,9 @@ def main():
block_dim=THREADS_PER_BLOCK,
)
elif argv()[1] == "--kernel":
- ctx.enqueue_function[
- simple_warp_dot_product[in_layout, out_layout, SIZE]
+ ctx.enqueue_function_checked[
+ simple_warp_dot_product[in_layout, out_layout, SIZE],
+ simple_warp_dot_product[in_layout, out_layout, SIZE],
](
out_tensor,
a_tensor,
diff --git a/solutions/p27/p27.mojo b/solutions/p27/p27.mojo
index 38ac5f0c..d5d7eb6f 100644
--- a/solutions/p27/p27.mojo
+++ b/solutions/p27/p27.mojo
@@ -21,9 +21,9 @@ alias dtype = DType.float32
fn block_sum_dot_product[
in_layout: Layout, out_layout: Layout, tpb: Int
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, in_layout],
- b: LayoutTensor[mut=False, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
):
"""Dot product using block.sum() - convenience function like warp.sum()!
@@ -56,9 +56,9 @@ fn block_sum_dot_product[
fn traditional_dot_product[
in_layout: Layout, out_layout: Layout, tpb: Int
](
- output: LayoutTensor[mut=True, dtype, out_layout],
- a: LayoutTensor[mut=False, dtype, in_layout],
- b: LayoutTensor[mut=False, dtype, in_layout],
+ output: LayoutTensor[dtype, out_layout, MutAnyOrigin],
+ a: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ b: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
size: Int,
):
"""Traditional dot product using shared memory + barriers + tree reduction.
@@ -67,7 +67,7 @@ fn traditional_dot_product[
shared = LayoutTensor[
dtype,
Layout.row_major(tpb),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
global_i = block_dim.x * block_idx.x + thread_idx.x
@@ -103,9 +103,9 @@ alias bin_layout = Layout.row_major(SIZE) # Max SIZE elements per bin
fn block_histogram_bin_extract[
in_layout: Layout, bin_layout: Layout, out_layout: Layout, tpb: Int
](
- input_data: LayoutTensor[mut=False, dtype, in_layout],
- bin_output: LayoutTensor[mut=True, dtype, bin_layout],
- count_output: LayoutTensor[mut=True, DType.int32, out_layout],
+ input_data: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ bin_output: LayoutTensor[dtype, bin_layout, MutAnyOrigin],
+ count_output: LayoutTensor[DType.int32, out_layout, MutAnyOrigin],
size: Int,
target_bin: Int,
num_bins: Int,
@@ -167,8 +167,8 @@ alias vector_layout = Layout.row_major(SIZE) # For full vector output
fn block_normalize_vector[
in_layout: Layout, out_layout: Layout, tpb: Int
](
- input_data: LayoutTensor[mut=False, dtype, in_layout],
- output_data: LayoutTensor[mut=True, dtype, out_layout],
+ input_data: LayoutTensor[dtype, in_layout, ImmutAnyOrigin],
+ output_data: LayoutTensor[dtype, out_layout, MutAnyOrigin],
size: Int,
):
"""Vector mean normalization using block.sum() + block.broadcast() combination.
@@ -239,18 +239,13 @@ def main():
print("TPB:", TPB)
print("Expected result:", expected)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](
- b_buf.unsafe_ptr()
- )
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](
- out.unsafe_ptr()
- )
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b_buf)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
# Traditional approach: works perfectly when size == TPB
- ctx.enqueue_function[
- traditional_dot_product[in_layout, out_layout, TPB]
- ](
+ alias kernel = traditional_dot_product[in_layout, out_layout, TPB]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
@@ -283,18 +278,13 @@ def main():
print("TPB:", TPB)
print("Expected result:", expected)
- a_tensor = LayoutTensor[mut=False, dtype, in_layout](a.unsafe_ptr())
- b_tensor = LayoutTensor[mut=False, dtype, in_layout](
- b_buf.unsafe_ptr()
- )
- out_tensor = LayoutTensor[mut=True, dtype, out_layout](
- out.unsafe_ptr()
- )
+ a_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](a)
+ b_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](b_buf)
+ out_tensor = LayoutTensor[dtype, out_layout, MutAnyOrigin](out)
# Block.sum(): Same result with dramatically simpler code!
- ctx.enqueue_function[
- block_sum_dot_product[in_layout, out_layout, TPB]
- ](
+ alias kernel = block_sum_dot_product[in_layout, out_layout, TPB]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
a_tensor,
b_tensor,
@@ -340,8 +330,8 @@ def main():
print("...")
print()
- input_tensor = LayoutTensor[mut=False, dtype, in_layout](
- input_buf.unsafe_ptr()
+ input_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](
+ input_buf
)
# Demonstrate histogram for each bin using block.prefix_sum()
@@ -364,19 +354,18 @@ def main():
1
).enqueue_fill(0)
- bin_tensor = LayoutTensor[mut=True, dtype, bin_layout](
- bin_data.unsafe_ptr()
- )
- count_tensor = LayoutTensor[mut=True, DType.int32, out_layout](
- bin_count.unsafe_ptr()
+ bin_tensor = LayoutTensor[dtype, bin_layout, MutAnyOrigin](
+ bin_data
)
+ count_tensor = LayoutTensor[
+ DType.int32, out_layout, MutAnyOrigin
+ ](bin_count)
# Execute histogram kernel for this specific bin
- ctx.enqueue_function[
- block_histogram_bin_extract[
- in_layout, bin_layout, out_layout, TPB
- ]
- ](
+ alias kernel = block_histogram_bin_extract[
+ in_layout, bin_layout, out_layout, TPB
+ ]
+ ctx.enqueue_function_checked[kernel, kernel](
input_tensor,
bin_tensor,
count_tensor,
@@ -439,17 +428,16 @@ def main():
print("Mean value:", mean_value)
print()
- input_tensor = LayoutTensor[mut=False, dtype, in_layout](
- input_buf.unsafe_ptr()
+ input_tensor = LayoutTensor[dtype, in_layout, ImmutAnyOrigin](
+ input_buf
)
- output_tensor = LayoutTensor[mut=True, dtype, vector_layout](
- output_buf.unsafe_ptr()
+ output_tensor = LayoutTensor[dtype, vector_layout, MutAnyOrigin](
+ output_buf
)
# Execute vector normalization kernel
- ctx.enqueue_function[
- block_normalize_vector[in_layout, vector_layout, TPB]
- ](
+ alias kernel = block_normalize_vector[in_layout, vector_layout, TPB]
+ ctx.enqueue_function_checked[kernel, kernel](
input_tensor,
output_tensor,
SIZE,
diff --git a/solutions/p28/p28.mojo b/solutions/p28/p28.mojo
index 550e07e7..63f676a6 100644
--- a/solutions/p28/p28.mojo
+++ b/solutions/p28/p28.mojo
@@ -24,9 +24,9 @@ alias layout_async = Layout.row_major(VECTOR_SIZE)
fn async_copy_overlap_convolution[
dtype: DType, layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- input: LayoutTensor[mut=False, dtype, layout],
- kernel: LayoutTensor[mut=False, dtype, Layout.row_major(KERNEL_SIZE)],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, layout, ImmutAnyOrigin],
+ kernel: LayoutTensor[dtype, Layout.row_major(KERNEL_SIZE), ImmutAnyOrigin],
):
"""Demonstrates async copy operations building on p14 patterns.
@@ -38,13 +38,13 @@ fn async_copy_overlap_convolution[
input_shared = LayoutTensor[
dtype,
Layout.row_major(CONV_TILE_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
kernel_shared = LayoutTensor[
dtype,
Layout.row_major(KERNEL_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -111,19 +111,18 @@ def test_async_copy_overlap_convolution():
for i in range(KERNEL_SIZE):
kernel_host[i] = Float32(i + 1)
- input_tensor = LayoutTensor[mut=False, dtype, layout_async](
- input_buf.unsafe_ptr()
+ input_tensor = LayoutTensor[dtype, layout_async, ImmutAnyOrigin](
+ input_buf
)
- output_tensor = LayoutTensor[mut=True, dtype, layout_async](
- output_buf.unsafe_ptr()
+ output_tensor = LayoutTensor[dtype, layout_async, MutAnyOrigin](
+ output_buf
)
kernel_tensor = LayoutTensor[
mut=False, dtype, Layout.row_major(KERNEL_SIZE)
- ](kernel_buf.unsafe_ptr())
+ ](kernel_buf)
- ctx.enqueue_function[
- async_copy_overlap_convolution[dtype, layout_async]
- ](
+ alias kernel = async_copy_overlap_convolution[dtype, layout_async]
+ ctx.enqueue_function_checked[kernel, kernel](
output_tensor,
input_tensor,
kernel_tensor,
diff --git a/solutions/p29/p29.mojo b/solutions/p29/p29.mojo
index 14073287..333b2423 100644
--- a/solutions/p29/p29.mojo
+++ b/solutions/p29/p29.mojo
@@ -28,8 +28,8 @@ alias BLUR_RADIUS = 2
fn multi_stage_image_blur_pipeline[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- input: LayoutTensor[mut=False, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
"""Multi-stage image blur pipeline with barrier coordination.
@@ -43,13 +43,13 @@ fn multi_stage_image_blur_pipeline[
input_shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
blur_shared = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -136,8 +136,8 @@ alias BUFFER_COUNT = 2
fn double_buffered_stencil_computation[
layout: Layout
](
- output: LayoutTensor[mut=True, dtype, layout],
- input: LayoutTensor[mut=False, dtype, layout],
+ output: LayoutTensor[dtype, layout, MutAnyOrigin],
+ input: LayoutTensor[dtype, layout, ImmutAnyOrigin],
size: Int,
):
"""Double-buffered stencil computation with memory barrier coordination.
@@ -150,13 +150,13 @@ fn double_buffered_stencil_computation[
buffer_A = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
buffer_B = LayoutTensor[
dtype,
Layout.row_major(TPB),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -164,19 +164,19 @@ fn double_buffered_stencil_computation[
init_barrier = LayoutTensor[
DType.uint64,
Layout.row_major(1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
iter_barrier = LayoutTensor[
DType.uint64,
Layout.row_major(1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
final_barrier = LayoutTensor[
DType.uint64,
Layout.row_major(1),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -284,10 +284,11 @@ def test_multi_stage_pipeline():
inp_host[i] = Float32(i % 10) + Float32(i / 100.0)
# Create LayoutTensors
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- inp_tensor = LayoutTensor[mut=False, dtype, layout](inp.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ inp_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](inp)
- ctx.enqueue_function[multi_stage_image_blur_pipeline[layout]](
+ alias kernel = multi_stage_image_blur_pipeline[layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
inp_tensor,
SIZE,
@@ -343,10 +344,11 @@ def test_double_buffered_stencil():
inp_host[i] = Float32(1.0 if i % 20 < 10 else 0.0)
# Create LayoutTensors for Puzzle 26B
- out_tensor = LayoutTensor[mut=True, dtype, layout](out.unsafe_ptr())
- inp_tensor = LayoutTensor[mut=False, dtype, layout](inp.unsafe_ptr())
+ out_tensor = LayoutTensor[dtype, layout, MutAnyOrigin](out)
+ inp_tensor = LayoutTensor[dtype, layout, ImmutAnyOrigin](inp)
- ctx.enqueue_function[double_buffered_stencil_computation[layout]](
+ alias kernel = double_buffered_stencil_computation[layout]
+ ctx.enqueue_function_checked[kernel, kernel](
out_tensor,
inp_tensor,
SIZE,
diff --git a/solutions/p33/p33.mojo b/solutions/p33/p33.mojo
index 70d3097d..9950e686 100644
--- a/solutions/p33/p33.mojo
+++ b/solutions/p33/p33.mojo
@@ -43,13 +43,13 @@ fn matmul_idiomatic_tiled[
a_shared = LayoutTensor[
dtype,
Layout.row_major(TILE_SIZE, TILE_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
b_shared = LayoutTensor[
dtype,
Layout.row_major(TILE_SIZE, TILE_SIZE),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -158,13 +158,13 @@ fn tensor_core_matrix_multiplication[
A_sram_tile = LayoutTensor[
A.dtype,
Layout.row_major(BM, BK),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
B_sram_tile = LayoutTensor[
B.dtype,
Layout.row_major(BK, BN),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -172,7 +172,7 @@ fn tensor_core_matrix_multiplication[
C_warp_accum = LayoutTensor[
C.dtype,
Layout.row_major(WM, WN),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.LOCAL,
].stack_allocation()
diff --git a/solutions/p34/p34.mojo b/solutions/p34/p34.mojo
index 82ba485b..b4e4ee72 100644
--- a/solutions/p34/p34.mojo
+++ b/solutions/p34/p34.mojo
@@ -39,7 +39,7 @@ fn cluster_coordination_basics[
shared_data = LayoutTensor[
dtype,
Layout.row_major(tpb),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
@@ -99,7 +99,7 @@ fn cluster_collective_operations[
shared_mem = LayoutTensor[
dtype,
Layout.row_major(tpb),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()
shared_mem[local_i] = my_value
@@ -149,7 +149,7 @@ fn advanced_cluster_patterns[
shared_data = LayoutTensor[
dtype,
Layout.row_major(tpb),
- MutableAnyOrigin,
+ MutAnyOrigin,
address_space = AddressSpace.SHARED,
].stack_allocation()