modular · rajuptvs · Nov 1, 2025 · Nov 1, 2025 · Nov 1, 2025 · Nov 1, 2025
diff --git a/problems/p03/p03.mojo b/problems/p03/p03.mojo
@@ -13,7 +13,7 @@ alias dtype = DType.float32
 fn add_10_guard(
     output: UnsafePointer[Scalar[dtype]],
     a: UnsafePointer[Scalar[dtype]],
-    size: Int,
+    size: UInt,
 ):
     i = thread_idx.x
     # FILL ME IN (roughly 2 lines)

diff --git a/problems/p04/p04.mojo b/problems/p04/p04.mojo
@@ -13,7 +13,7 @@ alias dtype = DType.float32
 fn add_10_2d(
     output: UnsafePointer[Scalar[dtype]],
     a: UnsafePointer[Scalar[dtype]],
-    size: Int,
+    size: UInt,
 ):
     row = thread_idx.y
     col = thread_idx.x

diff --git a/problems/p04/p04_layout_tensor.mojo b/problems/p04/p04_layout_tensor.mojo
@@ -14,7 +14,7 @@ alias layout = Layout.row_major(SIZE, SIZE)
 fn add_10_2d(
     output: LayoutTensor[mut=True, dtype, layout],
     a: LayoutTensor[mut=True, dtype, layout],
-    size: Int,
+    size: UInt,
 ):
     row = thread_idx.y
     col = thread_idx.x

diff --git a/problems/p05/p05.mojo b/problems/p05/p05.mojo
@@ -14,7 +14,7 @@ fn broadcast_add(
     output: UnsafePointer[Scalar[dtype]],
     a: UnsafePointer[Scalar[dtype]],
     b: UnsafePointer[Scalar[dtype]],
-    size: Int,
+    size: UInt,
 ):
     row = thread_idx.y
     col = thread_idx.x

diff --git a/problems/p05/p05_layout_tensor.mojo b/problems/p05/p05_layout_tensor.mojo
@@ -21,7 +21,7 @@ fn broadcast_add[
     output: LayoutTensor[mut=True, dtype, out_layout],
     a: LayoutTensor[mut=False, dtype, a_layout],
     b: LayoutTensor[mut=False, dtype, b_layout],
-    size: Int,
+    size: UInt,
 ):
     row = thread_idx.y
     col = thread_idx.x

diff --git a/problems/p06/p06.mojo b/problems/p06/p06.mojo
@@ -13,7 +13,7 @@ alias dtype = DType.float32
 fn add_10_blocks(
     output: UnsafePointer[Scalar[dtype]],
     a: UnsafePointer[Scalar[dtype]],
-    size: Int,
+    size: UInt,
 ):
     i = block_dim.x * block_idx.x + thread_idx.x
     # FILL ME IN (roughly 2 lines)

diff --git a/problems/p07/p07.mojo b/problems/p07/p07.mojo
@@ -13,7 +13,7 @@ alias dtype = DType.float32
 fn add_10_blocks_2d(
     output: UnsafePointer[Scalar[dtype]],
     a: UnsafePointer[Scalar[dtype]],
-    size: Int,
+    size: UInt,
 ):
     row = block_dim.y * block_idx.y + thread_idx.y
     col = block_dim.x * block_idx.x + thread_idx.x

diff --git a/problems/p07/p07_layout_tensor.mojo b/problems/p07/p07_layout_tensor.mojo
@@ -18,7 +18,7 @@ fn add_10_blocks_2d[
 ](
     output: LayoutTensor[mut=True, dtype, out_layout],
     a: LayoutTensor[mut=False, dtype, a_layout],
-    size: Int,
+    size: UInt,
 ):
     row = block_dim.y * block_idx.y + thread_idx.y
     col = block_dim.x * block_idx.x + thread_idx.x

diff --git a/problems/p08/p08.mojo b/problems/p08/p08.mojo
@@ -16,7 +16,7 @@ alias dtype = DType.float32
 fn add_10_shared(
     output: UnsafePointer[Scalar[dtype]],
     a: UnsafePointer[Scalar[dtype]],
-    size: Int,
+    size: UInt,
 ):
     shared = stack_allocation[
         TPB,

diff --git a/problems/p08/p08_layout_tensor.mojo b/problems/p08/p08_layout_tensor.mojo
@@ -19,7 +19,7 @@ fn add_10_shared_layout_tensor[
 ](
     output: LayoutTensor[mut=True, dtype, layout],
     a: LayoutTensor[mut=True, dtype, layout],
-    size: Int,
+    size: UInt,
 ):
     # Allocate shared memory using LayoutTensor with explicit address_space
     shared = LayoutTensor[

diff --git a/problems/p10/p10.mojo b/problems/p10/p10.mojo
@@ -17,7 +17,7 @@ alias layout = Layout.row_major(SIZE, SIZE)
 fn shared_memory_race(
     output: LayoutTensor[mut=True, dtype, layout],
     a: LayoutTensor[mut=False, dtype, layout],
-    size: Int,
+    size: UInt,
 ):
     row = thread_idx.y
     col = thread_idx.x
@@ -45,7 +45,7 @@ fn shared_memory_race(
 fn add_10_2d(
     output: LayoutTensor[mut=True, dtype, layout],
     a: LayoutTensor[mut=True, dtype, layout],
-    size: Int,
+    size: UInt,
 ):
     row = thread_idx.y
     col = thread_idx.x

diff --git a/problems/p11/p11.mojo b/problems/p11/p11.mojo
@@ -16,7 +16,7 @@ alias dtype = DType.float32
 fn pooling(
     output: UnsafePointer[Scalar[dtype]],
     a: UnsafePointer[Scalar[dtype]],
-    size: Int,
+    size: UInt,
 ):
     shared = stack_allocation[
         TPB,

diff --git a/problems/p11/p11_layout_tensor.mojo b/problems/p11/p11_layout_tensor.mojo
@@ -18,7 +18,7 @@ fn pooling[
 ](
     output: LayoutTensor[mut=True, dtype, layout],
     a: LayoutTensor[mut=True, dtype, layout],
-    size: Int,
+    size: UInt,
 ):
     # Allocate shared memory using tensor builder
     shared = LayoutTensor[

diff --git a/problems/p12/p12.mojo b/problems/p12/p12.mojo
@@ -17,7 +17,7 @@ fn dot_product(
     output: UnsafePointer[Scalar[dtype]],
     a: UnsafePointer[Scalar[dtype]],
     b: UnsafePointer[Scalar[dtype]],
-    size: Int,
+    size: UInt,
 ):
     # FILL ME IN (roughly 13 lines)
     ...

diff --git a/problems/p12/p12_layout_tensor.mojo b/problems/p12/p12_layout_tensor.mojo
@@ -21,7 +21,7 @@ fn dot_product[
     output: LayoutTensor[mut=True, dtype, out_layout],
     a: LayoutTensor[mut=True, dtype, in_layout],
     b: LayoutTensor[mut=True, dtype, in_layout],
-    size: Int,
+    size: UInt,
 ):
     # FILL ME IN (roughly 13 lines)
     ...

diff --git a/problems/p14/p14.mojo b/problems/p14/p14.mojo
@@ -20,7 +20,7 @@ fn prefix_sum_simple[
 ](
     output: LayoutTensor[mut=True, dtype, layout],
     a: LayoutTensor[mut=False, dtype, layout],
-    size: Int,
+    size: UInt,
 ):
     global_i = block_dim.x * block_idx.x + thread_idx.x
     local_i = thread_idx.x
@@ -43,7 +43,7 @@ fn prefix_sum_local_phase[
 ](
     output: LayoutTensor[mut=True, dtype, out_layout],
     a: LayoutTensor[mut=False, dtype, in_layout],
-    size: Int,
+    size: UInt,
 ):
     global_i = block_dim.x * block_idx.x + thread_idx.x
     local_i = thread_idx.x

diff --git a/problems/p15/p15.mojo b/problems/p15/p15.mojo
@@ -23,7 +23,7 @@ fn axis_sum[
 ](
     output: LayoutTensor[mut=True, dtype, out_layout],
     a: LayoutTensor[mut=False, dtype, in_layout],
-    size: Int,
+    size: UInt,
 ):
     global_i = block_dim.x * block_idx.x + thread_idx.x
     local_i = thread_idx.x

diff --git a/problems/p17/op/conv1d.mojo b/problems/p17/op/conv1d.mojo
@@ -12,8 +12,8 @@ fn conv1d_kernel[
     in_layout: Layout,
     out_layout: Layout,
     conv_layout: Layout,
-    input_size: Int,
-    conv_size: Int,
+    input_size: UInt,
+    conv_size: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[mut=True, dtype, out_layout],
@@ -80,8 +80,8 @@ struct Conv1DCustomOp:
     fn execute[
         # The kind of device this will be run on: "cpu" or "gpu"
         target: StaticString,
-        input_size: Int,
-        conv_size: Int,
+        input_size: UInt,
+        conv_size: UInt,
         dtype: DType = DType.float32,
     ](
         output: OutputTensor[rank=1],

diff --git a/problems/p18/op/softmax.mojo b/problems/p18/op/softmax.mojo
@@ -19,7 +19,7 @@ alias BLOCK_DIM_X = 1 << log2_ceil(SIZE)
 
 fn softmax_gpu_kernel[
     layout: Layout,
-    input_size: Int,
+    input_size: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[mut=True, dtype, layout],
@@ -35,7 +35,7 @@ fn softmax_gpu_kernel[
 # ANCHOR: softmax_cpu_kernel
 fn softmax_cpu_kernel[
     layout: Layout,
-    input_size: Int,
+    input_size: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[dtype, layout, MutableAnyOrigin],
@@ -57,7 +57,7 @@ struct SoftmaxCustomOp:
     @staticmethod
     fn execute[
         target: StaticString,  # "cpu" or "gpu"
-        input_size: Int,
+        input_size: UInt,
         dtype: DType = DType.float32,
     ](
         output: OutputTensor[rank=1],

diff --git a/problems/p19/op/attention.mojo b/problems/p19/op/attention.mojo
@@ -32,9 +32,9 @@ fn matmul_idiomatic_tiled[
     a_layout: Layout,
     b_layout: Layout,
     out_layout: Layout,
-    rows: Int,
-    cols: Int,
-    inner: Int,
+    rows: UInt,
+    cols: UInt,
+    inner: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
@@ -120,8 +120,8 @@ fn matmul_idiomatic_tiled[
 fn transpose_kernel[
     layout_in: Layout,  # Layout for input matrix (seq_len, d)
     layout_out: Layout,  # Layout for output matrix (d, seq_len)
-    rows: Int,
-    cols: Int,
+    rows: UInt,
+    cols: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[mut=True, dtype, layout_out, MutableAnyOrigin],
@@ -137,7 +137,7 @@ fn transpose_kernel[
 # Apply softmax to attention scores taken from p16
 fn softmax_gpu_kernel[
     layout: Layout,
-    input_size: Int,
+    input_size: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[mut=True, dtype, layout],
@@ -209,8 +209,8 @@ fn attention_cpu_kernel[
     layout_k: Layout,
     layout_v: Layout,
     layout_out: Layout,
-    seq_len: Int,
-    d: Int,
+    seq_len: UInt,
+    d: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[dtype, layout_out, MutableAnyOrigin],
@@ -259,8 +259,8 @@ struct AttentionCustomOp:
     @staticmethod
     fn execute[
         target: StaticString,  # "cpu" or "gpu"
-        seq_len: Int,
-        d: Int,
+        seq_len: UInt,
+        d: UInt,
         dtype: DType = DType.float32,
     ](
         output: OutputTensor[rank=1],  # Output vector (d,)

diff --git a/problems/p20/op/conv1d.mojo b/problems/p20/op/conv1d.mojo
@@ -15,8 +15,8 @@ fn conv1d_kernel[
     in_layout: Layout,
     out_layout: Layout,
     conv_layout: Layout,
-    input_size: Int,
-    conv_size: Int,
+    input_size: UInt,
+    conv_size: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[mut=True, dtype, out_layout],

diff --git a/problems/p21/op/embedding.mojo b/problems/p21/op/embedding.mojo
@@ -13,10 +13,10 @@ fn embedding_kernel_coalesced[
     indices_layout: Layout,
     weights_layout: Layout,
     out_layout: Layout,
-    batch_size: Int,
-    seq_len: Int,
-    vocab_size: Int,
-    embed_dim: Int,
+    batch_size: UInt,
+    seq_len: UInt,
+    vocab_size: UInt,
+    embed_dim: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[mut=True, dtype, out_layout],
@@ -57,10 +57,10 @@ fn embedding_kernel_2d[
     indices_layout: Layout,
     weights_layout: Layout,
     out_layout: Layout,
-    batch_size: Int,
-    seq_len: Int,
-    vocab_size: Int,
-    embed_dim: Int,
+    batch_size: UInt,
+    seq_len: UInt,
+    vocab_size: UInt,
+    embed_dim: UInt,
     dtype: DType = DType.float32,
 ](
     output: LayoutTensor[mut=True, dtype, out_layout],
@@ -108,10 +108,10 @@ struct EmbeddingCustomOp:
     @staticmethod
     fn execute[
         target: StaticString,
-        batch_size: Int,
-        seq_len: Int,
-        vocab_size: Int,
-        embed_dim: Int,
+        batch_size: UInt,
+        seq_len: UInt,
+        vocab_size: UInt,
+        embed_dim: UInt,
     ](
         output: OutputTensor[
             dtype = DType.float32, rank=3
@@ -194,10 +194,10 @@ struct Embedding2DCustomOp:
     @staticmethod
     fn execute[
         target: StaticString,
-        batch_size: Int,
-        seq_len: Int,
-        vocab_size: Int,
-        embed_dim: Int,
+        batch_size: UInt,
+        seq_len: UInt,
+        vocab_size: UInt,
+        embed_dim: UInt,
     ](
         output: OutputTensor[
             dtype = DType.float32, rank=3