Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion problems/p03/p03.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_guard(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
i = thread_idx.x
# FILL ME IN (roughly 2 lines)
Expand Down
2 changes: 1 addition & 1 deletion problems/p04/p04.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_2d(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p04/p04_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ alias layout = Layout.row_major(SIZE, SIZE)
fn add_10_2d(
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=True, dtype, layout],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p05/p05.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fn broadcast_add(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p05/p05_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn broadcast_add[
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=False, dtype, a_layout],
b: LayoutTensor[mut=False, dtype, b_layout],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p06/p06.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_blocks(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
i = block_dim.x * block_idx.x + thread_idx.x
# FILL ME IN (roughly 2 lines)
Expand Down
2 changes: 1 addition & 1 deletion problems/p07/p07.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ alias dtype = DType.float32
fn add_10_blocks_2d(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
row = block_dim.y * block_idx.y + thread_idx.y
col = block_dim.x * block_idx.x + thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p07/p07_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn add_10_blocks_2d[
](
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=False, dtype, a_layout],
size: Int,
size: UInt,
):
row = block_dim.y * block_idx.y + thread_idx.y
col = block_dim.x * block_idx.x + thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p08/p08.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ alias dtype = DType.float32
fn add_10_shared(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
shared = stack_allocation[
TPB,
Expand Down
2 changes: 1 addition & 1 deletion problems/p08/p08_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fn add_10_shared_layout_tensor[
](
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=True, dtype, layout],
size: Int,
size: UInt,
):
# Allocate shared memory using LayoutTensor with explicit address_space
shared = LayoutTensor[
Expand Down
4 changes: 2 additions & 2 deletions problems/p10/p10.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ alias layout = Layout.row_major(SIZE, SIZE)
fn shared_memory_race(
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=False, dtype, layout],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down Expand Up @@ -45,7 +45,7 @@ fn shared_memory_race(
fn add_10_2d(
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=True, dtype, layout],
size: Int,
size: UInt,
):
row = thread_idx.y
col = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p11/p11.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ alias dtype = DType.float32
fn pooling(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
shared = stack_allocation[
TPB,
Expand Down
2 changes: 1 addition & 1 deletion problems/p11/p11_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn pooling[
](
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=True, dtype, layout],
size: Int,
size: UInt,
):
# Allocate shared memory using tensor builder
shared = LayoutTensor[
Expand Down
2 changes: 1 addition & 1 deletion problems/p12/p12.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ fn dot_product(
output: UnsafePointer[Scalar[dtype]],
a: UnsafePointer[Scalar[dtype]],
b: UnsafePointer[Scalar[dtype]],
size: Int,
size: UInt,
):
# FILL ME IN (roughly 13 lines)
...
Expand Down
2 changes: 1 addition & 1 deletion problems/p12/p12_layout_tensor.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn dot_product[
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=True, dtype, in_layout],
b: LayoutTensor[mut=True, dtype, in_layout],
size: Int,
size: UInt,
):
# FILL ME IN (roughly 13 lines)
...
Expand Down
4 changes: 2 additions & 2 deletions problems/p14/p14.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ fn prefix_sum_simple[
](
output: LayoutTensor[mut=True, dtype, layout],
a: LayoutTensor[mut=False, dtype, layout],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand All @@ -43,7 +43,7 @@ fn prefix_sum_local_phase[
](
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=False, dtype, in_layout],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand Down
2 changes: 1 addition & 1 deletion problems/p15/p15.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn axis_sum[
](
output: LayoutTensor[mut=True, dtype, out_layout],
a: LayoutTensor[mut=False, dtype, in_layout],
size: Int,
size: UInt,
):
global_i = block_dim.x * block_idx.x + thread_idx.x
local_i = thread_idx.x
Expand Down
8 changes: 4 additions & 4 deletions problems/p17/op/conv1d.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ fn conv1d_kernel[
in_layout: Layout,
out_layout: Layout,
conv_layout: Layout,
input_size: Int,
conv_size: Int,
input_size: UInt,
conv_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout],
Expand Down Expand Up @@ -80,8 +80,8 @@ struct Conv1DCustomOp:
fn execute[
# The kind of device this will be run on: "cpu" or "gpu"
target: StaticString,
input_size: Int,
conv_size: Int,
input_size: UInt,
conv_size: UInt,
dtype: DType = DType.float32,
](
output: OutputTensor[rank=1],
Expand Down
6 changes: 3 additions & 3 deletions problems/p18/op/softmax.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ alias BLOCK_DIM_X = 1 << log2_ceil(SIZE)

fn softmax_gpu_kernel[
layout: Layout,
input_size: Int,
input_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, layout],
Expand All @@ -35,7 +35,7 @@ fn softmax_gpu_kernel[
# ANCHOR: softmax_cpu_kernel
fn softmax_cpu_kernel[
layout: Layout,
input_size: Int,
input_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[dtype, layout, MutableAnyOrigin],
Expand All @@ -57,7 +57,7 @@ struct SoftmaxCustomOp:
@staticmethod
fn execute[
target: StaticString, # "cpu" or "gpu"
input_size: Int,
input_size: UInt,
dtype: DType = DType.float32,
](
output: OutputTensor[rank=1],
Expand Down
20 changes: 10 additions & 10 deletions problems/p19/op/attention.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ fn matmul_idiomatic_tiled[
a_layout: Layout,
b_layout: Layout,
out_layout: Layout,
rows: Int,
cols: Int,
inner: Int,
rows: UInt,
cols: UInt,
inner: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout, MutableAnyOrigin],
Expand Down Expand Up @@ -120,8 +120,8 @@ fn matmul_idiomatic_tiled[
fn transpose_kernel[
layout_in: Layout, # Layout for input matrix (seq_len, d)
layout_out: Layout, # Layout for output matrix (d, seq_len)
rows: Int,
cols: Int,
rows: UInt,
cols: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, layout_out, MutableAnyOrigin],
Expand All @@ -137,7 +137,7 @@ fn transpose_kernel[
# Apply softmax to attention scores taken from p16
fn softmax_gpu_kernel[
layout: Layout,
input_size: Int,
input_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, layout],
Expand Down Expand Up @@ -209,8 +209,8 @@ fn attention_cpu_kernel[
layout_k: Layout,
layout_v: Layout,
layout_out: Layout,
seq_len: Int,
d: Int,
seq_len: UInt,
d: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[dtype, layout_out, MutableAnyOrigin],
Expand Down Expand Up @@ -259,8 +259,8 @@ struct AttentionCustomOp:
@staticmethod
fn execute[
target: StaticString, # "cpu" or "gpu"
seq_len: Int,
d: Int,
seq_len: UInt,
d: UInt,
dtype: DType = DType.float32,
](
output: OutputTensor[rank=1], # Output vector (d,)
Expand Down
4 changes: 2 additions & 2 deletions problems/p20/op/conv1d.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ fn conv1d_kernel[
in_layout: Layout,
out_layout: Layout,
conv_layout: Layout,
input_size: Int,
conv_size: Int,
input_size: UInt,
conv_size: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout],
Expand Down
32 changes: 16 additions & 16 deletions problems/p21/op/embedding.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ fn embedding_kernel_coalesced[
indices_layout: Layout,
weights_layout: Layout,
out_layout: Layout,
batch_size: Int,
seq_len: Int,
vocab_size: Int,
embed_dim: Int,
batch_size: UInt,
seq_len: UInt,
vocab_size: UInt,
embed_dim: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout],
Expand Down Expand Up @@ -57,10 +57,10 @@ fn embedding_kernel_2d[
indices_layout: Layout,
weights_layout: Layout,
out_layout: Layout,
batch_size: Int,
seq_len: Int,
vocab_size: Int,
embed_dim: Int,
batch_size: UInt,
seq_len: UInt,
vocab_size: UInt,
embed_dim: UInt,
dtype: DType = DType.float32,
](
output: LayoutTensor[mut=True, dtype, out_layout],
Expand Down Expand Up @@ -108,10 +108,10 @@ struct EmbeddingCustomOp:
@staticmethod
fn execute[
target: StaticString,
batch_size: Int,
seq_len: Int,
vocab_size: Int,
embed_dim: Int,
batch_size: UInt,
seq_len: UInt,
vocab_size: UInt,
embed_dim: UInt,
](
output: OutputTensor[
dtype = DType.float32, rank=3
Expand Down Expand Up @@ -194,10 +194,10 @@ struct Embedding2DCustomOp:
@staticmethod
fn execute[
target: StaticString,
batch_size: Int,
seq_len: Int,
vocab_size: Int,
embed_dim: Int,
batch_size: UInt,
seq_len: UInt,
vocab_size: UInt,
embed_dim: UInt,
](
output: OutputTensor[
dtype = DType.float32, rank=3
Expand Down
Loading