Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add N for hardware indices #541

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ using StaticArrays
using Adapt

"""
@kernel function f(args) end
@kernel [N] function f(args) end

Takes a function definition and generates a [`Kernel`](@ref) constructor from it.
The enclosed function is allowed to contain kernel language constructs.
In order to call it the kernel has first to be specialized on the backend
and then invoked on the arguments.

The optional `N` parameter can be used to fix the number of dimensions used for the ndrange.

# Kernel language

- [`@Const`](@ref)
Expand Down Expand Up @@ -50,11 +52,11 @@ synchronize(backend)
```
"""
macro kernel(expr)
__kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
__kernel(DynamicSize(), expr, #=generate_cpu=# true, #=force_inbounds=# false)
end

"""
@kernel config function f(args) end
@kernel [N] config function f(args) end

This allows for two different configurations:

Expand All @@ -68,17 +70,20 @@ This allows for two different configurations:
"""
macro kernel(ex...)
if length(ex) == 1
__kernel(ex[1], true, false)
__kernel(DynamicSize(), ex[1], true, false)
else
generate_cpu = true
force_inbounds = false
N = DynamicSize() # TODO parse N
for i in 1:(length(ex) - 1)
if ex[i] isa Expr && ex[i].head == :(=) &&
ex[i].args[1] == :cpu && ex[i].args[2] isa Bool
generate_cpu = ex[i].args[2]
elseif ex[i] isa Expr && ex[i].head == :(=) &&
ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool
force_inbounds = ex[i].args[2]
elseif ex[i] isa Int
N = StaticSize(ex[i])
else
error(
"Configuration should be of form:\n" *
Expand All @@ -88,7 +93,7 @@ macro kernel(ex...)
)
end
end
__kernel(ex[end], generate_cpu, force_inbounds)
__kernel(N, ex[end], generate_cpu, force_inbounds)
end
end

Expand Down Expand Up @@ -584,17 +589,18 @@ in a workgroup.
```
As well as the on-device functionality.
"""
struct Kernel{Backend, WorkgroupSize <: _Size, NDRange <: _Size, Fun}
struct Kernel{Backend, N <: _Size, WorkgroupSize <: _Size, NDRange <: _Size, Fun}
    # Type parameters:
    #   Backend       - type of the `backend` field.
    #   N             - static number of ndrange dimensions; `partition` only
    #                   enforces it when it is a `StaticSize`.
    #   WorkgroupSize - `_Size` parameter reported by `workgroupsize(kernel)`.
    #   NDRange       - `_Size` parameter reported by `ndrange(kernel)`.
    #   Fun           - type of the wrapped function `f`.
    backend::Backend
    # Function object supplied at construction (`construct` passes the
    # generated CPU or GPU kernel function here).
    f::Fun
end

function Base.similar(kernel::Kernel{D, WS, ND}, f::F) where {D, WS, ND, F}
Kernel{D, WS, ND, F}(kernel.backend, f)
# Build a kernel that keeps the backend and the static parameters (dimension
# count, workgroup size, ndrange) of `kernel` but wraps the function `f`.
function Base.similar(
        kernel::Kernel{B, Dims, WG, Rng}, f::F
    ) where {B, Dims, WG, Rng, F}
    KernelT = Kernel{B, Dims, WG, Rng, F}
    return KernelT(kernel.backend, f)
end

workgroupsize(::Kernel{D, WorkgroupSize}) where {D, WorkgroupSize} = WorkgroupSize
ndrange(::Kernel{D, WorkgroupSize, NDRange}) where {D, WorkgroupSize, NDRange} = NDRange
# Accessors for a `Kernel`'s type parameters. The first three return the
# parameter itself (a type), not an instance of it.
function workgroupsize(::Kernel{B, Dims, WG}) where {B, Dims, WG}
    return WG
end

function ndrange(::Kernel{B, Dims, WG, Rng}) where {B, Dims, WG, Rng}
    return Rng
end

# NOTE(review): this looks like it introduces a module-local `ndims` instead of
# extending `Base.ndims` — confirm no other code in this module relies on
# `Base.ndims` through the unqualified name.
function ndims(::Kernel{B, Dims}) where {B, Dims}
    return Dims
end

# Return the backend instance stored in `kernel`.
function backend(kernel::Kernel)
    return kernel.backend
end

"""
Expand All @@ -603,6 +609,7 @@ Partition a kernel for the given ndrange and workgroupsize.
@inline function partition(kernel, ndrange, workgroupsize)
static_ndrange = KernelAbstractions.ndrange(kernel)
static_workgroupsize = KernelAbstractions.workgroupsize(kernel)
static_ndims = KernelAbstractions.ndims(kernel)

if ndrange === nothing && static_ndrange <: DynamicSize ||
workgroupsize === nothing && static_workgroupsize <: DynamicSize
Expand Down Expand Up @@ -653,12 +660,20 @@ Partition a kernel for the given ndrange and workgroupsize.
workgroupsize = CartesianIndices(workgroupsize)
end

if static_ndims <: StaticSize
N = only(get(static_ndims))
if N !== length(ndrange)
error("Mismatch between static kernel dimension (N=$N) and ndrange=$ndrange")
end
end

# TODO: Add static_ndims
iterspace = NDRange{length(ndrange), static_blocks, static_workgroupsize}(blocks, workgroupsize)
return iterspace, dynamic
end

function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, S <: _Size, NDRange <: _Size, XPUName}
return Kernel{Backend, S, NDRange, XPUName}(backend, xpu_name)
# Create the `Kernel` object for a CPU or GPU `backend`. The static dimension
# count, workgroup size and ndrange are passed as `_Size` values and recorded
# only as type parameters; `xpu_name` becomes the wrapped function.
function construct(
        backend::B, ::Dims, ::WG, ::Rng, xpu_name::F
    ) where {B <: Union{CPU, GPU}, Dims <: _Size, WG <: _Size, Rng <: _Size, F}
    KernelT = Kernel{B, Dims, WG, Rng, F}
    return KernelT(backend, xpu_name)
end

###
Expand Down
6 changes: 3 additions & 3 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ function find_return(stmt)
end

# XXX: Proper errors
function __kernel(expr, generate_cpu = true, force_inbounds = false)
function __kernel(N, expr, generate_cpu = true, force_inbounds = false)
def = splitdef(expr)
name = def[:name]
args = def[:args]
Expand Down Expand Up @@ -57,10 +57,10 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
$name(dev, size, range) = $name(dev, $StaticSize(size), $StaticSize(range))
function $name(dev::Dev, sz::S, range::NDRange) where {Dev, S <: $_Size, NDRange <: $_Size}
if $isgpu(dev)
return $construct(dev, sz, range, $gpu_name)
return $construct(dev, $(N), sz, range, $gpu_name)
else
if $generate_cpu
return $construct(dev, sz, range, $cpu_name)
return $construct(dev, $(N), sz, range, $cpu_name)
else
error("This kernel is unavailable for backend CPU")
end
Expand Down
11 changes: 11 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@
end
@test_throws ErrorException("This kernel is unavailable for backend CPU") my_no_cpu_kernel(CPU())

# Kernels declared with a fixed number of ndrange dimensions (`@kernel N ...`).
@kernel 1 function OneD()
end

@kernel 2 function TwoD()
end

# An ndrange whose length matches the declared dimension launches normally.
@test OneD(CPU())(ndrange=1024) === nothing
# A mismatching ndrange is rejected. The message echoes the ndrange as a tuple,
# so a scalar `ndrange=1024` is reported as `(1024,)`, not `(1024, 1)`.
@test_throws ErrorException("Mismatch between static kernel dimension (N=1) and ndrange=(1024, 1)") OneD(CPU())(ndrange=(1024, 1))
@test_throws ErrorException("Mismatch between static kernel dimension (N=2) and ndrange=(1024,)") TwoD(CPU())(ndrange=1024)

Check failure on line 32 in test/runtests.jl

View workflow job for this annotation

GitHub Actions / CI (1.8, macOS-latest, x64)

Test Failed Expression: (TwoD(CPU()))(ndrange = 1024) Expected: ErrorException("Mismatch between static kernel dimension (N=2) and ndrange=(1024, 1)") Thrown: ErrorException("Mismatch between static kernel dimension (N=2) and ndrange=(1024,)")
@test TwoD(CPU())(ndrange=(1024,1)) === nothing

# testing multiple configurations at the same time
@kernel cpu = false inbounds = false function my_no_cpu_kernel2(a)
end
Expand Down
4 changes: 2 additions & 2 deletions test/test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ identity(x) = x
function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; skip_tests = Set{String}())
@conditional_testset "partition" skip_tests begin
backend = Backend()
let kernel = KernelAbstractions.Kernel{typeof(backend), StaticSize{(64,)}, DynamicSize, typeof(identity)}(backend, identity)
let kernel = KernelAbstractions.Kernel{typeof(backend), DynamicSize, StaticSize{(64,)}, DynamicSize, typeof(identity)}(backend, identity)
iterspace, dynamic = KernelAbstractions.partition(kernel, (128,), nothing)
@test length(blocks(iterspace)) == 2
@test dynamic isa NoDynamicCheck
Expand All @@ -26,7 +26,7 @@ function unittest_testsuite(Backend, backend_str, backend_mod, BackendArrayT; sk
@test_throws ErrorException KernelAbstractions.partition(kernel, (129,), (65,))
@test KernelAbstractions.backend(kernel) == backend
end
let kernel = KernelAbstractions.Kernel{typeof(backend), StaticSize{(64,)}, StaticSize{(128,)}, typeof(identity)}(backend, identity)
let kernel = KernelAbstractions.Kernel{typeof(backend), DynamicSize, StaticSize{(64,)}, StaticSize{(128,)}, typeof(identity)}(backend, identity)
iterspace, dynamic = KernelAbstractions.partition(kernel, (128,), nothing)
@test length(blocks(iterspace)) == 2
@test dynamic isa NoDynamicCheck
Expand Down
Loading