Skip to content

Commit 0121280

Browse files
committed
Use POCL as a CPU backend
1 parent e1202c4 commit 0121280

26 files changed

+2545
-410
lines changed

.github/workflows/ci.yml

+1-5
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,8 @@ jobs:
2222
fail-fast: false
2323
matrix:
2424
version:
25-
- '1.6'
26-
- '1.7'
27-
- '1.8'
28-
- '1.9'
2925
- '1.10'
30-
- '~1.11.0-0'
26+
- '1.11'
3127
os:
3228
- ubuntu-latest
3329
- macOS-latest

Project.toml

+18-17
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,32 @@
11
name = "KernelAbstractions"
22
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
33
authors = ["Valentin Churavy <[email protected]> and contributors"]
4-
version = "0.9.33"
4+
version = "0.10.0-dev"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
88
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
9-
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
9+
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
1010
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
11-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
11+
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
1212
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
13+
OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4"
1314
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
14-
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
15-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
15+
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
16+
SPIRVIntrinsics = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
1617
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
1718
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
19+
pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
20+
21+
[weakdeps]
22+
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
23+
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
24+
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
25+
26+
[extensions]
27+
EnzymeExt = "EnzymeCore"
28+
LinearAlgebraExt = "LinearAlgebra"
29+
SparseArraysExt = "SparseArrays"
1830

1931
[compat]
2032
Adapt = "0.4, 1.0, 2.0, 3.0, 4"
@@ -24,23 +36,12 @@ InteractiveUtils = "1.6"
2436
LinearAlgebra = "1.6"
2537
MacroTools = "0.5"
2638
PrecompileTools = "1"
27-
Requires = "1.3"
2839
SparseArrays = "<0.0.1, 1.6"
2940
StaticArrays = "0.12, 1.0"
3041
UUIDs = "<0.0.1, 1.6"
31-
julia = "1.6"
32-
33-
[extensions]
34-
EnzymeExt = "EnzymeCore"
35-
LinearAlgebraExt = "LinearAlgebra"
36-
SparseArraysExt = "SparseArrays"
42+
julia = "1.10"
3743

3844
[extras]
3945
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
4046
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
4147
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
42-
43-
[weakdeps]
44-
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
45-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
46-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

cuda/Project.toml

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[deps]
2+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3+
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"

examples/histogram.jl

+24-28
Original file line numberDiff line numberDiff line change
@@ -74,32 +74,28 @@ function move(backend, input)
7474
end
7575

7676
@testset "histogram tests" begin
77-
if Base.VERSION < v"1.7.0" && !KernelAbstractions.isgpu(backend)
78-
@test_skip false
79-
else
80-
rand_input = [rand(1:128) for i in 1:1000]
81-
linear_input = [i for i in 1:1024]
82-
all_two = [2 for i in 1:512]
83-
84-
histogram_rand_baseline = create_histogram(rand_input)
85-
histogram_linear_baseline = create_histogram(linear_input)
86-
histogram_two_baseline = create_histogram(all_two)
87-
88-
rand_input = move(backend, rand_input)
89-
linear_input = move(backend, linear_input)
90-
all_two = move(backend, all_two)
91-
92-
rand_histogram = KernelAbstractions.zeros(backend, Int, 128)
93-
linear_histogram = KernelAbstractions.zeros(backend, Int, 1024)
94-
two_histogram = KernelAbstractions.zeros(backend, Int, 2)
95-
96-
histogram!(rand_histogram, rand_input)
97-
histogram!(linear_histogram, linear_input)
98-
histogram!(two_histogram, all_two)
99-
KernelAbstractions.synchronize(CPU())
100-
101-
@test isapprox(Array(rand_histogram), histogram_rand_baseline)
102-
@test isapprox(Array(linear_histogram), histogram_linear_baseline)
103-
@test isapprox(Array(two_histogram), histogram_two_baseline)
104-
end
77+
rand_input = [rand(1:128) for i in 1:1000]
78+
linear_input = [i for i in 1:1024]
79+
all_two = [2 for i in 1:512]
80+
81+
histogram_rand_baseline = create_histogram(rand_input)
82+
histogram_linear_baseline = create_histogram(linear_input)
83+
histogram_two_baseline = create_histogram(all_two)
84+
85+
rand_input = move(backend, rand_input)
86+
linear_input = move(backend, linear_input)
87+
all_two = move(backend, all_two)
88+
89+
rand_histogram = KernelAbstractions.zeros(backend, Int, 128)
90+
linear_histogram = KernelAbstractions.zeros(backend, Int, 1024)
91+
two_histogram = KernelAbstractions.zeros(backend, Int, 2)
92+
93+
histogram!(rand_histogram, rand_input)
94+
histogram!(linear_histogram, linear_input)
95+
histogram!(two_histogram, all_two)
96+
KernelAbstractions.synchronize(CPU())
97+
98+
@test isapprox(Array(rand_histogram), histogram_rand_baseline)
99+
@test isapprox(Array(linear_histogram), histogram_linear_baseline)
100+
@test isapprox(Array(two_histogram), histogram_two_baseline)
105101
end

examples/naive_transpose.jl

+1-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ function naive_transpose!(a, b)
1515
end
1616
backend = get_backend(a)
1717
@assert get_backend(b) == backend
18-
groupsize = KernelAbstractions.isgpu(backend) ? 256 : 1024
19-
kernel! = naive_transpose_kernel!(backend, groupsize)
18+
kernel! = naive_transpose_kernel!(backend, 256)
2019
kernel!(a, b, ndrange = size(a))
2120
return
2221
end

src/KernelAbstractions.jl

+32-48
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ synchronize(backend)
5050
```
5151
"""
5252
macro kernel(expr)
53-
return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false, #=unsafe_indicies=# false)
53+
return __kernel(expr, #=force_inbounds=# false, #=unsafe_indicies=# false)
5454
end
5555

5656
"""
@@ -66,18 +66,20 @@ This allows for two different configurations:
6666
6767
!!! warning
6868
This is an experimental feature.
69+
70+
!!! note
71+
`cpu={true, false}` is deprecated for KernelAbstractions 1.0
6972
"""
7073
macro kernel(ex...)
7174
if length(ex) == 1
72-
return __kernel(ex[1], true, false, false)
75+
return __kernel(ex[1], false, false)
7376
else
74-
generate_cpu = true
7577
unsafe_indicies = false
7678
force_inbounds = false
7779
for i in 1:(length(ex) - 1)
7880
if ex[i] isa Expr && ex[i].head == :(=) &&
7981
ex[i].args[1] == :cpu && ex[i].args[2] isa Bool
80-
generate_cpu = ex[i].args[2]
82+
# deprecated: the `cpu` option is still parsed here but intentionally ignored
8183
elseif ex[i] isa Expr && ex[i].head == :(=) &&
8284
ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool
8385
force_inbounds = ex[i].args[2]
@@ -94,7 +96,7 @@ macro kernel(ex...)
9496
)
9597
end
9698
end
97-
return __kernel(ex[end], generate_cpu, force_inbounds, unsafe_indicies)
99+
return __kernel(ex[end], force_inbounds, unsafe_indicies)
98100
end
99101
end
100102

@@ -190,6 +192,8 @@ After releasing the memory of an array, it should no longer be accessed.
190192
"""
191193
function unsafe_free! end
192194

195+
unsafe_free!(::AbstractArray) = return
196+
193197
###
194198
# Kernel language
195199
# - @localmem
@@ -254,6 +258,9 @@ For storage that only persists between `@synchronize` statements, an `MArray` ca
254258
instead.
255259
256260
See also [`@uniform`](@ref).
261+
262+
!!! note
263+
`@private` is deprecated for KernelAbstractions 1.0
257264
"""
258265
macro private(T, dims)
259266
if dims isa Integer
@@ -269,6 +276,9 @@ end
269276
270277
Creates a private local of `mem` per item in the workgroup. This can be safely used
271278
across [`@synchronize`](@ref) statements.
279+
280+
!!! note
281+
`@private` is deprecated for KernelAbstractions 1.0
272282
"""
273283
macro private(expr)
274284
return esc(expr)
@@ -279,6 +289,9 @@ end
279289
280290
`expr` is evaluated outside the workitem scope. This is useful for variable declarations
281291
that span workitems, or are reused across `@synchronize` statements.
292+
293+
!!! note
294+
`@uniform` is deprecated for KernelAbstractions 1.0
282295
"""
283296
macro uniform(value)
284297
return esc(value)
@@ -330,6 +343,8 @@ Access the hidden context object used by KernelAbstractions.
330343
!!! warning
331344
Only valid to be used from a kernel with `cpu=false`.
332345
346+
!!! note
347+
`@context` will be supported on all backends in KernelAbstractions 1.0
333348
```
334349
function f(@context, a)
335350
I = @index(Global, Linear)
@@ -478,31 +493,11 @@ Abstract type for all GPU based KernelAbstractions backends.
478493
479494
!!! note
480495
New backend implementations **must** sub-type this abstract type.
481-
"""
482-
abstract type GPU <: Backend end
483-
484-
"""
485-
CPU(; static=false)
486-
487-
Instantiate a CPU (multi-threaded) backend.
488-
489-
## Options:
490-
- `static`: Uses a static thread assignment, this can be beneficial for NUMA aware code.
491-
Defaults to false.
492-
"""
493-
struct CPU <: Backend
494-
static::Bool
495-
CPU(; static::Bool = false) = new(static)
496-
end
497-
498-
"""
499-
isgpu(::Backend)::Bool
500496
501-
Returns true for all [`GPU`](@ref) backends.
497+
!!! note
498+
`GPU` will be removed in KernelAbstractions v1.0
502499
"""
503-
isgpu(::GPU) = true
504-
isgpu(::CPU) = false
505-
500+
abstract type GPU <: Backend end
506501

507502
"""
508503
get_backend(A::AbstractArray)::Backend
@@ -518,12 +513,9 @@ function get_backend end
518513
# Should cover SubArray, ReshapedArray, ReinterpretArray, Hermitian, AbstractTriangular, etc.:
519514
get_backend(A::AbstractArray) = get_backend(parent(A))
520515

521-
get_backend(::Array) = CPU()
522-
523516
# Define:
524517
# adapt_storage(::Backend, a::Array) = adapt(BackendArray, a)
525518
# adapt_storage(::Backend, a::BackendArray) = a
526-
Adapt.adapt_storage(::CPU, a::Array) = a
527519

528520
"""
529521
allocate(::Backend, Type, dims...)::AbstractArray
@@ -743,7 +735,7 @@ Partition a kernel for the given ndrange and workgroupsize.
743735
return iterspace, dynamic
744736
end
745737

746-
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, S <: _Size, NDRange <: _Size, XPUName}
738+
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: GPU, S <: _Size, NDRange <: _Size, XPUName}
747739
return Kernel{Backend, S, NDRange, XPUName}(backend, xpu_name)
748740
end
749741

@@ -760,6 +752,10 @@ include("compiler.jl")
760752
function __workitems_iterspace end
761753
function __validindex end
762754

755+
# for reflection
756+
function mkcontext end
757+
function launch_config end
758+
763759
include("macros.jl")
764760

765761
###
@@ -829,8 +825,11 @@ end
829825
end
830826

831827
# CPU backend
828+
include("pocl/pocl.jl")
829+
using .POCL
830+
export POCLBackend
832831

833-
include("cpu.jl")
832+
const CPU = POCLBackend
834833

835834
# precompile
836835
PrecompileTools.@compile_workload begin
@@ -844,19 +843,4 @@ PrecompileTools.@compile_workload begin
844843
end
845844
end
846845

847-
if !isdefined(Base, :get_extension)
848-
using Requires
849-
end
850-
851-
@static if !isdefined(Base, :get_extension)
852-
function __init__()
853-
@require EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" include("../ext/EnzymeExt.jl")
854-
end
855-
end
856-
857-
if !isdefined(Base, :get_extension)
858-
include("../ext/LinearAlgebraExt.jl")
859-
include("../ext/SparseArraysExt.jl")
860-
end
861-
862846
end #module

src/cpu.jl

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
unsafe_free!(::AbstractArray) = return
21
synchronize(::CPU) = nothing
32

43
allocate(::CPU, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, dims)

0 commit comments

Comments
 (0)