Skip to content

Commit a28a0fc

Browse files
committed
Revert "removing heuristic"
This reverts commit 54796ad.
1 parent 54796ad commit a28a0fc

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

src/gpuarrays.jl

+21
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,33 @@
11
# GPUArrays.jl interface
22

3+
import KernelAbstractions
4+
import KernelAbstractions: Backend
5+
36
#
47
# Device functionality
58
#
69

710

811
## execution
912

13+
# Launch-configuration heuristic used by GPUArrays for the oneAPI backend.
#
# Keywords:
# - `elements`: presumably the total number of elements the kernel processes
#   (TODO confirm against the GPUArrays caller).
# - `elements_per_thread`: presumably how many elements each work-item handles.
#
# Returns the named tuple `(threads=items, blocks=32)`, where `items` is whatever
# `launch_configuration` reports for the compiled kernel.
@inline function GPUArrays.launch_heuristic(::oneAPIBackend, obj::O, args::Vararg{Any,N};
                                            elements::Int,
                                            elements_per_thread::Int) where {O,N}
    # exact integer ceiling division: the former `ceil(Int, a / b)` went through
    # Float64 and could round incorrectly for very large `elements`
    ndrange = cld(elements, elements_per_thread)
    # only the returned ndrange and the iteration space are used below
    ndrange, _, iterspace, _ = KA.launch_config(obj, ndrange, nothing)

    # this might not be the final context, since we may tune the workgroupsize
    ctx = KA.mkcontext(obj, ndrange, iterspace)

    # `launch=false`: obtain the kernel object without running it, so that its
    # launch configuration can be queried
    kernel = @oneapi launch=false obj.f(ctx, args...)

    items = launch_configuration(kernel)
    # XXX: how many groups is a good number? the API doesn't tell us.
    #      measured on a low-end IGP, 32 blocks seems like a good sweet spot.
    #      note that this only matters for grid-stride kernels, like broadcast.
    return (threads=items, blocks=32)
end
30+
1031
# Module-level dictionary mapping a `ZeDevice` to a `GPUArrays.RNG`; starts out empty.
const GLOBAL_RNGs = Dict{ZeDevice,GPUArrays.RNG}()
1132
function GPUArrays.default_rng(::Type{<:oneArray})
1233
dev = device()

0 commit comments

Comments
 (0)