Skip to content

Commit b1f3cec

Browse files
committed
make use of Event(f, args)
1 parent a91948f commit b1f3cec

File tree

2 files changed

+10
-13
lines changed

2 files changed

+10
-13
lines changed

src/backends/cpu.jl

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -39,13 +39,11 @@ function wait(::CPU, ev::CPUEvent, progress=nothing)
3939
end
4040
end
4141

42-
function async_copy!(::CPU, A, B; dependencies=nothing)
43-
wait(CPU(), MultiEvent(dependencies), yield)
44-
copyto!(A, B)
45-
return CPUEvent(nothing)
42+
function async_copy!(::CPU, A, B; dependencies=nothing, progress=yield)
43+
Event(copyto!, A, B, dependencies=dependencies, progress=progress)
4644
end
4745

48-
function (obj::Kernel{CPU})(args...; ndrange=nothing, workgroupsize=nothing, dependencies=nothing)
46+
function (obj::Kernel{CPU})(args...; ndrange=nothing, workgroupsize=nothing, dependencies=nothing, progress=yield)
4947
if ndrange isa Integer
5048
ndrange = (ndrange,)
5149
end
@@ -65,13 +63,12 @@ function (obj::Kernel{CPU})(args...; ndrange=nothing, workgroupsize=nothing, dep
6563
ndrange = nothing
6664
end
6765

68-
t = Threads.@spawn __run(obj, ndrange, iterspace, args, dependencies, Val(dynamic))
69-
return CPUEvent(t)
66+
Event(__run, obj, ndrange, iterspace, args, Val(dynamic),
67+
dependencies=dependencies, progress=progress)
7068
end
7169

7270
# Inference barriers
73-
function __run(obj, ndrange, iterspace, args, dependencies, ::Val{dynamic}) where dynamic
74-
wait(CPU(), MultiEvent(dependencies), yield)
71+
function __run(obj, ndrange, iterspace, args, ::Val{dynamic}) where dynamic
7572
N = length(iterspace)
7673
Nthreads = Threads.nthreads()
7774
if Nthreads == 1

src/backends/cuda.jl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -109,12 +109,12 @@ function __pin!(a)
109109
return nothing
110110
end
111111

112-
function async_copy!(::CUDA, A, B; dependencies=nothing)
112+
function async_copy!(::CUDA, A, B; dependencies=nothing, progress=yield)
113113
A isa Array && __pin!(A)
114114
B isa Array && __pin!(B)
115115

116116
stream = next_stream()
117-
wait(CUDA(), MultiEvent(dependencies), yield, stream)
117+
wait(CUDA(), MultiEvent(dependencies), progress, stream)
118118
event = CuEvent(CUDAdrv.EVENT_DISABLE_TIMING)
119119
GC.@preserve A B begin
120120
destptr = pointer(A)
@@ -133,7 +133,7 @@ end
133133
###
134134
# Kernel launch
135135
###
136-
function (obj::Kernel{CUDA})(args...; ndrange=nothing, dependencies=nothing, workgroupsize=nothing)
136+
function (obj::Kernel{CUDA})(args...; ndrange=nothing, dependencies=nothing, workgroupsize=nothing, progress=yield)
137137
if ndrange isa Integer
138138
ndrange = (ndrange,)
139139
end
@@ -142,7 +142,7 @@ function (obj::Kernel{CUDA})(args...; ndrange=nothing, dependencies=nothing, wor
142142
end
143143

144144
stream = next_stream()
145-
wait(CUDA(), MultiEvent(dependencies), yield, stream)
145+
wait(CUDA(), MultiEvent(dependencies), progress, stream)
146146

147147
if KernelAbstractions.workgroupsize(obj) <: DynamicSize && workgroupsize === nothing
148148
# TODO: allow for NDRange{1, DynamicSize, DynamicSize}(nothing, nothing)

0 commit comments

Comments
 (0)