We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
fill!
1 parent 26237ff commit 55138e8Copy full SHA for 55138e8
src/host/construction.jl
@@ -14,12 +14,22 @@ function Base.fill!(A::AnyGPUArray{T}, x) where T
14
15
@kernel function fill_kernel!(a, val)
16
idx = @index(Global, Linear)
17
- @inbounds a[idx] = val
+ stride = prod(@ndrange())
18
+ while idx <= length(a)
19
+ @inbounds a[idx] = val
20
+ idx += stride
21
+ end
22
end
23
24
# ndims check for 0D support
25
kernel = fill_kernel!(get_backend(A))
- kernel(A, x; ndrange = length(A))
26
+
27
+ # Calculate ndrange to ensure that a total grid size >typemax(UInt32) is never
28
+ # chosen. Grid stride to accommodate grid size limitations on AMD and Metal backends
29
+ len = length(A)
30
+ ndrange = cld(len, cld(len, typemax(UInt32) - 1024))
31
32
+ kernel(A, x; ndrange)
33
A
34
35
0 commit comments