@@ -5,7 +5,7 @@ include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) #
5
5
6
6
# Function to use as a baseline for CPU metrics
7
7
function create_histogram (input)
8
- histogram_output = zeros (Int , maximum (input))
8
+ histogram_output = zeros (eltype (input) , maximum (input))
9
9
for i in input
10
10
histogram_output[i] += 1
11
11
end
22
22
@uniform gs = @groupsize ()[1 ]
23
23
@uniform N = length (histogram_output)
24
24
25
- shared_histogram = @localmem Int (gs)
25
+ shared_histogram = @localmem eltype (input) (gs)
26
26
27
27
# This will go through all input elements and assign them to a location in
28
28
# shmem. Note that if there is not enough shmem, we create different shmem
77
77
if Base. VERSION < v " 1.7.0" && ! KernelAbstractions. isgpu (backend)
78
78
@test_skip false
79
79
else
80
- rand_input = [rand (1 : 128 ) for i in 1 : 1000 ]
81
- linear_input = [i for i in 1 : 1024 ]
82
- all_two = [2 for i in 1 : 512 ]
80
+ # Use Int32 as some backends don't support 64-bit atomics
81
+ rand_input = Int32[rand (1 : 128 ) for i in 1 : 1000 ]
82
+ linear_input = Int32[i for i in 1 : 1024 ]
83
+ all_two = Int32[2 for i in 1 : 512 ]
83
84
84
85
histogram_rand_baseline = create_histogram (rand_input)
85
86
histogram_linear_baseline = create_histogram (linear_input)
89
90
linear_input = move (backend, linear_input)
90
91
all_two = move (backend, all_two)
91
92
92
- rand_histogram = KernelAbstractions. zeros (backend, Int , 128 )
93
- linear_histogram = KernelAbstractions. zeros (backend, Int , 1024 )
94
- two_histogram = KernelAbstractions. zeros (backend, Int , 2 )
93
+ rand_histogram = KernelAbstractions. zeros (backend, Int32 , 128 )
94
+ linear_histogram = KernelAbstractions. zeros (backend, Int32 , 1024 )
95
+ two_histogram = KernelAbstractions. zeros (backend, Int32 , 2 )
95
96
96
97
histogram! (rand_histogram, rand_input)
97
98
histogram! (linear_histogram, linear_input)
0 commit comments