Skip to content

Commit a2d3219

Browse files
committed
Disable sqrt fast math test on CUDA 11.0.
1 parent b85130d commit a2d3219

File tree

1 file changed

+15
-12
lines changed

1 file changed

+15
-12
lines changed

test/core/codegen.jl

+15 -12
Original file line numberDiff line numberDiff line change
@@ -158,26 +158,29 @@ end
158158
end
159159

160160
@testset "fastmath" begin
161-
function sqrt_kernel(x)
162-
i = threadIdx().x
163-
@inbounds x[i] = sqrt(x[i])
164-
return
165-
end
166-
167161
function div_kernel(x)
168162
i = threadIdx().x
169163
@fastmath @inbounds x[i] = 1 / x[i]
170164
return
171165
end
172166

173-
asm = sprint(io->CUDA.code_ptx(io, sqrt_kernel, Tuple{CuDeviceArray{Float32,1,AS.Global}}))
174-
@test occursin("sqrt.r", asm)
175-
176-
asm = sprint(io->CUDA.code_ptx(io, sqrt_kernel, Tuple{CuDeviceArray{Float32,1,AS.Global}}; fastmath=true))
177-
@test occursin("sqrt.approx.ftz", asm)
178-
179167
asm = sprint(io->CUDA.code_ptx(io, div_kernel, Tuple{CuDeviceArray{Float32,1,AS.Global}}; fastmath=true))
180168
@test occursin("div.approx.ftz", asm)
169+
170+
# libdevice only contains fast math versions of sqrt for CUDA 11.1+
171+
if CUDA.runtime_version() >= v"11.1"
172+
function sqrt_kernel(x)
173+
i = threadIdx().x
174+
@inbounds x[i] = sqrt(x[i])
175+
return
176+
end
177+
178+
asm = sprint(io->CUDA.code_ptx(io, sqrt_kernel, Tuple{CuDeviceArray{Float32,1,AS.Global}}))
179+
@test occursin("sqrt.r", asm)
180+
181+
asm = sprint(io->CUDA.code_ptx(io, sqrt_kernel, Tuple{CuDeviceArray{Float32,1,AS.Global}}; fastmath=true))
182+
@test occursin("sqrt.approx.ftz", asm)
183+
end
181184
end
182185

183186
end

0 commit comments

Comments
 (0)