Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/custom_cuda.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ N = 1024
threads = 256
blocks = cld(N, threads)

a = cuNumeric.full(N, 1.0f0)
b = cuNumeric.full(N, 2.0f0)
a = cuNumeric.fill(1.0f0, N)
b = cuNumeric.fill(2.0f0, N)
c = cuNumeric.ones(Float32, N)

# task = cuNumeric.@cuda_task kernel_add(a, b, c, UInt32(1))
Expand Down
16 changes: 8 additions & 8 deletions src/ndarray/binary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ function Base.:(-)(rhs1::NDArray{A,N}, rhs2::NDArray{B,N}) where {A,B,N}
promote_shape(size(rhs1), size(rhs2))
T_OUT = __checked_promote_op(-, A, B)
out = cuNumeric.zeros(T_OUT, size(rhs1))
return nda_binary_op(
return nda_binary_op!(
out,
cuNumeric.SUBTRACT,
unchecked_promote_arr(rhs1, T_OUT),
Expand All @@ -100,15 +100,15 @@ function Base.:(+)(rhs1::NDArray{A,N}, rhs2::NDArray{B,N}) where {A,B,N}
promote_shape(size(rhs1), size(rhs2))
T_OUT = __checked_promote_op(+, A, B)
out = cuNumeric.zeros(T_OUT, size(rhs1))
return nda_binary_op(
return nda_binary_op!(
out, cuNumeric.ADD, unchecked_promote_arr(rhs1, T_OUT), unchecked_promote_arr(rhs2, T_OUT)
)
end

# Scalar-times-array multiplication.
# Both operands are brought to the common element type `T = __my_promote_type(A, V)`:
# the scalar is converted with `T(val)` and wrapped in an NDArray, the array goes
# through `unchecked_promote_arr`. The MULTIPLY binary op is then issued with `out`
# (created by `cuNumeric.zeros` with the size of `arr`) as the destination.
function Base.:(*)(val::V, arr::NDArray{A}) where {A,V}
T = __my_promote_type(A, V)
out = cuNumeric.zeros(T, size(arr))
return nda_binary_op(out, cuNumeric.MULTIPLY, NDArray(T(val)), unchecked_promote_arr(arr, T))
return nda_binary_op!(out, cuNumeric.MULTIPLY, NDArray(T(val)), unchecked_promote_arr(arr, T))
end

function Base.:(*)(arr::NDArray{A}, val::V) where {A,V}
Expand Down Expand Up @@ -191,7 +191,7 @@ for (julia_fn, op_code) in binary_op_map
@inline function __broadcast(
f::typeof($(julia_fn)), out::NDArray, rhs1::NDArray{T}, rhs2::NDArray{T}
) where {T}
return nda_binary_op(out, $(op_code), rhs1, rhs2)
return nda_binary_op!(out, $(op_code), rhs1, rhs2)
end
end
end
Expand All @@ -204,7 +204,7 @@ for (julia_fn, op_code) in floaty_binary_op_map
@inline function __broadcast(
f::typeof($(julia_fn)), out::NDArray, rhs1::NDArray{T}, rhs2::NDArray{T}
) where {T}
return nda_binary_op(out, $(op_code), rhs1, rhs2)
return nda_binary_op!(out, $(op_code), rhs1, rhs2)
end

# If input is not already float, promote to that
Expand All @@ -220,7 +220,7 @@ end
f::typeof(Base.:(+)), out::NDArray{O}, rhs1::NDArray{Bool}, rhs2::NDArray{Bool}
) where {O<:Integer}
assertpromotion(".+", Bool, O)
return nda_binary_op(
return nda_binary_op!(
out, cuNumeric.ADD, unchecked_promote_arr(rhs1, O), unchecked_promote_arr(rhs2, O)
)
end
Expand All @@ -229,7 +229,7 @@ end
f::typeof(Base.:(-)), out::NDArray{O}, rhs1::NDArray{Bool}, rhs2::NDArray{Bool}
) where {O<:Integer}
assertpromotion(".-", Bool, O)
return nda_binary_op(
return nda_binary_op!(
out, cuNumeric.SUBTRACT, unchecked_promote_arr(rhs1, O), unchecked_promote_arr(rhs2, O)
)
end
Expand All @@ -250,7 +250,7 @@ end
# Broadcast kernel selected for `Base.literal_pow`.
# The third positional argument is accepted but ignored (`_`); `input` and `power`
# must share the element type `T`. The POWER binary op is issued with `out` as the
# destination array.
@inline function __broadcast(
f::typeof(Base.literal_pow), out::NDArray, _, input::NDArray{T}, power::NDArray{T}
) where {T}
return nda_binary_op(out, cuNumeric.POWER, input, power)
return nda_binary_op!(out, cuNumeric.POWER, input, power)
end

# This is more "Julian" since a user expects map to broadcast
Expand Down
79 changes: 36 additions & 43 deletions src/ndarray/detail/ndarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,19 @@ get_n_dim(ptr::NDArray_t) = Int(ccall((:nda_array_dim, libnda), Int32, (NDArray_
abstract type AbstractNDArray{T<:SUPPORTED_TYPES,N} end

@doc"""
**Internal API**

The NDArray type represents a multi-dimensional array in cuNumeric.
It is a wrapper around a Legate array and provides various methods for array manipulation and operations.
Finalizer calls `nda_destroy_array` to clean up the underlying Legate array when the NDArray is garbage collected.
"""
mutable struct NDArray{T,N} <: AbstractNDArray{T,N}
mutable struct NDArray{T, N, PADDED} <: AbstractNDArray{T,N}
ptr::NDArray_t
nbytes::Int64
padding::Union{Nothing,NTuple{N,Int}} where {N}
padding::Union{Nothing,NTuple{N,Int}}

function NDArray(ptr::NDArray_t; T=get_julia_type(ptr), n_dim=get_n_dim(ptr))
function NDArray(ptr::NDArray_t, ::Type{T}, ::Val{N}) where {T, N}
nbytes = cuNumeric.nda_nbytes(ptr)
cuNumeric.register_alloc!(nbytes)
handle = new{T,Int(n_dim)}(ptr, nbytes, nothing)
handle = new{T,N, false}(ptr, nbytes, nothing)
finalizer(handle) do h
cuNumeric.nda_destroy_array(h.ptr)
cuNumeric.register_free!(h.nbytes)
Expand All @@ -51,6 +49,9 @@ mutable struct NDArray{T,N} <: AbstractNDArray{T,N}
end
end

# Dynamic fallback: by default the element type and the rank are queried from the
# handle at runtime (`get_julia_type` / `get_n_dim`), so the concrete NDArray type
# cannot be inferred by the compiler. Prefer the positional
# `NDArray(ptr, T, Val(N))` form whenever T and N are known statically.
function NDArray(ptr::NDArray_t; T=get_julia_type(ptr), N::Integer=get_n_dim(ptr))
    # Normalize the rank to `Int` before lifting it into the type domain.
    # `Val(Int32(2))` and `Val(2)` are different types, so a caller passing e.g. an
    # `Int32` rank would otherwise produce an NDArray whose `N` parameter is not an
    # `Int` (and `NTuple{N,Int}` requires an `Int` length).
    return NDArray(ptr, T, Val(Int(N)))
end

# struct WrappedNDArray{T,N} <: AbstractNDArray{T,N}
# ndarr::NDArray{T,N}
# jlarr::Array{T,N}
Expand All @@ -77,28 +78,28 @@ end
# return NDArray(ptr, T = T, n_dim = 1)
# end

NDArray(value::T) where {T<:SUPPORTED_TYPES} = nda_full_array(UInt64[], value)
# Wrap a single scalar of a supported type in an NDArray by filling an array
# whose dims tuple is empty.
function NDArray(value::T) where {T<:SUPPORTED_TYPES}
    return nda_full_array((), value)
end

# construction
# Create an NDArray of element type `T` through libnda's `nda_zeros_array`.
# The C entry point receives the rank as an Int32, the extents as a UInt64 buffer,
# and the Legate type obtained from `Legate.to_legate_type(T)`; the returned handle
# is wrapped in an NDArray carrying `T` and the rank.
function nda_zeros_array(shape::Vector{UInt64}, ::Type{T}) where {T}
n_dim = Int32(length(shape))
function nda_zeros_array(dims::Dims{N}, ::Type{T}) where {T, N}
shape = collect(UInt64, dims)
legate_type = Legate.to_legate_type(T)
ptr = ccall((:nda_zeros_array, libnda),
NDArray_t, (Int32, Ptr{UInt64}, Legate.LegateTypeAllocated),
n_dim, shape, legate_type)
return NDArray(ptr; T=T, n_dim=n_dim)
Int32(N), shape, legate_type)
return NDArray(ptr, T, Val(N))
end

# Create an NDArray through libnda's `nda_full_array`.
# The element type `T` is taken from `value`; the C entry point receives the rank
# as an Int32, the extents as a UInt64 buffer, the Legate type for `T`, and the
# fill value by reference (`Ref(value)` passed as `Ptr{Cvoid}`).
function nda_full_array(shape::Vector{UInt64}, value::T) where {T}
n_dim = Int32(length(shape))
function nda_full_array(dims::Dims{N}, value::T) where {T, N}
shape = collect(UInt64, dims)
type = Legate.to_legate_type(T)

ptr = ccall((:nda_full_array, libnda),
NDArray_t,
(Int32, Ptr{UInt64}, Legate.LegateTypeAllocated, Ptr{Cvoid}),
n_dim, shape, type, Ref(value))
Int32(N), shape, type, Ref(value))

return NDArray(ptr; T=T, n_dim=n_dim)
return NDArray(ptr, T, Val(N))
end

function nda_random(arr::NDArray, gen_code)
Expand All @@ -107,19 +108,19 @@ function nda_random(arr::NDArray, gen_code)
arr.ptr, Int32(gen_code))
end

# Create an NDArray through libnda's `nda_random_array` for the given extents.
# Only the rank (Int32) and the UInt64 extent buffer are passed to the C side;
# no element type is supplied by the caller.
function nda_random_array(shape::Vector{UInt64})
n_dim = Int32(length(shape))
function nda_random_array(dims::Dims{N}) where {N}
shape = collect(UInt64, dims)
ptr = ccall((:nda_random_array, libnda),
NDArray_t, (Int32, Ptr{UInt64}),
n_dim, shape)
return NDArray(ptr; n_dim=n_dim)
Int32(N), shape)
return NDArray(ptr, Float64, Val(N)) #* T is always Float64 because of cupynumeric
end

# Obtain a sliced NDArray through libnda's `nda_get_slice`.
# The C entry point receives the source handle, a pointer to the `Slice` buffer and
# the number of slices; the result is wrapped with the same `T` and `N` as `arr`.
# NOTE(review): `pointer(slices)` hands a raw pointer to `ccall`, which does not by
# itself keep `slices` rooted for the duration of the call; passing `slices`
# directly (or using `GC.@preserve`) is the documented safe form — confirm.
function nda_get_slice(arr::NDArray{T,N}, slices::Vector{Slice}) where {T,N}
ptr = ccall((:nda_get_slice, libnda),
NDArray_t, (NDArray_t, Ptr{Slice}, Cint),
arr.ptr, pointer(slices), length(slices))
return NDArray(ptr; T=T, n_dim=N)
return NDArray(ptr, T, Val(N))
end

# queries
Expand Down Expand Up @@ -147,7 +148,7 @@ function nda_reshape_array(arr::NDArray{T}, newshape::Vector{UInt64}) where {T}
ptr = ccall((:nda_reshape_array, libnda),
NDArray_t, (NDArray_t, Int32, Ptr{UInt64}),
arr.ptr, n_dim, newshape)
return NDArray(ptr; T=T, n_dim=n_dim)
return NDArray(ptr, T, Val(n_dim))
end

function nda_astype(arr::NDArray{OLD_T,N}, ::Type{NEW_T}) where {OLD_T,NEW_T,N}
Expand All @@ -156,7 +157,7 @@ function nda_astype(arr::NDArray{OLD_T,N}, ::Type{NEW_T}) where {OLD_T,NEW_T,N}
NDArray_t,
(NDArray_t, Legate.LegateTypeAllocated),
arr.ptr, type)
return NDArray(ptr; T=NEW_T, n_dim=N)
return NDArray(ptr, NEW_T, Val(N))
end

function nda_fill_array(arr::NDArray{T}, value::T) where {T}
Expand Down Expand Up @@ -193,14 +194,14 @@ function nda_move(dst::NDArray{T,N}, src::NDArray{T,N}) where {T,N}
end

# operations
function nda_binary_op(out::NDArray, op_code::BinaryOpCode, rhs1::NDArray, rhs2::NDArray)
# Apply the binary operation `op_code` to `rhs1` and `rhs2` via libnda's
# `nda_binary_op`, using `out` as the destination handle. Returns `out`.
function nda_binary_op!(out::NDArray, op_code::BinaryOpCode, rhs1::NDArray, rhs2::NDArray)
    # Only the raw `ptr` fields cross the FFI boundary, so `ccall` does not root the
    # wrapper objects on its own. NDArray wrappers carry a finalizer that destroys
    # the underlying array, so keep all three alive until the native call returns.
    GC.@preserve out rhs1 rhs2 begin
        ccall((:nda_binary_op, libnda),
            Cvoid, (NDArray_t, BinaryOpCode, NDArray_t, NDArray_t),
            out.ptr, op_code, rhs1.ptr, rhs2.ptr)
    end
    return out
end

function nda_unary_op(out::NDArray, op_code::UnaryOpCode, input::NDArray)
function nda_unary_op!(out::NDArray, op_code::UnaryOpCode, input::NDArray)
ccall((:nda_unary_op, libnda),
Cvoid, (NDArray_t, UnaryOpCode, NDArray_t),
out.ptr, op_code, input.ptr)
Expand All @@ -218,7 +219,7 @@ function nda_array_equal(rhs1::NDArray{T,N}, rhs2::NDArray{T,N}) where {T,N}
ptr = ccall((:nda_array_equal, libnda),
NDArray_t, (NDArray_t, NDArray_t),
rhs1.ptr, rhs2.ptr)
return NDArray(ptr; T=Bool, n_dim=1)
return NDArray(ptr, Bool, Val(1))
end

function nda_diag(arr::NDArray, k::Int32)
Expand Down Expand Up @@ -255,7 +256,7 @@ function nda_multiply_scalar(rhs1::NDArray{T,N}, value::T) where {T,N}
ptr = ccall((:nda_multiply_scalar, libnda),
NDArray_t, (NDArray_t, Legate.LegateTypeAllocated, Ptr{Cvoid}),
rhs1.ptr, type, Ref(value))
return NDArray(ptr; T=T, n_dim=N)
return NDArray(ptr, T, Val(N))
end

function nda_add_scalar(rhs1::NDArray{T,N}, value::T) where {T,N}
Expand All @@ -264,7 +265,7 @@ function nda_add_scalar(rhs1::NDArray{T,N}, value::T) where {T,N}
ptr = ccall((:nda_add_scalar, libnda),
NDArray_t, (NDArray_t, Legate.LegateTypeAllocated, Ptr{Cvoid}),
rhs1.ptr, type, Ref(value))
return NDArray(ptr; T=T, n_dim=N)
return NDArray(ptr, T, Val(N))
end

function nda_three_dot_arg(rhs1::NDArray{T}, rhs2::NDArray{T}, out::NDArray{T}) where {T}
Expand All @@ -286,7 +287,7 @@ function nda_eye(rows::Int32, ::Type{T}) where {T}
ptr = ccall((:nda_eye, libnda),
NDArray_t, (Int32, Legate.LegateTypeAllocated),
rows, legate_type)
return NDArray(ptr; T=T, n_dim=2)
return NDArray(ptr, T, Val(2))
end

function nda_trace(
Expand All @@ -297,7 +298,7 @@ function nda_trace(
NDArray_t,
(NDArray_t, Int32, Int32, Int32, Legate.LegateTypeAllocated),
arr.ptr, offset, a1, a2, legate_type)
return NDArray(ptr; T=T, n_dim=1)
return NDArray(ptr, T, Val(1))
end

function nda_transpose(arr::NDArray)
Expand All @@ -317,7 +318,7 @@ function nda_attach_external(arr::AbstractArray{T,N}) where {T,N}
NDArray_t, (Ptr{Cvoid}, UInt64, Int32, Ptr{UInt64}, Legate.LegateTypeAllocated),
ptr, nbytes, N, shape, legate_type)

return NDArray(nda_ptr; T=T, n_dim=N)
return NDArray(nda_ptr, T, Val(N))
end

# return underlying logical store to the NDArray obj
Expand Down Expand Up @@ -401,14 +402,6 @@ function slice_array(slices::Vararg{Tuple{Union{Int,Nothing},Union{Int,Nothing}}
return v
end

@doc"""
padded_shape(arr::NDArray)

**Internal API**

Return the size of the given `NDArray`. This will include the padded size.
"""
padded_shape(arr::NDArray) = Tuple(Int.(cuNumeric.nda_array_shape(arr)))

@doc"""
shape(arr::NDArray)
Expand All @@ -417,11 +410,11 @@ padded_shape(arr::NDArray) = Tuple(Int.(cuNumeric.nda_array_shape(arr)))

Return the size of the given `NDArray`.
"""
function shape(arr::NDArray)
if !isnothing(arr.padding)
return arr.padding
end
return cuNumeric.padded_shape(arr)
function shape(arr::NDArray{<:Any,N,true}) where {N}
    # Padded arrays report the `padding` tuple stored on the wrapper.
    return arr.padding
end

function shape(arr::NDArray{<:Any,N,false}) where {N}
    # Unpadded arrays query the extents via `nda_array_shape` and convert each of
    # the first N entries to `Int`, yielding an N-tuple.
    raw_dims = cuNumeric.nda_array_shape(arr)
    return ntuple(Val(N)) do axis
        Int(raw_dims[axis])
    end
end

@doc"""
Expand Down
Loading
Loading