Skip to content
This repository was archived by the owner on May 4, 2019. It is now read-only.

Commit 604385d

Browse files
committed
Stop lying about eltype
1 parent 2103a21 commit 604385d

13 files changed

+91
-39
lines changed

src/abstractdataarray.jl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
An `N`-dimensional `AbstractArray` whose entries can take on values of type
55
`T` or the value `NA`.
66
"""
7-
abstract type AbstractDataArray{T, N} <: AbstractArray{T, N} end
7+
abstract type AbstractDataArray{T, N} <: AbstractArray{Union{T, NAtype}, N} end
88

99
"""
1010
AbstractDataVector{T}
@@ -20,8 +20,6 @@ A 2-dimensional [`AbstractDataArray`](@ref) with element type `T`.
2020
"""
2121
const AbstractDataMatrix{T} = AbstractDataArray{T, 2}
2222

23-
Base.eltype{T, N}(d::AbstractDataArray{T, N}) = T
24-
2523
# Generic iteration over AbstractDataArray's
2624

2725
Base.start(x::AbstractDataArray) = 1

src/broadcast.jl

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,30 @@ Base.Broadcast._containertype(::Type{T}) where T<:DataArray = DataArra
189189
Base.Broadcast._containertype(::Type{T}) where T<:PooledDataArray = PooledDataArray
190190
Base.Broadcast.broadcast_indices(::Type{T}, A) where T<:AbstractDataArray = indices(A)
191191

192+
@inline function broadcast_t(f, ::Type{T}, shape, A, Bs...) where {T}
193+
dest = Base.Broadcast.containertype(A, Bs...)(extractT(T), Base.index_lengths(shape...))
194+
return broadcast!(f, dest, A, Bs...)
195+
end
196+
197+
# This is mainly to handle isna.(x) since isna is probably the only
198+
# function that can guarantee that NAs will never propagate
199+
@inline function broadcast_t(f, ::Type{Bool}, shape, A, Bs...)
200+
dest = similar(BitArray, shape)
201+
return broadcast!(f, dest, A, Bs...)
202+
end
203+
204+
# This one is almost identical to the version in Base and can hopefully be
205+
# removed at some point. The main issue in Base is that it tests for
206+
# isleaftype(T) which is false for Union{T,NAtype}. If the test in Base
207+
# can be modified to cover simple unions of leaftypes then this method
208+
# can probably be deleted and the two _t methods adjusted to match the Base
209+
# invocation from Base.Broadcast.broadcast_c
192210
@inline function Base.Broadcast.broadcast_c{S<:AbstractDataArray}(f, ::Type{S}, A, Bs...)
193211
T = Base.Broadcast._broadcast_eltype(f, A, Bs...)
194212
shape = Base.Broadcast.broadcast_indices(A, Bs...)
195-
dest = S(T, Base.index_lengths(shape...))
196-
return broadcast!(f, dest, A, Bs...)
213+
return broadcast_t(f, T, shape, A, Bs...)
197214
end
215+
216+
# This one is much faster than normal broadcasting but the method won't get called
217+
# in fusing operations like (!).(isna.(x))
218+
Base.broadcast(::typeof(isna), da::DataArray) = copy(da.na)

src/dataarray.jl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ function Base.resize!{T}(da::DataArray{T,1}, n::Int)
162162
end
163163

164164
function Base.similar(da::DataArray, T::Type, dims::Dims) #-> DataArray{T}
165-
return DataArray(Array{T}(dims), trues(dims))
165+
return DataArray(Array{extractT(T)}(dims), trues(dims))
166166
end
167167

168168
Base.size(d::DataArray) = size(d.data) # -> (Int...)
@@ -244,8 +244,6 @@ end
244244

245245
dropna(dv::DataVector) = dv.data[.!dv.na] # -> Vector
246246

247-
Base.broadcast(::typeof(isna), da::DataArray) = copy(da.na)
248-
249247
Base.any(::typeof(isna), da::DataArray) = any(da.na) # -> Bool
250248
Base.all(::typeof(isna), da::DataArray) = all(da.na) # -> Bool
251249

src/natype.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,33 @@ struct NAException <: Exception
3636
end
3737
NAException() = NAException("NA found")
3838

39+
# Restrict to Number to avoid infinite recursion
40+
## Numbers
41+
Base.promote_rule(::Type{Union{T,NAtype}}, ::Type{Union{S,NAtype}}) where {T<:Number,S<:Number} =
42+
Union{promote_type(T, S),NAtype}
43+
Base.promote_rule(::Type{Union{T,NAtype}}, ::Type{S}) where {T<:Number,S<:Number} =
44+
Union{promote_type(T, S),NAtype}
45+
## Dates
46+
Base.promote_rule(::Type{Union{T,NAtype}}, ::Type{Union{S,NAtype}}) where {T<:Dates.AbstractTime,S<:Dates.AbstractTime} =
47+
Union{promote_type(T, S),NAtype}
48+
Base.promote_rule(::Type{Union{T,NAtype}}, ::Type{S}) where {T<:Dates.AbstractTime,S<:Dates.AbstractTime} =
49+
Union{promote_type(T, S),NAtype}
50+
51+
# Restrict to Number to avoid matching everything
52+
Base.convert(::Type{Union{T,NAtype}}, x::Number) where {T<:Number} = convert(T, x)
53+
Base.convert(::Type{Union{T,NAtype}}, x::Dates.AbstractTime) where {T<:Dates.AbstractTime} = convert(T, x)
54+
3955
Base.length(x::NAtype) = 1
4056
Base.size(x::NAtype) = ()
4157
Base.size(x::NAtype, i::Integer) = i < 1 ? throw(BoundsError()) : 1
4258
Base.ndims(x::NAtype) = 0
4359
Base.getindex(x::NAtype, i) = i == 1 ? NA : throw(BoundsError())
4460

61+
extractT(::Type{T}) where {T} = T
62+
extractT(::Type{Union{T,NAtype}}) where {T} = T
63+
64+
Base.zero(::Type{Union{T,NAtype}}) where {T} = zero(T)
65+
4566
"""
4667
isna(x) -> Bool
4768

src/operators.jl

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ for f in [:+,:-,:*,:/]
205205
end
206206

207207
# Unary operators, DataArrays.
208+
@dataarray_unary(+, Any, T)
208209
@dataarray_unary(-, Bool, Int)
209210
@dataarray_unary(-, Any, T)
210211
@dataarray_unary(!, Bool, T)
@@ -531,31 +532,31 @@ function (-){TA,TJ<:Number}(J::UniformScaling{TJ},A::DataArray{TA,2})
531532
end
532533

533534
(+)(A::DataArray{Bool,2},J::UniformScaling{Bool}) =
534-
invoke(+, Tuple{AbstractArray{Bool,2},UniformScaling{Bool}}, A, J)
535+
invoke(+, Tuple{AbstractArray{Union{Bool,NAtype},2},UniformScaling{Bool}}, A, J)
535536
(+)(J::UniformScaling{Bool},A::DataArray{Bool,2}) =
536-
invoke(+, Tuple{UniformScaling{Bool},AbstractArray{Bool,2}}, J, A)
537+
invoke(+, Tuple{UniformScaling{Bool},AbstractArray{Union{Bool,NAtype},2}}, J, A)
537538
(-)(A::DataArray{Bool,2},J::UniformScaling{Bool}) =
538-
invoke(-, Tuple{AbstractArray{Bool,2},UniformScaling{Bool}}, A, J)
539+
invoke(-, Tuple{AbstractArray{Union{Bool,NAtype},2},UniformScaling{Bool}}, A, J)
539540
(-)(J::UniformScaling{Bool},A::DataArray{Bool,2}) =
540-
invoke(-, Tuple{UniformScaling{Bool},AbstractArray{Bool,2}}, J, A)
541+
invoke(-, Tuple{UniformScaling{Bool},AbstractArray{Union{Bool,NAtype},2}}, J, A)
541542

542543
(+){TA,TJ}(A::AbstractDataArray{TA,2},J::UniformScaling{TJ}) =
543-
invoke(+, Tuple{AbstractArray{TA,2},UniformScaling{TJ}}, A, J)
544+
invoke(+, Tuple{AbstractArray{Union{TA,NAtype},2},UniformScaling{TJ}}, A, J)
544545
(+){TA}(J::UniformScaling,A::AbstractDataArray{TA,2}) =
545-
invoke(+, Tuple{UniformScaling,AbstractArray{TA,2}}, J, A)
546+
invoke(+, Tuple{UniformScaling,AbstractArray{Union{TA,NAtype},2}}, J, A)
546547
(-){TA,TJ<:Number}(A::AbstractDataArray{TA,2},J::UniformScaling{TJ}) =
547-
invoke(-, Tuple{AbstractArray{TA,2},UniformScaling{TJ}}, A, J)
548+
invoke(-, Tuple{AbstractArray{Union{TA,NAtype},2},UniformScaling{TJ}}, A, J)
548549
(-){TA,TJ<:Number}(J::UniformScaling{TJ},A::AbstractDataArray{TA,2}) =
549-
invoke(-, Tuple{UniformScaling{TJ},AbstractArray{TA,2}}, J, A)
550+
invoke(-, Tuple{UniformScaling{TJ},AbstractArray{Union{TA,NAtype},2}}, J, A)
550551

551552
(+)(A::AbstractDataArray{Bool,2},J::UniformScaling{Bool}) =
552-
invoke(+, Tuple{AbstractArray{Bool,2},UniformScaling{Bool}}, A, J)
553+
invoke(+, Tuple{AbstractArray{Union{Bool,NAtype},2},UniformScaling{Bool}}, A, J)
553554
(+)(J::UniformScaling{Bool},A::AbstractDataArray{Bool,2}) =
554-
invoke(+, Tuple{UniformScaling{Bool},AbstractArray{Bool,2}}, J, A)
555+
invoke(+, Tuple{UniformScaling{Bool},AbstractArray{Union{Bool,NAtype},2}}, J, A)
555556
(-)(A::AbstractDataArray{Bool,2},J::UniformScaling{Bool}) =
556-
invoke(-, Tuple{AbstractArray{Bool,2},UniformScaling{Bool}}, A, J)
557+
invoke(-, Tuple{AbstractArray{Union{Bool,NAtype},2},UniformScaling{Bool}}, A, J)
557558
(-)(J::UniformScaling{Bool},A::AbstractDataArray{Bool,2}) =
558-
invoke(-, Tuple{UniformScaling{Bool},AbstractArray{Bool,2}}, J, A)
559+
invoke(-, Tuple{UniformScaling{Bool},AbstractArray{Union{Bool,NAtype},2}}, J, A)
559560

560561
end # if isdefined(Base, :UniformScaling)
561562

src/pooleddataarray.jl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ end
106106
PooledDataArray(d::PooledDataArray) = d
107107

108108
# Constructor from array, w/ pool, missingness, and ref type
109-
function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{T, N},
109+
function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{<:Union{T,NAtype}, N},
110110
pool::Vector{T},
111-
m::AbstractArray{Bool, N},
111+
m::AbstractArray{<:Union{Bool,NAtype}, N},
112112
r::Type{R} = DEFAULT_POOLED_REF_TYPE)
113113
if length(pool) > typemax(R)
114114
throw(ArgumentError("Cannot construct a PooledDataVector with type $R with a pool of size $(length(pool))"))
@@ -466,7 +466,7 @@ julia> p # has been modified
466466
"B"
467467
```
468468
"""
469-
function setlevels!{T,R}(x::PooledDataArray{T,R}, newpool::AbstractVector{T})
469+
function setlevels!{T,R}(x::PooledDataArray{T,R}, newpool::AbstractVector)
470470
if newpool == myunique(newpool) # no NAs or duplicates
471471
x.pool = newpool
472472
return x
@@ -483,9 +483,6 @@ function setlevels!{T,R}(x::PooledDataArray{T,R}, newpool::AbstractVector{T})
483483
end
484484
end
485485

486-
setlevels!{T, R}(x::PooledDataArray{T, R},
487-
newpool::AbstractVector) = setlevels!(x, convert(Array{T}, newpool))
488-
489486
function setlevels(x::PooledDataArray, d::Dict)
490487
newpool = copy(DataArray(x.pool))
491488
# An NA in `v` is put in the pool; that will cause it to become NA
@@ -553,7 +550,7 @@ end
553550
##############################################################################
554551

555552
function Base.similar{T,R}(pda::PooledDataArray{T,R}, S::Type, dims::Dims)
556-
PooledDataArray(RefArray(zeros(R, dims)), S[])
553+
PooledDataArray(RefArray(zeros(R, dims)), extractT(S)[])
557554
end
558555

559556
##############################################################################

src/reducedim.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ end
303303

304304
function Base.mean!{T}(R::AbstractArray{T}, A::DataArray; skipna::Bool=false,
305305
init::Bool=true)
306-
init && fill!(R, zero(eltype(R)))
306+
init && fill!(R, 0)
307307
if skipna
308308
C = Array{Int}(size(R))
309309
_mapreducedim_skipna_impl!(identity, +, R, C, A)

src/statistics.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,25 +39,25 @@ gl(n::Integer, k::Integer) = gl(n, k, n*k)
3939
StatsBase.describe(X::DataVector) = StatsBase.describe(STDOUT, X)
4040

4141
function StatsBase.describe{T<:Real}(io::IO, X::AbstractDataVector{T})
42-
nacount = sum(isna.(X))
42+
nacount = sum(isna, X)
4343
pna = 100nacount/length(X)
4444
if pna != 100 # describe will fail if dropna returns an empty vector
4545
describe(io, dropna(X))
4646
else
4747
println(io, "Summary Stats:")
48-
println(io, "Type: $(eltype(X))")
48+
println(io, "Type: $(T)")
4949
end
5050
println(io, "Number Missing: $(nacount)")
5151
@printf(io, "%% Missing: %.6f\n", pna)
5252
return
5353
end
5454

5555
function StatsBase.describe(io::IO, X::AbstractDataVector)
56-
nacount = sum(isna.(X))
56+
nacount = sum(isna, X)
5757
pna = 100nacount/length(X)
5858
println(io, "Summary Stats:")
5959
println(io, "Length: $(length(X))")
60-
println(io, "Type: $(eltype(X))")
60+
println(io, "Type: $(extractT(eltype(X)))")
6161
println(io, "Number Unique: $(length(unique(X)))")
6262
println(io, "Number Missing: $(nacount)")
6363
@printf(io, "%% Missing: %.6f\n", pna)

test/broadcast.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,4 +128,12 @@
128128
@test map!(abs, x, x) == @data([1, 2])
129129
@test isequal(map!(+, DataArray(Float64, 3), @data([1, NA, 3]), @data([NA, 2, 3])), @data([NA, NA, 6]))
130130
@test map!(isequal, DataArray(Float64, 3), @data([1, NA, NA]), @data([1, NA, 3])) == @data([true, true, false])
131+
132+
# isna doesn't propagate NAs so it should return BitArrays
133+
x = isna.(@data [NA, 1, 2])
134+
@test x isa BitArray
135+
@test x == [true, false, false]
136+
x = (!).(isna.(@data [NA, 1, 2]))
137+
@test x isa BitArray
138+
@test x == [false, true, true]
131139
end

test/constructors.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
@test isequal(dv, convert(DataArray, 1:3))
2626

2727
dv = DataArray(Int, 3)
28-
@test isequal(eltype(dv), Int)
28+
@test isequal(eltype(dv), Union{Int,NAtype})
2929
@test isequal(dv.na, trues(3))
3030

3131
dv = convert(DataArray, zeros(3))
@@ -67,7 +67,7 @@
6767
@test isequal(pdv, convert(PooledDataArray, PooledDataArray([1, 2, 3])))
6868

6969
pdv = PooledDataArray(Int, 3)
70-
@test isequal(eltype(pdv), Int)
70+
@test isequal(eltype(pdv), Union{Int,NAtype})
7171
@test all(isna.(pdv) .== trues(3))
7272

7373
pdv = convert(PooledDataArray, zeros(3))
@@ -106,7 +106,7 @@
106106
@test isequal(dm, convert(DataArray, trues(2, 2)))
107107

108108
dm = DataArray(Int, 2, 2)
109-
@test isequal(eltype(dm), Int)
109+
@test isequal(eltype(dm), Union{Int,NAtype})
110110
@test isequal(dm.na, trues(2, 2))
111111

112112
@test_nowarn convert(DataArray, zeros(2, 2))

test/data.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,13 @@
7171
@test size(dvint) == (4,)
7272
@test length(dvint) == 4
7373
@test sum(isna.(dvint)) == 1
74-
@test eltype(dvint) == Int
74+
@test eltype(dvint) == Union{Int,NAtype}
7575

7676
#test_group("PooledDataVector methods")
7777
@test size(pdvstr) == (7,)
7878
@test length(pdvstr) == 7
7979
@test sum(isna.(pdvstr)) == 1
80-
@test eltype(pdvstr) == String
80+
@test eltype(pdvstr) == Union{String,NAtype}
8181

8282
#test_group("DataVector operations")
8383
@test isequal(dvint .+ 1, DataArray([2, 3, 4, 5], [false, false, true, false]))
@@ -99,7 +99,7 @@
9999
@test all(convert(Vector{Int}, dvint2) .== [5:8;])
100100
@test all([i + 1 for i in dvint2] .== [6:9;])
101101
@test all([length(x)::Int for x in dvstr] == [3, 3, 1, 4])
102-
@test repr(dvint) == "[1, 2, NA, 4]"
102+
@test repr(dvint) == "Union{DataArrays.NAtype, $Int}[1, 2, NA, 4]"
103103

104104
#test_group("PooledDataVector to something else")
105105
@test all(dropna(pdvstr) .== ["one", "one", "two", "two", "one", "one"])

test/dataarray.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@
9999
end
100100

101101
# Inferrability of map (#276)
102-
@test eltype(map(x -> x > 1, @data [1, 2])) == Bool
102+
@test eltype(map(x -> x > 1, @data [1, 2])) == Union{Bool,NAtype}
103103

104104
@testset "Issue #278" begin
105105
x = @data ones(4)

test/nas.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,12 @@
6262
@test_throws NAException for v in each_failna(dv); end
6363
@test collect(each_dropna(dv)) == a
6464
@test collect(each_replacena(dv, 4)) == [4, 4, a..., 4]
65+
66+
@testset "promotion" for (T1, T2) in ((Int, Float64),
67+
(Dates.Minute, Dates.Second))
68+
@test promote_type(T1, Union{T2,NAtype}) == Union{T2,NAtype}
69+
@test promote_type(Union{T1,NAtype}, T2) == Union{T2,NAtype}
70+
@test promote_type(Union{T1,NAtype}, Union{T2,NAtype}) == Union{T2,NAtype}
71+
end
72+
6573
end

0 commit comments

Comments
 (0)