diff --git a/README.md b/README.md index f2695cf..c3e3045 100644 --- a/README.md +++ b/README.md @@ -11,30 +11,22 @@ Documentation: [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaStats.github.io/DataArrays.jl/stable) [![](https://img.shields.io/badge/docs-latest-blue.svg)](https://JuliaStats.github.io/DataArrays.jl/latest) +The DataArrays package provides array types for working efficiently with [missing data](https://en.wikipedia.org/wiki/Missing_data) +in Julia, based on the `null` value from the [Nulls.jl](https://github.com/JuliaData/Nulls.jl) package. +In particular, it provides the following: -The DataArrays package extends Julia by introducing data structures that can contain missing data. In particular, the package introduces three new data types to Julia: - -* `NA`: A singleton type that represents a single missing value. * `DataArray{T}`: An array-like data structure that can contain values of type `T`, but can also contain missing values. * `PooledDataArray{T}`: A variant of `DataArray{T}` optimized for representing arrays that contain many repetitions of a small number of unique values -- as commonly occurs when working with categorical data. -# The `NA` Value - -Many languages represent missing values using a reserved value like `NULL` or `NA`. A missing integer value, for example, might be represented as a `NULL` value in SQL or as an `NA` value in R. - -Julia takes its conception of `NA` from R, where `NA` denotes missingness based on lack of information. If, for example, we were to measure people's heights as integers, an `NA` might reflect our ignorance of a specific person's height. - -Conceptualizing the use of `NA` as a signal of uncertainty will help you understand how `NA` interacts with other values. For example, it explains why `NA + 1` is `NA`, but `NA & false` is `false`. In general, `NA` corrupts any computation whose results cannot be determined without knowledge of the value that is `NA`. - # DataArray's -Most Julian arrays cannot contain `NA` values: only `Array{NAtype}` and heterogeneous Arrays can contain `NA` values. Of these, only heterogeneous arrays could contain values of any type other than `NAtype`. +Most Julian arrays cannot contain `null` values: only `Array{Union{T, Null}}` and more generally `Array{>:Null}` can contain `null` values. -The generic use of heterogeneous Arrays is discouraged in Julia because it is inefficient: accessing any value requires dereferencing a pointer. The `DataArray` type allows one to work around this inefficiency by providing tightly-typed arrays that can contain values of exactly one type, but can also contain `NA` values. +The generic use of heterogeneous `Array` is discouraged in Julia versions below 0.7 because it is inefficient: accessing any value requires dereferencing a pointer. The `DataArray` type allows one to work around this inefficiency by providing tightly-typed arrays that can contain values of exactly one type, but can also contain `null` values. -For example, a `DataArray{Int}` can contain integers and NA values. We can construct one as follows: +For example, a `DataArray{Int}` can contain integers and `null` values. 
We can construct one as follows: - da = @data([1, 2, NA, 4]) + da = @data([1, 2, null, 4]) # PooledDataArray's diff --git a/REQUIRE b/REQUIRE index 91d46e2..713528c 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,5 @@ julia 0.6 +Nulls 0.1.2 StatsBase 0.15.0 Reexport SpecialFunctions diff --git a/benchmark/operators.jl b/benchmark/operators.jl index 06f5567..bffab22 100644 --- a/benchmark/operators.jl +++ b/benchmark/operators.jl @@ -6,11 +6,11 @@ srand(1776) const TEST_NAMES = [ "Vector", - "DataVector No NA", - "DataVector Half NA", + "DataVector No null", + "DataVector Half null", "Matrix", - "DataMatrix No NA", - "DataMatrix Half NA" + "DataMatrix No null", + "DataMatrix Half null" ] function make_test_types(genfunc, sz) diff --git a/benchmark/reduce.jl b/benchmark/reduce.jl index eebc227..26888de 100644 --- a/benchmark/reduce.jl +++ b/benchmark/reduce.jl @@ -6,10 +6,10 @@ srand(1776) const TEST_NAMES = [ "Vector", - "DataVector No NA skipna=false", - "DataVector No NA skipna=true", - "DataVector Half NA skipna=false", - "DataVector Half NA skipna=true" + "DataVector No null skipnull=false", + "DataVector No null skipnull=true", + "DataVector Half null skipnull=false", + "DataVector Half null skipnull=true" ] function make_test_types(genfunc, sz) @@ -29,9 +29,9 @@ macro perf(fn, replications) println($fn) fns = [()->$fn(Data[1]), ()->$fn(Data[2]), - ()->$fn(Data[2]; skipna=true), + ()->$fn(Data[2]; skipnull=true), ()->$fn(Data[3]), - ()->$fn(Data[3]; skipna=true)] + ()->$fn(Data[3]; skipnull=true)] gc_disable() df = compare(fns, $replications) gc_enable() diff --git a/benchmark/reducedim.jl b/benchmark/reducedim.jl index 29a6406..07d87fa 100644 --- a/benchmark/reducedim.jl +++ b/benchmark/reducedim.jl @@ -6,10 +6,10 @@ srand(1776) const TEST_NAMES = [ "Matrix", - "DataMatrix No NA skipna=false", - "DataMatrix No NA skipna=true", - "DataMatrix Half NA skipna=false", - "DataMatrix Half NA skipna=true" + "DataMatrix No null skipnull=false", + "DataMatrix No null skipnull=true", + "DataMatrix Half null skipnull=false", + "DataMatrix Half null skipnull=true" ] function make_test_types(genfunc, sz) @@ -29,9 +29,9 @@ macro perf(fn, dim, replications) println($fn, " (region = ", $dim, ")") fns = [()->$fn(Data[1], $dim), ()->$fn(Data[2], $dim), - ()->$fn(Data[2], $dim; skipna=true), + ()->$fn(Data[2], $dim; skipnull=true), ()->$fn(Data[3], $dim), - ()->$fn(Data[3], $dim; skipna=true)] + ()->$fn(Data[3], $dim; skipnull=true)] gc_disable() df = compare(fns, $replications) gc_enable() diff --git a/docs/src/da.md b/docs/src/da.md index 61c9415..c6abe5b 100644 --- a/docs/src/da.md +++ b/docs/src/da.md @@ -1,14 +1,7 @@ -# Representing missing data - ```@meta CurrentModule = DataArrays ``` -```@docs -NA -NAtype -``` - ## Arrays with possibly missing data ```@docs @@ -19,9 +12,7 @@ DataArray DataVector DataMatrix @data -isna -dropna -padna +padnull levels ``` diff --git a/docs/src/index.md b/docs/src/index.md index f298467..90993a7 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,11 +1,10 @@ # DataArrays.jl -This package provides functionality for working with [missing data](https://en.wikipedia.org/wiki/Missing_data) -in Julia. +This package provides array types for working efficiently with [missing data](https://en.wikipedia.org/wiki/Missing_data) +in Julia, based on the `null` value from the [Nulls.jl](https://github.com/JuliaData/Nulls.jl) package. 
In particular, it provides the following: -* `NA`: A singleton representing a missing value -* `DataArray{T}`: An array type that can house both values of type `T` and missing values +* `DataArray{T}`: An array type that can house both values of type `T` and missing values (of type `Null`) * `PooledDataArray{T}`: An array type akin to `DataArray` but optimized for arrays with a smaller set of unique values, as commonly occurs with categorical data diff --git a/spec/literals.md b/spec/literals.md index 0e85c27..b36a4a0 100644 --- a/spec/literals.md +++ b/spec/literals.md @@ -19,26 +19,19 @@ Julia's parser rewrites both of these literals as calls to the `vcat` function. The `vcat` function computes the tightest type that would enclose all of the values in the literal array. (REVISE) -Because of the strange place occupied by `NAtype` in Julia's type -hierarchy, the tightest type that would enclose any literal array -containing a single `NA` would be `Any`, which is not very useful. -As such, the DataArrays package needs to provide an alternative -tool for writing out literal DataArray's. - -This is accomplished by using two macros, `@data` and `@pdata`, -which rewrite array literals into a form that will allow proper -typing. +Two macros, `@data` and `@pdata`, rewrite array literals into a form +that will allow direct construction of `DataArray`s and `PooledDataArray`s. # Basic Principle The basic mechanism that powers the `@data` and `@pdata` macros is the rewriting of array literals as a call to DataArray or PooledDataArray with a rewritten array literal and a Boolean mask that specifies where -`NA` occurred in the original literal. +`null` occurred in the original literal. For example, - @data [1, 2, NA, 4] + @data [1, 2, null, 4] will be rewritten as, @@ -46,24 +39,24 @@ will be rewritten as, Note the added `1` created during the rewriting of the array literal. This value is called a `stub` and is always the first value found -in the literal array that is not `NA`. The use of stubs explains two +in the literal array that is not `null`. The use of stubs explains two important properties of the `@data` and `@pdata` macros: * If the entries of the array literal are not fixed values, but function calls, these function calls must be pure. Otherwise the impure function may be called more times than expected. -* It is not possible to specify a literal DataArray that contains only `NA` values. -* None of the variables used in a literal array can be called `NA`. This is just good style anyway, so it is not much of a limitation. +* It is not possible to specify a literal DataArray that contains only `null` values. +* None of the variables used in a literal array can be called `null`. This is just good style anyway, so it is not much of a limitation. # Limitations We restate the limitations noted above: * If the entries of the array literal are not fixed values, but function calls, these function calls must be pure. Otherwise the impure function may be called more times than expected. -* It is not possible to specify a literal DataArray that contains only `NA` values. -* None of the variables used in a literal array can be called `NA`. This is just good style anyway, so it is not much of a limitation. +* It is not possible to specify a literal DataArray that contains only `null` values. +* None of the variables used in a literal array can be called `null`. This is just good style anyway, so it is not much of a limitation. 
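As a concrete sketch of the rewriting described under Basic Principle above, `@data [1, 2, null, 4]` behaves roughly like the snippet below (the variable names are illustrative and the exact expression emitted by the macro may differ in detail; the two-argument `DataArray(data, mask)` constructor is the one provided by this package):

    data = [1, 2, 1, 4]                  # the stub `1` stands in for `null`
    mask = [false, false, true, false]   # true marks the missing entry
    da = DataArray(data, mask)           # roughly what @data [1, 2, null, 4] builds

Because the stub must be a non-`null` value taken from the literal itself, a literal consisting only of `null`s gives the macro nothing to use, which is another way to see the second limitation above.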
Note that the latter limitation is not very important, because a DataArray -with only `NA` values is already problematic because it has no well-defined +with only `null` values is already problematic because it has no well-defined type in Julia. One final limitation is that the rewriting rules are not able to diff --git a/src/DataArrays.jl b/src/DataArrays.jl index 738f735..f8d8ca6 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -4,6 +4,7 @@ module DataArrays using Base: promote_op using Base.Cartesian, Reexport @reexport using StatsBase + @reexport using Nulls using SpecialFunctions const DEFAULT_POOLED_REF_TYPE = UInt32 @@ -25,23 +26,10 @@ module DataArrays DataArray, DataMatrix, DataVector, - dropna, - each_failna, - each_dropna, - each_replacena, - EachFailNA, - EachDropNA, - EachReplaceNA, FastPerm, getpoolidx, gl, - head, - isna, - levels, - NA, - NAException, - NAtype, - padna, + padnull, pdata, PooledDataArray, PooledDataMatrix, @@ -51,11 +39,9 @@ module DataArrays rep, replace!, setlevels!, - setlevels, - tail + setlevels include("utils.jl") - include("natype.jl") include("abstractdataarray.jl") include("dataarray.jl") include("pooleddataarray.jl") @@ -71,7 +57,6 @@ module DataArrays include("extras.jl") include("grouping.jl") include("statistics.jl") - include("predicates.jl") include("literals.jl") include("deprecated.jl") end diff --git a/src/abstractdataarray.jl b/src/abstractdataarray.jl index b09dc92..849d4b5 100644 --- a/src/abstractdataarray.jl +++ b/src/abstractdataarray.jl @@ -2,9 +2,9 @@ AbstractDataArray{T, N} An `N`-dimensional `AbstractArray` whose entries can take on values of type -`T` or the value `NA`. +`T` or the value `null`. """ -abstract type AbstractDataArray{T, N} <: AbstractArray{Data{T}, N} end +abstract type AbstractDataArray{T, N} <: AbstractArray{Union{T,Null}, N} end """ AbstractDataVector{T} @@ -20,7 +20,7 @@ A 2-dimensional [`AbstractDataArray`](@ref) with element type `T`. """ const AbstractDataMatrix{T} = AbstractDataArray{T, 2} -Base.eltype(d::AbstractDataArray{T, N}) where {T, N} = Union{T,NAtype} +Base.eltype(d::AbstractDataArray{T, N}) where {T, N} = Union{T,Null} # Generic iteration over AbstractDataArray's @@ -28,101 +28,76 @@ Base.start(x::AbstractDataArray) = 1 Base.next(x::AbstractDataArray, state::Integer) = (x[state], state + 1) Base.done(x::AbstractDataArray, state::Integer) = state > length(x) -Base.broadcast{T}(::typeof(isna), a::AbstractArray{T}) = - NAtype <: T ? BitArray(map(x->isa(x, NAtype), a)) : falses(size(a)) # -> BitArray - +# FIXME: type piracy """ - isna(a::AbstractArray, i) -> Bool + isnull(a::AbstractArray, i) -> Bool -Determine whether the element of `a` at index `i` is missing, i.e. `NA`. +Determine whether the element of `a` at index `i` is missing, i.e. `null`. # Examples ```jldoctest -julia> X = @data [1, 2, NA]; +julia> X = @data [1, 2, null]; -julia> isna(X, 2) +julia> isnull(X, 2) false -julia> isna(X, 3) +julia> isnull(X, 3) true ``` """ -isna(a::AbstractArray{T}, i::Real) where {T} = NAtype <: T ? isa(a[i], NAtype) : false # -> Bool - -""" - dropna(v::AbstractVector) -> AbstractVector - -Return a copy of `v` with all `NA` elements removed. - -# Examples - -```jldoctest -julia> dropna(@data [NA, 1, NA, 2]) -2-element Array{Int64,1}: - 1 - 2 - -julia> dropna([4, 5, 6]) -3-element Array{Int64,1}: - 4 - 5 - 6 -``` -""" -dropna(v::AbstractVector) = copy(v) # -> AbstractVector +Base.isnull(a::AbstractArray{T}, i::Real) where {T} = Null <: T ? 
isa(a[i], Null) : false # -> Bool # Iterators # TODO: Use values() # Use DataValueIterator type? -struct EachFailNA{T} - da::AbstractDataArray{T} +struct EachFailNull{T<:AbstractDataArray} + da::T end -each_failna(da::AbstractDataArray{T}) where {T} = EachFailNA(da) -Base.length(itr::EachFailNA) = length(itr.da) -Base.start(itr::EachFailNA) = 1 -Base.done(itr::EachFailNA, ind::Integer) = ind > length(itr) -function Base.next(itr::EachFailNA, ind::Integer) - if isna(itr.da[ind]) - throw(NAException()) +Nulls.fail(da::AbstractDataArray) = EachFailNull(da) +Base.length(itr::EachFailNull) = length(itr.da) +Base.start(itr::EachFailNull) = 1 +Base.done(itr::EachFailNull, ind::Integer) = ind > length(itr) +Base.eltype(itr::EachFailNull) = Nulls.T(eltype(itr.da)) +function Base.next(itr::EachFailNull, ind::Integer) + if isnull(itr.da[ind]) + throw(NullException()) else (itr.da[ind], ind + 1) end end -struct EachDropNA{T} - da::AbstractDataArray{T} +struct EachDropNull{T<:AbstractDataArray} + da::T end -each_dropna(da::AbstractDataArray{T}) where {T} = EachDropNA(da) -function _next_nonna_ind(da::AbstractDataArray{T}, ind::Int) where T +Nulls.skip(da::AbstractDataArray) = EachDropNull(da) +function _next_nonna_ind(da::AbstractDataArray, ind::Int) ind += 1 - while ind <= length(da) && isna(da, ind) + while ind <= length(da) && isnull(da, ind) ind += 1 end ind end -Base.length(itr::EachDropNA) = length(itr.da) - sum(itr.da.na) -Base.start(itr::EachDropNA) = _next_nonna_ind(itr.da, 0) -Base.done(itr::EachDropNA, ind::Int) = ind > length(itr.da) -function Base.next(itr::EachDropNA, ind::Int) +Base.length(itr::EachDropNull) = length(itr.da) - sum(itr.da.na) +Base.start(itr::EachDropNull) = _next_nonna_ind(itr.da, 0) +Base.done(itr::EachDropNull, ind::Int) = ind > length(itr.da) +Base.eltype(itr::EachDropNull) = Nulls.T(eltype(itr.da)) +function Base.next(itr::EachDropNull, ind::Int) (itr.da[ind], _next_nonna_ind(itr.da, ind)) end -struct EachReplaceNA{S, T} - da::AbstractDataArray{S} +struct EachReplaceNull{S<:AbstractDataArray, T} + da::S replacement::T end -function each_replacena(da::AbstractDataArray, replacement::Any) - EachReplaceNA(da, convert(eltype(da), replacement)) -end -function each_replacena(replacement::Any) - x -> each_replacena(x, replacement) -end -Base.length(itr::EachReplaceNA) = length(itr.da) -Base.start(itr::EachReplaceNA) = 1 -Base.done(itr::EachReplaceNA, ind::Integer) = ind > length(itr) -function Base.next(itr::EachReplaceNA, ind::Integer) - item = isna(itr.da, ind) ? itr.replacement : itr.da[ind] +Nulls.replace(da::AbstractDataArray, replacement::Any) = + EachReplaceNull(da, replacement) +Base.length(itr::EachReplaceNull) = length(itr.da) +Base.start(itr::EachReplaceNull) = 1 +Base.done(itr::EachReplaceNull, ind::Integer) = ind > length(itr) +Base.eltype(itr::EachReplaceNull) = Union{Nulls.T(eltype(itr.da)), typeof(itr.replacement)} +function Base.next(itr::EachReplaceNull, ind::Integer) + item = isnull(itr.da, ind) ? itr.replacement : itr.da[ind] (item, ind + 1) end diff --git a/src/broadcast.jl b/src/broadcast.jl index d38a369..5af1f2f 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -5,7 +5,7 @@ _broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_indices(x...)) # Get ref for value for a PooledDataArray, adding to the pool if # necessary -_unsafe_pdaref!(Bpool, Brefdict::Dict, val::NAtype) = 0 +_unsafe_pdaref!(Bpool, Brefdict::Dict, val::Null) = 0 function _unsafe_pdaref!(Bpool, Brefdict::Dict, val) @get! 
Brefdict val begin push!(Bpool, val) @@ -13,16 +13,16 @@ function _unsafe_pdaref!(Bpool, Brefdict::Dict, val) end end -# Generate a branch for each possible combination of NA/not NA. This +# Generate a branch for each possible combination of null/not null. This # gives good performance at the cost of 2^narrays branches. function gen_na_conds(f, nd, arrtype, outtype, - daidx=find(t -> t <: DataArray || t <: PooledDataArray, arrtype), pos=1, isna=()) + daidx=find(t -> t <: DataArray || t <: PooledDataArray, arrtype), pos=1, isnull=()) if pos > length(daidx) args = Any[Symbol("v_$(k)") for k = 1:length(arrtype)] for i = 1:length(daidx) - if isna[i] - args[daidx[i]] = NA + if isnull[i] + args[daidx[i]] = null end end @@ -39,15 +39,15 @@ function gen_na_conds(f, nd, arrtype, outtype, else k = daidx[pos] quote - if $(Symbol("isna_$(k)")) - $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isna..., true))) + if $(Symbol("isnull_$(k)")) + $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isnull..., true))) else $(if arrtype[k] <: DataArray :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("data_$(k)"))[$(Symbol("state_$(k)_0"))]) else :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("pool_$(k)"))[$(Symbol("r_$(k)"))]) end) - $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isna..., false))) + $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isnull..., false))) end end end @@ -128,13 +128,13 @@ Base.map!(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) where {F} = # body begin - # Advance iterators for DataArray and determine NA status + # Advance iterators for DataArray and determine null status $(Expr(:block, [ As[k] <: DataArray ? quote - @inbounds $(Symbol("isna_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0"))) + @inbounds $(Symbol("isnull_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0"))) end : As[k] <: PooledDataArray ? quote @inbounds $(Symbol("r_$(k)")) = @nref $nd $(Symbol("refs_$(k)")) d->$(Symbol("j_$(k)_d")) - $(Symbol("isna_$(k)")) = $(Symbol("r_$(k)")) == 0 + $(Symbol("isnull_$(k)")) = $(Symbol("r_$(k)")) == 0 end : nothing for k = 1:N]...)) @@ -190,12 +190,12 @@ Base.Broadcast._containertype(::Type{T}) where T<:PooledDataArray = PooledDa Base.Broadcast.broadcast_indices(::Type{T}, A) where T<:AbstractDataArray = indices(A) @inline function broadcast_t(f, ::Type{T}, shape, A, Bs...) where {T} - dest = Base.Broadcast.containertype(A, Bs...)(extractT(T), Base.index_lengths(shape...)) + dest = Base.Broadcast.containertype(A, Bs...)(Nulls.T(T), Base.index_lengths(shape...)) return broadcast!(f, dest, A, Bs...) end -# This is mainly to handle isna.(x) since isna is probably the only -# function that can guarantee that NAs will never propagate +# This is mainly to handle isnull.(x) since isnull is probably the only +# function that can guarantee that nulls will never propagate @inline function broadcast_t(f, ::Type{Bool}, shape, A, Bs...) dest = similar(BitArray, shape) return broadcast!(f, dest, A, Bs...) @@ -203,7 +203,7 @@ end # This one is almost identical to the version in Base and can hopefully be # removed at some point. The main issue in Base is that it tests for -# isleaftype(T) which is false for Union{T,NAtype}. If the test in Base +# isleaftype(T) which is false for Union{T,Null}. 
If the test in Base # can be modified to cover simple unions of leaftypes then this method # can probably be deleted and the two _t methods adjusted to match the Base # invokation from Base.Broadcast.broadcast_c @@ -214,5 +214,5 @@ end end # This one is much faster than normal broadcasting but the method won't get called -# in fusing operations like (!).(isna.(x)) -Base.broadcast(::typeof(isna), da::DataArray) = copy(da.na) +# in fusing operations like (!).(isnull.(x)) +Base.broadcast(::typeof(isnull), da::DataArray) = copy(da.na) diff --git a/src/dataarray.jl b/src/dataarray.jl index 2e4617a..009652f 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -5,48 +5,64 @@ Construct a `DataArray`, an `N`-dimensional array with element type `T` that allows missing values. The resulting array uses the data in `d` with `m` as a bitmask to signify missingness. -That is, for each index `i` in `d`, if `m[i]` is `true`, the array contains `NA` at index `i`, +That is, for each index `i` in `d`, if `m[i]` is `true`, the array contains `null` at index `i`, otherwise it contains `d[i]`. DataArray(T::Type, dims...) Construct a `DataArray` with element type `T` and dimensions specified by `dims`. All elements -default to `NA`. +default to `null`. # Examples ```jldoctest julia> DataArray([1, 2, 3], [true, false, true]) 3-element DataArrays.DataArray{Int64,1}: - NA + null 2 - NA + null julia> DataArray(Float64, 3, 3) 3×3 DataArrays.DataArray{Float64,2}: - NA NA NA - NA NA NA - NA NA NA + null null null + null null null + null null null ``` """ mutable struct DataArray{T, N} <: AbstractDataArray{T, N} data::Array{T, N} na::BitArray{N} - function DataArray{T,N}(d::Array{T, N}, m::BitArray{N}) where {T, N} + function DataArray{T,N}(d::Array{<:Union{T, Null}, N}, m::BitArray{N}) where {T, N} # Ensure data values and missingness metadata match if size(d) != size(m) msg = "Data and missingness arrays must be the same size" throw(ArgumentError(msg)) end - # additionally check that d does not contain NA entries - if eltype(d) === Any - for i in eachindex(d) - if isassigned(d, i) && isna(d, i) - m[i] = true + # if input array can contain null values, we need to mark corresponding entries as missing + if eltype(d) >: Null + # If the original eltype is wider than the target eltype T, conversion may fail + # in the presence of nulls: we need to allocate a copy, leaving entries + # corresponding to nulls uninitialized + if eltype(d) <: T + @inbounds for i in eachindex(d) + if isassigned(d, i) && isnull(d, i) + m[i] = true + end end + else + d2 = similar(d, T) + @inbounds for i in eachindex(d) + isassigned(d, i) || continue + if isnull(d, i) + m[i] = true + else + d2[i] = d[i] + end + end + return new(d2, m) end - elseif eltype(d) <: NAtype + elseif eltype(d) <: Null m = trues(m) end new(d, m) @@ -55,7 +71,7 @@ end function DataArray(d::Array{T, N}, m::BitArray{N} = falses(size(d))) where {T, N} # -> DataArray{T} - return DataArray{T, N}(d, m) + return DataArray{Nulls.T(T), N}(d, m) end function DataArray(d::Array, m::AbstractArray{Bool}) # -> DataArray{T} @@ -63,11 +79,11 @@ function DataArray(d::Array, m::AbstractArray{Bool}) # -> DataArray{T} end function DataArray(T::Type, dims::Integer...) 
# -> DataArray{T} - return DataArray(Array{T}(dims...), trues(dims...)) + return DataArray(Array{Nulls.T(T)}(dims...), trues(dims...)) end function DataArray(T::Type, dims::NTuple{N, Int}) where N # -> DataArray{T} - return DataArray(Array{T}(dims...), trues(dims...)) + return DataArray(Array{Nulls.T(T)}(dims...), trues(dims...)) end """ @@ -146,7 +162,7 @@ function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Inte dest end -Base.fill!(A::DataArray, ::NAtype) = (fill!(A.na, true); A) +Base.fill!(A::DataArray, ::Null) = (fill!(A.na, true); A) Base.fill!(A::DataArray, v) = (fill!(A.data, v); fill!(A.na, false); A) function Base.deepcopy(d::DataArray) # -> DataArray{T} @@ -162,7 +178,7 @@ function Base.resize!(da::DataArray{T,1}, n::Int) where T end function Base.similar(da::DataArray, T::Type, dims::Dims) #-> DataArray{T} - return DataArray(Array{extractT(T)}(dims), trues(dims)) + return DataArray(Array{Nulls.T(T)}(dims), trues(dims)) end Base.size(d::DataArray) = size(d.data) # -> (Int...) @@ -189,12 +205,7 @@ end function Base.convert{S, T, N}(::Type{Array{S, N}}, x::DataArray{T, N}) # -> Array{S, N} - if any(isna, x) - err = "Cannot convert DataArray with NA's to desired type" - throw(NAException(err)) - else - return convert(Array{S, N}, x.data) - end + return S[v for v in x] end function Base.convert{S, T, N}(::Type{Array{S}}, da::DataArray{T, N}) @@ -202,15 +213,15 @@ function Base.convert{S, T, N}(::Type{Array{S}}, da::DataArray{T, N}) end function Base.convert{T}(::Type{Vector}, dv::DataVector{T}) - return convert(Array{T, 1}, dv) + return convert(Array{Union{T, Null}, 1}, dv) end function Base.convert{T}(::Type{Matrix}, dm::DataMatrix{T}) - return convert(Array{T, 2}, dm) + return convert(Array{Union{T, Null}, 2}, dm) end function Base.convert{T, N}(::Type{Array}, da::DataArray{T, N}) - return convert(Array{T, N}, da) + return convert(Array{Union{T, Null}, N}, da) end function Base.convert{S, T, N}( @@ -242,12 +253,13 @@ function Base.convert{T, N}(::Type{Array}, da::DataArray{T, N}, replacement::Any return convert(Array{T, N}, da, replacement) end -dropna(dv::DataVector) = dv.data[.!dv.na] # -> Vector +Base.collect(itr::EachDropNull{<:DataVector}) = itr.da.data[.!itr.da.na] # -> Vector +Base.collect(itr::EachFailNull{<:DataVector}) = copy(itr.da.data) # -> Vector -Base.any(::typeof(isna), da::DataArray) = any(da.na) # -> Bool -Base.all(::typeof(isna), da::DataArray) = all(da.na) # -> Bool +Base.any(::typeof(isnull), da::DataArray) = any(da.na) # -> Bool +Base.all(::typeof(isnull), da::DataArray) = all(da.na) # -> Bool -isna(da::DataArray, I::Real, Is::Real...) = getindex(da.na, I, Is...) +Base.isnull(da::DataArray, I::Real, Is::Real...) = getindex(da.na, I, Is...) 
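# Usage sketch of the renamed null-handling API above (illustration only, not
# part of the patch; assumes `using DataArrays, Nulls` plus the definitions in
# this diff):
da = @data([1, 2, null, 4])
isnull(da, 3)                  # true: entry 3 is missing
any(isnull, da)                # true, checked directly against da.na
collect(Nulls.skip(da))        # [1, 2, 4] -- replacement for the old dropna(da)
collect(Nulls.replace(da, 0))  # [1, 2, 0, 4]
Nulls.fail(da)                 # iterating this throws NullException at the null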
function Base.isfinite(da::DataArray) # -> DataArray{Bool} n = length(da) @@ -272,10 +284,14 @@ function Base.convert{S, T, N}(::Type{DataArray{S, N}}, a::AbstractArray{T, N}) # -> DataArray{S, N} return DataArray(convert(Array{S, N}, a), falses(size(a))) end +function Base.convert{S, T>:Null, N}(::Type{DataArray{S, N}}, + a::AbstractArray{T, N}) # -> DataArray{S, N} + return DataArray(convert(Array{Union{S, Null}, N}, a), falses(size(a))) +end Base.convert{S, T, N}(::Type{DataArray{S}}, x::AbstractArray{T, N}) = - convert(DataArray{S, N}, x) + convert(DataArray{Nulls.T(S), N}, x) Base.convert{T, N}(::Type{DataArray}, x::AbstractArray{T, N}) = - convert(DataArray{T, N}, x) + convert(DataArray{Nulls.T(T), N}, x) Base.convert{T, N}(::Type{DataArray{T, N}}, x::DataArray{T, N}) = x function Base.convert{S, T, N}(::Type{DataArray{S, N}}, x::DataArray{T, N}) # -> DataArray{S, N} v = similar(x.data, S) @@ -301,11 +317,11 @@ julia> data([1, 2, 3]) 2 3 -julia> data(@data [1, 2, NA]) +julia> data(@data [1, 2, null]) 3-element DataArrays.DataArray{Int64,1}: 1 2 - NA + null ``` """ data(a::AbstractArray) = convert(DataArray, a) @@ -316,9 +332,9 @@ data(a::AbstractArray) = convert(DataArray, a) for f in (:(Base.float),) @eval begin function ($f)(da::DataArray) # -> DataArray - if any(isna, da) - err = "Cannot convert DataArray with NA's to desired type" - throw(NAException(err)) + if any(isnull, da) + err = "Cannot convert DataArray with nulls to desired type" + throw(NullException(err)) else ($f)(da.data) end @@ -329,18 +345,18 @@ end """ finduniques(da::DataArray) -> (Vector, Int) -Get the unique values in `da` as well as the index of the first `NA` value +Get the unique values in `da` as well as the index of the first `null` value in `da` if present, or 0 otherwise. """ function finduniques(da::DataArray{T}) where T # -> Vector{T}, Int out = Vector{T}(0) seen = Set{T}() n = length(da) - firstna = 0 + firstnull = 0 for i in 1:n - if isna(da, i) - if firstna == 0 - firstna = length(out) + 1 + if isnull(da, i) + if firstnull == 0 + firstnull = length(out) + 1 else continue end @@ -349,17 +365,17 @@ function finduniques(da::DataArray{T}) where T # -> Vector{T}, Int push!(out, da.data[i]) end end - return out, firstna + return out, firstnull end function Base.unique(da::DataArray{T}) where T # -> DataVector{T} - unique_values, firstna = finduniques(da) + unique_values, firstnull = finduniques(da) n = length(unique_values) - if firstna > 0 + if firstnull > 0 res = DataArray(Vector{T}(n + 1)) i = 1 for val in unique_values - if i == firstna + if i == firstnull res.na[i] = true i += 1 end @@ -367,7 +383,7 @@ function Base.unique(da::DataArray{T}) where T # -> DataVector{T} i += 1 end - if firstna == n + 1 + if firstnull == n + 1 res.na[n + 1] = true end @@ -377,29 +393,7 @@ function Base.unique(da::DataArray{T}) where T # -> DataVector{T} end end -""" - levels(da::DataArray) -> DataVector - -Return a vector of the unique values in `da`, excluding any `NA`s. - - levels(a::AbstractArray) -> Vector - -Equivalent to `unique(a)`. 
- -# Examples - -```jldoctest -julia> levels(@data [1, 2, NA]) -2-element DataArrays.DataArray{Int64,1}: - 1 - 2 -``` -""" -function levels(da::DataArray) # -> DataVector{T} - unique_values, firstna = finduniques(da) +function Nulls.levels(da::DataArray) # -> DataVector{T} + unique_values, firstnull = finduniques(da) return DataArray(unique_values) end - -function levels(a::AbstractArray) # -> Vector{T} - return unique(a) -end diff --git a/src/datavector.jl b/src/datavector.jl index b697b23..02cf1e8 100644 --- a/src/datavector.jl +++ b/src/datavector.jl @@ -2,7 +2,7 @@ # TODO: Macroize these definitions -function Base.push!(dv::DataVector, v::NAtype) +function Base.push!(dv::DataVector, v::Null) resize!(dv.data, length(dv.data) + 1) push!(dv.na, true) return v @@ -17,13 +17,13 @@ end function Base.pop!(dv::DataVector) d, m = pop!(dv.data), pop!(dv.na) if m - return NA + return null else return d end end -function Base.unshift!(dv::DataVector{T}, v::NAtype) where T +function Base.unshift!(dv::DataVector{T}, v::Null) where T ccall(:jl_array_grow_beg, Void, (Any, UInt), dv.data, 1) unshift!(dv.na, true) return v @@ -38,7 +38,7 @@ end function Base.shift!(dv::DataVector{T}) where T d, m = shift!(dv.data), shift!(dv.na) if m - return NA + return null else return d end @@ -53,7 +53,7 @@ end function Base.splice!(dv::DataVector, inds::Union{Integer, UnitRange{Int}}, ins::AbstractVector) # We cannot merely use the implementation in Base because this - # needs to handle NA in the replacement vector + # needs to handle null in the replacement vector v = dv[inds] m = length(ins) a = dv.data @@ -79,7 +79,7 @@ function Base.splice!(dv::DataVector, inds::Union{Integer, UnitRange{Int}}, ins: end for k = 1:m - if !isna(ins, k) + if !isnull(ins, k) if isa(ins, DataVector) a[f+k-1] = ins.data[k] elseif isa(ins, PooledDataVector) @@ -90,7 +90,7 @@ function Base.splice!(dv::DataVector, inds::Union{Integer, UnitRange{Int}}, ins: end end - splice!(dv.na, inds, isna.(ins)) + splice!(dv.na, inds, isnull.(ins)) v end @@ -100,7 +100,7 @@ function Base.deleteat!(dv::DataVector, inds) dv end -function Base.push!(pdv::PooledDataVector{T,R}, v::NAtype) where {T,R} +function Base.push!(pdv::PooledDataVector{T,R}, v::Null) where {T,R} push!(pdv.refs, zero(R)) return v end @@ -113,7 +113,7 @@ end Base.pop!(pdv::PooledDataVector) = pdv.pool[pop!(pdv.refs)] -function Base.unshift!(pdv::PooledDataVector{T,R}, v::NAtype) where {T,R} +function Base.unshift!(pdv::PooledDataVector{T,R}, v::Null) where {T,R} unshift!(pdv.refs, zero(R)) return v end @@ -157,29 +157,29 @@ end Base.sizehint!(pda::PooledDataVector, newsz::Integer) = sizehint!(pda.refs, newsz) -# Pad a vector with NA's +# Pad a vector with nulls """ - padna(dv::AbstractDataVector, front::Integer, back::Integer) -> DataVector + padnull(dv::AbstractDataVector, front::Integer, back::Integer) -> DataVector -Pad `dv` with `NA` values. `front` is an integer number of `NA`s to add at the -beginning of the array and `back` is the number of `NA`s to add at the end. +Pad `dv` with `null` values. `front` is an integer number of `null`s to add at the +beginning of the array and `back` is the number of `null`s to add at the end. 
# Examples ```jldoctest -julia> padna(@data([1, 2, 3]), 1, 2) +julia> padnull(@data([1, 2, 3]), 1, 2) 6-element DataArrays.DataArray{Int64,1}: - NA + null 1 2 3 - NA - NA + null + null ``` """ -function padna(dv::AbstractDataVector, - front::Integer, - back::Integer) +function padnull(dv::AbstractDataVector, + front::Integer, + back::Integer) n = length(dv) res = similar(dv, front + n + back) for i in 1:n diff --git a/src/deprecated.jl b/src/deprecated.jl index baa973d..eb35bbf 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,4 +1,4 @@ -using Base: @deprecate, depwarn +using Base: @deprecate, @deprecate_binding, depwarn # Deprecate in Julia 0.6 cycle function Base.isnan(da::DataArray) @@ -6,10 +6,9 @@ function Base.isnan(da::DataArray) return isnan.(da) end -@deprecate isna(x::AbstractArray) isna.(x) -@deprecate anyna(x) any(isna, x) -@deprecate allna(x) all(isna, x) -@deprecate padNA(dv::AbstractDataVector, front::Integer, back::Integer) padna(dv, front, back) +@deprecate isna(x::AbstractArray) isnull.(x) +@deprecate anyna(x) any(isnull, x) +@deprecate allna(x) all(isnull, x) function reldiff(v::Vector{T}) where T depwarn("reldiff is deprecated.", :reldiff) @@ -74,3 +73,20 @@ for f in [:(&), :(|), :(xor)] @deprecate ($f)(a::DataArray{<:Integer}, b::DataArray{<:Integer}) ($f).(a, b) end end + +@deprecate_binding NAtype Null +@deprecate_binding NA null +@deprecate_binding NAException NullException +@deprecate isna isnull +@deprecate dropna(x) collect(Nulls.skip(x)) +@deprecate padna padnull +@deprecate each_failna Nulls.fail +@deprecate each_dropna Nulls.skip +@deprecate each_replacena Nulls.replace +@deprecate_binding EachFailNA DataArrays.EachFailNull +@deprecate_binding EachDropNA DataArrays.EachDropNull +@deprecate_binding EachReplaceNA DataArrays.EachReplaceNull +import SpecialFunctions: digamma, erf, erfc +@deprecate digamma(x::Null) isnull(x) ? null : SpecialFunctions.digamma(x) +@deprecate erf(x::Null) isnull(x) ? null : SpecialFunctions.erf(x) +@deprecate erfc(x::Null) isnull(x) ? null : SpecialFunctions.erfc(x) diff --git a/src/extras.jl b/src/extras.jl index b35670c..d042ca4 100644 --- a/src/extras.jl +++ b/src/extras.jl @@ -21,11 +21,11 @@ end function StatsBase.countmap(x::AbstractDataArray{T}) where {T} - addcounts!(Dict{Data{T}, Int}(), x) + addcounts!(Dict{Union{T,Null}, Int}(), x) end function StatsBase.countmap(x::AbstractDataArray{T}, wv::Weights{W}) where {T,W} - addcounts!(Dict{Data{T}, W}(), x, wv) + addcounts!(Dict{Union{T,Null}, W}(), x, wv) end """ diff --git a/src/grouping.jl b/src/grouping.jl index 3d8875d..4736924 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -1,7 +1,7 @@ -function groupsort_indexer(x::AbstractVector, ngroups::Integer, nalast::Bool=false) +function groupsort_indexer(x::AbstractVector, ngroups::Integer, nulllast::Bool=false) # translated from Wes McKinney's groupsort_indexer in pandas (file: src/groupby.pyx). 
- # count group sizes, location 0 for NA + # count group sizes, location 0 for null n = length(x) # counts = x.pool counts = fill(0, ngroups + 1) @@ -11,7 +11,7 @@ function groupsort_indexer(x::AbstractVector, ngroups::Integer, nalast::Bool=fal # mark the start of each contiguous group of like-indexed data where = fill(1, ngroups + 1) - if nalast + if nulllast for i = 3:ngroups+1 where[i] = where[i - 1] + counts[i - 1] end @@ -32,4 +32,4 @@ function groupsort_indexer(x::AbstractVector, ngroups::Integer, nalast::Bool=fal result, where, counts end -groupsort_indexer(pv::PooledDataVector, nalast::Bool=false) = groupsort_indexer(pv.refs, length(pv.pool), nalast) +groupsort_indexer(pv::PooledDataVector, nulllast::Bool=false) = groupsort_indexer(pv.refs, length(pv.pool), nulllast) diff --git a/src/indexing.jl b/src/indexing.jl index 7300ba8..94bfc80 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -1,48 +1,48 @@ ## Unsafe scalar indexing # Extract relevant fields of a DataArray to a tuple -# The extracted tuple can be passed to `unsafe_isna`, -# `unsafe_getindex_notna`, `unsafe_setna!`, `unsafe_setnotna!`, and +# The extracted tuple can be passed to `unsafe_isnull`, +# `unsafe_getindex_notnull`, `unsafe_setnull!`, `unsafe_setnotnull!`, and # `unsafe_dasetindex!`. This has a meaningful performance impact within # very tight loops. daextract(da::DataArray) = (da.data, da.na.chunks) daextract(pda::PooledDataArray) = (pda.refs, pda.pool) daextract(a) = nothing -# Check for NA -unsafe_isna(da::DataArray, extr, idx::Real) = Base.unsafe_bitgetindex(extr[2], idx) -unsafe_isna(pda::PooledDataArray, extr, idx::Real) = extr[1][idx] == 0 -unsafe_isna(a, extr, idx::Real) = false -unsafe_getindex_notna(da::DataArray, extr, idx::Real) = getindex(extr[1], idx) -unsafe_getindex_notna(pda::PooledDataArray, extr, idx::Real) = getindex(extr[2], extr[1][idx]) -unsafe_getindex_notna(a, extr, idx::Real) = Base.unsafe_getindex(a, idx) +# Check for null +unsafe_isnull(da::DataArray, extr, idx::Real) = Base.unsafe_bitgetindex(extr[2], idx) +unsafe_isnull(pda::PooledDataArray, extr, idx::Real) = extr[1][idx] == 0 +unsafe_isnull(a, extr, idx::Real) = false +unsafe_getindex_notnull(da::DataArray, extr, idx::Real) = getindex(extr[1], idx) +unsafe_getindex_notnull(pda::PooledDataArray, extr, idx::Real) = getindex(extr[2], extr[1][idx]) +unsafe_getindex_notnull(a, extr, idx::Real) = Base.unsafe_getindex(a, idx) -# Set NA or data portion of DataArray +# Set null or data portion of DataArray unsafe_bitsettrue!(chunks::Vector{UInt64}, idx::Real) = chunks[Base._div64(Int(idx)-1)+1] |= (UInt64(1) << Base._mod64(Int(idx)-1)) unsafe_bitsetfalse!(chunks::Vector{UInt64}, idx::Real) = chunks[Base._div64(Int(idx)-1)+1] &= ~(UInt64(1) << Base._mod64(Int(idx)-1)) -unsafe_setna!(da::DataArray, extr, idx::Real) = unsafe_bitsettrue!(extr[2], idx) -unsafe_setna!(da::PooledDataArray, extr, idx::Real) = setindex!(extr[1], 0, idx) -unsafe_setnotna!(da::DataArray, extr, idx::Real) = unsafe_bitsetfalse!(extr[2], idx) -unsafe_setnotna!(da::PooledDataArray, extr, idx::Real) = nothing +unsafe_setnull!(da::DataArray, extr, idx::Real) = unsafe_bitsettrue!(extr[2], idx) +unsafe_setnull!(da::PooledDataArray, extr, idx::Real) = setindex!(extr[1], 0, idx) +unsafe_setnotnull!(da::DataArray, extr, idx::Real) = unsafe_bitsetfalse!(extr[2], idx) +unsafe_setnotnull!(da::PooledDataArray, extr, idx::Real) = nothing -# Fast setting of NA values in DataArrays +# Fast setting of null values in DataArrays # These take the data and chunks (extracted as da.data and 
# da.na.chunks), a value, and a linear index. They assume # a certain initialization pattern: # # - For DataArrays, da.na should be falses # - For PooledDataArrays, pda.refs should be zeros -unsafe_dasetindex!(data::Array, na_chunks::Vector{UInt64}, val::NAtype, idx::Real) = +unsafe_dasetindex!(data::Array, na_chunks::Vector{UInt64}, val::Null, idx::Real) = unsafe_bitsettrue!(na_chunks, idx) unsafe_dasetindex!(data::Array, na_chunks::Vector{UInt64}, val, idx::Real) = setindex!(data, val, idx) -unsafe_dasetindex!(da::DataArray, extr, val::NAtype, idx::Real) = - unsafe_setna!(da, extr, idx) -unsafe_dasetindex!(da::PooledDataArray, extr, val::NAtype, idx::Real) = nothing +unsafe_dasetindex!(da::DataArray, extr, val::Null, idx::Real) = + unsafe_setnull!(da, extr, idx) +unsafe_dasetindex!(da::PooledDataArray, extr, val::Null, idx::Real) = nothing unsafe_dasetindex!(da::DataArray, extr, val, idx::Real) = setindex!(extr[1], val, idx) unsafe_dasetindex!(pda::PooledDataArray, extr, val, idx::Real) = setindex!(extr[1], getpoolidx(pda, val), idx) @@ -71,27 +71,27 @@ end ## General indexing functions -# Indexing with NA throws an error +# Indexing with null throws an error function Base.to_index(A::DataArray) - any(A.na) && throw(NAException("cannot index an array with a DataArray containing NA values")) + any(A.na) && throw(NullException()) Base.to_index(A.data) end if isdefined(Base, :checkindex) && isdefined(Base, :AbstractUnitRange) - Base.checkindex(::Type{Bool}, ::AbstractUnitRange, ::NAtype) = - throw(NAException("cannot index an array with a DataArray containing NA values")) + Base.checkindex(::Type{Bool}, ::AbstractUnitRange, ::Null) = + throw(NullException()) elseif isdefined(Base, :checkindex) - Base.checkindex(::Type{Bool}, ::UnitRange, ::NAtype) = - throw(NAException("cannot index an array with a DataArray containing NA values")) + Base.checkindex(::Type{Bool}, ::UnitRange, ::Null) = + throw(NullException()) else Base.checkbounds(::Type{Bool}, sz::Int, I::AbstractDataVector{Bool}) = length(I) == sz function Base.checkbounds{T<:Real}(::Type{Bool}, sz::Int, I::AbstractDataArray{T}) - any(isna, I) && throw(NAException("cannot index into an array with a DataArray containing NAs")) + any(isnull, I) && throw(NullException()) extr = daextract(I) b = true for i = 1:length(I) - @inbounds v = unsafe_getindex_notna(I, extr, i) + @inbounds v = unsafe_getindex_notnull(I, extr, i) b &= Base.checkbounds(Bool, sz, v) end b @@ -119,7 +119,7 @@ Base.IndexStyle(::Type{<:AbstractDataArray}) = Base.IndexLinear() # Scalar case function Base.getindex(da::DataArray, I::Real) if getindex(da.na, I) - return NA + return null else return getindex(da.data, I) end @@ -129,7 +129,7 @@ end N = length(I) quote $(Expr(:meta, :inline)) - flipbits!(dest.na) # similar initializes with NAs + flipbits!(dest.na) # similar initializes with nulls @nexprs $N d->(J_d = I[d]) srcextr = daextract(src) destextr = daextract(dest) @@ -139,10 +139,10 @@ end @nloops $N j d->J_d begin offset_0 = @ncall $N sub2ind srcsz j d, Ds = next(D, Ds) - if unsafe_isna(src, srcextr, offset_0) - unsafe_dasetindex!(dest, destextr, NA, d) + if unsafe_isnull(src, srcextr, offset_0) + unsafe_dasetindex!(dest, destextr, null, d) else - unsafe_dasetindex!(dest, destextr, unsafe_getindex_notna(src, srcextr, offset_0), d) + unsafe_dasetindex!(dest, destextr, unsafe_getindex_notnull(src, srcextr, offset_0), d) end end dest @@ -154,7 +154,7 @@ end # Scalar case function Base.getindex(pda::PooledDataArray, I::Real) if getindex(pda.refs, I) == 0 - return NA + 
return null else return pda.pool[getindex(pda.refs, I)] end @@ -162,7 +162,7 @@ end @inline function Base.getindex(pda::PooledDataArray, I::Integer...) if getindex(pda.refs, I...) == 0 - return NA + return null else return pda.pool[getindex(pda.refs, I...)] end @@ -175,7 +175,7 @@ end ## setindex!: DataArray -function Base.setindex!(da::DataArray, val::NAtype, i::Real) +function Base.setindex!(da::DataArray, val::Null, i::Real) da.na[i] = true return da end @@ -188,7 +188,7 @@ end ## setindex!: PooledDataArray -function Base.setindex!(pda::PooledDataArray, val::NAtype, ind::Real) +function Base.setindex!(pda::PooledDataArray, val::Null, ind::Real) pda.refs[ind] = 0 return pda end @@ -218,10 +218,10 @@ end @nexprs $N d->(offset_d = 1) # really only need offset_$N = 1 if !isa(x, AbstractArray) @nloops $N i d->I_d d->(@inbounds offset_{d-1} = offset_d + (i_d - 1)*stride_d) begin - if isa(x, NAtype) - @inbounds unsafe_setna!(A, Aextr, offset_0) + if isa(x, Null) + @inbounds unsafe_setnull!(A, Aextr, offset_0) else - @inbounds unsafe_setnotna!(A, Aextr, offset_0) + @inbounds unsafe_setnotnull!(A, Aextr, offset_0) @inbounds unsafe_dasetindex!(A, Aextr, x, offset_0) end end @@ -243,11 +243,11 @@ end else Xextr = daextract(X) @nloops $N i d->I_d d->(@inbounds offset_{d-1} = offset_d + (i_d - 1)*stride_d) begin - @inbounds if isa(X, AbstractDataArray) && unsafe_isna(X, Xextr, k) - unsafe_setna!(A, Aextr, offset_0) + @inbounds if isa(X, AbstractDataArray) && unsafe_isnull(X, Xextr, k) + unsafe_setnull!(A, Aextr, offset_0) else - unsafe_setnotna!(A, Aextr, offset_0) - unsafe_dasetindex!(A, Aextr, unsafe_getindex_notna(X, Xextr, k), offset_0) + unsafe_setnotnull!(A, Aextr, offset_0) + unsafe_dasetindex!(A, Aextr, unsafe_getindex_notnull(X, Xextr, k), offset_0) end k += 1 end diff --git a/src/linalg.jl b/src/linalg.jl index d53cb6c..f35025c 100644 --- a/src/linalg.jl +++ b/src/linalg.jl @@ -15,12 +15,12 @@ function impute!(X::Matrix, missing_entries::Vector, end # Should be done with a proper N-dimensional Int array. -function findna(dm::DataMatrix) +function findnull(dm::DataMatrix) indices = Any[] n, p = size(dm) for i = 1:n for j = 1:p - if isna(dm[i, j]) + if isnull(dm[i, j]) push!(indices, [i, j]) end end @@ -34,7 +34,7 @@ function global_mean(dm::DataMatrix) n, p = size(dm) for i = 1:n for j = 1:p - if !isna(dm[i, j]) + if !isnull(dm[i, j]) mu += dm[i, j] n += 1 end @@ -50,7 +50,7 @@ function na_safe_rowmeans(dm::DataMatrix) mu = 0.0 n = 0 for j = 1:p - if !isna(dm[i, j]) + if !isnull(dm[i, j]) mu += dm[i, j] n += 1 end @@ -62,7 +62,7 @@ function na_safe_rowmeans(dm::DataMatrix) return mus end -# TODO: Default to failure in the face of NA's +# TODO: Default to failure in the face of nulls function Base.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4) # Make a copy of the data that we can alter in place @@ -72,7 +72,7 @@ function Base.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4) n, p = size(dm) # Estimate missingness and print a message. 
- missing_entries = findna(dm) + missing_entries = findnull(dm) missingness = length(missing_entries) / (n * p) if tracing @printf "Matrix is missing %.2f%% of entries\n" missingness * 100 @@ -83,8 +83,8 @@ function Base.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4) mu_i = na_safe_rowmeans(dm) for i = 1:n for j = 1:p - if isna(dm[i, j]) - if isna(mu_i[i]) + if isnull(dm[i, j]) + if isnull(mu_i[i]) dm[i, j] = global_mu else dm[i, j] = mu_i[i] @@ -93,7 +93,7 @@ function Base.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4) end end - # Convert dm to a Float array now that we've removed all NA's + # Convert dm to a Float array now that we've removed all nulls dm = float(dm) # Count iterations of proper imputation method diff --git a/src/literals.jl b/src/literals.jl index 7ced625..8468dbc 100644 --- a/src/literals.jl +++ b/src/literals.jl @@ -3,7 +3,7 @@ function fixargs(args::Vector{Any}, stub::Any) data = Array{Any}(n) na = BitArray(n) for i in 1:n - if args[i] == :NA + if args[i] == :null || args[i] == :NA data[i] = stub na[i] = true else @@ -14,34 +14,34 @@ function fixargs(args::Vector{Any}, stub::Any) return data, na end -# We assume that data has at least one "value" that isn't NA +# We assume that data has at least one "value" that isn't null function findstub_vector(ex::Expr) - if ex.args[1] != :NA + if ex.args[1] != :null && ex.args[1] != :NA return ex.args[1] end for i in 2:length(ex.args) - if ex.args[i] != :NA + if ex.args[i] != :null && ex.args[i] != :NA return ex.args[i] end end - return NA + return null end -# We assume that data has at least one "value" that isn't NA +# We assume that data has at least one "value" that isn't null function findstub_matrix(ex::Expr) - if ex.args[1].args[1] != :NA + if ex.args[1].args[1] != :null && ex.args[1].args[1] != :NA return ex.args[1].args[1] end nrows = length(ex.args) for row in 1:nrows subex = ex.args[row] for i in 1:length(subex.args) - if subex.args[i] != :NA + if subex.args[i] != :null && subex.args[i] != :NA return subex.args[i] end end end - return NA + return null end function parsevector(ex::Expr) @@ -55,7 +55,7 @@ function parsevector(ex::Expr) na = reshape(na, 1, length(na)) end - if isequal(stub, NA) + if isequal(stub, null) return Expr(ex.head == :hcat ? (:typed_hcat) : (:ref), Any, data...), na else return Expr(ex.head, data...), na @@ -82,7 +82,7 @@ function parsematrix(ex::Expr) end if ex.head == :typed_vcat return Expr(:typed_vcat, ex.args[1], datarows...), Expr(:vcat, narows...) - elseif isequal(stub, NA) + elseif isequal(stub, null) return Expr(:typed_vcat, Any, datarows...), Expr(:vcat, narows...) else return Expr(:vcat, datarows...), Expr(:vcat, narows...) @@ -108,10 +108,10 @@ Create a [`DataArray`](@ref) based on the given expression. # Examples ```jldoctest -julia> @data [1, NA, 3] +julia> @data [1, null, 3] 3-element DataArrays.DataArray{Int64,1}: 1 - NA + null 3 julia> @data hcat(1:3, 4:6) @@ -125,7 +125,7 @@ macro data(ex) if !(ex.head in (:vect, :vcat, :hcat, :ref, :typed_vcat, :typed_hcat)) return quote tmp = $(esc(ex)) - DataArray(tmp, broadcast(x->isequal(x, NA), tmp)) + DataArray(tmp, broadcast(x->isequal(x, null), tmp)) end end dataexpr, naexpr = parsedata(ex) @@ -140,10 +140,10 @@ Create a [`PooledDataArray`](@ref) based on the given expression. 
# Examples ```jldoctest -julia> @pdata ["Hello", NA, "World"] +julia> @pdata ["Hello", null, "World"] 3-element DataArrays.PooledDataArray{String,UInt32,1}: "Hello" - NA + null "World" ``` """ @@ -151,7 +151,7 @@ macro pdata(ex) if !(ex.head in (:vect, :vcat, :hcat, :ref, :typed_vcat, :typed_hcat)) return quote tmp = $(esc(ex)) - PooledDataArray(tmp, broadcast(x->isequal(x, NA), tmp)) + PooledDataArray(tmp, broadcast(x->isequal(x, null), tmp)) end end dataexpr, naexpr = parsedata(ex) diff --git a/src/natype.jl b/src/natype.jl deleted file mode 100644 index ab55f06..0000000 --- a/src/natype.jl +++ /dev/null @@ -1,91 +0,0 @@ -############################################################################## -## -## NA's via the NAtype -## -## Inspirations: -## * R's NA's -## * Panda's discussion of NA's: -## http://pandas.pydata.org/pandas-docs/stable/missing_data.html -## * NumPy's analysis of the issue: -## https://github.com/numpy/numpy/blob/master/doc/neps/missing-data.rst -## -## NAtype is a composite type representing missingness: -## * An object of NAtype can be generated by writing NA -## -############################################################################## - -""" - NAtype - -The type of a missing value, `NA`. -""" -struct NAtype -end - -""" - NA - -A value denoting missingness within the domain of any type. -""" -const NA = NAtype() - -const Data{T} = Union{T,NAtype} - -Base.show(io::IO, x::NAtype) = print(io, "NA") - -struct NAException <: Exception - msg::String -end -NAException() = NAException("NA found") - -# Restrict to Number to avoid infinite recursion -# Might be possible to get rid of these restrictions if the promotion in base gets changed. -## Numbers -Base.promote_rule(::Type{Data{T}}, ::Type{Data{S}}) where {T<:Number,S<:Number} = - Union{promote_type(T, S),NAtype} -Base.promote_rule(::Type{Data{T}}, ::Type{S}) where {T<:Number,S<:Number} = - Union{promote_type(T, S),NAtype} -## Dates -Base.promote_rule(::Type{Data{T}}, ::Type{Data{S}}) where {T<:Dates.AbstractTime,S<:Dates.AbstractTime} = - Union{promote_type(T, S),NAtype} -Base.promote_rule(::Type{Data{T}}, ::Type{S}) where {T<:Dates.AbstractTime,S<:Dates.AbstractTime} = - Union{promote_type(T, S),NAtype} - -Base.promote_rule(::Type{NAtype}, ::Type{T}) where {T} = Union{T,NAtype} - -# Restrict to Number to avoid maching everything -Base.convert(::Type{Data{T}}, x::Number) where {T<:Number} = convert(T, x) -Base.convert(::Type{Data{T}}, x::Dates.AbstractTime) where {T<:Dates.AbstractTime} = convert(T, x) - -Base.length(x::NAtype) = 1 -Base.size(x::NAtype) = () -Base.size(x::NAtype, i::Integer) = i < 1 ? throw(BoundsError()) : 1 -Base.ndims(x::NAtype) = 0 -Base.getindex(x::NAtype, i) = i == 1 ? NA : throw(BoundsError()) - -# extractT(::Type{Data{T}}) where {T} = T -extractT(::Type{Union{T,NAtype}}) where {T} = T -extractT(::Type{T}) where {T} = T -extractT(::Type{NAtype}) = NAtype - -Base.zero(::Type{Data{T}}) where {T} = zero(T) - -""" - isna(x) -> Bool - -Determine whether `x` is missing, i.e. `NA`. 
- -# Examples - -```jldoctest -julia> isna(1) -false - -julia> isna(NA) -true -``` -""" -isna(x::NAtype) = true -isna(x::Any) = false - -Base.isnan(::NAtype) = NA diff --git a/src/operators.jl b/src/operators.jl index 1c5ad8b..2f69147 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -72,7 +72,7 @@ end # Unary operator macros for DataArrays # -# Apply unary operator to non-NA members of a DataArray or +# Apply unary operator to non-null members of a DataArray or # AbstractDataArray macro dataarray_unary(f, intype, outtype, N...) esc(quote @@ -199,11 +199,6 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) )) end -# Unary operators, NA -for f in [:+,:-,:*,:/] - @eval $(f)(d::NAtype) = NA -end - # Unary operators, DataArrays. @dataarray_unary(+, Any, T) @dataarray_unary(-, Bool, Int) @@ -213,7 +208,6 @@ end # Treat ctranspose and * in a special way for (f, elf) in ((:(Base.ctranspose), :conj), (:(Base.transpose), :identity)) @eval begin - $(f)(::NAtype) = NA function $(f){T}(d::DataMatrix{T}) # (c)transpose in Base uses a cache-friendly algorithm for # numeric arrays, which is faster than our naive algorithm, @@ -238,10 +232,10 @@ for (f, elf) in ((:(Base.ctranspose), :conj), (:(Base.transpose), :identity)) end end -# Propagates NA's +# Propagates nulls # For a dissenting view, # http://radfordneal.wordpress.com/2011/05/21/slowing-down-matrix-multiplication-in-r/ -# But we're getting 10x R while maintaining NA's +# But we're getting 10x R while maintaining nulls for (adata, bdata) in ((true, false), (false, true), (true, true)) @eval begin function (*)(a::$(adata ? :(Union{DataVector, DataMatrix}) : :(Union{Vector, Matrix})), @@ -256,7 +250,7 @@ for (adata, bdata) in ((true, false), (false, true), (true, true)) p1 = size(a, 2) corrupt_rows = falses(n1) for j in 1:p1, i in 1:n1 - # Propagate NA's + # Propagate nulls # Corrupt all rows based on i corrupt_rows[i] |= a.na[i, j] end @@ -269,7 +263,7 @@ for (adata, bdata) in ((true, false), (false, true), (true, true)) p2 = size(b, 2) corrupt_cols = falses(p2) for j in 1:p2, i in 1:n2 - # Propagate NA's + # Propagate nulls # Corrupt all columns based on j corrupt_cols[j] |= b.na[i, j] end @@ -299,7 +293,6 @@ end # inputs for f in (:(Base.abs), :(Base.abs2), :(Base.conj), :(Base.sign)) @eval begin - $(f)(::NAtype) = NA @dataarray_unary $(f) Number T end end @@ -311,7 +304,6 @@ for f in (:(Base.acos), :(Base.acosh), :(Base.asin), :(Base.asinh), :(Base.atan) :(Base.exp), :(Base.exp2), :(Base.expm1), :(Base.log), :(Base.log10), :(Base.log1p), :(Base.log2), :(Base.exponent), :(Base.sqrt), :(Base.gamma), :(Base.lgamma)) @eval begin - ($f)(::NAtype) = NA @dataarray_unary $(f) AbstractFloat T @dataarray_unary $(f) Real Float64 end @@ -319,7 +311,6 @@ end ## SpecialFunctions (should be a conditional module when supported) for f in (:(SpecialFunctions.digamma), :(SpecialFunctions.erf), :(SpecialFunctions.erfc)) @eval begin - ($f)(::NAtype) = NA @dataarray_unary $(f) AbstractFloat T @dataarray_unary $(f) Real Float64 end @@ -328,8 +319,6 @@ end # Elementary functions that take varargs for f in (:(Base.round), :(Base.ceil), :(Base.floor), :(Base.trunc)) @eval begin - ($f)(::NAtype, args::Integer...) = NA - # ambiguity @dataarray_unary $(f) Real T 1 @dataarray_unary $(f) Real T 2 @@ -354,36 +343,10 @@ for f in (:(Base.round), :(Base.ceil), :(Base.floor), :(Base.trunc)) end end -# -# Bit operators -# - -@swappable (&)(a::NAtype, b::Bool) = b ? NA : false -@swappable (|)(a::NAtype, b::Bool) = b ? 
true : NA -@swappable (⊻)(a::NAtype, b::Bool) = NA - -# To avoid ambiguity warning -@swappable (|)(a::NAtype, b::Function) = NA - -for f in (:(&), :(|), :(⊻)) - @eval begin - # Scalar with NA - ($f)(::NAtype, ::NAtype) = NA - @swappable ($f)(::NAtype, b::Integer) = NA - end -end - # # Comparison operators # -Base.isequal(::NAtype, ::NAtype) = true -Base.isequal(::NAtype, b) = false -Base.isequal(a, ::NAtype) = false -Base.isless(::NAtype, ::NAtype) = false -Base.isless(::NAtype, b) = false -Base.isless(a, ::NAtype) = true - # This is for performance only; the definition in Base is sufficient # for AbstractDataArrays function Base.isequal(a::DataArray, b::DataArray) @@ -413,85 +376,39 @@ function (==)(a::DataArray, b::DataArray) adata = a.data bdata = b.data bchunks = b.na.chunks - has_na = false - @bitenumerate a.na i na begin - if na || Base.unsafe_bitgetindex(bchunks, i) - has_na = true + has_null = false + @bitenumerate a.na i anull begin + if anull || Base.unsafe_bitgetindex(bchunks, i) + has_null = true else @inbounds adata[i] == bdata[i] || return false end end - has_na ? NA : true + has_null ? null : true end -# ambiguity -@swappable (==)(a::DataArray, b::AbstractDataArray) = - invoke(==, Tuple{AbstractDataArray,AbstractDataArray}, a, b) - @swappable function (==)(a::DataArray, b::AbstractArray) size(a) == size(b) || return false adata = a.data - has_na = false - @bitenumerate a.na i na begin - if na - has_na = true + has_null = false + @bitenumerate a.na i anull begin + if anull + has_null = true else @inbounds adata[i] == b[i] || return false end end - has_na ? NA : true -end - -function (==)(a::AbstractDataArray, b::AbstractDataArray) - size(a) == size(b) || return false - has_na = false - for i = 1:length(a) - if isna(a[i]) || isna(b[i]) - has_na = true - else - a[i] == b[i] || return false - end - end - has_na ? NA : true -end - -@swappable function (==)(a::AbstractDataArray, b::AbstractArray) - size(a) == size(b) || return false - has_na = false - for i = 1:length(a) - if isna(a[i]) - has_na = true - else - a[i] == b[i] || return false - end - end - has_na ? NA : true + has_null ? null : true end # ambiguity -@swappable (==)(::NAtype, ::WeakRef) = NA - -for sf in [:(==),:(!=),:(>),:(>=),:(<),:(<=)] - @eval begin - # Scalar with NA - ($(sf))(::NAtype, ::NAtype) = NA - @swappable ($(sf))(::NAtype, b) = NA - end -end +@swappable (==)(a::DataArray, b::AbstractArray{>:Null}) = + invoke(==, Tuple{AbstractDataArray,AbstractArray}, a, b) # # Binary operators # -for f in (:(+), :(-), :(*), :(/), - :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem), :(Base.min), :(Base.max)) - @eval begin - # Scalar with NA - ($f)(::NAtype, ::NAtype) = NA - @swappable ($f)(d::NAtype, x::Number) = NA - end -end - # Define methods for UniformScaling. Otherwise we get ambiguity # warnings... 
if isdefined(Base, :UniformScaling) @@ -532,38 +449,38 @@ function (-)(J::UniformScaling{TJ},A::DataArray{TA,2}) where {TA,TJ<:Number} end (+)(A::DataArray{Bool,2},J::UniformScaling{Bool}) = - invoke(+, Tuple{AbstractArray{Data{Bool},2},UniformScaling{Bool}}, A, J) + invoke(+, Tuple{AbstractArray{Union{Bool,Null},2},UniformScaling{Bool}}, A, J) (+)(J::UniformScaling{Bool},A::DataArray{Bool,2}) = - invoke(+, Tuple{UniformScaling{Bool},AbstractArray{Data{Bool},2}}, J, A) + invoke(+, Tuple{UniformScaling{Bool},AbstractArray{Union{Bool,Null},2}}, J, A) (-)(A::DataArray{Bool,2},J::UniformScaling{Bool}) = - invoke(-, Tuple{AbstractArray{Data{Bool},2},UniformScaling{Bool}}, A, J) + invoke(-, Tuple{AbstractArray{Union{Bool,Null},2},UniformScaling{Bool}}, A, J) (-)(J::UniformScaling{Bool},A::DataArray{Bool,2}) = - invoke(-, Tuple{UniformScaling{Bool},AbstractArray{Data{Bool},2}}, J, A) + invoke(-, Tuple{UniformScaling{Bool},AbstractArray{Union{Bool,Null},2}}, J, A) (+)(A::AbstractDataArray{TA,2},J::UniformScaling{TJ}) where {TA,TJ} = - invoke(+, Tuple{AbstractArray{Data{TA},2},UniformScaling{TJ}}, A, J) + invoke(+, Tuple{AbstractArray{Union{TA,Null},2},UniformScaling{TJ}}, A, J) (+)(J::UniformScaling,A::AbstractDataArray{TA,2}) where {TA} = - invoke(+, Tuple{UniformScaling,AbstractArray{Data{TA},2}}, J, A) + invoke(+, Tuple{UniformScaling,AbstractArray{Union{TA,Null},2}}, J, A) (-)(A::AbstractDataArray{TA,2},J::UniformScaling{TJ}) where {TA,TJ<:Number} = - invoke(-, Tuple{AbstractArray{Data{TA},2},UniformScaling{TJ}}, A, J) + invoke(-, Tuple{AbstractArray{Union{TA,Null},2},UniformScaling{TJ}}, A, J) (-)(J::UniformScaling{TJ},A::AbstractDataArray{TA,2}) where {TA,TJ<:Number} = - invoke(-, Tuple{UniformScaling{TJ},AbstractArray{Data{TA},2}}, J, A) + invoke(-, Tuple{UniformScaling{TJ},AbstractArray{Union{TA,Null},2}}, J, A) (+)(A::AbstractDataArray{Bool,2},J::UniformScaling{Bool}) = - invoke(+, Tuple{AbstractArray{Data{Bool},2},UniformScaling{Bool}}, A, J) + invoke(+, Tuple{AbstractArray{Union{Bool,Null},2},UniformScaling{Bool}}, A, J) (+)(J::UniformScaling{Bool},A::AbstractDataArray{Bool,2}) = - invoke(+, Tuple{UniformScaling{Bool},AbstractArray{Data{Bool},2}}, J, A) + invoke(+, Tuple{UniformScaling{Bool},AbstractArray{Union{Bool,Null},2}}, J, A) (-)(A::AbstractDataArray{Bool,2},J::UniformScaling{Bool}) = - invoke(-, Tuple{AbstractArray{Data{Bool},2},UniformScaling{Bool}}, A, J) + invoke(-, Tuple{AbstractArray{Union{Bool,Null},2},UniformScaling{Bool}}, A, J) (-)(J::UniformScaling{Bool},A::AbstractDataArray{Bool,2}) = - invoke(-, Tuple{UniformScaling{Bool},AbstractArray{Data{Bool},2}}, J, A) + invoke(-, Tuple{UniformScaling{Bool},AbstractArray{Union{Bool,Null},2}}, J, A) end # if isdefined(Base, :UniformScaling) for f in (:(*), :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem)) @eval begin - # Array with NA - @swappable $(f){T,N}(::NAtype, b::AbstractArray{T,N}) = + # Array with null + @swappable $(f){T,N}(::Null, b::AbstractArray{T,N}) = DataArray(Array{T,N}(size(b)), trues(size(b))) # DataArray with scalar @@ -572,16 +489,11 @@ for f in (:(*), :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem)) end for f in (:(+), :(-)) - # Array with NA - @eval @swappable $(f){T,N}(::NAtype, b::AbstractArray{T,N}) = + # Array with null + @eval @swappable $(f){T,N}(::Null, b::AbstractArray{T,N}) = DataArray(Array{T,N}(size(b)), trues(size(b))) end -(^)(::NAtype, ::NAtype) = NA -(^)(a, ::NAtype) = NA -(^)(::NAtype, ::Integer) = NA -(^)(::NAtype, ::Number) = NA - for f in (:(+), :(-)) @eval begin # Necessary to avoid 
ambiguity warnings @@ -593,17 +505,10 @@ for f in (:(+), :(-)) end # / is defined separately since it is not swappable -(/)(b::AbstractArray{T,N}, ::NAtype) where {T,N} = +(/)(b::AbstractArray{T,N}, ::Null) where {T,N} = DataArray(Array{T,N}(size(b)), trues(size(b))) @dataarray_binary_scalar(/, /, nothing, false) -for f in [:(Base.maximum), :(Base.minimum)] - @eval begin - ($f)(::NAtype, ::NAtype) = NA - @swappable $(f)(::Number, ::NAtype) = NA - end -end - function Base.LinAlg.diff(dv::DataVector) n = length(dv) new_data = diff(dv.data) @@ -649,12 +554,12 @@ Base.cumsum(dv::DataVector) = accumulate(+, dv) Base.cumprod(dv::DataVector) = accumulate(*, dv) for f in unary_vector_operators - @eval ($f)(dv::DataVector) = any(dv.na) ? NA : ($f)(dv.data) + @eval ($f)(dv::DataVector) = any(dv.na) ? null : ($f)(dv.data) end for f in binary_vector_operators @eval ($f)(dv1::DataVector, dv2::DataVector) = - any(dv1.na) || any(dv2.na) ? NA : ($f)(dv1.data, dv2.data) + any(dv1.na) || any(dv2.na) ? null : ($f)(dv1.data, dv2.data) end for f in (:(Base.minimum), :(Base.maximum), :(Base.prod), :(Base.sum), @@ -688,56 +593,56 @@ end function Base.all(dv::DataArray{Bool}) data = dv.data - has_na = false + hasnulls = false @bitenumerate dv.na i na begin if !na data[i] || return false else - has_na = true + hasnulls = true end end - has_na ? NA : true + hasnulls ? null : true end function Base.all(dv::AbstractDataArray{Bool}) - has_na = false + hasnulls = false for i in 1:length(dv) x = dv[i] - if !isna(x) + if !isnull(x) x || return false else - has_na = true + hasnulls = true end end - has_na ? NA : true + hasnulls ? null : true end function Base.any(dv::DataArray{Bool}) - has_na = false + hasnulls = false @bitenumerate dv.na i na begin if !na if dv.data[i] return true end else - has_na = true + hasnulls = true end end - has_na ? NA : false + hasnulls ? null : false end function Base.any(dv::AbstractDataArray{Bool}) - has_na = false + hasnulls = false for i in 1:length(dv) - if !isna(dv[i]) + if !isnull(dv[i]) if dv[i] return true end else - has_na = true + hasnulls = true end end - has_na ? NA : false + hasnulls ? null : false end function rle(v::AbstractVector{T}) where T @@ -774,8 +679,8 @@ function rle(v::AbstractDataVector{T}) where T lengths = Vector{Int16}(n) total_lengths = 1 for i in 2:n - if isna(v[i]) || isna(current_value) - if isna(v[i]) && isna(current_value) + if isnull(v[i]) || isnull(current_value) + if isnull(v[i]) && isnull(current_value) current_length += 1 else values[total_values] = current_value diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index fe8b13a..cf77032 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -4,7 +4,7 @@ ## ## An AbstractDataArray with efficient storage when values are repeated. A ## PDA wraps an array of unsigned integers, which are used to index into a -## compressed pool of values. NA's are 0's in the refs array. The unsigned +## compressed pool of values. nulls are 0's in the refs array. The unsigned ## integer type used for the refs array defaults to UInt32. The `compact` ## function converts to a smallest integer size that will index the entire pool. ## @@ -53,15 +53,15 @@ julia> p = PooledDataArray(d) PooledDataArray(T::Type, [R::Type=$DEFAULT_POOLED_REF_TYPE], [dims...]) Construct a `PooledDataArray` with element type `T`, reference storage type `R`, and dimensions -`dims`. If the dimensions are specified and nonzero, the array is filled with `NA` values. +`dims`. 
If the dimensions are specified and nonzero, the array is filled with `null` values. # Examples ```jldoctest julia> PooledDataArray(Int, 2, 2) 2×2 DataArrays.PooledDataArray{Int64,UInt32,2}: - NA NA - NA NA + null null + null null ``` """ mutable struct PooledDataArray{T, R<:Integer, N} <: AbstractDataArray{T, N} @@ -106,9 +106,9 @@ end PooledDataArray(d::PooledDataArray) = d # Constructor from array, w/ pool, missingness, and ref type -function PooledDataArray(d::AbstractArray{<:Data{T}, N}, +function PooledDataArray(d::AbstractArray{<:Union{T,Null}, N}, pool::Vector{T}, - m::AbstractArray{<:Data{Bool}, N}, + m::AbstractArray{<:Union{Bool,Null}, N}, r::Type{R} = DEFAULT_POOLED_REF_TYPE) where {T,R<:Integer,N} if length(pool) > typemax(R) throw(ArgumentError("Cannot construct a PooledDataVector with type $R with a pool of size $(length(pool))")) @@ -144,7 +144,7 @@ function PooledDataArray(d::AbstractArray{T, N}, PooledDataArray(d, pool, m, r) end -# Construct an all-NA PooledDataVector of a specific type +# Construct an all-null PooledDataVector of a specific type PooledDataArray(t::Type, dims::Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims)) PooledDataArray(t::Type, dims::Int...) = PooledDataArray(Array{t}(dims), trues(dims)) PooledDataArray(t::Type, r::Type{R}, dims::Tuple{Vararg{Int}}) where {R<:Integer} = PooledDataArray(Array{t}(dims), trues(dims), r) @@ -216,7 +216,7 @@ end ############################################################################## ## -## Predicates, including the new isna() +## Predicates, including the new isnull() ## ############################################################################## @@ -228,17 +228,17 @@ function Base.isfinite(pda::PooledDataArray) PooledDataArray(RefArray(copy(pda.refs)), isfinite(pda.pool)) end -Base.broadcast(::typeof(isna), pda::PooledDataArray) = pda.refs .== 0 -isna(pda::PooledDataArray, i::Real) = pda.refs[i] == 0 # -> Bool +Base.broadcast(::typeof(isnull), pda::PooledDataArray) = pda.refs .== 0 +Base.isnull(pda::PooledDataArray, i::Real) = pda.refs[i] == 0 # -> Bool -function Base.any(::typeof(isna), pda::PooledDataArray) +function Base.any(::typeof(isnull), pda::PooledDataArray) for ref in pda.refs ref == 0 && return true end return false end -function Base.all(::typeof(isna), pda::PooledDataArray) +function Base.all(::typeof(isnull), pda::PooledDataArray) for ref in pda.refs ref == 0 || return false end @@ -315,11 +315,11 @@ function Base.unique(pda::PooledDataArray{T}) where T sizehint!(unique_values, nlevels) seen = Set{eltype(pda.refs)}() - firstna = 0 + firstnull = 0 for i in 1:n - if isna(pda, i) - if firstna == 0 - firstna = length(unique_values) + 1 + if isnull(pda, i) + if firstnull == 0 + firstnull = length(unique_values) + 1 end elseif !in(pda.refs[i], seen) push!(seen, pda.refs[i]) @@ -328,25 +328,25 @@ function Base.unique(pda::PooledDataArray{T}) where T continue end - if firstna > 0 && length(unique_values) == nlevels + if firstnull > 0 && length(unique_values) == nlevels break end end - if firstna > 0 + if firstnull > 0 n = length(unique_values) res = DataArray(Vector{T}(n + 1)) i = 0 for val in unique_values i += 1 - if i == firstna + if i == firstnull res.na[i] = true i += 1 end res.data[i] = val end - if firstna == n + 1 + if firstnull == n + 1 res.na[n + 1] = true end @@ -356,7 +356,7 @@ function Base.unique(pda::PooledDataArray{T}) where T end end -levels(pda::PooledDataArray{T}) where {T} = copy(pda.pool) +Nulls.levels(pda::PooledDataArray{T}) where {T} = copy(pda.pool) function 
PooledDataArray(x::PooledDataArray{S,R,N}, newpool::Vector{S}) where {S,R,N} @@ -372,7 +372,7 @@ function PooledDataArray(x::PooledDataArray{S,R,N}, end myunique(x::AbstractVector) = unique(x) -myunique(x::AbstractDataVector) = unique(dropna(x)) +myunique(x::AbstractDataVector) = unique(Nulls.skip(x)) """ setlevels(x::PooledDataArray, newpool::Union{AbstractVector, Dict}) @@ -424,7 +424,7 @@ function setlevels(x::PooledDataArray{T,R}, newpool::AbstractVector) where {T,R} pool = myunique(newpool) refs = zeros(R, length(x)) tidx::Array{R} = indexin(newpool, pool) - tidx[isna.(newpool)] = 0 + tidx[isnull.(newpool)] = 0 for i in 1:length(refs) if x.refs[i] != 0 refs[i] = tidx[x.refs[i]] @@ -467,13 +467,13 @@ julia> p # has been modified ``` """ function setlevels!(x::PooledDataArray{T,R}, newpool::AbstractVector) where {T,R} - if newpool == myunique(newpool) # no NAs or duplicates + if newpool == myunique(newpool) # no nulls or duplicates x.pool = newpool return x else x.pool = myunique(newpool) tidx::Array{R} = indexin(newpool, x.pool) - tidx[isna.(newpool)] = 0 + tidx[isnull.(newpool)] = 0 for i in 1:length(x.refs) if x.refs[i] != 0 x.refs[i] = tidx[x.refs[i]] @@ -485,7 +485,7 @@ end function setlevels(x::PooledDataArray, d::Dict) newpool = copy(DataArray(x.pool)) - # An NA in `v` is put in the pool; that will cause it to become NA + # An null in `v` is put in the pool; that will cause it to become null for (k,v) in d idx = findin(newpool, [k]) if length(idx) == 1 @@ -505,9 +505,9 @@ function setlevels!(x::PooledDataArray{T,R}, d::Dict{T,T}) where {T,R} x end -function setlevels!(x::PooledDataArray{T,R}, d::Dict{T,Any}) where {T,R} # this version handles NAs in d's values +function setlevels!(x::PooledDataArray{T,R}, d::Dict{T,Any}) where {T,R} # this version handles nulls in d's values newpool = copy(DataArray(x.pool)) - # An NA in `v` is put in the pool; that will cause it to become NA + # An null in `v` is put in the pool; that will cause it to become null for (k,v) in d idx = findin(newpool, [k]) if length(idx) == 1 @@ -551,7 +551,7 @@ end function Base.similar(pda::PooledDataArray{T,R}, S::Type, dims::Dims) where {T,R} - PooledDataArray(RefArray(zeros(R, dims)), extractT(S)[]) + PooledDataArray(RefArray(zeros(R, dims)), Nulls.T(S)[]) end ############################################################################## @@ -592,7 +592,7 @@ function getpoolidx(pda::PooledDataArray{T,R}, val::Any) where {T,R} return pool_idx end -getpoolidx(pda::PooledDataArray{T,R}, val::NAtype) where {T,R} = zero(R) +getpoolidx(pda::PooledDataArray{T,R}, val::Null) where {T,R} = zero(R) ############################################################################## ## @@ -624,13 +624,13 @@ end Replace all occurrences of `from` in `x` with `to`, modifying `x` in place. 
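For example (an illustrative sketch, assuming the values shown are present in the pool):

    pda = @pdata(["a", "b", "a"])
    replace!(pda, "b", "c")    # the pool value "b" becomes "c"
    replace!(pda, "c", null)   # occurrences of "c" become null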
""" -function replace!(x::PooledDataArray{NAtype}, fromval::NAtype, toval::NAtype) - NA # no-op to deal with warning +function replace!(x::PooledDataArray{Null}, fromval::Null, toval::Null) + null # no-op to deal with warning end -function replace!(x::PooledDataArray, fromval::NAtype, toval::NAtype) - NA # no-op to deal with warning +function replace!(x::PooledDataArray, fromval::Null, toval::Null) + null # no-op to deal with warning end -function replace!(x::PooledDataArray{S}, fromval::T, toval::NAtype) where {S, T} +function replace!(x::PooledDataArray{S}, fromval::T, toval::Null) where {S, T} fromidx = findfirst(x.pool, fromval) if fromidx == 0 throw(ErrorException("can't replace a value not in the pool in a PooledDataVector!")) @@ -638,9 +638,9 @@ function replace!(x::PooledDataArray{S}, fromval::T, toval::NAtype) where {S, T} x.refs[x.refs .== fromidx] = 0 - return NA + return null end -function replace!(x::PooledDataArray{S}, fromval::NAtype, toval::T) where {S, T} +function replace!(x::PooledDataArray{S}, fromval::Null, toval::T) where {S, T} toidx = findfirst(x.pool, toval) # if toval is in the pool, just do the assignment if toidx != 0 @@ -709,7 +709,7 @@ Perm(o::O, v::PooledDataVector) where {O<:Base.Sort.Ordering} = FastPerm(o, v) ############################################################################## ## -## PooledDataVecs: EXPLANATION SHOULD GO HERE +## PooledDataVecs: EXPLAnullTION SHOULD GO HERE ## ############################################################################## @@ -768,12 +768,12 @@ function PooledDataVecs(v1::AbstractArray, # loop through once to fill the poolref dict for i = 1:length(v1) - if !isna(v1[i]) + if !isnull(v1[i]) poolref[v1[i]] = 0 end end for i = 1:length(v2) - if !isna(v2[i]) + if !isnull(v2[i]) poolref[v2[i]] = 0 end end @@ -789,14 +789,14 @@ function PooledDataVecs(v1::AbstractArray, # fill in newrefs zeroval = zero(DEFAULT_POOLED_REF_TYPE) for i = 1:length(v1) - if isna(v1[i]) + if isnull(v1[i]) refs1[i] = zeroval else refs1[i] = poolref[v1[i]] end end for i = 1:length(v2) - if isna(v2[i]) + if isnull(v2[i]) refs2[i] = zeroval else refs2[i] = poolref[v2[i]] @@ -866,7 +866,7 @@ function Base.convert{S, T, R, N}( res = Array{S}(size(pda)) for i in 1:length(pda) if pda.refs[i] == zero(R) - throw(NAException()) + throw(NullException()) else res[i] = pda.pool[pda.refs[i]] end @@ -915,7 +915,8 @@ function Base.convert{T, R, N}(::Type{Array}, pda::PooledDataArray{T, R, N}, rep return convert(Array{T, N}, pda, replacement) end -function dropna(pdv::PooledDataVector{T}) where T +function Base.collect(itr::EachDropNull{<:PooledDataVector{T}}) where T + pdv = itr.da n = length(pdv) res = Array{T}(n) total = 0 diff --git a/src/predicates.jl b/src/predicates.jl deleted file mode 100644 index c339bbc..0000000 --- a/src/predicates.jl +++ /dev/null @@ -1,18 +0,0 @@ -numeric_predicates = [:(Base.iseven), :(Base.ispow2), :(Base.isfinite), :(Base.isinf), :(Base.isodd)] -isdefined(Base, :isprime) && push!(numeric_predicates, :(Base.isprime)) - -type_predicates = (:(Base.isinteger), :(Base.isreal)) - -container_predicates = (:(Base.isempty),) - -for p in numeric_predicates - @eval begin - ($p)(v::NAtype) = NA - end -end - -for p in type_predicates - @eval begin - ($p)(v::NAtype) = NA - end -end diff --git a/src/reduce.jl b/src/reduce.jl index 79dd322..965fa94 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -1,11 +1,11 @@ -## mapreduce implementation that skips NA +## mapreduce implementation that skips null -function skipna_init(f, op, na::BitArray, 
data::Array, ifirst::Int, ilast::Int) - # Get first non-NA element +function skipnull_init(f, op, na::BitArray, data::Array, ifirst::Int, ilast::Int) + # Get first non-null element ifirst = Base.findnextnot(na, ifirst) @inbounds d1 = data[ifirst] - # Get next non-NA element + # Get next non-null element ifirst = Base.findnextnot(na, ifirst+1) @inbounds d2 = data[ifirst] @@ -13,12 +13,12 @@ function skipna_init(f, op, na::BitArray, data::Array, ifirst::Int, ilast::Int) (op(f(d1), f(d2)), ifirst) end -function mapreduce_seq_impl_skipna(f, op, T, A::DataArray, ifirst::Int, ilast::Int) +function mapreduce_seq_impl_skipnull(f, op, T, A::DataArray, ifirst::Int, ilast::Int) data = A.data na = A.na chunks = na.chunks - v, i = skipna_init(f, op, na, data, ifirst, ilast) + v, i = skipnull_init(f, op, na, data, ifirst, ilast) while i < ilast i += 1 @@ -31,18 +31,18 @@ function mapreduce_seq_impl_skipna(f, op, T, A::DataArray, ifirst::Int, ilast::I end # Pairwise map-reduce -function mapreduce_pairwise_impl_skipna(f, op, A::DataArray{T}, bytefirst::Int, bytelast::Int, n_notna::Int, blksize::Int) where T +function mapreduce_pairwise_impl_skipnull(f, op, A::DataArray{T}, bytefirst::Int, bytelast::Int, n_notna::Int, blksize::Int) where T if n_notna <= blksize ifirst = 64*(bytefirst-1)+1 ilast = min(64*bytelast, length(A)) - # Fall back to Base implementation if no NAs in block + # Fall back to Base implementation if no nulls in block return ilast - ifirst + 1 == n_notna ? Base.mapreduce_seq_impl(f, op, A.data, ifirst, ilast) : - mapreduce_seq_impl_skipna(f, op, T, A, ifirst, ilast) + mapreduce_seq_impl_skipnull(f, op, T, A, ifirst, ilast) end # Find byte in the middle of range # The block size is restricted so that there will always be at - # least two non-NA elements in the returned range + # least two non-null elements in the returned range chunks = A.na.chunks nmid = 0 imid = bytefirst-1 @@ -51,8 +51,8 @@ function mapreduce_pairwise_impl_skipna(f, op, A::DataArray{T}, bytefirst::Int, @inbounds nmid += count_zeros(chunks[imid]) end - v1 = mapreduce_pairwise_impl_skipna(f, op, A, bytefirst, imid, nmid, blksize) - v2 = mapreduce_pairwise_impl_skipna(f, op, A, imid+1, bytelast, n_notna-nmid, blksize) + v1 = mapreduce_pairwise_impl_skipnull(f, op, A, bytefirst, imid, nmid, blksize) + v2 = mapreduce_pairwise_impl_skipnull(f, op, A, imid+1, bytelast, n_notna-nmid, blksize) op(v1, v2) end @@ -62,16 +62,16 @@ else const sum_pairwise_blocksize = Base.sum_pairwise_blocksize end -mapreduce_impl_skipna(f, op, A::DataArray{T}) where {T} = - mapreduce_seq_impl_skipna(f, op, T, A, 1, length(A.data)) -mapreduce_impl_skipna(f, op::typeof(+), A::DataArray) = - mapreduce_pairwise_impl_skipna(f, op, A, 1, length(A.na.chunks), - length(A.na)-countnz(A.na), - max(128, sum_pairwise_blocksize(f))) +mapreduce_impl_skipnull(f, op, A::DataArray{T}) where {T} = + mapreduce_seq_impl_skipnull(f, op, T, A, 1, length(A.data)) +mapreduce_impl_skipnull(f, op::typeof(+), A::DataArray) = + mapreduce_pairwise_impl_skipnull(f, op, A, 1, length(A.na.chunks), + length(A.na)-countnz(A.na), + max(128, sum_pairwise_blocksize(f))) ## general mapreduce interface -function _mapreduce_skipna(f, op, A::DataArray{T}) where T +function _mapreduce_skipnull(f, op, A::DataArray{T}) where T n = length(A) na = A.na @@ -80,38 +80,56 @@ function _mapreduce_skipna(f, op, A::DataArray{T}) where T nna == n-1 && return Base.r_promote(op, f(A.data[Base.findnextnot(na, 1)])) nna == 0 && return Base.mapreduce_impl(f, op, A.data, 1, n) - mapreduce_impl_skipna(f, op, 
A) + mapreduce_impl_skipnull(f, op, A) end # This is only safe when we can guarantee that if a function is passed -# NA, it returns NA. Otherwise we will fall back to the implementation +# null, it returns null. Otherwise we will fall back to the implementation # in Base, which is slow because it's type-unstable, but guarantees the # correct semantics const SafeMapFuns = Union{typeof(identity), typeof(abs), typeof(abs2), typeof(exp), typeof(log), typeof(Base.centralizedabs2fun)} const SafeReduceFuns = Union{typeof(+), typeof(*), typeof(max), typeof(min)} function Base._mapreduce(f::SafeMapFuns, op::SafeReduceFuns, A::DataArray) - any(A.na) && return NA + any(A.na) && return null Base._mapreduce(f, op, A.data) end -function Base.mapreduce(f, op::Function, A::DataArray; skipna::Bool=false) - (op === +) ? (skipna ? _mapreduce_skipna(f, +, A) : Base._mapreduce(f, +, A)) : - (op === *) ? (skipna ? _mapreduce_skipna(f, *, A) : Base._mapreduce(f, *, A)) : - (op === &) ? (skipna ? _mapreduce_skipna(f, &, A) : Base._mapreduce(f, &, A)) : - (op === |) ? (skipna ? _mapreduce_skipna(f, |, A) : Base._mapreduce(f, |, A)) : - skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) +function Base.mapreduce(f, op::Function, A::DataArray; + skipnull::Bool=false, skipna::Bool=false) + if skipna && !skipnull + Base.depwarn("skipna=$skipna is deprecated, use skipnull=$skipna instead", :mapreduce) + skipnull = true + end + (op === +) ? (skipnull ? _mapreduce_skipnull(f, +, A) : Base._mapreduce(f, +, A)) : + (op === *) ? (skipnull ? _mapreduce_skipnull(f, *, A) : Base._mapreduce(f, *, A)) : + (op === &) ? (skipnull ? _mapreduce_skipnull(f, &, A) : Base._mapreduce(f, &, A)) : + (op === |) ? (skipnull ? _mapreduce_skipnull(f, |, A) : Base._mapreduce(f, |, A)) : + skipnull ? _mapreduce_skipnull(f, op, A) : Base._mapreduce(f, op, A) end # To silence deprecations, but could be more efficient -Base.mapreduce(f, op::Union{typeof(|), typeof(&)}, A::DataArray; skipna::Bool=false) = - skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) +function Base.mapreduce(f, op::Union{typeof(|), typeof(&)}, A::DataArray; + skipnull::Bool=false, skipna::Bool=false) + if skipna && !skipnull + Base.depwarn("skipna=$skipna is deprecated, use skipnull=$skipna instead", :mapreduce) + skipnull = true + end + skipnull ? _mapreduce_skipnull(f, op, A) : Base._mapreduce(f, op, A) +end -Base.mapreduce(f, op, A::DataArray; skipna::Bool=false) = - skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) +function Base.mapreduce(f, op, A::DataArray; + skipnull::Bool=false, skipna::Bool=false) + if skipna && !skipnull + Base.depwarn("skipna=$skipna is deprecated, use skipnull=$skipna instead", :mapreduce) + skipnull = true + end + skipnull ? 
_mapreduce_skipnull(f, op, A) : Base._mapreduce(f, op, A) +end -Base.reduce(op, A::DataArray; skipna::Bool=false) = - mapreduce(identity, op, A; skipna=skipna) +Base.reduce(op, A::DataArray; + skipnull::Bool=false, skipna::Bool=false) = + mapreduce(identity, op, A; skipnull=skipnull, skipna=skipna) ## usual reductions @@ -120,27 +138,34 @@ for (fn, op) in ((:(Base.sum), +), (:(Base.minimum), min), (:(Base.maximum), max)) @eval begin - $fn(f::Union{Function,$(supertype(typeof(abs)))}, a::DataArray; skipna::Bool=false) = - mapreduce(f, $op, a; skipna=skipna) - $fn(a::DataArray; skipna::Bool=false) = - mapreduce(identity, $op, a; skipna=skipna) + $fn(f::Union{Function,$(supertype(typeof(abs)))}, a::DataArray; + skipnull::Bool=false, skipna::Bool=false) = + mapreduce(f, $op, a; skipnull=skipnull, skipna=skipna) + $fn(a::DataArray; skipnull::Bool=false, skipna::Bool=false) = + mapreduce(identity, $op, a; skipnull=skipnull, skipna=skipna) end end for (fn, f, op) in ((:(Base.sumabs), abs, +), (:(Base.sumabs2), abs2, +)) - @eval $fn(a::DataArray; skipna::Bool=false) = mapreduce($f, $op, a; skipna=skipna) + @eval $fn(a::DataArray; skipnull::Bool=false, skipna::Bool=false) = + mapreduce($f, $op, a; skipnull=skipnull, skipna=skipna) end ## mean -Base.mean(a::DataArray; skipna::Bool=false) = - sum(a; skipna=skipna) / (length(a.na)-(skipna ? countnz(a.na) : 0)) +Base.mean(a::DataArray; skipnull::Bool=false, skipna::Bool=false) = + sum(a; skipnull=skipnull, skipna=skipna) / (length(a.na)-(skipna || skipnull ? countnz(a.na) : 0)) ## variance -function Base.varm(A::DataArray{T}, m::Number; corrected::Bool=true, skipna::Bool=false) where T - if skipna +function Base.varm(A::DataArray{T}, m::Number; + corrected::Bool=true, skipnull::Bool=false, skipna::Bool=false) where T + if skipna || skipnull + if skipna && !skipnull + Base.depwarn("skipna=$skipna is deprecated, use skipnull=$skipna instead", :mapreduce) + end + n = length(A) na = A.na @@ -150,45 +175,51 @@ function Base.varm(A::DataArray{T}, m::Number; corrected::Bool=true, skipna::Boo abs2(A.data[Base.findnextnot(na, 1)] - m)/(1 - corrected)) /(nna == 0 ? Base.centralize_sumabs2(A.data, m, 1, n) : - mapreduce_impl_skipna(Base.centralizedabs2fun(m), +, A), + mapreduce_impl_skipnull(Base.centralizedabs2fun(m), +, A), n - nna - corrected) else - any(A.na) && return NA + any(A.na) && return null Base.varm(A.data, m; corrected=corrected) end end -Base.varm(A::DataArray{T}, m::NAtype; corrected::Bool=true, skipna::Bool=false) where {T} = NA - -function Base.var(A::DataArray; corrected::Bool=true, mean=nothing, skipna::Bool=false) - mean == 0 ? Base.varm(A, 0; corrected=corrected, skipna=skipna) : - mean == nothing ? varm(A, Base.mean(A; skipna=skipna); corrected=corrected, skipna=skipna) : - isa(mean, Data{Number}) ? - varm(A, mean; corrected=corrected, skipna=skipna) : +Base.varm(A::DataArray{T}, m::Null; + corrected::Bool=true, skipnull::Bool=false, skipna::Bool=false) where {T} = null + +function Base.var(A::DataArray; + corrected::Bool=true, mean=nothing, skipnull::Bool=false, skipna::Bool=false) + mean == 0 ? Base.varm(A, 0; corrected=corrected, skipnull=skipnull, skipna=skipna) : + mean == nothing ? varm(A, Base.mean(A; skipnull=skipnull, skipna=skipna); + corrected=corrected, skipnull=skipnull, skipna=skipna) : + isa(mean, Union{Number,Null}) ? 
+ varm(A, mean; corrected=corrected, skipnull=skipnull, skipna=skipna) : throw(ErrorException("Invalid value of mean.")) end -Base.stdm(A::DataArray, m::Number; corrected::Bool=true, skipna::Bool=false) = - sqrt(varm(A, m; corrected=corrected, skipna=skipna)) +Base.stdm(A::DataArray, m::Number; + corrected::Bool=true, skipnull::Bool=false, skipna::Bool=false) = + sqrt(varm(A, m; corrected=corrected, skipnull=skipnull, skipna=skipna)) -Base.std(A::DataArray; corrected::Bool=true, mean=nothing, skipna::Bool=false) = - sqrt(var(A; corrected=corrected, mean=mean, skipna=skipna)) +Base.std(A::DataArray; + corrected::Bool=true, mean=nothing, skipnull::Bool=false, skipna::Bool=false) = + sqrt(var(A; corrected=corrected, mean=mean, skipnull=skipnull, skipna=skipna)) ## weighted mean -function Base.mean(a::DataArray, w::Weights; skipna::Bool=false) - if skipna +function Base.mean(a::DataArray, w::Weights; skipnull::Bool=false, skipna::Bool=false) + if skipna || skipnull v = a .* w.values - sum(v; skipna=true) / sum(DataArray(w.values, v.na); skipna=true) + sum(v; skipnull=true) / sum(DataArray(w.values, v.na); skipnull=true) else - any(isna, a) ? NA : mean(a.data, w) + any(isnull, a) ? null : mean(a.data, w) end end -function Base.mean(a::DataArray, w::Weights{W,V}; skipna::Bool=false) where {W,V<:DataArray} - if skipna +function Base.mean(a::DataArray, w::Weights{W,V}; + skipnull::Bool=false, skipna::Bool=false) where {W,V<:DataArray} + if skipna || skipnull v = a .* w.values - sum(v; skipna=true) / sum(DataArray(w.values.data, v.na); skipna=true) + sum(v; skipnull=true) / sum(DataArray(w.values.data, v.na); skipnull=true) else - any(isna, a) || any(isna, w.values) ? NA : wsum(a.data, w.values.data) / w.sum + any(isnull, a) || any(isnull, w.values) ? null : wsum(a.data, w.values.data) / w.sum end end diff --git a/src/reducedim.jl b/src/reducedim.jl index f7bdcd6..ae0cec4 100644 --- a/src/reducedim.jl +++ b/src/reducedim.jl @@ -3,7 +3,7 @@ using Base.check_reducedims # Determine if there are any true values in a BitArray in a given -# range. We use this for reductions with skipna=false along the first +# range. We use this for reductions with skipnull=false along the first # dimension. function _any(B::BitArray, istart::Int, iend::Int) chunks = B.chunks @@ -20,7 +20,7 @@ function _any(B::BitArray, istart::Int, iend::Int) end # Counts the number of ones in a given range. We use this for counting -# the values for mean and var with skipna=false along the first +# the values for mean and var with skipnull=false along the first # dimension. function _count(B::BitArray, istart::Int, iend::Int) chunks = B.chunks @@ -37,7 +37,7 @@ function _count(B::BitArray, istart::Int, iend::Int) return n end -## NA-preserving +## null-preserving @generated function _mapreducedim!(f::SafeMapFuns, op::SafeReduceFuns, R::DataArray, A::DataArray{T,N} where {T}) where {N} quote @@ -54,7 +54,7 @@ end extr = daextract(R) for i = 1:nslices if _any(na, ibase+1, ibase+lsiz) - unsafe_setna!(R, extr, i) + unsafe_setnull!(R, extr, i) else v = Base.mapreduce_impl(f, op, data, ibase+1, ibase+lsiz) @inbounds unsafe_dasetindex!(R, extr, v, i) @@ -67,8 +67,8 @@ end new_data = R.data - # If reducing to a DataArray, skip strides with NAs. - # In my benchmarks, it is actually faster to compute a new NA + # If reducing to a DataArray, skip strides with nulls. + # In my benchmarks, it is actually faster to compute a new null # array and BitArray it than to operate on the BitArray # directly. 
new_na = fill(false, size(new_data)) @@ -100,7 +100,7 @@ end end end -## NA-preserving to array +## null-preserving to array @generated function _mapreducedim!(f::SafeMapFuns, op::SafeReduceFuns, R::AbstractArray, A::DataArray{T,N} where {T}) where {N} quote @@ -117,7 +117,7 @@ end extr = daextract(R) for i = 1:nslices if _any(na, ibase+1, ibase+lsiz) - throw(NAException("cannot reduce a DataArray containing NAs to an AbstractArray")) + throw(NullException()) else v = Base.mapreduce_impl(f, op, data, ibase+1, ibase+lsiz) @inbounds unsafe_dasetindex!(R, extr, v, i) @@ -127,8 +127,8 @@ end else @nextract $N sizeR d->size(R,d) - # If reducing to a non-DataArray, throw an error at the start on NA - any(isna, A) && throw(NAException("cannot reduce a DataArray containing NAs to an AbstractArray")) + # If reducing to a non-DataArray, throw an error at the start on null + any(isnull, A) && throw(NullException()) @nloops $N i data d->(j_d = sizeR_d==1 ? 1 : i_d) begin @inbounds x = (@nref $N data i) v = f(x) @@ -142,15 +142,15 @@ end end _mapreducedim!(f, op, R, A) = Base._mapreducedim!(f, op, R, A) -## NA-skipping +## null-skipping _getdata(A) = A _getdata(A::DataArray) = A.data # mapreduce across a dimension. If specified, C contains the number of -# non-NA values reduced into each element of R. -@generated function _mapreducedim_skipna_impl!(f, op, R::AbstractArray, - C::Union{Array{Int}, Void}, - A::DataArray{T,N} where {T}) where {N} +# non-null values reduced into each element of R. +@generated function _mapreducedim_skipnull_impl!(f, op, R::AbstractArray, + C::Union{Array{Int}, Void}, + A::DataArray{T,N} where {T}) where {N} quote data = A.data @@ -207,19 +207,19 @@ _getdata(A::DataArray) = A.data end end -_mapreducedim_skipna!(f, op, R::AbstractArray, A::DataArray) = - _mapreducedim_skipna_impl!(f, op, R, nothing, A) +_mapreducedim_skipnull!(f, op, R::AbstractArray, A::DataArray) = + _mapreducedim_skipnull_impl!(f, op, R, nothing, A) -# for MinFun/MaxFun, min or max is NA if all values along a dimension are NA -function _mapreducedim_skipna!(f, op::Union{typeof(min), typeof(max)}, R::DataArray, A::DataArray) +# for MinFun/MaxFun, min or max is null if all values along a dimension are null +function _mapreducedim_skipnull!(f, op::Union{typeof(min), typeof(max)}, R::DataArray, A::DataArray) R.na = BitArray(all!(fill(true, size(R)), A.na)) - _mapreducedim_skipna_impl!(f, op, R, nothing, A) + _mapreducedim_skipnull_impl!(f, op, R, nothing, A) end -function _mapreducedim_skipna!(f, op::Union{typeof(min), typeof(max)}, R::AbstractArray, A::DataArray) +function _mapreducedim_skipnull!(f, op::Union{typeof(min), typeof(max)}, R::AbstractArray, A::DataArray) if any(all!(fill(true, size(R)), A.na)) - throw(NAException("all values along specified dimension are NA for one element of reduced dimension; cannot reduce to non-DataArray")) + throw(NullException()) end - _mapreducedim_skipna_impl!(f, op, R, nothing, A) + _mapreducedim_skipnull_impl!(f, op, R, nothing, A) end ## general reducedim interface @@ -245,23 +245,39 @@ function Base.reducedim_initarray0(A::DataArray, region, v0, ::Type{R}) where R DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) end -function Base.mapreducedim!(f::Function, op, R::AbstractArray, A::DataArray; skipna::Bool=false) - skipna ? 
_mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) +function Base.mapreducedim!(f::Function, op, R::AbstractArray, A::DataArray; + skipnull::Bool=false, skipna::Bool=false) + if skipna && !skipnull + Base.depwarn("skipna=$skipna is deprecated, use skipnull=$skipna instead", :mapreduce) + skipnull = true + end + skipnull ? _mapreducedim_skipnull!(f, op, R, A) : _mapreducedim!(f, op, R, A) +end +function Base.mapreducedim!(f, op, R::AbstractArray, A::DataArray; + skipnull::Bool=false, skipna::Bool=false) + if skipna && !skipnull + Base.depwarn("skipna=$skipna is deprecated, use skipnull=$skipna instead", :mapreduce) + skipnull = true + end + skipnull ? _mapreducedim_skipnull!(f, op, R, A) : _mapreducedim!(f, op, R, A) end -Base.mapreducedim!(f, op, R::AbstractArray, A::DataArray; skipna::Bool=false) = - skipna ? _mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) -Base.reducedim!(op, R::DataArray{RT}, A::AbstractArray; skipna::Bool=false) where {RT} = +Base.reducedim!(op, R::DataArray{RT}, A::AbstractArray; + skipnull::Bool=false, skipna::Bool=false) where {RT} = Base.mapreducedim!(identity, op, R, A, zero(RT); skipna=skipna) -Base.mapreducedim(f, op, A::DataArray, region, v0; skipna::Bool=false) = - Base.mapreducedim!(f, op, Base.reducedim_initarray(A, region, v0), A; skipna=skipna) -Base.mapreducedim(f, op, A::DataArray{T}, region; skipna::Bool=false) where {T} = - Base.mapreducedim!(f, op, Base.reducedim_init(f, op, A, region), A; skipna=skipna) +Base.mapreducedim(f, op, A::DataArray, region, v0; + skipnull::Bool=false, skipna::Bool=false) = + Base.mapreducedim!(f, op, Base.reducedim_initarray(A, region, v0), A; + skipnull=skipnull, skipna=skipna) +Base.mapreducedim(f, op, A::DataArray{T}, region; + skipnull::Bool=false, skipna::Bool=false) where {T} = + Base.mapreducedim!(f, op, Base.reducedim_init(f, op, A, region), A; + skipnull=skipnull, skipna=skipna) -Base.reducedim(op, A::DataArray, region, v0; skipna::Bool=false) = - Base.mapreducedim(identity, op, A, region, v0; skipna=skipna) +Base.reducedim(op, A::DataArray, region, v0; skipnull::Bool=false, skipna::Bool=false) = + Base.mapreducedim(identity, op, A, region, v0; skipnull=skipnull, skipna=skipna) -Base.reducedim(op, A::DataArray, region; skipna::Bool=false) = - Base.mapreducedim(identity, op, A, region; skipna=skipna) +Base.reducedim(op, A::DataArray, region; skipnull::Bool=false, skipna::Bool=false) = + Base.mapreducedim(identity, op, A, region; skipnull=skipnull, skipna=skipna) ## usual reductions @@ -272,15 +288,18 @@ for (basfn, Op) in [(:sum, +), (:prod, *), fname! 
= Expr(:., :Base, Base.Meta.quot(Symbol(string(basfn, '!')))) @eval begin $(fname!)(f::Union{Function,$(supertype(typeof(abs)))}, r::AbstractArray, A::DataArray; - init::Bool=true, skipna::Bool=false) = - Base.mapreducedim!(f, $(Op), Base.initarray!(r, $(Op), init), A; skipna=skipna) - $(fname!)(r::AbstractArray, A::DataArray; init::Bool=true, skipna::Bool=false) = - $(fname!)(identity, r, A; init=init, skipna=skipna) - - $(fname)(f::Union{Function,$(supertype(typeof(abs)))}, A::DataArray, region; skipna::Bool=false) = - Base.mapreducedim(f, $(Op), A, region; skipna=skipna) - $(fname)(A::DataArray, region; skipna::Bool=false) = - $(fname)(identity, A, region; skipna=skipna) + init::Bool=true, skipnull::Bool=false, skipna::Bool=false) = + Base.mapreducedim!(f, $(Op), Base.initarray!(r, $(Op), init), A; + skipnull=skipnull, skipna=skipna) + $(fname!)(r::AbstractArray, A::DataArray; + init::Bool=true, skipnull::Bool=false, skipna::Bool=false) = + $(fname!)(identity, r, A; init=init, skipnull=skipnull, skipna=skipna) + + $(fname)(f::Union{Function,$(supertype(typeof(abs)))}, A::DataArray, region; + skipnull::Bool=false, skipna::Bool=false) = + Base.mapreducedim(f, $(Op), A, region; skipnull=skipnull, skipna=skipna) + $(fname)(A::DataArray, region; skipnull::Bool=false, skipna::Bool=false) = + $(fname)(identity, A, region; skipnull=skipnull, skipna=skipna) end end @@ -292,32 +311,33 @@ for (basfn, fbase, Fun) in [(:sumabs, :sum, abs), fname! = Expr(:., :Base, Base.Meta.quot(Symbol(string(basfn, '!')))) fbase! = Expr(:., :Base, Base.Meta.quot(Symbol(string(fbase, '!')))) @eval begin - $(fname!)(r::AbstractArray, A::DataArray; init::Bool=true, skipna::Bool=false) = - $(fbase!)($(Fun), r, A; init=init, skipna=skipna) - $(fname)(A::DataArray, region; skipna::Bool=false) = - $(fbase)($(Fun), A, region; skipna=skipna) + $(fname!)(r::AbstractArray, A::DataArray; + init::Bool=true, skipnull::Bool=false, skipna::Bool=false) = + $(fbase!)($(Fun), r, A; init=init, skipnull=skipnull, skipna=skipna) + $(fname)(A::DataArray, region; skipnull::Bool=false, skipna::Bool=false) = + $(fbase)($(Fun), A, region; skipnull=skipnull, skipna=skipna) end end ## mean -function Base.mean!(R::AbstractArray{T}, A::DataArray; skipna::Bool=false, - init::Bool=true) where {T} +function Base.mean!(R::AbstractArray{T}, A::DataArray; + skipnull::Bool=false, skipna::Bool=false, init::Bool=true) where {T} init && fill!(R, 0) - if skipna + if skipna || skipnull C = Array{Int}(size(R)) - _mapreducedim_skipna_impl!(identity, +, R, C, A) + _mapreducedim_skipnull_impl!(identity, +, R, C, A) broadcast!(/, R, R, C) else - sum!(R, A; skipna=false) + sum!(R, A; skipnull=false) broadcast!(/, R, R, convert(T, length(A)/length(R))) R end end -Base.mean(A::DataArray{T}, region; skipna::Bool=false) where {T} = - mean!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A; skipna=skipna, - init=false) +Base.mean(A::DataArray{T}, region; skipnull::Bool=false, skipna::Bool=false) where {T} = + mean!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A; + skipnull=skipnull, skipna=skipna, init=false) ## var @@ -347,10 +367,10 @@ end ibase = 0 extr = daextract(R) for i = 1:nslices - if unsafe_isna(S, Sextr, i) || _any(na, ibase+1, ibase+lsiz) - unsafe_setna!(R, extr, i) + if unsafe_isnull(S, Sextr, i) || _any(na, ibase+1, ibase+lsiz) + unsafe_setnull!(R, extr, i) else - @inbounds s = unsafe_getindex_notna(S, Sextr, i) + @inbounds s = unsafe_getindex_notnull(S, Sextr, i) v = Base.mapreduce_impl(MapReduceDim2ArgHelperFun(f, s), 
op, data, ibase+1, ibase+lsiz) @inbounds unsafe_dasetindex!(R, extr, v, i) end @@ -372,7 +392,7 @@ end if vna @inbounds new_na[state_0] = true else - @inbounds s = unsafe_getindex_notna(S, Sextr, state_0) + @inbounds s = unsafe_getindex_notnull(S, Sextr, state_0) @inbounds x = data[k] v = f(x, s) @inbounds v0 = new_data[state_0] @@ -390,11 +410,11 @@ end end end -# A version of _mapreducedim_skipna! that accepts an array S of the same size +# A version of _mapreducedim_skipnull! that accepts an array S of the same size # as R, the elements of which are passed as a second argument to f. -@generated function _mapreducedim_skipna_2arg!(f, op, R::AbstractArray, - C::Union{Array{Int}, Void}, - A::DataArray{T,N} where {T}, S::AbstractArray) where {N} +@generated function _mapreducedim_skipnull_2arg!(f, op, R::AbstractArray, + C::Union{Array{Int}, Void}, + A::DataArray{T,N} where {T}, S::AbstractArray) where {N} quote data = A.data na = A.na @@ -409,7 +429,7 @@ end @nextract $N sizeR d->size(new_data,d) sizA1 = size(data, 1) - # If there are any NAs in S, assume these will produce NAs in R + # If there are any nulls in S, assume these will produce nulls in R if isa(S, DataArray) copy!(R.na, S.na) end @@ -422,10 +442,10 @@ end @inbounds v = new_data[i] !isa(C, Void) && (C[i] = lsiz - _count(na, ibase+1, ibase+lsiz)) - # If S[i] is NA, skip this iteration - @inbounds sna = unsafe_isna(S, Sextr, i) + # If S[i] is null, skip this iteration + @inbounds sna = unsafe_isnull(S, Sextr, i) if !sna - @inbounds s = unsafe_getindex_notna(S, Sextr, i) + @inbounds s = unsafe_getindex_notnull(S, Sextr, i) # TODO: use pairwise impl for sum for k = ibase+1:ibase+lsiz @inbounds Base.unsafe_bitgetindex(na_chunks, k) && continue @@ -447,11 +467,11 @@ end @nloops($N, i, A, d->(state_{d-1} = state_d), d->(skip_d || (state_d = state_0)), begin - @inbounds xna = Base.unsafe_bitgetindex(na_chunks, k) | unsafe_isna(S, Sextr, state_0) + @inbounds xna = Base.unsafe_bitgetindex(na_chunks, k) | unsafe_isnull(S, Sextr, state_0) if xna !isa(C, Void) && @inbounds C[state_0] -= 1 else - @inbounds s = unsafe_getindex_notna(S, Sextr, state_0) + @inbounds s = unsafe_getindex_notnull(S, Sextr, state_0) @inbounds x = data[k] v = f(x, s) @inbounds v0 = new_data[state_0] @@ -471,18 +491,22 @@ struct Abs2MinusFun end (::Abs2MinusFun)(x, m) = abs2(x - m) function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected::Bool=true, - skipna::Bool=false, init::Bool=true) + skipnull::Bool=false, skipna::Bool=false, init::Bool=true) + if skipna && !skipnull + Base.depwarn("skipna=$skipna is deprecated, use skipnull=$skipna instead", :mapreduce) + skipnull = true + end if isempty(A) fill!(R, convert(eltype(R), NaN)) else init && fill!(R, zero(eltype(R))) - if skipna + if skipnull C = Array{Int}(size(R)) # Compute R = abs2(A-m) - _mapreducedim_skipna_2arg!(Abs2MinusFun(), +, R, C, A, m) + _mapreducedim_skipnull_2arg!(Abs2MinusFun(), +, R, C, A, m) - # Divide by number of non-NA values + # Divide by number of non-null values if corrected for i = 1:length(C) @inbounds C[i] = max(C[i] - 1, 0) @@ -500,27 +524,29 @@ function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected: end Base.varm(A::DataArray{T}, m::AbstractArray, region; corrected::Bool=true, - skipna::Bool=false) where {T} = + skipnull::Bool=false, skipna::Bool=false) where {T} = Base.varm!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A, m; - corrected=corrected, skipna=skipna, init=false) + corrected=corrected, skipnull=skipnull, 
skipna=skipna, init=false) function Base.var(A::DataArray{T}, region::Union{Integer, AbstractArray, Tuple}; - corrected::Bool=true, mean=nothing, skipna::Bool=false) where T + corrected::Bool=true, mean=nothing, + skipnull::Bool=false, skipna::Bool=false) where T if mean == 0 Base.varm(A, Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), region; - corrected=corrected, skipna=skipna) + corrected=corrected, skipnull=skipnull, skipna=skipna) elseif mean == nothing - if skipna + if skipna || skipnull # Can reduce mean into ordinary array m = zeros(Base.momenttype(T), length.(Base.reduced_indices(A, region))) - Base.varm(A, Base.mean!(m, A; skipna=skipna), region; - corrected=corrected, skipna=skipna) + Base.varm(A, Base.mean!(m, A; skipnull=skipnull, skipna=skipna), region; + corrected=corrected, skipnull=skipnull, skipna=skipna) else - Base.varm(A, Base.mean(A, region; skipna=skipna), region; - corrected=corrected, skipna=skipna) + Base.varm(A, Base.mean(A, region; skipnull=skipnull, skipna=skipna), region; + corrected=corrected, skipnull=skipnull, skipna=skipna) end elseif isa(mean, AbstractArray) - Base.varm(A, mean::AbstractArray, region; corrected=corrected, skipna=skipna) + Base.varm(A, mean::AbstractArray, region; + corrected=corrected, skipnull=skipnull, skipna=skipna) else throw(ErrorException("invalid value of mean")) end diff --git a/src/sort.jl b/src/sort.jl index a7795c3..e8b2494 100644 --- a/src/sort.jl +++ b/src/sort.jl @@ -1,11 +1,11 @@ # This code is heavily based on the floating point sort code in Base -nas2end!(v::AbstractVector, o::Base.Sort.ForwardOrdering) = nas2right!(v,o) -nas2end!(v::AbstractVector, o::Base.Sort.ReverseOrdering) = nas2left!(v,o) -nas2end!(v::AbstractVector{Int}, o::Base.Order.Perm{O}) where {O<:Base.Order.ForwardOrdering} = nas2right!(v,o) -nas2end!(v::AbstractVector{Int}, o::Base.Order.Perm{O}) where {O<:Base.Order.ReverseOrdering} = nas2left!(v,o) +nulls2end!(v::AbstractVector, o::Base.Sort.ForwardOrdering) = nulls2right!(v,o) +nulls2end!(v::AbstractVector, o::Base.Sort.ReverseOrdering) = nas2left!(v,o) +nulls2end!(v::AbstractVector{Int}, o::Base.Order.Perm{O}) where {O<:Base.Order.ForwardOrdering} = nulls2right!(v,o) +nulls2end!(v::AbstractVector{Int}, o::Base.Order.Perm{O}) where {O<:Base.Order.ReverseOrdering} = nas2left!(v,o) -myisna(o::Base.Order.Ordering, chunks, i::Int) = Base.unsafe_bitgetindex(chunks, i) +myisnull(o::Base.Order.Ordering, chunks, i::Int) = Base.unsafe_bitgetindex(chunks, i) swap(o::Base.Order.DirectOrdering, data, i, j) = setindex!(data, data[i], j) function swap(o::Base.Order.Perm, data, i, j) @@ -19,12 +19,12 @@ function nas2left!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Orde data, chunks = datachunks(o, v) i = lo - @inbounds while i <= hi && myisna(o, chunks, i) + @inbounds while i <= hi && myisnull(o, chunks, i) i += 1 end j = i + 1 @inbounds while j <= hi - if myisna(o, chunks, j) + if myisnull(o, chunks, j) swap(o, data, i, j) i += 1 end @@ -37,16 +37,16 @@ function nas2left!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Orde return i, hi end -function nas2right!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) +function nulls2right!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) data, chunks = datachunks(o, v) i = hi - @inbounds while lo <= i && myisna(o, chunks, i) + @inbounds while lo <= i && myisnull(o, chunks, i) i -= 1 end j = i - 1 @inbounds while lo <= j - if myisna(o, chunks, 
j) + if myisnull(o, chunks, j) swap(o, data, i, j) i -= 1 end @@ -60,13 +60,13 @@ function nas2right!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Ord end function dasort!(v::DataVector, a::Base.Sort.Algorithm, o::Base.Order.DirectOrdering) - lo, hi = nas2end!(v, o) + lo, hi = nulls2end!(v, o) sort!(v.data, lo, hi, a, o) v end function dapermsort!(v::AbstractVector{Int}, a::Base.Sort.Algorithm, o::Base.Order.Perm{O,T}) where {O<:Base.Order.DirectOrdering,T<:DataVector} - lo, hi = nas2end!(v, o) + lo, hi = nulls2end!(v, o) sort!(v, lo, hi, a, Base.Order.Perm(o.order, o.data.data)) end diff --git a/src/statistics.jl b/src/statistics.jl index 7236f69..1dc6aab 100644 --- a/src/statistics.jl +++ b/src/statistics.jl @@ -39,10 +39,10 @@ gl(n::Integer, k::Integer) = gl(n, k, n*k) StatsBase.describe(X::DataVector) = StatsBase.describe(STDOUT, X) function StatsBase.describe(io::IO, X::AbstractDataVector{T}) where T<:Real - nacount = sum(isna, X) + nacount = sum(isnull, X) pna = 100nacount/length(X) - if pna != 100 # describe will fail if dropna returns an empty vector - describe(io, dropna(X)) + if pna != 100 # describe will fail if Nulls.skip returns an empty vector + describe(io, collect(Nulls.skip(X))) else println(io, "Summary Stats:") println(io, "Type: $(T)") @@ -53,11 +53,11 @@ function StatsBase.describe(io::IO, X::AbstractDataVector{T}) where T<:Real end function StatsBase.describe(io::IO, X::AbstractDataVector) - nacount = sum(isna, X) + nacount = sum(isnull, X) pna = 100nacount/length(X) println(io, "Summary Stats:") println(io, "Length: $(length(X))") - println(io, "Type: $(extractT(eltype(X)))") + println(io, "Type: $(Nulls.T(eltype(X)))") println(io, "Number Unique: $(length(unique(X)))") println(io, "Number Missing: $(nacount)") @printf(io, "%% Missing: %.6f\n", pna) diff --git a/src/utils.jl b/src/utils.jl index 29cfbd8..1b80cd1 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,8 +1,8 @@ -# iterate over vals array to find the most generic non-NA type +# iterate over vals array to find the most generic non-null type function typeloop(vals) toptype = None for i = 1:length(vals) - if vals[i] != :NA + if vals[i] != :null toptype = promote_type(toptype, typeof(vals[i])) end end diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 5d49d43..6d5fbad 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -1,18 +1,18 @@ @testset "AbstractArray" begin - unsorted_dv = @data [2, 1, NA] + unsorted_dv = @data [2, 1, null] # TODO: Make this work # tiedrank(dv) @test first(unsorted_dv) == 2 - @test isna(last(unsorted_dv)) + @test isnull(last(unsorted_dv)) - # isna with AbstractArray + # isnull with AbstractArray a = [1, 2, 3] - @test isna.(a) == falses(3) - a = Any[1, 2, NA, 3] - @test isna.(a) == [false, false, true, false] + @test isnull.(a) == falses(3) + a = Any[1, 2, null, 3] + @test isnull.(a) == [false, false, true, false] for i = 1:length(a) - @test isna(a, i) == isna.(a)[i] + @test isnull(a, i) == isnull.(a)[i] end end diff --git a/test/booleans.jl b/test/booleans.jl index 4a8a6ca..ce0d4fc 100644 --- a/test/booleans.jl +++ b/test/booleans.jl @@ -1,29 +1,29 @@ @testset "Booleans" begin - @test NA | true === true - @test isna(NA | false) - @test isna(NA | NA) - @test true | NA === true - @test isna(false | NA) + @test null | true === true + @test isnull(null | false) + @test isnull(null | null) + @test true | null === true + @test isnull(false | null) - @test isna(NA & true) - @test NA & false === false - @test isna(NA & NA) - @test isna(true & NA) - @test 
false & NA === false + @test isnull(null & true) + @test null & false === false + @test isnull(null & null) + @test isnull(true & null) + @test false & null === false - @test isna(NA ⊻ true) - @test isna(NA ⊻ false) - @test isna(NA ⊻ NA) - @test isna(true ⊻ NA) - @test isna(false ⊻ NA) + @test isnull(null ⊻ true) + @test isnull(null ⊻ false) + @test isnull(null ⊻ null) + @test isnull(true ⊻ null) + @test isnull(false ⊻ null) - @test any((@data [1, 2, NA]) .== 1) === true - @test any((@data [NA, 1, 2]) .== 1) === true - @test isna(any((@data [1, 2, NA]) .== 3)) + @test any((@data [1, 2, null]) .== 1) === true + @test any((@data [null, 1, 2]) .== 1) === true + @test isnull(any((@data [1, 2, null]) .== 3)) @test any((@data [1, 2, 3] ).== 4) === false - @test isna(all((@data [1, 1, NA]) .== 1)) - @test isna(all((@data [NA, 1, 1]) .== 1)) + @test isnull(all((@data [1, 1, null]) .== 1)) + @test isnull(all((@data [null, 1, 1]) .== 1)) @test all((@data [1, 1, 1]) .== 1) === true @test all((@data [1, 2, 1]) .== 1) === false end diff --git a/test/broadcast.jl b/test/broadcast.jl index 61e89ff..975e078 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -26,11 +26,11 @@ @test broadcast(+, arr([1, 0]), arr([1, 4])) == [2, 4] @test broadcast(+, arr([1, 0]), 2) == [3, 2] - @test isequal(broadcast(+, arr(eye(2)), arr(@data [NA, 4])), @data [NA NA; 4 5]) - @test isequal(broadcast(+, arr(eye(2)), arr(@data [NA 4])), @data [NA 4; NA 5]) - @test isequal(broadcast(+, arr(@data [1 NA]), arr([1, 4])), @data [2 NA; 5 NA]) - @test isequal(broadcast(+, arr(@data [1, NA]), arr([1 4])), @data [2 5; NA NA]) - @test isequal(broadcast(+, arr(@data [1, NA]), arr([1, 4])), @data [2, NA]) + @test isequal(broadcast(+, arr(eye(2)), arr(@data [null, 4])), @data [null null; 4 5]) + @test isequal(broadcast(+, arr(eye(2)), arr(@data [null 4])), @data [null 4; null 5]) + @test isequal(broadcast(+, arr(@data [1 null]), arr([1, 4])), @data [2 null; 5 null]) + @test isequal(broadcast(+, arr(@data [1, null]), arr([1 4])), @data [2 5; null null]) + @test isequal(broadcast(+, arr(@data [1, null]), arr([1, 4])), @data [2, null]) @test @inferred(arr(eye(2)) .+ arr([1, 4])) == arr([2 1; 4 5]) @test arr(eye(2)) .+ arr([1 4]) == arr([2 4; 1 5]) @@ -114,26 +114,26 @@ # Test String broadcast @test broadcast(==, @data(["a", "b", "c", "d"]), "a") == @data([true,false,false,false]) - # Test broadcasting of functions that do something besides propagate NA - @test isequal(broadcast(isequal, @data([NA, 1]), @data([NA 1])), @data([true false; false true])) - @test isequal(broadcast(isequal, @pdata([NA, 1]), @data([NA 1])), @data([true false; false true])) - @test isequal(broadcast(isequal, @data([NA, 1]), @pdata([NA 1])), @data([true false; false true])) - @test isequal(broadcast(isequal, @pdata([NA, 1]), @pdata([NA 1])), @pdata([true false; false true])) - @test isequal(broadcast(&, @data([NA, false]), @data([NA true false])), @data([NA NA false; false false false])) - @test isequal(broadcast(|, @data([NA, false]), @data([NA true false])), @data([NA true NA; NA true false])) + # Test broadcasting of functions that do something besides propagate null + @test isequal(broadcast(isequal, @data([null, 1]), @data([null 1])), @data([true false; false true])) + @test isequal(broadcast(isequal, @pdata([null, 1]), @data([null 1])), @data([true false; false true])) + @test isequal(broadcast(isequal, @data([null, 1]), @pdata([null 1])), @data([true false; false true])) + @test isequal(broadcast(isequal, @pdata([null, 1]), @pdata([null 1])), @pdata([true 
false; false true])) + @test isequal(broadcast(&, @data([null, false]), @data([null true false])), @data([null null false; false false false])) + @test isequal(broadcast(|, @data([null, false]), @data([null true false])), @data([null true null; null true false])) # Test map! @test map!(+, DataArray(Float64, 2), @data([1, 2]), @data([1, 2])) == @data([2, 4]) x = @data([-1, -2]) @test map!(abs, x, x) == @data([1, 2]) - @test isequal(map!(+, DataArray(Float64, 3), @data([1, NA, 3]), @data([NA, 2, 3])), @data([NA, NA, 6])) - @test map!(isequal, DataArray(Float64, 3), @data([1, NA, NA]), @data([1, NA, 3])) == @data([true, true, false]) + @test isequal(map!(+, DataArray(Float64, 3), @data([1, null, 3]), @data([null, 2, 3])), @data([null, null, 6])) + @test map!(isequal, DataArray(Float64, 3), @data([1, null, null]), @data([1, null, 3])) == @data([true, true, false]) - # isna doesn't propagate NAs so it should return BitArrays - x = isna.(@data [NA, 1, 2]) + # isnull doesn't propagate nulls so it should return BitArrays + x = isnull.(@data [null, 1, 2]) @test x isa BitArray @test x == [true, false, false] - x = (!).(isna.(@data [NA, 1, 2])) + x = (!).(isnull.(@data [null, 1, 2])) @test x isa BitArray @test x == [false, true, true] end diff --git a/test/constructors.jl b/test/constructors.jl index 6b7ee99..a675f88 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -1,10 +1,10 @@ @testset "Constructors" begin # - # NA's + # nulls # - @test isna(NAtype()) - @test isna(NA) + @test isnull(Null()) + @test isnull(null) # # DataVector's @@ -25,7 +25,7 @@ @test isequal(dv, convert(DataArray, 1:3)) dv = DataArray(Int, 3) - @test isequal(eltype(dv), Data{Int}) + @test isequal(eltype(dv), Union{Int,Null}) @test isequal(dv.na, trues(3)) dv = convert(DataArray, zeros(3)) @@ -46,20 +46,31 @@ dv = convert(DataArray, trues(3)) @test isequal(dv, convert(DataArray, trues(3))) + @test DataArray([1, null]) isa DataVector{Int} + @test isequal(DataArray([1, null]), [1, null]) + @test DataArray{Int}([1, null]) isa DataVector{Int} + @test isequal(DataArray{Int}([1, null]), [1, null]) + @test DataArray{Any}([1, null]) isa DataVector{Any} + @test isequal(DataArray{Any}([1, null]), [1, null]) + @test DataArray{Int, 1}([1, null]) isa DataVector{Int} + @test isequal(DataArray{Int, 1}([1, null]), [1, null]) + @test DataArray{Any, 1}([1, null]) isa DataVector{Any} + @test isequal(DataArray{Any, 1}([1, null]), [1, null]) + # # PooledDataArray's # pdv = PooledDataArray([1, 2, 3], falses(3)) @test all(pdv .== [1, 2, 3]) - @test all(isna.(pdv) .== falses(3)) + @test all(isnull.(pdv) .== falses(3)) @test isequal(pdv, PooledDataArray([1, 2, 3], [false, false, false])) @test isequal(pdv, PooledDataArray([1, 2, 3])) pdv = convert(PooledDataArray, trues(3)) @test all(pdv .== [true, true, true]) - @test all(isna.(pdv) .== falses(3)) + @test all(isnull.(pdv) .== falses(3)) @test isequal(pdv, convert(PooledDataArray, trues(3))) pdv = PooledDataArray([1, 2, 3], falses(3)) @@ -67,8 +78,8 @@ @test isequal(pdv, convert(PooledDataArray, PooledDataArray([1, 2, 3]))) pdv = PooledDataArray(Int, 3) - @test isequal(eltype(pdv), Data{Int}) - @test all(isna.(pdv) .== trues(3)) + @test isequal(eltype(pdv), Union{Int, Null}) + @test all(isnull.(pdv) .== trues(3)) pdv = convert(PooledDataArray, zeros(3)) @test isequal(pdv, convert(PooledDataArray, zeros(3))) @@ -106,7 +117,7 @@ @test isequal(dm, convert(DataArray, trues(2, 2))) dm = DataArray(Int, 2, 2) - @test isequal(eltype(dm), Data{Int}) + @test isequal(eltype(dm), Union{Int, Null}) 
@test isequal(dm.na, trues(2, 2)) @test_nowarn convert(DataArray, zeros(2, 2)) @@ -122,4 +133,15 @@ @test_nowarn convert(DataArray, eye(3, 2)) @test_nowarn convert(DataArray, eye(2)) @test_nowarn convert(DataArray, diagm(Float64[pi, pi])) + + @test DataArray([1 null]) isa DataMatrix{Int} + @test isequal(DataArray([1 null]), [1 null]) + @test DataArray{Int}([1 null]) isa DataMatrix{Int} + @test isequal(DataArray{Int}([1 null]), [1 null]) + @test DataArray{Any}([1 null]) isa DataMatrix{Any} + @test isequal(DataArray{Any}([1 null]), [1 null]) + @test DataArray{Int, 2}([1 null]) isa DataMatrix{Int} + @test isequal(DataArray{Int, 2}([1 null]), [1 null]) + @test DataArray{Any, 2}([1 null]) isa DataMatrix{Any} + @test isequal(DataArray{Any, 2}([1 null]), [1 null]) end diff --git a/test/containers.jl b/test/containers.jl index 339028e..a2e835d 100644 --- a/test/containers.jl +++ b/test/containers.jl @@ -1,21 +1,21 @@ @testset "Containers" begin dv = @data ones(3) push!(dv, 3.0) - push!(dv, NA) + push!(dv, null) - @test isequal(dv, (@data [1.0, 1.0, 1.0, 3.0, NA])) + @test isequal(dv, (@data [1.0, 1.0, 1.0, 3.0, null])) a, b = pop!(dv), pop!(dv) - @test isna(a) + @test isnull(a) @test b == 3.0 unshift!(dv, 3.0) - unshift!(dv, NA) + unshift!(dv, null) - @test isequal(dv, (@data [NA, 3.0, 1.0, 1.0, 1.0])) + @test isequal(dv, (@data [null, 3.0, 1.0, 1.0, 1.0])) a, b = shift!(dv), shift!(dv) - @test isna(a) + @test isnull(a) @test b == 3.0 ## SPLICE @@ -44,15 +44,15 @@ end end - dv1 = @data [1.0, 2.0, NA, 2.0, NA, 3.0] + dv1 = @data [1.0, 2.0, null, 2.0, null, 3.0] for dv in (dv1, convert(DataVector{Number}, dv1), convert(PooledDataArray, dv1)) for spliceout in (2, 3, 2:3, 5:6) test_splice(dv, spliceout) test_deleteat(dv, spliceout) for splicein in ([], [3], @data([3]), @pdata([3]), - [3, 4, 5], [3., 4., 5.], @data([3, NA, 4]), - @pdata([3, NA, 4]), @data([NA, 3.0, 4.0]), - @pdata([NA, 3.0, 4.0])) + [3, 4, 5], [3., 4., 5.], @data([3, null, 4]), + @pdata([3, null, 4]), @data([null, 3.0, 4.0]), + @pdata([null, 3.0, 4.0])) test_splice(dv, spliceout, splicein) end end diff --git a/test/conversions.jl b/test/conversions.jl index d38158f..1a04c6b 100644 --- a/test/conversions.jl +++ b/test/conversions.jl @@ -1,18 +1,18 @@ @testset "Conversions" begin - @test isequal(@data([1, 2, NA]), - convert(DataArray, @pdata([1, 2, NA]))) + @test isequal(@data([1, 2, null]), + convert(DataArray, @pdata([1, 2, null]))) # Test vector() and matrix() conversion tools dv = @data ones(5) @test isa(convert(Vector{Float64}, dv), Vector{Float64}) - dv[1] = NA + dv[1] = null # Should raise errors: # vector(dv) # convert(Vector{Float64}, dv) dm = @data ones(3, 3) @test isa(convert(Matrix{Float64}, dm), Matrix{Float64}) - dm[1, 1] = NA + dm[1, 1] = null # Should raise errors: # matrix(dm) # convert(Matrix{Float64}, dm) @@ -21,4 +21,27 @@ convert(DataArray{Integer}, a) a[1] = 2 convert(DataArray{Integer}, a) + + @test convert(DataArray, [1, null]) isa DataVector{Int} + @test isequal(convert(DataArray, [1, null]), [1, null]) + @test convert(DataArray{Int}, [1, null]) isa DataVector{Int} + @test isequal(convert(DataArray{Int}, [1, null]), [1, null]) + @test convert(DataArray{Any}, [1, null]) isa DataVector{Any} + @test isequal(convert(DataArray{Any}, [1, null]), [1, null]) + @test convert(DataArray{Int, 1}, [1, null]) isa DataVector{Int} + @test isequal(convert(DataArray{Int, 1}, [1, null]), [1, null]) + @test convert(DataArray{Any, 1}, [1, null]) isa DataVector{Any} + @test isequal(convert(DataArray{Any, 1}, [1, null]), [1, null]) + + 
@test convert(Array, @data [1, null]) isa Vector{Union{Int, Null}} + @test isequal(convert(Array, @data [1, null]), [1, null]) + @test_throws MethodError convert(Array{Int}, @data [1, null]) + @test convert(Array{Union{Int, Null}}, @data [1, null]) isa Vector{Union{Int, Null}} + @test isequal(convert(Array{Union{Int, Null}}, @data [1, null]), [1, null]) + @test convert(Array{Any}, @data [1, null]) isa Vector{Any} + @test isequal(convert(Array{Any}, @data [1, null]), [1, null]) + @test convert(Array{Union{Int, Null}, 1}, @data [1, null]) isa Vector{Union{Int, Null}} + @test isequal(convert(Array{Union{Int, Null}}, @data [1, null]), [1, null]) + @test convert(Array{Any, 1}, @data [1, null]) isa Vector{Any} + @test isequal(convert(Array{Any, 1}, @data [1, null]), [1, null]) end diff --git a/test/data.jl b/test/data.jl index 6796c30..592ee10 100644 --- a/test/data.jl +++ b/test/data.jl @@ -1,19 +1,18 @@ -@testset "Data types and NA's" begin +@testset "Data types and nulls" begin # TODO: Convert these test_group things to nested testsets - #test_group("NA's") - @test length(NA) == 1 - @test size(NA) == () - @test isna(3 == NA) - @test isna(NA == 3) - @test isna(NA == NA) + #test_group("nulls") + @test isnull(3 == null) + @test isnull(null == 3) + @test isnull(null == null) #test_group("DataVector creation") - dvint = @data [1, 2, NA, 4] + dvint = @data [1, 2, null, 4] dvint2 = DataArray(collect(5:8)) dvint3 = convert(DataArray, 5:8) - dvflt = @data [1.0, 2, NA, 4] - dvstr = @data ["one", "two", NA, "four"] - dvdict = DataArray(Dict, 4) # for issue #199 + dvflt = @data [1.0, 2, null, 4] + dvstr = @data ["one", "two", null, "four"] + # FIXME: triggers a segfault on Julia 0.6.0 + # dvdict = DataArray(Dict, 4) # for issue DataFrames#199 dvany = convert(DataArray{Any, 1}, dvint) @test isa(dvint, DataVector{Int}) @@ -24,7 +23,7 @@ @test_throws ArgumentError DataArray([5:8], falses(2)) #test_group("PooledDataVector creation") - pdvstr = @pdata ["one", "one", "two", "two", NA, "one", "one"] + pdvstr = @pdata ["one", "one", "two", "two", null, "one", "one"] @test isa(pdvstr, PooledDataVector{String}) @test isequal(PooledDataArray(pdvstr), pdvstr) @@ -42,7 +41,7 @@ @test string(pdvpp) == "[one, two, two]" @test string(PooledDataArray(["one", "two", "four"], ["one", "two", "three"])) == - "[one, two, NA]" + "[one, two, null]" #test_group("PooledDataVector utf8 support") pdvpp = PooledDataArray([String("hello")], [false]) @@ -52,44 +51,44 @@ #test_group("DataVector access") @test dvint[1] == 1 - @test isna(dvint[3]) - @test isequal(dvflt[3:4], @data([NA, 4.0])) - @test isequal(dvint[[true, false, true, false]], @data([1, NA])) + @test isnull(dvint[3]) + @test isequal(dvflt[3:4], @data([null, 4.0])) + @test isequal(dvint[[true, false, true, false]], @data([1, null])) @test isequal(dvstr[[1, 2, 1, 4]], @data(["one", "two", "one", "four"])) # Indexing produces #undef? 
- # @test isequal(dvstr[[1, 2, 1, 3]], DataVector["one", "two", "one", NA]) + # @test isequal(dvstr[[1, 2, 1, 3]], DataVector["one", "two", "one", null]) #test_group("PooledDataVector access") @test pdvstr[1] == "one" - @test isna(pdvstr[5]) + @test isnull(pdvstr[5]) @test isequal(pdvstr[1:3], @data(["one", "one", "two"])) @test isequal(pdvstr[[true, false, true, false, true, false, true]], - @pdata(["one", "two", NA, "one"])) + @pdata(["one", "two", null, "one"])) @test isequal(pdvstr[[1, 3, 1, 2]], @data(["one", "two", "one", "one"])) #test_group("DataVector methods") @test size(dvint) == (4,) @test length(dvint) == 4 - @test sum(isna.(dvint)) == 1 - @test eltype(dvint) == Data{Int} + @test sum(isnull.(dvint)) == 1 + @test eltype(dvint) == Union{Int,Null} #test_group("PooledDataVector methods") @test size(pdvstr) == (7,) @test length(pdvstr) == 7 - @test sum(isna.(pdvstr)) == 1 - @test eltype(pdvstr) == Data{String} + @test sum(isnull.(pdvstr)) == 1 + @test eltype(pdvstr) == Union{String,Null} #test_group("DataVector operations") @test isequal(dvint .+ 1, DataArray([2, 3, 4, 5], [false, false, true, false])) - @test isequal(dvint .* 2, @data([2, 4, NA, 8])) - @test isequal(dvint .== 2, @data([false, true, NA, false])) - @test isequal(dvint .> 1, @data([false, true, NA, true])) + @test isequal(dvint .* 2, @data([2, 4, null, 8])) + @test isequal(dvint .== 2, @data([false, true, null, false])) + @test isequal(dvint .> 1, @data([false, true, null, true])) #test_group("PooledDataVector operations") - # @test isequal(pdvstr .== "two", PooledDataVector[false, false, true, true, NA, false, false]) + # @test isequal(pdvstr .== "two", PooledDataVector[false, false, true, true, null, false, false]) #test_group("DataVector to something else") - @test all(dropna(dvint) .== [1, 2, 4]) + @test collect(Nulls.skip(dvint)) == [1, 2, 4] @test all(convert(Vector, dvint, 0) .== [1, 2, 0, 4]) @test all(convert(Vector, dvany, 0) .== [1, 2, 0, 4]) utf8three = convert(String, "three") @@ -98,31 +97,33 @@ @test all(convert(Vector, dvstr, asciithree) .== ["one", "two", "three", "four"]) @test all(convert(Vector{Int}, dvint2) .== [5:8;]) @test all([i + 1 for i in dvint2] .== [6:9;]) - @test all([length(x)::Int for x in dvstr] == [3, 3, 1, 4]) - @test repr(dvint) == "Union{DataArrays.NAtype, $Int}[1, 2, NA, 4]" + #@test all([length(x)::Int for x in dvstr] == [3, 3, 1, 4]) + # Julia 0.6 and 0.7 differ in ordering of Unions + @test repr(dvint) in ("Union{$Int, Nulls.Null}[1, 2, null, 4]", + "Union{Nulls.Null, $Int}[1, 2, null, 4]") #test_group("PooledDataVector to something else") - @test all(dropna(pdvstr) .== ["one", "one", "two", "two", "one", "one"]) + @test collect(Nulls.skip(pdvstr)) == ["one", "one", "two", "two", "one", "one"] @test all(convert(Vector, pdvstr, "nine") .== ["one", "one", "two", "two", "nine", "one", "one"]) - @test all([length(i)::Int for i in pdvstr] .== [3, 3, 3, 3, 1, 3, 3]) + #@test all([length(i)::Int for i in pdvstr] .== [3, 3, 3, 3, 1, 3, 3]) @test string(pdvstr[1:3]) == "[one, one, two]" #test_group("DataVector Filter and Replace") - @test isequal(dropna(dvint), [1, 2, 4]) + @test collect(Nulls.skip(dvint)) == [1, 2, 4] @test isequal(convert(Vector, dvint, 7), [1, 2, 7, 4]) - @test sum(dropna(dvint)) == 7 + @test sum(Nulls.skip(dvint)) == 7 @test sum(convert(Vector, dvint, 7)) == 14 #test_group("PooledDataVector Filter and Replace") - @test reduce(string, "", dropna(pdvstr)) == "oneonetwotwooneone" + @test reduce(string, "", Nulls.skip(pdvstr)) == "oneonetwotwooneone" @test 
reduce(string, "", convert(Vector, pdvstr, "!")) == "oneonetwotwo!oneone" #test_group("DataVector assignment") - assigntest = @data [1, 2, NA, 4] + assigntest = @data [1, 2, null, 4] assigntest[1] = 8 - @test isequal(assigntest, (@data [8, 2, NA, 4])) + @test isequal(assigntest, (@data [8, 2, null, 4])) assigntest[1:2] = 9 - @test isequal(assigntest, (@data [9, 9, NA, 4])) + @test isequal(assigntest, (@data [9, 9, null, 4])) assigntest[[1,3]] = 10 @test isequal(assigntest, (@data [10, 9, 10, 4])) assigntest[[true, false, true, true]] = 11 @@ -133,15 +134,15 @@ @test isequal(assigntest, (@data [14, 13, 11, 15])) assigntest[[true, false, true, false]] = [16, 17] @test isequal(assigntest, (@data [16, 13, 17, 15])) - assigntest[1] = NA - @test isequal(assigntest, (@data [NA, 13, 17, 15])) - assigntest[[1, 2]] = NA - @test isequal(assigntest, (@data [NA, NA, 17, 15])) - assigntest[[true, false, true, false]] = NA - @test isequal(assigntest, (@data [NA, NA, NA, 15])) + assigntest[1] = null + @test isequal(assigntest, (@data [null, 13, 17, 15])) + assigntest[[1, 2]] = null + @test isequal(assigntest, (@data [null, null, 17, 15])) + assigntest[[true, false, true, false]] = null + @test isequal(assigntest, (@data [null, null, null, 15])) assigntest[1] = 1 - assigntest[2:4] = NA - @test isequal(assigntest, (@data [1, NA, NA, NA])) + assigntest[2:4] = null + @test isequal(assigntest, (@data [1, null, null, null])) #test_group("PooledDataVector assignment") ret = (pdvstr[2] = "three") @@ -165,17 +166,17 @@ @test ret == ["four", "five"] @test isequal(pdvstr2[1:2], (@data ["one", "four"])) pdvstr2 = @pdata ["one", "one", "two", "two", "three"] - @test isna(begin pdvstr2[1] = NA end) - @test all(isna(begin pdvstr2[[1, 2]] = NA end)) - @test all(isna(begin pdvstr2[[false, false, true, false, false]] = NA end)) - @test all(isna(begin pdvstr2[4:5] = NA end)) - @test all(isna.(pdvstr2)) + @test isnull(begin pdvstr2[1] = null end) + @test all(isnull(begin pdvstr2[[1, 2]] = null end)) + @test all(isnull(begin pdvstr2[[false, false, true, false, false]] = null end)) + @test all(isnull(begin pdvstr2[4:5] = null end)) + @test all(isnull.(pdvstr2)) #test_group("PooledDataVector replace!") pdvstr2 = @pdata ["one", "one", "two", "two", "three"] @test replace!(pdvstr2, "two", "four") == "four" @test replace!(pdvstr2, "three", "four") == "four" - @test isna.(replace!(pdvstr2, "one", NA)) - @test replace!(pdvstr2, NA, "five") == "five" + @test isnull.(replace!(pdvstr2, "one", null)) + @test replace!(pdvstr2, null, "five") == "five" @test isequal(pdvstr2, (@data ["five", "five", "four", "four", "four"])) end diff --git a/test/dataarray.jl b/test/dataarray.jl index e8597e8..767e219 100644 --- a/test/dataarray.jl +++ b/test/dataarray.jl @@ -24,9 +24,9 @@ similar(dm, 2, 2) similar(dt, 2, 2, 2) - @test isequal(DataArray([NA, NA], [true, true]), DataArray([NA, NA], [false, false])) - @test isequal(DataArray(Any[1, NA], [false, true]), DataArray(Any[1, NA], [false, false])) - @test isequal(DataArray(Any[1, 2], [false, true]), DataArray(Any[1, NA], [false, false])) + @test isequal(DataArray([null, null], [true, true]), DataArray([null, null], [false, false])) + @test isequal(DataArray(Any[1, null], [false, true]), DataArray(Any[1, null], [false, false])) + @test isequal(DataArray(Any[1, 2], [false, true]), DataArray(Any[1, null], [false, false])) x = DataArray([9, 9, 8]) y = DataArray([1, 9, 3, 2, 2]) @@ -36,35 +36,35 @@ y = [1, 9, 3, 2, 2] @test append!(x, y) == [9, 9, 8, 1, 9, 3, 2, 2] - x = @data [1, 2, NA] - y = @data [3, 
NA, 5] + x = @data [1, 2, null] + y = @data [3, null, 5] @test isequal(copy(x), x) @test isequal(copy!(y, x), x) - x = @data [1, NA, -2, 1, NA, 4] - @test isequal(unique(x), @data [1, NA, -2, 4]) - @test isequal(unique(reverse(x)), @data [4, NA, 1, -2]) - @test isequal(unique(dropna(x)), @data [1, -2, 4]) - @test isequal(unique(reverse(dropna(x))), @data [4, 1, -2]) + x = @data [1, null, -2, 1, null, 4] + @test isequal(unique(x), @data [1, null, -2, 4]) + @test isequal(unique(reverse(x)), @data [4, null, 1, -2]) + @test isequal(unique(Nulls.skip(x)), @data [1, -2, 4]) + @test isequal(unique(reverse(collect(Nulls.skip(x)))), @data [4, 1, -2]) @test isequal(levels(x), @data [1, -2, 4]) @test isequal(levels(reverse(x)), @data [4, 1, -2]) - # check case where only NA occurs in final position - @test isequal(unique(@data [1, 2, 1, NA]), @data [1, 2, NA]) + # check case where only null occurs in final position + @test isequal(unique(@data [1, 2, 1, null]), @data [1, 2, null]) # Test copy! function nonbits(dv) ret = similar(dv, Integer) for i = 1:length(dv) - # if !isna(dv, i) + # if !isnull(dv, i) ret[i] = dv[i] # end end ret end - set1 = Any[@data([1, NA, 3]), - @data([NA, 5]), @data([1, 2, 3, 4, 5]), data(Int[]), - @data([NA, 5, 3]), @data([1, 5, 3])] + set1 = Any[@data([1, null, 3]), + @data([null, 5]), @data([1, 2, 3, 4, 5]), data(Int[]), + @data([null, 5, 3]), @data([1, 5, 3])] set2 = map(nonbits, set1) set3 = map(pdata, set1) @@ -99,7 +99,7 @@ end # Inferrability of map (#276) - @test eltype(map(x -> x > 1, @data [1, 2])) == Data{Bool} + @test eltype(map(x -> x > 1, @data [1, 2])) == Union{Bool,Null} @testset "Issue #278" begin x = @data ones(4) diff --git a/test/datamatrix.jl b/test/datamatrix.jl index 391d7a0..3af92a7 100644 --- a/test/datamatrix.jl +++ b/test/datamatrix.jl @@ -35,25 +35,25 @@ @test all(b * b .== m_b * m_b) # - # DataVector * DataMatrix w/ NA's + # DataVector * DataMatrix w/ nulls # - b[1, 1] = NA + b[1, 1] = null res = a * b[1:1, :] - @test all(isna.(res[:, 1])) - @test all(.!(isna.(res[:, 2]))) - @test all(.!(isna.(res[:, 3]))) + @test all(isnull.(res[:, 1])) + @test all(.!(isnull.(res[:, 2]))) + @test all(.!(isnull.(res[:, 3]))) res = a * b[2:2, :] - @test all(.!(isna.(res))) + @test all(.!(isnull.(res))) # - # DataMatrix w NA's * DataVector + # DataMatrix w nulls * DataVector # res = b * a - @test isna.(res[1]) - @test .!(isna.(res[2])) - @test .!(isna.(res[3])) + @test isnull.(res[1]) + @test .!(isnull.(res[2])) + @test .!(isnull.(res[3])) # # DataMatrix * DataMatrix @@ -61,49 +61,49 @@ res = b * b # 3x3 Float64 DataMatrix: - # NA NA NA - # NA 1.0 0.0 - # NA 0.0 1.0 - @test isna.(res[1, 1]) - @test isna.(res[1, 2]) - @test isna.(res[1, 3]) - @test isna.(res[2, 1]) - @test .!(isna.(res[2, 2])) - @test .!(isna.(res[2, 3])) - @test isna.(res[3, 1]) - @test .!(isna.(res[3, 2])) - @test .!(isna.(res[3, 3])) + # null null null + # null 1.0 0.0 + # null 0.0 1.0 + @test isnull.(res[1, 1]) + @test isnull.(res[1, 2]) + @test isnull.(res[1, 3]) + @test isnull.(res[2, 1]) + @test .!(isnull.(res[2, 2])) + @test .!(isnull.(res[2, 3])) + @test isnull.(res[3, 1]) + @test .!(isnull.(res[3, 2])) + @test .!(isnull.(res[3, 3])) res = b * @data eye(3) # 3x3 Float64 DataMatrix: - # NA NA NA + # null null null # 0.0 1.0 0.0 # 0.0 0.0 1.0 - @test isna.(res[1, 1]) - @test isna.(res[1, 2]) - @test isna.(res[1, 3]) - @test .!(isna.(res[2, 1])) - @test .!(isna.(res[2, 2])) - @test .!(isna.(res[2, 3])) - @test .!(isna.(res[3, 1])) - @test .!(isna.(res[3, 2])) - @test .!(isna.(res[3, 3])) + 
@test isnull.(res[1, 1]) + @test isnull.(res[1, 2]) + @test isnull.(res[1, 3]) + @test .!(isnull.(res[2, 1])) + @test .!(isnull.(res[2, 2])) + @test .!(isnull.(res[2, 3])) + @test .!(isnull.(res[3, 1])) + @test .!(isnull.(res[3, 2])) + @test .!(isnull.(res[3, 3])) res = (@data eye(3)) * b # julia> dataeye(3) * b # 3x3 Float64 DataMatrix: - # NA 0.0 0.0 - # NA 1.0 0.0 - # NA 0.0 1.0 - @test isna.(res[1, 1]) - @test .!(isna.(res[1, 2])) - @test .!(isna.(res[1, 3])) - @test isna.(res[2, 1]) - @test .!(isna.(res[2, 2])) - @test .!(isna.(res[2, 3])) - @test isna.(res[3, 1]) - @test .!(isna.(res[3, 2])) - @test .!(isna.(res[3, 3])) + # null 0.0 0.0 + # null 1.0 0.0 + # null 0.0 1.0 + @test isnull.(res[1, 1]) + @test .!(isnull.(res[1, 2])) + @test .!(isnull.(res[1, 3])) + @test isnull.(res[2, 1]) + @test .!(isnull.(res[2, 2])) + @test .!(isnull.(res[2, 3])) + @test isnull.(res[3, 1]) + @test .!(isnull.(res[3, 2])) + @test .!(isnull.(res[3, 3])) # Test row operations dm = @data eye(6, 2) diff --git a/test/extras.jl b/test/extras.jl index 1ecddb8..aec2643 100644 --- a/test/extras.jl +++ b/test/extras.jl @@ -3,12 +3,12 @@ ## countmap ########## - d = @data [NA,3,3] + d = @data [null,3,3] w = weights([1.1,2.2,3.3]) - # cm = Dict{DataArrays.Data{Int}, Int}([(NA, 1), (3, 2)]) - # cmw = Dict{DataArrays.Data{Int}, Real}([(NA, 1.1), (3, 5.5)]) - cm = Dict{Union{NAtype,Int}, Int}([(NA, 1), (3, 2)]) - cmw = Dict{Union{NAtype,Int}, Real}([(NA, 1.1), (3, 5.5)]) + # cm = Dict{Union{Int,Null}, Int}([(null, 1), (3, 2)]) + # cmw = Dict{Union{Int,Null}, Real}([(null, 1.1), (3, 5.5)]) + cm = Dict{Union{Null,Int}, Int}([(null, 1), (3, 2)]) + cmw = Dict{Union{Null,Int}, Real}([(null, 1.1), (3, 5.5)]) @test isequal(countmap(d), cm) @test isequal(countmap(d, w), cmw) @@ -37,13 +37,13 @@ ## repeat ########## - @test isequal(repeat(@data [3.0, 2.0, NA]; inner = 2, outer = 1), - @data [3.0, 3.0, 2.0, 2.0, NA, NA]) - @test isequal(repeat(@pdata ["a", "b", NA]; inner = 2, outer = 1), - @pdata ["a", "a", "b", "b", NA, NA]) - @test isequal(repeat(@data [1 2; 3 NA]; inner = [1, 2], outer = [2, 1]), - @data [1 1 2 2; 3 3 NA NA; 1 1 2 2; 3 3 NA NA]) - @test isequal(repeat(@pdata [:a :b NA]; inner = [2,1], outer = [1,3]), - @pdata [:a :b NA :a :b NA :a :b NA; - :a :b NA :a :b NA :a :b NA]) + @test isequal(repeat(@data [3.0, 2.0, null]; inner = 2, outer = 1), + @data [3.0, 3.0, 2.0, 2.0, null, null]) + @test isequal(repeat(@pdata ["a", "b", null]; inner = 2, outer = 1), + @pdata ["a", "a", "b", "b", null, null]) + @test isequal(repeat(@data [1 2; 3 null]; inner = [1, 2], outer = [2, 1]), + @data [1 1 2 2; 3 3 null null; 1 1 2 2; 3 3 null null]) + @test isequal(repeat(@pdata [:a :b null]; inner = [2,1], outer = [1,3]), + @pdata [:a :b null :a :b null :a :b null; + :a :b null :a :b null :a :b null]) # end diff --git a/test/indexing.jl b/test/indexing.jl index 834dc0f..8fc75a4 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -7,14 +7,14 @@ # Scalar getindex for i = 1:100 if na[i] - @test isna(A[i]) + @test isnull(A[i]) else @test A[i] == data[i] end end for i = 1:10, j = 1:10 if na[i, j] - @test isna(A[i, j]) + @test isnull(A[i, j]) else @test A[i, j] == data[i, j] end @@ -25,7 +25,7 @@ v = A[rg] for i = 1:length(rg) if na[rg[i]] - @test isna(v[i]) + @test isnull(v[i]) else @test v[i] == data[rg[i]] end @@ -34,7 +34,7 @@ v = A[rg, 9] for i = 1:length(rg) if na[rg[i], 9] - @test isna(v[i]) + @test isnull(v[i]) else @test v[i] == data[rg[i], 9] end @@ -44,7 +44,7 @@ v = A[rg, rg2] for j = 1:length(rg2), i = 1:length(rg) if 
na[rg[i], rg2[j]] - @test isna(v[i, j]) + @test isnull(v[i, j]) else @test v[i, j] == data[rg[i], rg2[j]] end @@ -56,14 +56,14 @@ v = A[b] for i = 1:length(rg) if na[rg[i]] - @test isna(v[i]) + @test isnull(v[i]) else @test v[i] == data[rg[i]] end end # getindex with DataVectors with missingness throws - @test_throws NAException A[@data([1, 2, 3, NA])] + @test_throws NullException A[@data([1, 2, 3, null])] # setindex! with scalar indices data = rand(10, 10) @@ -80,7 +80,7 @@ na = bitrand(10, 10) for i = 1:100 - na[i] && (A[i] = NA) + na[i] && (A[i] = null) end # setindex! with scalar and vector indices @@ -91,12 +91,12 @@ @test A[rg[i]] == 1.0 end - # setindex! with NA and vector indices + # setindex! with null and vector indices rg = 5:13 na[rg] = true - A[rg] = NA + A[rg] = null for i = 1:length(rg) - @test isna(A[rg[i]]) + @test isnull(A[rg[i]]) end # setindex! with vector and vector indices @@ -118,7 +118,7 @@ A[rg1, rg2] = datype2(newdata, newna) for j = rg2, i = rg1 if na[i, j] - @test isna(A[i, j]) + @test isnull(A[i, j]) else @test A[i, j] == data[i, j] end diff --git a/test/linalg.jl b/test/linalg.jl index 34712c2..76ab002 100644 --- a/test/linalg.jl +++ b/test/linalg.jl @@ -1,6 +1,6 @@ @testset "LinAlg" begin d = @data eye(3, 3) - d[1, 1] = NA + d[1, 1] = null @test_nowarn svd(d) end diff --git a/test/literals.jl b/test/literals.jl index 2620b4b..868e45a 100644 --- a/test/literals.jl +++ b/test/literals.jl @@ -7,85 +7,85 @@ @test isequal(dv, DataArray(Float64[], Bool[])) @test typeof(dv) == DataVector{Float64} - dv = @data [1, NA, 3] + dv = @data [1, null, 3] @test isequal(dv, DataArray([1, 0, 3], [false, true, false])) - dv = @data [1 NA 3] + dv = @data [1 null 3] @test isequal(dv, DataArray([1 0 3], [false true false])) - dv = @data Float64[1, NA, 3] + dv = @data Float64[1, null, 3] @test isequal(dv, DataArray(Float64[1, 0, 3], [false, true, false])) @test typeof(dv) == DataVector{Float64} - dv = @data Float64[1 NA 3] + dv = @data Float64[1 null 3] @test isequal(dv, DataArray(Float64[1 0 3], [false true false])) @test typeof(dv) == DataMatrix{Float64} - dv = @data [NA, NA] + dv = @data [null, null] @test isequal(dv, DataArray(Any, 2)) @test typeof(dv) == DataVector{Any} - dv = @data [NA NA] + dv = @data [null null] @test isequal(dv, DataArray(Any, 1, 2)) @test typeof(dv) == DataMatrix{Any} - dm = @data [1 NA; 3 4] + dm = @data [1 null; 3 4] @test isequal(dm, DataArray([1 0; 3 4], [false true; false false])) - dm = @data Float64[1 NA; 3 4] + dm = @data Float64[1 null; 3 4] @test isequal(dm, DataArray(Float64[1 0; 3 4], [false true; false false])) @test typeof(dm) == DataMatrix{Float64} - dm = @data [NA NA; NA NA] + dm = @data [null null; null null] @test isequal(dm, DataArray(Any, 2, 2)) @test typeof(dm) == DataMatrix{Any} - pdv = @pdata [1, NA, 3] + pdv = @pdata [1, null, 3] @test isequal(pdv, PooledDataArray([1, 0, 3], [false, true, false])) - pdv = @pdata Float64[1, NA, 3] + pdv = @pdata Float64[1, null, 3] @test isequal(pdv, PooledDataArray(Float64[1, 0, 3], [false, true, false])) @test typeof(pdv) == PooledDataArray{Float64,UInt32,1} - pdv = @pdata [1 NA 3] + pdv = @pdata [1 null 3] @test isequal(pdv, PooledDataArray([1 0 3], [false true false])) - pdv = @pdata Float64[1 NA 3] + pdv = @pdata Float64[1 null 3] @test isequal(pdv, PooledDataArray(Float64[1 0 3], [false true false])) @test typeof(pdv) == PooledDataArray{Float64,UInt32,2} - pdm = @pdata [1 NA; 3 4] + pdm = @pdata [1 null; 3 4] @test isequal(pdm, PooledDataArray([1 0; 3 4], [false true; false false])) - pdm = 
@pdata Float64[1 NA; 3 4] + pdm = @pdata Float64[1 null; 3 4] @test isequal(pdm, PooledDataArray(Float64[1 0; 3 4], [false true; false false])) @test typeof(pdm) == PooledDataArray{Float64,UInt32,2} - pdm = @pdata [1 NA; + pdm = @pdata [1 null; 3 4] @test isequal(pdm, PooledDataArray([1 0; 3 4], @@ -107,12 +107,12 @@ pdm2 = @pdata ones(4, 4) pdm3 = @pdata rand(4, 4) - mixed1 = @data ["x", 1, 1.23, NA] - mixed2 = @data [NA, "x", 1, 1.23, NA] + mixed1 = @data ["x", 1, 1.23, null] + mixed2 = @data [null, "x", 1, 1.23, null] @test isequal(mixed1, DataArray(Any["x", 1, 1.23, 0], [false, false, false, true])) - @test isequal(mixed2, DataArray(Any[NA, "x", 1, 1.23, 0], + @test isequal(mixed2, DataArray(Any[null, "x", 1, 1.23, 0], [true, false, false, false, true])) x = 5.1 @@ -131,14 +131,14 @@ @test isequal(@data([1, 2, x]), DataArray([1, 2, x], [false, false, false])) - ex = :([1, 2, NA]) + ex = :([1, 2, null]) DataArrays.parsedata(ex) - @test isequal(@data([1, 2, NA]), + @test isequal(@data([1, 2, null]), DataArray([1, 2, 1], [false, false, true])) - ex = :([1, 2, x, NA]) + ex = :([1, 2, x, null]) DataArrays.parsedata(ex) - @test isequal(@data([1, 2, x, NA]), + @test isequal(@data([1, 2, x, null]), DataArray([1, 2, x, 1], [false, false, false, true])) # Matrices @@ -158,20 +158,20 @@ @test isequal(@data([1 2; x x]), DataArray([1 2; x x], [false false; false false])) - ex = :([1 2; NA NA]) + ex = :([1 2; null null]) DataArrays.parsedata(ex) - @test isequal(@data([1 2; NA NA]), + @test isequal(@data([1 2; null null]), DataArray([1 2; 1 1], [false false; true true])) - ex = :([1 2; x NA]) + ex = :([1 2; x null]) DataArrays.parsedata(ex) - @test isequal(@data([1 2; x NA]), + @test isequal(@data([1 2; x null]), DataArray([1 2; x 1], [false false; false true])) # Complex vector expressions - ex = :([1 + 1, 2 + 2, x * x, NA]) + ex = :([1 + 1, 2 + 2, x * x, null]) DataArrays.parsedata(ex) - @test isequal(@data([1 + 1, 2 + 2, x * x, NA]), + @test isequal(@data([1 + 1, 2 + 2, x * x, null]), DataArray([1 + 1, 2 + 2, x * x, 1], [false, false, false, true])) @@ -194,9 +194,9 @@ [false, false])) # Complex matrix expressions - ex = :([1 + 1 2 + 2; x * x NA]) + ex = :([1 + 1 2 + 2; x * x null]) DataArrays.parsedata(ex) - @test isequal(@data([1 + 1 2 + 2; x * x NA]), + @test isequal(@data([1 + 1 2 + 2; x * x null]), DataArray([1 + 1 2 + 2; x * x 1], [false false; false true])) @@ -210,11 +210,11 @@ [false false; false false])) - @test isequal(DataArrays.fixargs(:([1, 2, NA, x]).args, -1), + @test isequal(DataArrays.fixargs(:([1, 2, null, x]).args, -1), (Any[1, 2, -1, :x], Any[false, false, true, false])) - @test isequal(DataArrays.findstub_vector(:([1, 2, NA, x])), 1) - @test isequal(DataArrays.findstub_vector(:([NA, NA, NA, x])), :x) + @test isequal(DataArrays.findstub_vector(:([1, 2, null, x])), 1) + @test isequal(DataArrays.findstub_vector(:([null, null, null, x])), :x) # Lots of variables a, b, c, d = 1, 2, 3, 4 diff --git a/test/nas.jl b/test/nas.jl index f80150c..345f1ae 100644 --- a/test/nas.jl +++ b/test/nas.jl @@ -1,75 +1,75 @@ -@testset "NAs" begin - @testset "any(isna, x)" begin - # any(isna, a::AbstractArray) - @test any(isna, Any[NA, 1]) - @test !any(isna, [1, 2]) - @test !any(isna, repeat([1, 2], outer = [1, 2])) - @test !any(isna, repeat([1, 2], outer = [1, 2, 2])) +@testset "nulls" begin + @testset "any(isnull, x)" begin + # any(isnull, a::AbstractArray) + @test any(isnull, Any[null, 1]) + @test !any(isnull, [1, 2]) + @test !any(isnull, repeat([1, 2], outer = [1, 2])) + @test !any(isnull, 
repeat([1, 2], outer = [1, 2, 2])) - # any(isna, da::DataArray) - @test !any(isna, DataArray([1, 2], falses(2))) - @test !any(isna, DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + # any(isnull, da::DataArray) + @test !any(isnull, DataArray([1, 2], falses(2))) + @test !any(isnull, DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) da = DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2)) - @test !any(isna, da) - da[2] = NA - @test any(isna, da) + @test !any(isnull, da) + da[2] = null + @test any(isnull, da) - # any(isna, pda::PooledDataArray) - @test !any(isna, PooledDataArray([1, 2], falses(2))) - @test !any(isna, PooledDataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + # any(isnull, pda::PooledDataArray) + @test !any(isnull, PooledDataArray([1, 2], falses(2))) + @test !any(isnull, PooledDataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) pda = PooledDataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2)) - @test !any(isna, pda) - pda[2] = NA - @test any(isna, pda) + @test !any(isnull, pda) + pda[2] = null + @test any(isnull, pda) end - @testset "all(isna, x)" begin - # all(isna, a::AbstractArray) - @test all(isna, Any[NA, NA]) - @test !all(isna, Any[NA, 1]) - @test !all(isna, [1, 2]) - @test !all(isna, repeat([1, 2], outer = [1, 2])) - @test !all(isna, repeat([1, 2], outer = [1, 2, 2])) + @testset "all(isnull, x)" begin + # all(isnull, a::AbstractArray) + @test all(isnull, Any[null, null]) + @test !all(isnull, Any[null, 1]) + @test !all(isnull, [1, 2]) + @test !all(isnull, repeat([1, 2], outer = [1, 2])) + @test !all(isnull, repeat([1, 2], outer = [1, 2, 2])) - # all(isna, da::DataArray) - @test !all(isna, DataArray([1, 2], falses(2))) - @test !all(isna, DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + # all(isnull, da::DataArray) + @test !all(isnull, DataArray([1, 2], falses(2))) + @test !all(isnull, DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) da = DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2)) - da[1] = NA - @test !all(isna, da) - da[:] = NA - @test all(isna, da) + da[1] = null + @test !all(isnull, da) + da[:] = null + @test all(isnull, da) - # all(isna, da::PooledDataArray) - @test !all(isna, PooledDataArray([1, 2], falses(2))) - @test !all(isna, PooledDataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + # all(isnull, da::PooledDataArray) + @test !all(isnull, PooledDataArray([1, 2], falses(2))) + @test !all(isnull, PooledDataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) pda = PooledDataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2)) - pda[1] = NA - @test !all(isna, pda) - pda[:] = NA - @test all(isna, pda) + pda[1] = null + @test !all(isnull, pda) + pda[:] = null + @test all(isnull, pda) end dv = DataArray(collect(1:6), fill(false, 6)) - a = dropna(dv) - @test collect(each_failna(dv)) == a - @test collect(each_dropna(dv)) == a - @test collect(each_replacena(dv, 4)) == a + a = dv[.!isnull.(dv)] + @test collect(Nulls.fail(dv)) == a + @test collect(Nulls.skip(dv)) == a + @test collect(Nulls.replace(dv, 4)) == a - dv[[1, 2, end]] = NA - - a = dropna(dv) - @test_throws NAException for v in each_failna(dv); end - @test collect(each_dropna(dv)) == a - @test collect(each_replacena(dv, 4)) == [4, 4, a..., 4] + dv[[1, 2, end]] = null @testset "promotion" for (T1, T2) in ((Int, Float64), (Dates.Minute, Dates.Second)) @eval begin - @test promote_type($T1, Data{$T2}) == Data{$T2} - @test promote_type(Data{$T1}, $T2) == Data{$T2} - @test promote_type(Data{$T1}, Data{$T2}) == Data{$T2} + @test 
promote_type($T1, Union{$T2, Null}) == Union{$T2, Null} + @test promote_type(Union{$T1, Null}, $T2) == Union{$T2, Null} + @test promote_type(Union{$T1, Null}, Union{$T2, Null}) == Union{$T2, Null} end end + a = dv[.!isnull.(dv)] + @test_throws NullException for v in Nulls.fail(dv); end + @test collect(Nulls.skip(dv)) == a + @test collect(Nulls.replace(dv, 4)) == [4, 4, a..., 4] + end diff --git a/test/newtests/dataarray.jl b/test/newtests/dataarray.jl index 498a753..2c252b0 100644 --- a/test/newtests/dataarray.jl +++ b/test/newtests/dataarray.jl @@ -119,11 +119,9 @@ module TestDataArrays convert(Vector, DataArray([1, 0, 3], [false, true, false]), -1) convert(Vector, DataArray([1, 2, 3], [false, false, false]), -1) - # dropna(da::DataArray) - dropna(DataArray([1, 0, 3], [false, true, false])) - dropna(DataArray([1, 2, 3], [false, false, false])) - # dropna{T}(da::AbstractDataVector{T}) - # dropna(@data([1, NA, 3])) + # Nulls.skip(da::DataArray) + Nulls.skip(DataArray([1, 0, 3], [false, true, false])) + Nulls.skip(DataArray([1, 2, 3], [false, false, false])) # Iterators @@ -137,83 +135,83 @@ module TestDataArrays [1, 2, 3][dinds] # Base.getindex{S, T}(x::Vector{S}, inds::AbstractDataArray{T}) - dinds = @data([1, 2, NA]) - @test_throws NAException [1.0, 2.0, 3.0, 4.0][dinds] + dinds = @data([1, 2, null]) + @test_throws NullException [1.0, 2.0, 3.0, 4.0][dinds] # Base.getindex{S, T}(x::Array{S}, inds::AbstractDataArray{T}) - dinds = @data([1, 2, NA]) - @test_throws NAException [1.0 2.0; 3.0 4.0][dinds] + dinds = @data([1, 2, null]) + @test_throws NullException [1.0 2.0; 3.0 4.0][dinds] # Base.getindex(d::DataArray, i::SingleIndex) - da = @data([1, 2, NA, 4]) + da = @data([1, 2, null, 4]) da[1] da[3] # da[1.0] deprecated # da[3.0] deprecated # Base.getindex(d::DataArray, inds::AbstractDataVector{Bool}) - da = @data([1, 2, NA, 4]) - dinds = @data([true, false, false, NA]) - @test_throws NAException da[dinds] + da = @data([1, 2, null, 4]) + dinds = @data([true, false, false, null]) + @test_throws NullException da[dinds] # Base.getindex(d::DataArray, inds::AbstractDataVector) - da = @data([1, 2, NA, 4]) - dinds = @data([1, 2, NA, 2]) - @test_throws NAException da[dinds] + da = @data([1, 2, null, 4]) + dinds = @data([1, 2, null, 2]) + @test_throws NullException da[dinds] # Base.getindex{T <: Number, N}(d::DataArray{T,N}, inds::BooleanIndex) - # da = @data([1, 2, NA, 4]) - # inds = [1, 2, NA, 2] + # da = @data([1, 2, null, 4]) + # inds = [1, 2, null, 2] # da[inds] # Base.getindex(d::DataArray, inds::BooleanIndex) - # da = @data([1.0, 2.0, NA, 4.0]) - # inds = [1, 2, NA, 2] + # da = @data([1.0, 2.0, null, 4.0]) + # inds = [1, 2, null, 2] # da[inds] # Base.getindex{T <: Number, N}(d::DataArray{T, N}, inds::MultiIndex) - da = @data([1.0, 2.0, NA, 4.0]) + da = @data([1.0, 2.0, null, 4.0]) inds = [1, 2, 2] da[inds] # Base.getindex(d::DataArray, inds::MultiIndex) - da = @data([1.0, 2.0, NA, 4.0]) + da = @data([1.0, 2.0, null, 4.0]) inds = [1, 2, 2] da[inds] # Base.getindex{T <: Number, N}(d::DataArray{T, N}, inds::BooleanIndex) - da = @data([1.0, 2.0, NA, 4.0]) + da = @data([1.0, 2.0, null, 4.0]) inds = [true, true, false, false] da[inds] # Base.getindex{T <: Number, N}(d::DataArray{T, N}, inds::MultiIndex) - da = @data([1.0, 2.0, NA, 4.0]) + da = @data([1.0, 2.0, null, 4.0]) inds = [1, 2, 2] da[inds] - # Base.setindex!(da::DataArray, val::NAtype, i::SingleIndex) - da = @data([1.0, 2.0, NA, 4.0]) - da[1] = NA + # Base.setindex!(da::DataArray, val::nulltype, i::SingleIndex) + da = @data([1.0, 2.0, null, 
4.0]) + da[1] = null # Base.setindex!(da::DataArray, val::Any, i::SingleIndex) - da = @data([1.0, 2.0, NA, 4.0]) + da = @data([1.0, 2.0, null, 4.0]) da[1] = 3.0 - # Base.setindex!(da::DataArray{NAtype}, val::NAtype, inds::AbstractVector{Bool}) - # da = DataArray([NA, NA], falses(2)) - # da[[true, false]] = NA + # Base.setindex!(da::DataArray{Null}, val::Null, inds::AbstractVector{Bool}) + # da = DataArray([null, null], falses(2)) + # da[[true, false]] = null - # Base.setindex!(da::DataArray{NAtype}, val::NAtype, inds::AbstractVector) - # da = DataArray([NA, NA], falses(2)) - # da[[1, 2]] = NA + # Base.setindex!(da::DataArray{Null}, val::Null, inds::AbstractVector) + # da = DataArray([null, null], falses(2)) + # da[[1, 2]] = null - # Base.setindex!(da::DataArray, val::NAtype, inds::AbstractVector{Bool}) + # Base.setindex!(da::DataArray, val::Null, inds::AbstractVector{Bool}) da = @data([1, 2]) - da[[true, false]] = NA + da[[true, false]] = null - # Base.setindex!(da::DataArray, val::NAtype, inds::AbstractVector) + # Base.setindex!(da::DataArray, val::Null, inds::AbstractVector) da = @data([1, 2]) - da[[1, 2]] = NA + da[[1, 2]] = null # Base.setindex!(da::AbstractDataArray, vals::AbstractVector, inds::AbstractVector{Bool}) da = @data([1, 2]) @@ -239,15 +237,15 @@ module TestDataArrays da = @data([1, 2]) da[[1, 2]] = 5 - # isna(a::AbstractArray) - isna.([1, 2]) - isna.(repeat([1, 2], outer = [1, 2])) - isna.(repeat([1, 2], outer = [1, 2, 2])) + # isnull(a::AbstractArray) + isnull.([1, 2]) + isnull.(repeat([1, 2], outer = [1, 2])) + isnull.(repeat([1, 2], outer = [1, 2, 2])) - # isna(da::DataArray) - isna.(DataArray([1, 2], falses(2))) - isna.(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - isna.(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + # isnull(da::DataArray) + isnull.(DataArray([1, 2], falses(2))) + isnull.(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + isnull.(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # Base.isnan(da::DataArray) isnan.(DataArray([1, 2], falses(2))) diff --git a/test/newtests/datamatrix.jl b/test/newtests/datamatrix.jl index 51bf78b..93696f5 100644 --- a/test/newtests/datamatrix.jl +++ b/test/newtests/datamatrix.jl @@ -3,114 +3,114 @@ module TestDataMatrixs using DataArrays # Base.getindex(d::DataMatrix, i::SingleIndex, j::SingleIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[1, 2] # Base.getindex(x::DataMatrix, i::SingleIndex, col_inds::AbstractDataVector{Bool}) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[1, @data([true, false])] # Base.getindex(x::DataMatrix, i::SingleIndex, col_inds::AbstractDataVector) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[1, @data([1, 2])] # Base.getindex(x::DataMatrix, i::SingleIndex, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[1, [1, 2]] # Base.getindex(x::DataMatrix, row_inds::AbstractDataVector{Bool}, j::SingleIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[@data([true, false]), 1] # Base.getindex(x::DataMatrix, row_inds::AbstractDataVector, j::SingleIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[@data([1, 2]), 1] # Base.getindex(x::DataMatrix, row_inds::MultiIndex, j::SingleIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[[1, 2], 1] # Base.getindex(x::DataMatrix, row_inds::AbstractDataVector{Bool}, col_inds::AbstractDataVector{Bool}) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[@data([true, false]), @data([true, false])] # 
Base.getindex(x::DataMatrix, row_inds::AbstractDataVector{Bool}, col_inds::AbstractDataVector) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[@data([true, false]), @data([1, 2])] # Base.getindex(x::DataMatrix, row_inds::AbstractDataVector{Bool}, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[@data([true, false]), [1, 2]] # Base.getindex(x::DataMatrix, row_inds::AbstractDataVector, col_inds::AbstractDataVector{Bool}) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[@data([1, 2]), @data([true, false])] # Base.getindex(x::DataMatrix, row_inds::AbstractDataVector, col_inds::AbstractDataVector) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[@data([1, 2]), @data([1, 2])] # Base.getindex(x::DataMatrix, row_inds::AbstractDataVector, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[@data([1, 2]), [1, 2]] # Base.getindex(x::DataMatrix, row_inds::MultiIndex, col_inds::AbstractDataVector{Bool}) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[[1, 2], @data([true, false])] # Base.getindex(x::DataMatrix, row_inds::MultiIndex, col_inds::AbstractDataVector) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[[1, 2], @data([1, 2])] # Base.getindex(x::DataMatrix, row_inds::MultiIndex, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[[1, 2], [1, 2]] - # Base.setindex!(dm::DataMatrix, val::NAtype, i::SingleIndex, j::SingleIndex) - dm = @data([1 2; NA 4]) - dm[1, 1] = NA + # Base.setindex!(dm::DataMatrix, val::Null, i::SingleIndex, j::SingleIndex) + dm = @data([1 2; null 4]) + dm[1, 1] = null # Base.setindex!(dm::DataMatrix, val::Any, i::SingleIndex, j::SingleIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[1, 1] = 3 - # Base.setindex!(dm::DataMatrix, val::NAtype, row_inds::MultiIndex, j::SingleIndex) - dm = @data([1 2; NA 4]) + # Base.setindex!(dm::DataMatrix, val::Null, row_inds::MultiIndex, j::SingleIndex) + dm = @data([1 2; null 4]) dm[[1, 2], 1] = 3 # Base.setindex!{S, T}(dm::DataMatrix{S}, vals::Vector{T}, row_inds::MultiIndex, j::SingleIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[[1, 2], 1] = [3, 4] # Base.setindex!(dm::DataMatrix, val::Any, row_inds::MultiIndex, j::SingleIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[[1, 2], 1] = 3 - # Base.setindex!(dm::DataMatrix, val::NAtype, i::SingleIndex, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) - dm[[1, 2], 1] = NA + # Base.setindex!(dm::DataMatrix, val::Null, i::SingleIndex, col_inds::MultiIndex) + dm = @data([1 2; null 4]) + dm[[1, 2], 1] = null # Base.setindex!{S, T}(dm::DataMatrix{S}, vals::Vector{T}, i::SingleIndex, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[1, [1, 2]] = [3, 4] # Base.setindex!(dm::DataMatrix, val::Any, i::SingleIndex, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[1, [1, 2]] = 3 - # Base.setindex!(dm::DataMatrix, val::NAtype, row_inds::MultiIndex, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) - dm[[1, 2], [1, 2]] = NA + # Base.setindex!(dm::DataMatrix, val::Null, row_inds::MultiIndex, col_inds::MultiIndex) + dm = @data([1 2; null 4]) + dm[[1, 2], [1, 2]] = null # Base.setindex!{S, T}(dm::DataMatrix{S}, vals::Vector{T}, row_inds::MultiIndex, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) dm[[1, 2], [1, 2]] = [1, 2, 3, 4] # Base.setindex!(dm::DataMatrix, val::Any, row_inds::MultiIndex, col_inds::MultiIndex) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 
4]) dm[[1, 2], [1, 2]] = 5 # Base.diag{T}(dm::DataMatrix{T}) - dm = @data([1 2; NA 4]) + dm = @data([1 2; null 4]) diag(dm) end diff --git a/test/newtests/datavector.jl b/test/newtests/datavector.jl index 2bdf1b0..e5ecfdc 100644 --- a/test/newtests/datavector.jl +++ b/test/newtests/datavector.jl @@ -3,13 +3,13 @@ module TestDataVectors using DataArrays # Base.getindex(d::DataVector, i::SingleIndex, j::SingleIndex) - da = @data([1, 2, NA, 4]) + da = @data([1, 2, null, 4]) da[1, 1] da[2, 1] # da[1, 2] - # Base.push!{T}(dv::DataVector{T}, v::NAtype) - push!(da, NA) + # Base.push!{T}(dv::DataVector{T}, v::Null) + push!(da, null) # Base.push!{S, T}(dv::DataVector{S}, v::T) push!(da, 6) @@ -17,8 +17,8 @@ module TestDataVectors # Base.pop!(dv::DataVector) pop!(da) - # Base.unshift!{T}(dv::DataVector{T}, v::NAtype) - unshift!(da, NA) + # Base.unshift!{T}(dv::DataVector{T}, v::Null) + unshift!(da, null) # Base.unshift!{S, T}(dv::DataVector{S}, v::T) unshift!(da, -1) @@ -29,9 +29,9 @@ module TestDataVectors # Base.map(f::Function, dv::DataVector) map(sin, da) - # Base.push!{T,R}(pdv::PooledDataVector{T,R}, v::NAtype) - pda = @pdata([1, 2, NA, 4]) - push!(pda, NA) + # Base.push!{T,R}(pdv::PooledDataVector{T,R}, v::Null) + pda = @pdata([1, 2, null, 4]) + push!(pda, null) # Base.push!{S,R,T}(pdv::PooledDataVector{S,R}, v::T) push!(pda, 6) @@ -39,8 +39,8 @@ module TestDataVectors # Base.pop!(pdv::PooledDataVector) = pdv.pool[pop!(pdv.refs)] pop!(pda) - # Base.unshift!{T,R}(pdv::PooledDataVector{T,R}, v::NAtype) - unshift!(pda, NA) + # Base.unshift!{T,R}(pdv::PooledDataVector{T,R}, v::Null) + unshift!(pda, null) # Base.unshift!{S,R,T}(pdv::PooledDataVector{S,R}, v::T) unshift!(pda, 6) @@ -52,7 +52,7 @@ module TestDataVectors reverse(da) reverse(pda) - # padna(dv::AbstractDataVector, front::Integer, back::Integer) - padna(da, 5, 5) - padna(pda, 5, 5) + # padnull(dv::AbstractDataVector, front::Integer, back::Integer) + padnull(da, 5, 5) + padnull(pda, 5, 5) end diff --git a/test/operators.jl b/test/operators.jl index fa3329f..a98f9b8 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -48,41 +48,41 @@ end gamma, lgamma] - # All unary operators return NA when evaluating NA + # All unary operators return null when evaluating null for f in [+, -] - @test isna(f(NA)) + @test isnull(f(null)) end - # All elementary functions return NA when evaluating NA + # All elementary functions return null when evaluating null for f in elementary_functions - @test isna(f(NA)) + @test isnull(f(null)) end - # All comparison operators return NA when comparing NA with NA - # All comparison operators return NA when comparing scalars with NA - # All comparison operators return NA when comparing NA with scalars + # All comparison operators return null when comparing null with null + # All comparison operators return null when comparing scalars with null + # All comparison operators return null when comparing null with scalars for f in comparison_operators - @test isna(f(NA, NA)) - @test isna(f(NA, 1)) - @test isna(f(1, NA)) + @test isnull(f(null, null)) + @test isnull(f(null, 1)) + @test isnull(f(1, null)) end - # All arithmetic operators return NA when operating on two NA's - # All arithmetic operators return NA when operating on a scalar and an NA - # All arithmetic operators return NA when operating on an NA and a scalar + # All arithmetic operators return null when operating on two nulls + # All arithmetic operators return null when operating on a scalar and an null + # All arithmetic operators return null when operating 
on an null and a scalar for f in arithmetic_operators - @test isna(f(NA, NA)) - @test isna(f(1, NA)) - @test isna(f(NA, 1)) + @test isnull(f(null, null)) + @test isnull(f(1, null)) + @test isnull(f(null, 1)) end - # All bit operators return NA when operating on two NA's - # All bit operators return NA when operating on a scalar and an NA - # All bit operators return NA when operating on an NA and a scalar + # All bit operators return null when operating on two nulls + # All bit operators return null when operating on a scalar and an null + # All bit operators return null when operating on an null and a scalar for f in bit_operators - @test isna(f(NA, NA)) - @test isna(f(1, NA)) - @test isna(f(NA, 1)) + @test isnull(f(null, null)) + @test isnull(f(1, null)) + @test isnull(f(null, 1)) end # Unary operators on DataVector's should be equivalent to elementwise @@ -112,18 +112,18 @@ end # Test for both bits and non-bits types for da in (da, convert(DataArray{Number}, da)) let da = copy(da), dat = copy(dat) - # No NA + # No null @test isequal(da.', dat) @test isequal(da', conj(dat)) - # With NA + # With null # XXX we should fix indexing so that this isn't necessary for i = 1:length(da) - da[i] == 5 && (da[i] = NA) - dat[i] == 5 && (dat[i] = NA) + da[i] == 5 && (da[i] = null) + dat[i] == 5 && (dat[i] = null) end - # Make sure that NAs are undefined in the non-bits array + # Make sure that nulls are undefined in the non-bits array da = conj(conj(da)) @test isequal(da.', dat) @test isequal(da', conj(dat)) @@ -141,21 +141,21 @@ end end end - # Broadcasting operations between NA's and DataVector's + # Broadcasting operations between nulls and DataVector's dv = convert(DataArray, ones(5)) @test_da_pda dv begin for f in [+, *, Base.div, Base.mod, Base.fld, Base.rem] for i in 1:length(dv) - @test isna(f(dv, NA)[i]) - @test isna(f(NA, dv)[i]) + @test isnull(f(dv, null)[i]) + @test isnull(f(null, dv)[i]) @test f(dv, 1)[i] == f(dv[i], 1) @test f(1, dv)[i] == f(1, dv[i]) end end for f in arithmetic_operators for i in 1:length(dv) - @test isna(f.(dv, NA)[i]) - @test isna(f.(NA, dv)[i]) + @test isnull(f.(dv, null)[i]) + @test isnull(f.(null, dv)[i]) @test f.(dv, 1)[i] == f(dv[i], 1) @test f.(1, dv)[i] == f(1, dv[i]) end @@ -164,7 +164,7 @@ end @test_da_pda dv begin for i in 1:length(dv) - @test isna((dv / NA)[i]) + @test isnull((dv / null)[i]) @test (dv / 1)[i] == dv[i] / 1 end end @@ -180,15 +180,15 @@ end # Binary operations on (DataVector, Vector) or (Vector, DataVector) v = ones(5) dv = convert(DataArray, ones(5)) - dv[1] = NA + dv[1] = null bv = [true, false, false, true, true] bbv = BitArray([true, false, false, true, true]) bdv = @data [false, true, false, false, true] @test_da_pda dv begin for f in [+, -, *, ^] for i in 1:length(dv) - @test isna(f.(v, dv)[i]) && isna(dv[i]) || f.(v, dv)[i] == f(v[i], dv[i]) - @test isna(f.(dv, v)[i]) && isna(dv[i]) || f.(dv, v)[i] == f(dv[i], v[i]) + @test isnull(f.(v, dv)[i]) && isnull(dv[i]) || f.(v, dv)[i] == f(v[i], dv[i]) + @test isnull(f.(dv, v)[i]) && isnull(dv[i]) || f.(dv, v)[i] == f(dv[i], v[i]) end end for f in bit_operators @@ -205,16 +205,16 @@ end dv = convert(DataArray, ones(5)) # Dates are an example of type for which - and .- return a different type from its inputs dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) - dv[1] = dvd[1] = NA + dv[1] = dvd[1] = null @test_da_pda dv begin for f in [+, -, *, ^] for i in 1:length(dv) - @test isna(f.(dv, dv)[i]) && isna(dv[i]) || f.(dv, dv)[i] == f(dv[i], dv[i]) + @test 
isnull(f.(dv, dv)[i]) && isnull(dv[i]) || f.(dv, dv)[i] == f(dv[i], dv[i]) end end for f in [+,-] for i in 1:length(dv) - @test isna((f)(dv, dv)[i]) && isna(dv[i]) || (f)(dv, dv)[i] == (f)(dv[i], dv[i]) + @test isnull((f)(dv, dv)[i]) && isnull(dv[i]) || (f)(dv, dv)[i] == (f)(dv[i], dv[i]) end end for f in bit_operators @@ -223,15 +223,15 @@ end end end for i in 1:length(dvd) - @test isna((dvd - dvd)[i]) && isna(dvd[i]) || (dvd - dvd)[i] == dvd[i] - dvd[i] - @test isna((dvd .- dvd)[i]) && isna(dvd[i]) || (dvd .- dvd)[i] == dvd[i] - dvd[i] + @test isnull((dvd - dvd)[i]) && isnull(dvd[i]) || (dvd - dvd)[i] == dvd[i] - dvd[i] + @test isnull((dvd .- dvd)[i]) && isnull(dvd[i]) || (dvd .- dvd)[i] == dvd[i] - dvd[i] end end # + and - with UniformScaling mI = zeros(Int, 5, 5) + 5I for dm in (convert(DataArray, ones(5, 5)), convert(DataArray, trues(5, 5))) - dm[1] = NA + dm[1] = null @test_da_pda dm begin @test isequal(dm + 5I, dm + mI) @test isequal(5I + dm, mI + dm) @@ -245,11 +245,11 @@ end convert(DataVector{Int}, dv), convert(DataVector{Float32}, dv)) for i in 1:length(curdv) - @test isna((curdv./curdv)[i]) && isna(curdv[i]) || + @test isnull((curdv./curdv)[i]) && isnull(curdv[i]) || isequal((curdv./curdv)[i], (curdv[i]./curdv[i])) - @test isna((curdv./2)[i]) && isna(curdv[i]) || + @test isnull((curdv./2)[i]) && isnull(curdv[i]) || isequal((curdv./2)[i], (curdv[i]./2)) - @test isna((curdv/2)[i]) && isna(curdv[i]) || + @test isnull((curdv/2)[i]) && isnull(curdv[i]) || isequal((curdv/2)[i], (curdv[i]/2)) end end @@ -259,9 +259,9 @@ end for f in map(eval, DataArrays.unary_vector_operators) @test isequal(f(dv), f(dv.data)) end - dv[1] = NA + dv[1] = null for f in map(eval, DataArrays.unary_vector_operators) - @test isna(f(dv)) + @test isnull(f(dv)) end # Pairwise vector operators on DataVector's @@ -274,39 +274,39 @@ end @test isequal(f(dv), f(dv.data)) @test isequal(f(dvd), f(dvd.data)) end - dv = @data([NA, 269, 835.0, 448, 772]) - dvd = @data([NA, Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) + dv = @data([null, 269, 835.0, 448, 772]) + dvd = @data([null, Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) for f in pairwise_vector_operators v = f(dv) - @test isna(v[1]) + @test isnull(v[1]) @test isequal(v[2:4], f(dv.data)[2:4]) d = f(dvd) - @test isna(d[1]) + @test isnull(d[1]) @test isequal(d[2:3], f(dvd.data)[2:3]) end - dv = @data([911, NA, 835.0, 448, 772]) - dvd = @data([Base.Date("2000-01-01"), NA, Base.Date("2010-01-01"), Base.Date("2010-01-05")]) + dv = @data([911, null, 835.0, 448, 772]) + dvd = @data([Base.Date("2000-01-01"), null, Base.Date("2010-01-01"), Base.Date("2010-01-05")]) for f in pairwise_vector_operators v = f(dv) - @test isna(v[1]) - @test isna(v[2]) + @test isnull(v[1]) + @test isnull(v[2]) @test isequal(v[3:4], f(dv.data)[3:4]) d = f(dvd) - @test isna(d[1]) - @test isna(d[2]) + @test isnull(d[1]) + @test isnull(d[2]) @test isequal(d[3:3], f(dvd.data)[3:3]) end - dv = @data([911, 269, 835.0, 448, NA]) - dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05"), NA]) + dv = @data([911, 269, 835.0, 448, null]) + dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05"), null]) for f in pairwise_vector_operators v = f(dv) - @test isna(v[4]) + @test isnull(v[4]) @test isequal(v[1:3], f(dv.data)[1:3]) d = f(dvd) - @test isna(d[3]) + @test isnull(d[3]) @test isequal(d[1:2], f(dvd.data)[1:2]) end @@ -317,13 +317,13 @@ end @test f(dv)[i] == f(dv.data)[i] end end - dv[4] 
= NA + dv[4] = null for f in [Base.cumprod, Base.cumsum] for i in 1:3 @test f(dv)[i] == f(dv.data)[i] end for i in 4:5 - @test isna(f(dv)[i]) + @test isnull(f(dv)[i]) end end @@ -333,9 +333,9 @@ end @test f(dv, dv) == f(dv.data, dv.data) || (isnan(f(dv, dv)) && isnan(f(dv.data, dv.data))) end - dv[1] = NA + dv[1] = null for f in map(eval, DataArrays.binary_vector_operators) - @test isna(f(dv, dv)) + @test isnull(f(dv, dv)) end # Boolean operators on DataVector's @@ -356,14 +356,14 @@ end end dv = convert(DataArray, falses(5)) - dv[1] = NA + dv[1] = null @test_da_pda dv begin - @test isna(any(dv)) + @test isnull(any(dv)) @test all(dv) == false end dv = convert(DataArray, falses(5)) - dv[2] = NA + dv[2] = null dv[3] = true @test_da_pda dv begin @test any(dv) == true @@ -371,31 +371,31 @@ end end dv = convert(DataArray, falses(5)) - dv[2] = NA + dv[2] = null @test_da_pda dv begin - @test isna(any(dv)) + @test isnull(any(dv)) @test all(dv) == false end dv = convert(DataArray, falses(1)) - dv[1] = NA + dv[1] = null @test_da_pda dv begin - @test isna(any(dv)) - @test isna(all(dv)) + @test isnull(any(dv)) + @test isnull(all(dv)) end dv = convert(DataArray, trues(5)) - dv[1] = NA + dv[1] = null @test_da_pda dv begin @test any(dv) == true - @test isna(all(dv)) + @test isnull(all(dv)) end dv = convert(DataArray, trues(5)) - dv[2] = NA + dv[2] = null @test_da_pda dv begin @test any(dv) == true - @test isna(all(dv)) + @test isnull(all(dv)) end # @@ -403,13 +403,13 @@ end # v = [1, 2] - dv = @data([1, NA]) - alt_dv = @data([2, NA]) - pdv = convert(PooledDataArray, @data([1, NA])) - alt_pdv = convert(PooledDataArray, @data([2, NA])) + dv = @data([1, null]) + alt_dv = @data([2, null]) + pdv = convert(PooledDataArray, @data([1, null])) + alt_pdv = convert(PooledDataArray, @data([2, null])) - @test isna(NA == NA) - @test isna(NA != NA) + @test isnull(null == null) + @test isnull(null != null) function test_da_eq(v1::AbstractArray, v2::AbstractArray, out) for a in (v1, convert(DataArray, v1), convert(PooledDataArray, v1)) @@ -430,15 +430,15 @@ end end end - # Comparing two otherwise equal DataArray with NAs returns NA - test_da_eq(dv, dv, NA) - test_da_eq(dv, v, NA) - test_da_eq(dv, @data([NA, 1]), NA) - # Comparing two equal arrays with no NAs returns true + # Comparing two otherwise equal DataArray with nulls returns null + test_da_eq(dv, dv, null) + test_da_eq(dv, v, null) + test_da_eq(dv, @data([null, 1]), null) + # Comparing two equal arrays with no nulls returns true test_da_eq(v, v, true) - # Comparing two unequal arrays with no NAs returns false + # Comparing two unequal arrays with no nulls returns false test_da_eq(v, @data([1, 3]), false) - # Comparing two otherwise unequal arrays with NAs returns false + # Comparing two otherwise unequal arrays with nulls returns false test_da_eq(dv, alt_dv, false) # Comparing two arrays of unequal sizes returns false test_da_eq(dv, [1], false) @@ -449,20 +449,20 @@ end @test !isequal(dv, alt_dv) @test !isequal(pdv, alt_pdv) - @test isequal(@data([1, NA]) .== @data([1, NA]), @data([true, NA])) - @test isequal(@pdata([1, NA]) .== @pdata([1, NA]), @data([true, NA])) + @test isequal(@data([1, null]) .== @data([1, null]), @data([true, null])) + @test isequal(@pdata([1, null]) .== @pdata([1, null]), @data([true, null])) - @test all(isna.(NA .== convert(DataArray, ones(5)))) - @test all(isna, isna.(convert(DataArray, ones(5))) .== NA) - @test all(isna.(NA .== PooledDataArray(convert(DataArray, ones(5))))) - @test all(isna, isna.(convert(PooledDataArray, 
convert(DataArray, ones(5)))) .== NA) + @test all(isnull.(null .== convert(DataArray, ones(5)))) + @test all(isnull, isnull.(convert(DataArray, ones(5))) .== null) + @test all(isnull.(null .== PooledDataArray(convert(DataArray, ones(5))))) + @test all(isnull, isnull.(convert(PooledDataArray, convert(DataArray, ones(5)))) .== null) # Run length encoding dv = convert(DataArray, ones(5)) - dv[3] = NA + dv[3] = null v, l = DataArrays.rle(dv) - @test isequal(v, @data([1.0, NA, 1.0])) + @test isequal(v, @data([1.0, null, 1.0])) @test l == [2, 1, 2] rdv = DataArrays.inverse_rle(v, l) @@ -471,8 +471,8 @@ end # Issue #90 a = @data([false, true, false, true]) b = @data([false, false, true, true]) - a[:] = NA - b[:] = NA - @test all(isna, a .& b) - @test all(isna, a .| b) + a[:] = null + b[:] = null + @test all(isnull, a .& b) + @test all(isnull, a .| b) end diff --git a/test/padding.jl b/test/padding.jl index 449d335..da3482d 100644 --- a/test/padding.jl +++ b/test/padding.jl @@ -1,7 +1,7 @@ @testset "Padding" begin dv = @data ones(3) - @test isequal(dv, padna(dv, 0, 0)) - @test length(padna(dv, 2, 0)) == length(dv) + 2 - @test length(padna(dv, 0, 2)) == length(dv) + 2 - @test length(padna(dv, 2, 2)) == length(dv) + 4 + @test isequal(dv, padnull(dv, 0, 0)) + @test length(padnull(dv, 2, 0)) == length(dv) + 2 + @test length(padnull(dv, 0, 2)) == length(dv) + 2 + @test length(padnull(dv, 2, 2)) == length(dv) + 4 end diff --git a/test/pooleddataarray.jl b/test/pooleddataarray.jl index 1fbb2ee..689916a 100644 --- a/test/pooleddataarray.jl +++ b/test/pooleddataarray.jl @@ -1,16 +1,16 @@ @testset "PDA" begin - p = @pdata [9, 9, 8, NA, 1, 1] + p = @pdata [9, 9, 8, null, 1, 1] pcopy = copy(p) @test levels(p) == [1, 8, 9] @test levels(setlevels(p, ["a", "b", "c"])) == ["a", "b", "c"] - @test dropna(setlevels(p, (@data ["a", "b", NA]))) == ["b", "a", "a"] - @test dropna(setlevels(p, (@data ["a", "b", "a"]))) == ["a", "a", "b", "a", "a"] + @test collect(Nulls.skip(setlevels(p, (@data ["a", "b", null])))) == ["b", "a", "a"] + @test collect(Nulls.skip(setlevels(p, (@data ["a", "b", "a"])))) == ["a", "a", "b", "a", "a"] @test levels(setlevels(p, (@data ["a", "b", "a"]))) == ["a", "b"] @test levels(setlevels(p, Dict([(1, 111)]))) == [111, 8, 9] - @test levels(setlevels(p, Dict([(1, 111), (8, NA)]))) == [111, 9] + @test levels(setlevels(p, Dict([(1, 111), (8, null)]))) == [111, 9] @test levels(PooledDataArray(p, [9, 8, 1])) == [9, 8, 1] @test levels(PooledDataArray(p, [9, 8])) == [9, 8] - @test dropna(PooledDataArray(p, [9, 8])) == [9, 9, 8] + @test collect(Nulls.skip(PooledDataArray(p, [9, 8]))) == [9, 9, 8] @test levels(PooledDataArray(p, levels(p)[[3,2,1]])) == [9,8,1] v = collect(1:6) @test isequal(p, reorder(p)) @@ -19,27 +19,27 @@ @test levels(setlevels!(copy(p), [10,80,90])) == [10, 80, 90] @test levels(setlevels!(copy(p), [1,8,1])) == [1, 8] - @test levels(setlevels!(copy(p), (@data [1, 8, NA]))) == [1, 8] + @test levels(setlevels!(copy(p), (@data [1, 8, null]))) == [1, 8] @test levels(setlevels!(copy(p), [1,8,9, 10])) == [1, 8, 9, 10] @test levels(setlevels!(copy(p), Dict([(1, 111)]))) == [111, 8, 9] - @test levels(setlevels!(copy(p), Dict([(1, 111), (8, NA)]))) == [111, 9] + @test levels(setlevels!(copy(p), Dict([(1, 111), (8, null)]))) == [111, 9] # issue #201 @test levels(setlevels!(@pdata([1.0, 2.0]), [3,4])) == [3.0, 4.0] - y = @pdata [1, NA, -2, 1, NA, 4, NA] - @test isequal(unique(y), @pdata [1, NA, -2, 4]) - @test isequal(unique(reverse(y)), @data [NA, 4, 1, -2]) - @test isequal(unique(dropna(y)), 
-    @test isequal(unique(reverse(dropna(y))), @data [4, 1, -2])
+    y = @pdata [1, null, -2, 1, null, 4, null]
+    @test isequal(unique(y), @pdata [1, null, -2, 4])
+    @test isequal(unique(reverse(y)), @data [null, 4, 1, -2])
+    @test isequal(unique(Nulls.skip(y)), @data [1, -2, 4])
+    @test isequal(unique(reverse(collect(Nulls.skip(y)))), @data [4, 1, -2])

-    z = @pdata ["frank", NA, "gertrude", "frank", NA, "herbert", NA]
-    @test isequal(unique(z), @pdata ["frank", NA, "gertrude", "herbert"])
-    @test isequal(unique(reverse(z)), @pdata [NA, "herbert", "frank", "gertrude"])
-    @test isequal(unique(dropna(z)), @pdata ["frank", "gertrude", "herbert"])
-    @test isequal(unique(reverse(dropna(z))), @pdata ["herbert", "frank", "gertrude"])
+    z = @pdata ["frank", null, "gertrude", "frank", null, "herbert", null]
+    @test isequal(unique(z), @pdata ["frank", null, "gertrude", "herbert"])
+    @test isequal(unique(reverse(z)), @pdata [null, "herbert", "frank", "gertrude"])
+    @test isequal(unique(Nulls.skip(z)), @pdata ["frank", "gertrude", "herbert"])
+    @test isequal(unique(reverse(collect(Nulls.skip(z)))), @pdata ["herbert", "frank", "gertrude"])

-    # check case where only NA occurs in final position
-    @test isequal(unique(@pdata [1, 2, 1, NA]), @pdata [1, 2, NA])
+    # check case where only null occurs in final position
+    @test isequal(unique(@pdata [1, 2, 1, null]), @pdata [1, 2, null])

     pp = PooledDataArray(Any[])
     @test length(pp) == 0
@@ -61,7 +61,7 @@
     end

     pcopy = copy(p)
-    @test levels(append!(pcopy, @pdata [4, NA, 6, 5])) == [1, 8, 9, 4, 5, 6]
+    @test levels(append!(pcopy, @pdata [4, null, 6, 5])) == [1, 8, 9, 4, 5, 6]

     x = PooledDataArray([9, 9, 8])
     y = PooledDataArray([1, 9, 3, 2, 2])
@@ -102,9 +102,9 @@
     @test typeof(da) == DataArray{Float32,2}

     # permute
-    pda = @pdata([NA, "A", "B", "C", "A", "B"])
-    @test isequal(Base.permute!!(copy(pda), [2, 5, 3, 6, 4, 1]), @pdata(["A", "A", "B", "B", "C", NA]))
-    @test isequal(Base.ipermute!!(copy(pda), [6, 1, 3, 5, 2, 4]), @pdata(["A", "A", "B", "B", "C", NA]))
+    pda = @pdata([null, "A", "B", "C", "A", "B"])
+    @test isequal(Base.permute!!(copy(pda), [2, 5, 3, 6, 4, 1]), @pdata(["A", "A", "B", "B", "C", null]))
+    @test isequal(Base.ipermute!!(copy(pda), [6, 1, 3, 5, 2, 4]), @pdata(["A", "A", "B", "B", "C", null]))

     a1 = 1:200
     a2 = 100:300
@@ -135,7 +135,7 @@
     masks = [[1], [2], [3], [1, 3]]
     for mask in masks
         y = PooledDataArray(x)
-        y[mask] = NA
+        y[mask] = null
         @test isequal(sort(unique(y)), sort(DataArray(unique(y))))
     end
     z = PooledDataArray([1, 2], [1, 2, 3])
diff --git a/test/reduce.jl b/test/reduce.jl
index a30384b..98a7697 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -14,14 +14,14 @@ end

     ## extended test of sum

-    for skipna in (true, false)
-        @test sum(@data(Int8[]); skipna=skipna) === Int32(0)
-        @test sum(@data(Int[]); skipna=skipna) === 0
-        @test sum(@data(Float64[]); skipna=skipna) === 0.0
+    for skipnull in (true, false)
+        @test sum(@data(Int8[]); skipnull=skipnull) === Int32(0)
+        @test sum(@data(Int[]); skipnull=skipnull) === 0
+        @test sum(@data(Float64[]); skipnull=skipnull) === 0.0

-        @test sum(@data([Int8(3)]); skipna=skipna) === Int32(3)
-        @test sum(@data([3]); skipna=skipna) === 3
-        @test sum(@data([3.0]); skipna=skipna) === 3.0
+        @test sum(@data([Int8(3)]); skipnull=skipnull) === Int32(3)
+        @test sum(@data([3]); skipnull=skipnull) === 3
+        @test sum(@data([3.0]); skipnull=skipnull) === 3.0

         z = DataArray(reshape(1:16, (2,2,2,2)))
         fz = convert(DataArray{Float64}, z)
@@ -31,49 +31,49 @@ end
         @test sum(bfz) == 136
     end

-    @test sum(@data(Int[NA])) === NA
-    @test sum(@data(Int[NA]); skipna=true) === 0
-    @test sum(@data(Int[NA, NA])) === NA
-    @test sum(@data(Int[NA, NA]); skipna=true) === 0
-    @test sum(@data(Int[NA, NA, 1]); skipna=true) === 1
-    @test sum(@data(Int[NA, NA, 1, 2]); skipna=true) === 3
-    @test sum(@data(Int[NA, 1, NA, 1, 2]); skipna=true) === 4
+    @test sum(@data(Int[null])) === null
+    @test sum(@data(Int[null]); skipnull=true) === 0
+    @test sum(@data(Int[null, null])) === null
+    @test sum(@data(Int[null, null]); skipnull=true) === 0
+    @test sum(@data(Int[null, null, 1]); skipnull=true) === 1
+    @test sum(@data(Int[null, null, 1, 2]); skipnull=true) === 3
+    @test sum(@data(Int[null, 1, null, 1, 2]); skipnull=true) === 4

     z = DataArray(reshape(1:16, (2,2,2,2)))
-    z[6] = NA
+    z[6] = null
     fz = convert(DataArray{Float64}, z)
     bfz = convert(DataArray{BigFloat}, z)
-    @test isna(sum(z))
-    @test isna(sum(fz))
-    @test isna(sum(bfz))
-    @test sum(z; skipna=true) === 130
-    @test sum(fz; skipna=true) === 130.0
-    @test sum(bfz; skipna=true) == 130
+    @test isnull(sum(z))
+    @test isnull(sum(fz))
+    @test isnull(sum(bfz))
+    @test sum(z; skipnull=true) === 130
+    @test sum(fz; skipnull=true) === 130.0
+    @test sum(bfz; skipnull=true) == 130

     bs = DataArrays.sum_pairwise_blocksize(identity)
     for n in [bs-64, bs-1, bs, bs+1, bs+2, 2*bs-2:2*bs+3..., 4*bs-2:4*bs+3...]
         da = DataArray(randn(n))
         s = sum(da.data)
         @test sum(da) ≈ s
-        @test sum(da; skipna=true) ≈ s
+        @test sum(da; skipnull=true) ≈ s

         da2 = copy(da)
-        da2[1:2:end] = NA
-        @test isna(sum(da2))
-        @test sum(da2; skipna=true) ≈ sum(dropna(da2))
+        da2[1:2:end] = null
+        @test isnull(sum(da2))
+        @test sum(da2; skipnull=true) ≈ sum(Nulls.skip(da2))

         da2 = convert(DataArray{BigFloat}, da2)
-        @test isna(sum(da2))
-        @test sum(da2; skipna=true) ≈ sum(dropna(da2))
+        @test isnull(sum(da2))
+        @test sum(da2; skipnull=true) ≈ sum(Nulls.skip(da2))

         da2 = copy(da)
-        da2[2:2:end] = NA
-        @test isna(sum(da2))
-        @test sum(da2; skipna=true) ≈ sum(dropna(da2))
+        da2[2:2:end] = null
+        @test isnull(sum(da2))
+        @test sum(da2; skipnull=true) ≈ sum(Nulls.skip(da2))

         da2 = convert(DataArray{BigFloat}, da2)
-        @test isna(sum(da2))
-        @test sum(da2; skipna=true) ≈ sum(dropna(da2))
+        @test isnull(sum(da2))
+        @test sum(da2; skipnull=true) ≈ sum(Nulls.skip(da2))
     end

     ## other reductions
@@ -91,25 +91,25 @@ end
     for n in [0, 1, 2, 62, 63, 64, 65, 66]
         da = DataArray(randn(n))
         @same_behavior fn(da) fn(da.data)
-        @same_behavior fn(da; skipna=true) fn(da.data)
+        @same_behavior fn(da; skipnull=true) fn(da.data)

         da2 = copy(da)
-        da2[1:2:end] = NA
-        n > 0 && @test isna(fn(da2))
-        @same_behavior fn(da2; skipna=true) fn(dropna(da2))
+        da2[1:2:end] = null
+        n > 0 && @test isnull(fn(da2))
+        @same_behavior fn(da2; skipnull=true) fn(Nulls.skip(da2))

         da2 = convert(DataArray{BigFloat}, da2)
-        n > 0 && @test isna(fn(da2))
-        @same_behavior fn(da2; skipna=true) fn(dropna(da2))
+        n > 0 && @test isnull(fn(da2))
+        @same_behavior fn(da2; skipnull=true) fn(Nulls.skip(da2))

         da2 = copy(da)
-        da2[2:2:end] = NA
-        n > 1 && @test isna(fn(da2))
-        @same_behavior fn(da2; skipna=true) fn(dropna(da2))
+        da2[2:2:end] = null
+        n > 1 && @test isnull(fn(da2))
+        @same_behavior fn(da2; skipnull=true) fn(Nulls.skip(da2))

         da2 = convert(DataArray{BigFloat}, da2)
-        n > 1 && @test isna(fn(da2))
-        @same_behavior fn(da2; skipna=true) fn(dropna(da2))
+        n > 1 && @test isnull(fn(da2))
+        @same_behavior fn(da2; skipnull=true) fn(Nulls.skip(da2))
     end
 end
@@ -120,30 +120,30 @@ end
         s = mapreduce(identity, fn, da.data)
         @test mapreduce(identity, fn, da) == s
-        @test mapreduce(identity, fn, da; skipna=true) == s
+        @test mapreduce(identity, fn, da; skipnull=true) == s
         @test reduce(fn, da) == s
-        @test reduce(fn, da; skipna=true) == s
+        @test reduce(fn, da; skipnull=true) == s
     end

     # make sure reductions of & and | are still calling Base
-    @test isna(reduce(&, @data([true, NA])))
-    @test !reduce(&, @data([false, NA]))
-    @test reduce(|, @data([true, NA]))
-    @test isna(reduce(|, @data([false, NA])))
+    @test isnull(reduce(&, @data([true, null])))
+    @test !reduce(&, @data([false, null]))
+    @test reduce(|, @data([true, null]))
+    @test isnull(reduce(|, @data([false, null])))

     # weighted mean
     da1 = DataArray(randn(128))
     da2 = DataArray(randn(128))
     @same_behavior mean(da1, weights(da2)) mean(da1.data, weights(da2.data))
     @same_behavior mean(da1, weights(da2.data)) mean(da1.data, weights(da2.data))
-    @same_behavior mean(da1, weights(da2); skipna=true) mean(da1.data, weights(da2.data))
-    @same_behavior mean(da1, weights(da2.data); skipna=true) mean(da1.data, weights(da2.data))
+    @same_behavior mean(da1, weights(da2); skipnull=true) mean(da1.data, weights(da2.data))
+    @same_behavior mean(da1, weights(da2.data); skipnull=true) mean(da1.data, weights(da2.data))

-    da1[1:3:end] = NA
-    @same_behavior mean(da1, weights(da2); skipna=true) mean(dropna(da1), weights(da2.data[(!).(da1.na)]))
-    @same_behavior mean(da1, weights(da2.data); skipna=true) mean(dropna(da1), weights(da2.data[(!).(da1.na)]))
+    da1[1:3:end] = null
+    @same_behavior mean(da1, weights(da2); skipnull=true) mean(Nulls.skip(da1), weights(da2.data[(!).(da1.na)]))
+    @same_behavior mean(da1, weights(da2.data); skipnull=true) mean(Nulls.skip(da1), weights(da2.data[(!).(da1.na)]))

-    da2[1:2:end] = NA
+    da2[1:2:end] = null
     keep = .!da1.na .& .!da2.na
-    @same_behavior mean(da1, weights(da2); skipna=true) mean(da1.data[keep], weights(da2.data[keep]))
+    @same_behavior mean(da1, weights(da2); skipnull=true) mean(da1.data[keep], weights(da2.data[keep]))
 end
diff --git a/test/reducedim.jl b/test/reducedim.jl
index 6bee1d2..ec157d2 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -2,8 +2,8 @@ macro test_da_approx_eq(da1, da2)
     quote
         v1 = $(esc(da1))
         v2 = $(esc(da2))
-        na = isna.(v1)
-        @test na == isna.(v2)
+        na = isnull.(v1)
+        @test na == isnull.(v2)
         defined = (!).(na)
         if any(defined)
             @test isapprox(v1[defined], v2[defined], nans = true)
@@ -38,13 +38,13 @@ end
     test_count()

     # mapslices from Base, hacked to work for these cases
-    function safe_mapslices{T}(f::Function, A::AbstractArray{T}, region, skipna)
+    function safe_mapslices{T}(f::Function, A::AbstractArray{T}, region, skipnull)
         dims = intersect(region, 1:ndims(A))
         if isempty(dims)
-            if skipna
-                naval = f(T[], 1)
+            if skipnull
+                naval = f(Nulls.T(T)[], 1)
                 A = copy(A)
-                A[isna.(A)] = isempty(naval) ? NA : naval[1]
+                A[isnull.(A)] = isempty(naval) ? null : naval[1]
             end
             return A
         end
@@ -67,7 +67,7 @@ end
         idx[d] = 1:size(A,d)
     end

-    r1 = f(copy(reshape(A[idx...], Asliceshape)); skipna=skipna)
+    r1 = f(copy(reshape(A[idx...], Asliceshape)); skipnull=skipnull)

     # determine result size and allocate
     Rsize = copy(dimsA)
@@ -98,11 +98,11 @@ end
         idx[otherdims] = ia
         ridx[otherdims] = ia
         try
-            R[ridx...] = f(copy(reshape(A[idx...], Asliceshape)); skipna=skipna)
+            R[ridx...] = f(copy(reshape(A[idx...], Asliceshape)); skipnull=skipnull)
         catch e
             if (isa(e, ErrorException) && e.msg == "Reducing over an empty array is not allowed.") ||
                (isa(e, ArgumentError) && e.msg == "reducing over an empty collection is not allowed")
-                R[ridx...] = NA
+                R[ridx...] = null
             else
                 println(typeof(e))
                 rethrow(e)
@@ -114,65 +114,65 @@ end
         return R
     end

-    myvarzm(x; skipna::Bool=false) = var(x; mean=0, skipna=skipna)
-    myvar1m(x; skipna::Bool=false) = var(x; mean=1, skipna=skipna)
+    myvarzm(x; skipnull::Bool=false) = var(x; mean=0, skipnull=skipnull)
+    myvar1m(x; skipnull::Bool=false) = var(x; mean=1, skipnull=skipnull)

     for Areduc in (DataArray(rand(3, 4, 5, 6)),
                    DataArray(rand(3, 4, 5, 6), rand(3, 4, 5, 6) .< 0.2))
-        for skipna = (false, true)
+        for skipnull = (false, true)
             for region in Any[
                 1, 2, 3, 4, 5, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4),
                 (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)]
-                # println("region = $region, skipna = $skipna")
+                # println("region = $region, skipnull = $skipnull")
                 outputs = Any[DataArray(fill(NaN, length.(Base.reduced_indices(indices(Areduc), region))))]
-                has_na = any(isna, Areduc)
-                if has_na && !skipna
+                hasnulls = any(isnull, Areduc)
+                if hasnulls && !skipnull
                     # Should throw an error reducing to non-DataArray
-                    @test_throws NAException sum!(outputs[1].data, Areduc; skipna=skipna)
+                    @test_throws NullException sum!(outputs[1].data, Areduc; skipnull=skipnull)
                 else
                     # Should be able to reduce to non-DataArray
                     push!(outputs, outputs[1].data)
                 end
                 for r in outputs
-                    @test_da_approx_eq sum!(r, Areduc; skipna=skipna) safe_mapslices(sum, Areduc, region, skipna)
-                    @test_da_approx_eq prod!(r, Areduc; skipna=skipna) safe_mapslices(prod, Areduc, region, skipna)
-                    if !has_na
-                        @test_da_approx_eq maximum!(r, Areduc; skipna=skipna) safe_mapslices(maximum, Areduc, region, skipna)
-                        @test_da_approx_eq minimum!(r, Areduc; skipna=skipna) safe_mapslices(minimum, Areduc, region, skipna)
+                    @test_da_approx_eq sum!(r, Areduc; skipnull=skipnull) safe_mapslices(sum, Areduc, region, skipnull)
+                    @test_da_approx_eq prod!(r, Areduc; skipnull=skipnull) safe_mapslices(prod, Areduc, region, skipnull)
+                    if !hasnulls
+                        @test_da_approx_eq maximum!(r, Areduc; skipnull=skipnull) safe_mapslices(maximum, Areduc, region, skipnull)
+                        @test_da_approx_eq minimum!(r, Areduc; skipnull=skipnull) safe_mapslices(minimum, Areduc, region, skipnull)
                     end
-                    @test_da_approx_eq Base.sumabs!(r, Areduc; skipna=skipna) safe_mapslices(sum, abs(Areduc), region, skipna)
-                    @test_da_approx_eq Base.sumabs2!(r, Areduc; skipna=skipna) safe_mapslices(sum, abs2(Areduc), region, skipna)
-                    @test_da_approx_eq mean!(r, Areduc; skipna=skipna) safe_mapslices(mean, Areduc, region, skipna)
+                    @test_da_approx_eq Base.sumabs!(r, Areduc; skipnull=skipnull) safe_mapslices(sum, abs(Areduc), region, skipnull)
+                    @test_da_approx_eq Base.sumabs2!(r, Areduc; skipnull=skipnull) safe_mapslices(sum, abs2(Areduc), region, skipnull)
+                    @test_da_approx_eq mean!(r, Areduc; skipnull=skipnull) safe_mapslices(mean, Areduc, region, skipnull)
                 end
-                @test_da_approx_eq sum(Areduc, region; skipna=skipna) safe_mapslices(sum, Areduc, region, skipna)
-                @test_da_approx_eq prod(Areduc, region; skipna=skipna) safe_mapslices(prod, Areduc, region, skipna)
-                @test_da_approx_eq maximum(Areduc, region; skipna=skipna) safe_mapslices(maximum, Areduc, region, skipna)
-                @test_da_approx_eq minimum(Areduc, region; skipna=skipna) safe_mapslices(minimum, Areduc, region, skipna)
-                @test_da_approx_eq Base.sumabs(Areduc, region; skipna=skipna) safe_mapslices(sum, abs(Areduc), region, skipna)
-                @test_da_approx_eq Base.sumabs2(Areduc, region; skipna=skipna) safe_mapslices(sum, abs2(Areduc), region, skipna)
-                @test_da_approx_eq mean(Areduc, region; skipna=skipna) safe_mapslices(mean, Areduc, region, skipna)
+                @test_da_approx_eq sum(Areduc, region; skipnull=skipnull) safe_mapslices(sum, Areduc, region, skipnull)
+                @test_da_approx_eq prod(Areduc, region; skipnull=skipnull) safe_mapslices(prod, Areduc, region, skipnull)
+                @test_da_approx_eq maximum(Areduc, region; skipnull=skipnull) safe_mapslices(maximum, Areduc, region, skipnull)
+                @test_da_approx_eq minimum(Areduc, region; skipnull=skipnull) safe_mapslices(minimum, Areduc, region, skipnull)
+                @test_da_approx_eq Base.sumabs(Areduc, region; skipnull=skipnull) safe_mapslices(sum, abs(Areduc), region, skipnull)
+                @test_da_approx_eq Base.sumabs2(Areduc, region; skipnull=skipnull) safe_mapslices(sum, abs2(Areduc), region, skipnull)
+                @test_da_approx_eq mean(Areduc, region; skipnull=skipnull) safe_mapslices(mean, Areduc, region, skipnull)

                 if region != 5
-                    @test_da_approx_eq var(Areduc, region; skipna=skipna) safe_mapslices(var, Areduc, region, skipna)
-                    @test_da_approx_eq var(Areduc, region; mean=0, skipna=skipna) safe_mapslices(myvarzm, Areduc, region, skipna)
+                    @test_da_approx_eq var(Areduc, region; skipnull=skipnull) safe_mapslices(var, Areduc, region, skipnull)
+                    @test_da_approx_eq var(Areduc, region; mean=0, skipnull=skipnull) safe_mapslices(myvarzm, Areduc, region, skipnull)
                     for r in outputs
-                        @test_da_approx_eq var(Areduc, region; mean=fill!(r, 1), skipna=skipna) safe_mapslices(myvar1m, Areduc, region, skipna)
+                        @test_da_approx_eq var(Areduc, region; mean=fill!(r, 1), skipnull=skipnull) safe_mapslices(myvar1m, Areduc, region, skipnull)
                     end
                 end
             end
         end
     end

-    # Test NA-skipping behavior for maximum
-    a = @data([NA NA; 3 4])
-    @test isequal(maximum(a, 1; skipna=true), [3 4])
-    @test isequal(maximum!(zeros(1, 2), a; skipna=true), [3 4])
+    # Test null-skipping behavior for maximum
+    a = @data([null null; 3 4])
+    @test isequal(maximum(a, 1; skipnull=true), [3 4])
+    @test isequal(maximum!(zeros(1, 2), a; skipnull=true), [3 4])

-    # Maximum should give an NA in the output if all values along dimension are NA
-    @test isequal(maximum(a, 2; skipna=true), @data([NA 4])')
+    # Maximum should give a null in the output if all values along a dimension are null
+    @test isequal(maximum(a, 2; skipnull=true), @data([null 4])')

     # Maximum should refuse to reduce to a non-DataArray
-    @test_throws NAException maximum!(zeros(2, 1), a; skipna=true)
+    @test_throws NullException maximum!(zeros(2, 1), a; skipnull=true)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index d9f3ef3..d39730e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,7 +4,6 @@
 using Base.Test

 using DataArrays
-using DataArrays: Data

 my_tests = ["abstractarray.jl",
             "booleans.jl",
diff --git a/test/sort.jl b/test/sort.jl
index f078bb0..19f50fc 100644
--- a/test/sort.jl
+++ b/test/sort.jl
@@ -1,5 +1,5 @@
 @testset "Sort" begin
-    dv1 = @data([9, 1, 8, NA, 3, 3, 7, NA])
+    dv1 = @data([9, 1, 8, null, 3, 3, 7, null])
     dv2 = 1.0 * dv1
     dv3 = DataArray(collect(1:8))
     pdv1 = convert(PooledDataArray, dv1)
@@ -17,11 +17,11 @@
         ra = randn(n-nna)
         a[.!na] = ra
         for da in (DataArray(a, na), PooledDataArray(a, na), (pda = PooledDataArray(a, na); setlevels!(pda, shuffle!(pda.pool))))
-            @test isequal(sort(da), [DataArray(sort(dropna(da))); DataArray(T, nna)])
-            @test isequal(sort(da; lt=(x,y)->isless(x,y)), [DataArray(sort(dropna(da))); DataArray(T, nna)])
-            @test isequal(da[sortperm(da)], [DataArray(sort(dropna(da))); DataArray(T, nna)])
-            @test isequal(sort(da, rev=true), [DataArray(T, nna); DataArray(sort(dropna(da), rev=true))])
-            @test isequal(da[sortperm(da, rev=true)], [DataArray(T, nna); DataArray(sort(dropna(da), rev=true))])
+            @test isequal(sort(da), [DataArray(sort!(collect(Nulls.skip(da)))); DataArray(T, nna)])
+            @test isequal(sort(da; lt=(x,y)->isless(x,y)), [DataArray(sort!(collect(Nulls.skip(da)))); DataArray(T, nna)])
+            @test isequal(da[sortperm(da)], [DataArray(sort!(collect(Nulls.skip(da)))); DataArray(T, nna)])
+            @test isequal(sort(da, rev=true), [DataArray(T, nna); DataArray(sort!(collect(Nulls.skip(da)), rev=true))])
+            @test isequal(da[sortperm(da, rev=true)], [DataArray(T, nna); DataArray(sort!(collect(Nulls.skip(da)), rev=true))])
         end
     end
 end
diff --git a/test/statistics.jl b/test/statistics.jl
index 61e3ef9..8880cb9 100644
--- a/test/statistics.jl
+++ b/test/statistics.jl
@@ -18,7 +18,7 @@ end
     Number Missing: 0
    % Missing: 0.000000
    """
-    describe(io, @data([1, NA]))
+    describe(io, @data([1, null]))
     @test String(take!(io)) == """
    Summary Stats:
    Mean: 1.000000
@@ -41,7 +41,7 @@ end
    Number Missing: 0
    % Missing: 0.000000
    """
-    describe(io, @data(["s", NA]))
+    describe(io, @data(["s", null]))
     @test String(take!(io)) == """
    Summary Stats:
    Length: 2