diff --git a/src/DataArrays.jl b/src/DataArrays.jl index 69d6687..73298c1 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -46,6 +46,7 @@ module DataArrays PooledDataVector, reldiff, reorder, + reorder!, rep, replace!, setlevels!, diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index fe86867..61a5c69 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -433,9 +433,70 @@ function setlevels!{T,R}(x::PooledDataArray{T,R}, d::Dict{T,Any}) # this version setlevels!(x, newpool) end +############################################################################## +## +## reorder() +## +############################################################################## + +""" + `reorder(x)` re-sorts the current pool (`sort(levels(x))`) and remaps the references related to that pool accordingly. + + Input: + - `x` reference object to be used to construct a new one + - (no `newpool` argument: the new pool is the sorted levels of `x`) + + Output: + A new `PooledDataArray` object +""" reorder(x::PooledDataArray) = PooledDataArray(x, sort(levels(x))) # just re-sort the pool -reorder(x::PooledDataArray, y::AbstractVector...) = reorder(mean, x, y...) +""" + `reorder(pda,newpool)` reorders the current pool and references related to that pool. A new pool should be a subset of the + old one (see `inclusioncheck` argument). If you want to change pool identifiers, use `setlevels` first, before using `reorder`. 
+ + Input: + - `pda` reference object to be used to construct a new one + - `newpool` to replace the current one + - `inclusioncheck` (default true) checks whether `newpool` ⊆ `pda.pool` + + Output: + A new `PooledDataArray` object +""" +reorder(pda::PooledDataArray, newpool::AbstractVector, inclusioncheck=true) = begin + inclusioncheck && !issubset(newpool, pda.pool) && throw(ArgumentError("A new pool must be a subset of the current one.")) + + PooledDataArray(pda, newpool) +end + +""" + `reorder!(pda,newpool)` reorders the current pool and references related to that pool. A new pool should be a subset of the + old one (see `inclusioncheck` argument). If you want to change pool identifiers, use `setlevels` first, before using `reorder!`. + + Input: + - `pda` PooledDataArray to be changed + - `newpool` to replace the current one + - `inclusioncheck` (default true) checks whether `newpool` ⊆ `pda.pool` + + Output: + Current `pda` object +""" +reorder!{T,R<:Integer,N}(pda::PooledDataArray{T,R,N}, newpool::Vector{T}, inclusioncheck=true) = begin + inclusioncheck && !issubset(newpool, pda.pool) && throw(ArgumentError("A new pool must be a subset of the current one.")) + + tidx::Array{R} = findat(newpool, pda.pool) + oldrefs = pda.refs + for i in 1:length(oldrefs) + if oldrefs[i] != 0 + oldrefs[i] = tidx[oldrefs[i]] + end + end + pda.pool = newpool + return pda +end + +# commented out due to issue #167 +#reorder(x::PooledDataArray, y::AbstractVector...) = reorder(mean, x, y...) ### FIXME: this can't work because we don't know about DataFrames # reorder(fun::Function, x::PooledDataArray, y::AbstractVector...) 
= diff --git a/test/pooleddataarray.jl b/test/pooleddataarray.jl index f747b76..6f48572 100644 --- a/test/pooleddataarray.jl +++ b/test/pooleddataarray.jl @@ -51,6 +51,7 @@ module TestPDA pim = @pdata [1 + im, 2 + im, 3 + im, 2 + im, 1 + im] @assert levels(pim) == [1 + im, 2 + im, 3 + im] + # Test explicitly setting refs type testarray = [1, 1, 2, 2, 0, 0, 3, 3] testdata = @data [1, 1, 2, 2, 0, 0, 3, 3] @@ -107,4 +108,26 @@ module TestPDA pda = @pdata([NA, "A", "B", "C", "A", "B"]) @test isequal(Base.permute!!(copy(pda), [2, 5, 3, 6, 4, 1]), @pdata(["A", "A", "B", "B", "C", NA])) @test isequal(Base.ipermute!!(copy(pda), [6, 1, 3, 5, 2, 4]), @pdata(["A", "A", "B", "B", "C", NA])) + + #1. reordering levels + pda = @pdata(["high" , "medium" , "low" , "high" , NA, "medium"]) + #1.1 positive scenarios + @test isequal(pda.pool, Vector{eltype(pda.pool)}(["high", "low", "medium"])) #alphabetically + @test isequal(pda.refs, Vector{eltype(pda.refs)}([1,3,2,1,0,3])) #high is 1, medium is 3, low is 2 according to alphabetical order + + reorder!(pda, ["low","medium","high"]) #reorder according to e.g. visual plot needs + @test isequal(pda.pool, Vector{eltype(pda.pool)}(["low", "medium", "high"])) #semantic order + @test isequal(pda.refs, Vector{eltype(pda.refs)}([3,2,1,3,0,2])) + + reorder!(pda, ["low","medium"]) + @test isequal(pda.pool, Vector{eltype(pda.pool)}(["low", "medium"])) #semantic order + @test isequal(pda.refs, Vector{eltype(pda.refs)}([0,2,1,0,0,2])) + + newpda = reorder(pda, ["low"]) + @test newpda !== pda + #1.2 negative scenarios + pda = @pdata(["high" , "medium" , "low" , "high" , NA, "medium"]) + @test_throws ArgumentError reorder(pda, ["very low","very high"]) #new levels must be a subset of the original one + reorder!(pda, ["new low","new medium"], false) #don't check inclusion and change level names + @test isequal(pda.refs, Vector{eltype(pda.refs)}([0,0,0,0,0,0])) #every ref becomes 0: with the check disabled this is a mess, not a reordering end