Skip to content

Commit fe549ee

Browse files
committed
add zscore functions. Close JuliaLang#75.
1 parent 08437dd commit fe549ee

File tree

3 files changed

+109
-0
lines changed

3 files changed

+109
-0
lines changed

src/StatsBase.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ module StatsBase
4747
mode, # find a mode from data (the first one)
4848
modes, # find all modes from data
4949

50+
zscore, # compute Z-scores
51+
zscore!, # compute Z-scores inplace or to a pre-allocated array
52+
5053
percentile, # quantile using percentage (instead of fraction) as argument
5154
nquantile, # quantiles at [0:n]/n
5255

src/scalarstats.jl

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,89 @@ mad{T<:Real}(v::Range{T}) = mad!([v])
191191
iqr{T<:Real}(v::AbstractArray{T}) = (q = quantile(v, [.25, .75]); q[2] - q[1])
192192

193193

194+
#############################
195+
#
196+
# Z-scores
197+
#
198+
#############################
199+
200+
function _zscore!(Z::AbstractArray, X::AbstractArray, μ::Real, σ::Real)
201+
# Z and X are assumed to have the same size
202+
= inv(σ)
203+
if μ == zero(μ)
204+
for i = 1 : length(X)
205+
@inbounds Z[i] = X[i] *
206+
end
207+
else
208+
for i = 1 : length(X)
209+
@inbounds Z[i] = (X[i] - μ) *
210+
end
211+
end
212+
return Z
213+
end
214+
215+
@ngenerate N typeof(Z) function _zscore!{S,T,N}(Z::AbstractArray{S,N}, X::AbstractArray{T,N}, μ::AbstractArray, σ::AbstractArray)
216+
# Z and X are assumed to have the same size
217+
# μ and σ are assumed to have the same size, that is compatible with size(X)
218+
siz1 = size(X, 1)
219+
@nextract N ud d->size(μ, d)
220+
if size(μ, 1) == 1 && siz1 > 1
221+
@nloops N i d->(d>1 ? (1:size(X,d)) : (1:1)) d->(j_d = ud_d ==1 ? 1 : i_d) begin
222+
v = (@nref N μ j)
223+
c = inv(@nref N σ j)
224+
for i_1 = 1:siz1
225+
(@nref N Z i) = ((@nref N X i) - v) * c
226+
end
227+
end
228+
else
229+
@nloops N i X d->(j_d = ud_d ==1 ? 1 : i_d) begin
230+
(@nref N Z i) = ((@nref N X i) - (@nref N μ j)) / (@nref N σ j)
231+
end
232+
end
233+
return Z
234+
end
235+
236+
function _zscore_chksize(X::AbstractArray, μ::AbstractArray, σ::AbstractArray)
237+
size(μ) == size(σ) || throw(DimensionMismatch("μ and σ should have the same size."))
238+
for i=1:ndims(X)
239+
dμ_i = size(μ,i)
240+
(dμ_i == 1 || dμ_i == size(X,i)) || throw(DimensionMismatch("X and μ have incompatible sizes."))
241+
end
242+
end
243+
244+
function zscore!{ZT<:FloatingPoint,T<:Real}(Z::AbstractArray{ZT}, X::AbstractArray{T}, μ::Real, σ::Real)
245+
size(Z) == size(X) || throw(DimensionMismatch("Z and X must have the same size."))
246+
_zscore!(Z, X, μ, σ)
247+
end
248+
249+
function zscore!{ZT<:FloatingPoint,T<:Real,U<:Real,S<:Real}(Z::AbstractArray{ZT}, X::AbstractArray{T},
250+
μ::AbstractArray{U}, σ::AbstractArray{S})
251+
size(Z) == size(X) || throw(DimensionMismatch("Z and X must have the same size."))
252+
_zscore_chksize(X, μ, σ)
253+
_zscore!(Z, X, μ, σ)
254+
end
255+
256+
zscore!{T<:FloatingPoint}(X::AbstractArray{T}, μ::Real, σ::Real) = _zscore!(X, X, μ, σ)
257+
258+
zscore!{T<:FloatingPoint,U<:Real,S<:Real}(X::AbstractArray{T}, μ::AbstractArray{U}, σ::AbstractArray{S}) =
259+
(_zscore_chksize(X, μ, σ); _zscore!(X, X, μ, σ))
260+
261+
function zscore{T<:Real}(X::AbstractArray{T}, μ::Real, σ::Real)
262+
ZT = typeof((zero(T) - zero(μ)) / one(σ))
263+
_zscore!(Array(ZT, size(X)), X, μ, σ)
264+
end
265+
266+
function zscore{T<:Real,U<:Real,S<:Real}(X::AbstractArray{T}, μ::AbstractArray{U}, σ::AbstractArray{S})
267+
_zscore_chksize(X, μ, σ)
268+
ZT = typeof((zero(T) - zero(U)) / one(S))
269+
_zscore!(Array(ZT, size(X)), X, μ, σ)
270+
end
271+
272+
zscore{T<:Real}(X::AbstractArray{T}) = ((μ, σ) = mean_and_std(X); zscore(X, μ, σ))
273+
zscore{T<:Real}(X::AbstractArray{T}, dim::Int) = ((μ, σ) = mean_and_std(X, dim); zscore(X, μ, σ))
274+
275+
276+
194277
#############################
195278
#
196279
# entropy and friends

test/scalarstats.jl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,29 @@ using Base.Test
3333
@test modes([1, 2, 3, 3, 2, 2, 1]) == [2]
3434
@test sort(modes([1, 3, 2, 3, 3, 2, 2, 1])) == [2, 3]
3535

36+
## zscores
37+
38+
@test zscore([-3:3], 1.5, 0.5) == [-9.0:2.0:3.0]
39+
40+
a = [3 4 5 6; 7 8 1 2; 6 9 3 0]
41+
z1 = [4. 6. 8. 10.; 5. 6. -1. 0.; 1.5 3.0 0.0 -1.5]
42+
z2 = [8. 2. 3. 1.; 24. 10. -1. -1.; 20. 12. 1. -2.]
43+
44+
@test_approx_eq zscore(a, [1, 2, 3], [0.5, 1.0, 2.0]) z1
45+
@test_approx_eq zscore(a, [1 3 2 4], [0.25 0.5 1.0 2.0]) z2
46+
47+
@test zscore!(float64([-3:3]), 1.5, 0.5) == [-9.0:2.0:3.0]
48+
@test_approx_eq zscore!(float64(a), [1, 2, 3], [0.5, 1.0, 2.0]) z1
49+
@test_approx_eq zscore!(float64(a), [1 3 2 4], [0.25 0.5 1.0 2.0]) z2
50+
51+
@test zscore!(zeros(7), [-3:3], 1.5, 0.5) == [-9.0:2.0:3.0]
52+
@test_approx_eq zscore!(zeros(size(a)), a, [1, 2, 3], [0.5, 1.0, 2.0]) z1
53+
@test_approx_eq zscore!(zeros(size(a)), a, [1 3 2 4], [0.25 0.5 1.0 2.0]) z2
54+
55+
@test_approx_eq zscore(a) zscore(a, mean(a), std(a))
56+
@test_approx_eq zscore(a, 1) zscore(a, mean(a,1), std(a,1))
57+
@test_approx_eq zscore(a, 2) zscore(a, mean(a,2), std(a,2))
58+
3659

3760
###### quantile & friends
3861

0 commit comments

Comments
 (0)