Skip to content

Expand the testing coverage using R as the reference #414

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Feb 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
82310a9
introduce a new R script to generate reference testing data
lindahua Sep 6, 2015
7f89d39
add rdistributions.R, which uses S4 classes to represent distributions
lindahua Sep 6, 2015
b8eef5d
use S4 objects in gender.R script
lindahua Sep 6, 2015
2243143
Use R6 class for more elegant expression of distribution classes
lindahua Sep 6, 2015
d504fd9
minor correction
lindahua Sep 6, 2015
f65289e
initialize() method uses '<-' instead of '='
lindahua Sep 6, 2015
842074d
output quads & separate parseDistrs.R
lindahua Sep 6, 2015
4225eb4
move R scripts to test/ref
lindahua Sep 9, 2015
c5b45b2
move testing list of distributions to test/ref
lindahua Sep 9, 2015
6615fbd
add the R-tests for Beta, Cauchy, Chisq, and Exponential
lindahua Sep 9, 2015
9cdc434
add more continuous distributions to R-tests
lindahua Sep 10, 2015
003afce
Merge branch 'master' into dh/rtest
lindahua Jan 26, 2017
0d23dde
Adopt new test items to ref from latest master
lindahua Jan 26, 2017
f07d2ea
refactor R testing facilities
lindahua Jan 26, 2017
d42353b
New test framework is working with a subset of test cases
lindahua Jan 26, 2017
e666914
support Arcsine distribution in R tests
lindahua Jan 26, 2017
a5adccc
All discrete distribution test cases passed R tests
lindahua Feb 1, 2017
fae3cbe
add a number of continuous distributions to R tests
lindahua Feb 1, 2017
6e22b76
Add BetaBinomial in R tests
lindahua Feb 1, 2017
3dc30bc
add cases from continuous.jl to R test list
lindahua Feb 1, 2017
6c6bf6e
Frechet & Noncentral{Beta|Chisq|F|T} passed R tests
lindahua Feb 1, 2017
9679069
Add Cosine, GeneralizedPareto, and Levy distributions to R tests
lindahua Feb 1, 2017
7a89aac
Divide R implementations into multiple files
lindahua Feb 2, 2017
9642425
refactor distribution constructors, remove parseDistrs.R
lindahua Feb 2, 2017
4d378a6
Add readme.md for R references
lindahua Feb 2, 2017
aea9975
add the reference entry instruction to ref/readme.md
lindahua Feb 2, 2017
6ffc581
Add VonMises to R tests
lindahua Feb 2, 2017
344bb8b
remove duplicated test cases in test/continuous.jl
lindahua Feb 2, 2017
ece3891
Remove obsoleted Python references
lindahua Feb 2, 2017
47e14cd
Incorporate generalized extreme value distribution to R tests
lindahua Feb 2, 2017
1a61989
add noncentral hypergeometric to R tests
lindahua Feb 2, 2017
055b065
Add R tests for NormalInverseGaussian
lindahua Feb 2, 2017
bad311d
Minor changes to preserve distribution elem type in properties
lindahua Feb 3, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
doc/build
.Rhistory
50 changes: 27 additions & 23 deletions src/testutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ end

# testing the implementation of a discrete univariate distribution
#
function test_distr(distr::DiscreteUnivariateDistribution, n::Int)
function test_distr(distr::DiscreteUnivariateDistribution, n::Int; testquan::Bool=true)

test_range(distr)
vs = get_evalsamples(distr, 0.00001)

test_support(distr, vs)
test_evaluation(distr, vs)
test_evaluation(distr, vs, testquan)
test_range_evaluation(distr)

test_stats(distr, vs)
Expand All @@ -41,13 +41,13 @@ end

# testing the implementation of a continuous univariate distribution
#
function test_distr(distr::ContinuousUnivariateDistribution, n::Int)
function test_distr(distr::ContinuousUnivariateDistribution, n::Int; testquan::Bool=true)

test_range(distr)
vs = get_evalsamples(distr, 0.01, 2000)

test_support(distr, vs)
test_evaluation(distr, vs)
test_evaluation(distr, vs, testquan)

xs = test_samples(distr, n)
allow_test_stats(distr) && test_stats(distr, xs)
Expand Down Expand Up @@ -326,7 +326,7 @@ function test_range_evaluation(d::DiscreteUnivariateDistribution)
end


function test_evaluation(d::DiscreteUnivariateDistribution, vs::AbstractVector)
function test_evaluation(d::DiscreteUnivariateDistribution, vs::AbstractVector, testquan::Bool=true)
nv = length(vs)
p = Vector{Float64}(nv)
c = Vector{Float64}(nv)
Expand Down Expand Up @@ -354,13 +354,15 @@ function test_evaluation(d::DiscreteUnivariateDistribution, vs::AbstractVector)
@test isapprox(lc[i] , log(c[i]) , atol=1.0e-12)
@test isapprox(lcc[i] , log(cc[i]), atol=1.0e-12)

ep = 1.0e-8
if p[i] > 2 * ep # ensure p[i] is large enough to guarantee a reliable result
@test quantile(d, c[i] - ep) == v
@test cquantile(d, cc[i] + ep) == v
@test invlogcdf(d, lc[i] - ep) == v
if 0.0 < c[i] < 1.0
@test invlogccdf(d, lcc[i] + ep) == v
if testquan
ep = 1.0e-8
if p[i] > 2 * ep # ensure p[i] is large enough to guarantee a reliable result
@test quantile(d, c[i] - ep) == v
@test cquantile(d, cc[i] + ep) == v
@test invlogcdf(d, lc[i] - ep) == v
if 0.0 < c[i] < 1.0
@test invlogccdf(d, lcc[i] + ep) == v
end
end
end
end
Expand All @@ -376,7 +378,7 @@ function test_evaluation(d::DiscreteUnivariateDistribution, vs::AbstractVector)
end


function test_evaluation(d::ContinuousUnivariateDistribution, vs::AbstractVector)
function test_evaluation(d::ContinuousUnivariateDistribution, vs::AbstractVector, testquan::Bool=true)
nv = length(vs)
p = Vector{Float64}(nv)
c = Vector{Float64}(nv)
Expand All @@ -401,14 +403,16 @@ function test_evaluation(d::ContinuousUnivariateDistribution, vs::AbstractVector
@test isapprox(lc[i] , log(c[i]) , atol=1.0e-12)
@test isapprox(lcc[i] , log(cc[i]), atol=1.0e-12)

# TODO: remove this line when we have more accurate implementation
# of quantile for InverseGaussian
qtol = isa(d, InverseGaussian) ? 1.0e-4 : 1.0e-10
if p[i] > 1.0e-6
@test isapprox(quantile(d, c[i]) , v, atol=qtol * (abs(v) + 1.0))
@test isapprox(cquantile(d, cc[i]) , v, atol=qtol * (abs(v) + 1.0))
@test isapprox(invlogcdf(d, lc[i]) , v, atol=qtol * (abs(v) + 1.0))
@test isapprox(invlogccdf(d, lcc[i]), v, atol=qtol * (abs(v) + 1.0))
if testquan
# TODO: remove this line when we have more accurate implementation
# of quantile for InverseGaussian
qtol = isa(d, InverseGaussian) ? 1.0e-4 : 1.0e-10
if p[i] > 1.0e-6
@test isapprox(quantile(d, c[i]) , v, atol=qtol * (abs(v) + 1.0))
@test isapprox(cquantile(d, cc[i]) , v, atol=qtol * (abs(v) + 1.0))
@test isapprox(invlogcdf(d, lc[i]) , v, atol=qtol * (abs(v) + 1.0))
@test isapprox(invlogccdf(d, lcc[i]), v, atol=qtol * (abs(v) + 1.0))
end
end
end

Expand Down Expand Up @@ -451,10 +455,10 @@ function test_stats(d::DiscreteUnivariateDistribution, vs::AbstractVector)
@test isapprox(var(d) , xvar , atol=1.0e-8)
@test isapprox(std(d) , xstd , atol=1.0e-8)

if isfinite(skewness(d))
if applicable(skewness, d) && isfinite(skewness(d))
@test isapprox(skewness(d), xskew , atol=1.0e-8)
end
if isfinite(kurtosis(d))
if applicable(kurtosis, d) && isfinite(kurtosis(d))
@test isapprox(kurtosis(d), xkurt , atol=1.0e-8)
end
if applicable(entropy, d)
Expand Down
2 changes: 1 addition & 1 deletion src/univariate/continuous/betaprime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ relation ship that if $X \sim \operatorname{Beta}(\alpha, \beta)$ then $\frac{X}
\sim \operatorname{BetaPrime}(\alpha, \beta)$

```julia
BetaPrime() # equivalent to BetaPrime(0, 1)
BetaPrime() # equivalent to BetaPrime(1, 1)
BetaPrime(a) # equivalent to BetaPrime(a, a)
BetaPrime(a, b) # Beta prime distribution with shape parameters a and b

Expand Down
2 changes: 1 addition & 1 deletion src/univariate/continuous/cosine.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ var{T<:Real}(d::Cosine{T}) = d.σ^2 * (1//3 - 2/T(π)^2)

skewness{T<:Real}(d::Cosine{T}) = zero(T)

kurtosis{T<:Real}(d::Cosine{T}) = 6*(90-T(pi))/(5*(T(π)^2-6)^2)
kurtosis{T<:Real}(d::Cosine{T}) = 6*(90-T(π)^4) / (5*(T(π)^2-6)^2)


#### Evaluation
Expand Down
4 changes: 2 additions & 2 deletions src/univariate/continuous/erlang.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ end

Erlang{T<:Real}(α::Int, θ::T) = Erlang{T}(α, θ)
Erlang(α::Int, θ::Integer) = Erlang{Float64}(α, Float64(θ))
Erlang(α::Real) = Erlang(α, 1.0)
Erlang() = Erlang(1.0, 1.0)
Erlang(α::Int) = Erlang(α, 1.0)
Erlang() = Erlang(1, 1.0)

@distr_support Erlang 0.0 Inf

Expand Down
8 changes: 4 additions & 4 deletions src/univariate/continuous/generalizedextremevalue.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ $x \in \begin{cases}
\end{cases}$

```julia
GeneralizedExtremeValue(k, s, m) # Generalized Pareto distribution with shape k, scale s and location m.
GeneralizedExtremeValue(m, s, k) # Generalized Pareto distribution with shape k, scale s and location m.

params(d) # Get the parameters, i.e. (k, s, m)
shape(d) # Get the shape parameter, i.e. k (sometimes called c)
scale(d) # Get the scale parameter, i.e. s
params(d) # Get the parameters, i.e. (m, s, k)
location(d) # Get the location parameter, i.e. m
scale(d) # Get the scale parameter, i.e. s
shape(d) # Get the shape parameter, i.e. k (sometimes called c)
```

External links
Expand Down
16 changes: 8 additions & 8 deletions src/univariate/continuous/generalizedpareto.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
doc"""
GeneralizedPareto(ξ, σ, μ)
GeneralizedPareto(μ, σ, ξ)

The *Generalized Pareto distribution* with shape parameter `ξ`, scale `σ` and location `μ` has probability density function

$f(x; \xi, \sigma, \mu) = \begin{cases}
$f(x; \mu, \sigma, \xi) = \begin{cases}
\frac{1}{\sigma}(1 + \xi \frac{x - \mu}{\sigma} )^{-\frac{1}{\xi} - 1} & \text{for } \xi \neq 0 \\
\frac{1}{\sigma} e^{-\frac{\left( x - \mu \right) }{\sigma}} & \text{for } \xi = 0
\end{cases}~,
Expand All @@ -14,14 +14,14 @@ $f(x; \xi, \sigma, \mu) = \begin{cases}


```julia
GeneralizedPareto() # Generalized Pareto distribution with unit shape and unit scale, i.e. GeneralizedPareto(1, 1, 0)
GeneralizedPareto(k, s) # Generalized Pareto distribution with shape k and scale s, i.e. GeneralizedPareto(k, s, 0)
GeneralizedPareto(k, s, m) # Generalized Pareto distribution with shape k, scale s and location m.
GeneralizedPareto() # Generalized Pareto distribution with unit shape and unit scale, i.e. GeneralizedPareto(0, 1, 1)
GeneralizedPareto(k, s) # Generalized Pareto distribution with shape k and scale s, i.e. GeneralizedPareto(0, k, s)
GeneralizedPareto(m, k, s) # Generalized Pareto distribution with shape k, scale s and location m.

params(d) # Get the parameters, i.e. (k, s, m)
shape(d) # Get the shape parameter, i.e. k
scale(d) # Get the scale parameter, i.e. s
params(d) # Get the parameters, i.e. (m, s, k)
location(d) # Get the location parameter, i.e. m
scale(d) # Get the scale parameter, i.e. s
shape(d) # Get the shape parameter, i.e. k
```

External links
Expand Down
2 changes: 1 addition & 1 deletion src/univariate/continuous/gumbel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ mode(d::Gumbel) = d.μ

var{T<:Real}(d::Gumbel{T}) = T(π)^2/6 * d.θ^2

skewness{T<:Real}(d::Gumbel{T}) = 112*sqrt(T(6))*zeta(T(3))/π/π/π
skewness{T<:Real}(d::Gumbel{T}) = 12*sqrt(T(6))*zeta(T(3)) / π^3

kurtosis{T<:Real}(d::Gumbel{T}) = T(12)/5

Expand Down
2 changes: 1 addition & 1 deletion src/univariate/continuous/levy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ mode(d::Levy) = d.σ / 3 + d.μ

entropy(d::Levy) = (1 - 3digamma(1) + log(16 * d.σ^2 * π)) / 2

median(d::Levy) = d.μ + d.σ / (2 * T(erfcinv(0.5))^2)
median{T<:Real}(d::Levy{T}) = d.μ + d.σ / (2 * T(erfcinv(0.5))^2)


#### Evaluation
Expand Down
2 changes: 2 additions & 0 deletions src/univariate/continuous/normal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ convert{T <: Real, S <: Real}(::Type{Normal{T}}, d::Normal{S}) = Normal(T(d.μ),
params(d::Normal) = (d.μ, d.σ)
@inline partype{T<:Real}(d::Normal{T}) = T

location(d::Normal) = d.μ
scale(d::Normal) = d.σ

#### Statistics

Expand Down
3 changes: 2 additions & 1 deletion src/univariate/continuous/normalcanon.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ canonform(d::Normal) = convert(NormalCanon, d)
params(d::NormalCanon) = (d.η, d.λ)
@inline partype{T<:Real}(d::NormalCanon{T}) = T


#### Statistics

mean(d::NormalCanon) = d.μ
Expand All @@ -48,6 +47,8 @@ std(d::NormalCanon) = sqrt(var(d))

entropy(d::NormalCanon) = (-log(d.λ) + log2π + 1) / 2

location(d::NormalCanon) = mean(d)
scale(d::NormalCanon) = std(d)

#### Evaluation

Expand Down
1 change: 1 addition & 0 deletions src/univariate/continuous/normalinversegaussian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ params(d::NormalInverseGaussian) = (d.μ, d.α, d.β, d.δ)
mean(d::NormalInverseGaussian) = d.μ + d.δ * d.β / sqrt(d.α^2 - d.β^2)
var(d::NormalInverseGaussian) = d.δ * d.α^2 / sqrt(d.α^2 - d.β^2)^3
skewness(d::NormalInverseGaussian) = 3d.β / (d.α * sqrt(d.δ * sqrt(d.α^2 - d.β^2)))
kurtosis(d::NormalInverseGaussian) = 3 * (1 + 4*d.β^2/d.α^2) / (d.δ * sqrt(d.α^2 - d.β^2))

function pdf(d::NormalInverseGaussian, x::Real)
μ, α, β, δ = params(d)
Expand Down
6 changes: 3 additions & 3 deletions src/univariate/continuous/triangular.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,14 @@ function var(d::TriangularDist)
_pretvar(a, b, c) / 18
end

function skewness(d::TriangularDist)
function skewness{T<:Real}(d::TriangularDist{T})
(a, b, c) = params(d)
sqrt2 * (a + b - 2c) * (2a - b - c) * (a - 2b + c) / (5 * _pretvar(a, b, c)^3//2)
sqrt2 * (a + b - 2c) * (2a - b - c) * (a - 2b + c) / ( 5 * _pretvar(a, b, c)^(T(3)/2) )
end

kurtosis{T<:Real}(d::TriangularDist{T}) = T(-3)/5

entropy(d::TriangularDist) = 1//2 + log((d.b - d.a) / 2)
entropy{T<:Real}(d::TriangularDist{T}) = one(T)/2 + log((d.b - d.a) / 2)


#### Evaluation
Expand Down
61 changes: 2 additions & 59 deletions test/continuous.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,80 +4,23 @@ using Distributions
using Base.Test
using Calculus.derivative

### load reference data
#
# Note
# -------
# To generate the reference data:
# (1) make sure that python, numpy, and scipy are installed in your system
# (2) enter the sub-directory test
# (3) run: python discrete_ref.py > discrete_ref.csv
#
# For most cases, you don't have. You only need to run this when you
# implement a new distribution and want to add new test cases, then
# you should add the new test cases to discrete_ref.py and run this
# procedure to update the reference data.
#
n_tsamples = 100


# additional distributions that have no direct counterparts in scipy
println(" -----")

# additional distributions that have no direct counterparts in R references
for distr in [
Biweight(),
Biweight(1,3),
Epanechnikov(),
Epanechnikov(1,3),
Frechet(0.5, 1.0),
Frechet(3.0, 1.0),
Frechet(20.0, 1.0),
Frechet(120.0, 1.0),
Frechet(0.5, 2.0),
Frechet(3.0, 2.0),
GeneralizedPareto(),
GeneralizedPareto(1.0, 1.0),
GeneralizedPareto(1.0, 1.0, 1.0),
GeneralizedPareto(0.1, 2.0, 0.0),
GeneralizedPareto(0.0, 0.5, 0.0),
GeneralizedPareto(-1.5, 0.5, 2.0),
InverseGaussian(1.0, 1.0),
InverseGaussian(2.0, 7.0),
Levy(),
Levy(2, 8),
Levy(3.0, 3),
LogNormal(0.0, 1.0),
LogNormal(0.0, 2.0),
LogNormal(3.0, 0.5),
LogNormal(3.0, 1.0),
LogNormal(3.0, 2.0),
NoncentralBeta(2,2,0),
NoncentralBeta(2,6,5),
NoncentralChisq(2,2),
NoncentralChisq(2,5),
NoncentralF(2,2,2),
NoncentralF(8,10,5),
NoncentralT(2,2),
NoncentralT(10,2),
Triweight(),
Triweight(2),
Triweight(1, 3),
Triweight(1),
]
println(" testing $(distr)")
test_distr(distr, n_tsamples)
test_distr(distr, n_tsamples; testquan=false)
end


# for distr in [
# VonMises(0.0, 1.0),
# VonMises(0.5, 1.0),
# VonMises(0.5, 2.0) ]

# println(" testing $(distr)")
# test_samples(distr, n_tsamples)
# end

# Test for non-Float64 input
using ForwardDiff
@test string(logpdf(Normal(0,1),big(1))) == "-1.418938533204672741780329736405617639861397473637783412817151540482765695927251"
Expand Down
Loading