Commit 5ba728d

Relax label vector type to AbstractVector (JuliaML#65)
* Relax label vector type to AbstractVector
* info for test
* update README
* add test cases for JLD2
1 parent 96ba7e4 commit 5ba728d
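
To make the change concrete, here is a minimal usage sketch of what the relaxed signature allows, assuming the post-commit API: `svmtrain` now accepts any `AbstractVector` of labels, so a `CategoricalVector` such as `iris.Species` no longer needs a `levelcode.` conversion:

```julia
using LIBSVM, RDatasets

iris = dataset("datasets", "iris")
X = Matrix(iris[:, 1:4])'   # (nfeatures, nsamples)

# iris.Species is a CategoricalVector: an AbstractVector, but not a Vector.
# Before this commit it had to be converted (e.g. via levelcode.); now it
# can be passed to svmtrain as-is.
y = iris.Species
model = svmtrain(X[:, 1:2:end], y[1:2:end])
ŷ, _ = svmpredict(model, X[:, 2:2:end])
```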

File tree

6 files changed: +217 −117 lines

.editorconfig (+3)

```diff
@@ -0,0 +1,3 @@
+[*.jl]
+indent_style = space
+indent_size = 4
```

Project.toml (+4 −1)

```diff
@@ -15,8 +15,11 @@ julia = "1.3"
 
 [extras]
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DelimitedFiles", "Test", "SparseArrays"]
+test = ["DelimitedFiles", "FileIO", "JLD2", "RDatasets", "SparseArrays", "Test"]
```
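
FileIO, JLD2, and RDatasets join the test dependencies; given the commit message's "add test cases for JLD2", the intent is presumably to check that a trained model survives a JLD2 round trip. A hypothetical sketch of such a test (the file name and the exact assertions here are illustrative, not the actual test code):

```julia
using FileIO, JLD2, LIBSVM, Test

X = rand(4, 100)
y = repeat(["a", "b"], 50)
model = svmtrain(X, y)

# Save and reload the model, then check predictions are unchanged.
path = joinpath(mktempdir(), "model.jld2")
save(path, "model", model)
loaded = load(path, "model")
@test first(svmpredict(loaded, X)) == first(svmpredict(model, X))
```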

README.md (+34 −20)

````diff
@@ -22,26 +22,33 @@ for options.
 ```julia
 using LIBSVM
 using RDatasets
-using Printf, Statistics
+using Printf
+using Statistics
 
 # Load Fisher's classic iris data
 iris = dataset("datasets", "iris")
 
+# First four dimension of input data is features
+X = Matrix(iris[:, 1:4])'
+
 # LIBSVM handles multi-class data automatically using a one-against-one strategy
-labels = levelcode.(iris[:Species])
+y = iris.Species
 
-# First dimension of input data is features; second is instances
-instances = convert(Array, iris[:, 1:4])'
+# Split the dataset into training set and testing set
+Xtrain = X[:, 1:2:end]
+Xtest = X[:, 2:2:end]
+ytrain = y[1:2:end]
+ytest = y[2:2:end]
 
 # Train SVM on half of the data using default parameters. See documentation
 # of svmtrain for options
-model = svmtrain(instances[:, 1:2:end], labels[1:2:end]);
+model = svmtrain(Xtrain, ytrain)
 
 # Test model on the other half of the data.
-(predicted_labels, decision_values) = svmpredict(model, instances[:, 2:2:end]);
+ŷ, decision_values = svmpredict(model, Xtest);
 
 # Compute accuracy
-@printf "Accuracy: %.2f%%\n" mean((predicted_labels .== labels[2:2:end]))*100
+@printf "Accuracy: %.2f%%\n" mean(ŷ .== ytest) * 100
 ```
 
 ### ScikitLearn API
@@ -52,22 +59,29 @@ You can alternatively use `ScikitLearn.jl` API with same options as `svmtrain`:
 using LIBSVM
 using RDatasets
 
-#Classification C-SVM
+# Classification C-SVM
 iris = dataset("datasets", "iris")
-labels = levelcode.(iris[:, :Species])
-instances = convert(Array, iris[:, 1:4])
-model = fit!(SVC(), instances[1:2:end, :], labels[1:2:end])
-yp = predict(model, instances[2:2:end, :])
+X = Matrix(iris[:, 1:4])
+y = iris.Species
+
+Xtrain = X[1:2:end, :]
+Xtest = X[2:2:end, :]
+ytrain = y[1:2:end]
+ytest = y[2:2:end]
+
+model = fit!(SVC(), Xtrain, ytrain)
+ŷ = predict(model, Xtest)
+```
+
+```julia
+# Epsilon-Regression
 
-#epsilon-regression
 whiteside = RDatasets.dataset("MASS", "whiteside")
-X = Array(whiteside[:Gas])
-if typeof(X) <: AbstractVector
-    X = reshape(X, (length(X),1))
-end
-y = Array(whiteside[:Temp])
-svrmod = fit!(EpsilonSVR(cost = 10., gamma = 1.), X, y)
-yp = predict(svrmod, X)
+X = Matrix(whiteside[:, 3:3]) # the `Gas` column
+y = whiteside.Temp
+
+model = fit!(EpsilonSVR(cost = 10., gamma = 1.), X, y)
+ŷ = predict(model, X)
 ```
 
 ## Credits
````
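
The epsilon-regression rewrite above drops the old reshape branch: indexing a DataFrame with a unit range like `3:3` already yields a one-column table. A small equivalence sketch, assuming `whiteside` has `Gas` as its third column (as the new README's comment states):

```julia
using RDatasets

whiteside = RDatasets.dataset("MASS", "whiteside")

# Indexing with a unit range keeps matrix shape, so no reshape is needed:
X_new = Matrix(whiteside[:, 3:3])              # one-column matrix
X_old = reshape(Vector(whiteside.Gas), :, 1)   # what the old README built
X_new == X_old                                 # true
```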

src/LIBSVM.jl (+43 −32)

````diff
@@ -1,22 +1,24 @@
-__precompile__()
 module LIBSVM
+
+
 import LIBLINEAR
+
 using SparseArrays
 using libsvm_jll
 
 export svmtrain, svmpredict, fit!, predict, transform,
-          SVC, NuSVC, OneClassSVM, NuSVR, EpsilonSVR, LinearSVC,
-          Linearsolver, Kernel
+    SVC, NuSVC, OneClassSVM, NuSVR, EpsilonSVR, LinearSVC,
+    Linearsolver, Kernel
 
 include("LibSVMtypes.jl")
 include("constants.jl")
 
 verbosity = false
 
-struct SupportVectors{T, U}
+struct SupportVectors{T,U}
     l::Int32
     nSV::Vector{Int32}
-    y::Vector{T}
+    y::AbstractVector{T}
     X::AbstractMatrix{U}
     indices::Vector{Int32}
     SVnodes::Vector{SVMNode}
@@ -36,8 +38,7 @@ function SupportVectors(smc::SVMModel, y, X)
 
     yi = smc.param.svm_type == 2 ? Float64[] : y[sv_indices]
 
-    SupportVectors(smc.l, nSV, yi , X[:,sv_indices],
-        sv_indices, nodes)
+    SupportVectors(smc.l, nSV, yi , X[:,sv_indices], sv_indices, nodes)
 end
 
 struct SVM{T}
@@ -68,7 +69,7 @@ struct SVM{T}
     probability::Bool
 end
 
-function SVM(smc::SVMModel, y::T, X, weights, labels, svmtype, kernel) where T
+function SVM(smc::SVMModel, y, X, weights, labels, svmtype, kernel)
     svs = SupportVectors(smc, y, X)
     coefs = zeros(smc.l, smc.nr_class-1)
     for k in 1:(smc.nr_class-1)
@@ -266,39 +267,48 @@ function set_num_threads(nt::Integer)
 end
 
 """
-```julia
-svmtrain{T, U<:Real}(X::AbstractMatrix{U}, y::AbstractVector{T}=[];
-    svmtype::Type=SVC, kernel::Kernel.KERNEL=Kernel.RadialBasis, degree::Integer=3,
-    gamma::Float64=1.0/size(X, 1), coef0::Float64=0.0,
-    cost::Float64=1.0, nu::Float64=0.5, epsilon::Float64=0.1,
-    tolerance::Float64=0.001, shrinking::Bool=true,
-    probability::Bool=false, weights::Union{Dict{T, Float64}, Cvoid}=nothing,
-    cachesize::Float64=200.0, verbose::Bool=false)
-```
+    svmtrain(
+        X::AbstractMatrix{U}, y::AbstractVector{T} = [];
+        svmtype::Type = SVC,
+        kernel::Kernel.KERNEL = Kernel.RadialBasis,
+        degree::Integer = 3,
+        gamma::Float64 = 1.0/size(X, 1),
+        coef0::Float64 = 0.0,
+        cost::Float64=1.0,
+        nu::Float64 = 0.5,
+        epsilon::Float64 = 0.1,
+        tolerance::Float64 = 0.001,
+        shrinking::Bool = true,
+        probability::Bool = false,
+        weights::Union{Dict{T,Float64},Cvoid} = nothing,
+        cachesize::Float64 = 200.0,
+        verbose::Bool = false
+    ) where {T,U<:Real}
 
 Train Support Vector Machine using LIBSVM using response vector `y`
-and training data `X`. The shape of `X` needs to be (nfeatures, nsamples).
+and training data `X`. The shape of `X` needs to be `(nfeatures, nsamples)`.
 For one-class SVM use only `X`.
 
 # Arguments
 
-* `svmtype::Type=LIBSVM.SVC`: Type of SVM to train `SVC` (for C-SVM), `NuSVC`
+* `svmtype::Type = LIBSVM.SVC`: Type of SVM to train `SVC` (for C-SVM), `NuSVC`
   `OneClassSVM`, `EpsilonSVR` or `NuSVR`. Defaults to `OneClassSVM` if
   `y` is not used.
-* `kernel::Kernels.KERNEL=Kernel.RadialBasis`: Model kernel `Linear`, `Polynomial`,
+* `kernel::Kernels.KERNEL = Kernel.RadialBasis`: Model kernel `Linear`, `Polynomial`,
   `RadialBasis`, `Sigmoid` or `Precomputed`.
-* `degree::Integer=3`: Kernel degree. Used for polynomial kernel
-* `gamma::Float64=1.0/size(X, 1)` : γ for kernels
-* `coef0::Float64=0.0`: parameter for sigmoid and polynomial kernel
-* `cost::Float64=1.0`: cost parameter C of C-SVC, epsilon-SVR, and nu-SVR
-* `nu::Float64=0.5`: parameter nu of nu-SVC, one-class SVM, and nu-SVR
-* `epsilon::Float64=0.1`: epsilon in loss function of epsilon-SVR
-* `tolerance::Float64=0.001`: tolerance of termination criterion
-* `shrinking::Bool=true`: whether to use the shrinking heuristics
-* `probability::Bool=false`: whether to train a SVC or SVR model for probability estimates
+* `degree::Integer = 3`: Kernel degree. Used for polynomial kernel
+* `gamma::Float64 = 1.0/size(X, 1)` : γ for kernels
+* `coef0::Float64 = 0.0`: parameter for sigmoid and polynomial kernel
+* `cost::Float64 = 1.0`: cost parameter C of C-SVC, epsilon-SVR, and nu-SVR
+* `nu::Float64 = 0.5`: parameter nu of nu-SVC, one-class SVM, and nu-SVR
+* `epsilon::Float64 = 0.1`: epsilon in loss function of epsilon-SVR
+* `tolerance::Float64 = 0.001`: tolerance of termination criterion
+* `shrinking::Bool = true`: whether to use the shrinking heuristics
+* `probability::Bool = false`: whether to train a SVC or SVR model for probability estimates
 * `weights::Union{Dict{T, Float64}, Cvoid}=nothing`: dictionary of class weights
-* `cachesize::Float64=100.0`: cache memory size in MB
-* `verbose::Bool=false`: print training output from LIBSVM if true
-* `nt::Integer=0`: number of OpenMP cores to use, if 0 it is set to OMP_NUM_THREADS, if negative it is set to the max number of threads
+* `cachesize::Float64 = 100.0`: cache memory size in MB
+* `verbose::Bool = false`: print training output from LIBSVM if true
+* `nt::Integer = 0`: number of OpenMP cores to use, if 0 it is set to OMP_NUM_THREADS, if negative it is set to the max number of threads
 
 Consult LIBSVM documentation for advice on the choise of correct
 parameters and model tuning.
@@ -419,4 +429,5 @@ end
 include("ScikitLearnTypes.jl")
 include("ScikitLearnAPI.jl")
 
+
 end
````
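
The reformatted docstring signature now reads directly as a call. A quick sketch exercising some of the documented keywords, with made-up data (the values chosen here are arbitrary, not recommendations):

```julia
using LIBSVM

X = rand(4, 100)                # (nfeatures, nsamples), as the docstring requires
y = repeat(["neg", "pos"], 50)

# Keyword names follow the docstring above.
model = svmtrain(X, y;
    svmtype = SVC,
    kernel = Kernel.Linear,
    cost = 1.0,
    tolerance = 0.001,
    verbose = false)

ŷ, decision_values = svmpredict(model, X)
```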

src/ScikitLearnAPI.jl (+2 −2)

```diff
@@ -65,7 +65,7 @@ LinearSVC(;solver = Linearsolver.L2R_L2LOSS_SVC_DUAL,
            cost, p, bias, verbose, nothing)
 @declare_hyperparameters(LinearSVC, [:solver, :weights, :tolerance, :cost, :p, :bias])
 
-function fit!(model::Union{AbstractSVC,AbstractSVR}, X::AbstractMatrix, y::Vector=[])
+function fit!(model::Union{AbstractSVC,AbstractSVR}, X::AbstractMatrix, y::AbstractVector = [])
     #Build arguments for calling svmtrain
     model.gamma == :auto && (model.gamma = 1.0/size(X', 1))
     kwargs = Tuple{Symbol, Any}[]
@@ -97,7 +97,7 @@ function get_params(model::Union{AbstractSVC,AbstractSVR, LinearSVC})
     return params
 end
 
-function fit!(model::LinearSVC, X::AbstractMatrix, y::Vector)
+function fit!(model::LinearSVC, X::AbstractMatrix, y::AbstractVector)
     model.fit = LIBLINEAR.linear_train(y, X', solver_type = Int32(model.solver),
         weights = model.weights, C = model.cost, bias = model.bias,
         p = model.p, eps = model.tolerance, verbose = model.verbose)
```
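
With `y::AbstractVector`, the ScikitLearn-style `fit!` should now also accept label containers that are not plain `Vector`s, such as views. A minimal sketch under that assumption:

```julia
using LIBSVM

X = rand(100, 4)                 # ScikitLearn API: (nsamples, nfeatures)
y = repeat(["a", "b"], 50)

# view(y, ...) is a SubArray <: AbstractVector, rejected by the old
# y::Vector signature but accepted after this commit.
model = fit!(SVC(), X[1:2:end, :], view(y, 1:2:100))
ŷ = predict(model, X[2:2:end, :])
```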
