Skip to content

Commit 843278d

Browse files
committed
fd-update
1 parent 7338045 commit 843278d

File tree

150 files changed

+48105
-6107
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

150 files changed

+48105
-6107
lines changed

__site/A-model-tuning-hm.png

-21.9 KB
Binary file not shown.

__site/A-model-tuning-hpt.svg

-877
This file was deleted.

__site/ISL-lab-5-g1.svg

-997
This file was deleted.

__site/ISL-lab-5-g3.svg

-1,425
This file was deleted.

__site/ISL-lab-5-g4.svg

-1,100
This file was deleted.

__site/_data/kc_housing.csv

+21,614
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# This file was generated, do not modify it. # hide
2+
using MLJ, PrettyPrinting, DataFrames, Statistics, CSV, Dates
3+
using PyPlot, HTTP
4+
MLJ.color_off() # hide

# Download the King County house-sales CSV as raw bytes.
req = HTTP.get("https://raw.githubusercontent.com/bbrandom91/KC_Housing/master/kc_house_data.csv")

# Parse the response body, reading the literal string "NA" as `missing`.
# Going through `CSV.File` and the `DataFrame` constructor avoids the sink-less
# `CSV.read(source)` form, which newer CSV.jl releases no longer accept.
df = DataFrame(CSV.File(req.body; missingstring="NA"))
describe(df)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# Remove the yr_renovated, sqft_basement and zipcode columns from `df` in place.
select!(df, Not([:yr_renovated, :sqft_basement, :zipcode]));
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# This file was generated, do not modify it. # hide
2+
# Density-normalised histogram of sale prices, saved under the `@OUTPUT` path.
fig_size = (8, 6)
plt.figure(; figsize=fig_size)
plt.hist(df.price; bins=50, density=true, color="blue", edgecolor="white")
plt.xlabel("Price"; fontsize=14)
plt.ylabel("Frequency"; fontsize=14)
plt.savefig(joinpath(@OUTPUT, "hist_price.svg")) # hide
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# This file was generated, do not modify it. # hide
2+
# Overlay the price densities of renovated and unrenovated homes on one figure.
plt.figure(; figsize=(8, 6))
for (flag, shade, tag) in ((true, "blue", "renovated"), (false, "red", "unrenovated"))
    subset = df.price[df.isrenovated .== flag]
    plt.hist(subset; color=shade, density=true, edgecolor="white", bins=50,
             label=tag, alpha=0.5)
end
plt.xlabel("Price"; fontsize=14)
plt.ylabel("Frequency"; fontsize=14)
plt.legend(; fontsize=12)
plt.savefig(joinpath(@OUTPUT, "hist_price2.svg")) # hide
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# This file was generated, do not modify it. # hide
2+
@load DecisionTreeRegressor

# Separate the target (`price`) from the features (every remaining column).
y, X = unpack(df, ==(:price), _ -> true)

# Shuffled, seeded split of the row indices; the first part takes a 0.7 fraction.
train, test = partition(eachindex(y), 0.7; shuffle=true, rng=5)

# Bind a default decision-tree regressor to the data and fit on the training rows.
tree_model = DecisionTreeRegressor()
tree = machine(tree_model, X, y)
fit!(tree; rows=train);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# RMS error of the fitted decision tree on the held-out rows.
tree_pred = predict(tree; rows=test)
rms(y[test], tree_pred)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# Load the RandomForestRegressor model, explicitly selecting the ScikitLearn package.
@load RandomForestRegressor pkg=ScikitLearn
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# Convert every Finite-scitype column of `X` to Count, in place.
coerce!(X, Finite => Count);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# This file was generated, do not modify it. # hide
2+
# Fit a default random forest on the training rows, then score the held-out rows.
rf_mdl = RandomForestRegressor()
rf = machine(rf_mdl, X, y)
fit!(rf; rows=train)

rf_pred = predict(rf; rows=test)
rms(y[test], rf_pred)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# This file was generated, do not modify it. # hide
2+
# NOTE(review): `cv3` is created here but is not the strategy handed to
# `evaluate` below, which builds its own `CV(shuffle=true)` — confirm intent.
cv3 = CV(; nfolds=3)

# Cross-validated RMS of the random forest model over the full table.
res = evaluate(
    rf_mdl, X, y;
    resampling=CV(shuffle=true),
    measure=rms,
    verbosity=0,
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# Load the XGBoostRegressor model type so it can be instantiated later.
@load XGBoostRegressor
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# This file was generated, do not modify it. # hide
2+
# Drop the :id and :date columns in place, then display the table schema.
select!(df, Not([:id, :date]))
3+
schema(df)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# This file was generated, do not modify it. # hide
2+
# Convert the Count columns of `X` to Continuous before building the XGBoost machine.
coerce!(X, Count => Continuous)

xgb = XGBoostRegressor()
xgbm = machine(xgb, X, y)
fit!(xgbm; rows=train)

# RMS error of the default XGBoost model on the held-out rows.
xgb_pred = predict(xgbm; rows=test)
rms(y[test], xgb_pred)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# This file was generated, do not modify it. # hide
2+
# Search ranges for two XGBoost hyperparameters: :max_depth and :num_round.
r1 = range(xgb, :max_depth; lower=3, upper=10)
r2 = range(xgb, :num_round; lower=1, upper=25);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# This file was generated, do not modify it. # hide
2+
# Grid-search the XGBoost model over the ranges `r1` and `r2`. The earlier
# list also named r3..r7, which are never created, so this cell stopped with
# `UndefVarError: r3 not defined` before any tuning ran.
tm = TunedModel(model=xgb, tuning=Grid(resolution=7),
                resampling=CV(rng=11), ranges=[r1, r2],
                measure=rms)
mtm = machine(tm, X, y)
fit!(mtm, rows=train)

# RMS error of the tuned model on the held-out rows.
rms(y[test], predict(mtm, rows=test))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# Close the PyPlot figures opened by the plotting cells.
PyPlot.close_figs() # hide
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# This file was generated, do not modify it. # hide
2+
# Treat zipcode as an unordered categorical instead of a count.
coerce!(df, :zipcode => Multiclass)

# Both source columns encode "none" as 0, not `missing`: `describe(df)` reports
# a minimum of 0 and no missing entries for them. A `!ismissing` test is
# therefore true for every row, so derive the flags by comparing against zero.
df.isrenovated = df.yr_renovated .!= 0
df.has_basement = df.sqft_basement .!= 0
schema(df)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# Give the two derived flag columns the OrderedFactor scitype.
coerce!(df, :isrenovated => OrderedFactor, :has_basement => OrderedFactor);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# List the distinct values stored in the waterfront column.
unique(df.waterfront)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# This file was generated, do not modify it. # hide
2+
# `waterfront` holds the integers 0 and 1, so comparing it with the string
# "FALSE" is true for every row and collapses the column to a single level.
# Compare with 0 to obtain a real two-level flag.
df.waterfront = (df.waterfront .!= 0)
coerce!(df, :waterfront => OrderedFactor);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# This file was generated, do not modify it. # hide
2+
# Ask `autotype` for scitype suggestions under the :few_to_finite rule,
# apply them to `df`, and display the resulting schema.
suggested = autotype(df, :few_to_finite)
coerce!(df, suggested)
schema(df)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This file was generated, do not modify it. # hide
2+
# Rescale the price column by dividing every entry by 1000.
df.price = df.price ./ 1000;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# This file was generated, do not modify it. # hide
2+
# Print one line per column that contains at least one `missing` entry.
for colname in names(df)
    n_missing = count(ismissing, df[!, colname])
    n_missing == 0 && continue
    println(rpad("$colname has ", 25), n_missing, " missings")
end

__site/assets/end-to-end/HouseKingCounty/code/output/ex1.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
21×8 DataFrame
2+
│ Row │ variable │ mean │ min │ median │ max │ nunique │ nmissing │ eltype │
3+
│ │ Symbol │ Union… │ Any │ Union… │ Any │ Union… │ Nothing │ DataType │
4+
├─────┼───────────────┼────────────┼─────────────────┼───────────┼─────────────────┼─────────┼──────────┼──────────┤
5+
│ 1 │ id │ 4.5803e9 │ 1000102 │ 3.90493e9 │ 9900000190 │ │ │ Int64 │
6+
│ 2 │ date │ │ 20140502T000000 │ │ 20150527T000000 │ 372 │ │ String │
7+
│ 3 │ price │ 540088.0 │ 75000.0 │ 450000.0 │ 7.7e6 │ │ │ Float64 │
8+
│ 4 │ bedrooms │ 3.37084 │ 0 │ 3.0 │ 33 │ │ │ Int64 │
9+
│ 5 │ bathrooms │ 2.11476 │ 0.0 │ 2.25 │ 8.0 │ │ │ Float64 │
10+
│ 6 │ sqft_living │ 2079.9 │ 290 │ 1910.0 │ 13540 │ │ │ Int64 │
11+
│ 7 │ sqft_lot │ 15107.0 │ 520 │ 7618.0 │ 1651359 │ │ │ Int64 │
12+
│ 8 │ floors │ 1.49431 │ 1.0 │ 1.5 │ 3.5 │ │ │ Float64 │
13+
│ 9 │ waterfront │ 0.00754176 │ 0 │ 0.0 │ 1 │ │ │ Int64 │
14+
│ 10 │ view │ 0.234303 │ 0 │ 0.0 │ 4 │ │ │ Int64 │
15+
│ 11 │ condition │ 3.40943 │ 1 │ 3.0 │ 5 │ │ │ Int64 │
16+
│ 12 │ grade │ 7.65687 │ 1 │ 7.0 │ 13 │ │ │ Int64 │
17+
│ 13 │ sqft_above │ 1788.39 │ 290 │ 1560.0 │ 9410 │ │ │ Int64 │
18+
│ 14 │ sqft_basement │ 291.509 │ 0 │ 0.0 │ 4820 │ │ │ Int64 │
19+
│ 15 │ yr_built │ 1971.01 │ 1900 │ 1975.0 │ 2015 │ │ │ Int64 │
20+
│ 16 │ yr_renovated │ 84.4023 │ 0 │ 0.0 │ 2015 │ │ │ Int64 │
21+
│ 17 │ zipcode │ 98077.9 │ 98001 │ 98065.0 │ 98199 │ │ │ Int64 │
22+
│ 18 │ lat │ 47.5601 │ 47.1559 │ 47.5718 │ 47.7776 │ │ │ Float64 │
23+
│ 19 │ long │ -122.214 │ -122.519 │ -122.23 │ -121.315 │ │ │ Float64 │
24+
│ 20 │ sqft_living15 │ 1986.55 │ 399 │ 1840.0 │ 6210 │ │ │ Int64 │
25+
│ 21 │ sqft_lot15 │ 12768.5 │ 651 │ 7620.0 │ 871200 │ │ │ Int64 │

__site/assets/end-to-end/HouseKingCounty/code/output/ex10.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex11.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex12.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex13.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex14.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
179.96112573828466

__site/assets/end-to-end/HouseKingCounty/code/output/ex15.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
RandomForestRegressor(
2+
n_estimators = 100,
3+
criterion = "mse",
4+
max_depth = nothing,
5+
min_samples_split = 2,
6+
min_samples_leaf = 1,
7+
min_weight_fraction_leaf = 0.0,
8+
max_features = "auto",
9+
max_leaf_nodes = nothing,
10+
min_impurity_decrease = 0.0,
11+
bootstrap = true,
12+
oob_score = false,
13+
n_jobs = nothing,
14+
random_state = nothing,
15+
verbose = 0,
16+
warm_start = false) @ 4…24

__site/assets/end-to-end/HouseKingCounty/code/output/ex16.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex17.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
137.65885615237326

__site/assets/end-to-end/HouseKingCounty/code/output/ex18.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
┌───────────┬───────────────┬────────────────────────────────────────────┐
2+
│ _.measure │ _.measurement │ _.per_fold                                 │
3+
├───────────┼───────────────┼────────────────────────────────────────────┤
4+
│ rms       │ 136.0         │ [148.0, 138.0, 123.0, 127.0, 143.0, 134.0] │
5+
└───────────┴───────────────┴────────────────────────────────────────────┘
6+
_.per_observation = [missing]

__site/assets/end-to-end/HouseKingCounty/code/output/ex19.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
XGBoostRegressor(
2+
num_round = 1,
3+
booster = "gbtree",
4+
disable_default_eval_metric = 0,
5+
eta = 0.3,
6+
gamma = 0.0,
7+
max_depth = 6,
8+
min_child_weight = 1.0,
9+
max_delta_step = 0.0,
10+
subsample = 1.0,
11+
colsample_bytree = 1.0,
12+
colsample_bylevel = 1.0,
13+
lambda = 1.0,
14+
alpha = 0.0,
15+
tree_method = "auto",
16+
sketch_eps = 0.03,
17+
scale_pos_weight = 1.0,
18+
updater = "auto",
19+
refresh_leaf = 1,
20+
process_type = "default",
21+
grow_policy = "depthwise",
22+
max_leaves = 0,
23+
max_bin = 256,
24+
predictor = "cpu_predictor",
25+
sample_type = "uniform",
26+
normalize_type = "tree",
27+
rate_drop = 0.0,
28+
one_drop = 0,
29+
skip_drop = 0.0,
30+
feature_selector = "cyclic",
31+
top_k = 0,
32+
tweedie_variance_power = 1.5,
33+
objective = "reg:linear",
34+
base_score = 0.5,
35+
eval_metric = "rmse",
36+
seed = 0) @ 9…92

__site/assets/end-to-end/HouseKingCounty/code/output/ex2.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
┌───────────────┬─────────┬────────────┐
2+
│ _.names │ _.types │ _.scitypes │
3+
├───────────────┼─────────┼────────────┤
4+
│ price │ Float64 │ Continuous │
5+
│ bedrooms │ Int64 │ Count │
6+
│ bathrooms │ Float64 │ Continuous │
7+
│ sqft_living │ Int64 │ Count │
8+
│ sqft_lot │ Int64 │ Count │
9+
│ floors │ Float64 │ Continuous │
10+
│ waterfront │ Int64 │ Count │
11+
│ view │ Int64 │ Count │
12+
│ condition │ Int64 │ Count │
13+
│ grade │ Int64 │ Count │
14+
│ sqft_above │ Int64 │ Count │
15+
│ sqft_basement │ Int64 │ Count │
16+
│ yr_built │ Int64 │ Count │
17+
│ yr_renovated │ Int64 │ Count │
18+
│ zipcode │ Int64 │ Count │
19+
│ lat │ Float64 │ Continuous │
20+
│ long │ Float64 │ Continuous │
21+
│ sqft_living15 │ Int64 │ Count │
22+
│ sqft_lot15 │ Int64 │ Count │
23+
└───────────────┴─────────┴────────────┘
24+
_.nrows = 21613

__site/assets/end-to-end/HouseKingCounty/code/output/ex20.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
480.70583223890355

__site/assets/end-to-end/HouseKingCounty/code/output/ex21.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
UndefVarError: r3 not defined
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex23.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex3.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
┌───────────────┬────────────────────────────────┬────────────────┐
2+
│ _.names       │ _.types                        │ _.scitypes     │
3+
├───────────────┼────────────────────────────────┼────────────────┤
4+
│ price         │ Float64                        │ Continuous     │
5+
│ bedrooms      │ Int64                          │ Count          │
6+
│ bathrooms     │ Float64                        │ Continuous     │
7+
│ sqft_living   │ Int64                          │ Count          │
8+
│ sqft_lot      │ Int64                          │ Count          │
9+
│ floors        │ Float64                        │ Continuous     │
10+
│ waterfront    │ Int64                          │ Count          │
11+
│ view          │ Int64                          │ Count          │
12+
│ condition     │ Int64                          │ Count          │
13+
│ grade         │ Int64                          │ Count          │
14+
│ sqft_above    │ Int64                          │ Count          │
15+
│ sqft_basement │ Int64                          │ Count          │
16+
│ yr_built      │ Int64                          │ Count          │
17+
│ yr_renovated  │ Int64                          │ Count          │
18+
│ zipcode       │ CategoricalValue{Int64,UInt32} │ Multiclass{70} │
19+
│ lat           │ Float64                        │ Continuous     │
20+
│ long          │ Float64                        │ Continuous     │
21+
│ sqft_living15 │ Int64                          │ Count          │
22+
│ sqft_lot15    │ Int64                          │ Count          │
23+
│ isrenovated   │ Bool                           │ Count          │
24+
│ has_basement  │ Bool                           │ Count          │
25+
└───────────────┴────────────────────────────────┴────────────────┘
26+
_.nrows = 21613

__site/assets/end-to-end/HouseKingCounty/code/output/ex4.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex5.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2-element Array{Int64,1}:
2+
0
3+
1

__site/assets/end-to-end/HouseKingCounty/code/output/ex6.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex7.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
┌───────────────┬──────────────────────────────────┬───────────────────┐
2+
│ _.names       │ _.types                          │ _.scitypes        │
3+
├───────────────┼──────────────────────────────────┼───────────────────┤
4+
│ price         │ Float64                          │ Continuous        │
5+
│ bedrooms      │ CategoricalValue{Int64,UInt32}   │ OrderedFactor{13} │
6+
│ bathrooms     │ CategoricalValue{Float64,UInt32} │ OrderedFactor{30} │
7+
│ sqft_living   │ Int64                            │ Count             │
8+
│ sqft_lot      │ Int64                            │ Count             │
9+
│ floors        │ CategoricalValue{Float64,UInt32} │ OrderedFactor{6}  │
10+
│ waterfront    │ CategoricalValue{Bool,UInt32}    │ OrderedFactor{1}  │
11+
│ view          │ CategoricalValue{Int64,UInt32}   │ OrderedFactor{5}  │
12+
│ condition     │ CategoricalValue{Int64,UInt32}   │ OrderedFactor{5}  │
13+
│ grade         │ CategoricalValue{Int64,UInt32}   │ OrderedFactor{12} │
14+
│ sqft_above    │ Int64                            │ Count             │
15+
│ sqft_basement │ Int64                            │ Count             │
16+
│ yr_built      │ Int64                            │ Count             │
17+
│ yr_renovated  │ CategoricalValue{Int64,UInt32}   │ OrderedFactor{70} │
18+
│ zipcode       │ CategoricalValue{Int64,UInt32}   │ Multiclass{70}    │
19+
│ lat           │ Float64                          │ Continuous        │
20+
│ long          │ Float64                          │ Continuous        │
21+
│ sqft_living15 │ Int64                            │ Count             │
22+
│ sqft_lot15    │ Int64                            │ Count             │
23+
│ isrenovated   │ CategoricalValue{Bool,UInt32}    │ OrderedFactor{1}  │
24+
│ has_basement  │ CategoricalValue{Bool,UInt32}    │ OrderedFactor{1}  │
25+
└───────────────┴──────────────────────────────────┴───────────────────┘
26+
_.nrows = 21613

__site/assets/end-to-end/HouseKingCounty/code/output/ex8.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

__site/assets/end-to-end/HouseKingCounty/code/output/ex9.out

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nothing

0 commit comments

Comments
 (0)