Skip to content

Commit 08665ef

Browse files
committed
Restructured the regularization module
+Simplified code +Changed images +Improved text
1 parent b9a2c1b commit 08665ef

13 files changed

+230
-184
lines changed

code/overview/regularization/regularization_base.py

+5-13
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,16 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
import numpy as np
56

6-
# We will create some x-values and randomly choose some as data points
7-
X = np.linspace(0, 10, 100)
8-
# We are fixing the random number seed for consistency
9-
rn = np.random.RandomState(0)
10-
# Shuffle the data for variety
11-
rn.shuffle(X)
12-
# Grab the first 30 of our shuffled points and sort them for plotting
13-
X = np.sort(X[:30])
14-
# Our output will be a quadratic function
15-
y = X**2
16-
# We will add some variance to the data so that it's more interesting
17-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
7+
# Create a data set for analysis
8+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
9+
y = y ** 2
1810

1911
# Plot data
2012
sns.set_style("darkgrid")
21-
plt.scatter(X, y, marker='o')
13+
plt.scatter(x, y, marker='o')
2214
plt.xticks(())
2315
plt.yticks(())
2416
plt.tight_layout()

code/overview/regularization/regularization_lasso.py

+19-25
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,36 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression, Lasso
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
22-
model = Pipeline([('poly', PolynomialFeatures(degree=6)), \
13+
# Pipeline lets us set the steps for our modeling
14+
# We are comparing a standard polynomial model against one with lasso
15+
model = Pipeline([('poly', PolynomialFeatures(degree=10)), \
2316
('linear', LinearRegression(fit_intercept=False))])
24-
regModel = Pipeline([('poly', PolynomialFeatures(degree=6)), \
25-
('lasso', Lasso(alpha=0.1, max_iter=100000))])
17+
regModel = Pipeline([('poly', PolynomialFeatures(degree=10)), \
18+
('lasso', Lasso(alpha=5, max_iter=1000000))])
19+
2620
# Now we train on our data
27-
model = model.fit(X[:, np.newaxis], y)
28-
regModel = regModel.fit(X[:, np.newaxis], y)
21+
model = model.fit(x, y)
22+
regModel = regModel.fit(x, y)
2923
# Now we predict
30-
X_plot = np.linspace(0, 10, 100)
31-
X_plot = X_plot[:, np.newaxis]
32-
y_plot = model.predict(X_plot)
33-
yReg_plot = regModel.predict(X_plot)
24+
x_plot = np.linspace(min(x)[0], max(x)[0], 100)
25+
x_plot = x_plot[:, np.newaxis]
26+
y_plot = model.predict(x_plot)
27+
yReg_plot = regModel.predict(x_plot)
3428

3529
# Plot data
3630
sns.set_style("darkgrid")
37-
plt.plot(X_plot, y_plot, color='black')
38-
plt.plot(X_plot, yReg_plot, color='red')
39-
plt.scatter(X, y, marker='o')
31+
plt.plot(x_plot, y_plot, color='black')
32+
plt.plot(x_plot, yReg_plot, color='red')
33+
plt.scatter(x, y, marker='o')
4034
plt.xticks(())
4135
plt.yticks(())
4236
plt.tight_layout()

code/overview/regularization/regularization_linear.py

+11-19
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,29 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
13+
# Pipeline lets us set the steps for our modeling
14+
# We are using a simple linear model here
2215
model = Pipeline([('poly', PolynomialFeatures(degree=1)), \
2316
('linear', LinearRegression(fit_intercept=False))])
17+
2418
# Now we train on our data
25-
model = model.fit(X[:, np.newaxis], y)
19+
model = model.fit(x, y)
2620
# Now we predict
27-
X_plot = np.linspace(0, 10, 100)
28-
X_plot = X_plot[:, np.newaxis]
29-
y_plot = model.predict(X_plot)
21+
y_predictions = model.predict(x)
3022

3123
# Plot data
3224
sns.set_style("darkgrid")
33-
plt.plot(X_plot, y_plot, color='black')
34-
plt.scatter(X, y, marker='o')
25+
plt.plot(x, y_predictions, color='black')
26+
plt.scatter(x, y, marker='o')
3527
plt.xticks(())
3628
plt.yticks(())
3729
plt.tight_layout()

code/overview/regularization/regularization_polynomial.py

+14-19
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,32 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
13+
# Pipeline lets us set the steps for our modeling
14+
# We are using a polynomial model here (polynomial with degree 10)
2215
model = Pipeline([('poly', PolynomialFeatures(degree=10)), \
2316
('linear', LinearRegression(fit_intercept=False))])
17+
2418
# Now we train on our data
25-
model = model.fit(X[:, np.newaxis], y)
19+
model = model.fit(x, y)
2620
# Now we predict
27-
X_plot = np.linspace(0, 10, 100)
28-
X_plot = X_plot[:, np.newaxis]
29-
y_plot = model.predict(X_plot)
21+
# The next two lines are used to model input for our prediction graph
22+
x_plot = np.linspace(min(x)[0], max(x)[0], 100)
23+
x_plot = x_plot[:, np.newaxis]
24+
y_predictions = model.predict(x_plot)
3025

3126
# Plot data
3227
sns.set_style("darkgrid")
33-
plt.plot(X_plot, y_plot, color='black')
34-
plt.scatter(X, y, marker='o')
28+
plt.plot(x_plot, y_predictions, color='black')
29+
plt.scatter(x, y, marker='o')
3530
plt.xticks(())
3631
plt.yticks(())
3732
plt.tight_layout()

code/overview/regularization/regularization_quadratic.py

+14-19
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,32 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
13+
# Pipeline lets us set the steps for our modeling
14+
# We are using a quadratic model here (polynomial with degree 2)
2215
model = Pipeline([('poly', PolynomialFeatures(degree=2)), \
2316
('linear', LinearRegression(fit_intercept=False))])
17+
2418
# Now we train on our data
25-
model = model.fit(X[:, np.newaxis], y)
19+
model = model.fit(x, y)
2620
# Now we predict
27-
X_plot = np.linspace(0, 10, 100)
28-
X_plot = X_plot[:, np.newaxis]
29-
y_plot = model.predict(X_plot)
21+
# The next two lines are used to model input for our prediction graph
22+
x_plot = np.linspace(min(x)[0], max(x)[0], 100)
23+
x_plot = x_plot[:, np.newaxis]
24+
y_predictions = model.predict(x_plot)
3025

3126
# Plot data
3227
sns.set_style("darkgrid")
33-
plt.plot(X_plot, y_plot, color='black')
34-
plt.scatter(X, y, marker='o')
28+
plt.plot(x_plot, y_predictions, color='black')
29+
plt.scatter(x, y, marker='o')
3530
plt.xticks(())
3631
plt.yticks(())
3732
plt.tight_layout()

code/overview/regularization/regularization_ridge.py

+19-24
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,37 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression, Ridge
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
22-
model = Pipeline([('poly', PolynomialFeatures(degree=6)), \
13+
# Pipeline lets us set the steps for our modeling
14+
# We are comparing a standard polynomial model against one with ridge
15+
model = Pipeline([('poly', PolynomialFeatures(degree=10)), \
2316
('linear', LinearRegression(fit_intercept=False))])
24-
regModel = Pipeline([('poly', PolynomialFeatures(degree=6)), \
17+
regModel = Pipeline([('poly', PolynomialFeatures(degree=10)), \
2518
('ridge', Ridge(alpha=5.0))])
19+
2620
# Now we train on our data
27-
model = model.fit(X[:, np.newaxis], y)
28-
regModel = regModel.fit(X[:, np.newaxis], y)
21+
model = model.fit(x, y)
22+
regModel = regModel.fit(x, y)
2923
# Now we predict
30-
X_plot = np.linspace(0, 10, 100)
31-
X_plot = X_plot[:, np.newaxis]
32-
y_plot = model.predict(X_plot)
33-
yReg_plot = regModel.predict(X_plot)
24+
# The next four lines are used to model input for our prediction graph
25+
x_plot = np.linspace(min(x)[0], max(x)[0], 100)
26+
x_plot = x_plot[:, np.newaxis]
27+
y_plot = model.predict(x_plot)
28+
yReg_plot = regModel.predict(x_plot)
3429

3530
# Plot data
3631
sns.set_style("darkgrid")
37-
plt.plot(X_plot, y_plot, color='black')
38-
plt.plot(X_plot, yReg_plot, color='red')
39-
plt.scatter(X, y, marker='o')
32+
plt.plot(x_plot, y_plot, color='black')
33+
plt.plot(x_plot, yReg_plot, color='red')
34+
plt.scatter(x, y, marker='o')
4035
plt.xticks(())
4136
plt.yticks(())
4237
plt.tight_layout()
Binary file not shown.
Loading
Loading
Loading
Loading
Loading

0 commit comments

Comments
 (0)