Skip to content

Commit 08665ef

Browse files
committed
Restructured the regularization module
+Simplified code +Changed images +Improved text
1 parent b9a2c1b commit 08665ef

13 files changed

+230
-184
lines changed

code/overview/regularization/regularization_base.py

+5-13
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,16 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
import numpy as np
56

6-
# We will create some x-values and randomly choose some as data points
7-
X = np.linspace(0, 10, 100)
8-
# We are fixing the random number seed for consistency
9-
rn = np.random.RandomState(0)
10-
# Shuffle the data for variety
11-
rn.shuffle(X)
12-
# Grab the first 30 of our shuffled points and sort them for plotting
13-
X = np.sort(X[:30])
14-
# Our output will be a quadratic function
15-
y = X**2
16-
# We will add some variance to the data so that it's more interesting
17-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
7+
# Create a data set for analysis
8+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
9+
y = y ** 2
1810

1911
# Plot data
2012
sns.set_style("darkgrid")
21-
plt.scatter(X, y, marker='o')
13+
plt.scatter(x, y, marker='o')
2214
plt.xticks(())
2315
plt.yticks(())
2416
plt.tight_layout()

code/overview/regularization/regularization_lasso.py

+19-25
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,36 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression, Lasso
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
22-
model = Pipeline([('poly', PolynomialFeatures(degree=6)), \
13+
# Pipeline lets us set the steps for our modeling
14+
# We are comparing a standard polynomial model against one with lasso
15+
model = Pipeline([('poly', PolynomialFeatures(degree=10)), \
2316
('linear', LinearRegression(fit_intercept=False))])
24-
regModel = Pipeline([('poly', PolynomialFeatures(degree=6)), \
25-
('lasso', Lasso(alpha=0.1, max_iter=100000))])
17+
regModel = Pipeline([('poly', PolynomialFeatures(degree=10)), \
18+
('lasso', Lasso(alpha=5, max_iter=1000000))])
19+
2620
# Now we train on our data
27-
model = model.fit(X[:, np.newaxis], y)
28-
regModel = regModel.fit(X[:, np.newaxis], y)
21+
model = model.fit(x, y)
22+
regModel = regModel.fit(x, y)
2923
# Now we predict
30-
X_plot = np.linspace(0, 10, 100)
31-
X_plot = X_plot[:, np.newaxis]
32-
y_plot = model.predict(X_plot)
33-
yReg_plot = regModel.predict(X_plot)
24+
x_plot = np.linspace(min(x)[0], max(x)[0], 100)
25+
x_plot = x_plot[:, np.newaxis]
26+
y_plot = model.predict(x_plot)
27+
yReg_plot = regModel.predict(x_plot)
3428

3529
# Plot data
3630
sns.set_style("darkgrid")
37-
plt.plot(X_plot, y_plot, color='black')
38-
plt.plot(X_plot, yReg_plot, color='red')
39-
plt.scatter(X, y, marker='o')
31+
plt.plot(x_plot, y_plot, color='black')
32+
plt.plot(x_plot, yReg_plot, color='red')
33+
plt.scatter(x, y, marker='o')
4034
plt.xticks(())
4135
plt.yticks(())
4236
plt.tight_layout()

code/overview/regularization/regularization_linear.py

+11-19
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,29 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
13+
# Pipeline lets us set the steps for our modeling
14+
# We are using a simple linear model here
2215
model = Pipeline([('poly', PolynomialFeatures(degree=1)), \
2316
('linear', LinearRegression(fit_intercept=False))])
17+
2418
# Now we train on our data
25-
model = model.fit(X[:, np.newaxis], y)
19+
model = model.fit(x, y)
2620
# Now we predict
27-
X_plot = np.linspace(0, 10, 100)
28-
X_plot = X_plot[:, np.newaxis]
29-
y_plot = model.predict(X_plot)
21+
y_predictions = model.predict(x)
3022

3123
# Plot data
3224
sns.set_style("darkgrid")
33-
plt.plot(X_plot, y_plot, color='black')
34-
plt.scatter(X, y, marker='o')
25+
plt.plot(x, y_predictions, color='black')
26+
plt.scatter(x, y, marker='o')
3527
plt.xticks(())
3628
plt.yticks(())
3729
plt.tight_layout()

code/overview/regularization/regularization_polynomial.py

+14-19
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,32 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
13+
# Pipeline lets us set the steps for our modeling
14+
# We are using a polynomial model here (polynomial with degree 10)
2215
model = Pipeline([('poly', PolynomialFeatures(degree=10)), \
2316
('linear', LinearRegression(fit_intercept=False))])
17+
2418
# Now we train on our data
25-
model = model.fit(X[:, np.newaxis], y)
19+
model = model.fit(x, y)
2620
# Now we predict
27-
X_plot = np.linspace(0, 10, 100)
28-
X_plot = X_plot[:, np.newaxis]
29-
y_plot = model.predict(X_plot)
21+
# The next two lines are used to model input for our prediction graph
22+
x_plot = np.linspace(min(x)[0], max(x)[0], 100)
23+
x_plot = x_plot[:, np.newaxis]
24+
y_predictions = model.predict(x_plot)
3025

3126
# Plot data
3227
sns.set_style("darkgrid")
33-
plt.plot(X_plot, y_plot, color='black')
34-
plt.scatter(X, y, marker='o')
28+
plt.plot(x_plot, y_predictions, color='black')
29+
plt.scatter(x, y, marker='o')
3530
plt.xticks(())
3631
plt.yticks(())
3732
plt.tight_layout()

code/overview/regularization/regularization_quadratic.py

+14-19
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,32 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
13+
# Pipeline lets us set the steps for our modeling
14+
# We are using a quadratic model here (polynomial with degree 2)
2215
model = Pipeline([('poly', PolynomialFeatures(degree=2)), \
2316
('linear', LinearRegression(fit_intercept=False))])
17+
2418
# Now we train on our data
25-
model = model.fit(X[:, np.newaxis], y)
19+
model = model.fit(x, y)
2620
# Now we predict
27-
X_plot = np.linspace(0, 10, 100)
28-
X_plot = X_plot[:, np.newaxis]
29-
y_plot = model.predict(X_plot)
21+
# The next two lines are used to model input for our prediction graph
22+
x_plot = np.linspace(min(x)[0], max(x)[0], 100)
23+
x_plot = x_plot[:, np.newaxis]
24+
y_predictions = model.predict(x_plot)
3025

3126
# Plot data
3227
sns.set_style("darkgrid")
33-
plt.plot(X_plot, y_plot, color='black')
34-
plt.scatter(X, y, marker='o')
28+
plt.plot(x_plot, y_predictions, color='black')
29+
plt.scatter(x, y, marker='o')
3530
plt.xticks(())
3631
plt.yticks(())
3732
plt.tight_layout()

code/overview/regularization/regularization_ridge.py

+19-24
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,37 @@
11
import matplotlib.pyplot as plt
22
import seaborn as sns
33
from sklearn.preprocessing import PolynomialFeatures
4+
from sklearn.datasets import make_regression
45
from sklearn.linear_model import LinearRegression, Ridge
56
from sklearn.pipeline import Pipeline
67
import numpy as np
78

8-
# We will create some x-values and randomly choose some as data points
9-
X = np.linspace(0, 10, 100)
10-
# We are fixing the random number seed for consistency
11-
rn = np.random.RandomState(0)
12-
# Shuffle the data for variety
13-
rn.shuffle(X)
14-
# Grab the first 30 of our shuffled points and sort them for plotting
15-
X = np.sort(X[:30])
16-
# Our output will be a quadratic function
17-
y = X**2
18-
# We will add some variance to the data so that it's more interesting
19-
y = y + (((np.random.rand(30) * 2) - 1) * 30)
9+
# Create a data set for analysis
10+
x, y = make_regression(n_samples=100, n_features = 1, noise=15, random_state=0)
11+
y = y ** 2
2012

21-
# Pipeline lets us setup a fixed number of steps for our modeling
22-
model = Pipeline([('poly', PolynomialFeatures(degree=6)), \
13+
# Pipeline lets us set the steps for our modeling
14+
# We are comparing a standard polynomial model against one with ridge
15+
model = Pipeline([('poly', PolynomialFeatures(degree=10)), \
2316
('linear', LinearRegression(fit_intercept=False))])
24-
regModel = Pipeline([('poly', PolynomialFeatures(degree=6)), \
17+
regModel = Pipeline([('poly', PolynomialFeatures(degree=10)), \
2518
('ridge', Ridge(alpha=5.0))])
19+
2620
# Now we train on our data
27-
model = model.fit(X[:, np.newaxis], y)
28-
regModel = regModel.fit(X[:, np.newaxis], y)
21+
model = model.fit(x, y)
22+
regModel = regModel.fit(x, y)
2923
# Now we predict
30-
X_plot = np.linspace(0, 10, 100)
31-
X_plot = X_plot[:, np.newaxis]
32-
y_plot = model.predict(X_plot)
33-
yReg_plot = regModel.predict(X_plot)
24+
# The next four lines are used to model input for our prediction graph
25+
x_plot = np.linspace(min(x)[0], max(x)[0], 100)
26+
x_plot = x_plot[:, np.newaxis]
27+
y_plot = model.predict(x_plot)
28+
yReg_plot = regModel.predict(x_plot)
3429

3530
# Plot data
3631
sns.set_style("darkgrid")
37-
plt.plot(X_plot, y_plot, color='black')
38-
plt.plot(X_plot, yReg_plot, color='red')
39-
plt.scatter(X, y, marker='o')
32+
plt.plot(x_plot, y_plot, color='black')
33+
plt.plot(x_plot, yReg_plot, color='red')
34+
plt.scatter(x, y, marker='o')
4035
plt.xticks(())
4136
plt.yticks(())
4237
plt.tight_layout()
Binary file not shown.
Loading
Loading
Loading
Loading
Loading

0 commit comments

Comments
 (0)