Skip to content

Commit 95f1cd3

Browse files
committed
Pushing the docs to dev/ for branch: main, commit e82550268fce38b38c09d90483cdef8c9bde846f
1 parent ed7ec50 commit 95f1cd3

File tree

1,346 files changed

+7144
-7217
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,346 files changed

+7144
-7217
lines changed
Binary file not shown.

dev/_downloads/1b8827af01c9a70017a4739bcf2e21a8/plot_gpr_co2.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,32 +33,34 @@
3333
# We will derive a dataset from the Mauna Loa Observatory that collected air
3434
# samples. We are interested in estimating the concentration of CO2 and
3535
# extrapolating it to future years. First, we load the original dataset available
36-
# in OpenML.
36+
# in OpenML as a pandas dataframe. This will be replaced with Polars
37+
# once `fetch_openml` adds native support for it.
3738
from sklearn.datasets import fetch_openml
3839

3940
co2 = fetch_openml(data_id=41187, as_frame=True)
4041
co2.frame.head()
4142

4243
# %%
43-
# First, we process the original dataframe to create a date index and select
44-
# only the CO2 column.
45-
import pandas as pd
44+
# First, we process the original dataframe to create a date column and select
45+
# it along with the CO2 column.
46+
import polars as pl
4647

47-
co2_data = co2.frame
48-
co2_data["date"] = pd.to_datetime(co2_data[["year", "month", "day"]])
49-
co2_data = co2_data[["date", "co2"]].set_index("date")
48+
co2_data = pl.DataFrame(co2.frame[["year", "month", "day", "co2"]]).select(
49+
pl.date("year", "month", "day"), "co2"
50+
)
5051
co2_data.head()
5152

5253
# %%
53-
co2_data.index.min(), co2_data.index.max()
54+
co2_data["date"].min(), co2_data["date"].max()
5455

5556
# %%
5657
# We see that we get CO2 concentration for some days from March, 1958 to
5758
# December, 2001. We can plot this raw information to have a better
5859
# understanding.
5960
import matplotlib.pyplot as plt
6061

61-
co2_data.plot()
62+
plt.plot(co2_data["date"], co2_data["co2"])
63+
plt.xlabel("date")
6264
plt.ylabel("CO$_2$ concentration (ppm)")
6365
_ = plt.title("Raw air samples measurements from the Mauna Loa Observatory")
6466

@@ -67,15 +69,14 @@
6769
# for which no measurements were collected. Such processing will have a
6870
# smoothing effect on the data.
6971

70-
try:
71-
co2_data_resampled_monthly = co2_data.resample("ME")
72-
except ValueError:
73-
# pandas < 2.2 uses M instead of ME
74-
co2_data_resampled_monthly = co2_data.resample("M")
75-
76-
77-
co2_data = co2_data_resampled_monthly.mean().dropna(axis="index", how="any")
78-
co2_data.plot()
72+
co2_data = (
73+
co2_data.sort(by="date")
74+
.group_by_dynamic("date", every="1mo")
75+
.agg(pl.col("co2").mean())
76+
.drop_nulls()
77+
)
78+
plt.plot(co2_data["date"], co2_data["co2"])
79+
plt.xlabel("date")
7980
plt.ylabel("Monthly average of CO$_2$ concentration (ppm)")
8081
_ = plt.title(
8182
"Monthly average of air samples measurements\nfrom the Mauna Loa Observatory"
@@ -88,7 +89,9 @@
8889
#
8990
# As a first step, we will divide the data and the target to estimate. The data
9091
# being a date, we will convert it into a numeric value.
91-
X = (co2_data.index.year + co2_data.index.month / 12).to_numpy().reshape(-1, 1)
92+
X = co2_data.select(
93+
pl.col("date").dt.year() + pl.col("date").dt.month() / 12
94+
).to_numpy()
9295
y = co2_data["co2"].to_numpy()
9396

9497
# %%
Binary file not shown.

dev/_downloads/91a0c94f9f7c19d59a0ad06e77512326/plot_gpr_co2.ipynb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"cell_type": "markdown",
2323
"metadata": {},
2424
"source": [
25-
"## Build the dataset\n\nWe will derive a dataset from the Mauna Loa Observatory that collected air\nsamples. We are interested in estimating the concentration of CO2 and\nextrapolate it for further year. First, we load the original dataset available\nin OpenML.\n\n"
25+
"## Build the dataset\n\nWe will derive a dataset from the Mauna Loa Observatory that collected air\nsamples. We are interested in estimating the concentration of CO2 and\nextrapolate it for further year. First, we load the original dataset available\nin OpenML as a pandas dataframe. This will be replaced with Polars\nonce `fetch_openml` adds a native support for it.\n\n"
2626
]
2727
},
2828
{
@@ -40,7 +40,7 @@
4040
"cell_type": "markdown",
4141
"metadata": {},
4242
"source": [
43-
"First, we process the original dataframe to create a date index and select\nonly the CO2 column.\n\n"
43+
"First, we process the original dataframe to create a date column and select\nit along with the CO2 column.\n\n"
4444
]
4545
},
4646
{
@@ -51,7 +51,7 @@
5151
},
5252
"outputs": [],
5353
"source": [
54-
"import pandas as pd\n\nco2_data = co2.frame\nco2_data[\"date\"] = pd.to_datetime(co2_data[[\"year\", \"month\", \"day\"]])\nco2_data = co2_data[[\"date\", \"co2\"]].set_index(\"date\")\nco2_data.head()"
54+
"import polars as pl\n\nco2_data = pl.DataFrame(co2.frame[[\"year\", \"month\", \"day\", \"co2\"]]).select(\n pl.date(\"year\", \"month\", \"day\"), \"co2\"\n)\nco2_data.head()"
5555
]
5656
},
5757
{
@@ -62,7 +62,7 @@
6262
},
6363
"outputs": [],
6464
"source": [
65-
"co2_data.index.min(), co2_data.index.max()"
65+
"co2_data[\"date\"].min(), co2_data[\"date\"].max()"
6666
]
6767
},
6868
{
@@ -80,7 +80,7 @@
8080
},
8181
"outputs": [],
8282
"source": [
83-
"import matplotlib.pyplot as plt\n\nco2_data.plot()\nplt.ylabel(\"CO$_2$ concentration (ppm)\")\n_ = plt.title(\"Raw air samples measurements from the Mauna Loa Observatory\")"
83+
"import matplotlib.pyplot as plt\n\nplt.plot(co2_data[\"date\"], co2_data[\"co2\"])\nplt.xlabel(\"date\")\nplt.ylabel(\"CO$_2$ concentration (ppm)\")\n_ = plt.title(\"Raw air samples measurements from the Mauna Loa Observatory\")"
8484
]
8585
},
8686
{
@@ -98,7 +98,7 @@
9898
},
9999
"outputs": [],
100100
"source": [
101-
"try:\n co2_data_resampled_monthly = co2_data.resample(\"ME\")\nexcept ValueError:\n # pandas < 2.2 uses M instead of ME\n co2_data_resampled_monthly = co2_data.resample(\"M\")\n\n\nco2_data = co2_data_resampled_monthly.mean().dropna(axis=\"index\", how=\"any\")\nco2_data.plot()\nplt.ylabel(\"Monthly average of CO$_2$ concentration (ppm)\")\n_ = plt.title(\n \"Monthly average of air samples measurements\\nfrom the Mauna Loa Observatory\"\n)"
101+
"co2_data = (\n co2_data.sort(by=\"date\")\n .group_by_dynamic(\"date\", every=\"1mo\")\n .agg(pl.col(\"co2\").mean())\n .drop_nulls()\n)\nplt.plot(co2_data[\"date\"], co2_data[\"co2\"])\nplt.xlabel(\"date\")\nplt.ylabel(\"Monthly average of CO$_2$ concentration (ppm)\")\n_ = plt.title(\n \"Monthly average of air samples measurements\\nfrom the Mauna Loa Observatory\"\n)"
102102
]
103103
},
104104
{
@@ -116,7 +116,7 @@
116116
},
117117
"outputs": [],
118118
"source": [
119-
"X = (co2_data.index.year + co2_data.index.month / 12).to_numpy().reshape(-1, 1)\ny = co2_data[\"co2\"].to_numpy()"
119+
"X = co2_data.select(\n pl.col(\"date\").dt.year() + pl.col(\"date\").dt.month() / 12\n).to_numpy()\ny = co2_data[\"co2\"].to_numpy()"
120120
]
121121
},
122122
{

dev/_downloads/scikit-learn-docs.zip

-14.1 KB
Binary file not shown.
-289 Bytes
239 Bytes
8 Bytes
570 Bytes
374 Bytes
1022 Bytes
650 Bytes
170 Bytes
-38 Bytes
110 Bytes
180 Bytes
13 Bytes
-2.93 KB
-7.67 KB
-88 Bytes
-56 Bytes
-23 Bytes
39 Bytes
-12 Bytes
-59 Bytes
13 Bytes
-112 Bytes
148 Bytes
239 Bytes
-60 Bytes
797 Bytes
-641 Bytes
-976 Bytes
445 Bytes
-87 Bytes
35 Bytes
-91 Bytes

dev/_sources/auto_examples/applications/plot_cyclical_feature_engineering.rst.txt

Lines changed: 1 addition & 1 deletion

dev/_sources/auto_examples/applications/plot_digits_denoising.rst.txt

Lines changed: 1 addition & 1 deletion

dev/_sources/auto_examples/applications/plot_face_recognition.rst.txt

Lines changed: 4 additions & 4 deletions

dev/_sources/auto_examples/applications/plot_model_complexity_influence.rst.txt

Lines changed: 15 additions & 15 deletions

0 commit comments

Comments
 (0)