Skip to content

Commit e4e0692

Browse files
author
Tom Holland
committed
Replace filling missing columns with ignoring file and providing a warning
1 parent 352be31 commit e4e0692

File tree

2 files changed

+12
-54
lines changed

2 files changed

+12
-54
lines changed

pyprobe/cyclers/biologic.py

+12 -20
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
import glob
55
import re
6+
import warnings
67
from datetime import datetime
78
from typing import List
89

@@ -72,7 +73,6 @@ def read_file(filepath: str) -> pl.DataFrame:
7273
dataframe = dataframe.with_columns(
7374
(pl.col("time/s") * 1000000 + start).cast(pl.Datetime).alias("Date")
7475
)
75-
7676
return dataframe
7777

7878
@classmethod
@@ -110,32 +110,24 @@ def raw_dataframe(self) -> pl.DataFrame:
110110
files = glob.glob(self.input_data_path)
111111
files = self.sort_files(files)
112112
dataframes = [self.read_file(file) for file in files]
113-
all_columns = [col for df in dataframes for col in df.columns]
113+
all_columns = set([col for df in dataframes for col in df.columns])
114+
indices_to_remove = []
114115
for i in range(len(dataframes)):
115-
dataframes[i] = self.fill_missing_columns(dataframes[i], all_columns)
116+
if len(dataframes[i].columns) < len(all_columns):
117+
indices_to_remove.append(i)
118+
warnings.warn(
119+
f"File {files[i]} has missing columns, it has not been read."
120+
)
121+
continue
116122
if i > 0:
117123
dataframes[i] = dataframes[i].with_columns(
118124
pl.col("Ns") + dataframes[i - 1]["Ns"].max() + 1
119125
)
126+
dataframes = [
127+
df for i, df in enumerate(dataframes) if i not in indices_to_remove
128+
]
120129
return pl.concat(dataframes, how="vertical")
121130

122-
def fill_missing_columns(
123-
self, dataframe: pl.DataFrame, column_list: List[str]
124-
) -> pl.DataFrame:
125-
"""Fill missing columns in a DataFrame with zeros.
126-
127-
Args:
128-
dataframe: The DataFrame to fill.
129-
column_list: The list of required columns.
130-
131-
Returns:
132-
pl.DataFrame: The DataFrame with filled columns.
133-
"""
134-
missing_columns = set(column_list) - set(dataframe.columns)
135-
for col in missing_columns:
136-
dataframe = dataframe.with_columns(pl.zeros(dataframe.height).alias(col))
137-
return dataframe
138-
139131
@property
140132
def processed_dataframe(self) -> pl.DataFrame:
141133
"""Process a DataFrame from battery cycler data.

tests/cyclers/test_biologic.py

-34
Original file line number | Diff line number | Diff line change
@@ -28,40 +28,6 @@ def test_read_file(biologic_cycler):
2828
"Q charge/mA.h",
2929
"Date",
3030
]
31-
assert 0 == 1
32-
33-
34-
def test_fill_missing_columns(biologic_cycler):
35-
"""Test the fill_missing_columns method."""
36-
dataframe = pl.DataFrame(
37-
{
38-
"time/s": [0.0, 1.0, 2.0, 3.0],
39-
"Ns": [0, 1, 2, 3],
40-
"Ecell/V": [4, 5, 6, 7],
41-
}
42-
)
43-
all_columns = [
44-
"time/s",
45-
"Ns",
46-
"Ecell/V",
47-
"I/mA",
48-
"Q charge/mA.h",
49-
"Q discharge/mA.h",
50-
]
51-
filled_dataframe = biologic_cycler.fill_missing_columns(dataframe, all_columns)
52-
expected_dataframe = pl.DataFrame(
53-
{
54-
"time/s": [0.0, 1.0, 2.0, 3.0],
55-
"Ns": [0, 1, 2, 3],
56-
"Ecell/V": [4, 5, 6, 7],
57-
"I/mA": [0.0, 0.0, 0.0, 0.0],
58-
"Q charge/mA.h": [0.0, 0.0, 0.0, 0.0],
59-
"Q discharge/mA.h": [0.0, 0.0, 0.0, 0.0],
60-
}
61-
)
62-
pl_testing.assert_frame_equal(
63-
filled_dataframe, expected_dataframe, check_column_order=False
64-
)
6531

6632

6733
def test_sort_files(biologic_cycler):

0 commit comments

Comments
 (0)