Skip to content

Commit 352be31

Browse files
author
Tom Holland
committed
Fill missing biologic columns with zeros
1 parent cc0b35d commit 352be31

File tree

2 files changed

+59
-8
lines changed

2 files changed

+59
-8
lines changed

pyprobe/cyclers/biologic.py

+25-8
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def read_file(filepath: str) -> pl.DataFrame:
5151
_, value = start_time_line.split(" : ")
5252
start_time = datetime.strptime(value.strip(), "%m/%d/%Y %H:%M:%S.%f")
5353

54-
columns_to_read = ["time", "Ns", "I", "Ecell", "Q charge", "Q discharge"]
54+
columns_to_read = ["time/", "Ns", "I/", "Ecell/", "Q charge/", "Q discharge/"]
5555

5656
all_columns = pl.scan_csv(
5757
filepath, skip_rows=n_header_lines - 1, separator="\t"
@@ -73,9 +73,7 @@ def read_file(filepath: str) -> pl.DataFrame:
7373
(pl.col("time/s") * 1000000 + start).cast(pl.Datetime).alias("Date")
7474
)
7575

76-
7776
return dataframe
78-
7977

8078
@classmethod
8179
def sort_files(cls, file_list: List[str]) -> List[str]:
@@ -112,13 +110,32 @@ def raw_dataframe(self) -> pl.DataFrame:
112110
files = glob.glob(self.input_data_path)
113111
files = self.sort_files(files)
114112
dataframes = [self.read_file(file) for file in files]
115-
116-
for i in range(1, len(dataframes)):
117-
dataframes[i] = dataframes[i].with_columns(
118-
pl.col("Ns") + dataframes[i - 1]["Ns"].max() + 1
119-
)
113+
all_columns = [col for df in dataframes for col in df.columns]
114+
for i in range(len(dataframes)):
115+
dataframes[i] = self.fill_missing_columns(dataframes[i], all_columns)
116+
if i > 0:
117+
dataframes[i] = dataframes[i].with_columns(
118+
pl.col("Ns") + dataframes[i - 1]["Ns"].max() + 1
119+
)
120120
return pl.concat(dataframes, how="vertical")
121121

122+
def fill_missing_columns(
    self, dataframe: pl.DataFrame, column_list: List[str]
) -> pl.DataFrame:
    """Fill missing columns in a DataFrame with zeros.

    Every name in ``column_list`` that is absent from ``dataframe`` is added
    as a zero-filled column. The result is returned in a deterministic
    column order: the names of ``column_list`` first (first occurrence wins,
    duplicates are ignored), followed by any columns of ``dataframe`` that
    ``column_list`` does not mention, in their existing order. Frames that
    are filled against the same ``column_list``, and have no columns outside
    it, therefore come back with an identical column layout.

    Args:
        dataframe: The DataFrame to fill.
        column_list: The list of required columns. May contain duplicates.

    Returns:
        pl.DataFrame: The DataFrame with filled columns.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; iterating a
    # set here would make the order of the added columns vary between runs.
    required = list(dict.fromkeys(column_list))
    present = set(dataframe.columns)
    missing = [name for name in required if name not in present]
    if missing:
        # Add all absent columns in a single pass instead of one
        # with_columns call per column.
        dataframe = dataframe.with_columns(
            [pl.zeros(dataframe.height).alias(name) for name in missing]
        )
    required_names = set(required)
    extras = [name for name in dataframe.columns if name not in required_names]
    # Re-select so the output order depends only on column_list and not on
    # the order in which the input frame happened to hold its columns.
    return dataframe.select(required + extras)
138+
122139
@property
123140
def processed_dataframe(self) -> pl.DataFrame:
124141
"""Process a DataFrame from battery cycler data.

tests/cyclers/test_biologic.py

+34
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,40 @@ def test_read_file(biologic_cycler):
2828
"Q charge/mA.h",
2929
"Date",
3030
]
31+
assert 0 == 1
32+
33+
34+
def test_fill_missing_columns(biologic_cycler):
    """Check that fill_missing_columns adds absent columns filled with zeros."""
    # Columns the input frame already has.
    present = {
        "time/s": [0.0, 1.0, 2.0, 3.0],
        "Ns": [0, 1, 2, 3],
        "Ecell/V": [4, 5, 6, 7],
    }
    # Columns that are requested but not present in the input frame.
    absent = ["I/mA", "Q charge/mA.h", "Q discharge/mA.h"]

    expected = pl.DataFrame(
        {**present, **{name: [0.0, 0.0, 0.0, 0.0] for name in absent}}
    )
    result = biologic_cycler.fill_missing_columns(
        pl.DataFrame(present), list(present) + absent
    )

    pl_testing.assert_frame_equal(result, expected, check_column_order=False)
3165

3266

3367
def test_sort_files(biologic_cycler):

0 commit comments

Comments
 (0)