Fill missing biologic columns with zeros

Tom Holland · Tom Holland · commit 352be317920d · 2024-06-12T16:06:37.000+01:00
diff --git a/pyprobe/cyclers/biologic.py b/pyprobe/cyclers/biologic.py
@@ -51,7 +51,7 @@ def read_file(filepath: str) -> pl.DataFrame:
         _, value = start_time_line.split(" : ")
         start_time = datetime.strptime(value.strip(), "%m/%d/%Y %H:%M:%S.%f")
 
-        columns_to_read = ["time", "Ns", "I", "Ecell", "Q charge", "Q discharge"]
+        columns_to_read = ["time/", "Ns", "I/", "Ecell/", "Q charge/", "Q discharge/"]
 
         all_columns = pl.scan_csv(
             filepath, skip_rows=n_header_lines - 1, separator="\t"
@@ -73,9 +73,7 @@ def read_file(filepath: str) -> pl.DataFrame:
             (pl.col("time/s") * 1000000 + start).cast(pl.Datetime).alias("Date")
         )
 
-        
         return dataframe
-    
 
     @classmethod
     def sort_files(cls, file_list: List[str]) -> List[str]:
@@ -112,13 +110,32 @@ def raw_dataframe(self) -> pl.DataFrame:
         files = glob.glob(self.input_data_path)
         files = self.sort_files(files)
         dataframes = [self.read_file(file) for file in files]
-
-        for i in range(1, len(dataframes)):
-            dataframes[i] = dataframes[i].with_columns(
-                pl.col("Ns") + dataframes[i - 1]["Ns"].max() + 1
-            )
+        all_columns = [col for df in dataframes for col in df.columns]
+        for i in range(len(dataframes)):
+            dataframes[i] = self.fill_missing_columns(dataframes[i], all_columns)
+            if i > 0:
+                dataframes[i] = dataframes[i].with_columns(
+                    pl.col("Ns") + dataframes[i - 1]["Ns"].max() + 1
+                )
         return pl.concat(dataframes, how="vertical")
 
+    def fill_missing_columns(
+        self, dataframe: pl.DataFrame, column_list: List[str]
+    ) -> pl.DataFrame:
+        """Fill missing columns in a DataFrame with zeros.
+
+        Args:
+            dataframe: The DataFrame to fill.
+            column_list: The list of required columns.
+
+        Returns:
+            pl.DataFrame: The DataFrame with filled columns.
+        """
+        missing_columns = set(column_list) - set(dataframe.columns)
+        for col in missing_columns:
+            dataframe = dataframe.with_columns(pl.zeros(dataframe.height).alias(col))
+        return dataframe
+
     @property
     def processed_dataframe(self) -> pl.DataFrame:
         """Process a DataFrame from battery cycler data.
diff --git a/tests/cyclers/test_biologic.py b/tests/cyclers/test_biologic.py
@@ -28,6 +28,40 @@ def test_read_file(biologic_cycler):
         "Q charge/mA.h",
         "Date",
     ]
+    assert 0 == 1
+
+
+def test_fill_missing_columns(biologic_cycler):
+    """Test the fill_missing_columns method."""
+    dataframe = pl.DataFrame(
+        {
+            "time/s": [0.0, 1.0, 2.0, 3.0],
+            "Ns": [0, 1, 2, 3],
+            "Ecell/V": [4, 5, 6, 7],
+        }
+    )
+    all_columns = [
+        "time/s",
+        "Ns",
+        "Ecell/V",
+        "I/mA",
+        "Q charge/mA.h",
+        "Q discharge/mA.h",
+    ]
+    filled_dataframe = biologic_cycler.fill_missing_columns(dataframe, all_columns)
+    expected_dataframe = pl.DataFrame(
+        {
+            "time/s": [0.0, 1.0, 2.0, 3.0],
+            "Ns": [0, 1, 2, 3],
+            "Ecell/V": [4, 5, 6, 7],
+            "I/mA": [0.0, 0.0, 0.0, 0.0],
+            "Q charge/mA.h": [0.0, 0.0, 0.0, 0.0],
+            "Q discharge/mA.h": [0.0, 0.0, 0.0, 0.0],
+        }
+    )
+    pl_testing.assert_frame_equal(
+        filled_dataframe, expected_dataframe, check_column_order=False
+    )
 
 
 def test_sort_files(biologic_cycler):