|
3 | 3 |
|
4 | 4 | import glob
|
5 | 5 | import re
|
| 6 | +import warnings |
6 | 7 | from datetime import datetime
|
7 | 8 | from typing import List
|
8 | 9 |
|
@@ -72,7 +73,6 @@ def read_file(filepath: str) -> pl.DataFrame:
|
72 | 73 | dataframe = dataframe.with_columns(
|
73 | 74 | (pl.col("time/s") * 1000000 + start).cast(pl.Datetime).alias("Date")
|
74 | 75 | )
|
75 |
| - |
76 | 76 | return dataframe
|
77 | 77 |
|
78 | 78 | @classmethod
|
@@ -110,32 +110,24 @@ def raw_dataframe(self) -> pl.DataFrame:
|
110 | 110 | files = glob.glob(self.input_data_path)
|
111 | 111 | files = self.sort_files(files)
|
112 | 112 | dataframes = [self.read_file(file) for file in files]
|
113 |
| - all_columns = [col for df in dataframes for col in df.columns] |
| 113 | + all_columns = set([col for df in dataframes for col in df.columns]) |
| 114 | + indices_to_remove = [] |
114 | 115 | for i in range(len(dataframes)):
|
115 |
| - dataframes[i] = self.fill_missing_columns(dataframes[i], all_columns) |
| 116 | + if len(dataframes[i].columns) < len(all_columns): |
| 117 | + indices_to_remove.append(i) |
| 118 | + warnings.warn( |
| 119 | + f"File {files[i]} has missing columns, it has not been read." |
| 120 | + ) |
| 121 | + continue |
116 | 122 | if i > 0:
|
117 | 123 | dataframes[i] = dataframes[i].with_columns(
|
118 | 124 | pl.col("Ns") + dataframes[i - 1]["Ns"].max() + 1
|
119 | 125 | )
|
| 126 | + dataframes = [ |
| 127 | + df for i, df in enumerate(dataframes) if i not in indices_to_remove |
| 128 | + ] |
120 | 129 | return pl.concat(dataframes, how="vertical")
|
121 | 130 |
|
122 |
| - def fill_missing_columns( |
123 |
| - self, dataframe: pl.DataFrame, column_list: List[str] |
124 |
| - ) -> pl.DataFrame: |
125 |
| - """Fill missing columns in a DataFrame with zeros. |
126 |
| -
|
127 |
| - Args: |
128 |
| - dataframe: The DataFrame to fill. |
129 |
| - column_list: The list of required columns. |
130 |
| -
|
131 |
| - Returns: |
132 |
| - pl.DataFrame: The DataFrame with filled columns. |
133 |
| - """ |
134 |
| - missing_columns = set(column_list) - set(dataframe.columns) |
135 |
| - for col in missing_columns: |
136 |
| - dataframe = dataframe.with_columns(pl.zeros(dataframe.height).alias(col)) |
137 |
| - return dataframe |
138 |
| - |
139 | 131 | @property
|
140 | 132 | def processed_dataframe(self) -> pl.DataFrame:
|
141 | 133 | """Process a DataFrame from battery cycler data.
|
|
0 commit comments