@@ -51,7 +51,7 @@ def read_file(filepath: str) -> pl.DataFrame:
51
51
_ , value = start_time_line .split (" : " )
52
52
start_time = datetime .strptime (value .strip (), "%m/%d/%Y %H:%M:%S.%f" )
53
53
54
- columns_to_read = ["time" , "Ns" , "I" , "Ecell" , "Q charge" , "Q discharge" ]
54
+ columns_to_read = ["time/ " , "Ns" , "I/ " , "Ecell/ " , "Q charge/ " , "Q discharge/ " ]
55
55
56
56
all_columns = pl .scan_csv (
57
57
filepath , skip_rows = n_header_lines - 1 , separator = "\t "
@@ -73,9 +73,7 @@ def read_file(filepath: str) -> pl.DataFrame:
73
73
(pl .col ("time/s" ) * 1000000 + start ).cast (pl .Datetime ).alias ("Date" )
74
74
)
75
75
76
-
77
76
return dataframe
78
-
79
77
80
78
@classmethod
81
79
def sort_files (cls , file_list : List [str ]) -> List [str ]:
@@ -112,13 +110,32 @@ def raw_dataframe(self) -> pl.DataFrame:
112
110
files = glob .glob (self .input_data_path )
113
111
files = self .sort_files (files )
114
112
dataframes = [self .read_file (file ) for file in files ]
115
-
116
- for i in range (1 , len (dataframes )):
117
- dataframes [i ] = dataframes [i ].with_columns (
118
- pl .col ("Ns" ) + dataframes [i - 1 ]["Ns" ].max () + 1
119
- )
113
+ all_columns = [col for df in dataframes for col in df .columns ]
114
+ for i in range (len (dataframes )):
115
+ dataframes [i ] = self .fill_missing_columns (dataframes [i ], all_columns )
116
+ if i > 0 :
117
+ dataframes [i ] = dataframes [i ].with_columns (
118
+ pl .col ("Ns" ) + dataframes [i - 1 ]["Ns" ].max () + 1
119
+ )
120
120
return pl .concat (dataframes , how = "vertical" )
121
121
122
def fill_missing_columns(
    self, dataframe: pl.DataFrame, column_list: List[str]
) -> pl.DataFrame:
    """Add a zero-filled column for every required column the DataFrame lacks.

    The returned frame's columns follow the first-seen order of
    ``column_list`` (duplicates are ignored), followed by any columns of
    ``dataframe`` that are not in ``column_list``, in their original order.
    Giving every frame the same column order lets frames that were missing
    different columns be stacked with a vertical concat afterwards.

    Args:
        dataframe: The DataFrame to fill.
        column_list: The required column names; may contain duplicates.

    Returns:
        pl.DataFrame: The DataFrame with all required columns present.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; a plain
    # set would iterate in arbitrary order and make column order unstable.
    required = list(dict.fromkeys(column_list))

    present = set(dataframe.columns)
    missing = [name for name in required if name not in present]
    if missing:
        # Add every missing column in one pass instead of one call each.
        dataframe = dataframe.with_columns(
            [pl.zeros(dataframe.height).alias(name) for name in missing]
        )

    required_names = set(required)
    extras = [name for name in dataframe.columns if name not in required_names]
    ordered = required + extras
    if dataframe.columns != ordered:
        # Reorder only when needed so already-aligned frames pass through.
        dataframe = dataframe.select(ordered)
    return dataframe
138
+
122
139
@property
123
140
def processed_dataframe (self ) -> pl .DataFrame :
124
141
"""Process a DataFrame from battery cycler data.
0 commit comments