|
3 | 3 |
|
4 | 4 | import glob
|
5 | 5 | import re
|
| 6 | +import warnings |
6 | 7 | from datetime import datetime
|
7 | 8 | from typing import List
|
8 | 9 |
|
@@ -51,14 +52,16 @@ def read_file(filepath: str) -> pl.DataFrame:
|
51 | 52 | _, value = start_time_line.split(" : ")
|
52 | 53 | start_time = datetime.strptime(value.strip(), "%m/%d/%Y %H:%M:%S.%f")
|
53 | 54 |
|
54 |
| - columns_to_read = ["time", "Ns", "I", "Ecell", "Q charge", "Q discharge"] |
| 55 | + columns_to_read = ["time/", "Ns", "I/", "Ecell/", "Q charge/", "Q discharge/"] |
55 | 56 |
|
56 | 57 | all_columns = pl.scan_csv(
|
57 | 58 | filepath, skip_rows=n_header_lines - 1, separator="\t"
|
58 | 59 | ).columns
|
59 |
| - selected_columns = [ |
60 |
| - col for col in all_columns if any(sub in col for sub in columns_to_read) |
61 |
| - ] |
| 60 | + selected_columns = [] |
| 61 | + for substring in columns_to_read: |
| 62 | + found_columns = [col for col in all_columns if substring in col] |
| 63 | + selected_columns.extend(found_columns) |
| 64 | + |
62 | 65 | dataframe = pl.read_csv(
|
63 | 66 | filepath,
|
64 | 67 | skip_rows=n_header_lines - 1,
|
@@ -107,11 +110,22 @@ def raw_dataframe(self) -> pl.DataFrame:
|
107 | 110 | files = glob.glob(self.input_data_path)
|
108 | 111 | files = self.sort_files(files)
|
109 | 112 | dataframes = [self.read_file(file) for file in files]
|
110 |
| - |
111 |
| - for i in range(1, len(dataframes)): |
112 |
| - dataframes[i] = dataframes[i].with_columns( |
113 |
| - pl.col("Ns") + dataframes[i - 1]["Ns"].max() + 1 |
114 |
| - ) |
| 113 | + all_columns = set([col for df in dataframes for col in df.columns]) |
| 114 | + indices_to_remove = [] |
| 115 | + for i in range(len(dataframes)): |
| 116 | + if len(dataframes[i].columns) < len(all_columns): |
| 117 | + indices_to_remove.append(i) |
| 118 | + warnings.warn( |
| 119 | + f"File {files[i]} has missing columns, it has not been read." |
| 120 | + ) |
| 121 | + continue |
| 122 | + if i > 0: |
| 123 | + dataframes[i] = dataframes[i].with_columns( |
| 124 | + pl.col("Ns") + dataframes[i - 1]["Ns"].max() + 1 |
| 125 | + ) |
| 126 | + dataframes = [ |
| 127 | + df for i, df in enumerate(dataframes) if i not in indices_to_remove |
| 128 | + ] |
115 | 129 | return pl.concat(dataframes, how="vertical")
|
116 | 130 |
|
117 | 131 | @property
|
|
0 commit comments