Skip to content

Commit 55d2874

Browse files
[bugfix] fix select columns in odps dataset v1 (#18)
1 parent 16ddbc3 commit 55d2874

File tree

2 files changed

+6
-17
lines changed

2 files changed

+6
-17
lines changed

.pre-commit-config.yaml

-5
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,6 @@ repos:
2828
hooks:
2929
- id: codespell
3030
args: ["--skip", "*.json"]
31-
- repo: https://github.com/PyCQA/docformatter
32-
rev: v1.7.5
33-
hooks:
34-
- id: docformatter
35-
args: ["--in-place", "--wrap-descriptions", "79"]
3631
- repo: https://github.com/executablebooks/mdformat
3732
rev: 0.7.17
3833
hooks:

tzrec/datasets/odps_dataset_v1.py

+6 -12
Original file line number | Diff line number | Diff line change
@@ -125,20 +125,14 @@ def __init__(
125125
) -> None:
126126
super().__init__(input_path, batch_size, selected_cols, drop_remainder)
127127
self.schema = []
128-
self._ordered_cols = None
129128
reader = common_io.table.TableReader(
130129
self._input_path.split(",")[0],
131-
selected_cols=",".join(self._selected_cols or []),
132130
)
133-
if self._selected_cols:
134-
self._ordered_cols = []
135-
for field in reader.get_schema():
136-
# pyre-ignore [58]
137-
if field["colname"] in self._selected_cols:
138-
self.schema.append(field)
139-
self._ordered_cols.append(field["colname"])
140-
else:
141-
self.schema = reader.get_schema()
131+
self._ordered_cols = []
132+
for field in reader.get_schema():
133+
if not selected_cols or field["colname"] in selected_cols:
134+
self.schema.append(field)
135+
self._ordered_cols.append(field["colname"])
142136
reader.close()
143137

144138
def _iter_one_table(
@@ -148,7 +142,7 @@ def _iter_one_table(
148142
input_path,
149143
slice_id=worker_id,
150144
slice_count=num_workers,
151-
selected_cols=",".join(self._selected_cols or []),
145+
selected_cols=",".join(self._ordered_cols or []),
152146
)
153147
while True:
154148
try:

0 commit comments

Comments
 (0)