Skip to content

Commit aa51ec0

Browse files
committed
* fix decode error
1 parent 9798e0d commit aa51ec0

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

data_juicer/utils/constant.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -172,14 +172,15 @@ def get_access_log(cls, dj_cfg=None, dataset=None):
172172
elif 'jsonl' in dj_cfg.dataset_path:
173173
tmp_f_name = dj_cfg.dataset_path. \
174174
replace('.jsonl', '.tmp.jsonl')
175-
with open(dj_cfg.dataset_path, 'r') as orig_file:
175+
with open(dj_cfg.dataset_path, 'r',
176+
encoding='utf-8') as orig_file:
176177
first_line = orig_file.readline()
177178

178179
assert tmp_f_name is not None and first_line is not None, \
179180
'error when loading the first line, when ' \
180181
f'dj_cfg.dataset_path={dj_cfg.dataset_path}'
181182

182-
with open(tmp_f_name, 'w') as tmp_file:
183+
with open(tmp_f_name, 'w', encoding='utf-8') as tmp_file:
183184
tmp_file.write(first_line)
184185

185186
tmp_dj_cfg.dataset_path = tmp_f_name

0 commit comments

Comments
 (0)