Skip to content

Commit 124a20f

Browse files
authored
fix decode error (#618)
* fix decode error
* start the coverage earlier
1 parent 9798e0d commit 124a20f

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

data_juicer/utils/constant.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -172,14 +172,15 @@ def get_access_log(cls, dj_cfg=None, dataset=None):
172172
elif 'jsonl' in dj_cfg.dataset_path:
173173
tmp_f_name = dj_cfg.dataset_path. \
174174
replace('.jsonl', '.tmp.jsonl')
175-
with open(dj_cfg.dataset_path, 'r') as orig_file:
175+
with open(dj_cfg.dataset_path, 'r',
176+
encoding='utf-8') as orig_file:
176177
first_line = orig_file.readline()
177178

178179
assert tmp_f_name is not None and first_line is not None, \
179180
'error when loading the first line, when ' \
180181
f'dj_cfg.dataset_path={dj_cfg.dataset_path}'
181182

182-
with open(tmp_f_name, 'w') as tmp_file:
183+
with open(tmp_f_name, 'w', encoding='utf-8') as tmp_file:
183184
tmp_file.write(first_line)
184185

185186
tmp_dj_cfg.dataset_path = tmp_f_name

tests/run.py

+5 -3
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,10 @@
1212
import unittest
1313
import coverage
1414

15+
# start the coverage immediately
16+
cov = coverage.Coverage(include='data_juicer/**')
17+
cov.start()
18+
1519
from loguru import logger
1620

1721
from data_juicer.utils.unittest_utils import set_clear_model_flag, get_partial_test_cases
@@ -91,9 +95,7 @@ def gather_test_cases(test_dir, pattern, tag, mode='partial'):
9195

9296

9397
def main():
94-
cov = coverage.Coverage(include='data_juicer/**')
95-
cov.start()
96-
98+
global cov
9799
runner = unittest.TextTestRunner()
98100
test_suite = gather_test_cases(os.path.abspath(args.test_dir),
99101
args.pattern, args.tag, args.mode)

0 commit comments

Comments
 (0)