Skip to content

Commit 1fb30f8

Browse files
committed
remove _init_parameters attr for gpu ops
1 parent 949d534 commit 1fb30f8

26 files changed, with 5 additions and 54 deletions.

data_juicer/core/ray_data.py

+4-26
Original file line number | Diff line number | Diff line change
@@ -119,23 +119,12 @@ def _run_single_op(self, op):
119119
1) if op.is_batched_op() else 1
120120
if isinstance(op, Mapper):
121121
if op.use_cuda():
122-
try:
123-
init_params = op._init_parameters
124-
except AttributeError:
125-
raise ValueError(
126-
f'This Op[{op._name}] enables CUDA, you should add'
127-
' `_init_parameters` attribute to the Op class by '
128-
'add `self._init_parameters = self.remove_extra_parameters(locals())`' # noqa: E501
129-
' after super().__init__().')
130-
op_args = init_params.pop('args', ())
131-
op_kwargs = init_params.pop('kwargs', {})
132-
op_kwargs.update(init_params)
133-
122+
op_kwargs = op._op_cfg[op._name]
134123
self.data = self.data.map_batches(
135124
op.__class__,
136125
fn_args=None,
137126
fn_kwargs=None,
138-
fn_constructor_args=op_args,
127+
fn_constructor_args=None,
139128
fn_constructor_kwargs=op_kwargs,
140129
batch_size=batch_size,
141130
num_gpus=num_gpus,
@@ -148,23 +137,12 @@ def _run_single_op(self, op):
148137
num_gpus=num_gpus)
149138
elif isinstance(op, Filter):
150139
if op.use_cuda():
151-
try:
152-
init_params = op._init_parameters
153-
except AttributeError:
154-
raise ValueError(
155-
f'This Op[{op._name}] enables CUDA, you should add'
156-
' `_init_parameters` attribute to the Op class by '
157-
'add `self._init_parameters = self.remove_extra_parameters(locals())`' # noqa: E501
158-
' after super().__init__().')
159-
op_args = init_params.pop('args', ())
160-
op_kwargs = init_params.pop('kwargs', {})
161-
op_kwargs.update(init_params)
162-
140+
op_kwargs = op._op_cfg[op._name]
163141
self.data = self.data.map_batches(
164142
op.__class__,
165143
fn_args=None,
166144
fn_kwargs=None,
167-
fn_constructor_args=op_args,
145+
fn_constructor_args=None,
168146
fn_constructor_kwargs=op_kwargs,
169147
batch_size=batch_size,
170148
num_gpus=num_gpus,

data_juicer/ops/filter/image_aesthetics_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@ def __init__(self,
4848
"""
4949

5050
super().__init__(*args, **kwargs)
51-
self._init_parameters = self.remove_extra_parameters(locals())
5251
if hf_scorer_model == '':
5352
hf_scorer_model = \
5453
'shunk031/aesthetics-predictor-v2-sac-logos-ava1-l14-linearMSE'

data_juicer/ops/filter/image_nsfw_filter.py

-2
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,6 @@ def __init__(self,
4242
:param kwargs: extra args
4343
"""
4444
super().__init__(*args, **kwargs)
45-
self._init_parameters = self.remove_extra_parameters(locals())
46-
4745
self.score_threshold = score_threshold
4846
if any_or_all not in ['any', 'all']:
4947
raise ValueError(f'Keep strategy [{any_or_all}] is not supported. '

data_juicer/ops/filter/image_pair_similarity_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,6 @@ def __init__(self,
4444
:param kwargs: extra args
4545
"""
4646
super().__init__(*args, **kwargs)
47-
self._init_parameters = self.remove_extra_parameters(locals())
4847
self.min_score = min_score
4948
self.max_score = max_score
5049
if any_or_all not in ['any', 'all']:

data_juicer/ops/filter/image_text_matching_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,6 @@ def __init__(self,
5353
:param kwargs: extra args
5454
"""
5555
super().__init__(*args, **kwargs)
56-
self._init_parameters = self.remove_extra_parameters(locals())
5756
self.min_score = min_score
5857
self.max_score = max_score
5958
if reduce_mode not in ['avg', 'max', 'min']:

data_juicer/ops/filter/image_text_similarity_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,6 @@ def __init__(self,
5454
:param kwargs: extra args
5555
"""
5656
super().__init__(*args, **kwargs)
57-
self._init_parameters = self.remove_extra_parameters(locals())
5857
self.min_score = min_score
5958
self.max_score = max_score
6059
if reduce_mode not in ['avg', 'max', 'min']:

data_juicer/ops/filter/image_watermark_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,6 @@ def __init__(self,
4646
:param kwargs: extra args
4747
"""
4848
super().__init__(*args, **kwargs)
49-
self._init_parameters = self.remove_extra_parameters(locals())
5049
self.prob_threshold = prob_threshold
5150
if any_or_all not in ['any', 'all']:
5251
raise ValueError(f'Keep strategy [{any_or_all}] is not supported. '

data_juicer/ops/filter/phrase_grounding_recall_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,6 @@ def __init__(self,
115115
:param kwargs: extra args
116116
"""
117117
super().__init__(*args, **kwargs)
118-
self._init_parameters = self.remove_extra_parameters(locals())
119118
self.min_recall = min_recall
120119
self.max_recall = max_recall
121120
if reduce_mode not in ['avg', 'max', 'min']:

data_juicer/ops/filter/video_aesthetics_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,6 @@ def __init__(self,
7575
"""
7676

7777
super().__init__(*args, **kwargs)
78-
self._init_parameters = self.remove_extra_parameters(locals())
7978
if hf_scorer_model == '':
8079
hf_scorer_model = \
8180
'shunk031/aesthetics-predictor-v2-sac-logos-ava1-l14-linearMSE'

data_juicer/ops/filter/video_frames_text_similarity_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,6 @@ def __init__(self,
7575
:param kwargs: extra args
7676
"""
7777
super().__init__(*args, **kwargs)
78-
self._init_parameters = self.remove_extra_parameters(locals())
7978
self.min_score = min_score
8079
self.max_score = max_score
8180
if frame_sampling_method not in ['all_keyframes', 'uniform']:

data_juicer/ops/filter/video_motion_score_filter.py

-2
Original file line number | Diff line number | Diff line change
@@ -82,8 +82,6 @@ def __init__(self,
8282
:param kwargs: extra args
8383
"""
8484
super().__init__(*args, **kwargs)
85-
self._init_parameters = self.remove_extra_parameters(locals())
86-
8785
self.min_score = min_score
8886
self.max_score = max_score
8987
self.sampling_fps = sampling_fps

data_juicer/ops/filter/video_nsfw_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,6 @@ def __init__(self,
6666
:param kwargs: extra args
6767
"""
6868
super().__init__(*args, **kwargs)
69-
self._init_parameters = self.remove_extra_parameters(locals())
7069
self.score_threshold = score_threshold
7170
if frame_sampling_method not in ['all_keyframes', 'uniform']:
7271
raise ValueError(

data_juicer/ops/filter/video_ocr_area_ratio_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,6 @@ def __init__(self,
7272
:param kwargs: extra args
7373
"""
7474
super().__init__(*args, **kwargs)
75-
self._init_parameters = self.remove_extra_parameters(locals())
7675
self.min_area_ratio = min_area_ratio
7776
self.max_area_ratio = max_area_ratio
7877
self.frame_sample_num = frame_sample_num

data_juicer/ops/filter/video_tagging_from_frames_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,6 @@ def __init__(self,
6262
:param kwargs: extra args
6363
"""
6464
super().__init__(*args, **kwargs)
65-
self._init_parameters = self.remove_extra_parameters(locals())
6665
if contain not in ['any', 'all']:
6766
raise ValueError(f'the containing type [{contain}] is not '
6867
f'supported. Can only be one of ["any", "all"].')

data_juicer/ops/filter/video_watermark_filter.py

-1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,6 @@ def __init__(self,
7070
:param kwargs: extra args
7171
"""
7272
super().__init__(*args, **kwargs)
73-
self._init_parameters = self.remove_extra_parameters(locals())
7473
self.prob_threshold = prob_threshold
7574
if frame_sampling_method not in ['all_keyframes', 'uniform']:
7675
raise ValueError(

data_juicer/ops/mapper/generate_qa_from_examples_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,6 @@ def __init__(self,
9696
:param kwargs: Extra keyword arguments.
9797
"""
9898
super().__init__(**kwargs)
99-
self._init_parameters = self.remove_extra_parameters(locals())
10099

101100
if not seed_file:
102101
raise ValueError(

data_juicer/ops/mapper/generate_qa_from_text_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,6 @@ def __init__(self,
6969
"""
7070

7171
super().__init__(**kwargs)
72-
self._init_parameters = self.remove_extra_parameters(locals())
7372

7473
if output_pattern is None:
7574
self.output_pattern = r'Human:(.*?)Assistant:(.*?)(?=Human|$)' # noqa: E501

data_juicer/ops/mapper/image_captioning_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,6 @@ def __init__(self,
8282
:param kwargs: extra args
8383
"""
8484
super().__init__(*args, **kwargs)
85-
self._init_parameters = self.remove_extra_parameters(locals())
8685

8786
if keep_candidate_mode not in [
8887
'random_any', 'similar_one_simhash', 'all'

data_juicer/ops/mapper/image_tagging_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,6 @@ def __init__(self,
3737
:param kwargs: extra args
3838
"""
3939
super().__init__(*args, **kwargs)
40-
self._init_parameters = self.remove_extra_parameters(locals())
4140
self.model_key = prepare_model(
4241
model_type='recognizeAnything',
4342
pretrained_model_name_or_path='ram_plus_swin_large_14m.pth',

data_juicer/ops/mapper/optimize_qa_mapper.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def __init__(self,
6666
:param kwargs: Extra keyword arguments.
6767
"""
6868
super().__init__(**kwargs)
69-
self._init_parameters = self.remove_extra_parameters(locals())
69+
7070
self.system_prompt = system_prompt or self.DEFAULT_SYSTEM_PROMPT
7171
self.input_template = input_template or self.DEFAULT_INPUT_TEMPLATE
7272
self.qa_pair_template = qa_pair_template or \

data_juicer/ops/mapper/video_captioning_from_audio_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,6 @@ def __init__(self, keep_original_sample: bool = True, *args, **kwargs):
3333
:param kwargs: extra args
3434
"""
3535
super().__init__(*args, **kwargs)
36-
self._init_parameters = self.remove_extra_parameters(locals())
3736
AUTOINSTALL.check([
3837
'transformers', 'transformers_stream_generator', 'einops',
3938
'accelerate', 'tiktoken'

data_juicer/ops/mapper/video_captioning_from_frames_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,6 @@ def __init__(
109109
:param kwargs: extra args
110110
"""
111111
super().__init__(*args, **kwargs)
112-
self._init_parameters = self.remove_extra_parameters(locals())
113112

114113
if keep_candidate_mode not in [
115114
'random_any', 'similar_one_simhash', 'all'

data_juicer/ops/mapper/video_captioning_from_summarizer_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,6 @@ def __init__(self,
8282
:param kwargs: extra args
8383
"""
8484
super().__init__(*args, **kwargs)
85-
self._init_parameters = self.remove_extra_parameters(locals())
8685
AUTOINSTALL.check([
8786
'torch',
8887
'transformers',

data_juicer/ops/mapper/video_captioning_from_video_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,6 @@ def __init__(
109109
:param kwargs: extra args
110110
"""
111111
super().__init__(*args, **kwargs)
112-
self._init_parameters = self.remove_extra_parameters(locals())
113112

114113
if keep_candidate_mode not in [
115114
'random_any', 'similar_one_simhash', 'all'

data_juicer/ops/mapper/video_tagging_from_audio_mapper.py

-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@ def __init__(self,
3838
:param kwargs: extra args
3939
"""
4040
super().__init__(*args, **kwargs)
41-
self._init_parameters = self.remove_extra_parameters(locals())
4241
AUTOINSTALL.check(['torchaudio'])
4342
self.model_key = prepare_model(model_type='huggingface',
4443
pretrained_model_name_or_path=hf_ast,

data_juicer/ops/mapper/video_tagging_from_frames_mapper.py

-2
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,6 @@ def __init__(self,
5656
:param kwargs: extra args
5757
"""
5858
super().__init__(*args, **kwargs)
59-
self._init_parameters = self.remove_extra_parameters(locals())
60-
6159
if frame_sampling_method not in ['all_keyframes', 'uniform']:
6260
raise ValueError(
6361
f'Frame sampling method [{frame_sampling_method}] is not '

0 commit comments

Comments
 (0)