Skip to content

Commit 293993e

Browse files
committed
add hashid based on op init params to default output frame dir
1 parent 5c253e5 commit 293993e

File tree

1 file changed

+11
-15
lines changed

1 file changed

+11
-15
lines changed

data_juicer/ops/mapper/video_extract_frames_mapper.py

+11-15
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from pydantic import PositiveInt
66

77
from data_juicer.utils.constant import Fields
8-
from data_juicer.utils.file_utils import create_directory_if_not_exists
8+
from data_juicer.utils.file_utils import dict_to_hash
99
from data_juicer.utils.mm_utils import (
1010
SpecialTokens, close_video, extract_key_frames,
1111
extract_key_frames_by_seconds, extract_video_frames_uniformly,
@@ -24,17 +24,11 @@ class VideoExtractFramesMapper(Mapper):
2424
"""Mapper to extract frames from video files according to specified methods.
2525
Extracted Frames Data Format:
2626
The data format for the extracted frames is a dictionary mapping
27-
video keys to lists of file paths where the extracted frames are saved.
28-
The dictionary follows the structure:
27+
video keys to the directories where their extracted
28+
frames are saved. The dictionary follows the structure:
2929
{
30-
"video_key_1": [
31-
"/${frame_dir}/video_key_1_filename/frame_1.jpg",
32-
"/${frame_dir}/video_key_1_filename/frame_2.jpg",
33-
...],
34-
"video_key_2": [
35-
"/${frame_dir}/video_key_2_filename/frame_1.jpg",
36-
"/${frame_dir}/video_key_2_filename/frame_2.jpg",
37-
...],
30+
"video_key_1": "/${frame_dir}/video_key_1_filename/",
31+
"video_key_2": "/${frame_dir}/video_key_2_filename/",
3832
...
3933
}
4034
"""
@@ -99,11 +93,13 @@ def _get_default_frame_dir(self, original_filepath):
9993
dir_token = f'/{Fields.multimodal_data_output_dir}/'
10094
if dir_token in original_dir:
10195
original_dir = original_dir.split(dir_token)[0]
102-
new_dir = os.path.join(
96+
saved_dir = os.path.join(
10397
original_dir, f'{Fields.multimodal_data_output_dir}/{OP_NAME}')
104-
create_directory_if_not_exists(new_dir)
105-
return osp.join(new_dir,
106-
osp.splitext(osp.basename(original_filepath))[0])
98+
original_filename = osp.splitext(osp.basename(original_filepath))[0]
99+
hash_val = dict_to_hash(self._init_parameters)
100+
101+
return osp.join(saved_dir,
102+
f'{original_filename}__dj_hash_#{hash_val}#')
107103

108104
def process_single(self, sample, context=False):
109105
# check if it's generated already

0 commit comments

Comments
 (0)