6
6
from common import Common
7
7
import pathlib
8
8
import threading
9
+ import more_itertools
9
10
10
11
class Upload :
11
12
@@ -17,25 +18,34 @@ def findFiles(dir,pattern="*"):
17
18
_list .append (path )
18
19
return _list
19
20
20
- def s3_upload_files (bucket ,dir ,topic_name ,pattern ,retry_upload_seconds ):
21
def s3_upload_file(s3_client, bucket, file_name, object_name):
    """Upload one local file to S3 and delete it locally once uploaded.

    Args:
        s3_client: Object exposing ``upload_file(file_name, bucket, object_name)``
            (the caller in this file passes a boto3 S3 client).
        bucket: Name of the destination bucket.
        file_name: Path of the local file to upload, as a string.
        object_name: Key under which the file is stored in the bucket.

    A ``ClientError`` raised by the upload is logged and swallowed, and the
    local file is kept in that case. Files whose name ends with ``.bin`` are
    never deleted after upload.
    """
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        # The handler previously formatted an undefined name (file_path),
        # which turned every upload failure into a NameError.
        logging.error(f"{file_name} upload failed error {e}")
        return

    logging.info(f"upload successful at s3://{bucket}/{object_name}")
    if not file_name.endswith(".bin"):
        logging.debug(f"deleting uploaded file {file_name}")
        os.remove(file_name)
30
+
31
def s3_upload(bucket, dir, topic_name, retry_upload_seconds, thread_count):
    """Continuously upload files found under ``dir/topic_name`` to S3.

    Runs forever. On each pass the topic directory is listed; when it holds
    more files than partition directories, every non-empty file is handed to
    ``Upload.s3_upload_file`` on a pool of worker threads. Otherwise the
    function logs and sleeps before scanning again.

    Args:
        bucket: Name of the destination bucket.
        dir: Base directory; the object key is the part of the file path that
            follows this prefix.
        topic_name: Sub-directory of ``dir`` to scan.
        retry_upload_seconds: Seconds to sleep when there is nothing to upload.
        thread_count: Maximum number of concurrent upload threads
            (values below 1 are treated as 1).
    """
    # Standard-library import kept local so the block is self-contained.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    s3_client = boto3.client('s3')
    _topic_dir = os.path.join(dir, topic_name)
    max_workers = max(1, thread_count)

    while True:
        _count_partition_dirs = len(Common.listDirs(_topic_dir))
        _list = [str(path) for path in Upload.findFiles(_topic_dir)]

        if len(_list) > _count_partition_dirs:
            # A bounded pool actually enforces thread_count, and leaving the
            # ``with`` block waits for the whole batch, so the next scan can
            # never re-submit a file that is still being uploaded or deleted.
            with ThreadPoolExecutor(
                max_workers=max_workers,
                thread_name_prefix="S3 Upload Threads",
            ) as pool:
                pending = {}
                for file_name in _list:
                    if os.path.getsize(file_name) > 0:
                        object_name = file_name.split(dir)[1]
                        future = pool.submit(
                            Upload.s3_upload_file,
                            s3_client,
                            bucket,
                            file_name,
                            object_name,
                        )
                        pending[future] = file_name

                # Surface anything the worker did not handle itself instead
                # of letting it disappear inside the future.
                for future in as_completed(pending):
                    error = future.exception()
                    if error is not None:
                        logging.error(
                            f"{pending[future]} upload failed error {error}"
                        )
        else:
            logging.info(
                f"s3 upload retry for new files in {retry_upload_seconds} seconds "
            )
            time.sleep(retry_upload_seconds)
0 commit comments