Skip to content

Commit 425ba4b

Browse files
authored
Pilot 7131: add a new parameter size in preupload api. (#205)
* add size parameter in preupload api and update integrity check * add size parameter in preupload api and update integrity check * add test cases for resumable integrity check * fixup test cases * update PR * fixup the order/order_by incorrect test cases
1 parent 7284611 commit 425ba4b

File tree

7 files changed

+121
-39
lines changed

7 files changed

+121
-39
lines changed

app/resources/custom_error.py

+4
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ class Error:
4949
'File: %s does not exist in the folder.\n'
5050
'Please remove the resumable upload log and retry uploading the entire folder again.'
5151
),
52+
'INVALID_RESUMABLE_FILE_SIZE': (
53+
'The file size of %s is not the same as the previous upload. '
54+
'Expected size: %s, Actual size: %s. Please verify the file content and try again.'
55+
),
5256
'INVALID_FOLDERNAME': (
5357
'The input folder name is not valid. Please follow the rule:\n'
5458
' - cannot contains special characters.\n'

app/services/file_manager/file_upload/file_upload.py

+62-25
Original file line numberDiff line numberDiff line change
@@ -268,35 +268,24 @@ def simple_upload( # noqa: C901
268268
return [file_object.item_id for file_object in pre_upload_infos]
269269

270270

271-
def resume_upload(
272-
manifest_json: Dict[str, Any],
273-
num_of_thread: int = 1,
274-
):
275-
"""
271+
def resume_get_unfinished_items(
272+
upload_client: UploadClient, all_files: Dict[str, Any], item_ids: List[str]
273+
) -> List[FileObject]:
274+
'''
276275
Summary:
277-
Resume upload from the manifest file
278-
Parameters:
279-
- manifest_json: the manifest json which store the upload information
280-
- num_of_thread: the number of thread to upload the file
281-
"""
282-
upload_start_time = time.time()
276+
Function will loop over `all_files` batchly and check if the file is already uploaded.
277+
During the process, the logic wll check if the size registered in the backend is matched
278+
with the local file size. If not, the function will raise an error.
283279
284-
upload_client = UploadClient(
285-
project_code=manifest_json.get('project_code'),
286-
zone=manifest_json.get('zone'),
287-
job_type='AS_FOLDER',
288-
current_folder_node=manifest_json.get('current_folder_node', ''),
289-
parent_folder_id=manifest_json.get('parent_folder_id', ''),
290-
tags=manifest_json.get('tags'),
291-
)
280+
Parameter:
281+
- upload_client(UploadClient): the upload client object
282+
- all_files(Dict[str, Any]): the file object dictionary
283+
- item_ids(List[str]): the list of item ids that will be checked
284+
Return:
285+
- unfinished_items(List[FileObject]): the list of file object that is not uploaded yet
286+
'''
292287

293-
# check files in manifest if some of them are already uploaded
294288
unfinished_items = []
295-
all_files = manifest_json.get('file_objects')
296-
item_ids = []
297-
for item_id in all_files:
298-
item_ids.append(item_id)
299-
300289
# here add the batch of 500 per loop, the pre upload api cannot
301290
# process very large amount of file at same time. otherwise it will timeout
302291
# here is list of pre upload result. We decided to call pre upload api by batch
@@ -312,8 +301,26 @@ def resume_upload(
312301
SrvErrorHandler.customized_handle(
313302
ECustomizedError.INVALID_RESUMABLE_UPLOAD, if_exit=True, value=missing_item.get('object_path')
314303
)
304+
# check if the file is already registered
315305
elif x.get('result').get('status') == ItemStatus.REGISTERED:
316306
file_info = all_files.get(file_meta.get('id'))
307+
# check if size is matched during resume vs preupload
308+
logger.info(
309+
f'Check file size: {file_info.get("object_path")}, '
310+
f'expected size: {file_info.get("total_size")}, '
311+
f'actual size: {x.get("result").get("size")}'
312+
)
313+
if file_info.get('total_size') != x.get('result').get('size'):
314+
SrvErrorHandler.customized_handle(
315+
ECustomizedError.INVALID_RESUMABLE_FILE_SIZE,
316+
if_exit=True,
317+
value=(
318+
file_info.get('object_path'),
319+
x.get('result').get('size'),
320+
file_info.get('total_size'),
321+
),
322+
)
323+
317324
unfinished_files.append(
318325
FileObject(
319326
file_info.get('object_path'),
@@ -329,6 +336,36 @@ def resume_upload(
329336
if len(unfinished_files) > 0:
330337
unfinished_items.extend(upload_client.resume_upload(unfinished_files))
331338

339+
return unfinished_items
340+
341+
342+
def resume_upload(
343+
manifest_json: Dict[str, Any],
344+
num_of_thread: int = 1,
345+
):
346+
"""
347+
Summary:
348+
Resume upload from the manifest file
349+
Parameters:
350+
- manifest_json: the manifest json which store the upload information
351+
- num_of_thread: the number of thread to upload the file
352+
"""
353+
upload_start_time = time.time()
354+
355+
upload_client = UploadClient(
356+
project_code=manifest_json.get('project_code'),
357+
zone=manifest_json.get('zone'),
358+
job_type='AS_FOLDER',
359+
current_folder_node=manifest_json.get('current_folder_node', ''),
360+
parent_folder_id=manifest_json.get('parent_folder_id', ''),
361+
tags=manifest_json.get('tags'),
362+
)
363+
364+
# check files in manifest if some of them are already uploaded
365+
all_files = manifest_json.get('file_objects')
366+
item_ids = list(all_files.keys())
367+
unfinished_items = resume_get_unfinished_items(upload_client, all_files, item_ids)
368+
332369
mhandler.SrvOutPutHandler.resume_warning(len(unfinished_items))
333370
mhandler.SrvOutPutHandler.resume_check_success()
334371

app/services/file_manager/file_upload/upload_client.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,8 @@ def pre_upload(self, file_objects: List[FileObject], output_path: str) -> List[F
225225
'parent_folder_id': self.parent_folder_id,
226226
'folder_tags': self.tags,
227227
'data': [
228-
{'resumable_filename': x.file_name, 'resumable_relative_path': x.parent_path} for x in file_objects
228+
{'resumable_filename': x.file_name, 'resumable_relative_path': x.parent_path, 'size': x.total_size}
229+
for x in file_objects
229230
],
230231
}
231232
if self.source_id:

app/services/output_manager/error_handler.py

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class ECustomizedError(enum.Enum):
3535
INVALID_PATHS = 'INVALID_PATHS'
3636
INVALID_RESUMABLE_FILE = 'INVALID_RESUMABLE_FILE'
3737
INVALID_RESUMABLE_UPLOAD = 'INVALID_RESUMABLE_UPLOAD'
38+
INVALID_RESUMABLE_FILE_SIZE = 'INVALID_RESUMABLE_FILE_SIZE'
3839
TOU_CONTENT = 'TOU_CONTENT'
3940
INVALID_TOKEN = 'INVALID_TOKEN'
4041
PERMISSION_DENIED = 'PERMISSION_DENIED'

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "app"
3-
version = "3.14.1"
3+
version = "3.15.0"
44
description = "This service is designed to support pilot platform"
55
authors = ["Indoc Systems"]
66

tests/app/services/file_manager/file_upload/test_file_upload.py

+39
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ def test_folder_merge_skip_with_all_duplication(mocker, mock_upload_client, capf
319319
def test_resume_upload(mocker):
320320
mocker.patch('app.services.file_manager.file_upload.models.FileObject.generate_meta', return_value=(1, 1))
321321
test_obj = FileObject('object/path', 'local_path', 'resumable_id', 'job_id', 'item_id')
322+
test_obj.total_size = 1
322323

323324
manifest_json = {
324325
'project_code': 'project_code',
@@ -328,11 +329,13 @@ def test_resume_upload(mocker):
328329
'current_folder_node': 'current_folder_node',
329330
'tags': 'tags',
330331
'file_objects': {test_obj.item_id: test_obj.to_dict()},
332+
'total_size': 1,
331333
}
332334

333335
get_return = test_obj.to_dict()
334336
get_return.update({'status': ItemStatus.REGISTERED})
335337
get_return.update({'id': get_return.get('item_id')})
338+
get_return.update({'size': 1})
336339
get_mock = mocker.patch(
337340
'app.services.file_manager.file_upload.file_upload.get_file_info_by_geid', return_value=[{'result': get_return}]
338341
)
@@ -378,3 +381,39 @@ def test_resume_upload_failed_when_REGISTERED_doesnt_exist(mocker, capfd):
378381

379382
get_mock.assert_called_once()
380383
assert resume_upload_mock.call_count == 0
384+
385+
386+
def test_resume_upload_integrity_check_failed(mocker, capfd):
    """Resume must abort when the manifest file size differs from the registered size.

    The local file object is given ``total_size = 2`` while the mocked
    ``get_file_info_by_geid`` result reports ``size = 1``. ``resume_upload`` is
    expected to exit via ``SystemExit`` after printing the
    ``INVALID_RESUMABLE_FILE_SIZE`` message formatted with
    ``('object/path', 1, 2)``.
    """
    mocker.patch('app.services.file_manager.file_upload.models.FileObject.generate_meta', return_value=(1, 1))
    test_obj = FileObject('object/path', 'local_path', 'resumable_id', 'job_id', 'item_id')
    test_obj.total_size = 2  # wrong size

    manifest_json = {
        'project_code': 'project_code',
        'operator': 'operator',
        'zone': AppConfig.Env.green_zone,
        'parent_folder_id': 'parent_folder_id',
        'current_folder_node': 'current_folder_node',
        'tags': 'tags',
        'file_objects': {test_obj.item_id: test_obj.to_dict()},
        'total_size': 1,
    }

    # Mocked lookup result: the item is REGISTERED with a size of 1,
    # which does not match the local size of 2 set above.
    get_return = test_obj.to_dict()
    get_return.update({'status': ItemStatus.REGISTERED})
    get_return.update({'id': get_return.get('item_id')})
    get_return.update({'size': 1})
    get_mock = mocker.patch(
        'app.services.file_manager.file_upload.file_upload.get_file_info_by_geid', return_value=[{'result': get_return}]
    )

    try:
        resume_upload(manifest_json, 1)
    except SystemExit:
        out, _ = capfd.readouterr()
        expect = customized_error_msg(ECustomizedError.INVALID_RESUMABLE_FILE_SIZE) % ('object/path', 1, 2)
        assert expect in out
    else:
        # The exception must actually be raised; merely constructing it would
        # let the test pass even when the size check never triggers an exit.
        raise AssertionError('SystemExit not raised')

    get_mock.assert_called_once()

tests/app/services/project_manager/test_project.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def test_list_project(httpx_mock, mocker, capsys):
99
mocker.patch('app.services.user_authentication.token_manager.SrvTokenManager.check_valid', return_value=0)
1010
httpx_mock.add_response(
1111
method='GET',
12-
url='http://bff_cli/v1/projects?page=0&page_size=10&order=created_at&order_by=desc',
12+
url='http://bff_cli/v1/projects?page=0&page_size=10&order=desc&order_by=created_at',
1313
json={
1414
'code': 200,
1515
'error_msg': '',
@@ -38,7 +38,7 @@ def test_list_project(httpx_mock, mocker, capsys):
3838
},
3939
)
4040
project_mgr = SrvProjectManager()
41-
project_mgr.list_projects(page=0, page_size=10, order='created_at', order_by='desc')
41+
project_mgr.list_projects(page=0, page_size=10, order='desc', order_by='created_at')
4242
out, _ = capsys.readouterr()
4343
print_out = out.split('\n')
4444
assert print_out[0] == ' Project Name Project Code '
@@ -54,11 +54,11 @@ def test_list_project_no_project(httpx_mock, mocker, capsys):
5454
mocker.patch('app.services.user_authentication.token_manager.SrvTokenManager.check_valid', return_value=0)
5555
httpx_mock.add_response(
5656
method='GET',
57-
url='http://bff_cli/v1/projects?page=0&page_size=10&order=created_at&order_by=desc',
57+
url='http://bff_cli/v1/projects?page=0&page_size=10&order=desc&order_by=created_at',
5858
json={'code': 200, 'error_msg': '', 'result': [], 'total': 0, 'page': 0},
5959
)
6060
project_mgr = SrvProjectManager()
61-
project_mgr.list_projects(page=0, page_size=10, order='created_at', order_by='desc')
61+
project_mgr.list_projects(page=0, page_size=10, order='desc', order_by='created_at')
6262
out, _ = capsys.readouterr()
6363
print_out = out.split('\n')
6464
assert print_out[0] == ' Project Name Project Code '
@@ -71,7 +71,7 @@ def test_list_project_desc_by_code(httpx_mock, mocker, capsys):
7171
mocker.patch('app.services.user_authentication.token_manager.SrvTokenManager.check_valid', return_value=0)
7272
httpx_mock.add_response(
7373
method='GET',
74-
url='http://bff_cli/v1/projects?page=0&page_size=10&order=code&order_by=desc',
74+
url='http://bff_cli/v1/projects?page=0&page_size=10&order=desc&order_by=code',
7575
json={
7676
'code': 200,
7777
'error_msg': '',
@@ -92,7 +92,7 @@ def test_list_project_desc_by_code(httpx_mock, mocker, capsys):
9292
},
9393
)
9494
project_mgr = SrvProjectManager()
95-
project_mgr.list_projects(page=0, page_size=10, order='code', order_by='desc')
95+
project_mgr.list_projects(page=0, page_size=10, order='desc', order_by='code')
9696
out, _ = capsys.readouterr()
9797
print_out = out.split('\n')
9898
assert print_out[0] == ' Project Name Project Code '
@@ -115,7 +115,7 @@ def test_list_project_desc_by_name(httpx_mock, mocker, capsys):
115115
mocker.patch('app.services.user_authentication.token_manager.SrvTokenManager.check_valid', return_value=0)
116116
httpx_mock.add_response(
117117
method='GET',
118-
url='http://bff_cli/v1/projects?page=0&page_size=10&order=code&order_by=desc',
118+
url='http://bff_cli/v1/projects?page=0&page_size=10&order=desc&order_by=name',
119119
json={
120120
'code': 200,
121121
'error_msg': '',
@@ -136,7 +136,7 @@ def test_list_project_desc_by_name(httpx_mock, mocker, capsys):
136136
},
137137
)
138138
project_mgr = SrvProjectManager()
139-
project_mgr.list_projects(page=0, page_size=10, order='code', order_by='desc')
139+
project_mgr.list_projects(page=0, page_size=10, order='desc', order_by='name')
140140
out, _ = capsys.readouterr()
141141
print_out = out.split('\n')
142142
assert print_out[0] == ' Project Name Project Code '
@@ -159,7 +159,7 @@ def test_list_project_desc_by_name_with_page_size(httpx_mock, mocker, capsys):
159159
mocker.patch('app.services.user_authentication.token_manager.SrvTokenManager.check_valid', return_value=0)
160160
httpx_mock.add_response(
161161
method='GET',
162-
url='http://bff_cli/v1/projects?page=0&page_size=3&order=code&order_by=desc',
162+
url='http://bff_cli/v1/projects?page=0&page_size=3&order=desc&order_by=name',
163163
json={
164164
'code': 200,
165165
'error_msg': '',
@@ -173,7 +173,7 @@ def test_list_project_desc_by_name_with_page_size(httpx_mock, mocker, capsys):
173173
},
174174
)
175175
project_mgr = SrvProjectManager()
176-
project_mgr.list_projects(page=0, page_size=3, order='code', order_by='desc')
176+
project_mgr.list_projects(page=0, page_size=3, order='desc', order_by='name')
177177
out, _ = capsys.readouterr()
178178
print_out = out.split('\n')
179179
assert print_out[0] == ' Project Name Project Code '
@@ -189,7 +189,7 @@ def test_list_project_desc_by_name_with_page_size_and_page(httpx_mock, mocker, c
189189
mocker.patch('app.services.user_authentication.token_manager.SrvTokenManager.check_valid', return_value=0)
190190
httpx_mock.add_response(
191191
method='GET',
192-
url='http://bff_cli/v1/projects?page=1&page_size=3&order=code&order_by=desc',
192+
url='http://bff_cli/v1/projects?page=1&page_size=3&order=desc&order_by=name',
193193
json={
194194
'code': 200,
195195
'error_msg': '',
@@ -203,7 +203,7 @@ def test_list_project_desc_by_name_with_page_size_and_page(httpx_mock, mocker, c
203203
},
204204
)
205205
project_mgr = SrvProjectManager()
206-
project_mgr.list_projects(page=1, page_size=3, order='code', order_by='desc')
206+
project_mgr.list_projects(page=1, page_size=3, order='desc', order_by='name')
207207
out, _ = capsys.readouterr()
208208
print_out = out.split('\n')
209209
assert print_out[0] == ' Project Name Project Code '

0 commit comments

Comments
 (0)