Skip to content

Commit 9f4cdab

Browse files
committed
Improve special-files download script
- Now download files in chunks, to make the download of large files possible on low bandwidth.
- Show some kind of download progress.
- Move default download sources to text files (out of Python code).
- Brush up documentation.
Signed-off-by: Fon E. Noel NFEBE <[email protected]>
1 parent f7307a5 commit 9f4cdab

File tree

4 files changed

+84
-37
lines changed

4 files changed

+84
-37
lines changed

README.md

+8-8
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,11 @@ Run `./upload-test.py test-tree/challenging-names --only=414 --remote-dir=test-4
114114
##### Large uploads
115115
###### Uploads
116116

117-
To test for large file uploads. A couple of large files are needed and can be downloaded via:
117+
To test large file (`400MB` +) uploads, a couple of large files are required. Some ready-made test files can be downloaded via:
118118

119-
`./special-files-downloader --large`
119+
`./special-files-downloader.py --large`
120120

121-
If you have you own large files you would like to test with, you can list the links to those files in a text file like so:
121+
If you have your own large files or other kinds of files you would like to run tests with, you can list the links to those files in a text file like so:
122122

123123
'my_files.txt'
124124
```
@@ -127,16 +127,16 @@ https://link.com/to/file_2.extension
127127
https://link.com/to/file_3.extension
128128
```
129129

130-
and then run `./special-files-downloader --my-sources my_files.txt`
130+
and then run `./special-files-downloader.py --my-source my_files.txt`
131131

132-
- *You can specify as many paths as you want*
133-
- *You can name the the source text file anything you want but pass the right name and path to `--my-sources`*
132+
- *You can specify as many paths as you want inside the file*
133+
- *You can name the source text file anything you want, but pass the correct name and path to `--my-source`*
134134

135-
**You don't need to download any files if you already have some special files on your computer, simply copy such files into one of these directories `test-tree/special-files/large`, `test-tree/special-files/zips`, `test-tree/special-files/mixed` or `test-tree/special-files/custom`**
135+
**You don't need to download any files if you already have some special files on your computer; simply copy such files into one of these directories: `test-tree/special-files/`, `test-tree/special-files/large`, `test-tree/special-files/zips`, or `test-tree/special-files/custom`**
136136

137137
Once the files are on disk:
138138

139-
Run `./upload-test.py test-tree/special-files --remote-dir=special-files --log-file=special-files-log.txt --remote=prod --archive-path="/archives/QA (0a21-0000)/My Files/"`
139+
Run `./upload-test.py test-tree/special-files/large --remote-dir=large-files --log-file=large-files-log.txt --remote=prod --archive-path="/archives/QA (0a21-0000)/My Files/"`
140140

141141
### What file types and scenarios are left out?
142142

source_large_files.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
https://www.quintic.com/software/sample_videos/Cricket%20Bowling%20150fps%201200.avi

source_zip_files.txt

Whitespace-only changes.

special-files-downloader.py

+75-29
Original file line numberDiff line numberDiff line change
@@ -2,64 +2,110 @@
22
import os
33
import sys
44
import argparse
5-
import requests
65
from urllib.parse import urlparse
6+
import requests
77

8-
SPECIAL_FILES_PATH = "test-tree/special-files/"
9-
LARGE_FILE_URIS = [
10-
"https://www.quintic.com/software/sample_videos/Cricket%20Bowling%20150fps%201200.avi"
11-
]
8+
SPECIAL_FILES_ROOT = "test-tree/special-files/"
9+
LARGE_FILES_PATH = SPECIAL_FILES_ROOT + "large/"
10+
ZIP_FILES_PATH = SPECIAL_FILES_ROOT + "zips/"
11+
CUSTOM_FILES_PATH = SPECIAL_FILES_ROOT + "custom/"
12+
CHUNK_SIZE = 1024 * 1024
13+
LARGE_FILE_URLS = []
1214
ZIP_FILE_URLS = []
1315

1416

1517
def parse_cli():
18+
"""Prepare parser"""
1619
parser = argparse.ArgumentParser(
1720
prog="special-files-downloader", description="Download special test files"
1821
)
1922

20-
parser.add_argument("--large", help=f"Download large files for testing")
21-
parser.add_argument("--zip", help=f"Download zip files for testing")
2223
parser.add_argument(
23-
"--my-sources", help=f"Download files from links listed in text file path"
24+
"--large", help="Download large files for testing", action="store_true"
25+
)
26+
parser.add_argument(
27+
"--zip", help="Download zip files for testing", action="store_true"
28+
)
29+
parser.add_argument(
30+
"--my-source", help="Download files from links listed in text file path"
2431
)
2532
parser.add_argument(
26-
"--all",
27-
help="Download all earmarked special files.",
33+
"--all", help="Download all earmarked special files.", action="store_true"
2834
)
2935

3036
return parser
3137

3238

33-
def download_file_from_url(uri, path):
34-
response = requests.get(uri, allow_redirects=False)
35-
fname = os.path.basename(urlparse(uri).path)
36-
if response.status_code == 200:
37-
with open(path + fname, "wb") as file:
38-
file.write(response.content)
39+
def check_paths():
40+
"""Ensure special-file folders required in test-tree are present"""
41+
if not os.path.exists(SPECIAL_FILES_ROOT):
42+
os.makedirs(SPECIAL_FILES_ROOT)
43+
if not os.path.exists(LARGE_FILES_PATH):
44+
os.makedirs(LARGE_FILES_PATH)
45+
if not os.path.exists(ZIP_FILES_PATH):
46+
os.makedirs(ZIP_FILES_PATH)
47+
if not os.path.exists(CUSTOM_FILES_PATH):
48+
os.makedirs(CUSTOM_FILES_PATH)
49+
50+
51+
def get_file_urls():
52+
"""Get links to default special-files required for testing"""
53+
global LARGE_FILE_URLS
54+
global ZIP_FILE_URLS
55+
large_files_handle = open("source_large_files.txt", "r", encoding="utf-8")
56+
large_files = large_files_handle.readlines()
57+
large_files_handle.close()
58+
LARGE_FILE_URLS = map(lambda x: x.strip(), large_files)
59+
zip_files_handle = open("source_zip_files.txt", "r", encoding="utf-8")
60+
zip_files = zip_files_handle.readlines()
61+
zip_files_handle.close()
62+
ZIP_FILE_URLS = map(lambda x: x.strip(), zip_files)
63+
64+
65+
def download_file_from_url(url, path):
66+
"""Download file in url to path"""
67+
fname = os.path.basename(urlparse(url).path)
68+
print(f"\nDownloading {fname}\n")
69+
underline = "=" * len(fname)
70+
underline = underline + "============"
71+
print(underline + "\n")
72+
size = 0
73+
with requests.get(url, stream=True) as response:
74+
if response.status_code == 200:
75+
with open(path + fname, "wb") as file:
76+
for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
77+
file.write(chunk)
78+
size = size + (CHUNK_SIZE)
79+
print(f"Downloaded {size} bytes of {fname} ...")
3980

4081

4182
def main():
83+
"Script entry point"
84+
check_paths()
85+
get_file_urls()
4286
parser = parse_cli()
4387
if len(sys.argv) == 1:
4488
parser.print_help()
45-
print("No downloads done....")
89+
print(
90+
"\n========================\n| No downloads done... |\n========================\n"
91+
)
4692
args = parser.parse_args()
4793

4894
if args.large:
49-
for uri in LARGE_FILE_URIS:
50-
download_file_from_url(uri, SPECIAL_FILES_PATH + "large/")
95+
for url in LARGE_FILE_URLS:
96+
download_file_from_url(url, LARGE_FILES_PATH)
5197
if args.zip:
52-
for uri in ZIP_FILE_URLS:
53-
download_file_from_url(uri, SPECIAL_FILES_PATH + "zips/")
98+
for url in ZIP_FILE_URLS:
99+
download_file_from_url(url, ZIP_FILE_URLS)
54100
if args.all:
55-
ALL = ZIP_FILE_URLS + LARGE_FILE_URIS
56-
for uri in ALL:
57-
download_file_from_url(uri, SPECIAL_FILES_PATH + "mixed/")
58-
if args.my_sources:
59-
sources = open(args.my_sources, "r")
60-
sources = sources.readlines()
61-
for source in sources:
62-
download_file_from_url(source, SPECIAL_FILES_PATH + "custom/")
101+
ALL_URLS = ZIP_FILE_URLS + LARGE_FILE_URLS
102+
for url in ALL_URLS:
103+
download_file_from_url(url, SPECIAL_FILES_ROOT)
104+
if args.my_source:
105+
source = open(args.my_source, "r")
106+
source = source.readlines()
107+
for url in source:
108+
download_file_from_url(url.strip(), CUSTOM_FILES_PATH)
63109

64110

65111
if __name__ == "__main__":

0 commit comments

Comments
 (0)