Skip to content

Commit 677c26f

Browse files
committed
Introduce basics for modification detection
As part of this work, we need to verify that files that are uploaded to permanent are downloaded without changes. This modification detection could also help verify completion of uploads and downloads. Signed-off-by: Fon E. Noel NFEBE <[email protected]>
1 parent 1cd24e5 commit 677c26f

File tree

2 files changed

+127
-3
lines changed

2 files changed

+127
-3
lines changed

README.md

+8-3
Original file line numberDiff line numberDiff line change
@@ -169,15 +169,20 @@ To test a nest with more levels, simply paste a nested folder structure inside `
169169

170170
Run
171171

172-
`./test-download.py --remote=prod --archive-path="/archives/rclone QA 1 (0a21-0000)/My Files/" --remote-dir=nested`
173-
174-
Check the downloads folder in `test-tree/downloads` and ensure that the `downloads/nested` directory has a structure like the nested directory uploaded in the [nested uploads test](#nested-uploads).
172+
`./test-download.py --remote=prod --archive-path="/archives/rclone QA 1 (0a21-0000)/My Files/" --remote-dir=misc/nested`
175173

174+
To verify that everything in the nested folder was downloaded correctly, run `./verify.py --nested-complete`.
176175

177176
### What file types and scenarios are left out?
178177

179178
Anything not included in the section above describing what is currently covered is by implication excluded from these tests.
180179

180+
## Hash verification
181+
182+
### Modification Detection
183+
184+
To verify that files that were successfully uploaded and downloaded have remained unchanged, as we would expect, run `./verify.py --succeeded`.
185+
181186
## Troubleshooting
182187

183188
- Remember that the commands are examples and some of the arguments may not apply to your specific environment.

verify.py

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import sys
4+
import hashlib
5+
import argparse
6+
7+
# Directory trees compared by the verification: the files that were uploaded
# and the copies that were downloaded again.
MISC_DIR = "test-tree/misc"
DOWNLOAD_MISC_DIR = "test-tree/downloads/misc"

# Number of bytes read from a file per iteration while hashing.
CHUNK_SIZE = 1024

# ANSI escape sequences used to colour terminal output; ENDC resets the colour.
OKBLUE = "\033[94m"
OKGREEN = "\033[92m"
WARNING = "\033[93m"
FAIL = "\033[91m"
ENDC = "\033[0m"
16+
17+
18+
def hash_file(file_path):
    """Return the hexadecimal SHA-1 digest of the file at ``file_path``.

    The file is opened in binary mode and fed to the hash in pieces of at
    most ``CHUNK_SIZE`` bytes, so the whole file is never read in one call.
    """
    digest = hashlib.sha1()
    with open(file_path, "rb") as stream:
        # Two-argument iter() keeps calling read() until it returns b"" (EOF).
        for block in iter(lambda: stream.read(CHUNK_SIZE), b""):
            digest.update(block)
    return digest.hexdigest()
28+
29+
30+
def crawl_upload_and_download_paths():
    """Collect the path of every file under the upload and download trees.

    Returns:
        A ``(uploaded_paths, downloaded_paths)`` tuple of lists. The first
        holds the files found beneath ``MISC_DIR`` and the second those
        beneath ``DOWNLOAD_MISC_DIR``, each in ``os.walk`` order.
    """
    uploaded_paths = [
        os.path.join(folder, name)
        for folder, _, names in os.walk(MISC_DIR)
        for name in names
    ]
    downloaded_paths = [
        os.path.join(folder, name)
        for folder, _, names in os.walk(DOWNLOAD_MISC_DIR)
        for name in names
    ]
    return uploaded_paths, downloaded_paths
42+
43+
44+
def make_file_to_harsh_maps():
    """Hash every uploaded and every downloaded file.

    Returns:
        A ``(pre_upload_hashes, post_upload_hashes)`` tuple. Each element is
        a list of ``{"path": ..., "hash": ...}`` dicts, one per file, built
        from the paths returned by ``crawl_upload_and_download_paths`` and
        the digest returned by ``hash_file``.
    """
    uploaded_paths, downloaded_paths = crawl_upload_and_download_paths()

    def describe(path):
        # One record per file: where it lives and what its content hashes to.
        return {"path": path, "hash": hash_file(path)}

    pre_upload_hashes = [describe(path) for path in uploaded_paths]
    post_upload_hashes = [describe(path) for path in downloaded_paths]
    return pre_upload_hashes, post_upload_hashes
53+
54+
55+
def parse_cli():
    """Build the command-line parser for the verification script.

    Returns:
        An ``argparse.ArgumentParser`` exposing three boolean flags
        (``--misc-complete``, ``--nested-complete`` and ``--succeeded``).
        The parser itself is returned; the caller invokes ``parse_args``.
    """
    # (flag, help text) pairs; every flag is a simple on/off switch.
    switches = (
        (
            "--misc-complete",
            "Verify that both the upload and download of the complete misc folder was successful",
        ),
        (
            "--nested-complete",
            "Verify that both the upload and download of the complete nested folder was successful",
        ),
        (
            "--succeeded",
            "Verify that files that were successfully uploaded where downloaded successfully",
        ),
    )

    cli = argparse.ArgumentParser(
        prog="verify", description="Check results of upload/download operations"
    )
    for flag, help_text in switches:
        cli.add_argument(flag, help=help_text, action="store_true")
    return cli
77+
78+
79+
def main():
    """Run the verification selected on the command line.

    With ``--succeeded``, checks that the hash of every downloaded file also
    occurs among the hashes of the pre-upload files, printing a warning for
    each file whose hash is absent and a coloured summary at the end.
    ``--misc-complete`` and ``--nested-complete`` are accepted but currently
    do nothing. Without any flag, a hint and the usage help are printed.
    """
    parser = parse_cli()
    args = parser.parse_args()

    if args.succeeded:
        # Hash the trees only in the mode that actually needs the hashes.
        pre_upload_hash_data, post_upload_hash_data = make_file_to_harsh_maps()
        # Materialise the hashes as a set. A lazy map() object is consumed by
        # each `in` test, so later lookups would miss hashes that had already
        # been iterated past and report unchanged files as modified.
        pre_upload_hashes = {entry.get("hash") for entry in pre_upload_hash_data}

        failed_once = False
        for file_data in post_upload_hash_data:
            print(f"{OKBLUE}Verifying hash for {file_data.get('path')} ...{ENDC}")
            if file_data.get("hash") not in pre_upload_hashes:
                print(
                    f"{WARNING}The hash to the path {file_data.get('path')} is missing!{ENDC}"
                )
                print(
                    f"{WARNING}File has either been modified (on disk or permanent) or is missing!{ENDC}\n"
                )
                failed_once = True

        if not failed_once:
            print(f"{OKGREEN}\nVerification complete!{ENDC}\n")
            print(
                f"{OKGREEN}All downloaded files have matching hashes in pre-uploaded file hashes.{ENDC}\n"
            )
        else:
            print(
                f"{FAIL}\nVerification complete but failed! Missing hash(es) detected.\n{ENDC}"
            )
            print(
                f"{FAIL}At least once missing hash detected, check the logs above.\n{ENDC}"
            )
    elif args.misc_complete:
        # Not implemented yet.
        pass
    elif args.nested_complete:
        # Not implemented yet.
        pass
    else:
        print("Not sure what to do!\n\n")
        parser.print_help()
116+
117+
118+
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)