Skip to content

Commit fd22c02

Browse files
[Feature:Plagiarism] Support arbitrary other gradeable paths (#68)
* renaming sequence length to hash size and prior term to other gradeables
* suggested edits
* linting
* 99% implemented (syntax error????)
* Fix compilation issues
* fix ranking files
* Make requested changes
* Fix tests
* progress
* Implement arbitrary other gradeables
* lint
* lint (v2.0)

Co-authored-by: sbelsk <[email protected]>
1 parent f1b2990 commit fd22c02

File tree

2 files changed

+116
-113
lines changed

2 files changed

+116
-113
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ tools/assignments/*
33
__pycache__
44
tests/__pycache__
55
vendor/
6-
cmake-build-debug/*
6+
cmake-build-debug
77
.idea
88
.DS_Store
9+
.vscode

bin/concatenate_all.py

Lines changed: 114 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import time
1212
import humanize
1313
import fnmatch
14+
import hashlib
1415
from pathlib import Path
1516

1617
IGNORED_FILES = [
@@ -57,6 +58,65 @@ def getConcatFilesInDir(input_dir, regex_patterns):
5758
return result
5859

5960

61+
# This function is passed a path to a gradeable and an output path to place files in and
62+
# concatenates all of the files for each submission into a single file in the output directory
63+
# returns the total size of the files concatenated
64+
def processGradeable(basepath, config, input_dir, output_dir, total_concat, source_dir=None):
    """Concatenate the selected files of every submission found under input_dir.

    For each user directory in input_dir and each version directory below it, the
    files matched by config["regex"] are appended to
    <output_dir>/<user>/<version>/submission.concatenated.

    Args:
        basepath: Lichen working directory; its group must match input_dir's group.
        config: Parsed configuration. Reads the keys "version", "ignore_submissions"
            and "regex".
        input_dir: Directory holding one sub-directory per user.
        output_dir: Directory under which the concatenated files are written.
        total_concat: Running size total accumulated by earlier calls.
        source_dir: Name of the data directory the input comes from ("submissions",
            "results" or "checkout"). When omitted it is inferred from the name of
            input_dir's parent, which matches paths laid out as .../<dir>/<gradeable>.
            NOTE(review): manually specified paths with a different layout should
            pass this explicitly.

    Returns:
        total_concat increased by the size of everything concatenated in this call.

    Raises:
        SystemExit: if input_dir does not exist, if its group differs from
            basepath's group, or (via checkTotalSize) if the size limit is exceeded.
    """
    input_path = Path(input_dir)
    if not input_path.exists():
        raise SystemExit(f"ERROR: Unable to find directory {input_dir}")

    if input_path.group() != Path(basepath).group():
        # the trailing space keeps the two message halves from running together
        raise SystemExit(f"ERROR: Group for directory {input_dir} does not "
                         f"match group for {basepath} directory")

    # The original compared the builtin `dir` to "results", which is never true,
    # so the "details" folder was silently ignored.
    if source_dir is None:
        source_dir = input_path.parent.name
    details_only = source_dir == "results"
    active_only = config["version"] == "active_version"

    # loop over each user
    for user in sorted(os.listdir(input_dir)):
        user_path = os.path.join(input_dir, user)
        if not os.path.isdir(user_path) or user in config["ignore_submissions"]:
            continue

        my_active_version = _findActiveVersion(user_path) if active_only else None

        # loop over each version
        for version in sorted(os.listdir(user_path)):
            version_path = os.path.join(user_path, version)
            if details_only:
                # only the "details" folder within "results" contains files relevant to Lichen
                version_path = os.path.join(version_path, "details")
            if not os.path.isdir(version_path):
                continue
            # a directory whose name is not a number can never be the active version
            if active_only and (not version.isdecimal()
                                or int(version) != my_active_version):
                continue

            output_file_path = os.path.join(output_dir, user, version, "submission.concatenated")
            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

            # append to concatenated file
            with open(output_file_path, "a") as output_file:
                concatenated_contents = getConcatFilesInDir(version_path, config["regex"])
                output_file.write(concatenated_contents)
            # NOTE: sys.getsizeof reports the in-memory size of the string object,
            # kept as-is so the existing limit keeps its meaning
            total_concat += sys.getsizeof(concatenated_contents)

            # If we've exceeded the concatenation limit, kill program
            checkTotalSize(total_concat)
    return total_concat


def _findActiveVersion(user_path):
    """Return the user's active version number as an int, or None if there is none.

    The version is read from user_assignment_settings.json when that file exists.
    Otherwise the highest numbered version directory is used, for compatibility
    with early versions of Submitty. Returning an int in both cases keeps the
    caller's comparison against int(version) meaningful.
    """
    settings_path = os.path.join(user_path, 'user_assignment_settings.json')
    if os.path.exists(settings_path):
        with open(settings_path) as settings_file:
            return int(json.load(settings_file)["active_version"])

    # compare numerically so that version 10 ranks above version 9
    versions = [int(entry) for entry in os.listdir(user_path)
                if entry.isdecimal() and os.path.isdir(os.path.join(user_path, entry))]
    return max(versions, default=None)
118+
119+
60120
def checkTotalSize(total_concat):
61121
if total_concat > LICHEN_CONFIG['concat_max_total_bytes']:
62122
raise SystemExit("ERROR! exceeded"
@@ -85,37 +145,44 @@ def validate(config, args):
85145
with open(langs_data_json_path, 'r') as langs_data_file:
86146
langs_data = json.load(langs_data_file)
87147
if language not in langs_data:
88-
raise SystemExit(f"ERROR! tokenizing not supported for language {language}")
148+
raise SystemExit(f"ERROR: tokenizing not supported for language {language}")
89149

90150
# Check values of common code threshold and hash size
91151
if (threshold < 2):
92-
raise SystemExit("ERROR! threshold must be >= 2")
152+
raise SystemExit("ERROR: threshold must be >= 2")
93153

94154
if (hash_size < 1):
95-
raise SystemExit("ERROR! hash_size must be >= 1")
155+
raise SystemExit("ERROR: hash_size must be >= 1")
96156

97157
# Check for backwards crawling
98158
for e in regex_patterns:
99159
if ".." in e:
100-
raise SystemExit('ERROR! Invalid path component ".." in regex')
160+
raise SystemExit('ERROR: Invalid path component ".." in regex')
101161

102162
for gradeable in other_gradeables:
103163
for field in gradeable:
104164
if ".." in field:
105-
raise SystemExit('ERROR! Invalid component ".." in other_gradeable path')
165+
raise SystemExit('ERROR: Invalid component ".." in other_gradeable path')
106166

107167
# check permissions to make sure we have access to the other gradeables
108168
my_course_group_perms = Path(args.basepath).group()
109169
for gradeable in other_gradeables:
110170
if Path(args.datapath, gradeable["other_semester"], gradeable["other_course"]).group()\
111171
!= my_course_group_perms:
112-
raise SystemExit("ERROR! Invalid permissions to access course "
172+
raise SystemExit("ERROR: Invalid permissions to access course "
113173
f"{gradeable['other_semester']}/{gradeable['other_course']}")
114174

175+
# check permissions for each path we are given (if any are provided)
176+
if config.get("other_gradeable_paths") is not None:
177+
for path in config["other_gradeable_paths"]:
178+
if Path(path).group() != my_course_group_perms:
179+
raise SystemExit(f"ERROR: Group for directory {path} does not"
180+
f"match group for {args.basepath} directory")
181+
115182
# make sure the regex directory is one of the acceptable directories
116183
for dir in regex_dirs:
117184
if dir not in ["submissions", "results", "checkout"]:
118-
raise SystemExit("ERROR! ", dir, " is not a valid input directory for Lichen")
185+
raise SystemExit(f"ERROR: {dir} is not a valid input directory for Lichen")
119186

120187

121188
def main():
@@ -138,145 +205,80 @@ def main():
138205
semester = config["semester"]
139206
course = config["course"]
140207
gradeable = config["gradeable"]
141-
version_mode = config["version"]
142208
regex_patterns = config["regex"]
143209
regex_dirs = config["regex_dirs"]
144210
other_gradeables = config["other_gradeables"]
145-
users_to_ignore = config["ignore_submissions"]
211+
# optional field -> other_gradeable_paths=None if key doesn't exist
212+
other_gradeable_paths = config.get("other_gradeable_paths")
146213

147214
# ==========================================================================
148215
# loop through and concatenate the selected files for each user in this gradeable
149216
total_concat = 0
150217

151218
for dir in regex_dirs:
152-
gradeable_path = os.path.join(args.datapath, semester, course, dir, gradeable)
153-
# loop over each user
154-
for user in sorted(os.listdir(gradeable_path)):
155-
user_path = os.path.join(gradeable_path, user)
156-
if not os.path.isdir(user_path):
157-
continue
158-
elif user in users_to_ignore:
159-
continue
160-
161-
if version_mode == "active_version":
162-
# get the user's active version from their settings file if it exists, else get
163-
# most recent version for compatibility with early versions of Submitty
164-
submissions_details_path = os.path.join(user_path, 'user_assignment_settings.json')
165-
if os.path.exists(submissions_details_path):
166-
with open(submissions_details_path) as details_file:
167-
details_json = json.load(details_file)
168-
my_active_version = int(details_json["active_version"])
169-
else:
170-
# get the most recent version
171-
my_active_version = sorted(os.listdir(user_path))[-1]
172-
173-
# loop over each version
174-
for version in sorted(os.listdir(user_path)):
175-
version_path = os.path.join(user_path, version)
176-
if dir == "results":
177-
# only the "details" folder within "results" contains files relevant to Lichen
178-
version_path = os.path.join(version_path, "details")
179-
if not os.path.isdir(version_path):
180-
continue
181-
if version_mode == "active_version" and int(version) != my_active_version:
182-
continue
183-
184-
output_file_path = os.path.join(args.basepath, "users", user,
185-
version, "submission.concatenated")
186-
187-
if not os.path.exists(os.path.dirname(output_file_path)):
188-
os.makedirs(os.path.dirname(output_file_path))
189-
190-
# append to concatenated file
191-
with open(output_file_path, "a") as output_file:
192-
concatenated_contents = getConcatFilesInDir(version_path, regex_patterns)
193-
output_file.write(concatenated_contents)
194-
total_concat += sys.getsizeof(concatenated_contents)
195-
196-
checkTotalSize(total_concat)
219+
input_path = os.path.join(args.datapath, semester, course, dir, gradeable)
220+
output_path = os.path.join(args.basepath, "users")
221+
total_concat = processGradeable(args.basepath, config,
222+
input_path, output_path, total_concat)
197223

198224
# ==========================================================================
199225
# loop over all of the other gradeables and concatenate their submissions
200226
for other_gradeable in other_gradeables:
201227
for dir in regex_dirs:
202-
other_gradeable_path = os.path.join(args.datapath,
203-
other_gradeable["other_semester"],
204-
other_gradeable["other_course"],
205-
dir,
206-
other_gradeable["other_gradeable"])
207-
# loop over each user
208-
for other_user in sorted(os.listdir(other_gradeable_path)):
209-
other_user_path = os.path.join(other_gradeable_path, other_user)
210-
if not os.path.isdir(other_user_path):
211-
continue
212-
213-
if version_mode == "active_version":
214-
# get the user's active version from their settings file if it exists, else get
215-
# most recent version for compatibility with early versions of Submitty
216-
other_submissions_details_path = os.path.join(other_user_path,
217-
'user_assignment_settings.json')
218-
if os.path.exists(other_submissions_details_path):
219-
with open(other_submissions_details_path) as other_details_file:
220-
other_details_json = json.load(other_details_file)
221-
my_active_version = int(other_details_json["active_version"])
222-
else:
223-
# get the most recent version
224-
my_active_version = sorted(os.listdir(other_user_path))[-1]
225-
226-
# loop over each version
227-
for other_version in sorted(os.listdir(other_user_path)):
228-
other_version_path = os.path.join(other_user_path, other_version)
229-
if dir == "results":
230-
# only the "details" dir within "results" contains files relevant to Lichen
231-
other_version_path = os.path.join(other_version_path, "details")
232-
if not os.path.isdir(other_version_path):
233-
continue
234-
235-
other_output_file_path = os.path.join(args.basepath, "other_gradeables",
236-
f"{other_gradeable['other_semester']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}", # noqa: E501
237-
other_user, other_version,
238-
"submission.concatenated")
239-
240-
if not os.path.exists(os.path.dirname(other_output_file_path)):
241-
os.makedirs(os.path.dirname(other_output_file_path))
242-
243-
# append to concatenated file
244-
with open(other_output_file_path, "a") as other_output_file:
245-
other_concatenated_contents = getConcatFilesInDir(other_version_path,
246-
regex_patterns)
247-
other_output_file.write(other_concatenated_contents)
248-
total_concat += sys.getsizeof(other_concatenated_contents)
249-
250-
checkTotalSize(total_concat)
228+
input_path = os.path.join(args.datapath,
229+
other_gradeable["other_semester"],
230+
other_gradeable["other_course"],
231+
dir,
232+
other_gradeable["other_gradeable"])
233+
234+
output_path = os.path.join(args.basepath, "other_gradeables",
235+
f"{other_gradeable['other_semester']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}") # noqa: E501
236+
total_concat = processGradeable(args.basepath, config,
237+
input_path, output_path, total_concat)
238+
239+
# take care of any manually-specified paths if they exist
240+
if other_gradeable_paths is not None:
241+
for path in other_gradeable_paths:
242+
# We hash the path as the name of the gradeable
243+
dir_name = hashlib.md5(path.encode('utf-8')).hexdigest()
244+
output_path = os.path.join(args.basepath, "other_gradeables", dir_name)
245+
total_concat = processGradeable(args.basepath, config, path,
246+
output_path, total_concat)
251247

252248
# ==========================================================================
253249
# iterate over all of the created submissions, checking to see if they are empty
254250
# and printing a message if so
255251

252+
empty_directories = [] # holds a list of users who had no files concatenated
253+
256254
for user in os.listdir(os.path.join(args.basepath, "users")):
257255
user_path = os.path.join(args.basepath, "users", user)
258256
for version in os.listdir(user_path):
259257
version_path = os.path.join(user_path, version)
260258
my_concatenated_file = os.path.join(version_path, "submission.concatenated")
261259
with open(my_concatenated_file, "r") as my_cf:
262260
if my_cf.read() == "":
263-
print("Warning: No files matched provided regex in selected directories "
264-
f"for user {user} version {version}")
261+
empty_directories.append(f"{user}:{version}")
262+
if len(empty_directories) > 0:
263+
print("Warning: No files matched provided regex in selected directories for user(s):",
264+
", ".join(empty_directories))
265265

266266
# do the same for the other gradeables
267-
for other_gradeable in other_gradeables:
268-
other_gradeable_dir_name = f"{other_gradeable['other_semester']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}" # noqa: E501
269-
for other_user in os.listdir(os.path.join(args.basepath, "other_gradeables",
270-
other_gradeable_dir_name)):
267+
for other_gradeable in os.listdir(os.path.join(args.basepath, "other_gradeables")):
268+
empty_directories = []
269+
for other_user in os.listdir(os.path.join(args.basepath,
270+
"other_gradeables", other_gradeable)):
271271
other_user_path = os.path.join(args.basepath, "other_gradeables",
272-
other_gradeable_dir_name, other_user)
272+
other_gradeable, other_user)
273273
for other_version in os.listdir(other_user_path):
274274
other_version_path = os.path.join(other_user_path, other_version)
275275
my_concatenated_file = os.path.join(other_version_path, "submission.concatenated")
276276
with open(my_concatenated_file, "r") as my_cf:
277277
if my_cf.read() == "":
278-
print("Warning: No files matched provided regex in selected directories "
279-
f"for user {other_user} version {other_version}")
278+
empty_directories.append(f"{other_user}:{other_version}")
279+
if len(empty_directories) > 0:
280+
print("Warning: No files matched provided regex in selected directories for user(s):",
281+
", ".join(empty_directories), "in gradeable", other_gradeable)
280282

281283
# ==========================================================================
282284
# concatenate provided code

0 commit comments

Comments
 (0)