11
11
import time
12
12
import humanize
13
13
import fnmatch
14
+ import hashlib
14
15
from pathlib import Path
15
16
16
17
IGNORED_FILES = [
@@ -57,6 +58,65 @@ def getConcatFilesInDir(input_dir, regex_patterns):
57
58
return result
58
59
59
60
61
# This function is passed a path to a gradeable and an output path to place files in and
# concatenates all of the files for each submission into a single file in the output directory
# returns the total size of the files concatenated
def processGradeable(basepath, config, input_dir, output_dir, total_concat):
    """Concatenate the selected files of every submission found under input_dir.

    For each user directory in input_dir (skipping users listed in
    config["ignore_submissions"]) and each version directory inside it, the text
    returned by getConcatFilesInDir is appended to
    <output_dir>/<user>/<version>/submission.concatenated.

    Parameters:
        basepath: directory whose group must match the group of input_dir.
        config: dict read for the keys "ignore_submissions", "version" and "regex".
        input_dir: gradeable directory containing one sub-directory per user.
        output_dir: directory under which the per-user output tree is created.
        total_concat: running size total carried over from earlier calls.

    Returns:
        total_concat plus sys.getsizeof() of every string written by this call.

    Raises:
        SystemExit: if input_dir does not exist or its group differs from that of
            basepath (checkTotalSize is also called after every write).
    """
    # basic error checking
    if not Path(input_dir).exists():
        raise SystemExit(f"ERROR: Unable to find directory {input_dir}")

    if Path(input_dir).group() != Path(basepath).group():
        # The two literals are concatenated, so the space after "not" is required.
        raise SystemExit(f"ERROR: Group for directory {input_dir} does not "
                         f"match group for {basepath} directory")

    # There is no `dir` variable in this scope, so comparing `dir` to "results" tested the
    # builtin function and was always False.  The callers visible in this file build
    # input_dir as <...>/<regex_dir>/<gradeable>, so the regex directory is the parent's name.
    # NOTE(review): for manually specified paths this relies on the same layout -- confirm.
    in_results = Path(input_dir).parent.name == "results"
    active_only = config["version"] == "active_version"

    # loop over each user
    for user in sorted(os.listdir(input_dir)):
        user_path = os.path.join(input_dir, user)
        if not os.path.isdir(user_path):
            continue
        elif user in config["ignore_submissions"]:
            continue

        my_active_version = None
        if active_only:
            # get the user's active version from their settings file if it exists, else get
            # most recent version for compatibility with early versions of Submitty
            submissions_details_path = os.path.join(user_path, 'user_assignment_settings.json')
            if os.path.exists(submissions_details_path):
                with open(submissions_details_path) as details_file:
                    details_json = json.load(details_file)
                    my_active_version = int(details_json["active_version"])
            else:
                # get the most recent version; compare numerically so that "10" beats "9"
                # and keep it an int so it can match int(version) below
                numeric_versions = [int(entry) for entry in os.listdir(user_path)
                                    if entry.isdecimal()
                                    and os.path.isdir(os.path.join(user_path, entry))]
                if not numeric_versions:
                    # nothing that looks like a version directory for this user
                    continue
                my_active_version = max(numeric_versions)

        # loop over each version
        for version in sorted(os.listdir(user_path)):
            version_path = os.path.join(user_path, version)
            if in_results:
                # only the "details" folder within "results" contains files relevant to Lichen
                version_path = os.path.join(version_path, "details")
            if not os.path.isdir(version_path):
                continue
            if active_only:
                # directories whose name is not a number cannot be the active version
                if not version.isdecimal() or int(version) != my_active_version:
                    continue

            output_file_path = os.path.join(output_dir, user, version, "submission.concatenated")
            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

            # append to concatenated file
            with open(output_file_path, "a") as output_file:
                concatenated_contents = getConcatFilesInDir(version_path, config["regex"])
                output_file.write(concatenated_contents)
                # sys.getsizeof is the in-memory size of the str object, not its encoded length
                total_concat += sys.getsizeof(concatenated_contents)

            # If we've exceeded the concatenation limit, kill program
            checkTotalSize(total_concat)
    return total_concat
118
+
119
+
60
120
def checkTotalSize (total_concat ):
61
121
if total_concat > LICHEN_CONFIG ['concat_max_total_bytes' ]:
62
122
raise SystemExit ("ERROR! exceeded"
@@ -85,37 +145,44 @@ def validate(config, args):
85
145
with open (langs_data_json_path , 'r' ) as langs_data_file :
86
146
langs_data = json .load (langs_data_file )
87
147
if language not in langs_data :
88
- raise SystemExit (f"ERROR! tokenizing not supported for language { language } " )
148
+ raise SystemExit (f"ERROR: tokenizing not supported for language { language } " )
89
149
90
150
# Check values of common code threshold and hash size
91
151
if (threshold < 2 ):
92
- raise SystemExit ("ERROR! threshold must be >= 2" )
152
+ raise SystemExit ("ERROR: threshold must be >= 2" )
93
153
94
154
if (hash_size < 1 ):
95
- raise SystemExit ("ERROR! hash_size must be >= 1" )
155
+ raise SystemExit ("ERROR: hash_size must be >= 1" )
96
156
97
157
# Check for backwards crawling
98
158
for e in regex_patterns :
99
159
if ".." in e :
100
- raise SystemExit ('ERROR! Invalid path component ".." in regex' )
160
+ raise SystemExit ('ERROR: Invalid path component ".." in regex' )
101
161
102
162
for gradeable in other_gradeables :
103
163
for field in gradeable :
104
164
if ".." in field :
105
- raise SystemExit ('ERROR! Invalid component ".." in other_gradeable path' )
165
+ raise SystemExit ('ERROR: Invalid component ".." in other_gradeable path' )
106
166
107
167
# check permissions to make sure we have access to the other gradeables
108
168
my_course_group_perms = Path (args .basepath ).group ()
109
169
for gradeable in other_gradeables :
110
170
if Path (args .datapath , gradeable ["other_semester" ], gradeable ["other_course" ]).group ()\
111
171
!= my_course_group_perms :
112
- raise SystemExit ("ERROR! Invalid permissions to access course "
172
+ raise SystemExit ("ERROR: Invalid permissions to access course "
113
173
f"{ gradeable ['other_semester' ]} /{ gradeable ['other_course' ]} " )
114
174
175
+ # check permissions for each path we are given (if any are provided)
176
+ if config .get ("other_gradeable_paths" ) is not None :
177
+ for path in config ["other_gradeable_paths" ]:
178
+ if Path (path ).group () != my_course_group_perms :
179
+ raise SystemExit (f"ERROR: Group for directory { path } does not"
180
+ f"match group for { args .basepath } directory" )
181
+
115
182
# make sure the regex directory is one of the acceptable directories
116
183
for dir in regex_dirs :
117
184
if dir not in ["submissions" , "results" , "checkout" ]:
118
- raise SystemExit ("ERROR! " , dir , " is not a valid input directory for Lichen" )
185
+ raise SystemExit (f "ERROR: { dir } is not a valid input directory for Lichen" )
119
186
120
187
121
188
def main ():
@@ -138,145 +205,80 @@ def main():
138
205
semester = config ["semester" ]
139
206
course = config ["course" ]
140
207
gradeable = config ["gradeable" ]
141
- version_mode = config ["version" ]
142
208
regex_patterns = config ["regex" ]
143
209
regex_dirs = config ["regex_dirs" ]
144
210
other_gradeables = config ["other_gradeables" ]
145
- users_to_ignore = config ["ignore_submissions" ]
211
+ # optional field -> other_gradeable_paths=None if key doesn't exist
212
+ other_gradeable_paths = config .get ("other_gradeable_paths" )
146
213
147
214
# ==========================================================================
148
215
# loop through and concatenate the selected files for each user in this gradeable
149
216
total_concat = 0
150
217
151
218
for dir in regex_dirs :
152
- gradeable_path = os .path .join (args .datapath , semester , course , dir , gradeable )
153
- # loop over each user
154
- for user in sorted (os .listdir (gradeable_path )):
155
- user_path = os .path .join (gradeable_path , user )
156
- if not os .path .isdir (user_path ):
157
- continue
158
- elif user in users_to_ignore :
159
- continue
160
-
161
- if version_mode == "active_version" :
162
- # get the user's active version from their settings file if it exists, else get
163
- # most recent version for compatibility with early versions of Submitty
164
- submissions_details_path = os .path .join (user_path , 'user_assignment_settings.json' )
165
- if os .path .exists (submissions_details_path ):
166
- with open (submissions_details_path ) as details_file :
167
- details_json = json .load (details_file )
168
- my_active_version = int (details_json ["active_version" ])
169
- else :
170
- # get the most recent version
171
- my_active_version = sorted (os .listdir (user_path ))[- 1 ]
172
-
173
- # loop over each version
174
- for version in sorted (os .listdir (user_path )):
175
- version_path = os .path .join (user_path , version )
176
- if dir == "results" :
177
- # only the "details" folder within "results" contains files relevant to Lichen
178
- version_path = os .path .join (version_path , "details" )
179
- if not os .path .isdir (version_path ):
180
- continue
181
- if version_mode == "active_version" and int (version ) != my_active_version :
182
- continue
183
-
184
- output_file_path = os .path .join (args .basepath , "users" , user ,
185
- version , "submission.concatenated" )
186
-
187
- if not os .path .exists (os .path .dirname (output_file_path )):
188
- os .makedirs (os .path .dirname (output_file_path ))
189
-
190
- # append to concatenated file
191
- with open (output_file_path , "a" ) as output_file :
192
- concatenated_contents = getConcatFilesInDir (version_path , regex_patterns )
193
- output_file .write (concatenated_contents )
194
- total_concat += sys .getsizeof (concatenated_contents )
195
-
196
- checkTotalSize (total_concat )
219
+ input_path = os .path .join (args .datapath , semester , course , dir , gradeable )
220
+ output_path = os .path .join (args .basepath , "users" )
221
+ total_concat = processGradeable (args .basepath , config ,
222
+ input_path , output_path , total_concat )
197
223
198
224
# ==========================================================================
199
225
# loop over all of the other gradeables and concatenate their submissions
200
226
for other_gradeable in other_gradeables :
201
227
for dir in regex_dirs :
202
- other_gradeable_path = os .path .join (args .datapath ,
203
- other_gradeable ["other_semester" ],
204
- other_gradeable ["other_course" ],
205
- dir ,
206
- other_gradeable ["other_gradeable" ])
207
- # loop over each user
208
- for other_user in sorted (os .listdir (other_gradeable_path )):
209
- other_user_path = os .path .join (other_gradeable_path , other_user )
210
- if not os .path .isdir (other_user_path ):
211
- continue
212
-
213
- if version_mode == "active_version" :
214
- # get the user's active version from their settings file if it exists, else get
215
- # most recent version for compatibility with early versions of Submitty
216
- other_submissions_details_path = os .path .join (other_user_path ,
217
- 'user_assignment_settings.json' )
218
- if os .path .exists (other_submissions_details_path ):
219
- with open (other_submissions_details_path ) as other_details_file :
220
- other_details_json = json .load (other_details_file )
221
- my_active_version = int (other_details_json ["active_version" ])
222
- else :
223
- # get the most recent version
224
- my_active_version = sorted (os .listdir (other_user_path ))[- 1 ]
225
-
226
- # loop over each version
227
- for other_version in sorted (os .listdir (other_user_path )):
228
- other_version_path = os .path .join (other_user_path , other_version )
229
- if dir == "results" :
230
- # only the "details" dir within "results" contains files relevant to Lichen
231
- other_version_path = os .path .join (other_version_path , "details" )
232
- if not os .path .isdir (other_version_path ):
233
- continue
234
-
235
- other_output_file_path = os .path .join (args .basepath , "other_gradeables" ,
236
- f"{ other_gradeable ['other_semester' ]} __{ other_gradeable ['other_course' ]} __{ other_gradeable ['other_gradeable' ]} " , # noqa: E501
237
- other_user , other_version ,
238
- "submission.concatenated" )
239
-
240
- if not os .path .exists (os .path .dirname (other_output_file_path )):
241
- os .makedirs (os .path .dirname (other_output_file_path ))
242
-
243
- # append to concatenated file
244
- with open (other_output_file_path , "a" ) as other_output_file :
245
- other_concatenated_contents = getConcatFilesInDir (other_version_path ,
246
- regex_patterns )
247
- other_output_file .write (other_concatenated_contents )
248
- total_concat += sys .getsizeof (other_concatenated_contents )
249
-
250
- checkTotalSize (total_concat )
228
+ input_path = os .path .join (args .datapath ,
229
+ other_gradeable ["other_semester" ],
230
+ other_gradeable ["other_course" ],
231
+ dir ,
232
+ other_gradeable ["other_gradeable" ])
233
+
234
+ output_path = os .path .join (args .basepath , "other_gradeables" ,
235
+ f"{ other_gradeable ['other_semester' ]} __{ other_gradeable ['other_course' ]} __{ other_gradeable ['other_gradeable' ]} " ) # noqa: E501
236
+ total_concat = processGradeable (args .basepath , config ,
237
+ input_path , output_path , total_concat )
238
+
239
+ # take care of any manually-specified paths if they exist
240
+ if other_gradeable_paths is not None :
241
+ for path in other_gradeable_paths :
242
+ # We hash the path as the name of the gradeable
243
+ dir_name = hashlib .md5 (path .encode ('utf-8' )).hexdigest ()
244
+ output_path = os .path .join (args .basepath , "other_gradeables" , dir_name )
245
+ total_concat = processGradeable (args .basepath , config , path ,
246
+ output_path , total_concat )
251
247
252
248
# ==========================================================================
253
249
# iterate over all of the created submissions, checking to see if they are empty
254
250
# and printing a message if so
255
251
252
+ empty_directories = [] # holds a list of users who had no files concatenated
253
+
256
254
for user in os .listdir (os .path .join (args .basepath , "users" )):
257
255
user_path = os .path .join (args .basepath , "users" , user )
258
256
for version in os .listdir (user_path ):
259
257
version_path = os .path .join (user_path , version )
260
258
my_concatenated_file = os .path .join (version_path , "submission.concatenated" )
261
259
with open (my_concatenated_file , "r" ) as my_cf :
262
260
if my_cf .read () == "" :
263
- print ("Warning: No files matched provided regex in selected directories "
264
- f"for user { user } version { version } " )
261
+ empty_directories .append (f"{ user } :{ version } " )
262
+ if len (empty_directories ) > 0 :
263
+ print ("Warning: No files matched provided regex in selected directories for user(s):" ,
264
+ ", " .join (empty_directories ))
265
265
266
266
# do the same for the other gradeables
267
- for other_gradeable in other_gradeables :
268
- other_gradeable_dir_name = f" { other_gradeable [ 'other_semester' ] } __ { other_gradeable [ 'other_course' ] } __ { other_gradeable [ 'other_gradeable' ] } " # noqa: E501
269
- for other_user in os .listdir (os .path .join (args .basepath , "other_gradeables" ,
270
- other_gradeable_dir_name )):
267
+ for other_gradeable in os . listdir ( os . path . join ( args . basepath , " other_gradeables" )) :
268
+ empty_directories = []
269
+ for other_user in os .listdir (os .path .join (args .basepath ,
270
+ "other_gradeables" , other_gradeable )):
271
271
other_user_path = os .path .join (args .basepath , "other_gradeables" ,
272
- other_gradeable_dir_name , other_user )
272
+ other_gradeable , other_user )
273
273
for other_version in os .listdir (other_user_path ):
274
274
other_version_path = os .path .join (other_user_path , other_version )
275
275
my_concatenated_file = os .path .join (other_version_path , "submission.concatenated" )
276
276
with open (my_concatenated_file , "r" ) as my_cf :
277
277
if my_cf .read () == "" :
278
- print ("Warning: No files matched provided regex in selected directories "
279
- f"for user { other_user } version { other_version } " )
278
+ empty_directories .append (f"{ other_user } :{ other_version } " )
279
+ if len (empty_directories ) > 0 :
280
+ print ("Warning: No files matched provided regex in selected directories for user(s):" ,
281
+ ", " .join (empty_directories ), "in gradeable" , other_gradeable )
280
282
281
283
# ==========================================================================
282
284
# concatenate provided code
0 commit comments