Skip to content

Commit 70cbbec

Browse files
committed
copied most of what you need
1 parent b272223 commit 70cbbec

File tree

93 files changed

+1718616
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+1718616
-0
lines changed

.dockerignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
__pycache__
2+
venv
3+
.pytest_cache
4+
Dockerfile
5+
.git

backend.dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Sets up the backend, without writing data into the database.
# NOTE(review): the ENTRYPOINT below passes --overwrite; confirm against
# runserver.py that this still matches the statement above.

# Python image
FROM python:3.10.5-slim

# gcc required for python-Levenshtein.
# The apt package lists are deleted in the same layer so that they do not
# end up stored in the final image.
RUN apt-get update \
    && apt-get install gcc -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set current working directory inside container to /backend
WORKDIR /backend

# pip install the venv python modules
# (requirements.txt is copied on its own first so this layer stays cached
# until the requirements themselves change)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy entire backend into the container
COPY . .

# Expose port 8000 to the outside world
EXPOSE 8000

# ENV OVERWRITE='False'

# Run the server (-u: unbuffered stdout/stderr so logs show up immediately)
ENTRYPOINT ["python3", "-u", "runserver.py", "--overwrite"]

cache/__init__.py

Whitespace-only changes.

cache/cache.py

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
"""
2+
Loads cached data such as exclusions, program mappings, etc, into local
3+
JSON files for faster algorithms performance.
4+
This should be run from the backend directory or via runprocessors
5+
"""
6+
7+
from functools import reduce
8+
import operator
9+
import re
10+
from typing import Any
11+
12+
from algorithms.cache.cache_config import (CACHE_CONFIG, CACHED_EQUIVALENTS_FILE,
13+
CACHED_EXCLUSIONS_FILE,
14+
CACHED_WARNINGS_FILE,
15+
CONDITIONS_PROCESSED_FILE,
16+
COURSE_MAPPINGS_FILE,
17+
COURSES_PROCESSED_FILE,
18+
MAPPINGS_FILE,
19+
PROGRAM_MAPPINGS_FILE,
20+
PROGRAMS_FORMATTED_FILE)
21+
from data.utility.data_helpers import read_data, write_data
22+
23+
def cache_equivalents():
    """
    Reads the processed courses and caches, for every course, the value of
    its "equivalents" field:
        COURSE: <equivalents of COURSE>

    The resulting map is written to CACHED_EQUIVALENTS_FILE.

    NOTE: the equivalents value is presumably a {COURSE_CODE: 1} map, like
    the one described for the exclusions cache - confirm against
    coursesProcessed.json.
    """
    processed_courses = read_data(COURSES_PROCESSED_FILE)

    # One entry per course, copied straight from the processed data
    equivalents_by_course = {
        code: info["equivalents"]
        for code, info in processed_courses.items()
    }

    write_data(equivalents_by_course, CACHED_EQUIVALENTS_FILE)
43+
44+
def cache_exclusions():
    """
    Reads the processed courses and caches, for every course, the union of
    its "exclusions" and its "equivalents":
        COURSE: {
            EXCLUSION_1: 1,
            EXCLUSION_2: 1,
            EQUIVALENT_1: 1
        }

    The resulting map is written to CACHED_EXCLUSIONS_FILE.

    NOTE: Should run this after all the conditions have been processed as
    sometimes exclusions are included inside the conditions text
    """
    processed_courses = read_data(COURSES_PROCESSED_FILE)

    # `|` merges the two collections; equivalents are counted as exclusions too
    exclusions_by_course = {
        code: info["exclusions"] | info["equivalents"]
        for code, info in processed_courses.items()
    }

    write_data(exclusions_by_course, CACHED_EXCLUSIONS_FILE)
64+
65+
66+
def cache_handbook_note():
    """
    Reads the processed conditions and caches the handbook note of every
    course that has one:
        COURSE: HANDBOOK_NOTE

    Courses without a "handbook_note" field are left out. The resulting map
    is written to CACHED_WARNINGS_FILE.

    NOTE: per the original author, these notes are created during the manual
    fix stage, so this needs to be re-run as more conditions are manually
    fixed.
    """
    processed_conditions = read_data(CONDITIONS_PROCESSED_FILE)

    notes_by_course = {
        code: info["handbook_note"]
        for code, info in processed_conditions.items()
        if "handbook_note" in info
    }

    write_data(notes_by_course, CACHED_WARNINGS_FILE)
84+
85+
86+
def cache_mappings():
    """
    Writes to mappings.json and courseMappings.json (i.e maps courses to
    corresponding school/faculty)

    MAPPINGS_FILE receives
        FACULTY_OR_SCHOOL_NAME: TOKEN        e.g. 'School of Psychology': 'S Psychology'
    COURSE_MAPPINGS_FILE receives
        TOKEN: { COURSE_CODE: 1, ... }

    Every course must have a 'faculty' and a 'code' field; 'school' is
    optional.

    Raises:
        ValueError: if a faculty or school name does not contain the token
            its naming pattern promises (previously this surfaced as an
            AttributeError on None).
    """
    mappings = {}
    courseMappings = {}
    courses = read_data(COURSES_PROCESSED_FILE)

    def extractToken(pattern, name):
        # Returns the text matched by `pattern` in `name`, failing loudly
        # (and naming the offending input) when there is no match.
        match_object = re.search(pattern, name)
        if match_object is None:
            raise ValueError(
                f"Unable to tokenise {name!r} using pattern {pattern!r}"
            )
        return match_object.group()

    # Tokenise faculty using regex, e.g 'UNSW Business School' -> 'F Business'
    def tokeniseFaculty(Faculty):
        # All patterns are raw strings so that \s, \w and \, reach the regex
        # engine untouched instead of being invalid string escapes.
        if re.search(r"Faculty of.+", Faculty):
            pattern = r"(?<=Faculty\sof\s)[^\s\n\,]+"
        elif re.search(r"UNSW", Faculty):
            pattern = r"(?<=UNSW\s)[^\s\n\,]+"
        else:
            # Fall back to the first word of the name
            pattern = r"^([\w]+)"
        return "F " + extractToken(pattern, Faculty)

    # Tokenise school using regex, e.g 'School of Psychology' -> 'S Psychology'
    def tokeniseSchool(School):
        # 'School of the ...' has to be tested before the plain 'School of ...'
        if re.search(r"School\sof\sthe.+", School):
            pattern = r"(?<=School\sof\sthe\s)[^\s\n\,]+"
        elif re.search(r"School\sof\s.+", School):
            pattern = r"(?<=School\sof\s)[^\s\n\,]+"
        elif re.search(r"^(UC)", School):
            # Names starting with 'UC' keep a 'UC-' prefix in their token
            return "S UC-" + extractToken(r"(?<=UC\s)[^\s\n\,]+", School)
        elif re.search(r"UNSW", School):
            pattern = r"(?<=UNSW\s)[^\s\n\,]+"
        else:
            # Fall back to the first word of the name
            pattern = r"^([\w]+)"
        return "S " + extractToken(pattern, School)

    # add faculties to mappings.json
    for course in courses.values():
        faculty = course['faculty']
        if faculty not in mappings:
            faculty_token = tokeniseFaculty(faculty)
            mappings[faculty] = faculty_token
            courseMappings[faculty_token] = {}

    # add schools to mappings.json
    for course in courses.values():
        if 'school' in course:
            school = course['school']
            if school not in mappings:
                school_token = tokeniseSchool(school)
                mappings[school] = school_token
                courseMappings[school_token] = {}

    write_data(mappings, MAPPINGS_FILE)

    # finalise: register every course under its school (if any) and faculty
    for course in courses.values():
        courseCode = course['code']
        if 'school' in course:
            courseMappings[mappings[course['school']]][courseCode] = 1
        courseMappings[mappings[course['faculty']]][courseCode] = 1

    write_data(courseMappings, COURSE_MAPPINGS_FILE)
153+
154+
def cache_program_mappings():
    """
    Maps CODE# to programs, e.g.
    {
        "ACTL#": {
            "3586": 1,
            "4520": 1,
        }
    }

    Achieves this by looking for a keyword in the program's title. The
    keyword -> codes config is read from CACHE_CONFIG, the programs from
    PROGRAMS_FORMATTED_FILE, and the result is written to
    PROGRAM_MAPPINGS_FILE.

    The keyword lookup is a case-insensitive substring test, so a program
    whose title contains several keywords is recorded under the codes of
    all of them. Every code in the config gets an entry, even if no program
    matches it. An empty config yields an empty mapping.
    """
    keyword_codes: dict[str, list[str]] = read_data(CACHE_CONFIG)

    # Flatten the code lists directly: unlike reduce(operator.add, ...) this
    # works for an empty config and does not copy the list at every step.
    mappings: dict[str, dict[str, int]] = {
        code: {}
        for codes in keyword_codes.values()
        for code in codes
    }

    programs: dict[str, Any] = read_data(PROGRAMS_FORMATTED_FILE)

    for program in programs.values():
        # Lower-case the title once per program rather than once per keyword
        title = program["title"].lower()
        for keyword, codes in keyword_codes.items():
            if keyword.lower() in title:
                for code in codes:
                    mappings[code][program["code"]] = 1

    write_data(mappings, PROGRAM_MAPPINGS_FILE)

cache/cache_config.json

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"Actuarial": ["ACTL#", "ZBUS#"],
3+
"Business": ["BUSN#", "ZBUS#"],
4+
"Commerce": ["COMM#", "ZBUS#"],
5+
"Economics": ["ECON#", "ZBUS#"],
6+
"Politics, Philosophy and Economics": ["PPES#", "ZBUS#"],
7+
"Information Systems": ["INFS#", "ZBUS#"],
8+
"Data Science and Decisions": ["DATA#"],
9+
"Computer Science": ["COMP#"],
10+
"Engineering": ["ENGG#"],
11+
"Food Science": ["FOOD#"],
12+
"Medicine": ["MEDC#"],
13+
"Medical Science": ["MSCI#"],
14+
"Medicinal Chemistry": ["MCHM#"],
15+
"Advanced Math": ["MATH#"],
16+
"Advanced Science": ["ASCI#"],
17+
"Science": ["SCIF#"],
18+
"Education": ["EDST#"],
19+
"Law": ["LAWS#"],
20+
"Criminology": ["CRIM#"],
21+
"International Studies": ["INST#"],
22+
"Social Science": ["SOSS#"],
23+
"Social Work": ["SOCW#"],
24+
"Music": ["MUSC#"],
25+
"Design": ["DDES#"],
26+
"Politic": ["PPEC#"],
27+
"Media Arts": ["DART#"],
28+
"Arts": ["ARTS#"],
29+
"Media": ["MDIA#"],
30+
"CDF": ["CCDF#"],
31+
"Media (Screen && Sound Production)": ["MSSP#"],
32+
"Aviation": ["AVIA#"]
33+
}

cache/cache_config.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""
2+
Is the configuration file for `cache.py`
3+
"""
4+
5+
# Config for mapping program title keywords -> codes
6+
# This json is a single flat object (see cache_config.json).
7+
# Each key is a keyword that is looked for in a program's title
8+
# and each value is the list of codes
9+
# that keyword maps to
10+
CACHE_CONFIG = "algorithms/cache/cache_config.json"
11+
12+
# INPUT SOURCES
13+
COURSES_PROCESSED_FILE = "data/final_data/coursesProcessed.json"
14+
15+
PROGRAMS_FORMATTED_FILE = "data/scrapers/programsFormattedRaw.json"
16+
17+
CACHED_EXCLUSIONS_FILE = "algorithms/cache/exclusions.json"
18+
19+
CACHED_EQUIVALENTS_FILE = "algorithms/cache/equivalents.json"
20+
21+
CONDITIONS_PROCESSED_FILE = "data/final_data/conditionsProcessed.json"
22+
23+
24+
# OUTPUT SOURCES (cache.py also writes CACHED_EXCLUSIONS_FILE and CACHED_EQUIVALENTS_FILE, which are listed above)
25+
CACHED_WARNINGS_FILE = "algorithms/cache/handbook_note.json"
26+
27+
MAPPINGS_FILE = "algorithms/cache/mappings.json"
28+
29+
COURSE_MAPPINGS_FILE = "algorithms/cache/courseMappings.json"
30+
31+
PROGRAM_MAPPINGS_FILE = "algorithms/cache/programMappings.json"

0 commit comments

Comments
 (0)