csesoc
diff --git a/‎.dockerignore
Lines changed: 5 additions & 0 deletions b/‎.dockerignore
Lines changed: 5 additions & 0 deletions
diff --git a/‎backend.dockerfile
Lines changed: 27 additions & 0 deletions b/‎backend.dockerfile
Lines changed: 27 additions & 0 deletions
diff --git a/‎cache/__init__.py b/‎cache/__init__.py
diff --git a/‎cache/cache.py
Lines changed: 182 additions & 0 deletions b/‎cache/cache.py
Lines changed: 182 additions & 0 deletions
diff --git a/‎cache/cache_config.json
Lines changed: 33 additions & 0 deletions b/‎cache/cache_config.json
Lines changed: 33 additions & 0 deletions
diff --git a/‎cache/cache_config.py
Lines changed: 31 additions & 0 deletions b/‎cache/cache_config.py
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,5 @@
+__pycache__
+venv
+.pytest_cache
+Dockerfile
+.git
@@ -0,0 +1,27 @@
+# Sets up the backend, without writing data into the database.
+
+# Python image
+FROM python:3.10.5-slim
+
+# gcc required for python-Levenshtein
+RUN apt-get update \
+    && apt-get install gcc -y \
+    && apt-get clean
+
+# Set current working directory inside container to /backend
+WORKDIR /backend
+
+# pip install the venv python modules
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy entire backend into the container
+COPY . .
+
+# Expose port 8000 to the outside world
+EXPOSE 8000
+
+# ENV OVERWRITE='False'
+
+# Run the server
+ENTRYPOINT ["python3", "-u", "runserver.py", "--overwrite"]
@@ -0,0 +1,182 @@
+"""
+Loads cached data such as exclusions, program mappings, etc, into local
+JSON files for faster algorithms performance.
+This should be run from the backend directory or via runprocessors
+"""
+
+from functools import reduce
+import operator
+import re
+from typing import Any
+
+from algorithms.cache.cache_config import (CACHE_CONFIG, CACHED_EQUIVALENTS_FILE,
+                                           CACHED_EXCLUSIONS_FILE,
+                                           CACHED_WARNINGS_FILE,
+                                           CONDITIONS_PROCESSED_FILE,
+                                           COURSE_MAPPINGS_FILE,
+                                           COURSES_PROCESSED_FILE,
+                                           MAPPINGS_FILE,
+                                           PROGRAM_MAPPINGS_FILE,
+                                           PROGRAMS_FORMATTED_FILE)
+from data.utility.data_helpers import read_data, write_data
+
+def cache_equivalents():
+    """
+    Reads from processed courses and stores the exclusions in a map mapping
+    COURSE: {
+        EXCLUSION_1: 1,
+        EXCLUSION_2: 1,
+        EXCLUSION_3: 1
+    }
+    NOTE: Should run this after all the conditions have been processed as sometimes
+    exclusions are included inside the conditions text
+    """
+
+    courses = read_data(COURSES_PROCESSED_FILE)
+
+    cached_exclusions = {}
+
+    for course, data in courses.items():
+        cached_exclusions[course] =  data["equivalents"]
+
+    write_data(cached_exclusions, CACHED_EQUIVALENTS_FILE)
+
+def cache_exclusions():
+    """
+    Reads from processed courses and stores the exclusions in a map mapping
+    COURSE: {
+        EXCLUSION_1: 1,
+        EXCLUSION_2: 1,
+        EXCLUSION_3: 1
+    }
+    NOTE: Should run this after all the conditions have been processed as sometimes
+    exclusions are included inside the conditions text
+    """
+
+    courses = read_data(COURSES_PROCESSED_FILE)
+
+    cached_exclusions = {}
+
+    for course, data in courses.items():
+        cached_exclusions[course] = data["exclusions"] | data["equivalents"]
+
+    write_data(cached_exclusions, CACHED_EXCLUSIONS_FILE)
+
+
+def cache_handbook_note():
+    """
+    Reads from processed conditions and stores the warnings in a map mapping
+    COURSE: WARNING
+
+    NOTE: Condition warnings are created during the manual fix stage, so this
+    will need to be re-run as more conditions are manually fixed.
+    """
+
+    conditions = read_data(CONDITIONS_PROCESSED_FILE)
+
+    cached_handbook_note = {}
+
+    for course, data in conditions.items():
+        if "handbook_note" in data:
+            cached_handbook_note[course] = data["handbook_note"]
+
+    write_data(cached_handbook_note, CACHED_WARNINGS_FILE)
+
+
+def cache_mappings():
+    """
+    Writes to mappings.json and courseMappings.json (i.e maps courses to corresponding school/faculty)
+    """
+    mappings = {}
+    courseMappings = {}
+    courses = read_data(COURSES_PROCESSED_FILE)
+
+    # Tokenise faculty using regex, e.g 'UNSW Business School' -> 'F Business'
+    def tokeniseFaculty(Faculty):
+        faculty_token = "F "
+        if re.search("Faculty of.+", Faculty):
+            match_object = re.search("(?<=Faculty\sof\s)[^\s\n\,]+", Faculty)
+        elif re.search("UNSW", Faculty):
+            match_object = re.search(r"(?<=UNSW\s)[^\s\n\,]+", Faculty)
+        else:
+            match_object = re.search("^([\w]+)", Faculty)
+        match = match_object.group()
+        faculty_token += match
+        return faculty_token
+
+    # Tokenise faculty using regex, e.g 'School of Psychology' -> 'S Psychology'
+    def tokeniseSchool(School):
+        school_token = "S "
+        if re.search("School\sof\sthe.+", School):
+            match_object = re.search("(?<=School\sof\sthe\s)[^\s\n\,]+", School)
+        elif re.search("School\sof\s.+", School):
+            match_object = re.search("(?<=School\sof\s)[^\s\n\,]+", School)
+        elif re.search("^(UC)", School):
+            match_object = re.search("(?<=UC\s)[^\s\n\,]+", School)
+            match = school_token + "UC-" +  match_object.group()
+            return match
+        elif re.search("UNSW", School):
+            match_object = re.search("(?<=UNSW\s)[^\s\n\,]+", School)
+        else:
+            match_object = re.search("^([\w]+)", School)
+        match = match_object.group()
+        school_token += match
+        return school_token
+
+    # add faculties to mappings.json
+    for course in courses:
+        faculty = courses[course]['faculty']
+        if faculty not in mappings:
+            faculty_token = tokeniseFaculty(faculty)
+            mappings[faculty] = faculty_token
+            courseMappings[faculty_token] = {}
+    # add schools to mappings.json
+    for course in courses.values():
+        if 'school' in course:
+            school = course['school']
+            if school not in mappings:
+                school_token = tokeniseSchool(school)
+                mappings[school] = school_token
+                courseMappings[school_token] = {}
+    write_data(mappings, MAPPINGS_FILE)
+
+    # finalise
+    for course in courses.values():
+        courseCode = course['code']
+        courseFaculty = course['faculty']
+        if 'school' in course:
+            courseSchool = course['school']
+            courseMappings[mappings[courseSchool]][courseCode] = 1
+        courseMappings[mappings[courseFaculty]][courseCode] = 1
+
+    write_data(courseMappings, COURSE_MAPPINGS_FILE)
+
+def cache_program_mappings():
+    """
+    Maps CODE# to programs, e.g.
+    {
+        "ACTL#": {
+            "3586": 1,
+            "4520": 1,
+        }
+    }
+
+    Achieves this by looking for a keyword in the program's title
+    """
+
+    keyword_codes: dict[str, list[str]] = read_data(CACHE_CONFIG)
+
+    mappings = {
+        code: {} for code
+        in reduce(operator.add, keyword_codes.values())
+    }
+
+    programs: dict[str, Any] = read_data(PROGRAMS_FORMATTED_FILE)
+
+    for program in programs.values():
+        for keyword in keyword_codes.keys():
+            if keyword.lower() in program["title"].lower():
+                for code in keyword_codes[keyword]:
+                    mappings[code][program["code"]] = 1
+
+    write_data(mappings, PROGRAM_MAPPINGS_FILE)
@@ -0,0 +1,33 @@
+{
+    "Actuarial": ["ACTL#", "ZBUS#"],
+    "Business": ["BUSN#", "ZBUS#"],
+    "Commerce": ["COMM#", "ZBUS#"],
+    "Economics": ["ECON#", "ZBUS#"],
+    "Politics, Philosophy and Economics": ["PPES#", "ZBUS#"],
+    "Information Systems": ["INFS#", "ZBUS#"],
+    "Data Science and Decisions": ["DATA#"],
+    "Computer Science": ["COMP#"],
+    "Engineering": ["ENGG#"],
+    "Food Science": ["FOOD#"],
+    "Medicine": ["MEDC#"],
+    "Medical Science": ["MSCI#"],
+    "Medicinal Chemistry": ["MCHM#"],
+    "Advanced Math": ["MATH#"],
+    "Advanced Science": ["ASCI#"],
+    "Science": ["SCIF#"],
+    "Education": ["EDST#"],
+    "Law": ["LAWS#"],
+    "Criminology": ["CRIM#"],
+    "International Studies": ["INST#"],
+    "Social Science": ["SOSS#"],
+    "Social Work": ["SOCW#"],
+    "Music": ["MUSC#"],
+    "Design": ["DDES#"],
+    "Politic": ["PPEC#"],
+    "Media Arts": ["DART#"],
+    "Arts": ["ARTS#"],
+    "Media": ["MDIA#"],
+    "CDF": ["CCDF#"],
+    "Media (Screen && Sound Production)": ["MSSP#"],
+    "Aviation": ["AVIA#"]
+}
@@ -0,0 +1,31 @@
+"""
+Is the configuration file for `cache.py`
+"""
+
+# Config for mapping course -> code
+# This json has two fields. `"codes"` and `"keywords"`
+# "codes" is a list of valid codes
+# "keyword_mapping" is a dict where the key is a keyword and the value
+# is the codes that keyword maps to
+CACHE_CONFIG = "algorithms/cache/cache_config.json"
+
+# INPUT SOURCES
+COURSES_PROCESSED_FILE = "data/final_data/coursesProcessed.json"
+
+PROGRAMS_FORMATTED_FILE = "data/scrapers/programsFormattedRaw.json"
+
+CACHED_EXCLUSIONS_FILE = "algorithms/cache/exclusions.json"
+
+CACHED_EQUIVALENTS_FILE = "algorithms/cache/equivalents.json"
+
+CONDITIONS_PROCESSED_FILE = "data/final_data/conditionsProcessed.json"
+
+
+# OUTPUT SOURCES
+CACHED_WARNINGS_FILE = "algorithms/cache/handbook_note.json"
+
+MAPPINGS_FILE = "algorithms/cache/mappings.json"
+
+COURSE_MAPPINGS_FILE = "algorithms/cache/courseMappings.json"
+
+PROGRAM_MAPPINGS_FILE = "algorithms/cache/programMappings.json"