diff --git a/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/DepartmentCrawlingPath.kt b/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/DepartmentCrawlingPath.kt index d05b8a8..77a990c 100644 --- a/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/DepartmentCrawlingPath.kt +++ b/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/DepartmentCrawlingPath.kt @@ -1,42 +1,42 @@ package com.dmforu.crawling.parser -enum class DepartmentCrawlingPath (val type: String, val majorPath: String, val noticePath: String) { +enum class DepartmentCrawlingPath(val type: String, val path: String) { // 기계공학부 - MECHANICAL_ENGINEERING_DEPARTMENT("기계공학과", "dmu_23205", "1707"), - MECHANICAL_DESIGN_ENGINEERING("기계설계공학과", "dmu_23207", "1716"), + MECHANICAL_ENGINEERING_DEPARTMENT("기계공학과", "4461"), + MECHANICAL_DESIGN_ENGINEERING("기계설계공학과", "4474"), // 로봇자동화공학부 - AUTOMATION_ENGINEERING("자동화공학과", "dmu_23209", "1728"), - ROBOT_SOFTWARE_ENGINEERING("로봇소프트웨어과", "dmu_23211", "1737"), + AUTOMATION_ENGINEERING("자동화공학과", "4487"), + ROBOT_SOFTWARE_ENGINEERING("로봇소프트웨어과", "4502"), // 전기전자통신공학부 - ELECTRICAL_ENGINEERING("전기공학과", "dmu_23212", "1749"), - INFORMATION_ELECTRONIC_ENGINEERING("정보전자공학과", "dmu_23214", "1758"), - SEMICONDUCTOR_ELECTRONIC_ENGINEERING("반도체전자공학과", "dmu_23216", "1767"), - INFORMATION_COMMUNICATION_ENGINEERING("정보통신공학과", "dmu_23218", "1776"), - FIRE_SAFETY_MANAGEMENT("소방안전관리과", "dmu_23268", "2503"), + ELECTRICAL_ENGINEERING("전기공학과", "4518"), + INFORMATION_ELECTRONIC_ENGINEERING("정보전자공학과", "4530"), + SEMICONDUCTOR_ELECTRONIC_ENGINEERING("반도체전자공학과", "4530"), + INFORMATION_COMMUNICATION_ENGINEERING("정보통신공학과", "4543"), + FIRE_SAFETY_MANAGEMENT("소방안전관리과", "4557"), // 컴퓨터공학부 - WEB_APPLIED_SOFTWARE_ENGINEERING("웹응용소프트웨어공학과", "dmu_23220", "1788"), - COMPUTER_SOFTWARE_ENGINEERING("컴퓨터소프트웨어공학과", "dmu_23222", "1797"), - AI_SOFTWARE_ENGINEERING("인공지능소프트웨어학과", "dmu_23259", "1806"), + WEB_APPLIED_SOFTWARE_ENGINEERING("웹응용소프트웨어공학과", "4568"), + COMPUTER_SOFTWARE_ENGINEERING("컴퓨터소프트웨어공학과", "4580"), + AI_SOFTWARE_ENGINEERING("인공지능소프트웨어학과", "4593"), // 생활환경공학부 - BIO_CHEMICAL_ENGINEERING("생명화학공학과", "dmu_23245", "1818"), - BIO_CONVERGENCE_ENGINEERING("바이오융합공학과", "dmu_23264", "1827"), - ARCHITECTURE("건축과", "dmu_23015", "1836"), - INTERIOR_ARCHITECTURE_DESIGN("실내건축디자인과", "dmu_23256", "1845"), - VISUAL_DESIGN("시각디자인과", "dmu_23068", "1854"), - AR_VR_CONTENTS_DESIGN("AR-VR콘텐츠디자인과", "dmu_23269", "2633"), + BIO_CHEMICAL_ENGINEERING("생명화학공학과", "4605"), + BIO_CONVERGENCE_ENGINEERING("바이오융합공학과", "4617"), + ARCHITECTURE("건축과", "4629"), + INTERIOR_ARCHITECTURE_DESIGN("실내건축디자인과", "4643"), + VISUAL_DESIGN("시각디자인과", "4654"), + AR_VR_CONTENTS_DESIGN("AR-VR콘텐츠디자인과", "4666"), // 경영학부 - BUSINESS_MANAGEMENT("경영학과", "dmu_23232", "1866"), - TAX_ACCOUNTING("세무회계학과", "dmu_23234", "1875"), - DISTRIBUTION_MARKETING("유통마케팅학과", "dmu_23236", "1884"), - HOTEL_TOURISM("호텔관광학과", "dmu_23250", "1893"), - BUSINESS_INFORMATION("경영정보학과", "dmu_23238", "1902"), - BIG_DATA_MANAGEMENT("빅데이터경영과", "dmu_23260", "1911"), + BUSINESS_MANAGEMENT("경영학과", "4677"), + TAX_ACCOUNTING("세무회계학과", "4687"), + DISTRIBUTION_MARKETING("유통마케팅학과", "4697"), + HOTEL_TOURISM("호텔관광학과", "4708"), + BUSINESS_INFORMATION("경영정보학과", "4719"), + BIG_DATA_MANAGEMENT("빅데이터경영과", "4729"), - // 자유전공학과 https://www.dongyang.ac.kr/dmu_23274/3071/subview.do - UNDECLARED_MAJOR("자유전공학과", "dmu_23274", "3071"); + // 자유전공학과 + UNDECLARED_MAJOR("자유전공학과", "4739"); } \ No newline at end of file diff --git a/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/DepartmentNoticeParser.kt b/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/DepartmentNoticeParser.kt index 2faef0c..e78abe2 100644 --- a/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/DepartmentNoticeParser.kt +++ b/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/DepartmentNoticeParser.kt @@ -36,14 +36,7 @@ class DepartmentNoticeParser( val number = row.select(".td-num").text().toIntOrNull() ?: return null val title = row.select(".td-subject a").text() val author = row.select(".td-write").text() - - // TODO 리팩토링 대상 - val url = if (major.type == "자유전공학과") { - generateUrlFromSearch2(row.select(".td-subject a").attr("onclick")) - } else { - generateUrlFromSearch(row.select(".td-subject a").attr("href")) - } - + val url = generateUrlFromSearch(row.select(".td-subject a").attr("href")) val date = LocalDate.parse(row.select(".td-date").text(), formatter) return Notice.of( @@ -58,7 +51,7 @@ class DepartmentNoticeParser( private fun generateSearchUrl(): String { return StringBuilder() - .append("https://www.dongyang.ac.kr/").append(major.majorPath).append("/").append(major.noticePath) + .append("https://www.dongyang.ac.kr/dmu/").append(major.path) .append("/subview.do?page=").append(pageNumber++).toString() } @@ -74,18 +67,6 @@ class DepartmentNoticeParser( .append("/view.do?layout=unknown").toString() } - //TODO 리팩토링 대상 - private fun generateUrlFromSearch2(url: String): String { - val matcher: Matcher = pattern2.matcher(url) - - verifyValidMatcher(matcher) - - return StringBuilder() - .append("https://www.dongyang.ac.kr/bbs/").append(matcher.group(1)) - .append("/").append(matcher.group(2)).append("/").append(matcher.group(3)) - .append("/artclView.do?layout=unknown").toString() - } - private fun verifyValidMatcher(matcher: Matcher) { if (!matcher.find()) { throw GenerateNoticeUrlException() @@ -95,7 +76,6 @@ class DepartmentNoticeParser( companion object { private val pattern: Pattern = Pattern.compile("\\('([^']+)'\\,'([^']+)'\\,'([^']+)'\\,'([^']+)'") - private val pattern2: Pattern = Pattern.compile("jf_viewArtcl\\('([^']+)', '([^']+)', '([^']+)'\\)") private val formatter: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy.MM.dd") } } \ No newline at end of file diff --git a/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/ScheduleParser.kt b/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/ScheduleParser.kt index b0c9149..c92a828 100644 --- a/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/ScheduleParser.kt +++ b/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/ScheduleParser.kt @@ -18,14 +18,24 @@ class ScheduleParser( } private fun fetchYearSchedule(year: Int): List { - val document = htmlLoader.get(DMU_SCHEDULE_URL + year) + val beforeDocument = htmlLoader.get(DMU_SCHEDULE_URL + (year - 1)) // 작년 일정 + val document = htmlLoader.get(DMU_SCHEDULE_URL + year) // 올해 일정 + + // 작년 일정에서 1~2월 가져오기 + val prevYearMonths = fetchMonthSchedules(beforeDocument, 1..2) + // 올해 일정에서 3~12월 가져오기 + val currentYearMonths = fetchMonthSchedules(document, 3..12) + + // 두 리스트 합쳐서 반환 + return prevYearMonths + currentYearMonths + } + + private fun fetchMonthSchedules(document: Document, filterRange: IntRange): List { val monthTables = document.select(YEAR_SCHEDULE_SELECTOR) return monthTables.mapNotNull { monthTable -> val monthSchedule = fetchMonthSchedule(monthTable) - - // 일정 정보가 업로드 되지 않는 달은 제외한다. (다음 년도 3월 이후의 정보) - if (monthSchedule.monthSchedule.isEmpty()) null else monthSchedule + if (monthSchedule.month !in filterRange || monthSchedule.monthSchedule.isEmpty()) null else monthSchedule } } @@ -73,7 +83,7 @@ class ScheduleParser( } companion object { - private const val DMU_SCHEDULE_URL = "https://www.dongyang.ac.kr/dongyang/71/subview.do?year=" + private const val DMU_SCHEDULE_URL = "https://www.dongyang.ac.kr/dmu/4749/subview.do?year=" private const val YEAR_SCHEDULE_SELECTOR = ".yearSchdulWrap" private const val MONTH_DATE_SELECTOR = "p" private const val MONTH_SCHEDULE_SELECTOR = ".scheList li" diff --git a/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/UniversityNoticeParser.kt b/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/UniversityNoticeParser.kt index 3464a78..8ab0d4c 100644 --- a/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/UniversityNoticeParser.kt +++ b/dmforu-crawling/src/main/kotlin/com/dmforu/crawling/parser/UniversityNoticeParser.kt @@ -50,7 +50,7 @@ class UniversityNoticeParser( private fun generateSearchUrl(): String { return StringBuilder() - .append("https://www.dongyang.ac.kr/dongyang/129/subview.do?page=").append(pageNumber++) + .append("https://www.dongyang.ac.kr/dmu/4904/subview.do?page=").append(pageNumber++) .toString() }