Skip to content

Commit 4935754

Browse files
committed
Start working on Deno library
Creating a scraper in Deno seems to be a very enjoyable process, but shipping a second binary in a Docker container isn't, which is why I've made the decision to create this full client as a Deno library.
1 parent f8dc61e commit 4935754

File tree

4 files changed

+250
-0
lines changed

4 files changed

+250
-0
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@
44
},
55
"deno.enable": true,
66
"deno.lint": true,
7+
"deno.unstable": true,
78
}

client.ts

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
export type { CVToScan, CVToScanPersonalDetails, CVToScanEducation, CVToScanLanguage, CVToScanWorkExperience } from './cv.ts'
2+
export { newCvToScan, LangLevel } from './cv.ts'
3+
4+
import { envT, readEnv, serverT } from './env.ts'
5+
import { CVToScan } from './cv.ts'
6+
7+
import { Sha512 } from "https://deno.land/[email protected]/hash/sha512.ts"
8+
9+
/**
 * Describes how many login users (credentials for the site being scraped)
 * a scraper expects to find in its environment configuration.
 */
export enum LoginUsersRestriction {
    /** This scraper doesn't have any domain login users */
    None = 0,
    /** This scraper expects one user to login with */
    One = 1,
    /** This scraper expects at least one user to login with but can have more */
    OneOrMore = 2,
}
14+
15+
/**
 * Client used by a scraper to hand scraped CVs over to a primary RTCV server
 * and, optionally, a number of alternative servers.
 *
 * The configuration (servers, api keys and optional login users) is loaded
 * through `readEnv()` when the client is constructed.
 *
 * Note: validation failures in this class are thrown as plain strings, which
 * is the convention used throughout this library.
 */
export class RTCVScraperClient {
    private env: envT
    private servers: Array<ServerConn>
    // Maps the stringified reference number to the moment its entry expires.
    // A Map is used rather than a plain object so that reference numbers such
    // as "constructor" or "toString" cannot collide with inherited members and
    // be reported as already cached.
    private referenceCache = new Map<string, Date>()
    /** When true no requests are sent to any server; reference caching keeps working. */
    dummyMode = false

    /**
     * @param loginUsersResitrction How many login users the scraper requires in its env.
     * @throws a string when the number of configured login users violates the
     * restriction, or whatever `readEnv()` throws when the env cannot be loaded.
     */
    constructor(loginUsersResitrction: LoginUsersRestriction) {
        this.env = readEnv()

        const loginUsersCount = this.env.login_users?.length
        if (loginUsersResitrction === LoginUsersRestriction.One) {
            if (!loginUsersCount) throw 'Expected exactly one login user but got none'
            if (loginUsersCount !== 1) throw `Expected exactly one login user but got ${loginUsersCount}`
        } else if (loginUsersResitrction === LoginUsersRestriction.OneOrMore) {
            if (!loginUsersCount) throw 'Expected one or more login users but got none'
        }

        // The primary server is always first, the alternatives follow in env order
        this.servers = [
            new ServerConn(this.env.primary_server),
            ...(this.env.alternative_servers?.map(s => new ServerConn(s)) || []),
        ]
    }

    /**
     * Checks on every configured server that the api key has the scraper role.
     * Does nothing in dummy mode.
     *
     * @returns this client, so the call can be chained.
     */
    async authenticate(): Promise<this> {
        if (this.dummyMode) return this

        await Promise.all(this.servers.map(s => s.checkHasScraperRole()))
        return this
    }

    /**
     * Get the user that is used to login to the site we scrape.
     * The constructor only guarantees a user is present for the `One` and
     * `OneOrMore` restrictions.
     */
    get loginUser() {
        return this.loginUsers[0]!
    }

    /**
     * Get the users that are used to login to the site we scrape.
     * The constructor only guarantees users are present for the `One` and
     * `OneOrMore` restrictions.
     */
    get loginUsers() {
        return this.env.login_users!
    }

    /**
     * Sends a CV to all servers unless its reference number is still cached.
     *
     * Only a failure of the primary server is reported to the caller, errors
     * from alternative servers are ignored. When the primary server fails the
     * reference number is released from the cache again so the CV can be retried.
     *
     * @throws a string when the reference number is empty, or the error of the
     * primary server when it did not accept the CV.
     */
    async sendCV(cv: CVToScan) {
        if (this.hasCachedReference(cv.referenceNumber)) return
        // Reserve the reference before sending so an overlapping call for the
        // same CV does not send it twice.
        this.setCachedReference(cv.referenceNumber)

        if (this.dummyMode) return

        try {
            await Promise.all([
                // We only care if the cv was accepted by the primary server
                this.servers[0].sendCV(cv),
                // From the other servers we will ignore the errors
                ...this.servers.slice(1).map(s => s.sendCV(cv).catch(_ => {/* Ignore errors */ }))
            ])
        } catch (e) {
            // The primary server did not accept the CV, forget the reservation
            // so a later attempt is not silently skipped.
            this.referenceCache.delete(String(cv.referenceNumber))
            throw e
        }
    }

    /**
     * Marks a reference number as seen. An entry that is still valid is left
     * untouched, so its expiry is not extended.
     *
     * @param referenceNr Reference number to remember; `1` and `'1'` are the same entry.
     * @param options `ttlHours` is the lifetime of the entry, 72 hours when omitted.
     * @throws a string when the reference number is an empty string.
     */
    setCachedReference(referenceNr: string | number, options?: { ttlHours?: 12 | 24 | 72 }) {
        if (referenceNr === '') throw 'Reference number cannot be empty'
        if (this.hasCachedReference(referenceNr)) return
        const expireDate = new Date()
        expireDate.setHours(expireDate.getHours() + (options?.ttlHours ?? 72))
        this.referenceCache.set(String(referenceNr), expireDate)
    }

    /**
     * Reports whether a reference number is cached and not yet expired.
     * An expired entry is removed as a side effect.
     */
    hasCachedReference(referenceNr: string | number): boolean {
        const key = String(referenceNr)
        const expireDate = this.referenceCache.get(key)
        if (!expireDate) return false

        if (expireDate.getTime() < Date.now()) {
            // This reference number has been expired
            this.referenceCache.delete(key)
            return false
        }

        return true
    }
}
90+
91+
/**
 * Connection to a single RTCV server: holds its location and the
 * authorization header derived from the api key, and performs the requests.
 */
class ServerConn {
    // Value of `role` in the keyinfo response that identifies the scraper role
    private static readonly scraperRole = 1

    server_location: string
    authHeader: string

    /**
     * @param data Location and api credentials of the server. The api key is
     * never sent as-is, only its SHA-512 hex digest is placed in the header.
     */
    constructor(data: serverT) {
        this.server_location = data.server_location

        const hashedApiKey = new Sha512().update(data.api_key).hex()
        this.authHeader = `Basic ${data.api_key_id}:${hashedApiKey}`
    }

    /**
     * Performs a JSON request against this server.
     *
     * @param method HTTP method to use.
     * @param path Path on the server, expected to start with a `/`.
     * @param body Optional value that is sent JSON encoded.
     * @returns the JSON decoded response body.
     * @throws Error when the server answers with a non-2xx status, so that a
     * rejected request is not mistaken for a successful one.
     */
    private async doRequest(method: 'GET' | 'POST', path: string, body?: unknown) {
        // Strip trailing slashes so "https://host/" + "/api" does not become "https://host//api"
        const url = this.server_location.replace(/\/+$/, '') + path
        const options = {
            method,
            headers: {
                'Content-Type': 'application/json',
                'Authorization': this.authHeader,
            },
            // Only an absent body is omitted, falsy values such as 0 or '' are still sent
            body: body === undefined ? undefined : JSON.stringify(body),
        }

        const res = await fetch(url, options)
        if (!res.ok) {
            // Include the response body, it usually carries the reason of the failure
            const errorBody = await res.text().catch(() => '')
            throw new Error(`${method} ${url} failed with status ${res.status} ${res.statusText}: ${errorBody}`)
        }
        return await res.json()
    }

    /** Sends a CV to this server to be scanned. */
    async sendCV(cv: CVToScan) {
        await this.doRequest('POST', '/api/v1/scraper/scanCV', { cv: cv })
    }

    /**
     * Verifies that the api key used for this server has the scraper role.
     *
     * @throws a string when the key info does not list the scraper role.
     */
    async checkHasScraperRole() {
        const keyInfo: { roles?: Array<{ role: number }> } | null = await this.doRequest('GET', '/api/v1/auth/keyinfo')
        // A response without a roles list is treated as "no scraper role"
        // instead of crashing on a missing property.
        const roles = Array.isArray(keyInfo?.roles) ? keyInfo!.roles! : []
        // Loose equality is kept on purpose so the check accepts the same responses as before
        const hasScraperRole = roles.some(r => r.role == ServerConn.scraperRole)
        if (!hasScraperRole) throw `api key for ${this.server_location} does not have the scraper role`
    }
}

cv.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/**
 * A scraped CV in the shape that is sent to the server to be scanned.
 */
export interface CVToScan {
    /** Identifier of the CV; the client uses it to avoid sending the same CV twice */
    referenceNumber: string
    link?: string
    presentation?: string
    personalDetails: CVToScanPersonalDetails
    preferredJobs: string[]
    workExperiences: CVToScanWorkExperience[]
    educations: CVToScanEducation[]
    languages: CVToScanLanguage[]
    driversLicenses: string[]
}
12+
13+
/**
 * Creates a CV that only has its reference number set: the personal details
 * are an empty object and every list is empty.
 *
 * @param referenceNumber Reference number to give the new CV.
 * @returns a fresh CV object, no state is shared between calls.
 */
export function newCvToScan(referenceNumber: string): CVToScan {
    const emptyCv: CVToScan = {
        referenceNumber: referenceNumber,
        personalDetails: {},
        preferredJobs: [],
        workExperiences: [],
        educations: [],
        languages: [],
        driversLicenses: [],
    }
    return emptyCv
}
24+
25+
/**
 * One education entry of a CV.
 */
export interface CVToScanEducation {
    // NOTE(review): the meaning of the values 0, 1 and 2 is not documented here; confirm against the server API
    is: 0 | 1 | 2
    name: string
    description: string
    institute: string
    isCompleted?: boolean
    hasDiploma?: boolean
    /** RFC3339 formatted date, or null */
    startDate: string | null
    /** RFC3339 formatted date, or null */
    endDate: string | null
}
35+
36+
/**
 * One work experience entry of a CV.
 */
export interface CVToScanWorkExperience {
    profession: string
    description: string
    employer: string
    stillEmployed?: boolean
    weeklyHoursWorked?: number
    /** RFC3339 formatted date, or null */
    startDate: string | null
    /** RFC3339 formatted date, or null */
    endDate: string | null
}
45+
46+
/**
 * Personal details of the person a CV belongs to. Every field is optional.
 */
export interface CVToScanPersonalDetails {
    city?: string
    country?: string
    /** Date of birth as an RFC3339 formatted date, or null */
    dob?: string | null
    email?: string
    firstName?: string
    gender?: string
    houseNumber?: string
    houseNumberSuffix?: string
    initials?: string
    phoneNumber?: string
    streetName?: string
    surName?: string
    surNamePrefix?: string
    zip?: string
}
62+
63+
/**
 * A language listed on a CV together with how well it is spoken and written.
 */
export interface CVToScanLanguage {
    /** Spoken proficiency, or null */
    levelSpoken: LangLevel | null
    /** Written proficiency, or null */
    levelWritten: LangLevel | null
    name: string
}
68+
69+
/**
 * Proficiency levels for a language, used for both the spoken and the written level.
 */
export enum LangLevel {
    Unknown = 0,
    Reasonable = 1,
    Good = 2,
    Excellent = 3,
}

env.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import vs from 'https://deno.land/x/[email protected]/mod.ts'
2+
3+
// Validation schema for one server entry of the env file:
// every field must be a string of at least one character.
const server = vs.object({
    schemaObject: {
        server_location: vs.string({ minLength: 1 }),
        api_key_id: vs.string({ minLength: 1 }),
        api_key: vs.string({ minLength: 1 }),
    },
})

/** A server entry after validation, derived from the `server` schema */
export type serverT = ReturnType<typeof server.applyTo>
12+
13+
// Validation schema for one login user of the env file:
// username and password must both be strings of at least one character.
const loginUser = vs.object({
    schemaObject: {
        username: vs.string({ minLength: 1 }),
        password: vs.string({ minLength: 1 }),
    },
})
19+
20+
// Validation schema for the whole env file.
// `primary_server` is required; `login_users` and `alternative_servers`
// may be left out, in which case they stay undefined.
const envValidationSchema = vs.object({
    schemaObject: {
        login_users: vs.array({
            ifUndefined: undefined,
            each: loginUser,
        }),
        primary_server: server,
        alternative_servers: vs.array({
            ifUndefined: undefined,
            each: server,
        }),
    },
})

/** The env file contents after validation, derived from `envValidationSchema` */
export type envT = ReturnType<typeof envValidationSchema.applyTo>
35+
36+
/**
 * Loads `env.json` from the current working directory, parses it as JSON and
 * validates it against the env schema.
 *
 * @returns the validated env contents.
 * @throws a multi-line string (not an Error) holding the working directory,
 * a hint and the underlying error when reading, parsing or validating fails.
 */
export function readEnv(): envT {
    try {
        const rawJson = Deno.readTextFileSync('env.json')
        return envValidationSchema.applyTo(JSON.parse(rawJson))
    } catch (cause) {
        // Any failure of the steps above is reported through the same message
        const messageLines = [
            `\nUnable to read ./env.json in ${Deno.cwd()}`,
            `hint: you can create a env.json on the RTCV dashboard for this scraper?`,
            `error: ${cause}`,
        ]
        throw messageLines.join('\n')
    }
}

0 commit comments

Comments
 (0)