-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindeed.py
More file actions
40 lines (35 loc) · 1.26 KB
/
indeed.py
File metadata and controls
40 lines (35 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import requests
from bs4 import BeautifulSoup
# Number of postings requested per results page; extract_jobs() also uses it
# as the stride when computing the `start` offset of each page.
LIMIT = 50

# Search URL for "python" on kr.indeed.com. The percent-encoded path segment
# is the UTF-8 encoding of the Korean word "취업".
URL = f"https://kr.indeed.com/%EC%B7%A8%EC%97%85?q=python&limit={LIMIT}"
def get_last_pages():
    """Return the last page number listed in the search pagination.

    Fetches the first results page at ``URL`` and reads the ``<li>`` entries
    of the ``ul.pagination-list`` element.

    Returns:
        int: The last numeric page label found in the pagination list, or
        ``1`` when the page has no pagination list or the list contains no
        numeric entry, so that callers still scrape the first page.
    """
    response = requests.get(URL)
    soup = BeautifulSoup(response.text, "html.parser")

    pagination = soup.find("ul", {"class": "pagination-list"})
    if pagination is None:
        # No pagination widget in the response; treat it as a single page.
        return 1

    # Keep only entries whose text is a plain number. This skips non-numeric
    # items (such as a "next" control) explicitly instead of assuming that
    # exactly the last <li> is the one to drop.
    labels = (item.get_text(strip=True) for item in pagination.find_all("li"))
    pages = [int(label) for label in labels if label.isdecimal()]

    return pages[-1] if pages else 1
def extract_job(html):
    """Extract the title, company and location from one job card.

    Args:
        html: BeautifulSoup tag for a single job card (``extract_jobs``
            passes ``div.tapItem`` elements).

    Returns:
        dict: ``{"title": ..., "company": ..., "location": ...}``. A value is
        ``None`` when the corresponding element is missing from the card.
    """
    # Filtering on the single class "jobTitle" also matches headings whose
    # class attribute is "jobTitle jobTitle-newJob", so one filter suffices.
    # (The third positional argument of find() is `recursive`, not a second
    # attribute filter.)
    heading = html.find("h2", {"class": "jobTitle"})
    title_span = heading.find("span", title=True) if heading is not None else None
    title = title_span.text if title_span is not None else None

    company = None
    company_span = html.find("span", {"class": "companyName"})
    if company_span is not None:
        # Prefer the overview link when present; otherwise fall back to the
        # text of the span itself instead of failing on a missing <a>.
        link = company_span.find("a", {"class": "companyOverviewLink"})
        company = (link if link is not None else company_span).text

    location_div = html.find("div", {"class": "companyLocation"})
    location = location_div.text if location_div is not None else None

    # Return the record so the caller can collect it (previously it was only
    # printed, which made the function return None).
    return {"title": title, "company": company, "location": location}
def extract_jobs(last_page):
    """Scrape the job cards of result pages ``0`` to ``last_page - 1``.

    For each page index, requests ``URL`` with a ``start`` offset of
    ``page * LIMIT``, finds every ``div.tapItem`` element and passes it to
    ``extract_job``.

    Args:
        last_page: Number of result pages to fetch.

    Returns:
        list: The value returned by ``extract_job`` for each card, in page
        order.
    """
    jobs = []
    for page in range(last_page):
        print(f"Scrapping Indeed : Page {page}")
        offset = page * LIMIT
        response = requests.get(f"{URL}&start={offset}")
        soup = BeautifulSoup(response.text, "html.parser")
        cards = soup.find_all("div", {"class": "tapItem"})
        jobs.extend(extract_job(card) for card in cards)
    return jobs
def get_jobs():
    """Run the full scrape and return the collected jobs.

    Determines the page count with ``get_last_pages`` and hands it to
    ``extract_jobs``.

    Returns:
        list: Whatever ``extract_jobs`` returns for that page count.
    """
    return extract_jobs(get_last_pages())