|
2 | 2 | 這個檔案留個紀錄而已,第一版用來爬中籤號碼的code,
|
3 | 3 | 後來覺得用selenium爬運算太慢,也擔心放上GCP要跑會有問題,所以後來寫了search_v2
|
4 | 4 | '''
|
5 |
| -from bs4 import BeautifulSoup as bs |
6 |
| -from selenium import webdriver |
| 5 | +# from bs4 import BeautifulSoup as bs |
| 6 | +# from selenium import webdriver |
7 | 7 |
|
8 |
| -PATH = "/chromedriver.exe" |
| 8 | +# PATH = "chromedriver.exe" |
9 | 9 |
|
10 |
| -url = "https://vhpi.5000.gov.tw/" |
| 10 | +# url = "https://vhpi.5000.gov.tw/" |
11 | 11 |
|
12 | 12 | # 先用selenium取得的網頁原始碼,丟進bs裡做成湯
|
13 |
| -driver = webdriver.Chrome(PATH) |
14 |
| -driver.get(url) |
| 13 | +# driver = webdriver.Chrome(PATH) |
| 14 | +# driver.get(url) |
15 | 15 |
|
16 |
| -source = bs(driver.page_source, "html.parser") |
17 |
| -driver.close() |
| 16 | +# source = bs(driver.page_source, "html.parser") |
| 17 | +# driver.close() |
18 | 18 |
|
19 | 19 |
|
20 | 20 | # 定義取得中獎號碼list的function
|
21 |
| -def get_draw_nums(css_id): |
22 |
| - target_lis = source.select_one(css_id).find_all("li") |
23 |
| - win_nums = [] |
24 |
| - for i in target_lis: |
25 |
| - win_nums.append(i.text) |
26 |
| - return win_nums |
| 21 | +# def get_draw_nums(css_id): |
| 22 | +# target_lis = source.select_one(css_id).find_all("li") |
| 23 | +# win_nums = [] |
| 24 | +# for i in target_lis: |
| 25 | +# win_nums.append(i.text) |
| 26 | +# return win_nums |
27 | 27 |
|
28 | 28 |
|
29 | 29 | # 從網頁原始碼中抓出8種券的css id
|
30 |
| -css_id_list = ["#domesticTravel", "#iYuan", "#agriculture", "#artFunE", |
31 |
| - "#artFunP", "#sports", "#hakka", "#rgionalRevitalization"] |
32 |
| - |
33 |
| -newest = {} |
34 |
| -for n, item in enumerate(css_id_list): |
35 |
| - newest[item[1::]] = get_draw_nums(css_id_list[n]) |
36 |
| -print(newest) |
| 30 | +# css_id_list = ["#domesticTravel", "#iYuan", "#agriculture", "#artFunE", |
| 31 | +# "#artFunP", "#sports", "#hakka", "#rgionalRevitalization"] |
| 32 | +# |
| 33 | +# newest = {} |
| 34 | +# for n, item in enumerate(css_id_list): |
| 35 | +# newest[item[1::]] = get_draw_nums(css_id_list[n]) |
| 36 | +# print(newest) |
37 | 37 |
|
38 | 38 | # TODO: 用selenium怕無法上雲端,資料其實都存在<footer>底下的script tag裡了,改抓那裡的資料處理string來分析
|
39 | 39 | # TODO: 將week_1資料存進資料庫,這樣如有新增資料只需要刷過一次就可以了
|
|
0 commit comments