
Commit e67fb14

winNo4 parser fixed, backup json file added
1 parent 28ca623 commit e67fb14

File tree: 3 files changed, +29 / -24 lines changed


search.py

+21-21
@@ -2,38 +2,38 @@
 This file is only kept as a record: it is the first version of the code for scraping the winning voucher numbers.
 Scraping with selenium later felt too slow, and I was worried it would have trouble running on GCP, so search_v2 was written instead.
 '''
-from bs4 import BeautifulSoup as bs
-from selenium import webdriver
+# from bs4 import BeautifulSoup as bs
+# from selenium import webdriver

-PATH = "/chromedriver.exe"
+# PATH = "chromedriver.exe"

-url = "https://vhpi.5000.gov.tw/"
+# url = "https://vhpi.5000.gov.tw/"

 # Fetch the page source with selenium first, then feed it into bs to make the soup
-driver = webdriver.Chrome(PATH)
-driver.get(url)
+# driver = webdriver.Chrome(PATH)
+# driver.get(url)

-source = bs(driver.page_source, "html.parser")
-driver.close()
+# source = bs(driver.page_source, "html.parser")
+# driver.close()


 # Define the function that returns the list of winning numbers
-def get_draw_nums(css_id):
-    target_lis = source.select_one(css_id).find_all("li")
-    win_nums = []
-    for i in target_lis:
-        win_nums.append(i.text)
-    return win_nums
+# def get_draw_nums(css_id):
+#     target_lis = source.select_one(css_id).find_all("li")
+#     win_nums = []
+#     for i in target_lis:
+#         win_nums.append(i.text)
+#     return win_nums


 # The css ids of the 8 voucher types, taken from the page source
-css_id_list = ["#domesticTravel", "#iYuan", "#agriculture", "#artFunE",
-               "#artFunP", "#sports", "#hakka", "#rgionalRevitalization"]
-
-newest = {}
-for n, item in enumerate(css_id_list):
-    newest[item[1::]] = get_draw_nums(css_id_list[n])
-print(newest)
+# css_id_list = ["#domesticTravel", "#iYuan", "#agriculture", "#artFunE",
+#                "#artFunP", "#sports", "#hakka", "#rgionalRevitalization"]
+#
+# newest = {}
+# for n, item in enumerate(css_id_list):
+#     newest[item[1::]] = get_draw_nums(css_id_list[n])
+# print(newest)

 # TODO: selenium may not run once this is deployed to the cloud; the data is already stored in the script tags under <footer>, so fetch it from there and parse the string instead
 # TODO: store the week_1 data in a database, so that only newly added data has to be scraped
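The first TODO is the approach search_v2 eventually takes: read the data straight out of the script tags instead of driving a browser. Below is a minimal sketch of that idea without selenium, assuming the footer scripts contain assignments of the form window.winNoN = {...}; (the tag layout and variable names are assumptions inferred from this repo, not something this commit confirms):

# Sketch only: fetch the page with requests and read the winning numbers out of
# the <footer> script tags. The "window.winNoN = {...};" format is an assumption.
import ast
import requests
from bs4 import BeautifulSoup as bs

url = "https://vhpi.5000.gov.tw/"
source = bs(requests.get(url).text, "html.parser")

win_numbers = {}
footer = source.find("footer")
scripts = footer.find_all("script") if footer is not None else []
for tag in scripts:
    text = tag.string or ""
    # Pick out assignments that look like: window.winNo1 = {...};
    for chunk in text.split("window.")[1:]:
        if not chunk.startswith("winNo"):
            continue
        name, _, rest = chunk.partition(" = ")
        literal = rest.split(";")[0]  # trim the trailing ";" before parsing
        win_numbers[name.strip()] = ast.literal_eval(literal)

print(win_numbers)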

search_v2.py

+5-2
@@ -28,13 +28,16 @@
     winNo3 = ast.literal_eval(winNo3_str)
     # print(winNo3)

-
     # Week-4 winning numbers: winNo4 (note that this way of slicing the string has to trim the tail cleanly, otherwise ast.literal_eval cannot build the dictionary correctly)
-    winNo4_str = script[3].split("\n\n window.")[0].split(" = ")[1]
+    winNo4_str = script[3].split("\n\n window.")[0].split(" = ")[1].split(";")[0]
     winNo4 = ast.literal_eval(winNo4_str)
     # print(winNo4)
+    # print result for json file
+    # print(winNo4_str.replace("'", '"'))
+
 except:
     # If the raw data on the official site has changed and causes an error, fall back to the data in the backup file winNo.json
+    print("failed")
     with open("winNo.json", "r") as backup_data:
         data = json.load(backup_data)
         winNo1 = data["winNo1"]
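The actual parser fix in this hunk is the extra .split(";")[0]: the string pulled out of the script tag still ends with the statement's ";", and ast.literal_eval only accepts a bare literal. A small illustration with a made-up sample string:

import ast

# Made-up sample resembling what the page assigns to window.winNo4
raw = "{'domesticTravel': ['32', '02'], 'iYuan': ['29']};"

try:
    ast.literal_eval(raw)                        # trailing ";" -> not a bare literal
except (SyntaxError, ValueError):
    print("literal_eval rejects the trailing ';'")

winNo4 = ast.literal_eval(raw.split(";")[0])     # trim first, then evaluate
print(winNo4["iYuan"])                           # ['29']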

winNo.json

+3-1
@@ -199,5 +199,7 @@
     ],
     "rgionalRevitalization": ["771","706","064","168","191","459","135","314","366"]
   },
-  "winNo4": {}
+  "winNo4": {"domesticTravel": ["32", "02", "87", "93", "82", "17"],
+             "iYuan": ["29", "82" , "71"]
+  }
 }
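The backup file now carries real week-4 data. The commented-out print in search_v2.py suggests winNo.json is produced by printing the parsed string with single quotes swapped for double quotes; a sketch of writing the same backup with json.dump instead (the write_backup helper, and the assumption that the file keeps winNo1 through winNo4 keys, are mine and not part of this commit):

import json

# Sketch only: rebuild winNo.json from the dicts already parsed in search_v2.py,
# instead of hand-pasting printed output. winNo1..winNo4 are assumed to be the
# dicts built in the try block; this helper is hypothetical.
def write_backup(winNo1, winNo2, winNo3, winNo4, path="winNo.json"):
    data = {"winNo1": winNo1, "winNo2": winNo2, "winNo3": winNo3, "winNo4": winNo4}
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)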
