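"""taapiio-indicators.py — scrape the taapi.io indicators list and build request-URL templates.

Workflow (a summary of the steps below):
  1. Use Selenium to collect every indicator name and documentation link
     from https://taapi.io/indicators/.
  2. Download all indicator pages concurrently with aiohttp.
  3. Extract the example request from each page and rewrite it into a
     template with {symbol}, {api_key} and {interval} placeholders.
  4. Write the results to output.csv.
"""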
import asyncio
import csv
import time

import aiohttp
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

# Local ChromeDriver path (unused below; webdriver_manager downloads a matching driver automatically).
chrome_driver_path = "/Users/harrison/Desktop/Python Career/Chrome-Webdriver/chromedriver"
# Load the indicators index page and collect each indicator's name and
# documentation link from the endpoints table (Selenium 3-style locator API).
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://taapi.io/indicators/")
indicators_body = driver.find_element_by_id("indicators-endpoints-data")
indicators = [{'Indicator': table_row.find_element_by_tag_name("a").text,
               'Request URL': table_row.find_element_by_tag_name("a").get_attribute("href")}
              for table_row in indicators_body.find_elements_by_tag_name("tr")]
driver.quit()  # quit() also closes the window, so a separate close() call is unnecessary
# Request headers sent with every aiohttp request. Note that "Request Line" is not
# a standard HTTP header name (the request line is generated by the client itself).
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
    "X-Http-Proto": "HTTP/1.1",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.9",
    "Request Line": "GET / HTTP/1.1",
}
async def download_site(session, url):
    """Fetch a single indicator page and return its HTML."""
    async with session.get(url) as client_response:
        return await client_response.text()


async def download_all_sites(site_headers):
    """Fetch every indicator page concurrently with a shared aiohttp session."""
    timeout = aiohttp.ClientTimeout(total=60)
    connector = aiohttp.TCPConnector(limit_per_host=50)  # cap concurrent connections to the host
    async with aiohttp.ClientSession(timeout=timeout, connector=connector, headers=site_headers) as session:
        tasks = []
        for indicator in indicators:
            task = asyncio.ensure_future(download_site(session, indicator['Request URL']))
            tasks.append(task)
        # return_exceptions=True keeps one failed request from cancelling the rest;
        # failures appear as exception objects in the results list.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        return results
expected_urls = len(indicators)
start_time = time.time()
responses = asyncio.run(download_all_sites(headers))
duration = time.time() - start_time
print(f"Scraped {len(indicators)} indicator URLs in {duration:.2f} seconds")

# Parse each indicator page and turn the example request shown in its code block into a
# reusable URL template with {symbol}, {api_key} and {interval} placeholders. Failed
# downloads arrive as exception objects and fall through to the outer except.
for i, response in enumerate(responses):
    try:
        source = BeautifulSoup(response, "html.parser")
        try:
            indicator_url = source.select("div pre code")[0].text
            indicators[i]['Request URL'] = indicator_url.replace("[GET] ", "").replace("BTC", "{symbol}").replace(
                "MY_SECRET", "{api_key}").replace("interval=1h", "interval={interval}")
        except Exception:
            print(f"Could not obtain URL for {indicators[i]['Indicator']}")
    except Exception as e:
        print(f"Exception occurred: {e}")
# Keep only entries whose request URL was successfully rewritten into a template.
valid_urls = [indicator['Request URL'] for indicator in indicators if "{symbol}" in indicator['Request URL']]
print(f"\nFound {len(valid_urls)} valid URLs.\n")

# Check output and, if every indicator produced a template, write the results to CSV.
if expected_urls == len(valid_urls):
    print(f"{len(valid_urls)}/{expected_urls} URLs found.")
    print("Download Success!")
    csv_columns = ['Indicator', 'Request URL']
    dict_data = indicators
    csv_file = "output.csv"
    try:
        # newline='' avoids blank rows on Windows when writing with the csv module.
        with open(csv_file, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            for data in dict_data:
                writer.writerow(data)
    except Exception as e:
        print(f"CSV Error: {e}")
else:
    print("URL fetch does not match the expected number of URLs.")