Skip to content

Commit 85c6434

Browse files
author
Ronald Schmidt
authored
Merge pull request #25 from ecoron/0.9.2
0.9.2
2 parents 1493dc1 + 19fa06e commit 85c6434

File tree

9 files changed

+80
-74
lines changed

9 files changed

+80
-74
lines changed

Diff for: README.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ It might be useful for SEO and research tasks.
2323
Extract these result types
2424
--------------------------
2525

26-
* ads_main - advertisments within regular search results
26+
* ads_main - advertisements within regular search results
2727
* image - result from image search
2828
* news - news teaser within regular search results
2929
* results - standard search result

Diff for: docs/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
# The short X.Y version.
6161
version = '0.9'
6262
# The full version, including alpha/beta/rc tags.
63-
release = '0.9.1'
63+
release = '0.9.2'
6464

6565
# The language for content autogenerated by Sphinx. Refer to documentation
6666
# for a list of supported languages.

Diff for: docs/index.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ It might be useful for SEO and research tasks.
2626
Extract these result types
2727
--------------------------
2828

29-
* ads_main - advertisments within regular search results
29+
* ads_main - advertisements within regular search results
3030
* image - result from image search
3131
* news - news teaser within regular search results
3232
* results - standard search result

Diff for: examples/example_related.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
def scrape_to_csv(config, keywords):
88
scrap = serpscrap.SerpScrap()
99
scrap.init(config=config.get(), keywords=keywords)
10-
return scrap.as_csv('/tmp/planet-earth')
10+
return scrap.as_csv('/tmp/cryptocurrency')
1111

1212

1313
def get_related(config, keywords, related):
@@ -25,7 +25,7 @@ def get_related(config, keywords, related):
2525
config.set('scrape_urls', False)
2626
config.set('num_workers', 1)
2727

28-
keywords = ['planet earth']
28+
keywords = ['cryptocurrency']
2929

3030
related = keywords
3131
related = get_related(config, keywords, related)

Diff for: requirements.txt

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
lxml
22
chardet==3.0.4
33
beautifulsoup4==4.6.0
4-
html2text==2016.9.19
4+
html2text==2017.10.4
55
PySocks==1.6.7
6-
sqlalchemy==1.1.13
7-
selenium==3.5.0
6+
sqlalchemy==1.1.15
7+
selenium==3.8.0
88
cssselect==1.0.1

Diff for: scrapcore/database.py

+6
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ def set_values_from_parser(self, parser):
111111
for link in value:
112112
parsed = urlparse(link['link'])
113113

114+
if link['snippet'] is not None:
115+
# try to remove inline css, which is in some results since 12/2017
116+
tmp_snipped = link['snippet'].split('}')
117+
if len(tmp_snipped) > 1:
118+
link['snippet'] = tmp_snipped[len(tmp_snipped)-1]
119+
114120
# fill with nones to prevent key errors
115121
[link.update({key: None}) for key in (
116122
'snippet',

Diff for: scrapcore/parser/google_parser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class GoogleParser(Parser):
104104
'container': '#center_col',
105105
'result_container': '.ads-ad',
106106
'link': 'h3 > a:nth-child(2)::attr(href)',
107-
'snippet': '.ads-creative::text',
107+
'snippet': 'div.ads-creative::text',
108108
'title': 'h3 > a:nth-child(2)::text',
109109
'visible_link': '.ads-visurl cite::text',
110110
'rating': 'div._Ond _Bu span::text',

Diff for: scrapcore/user_agent.py

+60-60
Original file line numberDiff line numberDiff line change
@@ -2,69 +2,69 @@
22
import random
33

44
user_agents_mobile = [
5-
'Mozilla/5.0 (iPhone, CPU iPhone OS 10_2_1 like Mac OS X) AppleWebKit/602.4.6 (KHTML, like Gecko) Version/10.0 Mobile/14D27 Safari/602.1',
6-
'Mozilla/5.0 (iPhone, CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
7-
'Mozilla/5.0 (iPhone, CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1',
8-
'Mozilla/5.0 (Linux, Android 7.0, SAMSUNG SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.0 Chrome/51.0.2704.106 Mobile Safari/537.36',
9-
'Mozilla/5.0 (Linux, Android 6.0.1, SM-G920F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36',
10-
'Mozilla/5.0 (Linux, Android 7.0, SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36',
11-
'Mozilla/5.0 (Linux, Android 7.0, SAMSUNG SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.0 Chrome/51.0.2704.106 Mobile Safari/537.36',
12-
'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G900F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36',
13-
'Mozilla/5.0 (iPhone, CPU iPhone OS 10_1_1 like Mac OS X) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0 Mobile/14B100 Safari/602.1',
14-
'Mozilla/5.0 (iPhone, CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E277 Safari/602.1',
15-
'Mozilla/5.0 (Linux, Android 7.0, SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36',
16-
'Mozilla/5.0 (iPhone, CPU iPhone OS 10_2_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) GSA/24.1.151204851 Mobile/14D27 Safari/602.1',
17-
'Mozilla/5.0 (Linux, Android 6.0.1, SM-G900F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36',
18-
'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G920F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.0 Chrome/51.0.2704.106 Mobile Safari/537.36',
19-
'Mozilla/5.0 (Linux, Android 6.0, ALE-L21 Build/HuaweiALE-L21) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36',
20-
'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G920F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36',
21-
'Mozilla/5.0 (Linux, Android 6.0.1, SM-G925F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36',
22-
'Mozilla/5.0 (iPhone, CPU iPhone OS 10_0_2 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Mobile/14A456 Safari/602.1',
23-
'Mozilla/5.0 (iPhone, CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) GSA/24.1.151204851 Mobile/14E304 Safari/602.1',
24-
'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-A510F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36',
25-
'Mozilla/5.0 (Linux, Android 6.0.1, SM-G920F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Crosswalk/20.50.533.12 Mobile Safari/537.36',
26-
'Mozilla/5.0 (Linux, Android 7.0, SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Crosswalk/20.50.533.12 Mobile Safari/537.36',
27-
'Mozilla/5.0 (Linux, Android 6.0.1, SM-A510F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36',
28-
'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G800F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36',
29-
'Mozilla/5.0 (iPhone, CPU iPhone OS 9_3_5 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G36 Safari/601.1',
30-
'Mozilla/5.0 (Linux, Android 7.0, SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Crosswalk/20.50.533.12 Mobile Safari/537.36',
31-
'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G925F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.0 Chrome/51.0.2704.106 Mobile Safari/537.36',
32-
'Mozilla/5.0 (Linux, Android 6.0, HUAWEI VNS-L31 Build/HUAWEIVNS-L31) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.132 Mobile Safari/537.36',
33-
'Mozilla/5.0 (Android 6.0.1, Mobile, rv:52.0) Gecko/52.0 Firefox/52.0',
34-
'Mozilla/5.0 (Linux, Android 6.0.1, SAMSUNG SM-G903F Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36',
5+
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_2 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B202 Safari/604.1',
6+
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_3 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A432 Safari/604.1',
7+
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B150 Safari/604.1',
8+
'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.0 Mobile/14G60 Safari/602.1',
9+
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B93 Safari/604.1',
10+
'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
11+
'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36',
12+
'Mozilla/5.0 (Linux; Android 7.0; SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
13+
'Mozilla/5.0 (Linux; Android 7.0; SM-G950F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
14+
'Mozilla/5.0 (Linux; Android 7.0; SM-G920F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
15+
'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G935F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36',
16+
'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.0 Mobile/14F89 Safari/602.1',
17+
'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G950F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36',
18+
'Mozilla/5.0 (iPhone; CPU iPhone OS 10_2_1 like Mac OS X) AppleWebKit/602.4.6 (KHTML, like Gecko) Version/10.0 Mobile/14D27 Safari/602.1',
19+
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_2 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A421 Safari/604.1',
20+
'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G920F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36',
21+
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
22+
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
23+
'Mozilla/5.0 (Linux; Android 7.0; SM-G925F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
24+
'Mozilla/5.0 (Linux; Android 6.0; ALE-L21 Build/HuaweiALE-L21) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
25+
'Mozilla/5.0 (Linux; Android 6.0.1; SM-G900F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
26+
'Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G925F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36',
27+
'Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-G900F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/6.2 Chrome/56.0.2924.87 Mobile Safari/537.36',
28+
'Mozilla/5.0 (iPhone; CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1',
29+
'Mozilla/5.0 (Linux; Android 7.0; HUAWEI VNS-L31 Build/HUAWEIVNS-L31) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
30+
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_2 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Version/11.0 Mobile/15C114 Safari/604.1',
31+
'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.98 Mobile Safari/537.36',
32+
'Mozilla/5.0 (Android 7.0; Mobile; rv:57.0) Gecko/57.0 Firefox/57.0',
33+
'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
34+
'Mozilla/5.0 (Linux; Android 7.0; SM-A510F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
3535
]
3636

3737
user_agents_computer = [
38-
'Mozilla/5.0 (Windows NT 6.1, WOW64, rv:52.0) Gecko/20100101 Firefox/52.0',
39-
'Mozilla/5.0 (Windows NT 10.0, WOW64, rv:52.0) Gecko/20100101 Firefox/52.0',
40-
'Mozilla/5.0 (Windows NT 6.1, WOW64, Trident/7.0, rv:11.0) like Gecko',
41-
'Mozilla/5.0 (Windows NT 10.0, Win64, x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
42-
'Mozilla/5.0 (Windows NT 6.1, WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
43-
'Mozilla/5.0 (Windows NT 10.0, WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
44-
'Mozilla/5.0 (Windows NT 10.0, Win64, x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393',
45-
'Mozilla/5.0 (Windows NT 6.1, Win64, x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
46-
'Mozilla/5.0 (X11, Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.101 Safari/537.36',
47-
'Mozilla/5.0 (Windows NT 6.1, Trident/7.0, rv:11.0) like Gecko',
48-
'Mozilla/5.0 (Windows NT 6.3, WOW64, rv:52.0) Gecko/20100101 Firefox/52.0',
49-
'Mozilla/5.0 (Macintosh, Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30',
50-
'Mozilla/5.0 (Windows NT 6.1, rv:52.0) Gecko/20100101 Firefox/52.0',
51-
'Mozilla/5.0 (Windows NT 6.1, WOW64, rv:45.0) Gecko/20100101 Firefox/45.0',
52-
# 'Mozilla/5.0 (Windows NT 10.0, WOW64, Trident/7.0, rv:11.0) like Gecko',
53-
'Mozilla/5.0 (Windows NT 6.3, WOW64, Trident/7.0, rv:11.0) like Gecko',
54-
'Mozilla/5.0 (Macintosh, Intel Mac OS X 10_12_3) AppleWebKit/602.4.8 (KHTML, like Gecko) Version/10.0.3 Safari/602.4.8',
55-
'Mozilla/5.0 (Windows NT 10.0, Win64, x64, rv:52.0) Gecko/20100101 Firefox/52.0',
56-
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
57-
'Mozilla/5.0 (Windows NT 6.3, WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
58-
'Mozilla/5.0 (Windows NT 10.0, rv:52.0) Gecko/20100101 Firefox/52.0',
59-
'Mozilla/5.0 (Windows NT 6.1, Win64, x64, rv:52.0) Gecko/20100101 Firefox/52.0',
60-
'Mozilla/5.0 (Macintosh, Intel Mac OS X 10_11_6) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30',
61-
'Mozilla/5.0 (Windows NT 6.3, Win64, x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
62-
'Mozilla/5.0 (Windows NT 6.1, WOW64, rv:51.0) Gecko/20100101 Firefox/51.0',
63-
'Mozilla/5.0 (Windows NT 6.1, WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
64-
'Mozilla/5.0 (Windows NT 10.0, WOW64, rv:51.0) Gecko/20100101 Firefox/51.0',
65-
'Mozilla/5.0 (Windows NT 6.0, rv:52.0) Gecko/20100101 Firefox/52.0',
66-
'Mozilla/5.0 (Windows NT 6.1, Win64, x64, Trident/7.0, rv:11.0) like Gecko',
67-
'Mozilla/5.0 (Windows NT 5.1, rv:52.0) Gecko/20100101 Firefox/52.0',
38+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
39+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
40+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
41+
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
42+
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
43+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
44+
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
45+
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
46+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063',
47+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
48+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5',
49+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5',
50+
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
51+
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
52+
'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
53+
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
54+
'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
55+
'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
56+
'Mozilla/5.0 (Windows NT 6.1; rv:57.0) Gecko/20100101 Firefox/57.0',
57+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
58+
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
59+
'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
60+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393',
61+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36',
62+
'Mozilla/5.0 (Windows NT 6.1; rv:56.0) Gecko/20100101 Firefox/56.0',
63+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5',
64+
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0',
65+
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
66+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
67+
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0',
6868
]
6969

7070

Diff for: setup.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# -*- coding: utf-8 -*-
33
from setuptools import setup, find_packages
44

5-
version = '0.9.1'
5+
version = '0.9.2'
66

77

88
setup(
@@ -25,10 +25,10 @@
2525
'PySocks==1.6.7',
2626
'chardet==3.0.4',
2727
'beautifulsoup4==4.6.0',
28-
'html2text==2016.9.19',
28+
'html2text==2017.10.4',
2929
'lxml',
30-
'sqlalchemy==1.1.13',
31-
'selenium==3.5.0',
30+
'sqlalchemy==1.1.15',
31+
'selenium==3.8.0',
3232
'cssselect==1.0.1',
3333
],
3434
classifiers=[
@@ -40,5 +40,5 @@
4040
'Programming Language :: Python :: 3.5',
4141
'Programming Language :: Python :: 3.6',
4242
],
43-
keywords='serp-scraper url-scraper ad-detection',
43+
keywords='seo scraper ad-detection scraping keywords',
4444
)

0 commit comments

Comments
 (0)