Skip to content

Commit 97a6b12

Browse files
committed
Move spamhaus DROP/EDROP exclusion to crawl.py
1 parent 74c2a16 commit 97a6b12

File tree

4 files changed

+35
-59
lines changed

4 files changed

+35
-59
lines changed

crawl.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ def list_excluded_networks(txt, networks=None):
410410
networks = set()
411411
lines = txt.strip().split("\n")
412412
for line in lines:
413-
line = line.split('#')[0].strip()
413+
line = line.split('#')[0].split(';')[0].strip()
414414
try:
415415
network = ip_network(unicode(line))
416416
except ValueError:
@@ -422,11 +422,16 @@ def list_excluded_networks(txt, networks=None):
422422

423423
def update_excluded_networks():
424424
"""
425-
Adds bogons into the excluded IPv4 and IPv6 networks.
425+
Updates excluded networks with current bogons.
426426
"""
427+
CONF['exclude_ipv4_networks'] = CONF['default_exclude_ipv4_networks']
428+
CONF['exclude_ipv6_networks'] = CONF['default_exclude_ipv6_networks']
429+
427430
if CONF['exclude_ipv4_bogons']:
428431
urls = [
429432
"http://www.team-cymru.org/Services/Bogons/fullbogons-ipv4.txt",
433+
"http://www.spamhaus.org/drop/drop.txt",
434+
"https://www.spamhaus.org/drop/edrop.txt",
430435
]
431436
for url in urls:
432437
try:
@@ -498,11 +503,14 @@ def init_conf(argv):
498503
if exclude_asns:
499504
CONF['exclude_asns'] = set(exclude_asns.split("\n"))
500505

501-
CONF['exclude_ipv4_networks'] = list_excluded_networks(
506+
CONF['default_exclude_ipv4_networks'] = list_excluded_networks(
502507
conf.get('crawl', 'exclude_ipv4_networks'))
503-
CONF['exclude_ipv6_networks'] = list_excluded_networks(
508+
CONF['default_exclude_ipv6_networks'] = list_excluded_networks(
504509
conf.get('crawl', 'exclude_ipv6_networks'))
505510

511+
CONF['exclude_ipv4_networks'] = CONF['default_exclude_ipv4_networks']
512+
CONF['exclude_ipv6_networks'] = CONF['default_exclude_ipv6_networks']
513+
506514
CONF['exclude_ipv4_bogons'] = conf.getboolean('crawl',
507515
'exclude_ipv4_bogons')
508516
CONF['exclude_ipv6_bogons'] = conf.getboolean('crawl',
@@ -561,8 +569,8 @@ def main(argv):
561569
redis_pipe.delete(key)
562570
redis_pipe.delete('pending')
563571
redis_pipe.execute()
564-
set_pending()
565572
update_excluded_networks()
573+
set_pending()
566574
REDIS_CONN.set('crawl:master:state', "running")
567575

568576
# Spawn workers (greenlets) including one worker reserved for cron tasks

seeder.py

+1-54
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,10 @@
3434
import operator
3535
import os
3636
import random
37-
import requests
3837
import sys
3938
import time
4039
from collections import defaultdict
4140
from ConfigParser import ConfigParser
42-
from ipaddress import ip_address, ip_network
4341

4442
from utils import new_redis_conn
4543

@@ -58,16 +56,12 @@ def __init__(self):
5856
self.nodes = []
5957
self.addresses = defaultdict(list)
6058
self.now = 0
61-
self.blocklist = set()
62-
self.blocklist_timestamp = 0
6359

6460
def export_nodes(self, dump):
6561
"""
6662
Exports nodes to generate A and AAAA records from the latest snapshot.
6763
"""
6864
self.now = int(time.time())
69-
if self.now - self.blocklist_timestamp > 3600:
70-
self.update_blocklist()
7165
if dump != self.dump:
7266
try:
7367
self.nodes = json.loads(open(dump, "r").read(),
@@ -157,7 +151,6 @@ def filter_nodes(self):
157151
2) Uptime must be equal or greater than the configured min. age
158152
3) Max. one node per ASN
159153
4) Uses default port
160-
5) Not listed in blocklist
161154
"""
162155
consensus_height = self.get_consensus_height()
163156
min_age = self.get_min_age()
@@ -169,10 +162,7 @@ def filter_nodes(self):
169162
services = node[5]
170163
height = node[6]
171164
asn = node[13]
172-
if (port != CONF['port'] or
173-
asn is None or
174-
age < min_age or
175-
self.is_blocked(address)):
165+
if port != CONF['port'] or asn is None or age < min_age:
176166
continue
177167
if consensus_height and abs(consensus_height - height) > 2:
178168
continue
@@ -205,49 +195,6 @@ def get_min_age(self):
205195
logging.info("Min. age: %d", min_age)
206196
return min_age
207197

208-
def is_blocked(self, address):
209-
"""
210-
Returns True if address is found in blocklist, False if otherwise.
211-
"""
212-
if address.endswith(".onion") or ":" in address:
213-
return False
214-
for network in self.blocklist:
215-
if ip_address(address) in network:
216-
logging.debug("Blocked: %s", address)
217-
return True
218-
return False
219-
220-
def update_blocklist(self):
221-
"""
222-
Fetches the latest DROP (don't route or peer) list from Spamhaus:
223-
http://www.spamhaus.org/faq/section/DROP%20FAQ
224-
"""
225-
urls = [
226-
"http://www.spamhaus.org/drop/drop.txt",
227-
"http://www.spamhaus.org/drop/edrop.txt",
228-
]
229-
self.blocklist.clear()
230-
for url in urls:
231-
try:
232-
response = requests.get(url, timeout=15)
233-
except requests.exceptions.RequestException as err:
234-
logging.warning(err)
235-
continue
236-
if response.status_code == 200:
237-
for line in response.content.strip().split("\n"):
238-
if line.startswith(";"):
239-
continue
240-
network = line.split(";")[0].strip()
241-
try:
242-
self.blocklist.add(ip_network(unicode(network)))
243-
except ValueError:
244-
continue
245-
else:
246-
logging.warning("HTTP%d: %s (%s)",
247-
response.status_code, url, response.content)
248-
logging.debug("Blocklist entries: %d", len(self.blocklist))
249-
self.blocklist_timestamp = self.now
250-
251198

252199
def cron():
253200
"""

tests/crawl.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../crawl.py
+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
import os
4+
5+
from crawl import CONF, init_conf, update_excluded_networks
6+
7+
8+
def test_update_excluded_networks():
9+
filepath = os.path.realpath(__file__)
10+
confpath = os.path.join(
11+
os.path.dirname(filepath), '..', 'conf', 'crawl.conf.default')
12+
init_conf([filepath, confpath, 'master'])
13+
14+
assert len(CONF['default_exclude_ipv4_networks']) == 22
15+
assert len(CONF['default_exclude_ipv6_networks']) == 0
16+
17+
update_excluded_networks()
18+
19+
assert len(CONF['exclude_ipv4_networks']) > 0
20+
assert len(CONF['exclude_ipv6_networks']) == 0

0 commit comments

Comments (0)