Skip to content

Commit 5ffa535

Browse files
committed
- Refactoring Site / PastX
- Python3 - PEP 8 formatting - use logging module
1 parent beae971 commit 5ffa535

13 files changed

+418
-387
lines changed

.gitignore

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1-
#python specific
1+
# project
2+
settings.py
3+
output.log
4+
5+
# python specific
26
*.pyc
37

4-
## generic files to ignore
8+
# generic files to ignore
59
*~
610
*.lock
711
*.DS_Store
812
*.swp
913
*.out
14+

Readme.md

+16-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
1-
![Dumpmon Logo](assets/logo-small.png?raw=true)
1+
Forked from: https://github.com/jordan-wright/dumpmon - original
2+
version is a twitter-bot, this version saves everything in a redis
3+
database.
4+
25
# dumpmon
3-
## Twitter-bot which monitors paste sites for interesting content
6+
Monitors paste sites (pastebin, slexy, paste) for leaked content
7+
8+
# install
9+
## requirements:
10+
11+
$ pip install beautifulsoup4
12+
$ pip install requests
13+
$ pip install redis
14+
$ cp settings.py-example settings.py
15+
16+
edit settings.py file
417

5-
For more overview, check out the blog post [here.](http://raidersec.blogspot.com/2013/03/introducing-dumpmon-twitter-bot-that.html)
18+
$ python dumpmon.py
619

7-
## Full documentation in the works

dumpmon.py

+29-32
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# dumpmon.py
1+
# dumpmon.py
22
# Author: Jordan Wright
33
# Version: 0.0 (in dev)
44

@@ -14,40 +14,37 @@
1414
from lib.Pastie import Pastie, PastiePaste
1515
from lib.helper import log
1616
from time import sleep
17-
import twitter
18-
from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
1917
import threading
18+
import logging
19+
2020

2121
def monitor():
22-
'''
23-
monitor() - Main function... creates and starts threads
24-
25-
'''
26-
log('[*] Monitoring...')
27-
log('[*] Ctrl+C to quit')
28-
bot = twitter.Api(consumer_key=CONSUMER_KEY,
29-
consumer_secret=CONSUMER_SECRET,
30-
access_token_key=ACCESS_TOKEN,
31-
access_token_secret=ACCESS_TOKEN_SECRET)
32-
# Create lock for both output log and tweet action
33-
log_lock = threading.Lock()
34-
tweet_lock = threading.Lock()
35-
36-
pastebin_thread = threading.Thread(target=Pastebin().monitor, args=[bot,log_lock, tweet_lock])
37-
slexy_thread = threading.Thread(target=Slexy().monitor, args=[bot,log_lock, tweet_lock])
38-
pastie_thead = threading.Thread(target=Pastie().monitor, args=[bot,log_lock, tweet_lock])
39-
40-
for thread in (pastebin_thread, slexy_thread, pastie_thead):
41-
thread.daemon = True
42-
thread.start()
43-
44-
# Let threads run
45-
try:
46-
while(1):
47-
sleep(5)
48-
except KeyboardInterrupt:
49-
log('Stopped.')
22+
import argparse
23+
parser = argparse.ArgumentParser()
24+
parser.add_argument(
25+
"-v", "--verbose", help="more verbose", action="store_true")
26+
args = parser.parse_args()
27+
level = logging.INFO
28+
if args.verbose:
29+
level = logging.DEBUG
30+
logging.basicConfig(
31+
format='%(asctime)s [%(levelname)s] %(message)s', level=level)
32+
logging.info('Monitoring...')
33+
34+
pastebin_thread = threading.Thread(target=Pastebin().monitor)
35+
slexy_thread = threading.Thread(target=Slexy().monitor)
36+
pastie_thead = threading.Thread(target=Pastie().monitor)
37+
38+
for thread in (pastebin_thread, slexy_thread, pastie_thead):
39+
thread.daemon = True
40+
thread.start()
41+
42+
try:
43+
while(1):
44+
sleep(5)
45+
except KeyboardInterrupt:
46+
logging.warn('Stopped.')
5047

5148

5249
if __name__ == "__main__":
53-
monitor()
50+
monitor()

lib/Paste.py

+49-53
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,56 @@
1-
from regexes import regexes
1+
from .regexes import regexes
22
import settings
3-
4-
def log(text):
5-
'''
6-
log(text): Logs message to both STDOUT and to .output_log file
7-
8-
'''
9-
if text:
10-
print text.encode('utf-8')
11-
with open(settings.log_file, 'a') as logfile:
12-
logfile.write(text.encode('utf-8') + '\n')
3+
import logging
134

145
class Paste(object):
15-
def __init__(self):
16-
'''
17-
class Paste: Generic "Paste" object to contain attributes of a standard paste
6+
def __init__(self):
7+
'''
8+
class Paste: Generic "Paste" object to contain attributes of a standard paste
189
19-
'''
20-
self.emails = 0
21-
self.hashes = 0
22-
self.num_emails = 0
23-
self.num_hashes = 0
24-
self.text = None
25-
self.type = None
26-
self.db_keywords = 0.0
10+
'''
11+
self.emails = 0
12+
self.hashes = 0
13+
self.num_emails = 0
14+
self.num_hashes = 0
15+
self.text = None
16+
self.type = None
17+
self.db_keywords = 0.0
2718

28-
def match(self):
29-
'''
30-
Matches the paste against a series of regular expressions to determine if the paste is 'interesting'
19+
def match(self):
20+
'''
21+
Matches the paste against a series of regular expressions to determine if the paste is 'interesting'
3122
32-
Sets the following attributes:
33-
self.emails
34-
self.hashes
35-
self.num_emails
36-
self.num_hashes
37-
self.db_keywords
38-
self.type
23+
Sets the following attributes:
24+
self.emails
25+
self.hashes
26+
self.num_emails
27+
self.num_hashes
28+
self.db_keywords
29+
self.type
3930
40-
'''
41-
# Get the amount of emails
42-
self.emails = list(set(regexes['email'].findall(self.text)))
43-
self.hashes = regexes['hash32'].findall(self.text)
44-
self.num_emails = len(self.emails)
45-
self.num_hashes = len(self.hashes)
46-
for regex in regexes['db_keywords']:
47-
if regex.search(self.text):
48-
log('\t[+] ' + regex.search(self.text).group(1))
49-
self.db_keywords += round(1/float(len(regexes['db_keywords'])), 2)
50-
for regex in regexes['blacklist']:
51-
if regex.search(self.text):
52-
log('\t[-] ' + regex.search(self.text).group(1))
53-
self.db_keywords -= round(1.25 * (1/float(len(regexes['db_keywords']))), 2)
54-
if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
55-
self.type = 'db_dump'
56-
if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text): self.type = 'Cisco'
57-
if regexes['honeypot'].search(self.text): self.type = 'honeypot'
58-
if regexes['google_api'].search(self.text): self.type = 'google_api'
59-
#if regexes['juniper'].search(self.text): self.type = 'Juniper'
60-
return self.type
31+
'''
32+
# Get the amount of emails
33+
self.emails = list(set(regexes['email'].findall(self.text)))
34+
self.hashes = regexes['hash32'].findall(self.text)
35+
self.num_emails = len(self.emails)
36+
self.num_hashes = len(self.hashes)
37+
for regex in regexes['db_keywords']:
38+
if regex.search(self.text):
39+
logging.debug('\t[+] ' + regex.search(self.text).group(1))
40+
self.db_keywords += round(1/float(
41+
len(regexes['db_keywords'])), 2)
42+
for regex in regexes['blacklist']:
43+
if regex.search(self.text):
44+
logging.debug('\t[-] ' + regex.search(self.text).group(1))
45+
self.db_keywords -= round(1.25 * (
46+
1/float(len(regexes['db_keywords']))), 2)
47+
if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
48+
self.type = 'db_dump'
49+
if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text):
50+
self.type = 'Cisco'
51+
if regexes['honeypot'].search(self.text):
52+
self.type = 'honeypot'
53+
if regexes['google_api'].search(self.text):
54+
self.type = 'google_api'
55+
# if regexes['juniper'].search(self.text): self.type = 'Juniper'
56+
return self.type

lib/Pastebin.py

+39-51
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,46 @@
1-
from Site import Site
2-
from Paste import Paste
1+
from .Site import Site
2+
from .Paste import Paste
33
from bs4 import BeautifulSoup
4-
import helper
4+
from . import helper
55
from time import sleep
66
from settings import SLEEP_PASTEBIN
7+
import logging
8+
79

810
class PastebinPaste(Paste):
9-
def __init__(self, id):
10-
self.id = id
11-
self.headers = None
12-
self.url = 'http://pastebin.com/raw.php?i=' + self.id
13-
super(PastebinPaste, self).__init__()
11+
def __init__(self, id):
12+
self.id = id
13+
self.headers = None
14+
self.url = 'http://pastebin.com/raw.php?i=' + self.id
15+
super(PastebinPaste, self).__init__()
16+
1417

1518
class Pastebin(Site):
16-
def __init__(self, last_id=None):
17-
if not last_id: last_id = None
18-
self.ref_id = last_id
19-
self.BASE_URL = 'http://pastebin.com'
20-
super(Pastebin, self).__init__()
21-
def update(self):
22-
'''update(self) - Fill Queue with new Pastebin IDs'''
23-
print '[*] Retrieving Pastebin ID\'s'
24-
results = BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all(lambda tag: tag.name=='td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
25-
new_pastes = []
26-
if not self.ref_id: results = results[:60]
27-
for entry in results:
28-
paste = PastebinPaste(entry.a['href'][1:])
29-
# Check to see if we found our last checked URL
30-
if paste.id == self.ref_id:
31-
break
32-
new_pastes.append(paste)
33-
for entry in new_pastes[::-1]:
34-
print '[+] Adding URL: ' + entry.url
35-
self.put(entry)
36-
def monitor(self, bot, l_lock, t_lock):
37-
self.update()
38-
while(1):
39-
while not self.empty():
40-
paste = self.get()
41-
self.ref_id = paste.id
42-
with l_lock:
43-
helper.log('[*] Checking ' + paste.url)
44-
paste.text = helper.download(paste.url)
45-
with l_lock:
46-
tweet = helper.build_tweet(paste)
47-
if tweet:
48-
print tweet
49-
with t_lock:
50-
helper.record(tweet)
51-
bot.PostUpdate(tweet)
52-
self.update()
53-
# If no new results... sleep for 5 sec
54-
while self.empty():
55-
with l_lock:
56-
helper.log('[*] No results... sleeping')
57-
sleep(SLEEP_PASTEBIN)
58-
self.update()
19+
def __init__(self, last_id=None):
20+
if not last_id:
21+
last_id = None
22+
self.ref_id = last_id
23+
self.BASE_URL = 'http://pastebin.com'
24+
self.sleep = SLEEP_PASTEBIN
25+
super(Pastebin, self).__init__()
26+
27+
def update(self):
28+
'''update(self) - Fill Queue with new Pastebin IDs'''
29+
logging.info('Retrieving Pastebin ID\'s')
30+
results = BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all(
31+
lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
32+
new_pastes = []
33+
if not self.ref_id:
34+
results = results[:60]
35+
for entry in results:
36+
paste = PastebinPaste(entry.a['href'][1:])
37+
# Check to see if we found our last checked URL
38+
if paste.id == self.ref_id:
39+
break
40+
new_pastes.append(paste)
41+
for entry in new_pastes[::-1]:
42+
logging.debug('Adding URL: ' + entry.url)
43+
self.put(entry)
44+
45+
def get_paste_text(self, paste):
46+
return helper.download(paste.url)

0 commit comments

Comments
 (0)