Commit c426e65: "back to original version"
1 parent: 04123a1

8 files changed: +157 -99 lines

Readme.md (+10 -13)

@@ -1,19 +1,16 @@
-Forked from: https://github.com/jordan-wright/dumpmon - original
-version is a twitter-bot, this version save everything in a redis
-database.
-
+![Dumpmon Logo](assets/logo-small.png?raw=true)
 # dumpmon
-Monitors paste sites (pastebin, slexy, paste) for leaked content
+## Twitter-bot which monitors paste sites for interesting content
 
-# install
-## requirements:
+For more overview, check out the blog post [here.](http://raidersec.blogspot.com/2013/03/introducing-dumpmon-twitter-bot-that.html)
 
-    $ pip install beautifulsoup4
-    $ pip install requests
-    $ pip install redis
-    $ cp settings.py-example settings.py
+## Dependencies
+[python-twitter](https://code.google.com/p/python-twitter/)
+    $ pip install beautifulsoup4
+    $ pip install requests
 
-edit settings.py file
+Next, edit the settings.py to include your Twitter application settings.
 
-$ python dumpmon.py
+## Executing dumpmon
 
+    python dumpmon.py
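The revert makes settings.py the place for Twitter credentials again. A minimal sketch of what that file needs, based only on the names imported in the diffs below; every value is a placeholder, and the thresholds shipped in the real settings.py-example may differ:

```python
# settings.py -- sketch only; all values below are placeholders.
# The names match what the diffs below import (dumpmon.py, lib/*.py).

# Twitter application credentials used by twitter.Api in dumpmon.py
CONSUMER_KEY = 'your-consumer-key'
CONSUMER_SECRET = 'your-consumer-secret'
ACCESS_TOKEN = 'your-access-token'
ACCESS_TOKEN_SECRET = 'your-access-token-secret'

# Per-site polling intervals in seconds (imported by lib/Pastebin.py,
# lib/Pastie.py, lib/Slexy.py)
SLEEP_PASTEBIN = 30
SLEEP_SLEXY = 30
SLEEP_PASTIE = 30

# Scoring thresholds and log file referenced by lib/Paste.py
EMAIL_THRESHOLD = 20
HASH_THRESHOLD = 30
DB_KEYWORDS_THRESHOLD = 0.55
log_file = '.output_log'
```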

dumpmon.py (+24 -17)

@@ -14,36 +14,43 @@
 from lib.Pastie import Pastie, PastiePaste
 from lib.helper import log
 from time import sleep
+import twitter
+from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
 import threading
-import logging
 
 
 def monitor():
-    import argparse
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-v", "--verbose", help="more verbose", action="store_true")
-    args = parser.parse_args()
-    level = logging.INFO
-    if args.verbose:
-        level = logging.DEBUG
-    logging.basicConfig(
-        format='%(asctime)s [%(levelname)s] %(message)s', level=level)
-    logging.info('Monitoring...')
-
-    pastebin_thread = threading.Thread(target=Pastebin().monitor)
-    slexy_thread = threading.Thread(target=Slexy().monitor)
-    pastie_thead = threading.Thread(target=Pastie().monitor)
+    '''
+    monitor() - Main function... creates and starts threads
+
+    '''
+    log('[*] Monitoring...')
+    log('[*] Ctrl+C to quit')
+    bot = twitter.Api(consumer_key=CONSUMER_KEY,
+                      consumer_secret=CONSUMER_SECRET,
+                      access_token_key=ACCESS_TOKEN,
+                      access_token_secret=ACCESS_TOKEN_SECRET)
+    # Create lock for both output log and tweet action
+    log_lock = threading.Lock()
+    tweet_lock = threading.Lock()
+
+    pastebin_thread = threading.Thread(
+        target=Pastebin().monitor, args=[bot, log_lock, tweet_lock])
+    slexy_thread = threading.Thread(
+        target=Slexy().monitor, args=[bot, log_lock, tweet_lock])
+    pastie_thead = threading.Thread(
+        target=Pastie().monitor, args=[bot, log_lock, tweet_lock])
 
     for thread in (pastebin_thread, slexy_thread, pastie_thead):
         thread.daemon = True
         thread.start()
 
+    # Let threads run
     try:
         while(1):
             sleep(5)
     except KeyboardInterrupt:
-        logging.warn('Stopped.')
+        log('Stopped.')
 
 
 if __name__ == "__main__":
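The thread setup in this hunk follows a common idiom worth spelling out: the monitors run as daemon threads, so they die automatically when the main thread exits, and the main thread idles in a sleep loop so KeyboardInterrupt can be caught cleanly. A stripped-down illustration of just that idiom (the worker here is a stand-in, not dumpmon code):

```python
import threading
from time import sleep

def worker():
    # Stand-in for a site monitor loop
    while True:
        sleep(1)

t = threading.Thread(target=worker)
t.daemon = True   # daemon threads are killed when the main thread exits
t.start()

try:
    while True:
        sleep(5)  # keep the main thread alive; Ctrl+C lands here
except KeyboardInterrupt:
    print('Stopped.')
```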

lib/Paste.py (+15 -4)

@@ -1,6 +1,17 @@
-from .regexes import regexes
+from regexes import regexes
 import settings
-import logging
+
+
+def log(text):
+    '''
+    log(text): Logs message to both STDOUT and to .output_log file
+
+    '''
+    if text:
+        print text.encode('utf-8')
+        with open(settings.log_file, 'a') as logfile:
+            logfile.write(text.encode('utf-8') + '\n')
+
 
 class Paste(object):
     def __init__(self):
@@ -36,12 +47,12 @@ def match(self):
         self.num_hashes = len(self.hashes)
         for regex in regexes['db_keywords']:
             if regex.search(self.text):
-                logging.debug('\t[+] ' + regex.search(self.text).group(1))
+                log('\t[+] ' + regex.search(self.text).group(1))
                 self.db_keywords += round(1/float(
                     len(regexes['db_keywords'])), 2)
         for regex in regexes['blacklist']:
             if regex.search(self.text):
-                logging.debug('\t[-] ' + regex.search(self.text).group(1))
+                log('\t[-] ' + regex.search(self.text).group(1))
                 self.db_keywords -= round(1.25 * (
                     1/float(len(regexes['db_keywords']))), 2)
         if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
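The db_keywords arithmetic in match() is easier to follow with numbers plugged in. A toy walk-through, assuming 20 patterns in regexes['db_keywords'] and made-up hit counts (the real counts and thresholds depend on regexes.py and settings.py):

```python
num_patterns = 20       # assumed len(regexes['db_keywords'])
keyword_hits = 4        # hypothetical keyword matches in the paste
blacklist_hits = 1      # hypothetical blacklist matches

per_hit = round(1 / float(num_patterns), 2)           # 0.05 per keyword hit
penalty = round(1.25 * (1 / float(num_patterns)), 2)  # 0.06 per blacklist hit

score = keyword_hits * per_hit - blacklist_hits * penalty
print(round(score, 2))  # 0.14 -- compared against settings.DB_KEYWORDS_THRESHOLD
```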

lib/Pastebin.py (+32 -9)

@@ -1,10 +1,10 @@
-from .Site import Site
-from .Paste import Paste
+from Site import Site
+from Paste import Paste
 from bs4 import BeautifulSoup
-from . import helper
+import helper
 from time import sleep
 from settings import SLEEP_PASTEBIN
-import logging
+from twitter import TwitterError
 
 
 class PastebinPaste(Paste):
@@ -21,12 +21,11 @@ def __init__(self, last_id=None):
             last_id = None
         self.ref_id = last_id
         self.BASE_URL = 'http://pastebin.com'
-        self.sleep = SLEEP_PASTEBIN
         super(Pastebin, self).__init__()
 
     def update(self):
         '''update(self) - Fill Queue with new Pastebin IDs'''
-        logging.info('Retrieving Pastebin ID\'s')
+        print '[*] Retrieving Pastebin ID\'s'
         results = BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all(
             lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
         new_pastes = []
@@ -39,8 +38,32 @@ def update(self):
                 break
             new_pastes.append(paste)
         for entry in new_pastes[::-1]:
-            logging.debug('Adding URL: ' + entry.url)
+            print '[+] Adding URL: ' + entry.url
             self.put(entry)
 
-    def get_paste_text(self, paste):
-        return helper.download(paste.url)
+    def monitor(self, bot, l_lock, t_lock):
+        self.update()
+        while(1):
+            while not self.empty():
+                paste = self.get()
+                self.ref_id = paste.id
+                with l_lock:
+                    helper.log('[*] Checking ' + paste.url)
+                paste.text = helper.download(paste.url)
+                with l_lock:
+                    tweet = helper.build_tweet(paste)
+                if tweet:
+                    print tweet
+                    with t_lock:
+                        helper.record(tweet)
+                        try:
+                            bot.PostUpdate(tweet)
+                        except TwitterError:
+                            pass
+                self.update()
+            # If no new results... sleep for 5 sec
+            while self.empty():
+                with l_lock:
+                    helper.log('[*] No results... sleeping')
+                sleep(SLEEP_PASTEBIN)
+                self.update()

lib/Pastie.py (+35 -9)

@@ -1,9 +1,10 @@
-from .Site import Site
-from .Paste import Paste
+from Site import Site
+from Paste import Paste
 from bs4 import BeautifulSoup
-from . import helper
+import helper
+from time import sleep
 from settings import SLEEP_PASTIE
-import logging
+from twitter import TwitterError
 
 
 class PastiePaste(Paste):
@@ -20,12 +21,11 @@ def __init__(self, last_id=None):
             last_id = None
         self.ref_id = last_id
         self.BASE_URL = 'http://pastie.org'
-        self.sleep = SLEEP_PASTIE
         super(Pastie, self).__init__()
 
     def update(self):
         '''update(self) - Fill Queue with new Pastie IDs'''
-        logging.info('Retrieving Pastie ID\'s')
+        print '[*] Retrieving Pastie ID\'s'
         results = [tag for tag in BeautifulSoup(helper.download(
             self.BASE_URL + '/pastes')).find_all('p', 'link') if tag.a]
         new_pastes = []
@@ -39,8 +39,34 @@ def update(self):
                 break
             new_pastes.append(paste)
         for entry in new_pastes[::-1]:
-            logging.debug('Adding URL: ' + entry.url)
+            print '[+] Adding URL: ' + entry.url
             self.put(entry)
 
-    def get_paste_text(self, paste):
-        return BeautifulSoup(helper.download(paste.url)).pre.text
+    def monitor(self, bot, l_lock, t_lock):
+        self.update()
+        while(1):
+            while not self.empty():
+                paste = self.get()
+                self.ref_id = paste.id
+                with l_lock:
+                    helper.log('[*] Checking ' + paste.url)
+                # goober pastie - Not actually showing *raw* text.. Still need
+                # to parse it out
+                paste.text = BeautifulSoup(helper.download(paste.url)).pre.text
+                with l_lock:
+                    tweet = helper.build_tweet(paste)
+                if tweet:
+                    print tweet
+                    with t_lock:
+                        helper.record(tweet)
+                        try:
+                            bot.PostUpdate(tweet)
+                        except TwitterError:
+                            pass
+                self.update()
+            # If no new results... sleep for 5 sec
+            while self.empty():
+                with l_lock:
+                    helper.log('[*] No results... sleeping')
+                sleep(SLEEP_PASTIE)
+                self.update()
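As the "goober pastie" comment notes, pastie.org offered no raw-text endpoint, so monitor() scrapes the first <pre> element out of the HTML. The same bs4 call in isolation, against a canned page (the explicit 'html.parser' argument is added here for reproducibility; the diff relies on bs4's default parser):

```python
from bs4 import BeautifulSoup

html = '<html><body><pre>password=hunter2\nuser@example.com</pre></body></html>'
# .pre locates the first <pre> tag; .text strips the markup
print(BeautifulSoup(html, 'html.parser').pre.text)
```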

lib/Site.py (+3 -32)

@@ -1,21 +1,12 @@
-try:
-    from queue import Queue # python3
-except ImportError:
-    from Queue import Queue # python2
+from Queue import Queue
 import requests
 import time
 from requests import ConnectionError
-import logging
-import redis
-import json
 
 
 class Site(object):
     '''
-    Site - parent class used for a generic
-    'Queue' structure with a few helper methods
-    and features. Implements the following methods:
-
+    Site - parent class used for a generic 'Queue' structure with a few helper methods and features. Impelements the following methods:
     empty() - Is the Queue empty
     get(): Get the next item in the queue
     put(item): Puts an item in the queue
@@ -30,8 +21,6 @@ class Site(object):
     # I would have used the built-in queue, but there is no support for a peek() method
     # that I could find... So, I decided to implement my own queue with a few
     # changes
-    redisc = redis.StrictRedis(host='localhost', port=6379, db=0)
-
     def __init__(self, queue=None):
         if queue is None:
             self.queue = []
@@ -63,22 +52,4 @@ def clear(self):
         self.queue = []
 
     def list(self):
-        print('\n'.join(url for url in self.queue))
-
-    def monitor(self):
-        self.update()
-        while(1):
-            while not self.empty():
-                paste = self.get()
-                self.ref_id = paste.id
-                logging.debug('Checking ' + paste.url)
-                paste.text = self.get_paste_text(paste)
-                if paste.match():
-                    logging.info('Found interesting stuff')
-                    self.redisc.set(paste.url, paste.text)
-                self.update()
-            while self.empty():
-                logging.debug('No results... sleeping')
-                time.sleep(self.sleep)
-                self.update()
-
+        print '\n'.join(url for url in self.queue)
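The surviving docstring and comments explain why Site exists at all: the stdlib Queue has no peek(). A minimal sketch of the list-backed structure those methods imply, under an illustrative name (PeekableQueue is not a dumpmon identifier, and the real class carries more than shown here):

```python
class PeekableQueue(object):
    '''List-backed FIFO with the peek() the stdlib Queue lacks --
    a sketch of the structure Site's docstring describes.'''

    def __init__(self, queue=None):
        self.queue = [] if queue is None else queue

    def empty(self):
        return not self.queue

    def get(self):
        return self.queue.pop(0)   # FIFO: oldest item first

    def put(self, item):
        self.queue.append(item)

    def peek(self):
        return self.queue[0] if self.queue else None

    def clear(self):
        self.queue = []

q = PeekableQueue()
q.put('http://pastebin.com/abc123')
print(q.peek())   # inspect without removing
print(q.get())    # now actually dequeue
```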

lib/Slexy.py (+32 -9)

@@ -1,10 +1,10 @@
-from .Site import Site
-from .Paste import Paste
+from Site import Site
+from Paste import Paste
 from bs4 import BeautifulSoup
-from . import helper
+import helper
 from time import sleep
 from settings import SLEEP_SLEXY
-import logging
+from twitter import TwitterError
 
 
 class SlexyPaste(Paste):
@@ -21,12 +21,11 @@ def __init__(self, last_id=None):
             last_id = None
         self.ref_id = last_id
         self.BASE_URL = 'http://slexy.org'
-        self.sleep = SLEEP_SLEXY
         super(Slexy, self).__init__()
 
     def update(self):
         '''update(self) - Fill Queue with new Slexy IDs'''
-        logging.info('Retrieving Slexy ID\'s')
+        print '[*] Retrieving Slexy ID\'s'
         results = BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all(
             lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href'])
         new_pastes = []
@@ -39,8 +38,32 @@ def update(self):
                 break
             new_pastes.append(paste)
         for entry in new_pastes[::-1]:
-            logging.debug('Adding URL: ' + entry.url)
+            print '[+] Adding URL: ' + entry.url
             self.put(entry)
 
-    def get_paste_text(self, paste):
-        return helper.download(paste.url)
+    def monitor(self, bot, l_lock, t_lock):
+        self.update()
+        while(1):
+            while not self.empty():
+                paste = self.get()
+                self.ref_id = paste.id
+                with l_lock:
+                    helper.log('[*] Checking ' + paste.url)
+                paste.text = helper.download(paste.url)
+                with l_lock:
+                    tweet = helper.build_tweet(paste)
+                if tweet:
+                    print tweet
+                    with t_lock:
+                        helper.record(tweet)
+                        try:
+                            bot.PostUpdate(tweet)
+                        except TwitterError:
+                            pass
+                self.update()
+            # If no new results... sleep for 5 sec
+            while self.empty():
+                with l_lock:
+                    helper.log('[*] No results... sleeping')
+                sleep(SLEEP_SLEXY)
+                self.update()
