Skip to content

Commit 5ffa535

Browse files
committed
- Refactoring Site / PastX
- Python3 - PEP 8 formatting - use logging module
1 parent beae971 commit 5ffa535

13 files changed

+418
-387
lines changed

.gitignore

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1-
#python specific
1+
# project
2+
settings.py
3+
output.log
4+
5+
# python specific
26
*.pyc
37

4-
## generic files to ignore
8+
# generic files to ignore
59
*~
610
*.lock
711
*.DS_Store
812
*.swp
913
*.out
14+

Readme.md

+16-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
1-
![Dumpmon Logo](assets/logo-small.png?raw=true)
1+
Forked from: https://github.com/jordan-wright/dumpmon - original
2+
version is a twitter-bot, this version saves everything in a redis
3+
database.
4+
25
# dumpmon
3-
## Twitter-bot which monitors paste sites for interesting content
6+
Monitors paste sites (pastebin, slexy, paste) for leaked content
7+
8+
# install
9+
## requirements:
10+
11+
$ pip install beautifulsoup4
12+
$ pip install requests
13+
$ pip install redis
14+
$ cp settings.py-example settings.py
15+
16+
edit settings.py file
417

5-
For more overview, check out the blog post [here.](http://raidersec.blogspot.com/2013/03/introducing-dumpmon-twitter-bot-that.html)
18+
$ python dumpmon.py
619

7-
## Full documentation in the works

dumpmon.py

+29-32
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# dumpmon.py
1+
# dumpmon.py
22
# Author: Jordan Wright
33
# Version: 0.0 (in dev)
44

@@ -14,40 +14,37 @@
1414
from lib.Pastie import Pastie, PastiePaste
1515
from lib.helper import log
1616
from time import sleep
17-
import twitter
18-
from settings import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
1917
import threading
18+
import logging
19+
2020

2121
def monitor():
22-
'''
23-
monitor() - Main function... creates and starts threads
24-
25-
'''
26-
log('[*] Monitoring...')
27-
log('[*] Ctrl+C to quit')
28-
bot = twitter.Api(consumer_key=CONSUMER_KEY,
29-
consumer_secret=CONSUMER_SECRET,
30-
access_token_key=ACCESS_TOKEN,
31-
access_token_secret=ACCESS_TOKEN_SECRET)
32-
# Create lock for both output log and tweet action
33-
log_lock = threading.Lock()
34-
tweet_lock = threading.Lock()
35-
36-
pastebin_thread = threading.Thread(target=Pastebin().monitor, args=[bot,log_lock, tweet_lock])
37-
slexy_thread = threading.Thread(target=Slexy().monitor, args=[bot,log_lock, tweet_lock])
38-
pastie_thead = threading.Thread(target=Pastie().monitor, args=[bot,log_lock, tweet_lock])
39-
40-
for thread in (pastebin_thread, slexy_thread, pastie_thead):
41-
thread.daemon = True
42-
thread.start()
43-
44-
# Let threads run
45-
try:
46-
while(1):
47-
sleep(5)
48-
except KeyboardInterrupt:
49-
log('Stopped.')
22+
import argparse
23+
parser = argparse.ArgumentParser()
24+
parser.add_argument(
25+
"-v", "--verbose", help="more verbose", action="store_true")
26+
args = parser.parse_args()
27+
level = logging.INFO
28+
if args.verbose:
29+
level = logging.DEBUG
30+
logging.basicConfig(
31+
format='%(asctime)s [%(levelname)s] %(message)s', level=level)
32+
logging.info('Monitoring...')
33+
34+
pastebin_thread = threading.Thread(target=Pastebin().monitor)
35+
slexy_thread = threading.Thread(target=Slexy().monitor)
36+
pastie_thead = threading.Thread(target=Pastie().monitor)
37+
38+
for thread in (pastebin_thread, slexy_thread, pastie_thead):
39+
thread.daemon = True
40+
thread.start()
41+
42+
try:
43+
while(1):
44+
sleep(5)
45+
except KeyboardInterrupt:
46+
logging.warn('Stopped.')
5047

5148

5249
if __name__ == "__main__":
53-
monitor()
50+
monitor()

lib/Paste.py

+49-53
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,56 @@
1-
from regexes import regexes
1+
from .regexes import regexes
22
import settings
3-
4-
def log(text):
5-
'''
6-
log(text): Logs message to both STDOUT and to .output_log file
7-
8-
'''
9-
if text:
10-
print text.encode('utf-8')
11-
with open(settings.log_file, 'a') as logfile:
12-
logfile.write(text.encode('utf-8') + '\n')
3+
import logging
134

145
class Paste(object):
15-
def __init__(self):
16-
'''
17-
class Paste: Generic "Paste" object to contain attributes of a standard paste
6+
def __init__(self):
7+
'''
8+
class Paste: Generic "Paste" object to contain attributes of a standard paste
189
19-
'''
20-
self.emails = 0
21-
self.hashes = 0
22-
self.num_emails = 0
23-
self.num_hashes = 0
24-
self.text = None
25-
self.type = None
26-
self.db_keywords = 0.0
10+
'''
11+
self.emails = 0
12+
self.hashes = 0
13+
self.num_emails = 0
14+
self.num_hashes = 0
15+
self.text = None
16+
self.type = None
17+
self.db_keywords = 0.0
2718

28-
def match(self):
29-
'''
30-
Matches the paste against a series of regular expressions to determine if the paste is 'interesting'
19+
def match(self):
20+
'''
21+
Matches the paste against a series of regular expressions to determine if the paste is 'interesting'
3122
32-
Sets the following attributes:
33-
self.emails
34-
self.hashes
35-
self.num_emails
36-
self.num_hashes
37-
self.db_keywords
38-
self.type
23+
Sets the following attributes:
24+
self.emails
25+
self.hashes
26+
self.num_emails
27+
self.num_hashes
28+
self.db_keywords
29+
self.type
3930
40-
'''
41-
# Get the amount of emails
42-
self.emails = list(set(regexes['email'].findall(self.text)))
43-
self.hashes = regexes['hash32'].findall(self.text)
44-
self.num_emails = len(self.emails)
45-
self.num_hashes = len(self.hashes)
46-
for regex in regexes['db_keywords']:
47-
if regex.search(self.text):
48-
log('\t[+] ' + regex.search(self.text).group(1))
49-
self.db_keywords += round(1/float(len(regexes['db_keywords'])), 2)
50-
for regex in regexes['blacklist']:
51-
if regex.search(self.text):
52-
log('\t[-] ' + regex.search(self.text).group(1))
53-
self.db_keywords -= round(1.25 * (1/float(len(regexes['db_keywords']))), 2)
54-
if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
55-
self.type = 'db_dump'
56-
if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text): self.type = 'Cisco'
57-
if regexes['honeypot'].search(self.text): self.type = 'honeypot'
58-
if regexes['google_api'].search(self.text): self.type = 'google_api'
59-
#if regexes['juniper'].search(self.text): self.type = 'Juniper'
60-
return self.type
31+
'''
32+
# Get the amount of emails
33+
self.emails = list(set(regexes['email'].findall(self.text)))
34+
self.hashes = regexes['hash32'].findall(self.text)
35+
self.num_emails = len(self.emails)
36+
self.num_hashes = len(self.hashes)
37+
for regex in regexes['db_keywords']:
38+
if regex.search(self.text):
39+
logging.debug('\t[+] ' + regex.search(self.text).group(1))
40+
self.db_keywords += round(1/float(
41+
len(regexes['db_keywords'])), 2)
42+
for regex in regexes['blacklist']:
43+
if regex.search(self.text):
44+
logging.debug('\t[-] ' + regex.search(self.text).group(1))
45+
self.db_keywords -= round(1.25 * (
46+
1/float(len(regexes['db_keywords']))), 2)
47+
if (self.num_emails >= settings.EMAIL_THRESHOLD) or (self.num_hashes >= settings.HASH_THRESHOLD) or (self.db_keywords >= settings.DB_KEYWORDS_THRESHOLD):
48+
self.type = 'db_dump'
49+
if regexes['cisco_hash'].search(self.text) or regexes['cisco_pass'].search(self.text):
50+
self.type = 'Cisco'
51+
if regexes['honeypot'].search(self.text):
52+
self.type = 'honeypot'
53+
if regexes['google_api'].search(self.text):
54+
self.type = 'google_api'
55+
# if regexes['juniper'].search(self.text): self.type = 'Juniper'
56+
return self.type

lib/Pastebin.py

+39-51
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,46 @@
1-
from Site import Site
2-
from Paste import Paste
1+
from .Site import Site
2+
from .Paste import Paste
33
from bs4 import BeautifulSoup
4-
import helper
4+
from . import helper
55
from time import sleep
66
from settings import SLEEP_PASTEBIN
7+
import logging
8+
79

810
class PastebinPaste(Paste):
9-
def __init__(self, id):
10-
self.id = id
11-
self.headers = None
12-
self.url = 'http://pastebin.com/raw.php?i=' + self.id
13-
super(PastebinPaste, self).__init__()
11+
def __init__(self, id):
12+
self.id = id
13+
self.headers = None
14+
self.url = 'http://pastebin.com/raw.php?i=' + self.id
15+
super(PastebinPaste, self).__init__()
16+
1417

1518
class Pastebin(Site):
16-
def __init__(self, last_id=None):
17-
if not last_id: last_id = None
18-
self.ref_id = last_id
19-
self.BASE_URL = 'http://pastebin.com'
20-
super(Pastebin, self).__init__()
21-
def update(self):
22-
'''update(self) - Fill Queue with new Pastebin IDs'''
23-
print '[*] Retrieving Pastebin ID\'s'
24-
results = BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all(lambda tag: tag.name=='td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
25-
new_pastes = []
26-
if not self.ref_id: results = results[:60]
27-
for entry in results:
28-
paste = PastebinPaste(entry.a['href'][1:])
29-
# Check to see if we found our last checked URL
30-
if paste.id == self.ref_id:
31-
break
32-
new_pastes.append(paste)
33-
for entry in new_pastes[::-1]:
34-
print '[+] Adding URL: ' + entry.url
35-
self.put(entry)
36-
def monitor(self, bot, l_lock, t_lock):
37-
self.update()
38-
while(1):
39-
while not self.empty():
40-
paste = self.get()
41-
self.ref_id = paste.id
42-
with l_lock:
43-
helper.log('[*] Checking ' + paste.url)
44-
paste.text = helper.download(paste.url)
45-
with l_lock:
46-
tweet = helper.build_tweet(paste)
47-
if tweet:
48-
print tweet
49-
with t_lock:
50-
helper.record(tweet)
51-
bot.PostUpdate(tweet)
52-
self.update()
53-
# If no new results... sleep for 5 sec
54-
while self.empty():
55-
with l_lock:
56-
helper.log('[*] No results... sleeping')
57-
sleep(SLEEP_PASTEBIN)
58-
self.update()
19+
def __init__(self, last_id=None):
20+
if not last_id:
21+
last_id = None
22+
self.ref_id = last_id
23+
self.BASE_URL = 'http://pastebin.com'
24+
self.sleep = SLEEP_PASTEBIN
25+
super(Pastebin, self).__init__()
26+
27+
def update(self):
28+
'''update(self) - Fill Queue with new Pastebin IDs'''
29+
logging.info('Retrieving Pastebin ID\'s')
30+
results = BeautifulSoup(helper.download(self.BASE_URL + '/archive')).find_all(
31+
lambda tag: tag.name == 'td' and tag.a and '/archive/' not in tag.a['href'] and tag.a['href'][1:])
32+
new_pastes = []
33+
if not self.ref_id:
34+
results = results[:60]
35+
for entry in results:
36+
paste = PastebinPaste(entry.a['href'][1:])
37+
# Check to see if we found our last checked URL
38+
if paste.id == self.ref_id:
39+
break
40+
new_pastes.append(paste)
41+
for entry in new_pastes[::-1]:
42+
logging.debug('Adding URL: ' + entry.url)
43+
self.put(entry)
44+
45+
def get_paste_text(self, paste):
46+
return helper.download(paste.url)

0 commit comments

Comments
 (0)