Commit dab63ee

Stable release v2.2.2 (Fixes #122)
2 parents: 25abb08 + 5d468cf

File tree: 5 files changed (+58 -140 lines)

.travis.yml

+1 -2

@@ -2,7 +2,6 @@ language: python
 os:
   - linux
 python:
-  - 2.7
   - 3.6
 install:
   - pip install -r requirements.txt
@@ -14,4 +13,4 @@ before_script:
   - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
 script:
   - python photon.py -u "https://stackoverflow.com" -l 1 -d 1 -t 100 --regex "\d{10}" --dns --output="d3v"
-  - python photon.py -u "https://stackoverflow.com" -l 1 -t 10 --seeds="https://stackoverflow.com/jobs" --only-urls --export=json --ninja
+  - python photon.py -u "https://rocket.chat" -l 1 -t 10 --seeds="https://stackoverflow.com/jobs" --only-urls --export=json --wayback

core/flash.py

+7 -45

@@ -1,55 +1,17 @@
 from __future__ import print_function
-import sys
-import threading
+import concurrent.futures
 
 from core.colors import info
 
-try:
-    import concurrent.futures
-except ImportError:
-    pass
-
-
-def threader(function, *urls):
-    """Start multiple threads for a function."""
-    threads = []
-    # Because URLs is a tuple
-    urls = urls[0]
-    # Iterating over URLs
-    for url in urls:
-        task = threading.Thread(target=function, args=(url,))
-        threads.append(task)
-    # Start threads
-    for thread in threads:
-        thread.start()
-    # Wait for all threads to complete their work
-    for thread in threads:
-        thread.join()
-    # Delete threads
-    del threads[:]
-
-
 def flash(function, links, thread_count):
     """Process the URLs and uses a threadpool to execute a function."""
     # Convert links (set) to list
     links = list(links)
-    if sys.version_info < (3, 2):
-        for begin in range(0, len(links), thread_count):  # Range with step
-            end = begin + thread_count
-            splitted = links[begin:end]
-            threader(function, splitted)
-            progress = end
-            if progress > len(links):  # Fix if overflow
-                progress = len(links)
-            print('\r%s Progress: %i/%i' % (info, progress, len(links)),
-                  end='\r')
-            sys.stdout.flush()
-    else:
-        threadpool = concurrent.futures.ThreadPoolExecutor(
+    threadpool = concurrent.futures.ThreadPoolExecutor(
             max_workers=thread_count)
-        futures = (threadpool.submit(function, link) for link in links)
-        for i, _ in enumerate(concurrent.futures.as_completed(futures)):
-            if i + 1 == len(links) or (i + 1) % thread_count == 0:
-                print('%s Progress: %i/%i' % (info, i + 1, len(links)),
-                      end='\r')
+    futures = (threadpool.submit(function, link) for link in links)
+    for i, _ in enumerate(concurrent.futures.as_completed(futures)):
+        if i + 1 == len(links) or (i + 1) % thread_count == 0:
+            print('%s Progress: %i/%i' % (info, i + 1, len(links)),
+                  end='\r')
     print('')
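
With the Python 2 path gone, flash() is now a thin wrapper over concurrent.futures: one future per URL, with a progress counter printed as futures complete. A minimal usage sketch (the fetch function and URLs below are hypothetical; inside Photon the callable is extractor or jscanner):

from core.flash import flash

def fetch(url):
    """Hypothetical stand-in for Photon's extractor/jscanner callables."""
    print('fetched', url)

# flash() converts the set to a list, submits one future per URL and
# prints '... Progress: i/n' as the futures complete.
flash(fetch, {'https://example.com/a', 'https://example.com/b'}, 2)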

core/requester.py

+2 -49

@@ -16,7 +16,6 @@ def requester(
         headers=None,
         timeout=10,
         host=None,
-        ninja=False,
         user_agents=None,
         failed=None,
         processed=None
@@ -32,7 +31,7 @@ def requester(
     # Pause/sleep the program for specified time
     time.sleep(delay)
 
-    def normal(url):
+    def make_request(url):
         """Default request"""
         final_headers = headers or {
             'Host': host,
@@ -66,50 +65,4 @@ def normal(url):
         response.close()
         return 'dummy'
 
-    def facebook(url):
-        """Interact with the developer.facebook.com API."""
-        return requests.get(
-            'https://developers.facebook.com/tools/debug/echo/?q=' + url,
-            verify=False
-        ).text
-
-    def pixlr(url):
-        """Interact with the pixlr.com API."""
-        if url == main_url:
-            # Because pixlr throws error if http://example.com is used
-            url = main_url + '/'
-        return requests.get(
-            'https://pixlr.com/proxy/?url=' + url,
-            headers={'Accept-Encoding': 'gzip'},
-            verify=False
-        ).text
-
-    def code_beautify(url):
-        """Interact with the codebeautify.org API."""
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0',
-            'Accept': 'text/plain, */*; q=0.01',
-            'Accept-Encoding': 'gzip',
-            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
-            'Origin': 'https://codebeautify.org',
-            'Connection': 'close',
-        }
-        return requests.post(
-            'https://codebeautify.com/URLService',
-            headers=headers,
-            data='path=' + url,
-            verify=False
-        ).text
-
-    def photopea(url):
-        """Interact with the www.photopea.com API."""
-        return requests.get(
-            'https://www.photopea.com/mirror.php?url=' + url, verify=False).text
-
-    if ninja:  # If the ninja mode is enabled
-        # Select a random request function i.e. random API
-        response = random.choice(
-            [photopea, normal, facebook, pixlr, code_beautify])(url)
-        return response or 'dummy'
-    else:
-        return normal(url)
+    return make_request(url)
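
With ninja mode removed, every request now goes through the single make_request closure, and requester() loses its ninja parameter. A sketch of the updated call shape (values are illustrative; photon.py wires these from its CLI options, as the changed call sites further below show):

from core.requester import requester

response = requester(
    'https://example.com/page',  # url to fetch
    'https://example.com',       # main_url
    0,                           # delay between requests (seconds)
    None,                        # cook: cookie string, if any
    None,                        # headers: None falls back to the defaults
    10,                          # timeout
    'example.com',               # host
    ['Mozilla/5.0'],             # user_agents
    set(),                       # failed: collects unreachable URLs
    set()                        # processed: already-crawled URLs
)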

core/utils.py

+3 -2

@@ -41,7 +41,8 @@ def is_link(url, processed, files):
         is_file = url.endswith(BAD_TYPES)
         if is_file:
             files.add(url)
-        return is_file
+            return False
+        return True
     return False
 
 
@@ -78,7 +79,7 @@ def writer(datasets, dataset_names, output_dir):
         filepath = output_dir + '/' + dataset_name + '.txt'
         with open(filepath, 'w+') as out_file:
             joined = '\n'.join(dataset)
-            out_file.write(str(joined.encode('utf-8')))
+            out_file.write(str(joined.encode('utf-8').decode('utf-8')))
             out_file.write('\n')
 
 def timer(diff, processed):
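
The is_link() change fixes a logic bug: a URL ending in one of the BAD_TYPES extensions used to return True (the value of is_file), so static files were both recorded and queued for crawling. Now file URLs return False and only unprocessed non-file URLs return True. A self-contained sketch of the fixed logic, with a hypothetical subset of Photon's BAD_TYPES tuple:

BAD_TYPES = ('.jpg', '.png', '.css', '.pdf')  # hypothetical subset

def is_link(url, processed, files):
    """Return True only for URLs worth crawling (the fixed behavior)."""
    if url not in processed:
        if url.endswith(BAD_TYPES):
            files.add(url)  # record the file...
            return False    # ...but don't crawl it
        return True
    return False

files = set()
assert is_link('https://example.com/about', set(), files) is True
assert is_link('https://example.com/logo.png', set(), files) is False
assert 'https://example.com/logo.png' in files

The writer() change addresses a related Python 3 artifact: str(joined.encode('utf-8')) yields a literal "b'...'" string, which ended up verbatim in the output files; round-tripping through encode/decode writes plain text instead.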

photon.py

+45 -42

@@ -6,13 +6,28 @@
 import argparse
 import os
 import re
+import requests
 import sys
 import time
 import warnings
 
-import requests
-
 from core.colors import good, info, run, green, red, white, end
+
+# Just a fancy ass banner
+print('''%s      ____  __          __
+     / %s__%s \/ /_  ____  / /_____  ____
+    / %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\
+   / ____/ / / / %s/_/%s / /_/ %s/_/%s / / / /
+  /_/   /_/ /_/\____/\__/\____/_/ /_/ %sv1.2.2%s\n''' %
+      (red, white, red, white, red, white, red, white, red, white, red, white,
+       red, white, end))
+
+try:
+    from urllib.parse import urlparse  # For Python 3
+except ImportError:
+    print('%s Photon runs only on Python 3.2 and above.' % info)
+    quit()
+
 import core.config
 from core.config import INTELS
 from core.flash import flash
@@ -23,28 +38,6 @@
 from core.utils import top_level, extract_headers, verb, is_link, entropy, regxy, remove_regex, timer, writer
 from core.zap import zap
 
-try:
-    from urllib.parse import urlparse  # For Python 3
-    python2, python3 = False, True
-except ImportError:
-    from urlparse import urlparse  # For Python 2
-    python2, python3 = True, False
-
-
-try:
-    input = raw_input
-except NameError:
-    pass
-
-
-# Just a fancy ass banner
-print('''%s      ____  __          __
-     / %s__%s \/ /_  ____  / /_____  ____
-    / %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\
-   / ____/ / / / %s/_/%s / /_/ %s/_/%s / / / /
-  /_/   /_/ /_/\____/\__/\____/_/ /_/ %sv1.2.1%s\n''' %
-      (red, white, red, white, red, white, red, white, red, white, red, white,
-       red, white, end))
 
 # Disable SSL related warnings
 warnings.filterwarnings('ignore')
@@ -82,8 +75,6 @@
                     action='store_true')
 parser.add_argument('--dns', help='enumerate subdomains and DNS data',
                     dest='dns', action='store_true')
-parser.add_argument('--ninja', help='ninja mode', dest='ninja',
-                    action='store_true')
 parser.add_argument('--keys', help='find secret keys', dest='api',
                     action='store_true')
 parser.add_argument('--update', help='update photon', dest='update',
@@ -118,7 +109,6 @@
 timeout = args.timeout or 6  # HTTP request timeout
 cook = args.cook or None  # Cookie
 api = bool(args.api)  # Extract high entropy strings i.e. API keys and stuff
-ninja = bool(args.ninja)  # Ninja mode toggle
 crawl_level = args.level or 2  # Crawling level
 thread_count = args.threads or 2  # Number of threads
 only_urls = bool(args.only_urls)  # Only URLs mode is off by default
@@ -135,12 +125,11 @@
 # URLs that have get params in them e.g. example.com/page.php?id=2
 fuzzable = set()
 endpoints = set()  # URLs found from javascript files
-processed = set()  # URLs that have been crawled
+processed = set(['dummy'])  # URLs that have been crawled
 # URLs that belong to the target i.e. in-scope
 internal = set(args.seeds)
 
 everything = []
-bad_intel = set()  # Unclean intel urls
 bad_scripts = set()  # Unclean javascript file urls
 
 core.config.verbose = verbose
@@ -180,13 +169,13 @@
 
 supress_regex = False
 
-def intel_extractor(response):
+def intel_extractor(url, response):
     """Extract intel from the response body."""
     matches = re.findall(r'([\w\.-]+s[\w\.-]+\.amazonaws\.com)|([\w\.-]+@[\w\.-]+\.[\.\w]+)', response)
     if matches:
         for match in matches:
             verb('Intel', match)
-            bad_intel.add(match)
+            intel.add(url + ':' + ''.join(list(match)))
 
 
 def js_extractor(response):
@@ -198,12 +187,22 @@ def js_extractor(response):
         verb('JS file', match)
         bad_scripts.add(match)
 
+def remove_file(url):
+    if url.count('/') > 2:
+        replacable = re.search(r'/[^/]*?$', url).group()
+        if replacable != '/':
+            return url.replace(replacable, '')
+        else:
+            return url
+    else:
+        return url
+
 def extractor(url):
     """Extract details from the response body."""
-    response = requester(url, main_url, delay, cook, headers, timeout, host, ninja, user_agents, failed, processed)
+    response = requester(url, main_url, delay, cook, headers, timeout, host, user_agents, failed, processed)
     if clone:
         mirror(url, response)
-    matches = re.findall(r'<[aA].*(href|HREF)=([^\s>]+)', response)
+    matches = re.findall(r'<[aA][^>]*?(href|HREF)=([^\s>]+)', response)
     for link in matches:
         # Remove everything after a "#" to deal with in-page anchors
         link = link[1].replace('\'', '').replace('"', '').split('#')[0]
@@ -219,19 +218,25 @@ def extractor(url):
         elif link[:2] == '//':
             if link.split('/')[2].startswith(host):
                 verb('Internal page', link)
-                internal.add(schema + link)
+                internal.add(schema + '://' + link)
             else:
                 verb('External page', link)
                 external.add(link)
         elif link[:1] == '/':
             verb('Internal page', link)
-            internal.add(main_url + link)
+            internal.add(remove_file(url) + link)
         else:
             verb('Internal page', link)
-            internal.add(main_url + '/' + link)
+            usable_url = remove_file(url)
+            if usable_url.endswith('/'):
+                internal.add(usable_url + link)
+            elif link.startswith('/'):
+                internal.add(usable_url + link)
+            else:
+                internal.add(usable_url + '/' + link)
 
     if not only_urls:
-        intel_extractor(response)
+        intel_extractor(url, response)
         js_extractor(response)
     if args.regex and not supress_regex:
         regxy(args.regex, response, supress_regex, custom)
@@ -245,7 +250,7 @@ def extractor(url):
 
 def jscanner(url):
     """Extract endpoints from JavaScript code."""
-    response = requester(url, main_url, delay, cook, headers, timeout, host, ninja, user_agents, failed, processed)
+    response = requester(url, main_url, delay, cook, headers, timeout, host, user_agents, failed, processed)
     # Extract URLs/endpoints
    matches = re.findall(r'[\'"](/.*?)[\'"]|[\'"](http.*?)[\'"]', response)
     # Iterate over the matches, match is a tuple
@@ -301,10 +306,8 @@ def jscanner(url):
     if '=' in url:
         fuzzable.add(url)
 
-for match in bad_intel:
-    for x in match:  # Because "match" is a tuple
-        if x != '':  # If the value isn't empty
-            intel.add(x)
+for match in intel:
+    intel.add(match)
 for url in external:
     try:
         if top_level(url, fix_protocol=True) in INTELS:
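
The new remove_file() helper lets relative links resolve against the directory of the page they were found on instead of always against main_url. A quick sketch of its behavior on illustrative URLs (the function body is copied from the diff above):

import re

def remove_file(url):
    """Strip the final path segment from a URL, when there is one."""
    if url.count('/') > 2:
        replacable = re.search(r'/[^/]*?$', url).group()
        if replacable != '/':
            return url.replace(replacable, '')
        else:
            return url
    else:
        return url

print(remove_file('https://example.com/blog/post.html'))  # https://example.com/blog
print(remove_file('https://example.com/blog/'))           # unchanged: ends in '/'
print(remove_file('https://example.com'))                 # unchanged: bare host

One caveat worth knowing: str.replace() removes every occurrence of the matched segment, so a URL like https://example.com/a/a collapses to https://example.com rather than https://example.com/a.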
