Merge pull request #65 from diogoosorio/apply-pep8-convention

codingo · web-flow · commit d83392835d84 · 2017-10-10T10:15:26.000+10:00
Apply PEP8 convention to the codebase
diff --git a/.travis.yml b/.travis.yml
@@ -5,9 +5,9 @@ python:
   - 3.6
 install:
   - pip --version
-  - pip install -r requirements.txt
+  - pip install -r test-requirements.txt
   - pip install pep8 
 before_script:
-  - pep8 --ignore=E501,W293,E202,E241,W291   *.py 
+  - pep8 -v *.py lib/
 script:
   - pytest
diff --git a/VHostScan.py b/VHostScan.py
@@ -7,10 +7,17 @@
 from socket import gethostbyaddr
 from lib.core.virtual_host_scanner import *
 from lib.helpers.output_helper import *
-from lib.helpers.file_helper import get_combined_word_lists, load_random_user_agents
+from lib.helpers.file_helper import get_combined_word_lists
+from lib.helpers.file_helper import load_random_user_agents
 from lib.core.__version__ import __version__
 from lib.input import cli_argument_parser
 
+DEFAULT_WORDLIST_FILE = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    'wordlists',
+    'virtual-host-scanning.txt'
+)
+
 
 def print_banner():
     print("+-+-+-+-+-+-+-+-+-+  v. %s" % __version__)
@@ -20,13 +27,14 @@ def print_banner():
 
 def main():
     print_banner()
-    
+
     parser = cli_argument_parser()
     arguments = parser.parse(sys.argv[1:])
 
     wordlist = []
     word_list_types = []
-    default_wordlist = "./wordlists/virtual-host-scanning.txt" if not arguments.stdin else None
+
+    default_wordlist = DEFAULT_WORDLIST_FILE if not arguments.stdin else None
 
     if arguments.stdin:
         word_list_types.append('stdin')
@@ -42,11 +50,14 @@ def main():
         print("[!] No words found in provided wordlists, unable to scan.")
         sys.exit(1)
 
-    print("[+] Starting virtual host scan for {host} using port {port} and {inputs}".format(
-        host=arguments.target_hosts,
-        port=arguments.port,
-        inputs=', '.join(word_list_types),
-    ))
+    print(
+        "[+] Starting virtual host scan for {host} using "
+        "port {port} and {inputs}".format(
+            host=arguments.target_hosts,
+            port=arguments.port,
+            inputs=', '.join(word_list_types),
+        )
+    )
 
     user_agents = []
     if arguments.user_agent:
@@ -62,10 +73,14 @@ def main():
     if(arguments.add_waf_bypass_headers):
         print("[>] WAF flag set, sending simple WAF bypass headers.")
 
-    print("[>] Ignoring HTTP codes: %s" % (arguments.ignore_http_codes))
+    print("[>] Ignoring HTTP codes: {}".format(arguments.ignore_http_codes))
 
     if(arguments.ignore_content_length > 0):
-        print("[>] Ignoring Content length: %s" % (arguments.ignore_content_length))
+        print(
+            "[>] Ignoring Content length: {}".format(
+                arguments.ignore_content_length
+            )
+        )
 
     if arguments.first_hit:
         print("[>] First hit is set.")
@@ -78,7 +93,12 @@ def main():
             wordlist.extend(aliases)
 
     scanner_args = vars(arguments)
-    scanner_args.update({'target': arguments.target_hosts, 'wordlist': wordlist, 'user_agents': user_agents})
+    scanner_args.update({
+        'target': arguments.target_hosts,
+        'wordlist': wordlist,
+        'user_agents': user_agents
+    })
+
     scanner = virtual_host_scanner(**scanner_args)
     scanner.scan()
     output = output_helper(scanner, arguments)
diff --git a/lib/__init__.py b/lib/__init__.py
@@ -5,4 +5,4 @@
 See the file 'doc/COPYING' for copying permission
 """
 
-pass
+pass
diff --git a/lib/core/__init__.py b/lib/core/__init__.py
@@ -5,4 +5,4 @@
 See the file 'doc/COPYING' for copying permission
 """
 
-pass
+pass
diff --git a/lib/core/__version__.py b/lib/core/__version__.py
@@ -1,5 +1,5 @@
-# +-+-+-+-+-+-+-+-+-+  
+# +-+-+-+-+-+-+-+-+-+
 # |V|H|o|s|t|S|c|a|n|  Developed by @codingo_ & @__timk
 # +-+-+-+-+-+-+-+-+-+  https://github.com/codingo/VHostScan
 
-__version__ = '1.6.1'
+__version__ = '1.6.2'
diff --git a/lib/core/discovered_host.py b/lib/core/discovered_host.py
@@ -8,4 +8,4 @@ def __init__(self):
         self.response_code = 0
         self.hash = ''
         self.keys = []
-        self.content = b''
+        self.content = b''
diff --git a/lib/core/virtual_host_scanner.py b/lib/core/virtual_host_scanner.py
@@ -8,19 +8,26 @@
 from lib.core.discovered_host import *
 from urllib3.util import ssl_
 
-DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
+DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '\
+                     'AppleWebKit/537.36 (KHTML, like Gecko) '\
+                     'Chrome/61.0.3163.100 Safari/537.36'
 
 _target_host = None
 _ssl_wrap_socket = ssl_.ssl_wrap_socket
+
+
 def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                     ca_certs=None, server_hostname=None,
                     ssl_version=None, ciphers=None, ssl_context=None,
                     ca_cert_dir=None):
-        ssl_wrap_socket_(sock, keyfile=keyfile, certfile=certfile, cert_reqs=cert_reqs,
-                    ca_certs=ca_certs, server_hostname=_target_host,
-                    ssl_version=ssl_version, ciphers=ciphers, ssl_context=ssl_context,
-                    ca_cert_dir=ca_cert_dir)
-ssl_.ssl_wrap_socket = _ssl_wrap_socket 
+        ssl_wrap_socket_(sock, keyfile=keyfile, certfile=certfile,
+                         cert_reqs=cert_reqs, ca_certs=ca_certs,
+                         server_hostname=_target_host, ssl_version=ssl_version,
+                         ciphers=ciphers, ssl_context=ssl_context,
+                         ca_cert_dir=ca_cert_dir)
+
+ssl_.ssl_wrap_socket = _ssl_wrap_socket
+
 
 class virtual_host_scanner(object):
     """Virtual host scanning class
@@ -35,43 +42,52 @@ class virtual_host_scanner(object):
         ignore_content_length: integer value of content length to ignore
         output: folder to write output file to
     """
-
-
     def __init__(self, target, wordlist, **kwargs):
         self.target = target
         self.wordlist = wordlist
         self.base_host = kwargs.get('base_host')
         self.rate_limit = int(kwargs.get('rate_limit', 0))
         self.port = int(kwargs.get('port', 80))
         self.real_port = int(kwargs.get('real_port', 80))
-        self.ignore_content_length = int(kwargs.get('ignore_content_length', 0))
         self.ssl = kwargs.get('ssl', False)
         self.fuzzy_logic = kwargs.get('fuzzy_logic', False)
-        self.add_waf_bypass_headers = kwargs.get('add_waf_bypass_headers', False)
         self.unique_depth = int(kwargs.get('unique_depth', 1))
         self.ignore_http_codes = kwargs.get('ignore_http_codes', '404')
         self.first_hit = kwargs.get('first_hit')
 
+        self.ignore_content_length = int(
+            kwargs.get('ignore_content_length', 0)
+        )
+
+        self.add_waf_bypass_headers = kwargs.get(
+            'add_waf_bypass_headers',
+            False
+        )
+
         # this can be made redundant in future with better exceptions
-        self.completed_scan=False
-        
-        # this is maintained until likely-matches is refactored to use new class
+        self.completed_scan = False
+
+        # this is maintained until likely-matches is refactored to use
+        # new class
         self.results = []
-        
-        # store associated data for discovered hosts in array for oN, oJ, etc'
+
+        # store associated data for discovered hosts
+        # in array for oN, oJ, etc'
         self.hosts = []
 
         # available user-agents
-        self.user_agents = list(kwargs.get('user_agents')) or [DEFAULT_USER_AGENT]
+        self.user_agents = list(kwargs.get('user_agents')) \
+            or [DEFAULT_USER_AGENT]
 
     @property
     def ignore_http_codes(self):
         return self._ignore_http_codes
 
     @ignore_http_codes.setter
     def ignore_http_codes(self, codes):
-        self._ignore_http_codes = [int(code) for code in codes.replace(' ', '').split(',')]
-
+        self._ignore_http_codes = [
+            int(code) for code in codes.replace(' ', '').split(',')
+        ]
 
     def scan(self):
         if not self.base_host:
@@ -83,9 +99,14 @@ def scan(self):
         for virtual_host in self.wordlist:
             hostname = virtual_host.replace('%s', self.base_host)
 
+            if self.real_port == 80:
+                host_header = hostname
+            else:
+                host_header = '{}:{}'.format(hostname, self.real_port)
+
             headers = {
                 'User-Agent': random.choice(self.user_agents),
-                'Host': hostname if self.real_port == 80 else '{}:{}'.format(hostname, self.real_port),
+                'Host': host_header,
                 'Accept': '*/*'
             }
 
@@ -96,8 +117,13 @@ def scan(self):
                     'X-Remote-IP': '127.0.0.1',
                     'X-Remote-Addr': '127.0.0.1'
                 })
-            
-            dest_url = '{}://{}:{}/'.format('https' if self.ssl else 'http', self.target, self.port)
+
+            dest_url = '{}://{}:{}/'.format(
+                'https' if self.ssl else 'http',
+                self.target,
+                self.port
+            )
+
             _target_host = hostname
 
             try:
@@ -108,7 +134,9 @@ def scan(self):
             if res.status_code in self.ignore_http_codes:
                 continue
 
-            if self.ignore_content_length > 0 and self.ignore_content_length == int(res.headers.get('content-length')):
+            response_length = int(res.headers.get('content-length', 0))
+            if self.ignore_content_length and \
+               self.ignore_content_length == response_length:
                 continue
 
             # hash the page results to aid in identifing unique content
@@ -119,49 +147,59 @@ def scan(self):
             # add url and hash into array for likely matches
             self.results.append(hostname + ',' + page_hash)
 
-            if len(self.hosts) == 2 and self.first_hit:
+            if len(self.hosts) >= 1 and self.first_hit:
                 break
 
-            #rate limit the connection, if the int is 0 it is ignored
+            # rate limit the connection, if the int is 0 it is ignored
             time.sleep(self.rate_limit)
 
-        self.completed_scan=True
-
+        self.completed_scan = True
 
     def likely_matches(self):
         if self.completed_scan is False:
-            print("[!] Likely matches cannot be printed as a scan has not yet been run.")
-            return      
+            print("[!] Likely matches cannot be printed "
+                  "as a scan has not yet been run.")
+            return
 
         # segment results from previous scan into usable results
-        segmented_data={}
+        segmented_data = {}
         for item in self.results:
             result = item.split(",")
             segmented_data[result[0]] = result[1]
 
-        dataframe = pd.DataFrame([[key, value] for key, value in segmented_data.items()], columns=["key_col", "val_col"])
-        segmented_data = dataframe.groupby("val_col").filter(lambda x: len(x) <= self.unique_depth)
-        matches = ((segmented_data["key_col"].values).tolist())
+        dataframe = pd.DataFrame([
+            [key, value] for key, value in segmented_data.items()],
+            columns=["key_col", "val_col"]
+        )
 
-        return matches
+        segmented_data = dataframe.groupby("val_col").filter(
+            lambda x: len(x) <= self.unique_depth
+        )
+
+        return segmented_data["key_col"].values.tolist()
 
     def create_host(self, response, hostname, page_hash):
         """
         Creates a host using the responce and the hash.
         Prints current result in real time.
         """
-        output = '[#] Found: {} (code: {}, length: {}, hash: {})\n'.format(hostname, response.status_code, 
-                                                                    response.headers.get('content-length'), page_hash)
+        output = '[#] Found: {} (code: {}, length: {}, hash: {})\n'.format(
+            hostname,
+            response.status_code,
+            response.headers.get('content-length'),
+            page_hash
+        )
+
         host = discovered_host()
         host.hostname = hostname
         host.response_code = response.status_code
         host.hash = page_hash
-        host.content = response.content
+        host.content = response.content
 
         for key, val in response.headers.items():
             output += '  {}: {}\n'.format(key, val)
             host.keys.append('{}: {}'.format(key, val))
 
         print(output)
 
-        return host
+        return host
diff --git a/lib/helpers/__init__.py b/lib/helpers/__init__.py
@@ -5,4 +5,4 @@
 See the file 'doc/COPYING' for copying permission
 """
 
-pass
+pass
diff --git a/lib/helpers/file_helper.py b/lib/helpers/file_helper.py
@@ -8,7 +8,7 @@ def __init__(self, output_file):
 
     def check_directory(self):
         directory = os.path.dirname(self.output_file)
-        
+
         try:
             os.stat(directory)
         except:
@@ -18,7 +18,7 @@ def check_directory(self):
     # placeholder for error checking on -oJ implementation
     def is_json(json_file):
         try:
-            with open(json_file, "r") as f: 
+            with open(json_file, "r") as f:
                 json_object = json.load(f)
         except ValueError:
             return False
@@ -27,7 +27,7 @@ def is_json(json_file):
     def write_file(self, contents):
         # check if host directory exists, if not create it
         self.check_directory()
-        
+
         with open(self.output_file, "w") as o:
             o.write(contents)
 
diff --git a/lib/helpers/output_helper.py b/lib/helpers/output_helper.py
diff --git a/lib/input.py b/lib/input.py
diff --git a/test-requirements.txt b/test-requirements.txt

-Original file line number
+Diff line change
 See the file 'doc/COPYING' for copying permission
 """
 -pass
 +pass