Commit 36a1d23: Stabilized v1.3.1

2 parents: ffd2445 + 0994f8c

17 files changed: +1728 / -1093 lines

README.md

Lines changed: 3 additions & 3 deletions
@@ -170,9 +170,9 @@ If you have problems with starting installer.sh, you should try to use `dos2unix
 
 # Tasks to complete before new release
 - [ ] CLI rework (more fancy and user-friendly)
-- [ ] Report storage database rework
-- [ ] HTML report rework
-
+- [ ] Report storage database rework (more information to store)
+- [ ] HTML report rework (modern style and look; functionality expansion)
+
 
 # DPULSE mentions in social medias
 
 ## Honorable mentions:

datagather_modules/crawl_processor.py

Lines changed: 10 additions & 15 deletions
@@ -1,22 +1,17 @@
 import sys
+import socket
+import re
+import urllib
+from collections import defaultdict
+from urllib.parse import urlparse
+import whois
+import requests
+from bs4 import BeautifulSoup
+from colorama import Fore, Style
+
 sys.path.append('service')
 from logs_processing import logging
 
-try:
-    import socket
-    import whois
-    import re
-    import requests
-    import urllib.parse
-    from colorama import Fore, Style
-    from urllib.parse import urlparse
-    from collections import defaultdict
-    from bs4 import BeautifulSoup
-    import random
-except ImportError as e:
-    print(Fore.RED + "Import error appeared. Reason: {}".format(e) + Style.RESET_ALL)
-    sys.exit()
-
 def ip_gather(short_domain):
     ip_address = socket.gethostbyname(short_domain)
     return ip_address
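Note on the import rework above: the removed try/except guard printed its error with Fore.RED, which itself raises a NameError when colorama is the package that failed to import. A minimal sketch of a guard that avoids that trap (illustration only, not part of this commit):

import sys

try:
    import socket
    import whois
    import requests
    from colorama import Fore, Style
except ImportError as e:
    # Plain print on purpose: Fore/Style may be undefined if colorama itself is missing.
    print(f"Import error appeared. Reason: {e}")
    sys.exit(1)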

datagather_modules/data_assembler.py

Lines changed: 26 additions & 20 deletions
@@ -1,34 +1,24 @@
 import sys
-sys.path.append('service')
-sys.path.append('pagesearch')
-sys.path.append('dorking')
-sys.path.append('snapshotting')
+from datetime import datetime
+import os
+from colorama import Fore, Style
 
+sys.path.extend(['service', 'pagesearch', 'dorking', 'snapshotting'])
+
+from logs_processing import logging
+from config_processing import read_config
+from db_creator import get_dorking_query
 import crawl_processor as cp
 import dorking_handler as dp
 import networking_processor as np
 from pagesearch_parsers import subdomains_parser
-from logs_processing import logging
 from api_virustotal import api_virustotal_check
 from api_securitytrails import api_securitytrails_check
 from api_hudsonrock import api_hudsonrock_check
-from db_creator import get_dorking_query
 from screen_snapshotting import take_screenshot
-from config_processing import read_config
 from html_snapshotting import save_page_as_html
 from archive_snapshotting import download_snapshot
 
-try:
-    import requests
-    from datetime import datetime
-    import os
-    from colorama import Fore, Style
-    import sqlite3
-    import configparser
-except ImportError as e:
-    print(Fore.RED + "Import error appeared. Reason: {}".format(e) + Style.RESET_ALL)
-    sys.exit()
-
 def establishing_dork_db_connection(dorking_flag):
     dorking_db_paths = {
         'basic': 'dorking//basic_dorking.db',

@@ -118,6 +108,10 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
         for key in common_socials:
             common_socials[key] = list(set(common_socials[key]))
         total_socials = sum(len(values) for values in common_socials.values())
+        total_ports = len(ports)
+        total_ips = len(subdomain_ip) + 1
+        total_vulns = len(vulns)
+
         print(Fore.LIGHTMAGENTA_EX + "\n[BASIC SCAN END]\n" + Style.RESET_ALL)
         if report_file_type == 'xlsx':
             if pagesearch_flag.lower() == 'y':

@@ -206,7 +200,17 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
                 if subdomains[0] != 'No subdomains were found':
                     to_search_array = [subdomains, social_medias, sd_socials]
                     print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH]\n" + Style.RESET_ALL)
-                    ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, keywords_messages_list = subdomains_parser(to_search_array[0], report_folder, keywords, keywords_flag)
+                    (
+                        ps_emails_return,
+                        accessible_subdomains,
+                        emails_amount,
+                        files_counter,
+                        cookies_counter,
+                        api_keys_counter,
+                        website_elements_counter,
+                        exposed_passwords_counter,
+                        keywords_messages_list
+                    ), ps_string = subdomains_parser(to_search_array[0], report_folder, keywords, keywords_flag)
                     total_links_counter = accessed_links_counter = "No results because PageSearch does not gather these categories"
                     if len(keywords_messages_list) == 0:
                         keywords_messages_list = ['No keywords were found']

@@ -215,11 +219,13 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
                     print(Fore.RED + "Cant start PageSearch because no subdomains were detected")
                     ps_emails_return = ""
                     accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = 'No results because no subdomains were found'
+                    ps_string = 'No PageSearch listing provided because no subdomains were found'
                     keywords_messages_list = ['No data was gathered because no subdomains were found']
                     pass
             elif pagesearch_flag.lower() == 'n':
                 accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = keywords_messages_list = "No results because user did not selected PageSearch for this scan"
                 ps_emails_return = ""
+                ps_string = 'No PageSearch listing provided because user did not selected PageSearch mode for this scan'
                 pass
 
         if dorking_flag == 'n':

@@ -282,7 +288,7 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
             hostnames, cpes, tags, vulns, common_socials, total_socials, ps_emails_return,
             accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter,
             website_elements_counter, exposed_passwords_counter, total_links_counter, accessed_links_counter, keywords_messages_list, dorking_status, dorking_file_path,
-            virustotal_output, securitytrails_output, hudsonrock_output]
+            virustotal_output, securitytrails_output, hudsonrock_output, ps_string, total_ports, total_ips, total_vulns]
 
         report_info_array = [casename, db_casename, db_creation_date, report_folder, ctime, report_file_type, report_ctime, api_scan_db, used_api_flag]
         logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} ENDS HERE')
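The new call site above unpacks the result of subdomains_parser into a nested 9-item tuple plus a separate ps_string value, so the parser is now expected to return a pair. A rough sketch of that return shape with placeholder values (the real implementation lives in the pagesearch module, which is not shown in this commit view):

def subdomains_parser_stub(subdomains_list, report_folder, keywords, keywords_flag):
    # Hypothetical stand-in illustrating the (counters_tuple, ps_string) contract.
    counters = (
        "",   # ps_emails_return
        0,    # accessible_subdomains
        0,    # emails_amount
        0,    # files_counter
        0,    # cookies_counter
        0,    # api_keys_counter
        0,    # website_elements_counter
        0,    # exposed_passwords_counter
        [],   # keywords_messages_list
    )
    ps_string = "textual PageSearch listing for the report"  # placeholder
    return counters, ps_string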

dorking/dorking_handler.py

Lines changed: 91 additions & 43 deletions
@@ -1,22 +1,18 @@
 import sys
+import random
+import time
+import os
+import logging
+from colorama import Fore, Style
+import undetected_chromedriver as uc
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+
 sys.path.append('service')
-from config_processing import read_config
 from logs_processing import logging
 from ua_rotator import user_agent_rotator
 from proxies_rotator import proxies_rotator
-
-try:
-    import requests.exceptions
-    from colorama import Fore, Style
-    import mechanicalsoup
-    import re
-    import requests
-    import sqlite3
-    import time
-    import os
-except ImportError as e:
-    print(Fore.RED + "Import error appeared. Reason: {}".format(e) + Style.RESET_ALL)
-    sys.exit()
+from config_processing import read_config
 
 def proxy_transfer():
     proxy_flag, proxies_list = proxies_rotator.get_proxies()

@@ -27,44 +23,96 @@ def proxy_transfer():
         working_proxies = proxies_rotator.check_proxies(proxies_list)
         return proxy_flag, working_proxies
 
-def solid_google_dorking(query, dorking_delay, delay_step, proxy_flag, proxies_list, pages=100):
+def solid_google_dorking(query, proxy_flag, proxies_list, pages=1):
+    result_query = []
+    request_count = 0
     try:
-        browser = mechanicalsoup.StatefulBrowser()
-        if proxy_flag == 1:
-            browser.session.proxies = proxies_rotator.get_random_proxy(proxies_list)
-        else:
+        config_values = read_config()
+        options = uc.ChromeOptions()
+        options.binary_location = r"{}".format(config_values['dorking_browser'])
+        dorking_browser_mode = config_values['dorking_browser_mode']
+        if dorking_browser_mode.lower() == 'headless':
+            options.add_argument("--headless=new")
+        elif dorking_browser_mode.lower() == 'nonheadless':
             pass
-        browser.open("https://www.google.com/")
-        browser.select_form('form[action="/search"]')
-        browser["q"] = str(query)
-        browser.submit_selected(btnName="btnG")
-        result_query = []
-        request_count = 0
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_argument("--disable-blink-features=AutomationControlled")
+        options.add_argument("--disable-infobars")
+        options.add_argument("--disable-extensions")
+        options.add_argument(f"user-agent={user_agent_rotator.get_random_user_agent()}")
+        if proxy_flag == 1:
+            proxy = proxies_rotator.get_random_proxy(proxies_list)
+            options.add_argument(f'--proxy-server={proxy["http"]}')
+        driver = uc.Chrome(options=options)
         for page in range(pages):
             try:
-                for link in browser.links():
-                    target = link.attrs['href']
-                    if (target.startswith('/url?') and not target.startswith("/url?q=http://webcache.googleusercontent.com")):
-                        target = re.sub(r"^/url\?q=([^&]*)&.*", r"\1", target)
-                        result_query.append(target)
+                driver.get("https://www.google.com")
+                time.sleep(random.uniform(2, 4))
+                try:
+                    accepted = False
+                    try:
+                        accept_btn = driver.find_element(By.XPATH, '//button[contains(text(), "Принять все") or contains(text(), "Accept all")]')
+                        driver.execute_script("arguments[0].click();", accept_btn)
+                        print(Fore.GREEN + 'Pressed "Accept all" button!' + Style.RESET_ALL)
+                        accepted = True
+                        time.sleep(random.uniform(2, 3))
+                    except:
+                        pass
+                    if not accepted:
+                        iframes = driver.find_elements(By.TAG_NAME, "iframe")
+                        for iframe in iframes:
+                            driver.switch_to.frame(iframe)
+                            try:
+                                accept_btn = driver.find_element(By.XPATH, '//button[contains(text(), "Принять все") or contains(text(), "Accept all")]')
+                                driver.execute_script("arguments[0].click();", accept_btn)
+                                print(Fore.GREEN + 'Pressed "Accept all" button!' + Style.RESET_ALL)
+                                accepted = True
+                                driver.switch_to.default_content()
+                                time.sleep(random.uniform(2, 3))
+                                break
+                            except:
+                                driver.switch_to.default_content()
+                                continue
+                        driver.switch_to.default_content()
+                    if not accepted:
+                        print(Fore.GREEN + "Google TOS button was not found. Seems good..." + Style.RESET_ALL)
+                except Exception:
+                    print(Fore.RED + f'Error with pressing "Accept all" button. Closing...' + Style.RESET_ALL)
+                    driver.save_screenshot("consent_error.png")
+                    driver.switch_to.default_content()
+                search_box = driver.find_element(By.NAME, "q")
+                for char in query:
+                    search_box.send_keys(char)
+                    time.sleep(random.uniform(0.05, 0.2))
+                time.sleep(random.uniform(0.5, 1.2))
+                search_box.send_keys(Keys.RETURN)
+                time.sleep(random.uniform(2.5, 4))
+                links = driver.find_elements(By.CSS_SELECTOR, 'a')
+                for link in links:
+                    href = link.get_attribute('href')
+                    if href and href.startswith('http') and 'google.' not in href and 'webcache.googleusercontent.com' not in href:
+                        result_query.append(href)
                 request_count += 1
-                if request_count % delay_step == 0:
-                    time.sleep(dorking_delay)
-                browser.session.headers['User-Agent'] = user_agent_rotator.get_random_user_agent()
-                browser.follow_link(nr=page + 1)
-            except mechanicalsoup.LinkNotFoundError:
-                break
+                try:
+                    next_button = driver.find_element(By.ID, 'pnnext')
+                    next_button.click()
+                    time.sleep(random.uniform(2, 3))
+                except:
+                    break
             except Exception as e:
-                logging.error(f'DORKING PROCESSING: ERROR. REASON: {e}')
-        del result_query[-2:]
+                logging.error(f'DORKING PROCESSING (SELENIUM): ERROR. REASON: {e}')
+                continue
+        driver.quit()
+        if len(result_query) >= 2:
+            del result_query[-2:]
         return result_query
-    except requests.exceptions.ConnectionError as e:
-        print(Fore.RED + "Error while establishing connection with domain. No results will appear. See journal for details" + Style.RESET_ALL)
-        logging.error(f'DORKING PROCESSING: ERROR. REASON: {e}')
     except Exception as e:
         logging.error(f'DORKING PROCESSING: ERROR. REASON: {e}')
+        print(Fore.RED + "Error while running Selenium dorking. See journal for details." + Style.RESET_ALL)
+        return []
 
-def save_results_to_txt(folderpath, table, queries, pages=10):
+def save_results_to_txt(folderpath, table, queries, pages=1):
     try:
         config_values = read_config()
         dorking_delay = int(config_values['dorking_delay (secs)'])

@@ -80,7 +128,7 @@ def save_results_to_txt(folderpath, table, queries, pages=10):
             for i, query in enumerate(queries, start=1):
                 f.write(f"QUERY #{i}: {query}\n")
                 try:
-                    results = solid_google_dorking(query, dorking_delay, delay_step, proxy_flag, proxies_list, pages)
+                    results = solid_google_dorking(query, proxy_flag, proxies_list, pages)
                     if not results:
                         f.write("=> NO RESULT FOUND\n")
                         total_results.append((query, 0))
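Taken together, the dorking rework drops mechanicalsoup in favor of undetected_chromedriver and Selenium: solid_google_dorking now takes only the query, proxy settings and page count, and reads the browser binary path and mode from the configuration instead. A hedged usage sketch, assuming the configuration returned by read_config() exposes the 'dorking_browser' (path to a Chrome/Chromium binary) and 'dorking_browser_mode' ('headless' or 'nonheadless') keys that appear in the diff above:

from dorking_handler import proxy_transfer, solid_google_dorking

proxy_flag, proxies_list = proxy_transfer()      # probe configured proxies, if any
links = solid_google_dorking(
    'site:example.com filetype:pdf',             # example dork query
    proxy_flag,
    proxies_list,
    pages=1,                                     # new default is a single results page
)
for link in links:
    print(link)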
