Skip to content

Commit ea85a57

Browse files
authored
Merge pull request #492 from target/ScanURLValidation
Adding Validation to ScanURL
2 parents c5991a8 + 2f6a78a commit ea85a57

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

src/python/strelka/scanners/scan_url.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import re
22

3+
import validators
4+
35
from strelka import strelka
46

57

@@ -52,9 +54,19 @@ def scan(self, data, file, options, expire_at):
5254
urls = set(url_regex.findall(normalized_data))
5355
for url in urls:
5456
# Strip leading and trailing punctuation characters from the URL.
55-
clean_url = url.strip(b"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~").decode()
56-
if clean_url not in self.event["urls"]:
57-
self.event["urls"].append(clean_url)
57+
strip_trailing_url = url.strip(
58+
b"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
59+
).decode()
60+
61+
# Check to see if there are non-URL chars still in URL:
62+
nonurl_regex_pattern = r'[\^&\(\)+\[\]{}\|"]'
63+
split_uls = re.split(nonurl_regex_pattern, strip_trailing_url)
64+
for split_result in split_uls:
65+
if (
66+
validators.url(split_result)
67+
and split_result not in self.event["urls"]
68+
):
69+
self.event["urls"].append(split_result)
5870

5971
except Exception as e:
6072
self.flags.append(f"scanner_error: {e}")

src/python/strelka/tests/test_scan_url.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ def test_scan_url_text(mocker):
1818
"flags": [],
1919
"urls": unordered(
2020
[
21-
"example.com",
2221
"http://foobar.example.com",
2322
"https://barfoo.example.com",
2423
"ftp://barfoo.example.com",

0 commit comments

Comments
 (0)