Skip to content

Commit 22b0b0e

Browse files
authored
Merge pull request MikeMeliz#13 from the-siegfried/update-docstrings
update docstrings
2 parents 5ac23bb + 131af5b commit 22b0b0e

File tree

3 files changed

+14
-13
lines changed

3 files changed

+14
-13
lines changed

modules/checker.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111

1212

1313
def url_canon(website, verbose):
14-
"""
14+
""" URL normalisation/canonicalization
1515
16-
:param website: String -
17-
:param verbose: Boolean -
18-
:return: String 'website' -
16+
:param website: String - URL of website.
17+
:param verbose: Boolean - Verbose logging switch.
18+
:return: String 'website' - normalised result.
1919
"""
2020
if not website.startswith("http"):
2121
if not website.startswith("www."):
@@ -29,17 +29,18 @@ def url_canon(website, verbose):
2929

3030

3131
def extract_domain(url, remove_http=True):
32-
"""
32+
""" Parses the provided 'url' to provide only the netloc or
33+
scheme + netloc parts of the provided url.
3334
34-
:param url: String -
35-
:param remove_http: Boolean -
36-
:return: String 'domain_name' -
35+
:param url: String - Url to parse.
36+
:param remove_http: Boolean - If True, return only the netloc; otherwise return scheme + netloc.
37+
:return: String 'domain_name' - Resulting parsed URL.
3738
"""
3839
uri = urlparse(url)
3940
if remove_http:
4041
domain_name = f"{uri.netloc}"
4142
else:
42-
domain_name = f"{uri.netloc}://{uri.netloc}"
43+
domain_name = f"{uri.scheme}://{uri.netloc}"
4344
return domain_name
4445

4546

modules/crawler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(self, website, c_depth, c_pause, out_path, logs, verbose):
2222
def excludes(self, link):
2323
""" Excludes links that are not required.
2424
25-
:param link:
25+
:param link: String
2626
:return: Boolean
2727
"""
2828
# BUG: For NoneType Exceptions, got to find a solution here
@@ -57,8 +57,8 @@ def excludes(self, link):
5757
def canonical(self, link):
5858
""" Canonization of the link.
5959
60-
:param link:
61-
:return:
60+
:param link: String
61+
:return: String 'final_link': parsed canonical url.
6262
"""
6363
# Already formatted
6464
if link.startswith(self.website):

torcrawl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def main():
176176
website = ''
177177
out_path = ''
178178

179-
# Canon/ion of website and create path for output
179+
# Canonicalization of web url and create path for output.
180180
if len(args.url) > 0:
181181
website = url_canon(args.url, args.verbose)
182182
if args.folder is not None:

0 commit comments

Comments
 (0)