Commit e14970b

ENH: linkcheck: also write all links to output.json
* TST: linkcheck: make tests more flexible
* CLN: linkcheck: flake8, mypy
* REF: linkcheck: docpath->filename, write_jsonline->write_linkstat
* REF: linkcheck: remove redundant call to doc2path
* TST: linkcheck: show JSON obj structure in test
* REF: linkcheck: remove docname from JSON obj because it's redundant (use path2doc(filename) if necessary)
* TST: linkcheck: regex row["info"] due to included id()
1 parent 5018422 commit e14970b
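
For context, with this change the linkcheck builder appends one JSON object per checked link to output.json, one object per line (JSON Lines), with the keys filename, lineno, status, code, uri and info. A minimal sketch of reading it back, assuming a build directory of _build/linkcheck (the path is an assumption, not part of this commit):

    import json

    # Each line of output.json is an independent JSON object, so the file is
    # parsed line by line rather than with a single json.load() call.
    with open('_build/linkcheck/output.json', encoding='utf-8') as f:
        rows = [json.loads(line) for line in f if line.strip()]

    # Report only the broken links.
    for row in rows:
        if row['status'] == 'broken':
            print('%s:%s: %s (%s)' % (row['filename'], row['lineno'],
                                      row['uri'], row['info']))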

2 files changed (+88 −10 lines)

sphinx/builders/linkcheck.py

Lines changed: 30 additions & 9 deletions
@@ -8,6 +8,7 @@
     :license: BSD, see LICENSE for details.
 """
 
+import json
 import queue
 import re
 import socket
@@ -90,6 +91,8 @@ def init(self) -> None:
         socket.setdefaulttimeout(5.0)
         # create output file
         open(path.join(self.outdir, 'output.txt'), 'w').close()
+        # create JSON output file
+        open(path.join(self.outdir, 'output.json'), 'w').close()
 
         # create queues and worker threads
         self.wqueue = queue.Queue()  # type: queue.Queue
@@ -225,9 +228,16 @@ def check() -> Tuple[str, str, int]:
 
     def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
         uri, docname, lineno, status, info, code = result
+
+        filename = self.env.doc2path(docname, None)
+        linkstat = dict(filename=filename, lineno=lineno,
+                        status=status, code=code, uri=uri,
+                        info=info)
         if status == 'unchecked':
+            self.write_linkstat(linkstat)
             return
         if status == 'working' and info == 'old':
+            self.write_linkstat(linkstat)
             return
         if lineno:
             logger.info('(line %4d) ', lineno, nonl=True)
@@ -236,18 +246,22 @@ def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
                 logger.info(darkgray('-ignored- ') + uri + ': ' + info)
             else:
                 logger.info(darkgray('-ignored- ') + uri)
+            self.write_linkstat(linkstat)
         elif status == 'local':
             logger.info(darkgray('-local- ') + uri)
-            self.write_entry('local', docname, lineno, uri)
+            self.write_entry('local', docname, filename, lineno, uri)
+            self.write_linkstat(linkstat)
         elif status == 'working':
             logger.info(darkgreen('ok ') + uri + info)
+            self.write_linkstat(linkstat)
         elif status == 'broken':
-            self.write_entry('broken', docname, lineno, uri + ': ' + info)
             if self.app.quiet or self.app.warningiserror:
                 logger.warning(__('broken link: %s (%s)'), uri, info,
-                               location=(self.env.doc2path(docname), lineno))
+                               location=(filename, lineno))
             else:
                 logger.info(red('broken ') + uri + red(' - ' + info))
+            self.write_entry('broken', docname, filename, lineno, uri + ': ' + info)
+            self.write_linkstat(linkstat)
         elif status == 'redirected':
             try:
                 text, color = {
@@ -259,9 +273,11 @@ def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
                 }[code]
             except KeyError:
                 text, color = ('with unknown code', purple)
-            self.write_entry('redirected ' + text, docname, lineno,
-                             uri + ' to ' + info)
+            linkstat['text'] = text
             logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info))
+            self.write_entry('redirected ' + text, docname, filename,
+                             lineno, uri + ' to ' + info)
+            self.write_linkstat(linkstat)
 
     def get_target_uri(self, docname: str, typ: str = None) -> str:
         return ''
@@ -301,10 +317,15 @@ def write_doc(self, docname: str, doctree: Node) -> None:
         if self.broken:
             self.app.statuscode = 1
 
-    def write_entry(self, what: str, docname: str, line: int, uri: str) -> None:
-        with open(path.join(self.outdir, 'output.txt'), 'a', encoding='utf-8') as output:
-            output.write("%s:%s: [%s] %s\n" % (self.env.doc2path(docname, None),
-                                               line, what, uri))
+    def write_entry(self, what: str, docname: str, filename: str, line: int,
+                    uri: str) -> None:
+        with open(path.join(self.outdir, 'output.txt'), 'a') as output:
+            output.write("%s:%s: [%s] %s\n" % (filename, line, what, uri))
+
+    def write_linkstat(self, data: dict) -> None:
+        with open(path.join(self.outdir, 'output.json'), 'a') as output:
+            output.write(json.dumps(data))
+            output.write('\n')
 
     def finish(self) -> None:
         for worker in self.workers:
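
Note that write_linkstat appends one json.dumps() object plus a newline per link, so output.json is newline-delimited JSON rather than a single JSON document, and only redirected entries carry the extra 'text' key set in process_result. An illustrative sketch of the resulting record shape, with hypothetical values that are not taken from the commit:

    import json

    # Hypothetical record, built the same way process_result builds linkstat.
    linkstat = dict(filename='index.rst', lineno=42, status='redirected',
                    code=0, uri='http://example.com/old',
                    info='http://example.com/new')
    linkstat['text'] = 'with unknown code'  # only 'redirected' rows get 'text'
    print(json.dumps(linkstat))  # exactly one line is appended per checked link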

tests/test_build_linkcheck.py

Lines changed: 58 additions & 1 deletion
@@ -8,6 +8,8 @@
     :license: BSD, see LICENSE for details.
 """
 
+import json
+import re
 from unittest import mock
 import pytest
 
@@ -20,7 +22,7 @@ def test_defaults(app, status, warning):
     content = (app.outdir / 'output.txt').read_text()
 
     print(content)
-    # looking for '#top' and 'does-not-exist' not found should fail
+    # looking for '#top' and '#does-not-exist' not found should fail
     assert "Anchor 'top' not found" in content
     assert "Anchor 'does-not-exist' not found" in content
     # looking for non-existent URL should fail
@@ -31,6 +33,61 @@ def test_defaults(app, status, warning):
     assert len(content.splitlines()) == 5
 
 
+@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
+def test_defaults_json(app, status, warning):
+    app.builder.build_all()
+
+    assert (app.outdir / 'output.json').exists()
+    content = (app.outdir / 'output.json').read_text()
+    print(content)
+
+    rows = [json.loads(x) for x in content.splitlines()]
+    row = rows[0]
+    for attr in ["filename", "lineno", "status", "code", "uri",
+                 "info"]:
+        assert attr in row
+
+    assert len(content.splitlines()) == 8
+    assert len(rows) == 8
+    # the output order of the rows is not stable
+    # due to possible variance in network latency
+    rowsby = {row["uri"]:row for row in rows}
+    assert rowsby["https://www.google.com#!bar"] == {
+        'filename': 'links.txt',
+        'lineno': 10,
+        'status': 'working',
+        'code': 0,
+        'uri': 'https://www.google.com#!bar',
+        'info': ''
+    }
+    # looking for non-existent URL should fail
+    dnerow = rowsby['https://localhost:7777/doesnotexist']
+    assert dnerow['filename'] == 'links.txt'
+    assert dnerow['lineno'] == 13
+    assert dnerow['status'] == 'broken'
+    assert dnerow['code'] == 0
+    assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
+    assert re.match(
+        r"HTTPSConnectionPool\(host='localhost', port=7777\): Max retries exceeded with url: /doesnotexist \(Caused by NewConnectionError\('<urllib3.connection.VerifiedHTTPSConnection object at ([x\d\w]+)>: Failed to establish a new connection: \[Errno 111] Connection refused'\)\)",
+        dnerow["info"])
+    assert rowsby['https://www.google.com/image2.png'] == {
+        'filename': 'links.txt',
+        'lineno': 16,
+        'status': 'broken',
+        'code': 0,
+        'uri': 'https://www.google.com/image2.png',
+        'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png'
+    }
+    # looking for '#top' and '#does-not-exist' not found should fail
+    assert "Anchor 'top' not found" == \
+        rowsby["https://www.google.com/#top"]["info"]
+    assert "Anchor 'does-not-exist' not found" == \
+        rowsby["http://www.sphinx-doc.org/en/1.7/intro.html#does-not-exist"]["info"]
+    # images should fail
+    assert "Not Found for url: https://www.google.com/image.png" in \
+        rowsby["https://www.google.com/image.png"]["info"]
+
+
 @pytest.mark.sphinx(
     'linkcheck', testroot='linkcheck', freshenv=True,
     confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"],
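
As the comment in test_defaults_json notes, the row order in output.json is not stable because network latency varies, so the test indexes rows by URI rather than by position. A short sketch of the same pattern outside the test suite, again assuming a _build/linkcheck output directory:

    import json

    with open('_build/linkcheck/output.json', encoding='utf-8') as f:
        rows = [json.loads(line) for line in f if line.strip()]
    rowsby = {row['uri']: row for row in rows}

    # Look up one link's result regardless of the order it was checked in.
    print(rowsby.get('https://www.google.com/image2.png', {}).get('status'))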
