Skip to content

Commit

Permalink
Allow anchors to be ignored for specific URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
picnixz committed Jul 18, 2023
1 parent 571becb commit 3ca85aa
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 0 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Expand Up @@ -18,6 +18,9 @@ Deprecated
Features added
--------------

* #11484: linkcheck: allow HTML anchors to be ignored per URL via
:confval:`linkcheck_anchors_ignore_for_url` while still checking
the validity of the page itself. Patch by Bénédikt Tran.
* #11415: Add a checksum to JavaScript and CSS asset URIs included within
generated HTML, using the CRC32 algorithm.
* :meth:`~sphinx.application.Sphinx.require_sphinx` now allows the version
Expand Down
11 changes: 11 additions & 0 deletions doc/usage/configuration.rst
Expand Up @@ -2827,8 +2827,19 @@ Options for the linkcheck builder
'https://www.sphinx-doc.org/en/1.7/intro.html#'
]

Use :confval:`linkcheck_anchors_ignore_for_url` to ignore the anchors of
specific pages while still checking the validity of the pages themselves.

.. versionadded:: 1.5

.. confval:: linkcheck_anchors_ignore_for_url

A list of regular expressions that match URLs for which Sphinx should not
check the validity of anchors. This allows skipping anchor checks on a
per-page basis while still checking the validity of the page itself.
Default is ``[]``.

.. versionadded:: 7.1

.. confval:: linkcheck_auth

Pass authentication information when doing a ``linkcheck`` build.
Expand Down
9 changes: 9 additions & 0 deletions sphinx/builders/linkcheck.py
Expand Up @@ -258,6 +258,9 @@ def __init__(self, env: BuildEnvironment, config: Config, rqueue: Queue[CheckRes

self.anchors_ignore = [re.compile(x)
for x in self.config.linkcheck_anchors_ignore]
self.anchors_ignore_for_url = [re.compile(x)
for x in self.config.linkcheck_anchors_ignore_for_url]

self.documents_exclude = [re.compile(doc)
for doc in self.config.linkcheck_exclude_documents]
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
Expand Down Expand Up @@ -293,6 +296,11 @@ def check_uri() -> tuple[str, str, int]:
if rex.match(anchor):
anchor = None
break
else:
for rex in self.anchors_ignore_for_url:
if rex.match(req_url):
anchor = None
break
else:
req_url = uri
anchor = None
Expand Down Expand Up @@ -575,6 +583,7 @@ def setup(app: Sphinx) -> dict[str, Any]:
# Anchors starting with ! are ignored since they are
# commonly used for dynamic pages
app.add_config_value('linkcheck_anchors_ignore', ["^!"], False)
app.add_config_value('linkcheck_anchors_ignore_for_url', [], False)
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, False)

app.add_event('linkcheck-process-uri')
Expand Down
3 changes: 3 additions & 0 deletions tests/roots/test-linkcheck-anchors-ignore-for-url/conf.py
@@ -0,0 +1,3 @@
# Sphinx configuration for the ``linkcheck-anchors-ignore-for-url`` test root.
exclude_patterns = ['_build']
# Turn anchor checking on so the test can observe which anchors get skipped.
linkcheck_anchors = True
# NOTE(review): presumably seconds; kept tiny so requests fail fast -- confirm.
linkcheck_timeout = 0.05
7 changes: 7 additions & 0 deletions tests/roots/test-linkcheck-anchors-ignore-for-url/index.rst
@@ -0,0 +1,7 @@
* `Example valid url, no anchor <http://localhost:7777/valid>`_
* `Example valid url, valid anchor <http://localhost:7777/valid#valid-anchor>`_
* `Example valid url, invalid anchor <http://localhost:7777/valid#invalid-anchor>`_
* `Example ignored url, no anchor <http://localhost:7777/ignored>`_
* `Example ignored url, invalid anchor <http://localhost:7777/ignored#invalid-anchor>`_
* `Example invalid url, no anchor <http://localhost:7777/invalid>`_
* `Example invalid url, invalid anchor <http://localhost:7777/invalid#anchor>`_
58 changes: 58 additions & 0 deletions tests/test_build_linkcheck.py
Expand Up @@ -178,6 +178,64 @@ def test_anchors_ignored(app):
assert not content


class AnchorsIgnoreForUrlHandler(http.server.BaseHTTPRequestHandler):
    """Minimal HTTP handler backing the per-URL anchor-ignore test.

    ``/valid`` and ``/ignored`` answer with ``200 OK``; every other path
    answers with ``404 Not Found``.  On ``GET``, ``/valid`` serves a document
    containing the ``valid-anchor`` id, ``/ignored`` serves a document without
    any anchor, and all other paths serve no body.
    """

    def do_HEAD(self):
        """Send the status line and headers for the requested path."""
        page_exists = self.path in ('/valid', '/ignored')
        status, reason = (200, "OK") if page_exists else (404, "Not Found")
        self.send_response(status, reason)
        self.end_headers()

    def do_GET(self):
        """Send the same headers as ``HEAD``, then the page body if any."""
        self.do_HEAD()
        bodies = {
            '/valid': b"<h1 id='valid-anchor'>valid anchor</h1>\n",
            '/ignored': b"no anchor but page exists\n",
        }
        payload = bodies.get(self.path)
        if payload is not None:
            self.wfile.write(payload)


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-anchors-ignore-for-url', freshenv=True,
    confoverrides={'linkcheck_anchors_ignore_for_url': [
        'http://localhost:7777/ignored',  # existing page
        'http://localhost:7777/invalid',  # unknown page
    ]})
def test_anchors_ignored_for_url(app):
    """Check that anchors are skipped only for URLs matching the setting.

    Expected outcomes, as asserted below:

    * ``/valid`` is not in the ignore list, so its anchors are still checked:
      the existing anchor works and the missing one is reported as broken.
    * ``/ignored`` is in the ignore list and the page exists, so a link with
      a missing anchor is still reported as working.
    * ``/invalid`` is in the ignore list but the page does not exist, so the
      link is reported as broken with the 404 error, with or without anchor.
    """
    with http_server(AnchorsIgnoreForUrlHandler):
        app.build()

    assert (app.outdir / 'output.txt').exists()
    # The assertions below parse the JSON report, so its existence is
    # verified explicitly instead of relying on the text report alone.
    json_report = app.outdir / 'output.json'
    assert json_report.exists()
    content = json_report.read_text(encoding='utf8')

    attrs = ('filename', 'lineno', 'status', 'code', 'uri', 'info')
    data = [json.loads(line) for line in content.splitlines()]
    assert all(all(attr in row for attr in attrs) for row in data)
    assert len(data) == 7

    # rows may be unsorted due to network latency or
    # the order the threads are processing the links
    rows = {r['uri']: {'status': r['status'], 'info': r['info']} for r in data}

    assert rows['http://localhost:7777/valid']['status'] == 'working'
    assert rows['http://localhost:7777/valid#valid-anchor']['status'] == 'working'
    assert rows['http://localhost:7777/valid#invalid-anchor'] == {
        'status': 'broken',
        'info': "Anchor 'invalid-anchor' not found",
    }

    assert rows['http://localhost:7777/ignored']['status'] == 'working'
    assert rows['http://localhost:7777/ignored#invalid-anchor']['status'] == 'working'

    assert rows['http://localhost:7777/invalid'] == {
        'status': 'broken',
        'info': '404 Client Error: Not Found for url: http://localhost:7777/invalid',
    }
    assert rows['http://localhost:7777/invalid#anchor'] == {
        'status': 'broken',
        'info': '404 Client Error: Not Found for url: http://localhost:7777/invalid',
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True)
def test_raises_for_invalid_status(app):
class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler):
Expand Down

0 comments on commit 3ca85aa

Please sign in to comment.