diff --git a/CHANGES b/CHANGES index e66f11cbf7e..f4716eab8db 100644 --- a/CHANGES +++ b/CHANGES @@ -18,6 +18,9 @@ Deprecated Features added -------------- +* #11484: linkcheck: allow HTML anchors to be ignored per URL via + :confval:`linkcheck_anchors_ignore_for_url` while still checking + the validity of the page itself. Patch by Bénédikt Tran. * #11415: Add a checksum to JavaScript and CSS asset URIs included within generated HTML, using the CRC32 algorithm. * :meth:`~sphinx.application.Sphinx.require_sphinx` now allows the version diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 133e2099df9..359325183bc 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -2827,8 +2827,19 @@ Options for the linkcheck builder 'https://www.sphinx-doc.org/en/1.7/intro.html#' ] + Use :confval:`linkcheck_anchors_ignore_for_url` to ignore anchors of a + specific page while still checking the validity of the page itself. + .. versionadded:: 1.5 +.. confval:: linkcheck_anchors_ignore_for_url + + A list of regular expressions matching URLs for which Sphinx should not + check the validity of anchors. This allows skipping anchor checks while still + checking the validity of the page itself. Default is ``[]``. + + .. versionadded:: 7.1 + .. confval:: linkcheck_auth Pass authentication information when doing a ``linkcheck`` build. 
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 6c99f96e685..d47fcd88dc5 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -258,6 +258,9 @@ def __init__(self, env: BuildEnvironment, config: Config, rqueue: Queue[CheckRes self.anchors_ignore = [re.compile(x) for x in self.config.linkcheck_anchors_ignore] + self.anchors_ignore_for_url = [re.compile(x) + for x in self.config.linkcheck_anchors_ignore_for_url] + self.documents_exclude = [re.compile(doc) for doc in self.config.linkcheck_exclude_documents] self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info @@ -293,6 +296,11 @@ def check_uri() -> tuple[str, str, int]: if rex.match(anchor): anchor = None break + else: + for rex in self.anchors_ignore_for_url: + if rex.match(req_url): + anchor = None + break else: req_url = uri anchor = None @@ -575,6 +583,7 @@ def setup(app: Sphinx) -> dict[str, Any]: # Anchors starting with ! are ignored since they are # commonly used for dynamic pages app.add_config_value('linkcheck_anchors_ignore', ["^!"], False) + app.add_config_value('linkcheck_anchors_ignore_for_url', [], False) app.add_config_value('linkcheck_rate_limit_timeout', 300.0, False) app.add_event('linkcheck-process-uri') diff --git a/tests/roots/test-linkcheck-anchors-ignore-for-url/conf.py b/tests/roots/test-linkcheck-anchors-ignore-for-url/conf.py new file mode 100644 index 00000000000..0005bfadafe --- /dev/null +++ b/tests/roots/test-linkcheck-anchors-ignore-for-url/conf.py @@ -0,0 +1,3 @@ +exclude_patterns = ['_build'] +linkcheck_anchors = True +linkcheck_timeout = 0.05 diff --git a/tests/roots/test-linkcheck-anchors-ignore-for-url/index.rst b/tests/roots/test-linkcheck-anchors-ignore-for-url/index.rst new file mode 100644 index 00000000000..df287b4c425 --- /dev/null +++ b/tests/roots/test-linkcheck-anchors-ignore-for-url/index.rst @@ -0,0 +1,7 @@ +* `Example valid url, no anchor <http://localhost:7777/valid>`_ +* `Example valid url, valid anchor <http://localhost:7777/valid#valid-anchor>`_ +* `Example valid 
url, invalid anchor <http://localhost:7777/valid#invalid-anchor>`_ +* `Example ignored url, no anchor <http://localhost:7777/ignored>`_ +* `Example ignored url, invalid anchor <http://localhost:7777/ignored#invalid-anchor>`_ +* `Example invalid url, no anchor <http://localhost:7777/invalid>`_ +* `Example invalid url, invalid anchor <http://localhost:7777/invalid#anchor>`_ diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 260cf2c4214..c6370f642cb 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -178,6 +178,64 @@ def test_anchors_ignored(app): assert not content +class AnchorsIgnoreForUrlHandler(http.server.BaseHTTPRequestHandler): + def do_HEAD(self): + if self.path in ('/valid', '/ignored'): + self.send_response(200, "OK") + else: + self.send_response(404, "Not Found") + self.end_headers() + + def do_GET(self): + self.do_HEAD() + if self.path == '/valid': + self.wfile.write(b"

valid anchor

\n") + elif self.path == '/ignored': + self.wfile.write(b"no anchor but page exists\n") + + +@pytest.mark.sphinx( + 'linkcheck', testroot='linkcheck-anchors-ignore-for-url', freshenv=True, + confoverrides={'linkcheck_anchors_ignore_for_url': [ + 'http://localhost:7777/ignored', # existing page + 'http://localhost:7777/invalid', # unknown page + ]}) +def test_anchors_ignored_for_url(app): + with http_server(AnchorsIgnoreForUrlHandler): + app.build() + + assert (app.outdir / 'output.txt').exists() + content = (app.outdir / 'output.json').read_text(encoding='utf8') + + attrs = ('filename', 'lineno', 'status', 'code', 'uri', 'info') + data = [json.loads(x) for x in content.splitlines()] + assert len(data) == 7 + assert all(all(attr in row for attr in attrs) for row in data) + + # rows may be unsorted due to network latency or + # the order the threads are processing the links + rows = {r['uri']: {'status': r['status'], 'info': r['info']} for r in data} + + assert rows['http://localhost:7777/valid']['status'] == 'working' + assert rows['http://localhost:7777/valid#valid-anchor']['status'] == 'working' + assert rows['http://localhost:7777/valid#invalid-anchor'] == { + 'status': 'broken', + 'info': "Anchor 'invalid-anchor' not found", + } + + assert rows['http://localhost:7777/ignored']['status'] == 'working' + assert rows['http://localhost:7777/ignored#invalid-anchor']['status'] == 'working' + + assert rows['http://localhost:7777/invalid'] == { + 'status': 'broken', + 'info': '404 Client Error: Not Found for url: http://localhost:7777/invalid', + } + assert rows['http://localhost:7777/invalid#anchor'] == { + 'status': 'broken', + 'info': '404 Client Error: Not Found for url: http://localhost:7777/invalid', + } + + @pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True) def test_raises_for_invalid_status(app): class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler):