diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8db64c118d0..07fb0680036 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -13,7 +13,7 @@ env:
FORCE_COLOR: "1"
PYTHONDEVMODE: "1" # -X dev
PYTHONWARNDEFAULTENCODING: "1" # -X warn_default_encoding
- PYTHONWARNINGS: "error,always:unclosed:ResourceWarning::" # default: all warnings as errors, except ResourceWarnings about unclosed items
+ PYTHONWARNINGS: "error"
jobs:
ubuntu:
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index e828fc20988..47852ce45fc 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -262,6 +262,7 @@ def __init__(self, env: BuildEnvironment, config: Config, rqueue: Queue[CheckRes
for doc in self.config.linkcheck_exclude_documents]
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
in self.config.linkcheck_auth]
+ self._session = requests._Session()
super().__init__(daemon=True)
@@ -316,21 +317,21 @@ def check_uri() -> tuple[str, str, int]:
try:
if anchor and self.config.linkcheck_anchors:
# Read the whole document and see if #anchor exists
- response = requests.get(req_url, stream=True, config=self.config,
- auth=auth_info, **kwargs)
- response.raise_for_status()
- found = check_anchor(response, unquote(anchor))
+ with self._session.get(req_url, stream=True, config=self.config,
+ auth=auth_info, **kwargs) as response:
+ response.raise_for_status()
+ found = check_anchor(response, unquote(anchor))
- if not found:
- raise Exception(__("Anchor '%s' not found") % anchor)
+ if not found:
+ raise Exception(__("Anchor '%s' not found") % anchor)
else:
try:
# try a HEAD request first, which should be easier on
# the server and the network
- response = requests.head(req_url, allow_redirects=True,
- config=self.config, auth=auth_info,
- **kwargs)
- response.raise_for_status()
+ with self._session.head(req_url, allow_redirects=True,
+ config=self.config, auth=auth_info,
+ **kwargs) as response:
+ response.raise_for_status()
# Servers drop the connection on HEAD requests, causing
# ConnectionError.
except (ConnectionError, HTTPError, TooManyRedirects) as err:
@@ -338,10 +339,10 @@ def check_uri() -> tuple[str, str, int]:
raise
# retry with GET request if that fails, some servers
# don't like HEAD requests.
- response = requests.get(req_url, stream=True,
- config=self.config,
- auth=auth_info, **kwargs)
- response.raise_for_status()
+ with self._session.get(req_url, stream=True,
+ config=self.config,
+ auth=auth_info, **kwargs) as response:
+ response.raise_for_status()
except HTTPError as err:
if err.response.status_code == 401:
# We'll take "Unauthorized" as working.
@@ -424,6 +425,8 @@ def check(docname: str) -> tuple[str, str, int]:
check_request = self.wqueue.get()
next_check, hyperlink = check_request
if hyperlink is None:
+                # An empty hyperlink is a signal to shut down the worker; clean up resources here
+ self._session.close()
break
uri, docname, lineno = hyperlink
diff --git a/sphinx/util/requests.py b/sphinx/util/requests.py
index c64754fa2a1..9da08099fc9 100644
--- a/sphinx/util/requests.py
+++ b/sphinx/util/requests.py
@@ -90,3 +90,36 @@ def head(url: str, **kwargs: Any) -> requests.Response:
with ignore_insecure_warning(**kwargs):
return requests.head(url, **kwargs)
+
+
+class _Session(requests.Session):
+
+ def get(self, url: str, **kwargs: Any) -> requests.Response: # type: ignore
+ """Sends a GET request like requests.get().
+
+ This sets up User-Agent header and TLS verification automatically."""
+ headers = kwargs.setdefault('headers', {})
+ config = kwargs.pop('config', None)
+ if config:
+ kwargs.setdefault('verify', _get_tls_cacert(url, config))
+ headers.setdefault('User-Agent', _get_user_agent(config))
+ else:
+ headers.setdefault('User-Agent', useragent_header[0][1])
+
+ with ignore_insecure_warning(**kwargs):
+ return super().get(url, **kwargs)
+
+ def head(self, url: str, **kwargs: Any) -> requests.Response: # type: ignore
+ """Sends a HEAD request like requests.head().
+
+ This sets up User-Agent header and TLS verification automatically."""
+ headers = kwargs.setdefault('headers', {})
+ config = kwargs.pop('config', None)
+ if config:
+ kwargs.setdefault('verify', _get_tls_cacert(url, config))
+ headers.setdefault('User-Agent', _get_user_agent(config))
+ else:
+ headers.setdefault('User-Agent', useragent_header[0][1])
+
+ with ignore_insecure_warning(**kwargs):
+ return super().head(url, **kwargs)
diff --git a/tests/roots/test-linkcheck-localserver-too-many-redirects/conf.py b/tests/roots/test-linkcheck-localserver-too-many-redirects/conf.py
new file mode 100644
index 00000000000..aea9add395b
--- /dev/null
+++ b/tests/roots/test-linkcheck-localserver-too-many-redirects/conf.py
@@ -0,0 +1,3 @@
+exclude_patterns = ['_build']
+linkcheck_timeout = 0.075
+linkcheck_workers = 1
diff --git a/tests/roots/test-linkcheck-localserver-too-many-redirects/index.rst b/tests/roots/test-linkcheck-localserver-too-many-redirects/index.rst
new file mode 100644
index 00000000000..c617e942fd2
--- /dev/null
+++ b/tests/roots/test-linkcheck-localserver-too-many-redirects/index.rst
@@ -0,0 +1 @@
+`local server <http://localhost:7777/>`_
diff --git a/tests/roots/test-linkcheck/conf.py b/tests/roots/test-linkcheck/conf.py
index 0c197b6f03b..1ee90b7b52d 100644
--- a/tests/roots/test-linkcheck/conf.py
+++ b/tests/roots/test-linkcheck/conf.py
@@ -2,3 +2,4 @@
exclude_patterns = ['_build']
linkcheck_anchors = True
linkcheck_timeout = 0.075
+linkcheck_workers = 1
diff --git a/tests/roots/test-linkcheck/links.rst b/tests/roots/test-linkcheck/links.rst
index 49afba2b39a..88c757e03d9 100644
--- a/tests/roots/test-linkcheck/links.rst
+++ b/tests/roots/test-linkcheck/links.rst
@@ -11,3 +11,4 @@ Some additional anchors to exercise ignore code
.. image:: http://localhost:7777/image.png
.. figure:: http://localhost:7777/image2.png
+* `Valid anchored url <http://localhost:7777/anchor.html#found>`_
diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py
index acfebd64999..8c25ddf0396 100644
--- a/tests/test_build_linkcheck.py
+++ b/tests/test_build_linkcheck.py
@@ -15,6 +15,7 @@
from unittest import mock
import pytest
+from urllib3.poolmanager import PoolManager
from sphinx.builders.linkcheck import HyperlinkAvailabilityCheckWorker, RateLimit
from sphinx.testing.util import strip_escseq
@@ -27,24 +28,77 @@
class DefaultsHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = "HTTP/1.1"
+
def do_HEAD(self):
if self.path[1:].rstrip() == "":
self.send_response(200, "OK")
self.end_headers()
+ elif self.path[1:].rstrip() == "anchor.html":
+ self.send_response(200, "OK")
+ self.end_headers()
else:
self.send_response(404, "Not Found")
self.end_headers()
def do_GET(self):
- self.do_HEAD()
if self.path[1:].rstrip() == "":
- self.wfile.write(b"ok\n\n")
+ content = b"ok\n\n"
+ elif self.path[1:].rstrip() == "anchor.html":
+            doc = '<!DOCTYPE html><html><body><a id="found"></a></body></html>'
+ content = doc.encode('utf-8')
+ else:
+ content = b""
+
+ if content:
+ self.send_response(200, "OK")
+ self.send_header("Content-Length", str(len(content)))
+ self.end_headers()
+ self.wfile.write(content)
+ else:
+ self.send_response(404, "Not Found")
+ self.end_headers()
+
+
+class ConnectionMeasurement:
+ """Measure the number of distinct host connections created during linkchecking"""
+
+ def __init__(self):
+ self.connections = set()
+ self.urllib3_connection_from_url = PoolManager.connection_from_url
+ self.patcher = mock.patch.object(
+ target=PoolManager,
+ attribute='connection_from_url',
+ new=self._collect_connections(),
+ )
+
+ def _collect_connections(self):
+ def connection_collector(obj, url):
+ connection = self.urllib3_connection_from_url(obj, url)
+ self.connections.add(connection)
+ return connection
+ return connection_collector
+
+ def __enter__(self):
+ self.patcher.start()
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ for connection in self.connections:
+ connection.close()
+ self.patcher.stop()
+
+ @property
+ def connection_count(self):
+ return len(self.connections)
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults(app):
with http_server(DefaultsHandler):
- app.build()
+ with ConnectionMeasurement() as m:
+ app.build()
+ assert m.connection_count == 1
# Text output
assert (app.outdir / 'output.txt').exists()
@@ -69,8 +123,8 @@ def test_defaults(app):
for attr in ("filename", "lineno", "status", "code", "uri", "info"):
assert attr in row
- assert len(content.splitlines()) == 9
- assert len(rows) == 9
+ assert len(content.splitlines()) == 10
+ assert len(rows) == 10
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
@@ -95,6 +149,15 @@ def test_defaults(app):
assert rowsby["http://localhost:7777#does-not-exist"]["info"] == "Anchor 'does-not-exist' not found"
# images should fail
assert "Not Found for url: http://localhost:7777/image.png" in rowsby["http://localhost:7777/image.png"]["info"]
+ # anchor should be found
+ assert rowsby['http://localhost:7777/anchor.html#found'] == {
+ 'filename': 'links.rst',
+ 'lineno': 14,
+ 'status': 'working',
+ 'code': 0,
+ 'uri': 'http://localhost:7777/anchor.html#found',
+ 'info': '',
+ }
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-too-many-retries', freshenv=True)
@@ -166,6 +229,8 @@ def test_anchors_ignored(app):
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True)
def test_raises_for_invalid_status(app):
class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = "HTTP/1.1"
+
def do_GET(self):
self.send_error(500, "Internal Server Error")
@@ -181,13 +246,15 @@ def do_GET(self):
def capture_headers_handler(records):
class HeadersDumperHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = "HTTP/1.1"
+
def do_HEAD(self):
self.do_GET()
def do_GET(self):
+ records.append(self.headers.as_string())
self.send_response(200, "OK")
self.end_headers()
- records.append(self.headers.as_string())
return HeadersDumperHandler
@@ -276,6 +343,8 @@ def test_linkcheck_request_headers_default(app):
def make_redirect_handler(*, support_head):
class RedirectOnceHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = "HTTP/1.1"
+
def do_HEAD(self):
if support_head:
self.do_GET()
@@ -289,6 +358,7 @@ def do_GET(self):
else:
self.send_response(302, "Found")
self.send_header("Location", "http://localhost:7777/?redirected=1")
+ self.send_header("Content-Length", "0") # see: https://github.com/psf/requests/issues/6407
self.end_headers()
def log_date_time_string(self):
@@ -366,13 +436,18 @@ def test_linkcheck_allowed_redirects(app, warning):
class OKHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = "HTTP/1.1"
+
def do_HEAD(self):
self.send_response(200, "OK")
self.end_headers()
def do_GET(self):
- self.do_HEAD()
- self.wfile.write(b"ok\n")
+ content = b"ok\n"
+ self.send_response(200, "OK")
+ self.send_header("Content-Length", str(len(content)))
+ self.end_headers()
+ self.wfile.write(content)
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
@@ -477,18 +552,22 @@ def test_connect_to_selfsigned_nonexistent_cert_file(app):
class InfiniteRedirectOnHeadHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = "HTTP/1.1"
+
def do_HEAD(self):
self.send_response(302, "Found")
self.send_header("Location", "http://localhost:7777/")
self.end_headers()
def do_GET(self):
+ content = b"ok\n"
self.send_response(200, "OK")
+ self.send_header("Content-Length", str(len(content)))
self.end_headers()
- self.wfile.write(b"ok\n")
+ self.wfile.write(content)
-@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
+@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-too-many-redirects', freshenv=True)
def test_TooManyRedirects_on_HEAD(app, monkeypatch):
import requests.sessions
@@ -511,6 +590,8 @@ def test_TooManyRedirects_on_HEAD(app, monkeypatch):
def make_retry_after_handler(responses):
class RetryAfterHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = "HTTP/1.1"
+
def do_HEAD(self):
status, retry_after = responses.pop(0)
self.send_response(status)
@@ -662,8 +743,10 @@ def test_limit_rate_bails_out_after_waiting_max_time(app):
class ConnectionResetHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = "HTTP/1.1"
+
def do_HEAD(self):
- self.connection.close()
+ self.close_connection = True
def do_GET(self):
self.send_response(200, "OK")
diff --git a/tests/utils.py b/tests/utils.py
index 429bbd2b2e2..32636b7936c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -19,7 +19,7 @@
class HttpServerThread(threading.Thread):
def __init__(self, handler, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.server = http.server.HTTPServer(("localhost", 7777), handler)
+ self.server = http.server.ThreadingHTTPServer(("localhost", 7777), handler)
def run(self):
self.server.serve_forever(poll_interval=0.001)