Skip to content

Commit

Permalink
linkcheck: Use context managers for HTTP requests (#11318)
Browse files Browse the repository at this point in the history
This closes HTTP responses when no content reads are required, as
when requests are made in streaming mode, ``requests`` doesn't know
whether the caller may intend to later read content from a streamed
HTTP response object and holds the socket open.

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
  • Loading branch information
jayaddison and AA-Turner committed May 9, 2023
1 parent 2b1c106 commit c9d0933
Show file tree
Hide file tree
Showing 13 changed files with 38 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Expand Up @@ -13,7 +13,7 @@ env:
FORCE_COLOR: "1"
PYTHONDEVMODE: "1" # -X dev
PYTHONWARNDEFAULTENCODING: "1" # -X warn_default_encoding
PYTHONWARNINGS: "error,always:unclosed:ResourceWarning::" # default: all warnings as errors, except ResourceWarnings about unclosed items
PYTHONWARNINGS: "error" # default: all warnings as errors

jobs:
ubuntu:
Expand Down
22 changes: 10 additions & 12 deletions sphinx/builders/linkcheck.py
Expand Up @@ -316,32 +316,30 @@ def check_uri() -> tuple[str, str, int]:
try:
if anchor and self.config.linkcheck_anchors:
# Read the whole document and see if #anchor exists
response = requests.get(req_url, stream=True, config=self.config,
auth=auth_info, **kwargs)
response.raise_for_status()
found = check_anchor(response, unquote(anchor))
with requests.get(req_url, stream=True, config=self.config, auth=auth_info,
**kwargs) as response:
response.raise_for_status()
found = check_anchor(response, unquote(anchor))

if not found:
raise Exception(__("Anchor '%s' not found") % anchor)
else:
try:
# try a HEAD request first, which should be easier on
# the server and the network
response = requests.head(req_url, allow_redirects=True,
config=self.config, auth=auth_info,
**kwargs)
response.raise_for_status()
with requests.head(req_url, allow_redirects=True, config=self.config,
auth=auth_info, **kwargs) as response:
response.raise_for_status()
# Servers drop the connection on HEAD requests, causing
# ConnectionError.
except (ConnectionError, HTTPError, TooManyRedirects) as err:
if isinstance(err, HTTPError) and err.response.status_code == 429:
raise
# retry with GET request if that fails, some servers
# don't like HEAD requests.
response = requests.get(req_url, stream=True,
config=self.config,
auth=auth_info, **kwargs)
response.raise_for_status()
with requests.get(req_url, stream=True, config=self.config,
auth=auth_info, **kwargs) as response:
response.raise_for_status()
except HTTPError as err:
if err.response.status_code == 401:
# We'll take "Unauthorized" as working.
Expand Down
2 changes: 1 addition & 1 deletion tests/roots/test-linkcheck-anchors-ignore/conf.py
@@ -1,3 +1,3 @@
exclude_patterns = ['_build']
linkcheck_anchors = True
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
2 changes: 1 addition & 1 deletion tests/roots/test-linkcheck-documents_exclude/conf.py
Expand Up @@ -3,4 +3,4 @@
'^broken_link$',
'br[0-9]ken_link',
]
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
2 changes: 1 addition & 1 deletion tests/roots/test-linkcheck-localserver-anchor/conf.py
@@ -1,3 +1,3 @@
exclude_patterns = ['_build']
linkcheck_anchors = True
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
2 changes: 1 addition & 1 deletion tests/roots/test-linkcheck-localserver-https/conf.py
@@ -1,2 +1,2 @@
exclude_patterns = ['_build']
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
@@ -1,2 +1,2 @@
exclude_patterns = ['_build']
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
2 changes: 1 addition & 1 deletion tests/roots/test-linkcheck-localserver/conf.py
@@ -1,2 +1,2 @@
exclude_patterns = ['_build']
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
2 changes: 1 addition & 1 deletion tests/roots/test-linkcheck-raw-node/conf.py
@@ -1,2 +1,2 @@
exclude_patterns = ['_build']
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
2 changes: 1 addition & 1 deletion tests/roots/test-linkcheck-too-many-retries/conf.py
@@ -1,3 +1,3 @@
exclude_patterns = ['_build']
linkcheck_anchors = True
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
2 changes: 1 addition & 1 deletion tests/roots/test-linkcheck/conf.py
@@ -1,4 +1,4 @@
root_doc = 'links'
exclude_patterns = ['_build']
linkcheck_anchors = True
linkcheck_timeout = 0.075
linkcheck_timeout = 0.05
1 change: 1 addition & 0 deletions tests/roots/test-linkcheck/links.rst
Expand Up @@ -11,3 +11,4 @@ Some additional anchors to exercise ignore code
.. image:: http://localhost:7777/image.png
.. figure:: http://localhost:7777/image2.png

* `Valid anchored url <http://localhost:7777/anchor.html#found>`_
19 changes: 17 additions & 2 deletions tests/test_build_linkcheck.py
Expand Up @@ -31,6 +31,9 @@ def do_HEAD(self):
if self.path[1:].rstrip() == "":
self.send_response(200, "OK")
self.end_headers()
elif self.path[1:].rstrip() == "anchor.html":
self.send_response(200, "OK")
self.end_headers()
else:
self.send_response(404, "Not Found")
self.end_headers()
Expand All @@ -39,6 +42,9 @@ def do_GET(self):
self.do_HEAD()
if self.path[1:].rstrip() == "":
self.wfile.write(b"ok\n\n")
elif self.path[1:].rstrip() == "anchor.html":
doc = '<!DOCTYPE html><html><body><a id="found"></a></body></html>'
self.wfile.write(doc.encode('utf-8'))


@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
Expand Down Expand Up @@ -69,8 +75,8 @@ def test_defaults(app):
for attr in ("filename", "lineno", "status", "code", "uri", "info"):
assert attr in row

assert len(content.splitlines()) == 9
assert len(rows) == 9
assert len(content.splitlines()) == 10
assert len(rows) == 10
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
Expand All @@ -95,6 +101,15 @@ def test_defaults(app):
assert rowsby["http://localhost:7777#does-not-exist"]["info"] == "Anchor 'does-not-exist' not found"
# images should fail
assert "Not Found for url: http://localhost:7777/image.png" in rowsby["http://localhost:7777/image.png"]["info"]
# anchor should be found
assert rowsby['http://localhost:7777/anchor.html#found'] == {
'filename': 'links.rst',
'lineno': 14,
'status': 'working',
'code': 0,
'uri': 'http://localhost:7777/anchor.html#found',
'info': '',
}


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-too-many-retries', freshenv=True)
Expand Down

0 comments on commit c9d0933

Please sign in to comment.