From c44f300309042d955167525cb4587f92002cc178 Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Sat, 26 Aug 2023 20:43:58 +0300 Subject: [PATCH 1/6] linkcheck: Use correct function to convert from UTC time to UNIX epoch After subtracting the offset, we get time in UTC. But time.mktime() expects a time struct in local time, not in UTC. The correct function for converting UTC time to UNIX epoch is calendar.timegm(). This fixes hanging tests when the local timezone is to the west of UTC. One could test this using: TZ=America/New_York python3 -m pytest -k test_too_many_requests_retry_after_HTTP_date --- sphinx/builders/linkcheck.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index e1479e01be0..24b9d92cf14 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -2,6 +2,7 @@ from __future__ import annotations +import calendar import contextlib import json import re @@ -491,7 +492,7 @@ def limit_rate(self, response_url: str, retry_after: str) -> float | None: parsed = parsedate_tz(retry_after) assert parsed is not None # the 10th element is the GMT offset in seconds - next_check = time.mktime(parsed[:9]) - (parsed[9] or 0) + next_check = float(calendar.timegm(parsed[:9])) - (parsed[9] or 0) except (AssertionError, TypeError, ValueError): # TypeError: Invalid date format. # ValueError: Invalid date, e.g. Oct 52th. From 6a45b703ba4f363b4bbcd57bcefb1c9869f14d6b Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Sun, 27 Aug 2023 15:43:38 +0300 Subject: [PATCH 2/6] Add a test and a changelog entry for the previous commit --- CHANGES | 3 ++- tests/test_build_linkcheck.py | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/CHANGES b/CHANGES index 5b057c55fde..a1cc0fa2cc4 100644 --- a/CHANGES +++ b/CHANGES @@ -6,6 +6,7 @@ Bugs fixed * #11618: Fix a regression in the MoveModuleTargets transform, introduced in #10478 (#9662). +* #11649: Use :py:func:`calendar.timegm` to convert from UTC time to UNIX epoch. Release 7.2.3 (released Aug 23, 2023) ===================================== @@ -24,7 +25,7 @@ Bugs fixed when ``autodoc_preserve_defaults`` is ``True``. * Restore support string methods on path objects. This is deprecated and will be removed in Sphinx 8. - Use :py:func`os.fspath` to convert :py:class:`~pathlib.Path` objects to strings, + Use :py:func:`os.fspath` to convert :py:class:`~pathlib.Path` objects to strings, or :py:class:`~pathlib.Path`'s methods to work with path objects. Release 7.2.2 (released Aug 17, 2023) diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 80f73b5ee99..e7616283bf0 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -4,6 +4,7 @@ import http.server import json +import os import re import textwrap import time @@ -772,11 +773,22 @@ def test_too_many_requests_retry_after_int_delay(app, capsys, status): ) +@pytest.mark.parametrize('tz', ['GMT', 'GMT+3', 'GMT-3']) @pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True) -def test_too_many_requests_retry_after_HTTP_date(app, capsys): - retry_after = wsgiref.handlers.format_date_time(time.time()) - with http_server(make_retry_after_handler([(429, retry_after), (200, None)])): - app.build() +def test_too_many_requests_retry_after_HTTP_date(app, capsys, tz): + old_tz = os.environ.get('TZ') + os.environ['TZ'] = tz + if hasattr(time, 'tzset'): + time.tzset() + try: + retry_after = wsgiref.handlers.format_date_time(time.time()) + with http_server(make_retry_after_handler([(429, retry_after), (200, None)])): + app.build() + finally: + if old_tz is None: + del os.environ['TZ'] + else: + os.environ['TZ'] = old_tz content = (app.outdir / 'output.json').read_text(encoding='utf8') assert json.loads(content) == { "filename": "index.rst", From e76ad828eba80e13bc1d071cf7da4ac97ecce149 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Sun, 27 Aug 2023 23:05:00 +0100 Subject: [PATCH 3/6] Update CHANGES --- CHANGES | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index a1cc0fa2cc4..9d24cc1d0bd 100644 --- a/CHANGES +++ b/CHANGES @@ -6,7 +6,8 @@ Bugs fixed * #11618: Fix a regression in the MoveModuleTargets transform, introduced in #10478 (#9662). -* #11649: Use :py:func:`calendar.timegm` to convert from UTC time to UNIX epoch. +* #11649: linkcheck: Fix conversions from UTC to UNIX time + for timezones west of London. Release 7.2.3 (released Aug 23, 2023) ===================================== From 3cbd582c1dae384c4d68aba034542e5c6c390586 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 28 Aug 2023 02:18:01 +0100 Subject: [PATCH 4/6] Use rfc1123_to_epoch --- sphinx/builders/linkcheck.py | 10 +++------- sphinx/util/http_date.py | 29 ++++++++++++++++++++++++----- tests/test_build_linkcheck.py | 29 +++++++++++++++-------------- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 24b9d92cf14..f2509582b77 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -2,13 +2,11 @@ from __future__ import annotations -import calendar import contextlib import json import re import socket import time -from email.utils import parsedate_tz from html.parser import HTMLParser from os import path from queue import PriorityQueue, Queue @@ -30,6 +28,7 @@ red, turquoise, ) +from sphinx.util.http_date import rfc1123_to_epoch from sphinx.util.nodes import get_node_line if TYPE_CHECKING: @@ -489,11 +488,8 @@ def limit_rate(self, response_url: str, retry_after: str) -> float | None: except ValueError: try: # An HTTP-date: time of next attempt. - parsed = parsedate_tz(retry_after) - assert parsed is not None - # the 10th element is the GMT offset in seconds - next_check = float(calendar.timegm(parsed[:9])) - (parsed[9] or 0) - except (AssertionError, TypeError, ValueError): + next_check = rfc1123_to_epoch(retry_after) + except (ValueError, TypeError): # TypeError: Invalid date format. # ValueError: Invalid date, e.g. Oct 52th. pass diff --git a/sphinx/util/http_date.py b/sphinx/util/http_date.py index e3c452419d2..8e245cbf316 100644 --- a/sphinx/util/http_date.py +++ b/sphinx/util/http_date.py @@ -4,7 +4,12 @@ """ import time -from email.utils import formatdate, parsedate +import warnings +from email.utils import formatdate, parsedate_tz + +from sphinx.deprecation import RemovedInSphinx90Warning + +_GMT_OFFSET = float(time.localtime().tm_gmtoff) def epoch_to_rfc1123(epoch: float) -> str: @@ -14,7 +19,21 @@ def epoch_to_rfc1123(epoch: float) -> str: def rfc1123_to_epoch(rfc1123: str) -> float: """Return epoch offset from HTTP-date string.""" - t = parsedate(rfc1123) - if t: - return time.mktime(t) - raise ValueError + t = parsedate_tz(rfc1123) + if t is None: + raise ValueError + if not rfc1123.endswith(" GMT"): + warnings.warn( + "HTTP-date string does not meet RFC 7231 requirements " + f"(must end with 'GMT'): {rfc1123!r}", + RemovedInSphinx90Warning, stacklevel=3, + ) + epoch_secs = time.mktime(time.struct_time(t[:9])) + _GMT_OFFSET + if (gmt_offset := t[9]) != 0: + warnings.warn( + "HTTP-date string does not meet RFC 7231 requirements " + f"(must be GMT time): {rfc1123!r}", + RemovedInSphinx90Warning, stacklevel=3, + ) + return epoch_secs - (gmt_offset or 0) + return epoch_secs diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index e7616283bf0..38a0bd1eeb1 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -4,8 +4,8 @@ import http.server import json -import os import re +import sys import textwrap import time import wsgiref.handlers @@ -17,6 +17,7 @@ import pytest from urllib3.poolmanager import PoolManager +import sphinx.util.http_date from sphinx.builders.linkcheck import ( CheckRequest, Hyperlink, @@ -773,22 +774,22 @@ def test_too_many_requests_retry_after_int_delay(app, capsys, status): ) -@pytest.mark.parametrize('tz', ['GMT', 'GMT+3', 'GMT-3']) +@pytest.mark.parametrize('tz', [None, 'GMT', 'GMT+3', 'GMT-3']) @pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True) -def test_too_many_requests_retry_after_HTTP_date(app, capsys, tz): - old_tz = os.environ.get('TZ') - os.environ['TZ'] = tz - if hasattr(time, 'tzset'): - time.tzset() - try: - retry_after = wsgiref.handlers.format_date_time(time.time()) +def test_too_many_requests_retry_after_HTTP_date(tz, app, monkeypatch, capsys): + retry_after = wsgiref.handlers.format_date_time(time.time()) + + with monkeypatch.context() as m: + if tz is not None: + m.setenv('TZ', tz) + if sys.platform != "win32": + time.tzset() + m.setattr(sphinx.util.http_date, '_GMT_OFFSET', + float(time.localtime().tm_gmtoff)) + with http_server(make_retry_after_handler([(429, retry_after), (200, None)])): app.build() - finally: - if old_tz is None: - del os.environ['TZ'] - else: - os.environ['TZ'] = old_tz + content = (app.outdir / 'output.json').read_text(encoding='utf8') assert json.loads(content) == { "filename": "index.rst", From f5a444defb09fe91ca4bbcf7a241da87bc82cf12 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 28 Aug 2023 03:14:45 +0100 Subject: [PATCH 5/6] Re-export wsgiref.handlers.format_date_time --- sphinx/util/http_date.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sphinx/util/http_date.py b/sphinx/util/http_date.py index 8e245cbf316..f4ba7c88eac 100644 --- a/sphinx/util/http_date.py +++ b/sphinx/util/http_date.py @@ -5,16 +5,16 @@ import time import warnings -from email.utils import formatdate, parsedate_tz +from email.utils import parsedate_tz +from wsgiref.handlers import format_date_time from sphinx.deprecation import RemovedInSphinx90Warning _GMT_OFFSET = float(time.localtime().tm_gmtoff) -def epoch_to_rfc1123(epoch: float) -> str: - """Return HTTP-date string from epoch offset.""" - return formatdate(epoch, usegmt=True) +epoch_to_rfc1123 = format_date_time +"""Return HTTP-date string from epoch offset.""" def rfc1123_to_epoch(rfc1123: str) -> float: From 96760559db779d1a1522ea292551091cdbd5fa46 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 28 Aug 2023 04:31:20 +0100 Subject: [PATCH 6/6] Revert "Re-export wsgiref.handlers.format_date_time" This reverts commit f5a444defb09fe91ca4bbcf7a241da87bc82cf12. --- sphinx/util/http_date.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sphinx/util/http_date.py b/sphinx/util/http_date.py index f4ba7c88eac..8e245cbf316 100644 --- a/sphinx/util/http_date.py +++ b/sphinx/util/http_date.py @@ -5,16 +5,16 @@ import time import warnings -from email.utils import parsedate_tz -from wsgiref.handlers import format_date_time +from email.utils import formatdate, parsedate_tz from sphinx.deprecation import RemovedInSphinx90Warning _GMT_OFFSET = float(time.localtime().tm_gmtoff) -epoch_to_rfc1123 = format_date_time -"""Return HTTP-date string from epoch offset.""" +def epoch_to_rfc1123(epoch: float) -> str: + """Return HTTP-date string from epoch offset.""" + return formatdate(epoch, usegmt=True) def rfc1123_to_epoch(rfc1123: str) -> float: