Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Cache downloaded inventories as local files #632

Merged
merged 1 commit into from
Nov 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ classifiers = [
"Typing :: Typed",
]
dependencies = [
"click>=7.0",
pawamoy marked this conversation as resolved.
Show resolved Hide resolved
"Jinja2>=2.11.1",
"Markdown>=3.3",
"MarkupSafe>=1.1",
"mkdocs>=1.4",
"mkdocs-autorefs>=0.3.1",
"platformdirs>=2.2.0",
"pymdown-extensions>=6.3",
"importlib-metadata>=4.6; python_version < '3.10'",
"typing-extensions>=4.1; python_version < '3.10'",
Expand Down
76 changes: 76 additions & 0 deletions src/mkdocstrings/_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import datetime
import gzip
import hashlib
import os
import urllib.parse
import urllib.request
from typing import BinaryIO, Callable

import click
import platformdirs

from mkdocstrings.loggers import get_logger

log = get_logger(__name__)


def download_url_with_gz(url: str) -> bytes:
    """Fetch `url` and return the raw response body, inflating it if gzip-compressed.

    The request advertises gzip support via the `Accept-Encoding` header; when the
    server responds with a gzip-encoded body, it is transparently decompressed
    before being returned.
    """
    request = urllib.request.Request(  # noqa: S310
        url,
        headers={"Accept-Encoding": "gzip", "User-Agent": "mkdocstrings/0.15.0"},
    )
    with urllib.request.urlopen(request) as response:  # noqa: S310
        reader: BinaryIO = response
        if "gzip" in response.headers.get("content-encoding", ""):
            # Wrap the socket-backed stream so reads decompress on the fly.
            reader = gzip.GzipFile(fileobj=response)  # type: ignore[assignment]
        return reader.read()


# This is mostly a copy of https://github.com/mkdocs/mkdocs/blob/master/mkdocs/utils/cache.py
# In the future maybe they can be deduplicated.


def download_and_cache_url(
    url: str,
    download: Callable[[str], bytes],
    cache_duration: datetime.timedelta,
    comment: bytes = b"# ",
) -> bytes:
    """Downloads a file from the URL, stores it under ~/.cache/, and returns its content.

    For tracking the age of the content, a prefix is inserted into the stored file, rather than relying on mtime.

    Args:
        url: URL to use.
        download: Callback that will accept the URL and actually perform the download.
        cache_duration: How long to consider the URL content cached.
        comment: The appropriate comment prefix for this file format.
    """
    cache_dir = os.path.join(platformdirs.user_cache_dir("mkdocs"), "mkdocstrings_url_cache")
    # Hash the URL for a safe file name; keep the original extension for readability.
    digest = hashlib.sha256(url.encode()).hexdigest()[:32]
    cache_path = os.path.join(cache_dir, digest + os.path.splitext(url)[1])

    now = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
    prefix = b"%s%s downloaded at timestamp " % (comment, url.encode())

    # Serve from cache when a valid, fresh entry exists.
    if os.path.isfile(cache_path):
        try:
            with open(cache_path, "rb") as f:
                first_line = f.readline()
                if first_line.startswith(prefix):
                    stored_at = int(first_line[len(prefix) :])
                    if datetime.timedelta(seconds=(now - stored_at)) <= cache_duration:
                        log.debug(f"Using cached '{cache_path}' for '{url}'")
                        return f.read()
        except (OSError, ValueError) as error:
            # A corrupt or unreadable cache entry is not fatal: fall through and re-download.
            log.debug(f"{type(error).__name__}: {error}")

    # Cache miss (or stale/corrupt entry): fetch and (re)write the cache file.
    log.debug(f"Downloading '{url}' to '{cache_path}'")
    content = download(url)
    os.makedirs(cache_dir, exist_ok=True)
    with click.open_file(cache_path, "wb", atomic=True) as f:
        f.write(b"%s%d\n" % (prefix, now))
        f.write(content)
    return content
15 changes: 6 additions & 9 deletions src/mkdocstrings/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,21 @@

from __future__ import annotations

import datetime
import functools
import gzip
import os
import sys
from concurrent import futures
from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Iterable, List, Mapping, Tuple, TypeVar
from urllib import request
from io import BytesIO
from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Mapping, Tuple, TypeVar

from mkdocs.config import Config
from mkdocs.config import config_options as opt
from mkdocs.plugins import BasePlugin
from mkdocs.utils import write_file
from mkdocs_autorefs.plugin import AutorefsPlugin

from mkdocstrings._cache import download_and_cache_url, download_url_with_gz
from mkdocstrings.extension import MkdocstringsExtension
from mkdocstrings.handlers.base import BaseHandler, Handlers
from mkdocstrings.loggers import get_logger
Expand Down Expand Up @@ -317,11 +318,7 @@ def _load_inventory(cls, loader: InventoryLoaderType, url: str, **kwargs: Any) -
A mapping from identifier to absolute URL.
"""
log.debug(f"Downloading inventory from {url!r}")
req = request.Request(url, headers={"Accept-Encoding": "gzip", "User-Agent": "mkdocstrings/0.15.0"})
with request.urlopen(req) as resp: # noqa: S310 (URL audit OK: comes from a checked-in config)
content: BinaryIO = resp
if "gzip" in resp.headers.get("content-encoding", ""):
content = gzip.GzipFile(fileobj=resp) # type: ignore[assignment]
result = dict(loader(content, url=url, **kwargs))
content = download_and_cache_url(url, download_url_with_gz, datetime.timedelta(days=1))
result = dict(loader(BytesIO(content), url=url, **kwargs))
log.debug(f"Loaded inventory from {url!r}: {len(result)} items")
return result