feat: Cache downloaded inventories as local files
oprypin committed Nov 12, 2023
1 parent b3edf89 commit 78e7e26
Showing 3 changed files with 84 additions and 9 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -29,11 +29,13 @@ classifiers = [
     "Typing :: Typed",
 ]
 dependencies = [
+    "click>=7.0",
     "Jinja2>=2.11.1",
     "Markdown>=3.3",
     "MarkupSafe>=1.1",
     "mkdocs>=1.4",
     "mkdocs-autorefs>=0.3.1",
+    "platformdirs>=2.2.0",
     "pymdown-extensions>=6.3",
     "importlib-metadata>=4.6; python_version < '3.10'",
     "typing-extensions>=4.1; python_version < '3.10'",
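The two new runtime dependencies back the cache module added below: platformdirs picks the per-user cache directory, and click supplies click.open_file(..., atomic=True) for crash-safe writes. For orientation, this is roughly where the cache will live (typical platformdirs defaults, an assumption; exact paths vary per OS and environment):

import platformdirs

# Typically ~/.cache/mkdocs on Linux and ~/Library/Caches/mkdocs on macOS
# (based on platformdirs' documented defaults).
print(platformdirs.user_cache_dir("mkdocs"))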
76 changes: 76 additions & 0 deletions src/mkdocstrings/_cache.py
@@ -0,0 +1,76 @@
import datetime
import gzip
import hashlib
import os
import urllib.parse
import urllib.request
from typing import BinaryIO, Callable

import click
import platformdirs

from mkdocstrings.loggers import get_logger

log = get_logger(__name__)


def download_url_with_gz(url: str) -> bytes:
    req = urllib.request.Request(  # noqa: S310
        url,
        headers={"Accept-Encoding": "gzip", "User-Agent": "mkdocstrings/0.15.0"},
    )
    with urllib.request.urlopen(req) as resp:  # noqa: S310
        content: BinaryIO = resp
        if "gzip" in resp.headers.get("content-encoding", ""):
            content = gzip.GzipFile(fileobj=resp)  # type: ignore[assignment]
        return content.read()


# This is mostly a copy of https://github.com/mkdocs/mkdocs/blob/master/mkdocs/utils/cache.py
# In the future maybe they can be deduplicated.


def download_and_cache_url(
    url: str,
    download: Callable[[str], bytes],
    cache_duration: datetime.timedelta,
    comment: bytes = b"# ",
) -> bytes:
    """Downloads a file from the URL, stores it under ~/.cache/, and returns its content.

    For tracking the age of the content, a prefix is inserted into the stored file, rather than relying on mtime.

    Args:
        url: URL to use.
        download: Callback that will accept the URL and actually perform the download.
        cache_duration: How long to consider the URL content cached.
        comment: The appropriate comment prefix for this file format.
    """
    directory = os.path.join(platformdirs.user_cache_dir("mkdocs"), "mkdocstrings_url_cache")
    name_hash = hashlib.sha256(url.encode()).hexdigest()[:32]
    path = os.path.join(directory, name_hash + os.path.splitext(url)[1])

    now = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
    prefix = b"%s%s downloaded at timestamp " % (comment, url.encode())
    # Check for cached file and try to return it
    if os.path.isfile(path):
        try:
            with open(path, "rb") as f:
                line = f.readline()
                if line.startswith(prefix):
                    line = line[len(prefix) :]
                    timestamp = int(line)
                    if datetime.timedelta(seconds=(now - timestamp)) <= cache_duration:
                        log.debug(f"Using cached '{path}' for '{url}'")
                        return f.read()
        except (OSError, ValueError) as e:
            log.debug(f"{type(e).__name__}: {e}")

    # Download and cache the file
    log.debug(f"Downloading '{url}' to '{path}'")
    content = download(url)
    os.makedirs(directory, exist_ok=True)
    with click.open_file(path, "wb", atomic=True) as f:
        f.write(b"%s%d\n" % (prefix, now))
        f.write(content)
    return content
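Taken together: download_url_with_gz fetches a URL (transparently unwrapping gzip content-encoding), and download_and_cache_url wraps any such downloader with an on-disk cache keyed by the SHA-256 of the URL. A minimal sketch of the round trip, using a stub downloader and a made-up URL (both hypothetical) so it runs offline:

import datetime
from mkdocstrings._cache import download_and_cache_url

def fake_download(url: str) -> bytes:
    # Stand-in for download_url_with_gz so the sketch needs no network.
    print(f"actually downloading {url}")
    return b"fake inventory payload"

url = "https://example.com/objects.inv"  # illustrative URL
one_day = datetime.timedelta(days=1)
first = download_and_cache_url(url, fake_download, one_day)   # downloads, writes cache file
second = download_and_cache_url(url, fake_download, one_day)  # fresh cache: read from disk, no print
assert first == second == b"fake inventory payload"

On the first call (assuming a cold cache) the stub runs and the bytes are written behind a one-line timestamp header; the second call finds the file, sees the timestamp is within one day, and returns the remainder of the file without invoking the downloader at all.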
15 changes: 6 additions & 9 deletions src/mkdocstrings/plugin.py
@@ -14,20 +14,21 @@
 
 from __future__ import annotations
 
+import datetime
 import functools
-import gzip
 import os
 import sys
 from concurrent import futures
-from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Iterable, List, Mapping, Tuple, TypeVar
-from urllib import request
+from io import BytesIO
+from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Mapping, Tuple, TypeVar
 
 from mkdocs.config import Config
 from mkdocs.config import config_options as opt
 from mkdocs.plugins import BasePlugin
 from mkdocs.utils import write_file
 from mkdocs_autorefs.plugin import AutorefsPlugin
 
+from mkdocstrings._cache import download_and_cache_url, download_url_with_gz
 from mkdocstrings.extension import MkdocstringsExtension
 from mkdocstrings.handlers.base import BaseHandler, Handlers
 from mkdocstrings.loggers import get_logger
@@ -317,11 +318,7 @@ def _load_inventory(cls, loader: InventoryLoaderType, url: str, **kwargs: Any) -
             A mapping from identifier to absolute URL.
         """
         log.debug(f"Downloading inventory from {url!r}")
-        req = request.Request(url, headers={"Accept-Encoding": "gzip", "User-Agent": "mkdocstrings/0.15.0"})
-        with request.urlopen(req) as resp:  # noqa: S310 (URL audit OK: comes from a checked-in config)
-            content: BinaryIO = resp
-            if "gzip" in resp.headers.get("content-encoding", ""):
-                content = gzip.GzipFile(fileobj=resp)  # type: ignore[assignment]
-            result = dict(loader(content, url=url, **kwargs))
+        content = download_and_cache_url(url, download_url_with_gz, datetime.timedelta(days=1))
+        result = dict(loader(BytesIO(content), url=url, **kwargs))
         log.debug(f"Loaded inventory from {url!r}: {len(result)} items")
         return result

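Net effect: each inventory URL is downloaded at most once per day per user, and loaders receive the bytes through a BytesIO exactly as they previously received the HTTP response object. If a stale inventory needs refreshing before the day is up, deleting the cache directory forces a re-download on the next build; a sketch (the path is derived from platformdirs, so it varies per OS):

import os
import shutil

import platformdirs

# Remove all cached inventories; the next build re-downloads them.
cache_dir = os.path.join(platformdirs.user_cache_dir("mkdocs"), "mkdocstrings_url_cache")
shutil.rmtree(cache_dir, ignore_errors=True)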