Skip to content

Commit

Permalink
Cache the docname<->filename map in Project instances (#11575)
Browse files: browse the repository at this point in the history
  • Loading branch information
AA-Turner committed Aug 10, 2023
1 parent 4dd2ed4 commit 8cabf08
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 70 deletions.
2 changes: 1 addition & 1 deletion sphinx/environment/__init__.py
@@ -58,7 +58,7 @@

# This is increased every time an environment attribute is added
# or changed to properly invalidate pickle files.
ENV_VERSION = 59
ENV_VERSION = 60

# config status
CONFIG_UNSET = -1
Expand Down
83 changes: 49 additions & 34 deletions sphinx/project.py
@@ -9,7 +9,7 @@
from sphinx.locale import __
from sphinx.util import logging
from sphinx.util.matching import get_matching_files
from sphinx.util.osutil import SEP, path_stabilize, relpath
from sphinx.util.osutil import path_stabilize, relpath

if TYPE_CHECKING:
from collections.abc import Iterable
@@ -21,43 +21,57 @@
class Project:
"""A project is the source code set of the Sphinx document(s)."""

def __init__(self, srcdir: str | os.PathLike[str], source_suffix: dict[str, str]) -> None:
def __init__(self, srcdir: str | os.PathLike[str], source_suffix: Iterable[str]) -> None:
#: Source directory.
self.srcdir = srcdir

#: source_suffix. Same as :confval:`source_suffix`.
self.source_suffix = source_suffix
self.source_suffix = tuple(source_suffix)
self._first_source_suffix = next(iter(self.source_suffix), "")

#: The name of documents belongs to this project.
#: The name of documents belonging to this project.
self.docnames: set[str] = set()

# Bijective mapping between docnames and (srcdir relative) paths.
self._path_to_docname: dict[str, str] = {}
self._docname_to_path: dict[str, str] = {}

def restore(self, other: Project) -> None:
"""Take over a result of last build."""
self.docnames = other.docnames
self._path_to_docname = other._path_to_docname
self._docname_to_path = other._docname_to_path

def discover(self, exclude_paths: Iterable[str] = (),
include_paths: Iterable[str] = ("**",)) -> set[str]:
"""Find all document files in the source directory and put them in
:attr:`docnames`.
"""
self.docnames = set()

self.docnames.clear()
self._path_to_docname.clear()
self._docname_to_path.clear()

for filename in get_matching_files(
self.srcdir,
include_paths,
[*exclude_paths] + EXCLUDE_PATHS,
):
docname = self.path2doc(filename)
if docname:
if docname := self.path2doc(filename):
if docname in self.docnames:
pattern = os.path.join(self.srcdir, docname) + '.*'
files = [relpath(f, self.srcdir) for f in glob(pattern)]
logger.warning(__('multiple files found for the document "%s": %r\n'
'Use %r for the build.'),
docname, files, self.doc2path(docname), once=True)
docname, files, self.doc2path(docname, absolute=True),
once=True)
elif os.access(os.path.join(self.srcdir, filename), os.R_OK):
self.docnames.add(docname)
self._path_to_docname[filename] = docname
self._docname_to_path[docname] = filename
else:
logger.warning(__("document not readable. Ignored."), location=docname)
logger.warning(__("Ignored unreadable document %r."),
filename, location=docname)

return self.docnames

@@ -66,32 +80,33 @@ def path2doc(self, filename: str | os.PathLike[str]) -> str | None:
*filename* should be absolute or relative to the source directory.
"""
if str(filename).startswith(str(self.srcdir)):
filename = relpath(filename, self.srcdir)
for suffix in self.source_suffix:
if str(filename).endswith(suffix):
filename = path_stabilize(filename)
return filename[:-len(suffix)]

# the file does not have docname
return None

def doc2path(self, docname: str, basedir: bool = True) -> str:
try:
return self._path_to_docname[filename] # type: ignore[index]
except KeyError:
if os.path.isabs(filename):
try:
filename = os.path.relpath(filename, self.srcdir)
except ValueError:
pass
for suffix in self.source_suffix:
if os.path.basename(filename).endswith(suffix):
return path_stabilize(filename).removesuffix(suffix)

# the file does not have a docname
return None

def doc2path(self, docname: str, absolute: bool) -> str:
"""Return the filename for the document name.
If *basedir* is True, return as an absolute path.
If *absolute* is True, return as an absolute path.
Else, return as a relative path to the source directory.
"""
docname = docname.replace(SEP, os.path.sep)
basename = os.path.join(self.srcdir, docname)
for suffix in self.source_suffix:
if os.path.isfile(basename + suffix):
break
else:
# document does not exist
suffix = list(self.source_suffix)[0]

if basedir:
return basename + suffix
else:
return docname + suffix
try:
filename = self._docname_to_path[docname]
except KeyError:
# Backwards compatibility: the document does not exist
filename = docname + self._first_source_suffix

if absolute:
return os.path.join(self.srcdir, filename)
return filename
10 changes: 5 additions & 5 deletions sphinx/util/osutil.py
@@ -26,16 +26,16 @@
SEP = "/"


def os_path(canonicalpath: str) -> str:
return canonicalpath.replace(SEP, path.sep)
def os_path(canonical_path: str, /) -> str:
return canonical_path.replace(SEP, path.sep)


def canon_path(nativepath: str | os.PathLike[str]) -> str:
def canon_path(native_path: str | os.PathLike[str], /) -> str:
"""Return path in OS-independent form"""
return str(nativepath).replace(path.sep, SEP)
return os.fspath(native_path).replace(path.sep, SEP)


def path_stabilize(filepath: str | os.PathLike[str]) -> str:
def path_stabilize(filepath: str | os.PathLike[str], /) -> str:
"Normalize path separator and unicode string"
new_path = canon_path(filepath)
return unicodedata.normalize('NFC', new_path)
Expand Down
66 changes: 36 additions & 30 deletions tests/test_project.py
@@ -4,69 +4,75 @@

from sphinx.project import Project

DOCNAMES = {'autodoc', 'bom', 'extapi', 'extensions', 'footnote', 'images',
'includes', 'index', 'lists', 'markup', 'math', 'objects',
'subdir/excluded', 'subdir/images', 'subdir/includes'}
SUBDIR_DOCNAMES = {'subdir/excluded', 'subdir/images', 'subdir/includes'}

def test_project_discover(rootdir):
project = Project(str(rootdir / 'test-root'), {})

docnames = {'autodoc', 'bom', 'extapi', 'extensions', 'footnote', 'images',
'includes', 'index', 'lists', 'markup', 'math', 'objects',
'subdir/excluded', 'subdir/images', 'subdir/includes'}
subdir_docnames = {'subdir/excluded', 'subdir/images', 'subdir/includes'}

def test_project_discover_basic(rootdir):
# basic case
project.source_suffix = ['.txt']
assert project.discover() == docnames
project = Project(rootdir / 'test-root', ['.txt'])
assert project.discover() == DOCNAMES


def test_project_discover_exclude_patterns(rootdir):
project = Project(rootdir / 'test-root', ['.txt'])

# exclude_paths option
assert project.discover(['subdir/*']) == docnames - subdir_docnames
assert project.discover(['subdir/*']) == DOCNAMES - SUBDIR_DOCNAMES
assert project.discover(['.txt', 'subdir/*']) == DOCNAMES - SUBDIR_DOCNAMES

# exclude_patterns
assert project.discover(['.txt', 'subdir/*']) == docnames - subdir_docnames

def test_project_discover_multiple_suffixes(rootdir):
# multiple source_suffixes
project.source_suffix = ['.txt', '.foo']
assert project.discover() == docnames | {'otherext'}
project = Project(rootdir / 'test-root', ['.txt', '.foo'])
assert project.discover() == DOCNAMES | {'otherext'}


def test_project_discover_complicated_suffix(rootdir):
# complicated source_suffix
project.source_suffix = ['.foo.png']
project = Project(rootdir / 'test-root', ['.foo.png'])
assert project.discover() == {'img'}


def test_project_discover_templates_path(rootdir):
# templates_path
project.source_suffix = ['.html']
project = Project(rootdir / 'test-root', ['.html'])
assert project.discover() == {'_templates/layout',
'_templates/customsb',
'_templates/contentssb'}

assert project.discover(['_templates']) == set()


@pytest.mark.sphinx(testroot='basic')
def test_project_path2doc(app):
project = Project(app.srcdir, app.config.source_suffix)
def test_project_path2doc(rootdir):
project = Project(rootdir / 'test-basic', {'.rst': 'restructuredtext'})
assert project.path2doc('index.rst') == 'index'
assert project.path2doc('index.foo') is None # unknown extension
assert project.path2doc('index.foo.rst') == 'index.foo'
assert project.path2doc('index') is None
assert project.path2doc('path/to/index.rst') == 'path/to/index'
assert project.path2doc(str(app.srcdir / 'to/index.rst')) == 'to/index'
assert project.path2doc(rootdir / 'test-basic' / 'to/index.rst') == 'to/index'


@pytest.mark.sphinx(srcdir='project_doc2path', testroot='basic')
def test_project_doc2path(app):
source_suffix = {'.rst': 'restructuredtext', '.txt': 'restructuredtext'}

project = Project(app.srcdir, source_suffix)
assert project.doc2path('index') == str(app.srcdir / 'index.rst')

# first source_suffix is used for missing file
assert project.doc2path('foo') == str(app.srcdir / 'foo.rst')

# matched source_suffix is used if exists
(app.srcdir / 'foo.txt').write_text('', encoding='utf8')
assert project.doc2path('foo') == str(app.srcdir / 'foo.txt')
project.discover()

# absolute path
assert project.doc2path('index', basedir=True) == str(app.srcdir / 'index.rst')
assert project.doc2path('index', absolute=True) == str(app.srcdir / 'index.rst')

# relative path
assert project.doc2path('index', basedir=False) == 'index.rst'
assert project.doc2path('index', absolute=False) == 'index.rst'

# first source_suffix is used for missing file
assert project.doc2path('foo', absolute=False) == 'foo.rst'

# matched source_suffix is used if exists
(app.srcdir / 'bar.txt').touch()
project.discover()
assert project.doc2path('bar', absolute=False) == 'bar.txt'

0 comments on commit 8cabf08

Please sign in to comment.