diff --git a/sphinx/environment/__init__.py b/sphinx/environment/__init__.py index d3a9b7c1e47..2e6b86da23f 100644 --- a/sphinx/environment/__init__.py +++ b/sphinx/environment/__init__.py @@ -58,7 +58,7 @@ # This is increased every time an environment attribute is added # or changed to properly invalidate pickle files. -ENV_VERSION = 59 +ENV_VERSION = 60 # config status CONFIG_UNSET = -1 diff --git a/sphinx/project.py b/sphinx/project.py index b75c684db7f..d6eaeee2fd6 100644 --- a/sphinx/project.py +++ b/sphinx/project.py @@ -9,7 +9,7 @@ from sphinx.locale import __ from sphinx.util import logging from sphinx.util.matching import get_matching_files -from sphinx.util.osutil import SEP, path_stabilize, relpath +from sphinx.util.osutil import path_stabilize, relpath if TYPE_CHECKING: from collections.abc import Iterable @@ -21,43 +21,57 @@ class Project: """A project is the source code set of the Sphinx document(s).""" - def __init__(self, srcdir: str | os.PathLike[str], source_suffix: dict[str, str]) -> None: + def __init__(self, srcdir: str | os.PathLike[str], source_suffix: Iterable[str]) -> None: #: Source directory. self.srcdir = srcdir #: source_suffix. Same as :confval:`source_suffix`. - self.source_suffix = source_suffix + self.source_suffix = tuple(source_suffix) + self._first_source_suffix = next(iter(self.source_suffix), "") - #: The name of documents belongs to this project. + #: The name of documents belonging to this project. self.docnames: set[str] = set() + # Bijective mapping between docnames and (srcdir relative) paths. + self._path_to_docname: dict[str, str] = {} + self._docname_to_path: dict[str, str] = {} + def restore(self, other: Project) -> None: """Take over a result of last build.""" self.docnames = other.docnames + self._path_to_docname = other._path_to_docname + self._docname_to_path = other._docname_to_path def discover(self, exclude_paths: Iterable[str] = (), include_paths: Iterable[str] = ("**",)) -> set[str]: """Find all document files in the source directory and put them in :attr:`docnames`. """ - self.docnames = set() + + self.docnames.clear() + self._path_to_docname.clear() + self._docname_to_path.clear() + for filename in get_matching_files( self.srcdir, include_paths, [*exclude_paths] + EXCLUDE_PATHS, ): - docname = self.path2doc(filename) - if docname: + if docname := self.path2doc(filename): if docname in self.docnames: pattern = os.path.join(self.srcdir, docname) + '.*' files = [relpath(f, self.srcdir) for f in glob(pattern)] logger.warning(__('multiple files found for the document "%s": %r\n' 'Use %r for the build.'), - docname, files, self.doc2path(docname), once=True) + docname, files, self.doc2path(docname, absolute=True), + once=True) elif os.access(os.path.join(self.srcdir, filename), os.R_OK): self.docnames.add(docname) + self._path_to_docname[filename] = docname + self._docname_to_path[docname] = filename else: - logger.warning(__("document not readable. Ignored."), location=docname) + logger.warning(__("Ignored unreadable document %r."), + filename, location=docname) return self.docnames @@ -66,32 +80,33 @@ def path2doc(self, filename: str | os.PathLike[str]) -> str | None: *filename* should be absolute or relative to the source directory. """ - if str(filename).startswith(str(self.srcdir)): - filename = relpath(filename, self.srcdir) - for suffix in self.source_suffix: - if str(filename).endswith(suffix): - filename = path_stabilize(filename) - return filename[:-len(suffix)] - - # the file does not have docname - return None - - def doc2path(self, docname: str, basedir: bool = True) -> str: + try: + return self._path_to_docname[filename] # type: ignore[index] + except KeyError: + if os.path.isabs(filename): + try: + filename = os.path.relpath(filename, self.srcdir) + except ValueError: + pass + for suffix in self.source_suffix: + if os.path.basename(filename).endswith(suffix): + return path_stabilize(filename).removesuffix(suffix) + + # the file does not have a docname + return None + + def doc2path(self, docname: str, absolute: bool) -> str: """Return the filename for the document name. - If *basedir* is True, return as an absolute path. + If *absolute* is True, return as an absolute path. Else, return as a relative path to the source directory. """ - docname = docname.replace(SEP, os.path.sep) - basename = os.path.join(self.srcdir, docname) - for suffix in self.source_suffix: - if os.path.isfile(basename + suffix): - break - else: - # document does not exist - suffix = list(self.source_suffix)[0] - - if basedir: - return basename + suffix - else: - return docname + suffix + try: + filename = self._docname_to_path[docname] + except KeyError: + # Backwards compatibility: the document does not exist + filename = docname + self._first_source_suffix + + if absolute: + return os.path.join(self.srcdir, filename) + return filename diff --git a/sphinx/util/osutil.py b/sphinx/util/osutil.py index a2c15c34ca5..94f975b6439 100644 --- a/sphinx/util/osutil.py +++ b/sphinx/util/osutil.py @@ -26,16 +26,16 @@ SEP = "/" -def os_path(canonicalpath: str) -> str: - return canonicalpath.replace(SEP, path.sep) +def os_path(canonical_path: str, /) -> str: + return canonical_path.replace(SEP, path.sep) -def canon_path(nativepath: str | os.PathLike[str]) -> str: +def canon_path(native_path: str | os.PathLike[str], /) -> str: """Return path in OS-independent form""" - return str(nativepath).replace(path.sep, SEP) + return os.fspath(native_path).replace(path.sep, SEP) -def path_stabilize(filepath: str | os.PathLike[str]) -> str: +def path_stabilize(filepath: str | os.PathLike[str], /) -> str: "Normalize path separator and unicode string" new_path = canon_path(filepath) return unicodedata.normalize('NFC', new_path) diff --git a/tests/test_project.py b/tests/test_project.py index 790865fc1cf..45ae7c81bce 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -4,35 +4,41 @@ from sphinx.project import Project +DOCNAMES = {'autodoc', 'bom', 'extapi', 'extensions', 'footnote', 'images', + 'includes', 'index', 'lists', 'markup', 'math', 'objects', + 'subdir/excluded', 'subdir/images', 'subdir/includes'} +SUBDIR_DOCNAMES = {'subdir/excluded', 'subdir/images', 'subdir/includes'} -def test_project_discover(rootdir): - project = Project(str(rootdir / 'test-root'), {}) - - docnames = {'autodoc', 'bom', 'extapi', 'extensions', 'footnote', 'images', - 'includes', 'index', 'lists', 'markup', 'math', 'objects', - 'subdir/excluded', 'subdir/images', 'subdir/includes'} - subdir_docnames = {'subdir/excluded', 'subdir/images', 'subdir/includes'} +def test_project_discover_basic(rootdir): # basic case - project.source_suffix = ['.txt'] - assert project.discover() == docnames + project = Project(rootdir / 'test-root', ['.txt']) + assert project.discover() == DOCNAMES + + +def test_project_discover_exclude_patterns(rootdir): + project = Project(rootdir / 'test-root', ['.txt']) # exclude_paths option - assert project.discover(['subdir/*']) == docnames - subdir_docnames + assert project.discover(['subdir/*']) == DOCNAMES - SUBDIR_DOCNAMES + assert project.discover(['.txt', 'subdir/*']) == DOCNAMES - SUBDIR_DOCNAMES - # exclude_patterns - assert project.discover(['.txt', 'subdir/*']) == docnames - subdir_docnames +def test_project_discover_multiple_suffixes(rootdir): # multiple source_suffixes - project.source_suffix = ['.txt', '.foo'] - assert project.discover() == docnames | {'otherext'} + project = Project(rootdir / 'test-root', ['.txt', '.foo']) + assert project.discover() == DOCNAMES | {'otherext'} + +def test_project_discover_complicated_suffix(rootdir): # complicated source_suffix - project.source_suffix = ['.foo.png'] + project = Project(rootdir / 'test-root', ['.foo.png']) assert project.discover() == {'img'} + +def test_project_discover_templates_path(rootdir): # templates_path - project.source_suffix = ['.html'] + project = Project(rootdir / 'test-root', ['.html']) assert project.discover() == {'_templates/layout', '_templates/customsb', '_templates/contentssb'} @@ -40,15 +46,14 @@ def test_project_discover(rootdir): assert project.discover(['_templates']) == set() -@pytest.mark.sphinx(testroot='basic') -def test_project_path2doc(app): - project = Project(app.srcdir, app.config.source_suffix) +def test_project_path2doc(rootdir): + project = Project(rootdir / 'test-basic', {'.rst': 'restructuredtext'}) assert project.path2doc('index.rst') == 'index' assert project.path2doc('index.foo') is None # unknown extension assert project.path2doc('index.foo.rst') == 'index.foo' assert project.path2doc('index') is None assert project.path2doc('path/to/index.rst') == 'path/to/index' - assert project.path2doc(str(app.srcdir / 'to/index.rst')) == 'to/index' + assert project.path2doc(rootdir / 'test-basic' / 'to/index.rst') == 'to/index' @pytest.mark.sphinx(srcdir='project_doc2path', testroot='basic') @@ -56,17 +61,18 @@ def test_project_doc2path(app): source_suffix = {'.rst': 'restructuredtext', '.txt': 'restructuredtext'} project = Project(app.srcdir, source_suffix) - assert project.doc2path('index') == str(app.srcdir / 'index.rst') - - # first source_suffix is used for missing file - assert project.doc2path('foo') == str(app.srcdir / 'foo.rst') - - # matched source_suffix is used if exists - (app.srcdir / 'foo.txt').write_text('', encoding='utf8') - assert project.doc2path('foo') == str(app.srcdir / 'foo.txt') + project.discover() # absolute path - assert project.doc2path('index', basedir=True) == str(app.srcdir / 'index.rst') + assert project.doc2path('index', absolute=True) == str(app.srcdir / 'index.rst') # relative path - assert project.doc2path('index', basedir=False) == 'index.rst' + assert project.doc2path('index', absolute=False) == 'index.rst' + + # first source_suffix is used for missing file + assert project.doc2path('foo', absolute=False) == 'foo.rst' + + # matched source_suffix is used if exists + (app.srcdir / 'bar.txt').touch() + project.discover() + assert project.doc2path('bar', absolute=False) == 'bar.txt'