From d8ad20331c46570b5795511111423839ba36765b Mon Sep 17 00:00:00 2001 From: Bruno Oliveira Date: Sat, 17 Feb 2024 10:39:15 -0300 Subject: [PATCH] Change importlib to first try to import modules using the standard mechanism As detailed in https://github.com/pytest-dev/pytest/issues/11475#issuecomment-1937043670, currently with `--import-mode=importlib` pytest will try to import every file by using a unique module name, regardless if that module could be imported using the normal import mechanism without touching `sys.path`. This has the consequence that non-test modules available in `sys.path` (via other mechanism, such as being installed into a virtualenv, PYTHONPATH, etc) would end up being imported as standalone modules, instead of imported with their expected module names. To illustrate: ``` .env/ lib/ site-packages/ anndata/ core.py ``` Given `anndata` is installed into the virtual environment, `python -c "import anndata.core"` works, but pytest with `importlib` mode would import that module as a standalone module named `".env.lib.site-packages.anndata.core"`, because importlib module was designed to import test files which are not reachable from `sys.path`, but now it is clear that normal modules should be imported using the standard mechanisms if possible. Now `imporlib` mode will first try to import the module normally, without changing `sys.path`, and if that fails it falls back to importing the module as a standalone module. This supersedes #11931. Fix #11475 Close #11931 --- changelog/11311.improvement.rst | 3 +- changelog/11475.improvement.rst | 1 + doc/en/explanation/goodpractices.rst | 6 +- doc/en/explanation/pythonpath.rst | 80 ++++++++--- src/_pytest/pathlib.py | 132 ++++++++++++++--- testing/test_doctest.py | 10 +- testing/test_pathlib.py | 205 +++++++++++++++++++++++++++ 7 files changed, 391 insertions(+), 46 deletions(-) create mode 100644 changelog/11475.improvement.rst diff --git a/changelog/11311.improvement.rst b/changelog/11311.improvement.rst index 0072f39743a..232e53b8420 100644 --- a/changelog/11311.improvement.rst +++ b/changelog/11311.improvement.rst @@ -1,4 +1,3 @@ -When using ``--override-ini`` for paths in invocations without a configuration file defined, the current working directory is used -as the relative directory. +When using ``--override-ini`` for paths in invocations without a configuration file defined, the current working directory is used as the relative directory. Previoulsy this would raise an :class:`AssertionError`. diff --git a/changelog/11475.improvement.rst b/changelog/11475.improvement.rst new file mode 100644 index 00000000000..fc6e8be3a3a --- /dev/null +++ b/changelog/11475.improvement.rst @@ -0,0 +1 @@ +:ref:`--import-mode=importlib ` now tries to import modules using the standard import mechanism (but still without changing :py:data:`sys.path`), falling back to importing modules directly only if that fails. diff --git a/doc/en/explanation/goodpractices.rst b/doc/en/explanation/goodpractices.rst index efde420cd8f..1390ba4e8fe 100644 --- a/doc/en/explanation/goodpractices.rst +++ b/doc/en/explanation/goodpractices.rst @@ -60,8 +60,10 @@ Within Python modules, ``pytest`` also discovers tests using the standard :ref:`unittest.TestCase ` subclassing technique. -Choosing a test layout / import rules -------------------------------------- +.. _`test layout`: + +Choosing a test layout +---------------------- ``pytest`` supports two common test layouts: diff --git a/doc/en/explanation/pythonpath.rst b/doc/en/explanation/pythonpath.rst index 5b533f47fdc..a52a5b1be58 100644 --- a/doc/en/explanation/pythonpath.rst +++ b/doc/en/explanation/pythonpath.rst @@ -15,14 +15,23 @@ changing :data:`sys.path`. Some aspects of the import process can be controlled through the ``--import-mode`` command-line flag, which can assume these values: +.. _`import-mode-prepend`: + * ``prepend`` (default): the directory path containing each module will be inserted into the *beginning* - of :py:data:`sys.path` if not already there, and then imported with the :func:`importlib.import_module ` function. + of :py:data:`sys.path` if not already there, and then imported with + the :func:`importlib.import_module ` function. + + It is highly recommended to arrange your test modules as packages by adding ``__init__.py`` files to your directories + containing tests. This will make the tests part of a proper Python package, allowing pytest to resolve their full + name (for example ``tests.core.test_core`` for ``test_core.py`` inside the ``tests.core`` package). - This requires test module names to be unique when the test directory tree is not arranged in - packages, because the modules will put in :py:data:`sys.modules` after importing. + If the test directory tree is not arranged as packages, then each test file needs to have a unique name + compared to the other test files, otherwise pytest will raise an error if it finds two tests with the same name. This is the classic mechanism, dating back from the time Python 2 was still supported. +.. _`import-mode-append`: + * ``append``: the directory containing each module is appended to the end of :py:data:`sys.path` if not already there, and imported with :func:`importlib.import_module `. @@ -38,32 +47,71 @@ these values: the tests will run against the installed version of ``pkg_under_test`` when ``--import-mode=append`` is used whereas with ``prepend`` they would pick up the local version. This kind of confusion is why - we advocate for using :ref:`src ` layouts. + we advocate for using :ref:`src-layouts `. Same as ``prepend``, requires test module names to be unique when the test directory tree is not arranged in packages, because the modules will put in :py:data:`sys.modules` after importing. -* ``importlib``: new in pytest-6.0, this mode uses more fine control mechanisms provided by :mod:`importlib` to import test modules. This gives full control over the import process, and doesn't require changing :py:data:`sys.path`. +.. _`import-mode-importlib`: + +* ``importlib``: this mode uses more fine control mechanisms provided by :mod:`importlib` to import test modules, without changing :py:data:`sys.path`. + + It works like this: + + 1. Given a certain module path, for example ``tests/core/test_models.py``, derives a canonical name + like ``tests.core.test_models`` and tries to import it. + + For non-test modules this will work if they are accessible via :py:data:`sys.path`, so + for example ``.env/lib/site-packages/app/core.py`` will be importable as ``app.core``. + This is desirable happens when plugins import non-test modules (for example doctesting). + + If this step succeeds, the module is returned. + + For test modules, unless they are reachable from :py:data:`sys.path`, this step will fail. - For this reason this doesn't require test module names to be unique. + 2. If the previous step fails, we import the module directly using ``importlib`` facilities, which lets us import it without + changing :py:data:`sys.path`. - One drawback however is that test modules are non-importable by each other. Also, utility - modules in the tests directories are not automatically importable because the tests directory is no longer - added to :py:data:`sys.path`. + Because Python requires the module to also be available in :py:data:`sys.modules`, pytest derives unique name for it based + on its relative location from the ``rootdir``, and adds the module to :py:data:`sys.modules`. - Initially we intended to make ``importlib`` the default in future releases, however it is clear now that - it has its own set of drawbacks so the default will remain ``prepend`` for the foreseeable future. + For example, ``tests/core/test_models.py`` will end up being imported as the module ``tests.core.test_models``. + + Advantages of this mode: + + * pytest will not change :py:data:`sys.path` at all. + * Test module names do not need to be unique -- pytest will generate a unique name automatically based on the ``rootdir``. + + Disadvantages: + + * Test modules are non-importable by each other. + * Testing utility modules in the tests directories (for example a ``tests.helpers`` module containing test-related functions/classes) + are not importable. The recommendation in this case it to place testing utility modules together with the application/library + code, for example ``app.testing.helpers``. + + Important: by "test utility modules" we mean functions/classes which are imported by + other tests directly; this does not include fixtures, which should be placed in ``conftest.py`` files, along + with the test modules, and are discovered automatically by pytest. + + .. versionadded:: 6.0 + +.. note:: + + Initially we intended to make ``importlib`` the default in future releases, however it is clear now that + it has its own set of drawbacks so the default will remain ``prepend`` for the foreseeable future. .. seealso:: The :confval:`pythonpath` configuration variable. + :ref:`test layout`. + ``prepend`` and ``append`` import modes scenarios ------------------------------------------------- Here's a list of scenarios when using ``prepend`` or ``append`` import modes where pytest needs to -change ``sys.path`` in order to import test modules or ``conftest.py`` files, and the issues users +change :py:data:`sys.path` in order to import test modules or ``conftest.py`` files, and the issues users might encounter because of that. Test modules / ``conftest.py`` files inside packages @@ -92,7 +140,7 @@ pytest will find ``foo/bar/tests/test_foo.py`` and realize it is part of a packa there's an ``__init__.py`` file in the same folder. It will then search upwards until it can find the last folder which still contains an ``__init__.py`` file in order to find the package *root* (in this case ``foo/``). To load the module, it will insert ``root/`` to the front of -``sys.path`` (if not there already) in order to load +:py:data:`sys.path` (if not there already) in order to load ``test_foo.py`` as the *module* ``foo.bar.tests.test_foo``. The same logic applies to the ``conftest.py`` file: it will be imported as ``foo.conftest`` module. @@ -122,8 +170,8 @@ When executing: pytest will find ``foo/bar/tests/test_foo.py`` and realize it is NOT part of a package given that there's no ``__init__.py`` file in the same folder. It will then add ``root/foo/bar/tests`` to -``sys.path`` in order to import ``test_foo.py`` as the *module* ``test_foo``. The same is done -with the ``conftest.py`` file by adding ``root/foo`` to ``sys.path`` to import it as ``conftest``. +:py:data:`sys.path` in order to import ``test_foo.py`` as the *module* ``test_foo``. The same is done +with the ``conftest.py`` file by adding ``root/foo`` to :py:data:`sys.path` to import it as ``conftest``. For this reason this layout cannot have test modules with the same name, as they all will be imported in the global import namespace. @@ -136,7 +184,7 @@ Invoking ``pytest`` versus ``python -m pytest`` ----------------------------------------------- Running pytest with ``pytest [...]`` instead of ``python -m pytest [...]`` yields nearly -equivalent behaviour, except that the latter will add the current directory to ``sys.path``, which +equivalent behaviour, except that the latter will add the current directory to :py:data:`sys.path`, which is standard ``python`` behavior. See also :ref:`invoke-python`. diff --git a/src/_pytest/pathlib.py b/src/_pytest/pathlib.py index 1e0891153e5..f1fc1366341 100644 --- a/src/_pytest/pathlib.py +++ b/src/_pytest/pathlib.py @@ -522,35 +522,38 @@ def import_path( raise ImportError(path) if mode is ImportMode.importlib: + # Try to import this module using the standard import mechanisms, but + # without touching sys.path. + try: + pkg_root, module_name = resolve_pkg_root_and_module_name( + path, consider_ns_packages=True + ) + except ValueError: + pass + else: + mod = _import_module_using_spec( + module_name, path, pkg_root, insert_modules=False + ) + if mod is not None: + return mod + + # Could not import the module with the current sys.path, so we fall back + # to importing the file as a single module, not being a part of a package. module_name = module_name_from_path(path, root) with contextlib.suppress(KeyError): return sys.modules[module_name] - for meta_importer in sys.meta_path: - spec = meta_importer.find_spec(module_name, [str(path.parent)]) - if spec is not None: - break - else: - spec = importlib.util.spec_from_file_location(module_name, str(path)) - - if spec is None: + mod = _import_module_using_spec( + module_name, path, path.parent, insert_modules=True + ) + if mod is None: raise ImportError(f"Can't find module {module_name} at location {path}") - mod = importlib.util.module_from_spec(spec) - sys.modules[module_name] = mod - spec.loader.exec_module(mod) # type: ignore[union-attr] - insert_missing_modules(sys.modules, module_name) return mod - pkg_path = resolve_package_path(path) - if pkg_path is not None: - pkg_root = pkg_path.parent - names = list(path.with_suffix("").relative_to(pkg_root).parts) - if names[-1] == "__init__": - names.pop() - module_name = ".".join(names) - else: - pkg_root = path.parent - module_name = path.stem + try: + pkg_root, module_name = resolve_pkg_root_and_module_name(path) + except ValueError: + pkg_root, module_name = path.parent, path.stem # Change sys.path permanently: restoring it at the end of this function would cause surprising # problems because of delayed imports: for example, a conftest.py file imported by this function @@ -592,6 +595,40 @@ def import_path( return mod +def _import_module_using_spec( + module_name: str, module_path: Path, module_location: Path, *, insert_modules: bool +) -> Optional[ModuleType]: + """ + Tries to import a module by its canonical name, path to the .py file, and its + parent location. + + :param insert_modules: + If True, will call insert_missing_modules to create empty intermediate modules + for made-up module names (when importing test files not reachable from sys.path). + Note: we can probably drop insert_missing_modules altogether: instead of + generating module names such as "src.tests.test_foo", which require intermediate + empty modules, we might just as well generate unique module names like + "src_tests_test_foo". + """ + # Checking with sys.meta_path first in case one of its hooks can import this module, + # such as our own assertion-rewrite hook. + for meta_importer in sys.meta_path: + spec = meta_importer.find_spec(module_name, [str(module_location)]) + if spec is not None: + break + else: + spec = importlib.util.spec_from_file_location(module_name, str(module_path)) + if spec is not None: + mod = importlib.util.module_from_spec(spec) + sys.modules[module_name] = mod + spec.loader.exec_module(mod) # type: ignore[union-attr] + if insert_modules: + insert_missing_modules(sys.modules, module_name) + return mod + + return None + + # Implement a special _is_same function on Windows which returns True if the two filenames # compare equal, to circumvent os.path.samefile returning False for mounts in UNC (#7678). if sys.platform.startswith("win"): @@ -628,7 +665,14 @@ def module_name_from_path(path: Path, root: Path) -> str: if len(path_parts) >= 2 and path_parts[-1] == "__init__": path_parts = path_parts[:-1] - return ".".join(path_parts) + module_name = ".".join(path_parts) + # Modules starting with "." are considered relative, but given we + # are returning a made-up path that is intended to be imported as a global package and + # not as a relative module, replace the "." at the start with "_", which should be enough + # for our purposes. + if module_name.startswith("."): + module_name = "_" + module_name[1:] + return module_name def insert_missing_modules(modules: Dict[str, ModuleType], module_name: str) -> None: @@ -689,6 +733,48 @@ def resolve_package_path(path: Path) -> Optional[Path]: return result +def resolve_pkg_root_and_module_name( + path: Path, *, consider_ns_packages: bool = False +) -> Tuple[Path, str]: + """ + Return the path to the directory of the root package that contains the + given Python file, and its module name: + + src/ + app/ + __init__.py + core/ + __init__.py + models.py + + Passing the full path to `models.py` will yield Path("src") and "app.core.models". + + If consider_ns_packages is True, then we additionally check if the top-level directory + without __init__.py is reachable from sys.path; if it is, it is then considered a namespace package: + + https://packaging.python.org/en/latest/guides/packaging-namespace-packages + + This is not the default because we need to analyze carefully if it is safe to assume this + for all imports. + + Raises ValueError if the given path does not belong to a package (missing any __init__.py files). + """ + pkg_path = resolve_package_path(path) + if pkg_path is not None: + pkg_root = pkg_path.parent + # pkg_root.parent does not contain a __init__.py file, as per resolve_package_path, + # but if it is reachable from sys.argv, it should be considered a namespace package. + # https://packaging.python.org/en/latest/guides/packaging-namespace-packages/ + if consider_ns_packages and str(pkg_root.parent) in sys.path: + pkg_root = pkg_root.parent + names = list(path.with_suffix("").relative_to(pkg_root).parts) + if names[-1] == "__init__": + names.pop() + module_name = ".".join(names) + return pkg_root, module_name + raise ValueError(f"Could not resolve for {path}") + + def scandir( path: Union[str, "os.PathLike[str]"], sort_key: Callable[["os.DirEntry[str]"], object] = lambda entry: entry.name, diff --git a/testing/test_doctest.py b/testing/test_doctest.py index 32897a916fe..58fce244f45 100644 --- a/testing/test_doctest.py +++ b/testing/test_doctest.py @@ -117,12 +117,12 @@ def test_simple_doctestfile(self, pytester: Pytester): def test_importmode(self, pytester: Pytester): pytester.makepyfile( **{ - "namespacepkg/innerpkg/__init__.py": "", - "namespacepkg/innerpkg/a.py": """ + "src/namespacepkg/innerpkg/__init__.py": "", + "src/namespacepkg/innerpkg/a.py": """ def some_func(): return 42 """, - "namespacepkg/innerpkg/b.py": """ + "src/namespacepkg/innerpkg/b.py": """ from namespacepkg.innerpkg.a import some_func def my_func(): ''' @@ -133,6 +133,10 @@ def my_func(): """, } ) + # For 'namespacepkg' to be considered a namespace package, its containing directory + # needs to be reachable from sys.path: + # https://packaging.python.org/en/latest/guides/packaging-namespace-packages + pytester.syspathinsert(pytester.path / "src") reprec = pytester.inline_run("--doctest-modules", "--import-mode=importlib") reprec.assertoutcome(passed=1) diff --git a/testing/test_pathlib.py b/testing/test_pathlib.py index 075259009de..29f2a743d2e 100644 --- a/testing/test_pathlib.py +++ b/testing/test_pathlib.py @@ -3,12 +3,14 @@ import os.path from pathlib import Path import pickle +import shutil import sys from textwrap import dedent from types import ModuleType from typing import Any from typing import Generator from typing import Iterator +from typing import Tuple import unittest.mock from _pytest.monkeypatch import MonkeyPatch @@ -25,6 +27,7 @@ from _pytest.pathlib import maybe_delete_a_numbered_dir from _pytest.pathlib import module_name_from_path from _pytest.pathlib import resolve_package_path +from _pytest.pathlib import resolve_pkg_root_and_module_name from _pytest.pathlib import safe_exists from _pytest.pathlib import symlink_or_skip from _pytest.pathlib import visit @@ -598,6 +601,38 @@ def test_module_name_from_path(self, tmp_path: Path) -> None: result = module_name_from_path(tmp_path / "__init__.py", tmp_path) assert result == "__init__" + # Modules which start with "." are considered relative and will not be imported + # unless part of a package, so we replace it with a "_" when generating the fake module name. + result = module_name_from_path(tmp_path / ".env/tests/test_foo.py", tmp_path) + assert result == "_env.tests.test_foo" + + def test_resolve_pkg_root_and_module_name( + self, tmp_path: Path, monkeypatch: MonkeyPatch + ) -> None: + # Create a directory structure first without __init__.py files. + (tmp_path / "src/app/core").mkdir(parents=True) + models_py = tmp_path / "src/app/core/models.py" + models_py.touch() + with pytest.raises(ValueError): + _ = resolve_pkg_root_and_module_name(models_py) + + # Create the __init__.py files, it should now resolve to a proper module name. + (tmp_path / "src/app/__init__.py").touch() + (tmp_path / "src/app/core/__init__.py").touch() + assert resolve_pkg_root_and_module_name(models_py) == ( + tmp_path / "src", + "app.core.models", + ) + + # If we add tmp_path to sys.path, src becomes a namespace package. + monkeypatch.syspath_prepend(tmp_path) + assert resolve_pkg_root_and_module_name( + models_py, consider_ns_packages=True + ) == ( + tmp_path, + "src.app.core.models", + ) + def test_insert_missing_modules( self, monkeypatch: MonkeyPatch, tmp_path: Path ) -> None: @@ -685,6 +720,176 @@ def test_my_test(): result = pytester.runpytest("--import-mode=importlib") result.stdout.fnmatch_lines("* 1 passed *") + def create_installed_doctests_and_tests_dir( + self, path: Path, monkeypatch: MonkeyPatch + ) -> Tuple[Path, Path, Path]: + """ + Create a directory structure where the application code is installed in a virtual environment, + and the tests are in an outside ".tests" directory. + + Return the paths to the core module (installed in the virtualenv), and the test modules. + """ + app = path / "src/app" + app.mkdir(parents=True) + (app / "__init__.py").touch() + core_py = app / "core.py" + core_py.write_text( + dedent( + """ + def foo(): + ''' + >>> 1 + 1 + 2 + ''' + """ + ), + encoding="ascii", + ) + + # Install it into a site-packages directory, and add it to sys.path, mimicking what + # happens when installing into a virtualenv. + site_packages = path / ".env/lib/site-packages" + site_packages.mkdir(parents=True) + shutil.copytree(app, site_packages / "app") + assert (site_packages / "app/core.py").is_file() + + monkeypatch.syspath_prepend(site_packages) + + # Create the tests files, outside 'src' and the virtualenv. + # We use the same test name on purpose, but in different directories, to ensure + # this works as advertised. + conftest_path1 = path / ".tests/a/conftest.py" + conftest_path1.parent.mkdir(parents=True) + conftest_path1.write_text( + dedent( + """ + import pytest + @pytest.fixture + def a_fix(): return "a" + """ + ), + encoding="ascii", + ) + test_path1 = path / ".tests/a/test_core.py" + test_path1.write_text( + dedent( + """ + import app.core + def test(a_fix): + assert a_fix == "a" + """, + ), + encoding="ascii", + ) + + conftest_path2 = path / ".tests/b/conftest.py" + conftest_path2.parent.mkdir(parents=True) + conftest_path2.write_text( + dedent( + """ + import pytest + @pytest.fixture + def b_fix(): return "b" + """ + ), + encoding="ascii", + ) + + test_path2 = path / ".tests/b/test_core.py" + test_path2.write_text( + dedent( + """ + import app.core + def test(b_fix): + assert b_fix == "b" + """, + ), + encoding="ascii", + ) + return (site_packages / "app/core.py"), test_path1, test_path2 + + def test_import_using_normal_mechanism_first( + self, monkeypatch: MonkeyPatch, pytester: Pytester + ) -> None: + """ + Test import_path imports from the canonical location when possible first, only + falling back to its normal flow when the module being imported is not reachable via sys.path (#11475). + """ + core_py, test_path1, test_path2 = self.create_installed_doctests_and_tests_dir( + pytester.path, monkeypatch + ) + + # core_py is reached from sys.path, so should be imported normally. + mod = import_path(core_py, mode="importlib", root=pytester.path) + assert mod.__name__ == "app.core" + assert mod.__file__ and Path(mod.__file__) == core_py + + # tests are not reachable from sys.path, so they are imported as a standalone modules. + # Instead of '.tests.a.test_core', we import as "_tests.a.test_core" because + # importlib considers module names starting with '.' to be local imports. + mod = import_path(test_path1, mode="importlib", root=pytester.path) + assert mod.__name__ == "_tests.a.test_core" + mod = import_path(test_path2, mode="importlib", root=pytester.path) + assert mod.__name__ == "_tests.b.test_core" + + def test_import_using_normal_mechanism_first_integration( + self, monkeypatch: MonkeyPatch, pytester: Pytester + ) -> None: + """ + Same test as above, but verify the behavior calling pytest. + + We should not make this call in the same test as above, as the modules have already + been imported by separate import_path() calls. + """ + core_py, test_path1, test_path2 = self.create_installed_doctests_and_tests_dir( + pytester.path, monkeypatch + ) + result = pytester.runpytest( + "--import-mode=importlib", + "--doctest-modules", + "--pyargs", + "app", + "./.tests", + ) + result.stdout.fnmatch_lines( + [ + f"{core_py.relative_to(pytester.path)} . *", + f"{test_path1.relative_to(pytester.path)} . *", + f"{test_path2.relative_to(pytester.path)} . *", + "* 3 passed*", + ] + ) + + def test_import_path_imports_correct_file(self, pytester: Pytester) -> None: + """ + Import the module by the given path, even if other module with the same name + is reachable from sys.path. + """ + pytester.syspathinsert() + # Create a 'x.py' module reachable from sys.path that raises AssertionError + # if imported. + x_at_root = pytester.path / "x.py" + x_at_root.write_text("raise AssertionError('x at root')", encoding="ascii") + + # Create another x.py module, but in some subdirectories to ensure it is not + # accessible from sys.path. + x_in_sub_folder = pytester.path / "a/b/x.py" + x_in_sub_folder.parent.mkdir(parents=True) + x_in_sub_folder.write_text("X = 'a/b/x'", encoding="ascii") + + # Import our x.py module from the subdirectories. + # The 'x.py' module from sys.path was not imported for sure because + # otherwise we would get an AssertionError. + mod = import_path( + x_in_sub_folder, mode=ImportMode.importlib, root=pytester.path + ) + assert mod.__file__ and Path(mod.__file__) == x_in_sub_folder + assert mod.X == "a/b/x" + + # Attempt to import root 'x.py'. + with pytest.raises(AssertionError, match="x at root"): + _ = import_path(x_at_root, mode=ImportMode.importlib, root=pytester.path) + def test_safe_exists(tmp_path: Path) -> None: d = tmp_path.joinpath("some_dir")