Skip to content

Commit

Permalink
Fix ignoring input files for symlink reasons
Browse files Browse the repository at this point in the history
This relates to psf#4015, psf#4161 and the behaviour of os.getcwd()

Black is a big user of pathlib and as such loves doing `.resolve()`,
since for a long time it was the only good way of getting an absolute
path in pathlib. However, this has two problems:

The first minor problem is performance, e.g. in psf#3751 I (safely) got rid
of a bunch of `.resolve()` which made Black 40% faster on cached runs.

The second more important problem is that always resolving symlinks
results in unintuitive exclusion behaviour. For instance, a gitignored
symlink should never alter formatting of your actual code. This kind of
thing was reported by users a few times.

In psf#3846, I improved the exclusion rule logic for symlinks in
`gen_python_files` and everything was good.

But `gen_python_files` isn't enough, there's also `get_sources`, which
handles user specified paths directly (instead of files Black
discovers). So in psf#4015, I made a very similar change to psf#3846 for
`get_sources`, and this is where some problems began.

The core issue was the line:
```
root_relative_path = path.absolute().relative_to(root).as_posix()
```
The first issue is that despite root being computed from user inputs, we
call `.resolve()` while computing it (likely unecessarily). Which means
that `path` may not actually be relative to `root`. So I started off
this PR trying to fix that, when I ran into the second issue. Which is
that `os.getcwd()` (as called by `os.path.abspath` or `Path.absolute` or
`Path.cwd`) also often resolves symlinks!
```
>>> import os
>>> os.environ.get("PWD")
'/Users/shantanu/dev/black/symlink/bug'
>>> os.getcwd()
'/Users/shantanu/dev/black/actual/bug'
```
This also meant that the breakage often would not show up when input
relative paths.

This doesn't affect `gen_python_files` / psf#3846 because things are always
absolute and known to be relative to `root`.

Anyway, it looks like psf#4161 fixed the crash by just swallowing the error
and ignoring the file. Instead, we should just try to compute the actual
relative path. I think this PR should be quite safe, but we could also
consider reverting some of the previous changes; the associated issues
weren't too popular.

At the same time, I think there's still behaviour that can be improved
and I kind of want to make larger changes, but maybe I'll save that for
if we do something like psf#3952

Hopefully fixes psf#4205, fixes psf#4209, actual fix for psf#4077
  • Loading branch information
hauntsaninja committed Feb 11, 2024
1 parent a201003 commit 418aba9
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 53 deletions.
15 changes: 6 additions & 9 deletions src/black/__init__.py
Expand Up @@ -44,12 +44,12 @@
STDIN_PLACEHOLDER,
)
from black.files import (
best_effort_relative_path,
find_project_root,
find_pyproject_toml,
find_user_pyproject_toml,
gen_python_files,
get_gitignore,
get_root_relative_path,
parse_pyproject_toml,
path_is_excluded,
resolves_outside_root_or_cannot_stat,
Expand Down Expand Up @@ -734,6 +734,7 @@ def get_sources(
"""Compute the set of files to be formatted."""
sources: Set[Path] = set()

assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
using_default_exclude = exclude is None
exclude = re_compile_maybe_verbose(DEFAULT_EXCLUDES) if exclude is None else exclude
gitignore: Optional[Dict[Path, PathSpec]] = None
Expand All @@ -749,11 +750,12 @@ def get_sources(

# Compare the logic here to the logic in `gen_python_files`.
if is_stdin or path.is_file():
root_relative_path = get_root_relative_path(path, root, report)

if root_relative_path is None:
if resolves_outside_root_or_cannot_stat(path, root, report):
if verbose:
out(f'Skipping invalid source: "{path}"', fg="red")
continue

root_relative_path = best_effort_relative_path(path, root).as_posix()
root_relative_path = "/" + root_relative_path

# Hard-exclude any files that matches the `--force-exclude` regex.
Expand All @@ -763,11 +765,6 @@ def get_sources(
)
continue

if resolves_outside_root_or_cannot_stat(path, root, report):
if verbose:
out(f'Skipping invalid source: "{path}"', fg="red")
continue

if is_stdin:
path = Path(f"{STDIN_PLACEHOLDER}{str(path)}")

Expand Down
44 changes: 27 additions & 17 deletions src/black/files.py
Expand Up @@ -48,6 +48,11 @@ def _load_toml(path: Union[Path, str]) -> Dict[str, Any]:
return tomllib.load(f)


@lru_cache
def _cached_resolve(path: Path) -> Path:
return path.resolve()


@lru_cache
def find_project_root(
srcs: Sequence[str], stdin_filename: Optional[str] = None
Expand All @@ -67,9 +72,9 @@ def find_project_root(
if stdin_filename is not None:
srcs = tuple(stdin_filename if s == "-" else s for s in srcs)
if not srcs:
srcs = [str(Path.cwd().resolve())]
srcs = [str(_cached_resolve(Path.cwd()))]

path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
path_srcs = [_cached_resolve(Path(Path.cwd(), src)) for src in srcs]

# A list of lists of parents for each 'src'. 'src' is included as a
# "parent" of itself if it is a directory
Expand Down Expand Up @@ -236,7 +241,7 @@ def find_user_pyproject_toml() -> Path:
else:
config_root = os.environ.get("XDG_CONFIG_HOME", "~/.config")
user_config_path = Path(config_root).expanduser() / "black"
return user_config_path.resolve()
return _cached_resolve(user_config_path)


@lru_cache
Expand Down Expand Up @@ -266,27 +271,31 @@ def resolves_outside_root_or_cannot_stat(
try:
if sys.version_info < (3, 8, 6):
path = path.absolute() # https://bugs.python.org/issue33660
resolved_path = path.resolve()
return get_root_relative_path(resolved_path, root, report) is None
resolved_path = _cached_resolve(path)
except OSError as e:
if report:
report.path_ignored(path, f"cannot be read because {e}")
return True


def get_root_relative_path(
path: Path,
root: Path,
report: Optional[Report] = None,
) -> Optional[str]:
"""Returns the file path relative to the 'root' directory"""
try:
root_relative_path = path.absolute().relative_to(root).as_posix()
resolved_path.relative_to(root).as_posix()
except ValueError:
if report:
report.path_ignored(path, f"is a symbolic link that points outside {root}")
return None
return root_relative_path
return True
return False


def best_effort_relative_path(path: Path, root: Path) -> Path:
# Precondition: resolves_outside_root_or_cannot_stat(path, root) is False
try:
return path.absolute().relative_to(root)
except ValueError:
pass
root_parent = next((p for p in path.parents if _cached_resolve(p) == root), None)
if root_parent is not None:
return path.relative_to(root_parent)
# something adversarial, fallback to path guaranteed by precondition
return _cached_resolve(path).relative_to(root)


def _path_is_ignored(
Expand Down Expand Up @@ -339,7 +348,8 @@ def gen_python_files(

assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
for child in paths:
root_relative_path = child.absolute().relative_to(root).as_posix()
assert child.is_absolute()
root_relative_path = child.relative_to(root).as_posix()

# First ignore files matching .gitignore, if passed
if gitignore_dict and _path_is_ignored(
Expand Down
120 changes: 93 additions & 27 deletions tests/test_black.py
Expand Up @@ -2567,32 +2567,32 @@ def test_symlinks(self) -> None:
gitignore = PathSpec.from_lines("gitwildmatch", [])

regular = MagicMock()
regular.absolute.return_value = root / "regular.py"
regular.relative_to.return_value = Path("regular.py")
regular.resolve.return_value = root / "regular.py"
regular.is_dir.return_value = False
regular.is_file.return_value = True

outside_root_symlink = MagicMock()
outside_root_symlink.absolute.return_value = root / "symlink.py"
outside_root_symlink.relative_to.return_value = Path("symlink.py")
outside_root_symlink.resolve.return_value = Path("/nowhere")
outside_root_symlink.is_dir.return_value = False
outside_root_symlink.is_file.return_value = True

ignored_symlink = MagicMock()
ignored_symlink.absolute.return_value = root / ".mypy_cache" / "symlink.py"
ignored_symlink.relative_to.return_value = Path(".mypy_cache") / "symlink.py"
ignored_symlink.is_dir.return_value = False
ignored_symlink.is_file.return_value = True

# A symlink that has an excluded name, but points to an included name
symlink_excluded_name = MagicMock()
symlink_excluded_name.absolute.return_value = root / "excluded_name"
symlink_excluded_name.relative_to.return_value = Path("excluded_name")
symlink_excluded_name.resolve.return_value = root / "included_name.py"
symlink_excluded_name.is_dir.return_value = False
symlink_excluded_name.is_file.return_value = True

# A symlink that has an included name, but points to an excluded name
symlink_included_name = MagicMock()
symlink_included_name.absolute.return_value = root / "included_name.py"
symlink_included_name.relative_to.return_value = Path("included_name.py")
symlink_included_name.resolve.return_value = root / "excluded_name"
symlink_included_name.is_dir.return_value = False
symlink_included_name.is_file.return_value = True
Expand Down Expand Up @@ -2626,39 +2626,100 @@ def test_symlinks(self) -> None:
outside_root_symlink.resolve.assert_called_once()
ignored_symlink.resolve.assert_not_called()

def test_get_sources_symlink_and_force_exclude(self) -> None:
with TemporaryDirectory() as tempdir:
tmp = Path(tempdir).resolve()
actual = tmp / "actual"
actual.mkdir()
symlink = tmp / "symlink"
symlink.symlink_to(actual)

actual_proj = actual / "project"
actual_proj.mkdir()
(actual_proj / "module.py").write_text("print('hello')")

symlink_proj = symlink / "project"

with change_directory(symlink_proj):
assert_collected_sources(
src=["module.py"],
root=symlink_proj.resolve(),
expected=["module.py"],
)

absolute_module = symlink_proj / "module.py"
assert_collected_sources(
src=[absolute_module],
root=symlink_proj.resolve(),
expected=[absolute_module],
)

# a few tricky tests for force_exclude
flat_symlink = symlink_proj / "symlink_module.py"
flat_symlink.symlink_to(actual_proj / "module.py")
assert_collected_sources(
src=[flat_symlink],
root=symlink_proj.resolve(),
force_exclude=r"/symlink_module.py",
expected=[],
)

target = actual_proj / "target"
target.mkdir()
(target / "another.py").write_text("print('hello')")
(symlink_proj / "nested").symlink_to(target)

assert_collected_sources(
src=[symlink_proj / "nested" / "another.py"],
root=symlink_proj.resolve(),
force_exclude=r"nested",
expected=[],
)
assert_collected_sources(
src=[symlink_proj / "nested" / "another.py"],
root=symlink_proj.resolve(),
force_exclude=r"target",
expected=[symlink_proj / "nested" / "another.py"],
)

def test_get_sources_with_stdin_symlink_outside_root(
self,
) -> None:
path = THIS_DIR / "data" / "include_exclude_tests"
stdin_filename = str(path / "b/exclude/a.py")
outside_root_symlink = Path("/target_directory/a.py")
root = Path("target_directory/").resolve()
with patch("pathlib.Path.resolve", return_value=outside_root_symlink):
assert_collected_sources(
root=Path("target_directory/"),
root=root,
src=["-"],
expected=[],
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin(self) -> None:
src = ["-"]
expected = ["-"]
assert_collected_sources(src, expected, include="", exclude=r"/exclude/|a\.py")
assert_collected_sources(
src,
root=THIS_DIR.resolve(),
expected=expected,
include="",
exclude=r"/exclude/|a\.py",
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename(self) -> None:
src = ["-"]
stdin_filename = str(THIS_DIR / "data/collections.py")
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
assert_collected_sources(
src,
expected,
root=THIS_DIR.resolve(),
expected=expected,
exclude=r"/exclude/a\.py",
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename_and_exclude(self) -> None:
# Exclude shouldn't exclude stdin_filename since it is mimicking the
# file being passed directly. This is the same as
Expand All @@ -2669,12 +2730,12 @@ def test_get_sources_with_stdin_filename_and_exclude(self) -> None:
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
assert_collected_sources(
src,
expected,
root=THIS_DIR.resolve(),
expected=expected,
exclude=r"/exclude/|a\.py",
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None:
# Extend exclude shouldn't exclude stdin_filename since it is mimicking the
# file being passed directly. This is the same as
Expand All @@ -2685,40 +2746,45 @@ def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None:
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
assert_collected_sources(
src,
expected,
root=THIS_DIR.resolve(),
expected=expected,
extend_exclude=r"/exclude/|a\.py",
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename_and_force_exclude(self) -> None:
# Force exclude should exclude the file when passing it through
# stdin_filename
path = THIS_DIR / "data" / "include_exclude_tests"
stdin_filename = str(path / "b/exclude/a.py")
assert_collected_sources(
src=["-"],
root=THIS_DIR.resolve(),
expected=[],
force_exclude=r"/exclude/|a\.py",
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename_and_force_exclude_and_symlink(
self,
) -> None:
# Force exclude should exclude a symlink based on the symlink, not its target
path = THIS_DIR / "data" / "include_exclude_tests"
stdin_filename = str(path / "symlink.py")
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
target = path / "b/exclude/a.py"
with patch("pathlib.Path.resolve", return_value=target):
assert_collected_sources(
src=["-"],
expected=expected,
force_exclude=r"exclude/a\.py",
stdin_filename=stdin_filename,
)
with TemporaryDirectory() as tempdir:
tmp = Path(tempdir).resolve()
(tmp / "exclude").mkdir()
(tmp / "exclude" / "a.py").write_text("print('hello')")
(tmp / "symlink.py").symlink_to(tmp / "exclude" / "a.py")

stdin_filename = str(tmp / "symlink.py")
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
with change_directory(tmp):
assert_collected_sources(
src=["-"],
root=tmp,
expected=expected,
force_exclude=r"exclude/a\.py",
stdin_filename=stdin_filename,
)


class TestDeFactoAPI:
Expand Down

0 comments on commit 418aba9

Please sign in to comment.