From aea1c03f17ca3cd9efe4f2816615c6250ae3ba4d Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sat, 24 Jun 2023 17:32:56 -0700 Subject: [PATCH 01/11] Skip some unnecessary normalisation This speeds up black by about 40% when the cache is full --- src/black/files.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/black/files.py b/src/black/files.py index 65b2d0a8402..dd0a6264d4c 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -276,12 +276,18 @@ def normalize_path_maybe_ignore( return root_relative_path -def path_is_ignored( +def _path_is_ignored( path: Path, gitignore_dict: Dict[Path, PathSpec], report: Report ) -> bool: + assert path.is_absolute() + # Note that this logic is sensitive to the ordering of gitignore_path. Callers must ensure + # that gitignore_dict is ordered from least specific to most specific. + # This logic is currently applied post-symlink resolution. It might be more correct to apply + # ignores before resolving the symlink. for gitignore_path, pattern in gitignore_dict.items(): - relative_path = normalize_path_maybe_ignore(path, gitignore_path, report) - if relative_path is None: + try: + relative_path = path.relative_to(gitignore_path).as_posix() + except ValueError: break if pattern.match_file(relative_path): report.path_ignored(path, "matches a .gitignore file content") @@ -326,7 +332,7 @@ def gen_python_files( continue # First ignore files matching .gitignore, if passed - if gitignore_dict and path_is_ignored(child, gitignore_dict, report): + if gitignore_dict and _path_is_ignored(root / normalized_path, gitignore_dict, report): continue # Then ignore with `--exclude` `--extend-exclude` and `--force-exclude` options. From 74dd1476563e8566c29801d6638fb47a62565b46 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sat, 24 Jun 2023 17:37:41 -0700 Subject: [PATCH 02/11] lint --- src/black/files.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/black/files.py b/src/black/files.py index dd0a6264d4c..282acc9d32a 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -280,10 +280,10 @@ def _path_is_ignored( path: Path, gitignore_dict: Dict[Path, PathSpec], report: Report ) -> bool: assert path.is_absolute() - # Note that this logic is sensitive to the ordering of gitignore_path. Callers must ensure - # that gitignore_dict is ordered from least specific to most specific. - # This logic is currently applied post-symlink resolution. It might be more correct to apply - # ignores before resolving the symlink. + # Note that this logic is sensitive to the ordering of gitignore_path. Callers must + # ensure that gitignore_dict is ordered from least specific to most specific. + # This logic is currently applied post-symlink resolution. It might be more correct + # to apply ignores before resolving the symlink. for gitignore_path, pattern in gitignore_dict.items(): try: relative_path = path.relative_to(gitignore_path).as_posix() @@ -332,7 +332,9 @@ def gen_python_files( continue # First ignore files matching .gitignore, if passed - if gitignore_dict and _path_is_ignored(root / normalized_path, gitignore_dict, report): + if gitignore_dict and _path_is_ignored( + root / normalized_path, gitignore_dict, report + ): continue # Then ignore with `--exclude` `--extend-exclude` and `--force-exclude` options. From a9d0c076eef68c4d85b10d36bc442955817acea0 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sat, 24 Jun 2023 17:37:48 -0700 Subject: [PATCH 03/11] changelog --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 460f9c95114..7b76301ff46 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -45,6 +45,8 @@ +- Speed up _Black_ by about 40% when the cache is full (#3751) + ### Output From 58cc0ed81bcddc404c036b9e69ef4df3e246a8b3 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sat, 24 Jun 2023 17:38:23 -0700 Subject: [PATCH 04/11] comment typo --- src/black/files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/files.py b/src/black/files.py index 282acc9d32a..d6be7903f8e 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -280,7 +280,7 @@ def _path_is_ignored( path: Path, gitignore_dict: Dict[Path, PathSpec], report: Report ) -> bool: assert path.is_absolute() - # Note that this logic is sensitive to the ordering of gitignore_path. Callers must + # Note that this logic is sensitive to the ordering of gitignore_dict. Callers must # ensure that gitignore_dict is ordered from least specific to most specific. # This logic is currently applied post-symlink resolution. It might be more correct # to apply ignores before resolving the symlink. From cba566debdc8c1d924940648e5557d1488cf52c0 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sat, 24 Jun 2023 17:48:50 -0700 Subject: [PATCH 05/11] fix a test --- src/black/files.py | 10 ++++++---- tests/test_black.py | 4 +++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/black/files.py b/src/black/files.py index d6be7903f8e..3ab12db2b5d 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -277,9 +277,9 @@ def normalize_path_maybe_ignore( def _path_is_ignored( - path: Path, gitignore_dict: Dict[Path, PathSpec], report: Report + path: Path, root: Path, gitignore_dict: Dict[Path, PathSpec], report: Report ) -> bool: - assert path.is_absolute() + path = root / path # Note that this logic is sensitive to the ordering of gitignore_dict. Callers must # ensure that gitignore_dict is ordered from least specific to most specific. # This logic is currently applied post-symlink resolution. It might be more correct @@ -290,7 +290,9 @@ def _path_is_ignored( except ValueError: break if pattern.match_file(relative_path): - report.path_ignored(path, "matches a .gitignore file content") + report.path_ignored( + path.relative_to(root), "matches a .gitignore file content" + ) return True return False @@ -333,7 +335,7 @@ def gen_python_files( # First ignore files matching .gitignore, if passed if gitignore_dict and _path_is_ignored( - root / normalized_path, gitignore_dict, report + normalized_path, root, gitignore_dict, report ): continue diff --git a/tests/test_black.py b/tests/test_black.py index abb304a246d..cbdeb38ad40 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -511,6 +511,8 @@ def _mocked_calls() -> bool: "pathlib.Path.cwd", return_value=working_directory ), patch("pathlib.Path.is_dir", side_effect=mock_n_calls([True])): ctx = FakeContext() + # Note that the root folder (project_root) isn't the folder + # named "root" (aka working_directory) ctx.obj["root"] = project_root report = MagicMock(verbose=True) black.get_sources( @@ -530,7 +532,7 @@ def _mocked_calls() -> bool: for _, mock_args, _ in report.path_ignored.mock_calls ), "A symbolic link was reported." report.path_ignored.assert_called_once_with( - Path("child", "b.py"), "matches a .gitignore file content" + Path("root", "child", "b.py"), "matches a .gitignore file content" ) def test_report_verbose(self) -> None: From 710395232a2bfa13c36d744067d1af7df5969447 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sat, 24 Jun 2023 17:52:35 -0700 Subject: [PATCH 06/11] fixing the test broke mypy --- src/black/files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/black/files.py b/src/black/files.py index 3ab12db2b5d..dea05ad0284 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -277,9 +277,9 @@ def normalize_path_maybe_ignore( def _path_is_ignored( - path: Path, root: Path, gitignore_dict: Dict[Path, PathSpec], report: Report + root_relative_path: str, root: Path, gitignore_dict: Dict[Path, PathSpec], report: Report ) -> bool: - path = root / path + path = root / root_relative_path # Note that this logic is sensitive to the ordering of gitignore_dict. Callers must # ensure that gitignore_dict is ordered from least specific to most specific. # This logic is currently applied post-symlink resolution. It might be more correct From ba5cdec6f00be2107343040b5e7d8df2aa534fe5 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sat, 24 Jun 2023 17:53:06 -0700 Subject: [PATCH 07/11] fixing mypy broke black --- src/black/files.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/black/files.py b/src/black/files.py index dea05ad0284..3dcf3c1d0aa 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -277,7 +277,10 @@ def normalize_path_maybe_ignore( def _path_is_ignored( - root_relative_path: str, root: Path, gitignore_dict: Dict[Path, PathSpec], report: Report + root_relative_path: str, + root: Path, + gitignore_dict: Dict[Path, PathSpec], + report: Report, ) -> bool: path = root / root_relative_path # Note that this logic is sensitive to the ordering of gitignore_dict. Callers must From f6e6015041a92803ea987583a901f3525bdd4aac Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sat, 24 Jun 2023 18:07:06 -0700 Subject: [PATCH 08/11] number may be repo specific --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 7b76301ff46..17de42abd61 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -45,7 +45,7 @@ -- Speed up _Black_ by about 40% when the cache is full (#3751) +- Speed up _Black_ significantly when the cache is full (#3751) ### Output From 996770c14939c1367eba5616638f8e0d8a96f901 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sun, 25 Jun 2023 00:59:49 -0700 Subject: [PATCH 09/11] remove unnecessary symlink comment --- src/black/files.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/black/files.py b/src/black/files.py index 3dcf3c1d0aa..26e17f9ef33 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -285,8 +285,6 @@ def _path_is_ignored( path = root / root_relative_path # Note that this logic is sensitive to the ordering of gitignore_dict. Callers must # ensure that gitignore_dict is ordered from least specific to most specific. - # This logic is currently applied post-symlink resolution. It might be more correct - # to apply ignores before resolving the symlink. for gitignore_path, pattern in gitignore_dict.items(): try: relative_path = path.relative_to(gitignore_path).as_posix() From d017a02ef0384ffef7abc05cc43593ccfcb9535f Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sun, 25 Jun 2023 01:00:52 -0700 Subject: [PATCH 10/11] don't recompute relative to root --- src/black/files.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/black/files.py b/src/black/files.py index 26e17f9ef33..3aa85eb5a25 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -291,9 +291,7 @@ def _path_is_ignored( except ValueError: break if pattern.match_file(relative_path): - report.path_ignored( - path.relative_to(root), "matches a .gitignore file content" - ) + report.path_ignored(root_relative_path, "matches a .gitignore file content") return True return False From bc9b735f93f58cb434c659252b5eb961c0ee50ee Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Sun, 25 Jun 2023 01:01:46 -0700 Subject: [PATCH 11/11] Revert "don't recompute relative to root" This reverts commit d017a02ef0384ffef7abc05cc43593ccfcb9535f. --- src/black/files.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/black/files.py b/src/black/files.py index 3aa85eb5a25..26e17f9ef33 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -291,7 +291,9 @@ def _path_is_ignored( except ValueError: break if pattern.match_file(relative_path): - report.path_ignored(root_relative_path, "matches a .gitignore file content") + report.path_ignored( + path.relative_to(root), "matches a .gitignore file content" + ) return True return False