From d908b985194547e2e19e707532e75c5c05326425 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Sat, 29 Jul 2023 18:14:35 +0200 Subject: [PATCH 1/7] Improve caching by comparing file hashes as fallback for mtime and size --- CHANGES.md | 1 + .../reference/reference_classes.rst | 7 + .../reference/reference_functions.rst | 8 - pyproject.toml | 2 +- src/black/__init__.py | 12 +- src/black/cache.py | 167 +++++++++++------- src/black/concurrency.py | 10 +- tests/test_black.py | 154 ++++++++++------ 8 files changed, 229 insertions(+), 132 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 709c767b329..a6948c57b72 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -31,6 +31,7 @@ - Avoid importing `IPython` if notebook cells do not contain magics (#3782) +- Improve caching by comparing file hashes as fallback for mtime and size. (#3821) ### Output diff --git a/docs/contributing/reference/reference_classes.rst b/docs/contributing/reference/reference_classes.rst index 29b25003af2..913680c1b34 100644 --- a/docs/contributing/reference/reference_classes.rst +++ b/docs/contributing/reference/reference_classes.rst @@ -186,6 +186,13 @@ Black Classes :show-inheritance: :members: +:class:`Cache` +------------------------ + +.. autoclass:: black.cache.Cache + :show-inheritance: + :member: + Enum Classes ~~~~~~~~~~~~~ diff --git a/docs/contributing/reference/reference_functions.rst b/docs/contributing/reference/reference_functions.rst index 09517f73961..dd92e37a7d4 100644 --- a/docs/contributing/reference/reference_functions.rst +++ b/docs/contributing/reference/reference_functions.rst @@ -94,18 +94,10 @@ Split functions Caching ------- -.. autofunction:: black.cache.filter_cached - .. autofunction:: black.cache.get_cache_dir .. autofunction:: black.cache.get_cache_file -.. autofunction:: black.cache.get_cache_info - -.. autofunction:: black.cache.read_cache - -.. autofunction:: black.cache.write_cache - Utilities --------- diff --git a/pyproject.toml b/pyproject.toml index d29b768c289..6cd3f34bc10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ dependencies = [ "pathspec>=0.9.0", "platformdirs>=2", "tomli>=1.1.0; python_version < '3.11'", - "typing_extensions>=3.10.0.0; python_version < '3.10'", + "typing_extensions>=4.0.1; python_version < '3.11'", ] dynamic = ["readme", "version"] diff --git a/src/black/__init__.py b/src/black/__init__.py index 923a51867b5..10668485797 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -34,7 +34,7 @@ from pathspec.patterns.gitwildmatch import GitWildMatchPatternError from _black_version import version as __version__ -from black.cache import Cache, get_cache_info, read_cache, write_cache +from black.cache import Cache from black.comments import normalize_fmt_off from black.const import ( DEFAULT_EXCLUDES, @@ -775,12 +775,9 @@ def reformat_one( if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode): changed = Changed.YES else: - cache: Cache = {} + cache = Cache.read(mode) if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): - cache = read_cache(mode) - res_src = src.resolve() - res_src_s = str(res_src) - if res_src_s in cache and cache[res_src_s] == get_cache_info(res_src): + if not cache.is_changed(src): changed = Changed.CACHED if changed is not Changed.CACHED and format_file_in_place( src, fast=fast, write_back=write_back, mode=mode @@ -789,7 +786,8 @@ def reformat_one( if (write_back is WriteBack.YES and changed is not Changed.CACHED) or ( write_back is WriteBack.CHECK and changed is Changed.NO ): - write_cache(cache, [src], mode) + cache.update([src]) + cache.write() report.done(src, changed) except Exception as exc: if report.verbose: diff --git a/src/black/cache.py b/src/black/cache.py index 9455ff44772..897b44d85aa 100644 --- a/src/black/cache.py +++ b/src/black/cache.py @@ -1,21 +1,28 @@ """Caching of formatted files with feature-based invalidation.""" - +import hashlib import os import pickle +import sys import tempfile +from dataclasses import dataclass, field from pathlib import Path -from typing import Dict, Iterable, Set, Tuple +from typing import Dict, Iterable, NamedTuple, Set, Tuple from platformdirs import user_cache_dir from _black_version import version as __version__ from black.mode import Mode -# types -Timestamp = float -FileSize = int -CacheInfo = Tuple[Timestamp, FileSize] -Cache = Dict[str, CacheInfo] +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + + +class FileData(NamedTuple): + st_mtime: float + st_size: int + hash: str def get_cache_dir() -> Path: @@ -37,61 +44,103 @@ def get_cache_dir() -> Path: CACHE_DIR = get_cache_dir() -def read_cache(mode: Mode) -> Cache: - """Read the cache if it exists and is well formed. - - If it is not well formed, the call to write_cache later should resolve the issue. - """ - cache_file = get_cache_file(mode) - if not cache_file.exists(): - return {} - - with cache_file.open("rb") as fobj: - try: - cache: Cache = pickle.load(fobj) - except (pickle.UnpicklingError, ValueError, IndexError): - return {} - - return cache - - def get_cache_file(mode: Mode) -> Path: return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle" -def get_cache_info(path: Path) -> CacheInfo: - """Return the information used to check if a file is already formatted or not.""" - stat = path.stat() - return stat.st_mtime, stat.st_size - +@dataclass +class Cache: + mode: Mode + cache_file: Path + file_data: Dict[str, FileData] = field(default_factory=dict) + + @classmethod + def read(cls, mode: Mode) -> Self: + """Read the cache if it exists and is well formed. + + If it is not well formed, the call to write later should + resolve the issue. + """ + cache_file = get_cache_file(mode) + if not cache_file.exists(): + return cls(mode, cache_file) + + with cache_file.open("rb") as fobj: + try: + file_data: Dict[str, FileData] = pickle.load(fobj) + except (pickle.UnpicklingError, ValueError, IndexError): + return cls(mode, cache_file) + + return cls(mode, cache_file, file_data) + + @staticmethod + def hash_digest(path: Path) -> str: + """Return hash digest for path.""" + + with open(path, "rb") as fp: + data = fp.read() + return hashlib.sha256(data).hexdigest() + + @staticmethod + def get_file_data(path: Path) -> FileData: + """Return file data for path.""" + + stat = path.stat() + hash = Cache.hash_digest(path) + return FileData(stat.st_mtime, stat.st_size, hash) + + def is_changed(self, source: Path) -> bool: + """Check if source has changed compared to cached version.""" + res_src = source.resolve() + old = self.file_data.get(str(res_src)) + if old is None: + return True + + st = res_src.stat() + if st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime): + new_hash = Cache.hash_digest(res_src) + if st.st_size != old.st_size or new_hash != old.hash: + return True + return False + + def filtered_cached(self, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]: + """Split an iterable of paths in `sources` into two sets. + + The first contains paths of files that modified on disk or are not in the + cache. The other contains paths to non-modified files. + """ + changed: Set[Path] = set() + done: Set[Path] = set() + for src in sources: + res_src = src.resolve() + old = self.file_data.get(str(res_src)) + st = res_src.stat() + + if old is None: + changed.add(src) + continue + elif st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime): + new_hash = Cache.hash_digest(res_src) + if st.st_size != old.st_size or new_hash != old.hash: + changed.add(src) + continue + done.add(src) + return changed, done -def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]: - """Split an iterable of paths in `sources` into two sets. + def update(self, sources: Iterable[Path]) -> None: + """Update the cache file data.""" + self.file_data.update( + **{str(src.resolve()): Cache.get_file_data(src) for src in sources} + ) - The first contains paths of files that modified on disk or are not in the - cache. The other contains paths to non-modified files. - """ - todo, done = set(), set() - for src in sources: - res_src = src.resolve() - if cache.get(str(res_src)) != get_cache_info(res_src): - todo.add(src) - else: - done.add(src) - return todo, done - - -def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None: - """Update the cache file.""" - cache_file = get_cache_file(mode) - try: - CACHE_DIR.mkdir(parents=True, exist_ok=True) - new_cache = { - **cache, - **{str(src.resolve()): get_cache_info(src) for src in sources}, - } - with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f: - pickle.dump(new_cache, f, protocol=4) - os.replace(f.name, cache_file) - except OSError: - pass + def write(self) -> None: + """Write a new cache file.""" + try: + CACHE_DIR.mkdir(parents=True, exist_ok=True) + with tempfile.NamedTemporaryFile( + dir=str(self.cache_file.parent), delete=False + ) as f: + pickle.dump(self.file_data, f, protocol=4) + os.replace(f.name, self.cache_file) + except OSError: + pass diff --git a/src/black/concurrency.py b/src/black/concurrency.py index 893eba6675a..6dce2504315 100644 --- a/src/black/concurrency.py +++ b/src/black/concurrency.py @@ -17,7 +17,7 @@ from mypy_extensions import mypyc_attr from black import WriteBack, format_file_in_place -from black.cache import Cache, filter_cached, read_cache, write_cache +from black.cache import Cache from black.mode import Mode from black.output import err from black.report import Changed, Report @@ -133,10 +133,9 @@ async def schedule_formatting( `write_back`, `fast`, and `mode` options are passed to :func:`format_file_in_place`. """ - cache: Cache = {} + cache = Cache.read(mode) if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): - cache = read_cache(mode) - sources, cached = filter_cached(cache, sources) + sources, cached = cache.filtered_cached(sources) for src in sorted(cached): report.done(src, Changed.CACHED) if not sources: @@ -185,4 +184,5 @@ async def schedule_formatting( if cancelled: await asyncio.gather(*cancelled, return_exceptions=True) if sources_to_cache: - write_cache(cache, sources_to_cache, mode) + cache.update(sources_to_cache) + cache.write() diff --git a/tests/test_black.py b/tests/test_black.py index 3b3ab721c5f..c2593883dc0 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -41,7 +41,7 @@ import black.files from black import Feature, TargetVersion from black import re_compile_maybe_verbose as compile_pattern -from black.cache import get_cache_dir, get_cache_file +from black.cache import FileData, get_cache_dir, get_cache_file from black.debug import DebugVisitor from black.output import color_diff, diff from black.report import Report @@ -1121,10 +1121,10 @@ def test_single_file_force_pyi(self) -> None: self.invokeBlack([str(path), "--pyi"]) actual = path.read_text(encoding="utf-8") # verify cache with --pyi is separate - pyi_cache = black.read_cache(pyi_mode) - self.assertIn(str(path), pyi_cache) - normal_cache = black.read_cache(DEFAULT_MODE) - self.assertNotIn(str(path), normal_cache) + pyi_cache = black.Cache.read(pyi_mode) + assert not pyi_cache.is_changed(path) + normal_cache = black.Cache.read(DEFAULT_MODE) + assert normal_cache.is_changed(path) self.assertFormatEqual(expected, actual) black.assert_equivalent(contents, actual) black.assert_stable(contents, actual, pyi_mode) @@ -1146,11 +1146,11 @@ def test_multi_file_force_pyi(self) -> None: actual = path.read_text(encoding="utf-8") self.assertEqual(actual, expected) # verify cache with --pyi is separate - pyi_cache = black.read_cache(pyi_mode) - normal_cache = black.read_cache(reg_mode) + pyi_cache = black.Cache.read(pyi_mode) + normal_cache = black.Cache.read(reg_mode) for path in paths: - self.assertIn(str(path), pyi_cache) - self.assertNotIn(str(path), normal_cache) + assert not pyi_cache.is_changed(path) + assert normal_cache.is_changed(path) def test_pipe_force_pyi(self) -> None: source, expected = read_data("miscellaneous", "force_pyi") @@ -1171,10 +1171,10 @@ def test_single_file_force_py36(self) -> None: self.invokeBlack([str(path), *PY36_ARGS]) actual = path.read_text(encoding="utf-8") # verify cache with --target-version is separate - py36_cache = black.read_cache(py36_mode) - self.assertIn(str(path), py36_cache) - normal_cache = black.read_cache(reg_mode) - self.assertNotIn(str(path), normal_cache) + py36_cache = black.Cache.read(py36_mode) + assert not py36_cache.is_changed(path) + normal_cache = black.Cache.read(reg_mode) + assert normal_cache.is_changed(path) self.assertEqual(actual, expected) @event_loop() @@ -1194,11 +1194,11 @@ def test_multi_file_force_py36(self) -> None: actual = path.read_text(encoding="utf-8") self.assertEqual(actual, expected) # verify cache with --target-version is separate - pyi_cache = black.read_cache(py36_mode) - normal_cache = black.read_cache(reg_mode) + pyi_cache = black.Cache.read(py36_mode) + normal_cache = black.Cache.read(reg_mode) for path in paths: - self.assertIn(str(path), pyi_cache) - self.assertNotIn(str(path), normal_cache) + assert not pyi_cache.is_changed(path) + assert normal_cache.is_changed(path) def test_pipe_force_py36(self) -> None: source, expected = read_data("miscellaneous", "force_py36") @@ -1953,19 +1953,21 @@ def test_cache_broken_file(self) -> None: with cache_dir() as workspace: cache_file = get_cache_file(mode) cache_file.write_text("this is not a pickle", encoding="utf-8") - assert black.read_cache(mode) == {} + assert black.Cache.read(mode).file_data == {} src = (workspace / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") invokeBlack([str(src)]) - cache = black.read_cache(mode) - assert str(src) in cache + cache = black.Cache.read(mode) + assert not cache.is_changed(src) def test_cache_single_file_already_cached(self) -> None: mode = DEFAULT_MODE with cache_dir() as workspace: src = (workspace / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") - black.write_cache({}, [src], mode) + cache = black.Cache.read(mode) + cache.update([src]) + cache.write() invokeBlack([str(src)]) assert src.read_text(encoding="utf-8") == "print('hello')" @@ -1979,13 +1981,15 @@ def test_cache_multiple_files(self) -> None: one.write_text("print('hello')", encoding="utf-8") two = (workspace / "two.py").resolve() two.write_text("print('hello')", encoding="utf-8") - black.write_cache({}, [one], mode) + cache = black.Cache.read(mode) + cache.update([one]) + cache.write() invokeBlack([str(workspace)]) assert one.read_text(encoding="utf-8") == "print('hello')" assert two.read_text(encoding="utf-8") == 'print("hello")\n' - cache = black.read_cache(mode) - assert str(one) in cache - assert str(two) in cache + cache = black.Cache.read(mode) + assert not cache.is_changed(one) + assert not cache.is_changed(two) @pytest.mark.parametrize("color", [False, True], ids=["no-color", "with-color"]) def test_no_cache_when_writeback_diff(self, color: bool) -> None: @@ -1993,8 +1997,8 @@ def test_no_cache_when_writeback_diff(self, color: bool) -> None: with cache_dir() as workspace: src = (workspace / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") - with patch("black.read_cache") as read_cache, patch( - "black.write_cache" + with patch.object(black.Cache, "read") as read_cache, patch.object( + black.Cache, "write" ) as write_cache: cmd = [str(src), "--diff"] if color: @@ -2002,8 +2006,8 @@ def test_no_cache_when_writeback_diff(self, color: bool) -> None: invokeBlack(cmd) cache_file = get_cache_file(mode) assert cache_file.exists() is False + read_cache.assert_called_once() write_cache.assert_not_called() - read_cache.assert_not_called() @pytest.mark.parametrize("color", [False, True], ids=["no-color", "with-color"]) @event_loop() @@ -2036,17 +2040,18 @@ def test_no_cache_when_stdin(self) -> None: def test_read_cache_no_cachefile(self) -> None: mode = DEFAULT_MODE with cache_dir(): - assert black.read_cache(mode) == {} + assert black.Cache.read(mode).file_data == {} def test_write_cache_read_cache(self) -> None: mode = DEFAULT_MODE with cache_dir() as workspace: src = (workspace / "test.py").resolve() src.touch() - black.write_cache({}, [src], mode) - cache = black.read_cache(mode) - assert str(src) in cache - assert cache[str(src)] == black.get_cache_info(src) + write_cache = black.Cache.read(mode) + write_cache.update([src]) + write_cache.write() + read_cache = black.Cache.read(mode) + assert not read_cache.is_changed(src) def test_filter_cached(self) -> None: with TemporaryDirectory() as workspace: @@ -2057,21 +2062,62 @@ def test_filter_cached(self) -> None: uncached.touch() cached.touch() cached_but_changed.touch() - cache = { - str(cached): black.get_cache_info(cached), - str(cached_but_changed): (0.0, 0), - } - todo, done = black.cache.filter_cached( - cache, {uncached, cached, cached_but_changed} - ) + cache = black.Cache.read(DEFAULT_MODE) + + orig_func = black.Cache.get_file_data + + def wrapped_func(path: Path) -> FileData: + if path == cached: + return orig_func(path) + return FileData(0.0, 0, "") + + with patch.object(black.Cache, "get_file_data", side_effect=wrapped_func): + cache.update([cached, cached_but_changed]) + todo, done = cache.filtered_cached({uncached, cached, cached_but_changed}) assert todo == {uncached, cached_but_changed} assert done == {cached} + def test_filter_cached_hash(self) -> None: + with TemporaryDirectory() as workspace: + path = Path(workspace) + src = (path / "test.py").resolve() + src.write_text("print('hello')", encoding="utf-8") + st = src.stat() + cache = black.Cache.read(DEFAULT_MODE) + cache.update([src]) + cached_file_data = cache.file_data[str(src)] + + todo, done = cache.filtered_cached([src]) + assert todo == set() + assert done == {src} + assert cached_file_data.st_mtime == st.st_mtime + + # Modify st_mtime + src.write_text("print('hello')", encoding="utf-8") + new_st = src.stat() + todo, done = cache.filtered_cached([src]) + assert todo == set() + assert done == {src} + assert cached_file_data.st_mtime < new_st.st_mtime + assert cached_file_data.st_size == new_st.st_size + assert cached_file_data.hash == black.Cache.hash_digest(src) + + # Modify contents + src.write_text("print('hello world')", encoding="utf-8") + new_st = src.stat() + todo, done = cache.filtered_cached([src]) + assert todo == {src} + assert done == set() + assert cached_file_data.st_mtime < new_st.st_mtime + assert cached_file_data.st_size != new_st.st_size + assert cached_file_data.hash != black.Cache.hash_digest(src) + def test_write_cache_creates_directory_if_needed(self) -> None: mode = DEFAULT_MODE with cache_dir(exists=False) as workspace: assert not workspace.exists() - black.write_cache({}, [], mode) + cache = black.Cache.read(mode) + cache.write() assert workspace.exists() @event_loop() @@ -2085,15 +2131,17 @@ def test_failed_formatting_does_not_get_cached(self) -> None: clean = (workspace / "clean.py").resolve() clean.write_text('print("hello")\n', encoding="utf-8") invokeBlack([str(workspace)], exit_code=123) - cache = black.read_cache(mode) - assert str(failing) not in cache - assert str(clean) in cache + cache = black.Cache.read(mode) + assert cache.is_changed(failing) + assert not cache.is_changed(clean) def test_write_cache_write_fail(self) -> None: mode = DEFAULT_MODE - with cache_dir(), patch.object(Path, "open") as mock: - mock.side_effect = OSError - black.write_cache({}, [], mode) + with cache_dir(): + cache = black.Cache.read(mode) + with patch.object(Path, "open") as mock: + mock.side_effect = OSError + cache.write() def test_read_cache_line_lengths(self) -> None: mode = DEFAULT_MODE @@ -2101,11 +2149,13 @@ def test_read_cache_line_lengths(self) -> None: with cache_dir() as workspace: path = (workspace / "file.py").resolve() path.touch() - black.write_cache({}, [path], mode) - one = black.read_cache(mode) - assert str(path) in one - two = black.read_cache(short_mode) - assert str(path) not in two + cache = black.Cache.read(mode) + cache.update([path]) + cache.write() + one = black.Cache.read(mode) + assert not one.is_changed(path) + two = black.Cache.read(short_mode) + assert two.is_changed(path) def assert_collected_sources( From 7dd6427ab0c684a480b3f33a4c01c53a8250b549 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Sat, 29 Jul 2023 21:28:19 +0200 Subject: [PATCH 2/7] Fix tests --- docs/contributing/reference/reference_classes.rst | 2 +- tests/test_black.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/contributing/reference/reference_classes.rst b/docs/contributing/reference/reference_classes.rst index 913680c1b34..dc615579e30 100644 --- a/docs/contributing/reference/reference_classes.rst +++ b/docs/contributing/reference/reference_classes.rst @@ -191,7 +191,7 @@ Black Classes .. autoclass:: black.cache.Cache :show-inheritance: - :member: + :members: Enum Classes ~~~~~~~~~~~~~ diff --git a/tests/test_black.py b/tests/test_black.py index c2593883dc0..672bab9b0ac 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -2093,13 +2093,16 @@ def test_filter_cached_hash(self) -> None: assert cached_file_data.st_mtime == st.st_mtime # Modify st_mtime - src.write_text("print('hello')", encoding="utf-8") - new_st = src.stat() + cached_file_data = cache.file_data[str(src)] = FileData( + cached_file_data.st_mtime - 1, + cached_file_data.st_size, + cached_file_data.hash, + ) todo, done = cache.filtered_cached([src]) assert todo == set() assert done == {src} - assert cached_file_data.st_mtime < new_st.st_mtime - assert cached_file_data.st_size == new_st.st_size + assert cached_file_data.st_mtime < st.st_mtime + assert cached_file_data.st_size == st.st_size assert cached_file_data.hash == black.Cache.hash_digest(src) # Modify contents From 07fe5b17327a8313ddc1458e1543838990a42259 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Sun, 13 Aug 2023 12:12:34 +0200 Subject: [PATCH 3/7] Code review * Skip hashing if file sizes don't match * Use is_changed in filtered_cached * Combine update and write --- src/black/__init__.py | 3 +-- src/black/cache.py | 28 +++++++++------------------- src/black/concurrency.py | 3 +-- tests/test_black.py | 20 ++++++++------------ 4 files changed, 19 insertions(+), 35 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 10668485797..dc06eab8dd0 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -786,8 +786,7 @@ def reformat_one( if (write_back is WriteBack.YES and changed is not Changed.CACHED) or ( write_back is WriteBack.CHECK and changed is Changed.NO ): - cache.update([src]) - cache.write() + cache.write([src]) report.done(src, changed) except Exception as exc: if report.verbose: diff --git a/src/black/cache.py b/src/black/cache.py index 897b44d85aa..ca44497625e 100644 --- a/src/black/cache.py +++ b/src/black/cache.py @@ -97,9 +97,11 @@ def is_changed(self, source: Path) -> bool: return True st = res_src.stat() - if st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime): + if st.st_size != old.st_size: + return True + if int(st.st_mtime) != int(old.st_mtime): new_hash = Cache.hash_digest(res_src) - if st.st_size != old.st_size or new_hash != old.hash: + if new_hash != old.hash: return True return False @@ -112,29 +114,17 @@ def filtered_cached(self, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path] changed: Set[Path] = set() done: Set[Path] = set() for src in sources: - res_src = src.resolve() - old = self.file_data.get(str(res_src)) - st = res_src.stat() - - if old is None: + if self.is_changed(src): changed.add(src) - continue - elif st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime): - new_hash = Cache.hash_digest(res_src) - if st.st_size != old.st_size or new_hash != old.hash: - changed.add(src) - continue - done.add(src) + else: + done.add(src) return changed, done - def update(self, sources: Iterable[Path]) -> None: - """Update the cache file data.""" + def write(self, sources: Iterable[Path]) -> None: + """Update the cache file data and write a new cache file.""" self.file_data.update( **{str(src.resolve()): Cache.get_file_data(src) for src in sources} ) - - def write(self) -> None: - """Write a new cache file.""" try: CACHE_DIR.mkdir(parents=True, exist_ok=True) with tempfile.NamedTemporaryFile( diff --git a/src/black/concurrency.py b/src/black/concurrency.py index 6dce2504315..ce016578399 100644 --- a/src/black/concurrency.py +++ b/src/black/concurrency.py @@ -184,5 +184,4 @@ async def schedule_formatting( if cancelled: await asyncio.gather(*cancelled, return_exceptions=True) if sources_to_cache: - cache.update(sources_to_cache) - cache.write() + cache.write(sources_to_cache) diff --git a/tests/test_black.py b/tests/test_black.py index 672bab9b0ac..90dc7439b6d 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1966,8 +1966,7 @@ def test_cache_single_file_already_cached(self) -> None: src = (workspace / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") cache = black.Cache.read(mode) - cache.update([src]) - cache.write() + cache.write([src]) invokeBlack([str(src)]) assert src.read_text(encoding="utf-8") == "print('hello')" @@ -1982,8 +1981,7 @@ def test_cache_multiple_files(self) -> None: two = (workspace / "two.py").resolve() two.write_text("print('hello')", encoding="utf-8") cache = black.Cache.read(mode) - cache.update([one]) - cache.write() + cache.write([one]) invokeBlack([str(workspace)]) assert one.read_text(encoding="utf-8") == "print('hello')" assert two.read_text(encoding="utf-8") == 'print("hello")\n' @@ -2048,8 +2046,7 @@ def test_write_cache_read_cache(self) -> None: src = (workspace / "test.py").resolve() src.touch() write_cache = black.Cache.read(mode) - write_cache.update([src]) - write_cache.write() + write_cache.write([src]) read_cache = black.Cache.read(mode) assert not read_cache.is_changed(src) @@ -2072,7 +2069,7 @@ def wrapped_func(path: Path) -> FileData: return FileData(0.0, 0, "") with patch.object(black.Cache, "get_file_data", side_effect=wrapped_func): - cache.update([cached, cached_but_changed]) + cache.write([cached, cached_but_changed]) todo, done = cache.filtered_cached({uncached, cached, cached_but_changed}) assert todo == {uncached, cached_but_changed} assert done == {cached} @@ -2084,7 +2081,7 @@ def test_filter_cached_hash(self) -> None: src.write_text("print('hello')", encoding="utf-8") st = src.stat() cache = black.Cache.read(DEFAULT_MODE) - cache.update([src]) + cache.write([src]) cached_file_data = cache.file_data[str(src)] todo, done = cache.filtered_cached([src]) @@ -2120,7 +2117,7 @@ def test_write_cache_creates_directory_if_needed(self) -> None: with cache_dir(exists=False) as workspace: assert not workspace.exists() cache = black.Cache.read(mode) - cache.write() + cache.write([]) assert workspace.exists() @event_loop() @@ -2144,7 +2141,7 @@ def test_write_cache_write_fail(self) -> None: cache = black.Cache.read(mode) with patch.object(Path, "open") as mock: mock.side_effect = OSError - cache.write() + cache.write([]) def test_read_cache_line_lengths(self) -> None: mode = DEFAULT_MODE @@ -2153,8 +2150,7 @@ def test_read_cache_line_lengths(self) -> None: path = (workspace / "file.py").resolve() path.touch() cache = black.Cache.read(mode) - cache.update([path]) - cache.write() + cache.write([path]) one = black.Cache.read(mode) assert not one.is_changed(path) two = black.Cache.read(short_mode) From 64bbbcea16722e052593e675c191c4a0d4b4b7a1 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Tue, 15 Aug 2023 09:33:20 +0200 Subject: [PATCH 4/7] Update tests/test_black.py Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com> --- tests/test_black.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_black.py b/tests/test_black.py index 90dc7439b6d..8ae92172d43 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -2066,7 +2066,9 @@ def test_filter_cached(self) -> None: def wrapped_func(path: Path) -> FileData: if path == cached: return orig_func(path) - return FileData(0.0, 0, "") + if path == cached_but_changed: + return FileData(0.0, 0, "") + raise AssertionError with patch.object(black.Cache, "get_file_data", side_effect=wrapped_func): cache.write([cached, cached_but_changed]) From 85b4a91c1b9dc70cb53af2dc37c9eaf4b985e453 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Tue, 15 Aug 2023 09:53:14 +0200 Subject: [PATCH 5/7] Delay cache read --- src/black/__init__.py | 3 ++- src/black/cache.py | 21 +++++++++-------- src/black/concurrency.py | 3 ++- tests/test_black.py | 50 ++++++++++++++++++++-------------------- 4 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index dc06eab8dd0..caef480875a 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -775,8 +775,9 @@ def reformat_one( if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode): changed = Changed.YES else: - cache = Cache.read(mode) + cache = Cache(mode) if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): + cache.read() if not cache.is_changed(src): changed = Changed.CACHED if changed is not Changed.CACHED and format_file_in_place( diff --git a/src/black/cache.py b/src/black/cache.py index ca44497625e..a89e39f7cd3 100644 --- a/src/black/cache.py +++ b/src/black/cache.py @@ -51,27 +51,28 @@ def get_cache_file(mode: Mode) -> Path: @dataclass class Cache: mode: Mode - cache_file: Path + cache_file: Path = field(init=False) file_data: Dict[str, FileData] = field(default_factory=dict) - @classmethod - def read(cls, mode: Mode) -> Self: + def __post_init__(self) -> None: + self.cache_file = get_cache_file(self.mode) + + def read(self) -> Self: """Read the cache if it exists and is well formed. If it is not well formed, the call to write later should resolve the issue. """ - cache_file = get_cache_file(mode) - if not cache_file.exists(): - return cls(mode, cache_file) + if not self.cache_file.exists(): + return self - with cache_file.open("rb") as fobj: + with self.cache_file.open("rb") as fobj: try: - file_data: Dict[str, FileData] = pickle.load(fobj) + self.file_data = pickle.load(fobj) except (pickle.UnpicklingError, ValueError, IndexError): - return cls(mode, cache_file) + pass - return cls(mode, cache_file, file_data) + return self @staticmethod def hash_digest(path: Path) -> str: diff --git a/src/black/concurrency.py b/src/black/concurrency.py index ce016578399..3ae17c3be99 100644 --- a/src/black/concurrency.py +++ b/src/black/concurrency.py @@ -133,8 +133,9 @@ async def schedule_formatting( `write_back`, `fast`, and `mode` options are passed to :func:`format_file_in_place`. """ - cache = Cache.read(mode) + cache = Cache(mode) if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): + cache.read() sources, cached = cache.filtered_cached(sources) for src in sorted(cached): report.done(src, Changed.CACHED) diff --git a/tests/test_black.py b/tests/test_black.py index 8ae92172d43..c0a4f3d166f 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1121,9 +1121,9 @@ def test_single_file_force_pyi(self) -> None: self.invokeBlack([str(path), "--pyi"]) actual = path.read_text(encoding="utf-8") # verify cache with --pyi is separate - pyi_cache = black.Cache.read(pyi_mode) + pyi_cache = black.Cache(pyi_mode).read() assert not pyi_cache.is_changed(path) - normal_cache = black.Cache.read(DEFAULT_MODE) + normal_cache = black.Cache(DEFAULT_MODE).read() assert normal_cache.is_changed(path) self.assertFormatEqual(expected, actual) black.assert_equivalent(contents, actual) @@ -1146,8 +1146,8 @@ def test_multi_file_force_pyi(self) -> None: actual = path.read_text(encoding="utf-8") self.assertEqual(actual, expected) # verify cache with --pyi is separate - pyi_cache = black.Cache.read(pyi_mode) - normal_cache = black.Cache.read(reg_mode) + pyi_cache = black.Cache(pyi_mode).read() + normal_cache = black.Cache(reg_mode).read() for path in paths: assert not pyi_cache.is_changed(path) assert normal_cache.is_changed(path) @@ -1171,9 +1171,9 @@ def test_single_file_force_py36(self) -> None: self.invokeBlack([str(path), *PY36_ARGS]) actual = path.read_text(encoding="utf-8") # verify cache with --target-version is separate - py36_cache = black.Cache.read(py36_mode) + py36_cache = black.Cache(py36_mode).read() assert not py36_cache.is_changed(path) - normal_cache = black.Cache.read(reg_mode) + normal_cache = black.Cache(reg_mode).read() assert normal_cache.is_changed(path) self.assertEqual(actual, expected) @@ -1194,8 +1194,8 @@ def test_multi_file_force_py36(self) -> None: actual = path.read_text(encoding="utf-8") self.assertEqual(actual, expected) # verify cache with --target-version is separate - pyi_cache = black.Cache.read(py36_mode) - normal_cache = black.Cache.read(reg_mode) + pyi_cache = black.Cache(py36_mode).read() + normal_cache = black.Cache(reg_mode).read() for path in paths: assert not pyi_cache.is_changed(path) assert normal_cache.is_changed(path) @@ -1953,11 +1953,11 @@ def test_cache_broken_file(self) -> None: with cache_dir() as workspace: cache_file = get_cache_file(mode) cache_file.write_text("this is not a pickle", encoding="utf-8") - assert black.Cache.read(mode).file_data == {} + assert black.Cache(mode).read().file_data == {} src = (workspace / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") invokeBlack([str(src)]) - cache = black.Cache.read(mode) + cache = black.Cache(mode).read() assert not cache.is_changed(src) def test_cache_single_file_already_cached(self) -> None: @@ -1965,7 +1965,7 @@ def test_cache_single_file_already_cached(self) -> None: with cache_dir() as workspace: src = (workspace / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") - cache = black.Cache.read(mode) + cache = black.Cache(mode).read() cache.write([src]) invokeBlack([str(src)]) assert src.read_text(encoding="utf-8") == "print('hello')" @@ -1980,12 +1980,12 @@ def test_cache_multiple_files(self) -> None: one.write_text("print('hello')", encoding="utf-8") two = (workspace / "two.py").resolve() two.write_text("print('hello')", encoding="utf-8") - cache = black.Cache.read(mode) + cache = black.Cache(mode).read() cache.write([one]) invokeBlack([str(workspace)]) assert one.read_text(encoding="utf-8") == "print('hello')" assert two.read_text(encoding="utf-8") == 'print("hello")\n' - cache = black.Cache.read(mode) + cache = black.Cache(mode).read() assert not cache.is_changed(one) assert not cache.is_changed(two) @@ -2004,8 +2004,8 @@ def test_no_cache_when_writeback_diff(self, color: bool) -> None: invokeBlack(cmd) cache_file = get_cache_file(mode) assert cache_file.exists() is False - read_cache.assert_called_once() write_cache.assert_not_called() + read_cache.assert_not_called() @pytest.mark.parametrize("color", [False, True], ids=["no-color", "with-color"]) @event_loop() @@ -2038,16 +2038,16 @@ def test_no_cache_when_stdin(self) -> None: def test_read_cache_no_cachefile(self) -> None: mode = DEFAULT_MODE with cache_dir(): - assert black.Cache.read(mode).file_data == {} + assert black.Cache(mode).read().file_data == {} def test_write_cache_read_cache(self) -> None: mode = DEFAULT_MODE with cache_dir() as workspace: src = (workspace / "test.py").resolve() src.touch() - write_cache = black.Cache.read(mode) + write_cache = black.Cache(mode).read() write_cache.write([src]) - read_cache = black.Cache.read(mode) + read_cache = black.Cache(mode).read() assert not read_cache.is_changed(src) def test_filter_cached(self) -> None: @@ -2059,7 +2059,7 @@ def test_filter_cached(self) -> None: uncached.touch() cached.touch() cached_but_changed.touch() - cache = black.Cache.read(DEFAULT_MODE) + cache = black.Cache(DEFAULT_MODE).read() orig_func = black.Cache.get_file_data @@ -2082,7 +2082,7 @@ def test_filter_cached_hash(self) -> None: src = (path / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") st = src.stat() - cache = black.Cache.read(DEFAULT_MODE) + cache = black.Cache(DEFAULT_MODE).read() cache.write([src]) cached_file_data = cache.file_data[str(src)] @@ -2118,7 +2118,7 @@ def test_write_cache_creates_directory_if_needed(self) -> None: mode = DEFAULT_MODE with cache_dir(exists=False) as workspace: assert not workspace.exists() - cache = black.Cache.read(mode) + cache = black.Cache(mode).read() cache.write([]) assert workspace.exists() @@ -2133,14 +2133,14 @@ def test_failed_formatting_does_not_get_cached(self) -> None: clean = (workspace / "clean.py").resolve() clean.write_text('print("hello")\n', encoding="utf-8") invokeBlack([str(workspace)], exit_code=123) - cache = black.Cache.read(mode) + cache = black.Cache(mode).read() assert cache.is_changed(failing) assert not cache.is_changed(clean) def test_write_cache_write_fail(self) -> None: mode = DEFAULT_MODE with cache_dir(): - cache = black.Cache.read(mode) + cache = black.Cache(mode).read() with patch.object(Path, "open") as mock: mock.side_effect = OSError cache.write([]) @@ -2151,11 +2151,11 @@ def test_read_cache_line_lengths(self) -> None: with cache_dir() as workspace: path = (workspace / "file.py").resolve() path.touch() - cache = black.Cache.read(mode) + cache = black.Cache(mode).read() cache.write([path]) - one = black.Cache.read(mode) + one = black.Cache(mode).read() assert not one.is_changed(path) - two = black.Cache.read(short_mode) + two = black.Cache(short_mode).read() assert two.is_changed(path) From 088ea2f876806003f06a9ac2c52cb7f7ee337992 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Tue, 15 Aug 2023 11:15:18 +0200 Subject: [PATCH 6/7] Revert "Delay cache read" --- src/black/__init__.py | 3 +-- src/black/cache.py | 21 ++++++++--------- src/black/concurrency.py | 3 +-- tests/test_black.py | 50 ++++++++++++++++++++-------------------- 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index caef480875a..dc06eab8dd0 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -775,9 +775,8 @@ def reformat_one( if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode): changed = Changed.YES else: - cache = Cache(mode) + cache = Cache.read(mode) if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): - cache.read() if not cache.is_changed(src): changed = Changed.CACHED if changed is not Changed.CACHED and format_file_in_place( diff --git a/src/black/cache.py b/src/black/cache.py index a89e39f7cd3..ca44497625e 100644 --- a/src/black/cache.py +++ b/src/black/cache.py @@ -51,28 +51,27 @@ def get_cache_file(mode: Mode) -> Path: @dataclass class Cache: mode: Mode - cache_file: Path = field(init=False) + cache_file: Path file_data: Dict[str, FileData] = field(default_factory=dict) - def __post_init__(self) -> None: - self.cache_file = get_cache_file(self.mode) - - def read(self) -> Self: + @classmethod + def read(cls, mode: Mode) -> Self: """Read the cache if it exists and is well formed. If it is not well formed, the call to write later should resolve the issue. """ - if not self.cache_file.exists(): - return self + cache_file = get_cache_file(mode) + if not cache_file.exists(): + return cls(mode, cache_file) - with self.cache_file.open("rb") as fobj: + with cache_file.open("rb") as fobj: try: - self.file_data = pickle.load(fobj) + file_data: Dict[str, FileData] = pickle.load(fobj) except (pickle.UnpicklingError, ValueError, IndexError): - pass + return cls(mode, cache_file) - return self + return cls(mode, cache_file, file_data) @staticmethod def hash_digest(path: Path) -> str: diff --git a/src/black/concurrency.py b/src/black/concurrency.py index 3ae17c3be99..ce016578399 100644 --- a/src/black/concurrency.py +++ b/src/black/concurrency.py @@ -133,9 +133,8 @@ async def schedule_formatting( `write_back`, `fast`, and `mode` options are passed to :func:`format_file_in_place`. """ - cache = Cache(mode) + cache = Cache.read(mode) if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): - cache.read() sources, cached = cache.filtered_cached(sources) for src in sorted(cached): report.done(src, Changed.CACHED) diff --git a/tests/test_black.py b/tests/test_black.py index c0a4f3d166f..8ae92172d43 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1121,9 +1121,9 @@ def test_single_file_force_pyi(self) -> None: self.invokeBlack([str(path), "--pyi"]) actual = path.read_text(encoding="utf-8") # verify cache with --pyi is separate - pyi_cache = black.Cache(pyi_mode).read() + pyi_cache = black.Cache.read(pyi_mode) assert not pyi_cache.is_changed(path) - normal_cache = black.Cache(DEFAULT_MODE).read() + normal_cache = black.Cache.read(DEFAULT_MODE) assert normal_cache.is_changed(path) self.assertFormatEqual(expected, actual) black.assert_equivalent(contents, actual) @@ -1146,8 +1146,8 @@ def test_multi_file_force_pyi(self) -> None: actual = path.read_text(encoding="utf-8") self.assertEqual(actual, expected) # verify cache with --pyi is separate - pyi_cache = black.Cache(pyi_mode).read() - normal_cache = black.Cache(reg_mode).read() + pyi_cache = black.Cache.read(pyi_mode) + normal_cache = black.Cache.read(reg_mode) for path in paths: assert not pyi_cache.is_changed(path) assert normal_cache.is_changed(path) @@ -1171,9 +1171,9 @@ def test_single_file_force_py36(self) -> None: self.invokeBlack([str(path), *PY36_ARGS]) actual = path.read_text(encoding="utf-8") # verify cache with --target-version is separate - py36_cache = black.Cache(py36_mode).read() + py36_cache = black.Cache.read(py36_mode) assert not py36_cache.is_changed(path) - normal_cache = black.Cache(reg_mode).read() + normal_cache = black.Cache.read(reg_mode) assert normal_cache.is_changed(path) self.assertEqual(actual, expected) @@ -1194,8 +1194,8 @@ def test_multi_file_force_py36(self) -> None: actual = path.read_text(encoding="utf-8") self.assertEqual(actual, expected) # verify cache with --target-version is separate - pyi_cache = black.Cache(py36_mode).read() - normal_cache = black.Cache(reg_mode).read() + pyi_cache = black.Cache.read(py36_mode) + normal_cache = black.Cache.read(reg_mode) for path in paths: assert not pyi_cache.is_changed(path) assert normal_cache.is_changed(path) @@ -1953,11 +1953,11 @@ def test_cache_broken_file(self) -> None: with cache_dir() as workspace: cache_file = get_cache_file(mode) cache_file.write_text("this is not a pickle", encoding="utf-8") - assert black.Cache(mode).read().file_data == {} + assert black.Cache.read(mode).file_data == {} src = (workspace / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") invokeBlack([str(src)]) - cache = black.Cache(mode).read() + cache = black.Cache.read(mode) assert not cache.is_changed(src) def test_cache_single_file_already_cached(self) -> None: @@ -1965,7 +1965,7 @@ def test_cache_single_file_already_cached(self) -> None: with cache_dir() as workspace: src = (workspace / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") - cache = black.Cache(mode).read() + cache = black.Cache.read(mode) cache.write([src]) invokeBlack([str(src)]) assert src.read_text(encoding="utf-8") == "print('hello')" @@ -1980,12 +1980,12 @@ def test_cache_multiple_files(self) -> None: one.write_text("print('hello')", encoding="utf-8") two = (workspace / "two.py").resolve() two.write_text("print('hello')", encoding="utf-8") - cache = black.Cache(mode).read() + cache = black.Cache.read(mode) cache.write([one]) invokeBlack([str(workspace)]) assert one.read_text(encoding="utf-8") == "print('hello')" assert two.read_text(encoding="utf-8") == 'print("hello")\n' - cache = black.Cache(mode).read() + cache = black.Cache.read(mode) assert not cache.is_changed(one) assert not cache.is_changed(two) @@ -2004,8 +2004,8 @@ def test_no_cache_when_writeback_diff(self, color: bool) -> None: invokeBlack(cmd) cache_file = get_cache_file(mode) assert cache_file.exists() is False + read_cache.assert_called_once() write_cache.assert_not_called() - read_cache.assert_not_called() @pytest.mark.parametrize("color", [False, True], ids=["no-color", "with-color"]) @event_loop() @@ -2038,16 +2038,16 @@ def test_no_cache_when_stdin(self) -> None: def test_read_cache_no_cachefile(self) -> None: mode = DEFAULT_MODE with cache_dir(): - assert black.Cache(mode).read().file_data == {} + assert black.Cache.read(mode).file_data == {} def test_write_cache_read_cache(self) -> None: mode = DEFAULT_MODE with cache_dir() as workspace: src = (workspace / "test.py").resolve() src.touch() - write_cache = black.Cache(mode).read() + write_cache = black.Cache.read(mode) write_cache.write([src]) - read_cache = black.Cache(mode).read() + read_cache = black.Cache.read(mode) assert not read_cache.is_changed(src) def test_filter_cached(self) -> None: @@ -2059,7 +2059,7 @@ def test_filter_cached(self) -> None: uncached.touch() cached.touch() cached_but_changed.touch() - cache = black.Cache(DEFAULT_MODE).read() + cache = black.Cache.read(DEFAULT_MODE) orig_func = black.Cache.get_file_data @@ -2082,7 +2082,7 @@ def test_filter_cached_hash(self) -> None: src = (path / "test.py").resolve() src.write_text("print('hello')", encoding="utf-8") st = src.stat() - cache = black.Cache(DEFAULT_MODE).read() + cache = black.Cache.read(DEFAULT_MODE) cache.write([src]) cached_file_data = cache.file_data[str(src)] @@ -2118,7 +2118,7 @@ def test_write_cache_creates_directory_if_needed(self) -> None: mode = DEFAULT_MODE with cache_dir(exists=False) as workspace: assert not workspace.exists() - cache = black.Cache(mode).read() + cache = black.Cache.read(mode) cache.write([]) assert workspace.exists() @@ -2133,14 +2133,14 @@ def test_failed_formatting_does_not_get_cached(self) -> None: clean = (workspace / "clean.py").resolve() clean.write_text('print("hello")\n', encoding="utf-8") invokeBlack([str(workspace)], exit_code=123) - cache = black.Cache(mode).read() + cache = black.Cache.read(mode) assert cache.is_changed(failing) assert not cache.is_changed(clean) def test_write_cache_write_fail(self) -> None: mode = DEFAULT_MODE with cache_dir(): - cache = black.Cache(mode).read() + cache = black.Cache.read(mode) with patch.object(Path, "open") as mock: mock.side_effect = OSError cache.write([]) @@ -2151,11 +2151,11 @@ def test_read_cache_line_lengths(self) -> None: with cache_dir() as workspace: path = (workspace / "file.py").resolve() path.touch() - cache = black.Cache(mode).read() + cache = black.Cache.read(mode) cache.write([path]) - one = black.Cache(mode).read() + one = black.Cache.read(mode) assert not one.is_changed(path) - two = black.Cache(short_mode).read() + two = black.Cache.read(short_mode) assert two.is_changed(path) From 4278c86d412e7c0573351091993e34216ef6142d Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Sat, 19 Aug 2023 02:33:15 +0200 Subject: [PATCH 7/7] Code review --- src/black/cache.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/black/cache.py b/src/black/cache.py index ca44497625e..ff15da2a94e 100644 --- a/src/black/cache.py +++ b/src/black/cache.py @@ -77,8 +77,7 @@ def read(cls, mode: Mode) -> Self: def hash_digest(path: Path) -> str: """Return hash digest for path.""" - with open(path, "rb") as fp: - data = fp.read() + data = path.read_bytes() return hashlib.sha256(data).hexdigest() @staticmethod