Skip to content

Commit

Permalink
fix regression about filtering files changed
Browse files Browse the repository at this point in the history
This reintroduces the approach where we filter the list of changed files according to the diff information and the lines-changed-only value.

The advantage (which still holds true) is that we don't run a clang tool on a file if there are no changes of concern.
  • Loading branch information
2bndy5 committed Jan 3, 2024
1 parent f10bfc2 commit ceaf4d8
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 73 deletions.
1 change: 1 addition & 0 deletions cpp_linter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def main():
args.extensions,
ignored,
not_ignored,
args.lines_changed_only,
)
if files:
rest_api_client.verify_files_are_present(files)
Expand Down
2 changes: 1 addition & 1 deletion cpp_linter/clang_tools/clang_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __repr__(self) -> str:
def formalize_style_name(style: str) -> str:
if style.startswith("llvm") or style.startswith("gnu"):
return style.upper()
if style not in (
if style in (
"google",
"chromium",
"microsoft",
Expand Down
20 changes: 20 additions & 0 deletions cpp_linter/common_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,26 @@ def is_file_in_list(paths: List[str], file_name: str, prompt: str) -> bool:
return False


def has_line_changes(
    lines_changed_only: int, diff_chunks: List[List[int]], additions: List[int]
) -> bool:
    """Does this file actually apply to condition specified by ``lines_changed_only``?

    :param lines_changed_only: A value that means:

        - 0 = We don't care. Analyze the whole file.
        - 1 = Only analyze lines in the diff chunks, which may include unchanged
          lines but not lines with subtractions.
        - 2 = Only analyze lines with additions.
    :param diff_chunks: The file's diff chunks (each a list of line numbers).
        Only consulted when ``lines_changed_only`` is 1.
    :param additions: The file's lines with additions. Only consulted when
        ``lines_changed_only`` is 2.

    :returns: `True` if the file has changes relevant to the given
        ``lines_changed_only`` value (always `True` for a falsy value), otherwise
        `False`. Any value other than 0, 1, or 2 yields `False`.
    """
    if not lines_changed_only:
        # The whole file is analyzed regardless of what the diff contains.
        return True
    if lines_changed_only == 1:
        return bool(diff_chunks)
    if lines_changed_only == 2:
        return bool(additions)
    return False


def is_source_or_ignored(
file_name: str,
ext_list: List[str],
Expand Down
13 changes: 10 additions & 3 deletions cpp_linter/git/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
GitError,
)
from .. import CACHE_PATH
from ..common_fs import FileObj, is_source_or_ignored
from ..common_fs import FileObj, is_source_or_ignored, has_line_changes
from ..loggers import logger
from .git_str import parse_diff as legacy_parse_diff

Expand Down Expand Up @@ -87,13 +87,15 @@ def parse_diff(
extensions: List[str],
ignored: List[str],
not_ignored: List[str],
lines_changed_only: int,
) -> List[FileObj]:
"""Parse a given diff into file objects.
:param diff_obj: The complete git diff object for an event.
:param extensions: A list of file extensions to focus on only.
:param ignored: A list of paths or files to ignore.
:param not_ignored: A list of paths or files to explicitly not ignore.
:param lines_changed_only: A value that dictates what file changes to focus on.
:returns: A `list` of `FileObj` instances containing information about the files changed.
.. note:: Deleted files are omitted because we only want to analyze updates.
Expand All @@ -104,7 +106,9 @@ def parse_diff(
diff_obj = Diff.parse_diff(diff_obj)
except GitError as exc:
logger.warning(f"pygit2.Diff.parse_diff() threw {exc}")
return legacy_parse_diff(diff_obj, extensions, ignored, not_ignored)
return legacy_parse_diff(
diff_obj, extensions, ignored, not_ignored, lines_changed_only
)
for patch in diff_obj:
if patch.delta.status not in ADDITIVE_STATUS:
continue
Expand All @@ -113,7 +117,10 @@ def parse_diff(
):
continue
diff_chunks, additions = parse_patch(patch.hunks)
file_objects.append(FileObj(patch.delta.new_file.path, additions, diff_chunks))
if has_line_changes(lines_changed_only, diff_chunks, additions):
file_objects.append(
FileObj(patch.delta.new_file.path, additions, diff_chunks)
)
return file_objects


Expand Down
12 changes: 9 additions & 3 deletions cpp_linter/git/git_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
:py:meth:`pygit2.Diff.parse_diff()` function fails in `cpp_linter.git.parse_diff()`"""
import re
from typing import Optional, List, Tuple, cast
from ..common_fs import FileObj, is_source_or_ignored
from ..common_fs import FileObj, is_source_or_ignored, has_line_changes
from ..loggers import logger


Expand Down Expand Up @@ -36,14 +36,19 @@ def _get_filename_from_diff(front_matter: str) -> Optional[re.Match]:


def parse_diff(
full_diff: str, extensions: List[str], ignored: List[str], not_ignored: List[str]
full_diff: str,
extensions: List[str],
ignored: List[str],
not_ignored: List[str],
lines_changed_only: int,
) -> List[FileObj]:
"""Parse a given diff into file objects.
:param full_diff: The complete diff for an event.
:param extensions: A list of file extensions to focus on only.
:param ignored: A list of paths or files to ignore.
:param not_ignored: A list of paths or files to explicitly not ignore.
:param lines_changed_only: A value that dictates what file changes to focus on.
:returns: A `list` of `FileObj` instances containing information about the files
changed.
"""
Expand All @@ -65,7 +70,8 @@ def parse_diff(
if not is_source_or_ignored(filename, extensions, ignored, not_ignored):
continue
diff_chunks, additions = _parse_patch(diff[first_hunk.start() :])
file_objects.append(FileObj(filename, additions, diff_chunks))
if has_line_changes(lines_changed_only, diff_chunks, additions):
file_objects.append(FileObj(filename, additions, diff_chunks))
return file_objects


Expand Down
7 changes: 6 additions & 1 deletion cpp_linter/rest_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,18 @@ def make_headers(self, use_diff: bool = False) -> Dict[str, str]:
raise NotImplementedError("must be implemented in the derivative")

def get_list_of_changed_files(
self, extensions: List[str], ignored: List[str], not_ignored: List[str]
self,
extensions: List[str],
ignored: List[str],
not_ignored: List[str],
lines_changed_only: int,
) -> List[FileObj]:
"""Fetch a list of the event's changed files.
:param extensions: A list of file extensions to focus on only.
:param ignored: A list of paths or files to ignore.
:param not_ignored: A list of paths or files to explicitly not ignore.
:param lines_changed_only: A value that dictates what file changes to focus on.
"""
raise NotImplementedError("must be implemented in the derivative")

Expand Down
18 changes: 15 additions & 3 deletions cpp_linter/rest_api/github_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,11 @@ def set_exit_code(
)

def get_list_of_changed_files(
self, extensions: List[str], ignored: List[str], not_ignored: List[str]
self,
extensions: List[str],
ignored: List[str],
not_ignored: List[str],
lines_changed_only: int,
) -> List[FileObj]:
start_log_group("Get list of specified source files")
if environ.get("CI", "false") == "true":
Expand All @@ -88,9 +92,17 @@ def get_list_of_changed_files(
files_link, headers=self.make_headers(use_diff=True)
)
log_response_msg(response_buffer)
files = parse_diff(response_buffer.text, extensions, ignored, not_ignored)
files = parse_diff(
response_buffer.text,
extensions,
ignored,
not_ignored,
lines_changed_only,
)
else:
files = parse_diff(get_diff(), extensions, ignored, not_ignored)
files = parse_diff(
get_diff(), extensions, ignored, not_ignored, lines_changed_only
)
return files

def verify_files_are_present(self, files: List[FileObj]) -> None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,44 +1,4 @@
[
{
"filename": "src/hexen/info.c",
"line_filter": {
"diff_chunks": [
[
143,
149
],
[
179,
185
],
[
248,
255
]
],
"lines_added": []
}
},
{
"filename": "src/hexen/p_enemy.c",
"line_filter": {
"diff_chunks": [
[
1102,
1108
],
[
3853,
3859
],
[
3929,
3935
]
],
"lines_added": []
}
},
{
"filename": "src/hexen/p_local.h",
"line_filter": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19369,18 +19369,6 @@
]
}
},
{
"filename": "libvips/colour/profiles.h",
"line_filter": {
"diff_chunks": [
[
10,
13
]
],
"lines_added": []
}
},
{
"filename": "libvips/colour/rad2float.c",
"line_filter": {
Expand Down
18 changes: 14 additions & 4 deletions tests/capture_tools_output/test_tools_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def prep_tmp_dir(
monkeypatch: pytest.MonkeyPatch,
repo: str,
commit: str,
lines_changed_only: int,
copy_configs: bool = False,
):
"""Some extra setup for test's temp directory to ensure needed files exist."""
Expand All @@ -128,7 +129,10 @@ def prep_tmp_dir(
monkeypatch.chdir(str(repo_cache))
CACHE_PATH.mkdir(exist_ok=True)
files = gh_client.get_list_of_changed_files(
extensions=["c", "h", "hpp", "cpp"], ignored=[".github"], not_ignored=[]
extensions=["c", "h", "hpp", "cpp"],
ignored=[".github"],
not_ignored=[],
lines_changed_only=lines_changed_only,
)
gh_client.verify_files_are_present(files)
repo_path = tmp_path / repo.split("/")[1]
Expand Down Expand Up @@ -181,6 +185,7 @@ def test_lines_changed_only(
extensions=extensions,
ignored=[".github"],
not_ignored=[],
lines_changed_only=lines_changed_only,
)
if files:
expected_results_json = (
Expand All @@ -191,8 +196,7 @@ def test_lines_changed_only(
)
### uncomment this paragraph to update/generate the expected test's results
# expected_results_json.write_text(
# json.dumps([f.serialize() for f in files], indent=2)
# + "\n",
# json.dumps([f.serialize() for f in files], indent=2) + "\n",
# encoding="utf-8",
# )
test_result = json.loads(expected_results_json.read_text(encoding="utf-8"))
Expand Down Expand Up @@ -237,6 +241,7 @@ def test_format_annotations(
tmp_path,
monkeypatch,
**TEST_REPO_COMMIT_PAIRS[0],
lines_changed_only=lines_changed_only,
copy_configs=True,
)
format_advice, tidy_advice = capture_clang_tools_output(
Expand Down Expand Up @@ -312,6 +317,7 @@ def test_tidy_annotations(
tmp_path,
monkeypatch,
**TEST_REPO_COMMIT_PAIRS[4],
lines_changed_only=lines_changed_only,
copy_configs=False,
)
format_advice, tidy_advice = capture_clang_tools_output(
Expand Down Expand Up @@ -429,7 +435,11 @@ def test_parse_diff(

Path(CACHE_PATH).mkdir()
files = parse_diff(
get_diff(), extensions=["cpp", "hpp"], ignored=[], not_ignored=[]
get_diff(),
extensions=["cpp", "hpp"],
ignored=[],
not_ignored=[],
lines_changed_only=0,
)
if sha == TEST_REPO_COMMIT_PAIRS[4]["commit"] or patch:
assert files
Expand Down
10 changes: 5 additions & 5 deletions tests/test_git_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ def test_pygit2_bug1260(caplog: pytest.LogCaptureFixture):
caplog.set_level(logging.WARNING, logger=logger.name)
# the bug in libgit2 should trigger a call to
# cpp_linter.git_str.legacy_parse_diff()
files = parse_diff(diff_str, ["cpp"], [], [])
files = parse_diff(diff_str, ["cpp"], [], [], 0)
assert caplog.messages, "this test is no longer needed; bug was fixed in pygit2"
# if we get here, then the test is satisfied
assert not files # no line changes means no file to focus on


def test_typical_diff():
"""For coverage completeness. Also tests for files with spaces in the names."""
from_c = parse_diff(TYPICAL_DIFF, ["cpp"], [], [])
from_py = parse_diff_str(TYPICAL_DIFF, ["cpp"], [], [])
from_c = parse_diff(TYPICAL_DIFF, ["cpp"], [], [], 0)
from_py = parse_diff_str(TYPICAL_DIFF, ["cpp"], [], [], 0)
assert [f.serialize() for f in from_c] == [f.serialize() for f in from_py]
for file_obj in from_c:
# file name should have spaces
Expand All @@ -65,13 +65,13 @@ def test_binary_diff():
"Binary files /dev/null and b/some picture.png differ",
]
)
files = parse_diff_str(diff_str, ["cpp"], [], [])
files = parse_diff_str(diff_str, ["cpp"], [], [], 0)
# binary files are ignored during parsing
assert not files


def test_ignored_diff():
"""For coverage completeness"""
files = parse_diff_str(TYPICAL_DIFF, ["hpp"], [], [])
files = parse_diff_str(TYPICAL_DIFF, ["hpp"], [], [], 0)
# files whose extension is not in the given list are ignored during parsing
assert not files
2 changes: 1 addition & 1 deletion tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def test_get_changed_files(
text="",
)

files = gh_client.get_list_of_changed_files([], [], [])
files = gh_client.get_list_of_changed_files([], [], [], 0)
assert not files


Expand Down

0 comments on commit ceaf4d8

Please sign in to comment.