Add a SARIF output formatter #1113

Merged
merged 1 commit on Mar 8, 2024
1 change: 1 addition & 0 deletions bandit/__init__.py
@@ -16,4 +16,5 @@
from bandit.core.issue import * # noqa
from bandit.core.test_properties import * # noqa

__author__ = metadata.metadata("bandit")["Author"]
__version__ = metadata.version("bandit")
372 changes: 372 additions & 0 deletions bandit/formatters/sarif.py
@@ -0,0 +1,372 @@
# Copyright (c) Microsoft. All Rights Reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Note: this code mostly incorporated from
# https://github.com/microsoft/bandit-sarif-formatter
#
r"""
===============
SARIF formatter
===============

This formatter outputs the issues in SARIF-formatted JSON.

:Example:

.. code-block:: javascript

    {
      "runs": [
        {
          "tool": {
            "driver": {
              "name": "Bandit",
              "organization": "PyCQA",
              "rules": [
                {
                  "id": "B101",
                  "name": "assert_used",
                  "properties": {
                    "tags": [
                      "security",
                      "external/cwe/cwe-703"
                    ],
                    "precision": "high"
                  },
                  "helpUri": "https://bandit.readthedocs.io/en/1.7.8/plugins/b101_assert_used.html"
                }
              ],
              "version": "1.7.8",
              "semanticVersion": "1.7.8"
            }
          },
          "invocations": [
            {
              "executionSuccessful": true,
              "endTimeUtc": "2024-03-05T03:28:48Z"
            }
          ],
          "properties": {
            "metrics": {
              "_totals": {
                "loc": 1,
                "nosec": 0,
                "skipped_tests": 0,
                "SEVERITY.UNDEFINED": 0,
                "CONFIDENCE.UNDEFINED": 0,
                "SEVERITY.LOW": 1,
                "CONFIDENCE.LOW": 0,
                "SEVERITY.MEDIUM": 0,
                "CONFIDENCE.MEDIUM": 0,
                "SEVERITY.HIGH": 0,
                "CONFIDENCE.HIGH": 1
              },
              "./examples/assert.py": {
                "loc": 1,
                "nosec": 0,
                "skipped_tests": 0,
                "SEVERITY.UNDEFINED": 0,
                "SEVERITY.LOW": 1,
                "SEVERITY.MEDIUM": 0,
                "SEVERITY.HIGH": 0,
                "CONFIDENCE.UNDEFINED": 0,
                "CONFIDENCE.LOW": 0,
                "CONFIDENCE.MEDIUM": 0,
                "CONFIDENCE.HIGH": 1
              }
            }
          },
          "results": [
            {
              "message": {
                "text": "Use of assert detected. The enclosed code will be removed when compiling to optimised byte code."
              },
              "level": "note",
              "locations": [
                {
                  "physicalLocation": {
                    "region": {
                      "snippet": {
                        "text": "assert True\n"
                      },
                      "endColumn": 11,
                      "endLine": 1,
                      "startColumn": 0,
                      "startLine": 1
                    },
                    "artifactLocation": {
                      "uri": "examples/assert.py"
                    },
                    "contextRegion": {
                      "snippet": {
                        "text": "assert True\n"
                      },
                      "endLine": 1,
                      "startLine": 1
                    }
                  }
                }
              ],
              "properties": {
                "issue_confidence": "HIGH",
                "issue_severity": "LOW"
              },
              "ruleId": "B101",
              "ruleIndex": 0
            }
          ]
        }
      ],
      "version": "2.1.0",
      "$schema": "https://json.schemastore.org/sarif-2.1.0.json"
    }
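
A report like the example above can be generated from the command line once
bandit is installed (illustrative invocation; the output filename is
arbitrary, and ``-o`` is assumed to be bandit's usual output-file option):

.. code-block:: console

    bandit examples/assert.py -f sarif -o assert.sarif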

.. versionadded:: 1.7.8

""" # noqa: E501
import logging
import pathlib
import sys
import urllib.parse as urlparse
from datetime import datetime

import sarif_om as om
from jschema_to_python.to_json import to_json

import bandit
from bandit.core import docs_utils

LOG = logging.getLogger(__name__)
SCHEMA_URI = "https://json.schemastore.org/sarif-2.1.0.json"
SCHEMA_VER = "2.1.0"
TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def report(manager, fileobj, sev_level, conf_level, lines=-1):
"""Prints issues in SARIF format

:param manager: the bandit manager object
:param fileobj: The output file object, which may be sys.stdout
:param sev_level: Filtering severity level
:param conf_level: Filtering confidence level
:param lines: Number of lines to report, -1 for all
"""

log = om.SarifLog(
schema_uri=SCHEMA_URI,
version=SCHEMA_VER,
runs=[
om.Run(
tool=om.Tool(
driver=om.ToolComponent(
name="Bandit",
organization=bandit.__author__,
semantic_version=bandit.__version__,
version=bandit.__version__,
)
),
invocations=[
om.Invocation(
end_time_utc=datetime.utcnow().strftime(TS_FORMAT),
execution_successful=True,
)
],
properties={"metrics": manager.metrics.data},
)
],
)

run = log.runs[0]
invocation = run.invocations[0]

skips = manager.get_skipped()
add_skipped_file_notifications(skips, invocation)

issues = manager.get_issue_list(sev_level=sev_level, conf_level=conf_level)

add_results(issues, run)

serializedLog = to_json(log)

with fileobj:
fileobj.write(serializedLog)

if fileobj.name != sys.stdout.name:
LOG.info("SARIF output written to file: %s", fileobj.name)


def add_skipped_file_notifications(skips, invocation):
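    # Skipped files are surfaced as SARIF toolConfigurationNotifications on
    # the invocation object, one "error"-level notification per skipped file.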
    if skips is None or len(skips) == 0:
        return

    if invocation.tool_configuration_notifications is None:
        invocation.tool_configuration_notifications = []

    for skip in skips:
        (file_name, reason) = skip

        notification = om.Notification(
            level="error",
            message=om.Message(text=reason),
            locations=[
                om.Location(
                    physical_location=om.PhysicalLocation(
                        artifact_location=om.ArtifactLocation(
                            uri=to_uri(file_name)
                        )
                    )
                )
            ],
        )

        invocation.tool_configuration_notifications.append(notification)


def add_results(issues, run):
    if run.results is None:
        run.results = []

    rules = {}
    rule_indices = {}
    for issue in issues:
        result = create_result(issue, rules, rule_indices)
        run.results.append(result)

    if len(rules) > 0:
        run.tool.driver.rules = list(rules.values())


def create_result(issue, rules, rule_indices):
    issue_dict = issue.as_dict()

    rule, rule_index = create_or_find_rule(issue_dict, rules, rule_indices)

    physical_location = om.PhysicalLocation(
        artifact_location=om.ArtifactLocation(
            uri=to_uri(issue_dict["filename"])
        )
    )

    add_region_and_context_region(
        physical_location,
        issue_dict["line_range"],
        issue_dict["col_offset"],
        issue_dict["end_col_offset"],
        issue_dict["code"],
    )

    return om.Result(
        rule_id=rule.id,
        rule_index=rule_index,
        message=om.Message(text=issue_dict["issue_text"]),
        level=level_from_severity(issue_dict["issue_severity"]),
        locations=[om.Location(physical_location=physical_location)],
        properties={
            "issue_confidence": issue_dict["issue_confidence"],
            "issue_severity": issue_dict["issue_severity"],
        },
    )


def level_from_severity(severity):
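    # SARIF 2.1.0 defines the result levels "none", "note", "warning" and
    # "error"; bandit's severities are mapped onto the latter three here.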
    if severity == "HIGH":
        return "error"
    elif severity == "MEDIUM":
        return "warning"
    elif severity == "LOW":
        return "note"
    else:
        return "warning"


def add_region_and_context_region(
    physical_location, line_range, col_offset, end_col_offset, code
):
    if code:
        first_line_number, snippet_lines = parse_code(code)
        snippet_line = snippet_lines[line_range[0] - first_line_number]
        snippet = om.ArtifactContent(text=snippet_line)
    else:
        snippet = None

    physical_location.region = om.Region(
        start_line=line_range[0],
        end_line=line_range[1] if len(line_range) > 1 else line_range[0],
        start_column=col_offset + 1,
        end_column=end_col_offset + 1,
        snippet=snippet,
    )

    if code:
        physical_location.context_region = om.Region(
            start_line=first_line_number,
            end_line=first_line_number + len(snippet_lines) - 1,
            snippet=om.ArtifactContent(text="".join(snippet_lines)),
        )


def parse_code(code):
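    # Bandit supplies ``code`` as line-number-prefixed text, one source line
    # per entry, e.g. "1 assert True\n" (as assumed by the split below); that
    # example input would yield (1, ["assert True\n"]).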
    code_lines = code.split("\n")

    # The last line from the split has nothing in it; it's an artifact of the
    # last "real" line ending in a newline. Unless, of course, it doesn't:
    last_line = code_lines[len(code_lines) - 1]

    last_real_line_ends_in_newline = False
    if len(last_line) == 0:
        code_lines.pop()
        last_real_line_ends_in_newline = True

    snippet_lines = []
    first_line_number = 0
    first = True
    for code_line in code_lines:
        number_and_snippet_line = code_line.split(" ", 1)
        if first:
            first_line_number = int(number_and_snippet_line[0])
            first = False

        snippet_line = number_and_snippet_line[1] + "\n"
        snippet_lines.append(snippet_line)

    if not last_real_line_ends_in_newline:
        last_line = snippet_lines[len(snippet_lines) - 1]
        snippet_lines[len(snippet_lines) - 1] = last_line[: len(last_line) - 1]

    return first_line_number, snippet_lines


def create_or_find_rule(issue_dict, rules, rule_indices):
    rule_id = issue_dict["test_id"]
    if rule_id in rules:
        return rules[rule_id], rule_indices[rule_id]

    rule = om.ReportingDescriptor(
        id=rule_id,
        name=issue_dict["test_name"],
        help_uri=docs_utils.get_url(rule_id),
        properties={
            "tags": [
                "security",
                f"external/cwe/cwe-{issue_dict['issue_cwe'].get('id')}",
            ],
            "precision": issue_dict["issue_confidence"].lower(),
        },
    )

    index = len(rules)
    rules[rule_id] = rule
    rule_indices[rule_id] = index
    return rule, index


def to_uri(file_path):
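    # For example, on a POSIX system a relative "examples/assert.py" is
    # returned percent-encoded as-is, while an absolute "/tmp/example.py"
    # becomes "file:///tmp/example.py" via PurePath.as_uri().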
    pure_path = pathlib.PurePath(file_path)
    if pure_path.is_absolute():
        return pure_path.as_uri()
    else:
        # Replace backslashes with slashes.
        posix_path = pure_path.as_posix()
        # %-encode special characters.
        return urlparse.quote(posix_path)
5 changes: 5 additions & 0 deletions doc/source/formatters/sarif.rst
@@ -0,0 +1,5 @@
-----
sarif
-----

.. automodule:: bandit.formatters.sarif
2 changes: 1 addition & 1 deletion doc/source/man/bandit.rst
@@ -44,7 +44,7 @@ OPTIONS
(-l for LOW, -ll for MEDIUM, -lll for HIGH)
-i, --confidence report only issues of a given confidence level or
higher (-i for LOW, -ii for MEDIUM, -iii for HIGH)
-f {csv,custom,html,json,screen,txt,xml,yaml}, --format {csv,custom,html,json,screen,txt,xml,yaml}
-f {csv,custom,html,json,sarif,screen,txt,xml,yaml}, --format {csv,custom,html,json,sarif,screen,txt,xml,yaml}
specify output format
--msg-template MSG_TEMPLATE
specify output message template (only usable with