Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL query formatting improvements #1752

Merged
merged 10 commits into from
Apr 9, 2023
128 changes: 93 additions & 35 deletions debug_toolbar/panels/sql/utils.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,128 @@
import re
from functools import lru_cache
from html import escape

import sqlparse
from django.utils.html import escape
from django.dispatch import receiver
from django.test.signals import setting_changed
from sqlparse import tokens as T

from debug_toolbar import settings as dt_settings


class BoldKeywordFilter:
"""sqlparse filter to bold SQL keywords"""
class ElideSelectListsFilter:
"""sqlparse filter to elide the select list from top-level SELECT ... FROM clauses,
if present"""

def process(self, stream):
"""Process the token stream"""
# Elision starts out enabled, so the first SELECT keyword can be elided. The
# flag is then recomputed from the most recent keyword: it appears to stay
# enabled only when that keyword was EXCEPT, INTERSECT or UNION.
allow_elision = True
for token_type, value in stream:
yield token_type, value
if token_type in T.Keyword:
keyword = value.upper()
if allow_elision and keyword == "SELECT":
yield from self.elide_until_from(stream)
allow_elision = keyword in ["EXCEPT", "INTERSECT", "UNION"]

@staticmethod
def elide_until_from(stream):
# Reads tokens from the same stream object as process() until a FROM keyword
# is reached, then yields either the buffered select list or a bullet
# placeholder, followed by the FROM token itself.
has_dot = False
saved_tokens = []
for token_type, value in stream:
# NOTE(review): the next six lines (is_keyword ... "</strong>") look like removed
# lines of the previous BoldKeywordFilter.process that this diff view interleaves
# with the new method body -- confirm against the merged file.
is_keyword = token_type in T.Keyword
if is_keyword:
yield T.Text, "<strong>"
yield token_type, escape(value)
if is_keyword:
yield T.Text, "</strong>"
if token_type in T.Keyword and value.upper() == "FROM":
# Do not elide select lists that do not contain dots (used to separate
# table names from column names) in order to preserve
# SELECT COUNT(*) AS `__count` FROM ...
# and
# SELECT (1) AS `a` FROM ...
# queries.
if not has_dot:
yield from saved_tokens
else:
# U+2022: Unicode character 'BULLET'
yield T.Other, " \u2022\u2022\u2022 "
yield token_type, value
break
# Tokens are only buffered until the first dot is seen; from then on the list
# will be replaced by the placeholder, so buffering appears to stop.
if not has_dot:
if token_type in T.Punctuation and value == ".":
has_dot = True
else:
saved_tokens.append((token_type, value))


class BoldKeywordFilter:
    """sqlparse statement filter that wraps SQL keywords in ``<strong>`` tags."""

    def process(self, stmt):
        """Insert ``<strong>``/``</strong>`` marker tokens around keywords, in place.

        ``stmt.tokens`` is walked by position and token groups are processed
        recursively. The markers are created with the ``T.Other`` token type.
        """
        position = 0
        # Insertions lengthen the token list, so its length is re-read each pass.
        while position < len(stmt.tokens):
            current = stmt[position]
            if current.is_keyword:
                opening = sqlparse.sql.Token(T.Other, "<strong>")
                stmt.insert_before(position, opening)
                closing = sqlparse.sql.Token(T.Other, "</strong>")
                # The closing marker is anchored on position + 1 because the
                # opening marker was just inserted at ``position``.
                stmt.insert_after(position + 1, closing, skip_ws=False)
                # Advance past the opening marker, the keyword and the closing
                # marker in one step.
                position += 3
                continue
            if current.is_group:
                self.process(current)
            position += 1


def escaped_value(token):
    """Return ``token.value``, HTML-escaped unless the token already holds markup.

    Tokens whose type is exactly ``T.Other`` or ``T.Whitespace`` are returned
    verbatim; every other token is escaped with quotes left untouched.
    """
    text = token.value
    # T.Whitespace tokens are passed through unescaped as well, because
    # AlignedIndentFilter inserts its tokens as T.Whitespace, and in our case
    # those tokens are actually HTML.
    if token.ttype not in (T.Other, T.Whitespace):
        text = escape(text, quote=False)
    return text


class EscapedStringSerializer:
    """sqlparse post-processor that turns a Statement into a string escaped for
    inclusion in HTML."""

    @staticmethod
    def process(stmt):
        """Join the escaped value of every token yielded by ``stmt.flatten()``."""
        pieces = [escaped_value(leaf) for leaf in stmt.flatten()]
        return "".join(pieces)


# Render *sql* as HTML. Without ``with_toggle`` only the formatted markup is
# returned. With it, the result is a hidden "djDebugCollapsed" span holding the
# formatted SQL followed by a "djDebugUncollapsed" span holding the simplified SQL.
# NOTE(review): the two ``formatted = ...`` lines and the ``simple``/``simplified``
# pairs below look like before/after versions of the same statements that the diff
# view interleaves -- confirm against the merged file.
def reformat_sql(sql, with_toggle=False):
formatted = parse_sql(sql, aligned_indent=True)
formatted = parse_sql(sql)
if not with_toggle:
return formatted
simple = simplify(parse_sql(sql, aligned_indent=False))
uncollapsed = f'<span class="djDebugUncollapsed">{simple}</span>'
simplified = parse_sql(sql, simplify=True)
uncollapsed = f'<span class="djDebugUncollapsed">{simplified}</span>'
collapsed = f'<span class="djDebugCollapsed djdt-hidden">{formatted}</span>'
return collapsed + uncollapsed


# NOTE(review): two generations of this helper appear interleaved here. The first
# ``parse_sql`` and ``_parse_sql`` (taking ``pretty``/``aligned_indent``) look like the
# removed version; the keyword-only ``parse_sql(sql, *, simplify=False)`` looks like
# its replacement, presumably keeping the ``lru_cache`` decorator -- confirm against
# the merged file.
def parse_sql(sql, aligned_indent=False):
return _parse_sql(
sql,
dt_settings.get_config()["PRETTIFY_SQL"],
aligned_indent,
)


@lru_cache(maxsize=128)
def _parse_sql(sql, pretty, aligned_indent):
stack = get_filter_stack(pretty, aligned_indent)
def parse_sql(sql, *, simplify=False):
# Run the SQL through the filter stack selected by ``simplify`` and join the
# pieces the stack yields into one string.
stack = get_filter_stack(simplify=simplify)
return "".join(stack.run(sql))


# Build (and cache) the sqlparse FilterStack used to render SQL as HTML.
# NOTE(review): both the positional ``(prettify, aligned_indent)`` signature and the
# keyword-only ``(*, simplify)`` signature are shown, and old and new body lines are
# interleaved below -- confirm against the merged file which lines survive.
@lru_cache(maxsize=None)
def get_filter_stack(prettify, aligned_indent):
def get_filter_stack(*, simplify):
stack = sqlparse.engine.FilterStack()
if prettify:
stack.enable_grouping()
if aligned_indent:
# In the ``simplify`` variant the select-list elision filter is added as a
# preprocessor; the alternative branch consults the PRETTIFY_SQL setting.
if simplify:
stack.preprocess.append(ElideSelectListsFilter())
else:
if dt_settings.get_config()["PRETTIFY_SQL"]:
stack.enable_grouping()
stack.stmtprocess.append(
sqlparse.filters.AlignedIndentFilter(char="&nbsp;", n="<br/>")
)
stack.preprocess.append(BoldKeywordFilter()) # add our custom filter
stack.postprocess.append(sqlparse.filters.SerializerUnicode()) # tokens -> strings
stack.stmtprocess.append(BoldKeywordFilter())
stack.postprocess.append(EscapedStringSerializer()) # Statement -> str
return stack


# NOTE(review): ``simplify_re`` and ``simplify`` look like the removed regex-based
# elision helper (its call site in reformat_sql is paired with a ``simplify=True``
# replacement); ``clear_caches`` looks like newly added code -- confirm against the
# merged file.
simplify_re = re.compile(r"SELECT</strong> (...........*?) <strong>FROM")


# Replace the text between a bolded SELECT and a bolded FROM with three bullet
# character references.
def simplify(sql):
return simplify_re.sub(r"SELECT</strong> &#8226;&#8226;&#8226; <strong>FROM", sql)
# Signal receiver for ``setting_changed``: when DEBUG_TOOLBAR_CONFIG changes, the
# caches on ``parse_sql`` and ``get_filter_stack`` are cleared.
@receiver(setting_changed)
def clear_caches(*, setting, **kwargs):
if setting == "DEBUG_TOOLBAR_CONFIG":
parse_sql.cache_clear()
get_filter_stack.cache_clear()


def contrasting_color_generator():
Expand Down
3 changes: 3 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ Pending
is rendered, so that the correct values will be displayed in the rendered
stack trace, as they may have changed between the time the stack trace was
captured and when it is rendered.
* Improved SQL statement formatting performance. Additionally, fixed the
indentation of ``CASE`` statements and stopped simplifying ``.count()``
queries.

3.8.1 (2022-12-03)
------------------
Expand Down
89 changes: 69 additions & 20 deletions tests/panels/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from django.test.utils import override_settings

import debug_toolbar.panels.sql.tracking as sql_tracking
from debug_toolbar import settings as dt_settings

try:
import psycopg
Expand Down Expand Up @@ -458,42 +457,92 @@ def test_regression_infinite_recursion(self):
# ensure the stacktrace is populated
self.assertTrue(len(query["stacktrace"]) > 0)

# NOTE(review): this test is shown with old and new lines interleaved. The decorator,
# the un-nested query/assert sequences and the direct ``dt_settings.get_config()[...]``
# assignments look like the removed version; the ``with override_settings(...)`` blocks
# look like the replacement -- confirm against the merged file.
@override_settings(
DEBUG_TOOLBAR_CONFIG={"PRETTIFY_SQL": True},
)
def test_prettify_sql(self):
"""
Test case to validate that the PRETTIFY_SQL setting changes the output
of the sql when it's toggled. It does not validate what it does
though.
"""
list(User.objects.filter(username__istartswith="spam"))

response = self.panel.process_request(self.request)
self.panel.generate_stats(self.request, response)
pretty_sql = self.panel._queries[-1]["sql"]
self.assertEqual(len(self.panel._queries), 1)
with override_settings(DEBUG_TOOLBAR_CONFIG={"PRETTIFY_SQL": True}):
list(User.objects.filter(username__istartswith="spam"))
response = self.panel.process_request(self.request)
self.panel.generate_stats(self.request, response)
pretty_sql = self.panel._queries[-1]["sql"]
self.assertEqual(len(self.panel._queries), 1)

# Reset the queries
self.panel._queries = []
# Run it again, but with prettify off. Verify that it's different.
dt_settings.get_config()["PRETTIFY_SQL"] = False
list(User.objects.filter(username__istartswith="spam"))
response = self.panel.process_request(self.request)
self.panel.generate_stats(self.request, response)
self.assertEqual(len(self.panel._queries), 1)
self.assertNotEqual(pretty_sql, self.panel._queries[-1]["sql"])
with override_settings(DEBUG_TOOLBAR_CONFIG={"PRETTIFY_SQL": False}):
list(User.objects.filter(username__istartswith="spam"))
response = self.panel.process_request(self.request)
self.panel.generate_stats(self.request, response)
self.assertEqual(len(self.panel._queries), 1)
self.assertNotEqual(pretty_sql, self.panel._queries[-1]["sql"])

self.panel._queries = []
# Run it again, but with prettify back on.
# This is so we don't have to check what PRETTIFY_SQL does exactly,
# but we know it's doing something.
dt_settings.get_config()["PRETTIFY_SQL"] = True
list(User.objects.filter(username__istartswith="spam"))
with override_settings(DEBUG_TOOLBAR_CONFIG={"PRETTIFY_SQL": True}):
list(User.objects.filter(username__istartswith="spam"))
response = self.panel.process_request(self.request)
self.panel.generate_stats(self.request, response)
self.assertEqual(len(self.panel._queries), 1)
self.assertEqual(pretty_sql, self.panel._queries[-1]["sql"])

def test_simplification(self):
"""
Test case to validate that select lists for .count() and .exists() queries do not
get elided, but other select lists do.
"""
User.objects.count()
User.objects.exists()
list(User.objects.values_list("id"))
response = self.panel.process_request(self.request)
self.panel.generate_stats(self.request, response)
# NOTE(review): the next two assertions (one query, comparison with ``pretty_sql``)
# look like removed lines from the end of the previous test that the diff view
# interleaves here -- confirm against the merged file.
self.assertEqual(len(self.panel._queries), 1)
self.assertEqual(pretty_sql, self.panel._queries[-1]["sql"])
self.assertEqual(len(self.panel._queries), 3)
# "\u2022" (BULLET) is the character used for the elision placeholder, so its
# presence indicates that a select list was elided.
self.assertNotIn("\u2022", self.panel._queries[0]["sql"])
self.assertNotIn("\u2022", self.panel._queries[1]["sql"])
self.assertIn("\u2022", self.panel._queries[2]["sql"])

def test_top_level_simplification(self):
"""
Test case to validate that top-level select lists get elided, but other select
lists for subselects do not.
"""
list(User.objects.filter(id__in=User.objects.filter(is_staff=True)))
list(User.objects.filter(id__lt=20).union(User.objects.filter(id__gt=10)))
# The intersection and difference queries are only issued when the database
# vendor is not MySQL.
if connection.vendor != "mysql":
list(
User.objects.filter(id__lt=20).intersection(
User.objects.filter(id__gt=10)
)
)
list(
User.objects.filter(id__lt=20).difference(
User.objects.filter(id__gt=10)
)
)
response = self.panel.process_request(self.request)
self.panel.generate_stats(self.request, response)
if connection.vendor != "mysql":
self.assertEqual(len(self.panel._queries), 4)
else:
self.assertEqual(len(self.panel._queries), 2)
# The elision placeholder consists of three bullet characters, so each elided
# select list contributes 3 to the "\u2022" counts below.
# WHERE ... IN SELECT ... queries should have only one elided select list
self.assertEqual(self.panel._queries[0]["sql"].count("SELECT"), 4)
self.assertEqual(self.panel._queries[0]["sql"].count("\u2022"), 3)
# UNION queries should have two elided select lists
self.assertEqual(self.panel._queries[1]["sql"].count("SELECT"), 4)
self.assertEqual(self.panel._queries[1]["sql"].count("\u2022"), 6)
if connection.vendor != "mysql":
# INTERSECT queries should have two elided select lists
self.assertEqual(self.panel._queries[2]["sql"].count("SELECT"), 4)
self.assertEqual(self.panel._queries[2]["sql"].count("\u2022"), 6)
# EXCEPT queries should have two elided select lists
self.assertEqual(self.panel._queries[3]["sql"].count("SELECT"), 4)
self.assertEqual(self.panel._queries[3]["sql"].count("\u2022"), 6)

@override_settings(
DEBUG=True,
Expand Down