Add PostgreSQL Explain lexer #2398

Merged
merged 8 commits on Apr 4, 2023
Changes from 2 commits
4 changes: 3 additions & 1 deletion AUTHORS
@@ -11,6 +11,7 @@ Other contributors, listed alphabetically, are:
* Ali Afshar -- image formatter
* Thomas Aglassinger -- Easytrieve, JCL, Rexx, Transact-SQL and VBScript
lexers
* Maxence Ahlouche -- PostgreSQL Explain lexer
* Muthiah Annamalai -- Ezhil lexer
* Kumar Appaiah -- Debian control lexer
* Andreas Amann -- AppleScript lexer
@@ -162,6 +163,7 @@ Other contributors, listed alphabetically, are:
* Paulo Moura -- Logtalk lexer
* Mher Movsisyan -- DTD lexer
* Dejan Muhamedagic -- Crmsh lexer
* Adrien Nayrat -- PostgreSQL Explain lexer
* Ana Nelson -- Ragel, ANTLR, R console lexers
* Kurt Neufeld -- Markdown lexer
* Nam T. Nguyen -- Monokai style
@@ -190,7 +192,7 @@ Other contributors, listed alphabetically, are:
* Justin Reidy -- MXML lexer
* Norman Richards -- JSON lexer
* Corey Richardson -- Rust lexer updates
* Fabrizio Riguzzi -- cplint leder
* Fabrizio Riguzzi -- cplint lexer
* Lubomir Rintel -- GoodData MAQL and CL lexers
* Andre Roberge -- Tango style
* Georg Rollinger -- HSAIL lexer
1 change: 1 addition & 0 deletions pygments/lexers/_mapping.py
@@ -370,6 +370,7 @@
'PortugolLexer': ('pygments.lexers.pascal', 'Portugol', ('portugol',), ('*.alg', '*.portugol'), ()),
'PostScriptLexer': ('pygments.lexers.graphics', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)),
'PostgresConsoleLexer': ('pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)),
'PostgresExplainLexer': ('pygments.lexers.sql', 'PostgreSQL EXPLAIN dialect', ('postgres-explain',), ('*.explain',), ('text/x-postgresql-explain',)),
'PostgresLexer': ('pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)),
'PovrayLexer': ('pygments.lexers.graphics', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)),
'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'pwsh', 'posh', 'ps1', 'psm1'), ('*.ps1', '*.psm1'), ('text/x-powershell',)),
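The mapping entry above registers the new lexer under the postgres-explain alias, the *.explain filename pattern, and the text/x-postgresql-explain mimetype. As a minimal sketch (not part of this diff; the filename below is hypothetical), it can then be resolved through the usual Pygments lookup helpers:

from pygments.lexers import get_lexer_by_name, get_lexer_for_filename

# Look up the lexer by the alias registered in _mapping.py.
lexer = get_lexer_by_name('postgres-explain')
# Or by a filename matching the '*.explain' pattern (hypothetical file name).
same = get_lexer_for_filename('query_plan.explain')
print(lexer.name, same.name)  # both resolve to 'PostgreSQL EXPLAIN dialect'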
55 changes: 55 additions & 0 deletions pygments/lexers/_postgres_builtins.py
@@ -571,6 +571,61 @@
'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE',
)

# Most of these keywords come from the ExplainNode function
# in src/backend/commands/explain.c

EXPLAIN_KEYWORDS = (
'Aggregate',
'Append',
'Bitmap Heap Scan',
'Bitmap Index Scan',
'BitmapAnd',
'BitmapOr',
'CTE Scan',
'Custom Scan',
'Delete',
'Foreign Scan',
'Function Scan',
'Gather Merge',
'Gather',
'Group',
'GroupAggregate',
'Hash Join',
'Hash',
'HashAggregate',
'Incremental Sort',
'Index Only Scan',
'Index Scan',
'Insert',
'Limit',
'LockRows',
'Materialize',
'Memoize',
'Merge Append',
'Merge Join',
'Merge',
'MixedAggregate',
'Named Tuplestore Scan',
'Nested Loop',
'ProjectSet',
'Recursive Union',
'Result',
'Sample Scan',
'Seq Scan',
'SetOp',
'Sort',
'SubPlan',
'Subquery Scan',
'Table Function Scan',
'Tid Range Scan',
'Tid Scan',
'Unique',
'Update',
'Values Scan',
'WindowAgg',
'WorkTable Scan',
)


if __name__ == '__main__': # pragma: no cover
import re
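EXPLAIN_KEYWORDS feeds the words(EXPLAIN_KEYWORDS, suffix=r'\b') rule in the new PostgresExplainLexer below, so plan node names are emitted as Keyword tokens. A minimal sketch of the resulting token stream, assuming the lexer from this branch is installed (the plan line is a made-up sample):

from pygments import lex
from pygments.lexers.sql import PostgresExplainLexer

# Fabricated one-line plan; 'Seq Scan' is one of the EXPLAIN_KEYWORDS above.
plan = "Seq Scan on accounts  (cost=0.00..445.00 rows=10000 width=244)\n"
for token_type, value in lex(plan, PostgresExplainLexer()):
    print(token_type, repr(value))
# 'Seq Scan' comes out as Token.Keyword; the parenthesized cost figures are
# handled by the 'instrumentation' state defined in sql.py below.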
191 changes: 188 additions & 3 deletions pygments/lexers/sql.py
@@ -30,6 +30,9 @@
- highlights errors in the output and notification levels;
- handles psql backslash commands.

`PostgresExplainLexer`
A lexer to highlight Postgres execution plans.

The ``tests/examplefiles`` directory contains a few test files with data to be
parsed by these lexers.

@@ -45,7 +48,7 @@
from pygments.lexers import get_lexer_by_name, ClassNotFound

from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
PSEUDO_TYPES, PLPGSQL_KEYWORDS
PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS
from pygments.lexers._mysql_builtins import \
MYSQL_CONSTANTS, \
MYSQL_DATATYPES, \
@@ -57,8 +60,8 @@


__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
'SqliteConsoleLexer', 'RqlLexer']
'PostgresExplainLexer', 'SqlLexer', 'TransactSqlLexer',
'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']

line_re = re.compile('.*?\n')
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')
@@ -368,6 +371,188 @@ def get_tokens_unprocessed(self, data):
return


class PostgresExplainLexer(RegexLexer):
"""
Handle PostgreSQL EXPLAIN output.
"""

name = 'PostgreSQL EXPLAIN dialect'
aliases = ['postgres-explain']
filenames = ['*.explain']
mimetypes = ['text/x-postgresql-explain']

tokens = {
'root': [
(r'(:|\(|\)|ms|kB|->|\.\.|\,)', Punctuation),
(r'(\s+)', Whitespace),


# This matches the estimated cost and the counters actually measured with ANALYZE.
# Then we move to the 'instrumentation' state.
(r'(cost|actual )(=)?', bygroups(Name.Class, Punctuation),
'instrumentation'),

# Misc keywords
(words(('actual', 'Memory Usage', 'Memory', 'Buckets', 'Batches',
'originally', 'row', 'rows', 'Hits', 'Misses',
'Evictions', 'Overflows'), suffix=r'\b'),
Comment.Single),

(r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
(r'(shared|temp|local)', Keyword.Pseudo),

# We move to the 'sort' state in order to emphasize specific keywords (especially disk access)
(r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),

# These keywords can be followed by an object, like a table
(r'(Sort Key|Group Key|Presorted Key|Hash Key)(: )', bygroups(Comment.Preproc, Punctuation), 'object_name'),
(r'(Cache Key|Cache Mode)(: )', bygroups(Comment, Punctuation), 'object_name'),

# These keywords can be followed by a predicate
(words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
'TID Cond', 'Run Condition', 'Order By', 'Function Call',
'Table Function Call', 'Inner Unique', 'Params Evaluated',
'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
'Relations', 'Remote SQL'), suffix=r'\b'),
Comment.Preproc, 'predicate'),

# Special keyword to handle ON CONFLICT
(r'Conflict ', Comment.Preproc, 'conflict'),

# Special keyword for InitPlan or SubPlan
(r'(InitPlan|SubPlan)( \d+ )',
bygroups(Keyword, Number.Integer), 'init_plan'),

(words(('Sort Method', 'Join Filter', 'Planning time',
'Planning Time', 'Execution time', 'Execution Time',
'Workers Planned', 'Workers Launched', 'Buffers',
'Planning', 'Worker', 'Query Identifier', 'Time',
'Full-sort Groups'), suffix=r'\b'), Comment.Preproc),

# Emphasize these keywords

(words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
'Rows Removed by Index Recheck',
'Heap Fetches', 'never executed'),
suffix=r'\b'), Name.Exception),
(r'(I/O Timings)(: )', bygroups(Name.Exception, Punctuation)),

(words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),

# join keywords
(r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
(r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
(r'Backward', Comment.Preproc),
(r'(Intersect|Except|Hash)', Comment.Preproc),

(r'(CTE )(\w*)?', bygroups(Comment, Name.Variable)),


# Treat "on" and "using" as a punctuation
(r'(on |using )', Punctuation, 'object_name'),


# strings
(r"'(''|[^'])*'", String.Single),
# numbers
(r'\d+\.\d+', Number.Float),
(r'(\d+)', Number.Integer),

# boolean
(r'(true|false)', Name.Constant),
# explain header
(r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
# Settings
(r'(Settings)(: )', bygroups(Comment.Preproc, Punctuation), 'setting'),

# Handle JIT counters
(r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
(r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),

# Handle Triggers counters
(r'(Trigger) (\S*)(: )', bygroups(Comment.Preproc, Name.Variable, Punctuation)),

],
'expression': [
# matches any kind of parenthesized expression
# the first opening paren is matched by the 'caller'
(r'\(', Punctuation, '#push'),
(r'\)', Punctuation, '#pop'),
(r'(never executed)', Name.Exception),
(r'[^)(]+', Comment),
],
'object_name': [

# This is a cost estimate or an ANALYZE measurement
(r'(\(cost|\(actual )(=)?', bygroups(Name.Class, Punctuation), 'instrumentation'),

# if object_name is parenthesized, mark opening paren as
# punctuation, call 'expression', and exit state
(r'\(', Punctuation, 'expression'),
(r'(on)', Punctuation),
# matches possibly schema-qualified table and column names
(r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
(r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
(r'\'\S*\'', Name.Variable),

# if we encounter a comma, another object is listed
(r',\n', Punctuation, 'object_name'),
(r',', Punctuation, 'object_name'),

# special case: "*SELECT*"
(r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
(r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
(r'"ANY_subquery"', Name.Variable),

# Variable $1 ...
(r'\$\d+', Name.Variable),
# cast
(r'::\w+', Name.Variable),
(r' ', Whitespace),
(r'"', Punctuation),
(r'\[\.\.\.\]', Punctuation),
(r'\)', Punctuation, '#pop'),
],
'predicate': [
# if predicate is parenthesized, mark paren as punctuation
(r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
# otherwise color until newline
(r'[^\n]*', Name.Variable, '#pop'),
],
'instrumentation': [
(r'=|\.\.', Punctuation),
(r' ', Whitespace),
(r'(rows|width|time|loops)', Name.Class),
(r'\d+\.\d+', Number.Float),
(r'(\d+)', Number.Integer),
(r'\)', Punctuation, '#pop'),
],
'conflict': [
(r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
(r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
(r'(Filter: )', Comment.Preproc, 'predicate'),
],
'setting': [
(r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
(r'\, ', Punctuation),
],
'init_plan': [
(r'\(', Punctuation),
(r'returns \$\d+(,\$\d+)?', Name.Variable),
(r'\)', Punctuation, '#pop'),
],
'sort': [
(r':|kB', Punctuation),
(r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Preproc),
(r'(external|merge|Disk|sort)', Name.Exception),
(r'(\d+)', Number.Integer),
(r' ', Whitespace),
],
}


class SqlLexer(RegexLexer):
"""
Lexer for Structured Query Language. Currently, this lexer does
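As a usage sketch, not part of the diff: once merged, a captured EXPLAIN ANALYZE plan can be rendered with the standard Pygments API (the plan text below is a fabricated sample):

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers.sql import PostgresExplainLexer

plan = """\
Sort  (cost=158.51..158.64 rows=53 width=16) (actual time=0.506..0.510 rows=53 loops=1)
  Sort Key: n.nspname
  Sort Method: quicksort  Memory: 28kB
  ->  Seq Scan on pg_namespace n  (cost=0.00..157.00 rows=53 width=16) (actual time=0.015..0.181 rows=53 loops=1)
Planning Time: 0.120 ms
Execution Time: 0.780 ms
"""

# Emits HTML spans for node names, costs, and timing counters.
print(highlight(plan, PostgresExplainLexer(), HtmlFormatter()))

The same lexer is also reachable from the pygmentize command line through the postgres-explain alias registered in _mapping.py.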