Skip to content

Commit

Permalink
Add PostgreSQL Explain lexer (#2398)
Browse files Browse the repository at this point in the history
This lexer adds support for PostgreSQL EXPLAIN plans:

https://www.postgresql.org/docs/current/sql-explain.html

This was heavily inspired by Maxence Ahlouche's work; thanks to him:
https://github.com/maahl/pg_explain_lexer

Co-authored-by: Jean Abou Samra <jean@abou-samra.fr>
  • Loading branch information
anayrat and jeanas committed Apr 4, 2023
1 parent 3c6e2af commit ef0abba
Show file tree
Hide file tree
Showing 6 changed files with 3,125 additions and 4 deletions.
4 changes: 3 additions & 1 deletion AUTHORS
Expand Up @@ -11,6 +11,7 @@ Other contributors, listed alphabetically, are:
* Ali Afshar -- image formatter
* Thomas Aglassinger -- Easytrieve, JCL, Rexx, Transact-SQL and VBScript
lexers
* Maxence Ahlouche -- PostgreSQL Explain lexer
* Muthiah Annamalai -- Ezhil lexer
* Kumar Appaiah -- Debian control lexer
* Andreas Amann -- AppleScript lexer
Expand Down Expand Up @@ -162,6 +163,7 @@ Other contributors, listed alphabetically, are:
* Paulo Moura -- Logtalk lexer
* Mher Movsisyan -- DTD lexer
* Dejan Muhamedagic -- Crmsh lexer
* Adrien Nayrat -- PostgreSQL Explain lexer
* Ana Nelson -- Ragel, ANTLR, R console lexers
* David Neto, Google LLC -- WebGPU Shading Language lexer
* Kurt Neufeld -- Markdown lexer
Expand Down Expand Up @@ -191,7 +193,7 @@ Other contributors, listed alphabetically, are:
* Justin Reidy -- MXML lexer
* Norman Richards -- JSON lexer
* Corey Richardson -- Rust lexer updates
* Fabrizio Riguzzi -- cplint leder
* Fabrizio Riguzzi -- cplint lexer
* Lubomir Rintel -- GoodData MAQL and CL lexers
* Andre Roberge -- Tango style
* Georg Rollinger -- HSAIL lexer
Expand Down
1 change: 1 addition & 0 deletions pygments/lexers/_mapping.py
Expand Up @@ -370,6 +370,7 @@
'PortugolLexer': ('pygments.lexers.pascal', 'Portugol', ('portugol',), ('*.alg', '*.portugol'), ()),
'PostScriptLexer': ('pygments.lexers.graphics', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)),
'PostgresConsoleLexer': ('pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)),
'PostgresExplainLexer': ('pygments.lexers.sql', 'PostgreSQL EXPLAIN dialect', ('postgres-explain',), ('*.explain',), ('text/x-postgresql-explain',)),
'PostgresLexer': ('pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)),
'PovrayLexer': ('pygments.lexers.graphics', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)),
'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'pwsh', 'posh', 'ps1', 'psm1'), ('*.ps1', '*.psm1'), ('text/x-powershell',)),
Expand Down
55 changes: 55 additions & 0 deletions pygments/lexers/_postgres_builtins.py
Expand Up @@ -571,6 +571,61 @@
'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE',
)

# Names that EXPLAIN prints for plan nodes.  Most of them are taken from
# the ExplainNode function in src/backend/commands/explain.c.

EXPLAIN_KEYWORDS = (
    'Aggregate',
    'Append',
    'Bitmap Heap Scan',
    'Bitmap Index Scan',
    'BitmapAnd',
    'BitmapOr',
    'CTE Scan',
    'Custom Scan',
    'Delete',
    'Foreign Scan',
    'Function Scan',
    'Gather Merge',
    'Gather',
    'Group',
    'GroupAggregate',
    'Hash Join',
    'Hash',
    'HashAggregate',
    'Incremental Sort',
    'Index Only Scan',
    'Index Scan',
    'Insert',
    'Limit',
    'LockRows',
    'Materialize',
    'Memoize',
    'Merge Append',
    'Merge Join',
    'Merge',
    'MixedAggregate',
    'Named Tuplestore Scan',
    'Nested Loop',
    'ProjectSet',
    'Recursive Union',
    'Result',
    'Sample Scan',
    'Seq Scan',
    'SetOp',
    'Sort',
    'SubPlan',
    'Subquery Scan',
    'Table Function Scan',
    'Tid Range Scan',
    'Tid Scan',
    'Unique',
    'Update',
    'Values Scan',
    'WindowAgg',
    'WorkTable Scan',
)


if __name__ == '__main__': # pragma: no cover
import re
Expand Down
194 changes: 191 additions & 3 deletions pygments/lexers/sql.py
Expand Up @@ -30,6 +30,9 @@
- highlights errors in the output and notification levels;
- handles psql backslash commands.
`PostgresExplainLexer`
A lexer to highlight PostgreSQL execution plans (``EXPLAIN`` output).
The ``tests/examplefiles`` contains a few test files with data to be
parsed by these lexers.
Expand All @@ -45,7 +48,7 @@
from pygments.lexers import get_lexer_by_name, ClassNotFound

from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
PSEUDO_TYPES, PLPGSQL_KEYWORDS
PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS
from pygments.lexers._mysql_builtins import \
MYSQL_CONSTANTS, \
MYSQL_DATATYPES, \
Expand All @@ -57,8 +60,8 @@


__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
'SqliteConsoleLexer', 'RqlLexer']
'PostgresExplainLexer', 'SqlLexer', 'TransactSqlLexer',
'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']

# Matches one line of input: the shortest run of characters up to and
# including the next newline.
line_re = re.compile('.*?\n')
# Matches a SQLite prompt at the start of the string: either ``sqlite`` or a
# space followed by any three characters (the dots are unescaped regex
# wildcards), then ``>``.  The lookahead requires a following space but does
# not consume it.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')
Expand Down Expand Up @@ -368,6 +371,191 @@ def get_tokens_unprocessed(self, data):
return


class PostgresExplainLexer(RegexLexer):
    """
    Lexer for PostgreSQL ``EXPLAIN`` output (query execution plans).

    The ``root`` state recognises plan node names, counters and section
    labels.  Labels that introduce a structured value push a dedicated state:

    - ``instrumentation``: the ``(cost=...)`` / ``(actual ...)`` counters;
    - ``object_name``: relation, index and column names;
    - ``predicate``: filter and join conditions (rest of the line);
    - ``expression``: nested parenthesized expressions;
    - ``conflict``: ``ON CONFLICT`` details;
    - ``setting``: the ``Settings:`` list;
    - ``init_plan``: ``InitPlan`` / ``SubPlan`` headers;
    - ``sort``: ``Sort Method`` details.
    """

    name = 'PostgreSQL EXPLAIN dialect'
    aliases = ['postgres-explain']
    filenames = ['*.explain']
    mimetypes = ['text/x-postgresql-explain']

    tokens = {
        'root': [
            (r'(:|\(|\)|ms|kB|->|\.\.|\,)', Punctuation),
            (r'(\s+)', Whitespace),

            # This matches the estimated cost and the counters actually
            # measured with ANALYZE; we then move to the instrumentation state
            (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # Misc keywords
            (words(('actual', 'Memory Usage', 'Memory', 'Buckets', 'Batches',
                    'originally', 'row', 'rows', 'Hits', 'Misses',
                    'Evictions', 'Overflows'), suffix=r'\b'),
             Comment.Single),

            (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
            (r'(shared|temp|local)', Keyword.Pseudo),

            # We move to the sort state in order to emphasize specific
            # keywords (especially disk access)
            (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),

            # These keywords can be followed by an object, like a table
            (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
             bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
            (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),

            # These keywords can be followed by a predicate
            (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
                    'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
                    'TID Cond', 'Run Condition', 'Order By', 'Function Call',
                    'Table Function Call', 'Inner Unique', 'Params Evaluated',
                    'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
                    'Relations', 'Remote SQL'), suffix=r'\b'),
             Comment.Preproc, 'predicate'),

            # Special keyword to handle ON CONFLICT
            (r'Conflict ', Comment.Preproc, 'conflict'),

            # Special keyword for InitPlan or SubPlan
            (r'(InitPlan|SubPlan)( )(\d+)( )',
             bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
             'init_plan'),

            (words(('Sort Method', 'Join Filter', 'Planning time',
                    'Planning Time', 'Execution time', 'Execution Time',
                    'Workers Planned', 'Workers Launched', 'Buffers',
                    'Planning', 'Worker', 'Query Identifier', 'Time',
                    'Full-sort Groups'), suffix=r'\b'), Comment.Preproc),

            # Emphasize these keywords

            (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
                    'Rows Removed by Index Recheck',
                    'Heap Fetches', 'never executed'),
                   suffix=r'\b'), Name.Exception),
            (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),

            (words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),

            # join keywords
            (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
            (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
            (r'Backward', Comment.Preproc),
            (r'(Intersect|Except|Hash)', Comment.Preproc),

            (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),


            # Treat "on" and "using" as punctuation
            (r'(on|using)', Punctuation, 'object_name'),


            # strings
            (r"'(''|[^'])*'", String.Single),
            # numbers
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),

            # boolean
            (r'(true|false)', Name.Constant),
            # explain header
            (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
            # Settings
            (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),

            # Handle JIT counters
            (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
            (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),

            # Handle Triggers counters
            (r'(Trigger)( )(\S*)(:)( )',
             bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),

        ],
        'expression': [
            # matches any kind of parenthesized expression
            # the first opening paren is matched by the 'caller'
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            (r'(never executed)', Name.Exception),
            (r'[^)(]+', Comment),
        ],
        'object_name': [

            # This is a cost or analyze measure
            (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
            (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),

            # if object_name is parenthesized, mark opening paren as
            # punctuation, call 'expression', and exit state
            (r'\(', Punctuation, 'expression'),
            (r'(on)', Punctuation),
            # matches possibly schema-qualified table and column names
            (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
            (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
            (r'\'\S*\'', Name.Variable),

            # if we encounter a comma, another object is listed
            (r',\n', Punctuation, 'object_name'),
            (r',', Punctuation, 'object_name'),

            # special case: "*SELECT*"
            (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
            (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
            (r'"ANY_subquery"', Name.Variable),

            # Variable $1 ...
            (r'\$\d+', Name.Variable),
            # cast
            (r'::\w+', Name.Variable),
            (r' +', Whitespace),
            (r'"', Punctuation),
            (r'\[\.\.\.\]', Punctuation),
            (r'\)', Punctuation, '#pop'),
        ],
        'predicate': [
            # if predicate is parenthesized, mark paren as punctuation
            (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
            # otherwise color until newline
            (r'[^\n]*', Name.Variable, '#pop'),
        ],
        'instrumentation': [
            (r'=|\.\.', Punctuation),
            (r' +', Whitespace),
            (r'(rows|width|time|loops)', Name.Class),
            (r'\d+\.\d+', Number.Float),
            (r'(\d+)', Number.Integer),
            # the closing paren ends the counter list
            (r'\)', Punctuation, '#pop'),
        ],
        'conflict': [
            (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
            (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
            (r'(Filter: )', Comment.Preproc, 'predicate'),
        ],
        'setting': [
            (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
            (r'\, ', Punctuation),
        ],
        'init_plan': [
            (r'\(', Punctuation),
            (r'returns \$\d+(,\$\d+)?', Name.Variable),
            (r'\)', Punctuation, '#pop'),
        ],
        'sort': [
            # NOTE: this state has no explicit '#pop'; it appears to rely on
            # the lexer leaving the state when no rule matches at the end of
            # the line.
            (r':|kB', Punctuation),
            # In-memory sort methods and memory statistics; same token type
            # as the 'Sort Method' label that leads into this state.
            (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Preproc),
            # Disk-based sorting is emphasized
            (r'(external|merge|Disk|sort)', Name.Exception),
            (r'(\d+)', Number.Integer),
            (r' +', Whitespace),
        ],
    }


class SqlLexer(RegexLexer):
"""
Lexer for Structured Query Language. Currently, this lexer does
Expand Down

0 comments on commit ef0abba

Please sign in to comment.