Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: SimpleSQLGrammar quote parsing regression #5700

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -57,36 +57,39 @@ TOKEN:
TOKEN : /* Numeric Constants */
{
< S_DOUBLE: ((<S_LONG>)? "." <S_LONG> ( ["e","E"] (["+", "-"])? <S_LONG>)?
|
<S_LONG> "." (["e","E"] (["+", "-"])? <S_LONG>)?
|
<S_LONG> ["e","E"] (["+", "-"])? <S_LONG>
)>
| < S_LONG: ( <DIGIT> )+ >
| < #DIGIT: ["0" - "9"] >
|
<S_LONG> "." (["e","E"] (["+", "-"])? <S_LONG>)?
|
<S_LONG> ["e","E"] (["+", "-"])? <S_LONG>
)>
| < S_LONG: ( <DIGIT> )+ >
| < #DIGIT: ["0" - "9"] >
}

TOKEN:
{
< COMPLEX_IDENTIFIER: (<S_IDENTIFIER> | <S_QUOTED_IDENTIFIER>) ((["\r","\n"," "])* "." (["\r","\n"," "])* (<S_IDENTIFIER> | <S_QUOTED_IDENTIFIER>))+ >
| < S_IDENTIFIER: ( <LETTER> | <UNICODE_LETTERS> )+ ( <DIGIT> | <LETTER> | <UNICODE_LETTERS> | <SPECIAL_CHARS> )* >
| < #LETTER: ["a"-"z", "A"-"Z", "_", "$"] >
| < #SPECIAL_CHARS: "$" | "_" | "#" | "@" >
| < S_IDENTIFIER: ( <LETTER> | <UNICODE_LETTERS> | <FIRST_CHAR_SPECIAL_CHARS> ) ( <LETTER> | <UNICODE_LETTERS> | <SPECIAL_CHARS> | <DIGIT> )* >
| < #LETTER: ["a"-"z", "A"-"Z"] >
| < #FIRST_CHAR_SPECIAL_CHARS: "$" | "_" >
| < #SPECIAL_CHARS: <FIRST_CHAR_SPECIAL_CHARS> | "#" | "@" >

| < #ESC_S_QUOTE_A: ( "''" ) > /* probably the closest to a universal standard */
| < #ESC_S_QUOTE_B: ( "\\'" ) > /* Valid in Postgres and MySQL (if NO_BACKSLASH_ESCAPES not enabled), NOT valid in Oracle or MSSQL */
| < #ESC_D_QUOTE_A: ( "\"\"" ) > /* probably the 2nd closest to a universal standard */
| < #ESC_D_QUOTE_B: ( "\\\"" ) > /* Valid in Postgres and MySQL (if NO_BACKSLASH_ESCAPES not enabled), NOT valid in Oracle or MSSQL */
| < #ESC_NON_QUOTE: "\\" ["n","t","b","r","f","\\","0"] >
| < #ESC_S_QUOTE: ( "''" ) > /* probably the closest to a universal standard */
| < #ESC_D_QUOTE: ( "\"\"" ) > /* probably the 2nd closest to a universal standard */
| < #ESC_ANY_CHAR: "\\" ~[] > // Matches any character following '\'
/* SQL-standard is that string literals are delimited only by single-quote, and double-quotes are only for identifiers... */
| < #S_QUOTED_STRING_HYBRID: ( "'" ( <ESC_S_QUOTE_A> | <ESC_S_QUOTE_B> | <ESC_D_QUOTE_B> | <ESC_NON_QUOTE> | ~["\\","'"] )* ("'" | "\\'")) >
/*
The negated character class in the hybrid string tokens excludes a single backslash (i.e. "\\") so that
backslashes are matched by the ESC_ANY_CHAR token rather than prematurely ending a match
*/
| < #S_QUOTED_STRING_HYBRID: ( "'" ( <ESC_S_QUOTE> | <ESC_ANY_CHAR> | ~["\\","'"] )* ("'" | "\\'")) >
/* ... but many DBs tolerate double-quotes around string literals, including MySQL (unless you enable ANSI SQL mode), and MSSQL (if you disable SET QUOTED_IDENTIFIER) */
| < #D_QUOTED_STRING_HYBRID: ( "\"" ( <ESC_S_QUOTE_B> | <ESC_D_QUOTE_A> | <ESC_D_QUOTE_B> | <ESC_NON_QUOTE> | ~["\\","\""] )* ("\"" | "\\\"")) >
| < #D_QUOTED_STRING_HYBRID: ( "\"" ( <ESC_D_QUOTE> | <ESC_ANY_CHAR> | ~["\\","\""] )* ("\"" | "\\\"")) >
/* Finally... */
| < S_CHAR_LITERAL: (["U","E","N","R","B"]|"RB"|"_utf8")? (<S_QUOTED_STRING_HYBRID> | <D_QUOTED_STRING_HYBRID>) >

| < S_QUOTED_IDENTIFIER: "\"" (~["\n","\r","\""])+ "\"" | ("`" (~["\n","\r","`"])+ "`") | ( "[" ~["0"-"9","]"] (~["\n","\r","]"])* "]" ) >
| < EMPTY_QUOTE: "\"" "\"">
| < EMPTY_QUOTE: "\"" "\"">

/*
Built list from http://stackoverflow.com/a/37668315/45756
Expand Down Expand Up @@ -514,4 +517,3 @@ TOKEN: /* symbols */
"\u00A1"-"\uFF65" /* everything else */
] >
}

Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@ class SimpleSqlGrammarTest extends Specification {
@Unroll
def test() {
when:
def tokenManager = new SimpleSqlGrammarTokenManager(new SimpleCharStream(new StringReader(input)));
def tokenManager = new SimpleSqlGrammarTokenManager(new SimpleCharStream(new StringReader(input)))
def grammar = new SimpleSqlGrammar(tokenManager)

def tokens = new ArrayList<String>()
Token token
System.out.println("----------------------------------------------------------------")
System.out.println("'" + input + "'")
while ((token = grammar.getNextToken()).kind != SimpleSqlGrammarConstants.EOF) {
System.out.println(" " + String.format('%1$-32s', SimpleSqlGrammarConstants.tokenImage[token.kind]) + ": '" + token.toString() + "'")
tokens.add(token.toString())
}

Expand All @@ -37,7 +40,7 @@ class SimpleSqlGrammarTest extends Specification {
"mysql escaped quotes '\\''" | ["mysql", " ", "escaped", " ", "quotes", " ", "'\\''"]
"invalid ' sql" | ["invalid", " ", "'", " ", "sql"]
"'invalid' ' sql" | ["'invalid'", " ", "'", " ", "sql"]
"utf8-〠@chars works" | ["utf8", "-", "〠@chars", " ", "works"]
"utf8-〠@chars works" | ["utf8", "-", "〠@chars", " ", "works"]
"single '\\' works" | ["single", " ", "'\\'", " ", "works"]
"double '\\\\' works" | ["double", " ", "'\\\\'", " ", "works"]
"unquoted \\\\ works" | ["unquoted", " ", "\\", "\\", " ", "works"]
Expand All @@ -52,5 +55,13 @@ class SimpleSqlGrammarTest extends Specification {
"This has a \\ and symbol ≤ (u2264)" | ["This", " ", "has", " ", "a", " ", "\\", " ", "and", " ", "symbol", " ", "≤", " ", "(", "u2264", ")"]
"This ≤ (u2264) is before the \\" | ["This", " ", "≤", " ", "(", "u2264", ")", " ", "is", " ", "before", " ", "the", " ", "\\"]
"This has an unicode char ÀÀÀÀÀÀ+++ãããioú≤₢" | ["This", " ", "has", " ", "an", " ", "unicode"," ", "char", " ", "ÀÀÀÀÀÀ", "+", "+", "+", "ãããioú", "≤", "₢"]
"select 'foo\\_bar' from sys.dual;" | ["select", " ", "'foo\\_bar'", " ", "from", " ", "sys.dual", ";"]
"select \"foo\\_bar\" from sys.dual;" | ["select", " ", "\"foo\\_bar\"", " ", "from", " ", "sys.dual", ";"]
"select 'foo\\sbar' from sys.dual;" | ["select", " ", "'foo\\sbar'", " ", "from", " ", "sys.dual", ";"]
"select \"foo\\sbar\" from sys.dual;" | ["select", " ", "\"foo\\sbar\"", " ", "from", " ", "sys.dual", ";"]
"select '' from sys.dual;" | ["select", " ", "''", " ", "from", " ", "sys.dual", ";"]
"select \"\" from sys.dual;" | ["select", " ", "\"\"", " ", "from", " ", "sys.dual", ";"]
"select q'~;\\~' from sys.dual;" | ["select", " ", "q", "'~;\\~'", " ", "from", " ", "sys.dual", ";"]
"select q'{\\\n;\n\\}' from sys.dual;" | ["select", " ", "q", "'{\\\n;\n\\}'", " ", "from", " ", "sys.dual", ";"]
}
}