Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up lexer #1449

Merged
merged 2 commits into from
Sep 20, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
76 changes: 32 additions & 44 deletions src/Language/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
use GraphQL\Utils\Utils;

/**
* A Lexer is a stateful stream generator in that every time
* it is advanced, it returns the next token in the Source. Assuming the
* source lexes, the final Token emitted by the lexer will be of kind
* EOF, after which the lexer will repeatedly return the same EOF token
* whenever called.
* A lexer is a stateful stream generator: each time it is advanced, it returns the next token in the Source.
* Assuming the source is valid, the final returned token will be EOF,
* after which the lexer will repeatedly return the same EOF token whenever called.
*
* The algorithm is O(N) in both memory and time.
*
Expand All @@ -20,8 +18,8 @@
*/
class Lexer
{
// https://spec.graphql.org/October2021/#sec-Punctuators
private const TOKEN_BANG = 33;
private const TOKEN_HASH = 35;
private const TOKEN_DOLLAR = 36;
private const TOKEN_AMP = 38;
private const TOKEN_PAREN_L = 40;
Expand Down Expand Up @@ -119,19 +117,19 @@ private function readToken(Token $prev): Token
[, $code, $bytes] = $this->readChar(true);

switch ($code) {
case self::TOKEN_BANG:
case self::TOKEN_BANG: // !
return new Token(Token::BANG, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_HASH: // #
case 35: // #
$this->moveStringCursor(-1, -1 * $bytes);

return $this->readComment($line, $col, $prev);
case self::TOKEN_DOLLAR:
case self::TOKEN_DOLLAR: // $
return new Token(Token::DOLLAR, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_AMP:
case self::TOKEN_AMP: // &
return new Token(Token::AMP, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_PAREN_L:
case self::TOKEN_PAREN_L: // (
return new Token(Token::PAREN_L, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_PAREN_R:
case self::TOKEN_PAREN_R: // )
return new Token(Token::PAREN_R, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_DOT: // .
[, $charCode1] = $this->readChar(true);
Expand All @@ -142,21 +140,21 @@ private function readToken(Token $prev): Token
}

break;
case self::TOKEN_COLON:
case self::TOKEN_COLON: // :
return new Token(Token::COLON, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_EQUALS:
case self::TOKEN_EQUALS: // =
return new Token(Token::EQUALS, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_AT:
case self::TOKEN_AT: // @
return new Token(Token::AT, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_BRACKET_L:
case self::TOKEN_BRACKET_L: // [
return new Token(Token::BRACKET_L, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_BRACKET_R:
case self::TOKEN_BRACKET_R: // ]
return new Token(Token::BRACKET_R, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_BRACE_L:
case self::TOKEN_BRACE_L: // {
return new Token(Token::BRACE_L, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_PIPE:
case self::TOKEN_PIPE: // |
return new Token(Token::PIPE, $position, $position + 1, $line, $col, $prev);
case self::TOKEN_BRACE_R:
case self::TOKEN_BRACE_R: // }
return new Token(Token::BRACE_R, $position, $position + 1, $line, $col, $prev);
// A-Z
case 65:
Expand Down Expand Up @@ -416,18 +414,17 @@ private function readString(int $line, int $col, Token $prev): Token
$start = $this->position;

// Skip leading quote and read first string char:
[$char, $code, $bytes] = $this->moveStringCursor(1, 1)->readChar();
[$char, $code, $bytes] = $this->moveStringCursor(1, 1)
->readChar();

$chunk = '';
$value = '';

while (
$code !== null
// not LineTerminator
&& $code !== 10 && $code !== 13
&& $code !== 10 && $code !== 13 // not LineTerminator
) {
// Closing Quote (")
if ($code === 34) {
if ($code === 34) { // Closing Quote (")
$value .= $chunk;

// Skip quote
Expand Down Expand Up @@ -462,8 +459,8 @@ private function readString(int $line, int $col, Token $prev): Token
$value .= '\\';
break;
case 98:
$value .= \chr(8);
break; // \b (backspace)
$value .= \chr(8); // \b (backspace)
break;
case 102:
$value .= "\f";
break;
Expand All @@ -478,7 +475,7 @@ private function readString(int $line, int $col, Token $prev): Token
break;
case 117:
$position = $this->position;
[$hex] = $this->readChars(4, true);
[$hex] = $this->readChars(4);
if (\preg_match('/[0-9a-fA-F]{4}/', $hex) !== 1) {
throw new SyntaxError(
$this->source,
Expand All @@ -493,7 +490,7 @@ private function readString(int $line, int $col, Token $prev): Token
// UTF-16 surrogate pair detection and handling.
$highOrderByte = $code >> 8;
if ($highOrderByte >= 0xD8 && $highOrderByte <= 0xDF) {
[$utf16Continuation] = $this->readChars(6, true);
[$utf16Continuation] = $this->readChars(6);
if (\preg_match('/^\\\u[0-9a-fA-F]{4}$/', $utf16Continuation) !== 1) {
throw new SyntaxError(
$this->source,
Expand Down Expand Up @@ -623,11 +620,8 @@ private function assertValidStringCharacterCode(int $code, int $position): void
{
// SourceCharacter
if ($code < 0x0020 && $code !== 0x0009) {
throw new SyntaxError(
$this->source,
$position,
'Invalid character within String: ' . Utils::printCharCode($code)
);
$char = Utils::printCharCode($code);
throw new SyntaxError($this->source, $position, "Invalid character within String: {$char}");
}
}

Expand All @@ -639,11 +633,8 @@ private function assertValidBlockStringCharacterCode(int $code, int $position):
{
// SourceCharacter
if ($code < 0x0020 && $code !== 0x0009 && $code !== 0x000A && $code !== 0x000D) {
throw new SyntaxError(
$this->source,
$position,
'Invalid character within String: ' . Utils::printCharCode($code)
);
$char = Utils::printCharCode($code);
throw new SyntaxError($this->source, $position, "Invalid character within String: {$char}");
}
}

Expand Down Expand Up @@ -739,7 +730,6 @@ private function readChar(bool $advance = false, int $byteStreamPosition = null)
$bytes = 4;
}

$utf8char = '';
for ($pos = $byteStreamPosition; $pos < $byteStreamPosition + $bytes; ++$pos) {
$utf8char .= $this->source->body[$pos];
}
Expand All @@ -762,7 +752,7 @@ private function readChar(bool $advance = false, int $byteStreamPosition = null)
*
* @return array{string, int}
*/
private function readChars(int $charCount, bool $advance): array
private function readChars(int $charCount): array
{
$result = '';
$totalBytes = 0;
Expand All @@ -775,9 +765,7 @@ private function readChars(int $charCount, bool $advance): array
$result .= $char;
}

if ($advance) {
$this->moveStringCursor($charCount, $totalBytes);
}
$this->moveStringCursor($charCount, $totalBytes);

return [$result, $totalBytes];
}
Expand Down