Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make no-misleading-character-class report more granular errors #18082

Merged
merged 19 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
104 changes: 46 additions & 58 deletions lib/rules/no-misleading-character-class.js
fasttime marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
const astUtils = require("./utils/ast-utils.js");
const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source");

//------------------------------------------------------------------------------
// Helpers
Expand Down Expand Up @@ -226,62 +227,6 @@ module.exports = {
const sourceCode = context.sourceCode;
const parser = new RegExpParser();

/**
* Generates a granular loc for context.report, if directly calculable.
* @param {Character[]} chars Individual characters being reported on.
* @param {Node} node Parent string node to report within.
* @returns {Object | null} Granular loc for context.report, if directly calculable.
* @see https://github.com/eslint/eslint/pull/17515
*/
function generateReportLocation(chars, node) {

// Limit to to literals and expression-less templates with raw values === their value.
switch (node.type) {
case "TemplateLiteral":
if (node.expressions.length || sourceCode.getText(node).slice(1, -1) !== node.quasis[0].value.cooked) {
return null;
}
break;

case "Literal":
if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) {
return null;
}
break;

default:
return null;
}

return {
start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start),
end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars.at(-1).end)
};
}

/**
* Finds the report loc(s) for a range of matches.
* @param {Character[][]} matches Characters that should trigger a report.
* @param {Node} node The node to report.
* @returns {Object | null} Node loc(s) for context.report.
*/
function getNodeReportLocations(matches, node) {
const locs = [];

for (const chars of matches) {
const loc = generateReportLocation(chars, node);

// If a report can't match to a range, don't report any others
if (!loc) {
return [node.loc];
}

locs.push(loc);
}

return locs;
}

/**
* Verify a given regular expression.
* @param {Node} node The node to report.
Expand Down Expand Up @@ -320,12 +265,55 @@ module.exports = {
} else {
foundKindMatches.set(kind, [...findCharacterSequences[kind](chars)]);
}

}
}
}
});

let codeUnits = null;

/**
* Finds the report loc(s) for a range of matches.
* Only literals and expression-less templates generate granular errors.
* @param {Character[][]} matches Lists of individual characters being reported on.
* @returns {Location[]} locs for context.report.
* @see https://github.com/eslint/eslint/pull/17515
*/
function getNodeReportLocations(matches) {
return matches.map(chars => {
const firstIndex = chars[0].start;
const lastIndex = chars.at(-1).end - 1;
let start;
let end;

if (node.type === "TemplateLiteral") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should check if the template literal has expressions. For example, this crashes:

/* eslint no-misleading-character-class: "error" */

new RegExp(`${"[👍]"}`);
TypeError: Cannot read properties of undefined (reading 'start')
Occurred while linting C:\projects\eslint\foo.js:3
Rule: "no-misleading-character-class"
    at C:\projects\eslint\lib\rules\no-misleading-character-class.js:294:64

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct! Those cases should be covered by unit tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 7bb7e55.

const source = sourceCode.getText(node);
const offset = node.range[0];

codeUnits ??= parseTemplateToken(source);
start = offset + codeUnits[firstIndex].start;
end = offset + codeUnits[lastIndex].end;
} else if (typeof node.value === "string") { // String Literal
const source = node.raw;
const offset = node.range[0];

codeUnits ??= parseStringLiteral(source);
start = offset + codeUnits[firstIndex].start;
end = offset + codeUnits[lastIndex].end;
} else { // RegExp Literal
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

else can be anything, for example:

image

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 7bb7e55.

const offset = node.range[0] + 1; // Add 1 to skip the leading slash.

start = offset + firstIndex;
end = offset + lastIndex + 1;
}

return {
start: sourceCode.getLocFromIndex(start),
end: sourceCode.getLocFromIndex(end)
};
});
}

for (const [kind, matches] of foundKindMatches) {
let suggest;

Expand All @@ -336,7 +324,7 @@ module.exports = {
}];
}

const locs = getNodeReportLocations(matches, node);
const locs = getNodeReportLocations(matches);

for (const loc of locs) {
context.report({
Expand Down
226 changes: 226 additions & 0 deletions lib/rules/utils/char-source.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/**
* @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token.
* @author Francesco Trotta
*/

"use strict";

/**
* Represents a code unit produced by the evaluation of a JavaScript common token like a string
* literal or template token.
*/
class CodeUnit {
constructor(start, source) {
this.start = start;
this.source = source;
}

get end() {
return this.start + this.length;
}

get length() {
return this.source.length;
}
}

/**
* An object used to keep track of the position in a source text where the next characters will be read.
*/
class SourceReader {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might rename this as TextSource. SourceReader implies that there's a read() method or something that this object is supposed to be doing, but it's just a data structure.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the feedback. I've added a read() method in 448d9a9 to read characters relative to the current position. So it's no longer necessary to store the position in a variable before accessing the source like it was done before. And I'm using += for relative position changes. I think this makes the code a little clearer but if that's not the case we can revert to how it was done previously and just address the suggestions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the read()!

SourceReader is still a little ambiguous IMO. Maybe, CharsReader? CharactersReader? TextReader? Not a blocker from my end.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I don't have a preference for the name, so pick one that sounds better to you.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd vote for TextReader then, to align with #18082 (comment).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed in 072f256.

constructor(source) {
this.source = source;
this.pos = 0;
}
}

const SIMPLE_ESCAPE_SEQUENCES =
{ __proto__: null, b: "\b", f: "\f", n: "\n", r: "\r", t: "\t", v: "\v" };

/**
* Reads a hex escape sequence.
* @param {SourceReader} reader The reader should be positioned on the first hexadecimal digit.
* @param {number} length The number of hexadecimal digits.
* @returns {string} A code unit.
*/
function readHexSequence(reader, length) {
const { source, pos } = reader;
const str = source.slice(pos, pos + length);
const charCode = parseInt(str, 16);

reader.pos = pos + length;
return String.fromCharCode(charCode);
}

/**
* Reads a Unicode escape sequence.
* @param {SourceReader} reader The reader should be positioned after the "u".
* @returns {string} A code unit.
*/
function readUnicodeSequence(reader) {
const { source, pos } = reader;
const regExp = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy;

regExp.lastIndex = pos;
const match = regExp.exec(source);

if (match) {
const codePoint = parseInt(match.groups.hexDigits, 16);

reader.pos = regExp.lastIndex;
return String.fromCodePoint(codePoint);
}
return readHexSequence(reader, 4);
}

/**
* Reads an octal escape sequence.
* @param {SourceReader} reader The reader should be positioned after the first octal digit.
* @param {number} maxLength The maximum number of octal digits.
* @returns {string} A code unit.
*/
function readOctalSequence(reader, maxLength) {
const posAfterBackslash = reader.pos - 1;
const [octalStr] = reader.source.slice(posAfterBackslash, posAfterBackslash + maxLength).match(/^[0-7]+/u);

reader.pos = posAfterBackslash + octalStr.length;
const octal = parseInt(octalStr, 8);

return String.fromCharCode(octal);
}

/**
* Reads an escape sequence or line continuation.
* @param {SourceReader} reader The reader should be positioned after the backslash.
* @returns {string} A string of zero, one or two code units.
*/
function readEscapeSequenceOrLineContinuation(reader) {
const { source, pos } = reader;
const char = source[pos];

reader.pos = pos + 1;
const unitChar = SIMPLE_ESCAPE_SEQUENCES[char];

if (unitChar) {
return unitChar;
}
switch (char) {
case "x":
return readHexSequence(reader, 2);
case "u":
return readUnicodeSequence(reader);
case "\r":
if (source[pos + 1] === "\n") {
reader.pos = pos + 2;
}

// fallthrough
case "\n":
case "\u2028":
case "\u2029":
return "";
case "0":
case "1":
case "2":
case "3":
return readOctalSequence(reader, 3);
case "4":
case "5":
case "6":
case "7":
return readOctalSequence(reader, 2);
default:
return char;
}
}

/**
* Reads an escape sequence or line continuation and generates the respective `CodeUnit` elements.
* @param {SourceReader} reader The reader should be positioned on the backslash.
* @returns {Generator<CodeUnit>} Zero, one or two `CodeUnit` elements.
*/
function *mapEscapeSequenceOrLineContinuation(reader) {
const start = reader.pos++;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A little unclear here...are you intentionally moving the reader position by one? Or did you intend for const start = reader.pos + 1?

Either way, I think this line needs reworking to make its intention clear.

const str = readEscapeSequenceOrLineContinuation(reader);
const end = reader.pos;
const source = reader.source.slice(start, end);

switch (str.length) {
case 0:
break;
case 1:
yield new CodeUnit(start, source);
break;
default:
yield new CodeUnit(start, source);
yield new CodeUnit(start, source);
break;
}
}

/**
* Parses a string literal.
* @param {string} source The string literal to parse, including the delimiting quotes.
* @returns {CodeUnit[]} A list of code units produced by the string literal.
*/
function parseStringLiteral(source) {
const reader = new SourceReader(source);
const quote = source[0];

reader.pos = 1;
const codeUnits = [];

for (;;) {
const { pos } = reader;
const char = source[pos];

if (char === quote) {
break;
}
if (char === "\\") {
codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
} else {
reader.pos = pos + 1;
codeUnits.push(new CodeUnit(pos, char));
}
}
return codeUnits;
}

/**
* Parses a template token.
* @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`.
* @returns {CodeUnit[]} A list of code units produced by the template token.
*/
function parseTemplateToken(source) {
const reader = new SourceReader(source);

reader.pos = 1;
const codeUnits = [];

for (;;) {
const { pos } = reader;
const char = source[pos];

if (char === "`" || char === "$" && source[pos + 1] === "{") {
break;
}
if (char === "\\") {
codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
} else {
let unitSource;

if (char === "\r" && source[pos + 1] === "\n") {
unitSource = "\r\n";
reader.pos = pos + 2;
} else {
unitSource = char;
reader.pos = pos + 1;
}
codeUnits.push(new CodeUnit(pos, unitSource));
}
}
return codeUnits;
}

module.exports = { parseStringLiteral, parseTemplateToken };