eslint · mdjermanovic · Mar 5, 2024 · Feb 4, 2024 · Feb 4, 2024 · Feb 4, 2024
@@ -8,6 +8,7 @@ const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
 const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
 const astUtils = require("./utils/ast-utils.js");
 const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
+const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source");
 
 //------------------------------------------------------------------------------
 // Helpers
@@ -226,62 +227,6 @@ module.exports = {
         const sourceCode = context.sourceCode;
         const parser = new RegExpParser();
 
-        /**
-         * Generates a granular loc for context.report, if directly calculable.
-         * @param {Character[]} chars Individual characters being reported on.
-         * @param {Node} node Parent string node to report within.
-         * @returns {Object | null} Granular loc for context.report, if directly calculable.
-         * @see https://github.com/eslint/eslint/pull/17515
-         */
-        function generateReportLocation(chars, node) {
-
-            // Limit to to literals and expression-less templates with raw values === their value.
-            switch (node.type) {
-                case "TemplateLiteral":
-                    if (node.expressions.length || sourceCode.getText(node).slice(1, -1) !== node.quasis[0].value.cooked) {
-                        return null;
-                    }
-                    break;
-
-                case "Literal":
-                    if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) {
-                        return null;
-                    }
-                    break;
-
-                default:
-                    return null;
-            }
-
-            return {
-                start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start),
-                end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars.at(-1).end)
-            };
-        }
-
-        /**
-         * Finds the report loc(s) for a range of matches.
-         * @param {Character[][]} matches Characters that should trigger a report.
-         * @param {Node} node The node to report.
-         * @returns {Object | null} Node loc(s) for context.report.
-         */
-        function getNodeReportLocations(matches, node) {
-            const locs = [];
-
-            for (const chars of matches) {
-                const loc = generateReportLocation(chars, node);
-
-                // If a report can't match to a range, don't report any others
-                if (!loc) {
-                    return [node.loc];
-                }
-
-                locs.push(loc);
-            }
-
-            return locs;
-        }
-
         /**
          * Verify a given regular expression.
          * @param {Node} node The node to report.
@@ -320,12 +265,55 @@ module.exports = {
                             } else {
                                 foundKindMatches.set(kind, [...findCharacterSequences[kind](chars)]);
                             }
-
                         }
                     }
                 }
             });
 
+            let codeUnits = null;
+
+            /**
+             * Finds the report loc(s) for a range of matches.
+             * Only literals and expression-less templates generate granular errors.
+             * @param {Character[][]} matches Lists of individual characters being reported on.
+             * @returns {Location[]} locs for context.report.
+             * @see https://github.com/eslint/eslint/pull/17515
+             */
+            function getNodeReportLocations(matches) {
+                return matches.map(chars => {
+                    const firstIndex = chars[0].start;
+                    const lastIndex = chars.at(-1).end - 1;
+                    let start;
+                    let end;
+
+                    if (node.type === "TemplateLiteral") {
+                        const source = sourceCode.getText(node);
+                        const offset = node.range[0];
+
+                        codeUnits ??= parseTemplateToken(source);
+                        start = offset + codeUnits[firstIndex].start;
+                        end = offset + codeUnits[lastIndex].end;
+                    } else if (typeof node.value === "string") { // String Literal
+                        const source = node.raw;
+                        const offset = node.range[0];
+
+                        codeUnits ??= parseStringLiteral(source);
+                        start = offset + codeUnits[firstIndex].start;
+                        end = offset + codeUnits[lastIndex].end;
+                    } else { // RegExp Literal
+                        const offset = node.range[0] + 1; // Add 1 to skip the leading slash.
+
+                        start = offset + firstIndex;
+                        end = offset + lastIndex + 1;
+                    }
+
+                    return {
+                        start: sourceCode.getLocFromIndex(start),
+                        end: sourceCode.getLocFromIndex(end)
+                    };
+                });
+            }
+
             for (const [kind, matches] of foundKindMatches) {
                 let suggest;
 
@@ -336,7 +324,7 @@ module.exports = {
                     }];
                 }
 
-                const locs = getNodeReportLocations(matches, node);
+                const locs = getNodeReportLocations(matches);
 
                 for (const loc of locs) {
                     context.report({

@@ -0,0 +1,226 @@
+/**
+ * @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token.
+ * @author Francesco Trotta
+ */
+
+"use strict";
+
+/**
+ * Represents a code unit produced by the evaluation of a JavaScript common token like a string
+ * literal or template token.
+ */
+class CodeUnit {
+    constructor(start, source) {
+        this.start = start;
+        this.source = source;
+    }
+
+    get end() {
+        return this.start + this.length;
+    }
+
+    get length() {
+        return this.source.length;
+    }
+}
+
+/**
+ * An object used to keep track of the position in a source text where the next characters will be read.
+ */
+class SourceReader {
+    constructor(source) {
+        this.source = source;
+        this.pos = 0;
+    }
+}
+
+const SIMPLE_ESCAPE_SEQUENCES =
+{ __proto__: null, b: "\b", f: "\f", n: "\n", r: "\r", t: "\t", v: "\v" };
+
+/**
+ * Reads a hex escape sequence.
+ * @param {SourceReader} reader The reader should be positioned on the first hexadecimal digit.
+ * @param {number} length The number of hexadecimal digits.
+ * @returns {string} A code unit.
+ */
+function readHexSequence(reader, length) {
+    const { source, pos } = reader;
+    const str = source.slice(pos, pos + length);
+    const charCode = parseInt(str, 16);
+
+    reader.pos = pos + length;
+    return String.fromCharCode(charCode);
+}
+
+/**
+ * Reads a Unicode escape sequence.
+ * @param {SourceReader} reader The reader should be positioned after the "u".
+ * @returns {string} A code unit.
+ */
+function readUnicodeSequence(reader) {
+    const { source, pos } = reader;
+    const regExp = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy;
+
+    regExp.lastIndex = pos;
+    const match = regExp.exec(source);
+
+    if (match) {
+        const codePoint = parseInt(match.groups.hexDigits, 16);
+
+        reader.pos = regExp.lastIndex;
+        return String.fromCodePoint(codePoint);
+    }
+    return readHexSequence(reader, 4);
+}
+
+/**
+ * Reads an octal escape sequence.
+ * @param {SourceReader} reader The reader should be positioned after the first octal digit.
+ * @param {number} maxLength The maximum number of octal digits.
+ * @returns {string} A code unit.
+ */
+function readOctalSequence(reader, maxLength) {
+    const posAfterBackslash = reader.pos - 1;
+    const [octalStr] = reader.source.slice(posAfterBackslash, posAfterBackslash + maxLength).match(/^[0-7]+/u);
+
+    reader.pos = posAfterBackslash + octalStr.length;
+    const octal = parseInt(octalStr, 8);
+
+    return String.fromCharCode(octal);
+}
+
+/**
+ * Reads an escape sequence or line continuation.
+ * @param {SourceReader} reader The reader should be positioned after the backslash.
+ * @returns {string} A string of zero, one or two code units.
+ */
+function readEscapeSequenceOrLineContinuation(reader) {
+    const { source, pos } = reader;
+    const char = source[pos];
+
+    reader.pos = pos + 1;
+    const unitChar = SIMPLE_ESCAPE_SEQUENCES[char];
+
+    if (unitChar) {
+        return unitChar;
+    }
+    switch (char) {
+        case "x":
+            return readHexSequence(reader, 2);
+        case "u":
+            return readUnicodeSequence(reader);
+        case "\r":
+            if (source[pos + 1] === "\n") {
+                reader.pos = pos + 2;
+            }
+
+            // fallthrough
+        case "\n":
+        case "\u2028":
+        case "\u2029":
+            return "";
+        case "0":
+        case "1":
+        case "2":
+        case "3":
+            return readOctalSequence(reader, 3);
+        case "4":
+        case "5":
+        case "6":
+        case "7":
+            return readOctalSequence(reader, 2);
+        default:
+            return char;
+    }
+}
+
+/**
+ * Reads an escape sequence or line continuation and generates the respective `CodeUnit` elements.
+ * @param {SourceReader} reader The reader should be positioned on the backslash.
+ * @returns {Generator<CodeUnit>} Zero, one or two `CodeUnit` elements.
+ */
+function *mapEscapeSequenceOrLineContinuation(reader) {
+    const start = reader.pos++;
+    const str = readEscapeSequenceOrLineContinuation(reader);
+    const end = reader.pos;
+    const source = reader.source.slice(start, end);
+
+    switch (str.length) {
+        case 0:
+            break;
+        case 1:
+            yield new CodeUnit(start, source);
+            break;
+        default:
+            yield new CodeUnit(start, source);
+            yield new CodeUnit(start, source);
+            break;
+    }
+}
+
+/**
+ * Parses a string literal.
+ * @param {string} source The string literal to parse, including the delimiting quotes.
+ * @returns {CodeUnit[]} A list of code units produced by the string literal.
+ */
+function parseStringLiteral(source) {
+    const reader = new SourceReader(source);
+    const quote = source[0];
+
+    reader.pos = 1;
+    const codeUnits = [];
+
+    for (;;) {
+        const { pos } = reader;
+        const char = source[pos];
+
+        if (char === quote) {
+            break;
+        }
+        if (char === "\\") {
+            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
+        } else {
+            reader.pos = pos + 1;
+            codeUnits.push(new CodeUnit(pos, char));
+        }
+    }
+    return codeUnits;
+}
+
+/**
+ * Parses a template token.
+ * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`.
+ * @returns {CodeUnit[]} A list of code units produced by the template token.
+ */
+function parseTemplateToken(source) {
+    const reader = new SourceReader(source);
+
+    reader.pos = 1;
+    const codeUnits = [];
+
+    for (;;) {
+        const { pos } = reader;
+        const char = source[pos];
+
+        if (char === "`" || char === "$" && source[pos + 1] === "{") {
+            break;
+        }
+        if (char === "\\") {
+            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
+        } else {
+            let unitSource;
+
+            if (char === "\r" && source[pos + 1] === "\n") {
+                unitSource = "\r\n";
+                reader.pos = pos + 2;
+            } else {
+                unitSource = char;
+                reader.pos = pos + 1;
+            }
+            codeUnits.push(new CodeUnit(pos, unitSource));
+        }
+    }
+    return codeUnits;
+}
+
+module.exports = { parseStringLiteral, parseTemplateToken };