Skip to content

Commit 24d6a47

Browse files
committedSep 7, 2024·
feat(linter): implement eslint/no-invalid-regexp (#5443)
closes #611 @leaysgur Kicking off `no-invalid-regexp`, feel free to take over and claim the bounty on #611 😁 I can continue if you wanna work on other stuff (getting confused in prettier land ;-))
1 parent 2c3f3fe commit 24d6a47

File tree

3 files changed

+549
-0
lines changed

3 files changed

+549
-0
lines changed
 

‎crates/oxc_linter/src/rules.rs

+2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ mod eslint {
7777
pub mod no_global_assign;
7878
pub mod no_import_assign;
7979
pub mod no_inner_declarations;
80+
pub mod no_invalid_regexp;
8081
pub mod no_irregular_whitespace;
8182
pub mod no_iterator;
8283
pub mod no_label_var;
@@ -568,6 +569,7 @@ oxc_macros::declare_all_lint_rules! {
568569
eslint::no_useless_concat,
569570
eslint::no_useless_constructor,
570571
eslint::no_var,
572+
eslint::no_invalid_regexp,
571573
eslint::no_void,
572574
eslint::no_with,
573575
eslint::radix,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,341 @@
1+
use oxc_allocator::Allocator;
2+
use oxc_ast::{ast::Argument, AstKind};
3+
use oxc_diagnostics::{LabeledSpan, OxcDiagnostic};
4+
use oxc_macros::declare_oxc_lint;
5+
use oxc_regular_expression::{FlagsParser, ParserOptions, PatternParser};
6+
use oxc_span::Span;
7+
use rustc_hash::FxHashSet;
8+
use serde::Deserialize;
9+
10+
use crate::{context::LintContext, rule::Rule, AstNode};
11+
12+
/// Lint rule state for `no-invalid-regexp`: a boxed `NoInvalidRegexpConfig` holding the
/// user-supplied options (the default configuration is used when none is given).
#[derive(Debug, Default, Clone)]
13+
pub struct NoInvalidRegexp(Box<NoInvalidRegexpConfig>);
14+
15+
declare_oxc_lint!(
16+
/// ### What it does
17+
/// Disallow invalid regular expression strings (patterns and flags) in `RegExp` constructors.
18+
///
19+
/// ### Why is this bad?
20+
/// An invalid pattern in a regular expression literal is a `SyntaxError` when the code is parsed,
21+
/// but an invalid string in a `RegExp` constructor throws a `SyntaxError` only when the code is executed.
22+
///
23+
/// ### Examples
24+
///
25+
/// Examples of **incorrect** code for this rule:
26+
/// ```js
27+
/// RegExp('[')
28+
/// RegExp('.', 'z')
29+
/// new RegExp('\\')
30+
/// ```
31+
///
32+
/// Examples of **correct** code for this rule:
33+
/// ```js
34+
/// RegExp('.')
35+
/// new RegExp
36+
/// this.RegExp('[')
37+
/// ```
38+
NoInvalidRegexp,
39+
correctness,
40+
);
41+
42+
/// Options of the `no-invalid-regexp` rule, deserialized from the rule's JSON configuration.
#[derive(Debug, Clone, Deserialize, Default)]
43+
struct NoInvalidRegexpConfig {
44+
#[serde(default, rename = "allowConstructorFlags")]
45+
/// Case-sensitive array of flags, read from the `allowConstructorFlags` key
/// (empty when the key is absent). Each entry is a single character.
46+
allow_constructor_flags: Vec<char>,
47+
}
48+
49+
impl Rule for NoInvalidRegexp {
50+
fn from_configuration(value: serde_json::Value) -> Self {
51+
value
52+
.as_array()
53+
.and_then(|arr| arr.first())
54+
.and_then(|value| serde_json::from_value(value.clone()).ok())
55+
.map_or_else(Self::default, |value| Self(Box::new(value)))
56+
}
57+
58+
fn run<'a>(&self, node: &AstNode<'a>, ctx: &LintContext<'a>) {
59+
let (pattern_arg, flags_arg) = match node.kind() {
60+
AstKind::NewExpression(expr) if expr.callee.is_specific_id("RegExp") => {
61+
parse_arguments_to_check(expr.arguments.first(), expr.arguments.get(1))
62+
}
63+
AstKind::CallExpression(expr) if expr.callee.is_specific_id("RegExp") => {
64+
parse_arguments_to_check(expr.arguments.first(), expr.arguments.get(1))
65+
}
66+
// Other kinds, skip
67+
_ => return,
68+
};
69+
70+
// No arguments, skip
71+
if pattern_arg.is_none() && flags_arg.is_none() {
72+
return;
73+
}
74+
75+
let allocator = Allocator::default();
76+
77+
// Validate flags first if exists
78+
let mut parsed_flags = None;
79+
if let Some((flags_span_start, flags_text)) = flags_arg {
80+
// Check for duplicated flags
81+
// For compatibility with ESLint, we need to check "user-defined duplicated" flags here
82+
// "valid duplicated" flags are also checked
83+
let mut unique_flags = FxHashSet::default();
84+
let mut violations = vec![];
85+
for (idx, ch) in flags_text.char_indices() {
86+
if !unique_flags.insert(ch) {
87+
violations.push(idx);
88+
}
89+
}
90+
if !violations.is_empty() {
91+
return ctx.diagnostic(
92+
// Use the same prefix with `oxc_regular_expression`
93+
OxcDiagnostic::warn("Invalid regular expression: Duplicated flag").with_labels(
94+
violations
95+
.iter()
96+
.map(|&start| {
97+
#[allow(clippy::cast_possible_truncation)]
98+
let start = flags_span_start + start as u32;
99+
LabeledSpan::new_with_span(None, Span::new(start, start))
100+
})
101+
.collect::<Vec<_>>(),
102+
),
103+
);
104+
}
105+
106+
// Omit user defined invalid flags
107+
for flag in &self.0.allow_constructor_flags {
108+
match flag {
109+
// Keep valid flags, even if they are defined
110+
'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' => continue,
111+
_ => {
112+
unique_flags.remove(flag);
113+
}
114+
}
115+
}
116+
117+
// Use parser to check:
118+
// - Unknown invalid flags
119+
// - Invalid flags combination: u+v
120+
// - (Valid duplicated flags are already checked above)
121+
// It can be done without `FlagsParser`, though
122+
let flags_text = unique_flags.iter().collect::<String>();
123+
let options = ParserOptions::default().with_span_offset(flags_span_start);
124+
match FlagsParser::new(&allocator, flags_text.as_str(), options).parse() {
125+
Ok(flags) => parsed_flags = Some(flags),
126+
Err(diagnostic) => return ctx.diagnostic(diagnostic),
127+
}
128+
}
129+
130+
// Then, validate pattern if exists
131+
// Pattern check is skipped when 1st argument is NOT a `StringLiteral`
132+
// e.g. `new RegExp(var)`, `RegExp("str" + var)`
133+
if let Some((pattern_span_start, pattern_text)) = pattern_arg {
134+
let mut options = ParserOptions::default().with_span_offset(pattern_span_start);
135+
if let Some(flags) = parsed_flags {
136+
if flags.unicode || flags.unicode_sets {
137+
options = options.with_unicode_mode();
138+
}
139+
if flags.unicode_sets {
140+
options = options.with_unicode_sets_mode();
141+
}
142+
}
143+
match PatternParser::new(&allocator, pattern_text, options).parse() {
144+
Ok(_) => {}
145+
Err(diagnostic) => ctx.diagnostic(diagnostic),
146+
}
147+
}
148+
}
149+
}
150+
151+
/// Returns: (span_start, text)
152+
/// span_start + 1 for opening string bracket.
153+
type ParsedArgument<'a> = (u32, &'a str);
154+
fn parse_arguments_to_check<'a>(
155+
arg1: Option<&Argument<'a>>,
156+
arg2: Option<&Argument<'a>>,
157+
) -> (Option<ParsedArgument<'a>>, Option<ParsedArgument<'a>>) {
158+
match (arg1, arg2) {
159+
// ("pattern", "flags")
160+
(Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => (
161+
Some((pattern.span.start + 1, pattern.value.as_str())),
162+
Some((flags.span.start + 1, flags.value.as_str())),
163+
),
164+
// (pattern, "flags")
165+
(Some(_arg), Some(Argument::StringLiteral(flags))) => {
166+
(None, Some((flags.span.start + 1, flags.value.as_str())))
167+
}
168+
// ("pattern")
169+
(Some(Argument::StringLiteral(pattern)), None) => {
170+
(Some((pattern.span.start + 1, pattern.value.as_str())), None)
171+
}
172+
// (pattern), ()
173+
_ => (None, None),
174+
}
175+
}
176+
177+
/// Feeds the `pass` and `fail` snippets, each optionally paired with a JSON rule
/// configuration, to `Tester` for `NoInvalidRegexp` and snapshots the outcome.
#[test]
178+
fn test() {
179+
use crate::tester::Tester;
180+
181+
// Each case is `(source text, optional JSON rule configuration)`.
let pass = vec![
182+
("[RegExp(''), /a/uv]", None),
183+
("RegExp()", None),
184+
("RegExp('.', 'g')", None),
185+
("new RegExp('.')", None),
186+
("new RegExp", None),
187+
("new RegExp('.', 'im')", None),
188+
("global.RegExp('\\\\')", None),
189+
("new RegExp('.', y)", None),
190+
("new RegExp('.', 'y')", None),
191+
("new RegExp('.', 'u')", None),
192+
("new RegExp('.', 'yu')", None),
193+
("new RegExp('/', 'yu')", None),
194+
("new RegExp('\\/', 'yu')", None),
195+
("new RegExp('\\\\u{65}', 'u')", None),
196+
("new RegExp('\\\\u{65}*', 'u')", None),
197+
("new RegExp('[\\\\u{0}-\\\\u{1F}]', 'u')", None),
198+
("new RegExp('.', 's')", None),
199+
("new RegExp('(?<=a)b')", None),
200+
("new RegExp('(?<!a)b')", None),
201+
("new RegExp('(?<a>b)\\k<a>')", None),
202+
("new RegExp('(?<a>b)\\k<a>', 'u')", None),
203+
("new RegExp('\\\\p{Letter}', 'u')", None),
204+
// unknown flags (the flags argument is not a string literal)
205+
("RegExp('{', flags)", None),
206+
("new RegExp('{', flags)", None),
207+
("RegExp('\\\\u{0}*', flags)", None),
208+
("new RegExp('\\\\u{0}*', flags)", None),
209+
("RegExp('{', flags)", Some(serde_json::json!([{ "allowConstructorFlags": ["u"] }]))),
210+
(
211+
"RegExp('\\\\u{0}*', flags)",
212+
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
213+
),
214+
// unknown pattern (the pattern argument is not a string literal)
215+
("new RegExp(pattern, 'g')", None),
216+
("new RegExp('.' + '', 'g')", None),
217+
("new RegExp(pattern, '')", None),
218+
("new RegExp(pattern)", None),
219+
// ES2020
220+
("new RegExp('(?<\\\\ud835\\\\udc9c>.)', 'g')", None),
221+
("new RegExp('(?<\\\\u{1d49c}>.)', 'g')", None),
222+
("new RegExp('(?<𝒜>.)', 'g');", None),
223+
("new RegExp('\\\\p{Script=Nandinagari}', 'u');", None),
224+
// ES2022
225+
("new RegExp('a+(?<Z>z)?', 'd')", None),
226+
("new RegExp('\\\\p{Script=Cpmn}', 'u')", None),
227+
("new RegExp('\\\\p{Script=Cypro_Minoan}', 'u')", None),
228+
("new RegExp('\\\\p{Script=Old_Uyghur}', 'u')", None),
229+
("new RegExp('\\\\p{Script=Ougr}', 'u')", None),
230+
("new RegExp('\\\\p{Script=Tangsa}', 'u')", None),
231+
("new RegExp('\\\\p{Script=Tnsa}', 'u')", None),
232+
("new RegExp('\\\\p{Script=Toto}', 'u')", None),
233+
("new RegExp('\\\\p{Script=Vith}', 'u')", None),
234+
("new RegExp('\\\\p{Script=Vithkuqi}', 'u')", None),
235+
// ES2024
236+
("new RegExp('[A--B]', 'v')", None),
237+
("new RegExp('[A&&B]', 'v')", None),
238+
("new RegExp('[A--[0-9]]', 'v')", None),
239+
("new RegExp('[\\\\p{Basic_Emoji}--\\\\q{a|bc|def}]', 'v')", None),
240+
("new RegExp('[A--B]', flags)", None),
241+
("new RegExp('[[]\\\\u{0}*', flags)", None),
242+
// ES2025
243+
// ("new RegExp('((?<k>a)|(?<k>b))')", None),
244+
// allowConstructorFlags
245+
("new RegExp('.', 'g')", Some(serde_json::json!([{ "allowConstructorFlags": [] }]))),
246+
("new RegExp('.', 'g')", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
247+
("new RegExp('.', 'a')", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
248+
("new RegExp('.', 'ag')", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
249+
("new RegExp('.', 'ga')", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
250+
(
251+
"new RegExp(pattern, 'ga')",
252+
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
253+
),
254+
(
255+
"new RegExp('.' + '', 'ga')",
256+
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
257+
),
258+
(
259+
"new RegExp('.', 'a')",
260+
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
261+
),
262+
(
263+
"new RegExp('.', 'z')",
264+
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
265+
),
266+
(
267+
"new RegExp('.', 'az')",
268+
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
269+
),
270+
(
271+
"new RegExp('.', 'za')",
272+
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
273+
),
274+
(
275+
"new RegExp('.', 'agz')",
276+
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
277+
),
278+
];
279+
280+
// Same `(source text, optional JSON rule configuration)` shape as `pass`.
let fail = vec![
281+
("RegExp('[');", None),
282+
("RegExp('.', 'z');", None),
283+
("RegExp('.', 'a');", Some(serde_json::json!([{}]))),
284+
("new RegExp('.', 'a');", Some(serde_json::json!([{ "allowConstructorFlags": [] }]))),
285+
("new RegExp('.', 'z');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
286+
("RegExp('.', 'a');", Some(serde_json::json!([{ "allowConstructorFlags": ["A"] }]))),
287+
("RegExp('.', 'A');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
288+
("new RegExp('.', 'az');", Some(serde_json::json!([{ "allowConstructorFlags": ["z"] }]))),
289+
("new RegExp('.', 'aa');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
290+
(
291+
"new RegExp('.', 'aa');",
292+
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "a"] }])),
293+
),
294+
("new RegExp('.', 'aA');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
295+
(
296+
"new RegExp('.', 'aaz');",
297+
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
298+
),
299+
(
300+
"new RegExp('.', 'azz');",
301+
Some(serde_json::json!([{ "allowConstructorFlags": ["a", "z"] }])),
302+
),
303+
("new RegExp('.', 'aga');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
304+
("new RegExp('.', 'uu');", Some(serde_json::json!([{ "allowConstructorFlags": ["u"] }]))),
305+
("new RegExp('.', 'ouo');", Some(serde_json::json!([{ "allowConstructorFlags": ["u"] }]))),
306+
("new RegExp(')');", None),
307+
("new RegExp('\\\\a', 'u');", None),
308+
(
309+
"new RegExp('\\\\a', 'u');",
310+
Some(serde_json::json!([{ "allowConstructorFlags": ["u"] }])),
311+
),
312+
("RegExp('\\\\u{0}*');", None),
313+
("new RegExp('\\\\u{0}*');", None),
314+
("new RegExp('\\\\u{0}*', '');", None),
315+
(
316+
"new RegExp('\\\\u{0}*', 'a');",
317+
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
318+
),
319+
("RegExp('\\\\u{0}*');", Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }]))),
320+
("new RegExp('\\\\');", None),
321+
("RegExp(')' + '', 'a');", None),
322+
(
323+
"new RegExp('.' + '', 'az');",
324+
Some(serde_json::json!([{ "allowConstructorFlags": ["z"] }])),
325+
),
326+
(
327+
"new RegExp(pattern, 'az');",
328+
Some(serde_json::json!([{ "allowConstructorFlags": ["a"] }])),
329+
),
330+
// ES2024
331+
("new RegExp('[[]', 'v');", None),
332+
("new RegExp('.', 'uv');", None),
333+
("new RegExp(pattern, 'uv');", None),
334+
("new RegExp('[A--B]' /* valid only with `v` flag */, 'u')", None),
335+
("new RegExp('[[]\\\\u{0}*' /* valid only with `u` flag */, 'v')", None),
336+
// ES2025
337+
("new RegExp('(?<k>a)(?<k>b)')", None),
338+
];
339+
340+
Tester::new(NoInvalidRegexp::NAME, pass, fail).test_and_snapshot();
341+
}

0 commit comments

Comments
 (0)