|
| 1 | +use oxc_allocator::Allocator; |
| 2 | +use oxc_ast::{ast::Argument, AstKind}; |
| 3 | +use oxc_diagnostics::{LabeledSpan, OxcDiagnostic}; |
| 4 | +use oxc_macros::declare_oxc_lint; |
| 5 | +use oxc_regular_expression::{FlagsParser, ParserOptions, PatternParser}; |
| 6 | +use oxc_span::Span; |
| 7 | +use rustc_hash::FxHashSet; |
| 8 | +use serde::Deserialize; |
| 9 | + |
| 10 | +use crate::{context::LintContext, rule::Rule, AstNode}; |
| 11 | + |
| 12 | +#[derive(Debug, Default, Clone)] |
| 13 | +pub struct NoInvalidRegexp(Box<NoInvalidRegexpConfig>); |
| 14 | + |
| 15 | +declare_oxc_lint!( |
| 16 | + /// ### What it does |
| 17 | + /// Disallow invalid regular expression strings in RegExp constructors. |
| 18 | + /// |
| 19 | + /// ### Why is this bad? |
| 20 | + /// An invalid pattern in a regular expression literal is a SyntaxError when the code is parsed, |
| 21 | + /// but an invalid string in RegExp constructors throws a SyntaxError only when the code is executed. |
| 22 | + /// |
| 23 | + /// ### Examples |
| 24 | + /// |
| 25 | + /// Examples of **incorrect** code for this rule: |
| 26 | + /// ```js |
| 27 | + /// RegExp('[') |
| 28 | + /// RegExp('.', 'z') |
| 29 | + /// new RegExp('\\') |
| 30 | + /// ``` |
| 31 | + /// |
| 32 | + /// Examples of **correct** code for this rule: |
| 33 | + /// ```js |
| 34 | + /// RegExp('.') |
| 35 | + /// new RegExp |
| 36 | + /// this.RegExp('[') |
| 37 | + /// ``` |
| 38 | + NoInvalidRegexp, |
| 39 | + correctness, |
| 40 | +); |
| 41 | + |
| 42 | +#[derive(Debug, Clone, Deserialize, Default)] |
| 43 | +struct NoInvalidRegexpConfig { |
| 44 | + #[serde(default, rename = "allowConstructorFlags")] |
| 45 | + /// Case-sensitive array of flags. |
| 46 | + allow_constructor_flags: Vec<char>, |
| 47 | +} |
| 48 | + |
| 49 | +impl Rule for NoInvalidRegexp { |
| 50 | + fn from_configuration(value: serde_json::Value) -> Self { |
| 51 | + value |
| 52 | + .as_array() |
| 53 | + .and_then(|arr| arr.first()) |
| 54 | + .and_then(|value| serde_json::from_value(value.clone()).ok()) |
| 55 | + .map_or_else(Self::default, |value| Self(Box::new(value))) |
| 56 | + } |
| 57 | + |
| 58 | + fn run<'a>(&self, node: &AstNode<'a>, ctx: &LintContext<'a>) { |
| 59 | + let (pattern_arg, flags_arg) = match node.kind() { |
| 60 | + AstKind::NewExpression(expr) if expr.callee.is_specific_id("RegExp") => { |
| 61 | + parse_arguments_to_check(expr.arguments.first(), expr.arguments.get(1)) |
| 62 | + } |
| 63 | + AstKind::CallExpression(expr) if expr.callee.is_specific_id("RegExp") => { |
| 64 | + parse_arguments_to_check(expr.arguments.first(), expr.arguments.get(1)) |
| 65 | + } |
| 66 | + // Other kinds, skip |
| 67 | + _ => return, |
| 68 | + }; |
| 69 | + |
| 70 | + // No arguments, skip |
| 71 | + if pattern_arg.is_none() && flags_arg.is_none() { |
| 72 | + return; |
| 73 | + } |
| 74 | + |
| 75 | + let allocator = Allocator::default(); |
| 76 | + |
| 77 | + // Validate flags first if exists |
| 78 | + let mut parsed_flags = None; |
| 79 | + if let Some((flags_span_start, flags_text)) = flags_arg { |
| 80 | + // Check for duplicated flags |
| 81 | + // For compatibility with ESLint, we need to check "user-defined duplicated" flags here |
| 82 | + // "valid duplicated" flags are also checked |
| 83 | + let mut unique_flags = FxHashSet::default(); |
| 84 | + let mut violations = vec![]; |
| 85 | + for (idx, ch) in flags_text.char_indices() { |
| 86 | + if !unique_flags.insert(ch) { |
| 87 | + violations.push(idx); |
| 88 | + } |
| 89 | + } |
| 90 | + if !violations.is_empty() { |
| 91 | + return ctx.diagnostic( |
| 92 | + // Use the same prefix with `oxc_regular_expression` |
| 93 | + OxcDiagnostic::warn("Invalid regular expression: Duplicated flag").with_labels( |
| 94 | + violations |
| 95 | + .iter() |
| 96 | + .map(|&start| { |
| 97 | + #[allow(clippy::cast_possible_truncation)] |
| 98 | + let start = flags_span_start + start as u32; |
| 99 | + LabeledSpan::new_with_span(None, Span::new(start, start)) |
| 100 | + }) |
| 101 | + .collect::<Vec<_>>(), |
| 102 | + ), |
| 103 | + ); |
| 104 | + } |
| 105 | + |
| 106 | + // Omit user defined invalid flags |
| 107 | + for flag in &self.0.allow_constructor_flags { |
| 108 | + match flag { |
| 109 | + // Keep valid flags, even if they are defined |
| 110 | + 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' => continue, |
| 111 | + _ => { |
| 112 | + unique_flags.remove(flag); |
| 113 | + } |
| 114 | + } |
| 115 | + } |
| 116 | + |
| 117 | + // Use parser to check: |
| 118 | + // - Unknown invalid flags |
| 119 | + // - Invalid flags combination: u+v |
| 120 | + // - (Valid duplicated flags are already checked above) |
| 121 | + // It can be done without `FlagsParser`, though |
| 122 | + let flags_text = unique_flags.iter().collect::<String>(); |
| 123 | + let options = ParserOptions::default().with_span_offset(flags_span_start); |
| 124 | + match FlagsParser::new(&allocator, flags_text.as_str(), options).parse() { |
| 125 | + Ok(flags) => parsed_flags = Some(flags), |
| 126 | + Err(diagnostic) => return ctx.diagnostic(diagnostic), |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + // Then, validate pattern if exists |
| 131 | + // Pattern check is skipped when 1st argument is NOT a `StringLiteral` |
| 132 | + // e.g. `new RegExp(var)`, `RegExp("str" + var)` |
| 133 | + if let Some((pattern_span_start, pattern_text)) = pattern_arg { |
| 134 | + let mut options = ParserOptions::default().with_span_offset(pattern_span_start); |
| 135 | + if let Some(flags) = parsed_flags { |
| 136 | + if flags.unicode || flags.unicode_sets { |
| 137 | + options = options.with_unicode_mode(); |
| 138 | + } |
| 139 | + if flags.unicode_sets { |
| 140 | + options = options.with_unicode_sets_mode(); |
| 141 | + } |
| 142 | + } |
| 143 | + match PatternParser::new(&allocator, pattern_text, options).parse() { |
| 144 | + Ok(_) => {} |
| 145 | + Err(diagnostic) => ctx.diagnostic(diagnostic), |
| 146 | + } |
| 147 | + } |
| 148 | + } |
| 149 | +} |
| 150 | + |
| 151 | +/// Returns: (span_start, text) |
| 152 | +/// span_start + 1 for opening string bracket. |
| 153 | +type ParsedArgument<'a> = (u32, &'a str); |
| 154 | +fn parse_arguments_to_check<'a>( |
| 155 | + arg1: Option<&Argument<'a>>, |
| 156 | + arg2: Option<&Argument<'a>>, |
| 157 | +) -> (Option<ParsedArgument<'a>>, Option<ParsedArgument<'a>>) { |
| 158 | + match (arg1, arg2) { |
| 159 | + // ("pattern", "flags") |
| 160 | + (Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => ( |
| 161 | + Some((pattern.span.start + 1, pattern.value.as_str())), |
| 162 | + Some((flags.span.start + 1, flags.value.as_str())), |
| 163 | + ), |
| 164 | + // (pattern, "flags") |
| 165 | + (Some(_arg), Some(Argument::StringLiteral(flags))) => { |
| 166 | + (None, Some((flags.span.start + 1, flags.value.as_str()))) |
| 167 | + } |
| 168 | + // ("pattern") |
| 169 | + (Some(Argument::StringLiteral(pattern)), None) => { |
| 170 | + (Some((pattern.span.start + 1, pattern.value.as_str())), None) |
| 171 | + } |
| 172 | + // (pattern), () |
| 173 | + _ => (None, None), |
| 174 | + } |
| 175 | +} |
| 176 | + |
#[test]
fn test() {
    use crate::tester::Tester;

    // Builds the rule configuration `[{ "allowConstructorFlags": [...] }]`.
    let allow = |flags: &[&str]| Some(serde_json::json!([{ "allowConstructorFlags": flags }]));

    let pass = vec![
        ("[RegExp(''), /a/uv]", None),
        ("RegExp()", None),
        ("RegExp('.', 'g')", None),
        ("new RegExp('.')", None),
        ("new RegExp", None),
        ("new RegExp('.', 'im')", None),
        ("global.RegExp('\\\\')", None),
        ("new RegExp('.', y)", None),
        ("new RegExp('.', 'y')", None),
        ("new RegExp('.', 'u')", None),
        ("new RegExp('.', 'yu')", None),
        ("new RegExp('/', 'yu')", None),
        ("new RegExp('\\/', 'yu')", None),
        ("new RegExp('\\\\u{65}', 'u')", None),
        ("new RegExp('\\\\u{65}*', 'u')", None),
        ("new RegExp('[\\\\u{0}-\\\\u{1F}]', 'u')", None),
        ("new RegExp('.', 's')", None),
        ("new RegExp('(?<=a)b')", None),
        ("new RegExp('(?<!a)b')", None),
        ("new RegExp('(?<a>b)\\k<a>')", None),
        ("new RegExp('(?<a>b)\\k<a>', 'u')", None),
        ("new RegExp('\\\\p{Letter}', 'u')", None),
        // unknown flags
        ("RegExp('{', flags)", None),
        ("new RegExp('{', flags)", None),
        ("RegExp('\\\\u{0}*', flags)", None),
        ("new RegExp('\\\\u{0}*', flags)", None),
        ("RegExp('{', flags)", allow(&["u"])),
        ("RegExp('\\\\u{0}*', flags)", allow(&["a"])),
        // unknown pattern
        ("new RegExp(pattern, 'g')", None),
        ("new RegExp('.' + '', 'g')", None),
        ("new RegExp(pattern, '')", None),
        ("new RegExp(pattern)", None),
        // ES2020
        ("new RegExp('(?<\\\\ud835\\\\udc9c>.)', 'g')", None),
        ("new RegExp('(?<\\\\u{1d49c}>.)', 'g')", None),
        ("new RegExp('(?<𝒜>.)', 'g');", None),
        ("new RegExp('\\\\p{Script=Nandinagari}', 'u');", None),
        // ES2022
        ("new RegExp('a+(?<Z>z)?', 'd')", None),
        ("new RegExp('\\\\p{Script=Cpmn}', 'u')", None),
        ("new RegExp('\\\\p{Script=Cypro_Minoan}', 'u')", None),
        ("new RegExp('\\\\p{Script=Old_Uyghur}', 'u')", None),
        ("new RegExp('\\\\p{Script=Ougr}', 'u')", None),
        ("new RegExp('\\\\p{Script=Tangsa}', 'u')", None),
        ("new RegExp('\\\\p{Script=Tnsa}', 'u')", None),
        ("new RegExp('\\\\p{Script=Toto}', 'u')", None),
        ("new RegExp('\\\\p{Script=Vith}', 'u')", None),
        ("new RegExp('\\\\p{Script=Vithkuqi}', 'u')", None),
        // ES2024
        ("new RegExp('[A--B]', 'v')", None),
        ("new RegExp('[A&&B]', 'v')", None),
        ("new RegExp('[A--[0-9]]', 'v')", None),
        ("new RegExp('[\\\\p{Basic_Emoji}--\\\\q{a|bc|def}]', 'v')", None),
        ("new RegExp('[A--B]', flags)", None),
        ("new RegExp('[[]\\\\u{0}*', flags)", None),
        // ES2025
        // ("new RegExp('((?<k>a)|(?<k>b))')", None),
        // allowConstructorFlags
        ("new RegExp('.', 'g')", allow(&[])),
        ("new RegExp('.', 'g')", allow(&["a"])),
        ("new RegExp('.', 'a')", allow(&["a"])),
        ("new RegExp('.', 'ag')", allow(&["a"])),
        ("new RegExp('.', 'ga')", allow(&["a"])),
        ("new RegExp(pattern, 'ga')", allow(&["a"])),
        ("new RegExp('.' + '', 'ga')", allow(&["a"])),
        ("new RegExp('.', 'a')", allow(&["a", "z"])),
        ("new RegExp('.', 'z')", allow(&["a", "z"])),
        ("new RegExp('.', 'az')", allow(&["a", "z"])),
        ("new RegExp('.', 'za')", allow(&["a", "z"])),
        ("new RegExp('.', 'agz')", allow(&["a", "z"])),
    ];

    let fail = vec![
        ("RegExp('[');", None),
        ("RegExp('.', 'z');", None),
        ("RegExp('.', 'a');", Some(serde_json::json!([{}]))),
        ("new RegExp('.', 'a');", allow(&[])),
        ("new RegExp('.', 'z');", allow(&["a"])),
        ("RegExp('.', 'a');", allow(&["A"])),
        ("RegExp('.', 'A');", allow(&["a"])),
        ("new RegExp('.', 'az');", allow(&["z"])),
        ("new RegExp('.', 'aa');", allow(&["a"])),
        ("new RegExp('.', 'aa');", allow(&["a", "a"])),
        ("new RegExp('.', 'aA');", allow(&["a"])),
        ("new RegExp('.', 'aaz');", allow(&["a", "z"])),
        ("new RegExp('.', 'azz');", allow(&["a", "z"])),
        ("new RegExp('.', 'aga');", allow(&["a"])),
        ("new RegExp('.', 'uu');", allow(&["u"])),
        ("new RegExp('.', 'ouo');", allow(&["u"])),
        ("new RegExp(')');", None),
        ("new RegExp('\\\\a', 'u');", None),
        ("new RegExp('\\\\a', 'u');", allow(&["u"])),
        ("RegExp('\\\\u{0}*');", None),
        ("new RegExp('\\\\u{0}*');", None),
        ("new RegExp('\\\\u{0}*', '');", None),
        ("new RegExp('\\\\u{0}*', 'a');", allow(&["a"])),
        ("RegExp('\\\\u{0}*');", allow(&["a"])),
        ("new RegExp('\\\\');", None),
        ("RegExp(')' + '', 'a');", None),
        ("new RegExp('.' + '', 'az');", allow(&["z"])),
        ("new RegExp(pattern, 'az');", allow(&["a"])),
        // ES2024
        ("new RegExp('[[]', 'v');", None),
        ("new RegExp('.', 'uv');", None),
        ("new RegExp(pattern, 'uv');", None),
        ("new RegExp('[A--B]' /* valid only with `v` flag */, 'u')", None),
        ("new RegExp('[[]\\\\u{0}*' /* valid only with `u` flag */, 'v')", None),
        // ES2025
        ("new RegExp('(?<k>a)(?<k>b)')", None),
    ];

    Tester::new(NoInvalidRegexp::NAME, pass, fail).test_and_snapshot();
}
0 commit comments