Skip to content

Commit 5a73a66

Browse files
committed Oct 3, 2024 ·
refactor(regular_expression)!: Simplify public APIs (#6262)
This PR makes 2 changes to simplify parts of the existing API that were not very useful. - Remove `(Literal)Parser` and `FlagsParser` and their ASTs - Add `with_flags(flags_text)` helper to `ParserOptions` Here are the details. > Remove `(Literal)Parser` and `FlagsParser` and their ASTs Previously, the `oxc_regular_expression` crate exposed 3 parsers. - `(Literal)Parser`: assumes `/pattern/flags` format - `PatternParser`: assumes `pattern` part only - `FlagsParser`: assumes `flags` part only However, it turns out that in actual use cases `PatternParser` alone is sufficient, as the pattern and flags are validated and sliced in advance on the `oxc_parser` side. The current use case for `(Literal)Parser` is mostly internal testing. There were also some misuses of `(Literal)Parser` where callers rebuilt the literal with `format!("/{pattern}/{flags}")` only to feed it back into `(Literal)Parser`. Therefore, only `PatternParser` is now published (exported as `Parser`), and the unnecessary ASTs have been removed. (This also obsoletes #5592.) > Add `with_flags(flags_text)` helper to `ParserOptions` Strictly speaking, there was a subtle difference between the "flag" strings that users were aware of and the "mode" recognised by the parser. As a result, it was a common mistake to forget to enable `unicode_mode` when using the `v` flag. With this helper, crate users no longer need to distinguish between flags and modes.
1 parent 5957214 commit 5a73a66

29 files changed

+476
-940
lines changed
 

‎crates/oxc_ast/src/generated/assert_layouts.rs

-36
Original file line numberDiff line numberDiff line change
@@ -1412,24 +1412,6 @@ const _: () = {
14121412
assert!(size_of::<LanguageVariant>() == 1usize);
14131413
assert!(align_of::<LanguageVariant>() == 1usize);
14141414

1415-
assert!(size_of::<RegularExpression>() == 72usize);
1416-
assert!(align_of::<RegularExpression>() == 8usize);
1417-
assert!(offset_of!(RegularExpression, span) == 0usize);
1418-
assert!(offset_of!(RegularExpression, pattern) == 8usize);
1419-
assert!(offset_of!(RegularExpression, flags) == 56usize);
1420-
1421-
assert!(size_of::<Flags>() == 16usize);
1422-
assert!(align_of::<Flags>() == 4usize);
1423-
assert!(offset_of!(Flags, span) == 0usize);
1424-
assert!(offset_of!(Flags, global) == 8usize);
1425-
assert!(offset_of!(Flags, ignore_case) == 9usize);
1426-
assert!(offset_of!(Flags, multiline) == 10usize);
1427-
assert!(offset_of!(Flags, unicode) == 11usize);
1428-
assert!(offset_of!(Flags, sticky) == 12usize);
1429-
assert!(offset_of!(Flags, dot_all) == 13usize);
1430-
assert!(offset_of!(Flags, has_indices) == 14usize);
1431-
assert!(offset_of!(Flags, unicode_sets) == 15usize);
1432-
14331415
assert!(size_of::<Pattern>() == 48usize);
14341416
assert!(align_of::<Pattern>() == 8usize);
14351417
assert!(offset_of!(Pattern, span) == 0usize);
@@ -2967,24 +2949,6 @@ const _: () = {
29672949
assert!(size_of::<LanguageVariant>() == 1usize);
29682950
assert!(align_of::<LanguageVariant>() == 1usize);
29692951

2970-
assert!(size_of::<RegularExpression>() == 56usize);
2971-
assert!(align_of::<RegularExpression>() == 4usize);
2972-
assert!(offset_of!(RegularExpression, span) == 0usize);
2973-
assert!(offset_of!(RegularExpression, pattern) == 8usize);
2974-
assert!(offset_of!(RegularExpression, flags) == 40usize);
2975-
2976-
assert!(size_of::<Flags>() == 16usize);
2977-
assert!(align_of::<Flags>() == 4usize);
2978-
assert!(offset_of!(Flags, span) == 0usize);
2979-
assert!(offset_of!(Flags, global) == 8usize);
2980-
assert!(offset_of!(Flags, ignore_case) == 9usize);
2981-
assert!(offset_of!(Flags, multiline) == 10usize);
2982-
assert!(offset_of!(Flags, unicode) == 11usize);
2983-
assert!(offset_of!(Flags, sticky) == 12usize);
2984-
assert!(offset_of!(Flags, dot_all) == 13usize);
2985-
assert!(offset_of!(Flags, has_indices) == 14usize);
2986-
assert!(offset_of!(Flags, unicode_sets) == 15usize);
2987-
29882952
assert!(size_of::<Pattern>() == 32usize);
29892953
assert!(align_of::<Pattern>() == 4usize);
29902954
assert!(offset_of!(Pattern, span) == 0usize);

‎crates/oxc_linter/src/rules/eslint/no_control_regex.rs

+17-32
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
use oxc_allocator::Allocator;
2-
use oxc_ast::{
3-
ast::{Argument, RegExpFlags},
4-
AstKind,
5-
};
2+
use oxc_ast::{ast::Argument, AstKind};
63
use oxc_diagnostics::OxcDiagnostic;
74
use oxc_macros::declare_oxc_lint;
85
use oxc_regular_expression::{
@@ -92,25 +89,19 @@ impl Rule for NoControlRegex {
9289
// get pattern from arguments. Missing or non-string arguments
9390
// will be runtime errors, but are not covered by this rule.
9491
let alloc = Allocator::default();
95-
let pattern_with_slashes = format!("/{}/", &pattern.value);
9692
let flags = extract_regex_flags(&expr.arguments);
93+
let flags_text = flags.map_or(String::new(), |f| f.to_string());
9794
let parser = Parser::new(
9895
&alloc,
99-
pattern_with_slashes.as_str(),
100-
ParserOptions {
101-
span_offset: expr
102-
.arguments
103-
.first()
104-
.map_or(0, |arg| arg.span().start),
105-
unicode_mode: flags
106-
.is_some_and(|flags| flags.contains(RegExpFlags::U)),
107-
unicode_sets_mode: flags
108-
.is_some_and(|flags| flags.contains(RegExpFlags::V)),
109-
},
96+
pattern.value.as_str(),
97+
ParserOptions::default()
98+
.with_span_offset(
99+
expr.arguments.first().map_or(0, |arg| arg.span().start),
100+
)
101+
.with_flags(&flags_text),
110102
);
111103

112-
let Some(pattern) = parser.parse().ok().map(|pattern| pattern.pattern)
113-
else {
104+
let Ok(pattern) = parser.parse() else {
114105
return;
115106
};
116107

@@ -133,25 +124,19 @@ impl Rule for NoControlRegex {
133124
// get pattern from arguments. Missing or non-string arguments
134125
// will be runtime errors, but are not covered by this rule.
135126
let alloc = Allocator::default();
136-
let pattern_with_slashes = format!("/{}/", &pattern.value);
137127
let flags = extract_regex_flags(&expr.arguments);
128+
let flags_text = flags.map_or(String::new(), |f| f.to_string());
138129
let parser = Parser::new(
139130
&alloc,
140-
pattern_with_slashes.as_str(),
141-
ParserOptions {
142-
span_offset: expr
143-
.arguments
144-
.first()
145-
.map_or(0, |arg| arg.span().start),
146-
unicode_mode: flags
147-
.is_some_and(|flags| flags.contains(RegExpFlags::U)),
148-
unicode_sets_mode: flags
149-
.is_some_and(|flags| flags.contains(RegExpFlags::V)),
150-
},
131+
pattern.value.as_str(),
132+
ParserOptions::default()
133+
.with_span_offset(
134+
expr.arguments.first().map_or(0, |arg| arg.span().start),
135+
)
136+
.with_flags(&flags_text),
151137
);
152138

153-
let Some(pattern) = parser.parse().ok().map(|pattern| pattern.pattern)
154-
else {
139+
let Ok(pattern) = parser.parse() else {
155140
return;
156141
};
157142

‎crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs

+49-56
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,28 @@
11
use oxc_allocator::Allocator;
22
use oxc_ast::{ast::Argument, AstKind};
3-
use oxc_diagnostics::{LabeledSpan, OxcDiagnostic};
3+
use oxc_diagnostics::OxcDiagnostic;
44
use oxc_macros::declare_oxc_lint;
5-
use oxc_regular_expression::{FlagsParser, ParserOptions, PatternParser};
5+
use oxc_regular_expression::{Parser, ParserOptions};
66
use oxc_span::Span;
77
use rustc_hash::FxHashSet;
88
use serde::Deserialize;
99

1010
use crate::{context::LintContext, rule::Rule, AstNode};
1111

12+
// Use the same prefix with `oxc_regular_expression` crate
13+
fn duplicated_flag_diagnostic(span: Span) -> OxcDiagnostic {
14+
OxcDiagnostic::warn("Invalid regular expression: Duplicated flag").with_label(span)
15+
}
16+
17+
fn unknown_flag_diagnostic(span: Span) -> OxcDiagnostic {
18+
OxcDiagnostic::warn("Invalid regular expression: Unknown flag").with_label(span)
19+
}
20+
21+
fn invalid_unicode_flags_diagnostic(span: Span) -> OxcDiagnostic {
22+
OxcDiagnostic::warn("Invalid regular expression: `u` and `v` flags should be used alone")
23+
.with_label(span)
24+
}
25+
1226
#[derive(Debug, Default, Clone)]
1327
pub struct NoInvalidRegexp(Box<NoInvalidRegexpConfig>);
1428

@@ -72,75 +86,54 @@ impl Rule for NoInvalidRegexp {
7286
return;
7387
}
7488

75-
let allocator = Allocator::default();
76-
7789
// Validate flags first if exists
78-
let mut parsed_flags = None;
7990
if let Some((flags_span_start, flags_text)) = flags_arg {
80-
// Check for duplicated flags
81-
// For compatibility with ESLint, we need to check "user-defined duplicated" flags here
82-
// "valid duplicated" flags are also checked
91+
let (mut u_flag_found, mut v_flag_found) = (false, false);
8392
let mut unique_flags = FxHashSet::default();
84-
let mut violations = vec![];
8593
for (idx, ch) in flags_text.char_indices() {
86-
if !unique_flags.insert(ch) {
87-
violations.push(idx);
88-
}
89-
}
90-
if !violations.is_empty() {
91-
return ctx.diagnostic(
92-
// Use the same prefix with `oxc_regular_expression`
93-
OxcDiagnostic::warn("Invalid regular expression: Duplicated flag").with_labels(
94-
violations
95-
.iter()
96-
.map(|&start| {
97-
#[allow(clippy::cast_possible_truncation)]
98-
let start = flags_span_start + start as u32;
99-
LabeledSpan::new_with_span(None, Span::new(start, start))
100-
})
101-
.collect::<Vec<_>>(),
102-
),
103-
);
104-
}
94+
#[allow(clippy::cast_possible_truncation)]
95+
let start = flags_span_start + idx as u32;
10596

106-
// Omit user defined invalid flags
107-
for flag in &self.0.allow_constructor_flags {
108-
match flag {
109-
// Keep valid flags, even if they are defined
110-
'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' => continue,
111-
_ => {
112-
unique_flags.remove(flag);
97+
// Invalid combination: u+v
98+
if ch == 'u' {
99+
if v_flag_found {
100+
return ctx
101+
.diagnostic(invalid_unicode_flags_diagnostic(Span::new(start, start)));
113102
}
103+
u_flag_found = true;
104+
}
105+
if ch == 'v' {
106+
if u_flag_found {
107+
return ctx
108+
.diagnostic(invalid_unicode_flags_diagnostic(Span::new(start, start)));
109+
}
110+
v_flag_found = true;
111+
}
112+
113+
// Duplicated: user defined, invalid or valid
114+
if !unique_flags.insert(ch) {
115+
return ctx.diagnostic(duplicated_flag_diagnostic(Span::new(start, start)));
114116
}
115-
}
116117

117-
// Use parser to check:
118-
// - Unknown invalid flags
119-
// - Invalid flags combination: u+v
120-
// - (Valid duplicated flags are already checked above)
121-
// It can be done without `FlagsParser`, though
122-
let flags_text = unique_flags.iter().collect::<String>();
123-
let options = ParserOptions::default().with_span_offset(flags_span_start);
124-
match FlagsParser::new(&allocator, flags_text.as_str(), options).parse() {
125-
Ok(flags) => parsed_flags = Some(flags),
126-
Err(diagnostic) => return ctx.diagnostic(diagnostic),
118+
// Unknown: not valid, not user defined
119+
if !(matches!(ch, 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y')
120+
|| self.0.allow_constructor_flags.contains(&ch))
121+
{
122+
return ctx.diagnostic(unknown_flag_diagnostic(Span::new(start, start)));
123+
}
127124
}
128125
}
129126

130127
// Then, validate pattern if exists
131128
// Pattern check is skipped when 1st argument is NOT a `StringLiteral`
132129
// e.g. `new RegExp(var)`, `RegExp("str" + var)`
130+
let allocator = Allocator::default();
133131
if let Some((pattern_span_start, pattern_text)) = pattern_arg {
134-
let mut options = ParserOptions::default().with_span_offset(pattern_span_start);
135-
if let Some(flags) = parsed_flags {
136-
if flags.unicode || flags.unicode_sets {
137-
options = options.with_unicode_mode();
138-
}
139-
if flags.unicode_sets {
140-
options = options.with_unicode_sets_mode();
141-
}
142-
}
143-
match PatternParser::new(&allocator, pattern_text, options).parse() {
132+
let options = ParserOptions::default()
133+
.with_span_offset(pattern_span_start)
134+
.with_flags(flags_arg.map_or("", |(_, flags_text)| flags_text));
135+
136+
match Parser::new(&allocator, pattern_text, options).parse() {
144137
Ok(_) => {}
145138
Err(diagnostic) => ctx.diagnostic(diagnostic),
146139
}

‎crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs

+7-5
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,14 @@ impl NoRegexSpaces {
105105
}
106106

107107
let alloc = Allocator::default();
108-
let pattern_with_slashes = format!("/{}/", &pattern.value);
109-
let parser = Parser::new(&alloc, pattern_with_slashes.as_str(), ParserOptions::default());
110-
let regex = parser.parse().ok()?;
108+
let parser = Parser::new(
109+
&alloc,
110+
pattern.value.as_str(),
111+
ParserOptions::default().with_span_offset(pattern.span.start + 1),
112+
);
113+
let parsed_pattern = parser.parse().ok()?;
111114

112-
find_consecutive_spaces(&regex.pattern)
113-
.map(|span| Span::new(span.start + pattern.span.start, span.end + pattern.span.start))
115+
find_consecutive_spaces(&parsed_pattern)
114116
}
115117

116118
fn is_regexp_new_expression(expr: &NewExpression<'_>) -> bool {

‎crates/oxc_linter/src/snapshots/no_invalid_regexp.snap

+13-13
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ source: crates/oxc_linter/src/tester.rs
6262
╰────
6363

6464
eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
65-
╭─[no_invalid_regexp.tsx:1:18]
65+
╭─[no_invalid_regexp.tsx:1:19]
6666
1 │ new RegExp('.', 'aA');
67-
· ▲
67+
·
6868
╰────
6969

7070
eslint(no-invalid-regexp): Invalid regular expression: Duplicated flag
@@ -91,10 +91,10 @@ source: crates/oxc_linter/src/tester.rs
9191
· ▲
9292
╰────
9393

94-
eslint(no-invalid-regexp): Invalid regular expression: Duplicated flag
95-
╭─[no_invalid_regexp.tsx:1:20]
94+
eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
95+
╭─[no_invalid_regexp.tsx:1:18]
9696
1 │ new RegExp('.', 'ouo');
97-
·
97+
· ▲
9898
╰────
9999

100100
eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
@@ -164,9 +164,9 @@ source: crates/oxc_linter/src/tester.rs
164164
╰────
165165

166166
eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
167-
╭─[no_invalid_regexp.tsx:1:22]
167+
╭─[no_invalid_regexp.tsx:1:23]
168168
1 │ new RegExp(pattern, 'az');
169-
· ▲
169+
·
170170
╰────
171171

172172
eslint(no-invalid-regexp): Invalid regular expression: Unterminated character class
@@ -175,16 +175,16 @@ source: crates/oxc_linter/src/tester.rs
175175
· ───
176176
╰────
177177

178-
eslint(no-invalid-regexp): Invalid regular expression: Invalid flags, `u` and `v` should be used alone
179-
╭─[no_invalid_regexp.tsx:1:18]
178+
eslint(no-invalid-regexp): Invalid regular expression: `u` and `v` flags should be used alone
179+
╭─[no_invalid_regexp.tsx:1:19]
180180
1 │ new RegExp('.', 'uv');
181-
· ──
181+
·
182182
╰────
183183

184-
eslint(no-invalid-regexp): Invalid regular expression: Invalid flags, `u` and `v` should be used alone
185-
╭─[no_invalid_regexp.tsx:1:22]
184+
eslint(no-invalid-regexp): Invalid regular expression: `u` and `v` flags should be used alone
185+
╭─[no_invalid_regexp.tsx:1:23]
186186
1 │ new RegExp(pattern, 'uv');
187-
· ──
187+
·
188188
╰────
189189

190190
eslint(no-invalid-regexp): Invalid regular expression: Character class atom range out of order

‎crates/oxc_parser/examples/regular_expression.rs

+9-12
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::{env, fs, path::Path, sync::Arc};
44
use oxc_allocator::Allocator;
55
use oxc_ast::{ast, AstKind, Visit};
66
use oxc_parser::{ParseOptions, Parser};
7-
use oxc_regular_expression::{FlagsParser, ParserOptions, PatternParser};
7+
use oxc_regular_expression::{Parser as RegExpParser, ParserOptions as RegExpParserOptions};
88
use oxc_span::SourceType;
99

1010
// `cargo run -p oxc_parser --example regular_expression`
@@ -45,6 +45,7 @@ struct RegularExpressionVisitor {
4545
impl<'a> Visit<'a> for RegularExpressionVisitor {
4646
fn enter_node(&mut self, kind: AstKind<'a>) {
4747
let allocator = Allocator::default();
48+
4849
match kind {
4950
AstKind::RegExpLiteral(re) => {
5051
println!("🍀 {}", re.span.source_text(self.source_text.as_ref()));
@@ -61,12 +62,12 @@ impl<'a> Visit<'a> for RegularExpressionVisitor {
6162
{
6263
println!("🍀 {}", new_expr.span.source_text(&self.source_text));
6364

64-
let pattern = match new_expr.arguments.first() {
65-
Some(ast::Argument::StringLiteral(sl)) => &sl.value,
65+
let (pattern, pattern_span) = match new_expr.arguments.first() {
66+
Some(ast::Argument::StringLiteral(sl)) => (&sl.value, &sl.span),
6667
Some(ast::Argument::TemplateLiteral(tl))
6768
if tl.is_no_substitution_template() =>
6869
{
69-
&tl.quasi().unwrap()
70+
(&tl.quasi().unwrap(), &tl.span)
7071
}
7172
_ => return,
7273
};
@@ -81,16 +82,12 @@ impl<'a> Visit<'a> for RegularExpressionVisitor {
8182
_ => "",
8283
};
8384

84-
let flags =
85-
FlagsParser::new(&allocator, flags, ParserOptions::default()).parse().unwrap();
86-
let parsed = PatternParser::new(
85+
let parsed = RegExpParser::new(
8786
&allocator,
8887
pattern,
89-
ParserOptions {
90-
span_offset: new_expr.span.start + 12, // = "new RegExp(\"".len()
91-
unicode_mode: flags.unicode || flags.unicode_sets,
92-
unicode_sets_mode: flags.unicode_sets,
93-
},
88+
RegExpParserOptions::default()
89+
.with_span_offset(pattern_span.start + 1)
90+
.with_flags(flags),
9491
)
9592
.parse();
9693

‎crates/oxc_parser/src/js/expression.rs

+8-10
Original file line numberDiff line numberDiff line change
@@ -340,14 +340,16 @@ impl<'a> ParserImpl<'a> {
340340
let span = self.start_span();
341341
// split out pattern
342342
let (pattern_end, flags) = self.read_regex()?;
343-
let pattern_start = self.cur_token().start + 1; // +1 to exclude `/`
343+
let pattern_start = self.cur_token().start + 1; // +1 to exclude left `/`
344344
let pattern_text = &self.source_text[pattern_start as usize..pattern_end as usize];
345+
let flags_start = pattern_end + 1; // +1 to include right `/`
346+
let flags_text = &self.source_text[flags_start as usize..self.cur_token().end as usize];
345347
self.bump_any();
346348
let pattern = self
347349
.options
348350
.parse_regular_expression
349351
.then_some(())
350-
.map(|()| self.parse_regex_pattern(pattern_start, pattern_text, flags))
352+
.map(|()| self.parse_regex_pattern(pattern_start, pattern_text, flags_text))
351353
.map_or_else(
352354
|| RegExpPattern::Raw(pattern_text),
353355
|pat| {
@@ -361,15 +363,11 @@ impl<'a> ParserImpl<'a> {
361363
&mut self,
362364
span_offset: u32,
363365
pattern: &'a str,
364-
flags: RegExpFlags,
366+
flags: &'a str,
365367
) -> Option<Box<'a, Pattern<'a>>> {
366-
use oxc_regular_expression::{ParserOptions, PatternParser};
367-
let options = ParserOptions {
368-
span_offset,
369-
unicode_mode: flags.contains(RegExpFlags::U) || flags.contains(RegExpFlags::V),
370-
unicode_sets_mode: flags.contains(RegExpFlags::V),
371-
};
372-
match PatternParser::new(self.ast.allocator, pattern, options).parse() {
368+
use oxc_regular_expression::{Parser, ParserOptions};
369+
let options = ParserOptions::default().with_span_offset(span_offset).with_flags(flags);
370+
match Parser::new(self.ast.allocator, pattern, options).parse() {
373371
Ok(regular_expression) => Some(self.ast.alloc(regular_expression)),
374372
Err(diagnostic) => {
375373
self.error(diagnostic);

‎crates/oxc_regular_expression/examples/parse_literal.rs

+47-44
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,63 @@
11
#![allow(clippy::print_stdout)]
22

33
use oxc_allocator::Allocator;
4-
use oxc_regular_expression::{ast, Parser, ParserOptions};
4+
use oxc_regular_expression::{Parser, ParserOptions};
55

66
fn main() {
77
let allocator = Allocator::default();
88

9-
for source_text in [
10-
"/ab/",
11-
"/abc/i",
12-
"/abcd/igv",
13-
"/emo👈🏻ji/u",
14-
"/ab|c/i",
15-
"/a|b+|c/i",
16-
"/a{0}|b{1,2}|c{3,}/i",
17-
"/(?=a)|(?<=b)|(?!c)|(?<!d)/i",
18-
r"/\n\cM\0\x41\./",
19-
r"/\n\cM\0\x41\u1234\./u",
20-
r"/\n\cM\0\x41\u{1f600}\./u",
21-
r"/a\k<f>x\1c/u",
22-
r"/(cg)(?<n>cg)(?:g)/",
23-
r"/{3}/", // Error
24-
r"/Em🥹j/",
25-
r"/^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$/",
26-
r"/^(?<!ab)$/",
27-
r"/a)/", // Error
28-
r"/c]/",
29-
r"/[abc]/",
30-
r"/[|\]]/",
31-
r"/[a&&b]/v",
32-
r"/[a--b]/v",
33-
r"/[a&&&]/v", // Error
34-
r"/[a---]/v", // Error
35-
r"/[^a--b--c]/v",
36-
r"/[a[b[c[d[e[f[g[h[i[j[k[l]]]]]]]]]]]]/v",
37-
r"/[\q{abc|d|e|}]/v",
38-
r"/\p{Basic_Emoji}/v",
39-
r"/\p{Basic_Emoji}/u", // Error
40-
r"/[[^\q{}]]/v", // Error
41-
r"/(?<a>)(?<a>)/", // Error
42-
r"/(?noname)/v", // Error
43-
r"/[\bb]/",
9+
for (pattern, flags) in [
10+
(r"ab", ""),
11+
(r"abc", "i"),
12+
(r"abcd", "igv"),
13+
(r"emo👈🏻ji", "u"),
14+
(r"ab|c", "i"),
15+
(r"a|b+|c", "i"),
16+
(r"a{0}|b{1,2}|c{3,}", "i"),
17+
(r"(?=a)|(?<=b)|(?!c)|(?<!d)", "i"),
18+
(r"\n\cM\0\x41\.", ""),
19+
(r"\n\cM\0\x41\u1234\.", "u"),
20+
(r"\n\cM\0\x41\u{1f600}\.", "u"),
21+
(r"a\k<f>x\1c", "u"),
22+
(r"(cg)(?<n>cg)(?:g)", ""),
23+
(r"{3}", ""), // Error
24+
(r"Em🥹j", ""),
25+
(r"^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$", ""),
26+
(r"^(?<!ab)$", ""),
27+
(r"a)", ""), // Error
28+
(r"c]", ""),
29+
(r"[abc]", ""),
30+
(r"[|\]]", ""),
31+
(r"[a&&b]", "v"),
32+
(r"[a--b]", "v"),
33+
(r"[a&&&]", "v"), // Error
34+
(r"[a---]", "v"), // Error
35+
(r"[^a--b--c]", "v"),
36+
(r"[a[b[c[d[e[f[g[h[i[j[k[l]]]]]]]]]]]]", "v"),
37+
(r"[\q{abc|d|e|}]", "v"),
38+
(r"\p{Basic_Emoji}", "v"),
39+
(r"\p{Basic_Emoji}", "u"), // Error
40+
(r"[[^\q{}]]", "v"), // Error
41+
(r"(?<a>)(?<a>)", ""), // Error
42+
(r"(?noname)", "v"), // Error
43+
(r"[\bb]", ""),
44+
(r"a{2,1}", "v"), // Error
4445
] {
45-
println!("Parse: {source_text}");
46-
let parser = Parser::new(&allocator, source_text, ParserOptions::default());
46+
let parser = Parser::new(
47+
&allocator,
48+
pattern,
49+
ParserOptions::default().with_span_offset(1).with_flags(flags),
50+
);
4751
let ret = parser.parse();
4852

53+
let literal = format!("/{pattern}/{flags}");
54+
println!("Parse: {literal}");
4955
match ret {
50-
Ok(ast::RegularExpression { pattern, flags, .. }) => {
51-
println!("✨ {}", pattern.span.source_text(source_text));
52-
println!("{pattern:#?}");
53-
println!("✨ {}", flags.span.source_text(source_text));
54-
println!("{flags:?}");
56+
Ok(pattern) => {
57+
println!("✨ {pattern:#?}");
5558
}
5659
Err(error) => {
57-
let error = error.with_source_code(source_text);
60+
let error = error.with_source_code(literal);
5861
println!("💥 {error:?}");
5962
}
6063
}

‎crates/oxc_regular_expression/examples/regex_visitor.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ impl Visit<'_> for TestVisitor {
2020
}
2121

2222
fn main() {
23-
let source_text = r"/(https?:\/\/github\.com\/(([^\s]+)\/([^\s]+))\/([^\s]+\/)?(issues|pull)\/([0-9]+))|(([^\s]+)\/([^\s]+))?#([1-9][0-9]*)($|[\s\:\;\-\(\=])/";
23+
let source_text = r"(https?:\/\/github\.com\/(([^\s]+)\/([^\s]+))\/([^\s]+\/)?(issues|pull)\/([0-9]+))|(([^\s]+)\/([^\s]+))?#([1-9][0-9]*)($|[\s\:\;\-\(\=])";
24+
2425
let allocator = Allocator::default();
2526
let parser = Parser::new(&allocator, source_text, ParserOptions::default());
26-
let pattern = parser.parse().unwrap().pattern;
27+
let pattern = parser.parse().unwrap();
28+
2729
let mut visitor = TestVisitor;
2830
visitor.visit_pattern(&pattern);
2931
}

‎crates/oxc_regular_expression/src/ast.rs

-26
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,6 @@ use serde::Serialize;
99
#[cfg(feature = "serialize")]
1010
use tsify::Tsify;
1111

12-
#[ast]
13-
#[derive(Debug)]
14-
#[generate_derive(CloneIn, ContentEq, ContentHash)]
15-
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
16-
pub struct RegularExpression<'a> {
17-
pub span: Span,
18-
pub pattern: Pattern<'a>,
19-
pub flags: Flags,
20-
}
21-
22-
#[ast]
23-
#[derive(Debug, Clone)]
24-
#[generate_derive(CloneIn, ContentEq, ContentHash)]
25-
#[cfg_attr(feature = "serialize", derive(Serialize, Tsify))]
26-
pub struct Flags {
27-
pub span: Span,
28-
pub global: bool,
29-
pub ignore_case: bool,
30-
pub multiline: bool,
31-
pub unicode: bool,
32-
pub sticky: bool,
33-
pub dot_all: bool,
34-
pub has_indices: bool,
35-
pub unicode_sets: bool,
36-
}
37-
3812
/// The root of the `PatternParser` result.
3913
#[ast]
4014
#[derive(Debug)]

‎crates/oxc_regular_expression/src/ast_impl/display.rs

+21-43
Original file line numberDiff line numberDiff line change
@@ -7,37 +7,6 @@ use std::{
77
use crate::ast::*;
88
use crate::surrogate_pair::{combine_surrogate_pair, is_lead_surrogate, is_trail_surrogate};
99

10-
impl<'a> Display for RegularExpression<'a> {
11-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
12-
write!(f, "/{}/{}", self.pattern, self.flags)
13-
}
14-
}
15-
16-
impl Display for Flags {
17-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
18-
let mut flags = String::with_capacity(8);
19-
20-
// write flags in the order they are described in the `MDN`
21-
// <https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#advanced_searching_with_flags>
22-
for (v, ch) in [
23-
(self.has_indices, 'd'),
24-
(self.global, 'g'),
25-
(self.ignore_case, 'i'),
26-
(self.multiline, 'm'),
27-
(self.dot_all, 's'),
28-
(self.unicode, 'u'),
29-
(self.unicode_sets, 'v'),
30-
(self.sticky, 'y'),
31-
] {
32-
if v {
33-
flags.push(ch);
34-
}
35-
}
36-
37-
write!(f, "{flags}")
38-
}
39-
}
40-
4110
impl<'a> Display for Pattern<'a> {
4211
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4312
write!(f, "{}", self.body)
@@ -513,10 +482,8 @@ mod test {
513482
(r"/\t\n\v\f\r/u", None),
514483
(r"/\p{L}/u", None),
515484
(r"/\d/g", None),
516-
// Lose the flags ordering --
517-
("/abcd/igv", Some("/abcd/giv")),
518-
(r"/\d/ug", Some(r"/\d/gu")),
519-
// --
485+
("/abcd/igv", Some("/abcd/igv")),
486+
(r"/\d/ug", Some(r"/\d/ug")),
520487
// we capitalize hex unicodes.
521488
(r"/\n\cM\0\x41\u{1f600}\./u", Some(r"/\n\cM\0\x41\u{1F600}\./u")),
522489
(r"/\u02c1/u", Some(r"/\u02C1/u")),
@@ -577,15 +544,26 @@ mod test {
577544
(r"/([\-a-z]{0,31})/iu", None),
578545
];
579546

580-
fn test_display(allocator: &Allocator, (source, expect): &Case) {
581-
let expect = expect.unwrap_or(source);
582-
let actual = Parser::new(allocator, source, ParserOptions::default()).parse().unwrap();
583-
assert_eq!(expect, actual.to_string());
584-
}
585-
586547
#[test]
587-
fn test() {
548+
fn test_display() {
588549
let allocator = &Allocator::default();
589-
CASES.iter().for_each(|case| test_display(allocator, case));
550+
551+
for (input, output) in CASES {
552+
let (left_slash, right_slash) = (input.find('/').unwrap(), input.rfind('/').unwrap());
553+
554+
let pattern = &input[left_slash + 1..right_slash];
555+
let flags = &input[right_slash + 1..];
556+
557+
let actual = Parser::new(
558+
allocator,
559+
pattern,
560+
ParserOptions::default().with_span_offset(1).with_flags(flags),
561+
)
562+
.parse()
563+
.unwrap();
564+
565+
let expect = output.unwrap_or(input);
566+
assert_eq!(expect, format!("/{actual}/{flags}")); // This uses `Display` impls
567+
}
590568
}
591569
}

‎crates/oxc_regular_expression/src/body_parser/mod.rs

-278
This file was deleted.

‎crates/oxc_regular_expression/src/diagnostics.rs

-37
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,6 @@ use oxc_span::Span;
33

44
const PREFIX: &str = "Invalid regular expression:";
55

6-
// For (Literal)Parser ---
7-
8-
#[cold]
9-
pub fn unexpected_literal_char(span: Span) -> OxcDiagnostic {
10-
OxcDiagnostic::error(format!("{PREFIX} Unexpected literal character")).with_label(span)
11-
}
12-
13-
#[cold]
14-
pub fn unterminated_literal(span: Span, kind: &str) -> OxcDiagnostic {
15-
OxcDiagnostic::error(format!("{PREFIX} Unterminated {kind}")).with_label(span)
16-
}
17-
18-
#[cold]
19-
pub fn empty_literal(span: Span) -> OxcDiagnostic {
20-
OxcDiagnostic::error(format!("{PREFIX} Empty literal")).with_label(span)
21-
}
22-
23-
// For FlagsParser ---
24-
25-
#[cold]
26-
pub fn duplicated_flag(span: Span) -> OxcDiagnostic {
27-
OxcDiagnostic::error(format!("{PREFIX} Duplicated flag")).with_label(span)
28-
}
29-
30-
#[cold]
31-
pub fn unknown_flag(span: Span) -> OxcDiagnostic {
32-
OxcDiagnostic::error(format!("{PREFIX} Unknown flag")).with_label(span)
33-
}
34-
35-
#[cold]
36-
pub fn invalid_unicode_flags(span: Span) -> OxcDiagnostic {
37-
OxcDiagnostic::error(format!("{PREFIX} Invalid flags, `u` and `v` should be used alone"))
38-
.with_label(span)
39-
}
40-
41-
// For PatternParser ---
42-
436
#[cold]
447
pub fn duplicated_capturing_group_names(spans: Vec<Span>) -> OxcDiagnostic {
458
OxcDiagnostic::error(format!("{PREFIX} Duplicated capturing group names")).with_labels(spans)

‎crates/oxc_regular_expression/src/flags_parser.rs

-68
This file was deleted.

‎crates/oxc_regular_expression/src/generated/derive_clone_in.rs

-28
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,6 @@ use oxc_allocator::{Allocator, CloneIn};
88
#[allow(clippy::wildcard_imports)]
99
use crate::ast::*;
1010

11-
impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for RegularExpression<'old_alloc> {
12-
type Cloned = RegularExpression<'new_alloc>;
13-
fn clone_in(&self, allocator: &'new_alloc Allocator) -> Self::Cloned {
14-
RegularExpression {
15-
span: CloneIn::clone_in(&self.span, allocator),
16-
pattern: CloneIn::clone_in(&self.pattern, allocator),
17-
flags: CloneIn::clone_in(&self.flags, allocator),
18-
}
19-
}
20-
}
21-
22-
impl<'alloc> CloneIn<'alloc> for Flags {
23-
type Cloned = Flags;
24-
fn clone_in(&self, allocator: &'alloc Allocator) -> Self::Cloned {
25-
Flags {
26-
span: CloneIn::clone_in(&self.span, allocator),
27-
global: CloneIn::clone_in(&self.global, allocator),
28-
ignore_case: CloneIn::clone_in(&self.ignore_case, allocator),
29-
multiline: CloneIn::clone_in(&self.multiline, allocator),
30-
unicode: CloneIn::clone_in(&self.unicode, allocator),
31-
sticky: CloneIn::clone_in(&self.sticky, allocator),
32-
dot_all: CloneIn::clone_in(&self.dot_all, allocator),
33-
has_indices: CloneIn::clone_in(&self.has_indices, allocator),
34-
unicode_sets: CloneIn::clone_in(&self.unicode_sets, allocator),
35-
}
36-
}
37-
}
38-
3911
impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for Pattern<'old_alloc> {
4012
type Cloned = Pattern<'new_alloc>;
4113
fn clone_in(&self, allocator: &'new_alloc Allocator) -> Self::Cloned {

‎crates/oxc_regular_expression/src/generated/derive_content_eq.rs

-20
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,6 @@ use oxc_span::cmp::ContentEq;
88
#[allow(clippy::wildcard_imports)]
99
use crate::ast::*;
1010

11-
impl<'a> ContentEq for RegularExpression<'a> {
12-
fn content_eq(&self, other: &Self) -> bool {
13-
ContentEq::content_eq(&self.pattern, &other.pattern)
14-
&& ContentEq::content_eq(&self.flags, &other.flags)
15-
}
16-
}
17-
18-
impl ContentEq for Flags {
19-
fn content_eq(&self, other: &Self) -> bool {
20-
ContentEq::content_eq(&self.global, &other.global)
21-
&& ContentEq::content_eq(&self.ignore_case, &other.ignore_case)
22-
&& ContentEq::content_eq(&self.multiline, &other.multiline)
23-
&& ContentEq::content_eq(&self.unicode, &other.unicode)
24-
&& ContentEq::content_eq(&self.sticky, &other.sticky)
25-
&& ContentEq::content_eq(&self.dot_all, &other.dot_all)
26-
&& ContentEq::content_eq(&self.has_indices, &other.has_indices)
27-
&& ContentEq::content_eq(&self.unicode_sets, &other.unicode_sets)
28-
}
29-
}
30-
3111
impl<'a> ContentEq for Pattern<'a> {
3212
fn content_eq(&self, other: &Self) -> bool {
3313
ContentEq::content_eq(&self.body, &other.body)

‎crates/oxc_regular_expression/src/generated/derive_content_hash.rs

-20
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,6 @@ use oxc_span::hash::ContentHash;
1010
#[allow(clippy::wildcard_imports)]
1111
use crate::ast::*;
1212

13-
impl<'a> ContentHash for RegularExpression<'a> {
14-
fn content_hash<H: Hasher>(&self, state: &mut H) {
15-
ContentHash::content_hash(&self.pattern, state);
16-
ContentHash::content_hash(&self.flags, state);
17-
}
18-
}
19-
20-
impl ContentHash for Flags {
21-
fn content_hash<H: Hasher>(&self, state: &mut H) {
22-
ContentHash::content_hash(&self.global, state);
23-
ContentHash::content_hash(&self.ignore_case, state);
24-
ContentHash::content_hash(&self.multiline, state);
25-
ContentHash::content_hash(&self.unicode, state);
26-
ContentHash::content_hash(&self.sticky, state);
27-
ContentHash::content_hash(&self.dot_all, state);
28-
ContentHash::content_hash(&self.has_indices, state);
29-
ContentHash::content_hash(&self.unicode_sets, state);
30-
}
31-
}
32-
3313
impl<'a> ContentHash for Pattern<'a> {
3414
fn content_hash<H: Hasher>(&self, state: &mut H) {
3515
ContentHash::content_hash(&self.body, state);
+2-8
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
#![allow(clippy::missing_errors_doc)]
22

33
mod ast_impl;
4-
mod body_parser;
54
mod diagnostics;
6-
mod flags_parser;
7-
mod literal_parser;
85
mod options;
9-
mod span_factory;
6+
mod parser;
107
mod surrogate_pair;
118

129
mod generated {
@@ -16,7 +13,4 @@ mod generated {
1613
}
1714

1815
pub mod ast;
19-
pub use crate::{
20-
ast_impl::visit, body_parser::PatternParser, flags_parser::FlagsParser, literal_parser::Parser,
21-
options::ParserOptions,
22-
};
16+
pub use crate::{ast_impl::visit, options::ParserOptions, parser::Parser};

‎crates/oxc_regular_expression/src/literal_parser.rs

-165
This file was deleted.

‎crates/oxc_regular_expression/src/options.rs

+14-7
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,28 @@ pub struct ParserOptions {
66
pub unicode_mode: bool,
77
/// Extended Unicode mode(`v` flag) enabled or not.
88
pub unicode_sets_mode: bool,
9+
// TODO: Add `handle_escape_with_quote_type` like option to support `new RegExp("with \"escape\"")`
910
}
1011

1112
impl ParserOptions {
1213
#[must_use]
13-
pub fn with_span_offset(self, span_offset: u32) -> ParserOptions {
14+
pub fn with_span_offset(self, span_offset: u32) -> Self {
1415
ParserOptions { span_offset, ..self }
1516
}
1617

1718
#[must_use]
18-
pub fn with_unicode_mode(self) -> ParserOptions {
19-
ParserOptions { unicode_mode: true, ..self }
20-
}
19+
pub fn with_flags(self, flags: &str) -> Self {
20+
let (mut unicode_mode, mut unicode_sets_mode) = (false, false);
21+
for ch in flags.chars() {
22+
if ch == 'u' {
23+
unicode_mode = true;
24+
}
25+
if ch == 'v' {
26+
unicode_mode = true;
27+
unicode_sets_mode = true;
28+
}
29+
}
2130

22-
#[must_use]
23-
pub fn with_unicode_sets_mode(self) -> ParserOptions {
24-
ParserOptions { unicode_mode: true, unicode_sets_mode: true, ..self }
31+
ParserOptions { unicode_mode, unicode_sets_mode, ..self }
2532
}
2633
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
mod parser_impl;
2+
mod reader;
3+
mod span_factory;
4+
mod state;
5+
mod unicode;
6+
mod unicode_property;
7+
8+
pub use parser_impl::Parser;
9+
10+
#[cfg(test)]
11+
mod test {
12+
use crate::{Parser, ParserOptions};
13+
use oxc_allocator::Allocator;
14+
15+
fn default() -> ParserOptions {
16+
ParserOptions::default()
17+
}
18+
fn with_unicode_mode() -> ParserOptions {
19+
ParserOptions { unicode_mode: true, ..Default::default() }
20+
}
21+
fn with_unicode_sets_mode() -> ParserOptions {
22+
ParserOptions { unicode_mode: true, unicode_sets_mode: true, ..Default::default() }
23+
}
24+
25+
#[test]
26+
fn should_pass() {
27+
let allocator = Allocator::default();
28+
29+
for (source_text, options) in &[
30+
("", default()),
31+
("a", default()),
32+
("a+", default()),
33+
("a*", default()),
34+
("a?", default()),
35+
("^$^$^$", default()),
36+
("(?=a){1}", default()),
37+
("(?!a){1}", default()),
38+
("a{1}", default()),
39+
("a{1", default()),
40+
("a|{", default()),
41+
("a{", default()),
42+
("a{,", default()),
43+
("a{1,", default()),
44+
("a{1,}", default()),
45+
("a{1,2}", default()),
46+
("x{9007199254740991}", default()),
47+
("x{9007199254740991,9007199254740991}", default()),
48+
("a|b", default()),
49+
("a|b|c", default()),
50+
("a|b+?|c", default()),
51+
("a+b*?c{1}d{2,}e{3,4}?", default()),
52+
(r"^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$", default()),
53+
("a.b..", default()),
54+
(r"\d\D\s\S\w\W", default()),
55+
(r"\x", default()),
56+
(r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{Basic_Emoji}", default()),
57+
(r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{P}", with_unicode_mode()),
58+
(r"^\p{General_Category=cntrl}+$", with_unicode_mode()),
59+
(r"\p{Basic_Emoji}", with_unicode_sets_mode()),
60+
(r"\n\cM\0\x41\u1f60\.\/", default()),
61+
(r"\c0", default()),
62+
(r"\0", default()),
63+
(r"\0", with_unicode_mode()),
64+
(r"\u", default()),
65+
(r"\u{", default()),
66+
(r"\u{}", default()),
67+
(r"\u{0}", default()),
68+
(r"\u{1f600}", default()),
69+
(r"\u{1f600}", with_unicode_mode()),
70+
("(?:abc)", default()),
71+
(r"(?<\u{1d49c}>.)\x1f", default()),
72+
("a]", default()),
73+
("a}", default()),
74+
("]", default()),
75+
("[]", default()),
76+
("[a]", default()),
77+
("[ab]", default()),
78+
("[a-b]", default()),
79+
("[-]", default()),
80+
("[a-]", default()),
81+
("[-a]", default()),
82+
("[-a-]", default()),
83+
(r"[a\-b]", default()),
84+
(r"[-a-b]", default()),
85+
(r"[a-b-]", default()),
86+
(r"[a\-b-]", default()),
87+
(r"[\[\]\-]", default()),
88+
("[a-z0-9]", default()),
89+
("[a-a]", default()),
90+
(r"[\d-\D]", default()),
91+
(r"^([\ud801[\udc28-\udc4f])$", default()),
92+
(r"[a-c]]", default()),
93+
(
94+
r"[ϗϙϛϝϟϡϣϥϧϩϫϭϯ-ϳϵϸϻ-ϼа-џѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎ-ӏӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹӻӽӿԁԃԅԇԉԋԍԏԑԓԕԗԙԛԝԟԡԣա-ևᴀ-ᴫᵢ-ᵷᵹ-ᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕ-ẝẟạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹỻỽỿ-ἇἐ-ἕἠ-ἧἰ-ἷὀ-ὅὐ-ὗὠ-ὧὰ]",
95+
default(),
96+
),
97+
(r"[a-z0-9[.\\]]", with_unicode_sets_mode()),
98+
(r"[a&&b&&c]", with_unicode_sets_mode()),
99+
(r"[a--b--c]", with_unicode_sets_mode()),
100+
(r"[[a-z]--b--c]", with_unicode_sets_mode()),
101+
(r"[[[[[[[[[[[[[[[[[[[[[[[[a]]]]]]]]]]]]]]]]]]]]]]]]", with_unicode_sets_mode()),
102+
(r"[\q{}\q{a}\q{bc}\q{d|e|f}\q{|||}]", with_unicode_sets_mode()),
103+
(r"(?<foo>A)\k<foo>", default()),
104+
(r"(?<!a>)\k<a>", default()),
105+
(r"\k", default()),
106+
(r"\k<4>", default()),
107+
(r"\k<a>", default()),
108+
(r"(?<a>)\k<a>", default()),
109+
(r"(?<a>)\k<a>", with_unicode_mode()),
110+
(r"\1", default()),
111+
(r"\1()", default()),
112+
(r"\1()", with_unicode_mode()),
113+
(r"(?<n1>..)(?<n2>..)", default()),
114+
// TODO: ES2025 Duplicate named capturing groups
115+
// (r"(?<n1>..)|(?<n1>..)", default()),
116+
// (r"(?<year>[0-9]{4})-[0-9]{2}|[0-9]{2}-(?<year>[0-9]{4})", default()),
117+
// (r"(?:(?<a>x)|(?<a>y))\k<a>", default()),
118+
] {
119+
let res = Parser::new(&allocator, source_text, *options).parse();
120+
if let Err(err) = res {
121+
panic!("Failed to parse {source_text} with {options:?}\n💥 {err}");
122+
}
123+
}
124+
}
125+
126+
#[test]
127+
fn should_fail() {
128+
let allocator = Allocator::default();
129+
130+
for (source_text, options) in &[
131+
("a)", default()),
132+
(r"a\", default()),
133+
("a]", with_unicode_mode()),
134+
("a}", with_unicode_mode()),
135+
("a|+", default()),
136+
("a|{", with_unicode_mode()),
137+
("a{", with_unicode_mode()),
138+
("a{1", with_unicode_mode()),
139+
("a{1,", with_unicode_mode()),
140+
("a{,", with_unicode_mode()),
141+
("x{9007199254740992}", default()),
142+
("x{9007199254740991,9007199254740992}", default()),
143+
("x{99999999999999999999999999999999999999999999999999}", default()),
144+
(r"\99999999999999999999999999999999999999999999999999", default()),
145+
(r"\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}", with_unicode_mode()),
146+
("(?=a", default()),
147+
("(?<!a", default()),
148+
(r"\c0", with_unicode_mode()),
149+
(r"\xa", with_unicode_mode()),
150+
(r"a\u", with_unicode_mode()),
151+
(r"\p{Emoji_Presentation", with_unicode_mode()),
152+
(r"\p{Script=", with_unicode_mode()),
153+
(r"\ka", with_unicode_mode()),
154+
(r"\k", with_unicode_mode()),
155+
(r"\k<", with_unicode_mode()),
156+
(r"\k<>", with_unicode_mode()),
157+
(r"\k<4>", with_unicode_mode()),
158+
(r"\k<a", with_unicode_mode()),
159+
(r"\1", with_unicode_mode()),
160+
(r"\k<a>", with_unicode_mode()),
161+
("a(?:", default()),
162+
("(a", default()),
163+
("(?<a>", default()),
164+
(r"(?<a\>.)", default()),
165+
(r"(?<a\>.)", with_unicode_mode()),
166+
(r"(?<\>.)", default()),
167+
(r"(?<\>.)", with_unicode_mode()),
168+
("(?)", default()),
169+
("(?=a){1}", with_unicode_mode()),
170+
("(?!a){1}", with_unicode_mode()),
171+
(r"[\d-\D]", with_unicode_mode()),
172+
("[", default()),
173+
("[", with_unicode_sets_mode()),
174+
("[[", with_unicode_sets_mode()),
175+
("[[]", with_unicode_sets_mode()),
176+
("[z-a]", default()),
177+
(r"[a-c]]", with_unicode_mode()),
178+
(
179+
r"^([a-zªµºß-öø-ÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ-ĸĺļľŀłńņň-ʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżž-ƀƃƅƈƌ-ƍƒƕƙ-ƛƞơƣƥƨƪ-ƫƭưƴƶƹ-ƺƽ-ƿdžljnjǎǐǒǔǖǘǚǜ-ǝǟǡǣǥǧǩǫǭǯ-ǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳ-ȹȼȿ-ɀɂɇɉɋɍɏ-ʓʕ-ʯͱͳͷͻ-ͽΐά-ώϐ-ϑϕ-ϗϙϛϝϟϡϣϥϧϩϫϭϯ-ϳϵϸϻ-ϼа-џѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎ-ӏӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹӻӽӿԁԃԅԇԉԋԍԏԑԓԕԗԙԛԝԟԡԣա-ևᴀ-ᴫᵢ-ᵷᵹ-ᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕ-ẝẟạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹỻỽỿ-ἇἐ-ἕἠ-ἧἰ-ἷὀ-ὅὐ-ὗὠ-ὧὰ-ώᾀ-ᾇᾐ-ᾗᾠ-ᾧᾰ-ᾴᾶ-ᾷιῂ-ῄῆ-ῇῐ-ΐῖ-ῗῠ-ῧῲ-ῴῶ-ῷⁱⁿℊℎ-ℏℓℯℴℹℼ-ℽⅆ-ⅉⅎↄⰰ-ⱞⱡⱥ-ⱦⱨⱪⱬⱱⱳ-ⱴⱶ-ⱼⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣ-ⳤⴀ-ⴥꙁꙃꙅꙇꙉꙋꙍꙏꙑꙓꙕꙗꙙꙛꙝꙟꙣꙥꙧꙩꙫꙭꚁꚃꚅꚇꚉꚋꚍꚏꚑꚓꚕꚗꜣꜥꜧꜩꜫꜭꜯ-ꜱꜳꜵꜷꜹꜻꜽꜿꝁꝃꝅꝇꝉꝋꝍꝏꝑꝓꝕꝗꝙꝛꝝꝟꝡꝣꝥꝧꝩꝫꝭꝯꝱ-ꝸꝺꝼꝿꞁꞃꞅꞇꞌff-stﬓ-ﬗa-z]|\ud801[\udc28-\udc4f]|\ud835[\udc1a-\udc33\udc4e-\udc54\udc56-\udc67\udc82-\udc9b\udcb6-\udcb9\udcbb\udcbd-\udcc3\udcc5-\udccf\udcea-\udd03\udd1e-\udd37\udd52-\udd6b\udd86-\udd9f\uddba-\uddd3\uddee-\ude07\ude22-\ude3b\ude56-\ude6f\ude8a-\udea5\udec2-\udeda\udedc-\udee1\udefc-\udf14\udf16-\udf1b\udf36-\udf4e\udf50-\udf55\udf70-\udf88\udf8a-\udf8f\udfaa-\udfc2\udfc4-\udfc9\udfcb])$",
180+
default(),
181+
),
182+
(r"[[\d-\D]]", with_unicode_sets_mode()),
183+
(r"[a&&b--c]", with_unicode_sets_mode()),
184+
(r"[a--b&&c]", with_unicode_sets_mode()),
185+
(r"[\q{]", with_unicode_sets_mode()),
186+
(r"[\q{\a}]", with_unicode_sets_mode()),
187+
// TODO: ES2025 Duplicate named capturing groups
188+
(r"(?<n>..)|(?<n>..)", default()), // This will be valid
189+
// (r"(?<a>|(?<a>))", default()), // Nested, still invalid
190+
] {
191+
assert!(
192+
Parser::new(&allocator, source_text, *options).parse().is_err(),
193+
"{source_text} should fail to parse with {options:?}!"
194+
);
195+
}
196+
}
197+
198+
#[test]
199+
fn should_fail_early_errors() {
200+
let allocator = Allocator::default();
201+
202+
for (source_text, options, is_err) in &[
203+
// No tests for 4,294,967,295 left parens
204+
(r"(?<n>..)(?<n>..)", default(), true),
205+
(r"a{2,1}", default(), true),
206+
(r"(?<a>)\k<n>", default(), true),
207+
(r"()\2", with_unicode_mode(), true),
208+
(r"[a-\d]", with_unicode_mode(), true),
209+
(r"[\d-z]", with_unicode_mode(), true),
210+
(r"[\d-\d]", with_unicode_mode(), true),
211+
(r"[z-a]", default(), true),
212+
(r"\u{110000}", with_unicode_mode(), true),
213+
(r"(?<\uD800\uDBFF>)", default(), true),
214+
(r"\u{0}\u{110000}", with_unicode_mode(), true),
215+
(r"(?<a\uD800\uDBFF>)", default(), true),
216+
(r"\p{Foo=Bar}", with_unicode_mode(), true),
217+
(r"\p{Foo}", with_unicode_mode(), true),
218+
(r"\p{Basic_Emoji}", with_unicode_mode(), true),
219+
(r"\P{Basic_Emoji}", with_unicode_sets_mode(), true),
220+
(r"[^\p{Basic_Emoji}]", with_unicode_sets_mode(), true),
221+
(r"[[^\p{Basic_Emoji}]]", with_unicode_sets_mode(), true),
222+
(r"[^\q{}]", with_unicode_sets_mode(), true),
223+
(r"[[^\q{}]]", with_unicode_sets_mode(), true),
224+
(r"[[^\q{ng}]]", with_unicode_sets_mode(), true),
225+
(r"[[^\q{a|}]]", with_unicode_sets_mode(), true),
226+
(r"[[^\q{ng}\q{o|k}]]", with_unicode_sets_mode(), true),
227+
(r"[[^\q{o|k}\q{ng}\q{o|k}]]", with_unicode_sets_mode(), true),
228+
(r"[[^\q{o|k}\q{o|k}\q{ng}]]", with_unicode_sets_mode(), true),
229+
(r"[[^\q{}&&\q{ng}]]", with_unicode_sets_mode(), true),
230+
(r"[[^\q{ng}&&\q{o|k}]]", with_unicode_sets_mode(), false),
231+
(r"[[^\q{ng}&&\q{o|k}&&\q{ng}]]", with_unicode_sets_mode(), false),
232+
(r"[[^\q{ng}--\q{o|k}]]", with_unicode_sets_mode(), true),
233+
(r"[[^\q{o|k}--\q{ng}]]", with_unicode_sets_mode(), false),
234+
(r"[[z-a]]", with_unicode_sets_mode(), true),
235+
(r"[[[[[^[[[[\q{ng}]]]]]]]]]", with_unicode_sets_mode(), true),
236+
(r"[^[[[[[[[[[[[[[[[[\q{ng}]]]]]]]]]]]]]]]]]", with_unicode_sets_mode(), true),
237+
] {
238+
assert_eq!(
239+
Parser::new(&allocator, source_text, *options).parse().is_err(),
240+
*is_err,
241+
"{source_text} should early error with {options:?}!"
242+
);
243+
}
244+
}
245+
246+
#[test]
247+
fn should_handle_empty() {
248+
let allocator = Allocator::default();
249+
let pattern = Parser::new(&allocator, "", default()).parse().unwrap();
250+
251+
assert_eq!(pattern.body.body[0].body.len(), 1);
252+
}
253+
254+
#[test]
255+
fn should_handle_unicode() {
256+
let allocator = Allocator::default();
257+
let source_text = "このEmoji🥹の数が変わる";
258+
259+
for (options, expected) in
260+
&[(default(), 15), (with_unicode_mode(), 14), (with_unicode_sets_mode(), 14)]
261+
{
262+
let pattern = Parser::new(&allocator, source_text, *options).parse().unwrap();
263+
assert_eq!(pattern.body.body[0].body.len(), *expected);
264+
}
265+
}
266+
}

‎crates/oxc_regular_expression/src/body_parser/parser.rs renamed to ‎crates/oxc_regular_expression/src/parser/parser_impl.rs

+7-9
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,21 @@ use oxc_diagnostics::Result;
33
use oxc_span::Atom as SpanAtom;
44

55
use crate::{
6-
ast,
7-
body_parser::{reader::Reader, state::State, unicode, unicode_property},
8-
diagnostics,
6+
ast, diagnostics,
97
options::ParserOptions,
10-
span_factory::SpanFactory,
8+
parser::{reader::Reader, span_factory::SpanFactory, state::State, unicode, unicode_property},
119
surrogate_pair,
1210
};
1311

14-
pub struct PatternParser<'a> {
12+
pub struct Parser<'a> {
1513
allocator: &'a Allocator,
1614
source_text: &'a str,
1715
span_factory: SpanFactory,
1816
reader: Reader<'a>,
1917
state: State<'a>,
2018
}
2119

22-
impl<'a> PatternParser<'a> {
20+
impl<'a> Parser<'a> {
2321
pub fn new(allocator: &'a Allocator, source_text: &'a str, options: ParserOptions) -> Self {
2422
// `RegExp` can not be empty.
2523
// - Literal `//` means just a single line comment
@@ -35,7 +33,7 @@ impl<'a> PatternParser<'a> {
3533
}
3634
}
3735

38-
pub fn parse(&mut self) -> Result<ast::Pattern<'a>> {
36+
pub fn parse(mut self) -> Result<ast::Pattern<'a>> {
3937
// Pre parse whole pattern to collect:
4038
// - the number of (named|unnamed) capturing groups
4139
// - For `\1` in `\1()` to be handled as indexed reference
@@ -757,7 +755,7 @@ impl<'a> PatternParser<'a> {
757755
let (kind, body) = self.parse_class_contents()?;
758756

759757
if self.reader.eat(']') {
760-
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);
758+
let strings = Parser::may_contain_strings_in_class_contents(&kind, &body);
761759

762760
// [SS:EE] CharacterClass :: [^ ClassContents ]
763761
// It is a Syntax Error if MayContainStrings of the ClassContents is true.
@@ -1317,7 +1315,7 @@ impl<'a> PatternParser<'a> {
13171315
let (kind, body) = self.parse_class_contents()?;
13181316

13191317
if self.reader.eat(']') {
1320-
let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body);
1318+
let strings = Parser::may_contain_strings_in_class_contents(&kind, &body);
13211319

13221320
// [SS:EE] NestedClass :: [^ ClassContents ]
13231321
// It is a Syntax Error if MayContainStrings of the ClassContents is true.

‎crates/oxc_regular_expression/src/body_parser/state.rs renamed to ‎crates/oxc_regular_expression/src/parser/state.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use rustc_hash::FxHashSet;
22

3-
use crate::body_parser::reader::Reader;
3+
use crate::parser::reader::Reader;
44

55
/// Currently all of properties are read only from outside of this module.
66
/// Even inside of this module, it is not changed after initialized.

‎crates/oxc_transformer/src/regexp/mod.rs

+6-8
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,10 @@ fn try_parse_pattern<'a>(
241241
flags: RegExpFlags,
242242
ctx: &mut TraverseCtx<'a>,
243243
) -> Result<Pattern<'a>> {
244-
use oxc_regular_expression::{ParserOptions, PatternParser};
245-
246-
let options = ParserOptions {
247-
span_offset: span.start + 1, // exclude `/`
248-
unicode_mode: flags.contains(RegExpFlags::U) || flags.contains(RegExpFlags::V),
249-
unicode_sets_mode: flags.contains(RegExpFlags::V),
250-
};
251-
PatternParser::new(ctx.ast.allocator, raw, options).parse()
244+
use oxc_regular_expression::{Parser, ParserOptions};
245+
246+
let options = ParserOptions::default()
247+
.with_span_offset(span.start + 1) // exclude `/`
248+
.with_flags(&flags.to_string());
249+
Parser::new(ctx.ast.allocator, raw, options).parse()
252250
}

‎tasks/coverage/src/driver.rs

+5-12
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,12 @@ use std::{ops::ControlFlow, path::PathBuf};
22

33
use oxc::{
44
allocator::Allocator,
5-
ast::{
6-
ast::{Program, RegExpFlags},
7-
Trivias,
8-
},
5+
ast::{ast::Program, Trivias},
96
codegen::CodegenOptions,
107
diagnostics::OxcDiagnostic,
118
minifier::CompressOptions,
129
parser::{ParseOptions, ParserReturn},
13-
regular_expression::{ParserOptions, PatternParser},
10+
regular_expression::{Parser, ParserOptions},
1411
semantic::{
1512
post_transform_checker::{check_semantic_after_transform, check_semantic_ids},
1613
Semantic, SemanticBuilderReturn,
@@ -166,15 +163,11 @@ impl Driver {
166163
continue;
167164
};
168165
let printed1 = pattern.to_string();
169-
let flags = literal.regex.flags;
170-
let printed2 = match PatternParser::new(
166+
let flags = literal.regex.flags.to_string();
167+
let printed2 = match Parser::new(
171168
&allocator,
172169
&printed1,
173-
ParserOptions {
174-
span_offset: 0,
175-
unicode_mode: flags.contains(RegExpFlags::U) || flags.contains(RegExpFlags::V),
176-
unicode_sets_mode: flags.contains(RegExpFlags::V),
177-
},
170+
ParserOptions::default().with_flags(&flags),
178171
)
179172
.parse()
180173
{

0 commit comments

Comments
 (0)
Please sign in to comment.