|
1 |
| -use lazy_static::lazy_static; |
| 1 | +use oxc_allocator::Allocator; |
2 | 2 | use oxc_ast::{
|
3 |
| - ast::{Argument, RegExpFlags, RegExpPattern}, |
| 3 | + ast::{Argument, RegExpFlags}, |
4 | 4 | AstKind,
|
5 | 5 | };
|
6 | 6 | use oxc_diagnostics::OxcDiagnostic;
|
7 | 7 | use oxc_macros::declare_oxc_lint;
|
| 8 | +use oxc_regular_expression::{ |
| 9 | + ast::{Character, Pattern}, |
| 10 | + visit::Visit, |
| 11 | + Parser, ParserOptions, |
| 12 | +}; |
8 | 13 | use oxc_span::{GetSpan, Span};
|
9 |
| -use regex::{Matches, Regex}; |
10 | 14 |
|
11 | 15 | use crate::{ast_util::extract_regex_flags, context::LintContext, rule::Rule, AstNode};
|
12 | 16 |
|
@@ -64,196 +68,123 @@ declare_oxc_lint!(
|
64 | 68 |
|
65 | 69 | impl Rule for NoControlRegex {
|
66 | 70 | fn run<'a>(&self, node: &AstNode<'a>, context: &LintContext<'a>) {
|
67 |
| - if let Some(RegexPatternData { pattern, flags, span }) = regex_pattern(node) { |
68 |
| - let mut violations: Vec<&str> = Vec::new(); |
69 |
| - let pattern = pattern.as_ref(); |
70 |
| - let pattern_text = pattern.source_text(context.source_text()); |
71 |
| - for matched_ctl_pattern in control_patterns(pattern_text.as_ref()) { |
72 |
| - let ctl = matched_ctl_pattern.as_str(); |
| 71 | + match node.kind() { |
| 72 | + // regex literal |
| 73 | + AstKind::RegExpLiteral(reg) => { |
| 74 | + let Some(pattern) = reg.regex.pattern.as_pattern() else { |
| 75 | + return; |
| 76 | + }; |
73 | 77 |
|
74 |
| - // check for an even number of backslashes, since these will |
75 |
| - // prevent the pattern from being a control sequence |
76 |
| - if ctl.starts_with('\\') && matched_ctl_pattern.start() > 0 { |
77 |
| - let pattern_chars: Vec<char> = pattern_text.chars().collect(); // ew |
| 78 | + check_pattern(context, pattern, reg.span); |
| 79 | + } |
78 | 80 |
|
79 |
| - // Convert byte index to char index |
80 |
| - let byte_start = matched_ctl_pattern.start(); |
81 |
| - let char_start = pattern_text[..byte_start].chars().count(); |
| 81 | + // new RegExp() |
| 82 | + AstKind::NewExpression(expr) => { |
| 83 | + // constructor is RegExp, |
| 84 | + if expr.callee.is_specific_id("RegExp") |
| 85 | + // which is provided at least 1 parameter, |
| 86 | + && expr.arguments.len() > 0 |
| 87 | + { |
| 88 | + // where the first one is a string literal |
| 89 | + // note: improvements required for strings used via identifier |
| 90 | + // references |
| 91 | + if let Argument::StringLiteral(pattern) = &expr.arguments[0] { |
| 92 | + // get pattern from arguments. Missing or non-string arguments |
| 93 | + // will be runtime errors, but are not covered by this rule. |
| 94 | + let alloc = Allocator::default(); |
| 95 | + let pattern_with_slashes = format!("/{}/", &pattern.value); |
| 96 | + let flags = extract_regex_flags(&expr.arguments); |
| 97 | + let parser = Parser::new( |
| 98 | + &alloc, |
| 99 | + pattern_with_slashes.as_str(), |
| 100 | + ParserOptions { |
| 101 | + span_offset: expr |
| 102 | + .arguments |
| 103 | + .first() |
| 104 | + .map_or(0, |arg| arg.span().start), |
| 105 | + unicode_mode: flags |
| 106 | + .is_some_and(|flags| flags.contains(RegExpFlags::U)), |
| 107 | + unicode_sets_mode: flags |
| 108 | + .is_some_and(|flags| flags.contains(RegExpFlags::V)), |
| 109 | + }, |
| 110 | + ); |
82 | 111 |
|
83 |
| - let mut first_backslash = char_start; |
84 |
| - while first_backslash > 0 && pattern_chars[first_backslash] == '\\' { |
85 |
| - first_backslash -= 1; |
86 |
| - } |
| 112 | + let Some(pattern) = parser.parse().ok().map(|pattern| pattern.pattern) |
| 113 | + else { |
| 114 | + return; |
| 115 | + }; |
87 | 116 |
|
88 |
| - let mut num_slashes = char_start - first_backslash; |
89 |
| - if first_backslash == 0 && pattern_chars[first_backslash] == '\\' { |
90 |
| - num_slashes += 1; |
91 |
| - } |
92 |
| - // even # of slashes |
93 |
| - if num_slashes % 2 == 0 { |
94 |
| - continue; |
| 117 | + check_pattern(context, &pattern, expr.span); |
95 | 118 | }
|
96 | 119 | }
|
| 120 | + } |
97 | 121 |
|
98 |
| - if ctl.starts_with(r"\x") || ctl.starts_with(r"\u") { |
99 |
| - let mut numeric_part = &ctl[2..]; |
| 122 | + // RegExp() |
| 123 | + AstKind::CallExpression(expr) => { |
| 124 | + // constructor is RegExp, |
| 125 | + if expr.callee.is_specific_id("RegExp") |
| 126 | + // which is provided at least 1 parameter, |
| 127 | + && expr.arguments.len() > 0 |
| 128 | + { |
| 129 | + // where the first one is a string literal |
| 130 | + // note: improvements required for strings used via identifier |
| 131 | + // references |
| 132 | + if let Argument::StringLiteral(pattern) = &expr.arguments[0] { |
| 133 | + // get pattern from arguments. Missing or non-string arguments |
| 134 | + // will be runtime errors, but are not covered by this rule. |
| 135 | + let alloc = Allocator::default(); |
| 136 | + let pattern_with_slashes = format!("/{}/", &pattern.value); |
| 137 | + let flags = extract_regex_flags(&expr.arguments); |
| 138 | + let parser = Parser::new( |
| 139 | + &alloc, |
| 140 | + pattern_with_slashes.as_str(), |
| 141 | + ParserOptions { |
| 142 | + span_offset: expr |
| 143 | + .arguments |
| 144 | + .first() |
| 145 | + .map_or(0, |arg| arg.span().start), |
| 146 | + unicode_mode: flags |
| 147 | + .is_some_and(|flags| flags.contains(RegExpFlags::U)), |
| 148 | + unicode_sets_mode: flags |
| 149 | + .is_some_and(|flags| flags.contains(RegExpFlags::V)), |
| 150 | + }, |
| 151 | + ); |
100 | 152 |
|
101 |
| - // extract numeric part from \u{00} |
102 |
| - if numeric_part.starts_with('{') { |
103 |
| - let has_unicode_flag = match flags { |
104 |
| - Some(flags) if flags.contains(RegExpFlags::U) => true, |
105 |
| - _ => { |
106 |
| - continue; |
107 |
| - } |
| 153 | + let Some(pattern) = parser.parse().ok().map(|pattern| pattern.pattern) |
| 154 | + else { |
| 155 | + return; |
108 | 156 | };
|
109 | 157 |
|
110 |
| - // 1. Unicode control pattern is missing a curly brace |
111 |
| - // and is therefore invalid. (note: we may want to |
112 |
| - // report this?) |
113 |
| - // 2. Unicode flag is missing, which is needed for |
114 |
| - // interpreting \u{`nn`} as a unicode character |
115 |
| - if !has_unicode_flag || !numeric_part.ends_with('}') { |
116 |
| - continue; |
117 |
| - } |
118 |
| - |
119 |
| - numeric_part = &numeric_part[1..numeric_part.len() - 1]; |
120 |
| - } |
121 |
| - |
122 |
| - match u32::from_str_radix(numeric_part, 16) { |
123 |
| - Err(_) => continue, |
124 |
| - Ok(as_num) if as_num > 0x1f => continue, |
125 |
| - Ok(_) => { /* noop */ } |
| 158 | + check_pattern(context, &pattern, expr.span); |
126 | 159 | }
|
127 | 160 | }
|
128 |
| - |
129 |
| - violations.push(ctl); |
130 | 161 | }
|
131 |
| - |
132 |
| - if !violations.is_empty() { |
133 |
| - let violations = violations.join(", "); |
134 |
| - context.diagnostic(no_control_regex_diagnostic(&violations, span)); |
135 |
| - } |
136 |
| - } |
| 162 | + _ => {} |
| 163 | + }; |
137 | 164 | }
|
138 | 165 | }
|
139 | 166 |
|
140 |
| -/// Since we don't implement `ToOwned` trait. |
141 |
| -enum PatRef<'a> { |
142 |
| - Owned(RegExpPattern<'a>), |
143 |
| - Borrowed(&'a RegExpPattern<'a>), |
144 |
| -} |
| 167 | +fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) { |
| 168 | + let mut finder = ControlCharacterFinder { control_chars: Vec::new() }; |
| 169 | + finder.visit_pattern(pattern); |
145 | 170 |
|
146 |
| -impl<'a> AsRef<RegExpPattern<'a>> for PatRef<'a> { |
147 |
| - fn as_ref(&self) -> &RegExpPattern<'a> { |
148 |
| - match self { |
149 |
| - Self::Owned(pat) => pat, |
150 |
| - Self::Borrowed(pat) => pat, |
151 |
| - } |
| 171 | + if !finder.control_chars.is_empty() { |
| 172 | + let violations = finder.control_chars.join(", "); |
| 173 | + context.diagnostic(no_control_regex_diagnostic(&violations, span)); |
152 | 174 | }
|
153 | 175 | }
|
154 | 176 |
|
155 |
| -struct RegexPatternData<'a> { |
156 |
| - /// A regex pattern, either from a literal (`/foo/`) a RegExp constructor |
157 |
| - /// (`new RegExp("foo")`), or a RegExp function call (`RegExp("foo")) |
158 |
| - pattern: PatRef<'a>, |
159 |
| - /// Regex flags, if found. It's possible for this to be `Some` but have |
160 |
| - /// no flags. |
161 |
| - /// |
162 |
| - /// Note that flags are represented by a `u8` and therefore safely clonable |
163 |
| - /// with low performance overhead. |
164 |
| - flags: Option<RegExpFlags>, |
165 |
| - /// The pattern's span. For [`oxc_ast::ast::Expression::NewExpression`]s |
166 |
| - /// and [`oxc_ast::ast::Expression::CallExpression`]s, |
167 |
| - /// this will match the entire new/call expression. |
168 |
| - /// |
169 |
| - /// Note that spans are 8 bytes and safely clonable with low performance overhead |
170 |
| - span: Span, |
| 177 | +struct ControlCharacterFinder { |
| 178 | + control_chars: Vec<String>, |
171 | 179 | }
|
172 | 180 |
|
173 |
| -/// Returns the regex pattern inside a node, if it's applicable. |
174 |
| -/// |
175 |
| -/// e.g.: |
176 |
| -/// * /foo/ -> "foo" |
177 |
| -/// * new RegExp("foo") -> foo |
178 |
| -/// |
179 |
| -/// note: [`RegExpFlags`] and [`Span`]s are both tiny and cloneable. |
180 |
| -fn regex_pattern<'a>(node: &AstNode<'a>) -> Option<RegexPatternData<'a>> { |
181 |
| - let kind = node.kind(); |
182 |
| - match kind { |
183 |
| - // regex literal |
184 |
| - AstKind::RegExpLiteral(reg) => Some(RegexPatternData { |
185 |
| - pattern: PatRef::Borrowed(®.regex.pattern), |
186 |
| - flags: Some(reg.regex.flags), |
187 |
| - span: reg.span, |
188 |
| - }), |
189 |
| - |
190 |
| - // new RegExp() |
191 |
| - AstKind::NewExpression(expr) => { |
192 |
| - // constructor is RegExp, |
193 |
| - if expr.callee.is_specific_id("RegExp") |
194 |
| - // which is provided at least 1 parameter, |
195 |
| - && expr.arguments.len() > 0 |
196 |
| - { |
197 |
| - // where the first one is a string literal |
198 |
| - // note: improvements required for strings used via identifier |
199 |
| - // references |
200 |
| - if let Argument::StringLiteral(pattern) = &expr.arguments[0] { |
201 |
| - // get pattern from arguments. Missing or non-string arguments |
202 |
| - // will be runtime errors, but are not covered by this rule. |
203 |
| - // Note that we're intentionally reporting the entire "new |
204 |
| - // RegExp("pat") expression, not just "pat". |
205 |
| - Some(RegexPatternData { |
206 |
| - pattern: PatRef::Owned(RegExpPattern::Raw(pattern.value.as_ref())), |
207 |
| - flags: extract_regex_flags(&expr.arguments), |
208 |
| - span: kind.span(), |
209 |
| - }) |
210 |
| - } else { |
211 |
| - None |
212 |
| - } |
213 |
| - } else { |
214 |
| - None |
215 |
| - } |
216 |
| - } |
217 |
| - |
218 |
| - // RegExp() |
219 |
| - AstKind::CallExpression(expr) => { |
220 |
| - // constructor is RegExp, |
221 |
| - if expr.callee.is_specific_id("RegExp") |
222 |
| - // which is provided at least 1 parameter, |
223 |
| - && expr.arguments.len() > 0 |
224 |
| - { |
225 |
| - // where the first one is a string literal |
226 |
| - // note: improvements required for strings used via identifier |
227 |
| - // references |
228 |
| - if let Argument::StringLiteral(pattern) = &expr.arguments[0] { |
229 |
| - // get pattern from arguments. Missing or non-string arguments |
230 |
| - // will be runtime errors, but are not covered by this rule. |
231 |
| - // Note that we're intentionally reporting the entire "new |
232 |
| - // RegExp("pat") expression, not just "pat". |
233 |
| - Some(RegexPatternData { |
234 |
| - pattern: PatRef::Owned(RegExpPattern::Raw(pattern.value.as_ref())), |
235 |
| - flags: extract_regex_flags(&expr.arguments), |
236 |
| - span: kind.span(), |
237 |
| - }) |
238 |
| - } else { |
239 |
| - None |
240 |
| - } |
241 |
| - } else { |
242 |
| - None |
243 |
| - } |
| 181 | +impl<'a> Visit<'a> for ControlCharacterFinder { |
| 182 | + fn visit_character(&mut self, ch: &Character) { |
| 183 | + // Control characters are in the range 0x00 to 0x1F |
| 184 | + if ch.value <= 0x1F { |
| 185 | + self.control_chars.push(ch.to_string()); |
244 | 186 | }
|
245 |
| - _ => None, |
246 |
| - } |
247 |
| -} |
248 |
| - |
249 |
| -fn control_patterns(pattern: &str) -> Matches<'static, '_> { |
250 |
| - lazy_static! { |
251 |
| - static ref CTL_PAT: Regex = Regex::new( |
252 |
| - r"([\x00-\x1f]|(?:\\x\w{2})|(?:\\u\w{4})|(?:\\u\{\w{1,4}\}))" |
253 |
| - // r"((?:\\x\w{2}))" |
254 |
| - ).unwrap(); |
255 | 187 | }
|
256 |
| - CTL_PAT.find_iter(pattern) |
257 | 188 | }
|
258 | 189 |
|
259 | 190 | #[cfg(test)]
|
|
0 commit comments