diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 1a3df56b5..0c2a35265 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -124,6 +124,7 @@ pub struct ParserBuilder { ignore_whitespace: bool, nest_limit: u32, octal: bool, + empty_min_range: bool, } impl Default for ParserBuilder { @@ -139,6 +140,7 @@ impl ParserBuilder { ignore_whitespace: false, nest_limit: 250, octal: false, + empty_min_range: false, } } @@ -149,6 +151,7 @@ impl ParserBuilder { capture_index: Cell::new(0), nest_limit: self.nest_limit, octal: self.octal, + empty_min_range: self.empty_min_range, initial_ignore_whitespace: self.ignore_whitespace, ignore_whitespace: Cell::new(self.ignore_whitespace), comments: RefCell::new(vec![]), @@ -221,6 +224,18 @@ impl ParserBuilder { self.ignore_whitespace = yes; self } + + /// Allow using `{,n}` as an equivalent to `{0,n}`. + /// + /// When enabled, the parser accepts `{,n}` as valid syntax for `{0,n}`. + /// Most regular expression engines don't support the `{,n}` syntax, but + /// some do, notably Python's `re` library. + /// + /// This is disabled by default. + pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder { + self.empty_min_range = yes; + self + } } /// A regular expression parser. @@ -246,6 +261,9 @@ pub struct Parser { /// The initial setting for `ignore_whitespace` as provided by /// `ParserBuilder`. It is used when resetting the parser's state. initial_ignore_whitespace: bool, + /// Whether the parser supports `{,n}` repetitions as an equivalent to + /// `{0,n}`. + empty_min_range: bool, /// Whether whitespace should be ignored. When enabled, comments are /// also permitted. 
ignore_whitespace: Cell<bool>, @@ -1114,15 +1132,14 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { self.parse_decimal(), ast::ErrorKind::DecimalEmpty, ast::ErrorKind::RepetitionCountDecimalEmpty, - )?; - let mut range = ast::RepetitionRange::Exactly(count_start); + ); if self.is_eof() { return Err(self.error( Span::new(start, self.pos()), ast::ErrorKind::RepetitionCountUnclosed, )); } - if self.char() == ',' { + let range = if self.char() == ',' { if !self.bump_and_bump_space() { return Err(self.error( Span::new(start, self.pos()), @@ -1130,16 +1147,33 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { )); } if self.char() != '}' { + let count_start = match count_start { + Ok(c) => c, + Err(err) + if err.kind + == ast::ErrorKind::RepetitionCountDecimalEmpty => + { + if self.parser().empty_min_range { + 0 + } else { + return Err(err); + } + } + err => err?, + }; let count_end = specialize_err( self.parse_decimal(), ast::ErrorKind::DecimalEmpty, ast::ErrorKind::RepetitionCountDecimalEmpty, )?; - range = ast::RepetitionRange::Bounded(count_start, count_end); + ast::RepetitionRange::Bounded(count_start, count_end) } else { - range = ast::RepetitionRange::AtLeast(count_start); + ast::RepetitionRange::AtLeast(count_start?) } - } + } else { + ast::RepetitionRange::Exactly(count_start?) 
+ }; + if self.is_eof() || self.char() != '}' { return Err(self.error( Span::new(start, self.pos()), @@ -2459,6 +2493,11 @@ mod tests { ParserI::new(parser, pattern) } + fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> { + let parser = ParserBuilder::new().empty_min_range(true).build(); + ParserI::new(parser, pattern) + } + fn parser_nest_limit( pattern: &str, nest_limit: u32, @@ -3376,6 +3415,20 @@ bar ast: Box::new(lit('a', 0)), })) ); + assert_eq!( + parser_empty_min_range(r"a{,9}").parse(), + Ok(Ast::repetition(ast::Repetition { + span: span(0..5), + op: ast::RepetitionOp { + span: span(1..5), + kind: ast::RepetitionKind::Range( + ast::RepetitionRange::Bounded(0, 9) + ), + }, + greedy: true, + ast: Box::new(lit('a', 0)), + })) + ); assert_eq!( parser_ignore_whitespace(r"a{5,9} ?").parse(), Ok(Ast::repetition(ast::Repetition { @@ -4596,8 +4649,8 @@ bar assert_eq!( parser(r"\b{ ").parse().unwrap_err(), TestError { - span: span(4..4), - kind: ast::ErrorKind::RepetitionCountDecimalEmpty, + span: span(2..4), + kind: ast::ErrorKind::RepetitionCountUnclosed, } ); // In this case, we got some valid chars that makes it look like the