diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs index 2d2172957..6e02fd349 100644 --- a/regex-automata/src/nfa/thompson/compiler.rs +++ b/regex-automata/src/nfa/thompson/compiler.rs @@ -1663,7 +1663,7 @@ impl Compiler { capture_index: u32, name: Option<&str>, ) -> Result { - let name = name.map(|n| Arc::from(n)); + let name = name.map(Arc::from); self.builder.borrow_mut().add_capture_start( StateID::ZERO, capture_index, diff --git a/regex-automata/src/nfa/thompson/nfa.rs b/regex-automata/src/nfa/thompson/nfa.rs index 1f57f8ebd..86dd323c1 100644 --- a/regex-automata/src/nfa/thompson/nfa.rs +++ b/regex-automata/src/nfa/thompson/nfa.rs @@ -1471,13 +1471,13 @@ impl fmt::Debug for Inner { } let pattern_len = self.start_pattern.len(); if pattern_len > 1 { - writeln!(f, "")?; + writeln!(f)?; for pid in 0..pattern_len { let sid = self.start_pattern[pid]; writeln!(f, "START({:06?}): {:?}", pid, sid.as_usize())?; } } - writeln!(f, "")?; + writeln!(f)?; writeln!( f, "transition equivalence classes: {:?}", @@ -1819,7 +1819,7 @@ impl SparseTransitions { &self, unit: alphabet::Unit, ) -> Option { - unit.as_u8().map_or(None, |byte| self.matches_byte(byte)) + unit.as_u8().and_then(|byte| self.matches_byte(byte)) } /// This follows the matching transition for a particular byte. @@ -1909,7 +1909,7 @@ impl DenseTransitions { &self, unit: alphabet::Unit, ) -> Option { - unit.as_u8().map_or(None, |byte| self.matches_byte(byte)) + unit.as_u8().and_then(|byte| self.matches_byte(byte)) } /// This follows the matching transition for a particular byte. diff --git a/regex-automata/src/nfa/thompson/pikevm.rs b/regex-automata/src/nfa/thompson/pikevm.rs index 0128c151a..20934e8dd 100644 --- a/regex-automata/src/nfa/thompson/pikevm.rs +++ b/regex-automata/src/nfa/thompson/pikevm.rs @@ -1290,7 +1290,7 @@ impl PikeVM { // the only thing in 'curr'. So we might as well just skip // ahead until we find something that we know might advance us // forward. - if let Some(ref pre) = pre { + if let Some(pre) = pre { let span = Span::from(at..input.end()); match pre.find(input.haystack(), span) { None => break, @@ -1344,7 +1344,7 @@ impl PikeVM { // search. If we re-computed it at every position, we would be // simulating an unanchored search when we were tasked to perform // an anchored search. - if (!hm.is_some() || allmatches) + if (hm.is_none() || allmatches) && (!anchored || at == input.start()) { // Since we are adding to the 'curr' active states and since diff --git a/regex-automata/src/nfa/thompson/range_trie.rs b/regex-automata/src/nfa/thompson/range_trie.rs index cd77cc150..a2f0994ed 100644 --- a/regex-automata/src/nfa/thompson/range_trie.rs +++ b/regex-automata/src/nfa/thompson/range_trie.rs @@ -235,7 +235,7 @@ impl RangeTrie { /// Clear this range trie such that it is empty. Clearing a range trie /// and reusing it can beneficial because this may reuse allocations. pub fn clear(&mut self) { - self.free.extend(self.states.drain(..)); + self.free.append(&mut self.states); self.add_empty(); // final self.add_empty(); // root } @@ -296,7 +296,7 @@ impl RangeTrie { assert!(!ranges.is_empty()); assert!(ranges.len() <= 4); - let mut stack = mem::replace(&mut self.insert_stack, vec![]); + let mut stack = core::mem::replace(&mut self.insert_stack, vec![]); stack.clear(); stack.push(NextInsert::new(ROOT, ranges)); @@ -866,7 +866,7 @@ impl Split { impl fmt::Debug for RangeTrie { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "")?; + writeln!(f)?; for (i, state) in self.states.iter().enumerate() { let status = if i == FINAL.as_usize() { '*' } else { ' ' }; writeln!(f, "{}{:06}: {:?}", status, i, state)?; diff --git a/regex-automata/src/util/determinize/mod.rs b/regex-automata/src/util/determinize/mod.rs index ba32991d0..09bd3a123 100644 --- a/regex-automata/src/util/determinize/mod.rs +++ b/regex-automata/src/util/determinize/mod.rs @@ -131,7 +131,7 @@ pub(crate) fn next( if !state.look_need().is_empty() { // Add look-ahead assertions that are now true based on the current // input unit. - let mut look_have = state.look_have().clone(); + let mut look_have = state.look_have(); match unit.as_u8() { Some(b'\r') => { if !rev || !state.is_half_crlf() { diff --git a/regex-automata/src/util/determinize/state.rs b/regex-automata/src/util/determinize/state.rs index effa6f44d..f4bee35ad 100644 --- a/regex-automata/src/util/determinize/state.rs +++ b/regex-automata/src/util/determinize/state.rs @@ -115,7 +115,7 @@ pub(crate) struct State(Arc<[u8]>); /// without having to convert it into a State first. impl core::borrow::Borrow<[u8]> for State { fn borrow(&self) -> &[u8] { - &*self.0 + &self.0 } } @@ -177,7 +177,7 @@ impl State { } fn repr(&self) -> Repr<'_> { - Repr(&*self.0) + Repr(&self.0) } } @@ -461,7 +461,7 @@ impl<'a> Repr<'a> { /// If this state is not a match state, then this always returns 0. fn match_len(&self) -> usize { if !self.is_match() { - return 0; + 0 } else if !self.has_pattern_ids() { 1 } else { diff --git a/regex-automata/src/util/interpolate.rs b/regex-automata/src/util/interpolate.rs index f274629df..951646910 100644 --- a/regex-automata/src/util/interpolate.rs +++ b/regex-automata/src/util/interpolate.rs @@ -107,7 +107,7 @@ pub fn string( } // Handle escaping of '$'. if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') { - dst.push_str("$"); + dst.push('$'); replacement = &replacement[2..]; continue; } @@ -115,7 +115,7 @@ pub fn string( let cap_ref = match find_cap_ref(replacement.as_bytes()) { Some(cap_ref) => cap_ref, None => { - dst.push_str("$"); + dst.push('$'); replacement = &replacement[1..]; continue; } @@ -321,10 +321,7 @@ fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option> { /// Returns true if and only if the given byte is allowed in a capture name /// written in non-brace form. fn is_valid_cap_letter(b: u8) -> bool { - match b { - b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, - _ => false, - } + matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_') } #[cfg(test)] diff --git a/regex-automata/src/util/pool.rs b/regex-automata/src/util/pool.rs index d90d4ecff..cbfc18ccf 100644 --- a/regex-automata/src/util/pool.rs +++ b/regex-automata/src/util/pool.rs @@ -678,7 +678,7 @@ mod inner { #[inline] pub(super) fn value(&self) -> &T { match self.value { - Ok(ref v) => &**v, + Ok(ref v) => v, // SAFETY: This is safe because the only way a PoolGuard gets // created for self.value=Err is when the current thread // corresponds to the owning thread, of which there can only @@ -703,7 +703,7 @@ mod inner { #[inline] pub(super) fn value_mut(&mut self) -> &mut T { match self.value { - Ok(ref mut v) => &mut **v, + Ok(ref mut v) => v, // SAFETY: This is safe because the only way a PoolGuard gets // created for self.value=None is when the current thread // corresponds to the owning thread, of which there can only diff --git a/regex-automata/src/util/prefilter/mod.rs b/regex-automata/src/util/prefilter/mod.rs index 51fc92233..4c4a26b8e 100644 --- a/regex-automata/src/util/prefilter/mod.rs +++ b/regex-automata/src/util/prefilter/mod.rs @@ -478,17 +478,17 @@ pub(crate) trait PrefilterI: impl PrefilterI for Arc

{ #[cfg_attr(feature = "perf-inline", inline(always))] fn find(&self, haystack: &[u8], span: Span) -> Option { - (&**self).find(haystack, span) + (**self).find(haystack, span) } #[cfg_attr(feature = "perf-inline", inline(always))] fn prefix(&self, haystack: &[u8], span: Span) -> Option { - (&**self).prefix(haystack, span) + (**self).prefix(haystack, span) } #[cfg_attr(feature = "perf-inline", inline(always))] fn memory_usage(&self) -> usize { - (&**self).memory_usage() + (**self).memory_usage() } #[cfg_attr(feature = "perf-inline", inline(always))] diff --git a/regex-automata/src/util/search.rs b/regex-automata/src/util/search.rs index 39aec522b..72e05f14a 100644 --- a/regex-automata/src/util/search.rs +++ b/regex-automata/src/util/search.rs @@ -1365,16 +1365,17 @@ impl<'a> Iterator for PatternSetIter<'a> { type Item = PatternID; fn next(&mut self) -> Option { - while let Some((index, &yes)) = self.it.next() { + // Only valid 'PatternID' values can be inserted into the set + // and construction of the set panics if the capacity would + // permit storing invalid pattern IDs. Thus, 'yes' is only true + // precisely when 'index' corresponds to a valid 'PatternID'. + self.it.by_ref().find_map(|(index, &yes)| { if yes { - // Only valid 'PatternID' values can be inserted into the set - // and construction of the set panics if the capacity would - // permit storing invalid pattern IDs. Thus, 'yes' is only true - // precisely when 'index' corresponds to a valid 'PatternID'. - return Some(PatternID::new_unchecked(index)); + Some(PatternID::new_unchecked(index)) + } else { + None } - } - None + }) } fn size_hint(&self) -> (usize, Option) { @@ -1689,13 +1690,14 @@ impl Anchored { /// # Ok::<(), Box>(()) /// ``` #[non_exhaustive] -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Default, Debug, Eq, PartialEq)] pub enum MatchKind { /// Report all possible matches. All, /// Report only the leftmost matches. When multiple leftmost matches exist, /// report the match corresponding to the part of the regex that appears /// first in the syntax. + #[default] LeftmostFirst, // There is prior art in RE2 that shows that we should be able to add // LeftmostLongest too. The tricky part of it is supporting ungreedy @@ -1721,12 +1723,6 @@ impl MatchKind { } } -impl Default for MatchKind { - fn default() -> MatchKind { - MatchKind::LeftmostFirst - } -} - /// An error indicating that a search stopped before reporting whether a /// match exists or not. /// diff --git a/regex-automata/src/util/sparse_set.rs b/regex-automata/src/util/sparse_set.rs index cbaa0b6f4..e9dfde1dc 100644 --- a/regex-automata/src/util/sparse_set.rs +++ b/regex-automata/src/util/sparse_set.rs @@ -234,6 +234,6 @@ impl<'a> Iterator for SparseSetIter<'a> { #[cfg_attr(feature = "perf-inline", inline(always))] fn next(&mut self) -> Option { - self.0.next().map(|&id| id) + self.0.next().copied() } } diff --git a/regex-automata/src/util/utf8.rs b/regex-automata/src/util/utf8.rs index 91b27efe0..6c86e8d5f 100644 --- a/regex-automata/src/util/utf8.rs +++ b/regex-automata/src/util/utf8.rs @@ -99,18 +99,13 @@ pub(crate) fn decode_last(bytes: &[u8]) -> Option> { /// `None`. #[cfg_attr(feature = "perf-inline", inline(always))] fn len(byte: u8) -> Option { - if byte <= 0x7F { - return Some(1); - } else if byte & 0b1100_0000 == 0b1000_0000 { - return None; - } else if byte <= 0b1101_1111 { - Some(2) - } else if byte <= 0b1110_1111 { - Some(3) - } else if byte <= 0b1111_0111 { - Some(4) - } else { - None + match byte { + 0b0000_0000..=0b0111_1111 => Some(1), + 0b1000_0000..=0b1011_1111 => None, + 0b1100_0000..=0b1101_1111 => Some(2), + 0b1110_0000..=0b1110_1111 => Some(3), + 0b1111_0000..=0b1111_0111 => Some(4), + _ => None, } } diff --git a/regex-automata/src/util/wire.rs b/regex-automata/src/util/wire.rs index ecf4fd8c0..2af072881 100644 --- a/regex-automata/src/util/wire.rs +++ b/regex-automata/src/util/wire.rs @@ -482,12 +482,8 @@ pub(crate) fn write_label( /// is longer than 255 bytes. (The size restriction exists so that searching /// for a label during deserialization can be done in small bounded space.) pub(crate) fn write_label_len(label: &str) -> usize { - if label.len() > 255 { - panic!("label must not be longer than 255 bytes"); - } - if label.as_bytes().iter().position(|&b| b == 0).is_some() { - panic!("label must not contain NUL bytes"); - } + assert!(label.len() <= 255, "label must not be longer than 255 bytes"); + assert!(label.bytes().all(|b| b != 0), "label must not contain NUL bytes"); let label_len = label.len() + 1; // +1 for the NUL terminator label_len + padding_len(label_len) }