rust-lang · archer884 · Oct 28, 2022 · Oct 28, 2022
diff --git a/src/re_bytes.rs b/src/re_bytes.rs
@@ -318,6 +318,35 @@ impl Regex {
         Split { finder: self.find_iter(text), last: 0 }
     }
 
+    /// Returns an iterator of substrings of `text` separated by a match of the
+    /// regular expression. Unlike the iterator produced by `split`, each
+    /// substring keeps the match that ends it as its terminator, so no part
+    /// of `text` is dropped.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"\r?\n").unwrap();
+    /// let text = b"Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
+    /// let v: Vec<&[u8]> = re.split_inclusive(text).collect();
+    /// assert_eq!(v, [
+    ///     &b"Mary had a little lamb\n"[..],
+    ///     &b"little lamb\r\n"[..],
+    ///     &b"little lamb."[..]
+    /// ]);
+    /// # }
+    /// ```
+    pub fn split_inclusive<'r, 't>(
+        &'r self,
+        text: &'t [u8],
+    ) -> SplitInclusive<'r, 't> {
+        SplitInclusive { finder: self.find_iter(text), last: 0 }
+    }
+
     /// Returns an iterator of at most `limit` substrings of `text` delimited
     /// by a match of the regular expression. (A `limit` of `0` will return no
     /// substrings.) Namely, each element of the iterator corresponds to text
@@ -767,6 +796,43 @@ impl<'r, 't> Iterator for Split<'r, 't> {
 
 impl<'r, 't> FusedIterator for Split<'r, 't> {}
 
+/// Yields the substrings of a byte string delimited by regular expression
+/// matches, with each match kept at the end of the substring it terminates.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the byte string being split.
+#[derive(Debug)]
+pub struct SplitInclusive<'r, 't> {
+    finder: Matches<'r, 't>, // Iterator over the matches in the text.
+    last: usize, // Offset in the text where the next substring starts.
+}
+
+impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
+    type Item = &'t [u8];
+
+    /// Yields the next substring, ending with its terminating match if any.
+    fn next(&mut self) -> Option<&'t [u8]> {
+        let text = self.finder.0.text();
+        if let Some(m) = self.finder.next() {
+            // The match terminates this substring, so it is included.
+            let piece = &text[self.last..m.end()];
+            self.last = m.end();
+            return Some(piece);
+        }
+        // No matches remain. A match is a terminator rather than a
+        // separator, so text ending in a match (or empty text) has no
+        // trailing empty substring, mirroring `slice::split_inclusive`.
+        if self.last >= text.len() {
+            return None;
+        }
+        let rest = &text[self.last..];
+        self.last = text.len(); // Subsequent calls return `None`.
+        Some(rest)
+    }
+}
+
+impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
+
 /// Yields at most `N` substrings delimited by a regular expression match.
 ///
 /// The last substring will be whatever remains after splitting.

diff --git a/src/re_unicode.rs b/src/re_unicode.rs
@@ -371,6 +371,30 @@ impl Regex {
         Split { finder: self.find_iter(text), last: 0 }
     }
 
+    /// Returns an iterator of substrings of `text` delimited by a match of the
+    /// regular expression. Each element of the iterator includes the
+    /// delimiting match, if any, at the end of the element.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"\r?\n").unwrap();
+    /// let text = "Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
+    /// let v: Vec<&str> = re.split_inclusive(text).collect();
+    /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\r\n", "little lamb."]);
+    /// # }
+    /// ```
+    pub fn split_inclusive<'r, 't>(
+        &'r self,
+        text: &'t str,
+    ) -> SplitInclusive<'r, 't> {
+        SplitInclusive { finder: self.find_iter(text), last: 0 }
+    }
+
     /// Returns an iterator of at most `limit` substrings of `text` delimited
     /// by a match of the regular expression. (A `limit` of `0` will return no
     /// substrings.) Namely, each element of the iterator corresponds to text
@@ -809,6 +833,43 @@ impl<'r, 't> Iterator for Split<'r, 't> {
 
 impl<'r, 't> FusedIterator for Split<'r, 't> {}
 
+/// Yields the substrings of a string delimited by regular expression
+/// matches, with each match kept at the end of the substring it terminates.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the string being split.
+#[derive(Debug)]
+pub struct SplitInclusive<'r, 't> {
+    finder: Matches<'r, 't>, // Iterator over the matches in the text.
+    last: usize, // Offset in the text where the next substring starts.
+}
+
+impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
+    type Item = &'t str;
+
+    /// Yields the next substring, ending with its terminating match if any.
+    fn next(&mut self) -> Option<&'t str> {
+        let text = self.finder.0.text();
+        if let Some(m) = self.finder.next() {
+            // The match terminates this substring, so it is included.
+            let piece = &text[self.last..m.end()];
+            self.last = m.end();
+            return Some(piece);
+        }
+        // No matches remain. A match is a terminator rather than a
+        // separator, so text ending in a match (or empty text) has no
+        // trailing empty substring, mirroring `str::split_inclusive`.
+        if self.last >= text.len() {
+            return None;
+        }
+        let rest = &text[self.last..];
+        self.last = text.len(); // Subsequent calls return `None`.
+        Some(rest)
+    }
+}
+
+impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
+
 /// Yields at most `N` substrings delimited by a regular expression match.
 ///
 /// The last substring will be whatever remains after splitting.