From 24e9ecd599e7940eebbcef019d513afd583b2eca Mon Sep 17 00:00:00 2001
From: JMA <archer884@zoho.com>
Date: Fri, 28 Oct 2022 13:27:17 -0500
Subject: [PATCH 1/2] Add split_inclusive

---
 src/re_bytes.rs   | 62 ++++++++++++++++++++++++++++++++++++++++++++
 src/re_unicode.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index d71969257..24917c810 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -318,6 +318,31 @@ impl Regex {
         Split { finder: self.find_iter(text), last: 0 }
     }
 
+    /// Returns an iterator of substrings of `text` delimited by a match of the
+    /// regular expression. Each element of the iterator will include the
+    /// delimiting match if it appears at the beginning of the element.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// To split a string delimited by fruit and include the fruit:
+    ///
+    /// ```rust
+    /// # use regex::bytes::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"(apple|banana|pear)").unwrap();
+    /// let fields: Vec<&[u8]> = re.split_inclusive(b"apples: 3 bananas: 2 pears: 4").collect();
+    /// assert_eq!(fields, vec![&b""[..], &b"apples: 3 "[..], &b"bananas: 2 "[..], &b"pears: 4"[..]]);
+    /// # }
+    /// ```
+    pub fn split_inclusive<'r, 't>(
+        &'r self,
+        text: &'t [u8],
+    ) -> SplitInclusive<'r, 't> {
+        SplitInclusive { finder: self.find_iter(text), last: 0 }
+    }
+
     /// Returns an iterator of at most `limit` substrings of `text` delimited
     /// by a match of the regular expression. (A `limit` of `0` will return no
     /// substrings.) Namely, each element of the iterator corresponds to text
@@ -767,6 +792,43 @@ impl<'r, 't> Iterator for Split<'r, 't> {
 
 impl<'r, 't> FusedIterator for Split<'r, 't> {}
 
+/// Yields all substrings delimited by a regular expression match inclusive of
+/// the match.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the byte string being split.
+#[derive(Debug)]
+pub struct SplitInclusive<'r, 't> {
+    finder: Matches<'r, 't>,
+    last: usize,
+}
+
+impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
+    type Item = &'t [u8];
+
+    fn next(&mut self) -> Option<&'t [u8]> {
+        let text = self.finder.0.text();
+        match self.finder.next() {
+            None => {
+                if self.last >= text.len() {
+                    None // As in std, no trailing empty element
+                } else {
+                    let s = &text[self.last..];
+                    self.last = text.len() + 1; // Next call will return None
+                    Some(s)
+                }
+            }
+            Some(m) => {
+                let matched = &text[self.last..m.start()];
+                self.last = m.start();
+                Some(matched)
+            }
+        }
+    }
+}
+
+impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
+
 /// Yields at most `N` substrings delimited by a regular expression match.
 ///
 /// The last substring will be whatever remains after splitting.
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index 60d81a7d9..4797ce595 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -371,6 +371,34 @@ impl Regex {
         Split { finder: self.find_iter(text), last: 0 }
     }
 
+    /// Returns an iterator of substrings of `text` delimited by a match of the
+    /// regular expression. Unlike `split`, the text of each delimiting match
+    /// is kept in the yielded substrings instead of being discarded.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// To split a string delimited by fruit and include the fruit:
+    ///
+    /// ```rust
+    /// # use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"(apple|banana|pear)").unwrap();
+    /// let fields: Vec<&str> = re
+    ///     .split_inclusive("apples: 3 bananas: 2 pears: 4")
+    ///     .map(|s| s.trim())
+    ///     .collect();
+    /// assert_eq!(fields, vec!["", "apples: 3", "bananas: 2", "pears: 4"]);
+    /// # }
+    /// ```
+    pub fn split_inclusive<'r, 't>(
+        &'r self,
+        text: &'t str,
+    ) -> SplitInclusive<'r, 't> {
+        SplitInclusive { finder: self.find_iter(text), last: 0 }
+    }
+
     /// Returns an iterator of at most `limit` substrings of `text` delimited
     /// by a match of the regular expression. (A `limit` of `0` will return no
     /// substrings.) Namely, each element of the iterator corresponds to text
@@ -809,6 +837,43 @@ impl<'r, 't> Iterator for Split<'r, 't> {
 
 impl<'r, 't> FusedIterator for Split<'r, 't> {}
 
+/// Yields all substrings delimited by a regular expression match inclusive of
+/// the match.
+///
+/// `'r` is the lifetime of the compiled regular expression and `'t` is the
+/// lifetime of the string being split.
+#[derive(Debug)]
+pub struct SplitInclusive<'r, 't> {
+    finder: Matches<'r, 't>,
+    last: usize,
+}
+
+impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
+    type Item = &'t str;
+
+    fn next(&mut self) -> Option<&'t str> {
+        let text = self.finder.0.text();
+        match self.finder.next() {
+            None => {
+                if self.last >= text.len() {
+                    None // As in std, no trailing empty element
+                } else {
+                    let s = &text[self.last..];
+                    self.last = text.len() + 1; // Next call will return None
+                    Some(s)
+                }
+            }
+            Some(m) => {
+                let matched = &text[self.last..m.start()];
+                self.last = m.start();
+                Some(matched)
+            }
+        }
+    }
+}
+
+impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}
+
 /// Yields at most `N` substrings delimited by a regular expression match.
 ///
 /// The last substring will be whatever remains after splitting.

From 857e063725a23f0aa34b9da4cd77bebabbd68274 Mon Sep 17 00:00:00 2001
From: JMA <archer884@zoho.com>
Date: Fri, 28 Oct 2022 14:11:10 -0500
Subject: [PATCH 2/2] Update documentation, fix terminator

This changeset adjusts the documentation to more closely match that
found in std for split_inclusive. It also changes the behavior such that
the matched substring appears at the end of each element as a terminator
rather than at the head of each element.

Sorry; I never actually *read* the split_inclusive docs in std.
---
 src/re_bytes.rs   | 24 ++++++++++++++----------
 src/re_unicode.rs | 16 ++++++----------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/re_bytes.rs b/src/re_bytes.rs
index 24917c810..9e10efbae 100644
--- a/src/re_bytes.rs
+++ b/src/re_bytes.rs
@@ -318,22 +318,26 @@ impl Regex {
         Split { finder: self.find_iter(text), last: 0 }
     }
 
-    /// Returns an iterator of substrings of `text` delimited by a match of the
-    /// regular expression. Each element of the iterator will include the
-    /// delimiting match if it appears at the beginning of the element.
+    /// Returns an iterator of substrings of `text` separated by a match of the
+    /// regular expression. Differs from the iterator produced by `split` in
+    /// that `split_inclusive` leaves the matched part as the terminator of the
+    /// substring.
     ///
     /// This method will *not* copy the text given.
     ///
     /// # Example
     ///
-    /// To split a string delimited by fruit and include the fruit:
-    ///
     /// ```rust
     /// # use regex::bytes::Regex;
     /// # fn main() {
-    /// let re = Regex::new(r"(apple|banana|pear)").unwrap();
-    /// let fields: Vec<&[u8]> = re.split_inclusive(b"apples: 3 bananas: 2 pears: 4").collect();
-    /// assert_eq!(fields, vec![&b""[..], &b"apples: 3 "[..], &b"bananas: 2 "[..], &b"pears: 4"[..]]);
+    /// let re = Regex::new(r"\r?\n").unwrap();
+    /// let text = b"Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
+    /// let v: Vec<&[u8]> = re.split_inclusive(text).collect();
+    /// assert_eq!(v, [
+    ///     &b"Mary had a little lamb\n"[..],
+    ///     &b"little lamb\r\n"[..],
+    ///     &b"little lamb."[..]
+    /// ]);
     /// # }
     /// ```
     pub fn split_inclusive<'r, 't>(
@@ -819,8 +823,8 @@ impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
                 }
             }
             Some(m) => {
-                let matched = &text[self.last..m.start()];
-                self.last = m.start();
+                let matched = &text[self.last..m.end()];
+                self.last = m.end();
                 Some(matched)
             }
         }
diff --git a/src/re_unicode.rs b/src/re_unicode.rs
index 4797ce595..667e032f5 100644
--- a/src/re_unicode.rs
+++ b/src/re_unicode.rs
@@ -379,17 +379,13 @@ impl Regex {
     ///
     /// # Example
     ///
-    /// To split a string delimited by fruit and include the fruit:
-    ///
     /// ```rust
     /// # use regex::Regex;
     /// # fn main() {
-    /// let re = Regex::new(r"(apple|banana|pear)").unwrap();
-    /// let fields: Vec<&str> = re
-    ///     .split_inclusive("apples: 3 bananas: 2 pears: 4")
-    ///     .map(|s| s.trim())
-    ///     .collect();
-    /// assert_eq!(fields, vec!["", "apples: 3", "bananas: 2", "pears: 4"]);
+    /// let re = Regex::new(r"\r?\n").unwrap();
+    /// let text = "Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
+    /// let v: Vec<&str> = re.split_inclusive(text).collect();
+    /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\r\n", "little lamb."]);
     /// # }
     /// ```
     pub fn split_inclusive<'r, 't>(
@@ -864,8 +860,8 @@ impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
                 }
             }
             Some(m) => {
-                let matched = &text[self.last..m.start()];
-                self.last = m.start();
+                let matched = &text[self.last..m.end()];
+                self.last = m.end();
                 Some(matched)
             }
         }