Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add split_inclusive #917

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
66 changes: 66 additions & 0 deletions src/re_bytes.rs
Expand Up @@ -318,6 +318,35 @@ impl Regex {
Split { finder: self.find_iter(text), last: 0 }
}

/// Splits `text` at every match of this regular expression, keeping each
/// match attached to the end of the substring that precedes it.
///
/// This is the counterpart of `split` for callers that need the
/// terminators: instead of dropping the matched bytes, the iterator yields
/// them as the tail of each element. Text left over after the final match
/// is yielded as a last element without a terminator.
///
/// The yielded substrings borrow from `text`; nothing is copied.
///
/// # Example
///
/// ```rust
/// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"\r?\n").unwrap();
/// let text = b"Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
/// let v: Vec<&[u8]> = re.split_inclusive(text).collect();
/// assert_eq!(v, [
///     &b"Mary had a little lamb\n"[..],
///     &b"little lamb\r\n"[..],
///     &b"little lamb."[..]
/// ]);
/// # }
/// ```
pub fn split_inclusive<'r, 't>(
    &'r self,
    text: &'t [u8],
) -> SplitInclusive<'r, 't> {
    // Splitting always starts at offset 0 of the haystack.
    let finder = self.find_iter(text);
    SplitInclusive { finder, last: 0 }
}

/// Returns an iterator of at most `limit` substrings of `text` delimited
/// by a match of the regular expression. (A `limit` of `0` will return no
/// substrings.) Namely, each element of the iterator corresponds to text
Expand Down Expand Up @@ -767,6 +796,43 @@ impl<'r, 't> Iterator for Split<'r, 't> {

// Marker impl: declares that `Split` keeps returning `None` once `next` has
// returned `None`. The `next` implementation itself is outside this view.
impl<'r, 't> FusedIterator for Split<'r, 't> {}

/// Yields all substrings delimited by a regular expression match inclusive of
/// the match.
///
/// Each yielded substring ends with the match that terminated it. Values of
/// this type are created by `Regex::split_inclusive`.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
#[derive(Debug)]
pub struct SplitInclusive<'r, 't> {
// Iterator over the matches that terminate each substring.
finder: Matches<'r, 't>,
// Offset into the haystack at which the next substring begins; also used
// by `next` to record that the iterator is exhausted.
last: usize,
}

impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
    type Item = &'t [u8];

    /// Returns the next substring, including the match that terminates it.
    ///
    /// While matches remain, each call yields the bytes from the end of the
    /// previous match (or the start of the haystack) through the end of the
    /// current match. Once the matches are exhausted, any non-empty
    /// remainder is yielded exactly once, without a terminator.
    ///
    /// A haystack that is empty, or that ends with a match, produces no
    /// trailing empty element: the final match terminates the last
    /// substring, mirroring `slice::split_inclusive` in the standard
    /// library.
    fn next(&mut self) -> Option<&'t [u8]> {
        let text = self.finder.0.text();
        match self.finder.next() {
            Some(m) => {
                let piece = &text[self.last..m.end()];
                self.last = m.end();
                Some(piece)
            }
            // Nothing left after the last terminator (or already drained).
            None if self.last >= text.len() => None,
            None => {
                let rest = &text[self.last..];
                // Mark the haystack as fully consumed so later calls hit
                // the arm above and return `None`.
                self.last = text.len();
                Some(rest)
            }
        }
    }
}

// Marker impl: declares that `SplitInclusive` keeps returning `None` once it
// is exhausted. `next` records exhaustion in `last`; this also presumes the
// underlying `Matches` iterator keeps returning `None` -- TODO confirm.
impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}

/// Yields at most `N` substrings delimited by a regular expression match.
///
/// The last substring will be whatever remains after splitting.
Expand Down
61 changes: 61 additions & 0 deletions src/re_unicode.rs
Expand Up @@ -371,6 +371,30 @@ impl Regex {
Split { finder: self.find_iter(text), last: 0 }
}

/// Returns an iterator of substrings of `text` separated by a match of the
/// regular expression. Unlike `split`, each element keeps the match that
/// terminated it: the delimiting match appears at the *end* of the
/// element, never at the beginning.
///
/// Text left over after the final match is yielded as a last element
/// without a terminator.
///
/// This method will *not* copy the text given.
///
/// # Example
///
/// ```rust
/// # use regex::Regex;
/// # fn main() {
/// let re = Regex::new(r"\r?\n").unwrap();
/// let text = "Mary had a little lamb\nlittle lamb\r\nlittle lamb.";
/// let v: Vec<&str> = re.split_inclusive(text).collect();
/// assert_eq!(v, [
///     "Mary had a little lamb\n",
///     "little lamb\r\n",
///     "little lamb.",
/// ]);
/// # }
/// ```
pub fn split_inclusive<'r, 't>(
    &'r self,
    text: &'t str,
) -> SplitInclusive<'r, 't> {
    // Splitting always starts at offset 0 of the haystack.
    let finder = self.find_iter(text);
    SplitInclusive { finder, last: 0 }
}

/// Returns an iterator of at most `limit` substrings of `text` delimited
/// by a match of the regular expression. (A `limit` of `0` will return no
/// substrings.) Namely, each element of the iterator corresponds to text
Expand Down Expand Up @@ -809,6 +833,43 @@ impl<'r, 't> Iterator for Split<'r, 't> {

// Marker impl: declares that `Split` keeps returning `None` once `next` has
// returned `None`. The `next` implementation itself is outside this view.
impl<'r, 't> FusedIterator for Split<'r, 't> {}

/// Yields all substrings delimited by a regular expression match inclusive of
/// the match.
///
/// Each yielded substring ends with the match that terminated it. Values of
/// this type are created by `Regex::split_inclusive`.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the string being split.
#[derive(Debug)]
pub struct SplitInclusive<'r, 't> {
// Iterator over the matches that terminate each substring.
finder: Matches<'r, 't>,
// Byte offset into the haystack at which the next substring begins; also
// used by `next` to record that the iterator is exhausted.
last: usize,
}

impl<'r, 't> Iterator for SplitInclusive<'r, 't> {
    type Item = &'t str;

    /// Returns the next substring, including the match that terminates it.
    ///
    /// While matches remain, each call yields the text from the end of the
    /// previous match (or the start of the haystack) through the end of the
    /// current match. Once the matches are exhausted, any non-empty
    /// remainder is yielded exactly once, without a terminator.
    ///
    /// A haystack that is empty, or that ends with a match, produces no
    /// trailing empty element: the final match terminates the last
    /// substring, mirroring `str::split_inclusive` in the standard library.
    fn next(&mut self) -> Option<&'t str> {
        let text = self.finder.0.text();
        match self.finder.next() {
            Some(m) => {
                let piece = &text[self.last..m.end()];
                self.last = m.end();
                Some(piece)
            }
            // Nothing left after the last terminator (or already drained).
            None if self.last >= text.len() => None,
            None => {
                let rest = &text[self.last..];
                // Mark the haystack as fully consumed so later calls hit
                // the arm above and return `None`.
                self.last = text.len();
                Some(rest)
            }
        }
    }
}

// Marker impl: declares that `SplitInclusive` keeps returning `None` once it
// is exhausted. `next` records exhaustion in `last`; this also presumes the
// underlying `Matches` iterator keeps returning `None` -- TODO confirm.
impl<'r, 't> FusedIterator for SplitInclusive<'r, 't> {}

/// Yields at most `N` substrings delimited by a regular expression match.
///
/// The last substring will be whatever remains after splitting.
Expand Down