From 088d7f3269665a11aabadd89335eb09316e9c785 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 22 Mar 2024 20:34:26 -0400 Subject: [PATCH] api: add Cow guarantee to replace API This adds a guarantee to the API of the `replace`, `replace_all` and `replacen` routines that, when `Cow::Borrowed` is returned, it is guaranteed that it is equivalent to the `haystack` given. The implementation has always matched this behavior, but this elevates the implementation behavior to an API guarantee. There do exist implementations where this guarantee might not be upheld in every case. For example, if the final result were the empty string, we could return a `Cow::Borrowed`. Similarly, if the final result were a substring of `haystack`, then `Cow::Borrowed` could be returned in that case too. In practice, these sorts of optimizations are tricky to do, and seem like niche corner cases that aren't important to optimize. Nevertheless, having this guarantee is useful because it can be used as a signal that the original input remains unchanged. This came up in discussions with @quicknir on Discord. Namely, in cases where one is doing a sequence of replacements and in most cases nothing is replaced, using a `Cow` is nice to be able to avoid copying the haystack over and over again. But to get this to work right, you have to know whether a `Cow::Borrowed` matches the input or not. If it doesn't, then you'd need to transform it into an owned string. 
For example, this code tries to do replacements on each of a sequence of `Cow` values, where the common case is no replacement: ```rust use std::borrow::Cow; use regex::Regex; fn trim_strs(strs: &mut Vec<Cow<str>>) { strs .iter_mut() .for_each(|s| moo(s, &regex_replace)); } fn moo<F: FnOnce(&str) -> Cow<str>>(c: &mut Cow<str>, f: F) { let result = f(&c); match result { Cow::Owned(s) => *c = Cow::Owned(s), Cow::Borrowed(s) => { *c = Cow::Borrowed(s); } } } fn regex_replace(s: &str) -> Cow<str> { Regex::new(r"does-not-matter").unwrap().replace_all(s, "whatever") } ``` But this doesn't pass `borrowck`. Instead, you could write `moo` like this: ```rust fn moo<F: FnOnce(&str) -> Cow<str>>(c: &mut Cow<str>, f: F) { let result = f(&c); match result { Cow::Owned(s) => *c = Cow::Owned(s), Cow::Borrowed(s) => { if !std::ptr::eq(s, &**c) { *c = Cow::Owned(s.to_owned()) } } } } ``` But the `std::ptr::eq` call here is a bit strange. Instead, after this PR and the new guarantee, one can write it like this: ```rust fn moo<F: FnOnce(&str) -> Cow<str>>(c: &mut Cow<str>, f: F) { if let Cow::Owned(s) = f(&c) { *c = Cow::Owned(s); } } ``` --- src/regex/bytes.rs | 17 +++++++++++++++++ src/regex/string.rs | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/regex/bytes.rs b/src/regex/bytes.rs index ea4f7cd65..7b7aad574 100644 --- a/src/regex/bytes.rs +++ b/src/regex/bytes.rs @@ -651,6 +651,9 @@ impl Regex { /// case, this implementation will likely return a `Cow::Borrowed` value /// such that no allocation is performed. /// + /// When a `Cow::Borrowed` is returned, the value returned is guaranteed + /// to be equivalent to the `haystack` given. + /// /// # Replacement string syntax /// /// All instances of `$ref` in the replacement string are replaced with @@ -761,6 +764,13 @@ impl Regex { /// replacement provided. This is the same as calling `replacen` with /// `limit` set to `0`. /// + /// If no match is found, then the haystack is returned unchanged. 
In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// When a `Cow::Borrowed` is returned, the value returned is guaranteed + /// to be equivalent to the `haystack` given. + /// /// The documentation for [`Regex::replace`] goes into more detail about /// what kinds of replacement strings are supported. /// @@ -855,6 +865,13 @@ impl Regex { /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is /// equivalent to `Regex::replacen(hay, 0, rep)`. /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// When a `Cow::Borrowed` is returned, the value returned is guaranteed + /// to be equivalent to the `haystack` given. + /// /// The documentation for [`Regex::replace`] goes into more detail about /// what kinds of replacement strings are supported. /// diff --git a/src/regex/string.rs b/src/regex/string.rs index 824f45c69..dba94d46e 100644 --- a/src/regex/string.rs +++ b/src/regex/string.rs @@ -642,6 +642,9 @@ impl Regex { /// case, this implementation will likely return a `Cow::Borrowed` value /// such that no allocation is performed. /// + /// When a `Cow::Borrowed` is returned, the value returned is guaranteed + /// to be equivalent to the `haystack` given. + /// /// # Replacement string syntax /// /// All instances of `$ref` in the replacement string are replaced with @@ -748,6 +751,13 @@ impl Regex { /// replacement provided. This is the same as calling `replacen` with /// `limit` set to `0`. /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// When a `Cow::Borrowed` is returned, the value returned is guaranteed + /// to be equivalent to the `haystack` given. 
+ /// /// The documentation for [`Regex::replace`] goes into more detail about /// what kinds of replacement strings are supported. /// @@ -842,6 +852,13 @@ impl Regex { /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is /// equivalent to `Regex::replacen(hay, 0, rep)`. /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// When a `Cow::Borrowed` is returned, the value returned is guaranteed + /// to be equivalent to the `haystack` given. + /// /// The documentation for [`Regex::replace`] goes into more detail about /// what kinds of replacement strings are supported. ///