diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25df2b3014..2813a16763 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,10 +54,14 @@ jobs: os: ubuntu-latest rust: stable target: i686-unknown-linux-gnu - - build: stable-mips + - build: stable-powerpc64 os: ubuntu-latest rust: stable - target: mips64-unknown-linux-gnuabi64 + target: powerpc64-unknown-linux-gnu + - build: stable-s390x + os: ubuntu-latest + rust: stable + target: s390x-unknown-linux-gnu - build: beta os: ubuntu-latest rust: beta @@ -77,7 +81,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust - uses: dtolnay/rust-toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.rust }} - name: Install and configure Cross @@ -92,12 +96,6 @@ jobs: cd "$dir" curl -LO "https://github.com/cross-rs/cross/releases/download/$CROSS_VERSION/cross-x86_64-unknown-linux-musl.tar.gz" tar xf cross-x86_64-unknown-linux-musl.tar.gz - - # We used to install 'cross' from master, but it kept failing. So now - # we build from a known-good version until 'cross' becomes more stable - # or we find an alternative. Notably, between v0.2.1 and current - # master (2022-06-14), the number of Cross's dependencies has doubled. - # cargo install --bins --git https://github.com/rust-embedded/cross --tag v0.2.1 echo "CARGO=cross" >> $GITHUB_ENV echo "TARGET=--target ${{ matrix.target }}" >> $GITHUB_ENV - name: Show command used for Cargo @@ -141,9 +139,28 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust - uses: dtolnay/rust-toolchain@v1 + uses: dtolnay/rust-toolchain@master with: - toolchain: 1.60.0 + toolchain: 1.65.0 + # The memchr 2.6 release purportedly bumped its MSRV to Rust 1.60, but it + # turned out that on aarch64, it was using something that wasn't stabilized + # until Rust 1.61[1]. (This was an oversight on my part. I had previously + # thought everything I needed was on Rust 1.60.) To resolve that, I just + # bumped memchr's MSRV to 1.61. Since it was so soon after the memchr 2.6 + # release, I treated this as a bugfix. + # + # But the regex crate's MSRV is at Rust 1.60, and it now depends on at + # least memchr 2.6 (to make use of its `alloc` feature). So we can't set + # a lower minimal version. And I can't just bump the MSRV in a patch + # release as a bug fix because regex 1.9 was released quite some time ago. + # I could just release regex 1.10 and bump the MSRV there, but eh, I don't + # want to put out another minor version release just for this. + # + # So... pin memchr to 2.6.2, which at least works on x86-64 on Rust 1.60. 
+ # + # [1]: https://github.com/BurntSushi/memchr/issues/136 + - name: Pin memchr to 2.6.2 + run: cargo update -p memchr --precise 2.6.2 - name: Basic build run: cargo build --verbose - name: Build docs run: cargo doc --verbose @@ -162,7 +179,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust - uses: dtolnay/rust-toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable - name: Run full test suite @@ -175,7 +192,7 @@ - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust - uses: dtolnay/rust-toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable - name: Run full test suite @@ -188,7 +205,7 @@ - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust - uses: dtolnay/rust-toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable - name: Run full test suite @@ -201,7 +218,7 @@ - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust - uses: dtolnay/rust-toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable - name: Run full test suite @@ -216,7 +233,7 @@ - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust - uses: dtolnay/rust-toolchain@v1 + uses: dtolnay/rust-toolchain@master with: # We use nightly here so that we can use miri I guess? # It caught me by surprise that miri seems to only be @@ -233,7 +250,7 @@ - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust - uses: dtolnay/rust-toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable components: rustfmt diff --git a/CHANGELOG.md b/CHANGELOG.md index a6a2bcb411..420e08f741 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,198 @@ +1.10.2 (2023-10-16) +=================== +This is a new patch release that fixes a search regression where incorrect +matches could be reported. + +Bug fixes: + +* [BUG #1110](https://github.com/rust-lang/regex/issues/1110): +Revert broadening of reverse suffix literal optimization introduced in 1.10.1. + + +1.10.1 (2023-10-14) +=================== +This is a new patch release with a minor increase in the number of valid +patterns and a broadening of some literal optimizations. + +New features: + +* [FEATURE 04f5d7be](https://github.com/rust-lang/regex/commit/04f5d7be4efc542864cc400f5d43fbea4eb9bab6): +Loosen ASCII-compatible rules such that regexes like `(?-u:☃)` are now allowed. + +Performance improvements: + +* [PERF 8a8d599f](https://github.com/rust-lang/regex/commit/8a8d599f9d2f2d78e9ad84e4084788c2d563afa5): +Broaden the reverse suffix optimization to apply in more cases. + + +1.10.0 (2023-10-09) +=================== +This is a new minor release of `regex` that adds support for start and end +word boundary assertions. That is, `\<` and `\>`. The minimum supported Rust +version has also been raised to 1.65, which was released about one year ago. + +The new word boundary assertions are: + +* `\<` or `\b{start}`: a Unicode start-of-word boundary (`\W|\A` on the left, +`\w` on the right). +* `\>` or `\b{end}`: a Unicode end-of-word boundary (`\w` on the left, `\W|\z` +on the right). +* `\b{start-half}`: half of a Unicode start-of-word boundary (`\W|\A` on the +left). +* `\b{end-half}`: half of a Unicode end-of-word boundary (`\W|\z` on the +right). + +The `\<` and `\>` are GNU extensions to POSIX regexes. They have been added
to the `regex` crate because they enjoy somewhat broad support in other regex
engines as well (for example, vim).
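For example, a minimal sketch using the new assertions (this assumes regex 1.10+ as a dependency and is illustrative only):

```rust
use regex::Regex;

fn main() {
    // \< and \> only match at the start and end of a word, respectively.
    let re = Regex::new(r"\<cat\>").unwrap();
    assert!(re.is_match("the cat sat"));
    // 'cat' occurs inside 'concatenate', but not as a whole word.
    assert!(!re.is_match("concatenate"));
}
```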
The `\b{start}` and `\b{end}` assertions +are aliases for `\<` and `\>`, respectively. + +The `\b{start-half}` and `\b{end-half}` assertions are not found in any +other regex engine (although regex engines with general look-around support +can certainly express them). They were added principally to support the +implementation of word matching in grep programs, where one generally wants to +be a bit more flexible in what is considered a word boundary. + +New features: + +* [FEATURE #469](https://github.com/rust-lang/regex/issues/469): +Add support for `\<` and `\>` word boundary assertions. +* [FEATURE(regex-automata) #1031](https://github.com/rust-lang/regex/pull/1031): +DFAs now have a `start_state` method that doesn't use an `Input`. + +Performance improvements: + +* [PERF #1051](https://github.com/rust-lang/regex/pull/1051): +Unicode character class operations have been optimized in `regex-syntax`. +* [PERF #1090](https://github.com/rust-lang/regex/issues/1090): +Make patterns containing lots of literal characters use less memory. + +Bug fixes: + +* [BUG #1046](https://github.com/rust-lang/regex/issues/1046): +Fix a bug that could result in incorrect match spans when using a Unicode word +boundary and searching non-ASCII strings. +* [BUG(regex-syntax) #1047](https://github.com/rust-lang/regex/issues/1047): +Fix panics that can occur in `Ast->Hir` translation (not reachable from `regex` +crate). +* [BUG(regex-syntax) #1088](https://github.com/rust-lang/regex/issues/1088): +Remove guarantees in the API that connect the `u` flag with a specific HIR +representation. + +`regex-automata` breaking change release: + +This release includes a `regex-automata 0.4.0` breaking change release, which +was necessary in order to support the new word boundary assertions. For +example, the `Look` enum has new variants and the `LookSet` type now uses `u32` +instead of `u16` to represent a bitset of look-around assertions. These are +overall very minor changes, and most users of `regex-automata` should be able +to move to `0.4` from `0.3` without any changes at all. + +`regex-syntax` breaking change release: + +This release also includes a `regex-syntax 0.8.0` breaking change release, +which, like `regex-automata`, was necessary in order to support the new word +boundary assertions. This release also includes some changes to the `Ast` +type to reduce heap usage in some cases. If you are using the `Ast` type +directly, your code may require some minor modifications. Otherwise, users of +`regex-syntax 0.7` should be able to migrate to `0.8` without any code changes. + +`regex-lite` release: + +The `regex-lite 0.1.1` release contains support for the new word boundary +assertions. There are no breaking changes. + + +1.9.6 (2023-09-30) +================== +This is a patch release that fixes a panic that can occur when the default +regex size limit is increased to a large number. + +* [BUG aa4e4c71](https://github.com/rust-lang/regex/commit/aa4e4c7120b0090ce0624e3c42a2ed06dd8b918a): +Fix a bug where computing the maximum haystack length for the bounded +backtracker could result in underflow and thus provoke a panic later in a search +due to a broken invariant. + + +1.9.5 (2023-09-02) +================== +This is a patch release that hopefully mostly fixes a performance bug that +occurs when sharing a regex across multiple threads. + +Issue [#934](https://github.com/rust-lang/regex/issues/934) +explains this in more detail.
It is [also noted in the crate +documentation](https://docs.rs/regex/latest/regex/#sharing-a-regex-across-threads-can-result-in-contention). +The bug can appear when sharing a regex across multiple threads simultaneously, +as might be the case when using a regex from a `OnceLock`, `lazy_static` or +similar primitive. Usually high contention only results when using many threads +to execute searches on small haystacks. + +One can avoid the contention problem entirely through one of two methods. +The first is to use lower level APIs from `regex-automata` that require passing +state explicitly, such as [`meta::Regex::search_with`](https://docs.rs/regex-automata/latest/regex_automata/meta/struct.Regex.html#method.search_with). +The second is to clone a regex and send it to other threads explicitly. This +will not use any additional memory compared to sharing the regex. The +only downside of this approach is that it may be less convenient; for example, +it won't work with things like `OnceLock` or `lazy_static` or `once_cell`. + +With that said, as of this release, the contention performance problems have +been greatly reduced. This was achieved by changing the free-list so that it +was sharded across threads, and by ensuring that each sharded mutex occupies a +single cache line to mitigate false sharing. So while contention may still +impact performance in some cases, it should be a lot better now. + +Because of the changes to how the free-list works, please report any issues you +find with this release. That not only includes search time regressions but also +significant regressions in memory usage. Reporting improvements is welcome +as well! If possible, provide a reproduction. + +Bug fixes: + +* [BUG #934](https://github.com/rust-lang/regex/issues/934): +Fix a performance bug where high contention on a single regex led to massive +slow downs. + + +1.9.4 (2023-08-26) +================== +This is a patch release that fixes a bug where `RegexSet::is_match(..)` could +incorrectly return false (even when `RegexSet::matches(..).matched_any()` +returns true). + +Bug fixes: + +* [BUG #1070](https://github.com/rust-lang/regex/issues/1070): +Fix a bug where a prefilter was incorrectly configured for a `RegexSet`. + + +1.9.3 (2023-08-05) +================== +This is a patch release that fixes a bug where some searches could result in +incorrect match offsets being reported. It is difficult to characterize the +types of regexes susceptible to this bug. They generally involve patterns +that contain no prefix or suffix literals, but have an inner literal along with +a regex prefix that can conditionally match. + +Bug fixes: + +* [BUG #1060](https://github.com/rust-lang/regex/issues/1060): +Fix a bug with the reverse inner literal optimization reporting incorrect match +offsets. + + +1.9.2 (2023-08-05) +================== +This is a patch release that fixes another memory usage regression. This +particular regression occurred only when using a `RegexSet`. In some cases, +much more heap memory (by one or two orders of magnitude) was allocated than in +versions prior to 1.9.0. + +Bug fixes: + +* [BUG #1059](https://github.com/rust-lang/regex/issues/1059): +Fix a memory usage regression when using a `RegexSet`. + + 1.9.1 (2023-07-07) ================== This is a patch release which fixes a memory usage regression.
In the regex diff --git a/Cargo.toml b/Cargo.toml index bfd6aea615..3ba14c904c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex" -version = "1.9.1" #:version +version = "1.10.2" #:version authors = ["The Rust Project Developers", "Andrew Gallant <jamslam@gmail.com>"] license = "MIT OR Apache-2.0" readme = "README.md" @@ -15,7 +15,7 @@ categories = ["text-processing"] autotests = false exclude = ["/scripts/*", "/.github/*"] edition = "2021" -rust-version = "1.60.0" +rust-version = "1.65" [workspace] members = [ @@ -52,6 +52,7 @@ std = [ # to actually emit the log messages somewhere. logging = [ "aho-corasick?/logging", + "memchr?/logging", "regex-automata/logging", ] # The 'use_std' feature is DEPRECATED. It will be removed in regex 2. Until @@ -167,20 +168,20 @@ optional = true # For skipping along search text quickly when a leading byte is known. [dependencies.memchr] -version = "2.5.0" +version = "2.6.0" optional = true # For the actual regex engines. [dependencies.regex-automata] path = "regex-automata" -version = "0.3.1" +version = "0.4.3" default-features = false features = ["alloc", "syntax", "meta", "nfa-pikevm"] # For parsing regular expressions. [dependencies.regex-syntax] path = "regex-syntax" -version = "0.7.3" +version = "0.8.2" default-features = false [dev-dependencies] diff --git a/HACKING.md b/HACKING.md deleted file mode 100644 index 34af5b517c..0000000000 --- a/HACKING.md +++ /dev/null @@ -1,341 +0,0 @@ -Your friendly guide to hacking and navigating the regex library. - -This guide assumes familiarity with Rust and Cargo, and at least a perusal of -the user facing documentation for this crate. - -If you're looking for background on the implementation in this library, then -you can do no better than Russ Cox's article series on implementing regular -expressions using finite automata: https://swtch.com/~rsc/regexp/ - - -## Architecture overview - -As you probably already know, this library executes regular expressions using -finite automata. In particular, a design goal is to make searching linear -with respect to both the regular expression and the text being searched. -Meeting that design goal on its own is not so hard and can be done with an -implementation of the Pike VM (similar to Thompson's construction, but supports -capturing groups), as described in: https://swtch.com/~rsc/regexp/regexp2.html --- This library contains such an implementation in src/pikevm.rs. - -Making it fast is harder. One of the key problems with the Pike VM is that it -can be in more than one state at any point in time, and must shuffle capture -positions between them. The Pike VM also spends a lot of time following the -same epsilon transitions over and over again. We can employ one trick to -speed up the Pike VM: extract one or more literal prefixes from the regular -expression and execute specialized code to quickly find matches of those -prefixes in the search text. The Pike VM can then be avoided for most of the -search, and instead only executed when a prefix is found. The code to find -prefixes is in the regex-syntax crate (in this repository). The code to search -for literals is in src/literals.rs. When more than one literal prefix is found, -we fall back to an Aho-Corasick DFA using the aho-corasick crate. For one -literal, we use a variant of the Boyer-Moore algorithm. Both Aho-Corasick and -Boyer-Moore use `memchr` when appropriate. The Boyer-Moore variant in this -library also uses elementary frequency analysis to choose the right byte to run -`memchr` with.
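To make the prefix-skipping trick concrete, here is a minimal sketch using the `memchr` crate to jump between candidate positions. The `run_engine_at` callback is hypothetical; in the old library this dispatch lived in src/exec.rs and src/literals.rs.

```rust
// Sketch only: find a match by skipping to occurrences of a known literal
// prefix byte, and run the (slow) full engine only at those candidates.
fn find_with_prefix_byte(
    haystack: &[u8],
    prefix: u8,
    run_engine_at: impl Fn(usize) -> Option<(usize, usize)>,
) -> Option<(usize, usize)> {
    let mut at = 0;
    while at < haystack.len() {
        // memchr quickly locates the next possible match start.
        let candidate = at + memchr::memchr(prefix, &haystack[at..])?;
        if let Some(m) = run_engine_at(candidate) {
            return Some(m);
        }
        // No match at this candidate; resume scanning just past it.
        at = candidate + 1;
    }
    None
}
```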
- -Of course, detecting prefix literals can only take us so far. Not all regular -expressions have literal prefixes. To remedy this, we try another approach -to executing the Pike VM: backtracking, whose implementation can be found in -src/backtrack.rs. One reason why backtracking can be faster is that it avoids -excessive shuffling of capture groups. Of course, backtracking is susceptible -to exponential runtimes, so we keep track of every state we've visited to make -sure we never visit it again. This guarantees linear time execution, but we -pay for it with the memory required to track visited states. Because of the -memory requirement, we only use this engine on small search strings *and* small -regular expressions. - -Lastly, the real workhorse of this library is the "lazy" DFA in src/dfa.rs. -It is distinct from the Pike VM in that the DFA is explicitly represented in -memory and is only ever in one state at a time. It is said to be "lazy" because -the DFA is computed as text is searched, where each byte in the search text -results in at most one new DFA state. It is made fast by caching states. DFAs -are susceptible to exponential state blow up (where the worst case is computing -a new state for every input byte, regardless of what's in the state cache). To -avoid using a lot of memory, the lazy DFA uses a bounded cache. Once the cache -is full, it is wiped and state computation starts over again. If the cache is -wiped too frequently, then the DFA gives up and searching falls back to one of -the aforementioned algorithms. - -All of the above matching engines expose precisely the same matching semantics. -This is indeed tested. (See the section below about testing.) - -The following sub-sections describe the rest of the library and how each of the -matching engines are actually used. - -### Parsing - -Regular expressions are parsed using the regex-syntax crate, which is -maintained in this repository. The regex-syntax crate defines an abstract -syntax and provides very detailed error messages when a parse error is -encountered. Parsing is done in a separate crate so that others may benefit -from its existence, and because it is relatively divorced from the rest of the -regex library. - -The regex-syntax crate also provides sophisticated support for extracting -prefix and suffix literals from regular expressions. - -### Compilation - -The compiler is in src/compile.rs. The input to the compiler is some abstract -syntax for a regular expression and the output is a sequence of opcodes that -matching engines use to execute a search. (One can think of matching engines as -mini virtual machines.) The sequence of opcodes is a particular encoding of a -non-deterministic finite automaton. In particular, the opcodes explicitly rely -on epsilon transitions. - -Consider a simple regular expression like `a|b`. Its compiled form looks like -this: - - 000 Save(0) - 001 Split(2, 3) - 002 'a' (goto: 4) - 003 'b' - 004 Save(1) - 005 Match - -The first column is the instruction pointer and the second column is the -instruction. Save instructions indicate that the current position in the input -should be stored in a captured location. Split instructions represent a binary -branch in the program (i.e., epsilon transitions). The instructions `'a'` and -`'b'` indicate that the literal bytes `'a'` or `'b'` should match. 
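For illustration, the opcode listing above might be modeled with a type like the following. This is a sketch with made-up names; the real instruction definitions lived in src/prog.rs.

```rust
// A made-up instruction type mirroring the listing above.
enum Inst {
    // Store the current input position in the given capture slot.
    Save(usize),
    // An epsilon transition that tries both branch targets.
    Split(usize, usize),
    // Match a literal char, then continue at the embedded goto target.
    Char(char, usize),
    // The regex has matched.
    Match,
}

// The compiled form of `a|b`, with goto pointers embedded into the
// instructions so that no separate Jump opcodes are needed.
fn program_for_a_or_b() -> Vec<Inst> {
    vec![
        Inst::Save(0),      // 000
        Inst::Split(2, 3),  // 001
        Inst::Char('a', 4), // 002 (goto: 4)
        Inst::Char('b', 4), // 003
        Inst::Save(1),      // 004
        Inst::Match,        // 005
    ]
}
```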
- -In older versions of this library, the compilation looked like this: - - 000 Save(0) - 001 Split(2, 3) - 002 'a' - 003 Jump(5) - 004 'b' - 005 Save(1) - 006 Match - -In particular, empty instructions that merely served to move execution from one -point in the program to another were removed. Instead, every instruction has a -`goto` pointer embedded into it. This resulted in a small performance boost for -the Pike VM, because it was one fewer epsilon transition that it had to follow. - -There exist more instructions and they are defined and documented in -src/prog.rs. - -Compilation has several knobs and a few unfortunately complicated invariants. -Namely, the output of compilation can be one of two types of programs: a -program that executes on Unicode scalar values or a program that executes -on raw bytes. In the former case, the matching engine is responsible for -performing UTF-8 decoding and executing instructions using Unicode codepoints. -In the latter case, the program handles UTF-8 decoding implicitly, so that the -matching engine can execute on raw bytes. All matching engines can execute -either Unicode or byte based programs except for the lazy DFA, which requires -byte based programs. In general, both representations were kept because (1) the -lazy DFA requires byte based programs so that states can be encoded in a memory -efficient manner and (2) the Pike VM benefits greatly from inlining Unicode -character classes into fewer instructions as it results in fewer epsilon -transitions. - -N.B. UTF-8 decoding is built into the compiled program by making use of the -utf8-ranges crate. The compiler in this library factors out common suffixes to -reduce the size of huge character classes (e.g., `\pL`). - -A regrettable consequence of this split in instruction sets is we generally -need to compile two programs; one for NFA execution and one for the lazy DFA. - -In fact, it is worse than that: the lazy DFA is not capable of finding the -starting location of a match in a single scan, and must instead execute a -backwards search after finding the end location. To execute a backwards search, -we must have compiled the regular expression *in reverse*. - -This means that every compilation of a regular expression generally results in -three distinct programs. It would be possible to lazily compile the Unicode -program, since it is never needed if (1) the regular expression uses no word -boundary assertions and (2) the caller never asks for sub-capture locations. - -### Execution - -At the time of writing, there are four matching engines in this library: - -1. The Pike VM (supports captures). -2. Bounded backtracking (supports captures). -3. Literal substring or multi-substring search. -4. Lazy DFA (no support for Unicode word boundary assertions). - -Only the first two matching engines are capable of executing every regular -expression program. They also happen to be the slowest, which means we need -some logic that (1) knows various facts about the regular expression and (2) -knows what the caller wants. Using this information, we can determine which -engine (or engines) to use. - -The logic for choosing which engine to execute is in src/exec.rs and is -documented on the Exec type. Exec values contain regular expression Programs -(defined in src/prog.rs), which contain all the necessary tidbits for actually -executing a regular expression on search text. 
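As a rough illustration of the kind of decision logic described above (the names and thresholds here are invented; the real logic was documented on the Exec type in src/exec.rs):

```rust
// Illustrative only: pick a matching engine from facts about the regex
// and the request, per the constraints described in this guide.
enum Engine {
    Literal,   // pure literal search (memchr / Aho-Corasick / Boyer-Moore)
    LazyDfa,   // fastest general engine, but no capture support
    Backtrack, // supports captures; safe only for small regex/input pairs
    PikeVm,    // supports everything; slowest
}

fn choose_engine(
    is_pure_literal: bool,
    has_unicode_word_boundary: bool,
    wants_captures: bool,
    program_len: usize,
    haystack_len: usize,
) -> Engine {
    if is_pure_literal {
        Engine::Literal
    } else if !wants_captures && !has_unicode_word_boundary {
        // The lazy DFA never reports capture locations and cannot handle
        // Unicode word boundaries, but it is by far the fastest engine.
        Engine::LazyDfa
    } else if program_len.saturating_mul(haystack_len) <= 1 << 16 {
        // Bounded backtracking tracks visited (instruction, position) pairs,
        // so its memory use scales with program_len * haystack_len.
        Engine::Backtrack
    } else {
        Engine::PikeVm
    }
}
```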
- -For the most part, the execution logic is straight-forward and follows the -limitations of each engine described above pretty faithfully. The hairiest -part of src/exec.rs by far is the execution of the lazy DFA, since it requires -a forwards and backwards search, and then falls back to either the Pike VM or -backtracking if the caller requested capture locations. - -The Exec type also contains mutable scratch space for each type of matching -engine. This scratch space is used during search (for example, for the lazy -DFA, it contains compiled states that are reused on subsequent searches). - -### Programs - -A regular expression program is essentially a sequence of opcodes produced by -the compiler plus various facts about the regular expression (such as whether -it is anchored, its capture names, etc.). - -### The regex! macro - -The `regex!` macro no longer exists. It was developed in a bygone era as a -compiler plugin during the infancy of the regex crate. Back then, the only -matching engine in the crate was the Pike VM. The `regex!` macro was, itself, -also a Pike VM. The only advantages it offered over the dynamic Pike VM that -was built at runtime were the following: - - 1. Syntax checking was done at compile time. Your Rust program wouldn't - compile if your regex didn't compile. - 2. Reduction of overhead that was proportional to the size of the regex. - For the most part, this overhead consisted of heap allocation, which - was nearly eliminated in the compiler plugin. - -The main takeaway here is that the compiler plugin was a marginally faster -version of a slow regex engine. As the regex crate evolved, it grew other regex -engines (DFA, bounded backtracker) and sophisticated literal optimizations. -The regex macro didn't keep pace, and it therefore became (dramatically) slower -than the dynamic engines. The only reason left to use it was for the compile -time guarantee that your regex is correct. Fortunately, Clippy (the Rust lint -tool) has a lint that checks your regular expression validity, which mostly -replaces that use case. - -Additionally, the regex compiler plugin stopped receiving maintenance. Nobody -complained. At that point, it seemed prudent to just remove it. - -Will a compiler plugin be brought back? The future is murky, but there is -definitely an opportunity there to build something that is faster than the -dynamic engines in some cases. But it will be challenging! As of now, there -are no plans to work on this. - - -## Testing - -A key aspect of any mature regex library is its test suite. A subset of the -tests in this library come from Glenn Fowler's AT&T test suite (its online -presence seems gone at the time of writing). The source of the test suite is -located in src/testdata. The scripts/regex-match-tests.py takes the test suite -in src/testdata and generates tests/matches.rs. - -There are also many other manually crafted tests and regression tests in -tests/tests.rs. Some of these tests were taken from RE2. - -The biggest source of complexity in the tests is related to answering this -question: how can we reuse the tests to check all of our matching engines? One -approach would have been to encode every test into some kind of format (like -the AT&T test suite) and code generate tests for each matching engine. The -approach we use in this library is to create a Cargo.toml entry point for each -matching engine we want to test.
The entry points are: - -* `tests/test_default.rs` - tests `Regex::new` -* `tests/test_default_bytes.rs` - tests `bytes::Regex::new` -* `tests/test_nfa.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex. -* `tests/test_nfa_bytes.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex and use *arbitrary* byte based programs. -* `tests/test_nfa_utf8bytes.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex and use *UTF-8* byte based programs. -* `tests/test_backtrack.rs` - tests `Regex::new`, forced to use - backtracking on every regex. -* `tests/test_backtrack_bytes.rs` - tests `Regex::new`, forced to use - backtracking on every regex and use *arbitrary* byte based programs. -* `tests/test_backtrack_utf8bytes.rs` - tests `Regex::new`, forced to use - backtracking on every regex and use *UTF-8* byte based programs. -* `tests/test_crates_regex.rs` - tests to make sure that all of the - backends behave in the same way against a number of quickcheck - generated random inputs. These tests need to be enabled through - the `RUST_REGEX_RANDOM_TEST` environment variable (see - below). - -The lazy DFA and pure literal engines are absent from this list because -they cannot be used on every regular expression. Instead, we rely on -`tests/test_dynamic.rs` to test the lazy DFA and literal engines when possible. - -Since the tests are repeated several times, and because `cargo test` runs all -entry points, it can take a while to compile everything. To reduce compile -times slightly, try using `cargo test --test default`, which will only use the -`tests/test_default.rs` entry point. - -The random testing takes quite a while, so it is not enabled by default. -In order to run the random testing you can set the -`RUST_REGEX_RANDOM_TEST` environment variable to anything before -invoking `cargo test`. Note that this variable is inspected at compile -time, so if the tests don't seem to be running, you may need to run -`cargo clean`. - -## Benchmarking - -The benchmarking in this crate is made up of many micro-benchmarks. Currently, -there are two primary sets of benchmarks: the benchmarks that were adopted -at this library's inception (in `bench/src/misc.rs`) and a newer set of -benchmarks meant to test various optimizations. Specifically, the latter set -contain some analysis and are in `bench/src/sherlock.rs`. Also, the latter -set are all executed on the same lengthy input whereas the former benchmarks -are executed on strings of varying length. - -There is also a smattering of benchmarks for parsing and compilation. - -Benchmarks are in a separate crate so that its dependencies can be managed -separately from the main regex crate. - -Benchmarking follows a similarly wonky setup as tests. There are multiple entry -points: - -* `bench_rust.rs` - benchmarks `Regex::new` -* `bench_rust_bytes.rs` benchmarks `bytes::Regex::new` -* `bench_pcre.rs` - benchmarks PCRE -* `bench_onig.rs` - benchmarks Oniguruma - -The PCRE and Oniguruma benchmarks exist as a comparison point to a mature -regular expression library. In general, this regex library compares favorably -(there are even a few benchmarks that PCRE simply runs too slowly on or -outright can't execute at all). I would love to add other regular expression -library benchmarks (especially RE2). 
- -If you're hacking on one of the matching engines and just want to see -benchmarks, then all you need to run is: - - $ (cd bench && ./run rust) - -If you want to compare your results with older benchmarks, then try: - - $ (cd bench && ./run rust | tee old) - $ ... make it faster - $ (cd bench && ./run rust | tee new) - $ cargo benchcmp old new --improvements - -The `cargo-benchcmp` utility is available here: -https://github.com/BurntSushi/cargo-benchcmp - -The `./bench/run` utility can run benchmarks for PCRE and Oniguruma too. See -`./bench/bench --help`. - -## Dev Docs - -When digging your teeth into the codebase for the first time, the -crate documentation can be a great resource. By default `rustdoc` -will strip out all documentation of private crate members in an -effort to help consumers of the crate focus on the *interface* -without having to concern themselves with the *implementation*. -Normally this is a great thing, but if you want to start hacking -on regex internals it is not what you want. Many of the private members -of this crate are well documented with rustdoc style comments, and -it would be a shame to miss out on the opportunity that presents. -You can generate the private docs with: - -``` -$ rustdoc --crate-name docs src/lib.rs -o target/doc -L target/debug/deps --no-defaults --passes collapse-docs --passes unindent-comments -``` - -Then just point your browser at `target/doc/regex/index.html`. - -See https://github.com/rust-lang/rust/issues/15347 for more info -about generating developer docs for internal use. diff --git a/README.md b/README.md index a9d6fcd373..f1e4c404ad 100644 --- a/README.md +++ b/README.md @@ -219,9 +219,95 @@ The full set of features one can disable are [in the "Crate features" section of the documentation](https://docs.rs/regex/1.*/#crate-features). +### Performance + +One of the goals of this crate is for the regex engine to be "fast." While that +is a somewhat nebulous goal, it is usually interpreted in one of two ways. +First, it means that all searches take worst case `O(m * n)` time, where +`m` is proportional to `len(regex)` and `n` is proportional to `len(haystack)`. +Second, it means that even aside from the time complexity constraint, regex +searches are "fast" in practice. + +While the first interpretation is pretty unambiguous, the second one remains +nebulous. Though nebulous, it guides this crate's architecture and the sorts of +trade offs it makes. For example, here are some general architectural +statements that follow as a result of the goal to be "fast": + +* When given the choice between faster regex searches and faster _Rust compile +times_, this crate will generally choose faster regex searches. +* When given the choice between faster regex searches and faster _regex compile +times_, this crate will generally choose faster regex searches. That is, it is +generally acceptable for `Regex::new` to get a little slower if it means that +searches get faster. (This is a somewhat delicate balance to strike, because +the speed of `Regex::new` needs to remain somewhat reasonable. But this is why +one should avoid re-compiling the same regex over and over again.) +* When given the choice between faster regex searches and simpler API +design, this crate will generally choose faster regex searches. For example, +if one didn't care about performance, we could likely get rid of both of +the `Regex::is_match` and `Regex::find` APIs and instead just rely on +`Regex::captures`. (A brief example of this trade-off follows the list.)
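For illustration, here is a small example, using only the crate's public API, of why the distinction between `Regex::is_match` and `Regex::captures` matters for speed:

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"(\w+)@(\w+)\.com").unwrap();
    let haystack = "send mail to user@example.com";

    // A yes/no answer lets the regex engine use its fastest internal
    // strategy, since no capture group spans need to be resolved.
    assert!(re.is_match(haystack));

    // Resolving capture groups generally requires a slower engine pass,
    // so only ask for them when the group spans are actually needed.
    let caps = re.captures(haystack).unwrap();
    assert_eq!(&caps[1], "user");
    assert_eq!(&caps[2], "example");
}
```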
+ +There are perhaps more ways that being "fast" influences things. + +While this repository used to provide its own benchmark suite, it has since +been moved to [rebar](https://github.com/BurntSushi/rebar). The benchmarks are +quite extensive, and there are many more than what is shown in rebar's README +(which is just limited to a "curated" set meant to compare performance between +regex engines). To run all of this crate's benchmarks, first start by cloning +and installing `rebar`: + +```text +$ git clone https://github.com/BurntSushi/rebar +$ cd rebar +$ cargo install --path ./ +``` + +Then build the benchmark harness for just this crate: + +```text +$ rebar build -e '^rust/regex$' +``` + +Run all benchmarks for this crate as tests (each benchmark is executed once to +ensure it works): + +```text +$ rebar measure -e '^rust/regex$' -t +``` + +Record measurements for all benchmarks and save them to a CSV file: + +```text +$ rebar measure -e '^rust/regex$' | tee results.csv +``` + +Explore benchmark timings: + +```text +$ rebar cmp results.csv +``` + +See the `rebar` documentation for more details on how it works and how to +compare results with other regex engines. + + +### Hacking + +The `regex` crate is, for the most part, a pretty thin wrapper around the +[`meta::Regex`](https://docs.rs/regex-automata/latest/regex_automata/meta/struct.Regex.html) +from the +[`regex-automata` crate](https://docs.rs/regex-automata/latest/regex_automata/). +Therefore, if you're looking to work on the internals of this crate, you'll +likely either want to look in `regex-syntax` (for parsing) or `regex-automata` +(for construction of finite automata and the search routines). + +My [blog on regex internals](https://blog.burntsushi.net/regex-internals/) +goes into more depth. + + ### Minimum Rust version policy -This crate's minimum supported `rustc` version is `1.60.0`. +This crate's minimum supported `rustc` version is `1.65.0`. The policy is that the minimum Rust version required to use this crate can be increased in minor version updates. For example, if regex 1.0 requires Rust diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 0000000000..3cc6a1a7af --- /dev/null +++ b/bench/README.md @@ -0,0 +1,2 @@ +Benchmarks for this crate have been moved into the rebar project: +https://github.com/BurntSushi/rebar diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 8688e73e03..a7eec2c816 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -32,6 +32,9 @@ features = ["atty", "humantime", "termcolor"] [workspace] members = ["."] +# NOTE: If you add a new fuzzer below, please make sure to add it to the +# oss-fuzz-build.sh script, otherwise it won't get run in OSS-fuzz. 
+ [[bin]] name = "fuzz_regex_match" path = "fuzz_targets/fuzz_regex_match.rs" diff --git a/fuzz/ast-fuzzers.options b/fuzz/ast-fuzzers.options new file mode 100644 index 0000000000..678d526b1e --- /dev/null +++ b/fuzz/ast-fuzzers.options @@ -0,0 +1,2 @@ +[libfuzzer] +max_len = 65536 diff --git a/fuzz/fuzz_targets/ast_fuzz_match.rs b/fuzz/fuzz_targets/ast_fuzz_match.rs index 58a8ebbf80..9ccb407dc0 100644 --- a/fuzz/fuzz_targets/ast_fuzz_match.rs +++ b/fuzz/fuzz_targets/ast_fuzz_match.rs @@ -25,11 +25,12 @@ fuzz_target!(|data: FuzzData| -> Corpus { let _ = env_logger::try_init(); let pattern = format!("{}", data.ast); - let Ok(re) = RegexBuilder::new(&pattern).size_limit(1<<20).build() else { + let Ok(re) = RegexBuilder::new(&pattern).size_limit(1 << 20).build() + else { return Corpus::Reject; }; - re.is_match(&data.haystack); - re.find(&data.haystack); - re.captures(&data.haystack).map_or(0, |c| c.len()); + let _ = re.is_match(&data.haystack); + let _ = re.find(&data.haystack); + let _ = re.captures(&data.haystack).map_or(0, |c| c.len()); Corpus::Keep }); diff --git a/fuzz/fuzz_targets/ast_fuzz_match_bytes.rs b/fuzz/fuzz_targets/ast_fuzz_match_bytes.rs index a4fa0bd737..045c1fb18f 100644 --- a/fuzz/fuzz_targets/ast_fuzz_match_bytes.rs +++ b/fuzz/fuzz_targets/ast_fuzz_match_bytes.rs @@ -25,11 +25,12 @@ fuzz_target!(|data: FuzzData| -> Corpus { let _ = env_logger::try_init(); let pattern = format!("{}", data.ast); - let Ok(re) = RegexBuilder::new(&pattern).size_limit(1<<20).build() else { + let Ok(re) = RegexBuilder::new(&pattern).size_limit(1 << 20).build() + else { return Corpus::Reject; }; - re.is_match(&data.haystack); - re.find(&data.haystack); - re.captures(&data.haystack).map_or(0, |c| c.len()); + let _ = re.is_match(&data.haystack); + let _ = re.find(&data.haystack); + let _ = re.captures(&data.haystack).map_or(0, |c| c.len()); Corpus::Keep }); diff --git a/fuzz/fuzz_targets/ast_roundtrip.rs b/fuzz/fuzz_targets/ast_roundtrip.rs index 040b59d631..c35ac962e0 100644 --- a/fuzz/fuzz_targets/ast_roundtrip.rs +++ b/fuzz/fuzz_targets/ast_roundtrip.rs @@ -3,7 +3,7 @@ use { libfuzzer_sys::{fuzz_target, Corpus}, regex_syntax::ast::{ - parse::Parser, visit, Ast, Flag, Group, GroupKind, SetFlags, Visitor, + parse::Parser, visit, Ast, Flag, Flags, GroupKind, Visitor, }, }; @@ -32,16 +32,17 @@ impl Visitor for VerboseVisitor { } fn visit_pre(&mut self, ast: &Ast) -> Result { + let reject_flags = |flags: &Flags| { + flags.flag_state(Flag::IgnoreWhitespace).unwrap_or(false) + }; match ast { - Ast::Flags(SetFlags { flags, .. }) - | Ast::Group(Group { - kind: GroupKind::NonCapturing(flags), .. 
- }) if flags - .flag_state(Flag::IgnoreWhitespace) - .unwrap_or(false) => - { - Err(()) - } + Ast::Flags(x) if reject_flags(&x.flags) => return Err(()), + Ast::Group(x) => match x.kind { + GroupKind::NonCapturing(ref flags) if reject_flags(flags) => { + return Err(()) + } + _ => Ok(()), + }, _ => Ok(()), } } diff --git a/fuzz/fuzz_targets/fuzz_regex_lite_match.rs b/fuzz/fuzz_targets/fuzz_regex_lite_match.rs index 579078c71e..155fa6d8dc 100644 --- a/fuzz/fuzz_targets/fuzz_regex_lite_match.rs +++ b/fuzz/fuzz_targets/fuzz_regex_lite_match.rs @@ -57,8 +57,11 @@ fuzz_target!(|case: FuzzCase| -> Corpus { .dot_matches_new_line(case.dot_matches_new_line) .swap_greed(case.swap_greed) .ignore_whitespace(case.ignore_whitespace) - .size_limit(1<<20) - .build() else { return Corpus::Reject }; + .size_limit(1 << 16) + .build() + else { + return Corpus::Reject; + }; re.is_match(case.haystack); Corpus::Keep }); diff --git a/fuzz/fuzz_targets/fuzz_regex_match.rs b/fuzz/fuzz_targets/fuzz_regex_match.rs index 6c375510d0..a5dda53d65 100644 --- a/fuzz/fuzz_targets/fuzz_regex_match.rs +++ b/fuzz/fuzz_targets/fuzz_regex_match.rs @@ -54,6 +54,9 @@ re.is_match({haystack:?}); fuzz_target!(|case: FuzzCase| -> Corpus { let _ = env_logger::try_init(); + if case.pattern.len() > (16 * (1 << 10)) { + return Corpus::Reject; + } if case.haystack.len() > (16 * (1 << 10)) { return Corpus::Reject; } @@ -65,8 +68,11 @@ fuzz_target!(|case: FuzzCase| -> Corpus { .ignore_whitespace(case.ignore_whitespace) .unicode(case.unicode) .octal(case.octal) - .size_limit(1<<18) - .build() else { return Corpus::Reject }; + .size_limit(1 << 18) + .build() + else { + return Corpus::Reject; + }; re.is_match(case.haystack); Corpus::Keep }); diff --git a/fuzz/oss-fuzz-build.sh b/fuzz/oss-fuzz-build.sh index 38750250b6..81f619dcb5 100755 --- a/fuzz/oss-fuzz-build.sh +++ b/fuzz/oss-fuzz-build.sh @@ -1,4 +1,21 @@ #!/bin/bash -eu + cd $SRC/regex -cargo fuzz build -O --debug-assertions -cp fuzz/target/x86_64-unknown-linux-gnu/release/fuzz_regex_match $OUT/ +cargo fuzz build -O --debug-assertions + +targets=( + fuzz_regex_match + fuzz_regex_lite_match + fuzz_regex_automata_deserialize_dense_dfa + fuzz_regex_automata_deserialize_sparse_dfa + ast_roundtrip + ast_fuzz_match + ast_fuzz_regex + ast_fuzz_match_bytes +) +for target in "${targets[@]}"; do + cp "fuzz/target/x86_64-unknown-linux-gnu/release/${target}" "${OUT}/" + if [[ "$target" == ast_* ]]; then + cp fuzz/ast-fuzzers.options "${OUT}/${target}.options" + fi +done diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match-5990349284442112 b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match-5990349284442112 new file mode 100644 index 0000000000..8de974975d Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match-5990349284442112 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match-6114393576046592 b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match-6114393576046592 new file mode 100644 index 0000000000..a34eeaf2c0 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match-6114393576046592 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match_bytes-4820641084473344 b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match_bytes-4820641084473344 new file mode 100644 index 0000000000..ce5b868b95 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_match_bytes-4820641084473344 differ diff --git 
a/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 new file mode 100644 index 0000000000..711817e4ed Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-4596093180313600 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-6345245270605824 b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-6345245270605824 new file mode 100644 index 0000000000..312767e97b Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_fuzz_regex-6345245270605824 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-ast_roundtrip-5633607856947200 b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_roundtrip-5633607856947200 new file mode 100644 index 0000000000..726609cf21 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-ast_roundtrip-5633607856947200 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5624222820728832 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5624222820728832 new file mode 100644 index 0000000000..e236ae735c Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5624222820728832 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5883983265923072 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5883983265923072 new file mode 100644 index 0000000000..233fcbc950 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5883983265923072 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-6363062083649536 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-6363062083649536 new file mode 100644 index 0000000000..d4a35d1d10 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-6363062083649536 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_sparse_dfa-4903112680538112 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_sparse_dfa-4903112680538112 new file mode 100644 index 0000000000..3056bca2f3 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_sparse_dfa-4903112680538112 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_sparse_dfa-5415338693754880 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_sparse_dfa-5415338693754880 new file mode 100644 index 0000000000..cac835c53e Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_sparse_dfa-5415338693754880 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-4692452983046144 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-4692452983046144 new file mode 100644 index 0000000000..184b6ed701 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-4692452983046144 differ diff --git 
a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-5690981331369984 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-5690981331369984 new file mode 100644 index 0000000000..d892bc31c4 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-5690981331369984 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-5888324890656768 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-5888324890656768 new file mode 100644 index 0000000000..8612658526 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-5888324890656768 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-5736465767989248 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-5736465767989248 new file mode 100644 index 0000000000..30a3a3ba0e Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-5736465767989248 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-6413499984904192 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-6413499984904192 new file mode 100644 index 0000000000..8b24e0a6e0 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-6413499984904192 differ diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-6659953212129280 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-6659953212129280 new file mode 100644 index 0000000000..b8cdc138a4 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_match-6659953212129280 differ diff --git a/regex-automata/Cargo.toml b/regex-automata/Cargo.toml index 86eb7d8f5a..3cb3d7c8e9 100644 --- a/regex-automata/Cargo.toml +++ b/regex-automata/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-automata" -version = "0.3.2" #:version +version = "0.4.3" #:version authors = ["The Rust Project Developers", "Andrew Gallant <jamslam@gmail.com>"] description = "Automata construction and matching using regular expressions."
documentation = "https://docs.rs/regex-automata" @@ -11,6 +11,7 @@ license = "MIT OR Apache-2.0" categories = ["text-processing"] edition = "2021" autoexamples = false +rust-version = "1.65" [lib] bench = false @@ -21,7 +22,7 @@ bench = false default = ["std", "syntax", "perf", "unicode", "meta", "nfa", "dfa", "hybrid"] std = ["regex-syntax?/std", "memchr?/std", "aho-corasick?/std", "alloc"] alloc = [] -logging = ["dep:log", "aho-corasick?/logging"] +logging = ["dep:log", "aho-corasick?/logging", "memchr?/logging"] syntax = ["dep:regex-syntax", "alloc"] @@ -84,8 +85,8 @@ internal-instrument-pikevm = ["logging", "std"] [dependencies] aho-corasick = { version = "1.0.0", optional = true, default-features = false } log = { version = "0.4.14", optional = true } -memchr = { version = "2.5.0", optional = true, default-features = false } -regex-syntax = { path = "../regex-syntax", version = "0.7.0", optional = true, default-features = false } +memchr = { version = "2.6.0", optional = true, default-features = false } +regex-syntax = { path = "../regex-syntax", version = "0.8.2", optional = true, default-features = false } [dev-dependencies] anyhow = "1.0.69" diff --git a/regex-automata/src/dfa/accel.rs b/regex-automata/src/dfa/accel.rs index 5ea2423dd0..c0ba18ea89 100644 --- a/regex-automata/src/dfa/accel.rs +++ b/regex-automata/src/dfa/accel.rs @@ -6,15 +6,16 @@ // non-Unicode regexes. For example, consider '(?-u)[^a]+a'. We can look at its // DFA with regex-cli: // -// $ regex-cli debug dfa dense '(?-u)[^a]+a' -BbC -// dense::DFA( +// $ regex-cli debug dense dfa -p '(?-u)[^a]+a' -BbC --no-table // D 000000: // Q 000001: // *000002: -// A 000003: \x00-` => 3, a => 5, b-\xFF => 3 -// >000004: \x00-` => 3, a => 4, b-\xFF => 3 -// 000005: \x00-\xFF => 2, EOI => 2 -// ) +// A 000003: \x00-` => 3, a => 8, b-\xFF => 3 +// A 000004: \x00-` => 4, a => 7, b-\xFF => 4 +// 000005: \x00-` => 4, b-\xFF => 4 +// 000006: \x00-` => 3, a => 6, b-\xFF => 3 +// 000007: \x00-\xFF => 2, EOI => 2 +// 000008: \x00-\xFF => 2, EOI => 2 // // In particular, state 3 is accelerated (shown via the 'A' indicator) since // the only way to leave that state once entered is to see an 'a' byte. If diff --git a/regex-automata/src/dfa/automaton.rs b/regex-automata/src/dfa/automaton.rs index 7e2be9a151..fcfcf29975 100644 --- a/regex-automata/src/dfa/automaton.rs +++ b/regex-automata/src/dfa/automaton.rs @@ -7,6 +7,7 @@ use crate::{ prefilter::Prefilter, primitives::{PatternID, StateID}, search::{Anchored, HalfMatch, Input, MatchError}, + start, }, }; @@ -226,8 +227,8 @@ pub unsafe trait Automaton { /// ``` fn next_eoi_state(&self, current: StateID) -> StateID; - /// Return the ID of the start state for this lazy DFA when executing a - /// forward search. + /// Return the ID of the start state for this DFA for the given starting + /// configuration. /// /// Unlike typical DFA implementations, the start state for DFAs in this /// crate is dependent on a few different factors: @@ -235,12 +236,41 @@ pub unsafe trait Automaton { /// * The [`Anchored`] mode of the search. Unanchored, anchored and /// anchored searches for a specific [`PatternID`] all use different start /// states. - /// * The position at which the search begins, via [`Input::start`]. This - /// and the byte immediately preceding the start of the search (if one - /// exists) influence which look-behind assertions are true at the start - /// of the search. This in turn influences which start state is selected. - /// * Whether the search is a forward or reverse search. 
This routine can - /// only be used for forward searches. + /// * Whether a "look-behind" byte exists. For example, the `^` anchor + /// matches if and only if there is no look-behind byte. + /// * The specific value of that look-behind byte. For example, a `(?m:^)` + /// assertion only matches when there is either no look-behind byte, or + /// when the look-behind byte is a line terminator. + /// + /// The [starting configuration](start::Config) provides the above + /// information. + /// + /// This routine can be used for either forward or reverse searches. + /// Although, as a convenience, if you have an [`Input`], then it may + /// be more succinct to use [`Automaton::start_state_forward`] or + /// [`Automaton::start_state_reverse`]. Note, for example, that the + /// convenience routines return a [`MatchError`] on failure whereas this + /// routine returns a [`StartError`]. + /// + /// # Errors + /// + /// This may return a [`StartError`] if the search needs to give up when + /// determining the start state (for example, if it sees a "quit" byte). + /// This can also return an error if the given configuration contains an + /// unsupported [`Anchored`] configuration. + fn start_state( + &self, + config: &start::Config, + ) -> Result<StateID, StartError>; + + /// Return the ID of the start state for this DFA when executing a forward + /// search. + /// + /// This is a convenience routine for calling [`Automaton::start_state`] + /// that converts the given [`Input`] to a [start + /// configuration](start::Config). Additionally, if an error occurs, it is + /// converted from a [`StartError`] to a [`MatchError`] using the offset + /// information in the given [`Input`]. /// /// # Errors /// @@ -251,23 +281,30 @@ fn start_state_forward( &self, input: &Input<'_>, - ) -> Result<StateID, MatchError>; + ) -> Result<StateID, MatchError> { + let config = start::Config::from_input_forward(input); + self.start_state(&config).map_err(|err| match err { + StartError::Quit { byte } => { + let offset = input + .start() + .checked_sub(1) + .expect("no quit in start without look-behind"); + MatchError::quit(byte, offset) + } + StartError::UnsupportedAnchored { mode } => { + MatchError::unsupported_anchored(mode) + } + }) + } - /// Return the ID of the start state for this lazy DFA when executing a - /// reverse search. + /// Return the ID of the start state for this DFA when executing a reverse + /// search. /// - /// Unlike typical DFA implementations, the start state for DFAs in this - /// crate is dependent on a few different factors: - /// - /// * The [`Anchored`] mode of the search. Unanchored, anchored and - /// anchored searches for a specific [`PatternID`] all use different start - /// states. - /// * The position at which the search begins, via [`Input::start`]. This - /// and the byte immediately preceding the start of the search (if one - /// exists) influence which look-behind assertions are true at the start - /// of the search. This in turn influences which start state is selected. - /// * Whether the search is a forward or reverse search. This routine can - /// only be used for reverse searches. + /// This is a convenience routine for calling [`Automaton::start_state`] + /// that converts the given [`Input`] to a [start + /// configuration](start::Config). Additionally, if an error occurs, it is + /// converted from a [`StartError`] to a [`MatchError`] using the offset + /// information in the given [`Input`].
/// /// # Errors /// @@ -278,7 +315,18 @@ fn start_state_reverse( &self, input: &Input<'_>, - ) -> Result<StateID, MatchError>; + ) -> Result<StateID, MatchError> { + let config = start::Config::from_input_reverse(input); + self.start_state(&config).map_err(|err| match err { + StartError::Quit { byte } => { + let offset = input.end(); + MatchError::quit(byte, offset) + } + StartError::UnsupportedAnchored { mode } => { + MatchError::unsupported_anchored(mode) + } + }) + } /// If this DFA has a universal starting state for the given anchor mode /// and the DFA supports universal starting states, then this returns that @@ -1084,7 +1132,7 @@ /// // implementation defined. /// // /// // N.B. We get '3' by inspecting the state machine using 'regex-cli'. - /// // e.g., try `regex-cli debug dfa dense '[^abc]+a' -BbUC`. + /// // e.g., try `regex-cli debug dense dfa -p '[^abc]+a' -BbUC`. /// let id = StateID::new(3 * dfa.stride()).unwrap(); /// let accelerator = dfa.accelerator(id); /// // The `[^abc]+` sub-expression permits [a, b, c] to be accelerated. @@ -1798,6 +1846,14 @@ unsafe impl<'a, A: Automaton + ?Sized> Automaton for &'a A { (**self).next_eoi_state(current) } + #[inline] + fn start_state( + &self, + config: &start::Config, + ) -> Result<StateID, StartError> { + (**self).start_state(config) + } + #[inline] fn start_state_forward( &self, @@ -2015,6 +2071,90 @@ impl OverlappingState { } } +/// An error that can occur when computing the start state for a search. +/// +/// Computing a start state can fail for a few reasons, either based on +/// incorrect configuration or even based on whether the look-behind byte +/// triggers a quit state. Typically one does not need to handle this error +/// if you're using [`Automaton::start_state_forward`] (or its reverse +/// counterpart), as that routine automatically converts `StartError` to a +/// [`MatchError`] for you. +/// +/// This error may be returned by the [`Automaton::start_state`] routine. +/// +/// This error implements the `std::error::Error` trait when the `std` feature +/// is enabled. +/// +/// This error is marked as non-exhaustive. New variants may be added in a +/// semver compatible release. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum StartError { + /// An error that occurs when a starting configuration's look-behind byte + /// is in this DFA's quit set. + Quit { + /// The quit byte that was found. + byte: u8, + }, + /// An error that occurs when the caller requests an anchored mode that + /// isn't supported by the DFA. + UnsupportedAnchored { + /// The anchored mode given that is unsupported.
+ mode: Anchored, + }, +} + +impl StartError { + pub(crate) fn quit(byte: u8) -> StartError { + StartError::Quit { byte } + } + + pub(crate) fn unsupported_anchored(mode: Anchored) -> StartError { + StartError::UnsupportedAnchored { mode } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for StartError {} + +impl core::fmt::Display for StartError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + StartError::Quit { byte } => write!( + f, + "error computing start state because the look-behind byte \ + {:?} triggered a quit state", + crate::util::escape::DebugByte(byte), + ), + StartError::UnsupportedAnchored { mode: Anchored::Yes } => { + write!( + f, + "error computing start state because \ + anchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { mode: Anchored::No } => { + write!( + f, + "error computing start state because \ + unanchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { + mode: Anchored::Pattern(pid), + } => { + write!( + f, + "error computing start state because \ + anchored searches for a specific pattern ({}) \ + are not supported or enabled", + pid.as_usize(), + ) + } + } + } +} + /// Runs the given overlapping `search` function (forwards or backwards) until /// a match is found whose offset does not split a codepoint. /// diff --git a/regex-automata/src/dfa/dense.rs b/regex-automata/src/dfa/dense.rs index 35f037ca63..6fc61dc4f5 100644 --- a/regex-automata/src/dfa/dense.rs +++ b/regex-automata/src/dfa/dense.rs @@ -30,7 +30,7 @@ use crate::{ use crate::{ dfa::{ accel::Accels, - automaton::{fmt_state_indicator, Automaton}, + automaton::{fmt_state_indicator, Automaton, StartError}, special::Special, start::StartKind, DEAD, @@ -40,8 +40,8 @@ use crate::{ int::{Pointer, Usize}, prefilter::Prefilter, primitives::{PatternID, StateID}, - search::{Anchored, Input, MatchError}, - start::{Start, StartByteMap}, + search::Anchored, + start::{self, Start, StartByteMap}, wire::{self, DeserializeError, Endian, SerializeError}, }, }; @@ -66,8 +66,9 @@ const VERSION: u32 = 2; /// /// The default configuration guarantees that a search will never return /// a "quit" error, although it is possible for a search to fail if -/// [`Config::starts_for_each_pattern`] wasn't enabled (which it is not by -/// default) and an [`Anchored::Pattern`] mode is requested via [`Input`]. +/// [`Config::starts_for_each_pattern`] wasn't enabled (which it is +/// not by default) and an [`Anchored::Pattern`] mode is requested via +/// [`Input`](crate::Input). #[cfg(feature = "dfa-build")] #[derive(Clone, Debug, Default)] pub struct Config { @@ -113,8 +114,7 @@ impl Config { /// make searching slower than it otherwise would be if the transitions /// that leave accelerated states are traversed frequently. /// - /// See [`Automaton::accelerator`](crate::dfa::Automaton::accelerator) for - /// an example. + /// See [`Automaton::accelerator`] for an example. /// /// This is enabled by default. pub fn accelerate(mut self, yes: bool) -> Config { @@ -879,22 +879,23 @@ impl Config { /// /// ``` /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// # if !cfg!(target_pointer_width = "64") { return Ok(()); } // see #1039 /// use regex_automata::{dfa::{dense, Automaton}, Input}; /// - /// // 600KB isn't enough! + /// // 700KB isn't enough! 
/// dense::Builder::new() /// .configure(dense::Config::new() - /// .determinize_size_limit(Some(600_000)) + /// .determinize_size_limit(Some(700_000)) /// ) /// .build(r"\w{20}") /// .unwrap_err(); /// - /// // ... but 700KB probably is! + /// // ... but 800KB probably is! /// // (Note that auxiliary storage sizes aren't necessarily stable between /// // releases.) /// let dfa = dense::Builder::new() /// .configure(dense::Config::new() - /// .determinize_size_limit(Some(700_000)) + /// .determinize_size_limit(Some(800_000)) /// ) /// .build(r"\w{20}")?; /// let haystack = "A".repeat(20).into_bytes(); @@ -912,6 +913,7 @@ impl Config { /// /// ``` /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// # if !cfg!(target_pointer_width = "64") { return Ok(()); } // see #1039 /// use regex_automata::{ /// dfa::{dense, Automaton, StartKind}, /// Anchored, Input, @@ -1168,7 +1170,10 @@ impl Builder { .clone() // We can always forcefully disable captures because DFAs do not // support them. - .configure(thompson::Config::new().captures(false)) + .configure( + thompson::Config::new() + .which_captures(thompson::WhichCaptures::None), + ) .build_many(patterns) .map_err(BuildError::nfa)?; self.build_from_nfa(&nfa) @@ -1223,13 +1228,14 @@ impl Builder { } else { let mut set = nfa.byte_class_set().clone(); // It is important to distinguish any "quit" bytes from all other - // bytes. Otherwise, a non-quit byte may end up in the same class - // as a quit byte, and thus cause the DFA stop when it shouldn't. + // bytes. Otherwise, a non-quit byte may end up in the same + // class as a quit byte, and thus cause the DFA to stop when it + // shouldn't. // // Test case: // - // regex-cli find hybrid regex -w @conn.json.1000x.log \ - // '^#' '\b10\.55\.182\.100\b' + // regex-cli find match dense --unicode-word-boundary \ + // -p '^#' -p '\b10\.55\.182\.100\b' -y @conn.json.1000x.log if !quitset.is_empty() { set.add_set(&quitset); } @@ -2334,12 +2340,30 @@ impl<'a> DFA<&'a [u32]> { // table, match states and accelerators below. If any validation fails, // then we return an error. let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? }; - dfa.tt.validate(&dfa.special)?; - dfa.st.validate(&dfa.tt)?; + dfa.tt.validate(&dfa)?; + dfa.st.validate(&dfa)?; dfa.ms.validate(&dfa)?; dfa.accels.validate()?; // N.B. dfa.special doesn't have a way to do unchecked deserialization, // so it has already been validated. + for state in dfa.states() { + // If the state is an accel state, then it must have a non-empty + // accelerator. + if dfa.is_accel_state(state.id()) { + let index = dfa.accelerator_index(state.id()); + if index >= dfa.accels.len() { + return Err(DeserializeError::generic( + "found DFA state with invalid accelerator index", + )); + } + let needles = dfa.accels.needles(index); + if !(1 <= needles.len() && needles.len() <= 3) { + return Err(DeserializeError::generic( + "accelerator needles has invalid length", + )); + } + } + } Ok((dfa, nread)) } @@ -2880,31 +2904,33 @@ impl OwnedDFA { fn set_universal_starts(&mut self) { assert_eq!(6, Start::len(), "expected 6 start configurations"); - let start_id = |dfa: &mut OwnedDFA, inp: &Input<'_>, start: Start| { + let start_id = |dfa: &mut OwnedDFA, + anchored: Anchored, + start: Start| { // This OK because we only call 'start' under conditions // in which we know it will succeed. 
-            dfa.st.start(inp, start).expect("valid Input configuration")
+            dfa.st.start(anchored, start).expect("valid Input configuration")
         };
         if self.start_kind().has_unanchored() {
-            let inp = Input::new("").anchored(Anchored::No);
-            let sid = start_id(self, &inp, Start::NonWordByte);
-            if sid == start_id(self, &inp, Start::WordByte)
-                && sid == start_id(self, &inp, Start::Text)
-                && sid == start_id(self, &inp, Start::LineLF)
-                && sid == start_id(self, &inp, Start::LineCR)
-                && sid == start_id(self, &inp, Start::CustomLineTerminator)
+            let anchor = Anchored::No;
+            let sid = start_id(self, anchor, Start::NonWordByte);
+            if sid == start_id(self, anchor, Start::WordByte)
+                && sid == start_id(self, anchor, Start::Text)
+                && sid == start_id(self, anchor, Start::LineLF)
+                && sid == start_id(self, anchor, Start::LineCR)
+                && sid == start_id(self, anchor, Start::CustomLineTerminator)
             {
                 self.st.universal_start_unanchored = Some(sid);
             }
         }
         if self.start_kind().has_anchored() {
-            let inp = Input::new("").anchored(Anchored::Yes);
-            let sid = start_id(self, &inp, Start::NonWordByte);
-            if sid == start_id(self, &inp, Start::WordByte)
-                && sid == start_id(self, &inp, Start::Text)
-                && sid == start_id(self, &inp, Start::LineLF)
-                && sid == start_id(self, &inp, Start::LineCR)
-                && sid == start_id(self, &inp, Start::CustomLineTerminator)
+            let anchor = Anchored::Yes;
+            let sid = start_id(self, anchor, Start::NonWordByte);
+            if sid == start_id(self, anchor, Start::WordByte)
+                && sid == start_id(self, anchor, Start::Text)
+                && sid == start_id(self, anchor, Start::LineLF)
+                && sid == start_id(self, anchor, Start::LineCR)
+                && sid == start_id(self, anchor, Start::CustomLineTerminator)
             {
                 self.st.universal_start_anchored = Some(sid);
             }
@@ -3211,35 +3237,21 @@ unsafe impl<T: AsRef<[u32]>> Automaton for DFA<T> {
     }

     #[cfg_attr(feature = "perf-inline", inline(always))]
-    fn start_state_forward(
+    fn start_state(
         &self,
-        input: &Input<'_>,
-    ) -> Result<StateID, MatchError> {
-        if !self.quitset.is_empty() && input.start() > 0 {
-            let offset = input.start() - 1;
-            let byte = input.haystack()[offset];
-            if self.quitset.contains(byte) {
-                return Err(MatchError::quit(byte, offset));
-            }
-        }
-        let start = self.st.start_map.fwd(&input);
-        self.st.start(input, start)
-    }
-
-    #[cfg_attr(feature = "perf-inline", inline(always))]
-    fn start_state_reverse(
-        &self,
-        input: &Input<'_>,
-    ) -> Result<StateID, MatchError> {
-        if !self.quitset.is_empty() && input.end() < input.haystack().len() {
-            let offset = input.end();
-            let byte = input.haystack()[offset];
-            if self.quitset.contains(byte) {
-                return Err(MatchError::quit(byte, offset));
+        config: &start::Config,
+    ) -> Result<StateID, StartError> {
+        let anchored = config.get_anchored();
+        let start = match config.get_look_behind() {
+            None => Start::Text,
+            Some(byte) => {
+                if !self.quitset.is_empty() && self.quitset.contains(byte) {
+                    return Err(StartError::quit(byte));
+                }
+                self.st.start_map.get(byte)
             }
-        }
-        let start = self.st.start_map.rev(&input);
-        self.st.start(input, start)
+        };
+        self.st.start(anchored, start)
     }

     #[cfg_attr(feature = "perf-inline", inline(always))]
@@ -3581,7 +3593,8 @@ impl<T: AsRef<[u32]>> TransitionTable<T> {
     ///
     /// That is, every state ID can be used to correctly index a state in this
     /// table.
-    fn validate(&self, sp: &Special) -> Result<(), DeserializeError> {
+    fn validate(&self, dfa: &DFA<T>) -> Result<(), DeserializeError> {
+        let sp = &dfa.special;
         for state in self.states() {
             // We check that the ID itself is well formed.
That is, if it's
             // a special state then it must actually be a quit, dead, accel,
             // match or start state.
                         wasn't actually special",
                     ));
                 }
+                if sp.is_match_state(state.id())
+                    && dfa.match_len(state.id()) == 0
+                {
+                    return Err(DeserializeError::generic(
+                        "found match state with zero pattern IDs",
+                    ));
+                }
             }
             for (_, to) in state.transitions() {
                 if !self.is_valid(to) {
@@ -4115,10 +4135,8 @@ impl<T: AsRef<[u32]>> StartTable<T> {
     /// it against the given transition table (which must be for the same DFA).
     ///
     /// That is, every state ID can be used to correctly index a state.
-    fn validate(
-        &self,
-        tt: &TransitionTable<T>,
-    ) -> Result<(), DeserializeError> {
+    fn validate(&self, dfa: &DFA<T>) -> Result<(), DeserializeError> {
+        let tt = &dfa.tt;
         if !self.universal_start_unanchored.map_or(true, |s| tt.is_valid(s)) {
             return Err(DeserializeError::generic(
                 "found invalid universal unanchored starting state ID",
             ));
         }
@@ -4175,28 +4193,27 @@ impl<T: AsRef<[u32]>> StartTable<T> {
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn start(
         &self,
-        input: &Input<'_>,
+        anchored: Anchored,
         start: Start,
-    ) -> Result<StateID, MatchError> {
+    ) -> Result<StateID, StartError> {
         let start_index = start.as_usize();
-        let mode = input.get_anchored();
-        let index = match mode {
+        let index = match anchored {
             Anchored::No => {
                 if !self.kind.has_unanchored() {
-                    return Err(MatchError::unsupported_anchored(mode));
+                    return Err(StartError::unsupported_anchored(anchored));
                 }
                 start_index
             }
             Anchored::Yes => {
                 if !self.kind.has_anchored() {
-                    return Err(MatchError::unsupported_anchored(mode));
+                    return Err(StartError::unsupported_anchored(anchored));
                 }
                 self.stride + start_index
             }
             Anchored::Pattern(pid) => {
                 let len = match self.pattern_len {
                     None => {
-                        return Err(MatchError::unsupported_anchored(mode))
+                        return Err(StartError::unsupported_anchored(anchored))
                     }
                     Some(len) => len,
                 };
@@ -5081,6 +5098,8 @@ impl core::fmt::Display for BuildError {

 #[cfg(all(test, feature = "syntax", feature = "dfa-build"))]
 mod tests {
+    use crate::{Input, MatchError};
+
     use super::*;

     #[test]
diff --git a/regex-automata/src/dfa/mod.rs b/regex-automata/src/dfa/mod.rs
index 4bb8704352..fd58cac23a 100644
--- a/regex-automata/src/dfa/mod.rs
+++ b/regex-automata/src/dfa/mod.rs
@@ -320,7 +320,7 @@ dramatically.

 #[cfg(feature = "dfa-search")]
 pub use crate::dfa::{
-    automaton::{Automaton, OverlappingState},
+    automaton::{Automaton, OverlappingState, StartError},
     start::StartKind,
 };

diff --git a/regex-automata/src/dfa/onepass.rs b/regex-automata/src/dfa/onepass.rs
index 44691d0c8a..e62bbd383e 100644
--- a/regex-automata/src/dfa/onepass.rs
+++ b/regex-automata/src/dfa/onepass.rs
@@ -2581,10 +2581,11 @@ impl Cache {

 /// Represents a single transition in a one-pass DFA.
 ///
-/// The high 24 bits corresponds to the state ID. The low 48 bits corresponds
-/// to the transition epsilons, which contains the slots that should be saved
-/// when this transition is followed and the conditional epsilon transitions
-/// that must be satisfied in order to follow this transition.
+/// The high 21 bits correspond to the state ID. The bit following corresponds
+/// to the special "match wins" flag. The remaining low 42 bits correspond to
+/// the transition epsilons, which contain the slots that should be saved when
+/// this transition is followed and the conditional epsilon transitions that
+/// must be satisfied in order to follow this transition.
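Because the bit budget above is easy to get wrong (21 + 1 + 42 = 64), a self-contained sketch of the packing scheme may help. All names here are illustrative; the crate's actual `Transition` representation is private:

```rust
// Hypothetical constants mirroring the 21/1/42 layout described above.
const STATE_ID_BITS: u32 = 21;
const MATCH_WINS_BITS: u32 = 1;
const EPSILONS_BITS: u32 = 42;

const EPSILONS_MASK: u64 = (1u64 << EPSILONS_BITS) - 1;

fn pack(state_id: u64, match_wins: bool, epsilons: u64) -> u64 {
    assert!(state_id < (1u64 << STATE_ID_BITS));
    assert!(epsilons <= EPSILONS_MASK);
    (state_id << (EPSILONS_BITS + MATCH_WINS_BITS))
        | (u64::from(match_wins) << EPSILONS_BITS)
        | epsilons
}

fn state_id(t: u64) -> u64 {
    t >> (EPSILONS_BITS + MATCH_WINS_BITS)
}

fn match_wins(t: u64) -> bool {
    (t >> EPSILONS_BITS) & 1 == 1
}

fn epsilons(t: u64) -> u64 {
    // Masking on read and on write is the point of the `set_epsilons` and
    // `set_looks` fixes below: without the mask, an over-wide value would
    // clobber the neighboring bit fields.
    t & EPSILONS_MASK
}

fn main() {
    let t = pack((1 << STATE_ID_BITS) - 1, true, 0x2A);
    assert_eq!((1 << STATE_ID_BITS) - 1, state_id(t));
    assert!(match_wins(t));
    assert_eq!(0x2A, epsilons(t));
}
```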
#[derive(Clone, Copy, Eq, PartialEq)] struct Transition(u64); @@ -2741,7 +2742,7 @@ impl PatternEpsilons { fn set_epsilons(self, epsilons: Epsilons) -> PatternEpsilons { PatternEpsilons( (self.0 & PatternEpsilons::PATTERN_ID_MASK) - | u64::from(epsilons.0), + | (u64::from(epsilons.0) & PatternEpsilons::EPSILONS_MASK), ) } } @@ -2814,12 +2815,15 @@ impl Epsilons { /// Return the set of look-around assertions in these epsilon transitions. fn looks(self) -> LookSet { - LookSet { bits: (self.0 & Epsilons::LOOK_MASK).low_u16() } + LookSet { bits: (self.0 & Epsilons::LOOK_MASK).low_u32() } } /// Set the look-around assertions on these epsilon transitions. fn set_looks(self, look_set: LookSet) -> Epsilons { - Epsilons((self.0 & Epsilons::SLOT_MASK) | u64::from(look_set.bits)) + Epsilons( + (self.0 & Epsilons::SLOT_MASK) + | (u64::from(look_set.bits) & Epsilons::LOOK_MASK), + ) } } diff --git a/regex-automata/src/dfa/regex.rs b/regex-automata/src/dfa/regex.rs index f39c1c055c..5e7e6e38ac 100644 --- a/regex-automata/src/dfa/regex.rs +++ b/regex-automata/src/dfa/regex.rs @@ -853,7 +853,7 @@ impl Builder { } /// Set the dense DFA compilation configuration for this builder using - /// [`dense::Config`](dense::Config). + /// [`dense::Config`]. /// /// This permits setting things like whether the underlying DFAs should /// be minimized. diff --git a/regex-automata/src/dfa/search.rs b/regex-automata/src/dfa/search.rs index 8c012a5944..5a82261f97 100644 --- a/regex-automata/src/dfa/search.rs +++ b/regex-automata/src/dfa/search.rs @@ -176,7 +176,6 @@ fn find_fwd_imp( // It's important that this is a debug_assert, since this can // actually be tripped even if DFA::from_bytes succeeds and // returns a supposedly valid DFA. - debug_assert!(dfa.is_quit_state(sid)); return Err(MatchError::quit(input.haystack()[at], at)); } } @@ -297,7 +296,6 @@ fn find_rev_imp( } else if dfa.is_dead_state(sid) { return Ok(mat); } else { - debug_assert!(dfa.is_quit_state(sid)); return Err(MatchError::quit(input.haystack()[at], at)); } } @@ -422,7 +420,6 @@ fn find_overlapping_fwd_imp( } else if dfa.is_dead_state(sid) { return Ok(()); } else { - debug_assert!(dfa.is_quit_state(sid)); return Err(MatchError::quit( input.haystack()[state.at], state.at, @@ -526,7 +523,6 @@ pub(crate) fn find_overlapping_rev( } else if dfa.is_dead_state(sid) { return Ok(()); } else { - debug_assert!(dfa.is_quit_state(sid)); return Err(MatchError::quit( input.haystack()[state.at], state.at, @@ -600,9 +596,6 @@ fn eoi_fwd( let pattern = dfa.match_pattern(*sid, 0); *mat = Some(HalfMatch::new(pattern, input.haystack().len())); } - // N.B. We don't have to check 'is_quit' here because the EOI - // transition can never lead to a quit state. - debug_assert!(!dfa.is_quit_state(*sid)); } } Ok(()) @@ -631,9 +624,6 @@ fn eoi_rev( let pattern = dfa.match_pattern(*sid, 0); *mat = Some(HalfMatch::new(pattern, 0)); } - // N.B. We don't have to check 'is_quit' here because the EOI - // transition can never lead to a quit state. - debug_assert!(!dfa.is_quit_state(*sid)); } Ok(()) } diff --git a/regex-automata/src/dfa/sparse.rs b/regex-automata/src/dfa/sparse.rs index 5d8ec23408..d461e0a0f3 100644 --- a/regex-automata/src/dfa/sparse.rs +++ b/regex-automata/src/dfa/sparse.rs @@ -3,13 +3,12 @@ Types and routines specific to sparse DFAs. This module is the home of [`sparse::DFA`](DFA). -Unlike the [`dense`](super::dense) module, this module does not contain a -builder or configuration specific for sparse DFAs. 
Instead, the intended -way to build a sparse DFA is either by using a default configuration with -its constructor [`sparse::DFA::new`](DFA::new), or by first configuring the -construction of a dense DFA with [`dense::Builder`](super::dense::Builder) -and then calling [`dense::DFA::to_sparse`](super::dense::DFA::to_sparse). For -example, this configures a sparse DFA to do an overlapping search: +Unlike the [`dense`] module, this module does not contain a builder or +configuration specific for sparse DFAs. Instead, the intended way to build a +sparse DFA is either by using a default configuration with its constructor +[`sparse::DFA::new`](DFA::new), or by first configuring the construction of a +dense DFA with [`dense::Builder`] and then calling [`dense::DFA::to_sparse`]. +For example, this configures a sparse DFA to do an overlapping search: ``` use regex_automata::{ @@ -52,7 +51,7 @@ use alloc::{vec, vec::Vec}; use crate::dfa::dense::{self, BuildError}; use crate::{ dfa::{ - automaton::{fmt_state_indicator, Automaton}, + automaton::{fmt_state_indicator, Automaton, StartError}, dense::Flags, special::Special, StartKind, DEAD, @@ -63,8 +62,8 @@ use crate::{ int::{Pointer, Usize, U16, U32}, prefilter::Prefilter, primitives::{PatternID, StateID}, - search::{Anchored, Input, MatchError}, - start::{Start, StartByteMap}, + search::Anchored, + start::{self, Start, StartByteMap}, wire::{self, DeserializeError, Endian, SerializeError}, }, }; @@ -74,18 +73,17 @@ const VERSION: u32 = 2; /// A sparse deterministic finite automaton (DFA) with variable sized states. /// -/// In contrast to a [dense::DFA](crate::dfa::dense::DFA), a sparse DFA uses -/// a more space efficient representation for its transitions. Consequently, -/// sparse DFAs may use much less memory than dense DFAs, but this comes at a -/// price. In particular, reading the more space efficient transitions takes -/// more work, and consequently, searching using a sparse DFA is typically -/// slower than a dense DFA. +/// In contrast to a [dense::DFA], a sparse DFA uses a more space efficient +/// representation for its transitions. Consequently, sparse DFAs may use much +/// less memory than dense DFAs, but this comes at a price. In particular, +/// reading the more space efficient transitions takes more work, and +/// consequently, searching using a sparse DFA is typically slower than a dense +/// DFA. /// /// A sparse DFA can be built using the default configuration via the -/// [`DFA::new`] constructor. Otherwise, one can configure various aspects -/// of a dense DFA via [`dense::Builder`](crate::dfa::dense::Builder), -/// and then convert a dense DFA to a sparse DFA using -/// [`dense::DFA::to_sparse`](crate::dfa::dense::DFA::to_sparse). +/// [`DFA::new`] constructor. Otherwise, one can configure various aspects of a +/// dense DFA via [`dense::Builder`], and then convert a dense DFA to a sparse +/// DFA using [`dense::DFA::to_sparse`]. /// /// In general, a sparse DFA supports all the same search operations as a dense /// DFA. @@ -140,11 +138,9 @@ impl DFA> { /// Parse the given regular expression using a default configuration and /// return the corresponding sparse DFA. /// - /// If you want a non-default configuration, then use - /// the [`dense::Builder`](crate::dfa::dense::Builder) - /// to set your own configuration, and then call - /// [`dense::DFA::to_sparse`](crate::dfa::dense::DFA::to_sparse) to create - /// a sparse DFA. 
+ /// If you want a non-default configuration, then use the + /// [`dense::Builder`] to set your own configuration, and then call + /// [`dense::DFA::to_sparse`] to create a sparse DFA. /// /// # Example /// @@ -167,11 +163,9 @@ impl DFA> { /// Parse the given regular expressions using a default configuration and /// return the corresponding multi-DFA. /// - /// If you want a non-default configuration, then use - /// the [`dense::Builder`](crate::dfa::dense::Builder) - /// to set your own configuration, and then call - /// [`dense::DFA::to_sparse`](crate::dfa::dense::DFA::to_sparse) to create - /// a sparse DFA. + /// If you want a non-default configuration, then use the + /// [`dense::Builder`] to set your own configuration, and then call + /// [`dense::DFA::to_sparse`] to create a sparse DFA. /// /// # Example /// @@ -511,10 +505,9 @@ impl> DFA { /// * [`DFA::from_bytes`] /// * [`DFA::from_bytes_unchecked`] /// - /// Note that unlike a [`dense::DFA`](crate::dfa::dense::DFA)'s - /// serialization methods, this does not add any initial padding to the - /// returned bytes. Padding isn't required for sparse DFAs since they have - /// no alignment requirements. + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. /// /// # Example /// @@ -553,10 +546,9 @@ impl> DFA { /// * [`DFA::from_bytes`] /// * [`DFA::from_bytes_unchecked`] /// - /// Note that unlike a [`dense::DFA`](crate::dfa::dense::DFA)'s - /// serialization methods, this does not add any initial padding to the - /// returned bytes. Padding isn't required for sparse DFAs since they have - /// no alignment requirements. + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. /// /// # Example /// @@ -595,10 +587,9 @@ impl> DFA { /// * [`DFA::from_bytes`] /// * [`DFA::from_bytes_unchecked`] /// - /// Note that unlike a [`dense::DFA`](crate::dfa::dense::DFA)'s - /// serialization methods, this does not add any initial padding to the - /// returned bytes. Padding isn't required for sparse DFAs since they have - /// no alignment requirements. + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. /// /// Generally speaking, native endian format should only be used when /// you know that the target you're compiling the DFA for matches the @@ -903,9 +894,9 @@ impl<'a> DFA<&'a [u8]> { /// /// If any of the above are not true, then an error will be returned. /// - /// Note that unlike deserializing a - /// [`dense::DFA`](crate::dfa::dense::DFA), deserializing a sparse DFA has - /// no alignment requirements. That is, an alignment of `1` is valid. + /// Note that unlike deserializing a [`dense::DFA`], deserializing a sparse + /// DFA has no alignment requirements. That is, an alignment of `1` is + /// valid. /// /// # Panics /// @@ -1001,8 +992,8 @@ impl<'a> DFA<&'a [u8]> { // (by trying to decode every state) and start state ID list below. If // either validation fails, then we return an error. let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? 
}; - dfa.tt.validate(&dfa.special)?; - dfa.st.validate(&dfa.special, &dfa.tt)?; + let seen = dfa.tt.validate(&dfa.special)?; + dfa.st.validate(&dfa.special, &seen)?; // N.B. dfa.special doesn't have a way to do unchecked deserialization, // so it has already been validated. Ok((dfa, nread)) @@ -1207,35 +1198,21 @@ unsafe impl> Automaton for DFA { } #[inline] - fn start_state_forward( + fn start_state( &self, - input: &Input<'_>, - ) -> Result { - if !self.quitset.is_empty() && input.start() > 0 { - let offset = input.start() - 1; - let byte = input.haystack()[offset]; - if self.quitset.contains(byte) { - return Err(MatchError::quit(byte, offset)); - } - } - let start = self.st.start_map.fwd(&input); - self.st.start(input, start) - } - - #[inline] - fn start_state_reverse( - &self, - input: &Input<'_>, - ) -> Result { - if !self.quitset.is_empty() && input.end() < input.haystack().len() { - let offset = input.end(); - let byte = input.haystack()[offset]; - if self.quitset.contains(byte) { - return Err(MatchError::quit(byte, offset)); + config: &start::Config, + ) -> Result { + let anchored = config.get_anchored(); + let start = match config.get_look_behind() { + None => Start::Text, + Some(byte) => { + if !self.quitset.is_empty() && self.quitset.contains(byte) { + return Err(StartError::quit(byte)); + } + self.st.start_map.get(byte) } - } - let start = self.st.start_map.rev(&input); - self.st.start(input, start) + }; + self.st.start(anchored, start) } #[inline] @@ -1411,63 +1388,8 @@ impl> Transitions { /// /// That is, every state ID can be used to correctly index a state in this /// table. - fn validate(&self, sp: &Special) -> Result<(), DeserializeError> { - // In order to validate everything, we not only need to make sure we - // can decode every state, but that every transition in every state - // points to a valid state. There are many duplicative transitions, so - // we record state IDs that we've verified so that we don't redo the - // decoding work. - // - // Except, when in no_std mode, we don't have dynamic memory allocation - // available to us, so we skip this optimization. It's not clear - // whether doing something more clever is worth it just yet. If you're - // profiling this code and need it to run faster, please file an issue. - // - // OK, so we also use this to record the set of valid state IDs. Since - // it is possible for a transition to point to an invalid state ID that - // still (somehow) deserializes to a valid state. So we need to make - // sure our transitions are limited to actually correct state IDs. - // The problem is, I'm not sure how to do this verification step in - // no-std no-alloc mode. I think we'd *have* to store the set of valid - // state IDs in the DFA itself. For now, we don't do this verification - // in no-std no-alloc mode. The worst thing that can happen is an - // incorrect result. But no panics or memory safety problems should - // result. Because we still do validate that the state itself is - // "valid" in the sense that everything it points to actually exists. 
- // - // ---AG - struct Seen { - #[cfg(feature = "alloc")] - set: alloc::collections::BTreeSet, - #[cfg(not(feature = "alloc"))] - set: core::marker::PhantomData, - } - - #[cfg(feature = "alloc")] - impl Seen { - fn new() -> Seen { - Seen { set: alloc::collections::BTreeSet::new() } - } - fn insert(&mut self, id: StateID) { - self.set.insert(id); - } - fn contains(&self, id: &StateID) -> bool { - self.set.contains(id) - } - } - - #[cfg(not(feature = "alloc"))] - impl Seen { - fn new() -> Seen { - Seen { set: core::marker::PhantomData } - } - fn insert(&mut self, _id: StateID) {} - fn contains(&self, _id: &StateID) -> bool { - false - } - } - - let mut verified: Seen = Seen::new(); + fn validate(&self, sp: &Special) -> Result { + let mut verified = Seen::new(); // We need to make sure that we decode the correct number of states. // Otherwise, an empty set of transitions would validate even if the // recorded state length is non-empty. @@ -1544,7 +1466,7 @@ impl> Transitions { "mismatching sparse state length", )); } - Ok(()) + Ok(verified) } /// Converts these transitions to a borrowed value. @@ -1682,7 +1604,7 @@ impl> Transitions { let state = &state[nr..]; if npats == 0 { return Err(DeserializeError::generic( - "state marked as a match, but has no pattern IDs", + "state marked as a match, but pattern length is zero", )); } @@ -1704,6 +1626,21 @@ impl> Transitions { } else { (&[][..], state) }; + if is_match && pattern_ids.is_empty() { + return Err(DeserializeError::generic( + "state marked as a match, but has no pattern IDs", + )); + } + if sp.is_match_state(id) && pattern_ids.is_empty() { + return Err(DeserializeError::generic( + "state marked special as a match, but has no pattern IDs", + )); + } + if sp.is_match_state(id) != is_match { + return Err(DeserializeError::generic( + "whether state is a match or not is inconsistent", + )); + } // Now read this state's accelerator info. The first byte is the length // of the accelerator, which is typically 0 (for no acceleration) but @@ -2084,28 +2021,19 @@ impl> StartTable { fn validate( &self, sp: &Special, - trans: &Transitions, + seen: &Seen, ) -> Result<(), DeserializeError> { for (id, _, _) in self.iter() { + if !seen.contains(&id) { + return Err(DeserializeError::generic( + "found invalid start state ID", + )); + } if sp.is_match_state(id) { return Err(DeserializeError::generic( "start states cannot be match states", )); } - // Confirm that the start state points to a valid state. - let state = trans.try_state(sp, id)?; - // And like for the transition table, confirm that the transitions - // on all start states themselves point to a valid state. - // - // It'd probably be better to integrate this validation with the - // transition table, or otherwise store a sorted sequence of all - // valid state IDs in the sparse DFA itself. That way, we could - // check that every pointer to a state corresponds precisely to a - // correct and valid state. - for i in 0..state.ntrans { - let to = state.next_at(i); - let _ = trans.try_state(sp, to)?; - } } Ok(()) } @@ -2145,28 +2073,27 @@ impl> StartTable { /// panics. 
fn start(
         &self,
-        input: &Input<'_>,
+        anchored: Anchored,
         start: Start,
-    ) -> Result<StateID, MatchError> {
+    ) -> Result<StateID, StartError> {
         let start_index = start.as_usize();
-        let mode = input.get_anchored();
-        let index = match mode {
+        let index = match anchored {
             Anchored::No => {
                 if !self.kind.has_unanchored() {
-                    return Err(MatchError::unsupported_anchored(mode));
+                    return Err(StartError::unsupported_anchored(anchored));
                 }
                 start_index
             }
             Anchored::Yes => {
                 if !self.kind.has_anchored() {
-                    return Err(MatchError::unsupported_anchored(mode));
+                    return Err(StartError::unsupported_anchored(anchored));
                 }
                 self.stride + start_index
             }
             Anchored::Pattern(pid) => {
                 let len = match self.pattern_len {
                     None => {
-                        return Err(MatchError::unsupported_anchored(mode))
+                        return Err(StartError::unsupported_anchored(anchored))
                     }
                     Some(len) => len,
                 };
@@ -2561,6 +2488,62 @@ impl<'a> fmt::Debug for StateMut<'a> {
     }
 }

+// In order to validate everything, we not only need to make sure we
+// can decode every state, but that every transition in every state
+// points to a valid state. There are many duplicative transitions, so
+// we record state IDs that we've verified so that we don't redo the
+// decoding work.
+//
+// Except, when in no_std mode, we don't have dynamic memory allocation
+// available to us, so we skip this optimization. It's not clear
+// whether doing something more clever is worth it just yet. If you're
+// profiling this code and need it to run faster, please file an issue.
+//
+// OK, so we also use this to record the set of valid state IDs. Since
+// it is possible for a transition to point to an invalid state ID that
+// still (somehow) deserializes to a valid state. So we need to make
+// sure our transitions are limited to actually correct state IDs.
+// The problem is, I'm not sure how to do this verification step in
+// no-std no-alloc mode. I think we'd *have* to store the set of valid
+// state IDs in the DFA itself. For now, we don't do this verification
+// in no-std no-alloc mode. The worst thing that can happen is an
+// incorrect result. But no panics or memory safety problems should
+// result. Because we still do validate that the state itself is
+// "valid" in the sense that everything it points to actually exists.
+//
+// ---AG
+#[derive(Debug)]
+struct Seen {
+    #[cfg(feature = "alloc")]
+    set: alloc::collections::BTreeSet<StateID>,
+    #[cfg(not(feature = "alloc"))]
+    set: core::marker::PhantomData<()>,
+}
+
+#[cfg(feature = "alloc")]
+impl Seen {
+    fn new() -> Seen {
+        Seen { set: alloc::collections::BTreeSet::new() }
+    }
+    fn insert(&mut self, id: StateID) {
+        self.set.insert(id);
+    }
+    fn contains(&self, id: &StateID) -> bool {
+        self.set.contains(id)
+    }
+}
+
+#[cfg(not(feature = "alloc"))]
+impl Seen {
+    fn new() -> Seen {
+        Seen { set: core::marker::PhantomData }
+    }
+    fn insert(&mut self, _id: StateID) {}
+    fn contains(&self, _id: &StateID) -> bool {
+        true
+    }
+}
+
/*
/// A binary search routine specialized specifically to a sparse DFA state's
/// transitions.
Specifically, the transitions are defined as a set of pairs
diff --git a/regex-automata/src/hybrid/dfa.rs b/regex-automata/src/hybrid/dfa.rs
index 86963248f7..bd9179b194 100644
--- a/regex-automata/src/hybrid/dfa.rs
+++ b/regex-automata/src/hybrid/dfa.rs
@@ -13,7 +13,7 @@ use alloc::vec::Vec;

 use crate::{
     hybrid::{
-        error::{BuildError, CacheError},
+        error::{BuildError, CacheError, StartError},
         id::{LazyStateID, LazyStateIDError},
         search,
     },
@@ -28,7 +28,7 @@ use crate::{
             Anchored, HalfMatch, Input, MatchError, MatchKind, PatternSet,
         },
         sparse_set::SparseSets,
-        start::{Start, StartByteMap},
+        start::{self, Start, StartByteMap},
     },
 };

@@ -1518,8 +1518,8 @@ impl DFA {
         Lazy::new(self, cache).cache_next_state(current, unit)
     }

-    /// Return the ID of the start state for this lazy DFA when executing a
-    /// forward search.
+    /// Return the ID of the start state for this lazy DFA for the given
+    /// starting configuration.
     ///
     /// Unlike typical DFA implementations, the start state for DFAs in this
     /// crate is dependent on a few different factors:
@@ -1527,85 +1527,122 @@ impl DFA {
     /// * The [`Anchored`] mode of the search. Unanchored, anchored and
     /// anchored searches for a specific [`PatternID`] all use different start
     /// states.
-    /// * The position at which the search begins, via [`Input::start`]. This
-    /// and the byte immediately preceding the start of the search (if one
-    /// exists) influence which look-behind assertions are true at the start
-    /// of the search. This in turn influences which start state is selected.
-    /// * Whether the search is a forward or reverse search. This routine can
-    /// only be used for forward searches.
+    /// * Whether a "look-behind" byte exists. For example, the `^` anchor
+    /// matches if and only if there is no look-behind byte.
+    /// * The specific value of that look-behind byte. For example, a `(?m:^)`
+    /// assertion only matches when there is either no look-behind byte, or
+    /// when the look-behind byte is a line terminator.
+    ///
+    /// The [starting configuration](start::Config) provides the above
+    /// information.
+    ///
+    /// This routine can be used for either forward or reverse searches.
+    /// Although, as a convenience, if you have an [`Input`], then it
+    /// may be more succinct to use [`DFA::start_state_forward`] or
+    /// [`DFA::start_state_reverse`]. Note, for example, that the convenience
+    /// routines return a [`MatchError`] on failure whereas this routine
+    /// returns a [`StartError`].
     ///
     /// # Errors
     ///
-    /// This may return a [`MatchError`] (not a [`CacheError`]!) if the search
-    /// needs to give up when determining the start state (for example, if
-    /// it sees a "quit" byte or if the cache has been cleared too many
-    /// times). This can also return an error if the given `Input` contains an
-    /// unsupported [`Anchored`] configuration.
+    /// This may return a [`StartError`] if the search needs to give up when
+    /// determining the start state (for example, if it sees a "quit" byte
+    /// or if the cache has become inefficient). This can also return an
+    /// error if the given configuration contains an unsupported [`Anchored`]
+    /// configuration.
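As with the dense DFA, a small usage sketch may help. It assumes the same builder-style `util::start::Config` constructors as the earlier dense example (only the getters appear in this diff), so treat that part as illustrative:

```rust
use regex_automata::{hybrid::dfa::DFA, util::start, Anchored};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let dfa = DFA::new(r"(?-u)\b\w+\b")?;
    let mut cache = dfa.create_cache();

    // A word byte as the look-behind means `\b` cannot hold at the search
    // start, which may select a different start state than having no
    // look-behind byte at all.
    let config = start::Config::new()
        .anchored(Anchored::No)
        .look_behind(Some(b'a')); // assumed API
    let sid = dfa.start_state(&mut cache, &config)?;
    assert!(!sid.is_unknown());
    Ok(())
}
```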
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    pub fn start_state(
+        &self,
+        cache: &mut Cache,
+        config: &start::Config,
+    ) -> Result<LazyStateID, StartError> {
+        let lazy = LazyRef::new(self, cache);
+        let anchored = config.get_anchored();
+        let start = match config.get_look_behind() {
+            None => Start::Text,
+            Some(byte) => {
+                if !self.quitset.is_empty() && self.quitset.contains(byte) {
+                    return Err(StartError::quit(byte));
+                }
+                self.start_map.get(byte)
+            }
+        };
+        let start_id = lazy.get_cached_start_id(anchored, start)?;
+        if !start_id.is_unknown() {
+            return Ok(start_id);
+        }
+        Lazy::new(self, cache).cache_start_group(anchored, start)
+    }
+
+    /// Return the ID of the start state for this lazy DFA when executing a
+    /// forward search.
+    ///
+    /// This is a convenience routine for calling [`DFA::start_state`] that
+    /// converts the given [`Input`] to a [start configuration](start::Config).
+    /// Additionally, if an error occurs, it is converted from a [`StartError`]
+    /// to a [`MatchError`] using the offset information in the given
+    /// [`Input`].
+    ///
+    /// # Errors
+    ///
+    /// This may return a [`MatchError`] if the search needs to give up when
+    /// determining the start state (for example, if it sees a "quit" byte or
+    /// if the cache has become inefficient). This can also return an error if
+    /// the given `Input` contains an unsupported [`Anchored`] configuration.
     #[cfg_attr(feature = "perf-inline", inline(always))]
     pub fn start_state_forward(
         &self,
         cache: &mut Cache,
         input: &Input<'_>,
     ) -> Result<LazyStateID, MatchError> {
-        if !self.quitset.is_empty() && input.start() > 0 {
-            let offset = input.start() - 1;
-            let byte = input.haystack()[offset];
-            if self.quitset.contains(byte) {
-                return Err(MatchError::quit(byte, offset));
+        let config = start::Config::from_input_forward(input);
+        self.start_state(cache, &config).map_err(|err| match err {
+            StartError::Cache { .. } => MatchError::gave_up(input.start()),
+            StartError::Quit { byte } => {
+                let offset = input
+                    .start()
+                    .checked_sub(1)
+                    .expect("no quit in start without look-behind");
+                MatchError::quit(byte, offset)
             }
-        }
-        let start_type = self.start_map.fwd(input);
-        let start = LazyRef::new(self, cache)
-            .get_cached_start_id(input, start_type)?;
-        if !start.is_unknown() {
-            return Ok(start);
-        }
-        Lazy::new(self, cache).cache_start_group(input, start_type)
+            StartError::UnsupportedAnchored { mode } => {
+                MatchError::unsupported_anchored(mode)
+            }
+        })
     }

     /// Return the ID of the start state for this lazy DFA when executing a
     /// reverse search.
     ///
-    /// Unlike typical DFA implementations, the start state for DFAs in this
-    /// crate is dependent on a few different factors:
-    ///
-    /// * The [`Anchored`] mode of the search. Unanchored, anchored and
-    /// anchored searches for a specific [`PatternID`] all use different start
-    /// states.
-    /// * The position at which the search begins, via [`Input::start`]. This
-    /// and the byte immediately preceding the start of the search (if one
-    /// exists) influence which look-behind assertions are true at the start
-    /// of the search. This in turn influences which start state is selected.
-    /// * Whether the search is a forward or reverse search. This routine can
-    /// only be used for reverse searches.
+    /// This is a convenience routine for calling [`DFA::start_state`] that
+    /// converts the given [`Input`] to a [start configuration](start::Config).
+ /// Additionally, if an error occurs, it is converted from a [`StartError`] + /// to a [`MatchError`] using the offset information in the given + /// [`Input`]. /// /// # Errors /// - /// This may return a [`MatchError`] (not a [`CacheError`]!) if the search - /// needs to give up when determining the start state (for example, if - /// it sees a "quit" byte or if the cache has been cleared too many - /// times). This can also return an error if the given `Input` contains an - /// unsupported [`Anchored`] configuration. + /// This may return a [`MatchError`] if the search needs to give up when + /// determining the start state (for example, if it sees a "quit" byte or + /// if the cache has become inefficient). This can also return an error if + /// the given `Input` contains an unsupported [`Anchored`] configuration. #[cfg_attr(feature = "perf-inline", inline(always))] pub fn start_state_reverse( &self, cache: &mut Cache, input: &Input<'_>, ) -> Result { - if !self.quitset.is_empty() && input.end() < input.haystack().len() { - let offset = input.end(); - let byte = input.haystack()[offset]; - if self.quitset.contains(byte) { - return Err(MatchError::quit(byte, offset)); + let config = start::Config::from_input_reverse(input); + self.start_state(cache, &config).map_err(|err| match err { + StartError::Cache { .. } => MatchError::gave_up(input.end()), + StartError::Quit { byte } => { + let offset = input.end(); + MatchError::quit(byte, offset) } - } - let start_type = self.start_map.rev(input); - let start = LazyRef::new(self, cache) - .get_cached_start_id(input, start_type)?; - if !start.is_unknown() { - return Ok(start); - } - Lazy::new(self, cache).cache_start_group(input, start_type) + StartError::UnsupportedAnchored { mode } => { + MatchError::unsupported_anchored(mode) + } + }) } /// Returns the total number of patterns that match in this state. @@ -2066,8 +2103,10 @@ impl<'i, 'c> Lazy<'i, 'c> { /// Here's an example that justifies 'inline(never)' /// /// ```ignore - /// regex-cli find hybrid dfa \ - /// @all-codepoints-utf8-100x '\pL{100}' --cache-capacity 10000000 + /// regex-cli find match hybrid \ + /// --cache-capacity 100000000 \ + /// -p '\pL{100}' + /// all-codepoints-utf8-100x /// ``` /// /// Where 'all-codepoints-utf8-100x' is the UTF-8 encoding of every @@ -2122,16 +2161,15 @@ impl<'i, 'c> Lazy<'i, 'c> { #[inline(never)] fn cache_start_group( &mut self, - input: &Input<'_>, + anchored: Anchored, start: Start, - ) -> Result { - let mode = input.get_anchored(); - let nfa_start_id = match mode { + ) -> Result { + let nfa_start_id = match anchored { Anchored::No => self.dfa.get_nfa().start_unanchored(), Anchored::Yes => self.dfa.get_nfa().start_anchored(), Anchored::Pattern(pid) => { if !self.dfa.get_config().get_starts_for_each_pattern() { - return Err(MatchError::unsupported_anchored(mode)); + return Err(StartError::unsupported_anchored(anchored)); } match self.dfa.get_nfa().start_pattern(pid) { None => return Ok(self.as_ref().dead_id()), @@ -2142,8 +2180,8 @@ impl<'i, 'c> Lazy<'i, 'c> { let id = self .cache_start_one(nfa_start_id, start) - .map_err(|_| MatchError::gave_up(input.start()))?; - self.set_start_state(input, start, id); + .map_err(StartError::cache)?; + self.set_start_state(anchored, start, id); Ok(id) } @@ -2574,13 +2612,13 @@ impl<'i, 'c> Lazy<'i, 'c> { /// 'starts_for_each_pattern' is not enabled. 
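The quit-byte arm of this conversion can be exercised end to end. A sketch using the existing `Config::quit` knob, assuming `MatchError` supports `==` comparison (it is compared this way elsewhere in the crate's docs):

```rust
use regex_automata::{hybrid::dfa::DFA, Input, MatchError};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Make 0xFF a quit byte so that start state computation can fail.
    let dfa = DFA::builder()
        .configure(DFA::config().quit(b'\xFF', true))
        .build(r"[a-z]+")?;
    let mut cache = dfa.create_cache();

    // Searching the suffix starting at offset 2 makes the look-behind
    // byte (offset 1) the quit byte. The convenience routine converts
    // `StartError::Quit` into `MatchError::quit` at that offset.
    let haystack = b"a\xFFbcd";
    let input = Input::new(&haystack[..]).range(2..);
    let err = dfa.start_state_forward(&mut cache, &input).unwrap_err();
    assert_eq!(MatchError::quit(0xFF, 1), err);
    Ok(())
}
```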
fn set_start_state( &mut self, - input: &Input<'_>, + anchored: Anchored, start: Start, id: LazyStateID, ) { assert!(self.as_ref().is_valid(id)); let start_index = start.as_usize(); - let index = match input.get_anchored() { + let index = match anchored { Anchored::No => start_index, Anchored::Yes => Start::len() + start_index, Anchored::Pattern(pid) => { @@ -2642,17 +2680,16 @@ impl<'i, 'c> LazyRef<'i, 'c> { #[cfg_attr(feature = "perf-inline", inline(always))] fn get_cached_start_id( &self, - input: &Input<'_>, + anchored: Anchored, start: Start, - ) -> Result { + ) -> Result { let start_index = start.as_usize(); - let mode = input.get_anchored(); - let index = match mode { + let index = match anchored { Anchored::No => start_index, Anchored::Yes => Start::len() + start_index, Anchored::Pattern(pid) => { if !self.dfa.get_config().get_starts_for_each_pattern() { - return Err(MatchError::unsupported_anchored(mode)); + return Err(StartError::unsupported_anchored(anchored)); } if pid.as_usize() >= self.dfa.pattern_len() { return Ok(self.dead_id()); @@ -3178,12 +3215,12 @@ impl Config { /// be quit bytes _only_ when a Unicode word boundary is present in the /// pattern. /// - /// When enabling this option, callers _must_ be prepared to handle - /// a [`MatchError`](crate::MatchError) error during search. - /// When using a [`Regex`](crate::hybrid::regex::Regex), this - /// corresponds to using the `try_` suite of methods. Alternatively, - /// if callers can guarantee that their input is ASCII only, then a - /// [`MatchError::quit`] error will never be returned while searching. + /// When enabling this option, callers _must_ be prepared to + /// handle a [`MatchError`] error during search. When using a + /// [`Regex`](crate::hybrid::regex::Regex), this corresponds to using the + /// `try_` suite of methods. Alternatively, if callers can guarantee that + /// their input is ASCII only, then a [`MatchError::quit`] error will never + /// be returned while searching. /// /// This is disabled by default. /// @@ -3269,8 +3306,8 @@ impl Config { /// (The advantage being that non-ASCII quit bytes will only be added if a /// Unicode word boundary is in the pattern.) /// - /// When enabling this option, callers _must_ be prepared to handle a - /// [`MatchError`](crate::MatchError) error during search. When using a + /// When enabling this option, callers _must_ be prepared to + /// handle a [`MatchError`] error during search. When using a /// [`Regex`](crate::hybrid::regex::Regex), this corresponds to using the /// `try_` suite of methods. /// @@ -3795,8 +3832,8 @@ impl Config { // // Test case: // - // regex-cli find hybrid regex -w @conn.json.1000x.log \ - // '^#' '\b10\.55\.182\.100\b' + // regex-cli find match hybrid --unicode-word-boundary \ + // -p '^#' -p '\b10\.55\.182\.100\b' -y @conn.json.1000x.log if !quit.is_empty() { set.add_set(&quit); } @@ -3973,7 +4010,10 @@ impl Builder { .clone() // We can always forcefully disable captures because DFAs do not // support them. 
- .configure(thompson::Config::new().captures(false)) + .configure( + thompson::Config::new() + .which_captures(thompson::WhichCaptures::None), + ) .build_many(patterns) .map_err(BuildError::nfa)?; self.build_from_nfa(nfa) diff --git a/regex-automata/src/hybrid/error.rs b/regex-automata/src/hybrid/error.rs index 604daf3c38..d134e7ec90 100644 --- a/regex-automata/src/hybrid/error.rs +++ b/regex-automata/src/hybrid/error.rs @@ -1,4 +1,4 @@ -use crate::{hybrid::id::LazyStateIDError, nfa}; +use crate::{hybrid::id::LazyStateIDError, nfa, util::search::Anchored}; /// An error that occurs when initial construction of a lazy DFA fails. /// @@ -95,6 +95,113 @@ impl core::fmt::Display for BuildError { } } +/// An error that can occur when computing the start state for a search. +/// +/// Computing a start state can fail for a few reasons, either +/// based on incorrect configuration or even based on whether +/// the look-behind byte triggers a quit state. Typically +/// one does not need to handle this error if you're using +/// [`DFA::start_state_forward`](crate::hybrid::dfa::DFA::start_state_forward) +/// (or its reverse counterpart), as that routine automatically converts +/// `StartError` to a [`MatchError`](crate::MatchError) for you. +/// +/// This error may be returned by the +/// [`DFA::start_state`](crate::hybrid::dfa::DFA::start_state) routine. +/// +/// This error implements the `std::error::Error` trait when the `std` feature +/// is enabled. +/// +/// This error is marked as non-exhaustive. New variants may be added in a +/// semver compatible release. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum StartError { + /// An error that occurs when cache inefficiency has dropped below the + /// configured heuristic thresholds. + Cache { + /// The underlying cache error that occurred. + err: CacheError, + }, + /// An error that occurs when a starting configuration's look-behind byte + /// is in this DFA's quit set. + Quit { + /// The quit byte that was found. + byte: u8, + }, + /// An error that occurs when the caller requests an anchored mode that + /// isn't supported by the DFA. + UnsupportedAnchored { + /// The anchored mode given that is unsupported. + mode: Anchored, + }, +} + +impl StartError { + pub(crate) fn cache(err: CacheError) -> StartError { + StartError::Cache { err } + } + + pub(crate) fn quit(byte: u8) -> StartError { + StartError::Quit { byte } + } + + pub(crate) fn unsupported_anchored(mode: Anchored) -> StartError { + StartError::UnsupportedAnchored { mode } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for StartError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match *self { + StartError::Cache { ref err } => Some(err), + _ => None, + } + } +} + +impl core::fmt::Display for StartError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + StartError::Cache { .. 
} => write!( + f, + "error computing start state because of cache inefficiency" + ), + StartError::Quit { byte } => write!( + f, + "error computing start state because the look-behind byte \ + {:?} triggered a quit state", + crate::util::escape::DebugByte(byte), + ), + StartError::UnsupportedAnchored { mode: Anchored::Yes } => { + write!( + f, + "error computing start state because \ + anchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { mode: Anchored::No } => { + write!( + f, + "error computing start state because \ + unanchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { + mode: Anchored::Pattern(pid), + } => { + write!( + f, + "error computing start state because \ + anchored searches for a specific pattern ({}) \ + are not supported or enabled", + pid.as_usize(), + ) + } + } + } +} + /// An error that occurs when cache usage has become inefficient. /// /// One of the weaknesses of a lazy DFA is that it may need to clear its @@ -126,11 +233,7 @@ impl CacheError { } #[cfg(feature = "std")] -impl std::error::Error for CacheError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - None - } -} +impl std::error::Error for CacheError {} impl core::fmt::Display for CacheError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { diff --git a/regex-automata/src/hybrid/mod.rs b/regex-automata/src/hybrid/mod.rs index 44e67e1299..2feb839d16 100644 --- a/regex-automata/src/hybrid/mod.rs +++ b/regex-automata/src/hybrid/mod.rs @@ -133,7 +133,7 @@ compiled DFAs. */ pub use self::{ - error::{BuildError, CacheError}, + error::{BuildError, CacheError, StartError}, id::LazyStateID, }; diff --git a/regex-automata/src/hybrid/regex.rs b/regex-automata/src/hybrid/regex.rs index 75667daf91..b3b1fe317d 100644 --- a/regex-automata/src/hybrid/regex.rs +++ b/regex-automata/src/hybrid/regex.rs @@ -878,7 +878,7 @@ impl Builder { } /// Set the lazy DFA compilation configuration for this builder using - /// [`dfa::Config`](dfa::Config). + /// [`dfa::Config`]. /// /// This permits setting things like whether Unicode word boundaries should /// be heuristically supported or settings how the behavior of the cache. diff --git a/regex-automata/src/hybrid/search.rs b/regex-automata/src/hybrid/search.rs index f232836854..1f4a505db4 100644 --- a/regex-automata/src/hybrid/search.rs +++ b/regex-automata/src/hybrid/search.rs @@ -105,14 +105,14 @@ fn find_fwd_imp( // PERF: For justification of omitting bounds checks, it gives us a // ~10% bump in search time. This was used for a benchmark: // - // regex-cli find hybrid dfa @bigfile '(?m)^.+$' -UBb + // regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile // // PERF: For justification for the loop unrolling, we use a few // different tests: // - // regex-cli find hybrid dfa @$bigfile '\w{50}' -UBb - // regex-cli find hybrid dfa @$bigfile '(?m)^.+$' -UBb - // regex-cli find hybrid dfa @$bigfile 'ZQZQZQZQ' -UBb + // regex-cli find half hybrid -p '\w{50}' -UBb bigfile + // regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile + // regex-cli find half hybrid -p 'ZQZQZQZQ' -UBb bigfile // // And there are three different configurations: // @@ -353,7 +353,7 @@ fn find_rev_imp( // anchored and on shorter haystacks. However, this still makes a // difference. 
Take this command for example: // - // regex-cli find hybrid regex @$bigfile '(?m)^.+$' -UBb + // regex-cli find match hybrid -p '(?m)^.+$' -UBb bigfile // // (Notice that we use 'find hybrid regex', not 'find hybrid dfa' // like in the justification for the forward direction. The 'regex' diff --git a/regex-automata/src/meta/limited.rs b/regex-automata/src/meta/limited.rs index 005878acdb..5653adc9aa 100644 --- a/regex-automata/src/meta/limited.rs +++ b/regex-automata/src/meta/limited.rs @@ -69,9 +69,6 @@ pub(crate) fn dfa_try_search_half_rev( } else if dfa.is_dead_state(sid) { return Ok(mat); } else if dfa.is_quit_state(sid) { - if mat.is_some() { - return Ok(mat); - } return Err(MatchError::quit(input.haystack()[at], at).into()); } } @@ -88,7 +85,41 @@ pub(crate) fn dfa_try_search_half_rev( return Err(RetryError::Quadratic(RetryQuadraticError::new())); } } + let was_dead = dfa.is_dead_state(sid); dfa_eoi_rev(dfa, input, &mut sid, &mut mat)?; + // If we reach the beginning of the search and we could otherwise still + // potentially keep matching if there was more to match, then we actually + // return an error to indicate giving up on this optimization. Why? Because + // we can't prove that the real match begins at where we would report it. + // + // This only happens when all of the following are true: + // + // 1) We reach the starting point of our search span. + // 2) The match we found is before the starting point. + // 3) The FSM reports we could possibly find a longer match. + // + // We need (1) because otherwise the search stopped before the starting + // point and there is no possible way to find a more leftmost position. + // + // We need (2) because if the match found has an offset equal to the minimum + // possible offset, then there is no possible more leftmost match. + // + // We need (3) because if the FSM couldn't continue anyway (i.e., it's in + // a dead state), then we know we couldn't find anything more leftmost + // than what we have. (We have to check the state we were in prior to the + // EOI transition since the EOI transition will usually bring us to a dead + // state by virtue of it represents the end-of-input.) + if at == input.start() + && mat.map_or(false, |m| m.offset() > input.start()) + && !was_dead + { + trace!( + "reached beginning of search at offset {} without hitting \ + a dead state, quitting to avoid potential false positive match", + at, + ); + return Err(RetryError::Quadratic(RetryQuadraticError::new())); + } Ok(mat) } @@ -121,9 +152,6 @@ pub(crate) fn hybrid_try_search_half_rev( } else if sid.is_dead() { return Ok(mat); } else if sid.is_quit() { - if mat.is_some() { - return Ok(mat); - } return Err(MatchError::quit(input.haystack()[at], at).into()); } } @@ -140,7 +168,20 @@ pub(crate) fn hybrid_try_search_half_rev( return Err(RetryError::Quadratic(RetryQuadraticError::new())); } } + let was_dead = sid.is_dead(); hybrid_eoi_rev(dfa, cache, input, &mut sid, &mut mat)?; + // See the comments in the full DFA routine above for why we need this. 
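Restated as a standalone predicate with illustrative names (this helper is not part of the crate), the bail-out condition shared by the dense and hybrid routines is:

```rust
/// True if the reverse scan must give up because it cannot prove that the
/// reported match start is actually leftmost.
fn must_give_up(
    at: usize,                  // offset where the reverse scan stopped
    search_start: usize,        // start of the search span
    match_start: Option<usize>, // start offset of the match found, if any
    fsm_was_dead: bool,         // was the FSM dead before the EOI transition?
) -> bool {
    // (1) the scan reached the span's starting point...
    at == search_start
        // (2) ...the match does not already start at the minimal offset...
        && match_start.map_or(false, |m| m > search_start)
        // (3) ...and the FSM could still have kept matching.
        && !fsm_was_dead
}

fn main() {
    // A match at the minimal offset can always be trusted.
    assert!(!must_give_up(0, 0, Some(0), false));
    // Stopping at the span start while still "alive" means leftmost-ness
    // cannot be proven, so the optimization must be retried another way.
    assert!(must_give_up(0, 0, Some(3), false));
    // A dead FSM proves nothing more leftmost was possible.
    assert!(!must_give_up(0, 0, Some(3), true));
}
```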
+ if at == input.start() + && mat.map_or(false, |m| m.offset() > input.start()) + && !was_dead + { + trace!( + "reached beginning of search at offset {} without hitting \ + a dead state, quitting to avoid potential false positive match", + at, + ); + return Err(RetryError::Quadratic(RetryQuadraticError::new())); + } Ok(mat) } @@ -162,9 +203,6 @@ fn dfa_eoi_rev( let pattern = dfa.match_pattern(*sid, 0); *mat = Some(HalfMatch::new(pattern, sp.start)); } else if dfa.is_quit_state(*sid) { - if mat.is_some() { - return Ok(()); - } return Err(MatchError::quit(byte, sp.start - 1)); } } else { @@ -199,9 +237,6 @@ fn hybrid_eoi_rev( let pattern = dfa.match_pattern(cache, *sid, 0); *mat = Some(HalfMatch::new(pattern, sp.start)); } else if sid.is_quit() { - if mat.is_some() { - return Ok(()); - } return Err(MatchError::quit(byte, sp.start - 1)); } } else { diff --git a/regex-automata/src/meta/regex.rs b/regex-automata/src/meta/regex.rs index 6e16ceedb6..a06d2bb48c 100644 --- a/regex-automata/src/meta/regex.rs +++ b/regex-automata/src/meta/regex.rs @@ -16,6 +16,7 @@ use crate::{ strategy::{self, Strategy}, wrappers, }, + nfa::thompson::WhichCaptures, util::{ captures::{Captures, GroupInfo}, iter, @@ -528,7 +529,14 @@ impl Regex { #[inline] pub fn is_match<'h, I: Into>>(&self, input: I) -> bool { let input = input.into().earliest(true); - self.search_half(&input).is_some() + if self.imp.info.is_impossible(&input) { + return false; + } + let mut guard = self.pool.get(); + let result = self.imp.strat.is_match(&mut guard, &input); + // See 'Regex::search' for why we put the guard back explicitly. + PoolGuard::put(guard); + result } /// Executes a leftmost search and returns the first match that is found, @@ -2429,6 +2437,7 @@ pub struct Config { utf8_empty: Option, autopre: Option, pre: Option>, + which_captures: Option, nfa_size_limit: Option>, onepass_size_limit: Option>, hybrid_cache_capacity: Option, @@ -2619,6 +2628,77 @@ impl Config { Config { pre: Some(pre), ..self } } + /// Configures what kinds of groups are compiled as "capturing" in the + /// underlying regex engine. + /// + /// This is set to [`WhichCaptures::All`] by default. Callers may wish to + /// use [`WhichCaptures::Implicit`] in cases where one wants avoid the + /// overhead of capture states for explicit groups. + /// + /// Note that another approach to avoiding the overhead of capture groups + /// is by using non-capturing groups in the regex pattern. That is, + /// `(?:a)` instead of `(a)`. This option is useful when you can't control + /// the concrete syntax but know that you don't need the underlying capture + /// states. For example, using `WhichCaptures::Implicit` will behave as if + /// all explicit capturing groups in the pattern were non-capturing. + /// + /// Setting this to `WhichCaptures::None` is usually not the right thing to + /// do. When no capture states are compiled, some regex engines (such as + /// the `PikeVM`) won't be able to report match offsets. This will manifest + /// as no match being found. + /// + /// # Example + /// + /// This example demonstrates how the results of capture groups can change + /// based on this option. 
First we show the default (all capture groups in + /// the pattern are capturing): + /// + /// ``` + /// use regex_automata::{meta::Regex, Match, Span}; + /// + /// let re = Regex::new(r"foo([0-9]+)bar")?; + /// let hay = "foo123bar"; + /// + /// let mut caps = re.create_captures(); + /// re.captures(hay, &mut caps); + /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0)); + /// assert_eq!(Some(Span::from(3..6)), caps.get_group(1)); + /// + /// Ok::<(), Box>(()) + /// ``` + /// + /// And now we show the behavior when we only include implicit capture + /// groups. In this case, we can only find the overall match span, but the + /// spans of any other explicit group don't exist because they are treated + /// as non-capturing. (In effect, when `WhichCaptures::Implicit` is used, + /// there is no real point in using [`Regex::captures`] since it will never + /// be able to report more information than [`Regex::find`].) + /// + /// ``` + /// use regex_automata::{ + /// meta::Regex, + /// nfa::thompson::WhichCaptures, + /// Match, + /// Span, + /// }; + /// + /// let re = Regex::builder() + /// .configure(Regex::config().which_captures(WhichCaptures::Implicit)) + /// .build(r"foo([0-9]+)bar")?; + /// let hay = "foo123bar"; + /// + /// let mut caps = re.create_captures(); + /// re.captures(hay, &mut caps); + /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0)); + /// assert_eq!(None, caps.get_group(1)); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn which_captures(mut self, which_captures: WhichCaptures) -> Config { + self.which_captures = Some(which_captures); + self + } + /// Sets the size limit, in bytes, to enforce on the construction of every /// NFA build by the meta regex engine. /// @@ -2626,7 +2706,7 @@ impl Config { /// you're compiling untrusted patterns. /// /// Note that this limit is applied to _each_ NFA built, and if any of - /// them excceed the limit, then construction will fail. This limit does + /// them exceed the limit, then construction will fail. This limit does /// _not_ correspond to the total memory used by all NFAs in the meta regex /// engine. /// @@ -2983,6 +3063,14 @@ impl Config { self.pre.as_ref().unwrap_or(&None).as_ref() } + /// Returns the capture configuration, as set by + /// [`Config::which_captures`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_which_captures(&self) -> WhichCaptures { + self.which_captures.unwrap_or(WhichCaptures::All) + } + /// Returns NFA size limit, as set by [`Config::nfa_size_limit`]. /// /// If it was not explicitly set, then a default value is returned. @@ -3126,6 +3214,7 @@ impl Config { utf8_empty: o.utf8_empty.or(self.utf8_empty), autopre: o.autopre.or(self.autopre), pre: o.pre.or_else(|| self.pre.clone()), + which_captures: o.which_captures.or(self.which_captures), nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit), onepass_size_limit: o .onepass_size_limit @@ -3551,8 +3640,8 @@ mod tests { // I found this in the course of building out the benchmark suite for // rebar. 
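// (For `[a-zA-Z]+ing` against "tingling", leftmost-first semantics require
// a single match spanning the whole haystack; the regression presumably
// surfaced as the iterator reporting shorter matches such as "ting" and
// "ling", which is what the `count()` assertion below guards against.)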
#[test] - fn regression() { - env_logger::init(); + fn regression_suffix_literal_count() { + let _ = env_logger::try_init(); let re = Regex::new(r"[a-zA-Z]+ing").unwrap(); assert_eq!(1, re.find_iter("tingling").count()); diff --git a/regex-automata/src/meta/stopat.rs b/regex-automata/src/meta/stopat.rs index e8d716689c..c4dcd797a0 100644 --- a/regex-automata/src/meta/stopat.rs +++ b/regex-automata/src/meta/stopat.rs @@ -81,9 +81,6 @@ pub(crate) fn dfa_try_search_half_fwd( } else if dfa.is_dead_state(sid) { return Ok(mat.ok_or(at)); } else if dfa.is_quit_state(sid) { - if mat.is_some() { - return Ok(mat.ok_or(at)); - } return Err(MatchError::quit(input.haystack()[at], at).into()); } else { // Ideally we wouldn't use a DFA that specialized start states @@ -122,9 +119,6 @@ pub(crate) fn hybrid_try_search_half_fwd( } else if sid.is_dead() { return Ok(mat.ok_or(at)); } else if sid.is_quit() { - if mat.is_some() { - return Ok(mat.ok_or(at)); - } return Err(MatchError::quit(input.haystack()[at], at).into()); } else { // We should NEVER get an unknown state ID back from @@ -162,9 +156,6 @@ fn dfa_eoi_fwd( let pattern = dfa.match_pattern(*sid, 0); *mat = Some(HalfMatch::new(pattern, sp.end)); } else if dfa.is_quit_state(*sid) { - if mat.is_some() { - return Ok(()); - } return Err(MatchError::quit(b, sp.end)); } } @@ -201,9 +192,6 @@ fn hybrid_eoi_fwd( let pattern = dfa.match_pattern(cache, *sid, 0); *mat = Some(HalfMatch::new(pattern, sp.end)); } else if sid.is_quit() { - if mat.is_some() { - return Ok(()); - } return Err(MatchError::quit(b, sp.end)); } } diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs index 2de2c385ec..04f2ba3c3e 100644 --- a/regex-automata/src/meta/strategy.rs +++ b/regex-automata/src/meta/strategy.rs @@ -13,7 +13,7 @@ use crate::{ regex::{Cache, RegexInfo}, reverse_inner, wrappers, }, - nfa::thompson::{self, NFA}, + nfa::thompson::{self, WhichCaptures, NFA}, util::{ captures::{Captures, GroupInfo}, look::LookMatcher, @@ -58,6 +58,8 @@ pub(super) trait Strategy: input: &Input<'_>, ) -> Option; + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool; + fn search_slots( &self, cache: &mut Cache, @@ -351,6 +353,7 @@ impl Pre<()> { // strategy when len(patterns)==1 if the number of literals is large. In that // case, literal extraction gives up and will return an infinite set.) impl Strategy for Pre
<P>
{ + #[cfg_attr(feature = "perf-inline", inline(always))] fn group_info(&self) -> &GroupInfo { &self.group_info } @@ -376,6 +379,7 @@ impl Strategy for Pre<P>
{ self.pre.memory_usage() } + #[cfg_attr(feature = "perf-inline", inline(always))] fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> { if input.is_done() { return None; } @@ -391,6 +395,7 @@ impl Strategy for Pre<P>
{ .map(|sp| Match::new(PatternID::ZERO, sp)) } + #[cfg_attr(feature = "perf-inline", inline(always))] fn search_half( &self, cache: &mut Cache, @@ -399,6 +404,12 @@ impl Strategy for Pre<P>
{ self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end())) } + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + self.search(cache, input).is_some() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] fn search_slots( &self, cache: &mut Cache, @@ -415,6 +426,7 @@ impl Strategy for Pre<P>
{ Some(m.pattern()) } + #[cfg_attr(feature = "perf-inline", inline(always))] fn which_overlapping_matches( &self, cache: &mut Cache, @@ -452,7 +464,7 @@ impl Core { .utf8(info.config().get_utf8_empty()) .nfa_size_limit(info.config().get_nfa_size_limit()) .shrink(false) - .captures(true) + .which_captures(info.config().get_which_captures()) .look_matcher(lookm); let nfa = thompson::Compiler::new() .configure(thompson_config.clone()) @@ -499,7 +511,10 @@ impl Core { // useful with capturing groups in reverse. And of course, // the lazy DFA ignores capturing groups in all cases. .configure( - thompson_config.clone().captures(false).reverse(true), + thompson_config + .clone() + .which_captures(WhichCaptures::None) + .reverse(true), ) .build_many_from_hir(hirs) .map_err(BuildError::nfa)?; @@ -620,6 +635,29 @@ impl Core { } } + fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if let Some(ref e) = self.onepass.get(input) { + trace!( + "using OnePass for is-match search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.onepass, input, &mut []).is_some() + } else if let Some(ref e) = self.backtrack.get(input) { + trace!( + "using BoundedBacktracker for is-match search at {:?}", + input.get_span() + ); + e.is_match(&mut cache.backtrack, input) + } else { + trace!( + "using PikeVM for is-match search at {:?}", + input.get_span() + ); + let e = self.pikevm.get(); + e.is_match(&mut cache.pikevm, input) + } + } + fn is_capture_search_needed(&self, slots_len: usize) -> bool { slots_len > self.nfa.group_info().implicit_slot_len() } @@ -700,7 +738,7 @@ impl Strategy for Core { // The main difference with 'search' is that if we're using a DFA, we // can use a single forward scan without needing to run the reverse // DFA. - return if let Some(e) = self.dfa.get(input) { + if let Some(e) = self.dfa.get(input) { trace!("using full DFA for half search at {:?}", input.get_span()); match e.try_search_half_fwd(input) { Ok(x) => x, @@ -720,7 +758,38 @@ impl Strategy for Core { } } else { self.search_half_nofail(cache, input) - }; + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if let Some(e) = self.dfa.get(input) { + trace!( + "using full DFA for is-match search at {:?}", + input.get_span() + ); + match e.try_search_half_fwd(input) { + Ok(x) => x.is_some(), + Err(_err) => { + trace!("full DFA half search failed: {}", _err); + self.is_match_nofail(cache, input) + } + } + } else if let Some(e) = self.hybrid.get(input) { + trace!( + "using lazy DFA for is-match search at {:?}", + input.get_span() + ); + match e.try_search_half_fwd(&mut cache.hybrid, input) { + Ok(x) => x.is_some(), + Err(_err) => { + trace!("lazy DFA half search failed: {}", _err); + self.is_match_nofail(cache, input) + } + } + } else { + self.is_match_nofail(cache, input) + } } #[cfg_attr(feature = "perf-inline", inline(always))] @@ -845,6 +914,14 @@ impl ReverseAnchored { ); return Err(core); } + // Note that the caller can still request an anchored search even when + // the regex isn't anchored at the start. We detect that case in the + // search routines below and just fallback to the core engine. This + // is fine because both searches are anchored. It's just a matter of + // picking one. 
Falling back to the core engine is a little simpler, + // since if we used the reverse anchored approach, we'd have to add an + // extra check to ensure the match reported starts at the place where + // the caller requested the search to start. if core.info.is_always_anchored_start() { debug!( "skipping reverse anchored optimization because \ @@ -930,6 +1007,9 @@ impl Strategy for ReverseAnchored { #[cfg_attr(feature = "perf-inline", inline(always))] fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search(cache, input); + } match self.try_search_half_anchored_rev(cache, input) { Err(_err) => { trace!("fast reverse anchored search failed: {}", _err); @@ -948,6 +1028,9 @@ impl Strategy for ReverseAnchored { cache: &mut Cache, input: &Input<'_>, ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_half(cache, input); + } match self.try_search_half_anchored_rev(cache, input) { Err(_err) => { trace!("fast reverse anchored search failed: {}", _err); @@ -966,6 +1049,21 @@ impl Strategy for ReverseAnchored { } } + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if input.get_anchored().is_anchored() { + return self.core.is_match(cache, input); + } + match self.try_search_half_anchored_rev(cache, input) { + Err(_err) => { + trace!("fast reverse anchored search failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Ok(None) => false, + Ok(Some(_)) => true, + } + } + #[cfg_attr(feature = "perf-inline", inline(always))] fn search_slots( &self, @@ -973,6 +1071,9 @@ impl Strategy for ReverseAnchored { input: &Input<'_>, slots: &mut [Option], ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_slots(cache, input, slots); + } match self.try_search_half_anchored_rev(cache, input) { Err(_err) => { trace!("fast reverse anchored search failed: {}", _err); @@ -1034,6 +1135,13 @@ impl ReverseSuffix { // requires a reverse scan after a literal match to confirm or reject // the match. (Although, in the case of confirmation, it then needs to // do another forward scan to find the end position.) + // + // Note that the caller can still request an anchored search even + // when the regex isn't anchored. We detect that case in the search + // routines below and just fallback to the core engine. Currently this + // optimization assumes all searches are unanchored, so if we do want + // to enable this optimization for anchored searches, it will need a + // little work to support it. 
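+ //
+ // Concretely, the fallback in each search routine below is just an early
+ // return of this shape (shown for `search`; the other routines delegate
+ // to their core counterparts analogously):
+ //
+ //     if input.get_anchored().is_anchored() {
+ //         return self.core.search(cache, input);
+ //     }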
if core.info.is_always_anchored_start() { debug!( "skipping reverse suffix optimization because \ @@ -1173,7 +1281,7 @@ impl ReverseSuffix { e.try_search_half_rev_limited(&input, min_start) } else if let Some(e) = self.core.hybrid.get(&input) { trace!( - "using lazy DFA for reverse inner search at {:?}, \ + "using lazy DFA for reverse suffix search at {:?}, \ but will be stopped at {} to avoid quadratic behavior", input.get_span(), min_start, @@ -1211,6 +1319,9 @@ impl Strategy for ReverseSuffix { #[cfg_attr(feature = "perf-inline", inline(always))] fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search(cache, input); + } match self.try_search_half_start(cache, input) { Err(RetryError::Quadratic(_err)) => { trace!("reverse suffix optimization failed: {}", _err); @@ -1255,6 +1366,9 @@ impl Strategy for ReverseSuffix { cache: &mut Cache, input: &Input<'_>, ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_half(cache, input); + } match self.try_search_half_start(cache, input) { Err(RetryError::Quadratic(_err)) => { trace!("reverse suffix half optimization failed: {}", _err); @@ -1302,6 +1416,28 @@ impl Strategy for ReverseSuffix { } } + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if input.get_anchored().is_anchored() { + return self.core.is_match(cache, input); + } + match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse suffix half optimization failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!( + "reverse suffix reverse fast half search failed: {}", + _err + ); + self.core.is_match_nofail(cache, input) + } + Ok(None) => false, + Ok(Some(_)) => true, + } + } + #[cfg_attr(feature = "perf-inline", inline(always))] fn search_slots( &self, @@ -1309,6 +1445,9 @@ impl Strategy for ReverseSuffix { input: &Input<'_>, slots: &mut [Option], ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_slots(cache, input, slots); + } if !self.core.is_capture_search_needed(slots.len()) { trace!("asked for slots unnecessarily, trying fast path"); let m = self.search(cache, input)?; @@ -1396,6 +1535,13 @@ impl ReverseInner { // or when the literal scan matches. If it matches, then confirming the // match requires a reverse scan followed by a forward scan to confirm // or reject, which is a fair bit of work. + // + // Note that the caller can still request an anchored search even + // when the regex isn't anchored. We detect that case in the search + // routines below and just fallback to the core engine. Currently this + // optimization assumes all searches are unanchored, so if we do want + // to enable this optimization for anchored searches, it will need a + // little work to support it. 
if core.info.is_always_anchored_start() { debug!( "skipping reverse inner optimization because \ @@ -1440,7 +1586,7 @@ impl ReverseInner { .utf8(core.info.config().get_utf8_empty()) .nfa_size_limit(core.info.config().get_nfa_size_limit()) .shrink(false) - .captures(false) + .which_captures(WhichCaptures::None) .look_matcher(lookm); let result = thompson::Compiler::new() .configure(thompson_config) @@ -1635,6 +1781,9 @@ impl Strategy for ReverseInner { #[cfg_attr(feature = "perf-inline", inline(always))] fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search(cache, input); + } match self.try_search_full(cache, input) { Err(RetryError::Quadratic(_err)) => { trace!("reverse inner optimization failed: {}", _err); @@ -1654,6 +1803,9 @@ impl Strategy for ReverseInner { cache: &mut Cache, input: &Input<'_>, ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_half(cache, input); + } match self.try_search_full(cache, input) { Err(RetryError::Quadratic(_err)) => { trace!("reverse inner half optimization failed: {}", _err); @@ -1668,6 +1820,25 @@ impl Strategy for ReverseInner { } } + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if input.get_anchored().is_anchored() { + return self.core.is_match(cache, input); + } + match self.try_search_full(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner half optimization failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast half search failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Ok(None) => false, + Ok(Some(_)) => true, + } + } + #[cfg_attr(feature = "perf-inline", inline(always))] fn search_slots( &self, @@ -1675,6 +1846,9 @@ impl Strategy for ReverseInner { input: &Input<'_>, slots: &mut [Option], ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_slots(cache, input, slots); + } if !self.core.is_capture_search_needed(slots.len()) { trace!("asked for slots unnecessarily, trying fast path"); let m = self.search(cache, input)?; diff --git a/regex-automata/src/meta/wrappers.rs b/regex-automata/src/meta/wrappers.rs index 8f58363a17..6cb19ba0d2 100644 --- a/regex-automata/src/meta/wrappers.rs +++ b/regex-automata/src/meta/wrappers.rs @@ -87,6 +87,15 @@ impl PikeVMEngine { Ok(PikeVMEngine(engine)) } + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_match( + &self, + cache: &mut PikeVMCache, + input: &Input<'_>, + ) -> bool { + self.0.is_match(cache.0.as_mut().unwrap(), input.clone()) + } + #[cfg_attr(feature = "perf-inline", inline(always))] pub(crate) fn search_slots( &self, @@ -203,7 +212,10 @@ impl BoundedBacktrackerEngine { .configure(backtrack_config) .build_from_nfa(nfa.clone()) .map_err(BuildError::nfa)?; - debug!("BoundedBacktracker built"); + debug!( + "BoundedBacktracker built (max haystack length: {:?})", + engine.max_haystack_len() + ); Ok(Some(BoundedBacktrackerEngine(engine))) } #[cfg(not(feature = "nfa-backtrack"))] @@ -212,6 +224,29 @@ impl BoundedBacktrackerEngine { } } + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_match( + &self, + cache: &mut BoundedBacktrackerCache, + input: &Input<'_>, + ) -> bool { + #[cfg(feature = "nfa-backtrack")] + { + // OK because we only permit access to this engine when we know + // the haystack is short enough for the 
backtracker to run without + // reporting an error. + self.0 + .try_is_match(cache.0.as_mut().unwrap(), input.clone()) + .unwrap() + } + #[cfg(not(feature = "nfa-backtrack"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + #[cfg_attr(feature = "perf-inline", inline(always))] pub(crate) fn search_slots( &self, diff --git a/regex-automata/src/nfa/thompson/backtrack.rs b/regex-automata/src/nfa/thompson/backtrack.rs index 75b6c096b2..df99e456df 100644 --- a/regex-automata/src/nfa/thompson/backtrack.rs +++ b/regex-automata/src/nfa/thompson/backtrack.rs @@ -19,7 +19,7 @@ use crate::{ empty, iter, prefilter::Prefilter, primitives::{NonMaxUsize, PatternID, SmallIndex, StateID}, - search::{Anchored, Input, Match, MatchError, Span}, + search::{Anchored, HalfMatch, Input, Match, MatchError, Span}, }, }; @@ -300,15 +300,6 @@ impl Builder { &self, nfa: NFA, ) -> Result { - // If the NFA has no captures, then the backtracker doesn't work since - // it relies on them in order to report match locations. However, in - // the special case of an NFA with no patterns, it is allowed, since - // no matches can ever be produced. And importantly, an NFA with no - // patterns has no capturing groups anyway, so this is necessary to - // permit the backtracker to work with regexes with zero patterns. - if !nfa.has_capture() && nfa.pattern_len() > 0 { - return Err(BuildError::missing_captures()); - } nfa.look_set_any().available().map_err(BuildError::word)?; Ok(BoundedBacktracker { config: self.config.clone(), nfa }) } @@ -829,8 +820,11 @@ impl BoundedBacktracker { // bytes to the capacity in bits. let capacity = 8 * self.get_config().get_visited_capacity(); let blocks = div_ceil(capacity, Visited::BLOCK_SIZE); - let real_capacity = blocks * Visited::BLOCK_SIZE; - (real_capacity / self.nfa.states().len()) - 1 + let real_capacity = blocks.saturating_mul(Visited::BLOCK_SIZE); + // It's possible for `real_capacity` to be smaller than the number of + // NFA states for particularly large regexes, so we saturate towards + // zero. + (real_capacity / self.nfa.states().len()).saturating_sub(1) } } @@ -954,8 +948,14 @@ impl BoundedBacktracker { None => return Ok(None), Some(pid) => pid, }; - let start = slots[0].unwrap().get(); - let end = slots[1].unwrap().get(); + let start = match slots[0] { + None => return Ok(None), + Some(s) => s.get(), + }; + let end = match slots[1] { + None => return Ok(None), + Some(s) => s.get(), + }; return Ok(Some(Match::new(pid, Span { start, end }))); } let ginfo = self.get_nfa().group_info(); @@ -965,8 +965,14 @@ impl BoundedBacktracker { None => return Ok(None), Some(pid) => pid, }; - let start = slots[pid.as_usize() * 2].unwrap().get(); - let end = slots[pid.as_usize() * 2 + 1].unwrap().get(); + let start = match slots[pid.as_usize() * 2] { + None => return Ok(None), + Some(s) => s.get(), + }; + let end = match slots[pid.as_usize() * 2 + 1] { + None => return Ok(None), + Some(s) => s.get(), + }; Ok(Some(Match::new(pid, Span { start, end }))) } @@ -1292,12 +1298,14 @@ impl BoundedBacktracker { ) -> Result, MatchError> { let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); if !utf8empty { - return self.try_search_slots_imp(cache, input, slots); + let maybe_hm = self.try_search_slots_imp(cache, input, slots)?; + return Ok(maybe_hm.map(|hm| hm.pattern())); } // See PikeVM::try_search_slots for why we do this. 
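// (In brief: when the regex can match the empty string and UTF-8 mode is
// enabled, the search needs enough slots to see the end offset of the
// overall match so that empty matches splitting a codepoint can be
// skipped. That is what the minimum-length check below enforces.)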
let min = self.get_nfa().group_info().implicit_slot_len(); if slots.len() >= min { - return self.try_search_slots_imp(cache, input, slots); + let maybe_hm = self.try_search_slots_imp(cache, input, slots)?; + return Ok(maybe_hm.map(|hm| hm.pattern())); } if self.get_nfa().pattern_len() == 1 { let mut enough = [None, None]; @@ -1305,14 +1313,14 @@ impl BoundedBacktracker { // This is OK because we know `enough_slots` is strictly bigger // than `slots`, otherwise this special case isn't reached. slots.copy_from_slice(&enough[..slots.len()]); - return Ok(got); + return Ok(got.map(|hm| hm.pattern())); } let mut enough = vec![None; min]; let got = self.try_search_slots_imp(cache, input, &mut enough)?; // This is OK because we know `enough_slots` is strictly bigger than // `slots`, otherwise this special case isn't reached. slots.copy_from_slice(&enough[..slots.len()]); - Ok(got) + Ok(got.map(|hm| hm.pattern())) } /// This is the actual implementation of `try_search_slots_imp` that @@ -1325,30 +1333,17 @@ impl BoundedBacktracker { cache: &mut Cache, input: &Input<'_>, slots: &mut [Option], - ) -> Result, MatchError> { + ) -> Result, MatchError> { let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); - let (pid, end) = match self.search_imp(cache, input, slots)? { + let hm = match self.search_imp(cache, input, slots)? { None => return Ok(None), - Some(pid) if !utf8empty => return Ok(Some(pid)), - Some(pid) => { - let slot_start = pid.as_usize() * 2; - let slot_end = slot_start + 1; - // OK because we know we have a match and we know our caller - // provided slots are big enough (which we make true above if - // the caller didn't). Namely, we're only here when 'utf8empty' - // is true, and when that's true, we require slots for every - // pattern. - (pid, slots[slot_end].unwrap().get()) - } + Some(hm) if !utf8empty => return Ok(Some(hm)), + Some(hm) => hm, }; - empty::skip_splits_fwd(input, pid, end, |input| { - let pid = match self.search_imp(cache, input, slots)? { - None => return Ok(None), - Some(pid) => pid, - }; - let slot_start = pid.as_usize() * 2; - let slot_end = slot_start + 1; - Ok(Some((pid, slots[slot_end].unwrap().get()))) + empty::skip_splits_fwd(input, hm, hm.offset(), |input| { + Ok(self + .search_imp(cache, input, slots)? + .map(|hm| (hm, hm.offset()))) }) } @@ -1364,7 +1359,7 @@ impl BoundedBacktracker { cache: &mut Cache, input: &Input<'_>, slots: &mut [Option], - ) -> Result, MatchError> { + ) -> Result, MatchError> { // Unlike in the PikeVM, we write our capturing group spans directly // into the caller's captures groups. So we have to make sure we're // starting with a blank slate first. 
In the PikeVM, we avoid this @@ -1411,10 +1406,9 @@ impl BoundedBacktracker { Some(ref span) => at = span.start, } } - if let Some(pid) = - self.backtrack(cache, input, at, start_id, slots) + if let Some(hm) = self.backtrack(cache, input, at, start_id, slots) { - return Ok(Some(pid)); + return Ok(Some(hm)); } at += 1; } @@ -1435,14 +1429,13 @@ impl BoundedBacktracker { at: usize, start_id: StateID, slots: &mut [Option], - ) -> Option { + ) -> Option { cache.stack.push(Frame::Step { sid: start_id, at }); while let Some(frame) = cache.stack.pop() { match frame { Frame::Step { sid, at } => { - if let Some(pid) = self.step(cache, input, sid, at, slots) - { - return Some(pid); + if let Some(hm) = self.step(cache, input, sid, at, slots) { + return Some(hm); } } Frame::RestoreCapture { slot, offset } => { @@ -1472,7 +1465,7 @@ impl BoundedBacktracker { mut sid: StateID, mut at: usize, slots: &mut [Option], - ) -> Option { + ) -> Option { loop { if !cache.visited.insert(sid, at - input.start()) { return None; @@ -1555,7 +1548,7 @@ impl BoundedBacktracker { } State::Fail => return None, State::Match { pattern_id } => { - return Some(pattern_id); + return Some(HalfMatch::new(pattern_id, at)); } } } @@ -1892,3 +1885,24 @@ fn div_ceil(lhs: usize, rhs: usize) -> usize { (lhs / rhs) + 1 } } + +#[cfg(test)] +mod tests { + use super::*; + + // This is a regression test for the maximum haystack length computation. + // Previously, it assumed that the total capacity of the backtracker's + // bitset would always be greater than the number of NFA states. But there + // is of course no guarantee that this is true. This regression test + // ensures that not only does `max_haystack_len` not panic, but that it + // should return `0`. + #[cfg(feature = "syntax")] + #[test] + fn max_haystack_len_overflow() { + let re = BoundedBacktracker::builder() + .configure(BoundedBacktracker::config().visited_capacity(10)) + .build(r"[0-9A-Za-z]{100}") + .unwrap(); + assert_eq!(0, re.max_haystack_len()); + } +} diff --git a/regex-automata/src/nfa/thompson/builder.rs b/regex-automata/src/nfa/thompson/builder.rs index b57e5bc0f3..6b69e8784d 100644 --- a/regex-automata/src/nfa/thompson/builder.rs +++ b/regex-automata/src/nfa/thompson/builder.rs @@ -61,7 +61,7 @@ enum State { Look { look: Look, next: StateID }, /// An empty state that records the start of a capture location. This is an /// unconditional epsilon transition like `Empty`, except it can be used to - /// record position information for a captue group when using the NFA for + /// record position information for a capture group when using the NFA for /// search. CaptureStart { /// The ID of the pattern that this capture was defined. @@ -77,7 +77,7 @@ enum State { }, /// An empty state that records the end of a capture location. This is an /// unconditional epsilon transition like `Empty`, except it can be used to - /// record position information for a captue group when using the NFA for + /// record position information for a capture group when using the NFA for /// search. CaptureEnd { /// The ID of the pattern that this capture was defined. @@ -128,7 +128,7 @@ enum State { } impl State { - /// If this state is an unconditional espilon transition, then this returns + /// If this state is an unconditional epsilon transition, then this returns /// the target of the transition. 
fn goto(&self) -> Option { match *self { diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs index 497fc62b47..2d2172957f 100644 --- a/regex-automata/src/nfa/thompson/compiler.rs +++ b/regex-automata/src/nfa/thompson/compiler.rs @@ -30,7 +30,7 @@ pub struct Config { reverse: Option, nfa_size_limit: Option>, shrink: Option, - captures: Option, + which_captures: Option, look_matcher: Option, #[cfg(test)] unanchored_prefix: Option, @@ -178,12 +178,15 @@ impl Config { /// ``` /// use regex_automata::{ /// dfa::{self, Automaton}, - /// nfa::thompson::NFA, + /// nfa::thompson::{NFA, WhichCaptures}, /// HalfMatch, Input, /// }; /// /// let dfa = dfa::dense::Builder::new() - /// .thompson(NFA::config().captures(false).reverse(true)) + /// .thompson(NFA::config() + /// .which_captures(WhichCaptures::None) + /// .reverse(true) + /// ) /// .build("baz[0-9]+")?; /// let expected = Some(HalfMatch::must(0, 3)); /// assert_eq!( @@ -277,10 +280,12 @@ impl Config { /// /// ``` /// # if cfg!(miri) { return Ok(()); } // miri takes too long - /// use regex_automata::nfa::thompson::NFA; + /// use regex_automata::nfa::thompson::{NFA, WhichCaptures}; /// /// // Currently we have to disable captures when enabling reverse NFA. - /// let config = NFA::config().captures(false).reverse(true); + /// let config = NFA::config() + /// .which_captures(WhichCaptures::None) + /// .reverse(true); /// let not_shrunk = NFA::compiler() /// .configure(config.clone().shrink(false)) /// .build(r"\w")?; @@ -311,21 +316,99 @@ impl Config { /// # Example /// /// This example demonstrates that some regex engines, like the Pike VM, - /// require capturing groups to be present in the NFA. Building a Pike VM - /// with an NFA without capturing groups will result in an error. + /// require capturing states to be present in the NFA to report match + /// offsets. + /// + /// (Note that since this method is deprecated, the example below uses + /// [`Config::which_captures`] to disable capture states.) /// /// ``` - /// use regex_automata::nfa::thompson::{pikevm::PikeVM, NFA}; + /// use regex_automata::nfa::thompson::{ + /// pikevm::PikeVM, + /// NFA, + /// WhichCaptures, + /// }; /// - /// let nfa = NFA::compiler() - /// .configure(NFA::config().captures(false)) + /// let re = PikeVM::builder() + /// .thompson(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"[a-z]+")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, "abc")); + /// assert_eq!(None, re.find(&mut cache, "abc")); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[deprecated(since = "0.3.5", note = "use which_captures instead")] + pub fn captures(self, yes: bool) -> Config { + self.which_captures(if yes { + WhichCaptures::All + } else { + WhichCaptures::None + }) + } + + /// Configures what kinds of capture groups are compiled into + /// [`State::Capture`](crate::nfa::thompson::State::Capture) states in a + /// Thompson NFA. + /// + /// Currently, using any option except for [`WhichCaptures::None`] requires + /// disabling the [`reverse`](Config::reverse) setting. If both are + /// enabled, then the compiler will return an error. It is expected that + /// this limitation will be lifted in the future. + /// + /// This is set to [`WhichCaptures::All`] by default. Callers may wish to + /// use [`WhichCaptures::Implicit`] in cases where one wants avoid the + /// overhead of capture states for explicit groups. 
Usually this occurs + /// when one wants to use the `PikeVM` only for determining the overall + /// match. Otherwise, the `PikeVM` could use much more memory than is + /// necessary. + /// + /// # Example + /// + /// This example demonstrates that some regex engines, like the Pike VM, + /// require capturing states to be present in the NFA to report match + /// offsets. + /// + /// ``` + /// use regex_automata::nfa::thompson::{ + /// pikevm::PikeVM, + /// NFA, + /// WhichCaptures, + /// }; + /// + /// let re = PikeVM::builder() + /// .thompson(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"[a-z]+")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, "abc")); + /// assert_eq!(None, re.find(&mut cache, "abc")); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// The same applies to the bounded backtracker: + /// + /// ``` + /// use regex_automata::nfa::thompson::{ + /// backtrack::BoundedBacktracker, + /// NFA, + /// WhichCaptures, + /// }; + /// + /// let re = BoundedBacktracker::builder() + /// .thompson(NFA::config().which_captures(WhichCaptures::None)) /// .build(r"[a-z]+")?; - /// assert!(PikeVM::new_from_nfa(nfa).is_err()); + /// let mut cache = re.create_cache(); + /// + /// assert!(re.try_is_match(&mut cache, "abc")?); + /// assert_eq!(None, re.try_find(&mut cache, "abc")?); /// /// # Ok::<(), Box>(()) /// ``` - pub fn captures(mut self, yes: bool) -> Config { - self.captures = Some(yes); + pub fn which_captures(mut self, which_captures: WhichCaptures) -> Config { + self.which_captures = Some(which_captures); self } @@ -405,8 +488,14 @@ impl Config { } /// Return whether NFA compilation is configured to produce capture states. + #[deprecated(since = "0.3.5", note = "use get_which_captures instead")] pub fn get_captures(&self) -> bool { - self.captures.unwrap_or(true) + self.get_which_captures().is_any() + } + + /// Return what kinds of capture states will be compiled into an NFA. + pub fn get_which_captures(&self) -> WhichCaptures { + self.which_captures.unwrap_or(WhichCaptures::All) } /// Return the look-around matcher for this NFA. @@ -439,7 +528,7 @@ impl Config { reverse: o.reverse.or(self.reverse), nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit), shrink: o.shrink.or(self.shrink), - captures: o.captures.or(self.captures), + which_captures: o.which_captures.or(self.which_captures), look_matcher: o.look_matcher.or_else(|| self.look_matcher.clone()), #[cfg(test)] unanchored_prefix: o.unanchored_prefix.or(self.unanchored_prefix), @@ -447,6 +536,57 @@ impl Config { } } +/// A configuration indicating which kinds of +/// [`State::Capture`](crate::nfa::thompson::State::Capture) states to include. +/// +/// This configuration can be used with [`Config::which_captures`] to control +/// which capture states are compiled into a Thompson NFA. +/// +/// The default configuration is [`WhichCaptures::All`]. +#[derive(Clone, Copy, Debug)] +pub enum WhichCaptures { + /// All capture states, including those corresponding to both implicit and + /// explicit capture groups, are included in the Thompson NFA. + All, + /// Only capture states corresponding to implicit capture groups are + /// included. Implicit capture groups appear in every pattern implicitly + /// and correspond to the overall match of a pattern. + /// + /// This is useful when one only cares about the overall match of a + /// pattern. 
By excluding capture states from explicit capture groups, + /// one might be able to reduce the memory usage of a multi-pattern regex + /// substantially if it was otherwise written to have many explicit capture + /// groups. + Implicit, + /// No capture states are compiled into the Thompson NFA. + /// + /// This is useful when capture states are either not needed (for example, + /// if one is only trying to build a DFA) or if they aren't supported (for + /// example, a reverse NFA). + None, +} + +impl Default for WhichCaptures { + fn default() -> WhichCaptures { + WhichCaptures::All + } +} + +impl WhichCaptures { + /// Returns true if this configuration indicates that no capture states + /// should be produced in an NFA. + pub fn is_none(&self) -> bool { + matches!(*self, WhichCaptures::None) + } + + /// Returns true if this configuration indicates that some capture states + /// should be added to an NFA. Note that this might only include capture + /// states for implicit capture groups. + pub fn is_any(&self) -> bool { + !self.is_none() + } +} + /* This compiler below uses Thompson's construction algorithm. The compiler takes a regex-syntax::Hir as input and emits an NFA graph as output. The NFA graph @@ -800,7 +940,9 @@ impl Compiler { if exprs.len() > PatternID::LIMIT { return Err(BuildError::too_many_patterns(exprs.len())); } - if self.config.get_reverse() && self.config.get_captures() { + if self.config.get_reverse() + && self.config.get_which_captures().is_any() + { return Err(BuildError::unsupported_captures()); } @@ -978,8 +1120,13 @@ impl Compiler { name: Option<&str>, expr: &Hir, ) -> Result { - if !self.config.get_captures() { - return self.c(expr); + match self.config.get_which_captures() { + // No capture states means we always skip them. + WhichCaptures::None => return self.c(expr), + // Implicit captures states means we only add when index==0 since + // index==0 implies the group is implicit. + WhichCaptures::Implicit if index > 0 => return self.c(expr), + _ => {} } let start = self.add_capture_start(index, name)?; @@ -1319,7 +1466,7 @@ impl Compiler { // compare and contrast performance of the Pike VM when the code below // is active vs the code above. Here's an example to try: // - // regex-cli find nfa thompson pikevm -b @$smallishru '(?m)^\w{20}' + // regex-cli find match pikevm -b -p '(?m)^\w{20}' non-ascii-file // // With Unicode classes generated below, this search takes about 45s on // my machine. 
But with the compressed version above, the search takes @@ -1338,7 +1485,7 @@ impl Compiler { .map(|rng| self.c_range(rng.start, rng.end)); self.c_concat(it) }); - self.c_alt(it) + self.c_alt_iter(it) */ } @@ -1410,6 +1557,14 @@ impl Compiler { hir::Look::WordAsciiNegate => Look::WordAsciiNegate, hir::Look::WordUnicode => Look::WordUnicode, hir::Look::WordUnicodeNegate => Look::WordUnicodeNegate, + hir::Look::WordStartAscii => Look::WordStartAscii, + hir::Look::WordEndAscii => Look::WordEndAscii, + hir::Look::WordStartUnicode => Look::WordStartUnicode, + hir::Look::WordEndUnicode => Look::WordEndUnicode, + hir::Look::WordStartHalfAscii => Look::WordStartHalfAscii, + hir::Look::WordEndHalfAscii => Look::WordEndHalfAscii, + hir::Look::WordStartHalfUnicode => Look::WordStartHalfUnicode, + hir::Look::WordEndHalfUnicode => Look::WordEndHalfUnicode, }; let id = self.add_look(look)?; Ok(ThompsonRef { start: id, end: id }) @@ -1725,12 +1880,18 @@ mod tests { use crate::{ nfa::thompson::{SparseTransitions, State, Transition, NFA}, - util::primitives::{PatternID, StateID}, + util::primitives::{PatternID, SmallIndex, StateID}, }; + use super::*; + fn build(pattern: &str) -> NFA { NFA::compiler() - .configure(NFA::config().captures(false).unanchored_prefix(false)) + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) .build(pattern) .unwrap() } @@ -1781,6 +1942,15 @@ mod tests { } } + fn s_cap(next: usize, pattern: usize, index: usize, slot: usize) -> State { + State::Capture { + next: sid(next), + pattern_id: pid(pattern), + group_index: SmallIndex::new(index).unwrap(), + slot: SmallIndex::new(slot).unwrap(), + } + } + fn s_fail() -> State { State::Fail } @@ -1794,7 +1964,7 @@ mod tests { #[test] fn compile_unanchored_prefix() { let nfa = NFA::compiler() - .configure(NFA::config().captures(false)) + .configure(NFA::config().which_captures(WhichCaptures::None)) .build(r"a") .unwrap(); assert_eq!( @@ -1827,7 +1997,11 @@ mod tests { // Check that non-UTF-8 literals work. 
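// (`(?-u)\xFF` matches the raw byte `\xFF`, which is not valid UTF-8 on its
// own, so the syntax config below must also set `utf8(false)` for the
// pattern to compile at all.)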
let nfa = NFA::compiler() - .configure(NFA::config().captures(false).unanchored_prefix(false)) + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) .syntax(crate::util::syntax::Config::new().utf8(false)) .build(r"(?-u)\xFF") .unwrap(); @@ -1937,7 +2111,7 @@ mod tests { let nfa = NFA::compiler() .configure( NFA::config() - .captures(false) + .which_captures(WhichCaptures::None) .reverse(true) .shrink(false) .unanchored_prefix(false), @@ -1965,7 +2139,11 @@ mod tests { #[test] fn compile_many_start_pattern() { let nfa = NFA::compiler() - .configure(NFA::config().captures(false).unanchored_prefix(false)) + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) .build_many(&["a", "b"]) .unwrap(); assert_eq!( @@ -1993,7 +2171,9 @@ mod tests { use regex_syntax::hir::{Class, ClassBytes, Hir}; let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![]))); - let config = NFA::config().captures(false).unanchored_prefix(false); + let config = NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false); let nfa = NFA::compiler().configure(config).build_from_hir(&hir).unwrap(); assert_eq!(nfa.states(), &[s_fail(), s_match(0)]); @@ -2005,9 +2185,81 @@ mod tests { use regex_syntax::hir::{Class, ClassUnicode, Hir}; let hir = Hir::class(Class::Unicode(ClassUnicode::new(vec![]))); - let config = NFA::config().captures(false).unanchored_prefix(false); + let config = NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false); let nfa = NFA::compiler().configure(config).build_from_hir(&hir).unwrap(); assert_eq!(nfa.states(), &[s_fail(), s_match(0)]); } + + #[test] + fn compile_captures_all() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .unanchored_prefix(false) + .which_captures(WhichCaptures::All), + ) + .build("a(b)c") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_cap(1, 0, 0, 0), + s_byte(b'a', 2), + s_cap(3, 0, 1, 2), + s_byte(b'b', 4), + s_cap(5, 0, 1, 3), + s_byte(b'c', 6), + s_cap(7, 0, 0, 1), + s_match(0) + ] + ); + let ginfo = nfa.group_info(); + assert_eq!(2, ginfo.all_group_len()); + } + + #[test] + fn compile_captures_implicit() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .unanchored_prefix(false) + .which_captures(WhichCaptures::Implicit), + ) + .build("a(b)c") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_cap(1, 0, 0, 0), + s_byte(b'a', 2), + s_byte(b'b', 3), + s_byte(b'c', 4), + s_cap(5, 0, 0, 1), + s_match(0) + ] + ); + let ginfo = nfa.group_info(); + assert_eq!(1, ginfo.all_group_len()); + } + + #[test] + fn compile_captures_none() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .unanchored_prefix(false) + .which_captures(WhichCaptures::None), + ) + .build("a(b)c") + .unwrap(); + assert_eq!( + nfa.states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_byte(b'c', 3), s_match(0)] + ); + let ginfo = nfa.group_info(); + assert_eq!(0, ginfo.all_group_len()); + } } diff --git a/regex-automata/src/nfa/thompson/error.rs b/regex-automata/src/nfa/thompson/error.rs index 82648813ba..3c2fa8a215 100644 --- a/regex-automata/src/nfa/thompson/error.rs +++ b/regex-automata/src/nfa/thompson/error.rs @@ -68,9 +68,6 @@ enum BuildErrorKind { /// The invalid index that was given. index: u32, }, - /// An error that occurs when one tries to build an NFA simulation (such as - /// the PikeVM) without any capturing groups. - MissingCaptures, /// An error that occurs when one tries to build a reverse NFA with /// captures enabled. 
Currently, this isn't supported, but we probably /// should support it at some point. @@ -126,10 +123,6 @@ impl BuildError { BuildError { kind: BuildErrorKind::InvalidCaptureIndex { index } } } - pub(crate) fn missing_captures() -> BuildError { - BuildError { kind: BuildErrorKind::MissingCaptures } - } - #[cfg(feature = "syntax")] pub(crate) fn unsupported_captures() -> BuildError { BuildError { kind: BuildErrorKind::UnsupportedCaptures } @@ -181,11 +174,6 @@ impl core::fmt::Display for BuildError { "capture group index {} is invalid (too big or discontinuous)", index, ), - BuildErrorKind::MissingCaptures => write!( - f, - "operation requires the NFA to have capturing groups, \ - but the NFA given contains none", - ), #[cfg(feature = "syntax")] BuildErrorKind::UnsupportedCaptures => write!( f, diff --git a/regex-automata/src/nfa/thompson/map.rs b/regex-automata/src/nfa/thompson/map.rs index c36ce53866..7f074a353b 100644 --- a/regex-automata/src/nfa/thompson/map.rs +++ b/regex-automata/src/nfa/thompson/map.rs @@ -65,7 +65,7 @@ const INIT: u64 = 14695981039346656037; /// Specifically, one could observe the difference with std's hashmap via /// something like the following benchmark: /// -/// hyperfine "regex-cli debug nfa thompson --quiet --reverse '\w{90} ecurB'" +/// hyperfine "regex-cli debug thompson -qr --captures none '\w{90} ecurB'" /// /// But to observe that difference, you'd have to modify the code to use /// std's hashmap. diff --git a/regex-automata/src/nfa/thompson/mod.rs b/regex-automata/src/nfa/thompson/mod.rs index 3581d738c2..cf426736dc 100644 --- a/regex-automata/src/nfa/thompson/mod.rs +++ b/regex-automata/src/nfa/thompson/mod.rs @@ -78,4 +78,4 @@ pub use self::{ }, }; #[cfg(feature = "syntax")] -pub use compiler::{Compiler, Config}; +pub use compiler::{Compiler, Config, WhichCaptures}; diff --git a/regex-automata/src/nfa/thompson/nfa.rs b/regex-automata/src/nfa/thompson/nfa.rs index 86131406ca..1f57f8ebd9 100644 --- a/regex-automata/src/nfa/thompson/nfa.rs +++ b/regex-automata/src/nfa/thompson/nfa.rs @@ -453,10 +453,10 @@ impl NFA { /// predict the anchored starting state. /// /// ``` - /// use regex_automata::nfa::thompson::{NFA, State}; + /// use regex_automata::nfa::thompson::{NFA, State, WhichCaptures}; /// /// let nfa = NFA::compiler() - /// .configure(NFA::config().captures(false)) + /// .configure(NFA::config().which_captures(WhichCaptures::None)) /// .build("a")?; /// let state = nfa.state(nfa.start_anchored()); /// match *state { @@ -711,7 +711,7 @@ impl NFA { /// or not. /// /// ``` - /// use regex_automata::nfa::thompson::NFA; + /// use regex_automata::nfa::thompson::{NFA, WhichCaptures}; /// /// // Obviously has capture states. /// let nfa = NFA::new("(a)")?; @@ -733,7 +733,7 @@ impl NFA { /// // Notice that 'has_capture' is false here even when we have an /// // explicit capture group in the pattern. /// let nfa = NFA::compiler() - /// .configure(NFA::config().captures(false)) + /// .configure(NFA::config().which_captures(WhichCaptures::None)) /// .build("(a)")?; /// assert!(!nfa.has_capture()); /// @@ -1841,14 +1841,12 @@ impl SparseTransitions { // This is an alternative implementation that uses binary search. In // some ad hoc experiments, like // - // smallishru=OpenSubtitles2018.raw.sample.smallish.ru - // regex-cli find nfa thompson pikevm -b "@$smallishru" '\b\w+\b' + // regex-cli find match pikevm -b -p '\b\w+\b' non-ascii-file // // I could not observe any improvement, and in fact, things seemed to // be a bit slower. 
I can see an improvement in at least one benchmark: // - // allcpssmall=all-codepoints-utf8-10x - // regex-cli find nfa thompson pikevm @$allcpssmall '\pL{100}' + // regex-cli find match pikevm -b -p '\pL{100}' all-codepoints-utf8 // // Where total search time goes from 3.2s to 2.4s when using binary // search. diff --git a/regex-automata/src/nfa/thompson/pikevm.rs b/regex-automata/src/nfa/thompson/pikevm.rs index d737fb71e9..0128c151ae 100644 --- a/regex-automata/src/nfa/thompson/pikevm.rs +++ b/regex-automata/src/nfa/thompson/pikevm.rs @@ -17,7 +17,9 @@ use crate::{ empty, iter, prefilter::Prefilter, primitives::{NonMaxUsize, PatternID, SmallIndex, StateID}, - search::{Anchored, Input, Match, MatchKind, PatternSet, Span}, + search::{ + Anchored, HalfMatch, Input, Match, MatchKind, PatternSet, Span, + }, sparse_set::SparseSet, }, }; @@ -275,15 +277,6 @@ impl Builder { /// construction of the NFA itself will of course be ignored, since the NFA /// given here is already built. pub fn build_from_nfa(&self, nfa: NFA) -> Result { - // If the NFA has no captures, then the PikeVM doesn't work since it - // relies on them in order to report match locations. However, in - // the special case of an NFA with no patterns, it is allowed, since - // no matches can ever be produced. And importantly, an NFA with no - // patterns has no capturing groups anyway, so this is necessary to - // permit the PikeVM to work with regexes with zero patterns. - if !nfa.has_capture() && nfa.pattern_len() > 0 { - return Err(BuildError::missing_captures()); - } nfa.look_set_any().available().map_err(BuildError::word)?; Ok(PikeVM { config: self.config.clone(), nfa }) } @@ -828,16 +821,16 @@ impl PikeVM { if self.get_nfa().pattern_len() == 1 { let mut slots = [None, None]; let pid = self.search_slots(cache, &input, &mut slots)?; - let start = slots[0].unwrap().get(); - let end = slots[1].unwrap().get(); + let start = slots[0]?.get(); + let end = slots[1]?.get(); return Some(Match::new(pid, Span { start, end })); } let ginfo = self.get_nfa().group_info(); let slots_len = ginfo.implicit_slot_len(); let mut slots = vec![None; slots_len]; let pid = self.search_slots(cache, &input, &mut slots)?; - let start = slots[pid.as_usize() * 2].unwrap().get(); - let end = slots[pid.as_usize() * 2 + 1].unwrap().get(); + let start = slots[pid.as_usize() * 2]?.get(); + let end = slots[pid.as_usize() * 2 + 1]?.get(); Some(Match::new(pid, Span { start, end })) } @@ -1103,7 +1096,8 @@ impl PikeVM { ) -> Option { let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); if !utf8empty { - return self.search_slots_imp(cache, input, slots); + let hm = self.search_slots_imp(cache, input, slots)?; + return Some(hm.pattern()); } // There is an unfortunate special case where if the regex can // match the empty string and UTF-8 mode is enabled, the search @@ -1118,22 +1112,23 @@ impl PikeVM { // this case. let min = self.get_nfa().group_info().implicit_slot_len(); if slots.len() >= min { - return self.search_slots_imp(cache, input, slots); + let hm = self.search_slots_imp(cache, input, slots)?; + return Some(hm.pattern()); } if self.get_nfa().pattern_len() == 1 { let mut enough = [None, None]; let got = self.search_slots_imp(cache, input, &mut enough); - // This is OK because we know `enough_slots` is strictly bigger - // than `slots`, otherwise this special case isn't reached. + // This is OK because we know `enough` is strictly bigger than + // `slots`, otherwise this special case isn't reached. 
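+ // (Concretely: this branch is only reached when `slots.len() < min` and
+ // `enough.len() == min`, so the slicing below is always in bounds.)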
slots.copy_from_slice(&enough[..slots.len()]); - return got; + return got.map(|hm| hm.pattern()); } let mut enough = vec![None; min]; let got = self.search_slots_imp(cache, input, &mut enough); - // This is OK because we know `enough_slots` is strictly bigger than - // `slots`, otherwise this special case isn't reached. + // This is OK because we know `enough` is strictly bigger than `slots`, + // otherwise this special case isn't reached. slots.copy_from_slice(&enough[..slots.len()]); - got + got.map(|hm| hm.pattern()) } /// This is the actual implementation of `search_slots_imp` that @@ -1146,30 +1141,17 @@ impl PikeVM { cache: &mut Cache, input: &Input<'_>, slots: &mut [Option], - ) -> Option { + ) -> Option { let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); - let (pid, end) = match self.search_imp(cache, input, slots) { + let hm = match self.search_imp(cache, input, slots) { None => return None, - Some(pid) if !utf8empty => return Some(pid), - Some(pid) => { - let slot_start = pid.as_usize() * 2; - let slot_end = slot_start + 1; - // OK because we know we have a match and we know our caller - // provided slots are big enough (which we make true above if - // the caller didn't). Namely, we're only here when 'utf8empty' - // is true, and when that's true, we require slots for every - // pattern. - (pid, slots[slot_end].unwrap().get()) - } + Some(hm) if !utf8empty => return Some(hm), + Some(hm) => hm, }; - empty::skip_splits_fwd(input, pid, end, |input| { - let pid = match self.search_imp(cache, input, slots) { - None => return Ok(None), - Some(pid) => pid, - }; - let slot_start = pid.as_usize() * 2; - let slot_end = slot_start + 1; - Ok(Some((pid, slots[slot_end].unwrap().get()))) + empty::skip_splits_fwd(input, hm, hm.offset(), |input| { + Ok(self + .search_imp(cache, input, slots) + .map(|hm| (hm, hm.offset()))) }) // OK because the PikeVM never errors. .unwrap() @@ -1244,7 +1226,7 @@ impl PikeVM { cache: &mut Cache, input: &Input<'_>, slots: &mut [Option], - ) -> Option { + ) -> Option { cache.setup_search(slots.len()); if input.is_done() { return None; @@ -1273,7 +1255,7 @@ impl PikeVM { let pre = if anchored { None } else { self.get_config().get_prefilter() }; let Cache { ref mut stack, ref mut curr, ref mut next } = cache; - let mut pid = None; + let mut hm = None; // Yes, our search doesn't end at input.end(), but includes it. This // is necessary because matches are delayed by one byte, just like // how the DFA engines work. The delay is used to handle look-behind @@ -1292,7 +1274,7 @@ impl PikeVM { if curr.set.is_empty() { // We have a match and we haven't been instructed to continue // on even after finding a match, so we can quit. - if pid.is_some() && !allmatches { + if hm.is_some() && !allmatches { break; } // If we're running an anchored search and we've advanced @@ -1356,7 +1338,15 @@ impl PikeVM { // matches their behavior. (Generally, 'allmatches' is useful for // overlapping searches or leftmost anchored searches to find the // longest possible match by ignoring match priority.) - if !pid.is_some() || allmatches { + // + // Additionally, when we're running an anchored search, this + // epsilon closure should only be computed at the beginning of the + // search. If we re-computed it at every position, we would be + // simulating an unanchored search when we were tasked to perform + // an anchored search. 
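+ // (For example, an anchored search for `a` in "ba" must fail; re-seeding
+ // the start state at offset 1 would incorrectly report the `a` found
+ // there.)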
+ if (!hm.is_some() || allmatches) + && (!anchored || at == input.start()) + { // Since we are adding to the 'curr' active states and since // this is for the start ID, we use a slots slice that is // guaranteed to have the right length but where every element @@ -1373,14 +1363,15 @@ impl PikeVM { let slots = next.slot_table.all_absent(); self.epsilon_closure(stack, slots, curr, input, at, start_id); } - if let Some(x) = self.nexts(stack, curr, next, input, at, slots) { - pid = Some(x); + if let Some(pid) = self.nexts(stack, curr, next, input, at, slots) + { + hm = Some(HalfMatch::new(pid, at)); } // Unless the caller asked us to return early, we need to mush on // to see if we can extend our match. (But note that 'nexts' will // quit right after seeing a match when match_kind==LeftmostFirst, // as is consistent with leftmost-first match priority.) - if input.get_earliest() && pid.is_some() { + if input.get_earliest() && hm.is_some() { break; } core::mem::swap(curr, next); @@ -1388,7 +1379,7 @@ impl PikeVM { at += 1; } instrument!(|c| c.eprint(&self.nfa)); - pid + hm } /// The implementation for the 'which_overlapping_matches' API. Basically, @@ -2100,15 +2091,16 @@ impl SlotTable { // if a 'Captures' has fewer slots, e.g., none at all or only slots // for tracking the overall match instead of all slots for every // group. - self.slots_for_captures = nfa.group_info().slot_len(); + self.slots_for_captures = core::cmp::max( + self.slots_per_state, + nfa.pattern_len().checked_mul(2).unwrap(), + ); let len = nfa .states() .len() - // We add 1 so that our last row is always empty. We use it as - // "scratch" space for computing the epsilon closure off of the - // starting state. - .checked_add(1) - .and_then(|x| x.checked_mul(self.slots_per_state)) + .checked_mul(self.slots_per_state) + // Add space to account for scratch space used during a search. + .and_then(|x| x.checked_add(self.slots_for_captures)) // It seems like this could actually panic on legitimate inputs on // 32-bit targets, and very likely to panic on 16-bit. Should we // somehow convert this to an error? What about something similar @@ -2162,7 +2154,7 @@ impl SlotTable { /// compute an epsilon closure outside of the user supplied regex, and thus /// never want it to have any capturing slots set. fn all_absent(&mut self) -> &mut [Option] { - let i = self.table.len() - self.slots_per_state; + let i = self.table.len() - self.slots_for_captures; &mut self.table[i..i + self.slots_for_captures] } } diff --git a/regex-automata/src/nfa/thompson/range_trie.rs b/regex-automata/src/nfa/thompson/range_trie.rs index 2d43a5b6f7..cd77cc1507 100644 --- a/regex-automata/src/nfa/thompson/range_trie.rs +++ b/regex-automata/src/nfa/thompson/range_trie.rs @@ -594,7 +594,7 @@ impl State { // Benchmarks suggest that binary search is just a bit faster than // straight linear search. 
Specifically when using the debug tool: // - // hyperfine "regex-cli debug nfa thompson --quiet --reverse '\w{90} ecurB'" + // hyperfine "regex-cli debug thompson -qr --captures none '\w{90} ecurB'" binary_search(&self.transitions, |t| range.start <= t.range.end) } diff --git a/regex-automata/src/util/captures.rs b/regex-automata/src/util/captures.rs index 60b6df7e25..05db6a9936 100644 --- a/regex-automata/src/util/captures.rs +++ b/regex-automata/src/util/captures.rs @@ -444,6 +444,8 @@ impl Captures { /// assert_eq!(Some(Span::from(6..17)), caps.get_group(2)); /// // Looking for a non-existent capturing group will return None: /// assert_eq!(None, caps.get_group(3)); + /// # // literals are too big for 32-bit usize: #1039 + /// # #[cfg(target_pointer_width = "64")] /// assert_eq!(None, caps.get_group(9944060567225171988)); /// /// # Ok::<(), Box>(()) @@ -1809,10 +1811,10 @@ impl GroupInfo { /// panic even if captures aren't enabled on this NFA: /// /// ``` - /// use regex_automata::nfa::thompson::NFA; + /// use regex_automata::nfa::thompson::{NFA, WhichCaptures}; /// /// let nfa = NFA::compiler() - /// .configure(NFA::config().captures(false)) + /// .configure(NFA::config().which_captures(WhichCaptures::None)) /// .build_many(&[ /// r"(?Pa)", /// r"a", @@ -1957,7 +1959,7 @@ impl GroupInfo { /// for different patterns and NFA configurations. /// /// ``` - /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID}; /// /// let nfa = NFA::new(r"(a)(b)(c)")?; /// // There are 3 explicit groups in the pattern's concrete syntax and @@ -1969,13 +1971,13 @@ impl GroupInfo { /// assert_eq!(1, nfa.group_info().group_len(PatternID::ZERO)); /// /// let nfa = NFA::compiler() - /// .configure(NFA::config().captures(false)) + /// .configure(NFA::config().which_captures(WhichCaptures::None)) /// .build(r"abc")?; /// // We disabled capturing groups, so there are none. /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO)); /// /// let nfa = NFA::compiler() - /// .configure(NFA::config().captures(false)) + /// .configure(NFA::config().which_captures(WhichCaptures::None)) /// .build(r"(a)(b)(c)")?; /// // We disabled capturing groups, so there are none, even if there are /// // explicit groups in the concrete syntax. @@ -1999,7 +2001,7 @@ impl GroupInfo { /// for different patterns and NFA configurations. /// /// ``` - /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID}; /// /// let nfa = NFA::new(r"(a)(b)(c)")?; /// // There are 3 explicit groups in the pattern's concrete syntax and @@ -2016,13 +2018,13 @@ impl GroupInfo { /// assert_eq!(5, nfa.group_info().all_group_len()); /// /// let nfa = NFA::compiler() - /// .configure(NFA::config().captures(false)) + /// .configure(NFA::config().which_captures(WhichCaptures::None)) /// .build(r"abc")?; /// // We disabled capturing groups, so there are none. /// assert_eq!(0, nfa.group_info().all_group_len()); /// /// let nfa = NFA::compiler() - /// .configure(NFA::config().captures(false)) + /// .configure(NFA::config().which_captures(WhichCaptures::None)) /// .build(r"(a)(b)(c)")?; /// // We disabled capturing groups, so there are none, even if there are /// // explicit groups in the concrete syntax. 
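Pulling the capture-configuration changes together, the following sketch (it uses only APIs that appear in this patch) shows the effect of `WhichCaptures::Implicit` on an NFA's `GroupInfo`: the explicit group in `a(b)c` is treated as non-capturing, leaving only the implicit group for the overall match.

```
use regex_automata::nfa::thompson::{NFA, WhichCaptures};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let nfa = NFA::compiler()
        .configure(NFA::config().which_captures(WhichCaptures::Implicit))
        .build("a(b)c")?;
    // Only the implicit group (the overall match) is compiled in.
    assert_eq!(1, nfa.group_info().all_group_len());
    Ok(())
}
```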
diff --git a/regex-automata/src/util/determinize/mod.rs b/regex-automata/src/util/determinize/mod.rs index 30a82afb81..ba32991d06 100644 --- a/regex-automata/src/util/determinize/mod.rs +++ b/regex-automata/src/util/determinize/mod.rs @@ -145,9 +145,10 @@ pub(crate) fn next( } Some(_) => {} None => { - look_have = look_have.insert(Look::End); - look_have = look_have.insert(Look::EndLF); - look_have = look_have.insert(Look::EndCRLF); + look_have = look_have + .insert(Look::End) + .insert(Look::EndLF) + .insert(Look::EndCRLF); } } if unit.is_byte(lookm.get_line_terminator()) { @@ -160,11 +161,26 @@ pub(crate) fn next( look_have = look_have.insert(Look::StartCRLF); } if state.is_from_word() == unit.is_word_byte() { - look_have = look_have.insert(Look::WordUnicodeNegate); - look_have = look_have.insert(Look::WordAsciiNegate); + look_have = look_have + .insert(Look::WordAsciiNegate) + .insert(Look::WordUnicodeNegate); } else { - look_have = look_have.insert(Look::WordUnicode); - look_have = look_have.insert(Look::WordAscii); + look_have = + look_have.insert(Look::WordAscii).insert(Look::WordUnicode); + } + if !unit.is_word_byte() { + look_have = look_have + .insert(Look::WordEndHalfAscii) + .insert(Look::WordEndHalfUnicode); + } + if state.is_from_word() && !unit.is_word_byte() { + look_have = look_have + .insert(Look::WordEndAscii) + .insert(Look::WordEndUnicode); + } else if !state.is_from_word() && unit.is_word_byte() { + look_have = look_have + .insert(Look::WordStartAscii) + .insert(Look::WordStartUnicode); } // If we have new assertions satisfied that are among the set of // assertions that exist in this state (that is, just because we added @@ -220,6 +236,14 @@ pub(crate) fn next( { builder.set_look_have(|have| have.insert(Look::StartCRLF)); } + // And also for the start-half word boundary assertions. As long as the + // look-behind byte is not a word char, then the assertions are satisfied. + if nfa.look_set_any().contains_word() && !unit.is_word_byte() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } for nfa_id in sparses.set1.iter() { match *nfa.state(nfa_id) { thompson::State::Union { .. 
} @@ -563,47 +587,95 @@ pub(crate) fn set_lookbehind_from_start( ) { let rev = nfa.is_reverse(); let lineterm = nfa.look_matcher().get_line_terminator(); + let lookset = nfa.look_set_any(); match *start { - Start::NonWordByte => {} + Start::NonWordByte => { + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } Start::WordByte => { - builder.set_is_from_word(); + if lookset.contains_word() { + builder.set_is_from_word(); + } } Start::Text => { - builder.set_look_have(|have| { - have.insert(Look::Start) - .insert(Look::StartLF) - .insert(Look::StartCRLF) - }); + if lookset.contains_anchor_haystack() { + builder.set_look_have(|have| have.insert(Look::Start)); + } + if lookset.contains_anchor_line() { + builder.set_look_have(|have| { + have.insert(Look::StartLF).insert(Look::StartCRLF) + }); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } } Start::LineLF => { if rev { - builder.set_is_half_crlf(); - builder.set_look_have(|have| have.insert(Look::StartLF)); + if lookset.contains_anchor_crlf() { + builder.set_is_half_crlf(); + } + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } } else { - builder.set_look_have(|have| have.insert(Look::StartCRLF)); + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } } - if lineterm == b'\n' { + if lookset.contains_anchor_line() && lineterm == b'\n' { builder.set_look_have(|have| have.insert(Look::StartLF)); } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } } Start::LineCR => { - if rev { - builder.set_look_have(|have| have.insert(Look::StartCRLF)); - } else { - builder.set_is_half_crlf(); + if lookset.contains_anchor_crlf() { + if rev { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } else { + builder.set_is_half_crlf(); + } } - if lineterm == b'\r' { + if lookset.contains_anchor_line() && lineterm == b'\r' { builder.set_look_have(|have| have.insert(Look::StartLF)); } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } } Start::CustomLineTerminator => { - builder.set_look_have(|have| have.insert(Look::StartLF)); + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } // This is a bit of a tricky case, but if the line terminator was // set to a word byte, then we also need to behave as if the start // configuration is Start::WordByte. That is, we need to mark our // state as having come from a word byte. 
- if utf8::is_word_byte(lineterm) { - builder.set_is_from_word(); + if lookset.contains_word() { + if utf8::is_word_byte(lineterm) { + builder.set_is_from_word(); + } else { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } } } } diff --git a/regex-automata/src/util/determinize/state.rs b/regex-automata/src/util/determinize/state.rs index e641235874..effa6f44d7 100644 --- a/regex-automata/src/util/determinize/state.rs +++ b/regex-automata/src/util/determinize/state.rs @@ -197,7 +197,7 @@ impl StateBuilderEmpty { } pub(crate) fn into_matches(mut self) -> StateBuilderMatches { - self.0.extend_from_slice(&[0, 0, 0, 0, 0]); + self.0.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0]); StateBuilderMatches(self.0) } @@ -348,16 +348,17 @@ impl StateBuilderNFA { /// generated by a transition over a "word" byte. (Callers may not always set /// this. For example, if the NFA has no word boundary assertion, then needing /// to track whether a state came from a word byte or not is superfluous and -/// wasteful.) +/// wasteful.) Bit 3 is set to 1 if the state was generated by a transition +/// from a `\r` (forward search) or a `\n` (reverse search) when CRLF mode is +/// enabled. /// -/// Byte 1 corresponds to the look-behind assertions that were satisfied by -/// the transition that created this state. This generally only includes the -/// StartLF and Start assertions. (Look-ahead assertions are not tracked as -/// part of states. Instead, these are applied by re-computing the epsilon -/// closure of a state when computing the transition function. See `next` in -/// the parent module.) +/// Bytes 1..5 correspond to the look-behind assertions that were satisfied +/// by the transition that created this state. (Look-ahead assertions are not +/// tracked as part of states. Instead, these are applied by re-computing the +/// epsilon closure of a state when computing the transition function. See +/// `next` in the parent module.) /// -/// Byte 2 corresponds to the set of look-around assertions (including both +/// Bytes 5..9 correspond to the set of look-around assertions (including both /// look-behind and look-ahead) that appear somewhere in this state's set of /// NFA state IDs. This is used to determine whether this state's epsilon /// closure should be re-computed when computing the transition function. @@ -366,7 +367,7 @@ impl StateBuilderNFA { /// function, we should only re-compute the epsilon closure if those new /// assertions are relevant to this particular state. /// -/// Bytes 3..7 correspond to a 32-bit native-endian encoded integer +/// Bytes 9..13 correspond to a 32-bit native-endian encoded integer /// corresponding to the number of patterns encoded in this state. If the state /// is not a match state (byte 0 bit 0 is 0) or if it's only pattern ID is /// PatternID::ZERO, then no integer is encoded at this position. Instead, byte @@ -452,7 +453,7 @@ impl<'a> Repr<'a> { /// state has no conditional epsilon transitions, then there is no need /// to re-compute the epsilon closure. fn look_need(&self) -> LookSet { - LookSet::read_repr(&self.0[3..]) + LookSet::read_repr(&self.0[5..]) } /// Returns the total number of match pattern IDs in this state. @@ -476,7 +477,7 @@ impl<'a> Repr<'a> { if !self.has_pattern_ids() { PatternID::ZERO } else { - let offset = 9 + index * PatternID::SIZE; + let offset = 13 + index * PatternID::SIZE; // This is OK since we only ever serialize valid PatternIDs to // states. 
wire::read_pattern_id_unchecked(&self.0[offset..]).0 @@ -507,7 +508,7 @@ impl<'a> Repr<'a> { f(PatternID::ZERO); return; } - let mut pids = &self.0[9..self.pattern_offset_end()]; + let mut pids = &self.0[13..self.pattern_offset_end()]; while !pids.is_empty() { let pid = wire::read_u32(pids); pids = &pids[PatternID::SIZE..]; @@ -539,11 +540,11 @@ impl<'a> Repr<'a> { fn pattern_offset_end(&self) -> usize { let encoded = self.encoded_pattern_len(); if encoded == 0 { - return 5; + return 9; } // This arithmetic is OK since we were able to address this many bytes // when writing to the state, thus, it must fit into a usize. - encoded.checked_mul(4).unwrap().checked_add(9).unwrap() + encoded.checked_mul(4).unwrap().checked_add(13).unwrap() } /// Returns the total number of *encoded* pattern IDs in this state. @@ -557,7 +558,7 @@ impl<'a> Repr<'a> { } // This unwrap is OK since the total number of patterns is always // guaranteed to fit into a usize. - usize::try_from(wire::read_u32(&self.0[5..9])).unwrap() + usize::try_from(wire::read_u32(&self.0[9..13])).unwrap() } } @@ -643,7 +644,7 @@ impl<'a> ReprVec<'a> { /// Mutate the set of look-around (both behind and ahead) assertions that /// appear at least once in this state's set of NFA states. fn set_look_need(&mut self, mut set: impl FnMut(LookSet) -> LookSet) { - set(self.look_need()).write_repr(&mut self.0[3..]); + set(self.look_need()).write_repr(&mut self.0[5..]); } /// Add a pattern ID to this state. All match states must have at least @@ -703,14 +704,14 @@ impl<'a> ReprVec<'a> { return; } let patsize = PatternID::SIZE; - let pattern_bytes = self.0.len() - 9; + let pattern_bytes = self.0.len() - 13; // Every pattern ID uses 4 bytes, so number of bytes should be // divisible by 4. assert_eq!(pattern_bytes % patsize, 0); // This unwrap is OK since we are guaranteed that the maximum number // of possible patterns fits into a u32. let count32 = u32::try_from(pattern_bytes / patsize).unwrap(); - wire::NE::write_u32(count32, &mut self.0[5..9]); + wire::NE::write_u32(count32, &mut self.0[9..13]); } /// Add an NFA state ID to this state. The order in which NFA states are diff --git a/regex-automata/src/util/lazy.rs b/regex-automata/src/util/lazy.rs index de27a2a6e6..0d0b4fb2ae 100644 --- a/regex-automata/src/util/lazy.rs +++ b/regex-automata/src/util/lazy.rs @@ -384,11 +384,7 @@ mod lazy { // SAFETY: state is DONE if and only if data has been fully // initialized. At which point, it is safe to drop. unsafe { - // MSRV(1.60): Use assume_init_drop. The below is how - // assume_init_drop is implemented. - core::ptr::drop_in_place( - (*self.data.as_ptr()).as_mut_ptr(), - ) + self.data.get_mut().assume_init_drop(); } } } diff --git a/regex-automata/src/util/look.rs b/regex-automata/src/util/look.rs index aee31b34e0..73e51c0f6e 100644 --- a/regex-automata/src/util/look.rs +++ b/regex-automata/src/util/look.rs @@ -96,6 +96,42 @@ pub enum Look { WordUnicode = 1 << 8, /// Match a Unicode-aware negation of a word boundary. WordUnicodeNegate = 1 << 9, + /// Match the start of an ASCII-only word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartAscii = 1 << 10, + /// Match the end of an ASCII-only word boundary. 
That is, this matches + /// a position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndAscii = 1 << 11, + /// Match the start of a Unicode word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartUnicode = 1 << 12, + /// Match the end of a Unicode word boundary. That is, this matches a + /// position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndUnicode = 1 << 13, + /// Match the start half of an ASCII-only word boundary. That is, this + /// matches a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfAscii = 1 << 14, + /// Match the end half of an ASCII-only word boundary. That is, this + /// matches a position at either the end of the haystack or where the + /// following character is not a word character. + WordEndHalfAscii = 1 << 15, + /// Match the start half of a Unicode word boundary. That is, this matches + /// a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfUnicode = 1 << 16, + /// Match the end half of a Unicode word boundary. That is, this matches + /// a position at either the end of the haystack or where the following + /// character is not a word character. + WordEndHalfUnicode = 1 << 17, } impl Look { @@ -117,6 +153,14 @@ impl Look { Look::WordAsciiNegate => Look::WordAsciiNegate, Look::WordUnicode => Look::WordUnicode, Look::WordUnicodeNegate => Look::WordUnicodeNegate, + Look::WordStartAscii => Look::WordEndAscii, + Look::WordEndAscii => Look::WordStartAscii, + Look::WordStartUnicode => Look::WordEndUnicode, + Look::WordEndUnicode => Look::WordStartUnicode, + Look::WordStartHalfAscii => Look::WordEndHalfAscii, + Look::WordEndHalfAscii => Look::WordStartHalfAscii, + Look::WordStartHalfUnicode => Look::WordEndHalfUnicode, + Look::WordEndHalfUnicode => Look::WordStartHalfUnicode, } } @@ -125,28 +169,36 @@ impl Look { /// constructor is guaranteed to return the same look-around variant that /// one started with within a semver compatible release of this crate. #[inline] - pub const fn as_repr(self) -> u16 { + pub const fn as_repr(self) -> u32 { // AFAIK, 'as' is the only way to zero-cost convert an int enum to an // actual int. - self as u16 + self as u32 } /// Given the underlying representation of a `Look` value, return the /// corresponding `Look` value if the representation is valid. Otherwise /// `None` is returned. 
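(Editor's sketch of the round-trip contract documented above: every variant is a distinct power of two, `as_repr` is a plain integer cast and `from_repr` inverts it. `Assertion` is an illustrative stand-in for `Look`, not the crate's type.)

#[derive(Clone, Copy, Debug, PartialEq)]
enum Assertion {
    Start = 1 << 0,
    End = 1 << 1,
    WordStartHalf = 1 << 2,
}

impl Assertion {
    const fn as_repr(self) -> u32 {
        // 'as' is the zero-cost way to convert an int enum to an int.
        self as u32
    }

    const fn from_repr(repr: u32) -> Option<Assertion> {
        match repr {
            0b001 => Some(Assertion::Start),
            0b010 => Some(Assertion::End),
            0b100 => Some(Assertion::WordStartHalf),
            _ => None,
        }
    }
}

#[test]
fn repr_round_trips() {
    for a in [Assertion::Start, Assertion::End, Assertion::WordStartHalf] {
        assert_eq!(Some(a), Assertion::from_repr(a.as_repr()));
    }
}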
#[inline] - pub const fn from_repr(repr: u16) -> Option { + pub const fn from_repr(repr: u32) -> Option { match repr { - 0b00_0000_0001 => Some(Look::Start), - 0b00_0000_0010 => Some(Look::End), - 0b00_0000_0100 => Some(Look::StartLF), - 0b00_0000_1000 => Some(Look::EndLF), - 0b00_0001_0000 => Some(Look::StartCRLF), - 0b00_0010_0000 => Some(Look::EndCRLF), - 0b00_0100_0000 => Some(Look::WordAscii), - 0b00_1000_0000 => Some(Look::WordAsciiNegate), - 0b01_0000_0000 => Some(Look::WordUnicode), - 0b10_0000_0000 => Some(Look::WordUnicodeNegate), + 0b00_0000_0000_0000_0001 => Some(Look::Start), + 0b00_0000_0000_0000_0010 => Some(Look::End), + 0b00_0000_0000_0000_0100 => Some(Look::StartLF), + 0b00_0000_0000_0000_1000 => Some(Look::EndLF), + 0b00_0000_0000_0001_0000 => Some(Look::StartCRLF), + 0b00_0000_0000_0010_0000 => Some(Look::EndCRLF), + 0b00_0000_0000_0100_0000 => Some(Look::WordAscii), + 0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate), + 0b00_0000_0001_0000_0000 => Some(Look::WordUnicode), + 0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate), + 0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii), + 0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii), + 0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode), + 0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode), + 0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii), + 0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii), + 0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode), + 0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode), _ => None, } } @@ -171,6 +223,14 @@ impl Look { Look::WordAsciiNegate => 'B', Look::WordUnicode => '𝛃', Look::WordUnicodeNegate => '𝚩', + Look::WordStartAscii => '<', + Look::WordEndAscii => '>', + Look::WordStartUnicode => '〈', + Look::WordEndUnicode => '〉', + Look::WordStartHalfAscii => '◁', + Look::WordEndHalfAscii => '▷', + Look::WordStartHalfUnicode => '◀', + Look::WordEndHalfUnicode => '▶', } } } @@ -184,14 +244,14 @@ impl Look { pub struct LookSet { /// The underlying representation this set is exposed to make it possible /// to store it somewhere efficiently. The representation is that - /// of a bitset, where each assertion occupies bit `i` where `i = - /// Look::as_repr()`. + /// of a bitset, where each assertion occupies bit `i` where + /// `i = Look::as_repr()`. /// /// Note that users of this internal representation must permit the full /// range of `u16` values to be represented. For example, even if the /// current implementation only makes use of the 10 least significant bits, /// it may use more bits in a future semver compatible release. - pub bits: u16, + pub bits: u32, } impl LookSet { @@ -294,13 +354,22 @@ impl LookSet { pub fn contains_word_unicode(self) -> bool { self.contains(Look::WordUnicode) || self.contains(Look::WordUnicodeNegate) + || self.contains(Look::WordStartUnicode) + || self.contains(Look::WordEndUnicode) + || self.contains(Look::WordStartHalfUnicode) + || self.contains(Look::WordEndHalfUnicode) } /// Returns true if and only if this set contains any ASCII word boundary /// or negated ASCII word boundary assertions. 
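(Editor's note: the `LookSet` operations in this file all reduce to one or two bit instructions over the `bits` field documented above. A sketch of the scheme on a bare `u32`; the free functions are illustrative stand-ins for the set's methods.)

fn insert(bits: u32, look: u32) -> u32 {
    bits | look
}

fn remove(bits: u32, look: u32) -> u32 {
    bits & !look
}

fn contains(bits: u32, look: u32) -> bool {
    bits & look != 0
}

#[test]
fn bitset_ops() {
    let (start, end) = (1 << 0, 1 << 1);
    let mut bits = insert(0, start);
    assert!(contains(bits, start) && !contains(bits, end));
    bits = remove(bits, start);
    assert_eq!(0, bits);
}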
#[inline] pub fn contains_word_ascii(self) -> bool { - self.contains(Look::WordAscii) || self.contains(Look::WordAsciiNegate) + self.contains(Look::WordAscii) + || self.contains(Look::WordAsciiNegate) + || self.contains(Look::WordStartAscii) + || self.contains(Look::WordEndAscii) + || self.contains(Look::WordStartHalfAscii) + || self.contains(Look::WordEndHalfAscii) } /// Returns an iterator over all of the look-around assertions in this set. @@ -379,29 +448,31 @@ impl LookSet { *self = self.intersect(other); } - /// Return a `LookSet` from the slice given as a native endian 16-bit + /// Return a `LookSet` from the slice given as a native endian 32-bit /// integer. /// /// # Panics /// - /// This panics if `slice.len() < 2`. + /// This panics if `slice.len() < 4`. #[inline] pub fn read_repr(slice: &[u8]) -> LookSet { - let bits = u16::from_ne_bytes(slice[..2].try_into().unwrap()); + let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap()); LookSet { bits } } - /// Write a `LookSet` as a native endian 16-bit integer to the beginning + /// Write a `LookSet` as a native endian 32-bit integer to the beginning /// of the slice given. /// /// # Panics /// - /// This panics if `slice.len() < 2`. + /// This panics if `slice.len() < 4`. #[inline] pub fn write_repr(self, slice: &mut [u8]) { let raw = self.bits.to_ne_bytes(); slice[0] = raw[0]; slice[1] = raw[1]; + slice[2] = raw[2]; + slice[3] = raw[3]; } /// Checks that all assertions in this set can be matched. @@ -456,9 +527,9 @@ impl Iterator for LookSetIter { return None; } // We'll never have more than u8::MAX distinct look-around assertions, - // so 'repr' will always fit into a u16. - let repr = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); - let look = Look::from_repr(1 << repr)?; + // so 'bit' will always fit into a u16. + let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); + let look = Look::from_repr(1 << bit)?; self.set = self.set.remove(look); Some(look) } @@ -566,6 +637,23 @@ impl LookMatcher { } /// Like `matches`, but forcefully inlined. + /// + /// # Panics + /// + /// This panics when testing any Unicode word boundary assertion in this + /// set and when the Unicode word data is not available. Specifically, this + /// only occurs when the `unicode-word-boundary` feature is not enabled. + /// + /// Since it's generally expected that this routine is called inside of + /// a matching engine, callers should check the error condition when + /// building the matching engine. If there is a Unicode word boundary + /// in the matcher and the data isn't available, then the matcher should + /// fail to build. + /// + /// Callers can check the error condition with [`LookSet::available`]. + /// + /// This also may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. 
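(Editor's note: the panic docs above recommend validating an assertion set when a matching engine is built, rather than panicking mid-search. A sketch of that pattern, assuming `LookSet::available` returns `Result<(), UnicodeWordBoundaryError>` as its mention above implies; `build_engine` is a hypothetical stand-in.)

use regex_automata::util::look::{LookSet, UnicodeWordBoundaryError};

fn build_engine(set: LookSet) -> Result<(), UnicodeWordBoundaryError> {
    // Fail at construction time if any assertion in the set (e.g., a
    // Unicode word boundary without its data tables) can't be evaluated.
    set.available()?;
    // ... build the engine here ...
    Ok(())
}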
#[cfg_attr(feature = "perf-inline", inline(always))] pub(crate) fn matches_inline( &self, @@ -586,6 +674,26 @@ impl LookMatcher { Look::WordUnicodeNegate => { self.is_word_unicode_negate(haystack, at).unwrap() } + Look::WordStartAscii => self.is_word_start_ascii(haystack, at), + Look::WordEndAscii => self.is_word_end_ascii(haystack, at), + Look::WordStartUnicode => { + self.is_word_start_unicode(haystack, at).unwrap() + } + Look::WordEndUnicode => { + self.is_word_end_unicode(haystack, at).unwrap() + } + Look::WordStartHalfAscii => { + self.is_word_start_half_ascii(haystack, at) + } + Look::WordEndHalfAscii => { + self.is_word_end_half_ascii(haystack, at) + } + Look::WordStartHalfUnicode => { + self.is_word_start_half_unicode(haystack, at).unwrap() + } + Look::WordEndHalfUnicode => { + self.is_word_end_half_unicode(haystack, at).unwrap() + } } } @@ -680,6 +788,46 @@ impl LookMatcher { return false; } } + if set.contains(Look::WordStartAscii) { + if !self.is_word_start_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordEndAscii) { + if !self.is_word_end_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordStartUnicode) { + if !self.is_word_start_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordEndUnicode) { + if !self.is_word_end_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordStartHalfAscii) { + if !self.is_word_start_half_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordEndHalfAscii) { + if !self.is_word_end_half_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordStartHalfUnicode) { + if !self.is_word_start_half_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordEndHalfUnicode) { + if !self.is_word_end_half_unicode(haystack, at).unwrap() { + return false; + } + } true } @@ -703,7 +851,15 @@ impl LookMatcher { Look::WordAscii | Look::WordAsciiNegate | Look::WordUnicode - | Look::WordUnicodeNegate => { + | Look::WordUnicodeNegate + | Look::WordStartAscii + | Look::WordEndAscii + | Look::WordStartUnicode + | Look::WordEndUnicode + | Look::WordStartHalfAscii + | Look::WordEndHalfAscii + | Look::WordStartHalfUnicode + | Look::WordEndHalfUnicode => { // We need to mark all ranges of bytes whose pairs result in // evaluating \b differently. This isn't technically correct // for Unicode word boundaries, but DFAs can't handle those @@ -931,6 +1087,177 @@ impl LookMatcher { }; Ok(word_before == word_after) } + + /// Returns true when [`Look::WordStartAscii`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_start_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + !word_before && word_after + } + + /// Returns true when [`Look::WordEndAscii`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. 
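(Editor's note: a self-contained sketch of the ASCII start/end word boundary predicates defined above; `is_word_byte` is a local stand-in for the crate's `utf8::is_word_byte`, which tests for `[0-9A-Za-z_]`.)

fn is_word_byte(b: u8) -> bool {
    b == b'_' || b.is_ascii_alphanumeric()
}

fn is_word_start(haystack: &[u8], at: usize) -> bool {
    let before = at > 0 && is_word_byte(haystack[at - 1]);
    let after = at < haystack.len() && is_word_byte(haystack[at]);
    !before && after
}

fn is_word_end(haystack: &[u8], at: usize) -> bool {
    let before = at > 0 && is_word_byte(haystack[at - 1]);
    let after = at < haystack.len() && is_word_byte(haystack[at]);
    before && !after
}

#[test]
fn ascii_start_end() {
    assert!(is_word_start(b" a ", 1));
    assert!(is_word_end(b" a ", 2));
    assert!(!is_word_start(b"ab", 1));
    assert!(!is_word_end(b"ab", 1));
}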
+ #[inline] + pub fn is_word_end_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + word_before && !word_after + } + + /// Returns true when [`Look::WordStartUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_start_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + let word_before = is_word_char::rev(haystack, at)?; + let word_after = is_word_char::fwd(haystack, at)?; + Ok(!word_before && word_after) + } + + /// Returns true when [`Look::WordEndUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_end_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + let word_before = is_word_char::rev(haystack, at)?; + let word_after = is_word_char::fwd(haystack, at)?; + Ok(word_before && !word_after) + } + + /// Returns true when [`Look::WordStartHalfAscii`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_start_half_ascii( + &self, + haystack: &[u8], + at: usize, + ) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + !word_before + } + + /// Returns true when [`Look::WordEndHalfAscii`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_end_half_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + !word_after + } + + /// Returns true when [`Look::WordStartHalfUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_start_half_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + // See `is_word_unicode_negate` for why we need to do this. We don't + // need to do it for `is_word_start_unicode` because that guarantees + // that the position matched falls on a valid UTF-8 boundary given + // that the right side must be in \w. 
+ let word_before = at > 0 + && match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::rev(haystack, at)?, + }; + Ok(!word_before) + } + + /// Returns true when [`Look::WordEndHalfUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_end_half_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + // See `is_word_unicode_negate` for why we need to do this. We don't + // need to do it for `is_word_end_unicode` because that guarantees + // that the position matched falls on a valid UTF-8 boundary given + // that the left side must be in \w. + let word_after = at < haystack.len() + && match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::fwd(haystack, at)?, + }; + Ok(!word_after) + } } impl Default for LookMatcher { @@ -1024,7 +1351,9 @@ impl core::fmt::Display for UnicodeWordBoundaryError { // There are perhaps other choices as well. Why did I stop at these 4? Because // I wanted to preserve my sanity. I suspect I'll wind up adding the lazy DFA // approach eventually, as the benefits of the DFA approach are somewhat -// compelling. The 'boundary-words-holmes' benchmark tests this: +// compelling. The 'boundary-words-holmes' benchmark tests this. (Note that +// the commands below no longer work. If necessary, we should re-capitulate +// the benchmark from whole cloth in rebar.) // // $ regex-cli bench measure -f boundary-words-holmes -e pikevm > dfa.csv // @@ -1322,8 +1651,7 @@ mod is_word_char { fn is_word_character(c: char) -> bool { use crate::util::{unicode_data::perl_word::PERL_WORD, utf8}; - // MSRV(1.59): Use 'u8::try_from(c)' instead. - if u8::try_from(u32::from(c)).map_or(false, utf8::is_word_byte) { + if u8::try_from(c).map_or(false, utf8::is_word_byte) { return true; } PERL_WORD @@ -1656,50 +1984,478 @@ mod tests { } #[test] - fn look_set() { - let mut f = LookSet::default(); - assert!(!f.contains(Look::Start)); - assert!(!f.contains(Look::End)); - assert!(!f.contains(Look::StartLF)); - assert!(!f.contains(Look::EndLF)); - assert!(!f.contains(Look::WordUnicode)); - assert!(!f.contains(Look::WordUnicodeNegate)); - assert!(!f.contains(Look::WordAscii)); - assert!(!f.contains(Look::WordAsciiNegate)); + fn look_matches_word_start_ascii() { + let look = Look::WordStartAscii; - f = f.insert(Look::Start); - assert!(f.contains(Look::Start)); - f = f.remove(Look::Start); - assert!(!f.contains(Look::Start)); + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) - f = f.insert(Look::End); - assert!(f.contains(Look::End)); - f = f.remove(Look::End); - assert!(!f.contains(Look::End)); + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); - f = f.insert(Look::StartLF); - assert!(f.contains(Look::StartLF)); - f = f.remove(Look::StartLF); - assert!(!f.contains(Look::StartLF)); + // Unicode word boundaries with a non-ASCII codepoint. 
Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); - f = f.insert(Look::EndLF); - assert!(f.contains(Look::EndLF)); - f = f.remove(Look::EndLF); - assert!(!f.contains(Look::EndLF)); + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); - f = f.insert(Look::StartCRLF); - assert!(f.contains(Look::StartCRLF)); - f = f.remove(Look::StartCRLF); - assert!(!f.contains(Look::StartCRLF)); + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); - f = f.insert(Look::EndCRLF); - assert!(f.contains(Look::EndCRLF)); - f = f.remove(Look::EndCRLF); - assert!(!f.contains(Look::EndCRLF)); + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); - f = f.insert(Look::WordUnicode); - assert!(f.contains(Look::WordUnicode)); - f = f.remove(Look::WordUnicode); + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_end_ascii() { + let look = Look::WordEndAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_start_unicode() { + let look = Look::WordStartUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_end_unicode() { + let look = Look::WordEndUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_start_half_ascii() { + let look = Look::WordStartHalfAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_end_half_ascii() { + let look = Look::WordEndHalfAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_start_half_unicode() { + let look = Look::WordStartHalfUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_end_half_unicode() { + let look = Look::WordEndHalfUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_set() { + let mut f = LookSet::default(); + assert!(!f.contains(Look::Start)); + assert!(!f.contains(Look::End)); + assert!(!f.contains(Look::StartLF)); + assert!(!f.contains(Look::EndLF)); + assert!(!f.contains(Look::WordUnicode)); + assert!(!f.contains(Look::WordUnicodeNegate)); + assert!(!f.contains(Look::WordAscii)); + assert!(!f.contains(Look::WordAsciiNegate)); + + f = f.insert(Look::Start); + assert!(f.contains(Look::Start)); + f = f.remove(Look::Start); + assert!(!f.contains(Look::Start)); + + f = f.insert(Look::End); + assert!(f.contains(Look::End)); + f = f.remove(Look::End); + assert!(!f.contains(Look::End)); + + f = f.insert(Look::StartLF); + assert!(f.contains(Look::StartLF)); + f = f.remove(Look::StartLF); + assert!(!f.contains(Look::StartLF)); + + f = f.insert(Look::EndLF); + assert!(f.contains(Look::EndLF)); + f = f.remove(Look::EndLF); + assert!(!f.contains(Look::EndLF)); + + f = f.insert(Look::StartCRLF); + assert!(f.contains(Look::StartCRLF)); + f = f.remove(Look::StartCRLF); + assert!(!f.contains(Look::StartCRLF)); + + f = f.insert(Look::EndCRLF); + assert!(f.contains(Look::EndCRLF)); + f = f.remove(Look::EndCRLF); + assert!(!f.contains(Look::EndCRLF)); + + f = f.insert(Look::WordUnicode); + assert!(f.contains(Look::WordUnicode)); + f = f.remove(Look::WordUnicode); assert!(!f.contains(Look::WordUnicode)); f = f.insert(Look::WordUnicodeNegate); @@ -1716,6 +2472,46 @@ mod tests { assert!(f.contains(Look::WordAsciiNegate)); f = f.remove(Look::WordAsciiNegate); assert!(!f.contains(Look::WordAsciiNegate)); + + f = f.insert(Look::WordStartAscii); + assert!(f.contains(Look::WordStartAscii)); + f = f.remove(Look::WordStartAscii); + assert!(!f.contains(Look::WordStartAscii)); + + f = f.insert(Look::WordEndAscii); + assert!(f.contains(Look::WordEndAscii)); + f = f.remove(Look::WordEndAscii); + assert!(!f.contains(Look::WordEndAscii)); + + f = f.insert(Look::WordStartUnicode); + assert!(f.contains(Look::WordStartUnicode)); + f = f.remove(Look::WordStartUnicode); + assert!(!f.contains(Look::WordStartUnicode)); + + f = f.insert(Look::WordEndUnicode); + assert!(f.contains(Look::WordEndUnicode)); + f = f.remove(Look::WordEndUnicode); + assert!(!f.contains(Look::WordEndUnicode)); + + f = f.insert(Look::WordStartHalfAscii); + assert!(f.contains(Look::WordStartHalfAscii)); + f = f.remove(Look::WordStartHalfAscii); + assert!(!f.contains(Look::WordStartHalfAscii)); + + f = f.insert(Look::WordEndHalfAscii); + assert!(f.contains(Look::WordEndHalfAscii)); + f = f.remove(Look::WordEndHalfAscii); + assert!(!f.contains(Look::WordEndHalfAscii)); + + f = f.insert(Look::WordStartHalfUnicode); + assert!(f.contains(Look::WordStartHalfUnicode)); + f = f.remove(Look::WordStartHalfUnicode); + assert!(!f.contains(Look::WordStartHalfUnicode)); + + f = f.insert(Look::WordEndHalfUnicode); + assert!(f.contains(Look::WordEndHalfUnicode)); + f = 
f.remove(Look::WordEndHalfUnicode); + assert!(!f.contains(Look::WordEndHalfUnicode)); } #[test] @@ -1724,7 +2520,7 @@ mod tests { assert_eq!(0, set.iter().count()); let set = LookSet::full(); - assert_eq!(10, set.iter().count()); + assert_eq!(18, set.iter().count()); let set = LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode); @@ -1735,6 +2531,9 @@ mod tests { let set = LookSet::empty().insert(Look::WordAsciiNegate); assert_eq!(1, set.iter().count()); + + let set = LookSet::empty().insert(Look::WordEndHalfUnicode); + assert_eq!(1, set.iter().count()); } #[test] @@ -1743,6 +2542,6 @@ mod tests { let res = alloc::format!("{:?}", LookSet::empty()); assert_eq!("∅", res); let res = alloc::format!("{:?}", LookSet::full()); - assert_eq!("Az^$rRbB𝛃𝚩", res); + assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res); } } diff --git a/regex-automata/src/util/mod.rs b/regex-automata/src/util/mod.rs index bb739df1df..b3eef64e64 100644 --- a/regex-automata/src/util/mod.rs +++ b/regex-automata/src/util/mod.rs @@ -40,6 +40,7 @@ pub mod look; pub mod pool; pub mod prefilter; pub mod primitives; +pub mod start; #[cfg(feature = "syntax")] pub mod syntax; pub mod wire; @@ -52,6 +53,5 @@ pub(crate) mod memchr; pub(crate) mod search; #[cfg(feature = "alloc")] pub(crate) mod sparse_set; -pub(crate) mod start; pub(crate) mod unicode_data; pub(crate) mod utf8; diff --git a/regex-automata/src/util/pool.rs b/regex-automata/src/util/pool.rs index 7f4a1c21e2..d90d4ecffa 100644 --- a/regex-automata/src/util/pool.rs +++ b/regex-automata/src/util/pool.rs @@ -177,6 +177,7 @@ impl T> Pool { /// the value to go back into the pool) and then calling get again is /// *not* guaranteed to return the same value received in the first `get` /// call. + #[inline] pub fn get(&self) -> PoolGuard<'_, T, F> { PoolGuard(self.0.get()) } @@ -200,6 +201,7 @@ impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { /// This circumvents the guard's `Drop` implementation. This can be useful /// in circumstances where the automatic `Drop` results in poorer codegen, /// such as calling non-inlined functions. + #[inline] pub fn put(this: PoolGuard<'_, T, F>) { inner::PoolGuard::put(this.0); } @@ -208,12 +210,14 @@ impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { impl<'a, T: Send, F: Fn() -> T> core::ops::Deref for PoolGuard<'a, T, F> { type Target = T; + #[inline] fn deref(&self) -> &T { self.0.value() } } impl<'a, T: Send, F: Fn() -> T> core::ops::DerefMut for PoolGuard<'a, T, F> { + #[inline] fn deref_mut(&mut self) -> &mut T { self.0.value_mut() } @@ -268,6 +272,64 @@ mod inner { /// do. static THREAD_ID_DROPPED: usize = 2; + /// The number of stacks we use inside of the pool. These are only used for + /// non-owners. That is, these represent the "slow" path. + /// + /// In the original implementation of this pool, we only used a single + /// stack. While this might be okay for a couple threads, the prevalence of + /// 32, 64 and even 128 core CPUs has made it untenable. The contention + /// such an environment introduces when threads are doing a lot of searches + /// on short haystacks (a not uncommon use case) is palpable and leads to + /// huge slowdowns. + /// + /// This constant reflects a change from using one stack to the number of + /// stacks that this constant is set to. The stack for a particular thread + /// is simply chosen by `thread_id % MAX_POOL_STACKS`. 
The idea behind + /// this setup is that there should be a good chance that accesses to the + /// pool will be distributed over several stacks instead of all of them + /// converging to one. + /// + /// This is not a particularly smart or dynamic strategy. Fixing this to a + /// specific number has at least two downsides. First is that it will help, + /// say, an 8 core CPU more than it will a 128 core CPU. (But, crucially, + /// it will still help the 128 core case.) Second is that this may wind + /// up being a little wasteful with respect to memory usage. Namely, if a + /// regex is used on one thread and then moved to another thread, then it + /// could result in creating a new copy of the data in the pool even though + /// only one is actually needed. + /// + /// And that memory usage bit is why this is set to 8 and not, say, 64. + /// Keeping it at 8 limits, to an extent, how much unnecessary memory can + /// be allocated. + /// + /// In an ideal world, we'd be able to have something like this: + /// + /// * Grow the number of stacks as the number of concurrent callers + /// increases. I spent a little time trying this, but even just adding an + /// atomic addition/subtraction for each pop/push for tracking concurrent + /// callers led to a big perf hit. Since even more work would seemingly be + /// required than just an addition/subtraction, I abandoned this approach. + /// * The maximum amount of memory used should scale with respect to the + /// number of concurrent callers and *not* the total number of existing + /// threads. This is primarily why the `thread_local` crate isn't used, as + /// as some environments spin up a lot of threads. This led to multiple + /// reports of extremely high memory usage (often described as memory + /// leaks). + /// * Even more ideally, the pool should contract in size. That is, it + /// should grow with bursts and then shrink. But this is a pretty thorny + /// issue to tackle and it might be better to just not. + /// * It would be nice to explore the use of, say, a lock-free stack + /// instead of using a mutex to guard a `Vec` that is ultimately just + /// treated as a stack. The main thing preventing me from exploring this + /// is the ABA problem. The `crossbeam` crate has tools for dealing with + /// this sort of problem (via its epoch based memory reclamation strategy), + /// but I can't justify bringing in all of `crossbeam` as a dependency of + /// `regex` for this. + /// + /// See this issue for more context and discussion: + /// https://github.com/rust-lang/regex/issues/934 + const MAX_POOL_STACKS: usize = 8; + thread_local!( /// A thread local used to assign an ID to a thread. static THREAD_ID: usize = { @@ -291,6 +353,17 @@ mod inner { }; ); + /// This puts each stack in the pool below into its own cache line. This is + /// an absolutely critical optimization that tends to have the most impact + /// in high contention workloads. Without forcing each mutex protected + /// into its own cache line, high contention exacerbates the performance + /// problem by causing "false sharing." By putting each mutex in its own + /// cache-line, we avoid the false sharing problem and the affects of + /// contention are greatly reduced. + #[derive(Debug)] + #[repr(C, align(64))] + struct CacheLine(T); + /// A thread safe pool utilizing std-only features. 
/// /// The main difference between this and the simplistic alloc-only pool is @@ -299,12 +372,16 @@ mod inner { /// This makes the common case of running a regex within a single thread /// faster by avoiding mutex unlocking. pub(super) struct Pool { - /// A stack of T values to hand out. These are used when a Pool is - /// accessed by a thread that didn't create it. - stack: Mutex>>, /// A function to create more T values when stack is empty and a caller /// has requested a T. create: F, + /// Multiple stacks of T values to hand out. These are used when a Pool + /// is accessed by a thread that didn't create it. + /// + /// Conceptually this is `Mutex>>`, but sharded out to make + /// it scale better under high contention work-loads. We index into + /// this sequence via `thread_id % stacks.len()`. + stacks: Vec>>>>, /// The ID of the thread that owns this pool. The owner is the thread /// that makes the first call to 'get'. When the owner calls 'get', it /// gets 'owner_val' directly instead of returning a T from 'stack'. @@ -354,9 +431,17 @@ mod inner { unsafe impl Sync for Pool {} // If T is UnwindSafe, then since we provide exclusive access to any - // particular value in the pool, it should therefore also be considered - // RefUnwindSafe. Also, since we use std::sync::Mutex, we get poisoning - // from it if another thread panics while the lock is held. + // particular value in the pool, the pool should therefore also be + // considered UnwindSafe. + // + // We require `F: UnwindSafe + RefUnwindSafe` because we call `F` at any + // point on demand, so it needs to be unwind safe on both dimensions for + // the entire Pool to be unwind safe. + impl UnwindSafe for Pool {} + + // If T is UnwindSafe, then since we provide exclusive access to any + // particular value in the pool, the pool should therefore also be + // considered RefUnwindSafe. // // We require `F: UnwindSafe + RefUnwindSafe` because we call `F` at any // point on demand, so it needs to be unwind safe on both dimensions for @@ -370,20 +455,58 @@ mod inner { /// Create a new pool. The given closure is used to create values in /// the pool when necessary. pub(super) fn new(create: F) -> Pool { - // MSRV(1.63): Mark this function as 'const'. I've arranged the - // code such that it should "just work." Then mark the public - // 'Pool::new' method as 'const' too. (The alloc-only Pool::new - // is already 'const', so that should "just work" too.) The only - // thing we're waiting for is Mutex::new to be const. + // FIXME: Now that we require 1.65+, Mutex::new is available as + // const... So we can almost mark this function as const. But of + // course, we're creating a Vec of stacks below (we didn't when I + // originally wrote this code). It seems like the best way to work + // around this would be to use a `[Stack; MAX_POOL_STACKS]` instead + // of a `Vec`. I refrained from making this change at time + // of writing (2023/10/08) because I was making a lot of other + // changes at the same time and wanted to do this more carefully. + // Namely, because of the cache line optimization, that `[Stack; + // MAX_POOL_STACKS]` would be quite big. It's unclear how bad (if + // at all) that would be. + // + // Another choice would be to lazily allocate the stacks, but... + // I'm not so sure about that. Seems like a fair bit of complexity? + // + // Maybe there's a simple solution I'm missing. + // + // ... OK, I tried to fix this. First, I did it by putting `stacks` + // in an `UnsafeCell` and using a `Once` to lazily initialize it. 
+ // I benchmarked it and everything looked okay. I then made this + // function `const` and thought I was just about done. But the + // public pool type wraps its inner pool in a `Box` to keep its + // size down. Blech. + // + // So then I thought that I could push the box down into this + // type (and leave the non-std version unboxed) and use the same + // `UnsafeCell` technique to lazily initialize it. This has the + // downside of the `Once` now needing to get hit in the owner fast + // path, but maybe that's OK? However, I then realized that we can + // only lazily initialize `stacks`, `owner` and `owner_val`. The + // `create` function needs to be put somewhere outside of the box. + // So now the pool is a `Box`, `Once` and a function. Now we're + // starting to defeat the point of boxing in the first place. So I + // backed out that change too. + // + // Back to square one. Maybe we just don't make a pool's + // constructor const and live with it. It's probably not a huge + // deal. + let mut stacks = Vec::with_capacity(MAX_POOL_STACKS); + for _ in 0..stacks.capacity() { + stacks.push(CacheLine(Mutex::new(vec![]))); + } let owner = AtomicUsize::new(THREAD_ID_UNOWNED); let owner_val = UnsafeCell::new(None); // init'd on first access - Pool { stack: Mutex::new(vec![]), create, owner, owner_val } + Pool { create, stacks, owner, owner_val } } } impl<T: Send, F: Fn() -> T> Pool<T, F> { /// Get a value from the pool. This may block if another thread is also /// attempting to retrieve a value from the pool. + #[inline] pub(super) fn get(&self) -> PoolGuard<'_, T, F> { // Our fast path checks if the caller is the thread that "owns" // this pool. Or stated differently, whether it is the first thread @@ -401,6 +524,9 @@ mod inner { let caller = THREAD_ID.with(|id| *id); let owner = self.owner.load(Ordering::Acquire); if caller == owner { + // N.B. We could also do a CAS here instead of a load/store, + // but ad hoc benchmarking suggests it is slower. And a lot + // slower in the case where `get_slow` is common. self.owner.store(THREAD_ID_INUSE, Ordering::Release); return self.guard_owned(caller); } @@ -444,37 +570,86 @@ mod inner { return self.guard_owned(caller); } } - let mut stack = self.stack.lock().unwrap(); - let value = match stack.pop() { - None => Box::new((self.create)()), - Some(value) => value, - }; - self.guard_stack(value) + let stack_id = caller % self.stacks.len(); + // We try to acquire exclusive access to this thread's stack, and + // if so, grab a value from it if we can. We put this in a loop so + // that it's easy to tweak and experiment with a different number + // of tries. In the end, I couldn't see anything obviously better + // than one attempt in ad hoc testing. + for _ in 0..1 { + let mut stack = match self.stacks[stack_id].0.try_lock() { + Err(_) => continue, + Ok(stack) => stack, + }; + if let Some(value) = stack.pop() { + return self.guard_stack(value); + } + // Unlock the mutex guarding the stack before creating a fresh + // value since we no longer need the stack. + drop(stack); + let value = Box::new((self.create)()); + return self.guard_stack(value); + } + // We're only here if we couldn't get access to our stack, so just + // create a new value. This seems like it could be wasteful, but + // waiting for exclusive access to a stack when there's high + // contention is brutal for perf. + self.guard_stack_transient(Box::new((self.create)())) } /// Puts a value back into the pool. Callers don't need to call this.
/// Once the guard that's returned by 'get' is dropped, it is put back /// into the pool automatically. + #[inline] fn put_value(&self, value: Box<T>) { - let mut stack = self.stack.lock().unwrap(); - stack.push(value); + let caller = THREAD_ID.with(|id| *id); + let stack_id = caller % self.stacks.len(); + // As with trying to pop a value from this thread's stack, we + // merely attempt to get access to push this value back on the + // stack. If there's too much contention, we just give up and throw + // the value away. + // + // Interestingly, in ad hoc benchmarking, it is beneficial to + // attempt to push the value back more than once, unlike when + // popping the value. I don't have a good theory for why this is. + // I guess if we drop too many values then that winds up forcing + // the pop operation to create new fresh values and thus leads to + // less reuse. There's definitely a balancing act here. + for _ in 0..10 { + let mut stack = match self.stacks[stack_id].0.try_lock() { + Err(_) => continue, + Ok(stack) => stack, + }; + stack.push(value); + return; + } } /// Create a guard that represents the special owned T. + #[inline] fn guard_owned(&self, caller: usize) -> PoolGuard<'_, T, F> { - PoolGuard { pool: self, value: Err(caller) } + PoolGuard { pool: self, value: Err(caller), discard: false } } /// Create a guard that contains a value from the pool's stack. + #[inline] fn guard_stack(&self, value: Box<T>) -> PoolGuard<'_, T, F> { - PoolGuard { pool: self, value: Ok(value) } + PoolGuard { pool: self, value: Ok(value), discard: false } + } + + /// Create a guard that contains a value from the pool's stack with an + /// instruction to throw away the value instead of putting it back + /// into the pool. + #[inline] + fn guard_stack_transient(&self, value: Box<T>) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Ok(value), discard: true } } } impl<T: core::fmt::Debug, F> core::fmt::Debug for Pool<T, F> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("Pool") - .field("stack", &self.stack) + .field("stacks", &self.stacks) .field("owner", &self.owner) .field("owner_val", &self.owner_val) .finish() @@ -490,10 +665,17 @@ /// in the special case of `Err(THREAD_ID_DROPPED)`, it means the /// guard has been put back into the pool and should no longer be used. value: Result<Box<T>, usize>, + /// When true, the value should be discarded instead of being pushed + /// back into the pool. We tend to use this under high contention, and + /// this allows us to avoid inflating the size of the pool. (Because + /// under contention, we tend to create more values instead of waiting + /// for access to a stack of existing values.) + discard: bool, } impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { /// Return the underlying value. + #[inline] pub(super) fn value(&self) -> &T { match self.value { Ok(ref v) => &**v, @@ -518,6 +700,7 @@ } /// Return the underlying value as a mutable borrow. + #[inline] pub(super) fn value_mut(&mut self) -> &mut T { match self.value { Ok(ref mut v) => &mut **v, @@ -542,6 +725,7 @@ } /// Consumes this guard and puts it back into the pool.
+ #[inline] pub(super) fn put(this: PoolGuard<'_, T, F>) { // Since this is effectively consuming the guard and putting the // value back into the pool, there's no reason to run its Drop @@ -557,7 +741,17 @@ #[inline(always)] fn put_imp(&mut self) { match core::mem::replace(&mut self.value, Err(THREAD_ID_DROPPED)) { - Ok(value) => self.pool.put_value(value), + Ok(value) => { + // If we were told to discard this value then don't bother + // trying to put it back into the pool. This occurs when + // the pop operation failed to acquire a lock and we + // decided to create a new value in lieu of contending for + // the lock. + if self.discard { + return; + } + self.pool.put_value(value); + } // If this guard has a value "owned" by the thread, then // the Pool guarantees that this is the ONLY such guard. // Therefore, in order to place it back into the pool and make @@ -580,6 +774,7 @@ } impl<'a, T: Send, F: Fn() -> T> Drop for PoolGuard<'a, T, F> { + #[inline] fn drop(&mut self) { self.put_imp(); } @@ -657,6 +852,7 @@ impl<T: Send, F: Fn() -> T> Pool<T, F> { /// Get a value from the pool. This may block if another thread is also /// attempting to retrieve a value from the pool. + #[inline] pub(super) fn get(&self) -> PoolGuard<'_, T, F> { let mut stack = self.stack.lock(); let value = match stack.pop() { @@ -666,6 +862,7 @@ PoolGuard { pool: self, value: Some(value) } } + #[inline] fn put(&self, guard: PoolGuard<'_, T, F>) { let mut guard = core::mem::ManuallyDrop::new(guard); if let Some(value) = guard.value.take() { @@ -676,6 +873,7 @@ /// Puts a value back into the pool. Callers don't need to call this. /// Once the guard that's returned by 'get' is dropped, it is put back /// into the pool automatically. + #[inline] fn put_value(&self, value: Box<T>) { let mut stack = self.stack.lock(); stack.push(value); @@ -698,16 +896,19 @@ impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { /// Return the underlying value. + #[inline] pub(super) fn value(&self) -> &T { self.value.as_deref().unwrap() } /// Return the underlying value as a mutable borrow. + #[inline] pub(super) fn value_mut(&mut self) -> &mut T { self.value.as_deref_mut().unwrap() } /// Consumes this guard and puts it back into the pool. + #[inline] pub(super) fn put(this: PoolGuard<'_, T, F>) { // Since this is effectively consuming the guard and putting the // value back into the pool, there's no reason to run its Drop @@ -729,6 +930,7 @@ } impl<'a, T: Send, F: Fn() -> T> Drop for PoolGuard<'a, T, F> { + #[inline] fn drop(&mut self) { self.put_imp(); } @@ -782,6 +984,7 @@ /// Lock this mutex and return a guard providing exclusive access to /// `T`. This blocks if some other thread has already locked this /// mutex. + #[inline] fn lock(&self) -> MutexGuard<'_, T> { while self .locked @@ -814,18 +1017,21 @@ impl<'a, T> core::ops::Deref for MutexGuard<'a, T> { type Target = T; + #[inline] fn deref(&self) -> &T { self.data } } impl<'a, T> core::ops::DerefMut for MutexGuard<'a, T> { + #[inline] fn deref_mut(&mut self) -> &mut T { self.data } } impl<'a, T> Drop for MutexGuard<'a, T> { + #[inline] fn drop(&mut self) { // Drop means 'data' is no longer accessible, so we can unlock // the mutex.
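
As an aside, the sharding technique in the pool changes above distills to a small, self-contained sketch. The `ShardedStack` type and `main` harness below are illustrative only (they are not part of the regex source); the cache-line padding and the `try_lock` fallbacks mirror what the patch does.

```rust
use std::sync::Mutex;

// A fixed shard count, analogous to MAX_POOL_STACKS above.
const SHARDS: usize = 8;

// Padding each mutex out to a cache line avoids false sharing between
// shards under high contention.
#[repr(C, align(64))]
struct CacheLine<T>(T);

struct ShardedStack<T> {
    stacks: Vec<CacheLine<Mutex<Vec<T>>>>,
}

impl<T> ShardedStack<T> {
    fn new() -> ShardedStack<T> {
        let mut stacks = Vec::with_capacity(SHARDS);
        for _ in 0..SHARDS {
            stacks.push(CacheLine(Mutex::new(vec![])));
        }
        ShardedStack { stacks }
    }

    /// Pop a value from this thread's shard. Under contention, give up
    /// immediately instead of blocking; the caller creates a fresh value.
    fn pop(&self, thread_id: usize) -> Option<T> {
        let shard = thread_id % self.stacks.len();
        self.stacks[shard].0.try_lock().ok()?.pop()
    }

    /// Push a value back onto this thread's shard. If the shard is
    /// contended, the value is simply dropped, which bounds pool growth.
    fn push(&self, thread_id: usize, value: T) {
        let shard = thread_id % self.stacks.len();
        if let Ok(mut stack) = self.stacks[shard].0.try_lock() {
            stack.push(value);
        }
    }
}

fn main() {
    let pool = ShardedStack::new();
    pool.push(0, String::from("scratch space"));
    assert_eq!(pool.pop(0), Some(String::from("scratch space")));
}
```

The key design choice, as the comments in the patch explain, is to never wait on a lock: a failed `try_lock` on pop falls back to fresh allocation, and a failed `try_lock` on push throws the value away.
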
diff --git a/regex-automata/src/util/prefilter/aho_corasick.rs b/regex-automata/src/util/prefilter/aho_corasick.rs index a7474d29ab..50cce827ee 100644 --- a/regex-automata/src/util/prefilter/aho_corasick.rs +++ b/regex-automata/src/util/prefilter/aho_corasick.rs @@ -22,11 +22,20 @@ impl AhoCorasick { } #[cfg(feature = "perf-literal-multisubstring")] { + // We used to use `aho_corasick::MatchKind::Standard` here when + // `kind` was `MatchKind::All`, but this is not correct. The + // "standard" Aho-Corasick match semantics are to report a match + // immediately as soon as it is seen, but `All` isn't like that. + // In particular, with "standard" semantics, given the needles + // "abc" and "b" and the haystack "abc", it would report a match + // at offset 1 before a match at offset 0. This is never what we + // want in the context of the regex engine, regardless of whether + // we have leftmost-first or 'all' semantics. Namely, we always + // want the leftmost match. let ac_match_kind = match kind { - MatchKind::LeftmostFirst => { + MatchKind::LeftmostFirst | MatchKind::All => { aho_corasick::MatchKind::LeftmostFirst } - MatchKind::All => aho_corasick::MatchKind::Standard, }; // This is kind of just an arbitrary number, but basically, if we // have a small enough set of literals, then we try to use the VERY diff --git a/regex-automata/src/util/prefilter/mod.rs b/regex-automata/src/util/prefilter/mod.rs index ea3eb73d8c..51fc922337 100644 --- a/regex-automata/src/util/prefilter/mod.rs +++ b/regex-automata/src/util/prefilter/mod.rs @@ -195,15 +195,6 @@ impl Prefilter { /// Some(Span::from(6..9)), /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), /// ); - /// // Now we put 'samwise' back before 'sam', but change the match - /// // semantics to 'All'. In this case, there is no preference - /// // order semantics and the first match detected is returned. - /// let pre = Prefilter::new(MatchKind::All, &["samwise", "sam"]) - /// .expect("a prefilter"); - /// assert_eq!( - /// Some(Span::from(6..9)), - /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), - /// ); /// /// # Ok::<(), Box<dyn std::error::Error>>(()) /// ``` diff --git a/regex-automata/src/util/prefilter/teddy.rs b/regex-automata/src/util/prefilter/teddy.rs index 02210a5eca..fc79f2b2f3 100644 --- a/regex-automata/src/util/prefilter/teddy.rs +++ b/regex-automata/src/util/prefilter/teddy.rs @@ -50,12 +50,17 @@ impl Teddy { // theory we could at least support leftmost-longest, as the // aho-corasick crate does, but regex-automata doesn't know about // leftmost-longest currently. + // + // And like the aho-corasick prefilter, if we're using `All` + // semantics, then we can still use leftmost semantics for a + // prefilter. (This might be a suspicious choice for the literal + // engine, which uses a prefilter as a regex engine directly, but + // that only happens when using leftmost-first semantics.)
let (packed_match_kind, ac_match_kind) = match kind { - MatchKind::LeftmostFirst => ( + MatchKind::LeftmostFirst | MatchKind::All => ( aho_corasick::packed::MatchKind::LeftmostFirst, aho_corasick::MatchKind::LeftmostFirst, ), - _ => return None, }; let minimum_len = needles.iter().map(|n| n.as_ref().len()).min().unwrap_or(0); diff --git a/regex-automata/src/util/search.rs b/regex-automata/src/util/search.rs index b7bf934ea9..39aec522be 100644 --- a/regex-automata/src/util/search.rs +++ b/regex-automata/src/util/search.rs @@ -246,7 +246,7 @@ impl<'h> Input<'h> { /// When a search is anchored (so that's [`Anchored::Yes`] or /// [`Anchored::Pattern`]), a match must begin at the start of a search. /// When a search is not anchored (that's [`Anchored::No`]), regex engines - /// will behave as if the pattern started with a `(?:s-u.)*?`. This prefix + /// will behave as if the pattern started with a `(?s-u:.)*?`. This prefix /// permits a match to appear anywhere. /// /// By default, the anchored mode is [`Anchored::No`]. diff --git a/regex-automata/src/util/start.rs b/regex-automata/src/util/start.rs index 4e360d083a..27153780ec 100644 --- a/regex-automata/src/util/start.rs +++ b/regex-automata/src/util/start.rs @@ -1,17 +1,195 @@ /*! -Provides some helpers for dealing with start state configurations in DFAs. - -[`Start`] represents the possible starting configurations, while -[`StartByteMap`] represents a way to retrieve the `Start` configuration for a -given position in a haystack. +Provides helpers for dealing with start state configurations in DFAs. */ use crate::util::{ look::LookMatcher, - search::Input, + search::{Anchored, Input}, wire::{self, DeserializeError, SerializeError}, }; +/// The configuration used to determine a DFA's start state for a search. +/// +/// A DFA has a single starting state in the typical textbook description. That +/// is, it corresponds to the set of all starting states for the NFA that built +/// it, along with their epsilon closures. In this crate, however, DFAs have +/// many possible start states due to a few factors: +/// +/// * DFAs support the ability to run either anchored or unanchored searches. +/// Each type of search needs its own start state. For example, an unanchored +/// search requires starting at a state corresponding to a regex with a +/// `(?s-u:.)*?` prefix, which will match through anything. +/// * DFAs also optionally support starting an anchored search for any one +/// specific pattern. Each such pattern requires its own start state. +/// * If a look-behind assertion like `^` or `\b` is used in the regex, then +/// the DFA will need to inspect a single byte immediately before the start of +/// the search to choose the correct start state. +/// +/// Indeed, this configuration precisely encapsulates all of the above factors. +/// The [`Config::anchored`] method sets which kind of anchored search to +/// perform while the [`Config::look_behind`] method provides a way to set +/// the byte that occurs immediately before the start of the search. +/// +/// Generally speaking, this type is only useful when you want to run searches +/// without using an [`Input`]. In particular, an `Input` wants a haystack +/// slice, but callers may not have a contiguous sequence of bytes as a +/// haystack in all cases. This type provides a lower level of control such +/// that callers can provide their own anchored configuration and look-behind +/// byte explicitly.
+/// +/// # Example +/// +/// This shows basic usage that permits running a search with a DFA without +/// using the `Input` abstraction. +/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// let config = start::Config::new().anchored(Anchored::Yes); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter() { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// assert!(dfa.is_match_state(state)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// This example shows how to correctly run a search that doesn't begin at +/// the start of a haystack. Notice how we set the look-behind byte, and as +/// a result, the `\b` assertion does not match. +/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// let config = start::Config::new() +/// .anchored(Anchored::Yes) +/// .look_behind(Some(b'q')); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter().skip(1) { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// // No match! +/// assert!(!dfa.is_match_state(state)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// If we had instead not set a look-behind byte, then the DFA would assume +/// that it was starting at the beginning of the haystack, and thus `\b` should +/// match. This in turn would result in erroneously reporting a match: +/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// // Whoops, forgot the look-behind byte... +/// let config = start::Config::new().anchored(Anchored::Yes); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter().skip(1) { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// // And now we get a match unexpectedly. +/// assert!(dfa.is_match_state(state)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Config { + look_behind: Option<u8>, + anchored: Anchored, +} + +impl Config { + /// Create a new default start configuration. + /// + /// The default is an unanchored search that starts at the beginning of the + /// haystack. + pub fn new() -> Config { + Config { anchored: Anchored::No, look_behind: None } + } + + /// A convenience routine for building a start configuration from an + /// [`Input`] for a forward search. + /// + /// This automatically sets the look-behind byte to the byte immediately + /// preceding the start of the search. If the start of the search is at + /// offset `0`, then no look-behind byte is set. + pub fn from_input_forward(input: &Input<'_>) -> Config { + let look_behind = input + .start() + .checked_sub(1) + .and_then(|i| input.haystack().get(i).copied()); + Config { look_behind, anchored: input.get_anchored() } + } + + /// A convenience routine for building a start configuration from an + /// [`Input`] for a reverse search. + /// + /// This automatically sets the look-behind byte to the byte immediately + /// following the end of the search. If the end of the search is at + /// offset `haystack.len()`, then no look-behind byte is set.
+ pub fn from_input_reverse(input: &Input<'_>) -> Config { + let look_behind = input.haystack().get(input.end()).copied(); + Config { look_behind, anchored: input.get_anchored() } + } + + /// Set the look-behind byte at the start of a search. + /// + /// Unless the search is intended to logically start at the beginning of a + /// haystack, this should _always_ be set to the byte immediately preceding + /// the start of the search. If no look-behind byte is set, then the start + /// configuration will assume it is at the beginning of the haystack. For + /// example, the anchor `^` will match. + /// + /// The default is that no look-behind byte is set. + pub fn look_behind(mut self, byte: Option<u8>) -> Config { + self.look_behind = byte; + self + } + + /// Set the anchored mode of a search. + /// + /// The default is an unanchored search. + pub fn anchored(mut self, mode: Anchored) -> Config { + self.anchored = mode; + self + } + + /// Return the look-behind byte in this configuration, if one exists. + pub fn get_look_behind(&self) -> Option<u8> { + self.look_behind + } + + /// Return the anchored mode in this configuration. + pub fn get_anchored(&self) -> Anchored { + self.anchored + } +} + /// A map from every possible byte value to its corresponding starting /// configuration. /// @@ -71,30 +249,11 @@ impl StartByteMap { StartByteMap { map } } - /// Return the forward starting configuration for the given `input`. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub(crate) fn fwd(&self, input: &Input) -> Start { - match input - .start() - .checked_sub(1) - .and_then(|i| input.haystack().get(i)) - { - None => Start::Text, - Some(&byte) => self.get(byte), - } - } - - /// Return the reverse starting configuration for the given `input`. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub(crate) fn rev(&self, input: &Input) -> Start { - match input.haystack().get(input.end()) { - None => Start::Text, - Some(&byte) => self.get(byte), - } - } - + /// Return the starting configuration for the given look-behind byte. + /// + /// If no look-behind exists, callers should use `Start::Text`.
#[cfg_attr(feature = "perf-inline", inline(always))] - fn get(&self, byte: u8) -> Start { + pub(crate) fn get(&self, byte: u8) -> Start { self.map[usize::from(byte)] } @@ -253,21 +412,32 @@ mod tests { #[test] fn start_fwd_done_range() { let smap = StartByteMap::new(&LookMatcher::default()); - assert_eq!(Start::Text, smap.fwd(&Input::new("").range(1..0))); + let input = Input::new("").range(1..0); + let config = Config::from_input_forward(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + assert_eq!(Start::Text, start); } #[test] fn start_rev_done_range() { let smap = StartByteMap::new(&LookMatcher::default()); - assert_eq!(Start::Text, smap.rev(&Input::new("").range(1..0))); + let input = Input::new("").range(1..0); + let config = Config::from_input_reverse(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + assert_eq!(Start::Text, start); } #[test] fn start_fwd() { let f = |haystack, start, end| { let smap = StartByteMap::new(&LookMatcher::default()); - let input = &Input::new(haystack).range(start..end); - smap.fwd(input) + let input = Input::new(haystack).range(start..end); + let config = Config::from_input_forward(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + start }; assert_eq!(Start::Text, f("", 0, 0)); @@ -287,8 +457,11 @@ mod tests { fn start_rev() { let f = |haystack, start, end| { let smap = StartByteMap::new(&LookMatcher::default()); - let input = &Input::new(haystack).range(start..end); - smap.rev(input) + let input = Input::new(haystack).range(start..end); + let config = Config::from_input_reverse(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + start }; assert_eq!(Start::Text, f("", 0, 0)); diff --git a/regex-automata/tests/dfa/suite.rs b/regex-automata/tests/dfa/suite.rs index f3445e02a4..8ed6dd0077 100644 --- a/regex-automata/tests/dfa/suite.rs +++ b/regex-automata/tests/dfa/suite.rs @@ -9,7 +9,6 @@ use { util::{prefilter::Prefilter, syntax}, Anchored, Input, PatternSet, }, - regex_syntax::hir, regex_test::{ CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, TestRunner, @@ -285,10 +284,7 @@ fn compiler( // That is, Unicode word boundaries when searching non-ASCII text. if !test.haystack().is_ascii() { for hir in hirs.iter() { - let looks = hir.properties().look_set(); - if looks.contains(hir::Look::WordUnicode) - || looks.contains(hir::Look::WordUnicodeNegate) - { + if hir.properties().look_set().contains_word_unicode() { return Ok(CompiledRegex::skip()); } } diff --git a/regex-automata/tests/hybrid/api.rs b/regex-automata/tests/hybrid/api.rs index e82d808e34..4b04c4f8fd 100644 --- a/regex-automata/tests/hybrid/api.rs +++ b/regex-automata/tests/hybrid/api.rs @@ -55,7 +55,7 @@ fn too_many_cache_resets_cause_quit() -> Result<(), Box> { let mut cache = dfa.create_cache(); let haystack = "a".repeat(101).into_bytes(); - let err = MatchError::gave_up(25); + let err = MatchError::gave_up(24); // Notice that we make the same amount of progress in each search! That's // because the cache is reused and already has states to handle the first // N bytes. @@ -83,7 +83,7 @@ fn too_many_cache_resets_cause_quit() -> Result<(), Box> { // OK, if we reset the cache, then we should be able to create more states // and make more progress with searching for betas. 
cache.reset(&dfa); - let err = MatchError::gave_up(27); + let err = MatchError::gave_up(26); assert_eq!( Err(err), dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) diff --git a/regex-automata/tests/lib.rs b/regex-automata/tests/lib.rs index 1465e51eb7..67c979aa8d 100644 --- a/regex-automata/tests/lib.rs +++ b/regex-automata/tests/lib.rs @@ -61,6 +61,7 @@ fn suite() -> anyhow::Result<regex_test::RegexTests> { load!("unicode"); load!("utf8"); load!("word-boundary"); + load!("word-boundary-special"); load!("fowler/basic"); load!("fowler/nullsubexpr"); load!("fowler/repetition"); diff --git a/regex-cli/Cargo.toml b/regex-cli/Cargo.toml index f9dec00242..a107c09df2 100644 --- a/regex-cli/Cargo.toml +++ b/regex-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-cli" -version = "0.1.0" #:version +version = "0.2.0" #:version authors = ["The Rust Project Developers", "Andrew Gallant <jamslam@gmail.com>"] description = """ A command line tool for debugging, ad hoc benchmarking and generating regular @@ -12,6 +12,7 @@ license = "MIT OR Apache-2.0" categories = ["text-processing"] autotests = false edition = "2021" +rust-version = "1.65" [[bin]] name = "regex-cli" @@ -28,8 +29,8 @@ lexopt = "0.3.0" log = { version = "0.4.17", features = ["std"] } memmap2 = "0.5.10" regex = { version = "1.9.0", path = ".." } -regex-automata = { version = "0.3.0", path = "../regex-automata", features = ["logging"] } +regex-automata = { version = "0.4.0", path = "../regex-automata", features = ["logging"] } regex-lite = { version = "0.1.0", path = "../regex-lite" } -regex-syntax = { version = "0.7.3", path = "../regex-syntax" } +regex-syntax = { version = "0.8.0", path = "../regex-syntax" } tabwriter = { version = "1.2.1", features = ["ansi_formatting"] } textwrap = { version = "0.16.0", default-features = false } diff --git a/regex-cli/README.md b/regex-cli/README.md index 36dc50e772..376d89091a 100644 --- a/regex-cli/README.md +++ b/regex-cli/README.md @@ -7,11 +7,10 @@ various regex development tasks such as generating tests. ### Installation -Currently `regex-cli` is not on crates.io and should be installed from this -git repository: +Simply use `cargo` to install from crates.io. ``` -$ cargo install --git https://github.com/rust-lang/regex regex-cli +$ cargo install regex-cli ``` diff --git a/regex-cli/args/flags.rs b/regex-cli/args/flags.rs index db8a847ef8..61732a28e7 100644 --- a/regex-cli/args/flags.rs +++ b/regex-cli/args/flags.rs @@ -152,3 +152,55 @@ impl std::str::FromStr for MatchKind { Ok(MatchKind { kind }) } } + +/// Provides an implementation of the --captures flag, for use with Thompson +/// NFA configuration. +#[derive(Debug)] +pub struct WhichCaptures { + pub which: regex_automata::nfa::thompson::WhichCaptures, +} + +impl WhichCaptures { + pub const USAGE: Usage = Usage::new( + "--captures <kind>", + "One of: all, implicit or none.", + r#" +Selects which capture states should be included in the Thompson NFA. The +choices are 'all' (the default), 'implicit' or 'none'. + +'all' means that both explicit and implicit capture states are included. + +'implicit' means that only implicit capture states are included. That is, the +Thompson NFA will only be able to report the overall match offsets and not the +match offsets of each explicit capture group. + +'none' means that no capture states will be included. This is useful when +capture states aren't needed (like when building a DFA) or if they aren't +supported (like when building a reverse NFA).
+"#, + ); +} + +impl Default for WhichCaptures { + fn default() -> WhichCaptures { + WhichCaptures { + which: regex_automata::nfa::thompson::WhichCaptures::All, + } + } +} + +impl std::str::FromStr for WhichCaptures { + type Err = anyhow::Error; + + fn from_str(s: &str) -> anyhow::Result { + let which = match s { + "all" => regex_automata::nfa::thompson::WhichCaptures::All, + "implicit" => { + regex_automata::nfa::thompson::WhichCaptures::Implicit + } + "none" => regex_automata::nfa::thompson::WhichCaptures::None, + unk => anyhow::bail!("unrecognized captures option '{}'", unk), + }; + Ok(WhichCaptures { which }) + } +} diff --git a/regex-cli/args/thompson.rs b/regex-cli/args/thompson.rs index 6e7b4afd80..bd8388d117 100644 --- a/regex-cli/args/thompson.rs +++ b/regex-cli/args/thompson.rs @@ -28,7 +28,11 @@ impl Config { pub fn reversed(&self) -> Config { // Reverse DFAs require that captures are disabled. In practice, there // is no current use case for a reverse NFA with capture groups. - let thompson = self.thompson.clone().reverse(true).captures(false); + let thompson = self + .thompson + .clone() + .reverse(true) + .which_captures(thompson::WhichCaptures::None); Config { thompson } } @@ -66,8 +70,11 @@ impl Configurable for Config { Arg::Long("shrink") => { self.thompson = self.thompson.clone().shrink(true); } - Arg::Long("no-captures") => { - self.thompson = self.thompson.clone().captures(false); + Arg::Long("captures") => { + let which: flags::WhichCaptures = + args::parse(p, "--captures")?; + self.thompson = + self.thompson.clone().which_captures(which.which); } Arg::Long("line-terminator") => { let byte: flags::OneByte = @@ -129,19 +136,7 @@ spent shrinking the NFA can lead to far larger savings in the subsequent DFA determinization. "#, ), - Usage::new( - "--no-captures", - "Disable capture states.", - r#" -Disables capture states. By default, NFAs include special "capture" states that -instruct some regex engines (like the PikeVM) to record offset positions in -ancillary state. - -It can be useful to disable capture states in order to reduce "clutter" in the -automaton when debugging it. Also, at time of writing, reverse NFAs require -that capture groups are disabled. -"#, - ), + flags::WhichCaptures::USAGE, Usage::new( "--line-terminator", "Set the line terminator used by line anchors.", diff --git a/regex-cli/cmd/generate/fowler.rs b/regex-cli/cmd/generate/fowler.rs index c0ab1b361c..c287f6f527 100644 --- a/regex-cli/cmd/generate/fowler.rs +++ b/regex-cli/cmd/generate/fowler.rs @@ -404,7 +404,9 @@ fn count_capturing_groups_ast(ast: ®ex_syntax::ast::Ast) -> usize { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - | Ast::Class(_) => 0, + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) + | Ast::ClassBracketed(_) => 0, Ast::Repetition(ref rep) => count_capturing_groups_ast(&*rep.ast), Ast::Group(ref group) => { let this = if group.is_capturing() { 1 } else { 0 }; diff --git a/regex-lite/Cargo.toml b/regex-lite/Cargo.toml index 1dc144b316..0ba53485b9 100644 --- a/regex-lite/Cargo.toml +++ b/regex-lite/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-lite" -version = "0.1.0" #:version +version = "0.1.5" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex/tree/master/regex-lite" @@ -10,11 +10,11 @@ A lightweight regex engine that optimizes for binary size and compilation time. """ workspace = ".." 
edition = "2021" -rust-version = "1.60.0" +rust-version = "1.65" autotests = false # Features are documented in the "Crate features" section of the crate docs: -# https://docs.rs/regex-syntax/*/#crate-features +# https://docs.rs/regex-lite/*/#crate-features # # (Currently there are no supported features. 'std' is technically one, but it # is currently required.) diff --git a/regex-lite/README.md b/regex-lite/README.md index 34c749b216..758fac6aea 100644 --- a/regex-lite/README.md +++ b/regex-lite/README.md @@ -78,7 +78,7 @@ year: 2014, month: 10, day: 14 ### Minimum Rust version policy -This crate's minimum supported `rustc` version is `1.60.0`. +This crate's minimum supported `rustc` version is `1.65.0`. The policy is that the minimum Rust version required to use this crate can be increased in semver compatible updates. diff --git a/regex-lite/src/hir/mod.rs b/regex-lite/src/hir/mod.rs index f73a5420ab..6e5348a5bc 100644 --- a/regex-lite/src/hir/mod.rs +++ b/regex-lite/src/hir/mod.rs @@ -366,6 +366,24 @@ impl Hir { } } +impl HirKind { + /// Returns a slice of this kind's sub-expressions, if any. + fn subs(&self) -> &[Hir] { + use core::slice::from_ref; + + match *self { + HirKind::Empty + | HirKind::Char(_) + | HirKind::Class(_) + | HirKind::Look(_) => &[], + HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub), + HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub), + HirKind::Concat(ref subs) => subs, + HirKind::Alternation(ref subs) => subs, + } + } +} + #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct Class { pub(crate) ranges: Vec, @@ -592,6 +610,24 @@ pub(crate) enum Look { Word = 1 << 6, /// Match an ASCII-only negation of a word boundary. WordNegate = 1 << 7, + /// Match the start of an ASCII-only word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStart = 1 << 8, + /// Match the end of an ASCII-only word boundary. That is, this matches + /// a position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEnd = 1 << 9, + /// Match the start half of an ASCII-only word boundary. That is, this + /// matches a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalf = 1 << 10, + /// Match the end half of an ASCII-only word boundary. That is, this + /// matches a position at either the end of the haystack or where the + /// following character is not a word character. 
+ WordEndHalf = 1 << 11, } impl Look { @@ -631,6 +667,30 @@ impl Look { at < haystack.len() && utf8::is_word_byte(haystack[at]); word_before == word_after } + WordStart => { + let word_before = + at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + !word_before && word_after + } + WordEnd => { + let word_before = + at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + word_before && !word_after + } + WordStartHalf => { + let word_before = + at > 0 && utf8::is_word_byte(haystack[at - 1]); + !word_before + } + WordEndHalf => { + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + !word_after + } } } } @@ -705,3 +765,45 @@ fn prev_char(ch: char) -> Option<char> { // and U+E000 yields a valid scalar value. Some(char::from_u32(u32::from(ch).checked_sub(1)?).unwrap()) } + +impl Drop for Hir { + fn drop(&mut self) { + use core::mem; + + match *self.kind() { + HirKind::Empty + | HirKind::Char(_) + | HirKind::Class(_) + | HirKind::Look(_) => return, + HirKind::Capture(ref x) if x.sub.kind.subs().is_empty() => return, + HirKind::Repetition(ref x) if x.sub.kind.subs().is_empty() => { + return + } + HirKind::Concat(ref x) if x.is_empty() => return, + HirKind::Alternation(ref x) if x.is_empty() => return, + _ => {} + } + + let mut stack = vec![mem::replace(self, Hir::empty())]; + while let Some(mut expr) = stack.pop() { + match expr.kind { + HirKind::Empty + | HirKind::Char(_) + | HirKind::Class(_) + | HirKind::Look(_) => {} + HirKind::Capture(ref mut x) => { + stack.push(mem::replace(&mut x.sub, Hir::empty())); + } + HirKind::Repetition(ref mut x) => { + stack.push(mem::replace(&mut x.sub, Hir::empty())); + } + HirKind::Concat(ref mut x) => { + stack.extend(x.drain(..)); + } + HirKind::Alternation(ref mut x) => { + stack.extend(x.drain(..)); + } + } + } + } +} diff --git a/regex-lite/src/hir/parse.rs b/regex-lite/src/hir/parse.rs index cc3c21fe63..ca93b88387 100644 --- a/regex-lite/src/hir/parse.rs +++ b/regex-lite/src/hir/parse.rs @@ -111,6 +111,12 @@ const ERR_CLASS_DIFFERENCE_UNSUPPORTED: &str = "character class difference is not supported"; const ERR_CLASS_SYMDIFFERENCE_UNSUPPORTED: &str = "character class symmetric difference is not supported"; +const ERR_SPECIAL_WORD_BOUNDARY_UNCLOSED: &str = + "special word boundary assertion is unclosed or has an invalid character"; +const ERR_SPECIAL_WORD_BOUNDARY_UNRECOGNIZED: &str = + "special word boundary assertion is unrecognized"; +const ERR_SPECIAL_WORD_OR_REP_UNEXPECTED_EOF: &str = + "found start of special word boundary or repetition without an end"; /// A regular expression parser. /// @@ -371,6 +377,24 @@ impl<'a> Parser<'a> { /// own routine. impl<'a> Parser<'a> { pub(super) fn parse(&self) -> Result<Hir, Error> { + let hir = self.parse_inner()?; + // While we also check nesting during parsing, that only checks the + // number of recursive parse calls. It does not necessarily cover + // all possible recursive nestings of the Hir itself. For example, + // repetition operators don't require recursive parse calls. So one + // can stack them arbitrarily without overflowing the stack in the + // *parser*. But then if one recurses over the resulting Hir, a stack + // overflow is possible. So here we check the Hir nesting level + // thoroughly to ensure it isn't nested too deeply.
+ // + // Note that we do still need the nesting limit check in the parser as + // well, since that will avoid overflowing the stack during parse time + // before the complete Hir value is constructed. + check_hir_nesting(&hir, self.config.nest_limit)?; + Ok(hir) + } + + fn parse_inner(&self) -> Result<Hir, Error> { let depth = self.increment_depth()?; let mut alternates = vec![]; let mut concat = vec![]; @@ -479,12 +503,86 @@ impl<'a> Parser<'a> { 'v' => special('\x0B'), 'A' => Ok(Hir::look(hir::Look::Start)), 'z' => Ok(Hir::look(hir::Look::End)), - 'b' => Ok(Hir::look(hir::Look::Word)), + 'b' => { + let mut hir = Hir::look(hir::Look::Word); + if !self.is_done() && self.char() == '{' { + if let Some(special) = + self.maybe_parse_special_word_boundary()? + { + hir = special; + } + } + Ok(hir) + } 'B' => Ok(Hir::look(hir::Look::WordNegate)), + '<' => Ok(Hir::look(hir::Look::WordStart)), + '>' => Ok(Hir::look(hir::Look::WordEnd)), _ => Err(Error::new(ERR_ESCAPE_UNRECOGNIZED)), } } + /// Attempt to parse a specialty word boundary. That is, `\b{start}`, + /// `\b{end}`, `\b{start-half}` or `\b{end-half}`. + /// + /// This is similar to `maybe_parse_ascii_class` in that, in most cases, + /// if it fails it will just return `None` with no error. This is done + /// because `\b{5}` is a valid expression and we want to let that be parsed + /// by the existing counted repetition parsing code. (I thought about just + /// invoking the counted repetition code from here, but it seemed a little + /// ham-fisted.) + /// + /// Unlike `maybe_parse_ascii_class` though, this can return an error. + /// Namely, if we definitely know it isn't a counted repetition, then we + /// return an error specific to the specialty word boundaries. + /// + /// This assumes the parser is positioned at a `{` immediately following + /// a `\b`. When `None` is returned, the parser is returned to the position + /// at which it started: pointing at a `{`. + /// + /// The position given should correspond to the start of the `\b`. + fn maybe_parse_special_word_boundary(&self) -> Result<Option<Hir>, Error> { + assert_eq!(self.char(), '{'); + + let is_valid_char = |c| match c { + 'A'..='Z' | 'a'..='z' | '-' => true, + _ => false, + }; + let start = self.pos(); + if !self.bump_and_bump_space() { + return Err(Error::new(ERR_SPECIAL_WORD_OR_REP_UNEXPECTED_EOF)); + } + // This is one of the critical bits: if the first non-whitespace + // character isn't in [-A-Za-z] (i.e., this can't be a special word + // boundary), then we bail and let the counted repetition parser deal + // with this. + if !is_valid_char(self.char()) { + self.pos.set(start); + self.char.set(Some('{')); + return Ok(None); + } + + // Now collect up our chars until we see a '}'. + let mut scratch = String::new(); + while !self.is_done() && is_valid_char(self.char()) { + scratch.push(self.char()); + self.bump_and_bump_space(); + } + if self.is_done() || self.char() != '}' { + return Err(Error::new(ERR_SPECIAL_WORD_BOUNDARY_UNCLOSED)); + } + self.bump(); + let kind = match scratch.as_str() { + "start" => hir::Look::WordStart, + "end" => hir::Look::WordEnd, + "start-half" => hir::Look::WordStartHalf, + "end-half" => hir::Look::WordEndHalf, + _ => { + return Err(Error::new(ERR_SPECIAL_WORD_BOUNDARY_UNRECOGNIZED)) + } + }; + Ok(Some(Hir::look(kind))) + } + /// Parse a hex representation of a Unicode codepoint. This handles both /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to /// be positioned at the `x`, `u` or `U` prefix.
The parser is advanced to @@ -726,7 +824,7 @@ impl<'a> Parser<'a> { if self.bump_if("?P<") || self.bump_if("?<") { let index = self.next_capture_index()?; let name = Some(Box::from(self.parse_capture_name()?)); - let sub = Box::new(self.parse()?); + let sub = Box::new(self.parse_inner()?); let cap = hir::Capture { index, name, sub }; Ok(Some(Hir::capture(cap))) } else if self.bump_if("?") { @@ -746,11 +844,11 @@ impl<'a> Parser<'a> { } else { assert_eq!(':', self.char()); self.bump(); - self.parse().map(Some) + self.parse_inner().map(Some) } } else { let index = self.next_capture_index()?; - let sub = Box::new(self.parse()?); + let sub = Box::new(self.parse_inner()?); let cap = hir::Capture { index, name: None, sub }; Ok(Some(Hir::capture(cap))) } @@ -1183,6 +1281,38 @@ impl<'a> Parser<'a> { } } +/// This checks the depth of the given `Hir` value, and if it exceeds the given +/// limit, then an error is returned. +fn check_hir_nesting(hir: &Hir, limit: u32) -> Result<(), Error> { + fn recurse(hir: &Hir, limit: u32, depth: u32) -> Result<(), Error> { + if depth > limit { + return Err(Error::new(ERR_TOO_MUCH_NESTING)); + } + let Some(next_depth) = depth.checked_add(1) else { + return Err(Error::new(ERR_TOO_MUCH_NESTING)); + }; + match *hir.kind() { + HirKind::Empty + | HirKind::Char(_) + | HirKind::Class(_) + | HirKind::Look(_) => Ok(()), + HirKind::Repetition(hir::Repetition { ref sub, .. }) => { + recurse(sub, limit, next_depth) + } + HirKind::Capture(hir::Capture { ref sub, .. }) => { + recurse(sub, limit, next_depth) + } + HirKind::Concat(ref subs) | HirKind::Alternation(ref subs) => { + for sub in subs.iter() { + recurse(sub, limit, next_depth)?; + } + Ok(()) + } + } + } + recurse(hir, limit, 0) +} + /// Converts the given Hir to a literal char if the Hir is just a single /// character. Otherwise this returns an error. 
/// @@ -1198,8 +1328,10 @@ fn into_class_item_range(hir: Hir) -> Result<hir::ClassRange, Error> { } } -fn into_class_item_ranges(hir: Hir) -> Result<Vec<hir::ClassRange>, Error> { - match hir.kind { +fn into_class_item_ranges( + mut hir: Hir, +) -> Result<Vec<hir::ClassRange>, Error> { + match core::mem::replace(&mut hir.kind, HirKind::Empty) { HirKind::Char(ch) => Ok(vec![hir::ClassRange { start: ch, end: ch }]), HirKind::Class(hir::Class { ranges }) => Ok(ranges), _ => Err(Error::new(ERR_CLASS_INVALID_ITEM)), @@ -1264,12 +1396,12 @@ mod tests { use super::*; fn p(pattern: &str) -> Hir { - Parser::new(Config::default(), pattern).parse().unwrap() + Parser::new(Config::default(), pattern).parse_inner().unwrap() } fn perr(pattern: &str) -> String { Parser::new(Config::default(), pattern) - .parse() + .parse_inner() .unwrap_err() .to_string() } @@ -1948,8 +2080,6 @@ bar assert_eq!(ERR_UNICODE_CLASS_UNSUPPORTED, perr(r"\pL")); assert_eq!(ERR_UNICODE_CLASS_UNSUPPORTED, perr(r"\p{L}")); assert_eq!(ERR_ESCAPE_UNRECOGNIZED, perr(r"\i")); - assert_eq!(ERR_ESCAPE_UNRECOGNIZED, perr(r"\<")); - assert_eq!(ERR_ESCAPE_UNRECOGNIZED, perr(r"\>")); assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"?")); assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"*")); assert_eq!(ERR_UNCOUNTED_REP_SUB_MISSING, perr(r"+")); @@ -1983,6 +2113,11 @@ bar assert_eq!(ERR_CLASS_INTERSECTION_UNSUPPORTED, perr(r"[a&&b]")); assert_eq!(ERR_CLASS_DIFFERENCE_UNSUPPORTED, perr(r"[a--b]")); assert_eq!(ERR_CLASS_SYMDIFFERENCE_UNSUPPORTED, perr(r"[a~~b]")); + assert_eq!(ERR_SPECIAL_WORD_BOUNDARY_UNCLOSED, perr(r"\b{foo")); + assert_eq!(ERR_SPECIAL_WORD_BOUNDARY_UNCLOSED, perr(r"\b{foo!}")); + assert_eq!(ERR_SPECIAL_WORD_BOUNDARY_UNRECOGNIZED, perr(r"\b{foo}")); + assert_eq!(ERR_SPECIAL_WORD_OR_REP_UNEXPECTED_EOF, perr(r"\b{")); + assert_eq!(ERR_SPECIAL_WORD_OR_REP_UNEXPECTED_EOF, perr(r"(?x)\b{ ")); } #[test] diff --git a/regex-lite/src/lib.rs b/regex-lite/src/lib.rs index d8e9016788..9b394a480b 100644 --- a/regex-lite/src/lib.rs +++ b/regex-lite/src/lib.rs @@ -107,7 +107,7 @@ fn main() { } ``` -Foruth, run it with `cargo run`: +Fourth, run it with `cargo run`: ```text $ cargo run @@ -466,12 +466,16 @@ x{n}? exactly n x ### Empty matches

-^     the beginning of a haystack (or start-of-line with multi-line mode)
-$     the end of a haystack (or end-of-line with multi-line mode)
-\A    only the beginning of a haystack (even with multi-line mode enabled)
-\z    only the end of a haystack (even with multi-line mode enabled)
-\b    an ASCII word boundary (\w on one side and \W, \A, or \z on other)
-\B    not an ASCII word boundary
+^               the beginning of a haystack (or start-of-line with multi-line mode)
+$               the end of a haystack (or end-of-line with multi-line mode)
+\A              only the beginning of a haystack (even with multi-line mode enabled)
+\z              only the end of a haystack (even with multi-line mode enabled)
+\b              an ASCII word boundary (\w on one side and \W, \A, or \z on other)
+\B              not an ASCII word boundary
+\b{start}, \<   an ASCII start-of-word boundary (\W|\A on the left, \w on the right)
+\b{end}, \>     an ASCII end-of-word boundary (\w on the left, \W|\z on the right)
+\b{start-half}  half of an ASCII start-of-word boundary (\W|\A on the left)
+\b{end-half}    half of an ASCII end-of-word boundary (\W|\z on the right)
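As an illustration of the four new assertions listed in the table above, here is a small sketch. It assumes a `regex-lite` release that includes this change (0.1.5 or newer, per this diff); the `main` harness is illustrative only, and the expectations follow the ASCII-only `Look` semantics implemented later in this patch.

```rust
use regex_lite::Regex;

fn main() {
    // \b{start} (or its alias \<) matches only at the start of a word,
    // so the "cat" inside "concat" is not a match.
    let re = Regex::new(r"\b{start}cat").unwrap();
    assert!(re.is_match("cat nap"));
    assert!(!re.is_match("concat"));

    // \b{end-half} requires only that the *following* character is not a
    // word character (or that the haystack ends there).
    let re = Regex::new(r"cat\b{end-half}").unwrap();
    assert!(re.is_match("cat!"));
    assert!(!re.is_match("catalog"));
}
```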
 
The empty regex is valid and matches the empty string. For example, the @@ -581,25 +585,29 @@ Note that this includes all possible escape sequences, even ones that are documented elsewhere.
-\*          literal *, applies to all ASCII except [0-9A-Za-z<>]
-\a          bell (\x07)
-\f          form feed (\x0C)
-\t          horizontal tab
-\n          new line
-\r          carriage return
-\v          vertical tab (\x0B)
-\A          matches at the beginning of a haystack
-\z          matches at the end of a haystack
-\b          word boundary assertion
-\B          negated word boundary assertion
-\x7F        hex character code (exactly two digits)
-\x{10FFFF}  any hex character code corresponding to a Unicode code point
-\u007F      hex character code (exactly four digits)
-\u{7F}      any hex character code corresponding to a Unicode code point
-\U0000007F  hex character code (exactly eight digits)
-\U{7F}      any hex character code corresponding to a Unicode code point
-\d, \s, \w  Perl character class
-\D, \S, \W  negated Perl character class
+\*              literal *, applies to all ASCII except [0-9A-Za-z<>]
+\a              bell (\x07)
+\f              form feed (\x0C)
+\t              horizontal tab
+\n              new line
+\r              carriage return
+\v              vertical tab (\x0B)
+\A              matches at the beginning of a haystack
+\z              matches at the end of a haystack
+\b              word boundary assertion
+\B              negated word boundary assertion
+\b{start}, \<   start-of-word boundary assertion
+\b{end}, \>     end-of-word boundary assertion
+\b{start-half}  half of a start-of-word boundary assertion
+\b{end-half}    half of an end-of-word boundary assertion
+\x7F            hex character code (exactly two digits)
+\x{10FFFF}      any hex character code corresponding to a Unicode code point
+\u007F          hex character code (exactly four digits)
+\u{7F}          any hex character code corresponding to a Unicode code point
+\U0000007F      hex character code (exactly eight digits)
+\U{7F}          any hex character code corresponding to a Unicode code point
+\d, \s, \w      Perl character class
+\D, \S, \W      negated Perl character class
 
### Perl character classes (ASCII only) diff --git a/regex-lite/src/string.rs b/regex-lite/src/string.rs index 91b81d008a..4e4de90683 100644 --- a/regex-lite/src/string.rs +++ b/regex-lite/src/string.rs @@ -1186,8 +1186,8 @@ impl Regex { /// To create a `CaptureLocations` value, use the /// [`Regex::capture_locations`] method. /// - /// This also the overall match if one was found. When a match is found, - /// its offsets are also always stored in `locs` at index `0`. + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. /// /// # Panics /// @@ -2075,7 +2075,10 @@ impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> { /// /// // Asking for an invalid capture group always returns None. /// assert_eq!(None, locs.get(3)); +/// # // literals are too big for 32-bit usize: #1041 +/// # #[cfg(target_pointer_width = "64")] /// assert_eq!(None, locs.get(34973498648)); +/// # #[cfg(target_pointer_width = "64")] /// assert_eq!(None, locs.get(9944060567225171988)); /// ``` #[derive(Clone, Debug)] diff --git a/regex-lite/tests/fuzz/mod.rs b/regex-lite/tests/fuzz/mod.rs index 6eb37b50bb..5a721f142a 100644 --- a/regex-lite/tests/fuzz/mod.rs +++ b/regex-lite/tests/fuzz/mod.rs @@ -14,6 +14,23 @@ fn captures_wrong_order_min() { let _ = run(data); } +// Simpler regression test from a failure found by OSS-fuzz[1]. This test, +// when it failed, caused a stack overflow. We fixed it by adding another nest +// check on the Hir value itself, since the Hir type can have depth added to +// it without recursive calls in the parser (which is where the existing nest +// check was). +// +// Many thanks to Addison Crump for coming up with this test case[2]. +// +// [1]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608 +// [2]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608#c1 +#[test] +fn many_zero_to_many_reps() { + let pat = format!(".{}", "*".repeat(1 << 15)); + let Ok(re) = regex_lite::Regex::new(&pat) else { return }; + re.is_match(""); +} + // This is the fuzz target function. We duplicate it here since this is the // thing we use to interpret the data. It is ultimately what we want to // succeed. diff --git a/regex-lite/tests/lib.rs b/regex-lite/tests/lib.rs index 757b394411..89635f2d78 100644 --- a/regex-lite/tests/lib.rs +++ b/regex-lite/tests/lib.rs @@ -38,6 +38,7 @@ fn suite() -> anyhow::Result<regex_test::RegexTests> { load!("unicode"); load!("utf8"); load!("word-boundary"); + load!("word-boundary-special"); load!("fowler/basic"); load!("fowler/nullsubexpr"); load!("fowler/repetition"); diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index e6d7965be6..c9ce87da70 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-syntax" -version = "0.7.3" #:version +version = "0.8.2" #:version authors = ["The Rust Project Developers", "Andrew Gallant <jamslam@gmail.com>"] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" @@ -8,7 +8,7 @@ documentation = "https://docs.rs/regex-syntax" description = "A regular expression parser." workspace = ".."
edition = "2021" -rust-version = "1.60.0" +rust-version = "1.65" # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/regex-syntax/*/#crate-features diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 9e4284fee8..6a77ee1343 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -162,6 +162,18 @@ pub enum ErrorKind { /// `(?i)*`. It is, however, possible to create a repetition operating on /// an empty sub-expression. For example, `()*` is still considered valid. RepetitionMissing, + /// The special word boundary syntax, `\b{something}`, was used, but + /// either EOF without `}` was seen, or an invalid character in the + /// braces was seen. + SpecialWordBoundaryUnclosed, + /// The special word boundary syntax, `\b{something}`, was used, but + /// `something` was not recognized as a valid word boundary kind. + SpecialWordBoundaryUnrecognized, + /// The syntax `\b{` was observed, but afterwards the end of the pattern + /// was observed without being able to tell whether it was meant to be a + /// bounded repetition on the `\b` or the beginning of a special word + /// boundary assertion. + SpecialWordOrRepetitionUnexpectedEof, /// The Unicode class is not valid. This typically occurs when a `\p` is /// followed by something other than a `{`. UnicodeClassInvalid, @@ -260,6 +272,29 @@ impl core::fmt::Display for ErrorKind { RepetitionMissing => { write!(f, "repetition operator missing expression") } + SpecialWordBoundaryUnclosed => { + write!( + f, + "special word boundary assertion is either \ + unclosed or contains an invalid character", + ) + } + SpecialWordBoundaryUnrecognized => { + write!( + f, + "unrecognized special word boundary assertion, \ + valid choices are: start, end, start-half \ + or end-half", + ) + } + SpecialWordOrRepetitionUnexpectedEof => { + write!( + f, + "found either the beginning of a special word \ + boundary or a bounded repetition on a \\b with \ + an opening brace, but no closing brace", + ) + } UnicodeClassInvalid => { write!(f, "invalid Unicode character class") } @@ -433,29 +468,94 @@ pub struct Comment { #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum Ast { /// An empty regex that matches everything. - Empty(Span), + Empty(Box), /// A set of flags, e.g., `(?is)`. - Flags(SetFlags), + Flags(Box), /// A single character literal, which includes escape sequences. - Literal(Literal), + Literal(Box), /// The "any character" class. - Dot(Span), + Dot(Box), /// A single zero-width assertion. - Assertion(Assertion), - /// A single character class. This includes all forms of character classes - /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`. - Class(Class), + Assertion(Box), + /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`. + ClassUnicode(Box), + /// A single perl character class, e.g., `\d` or `\W`. + ClassPerl(Box), + /// A single bracketed character class set, which may contain zero or more + /// character ranges and/or zero or more nested classes. e.g., + /// `[a-zA-Z\pL]`. + ClassBracketed(Box), /// A repetition operator applied to an arbitrary regular expression. - Repetition(Repetition), + Repetition(Box), /// A grouped regular expression. - Group(Group), + Group(Box), /// An alternation of regular expressions. - Alternation(Alternation), + Alternation(Box), /// A concatenation of regular expressions. - Concat(Concat), + Concat(Box), } impl Ast { + /// Create an "empty" AST item. 
+ pub fn empty(span: Span) -> Ast { + Ast::Empty(Box::new(span)) + } + + /// Create a "flags" AST item. + pub fn flags(e: SetFlags) -> Ast { + Ast::Flags(Box::new(e)) + } + + /// Create a "literal" AST item. + pub fn literal(e: Literal) -> Ast { + Ast::Literal(Box::new(e)) + } + + /// Create a "dot" AST item. + pub fn dot(span: Span) -> Ast { + Ast::Dot(Box::new(span)) + } + + /// Create an "assertion" AST item. + pub fn assertion(e: Assertion) -> Ast { + Ast::Assertion(Box::new(e)) + } + + /// Create a "Unicode class" AST item. + pub fn class_unicode(e: ClassUnicode) -> Ast { + Ast::ClassUnicode(Box::new(e)) + } + + /// Create a "Perl class" AST item. + pub fn class_perl(e: ClassPerl) -> Ast { + Ast::ClassPerl(Box::new(e)) + } + + /// Create a "bracketed class" AST item. + pub fn class_bracketed(e: ClassBracketed) -> Ast { + Ast::ClassBracketed(Box::new(e)) + } + + /// Create a "repetition" AST item. + pub fn repetition(e: Repetition) -> Ast { + Ast::Repetition(Box::new(e)) + } + + /// Create a "group" AST item. + pub fn group(e: Group) -> Ast { + Ast::Group(Box::new(e)) + } + + /// Create an "alternation" AST item. + pub fn alternation(e: Alternation) -> Ast { + Ast::Alternation(Box::new(e)) + } + + /// Create a "concat" AST item. + pub fn concat(e: Concat) -> Ast { + Ast::Concat(Box::new(e)) + } + /// Return the span of this abstract syntax tree. pub fn span(&self) -> &Span { match *self { @@ -464,7 +564,9 @@ impl Ast { Ast::Literal(ref x) => &x.span, Ast::Dot(ref span) => span, Ast::Assertion(ref x) => &x.span, - Ast::Class(ref x) => x.span(), + Ast::ClassUnicode(ref x) => &x.span, + Ast::ClassPerl(ref x) => &x.span, + Ast::ClassBracketed(ref x) => &x.span, Ast::Repetition(ref x) => &x.span, Ast::Group(ref x) => &x.span, Ast::Alternation(ref x) => &x.span, @@ -488,8 +590,10 @@ impl Ast { | Ast::Flags(_) | Ast::Literal(_) | Ast::Dot(_) - | Ast::Assertion(_) => false, - Ast::Class(_) + | Ast::Assertion(_) + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => false, + Ast::ClassBracketed(_) | Ast::Repetition(_) | Ast::Group(_) | Ast::Alternation(_) @@ -526,14 +630,14 @@ pub struct Alternation { impl Alternation { /// Return this alternation as an AST. /// - /// If this alternation contains zero ASTs, then Ast::Empty is - /// returned. If this alternation contains exactly 1 AST, then the - /// corresponding AST is returned. Otherwise, Ast::Alternation is returned. + /// If this alternation contains zero ASTs, then `Ast::empty` is returned. + /// If this alternation contains exactly 1 AST, then the corresponding AST + /// is returned. Otherwise, `Ast::alternation` is returned. pub fn into_ast(mut self) -> Ast { match self.asts.len() { - 0 => Ast::Empty(self.span), + 0 => Ast::empty(self.span), 1 => self.asts.pop().unwrap(), - _ => Ast::Alternation(self), + _ => Ast::alternation(self), } } } @@ -551,14 +655,14 @@ pub struct Concat { impl Concat { /// Return this concatenation as an AST. /// - /// If this concatenation contains zero ASTs, then Ast::Empty is - /// returned. If this concatenation contains exactly 1 AST, then the - /// corresponding AST is returned. Otherwise, Ast::Concat is returned. + /// If this concatenation contains zero ASTs, then `Ast::empty` is returned. + /// If this concatenation contains exactly 1 AST, then the corresponding AST + /// is returned. Otherwise, `Ast::concat` is returned.
pub fn into_ast(mut self) -> Ast { match self.asts.len() { - 0 => Ast::Empty(self.span), + 0 => Ast::empty(self.span), 1 => self.asts.pop().unwrap(), - _ => Ast::Concat(self), + _ => Ast::concat(self), } } } @@ -675,31 +779,6 @@ impl HexLiteralKind { } } -/// A single character class expression. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum Class { - /// A Unicode character class, e.g., `\pL` or `\p{Greek}`. - Unicode(ClassUnicode), - /// A perl character class, e.g., `\d` or `\W`. - Perl(ClassPerl), - /// A bracketed character class set, which may contain zero or more - /// character ranges and/or zero or more nested classes. e.g., - /// `[a-zA-Z\pL]`. - Bracketed(ClassBracketed), -} - -impl Class { - /// Return the span of this character class. - pub fn span(&self) -> &Span { - match *self { - Class::Perl(ref x) => &x.span, - Class::Unicode(ref x) => &x.span, - Class::Bracketed(ref x) => &x.span, - } - } -} - /// A Perl character class. #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] @@ -1249,6 +1328,18 @@ pub enum AssertionKind { WordBoundary, /// `\B` NotWordBoundary, + /// `\b{start}` + WordBoundaryStart, + /// `\b{end}` + WordBoundaryEnd, + /// `\<` (alias for `\b{start}`) + WordBoundaryStartAngle, + /// `\>` (alias for `\b{end}`) + WordBoundaryEndAngle, + /// `\b{start-half}` + WordBoundaryStartHalf, + /// `\b{end-half}` + WordBoundaryEndHalf, } /// A repetition operation applied to a regular expression. @@ -1550,8 +1641,10 @@ impl Drop for Ast { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - // Classes are recursive, so they get their own Drop impl. - | Ast::Class(_) => return, + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) + // Bracketed classes are recursive, they get their own Drop impl. + | Ast::ClassBracketed(_) => return, Ast::Repetition(ref x) if !x.ast.has_subexprs() => return, Ast::Group(ref x) if !x.ast.has_subexprs() => return, Ast::Alternation(ref x) if x.asts.is_empty() => return, @@ -1560,7 +1653,7 @@ impl Drop for Ast { } let empty_span = || Span::splat(Position::new(0, 0, 0)); - let empty_ast = || Ast::Empty(empty_span()); + let empty_ast = || Ast::empty(empty_span()); let mut stack = vec![mem::replace(self, empty_ast())]; while let Some(mut ast) = stack.pop() { match ast { @@ -1569,8 +1662,11 @@ impl Drop for Ast { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - // Classes are recursive, so they get their own Drop impl. - | Ast::Class(_) => {} + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) + // Bracketed classes are recursive, so they get their own Drop + // impl. + | Ast::ClassBracketed(_) => {} Ast::Repetition(ref mut x) => { stack.push(mem::replace(&mut x.ast, empty_ast())); } @@ -1663,9 +1759,9 @@ mod tests { let run = || { let span = || Span::splat(Position::new(0, 0, 0)); - let mut ast = Ast::Empty(span()); + let mut ast = Ast::empty(span()); for i in 0..200 { - ast = Ast::Group(Group { + ast = Ast::group(Group { span: span(), kind: GroupKind::CaptureIndex(i), ast: Box::new(ast), @@ -1694,4 +1790,20 @@ mod tests { .join() .unwrap(); } + + // This tests that our `Ast` has a reasonable size. This isn't a hard rule + // and it can be increased if given a good enough reason. But this test + // exists because the size of `Ast` was at one point over 200 bytes on a + // 64-bit target. Wow. 
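Two notes on this region: the thread-with-a-small-stack test above guards against stack overflow from recursive drop glue, and the size test just below pins the boxed representation. The `Drop` impl avoids recursion by draining children onto an explicit stack; the same technique on a toy type, as a standalone sketch (the `Chain` type is illustrative, not from the crate):

```rust
// A deeply recursive type: its default drop glue would recurse once per
// level and overflow the call stack for long chains.
struct Chain {
    next: Option<Box<Chain>>,
}

impl Drop for Chain {
    fn drop(&mut self) {
        // Detach each child before it is dropped, so every individual
        // drop call does O(1) work and nothing recurses.
        let mut next = self.next.take();
        while let Some(mut boxed) = next {
            next = boxed.next.take();
            // `boxed` is freed here with its child already detached.
        }
    }
}

fn main() {
    let mut chain = Chain { next: None };
    for _ in 0..1_000_000 {
        chain = Chain { next: Some(Box::new(chain)) };
    }
    drop(chain); // completes without blowing the stack
}
```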
+ #[test] + fn ast_size() { + let max = 2 * core::mem::size_of::(); + let size = core::mem::size_of::(); + assert!( + size <= max, + "Ast size of {} bytes is bigger than suggested max {}", + size, + max + ); + } } diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 9cf64e9ec7..593b14fbc3 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -53,11 +53,11 @@ impl Primitive { /// Convert this primitive into a proper AST. fn into_ast(self) -> Ast { match self { - Primitive::Literal(lit) => Ast::Literal(lit), - Primitive::Assertion(assert) => Ast::Assertion(assert), - Primitive::Dot(span) => Ast::Dot(span), - Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)), - Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)), + Primitive::Literal(lit) => Ast::literal(lit), + Primitive::Assertion(assert) => Ast::assertion(assert), + Primitive::Dot(span) => Ast::dot(span), + Primitive::Perl(cls) => Ast::class_perl(cls), + Primitive::Unicode(cls) => Ast::class_unicode(cls), } } @@ -383,7 +383,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// Return a reference to the pattern being parsed. fn pattern(&self) -> &str { - self.pattern.borrow() + self.pattern } /// Create a new error with the given span and error type. @@ -691,7 +691,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { self.parser().ignore_whitespace.set(v); } - concat.asts.push(Ast::Flags(set)); + concat.asts.push(Ast::flags(set)); Ok(concat) } Either::Right(group) => { @@ -764,7 +764,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { group.ast = Box::new(group_concat.into_ast()); } } - prior_concat.asts.push(Ast::Group(group)); + prior_concat.asts.push(Ast::group(group)); Ok(prior_concat) } @@ -783,7 +783,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Some(GroupState::Alternation(mut alt)) => { alt.span.end = self.pos(); alt.asts.push(concat.into_ast()); - Ok(Ast::Alternation(alt)) + Ok(Ast::alternation(alt)) } Some(GroupState::Group { group, .. }) => { return Err( @@ -850,7 +850,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { fn pop_class( &self, nested_union: ast::ClassSetUnion, - ) -> Result> { + ) -> Result> { assert_eq!(self.char(), ']'); let item = ast::ClassSet::Item(nested_union.into_item()); @@ -882,7 +882,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { set.span.end = self.pos(); set.kind = prevset; if stack.is_empty() { - Ok(Either::Right(ast::Class::Bracketed(set))) + Ok(Either::Right(set)) } else { union.push(ast::ClassSetItem::Bracketed(Box::new(set))); Ok(Either::Left(union)) @@ -976,7 +976,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { '|' => concat = self.push_alternate(concat)?, '[' => { let class = self.parse_set_class()?; - concat.asts.push(Ast::Class(class)); + concat.asts.push(Ast::class_bracketed(class)); } '?' 
=> { concat = self.parse_uncounted_repetition( @@ -1057,7 +1057,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { greedy = false; self.bump(); } - concat.asts.push(Ast::Repetition(ast::Repetition { + concat.asts.push(Ast::repetition(ast::Repetition { span: ast.span().with_end(self.pos()), op: ast::RepetitionOp { span: Span::new(op_start, self.pos()), @@ -1159,7 +1159,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { self.error(op_span, ast::ErrorKind::RepetitionCountInvalid) ); } - concat.asts.push(Ast::Repetition(ast::Repetition { + concat.asts.push(Ast::repetition(ast::Repetition { span: ast.span().with_end(self.pos()), op: ast::RepetitionOp { span: op_span, @@ -1212,7 +1212,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::CaptureName { starts_with_p, name }, - ast: Box::new(Ast::Empty(self.span())), + ast: Box::new(Ast::empty(self.span())), })) } else if self.bump_if("?") { if self.is_eof() { @@ -1241,7 +1241,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::NonCapturing(flags), - ast: Box::new(Ast::Empty(self.span())), + ast: Box::new(Ast::empty(self.span())), })) } } else { @@ -1249,7 +1249,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::CaptureIndex(capture_index), - ast: Box::new(Ast::Empty(self.span())), + ast: Box::new(Ast::empty(self.span())), })) } } @@ -1528,18 +1528,115 @@ impl<'s, P: Borrow> ParserI<'s, P> { span, kind: ast::AssertionKind::EndText, })), - 'b' => Ok(Primitive::Assertion(ast::Assertion { - span, - kind: ast::AssertionKind::WordBoundary, - })), + 'b' => { + let mut wb = ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundary, + }; + // After a \b, we "try" to parse things like \b{start} for + // special word boundary assertions. + if !self.is_eof() && self.char() == '{' { + if let Some(kind) = + self.maybe_parse_special_word_boundary(start)? + { + wb.kind = kind; + wb.span.end = self.pos(); + } + } + Ok(Primitive::Assertion(wb)) + } 'B' => Ok(Primitive::Assertion(ast::Assertion { span, kind: ast::AssertionKind::NotWordBoundary, })), + '<' => Ok(Primitive::Assertion(ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundaryStartAngle, + })), + '>' => Ok(Primitive::Assertion(ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundaryEndAngle, + })), _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)), } } + /// Attempt to parse a specialty word boundary. That is, `\b{start}`, + /// `\b{end}`, `\b{start-half}` or `\b{end-half}`. + /// + /// This is similar to `maybe_parse_ascii_class` in that, in most cases, + /// if it fails it will just return `None` with no error. This is done + /// because `\b{5}` is a valid expression and we want to let that be parsed + /// by the existing counted repetition parsing code. (I thought about just + /// invoking the counted repetition code from here, but it seemed a little + /// ham-fisted.) + /// + /// Unlike `maybe_parse_ascii_class` though, this can return an error. + /// Namely, if we definitely know it isn't a counted repetition, then we + /// return an error specific to the specialty word boundaries. + /// + /// This assumes the parser is positioned at a `{` immediately following + /// a `\b`. When `None` is returned, the parser is returned to the position + /// at which it started: pointing at a `{`. + /// + /// The position given should correspond to the start of the `\b`. 
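Before the parser internals below: here is how that disambiguation surfaces at the public API level. A sketch, assuming `regex_syntax::ast::parse::Parser` as the entry point:

```rust
use regex_syntax::ast::parse::Parser;

fn main() {
    // A recognized name after `\b{` parses as one special assertion.
    assert!(Parser::new().parse(r"\b{start}").is_ok());

    // A digit after `\b{` is not in [-A-Za-z], so this is still parsed
    // as a counted repetition applied to `\b`.
    assert!(Parser::new().parse(r"\b{2,3}").is_ok());

    // A well-braced but unknown name is rejected outright.
    assert!(Parser::new().parse(r"\b{begin}").is_err());
}
```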
+ fn maybe_parse_special_word_boundary( + &self, + wb_start: Position, + ) -> Result> { + assert_eq!(self.char(), '{'); + + let is_valid_char = |c| match c { + 'A'..='Z' | 'a'..='z' | '-' => true, + _ => false, + }; + let start = self.pos(); + if !self.bump_and_bump_space() { + return Err(self.error( + Span::new(wb_start, self.pos()), + ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, + )); + } + let start_contents = self.pos(); + // This is one of the critical bits: if the first non-whitespace + // character isn't in [-A-Za-z] (i.e., this can't be a special word + // boundary), then we bail and let the counted repetition parser deal + // with this. + if !is_valid_char(self.char()) { + self.parser().pos.set(start); + return Ok(None); + } + + // Now collect up our chars until we see a '}'. + let mut scratch = self.parser().scratch.borrow_mut(); + scratch.clear(); + while !self.is_eof() && is_valid_char(self.char()) { + scratch.push(self.char()); + self.bump_and_bump_space(); + } + if self.is_eof() || self.char() != '}' { + return Err(self.error( + Span::new(start, self.pos()), + ast::ErrorKind::SpecialWordBoundaryUnclosed, + )); + } + let end = self.pos(); + self.bump(); + let kind = match scratch.as_str() { + "start" => ast::AssertionKind::WordBoundaryStart, + "end" => ast::AssertionKind::WordBoundaryEnd, + "start-half" => ast::AssertionKind::WordBoundaryStartHalf, + "end-half" => ast::AssertionKind::WordBoundaryEndHalf, + _ => { + return Err(self.error( + Span::new(start_contents, end), + ast::ErrorKind::SpecialWordBoundaryUnrecognized, + )) + } + }; + Ok(Some(kind)) + } + /// Parse an octal representation of a Unicode codepoint up to 3 digits /// long. This expects the parser to be positioned at the first octal /// digit and advances the parser to the first character immediately @@ -1743,7 +1840,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// is successful, then the parser is advanced to the position immediately /// following the closing `]`. #[inline(never)] - fn parse_set_class(&self) -> Result { + fn parse_set_class(&self) -> Result { assert_eq!(self.char(), '['); let mut union = @@ -1967,9 +2064,9 @@ impl<'s, P: Borrow> ParserI<'s, P> { // because parsing cannot fail with any interesting error. For example, // in order to use an ASCII character class, it must be enclosed in // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think - // of it as "ASCII character characters have the syntax `[:NAME:]` - // which can only appear within character brackets." This means that - // things like `[[:lower:]A]` are legal constructs. + // of it as "ASCII character classes have the syntax `[:NAME:]` which + // can only appear within character brackets." This means that things + // like `[[:lower:]A]` are legal constructs. // // However, if one types an incorrect ASCII character class, e.g., // `[[:loower:]]`, then we treat that as a normal nested character @@ -2189,12 +2286,12 @@ impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => { + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => { // These are all base cases, so we don't increment depth. 
return Ok(()); } - Ast::Class(ast::Class::Bracketed(ref x)) => &x.span, + Ast::ClassBracketed(ref x) => &x.span, Ast::Repetition(ref x) => &x.span, Ast::Group(ref x) => &x.span, Ast::Alternation(ref x) => &x.span, @@ -2210,12 +2307,12 @@ impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => { + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => { // These are all base cases, so we don't decrement depth. Ok(()) } - Ast::Class(ast::Class::Bracketed(_)) + Ast::ClassBracketed(_) | Ast::Repetition(_) | Ast::Group(_) | Ast::Alternation(_) @@ -2426,12 +2523,12 @@ mod tests { /// Create a meta literal starting at the given position. fn meta_lit(c: char, span: Span) -> Ast { - Ast::Literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c }) + Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c }) } /// Create a verbatim literal with the given span. fn lit_with(c: char, span: Span) -> Ast { - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Verbatim, c, @@ -2445,17 +2542,17 @@ mod tests { /// Create a concatenation with the given span. fn concat_with(span: Span, asts: Vec) -> Ast { - Ast::Concat(ast::Concat { span, asts }) + Ast::concat(ast::Concat { span, asts }) } /// Create an alternation with the given span. fn alt(range: Range, asts: Vec) -> Ast { - Ast::Alternation(ast::Alternation { span: span(range), asts }) + Ast::alternation(ast::Alternation { span: span(range), asts }) } /// Create a capturing group with the given span. fn group(range: Range, index: u32, ast: Ast) -> Ast { - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span(range), kind: ast::GroupKind::CaptureIndex(index), ast: Box::new(ast), @@ -2488,7 +2585,7 @@ mod tests { }, ); } - Ast::Flags(ast::SetFlags { + Ast::flags(ast::SetFlags { span: span_range(pat, range.clone()), flags: ast::Flags { span: span_range(pat, (range.start + 2)..(range.end - 1)), @@ -2502,7 +2599,7 @@ mod tests { // A nest limit of 0 still allows some types of regexes. 
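The tests that follow drive this through an internal helper; for reference, the same behavior is reachable through the public builder. A sketch, on my reading of the nest accounting above (literals cost no depth, each repetition/group/class adds one level):

```rust
use regex_syntax::ast::parse::ParserBuilder;

fn main() {
    let parser = |limit: u32| ParserBuilder::new().nest_limit(limit).build();

    // Flat expressions never increment the depth counter.
    assert!(parser(0).parse("a").is_ok());

    // A repetition adds one level, so it needs a limit of at least 1.
    assert!(parser(0).parse("a+").is_err());
    assert!(parser(1).parse("a+").is_ok());
}
```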
assert_eq!( parser_nest_limit("", 0).parse(), - Ok(Ast::Empty(span(0..0))) + Ok(Ast::empty(span(0..0))) ); assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0))); @@ -2516,7 +2613,7 @@ mod tests { ); assert_eq!( parser_nest_limit("a+", 1).parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2542,14 +2639,14 @@ mod tests { ); assert_eq!( parser_nest_limit("a+*", 2).parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(2..3), kind: ast::RepetitionKind::ZeroOrMore, }, greedy: true, - ast: Box::new(Ast::Repetition(ast::Repetition { + ast: Box::new(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2606,7 +2703,7 @@ mod tests { ); assert_eq!( parser_nest_limit("[a]", 1).parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: ast::ClassSet::Item(ast::ClassSetItem::Literal( @@ -2616,7 +2713,7 @@ mod tests { c: 'a', } )), - }))) + })) ); assert_eq!( parser_nest_limit("[ab]", 1).parse().unwrap_err(), @@ -2776,7 +2873,7 @@ bar vec![ lit_with('a', span_range(pat, 0..1)), lit_with(' ', span_range(pat, 1..2)), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 2..9), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span_range(pat, 4..5), @@ -2803,7 +2900,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -2825,7 +2922,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), kind: ast::GroupKind::CaptureIndex(1), ast: Box::new(lit_with('a', span_range(pat, 7..8))), @@ -2840,7 +2937,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span_range(pat, 8..8), @@ -2858,7 +2955,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(4..13), kind: ast::LiteralKind::HexBrace( ast::HexLiteralKind::X @@ -2877,7 +2974,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span_range(pat, 4..6), kind: ast::LiteralKind::Superfluous, c: ' ', @@ -2895,9 +2992,9 @@ bar Ok(concat_with( span_range(pat, 0..3), vec![ - Ast::Dot(span_range(pat, 0..1)), + Ast::dot(span_range(pat, 0..1)), lit_with('\n', span_range(pat, 1..2)), - Ast::Dot(span_range(pat, 2..3)), + Ast::dot(span_range(pat, 2..3)), ] )) ); @@ -2933,7 +3030,7 @@ bar fn parse_uncounted_repetition() { assert_eq!( parser(r"a*").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2945,7 +3042,7 @@ bar ); assert_eq!( parser(r"a+").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: 
span(1..2), @@ -2958,7 +3055,7 @@ bar assert_eq!( parser(r"a?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2970,7 +3067,7 @@ bar ); assert_eq!( parser(r"a??").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(1..3), @@ -2982,7 +3079,7 @@ bar ); assert_eq!( parser(r"a?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2997,7 +3094,7 @@ bar Ok(concat( 0..3, vec![ - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -3015,7 +3112,7 @@ bar Ok(concat( 0..4, vec![ - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(1..3), @@ -3034,7 +3131,7 @@ bar 0..3, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..3), op: ast::RepetitionOp { span: span(2..3), @@ -3048,7 +3145,7 @@ bar ); assert_eq!( parser(r"(ab)?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: span(4..5), @@ -3067,8 +3164,8 @@ bar Ok(alt( 0..3, vec![ - Ast::Empty(span(0..0)), - Ast::Repetition(ast::Repetition { + Ast::empty(span(0..0)), + Ast::repetition(ast::Repetition { span: span(1..3), op: ast::RepetitionOp { span: span(2..3), @@ -3157,7 +3254,7 @@ bar fn parse_counted_repetition() { assert_eq!( parser(r"a{5}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..4), op: ast::RepetitionOp { span: span(1..4), @@ -3171,7 +3268,7 @@ bar ); assert_eq!( parser(r"a{5,}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: span(1..5), @@ -3185,7 +3282,7 @@ bar ); assert_eq!( parser(r"a{5,9}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..6), op: ast::RepetitionOp { span: span(1..6), @@ -3199,7 +3296,7 @@ bar ); assert_eq!( parser(r"a{5}?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: span(1..5), @@ -3217,7 +3314,7 @@ bar 0..5, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..5), op: ast::RepetitionOp { span: span(2..5), @@ -3237,7 +3334,7 @@ bar 0..6, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..5), op: ast::RepetitionOp { span: span(2..5), @@ -3255,7 +3352,7 @@ bar assert_eq!( parser(r"a{ 5 }").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..6), op: ast::RepetitionOp { span: span(1..6), @@ -3269,7 +3366,7 @@ bar ); assert_eq!( parser(r"a{ 5 , 9 }").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..10), op: ast::RepetitionOp { span: span(1..10), @@ -3283,7 +3380,7 @@ bar ); assert_eq!( parser_ignore_whitespace(r"a{5,9} ?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..8), op: ast::RepetitionOp { span: span(1..8), @@ -3295,6 +3392,23 @@ bar ast: Box::new(lit('a', 0)), })) ); + assert_eq!( + 
parser(r"\b{5,9}").parse(), + Ok(Ast::repetition(ast::Repetition { + span: span(0..7), + op: ast::RepetitionOp { + span: span(2..7), + kind: ast::RepetitionKind::Range( + ast::RepetitionRange::Bounded(5, 9) + ), + }, + greedy: true, + ast: Box::new(Ast::assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundary, + })), + })) + ); assert_eq!( parser(r"(?i){0}").parse().unwrap_err(), @@ -3414,7 +3528,7 @@ bar fn parse_alternate() { assert_eq!( parser(r"a|b").parse(), - Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..3), asts: vec![lit('a', 0), lit('b', 2)], })) @@ -3424,7 +3538,7 @@ bar Ok(group( 0..5, 1, - Ast::Alternation(ast::Alternation { + Ast::alternation(ast::Alternation { span: span(1..4), asts: vec![lit('a', 1), lit('b', 3)], }) @@ -3433,14 +3547,14 @@ bar assert_eq!( parser(r"a|b|c").parse(), - Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..5), asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)], })) ); assert_eq!( parser(r"ax|by|cz").parse(), - Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..8), asts: vec![ concat(0..2, vec![lit('a', 0), lit('x', 1)]), @@ -3454,7 +3568,7 @@ bar Ok(group( 0..10, 1, - Ast::Alternation(ast::Alternation { + Ast::alternation(ast::Alternation { span: span(1..9), asts: vec![ concat(1..3, vec![lit('a', 1), lit('x', 2)]), @@ -3503,7 +3617,7 @@ bar parser(r"|").parse(), Ok(alt( 0..1, - vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),] + vec![Ast::empty(span(0..0)), Ast::empty(span(1..1)),] )) ); assert_eq!( @@ -3511,19 +3625,19 @@ bar Ok(alt( 0..2, vec![ - Ast::Empty(span(0..0)), - Ast::Empty(span(1..1)), - Ast::Empty(span(2..2)), + Ast::empty(span(0..0)), + Ast::empty(span(1..1)), + Ast::empty(span(2..2)), ] )) ); assert_eq!( parser(r"a|").parse(), - Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),])) + Ok(alt(0..2, vec![lit('a', 0), Ast::empty(span(2..2)),])) ); assert_eq!( parser(r"|a").parse(), - Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),])) + Ok(alt(0..2, vec![Ast::empty(span(0..0)), lit('a', 1),])) ); assert_eq!( @@ -3533,7 +3647,7 @@ bar 1, alt( 1..2, - vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),] + vec![Ast::empty(span(1..1)), Ast::empty(span(2..2)),] ) )) ); @@ -3542,7 +3656,7 @@ bar Ok(group( 0..4, 1, - alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),]) + alt(1..3, vec![lit('a', 1), Ast::empty(span(3..3)),]) )) ); assert_eq!( @@ -3550,7 +3664,7 @@ bar Ok(group( 0..4, 1, - alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),]) + alt(1..3, vec![Ast::empty(span(1..1)), lit('a', 2),]) )) ); @@ -3606,7 +3720,7 @@ bar fn parse_group() { assert_eq!( parser("(?i)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..4), flags: ast::Flags { span: span(2..3), @@ -3621,7 +3735,7 @@ bar ); assert_eq!( parser("(?iU)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..5), flags: ast::Flags { span: span(2..4), @@ -3644,7 +3758,7 @@ bar ); assert_eq!( parser("(?i-U)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..6), flags: ast::Flags { span: span(2..5), @@ -3672,15 +3786,15 @@ bar assert_eq!( parser("()").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..2), kind: ast::GroupKind::CaptureIndex(1), - ast: Box::new(Ast::Empty(span(1..1))), + ast: Box::new(Ast::empty(span(1..1))), })) ); assert_eq!( parser("(a)").parse(), - 
Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..3), kind: ast::GroupKind::CaptureIndex(1), ast: Box::new(lit('a', 1)), @@ -3688,20 +3802,20 @@ bar ); assert_eq!( parser("(())").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..4), kind: ast::GroupKind::CaptureIndex(1), - ast: Box::new(Ast::Group(ast::Group { + ast: Box::new(Ast::group(ast::Group { span: span(1..3), kind: ast::GroupKind::CaptureIndex(2), - ast: Box::new(Ast::Empty(span(2..2))), + ast: Box::new(Ast::empty(span(2..2))), })), })) ); assert_eq!( parser("(?:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..5), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..2), @@ -3713,7 +3827,7 @@ bar assert_eq!( parser("(?i:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..6), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..3), @@ -3729,7 +3843,7 @@ bar ); assert_eq!( parser("(?i-U:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..8), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..5), @@ -3818,7 +3932,7 @@ bar fn parse_capture_name() { assert_eq!( parser("(?z)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..7), kind: ast::GroupKind::CaptureName { starts_with_p: false, @@ -3833,7 +3947,7 @@ bar ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..8), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3848,7 +3962,7 @@ bar ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3864,7 +3978,7 @@ bar assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3880,7 +3994,7 @@ bar assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3896,7 +4010,7 @@ bar assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..11), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3912,7 +4026,7 @@ bar assert_eq!( parser("(?P)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: Span::new( Position::new(0, 1, 1), Position::new(9, 1, 9), @@ -3928,7 +4042,7 @@ bar index: 1, } }, - ast: Box::new(Ast::Empty(Span::new( + ast: Box::new(Ast::empty(Span::new( Position::new(8, 1, 8), Position::new(8, 1, 8), ))), @@ -3936,7 +4050,7 @@ bar ); assert_eq!( parser("(?P<名字>)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: Span::new( Position::new(0, 1, 1), Position::new(12, 1, 9), @@ -3952,7 +4066,7 @@ bar index: 1, } }, - ast: Box::new(Ast::Empty(Span::new( + ast: Box::new(Ast::empty(Span::new( Position::new(11, 1, 8), Position::new(11, 1, 8), ))), @@ -4381,6 +4495,48 @@ bar kind: ast::AssertionKind::WordBoundary, })) ); + assert_eq!( + parser(r"\b{start}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..9), + kind: ast::AssertionKind::WordBoundaryStart, + })) + ); + assert_eq!( + parser(r"\b{end}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..7), + kind: ast::AssertionKind::WordBoundaryEnd, + })) + ); + assert_eq!( + 
parser(r"\b{start-half}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..14), + kind: ast::AssertionKind::WordBoundaryStartHalf, + })) + ); + assert_eq!( + parser(r"\b{end-half}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..12), + kind: ast::AssertionKind::WordBoundaryEndHalf, + })) + ); + assert_eq!( + parser(r"\<").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundaryStartAngle, + })) + ); + assert_eq!( + parser(r"\>").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundaryEndAngle, + })) + ); assert_eq!( parser(r"\B").parse_primitive(), Ok(Primitive::Assertion(ast::Assertion { @@ -4418,20 +4574,60 @@ bar kind: ast::ErrorKind::EscapeUnrecognized, } ); - // But also, < and > are banned, so that we may evolve them into - // start/end word boundary assertions. (Not sure if we will...) + + // Starting a special word boundary without any non-whitespace chars + // after the brace makes it ambiguous whether the user meant to write + // a counted repetition (probably not?) or an actual special word + // boundary assertion. assert_eq!( - parser(r"\<").parse_escape().unwrap_err(), + parser(r"\b{").parse_escape().unwrap_err(), TestError { - span: span(0..2), - kind: ast::ErrorKind::EscapeUnrecognized, + span: span(0..3), + kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, } ); assert_eq!( - parser(r"\>").parse_escape().unwrap_err(), + parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(), TestError { - span: span(0..2), - kind: ast::ErrorKind::EscapeUnrecognized, + span: span(0..4), + kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, + } + ); + // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char, + // and thus causes the parser to treat it as a counted repetition. + assert_eq!( + parser(r"\b{ ").parse().unwrap_err(), + TestError { + span: span(4..4), + kind: ast::ErrorKind::RepetitionCountDecimalEmpty, + } + ); + // In this case, we got some valid chars that makes it look like the + // user is writing one of the special word boundary assertions, but + // we forget to close the brace. + assert_eq!( + parser(r"\b{foo").parse_escape().unwrap_err(), + TestError { + span: span(2..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnclosed, + } + ); + // We get the same error as above, except it is provoked by seeing a + // char that we know is invalid before seeing a closing brace. + assert_eq!( + parser(r"\b{foo!}").parse_escape().unwrap_err(), + TestError { + span: span(2..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnclosed, + } + ); + // And this one occurs when, syntactically, everything looks okay, but + // we don't use a valid spelling of a word boundary assertion. 
+ assert_eq!( + parser(r"\b{foo}").parse_escape().unwrap_err(), + TestError { + span: span(3..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized, } ); @@ -4494,15 +4690,15 @@ bar ); assert_eq!( parser_octal(r"\778").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..4), asts: vec![ - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(0..3), kind: ast::LiteralKind::Octal, c: '?', }), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(3..4), kind: ast::LiteralKind::Verbatim, c: '8', @@ -4512,15 +4708,15 @@ bar ); assert_eq!( parser_octal(r"\7777").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..5), asts: vec![ - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(0..4), kind: ast::LiteralKind::Octal, c: '\u{01FF}', }), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(4..5), kind: ast::LiteralKind::Verbatim, c: '7', @@ -4965,15 +5161,15 @@ bar assert_eq!( parser("[[:alnum:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..11), negated: false, kind: itemset(item_ascii(alnum(span(1..10), false))), - }))) + })) ); assert_eq!( parser("[[[:alnum:]]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..13), negated: false, kind: itemset(item_bracket(ast::ClassBracketed { @@ -4981,11 +5177,11 @@ bar negated: false, kind: itemset(item_ascii(alnum(span(2..11), false))), })), - }))) + })) ); assert_eq!( parser("[[:alnum:]&&[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: intersection( @@ -4993,11 +5189,11 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[[:alnum:]--[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: difference( @@ -5005,11 +5201,11 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[[:alnum:]~~[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: symdifference( @@ -5017,20 +5213,20 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[a]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: itemset(lit(span(1..2), 'a')), - }))) + })) ); assert_eq!( parser(r"[a\]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: union( @@ -5044,11 +5240,11 @@ bar }), ] ), - }))) + })) ); assert_eq!( parser(r"[a\-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: union( @@ -5063,44 +5259,44 @@ bar lit(span(4..5), 'z'), ] ), - }))) + })) ); assert_eq!( parser("[ab]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + 
Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),] ), - }))) + })) ); assert_eq!( parser("[a-]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),] ), - }))) + })) ); assert_eq!( parser("[-a]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),] ), - }))) + })) ); assert_eq!( parser(r"[\pL]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: itemset(item_unicode(ast::ClassUnicode { @@ -5108,11 +5304,11 @@ bar negated: false, kind: ast::ClassUnicodeKind::OneLetter('L'), })), - }))) + })) ); assert_eq!( parser(r"[\w]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: itemset(item_perl(ast::ClassPerl { @@ -5120,11 +5316,11 @@ bar kind: ast::ClassPerlKind::Word, negated: false, })), - }))) + })) ); assert_eq!( parser(r"[a\wz]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: union( @@ -5139,20 +5335,20 @@ bar lit(span(4..5), 'z'), ] ), - }))) + })) ); assert_eq!( parser("[a-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: itemset(range(span(1..4), 'a', 'z')), - }))) + })) ); assert_eq!( parser("[a-cx-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..8), negated: false, kind: union( @@ -5162,11 +5358,11 @@ bar range(span(4..7), 'x', 'z'), ] ), - }))) + })) ); assert_eq!( parser(r"[\w&&a-cx-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..12), negated: false, kind: intersection( @@ -5184,11 +5380,11 @@ bar ] ), ), - }))) + })) ); assert_eq!( parser(r"[a-cx-z&&\w]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..12), negated: false, kind: intersection( @@ -5206,11 +5402,11 @@ bar negated: false, })), ), - }))) + })) ); assert_eq!( parser(r"[a--b--c]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..9), negated: false, kind: difference( @@ -5222,11 +5418,11 @@ bar ), itemset(lit(span(7..8), 'c')), ), - }))) + })) ); assert_eq!( parser(r"[a~~b~~c]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..9), negated: false, kind: symdifference( @@ -5238,11 +5434,11 @@ bar ), itemset(lit(span(7..8), 'c')), ), - }))) + })) ); assert_eq!( parser(r"[\^&&^]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..7), negated: false, kind: intersection( @@ -5254,11 +5450,11 @@ bar })), itemset(lit(span(5..6), '^')), ), - }))) + })) ); assert_eq!( 
parser(r"[\&&&&]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..7), negated: false, kind: intersection( @@ -5270,11 +5466,11 @@ bar })), itemset(lit(span(5..6), '&')), ), - }))) + })) ); assert_eq!( parser(r"[&&&&]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: intersection( @@ -5286,13 +5482,13 @@ bar ), itemset(empty(span(5..5))), ), - }))) + })) ); let pat = "[☃-⛄]"; assert_eq!( parser(pat).parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span_range(pat, 0..9), negated: false, kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange { @@ -5308,20 +5504,20 @@ bar c: '⛄', }, })), - }))) + })) ); assert_eq!( parser(r"[]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: itemset(lit(span(1..2), ']')), - }))) + })) ); assert_eq!( parser(r"[]\[]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: union( @@ -5335,14 +5531,14 @@ bar }), ] ), - }))) + })) ); assert_eq!( parser(r"[\[]]").parse(), Ok(concat( 0..5, vec![ - Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: itemset(ast::ClassSetItem::Literal( @@ -5352,8 +5548,8 @@ bar c: '[', } )), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(4..5), kind: ast::LiteralKind::Verbatim, c: ']', @@ -5914,15 +6110,15 @@ bar assert_eq!( parser(r"\pNz").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..4), asts: vec![ - Ast::Class(ast::Class::Unicode(ast::ClassUnicode { + Ast::class_unicode(ast::ClassUnicode { span: span(0..3), negated: false, kind: ast::ClassUnicodeKind::OneLetter('N'), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(3..4), kind: ast::LiteralKind::Verbatim, c: 'z', @@ -5932,15 +6128,15 @@ bar ); assert_eq!( parser(r"\p{Greek}z").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..10), asts: vec![ - Ast::Class(ast::Class::Unicode(ast::ClassUnicode { + Ast::class_unicode(ast::ClassUnicode { span: span(0..9), negated: false, kind: ast::ClassUnicodeKind::Named(s("Greek")), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(9..10), kind: ast::LiteralKind::Verbatim, c: 'z', @@ -6017,23 +6213,23 @@ bar assert_eq!( parser(r"\d").parse(), - Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl { + Ok(Ast::class_perl(ast::ClassPerl { span: span(0..2), kind: ast::ClassPerlKind::Digit, negated: false, - }))) + })) ); assert_eq!( parser(r"\dz").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..3), asts: vec![ - Ast::Class(ast::Class::Perl(ast::ClassPerl { + Ast::class_perl(ast::ClassPerl { span: span(0..2), kind: ast::ClassPerlKind::Digit, negated: false, - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(2..3), kind: ast::LiteralKind::Verbatim, c: 'z', diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs index 86a87e1439..1ceb3c7faa 100644 --- a/regex-syntax/src/ast/print.rs +++ b/regex-syntax/src/ast/print.rs @@ -80,27 +80,21 @@ 
impl Visitor for Writer { fn visit_pre(&mut self, ast: &Ast) -> fmt::Result { match *ast { Ast::Group(ref x) => self.fmt_group_pre(x), - Ast::Class(ast::Class::Bracketed(ref x)) => { - self.fmt_class_bracketed_pre(x) - } + Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x), _ => Ok(()), } } fn visit_post(&mut self, ast: &Ast) -> fmt::Result { - use crate::ast::Class; - match *ast { Ast::Empty(_) => Ok(()), Ast::Flags(ref x) => self.fmt_set_flags(x), Ast::Literal(ref x) => self.fmt_literal(x), Ast::Dot(_) => self.wtr.write_str("."), Ast::Assertion(ref x) => self.fmt_assertion(x), - Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x), - Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x), - Ast::Class(Class::Bracketed(ref x)) => { - self.fmt_class_bracketed_post(x) - } + Ast::ClassPerl(ref x) => self.fmt_class_perl(x), + Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x), + Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x), Ast::Repetition(ref x) => self.fmt_repetition(x), Ast::Group(ref x) => self.fmt_group_post(x), Ast::Alternation(_) => Ok(()), @@ -267,6 +261,12 @@ impl Writer { EndText => self.wtr.write_str(r"\z"), WordBoundary => self.wtr.write_str(r"\b"), NotWordBoundary => self.wtr.write_str(r"\B"), + WordBoundaryStart => self.wtr.write_str(r"\b{start}"), + WordBoundaryEnd => self.wtr.write_str(r"\b{end}"), + WordBoundaryStartAngle => self.wtr.write_str(r"\<"), + WordBoundaryEndAngle => self.wtr.write_str(r"\>"), + WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"), + WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"), } } diff --git a/regex-syntax/src/ast/visitor.rs b/regex-syntax/src/ast/visitor.rs index 03d12a14db..c1bb24d971 100644 --- a/regex-syntax/src/ast/visitor.rs +++ b/regex-syntax/src/ast/visitor.rs @@ -264,7 +264,7 @@ impl<'a> HeapVisitor<'a> { visitor: &mut V, ) -> Result>, V::Err> { Ok(match *ast { - Ast::Class(ast::Class::Bracketed(ref x)) => { + Ast::ClassBracketed(ref x) => { self.visit_class(x, visitor)?; None } diff --git a/regex-syntax/src/hir/literal.rs b/regex-syntax/src/hir/literal.rs index 9461db9891..a5a3737f68 100644 --- a/regex-syntax/src/hir/literal.rs +++ b/regex-syntax/src/hir/literal.rs @@ -477,7 +477,7 @@ impl Extractor { } seq } - hir::Repetition { min, max: Some(max), .. } if min < max => { + hir::Repetition { min, .. } => { assert!(min > 0); // handled above let limit = u32::try_from(self.limit_repeat).unwrap_or(u32::MAX); @@ -491,10 +491,6 @@ impl Extractor { seq.make_inexact(); seq } - hir::Repetition { .. } => { - subseq.make_inexact(); - subseq - } } } @@ -2239,24 +2235,19 @@ impl PreferenceTrie { /// after them and because any removed literals are guaranteed to never /// match. fn minimize(literals: &mut Vec, keep_exact: bool) { - use core::cell::RefCell; - - // MSRV(1.61): Use retain_mut here to avoid interior mutability. 
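Picking up the `MSRV(1.61)` note above: with the MSRV now at 1.65, the hunk below drops the `RefCell` workaround in favor of `Vec::retain_mut`, which hands the closure `&mut` access to each element while filtering. A minimal sketch of `retain_mut` itself:

```rust
fn main() {
    let mut v = vec![1, 2, 3, 4, 5];
    // Rewrite each element and decide whether to keep it in a single pass.
    v.retain_mut(|x| {
        *x *= 10;
        *x <= 30
    });
    assert_eq!(v, [10, 20, 30]);
}
```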
- let trie = RefCell::new(PreferenceTrie { + let mut trie = PreferenceTrie { states: vec![], matches: vec![], next_literal_index: 1, - }); + }; let mut make_inexact = vec![]; - literals.retain(|lit| { - match trie.borrow_mut().insert(lit.as_bytes()) { - Ok(_) => true, - Err(i) => { - if !keep_exact { - make_inexact.push(i.checked_sub(1).unwrap()); - } - false + literals.retain_mut(|lit| match trie.insert(lit.as_bytes()) { + Ok(_) => true, + Err(i) => { + if !keep_exact { + make_inexact.push(i.checked_sub(1).unwrap()); } + false } }); for i in make_inexact { @@ -2655,6 +2646,12 @@ mod tests { ]), e(r"(ab|cd)(ef|gh)(ij|kl)") ); + + assert_eq!(inexact([E("abab")], [E("abab")]), e(r"(ab){2}")); + + assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,3}")); + + assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,}")); } #[test] diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs index 6c1d2745e0..ae3ba318ee 100644 --- a/regex-syntax/src/hir/mod.rs +++ b/regex-syntax/src/hir/mod.rs @@ -322,6 +322,22 @@ impl Hir { /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes()))); /// assert_eq!(&expected, concat.kind()); /// ``` + /// + /// # Example: building a literal from a `char` + /// + /// This example shows how to build a single `Hir` literal from a `char` + /// value. Since a [`Literal`] is just bytes, we just need to UTF-8 + /// encode a `char` value: + /// + /// ``` + /// use regex_syntax::hir::{Hir, HirKind, Literal}; + /// + /// let ch = '☃'; + /// let got = Hir::literal(ch.encode_utf8(&mut [0; 4]).as_bytes()); + /// + /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes()))); + /// assert_eq!(&expected, got.kind()); + /// ``` #[inline] pub fn literal>>(lit: B) -> Hir { let bytes = lit.into(); @@ -797,13 +813,18 @@ impl core::fmt::Debug for Literal { /// The high-level intermediate representation of a character class. /// /// A character class corresponds to a set of characters. A character is either -/// defined by a Unicode scalar value or a byte. Unicode characters are used -/// by default, while bytes are used when Unicode mode (via the `u` flag) is -/// disabled. +/// defined by a Unicode scalar value or a byte. /// /// A character class, regardless of its character type, is represented by a /// sequence of non-overlapping non-adjacent ranges of characters. /// +/// There are no guarantees about which class variant is used. Generally +/// speaking, the Unicode variat is used whenever a class needs to contain +/// non-ASCII Unicode scalar values. But the Unicode variant can be used even +/// when Unicode mode is disabled. For example, at the time of writing, the +/// regex `(?-u:a|\xc2\xa0)` will compile down to HIR for the Unicode class +/// `[a\u00A0]` due to optimizations. +/// /// Note that `Bytes` variant may be produced even when it exclusively matches /// valid UTF-8. This is because a `Bytes` variant represents an intention by /// the author of the regular expression to disable Unicode mode, which in turn @@ -1326,8 +1347,9 @@ impl ClassUnicodeRange { } } -/// A set of characters represented by arbitrary bytes (where one byte -/// corresponds to one character). +/// A set of characters represented by arbitrary bytes. +/// +/// Each byte corresponds to one character. #[derive(Clone, Debug, Eq, PartialEq)] pub struct ClassBytes { set: IntervalSet, @@ -1629,6 +1651,42 @@ pub enum Look { WordUnicode = 1 << 8, /// Match a Unicode-aware negation of a word boundary. 
WordUnicodeNegate = 1 << 9, + /// Match the start of an ASCII-only word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartAscii = 1 << 10, + /// Match the end of an ASCII-only word boundary. That is, this matches + /// a position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndAscii = 1 << 11, + /// Match the start of a Unicode word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartUnicode = 1 << 12, + /// Match the end of a Unicode word boundary. That is, this matches a + /// position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndUnicode = 1 << 13, + /// Match the start half of an ASCII-only word boundary. That is, this + /// matches a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfAscii = 1 << 14, + /// Match the end half of an ASCII-only word boundary. That is, this + /// matches a position at either the end of the haystack or where the + /// following character is not a word character. + WordEndHalfAscii = 1 << 15, + /// Match the start half of a Unicode word boundary. That is, this matches + /// a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfUnicode = 1 << 16, + /// Match the end half of a Unicode word boundary. That is, this matches + /// a position at either the end of the haystack or where the following + /// character is not a word character. + WordEndHalfUnicode = 1 << 17, } impl Look { @@ -1650,6 +1708,14 @@ impl Look { Look::WordAsciiNegate => Look::WordAsciiNegate, Look::WordUnicode => Look::WordUnicode, Look::WordUnicodeNegate => Look::WordUnicodeNegate, + Look::WordStartAscii => Look::WordEndAscii, + Look::WordEndAscii => Look::WordStartAscii, + Look::WordStartUnicode => Look::WordEndUnicode, + Look::WordEndUnicode => Look::WordStartUnicode, + Look::WordStartHalfAscii => Look::WordEndHalfAscii, + Look::WordEndHalfAscii => Look::WordStartHalfAscii, + Look::WordStartHalfUnicode => Look::WordEndHalfUnicode, + Look::WordEndHalfUnicode => Look::WordStartHalfUnicode, } } @@ -1658,28 +1724,36 @@ impl Look { /// constructor is guaranteed to return the same look-around variant that /// one started with within a semver compatible release of this crate. #[inline] - pub const fn as_repr(self) -> u16 { + pub const fn as_repr(self) -> u32 { // AFAIK, 'as' is the only way to zero-cost convert an int enum to an // actual int. - self as u16 + self as u32 } /// Given the underlying representation of a `Look` value, return the /// corresponding `Look` value if the representation is valid. Otherwise /// `None` is returned. 
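With 18 variants, the bit-per-variant encoding no longer fits in a `u16`, hence the `u32` bump in `as_repr`/`from_repr` (defined just below). A round-trip sketch using the `Look` API as this diff leaves it:

```rust
use regex_syntax::hir::Look;

fn main() {
    // Each assertion occupies one bit of the u32 representation.
    let look = Look::WordStartAscii;
    assert_eq!(look.as_repr(), 1 << 10);
    assert_eq!(Look::from_repr(1 << 10), Some(look));

    // Start and end assertions swap under `reversed`, matching the
    // mapping table added above.
    assert_eq!(look.reversed(), Look::WordEndAscii);
}
```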
#[inline] - pub const fn from_repr(repr: u16) -> Option { + pub const fn from_repr(repr: u32) -> Option { match repr { - 0b00_0000_0001 => Some(Look::Start), - 0b00_0000_0010 => Some(Look::End), - 0b00_0000_0100 => Some(Look::StartLF), - 0b00_0000_1000 => Some(Look::EndLF), - 0b00_0001_0000 => Some(Look::StartCRLF), - 0b00_0010_0000 => Some(Look::EndCRLF), - 0b00_0100_0000 => Some(Look::WordAscii), - 0b00_1000_0000 => Some(Look::WordAsciiNegate), - 0b01_0000_0000 => Some(Look::WordUnicode), - 0b10_0000_0000 => Some(Look::WordUnicodeNegate), + 0b00_0000_0000_0000_0001 => Some(Look::Start), + 0b00_0000_0000_0000_0010 => Some(Look::End), + 0b00_0000_0000_0000_0100 => Some(Look::StartLF), + 0b00_0000_0000_0000_1000 => Some(Look::EndLF), + 0b00_0000_0000_0001_0000 => Some(Look::StartCRLF), + 0b00_0000_0000_0010_0000 => Some(Look::EndCRLF), + 0b00_0000_0000_0100_0000 => Some(Look::WordAscii), + 0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate), + 0b00_0000_0001_0000_0000 => Some(Look::WordUnicode), + 0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate), + 0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii), + 0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii), + 0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode), + 0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode), + 0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii), + 0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii), + 0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode), + 0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode), _ => None, } } @@ -1704,6 +1778,14 @@ impl Look { Look::WordAsciiNegate => 'B', Look::WordUnicode => '𝛃', Look::WordUnicodeNegate => '𝚩', + Look::WordStartAscii => '<', + Look::WordEndAscii => '>', + Look::WordStartUnicode => '〈', + Look::WordEndUnicode => '〉', + Look::WordStartHalfAscii => '◁', + Look::WordEndHalfAscii => '▷', + Look::WordStartHalfUnicode => '◀', + Look::WordEndHalfUnicode => '▶', } } } @@ -2594,7 +2676,7 @@ pub struct LookSet { /// range of `u16` values to be represented. For example, even if the /// current implementation only makes use of the 10 least significant bits, /// it may use more bits in a future semver compatible release. - pub bits: u16, + pub bits: u32, } impl LookSet { @@ -2697,13 +2779,22 @@ impl LookSet { pub fn contains_word_unicode(self) -> bool { self.contains(Look::WordUnicode) || self.contains(Look::WordUnicodeNegate) + || self.contains(Look::WordStartUnicode) + || self.contains(Look::WordEndUnicode) + || self.contains(Look::WordStartHalfUnicode) + || self.contains(Look::WordEndHalfUnicode) } /// Returns true if and only if this set contains any ASCII word boundary /// or negated ASCII word boundary assertions. #[inline] pub fn contains_word_ascii(self) -> bool { - self.contains(Look::WordAscii) || self.contains(Look::WordAsciiNegate) + self.contains(Look::WordAscii) + || self.contains(Look::WordAsciiNegate) + || self.contains(Look::WordStartAscii) + || self.contains(Look::WordEndAscii) + || self.contains(Look::WordStartHalfAscii) + || self.contains(Look::WordEndHalfAscii) } /// Returns an iterator over all of the look-around assertions in this set. @@ -2782,29 +2873,31 @@ impl LookSet { *self = self.intersect(other); } - /// Return a `LookSet` from the slice given as a native endian 16-bit + /// Return a `LookSet` from the slice given as a native endian 32-bit /// integer. /// /// # Panics /// - /// This panics if `slice.len() < 2`. + /// This panics if `slice.len() < 4`. 
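The widened representation also flows through `LookSet`, whose serialized form grows from 2 to 4 bytes. A sketch of the set API, including the round-trip implemented by `read_repr`/`write_repr` below:

```rust
use regex_syntax::hir::{Look, LookSet};

fn main() {
    let set = LookSet::empty()
        .insert(Look::Start)
        .insert(Look::WordEndHalfUnicode);

    assert!(set.contains(Look::Start));
    // Half word boundaries now count as Unicode word assertions.
    assert!(set.contains_word_unicode());

    // The set is just a u32 bitmask, so it round-trips through 4 bytes.
    let mut buf = [0u8; 4];
    set.write_repr(&mut buf);
    assert_eq!(LookSet::read_repr(&buf).iter().count(), 2);
}
```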
#[inline] pub fn read_repr(slice: &[u8]) -> LookSet { - let bits = u16::from_ne_bytes(slice[..2].try_into().unwrap()); + let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap()); LookSet { bits } } - /// Write a `LookSet` as a native endian 16-bit integer to the beginning + /// Write a `LookSet` as a native endian 32-bit integer to the beginning /// of the slice given. /// /// # Panics /// - /// This panics if `slice.len() < 2`. + /// This panics if `slice.len() < 4`. #[inline] pub fn write_repr(self, slice: &mut [u8]) { let raw = self.bits.to_ne_bytes(); slice[0] = raw[0]; slice[1] = raw[1]; + slice[2] = raw[2]; + slice[3] = raw[3]; } } @@ -2837,9 +2930,9 @@ impl Iterator for LookSetIter { return None; } // We'll never have more than u8::MAX distinct look-around assertions, - // so 'repr' will always fit into a u16. - let repr = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); - let look = Look::from_repr(1 << repr)?; + // so 'bit' will always fit into a u16. + let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); + let look = Look::from_repr(1 << bit)?; self.set = self.set.remove(look); Some(look) } @@ -3761,7 +3854,7 @@ mod tests { assert_eq!(0, set.iter().count()); let set = LookSet::full(); - assert_eq!(10, set.iter().count()); + assert_eq!(18, set.iter().count()); let set = LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode); @@ -3779,6 +3872,6 @@ mod tests { let res = format!("{:?}", LookSet::empty()); assert_eq!("∅", res); let res = format!("{:?}", LookSet::full()); - assert_eq!("Az^$rRbB𝛃𝚩", res); + assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res); } } diff --git a/regex-syntax/src/hir/print.rs b/regex-syntax/src/hir/print.rs index aa737a092d..dfa6d40322 100644 --- a/regex-syntax/src/hir/print.rs +++ b/regex-syntax/src/hir/print.rs @@ -202,6 +202,30 @@ impl Visitor for Writer { hir::Look::WordUnicodeNegate => { self.wtr.write_str(r"\B")?; } + hir::Look::WordStartAscii => { + self.wtr.write_str(r"(?-u:\b{start})")?; + } + hir::Look::WordEndAscii => { + self.wtr.write_str(r"(?-u:\b{end})")?; + } + hir::Look::WordStartUnicode => { + self.wtr.write_str(r"\b{start}")?; + } + hir::Look::WordEndUnicode => { + self.wtr.write_str(r"\b{end}")?; + } + hir::Look::WordStartHalfAscii => { + self.wtr.write_str(r"(?-u:\b{start-half})")?; + } + hir::Look::WordEndHalfAscii => { + self.wtr.write_str(r"(?-u:\b{end-half})")?; + } + hir::Look::WordStartHalfUnicode => { + self.wtr.write_str(r"\b{start-half}")?; + } + hir::Look::WordEndHalfUnicode => { + self.wtr.write_str(r"\b{end-half}")?; + } }, HirKind::Capture(hir::Capture { ref name, .. 
}) => { self.wtr.write_str("(")?; diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 5430b51b27..313a1e9e8b 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -337,7 +337,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { fn visit_pre(&mut self, ast: &Ast) -> Result<()> { match *ast { - Ast::Class(ast::Class::Bracketed(_)) => { + Ast::ClassBracketed(_) => { if self.flags().unicode() { let cls = hir::ClassUnicode::empty(); self.push(HirFrame::ClassUnicode(cls)); @@ -354,14 +354,14 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { .unwrap_or_else(|| self.flags()); self.push(HirFrame::Group { old_flags }); } - Ast::Concat(ref x) if x.asts.is_empty() => {} Ast::Concat(_) => { self.push(HirFrame::Concat); } - Ast::Alternation(ref x) if x.asts.is_empty() => {} - Ast::Alternation(_) => { + Ast::Alternation(ref x) => { self.push(HirFrame::Alternation); - self.push(HirFrame::AlternationBranch); + if !x.asts.is_empty() { + self.push(HirFrame::AlternationBranch); + } } _ => {} } @@ -386,29 +386,20 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { // consistency sake. self.push(HirFrame::Expr(Hir::empty())); } - Ast::Literal(ref x) => { - match self.ast_literal_to_scalar(x)? { - Either::Right(byte) => self.push_byte(byte), - Either::Left(ch) => { - if !self.flags().unicode() && ch.len_utf8() > 1 { - return Err(self - .error(x.span, ErrorKind::UnicodeNotAllowed)); - } - match self.case_fold_char(x.span, ch)? { - None => self.push_char(ch), - Some(expr) => self.push(HirFrame::Expr(expr)), - } - } - } - // self.push(HirFrame::Expr(self.hir_literal(x)?)); - } - Ast::Dot(span) => { - self.push(HirFrame::Expr(self.hir_dot(span)?)); + Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? { + Either::Right(byte) => self.push_byte(byte), + Either::Left(ch) => match self.case_fold_char(x.span, ch)? { + None => self.push_char(ch), + Some(expr) => self.push(HirFrame::Expr(expr)), + }, + }, + Ast::Dot(ref span) => { + self.push(HirFrame::Expr(self.hir_dot(**span)?)); } Ast::Assertion(ref x) => { self.push(HirFrame::Expr(self.hir_assertion(x)?)); } - Ast::Class(ast::Class::Perl(ref x)) => { + Ast::ClassPerl(ref x) => { if self.flags().unicode() { let cls = self.hir_perl_unicode_class(x)?; let hcls = hir::Class::Unicode(cls); @@ -419,11 +410,11 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::Expr(Hir::class(hcls))); } } - Ast::Class(ast::Class::Unicode(ref x)) => { + Ast::ClassUnicode(ref x) => { let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); self.push(HirFrame::Expr(Hir::class(cls))); } - Ast::Class(ast::Class::Bracketed(ref ast)) => { + Ast::ClassBracketed(ref ast) => { if self.flags().unicode() { let mut cls = self.pop().unwrap().unwrap_class_unicode(); self.unicode_fold_and_negate( @@ -874,8 +865,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> { })?; Ok(Some(Hir::class(hir::Class::Unicode(cls)))) } else { - if c.len_utf8() > 1 { - return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); + if !c.is_ascii() { + return Ok(None); } // If case folding won't do anything, then don't bother trying. 
match c { @@ -964,6 +955,34 @@ impl<'t, 'p> TranslatorI<'t, 'p> { } else { hir::Look::WordAsciiNegate }), + ast::AssertionKind::WordBoundaryStart + | ast::AssertionKind::WordBoundaryStartAngle => { + Hir::look(if unicode { + hir::Look::WordStartUnicode + } else { + hir::Look::WordStartAscii + }) + } + ast::AssertionKind::WordBoundaryEnd + | ast::AssertionKind::WordBoundaryEndAngle => { + Hir::look(if unicode { + hir::Look::WordEndUnicode + } else { + hir::Look::WordEndAscii + }) + } + ast::AssertionKind::WordBoundaryStartHalf => { + Hir::look(if unicode { + hir::Look::WordStartHalfUnicode + } else { + hir::Look::WordStartHalfAscii + }) + } + ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode { + hir::Look::WordEndHalfUnicode + } else { + hir::Look::WordEndHalfAscii + }), }) } @@ -1185,9 +1204,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> { match self.ast_literal_to_scalar(ast)? { Either::Right(byte) => Ok(byte), Either::Left(ch) => { - let cp = u32::from(ch); - if cp <= 0x7F { - Ok(u8::try_from(cp).unwrap()) + if ch.is_ascii() { + Ok(u8::try_from(ch).unwrap()) } else { // We can't feasibly support Unicode in // byte oriented classes. Byte classes don't @@ -1635,16 +1653,7 @@ mod tests { assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a")); assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF")); - assert_eq!( - t_err("(?-u)☃"), - TestError { - kind: hir::ErrorKind::UnicodeNotAllowed, - span: Span::new( - Position::new(5, 1, 6), - Position::new(8, 1, 7) - ), - } - ); + assert_eq!(t("(?-u)☃"), hir_lit("☃")); assert_eq!( t_err(r"(?-u)\xFF"), TestError { @@ -1722,16 +1731,7 @@ mod tests { ); assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF")); - assert_eq!( - t_err("(?i-u)β"), - TestError { - kind: hir::ErrorKind::UnicodeNotAllowed, - span: Span::new( - Position::new(6, 1, 7), - Position::new(8, 1, 8), - ), - } - ); + assert_eq!(t("(?i-u)β"), hir_lit("β"),); } #[test] @@ -3626,4 +3626,99 @@ mod tests { ]), ); } + + #[test] + fn regression_alt_empty_concat() { + use crate::ast::{self, Ast}; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::alternation(ast::Alternation { + span, + asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::empty()), t.translate("", &ast)); + } + + #[test] + fn regression_empty_alt() { + use crate::ast::{self, Ast}; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::concat(ast::Concat { + span, + asts: vec![Ast::alternation(ast::Alternation { + span, + asts: vec![], + })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::fail()), t.translate("", &ast)); + } + + #[test] + fn regression_singleton_alt() { + use crate::{ + ast::{self, Ast}, + hir::Dot, + }; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::concat(ast::Concat { + span, + asts: vec![Ast::alternation(ast::Alternation { + span, + asts: vec![Ast::dot(span)], + })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast)); + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168 + #[test] + fn regression_fuzz_match() { + let pat = "[(\u{6} \0-\u{afdf5}] \0 "; + let ast = ParserBuilder::new() + .octal(false) + .ignore_whitespace(true) + .build() + .parse(pat) + .unwrap(); + let hir = TranslatorBuilder::new() + .utf8(true) + .case_insensitive(false) + .multi_line(false) + .dot_matches_new_line(false) + .swap_greed(true) + .unicode(true) + .build() + .translate(pat, &ast) + .unwrap(); + assert_eq!( + hir, + 
Hir::concat(vec![ + hir_uclass(&[('\0', '\u{afdf5}')]), + hir_lit("\0"), + ]) + ); + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155 + #[cfg(feature = "unicode")] + #[test] + fn regression_fuzz_difference1() { + let pat = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*"; + let _ = t(pat); // shouldn't panic + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153 + #[test] + fn regression_fuzz_char_decrement1() { + let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0] bool { // escapeable, \< and \> will result in a parse error. Thus, we can // turn them into something else in the future without it being a // backwards incompatible change. + // + // OK, now we support \< and \>, and we need to retain them as *not* + // escapeable here since the escape sequence is significant. '<' | '>' => false, _ => true, } @@ -381,7 +372,7 @@ pub fn try_is_word_character( /// Returns true if and only if the given character is an ASCII word character. /// /// An ASCII word character is defined by the following character class: -/// `[_0-9a-zA-Z]'. +/// `[_0-9a-zA-Z]`. 
pub fn is_word_byte(c: u8) -> bool { match c { b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true, diff --git a/src/builders.rs b/src/builders.rs index d19a0ffe23..c111a96c0b 100644 --- a/src/builders.rs +++ b/src/builders.rs @@ -28,7 +28,9 @@ use alloc::{ vec::Vec, }; -use regex_automata::{meta, util::syntax, MatchKind}; +use regex_automata::{ + meta, nfa::thompson::WhichCaptures, util::syntax, MatchKind, +}; use crate::error::Error; @@ -100,8 +102,12 @@ impl Builder { } fn build_many_string(&self) -> Result { - let metac = - self.metac.clone().match_kind(MatchKind::All).utf8_empty(true); + let metac = self + .metac + .clone() + .match_kind(MatchKind::All) + .utf8_empty(true) + .which_captures(WhichCaptures::None); let syntaxc = self.syntaxc.clone().utf8(true); let patterns = Arc::from(self.pats.as_slice()); meta::Builder::new() @@ -113,8 +119,12 @@ impl Builder { } fn build_many_bytes(&self) -> Result { - let metac = - self.metac.clone().match_kind(MatchKind::All).utf8_empty(false); + let metac = self + .metac + .clone() + .match_kind(MatchKind::All) + .utf8_empty(false) + .which_captures(WhichCaptures::None); let syntaxc = self.syntaxc.clone().utf8(false); let patterns = Arc::from(self.pats.as_slice()); meta::Builder::new() @@ -669,6 +679,7 @@ pub(crate) mod string { /// # Example /// /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 /// use regex::RegexBuilder; /// /// // It may surprise you how big some seemingly small patterns can @@ -1236,6 +1247,7 @@ pub(crate) mod string { /// # Example /// /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 /// use regex::RegexSetBuilder; /// /// // It may surprise you how big some seemingly small patterns can @@ -1846,6 +1858,7 @@ pub(crate) mod bytes { /// # Example /// /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 /// use regex::bytes::RegexBuilder; /// /// // It may surprise you how big some seemingly small patterns can @@ -2418,6 +2431,7 @@ pub(crate) mod bytes { /// # Example /// /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 /// use regex::bytes::RegexSetBuilder; /// /// // It may surprise you how big some seemingly small patterns can diff --git a/src/bytes.rs b/src/bytes.rs index 3f53a3ea55..383ac4a5b5 100644 --- a/src/bytes.rs +++ b/src/bytes.rs @@ -68,8 +68,8 @@ bytes: 1. The `u` flag can be disabled even when disabling it might cause the regex to match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in "ASCII compatible" mode. -2. In ASCII compatible mode, neither Unicode scalar values nor Unicode -character classes are allowed. +2. In ASCII compatible mode, Unicode character classes are not allowed. Literal +Unicode scalar values outside of character classes are allowed. 3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`) revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps to `[[:digit:]]` and `\s` maps to `[[:space:]]`. diff --git a/src/lib.rs b/src/lib.rs index e9c9c570fc..e4c67f4bc1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -543,8 +543,10 @@ scalar value, even when it is encoded using multiple bytes. When Unicode mode is disabled (e.g., `(?-u:.)`), then `.` will match a single byte in all cases. * The character classes `\w`, `\d` and `\s` are all Unicode-aware by default. Use `(?-u:\w)`, `(?-u:\d)` and `(?-u:\s)` to get their ASCII-only definitions. -* Similarly, `\b` and `\B` use a Unicode definition of a "word" character. 
To -get ASCII-only word boundaries, use `(?-u:\b)` and `(?-u:\B)`. +* Similarly, `\b` and `\B` use a Unicode definition of a "word" character. +To get ASCII-only word boundaries, use `(?-u:\b)` and `(?-u:\B)`. This also +applies to the special word boundary assertions. (That is, `\b{start}`, +`\b{end}`, `\b{start-half}`, `\b{end-half}`.) * `^` and `$` are **not** Unicode-aware in multi-line mode. Namely, they only recognize `\n` (assuming CRLF mode is not enabled) and not any of the other forms of line terminators defined by Unicode. @@ -665,8 +667,8 @@ a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax). Any named character class may appear inside a bracketed `[...]` character -class. For example, `[\p{Greek}[:digit:]]` matches any Greek or ASCII -digit. `[\p{Greek}&&\pL]` matches Greek letters. +class. For example, `[\p{Greek}[:digit:]]` matches any ASCII digit or any +codepoint in the `Greek` script. `[\p{Greek}&&\pL]` matches Greek letters. Precedence in character classes, from most binding to least: @@ -723,12 +725,16 @@ x{n}? exactly n x ### Empty matches
-^     the beginning of a haystack (or start-of-line with multi-line mode)
-$     the end of a haystack (or end-of-line with multi-line mode)
-\A    only the beginning of a haystack (even with multi-line mode enabled)
-\z    only the end of a haystack (even with multi-line mode enabled)
-\b    a Unicode word boundary (\w on one side and \W, \A, or \z on other)
-\B    not a Unicode word boundary
+^               the beginning of a haystack (or start-of-line with multi-line mode)
+$               the end of a haystack (or end-of-line with multi-line mode)
+\A              only the beginning of a haystack (even with multi-line mode enabled)
+\z              only the end of a haystack (even with multi-line mode enabled)
+\b              a Unicode word boundary (\w on one side and \W, \A, or \z on other)
+\B              not a Unicode word boundary
+\b{start}, \<   a Unicode start-of-word boundary (\W|\A on the left, \w on the right)
+\b{end}, \>     a Unicode end-of-word boundary (\w on the left, \W|\z on the right)
+\b{start-half}  half of a Unicode start-of-word boundary (\W|\A on the left)
+\b{end-half}    half of a Unicode end-of-word boundary (\W|\z on the right)
 
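As a quick illustration of the new one-sided assertions in the table above, here is a minimal sketch (assuming a regex release that includes them, i.e. 1.10+). Unlike `\b`, which matches on both edges of a word, `\b{start}` and `\b{end}` each match on only one:

use regex::Regex;

fn main() {
    // In "cat ", \b matches at offsets 0 and 3, but the one-sided
    // assertions split that: \b{start} matches only at 0 (\W|\A on the
    // left, \w on the right) and \b{end} only at 3 (\w on the left,
    // \W|\z on the right).
    let starts: Vec<usize> = Regex::new(r"\b{start}")
        .unwrap()
        .find_iter("cat ")
        .map(|m| m.start())
        .collect();
    assert_eq!(vec![0], starts);

    let ends: Vec<usize> = Regex::new(r"\b{end}")
        .unwrap()
        .find_iter("cat ")
        .map(|m| m.start())
        .collect();
    assert_eq!(vec![3], ends);
}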
The empty regex is valid and matches the empty string. For example, the @@ -856,28 +862,32 @@ Note that this includes all possible escape sequences, even ones that are documented elsewhere.
-\*          literal *, applies to all ASCII except [0-9A-Za-z<>]
-\a          bell (\x07)
-\f          form feed (\x0C)
-\t          horizontal tab
-\n          new line
-\r          carriage return
-\v          vertical tab (\x0B)
-\A          matches at the beginning of a haystack
-\z          matches at the end of a haystack
-\b          word boundary assertion
-\B          negated word boundary assertion
-\123        octal character code, up to three digits (when enabled)
-\x7F        hex character code (exactly two digits)
-\x{10FFFF}  any hex character code corresponding to a Unicode code point
-\u007F      hex character code (exactly four digits)
-\u{7F}      any hex character code corresponding to a Unicode code point
-\U0000007F  hex character code (exactly eight digits)
-\U{7F}      any hex character code corresponding to a Unicode code point
-\p{Letter}  Unicode character class
-\P{Letter}  negated Unicode character class
-\d, \s, \w  Perl character class
-\D, \S, \W  negated Perl character class
+\*              literal *, applies to all ASCII except [0-9A-Za-z<>]
+\a              bell (\x07)
+\f              form feed (\x0C)
+\t              horizontal tab
+\n              new line
+\r              carriage return
+\v              vertical tab (\x0B)
+\A              matches at the beginning of a haystack
+\z              matches at the end of a haystack
+\b              word boundary assertion
+\B              negated word boundary assertion
+\b{start}, \<   start-of-word boundary assertion
+\b{end}, \>     end-of-word boundary assertion
+\b{start-half}  half of a start-of-word boundary assertion
+\b{end-half}    half of an end-of-word boundary assertion
+\123            octal character code, up to three digits (when enabled)
+\x7F            hex character code (exactly two digits)
+\x{10FFFF}      any hex character code corresponding to a Unicode code point
+\u007F          hex character code (exactly four digits)
+\u{7F}          any hex character code corresponding to a Unicode code point
+\U0000007F      hex character code (exactly eight digits)
+\U{7F}          any hex character code corresponding to a Unicode code point
+\p{Letter}      Unicode character class
+\P{Letter}      negated Unicode character class
+\d, \s, \w      Perl character class
+\D, \S, \W      negated Perl character class
 
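Since `\<` and `\>` now denote assertions rather than escapeable punctuation (see the parser comment about escapeability earlier in this patch), here is a small hedged sketch of what that means in practice (again assuming regex 1.10+):

use regex::Regex;

fn main() {
    // \< and \> are aliases for \b{start} and \b{end}, so this matches
    // "word" only when it appears as a whole word.
    let re = Regex::new(r"\<word\>").unwrap();
    assert!(re.is_match("a word here"));
    assert!(!re.is_match("wordy"));
    // A literal '<' or '>' needs no escape at all.
    assert!(Regex::new(r"<word>").unwrap().is_match("see <word>"));
}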
### Perl character classes (Unicode friendly) diff --git a/src/regex/bytes.rs b/src/regex/bytes.rs index 6522ee7e3b..19f5701afd 100644 --- a/src/regex/bytes.rs +++ b/src/regex/bytes.rs @@ -1154,7 +1154,7 @@ impl Regex { /// /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`], /// but does *not* store a reference to the haystack. This makes its API - /// a bit lower level and less convenience. But in exchange, callers + /// a bit lower level and less convenient. But in exchange, callers /// may allocate their own `CaptureLocations` and reuse it for multiple /// searches. This may be helpful if allocating a `Captures` shows up in a /// profile as too costly. @@ -1162,8 +1162,8 @@ impl Regex { /// To create a `CaptureLocations` value, use the /// [`Regex::capture_locations`] method. /// - /// This also the overall match if one was found. When a match is found, - /// its offsets are also always stored in `locs` at index `0`. + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. /// /// # Example /// @@ -2037,7 +2037,10 @@ impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> { /// /// // Asking for an invalid capture group always returns None. /// assert_eq!(None, locs.get(3)); +/// # // literals are too big for 32-bit usize: #1041 +/// # #[cfg(target_pointer_width = "64")] /// assert_eq!(None, locs.get(34973498648)); +/// # #[cfg(target_pointer_width = "64")] /// assert_eq!(None, locs.get(9944060567225171988)); /// ``` #[derive(Clone, Debug)] diff --git a/src/regex/string.rs b/src/regex/string.rs index 65a76740ed..880d6082ad 100644 --- a/src/regex/string.rs +++ b/src/regex/string.rs @@ -1145,7 +1145,7 @@ impl Regex { /// /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`], /// but does *not* store a reference to the haystack. This makes its API - /// a bit lower level and less convenience. But in exchange, callers + /// a bit lower level and less convenient. But in exchange, callers /// may allocate their own `CaptureLocations` and reuse it for multiple /// searches. This may be helpful if allocating a `Captures` shows up in a /// profile as too costly. @@ -1153,8 +1153,8 @@ impl Regex { /// To create a `CaptureLocations` value, use the /// [`Regex::capture_locations`] method. /// - /// This also the overall match if one was found. When a match is found, - /// its offsets are also always stored in `locs` at index `0`. + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. /// /// # Panics /// @@ -2040,7 +2040,10 @@ impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> { /// /// // Asking for an invalid capture group always returns None. /// assert_eq!(None, locs.get(3)); +/// # // literals are too big for 32-bit usize: #1041 +/// # #[cfg(target_pointer_width = "64")] /// assert_eq!(None, locs.get(34973498648)); +/// # #[cfg(target_pointer_width = "64")] /// assert_eq!(None, locs.get(9944060567225171988)); /// ``` #[derive(Clone, Debug)] diff --git a/testdata/anchored.toml b/testdata/anchored.toml index cca561de10..0f2248d098 100644 --- a/testdata/anchored.toml +++ b/testdata/anchored.toml @@ -69,3 +69,59 @@ haystack = 'abcβ' matches = [[0, 3]] anchored = true unicode = false + +# Tests that '.c' doesn't match 'abc' when performing an anchored search from +# the beginning of the haystack. This test found two different bugs in the +# PikeVM and the meta engine. 
+[[test]] +name = "no-match-at-start" +regex = '.c' +haystack = 'abc' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-bounds" +regex = '.c' +haystack = 'aabc' +bounds = [1, 4] +matches = [] +anchored = true + +# This is like no-match-at-start, but hits the "reverse inner" optimization +# inside the meta engine. (no-match-at-start hits the "reverse suffix" +# optimization.) +[[test]] +name = "no-match-at-start-reverse-inner" +regex = '.c[a-z]' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-reverse-inner-bounds" +regex = '.c[a-z]' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true + +# Same as no-match-at-start, but applies to the meta engine's "reverse +# anchored" optimization. +[[test]] +name = "no-match-at-start-reverse-anchored" +regex = '.c[a-z]$' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-reverse-anchored-bounds" +regex = '.c[a-z]$' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true diff --git a/testdata/line-terminator.toml b/testdata/line-terminator.toml index 4de72de31e..a398dafa2e 100644 --- a/testdata/line-terminator.toml +++ b/testdata/line-terminator.toml @@ -38,6 +38,18 @@ unescape = true line-terminator = '\xFF' utf8 = false +# This tests a tricky case where the line terminator is set to \r. This ensures +# that the StartLF look-behind assertion is tracked when computing the start +# state. +[[test]] +name = "carriage" +regex = '(?m)^[a-z]+' +haystack = 'ABC\rabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true +line-terminator = '\r' + # This tests that we can set the line terminator to a byte corresponding to a # word character, and things work as expected. [[test]] diff --git a/testdata/regression.toml b/testdata/regression.toml index bb5e4fd46f..53b0701a3c 100644 --- a/testdata/regression.toml +++ b/testdata/regression.toml @@ -739,3 +739,92 @@ matches = [[0, 9]] utf8 = false match-kind = "all" search-kind = "overlapping" + +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-plus-shorter-than-expected" +regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex +# to demonstrate the extent of the rot. Sigh. +# +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-short" +regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# This regression test was found via the RegexSet APIs. It triggered a +# particular code path where a regex was compiled with 'All' match semantics +# (to support overlapping search), but got funneled down into a standard +# leftmost search when calling 'is_match'. This is fine on its own, but the +# leftmost search will use a prefilter and that's where this went awry. +# +# Namely, since 'All' semantics were used, the aho-corasick prefilter was +# incorrectly compiled with 'Standard' semantics. This was wrong because +# 'Standard' immediately attempts to report a match at every position, even if +# that would mean reporting a match past the leftmost match before reporting +# the leftmost match. 
This breaks the prefilter contract of never having false +# negatives and leads overall to the engine not finding a match. +# +# See: https://github.com/rust-lang/regex/issues/1070 +[[test]] +name = "prefilter-with-aho-corasick-standard-semantics" +regex = '(?m)^ *v [0-9]' +haystack = 'v 0' +matches = [ + { id = 0, spans = [[0, 3]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = true +utf8 = true + +# This tests that the PikeVM and the meta regex agree on a particular regex. +# This test previously failed when the ad hoc engines inside the meta engine +# did not handle quit states correctly. Namely, the Unicode word boundary here +# combined with a non-ASCII codepoint provokes the quit state. The ad hoc +# engines were previously returning a match even after entering the quit state +# if a match had been previously detected, but this is incorrect. The reason +# is that if a quit state is found, then the search must give up *immediately* +# because it prevents the search from finding the "proper" leftmost-first +# match. If it instead returns a match that has been found, it risks reporting +# an improper match, as it did in this case. +# +# See: https://github.com/rust-lang/regex/issues/1046 +[[test]] +name = "non-prefix-literal-quit-state" +regex = '.+\b\n' +haystack = "β77\n" +matches = [[0, 5]] + +# This is a regression test for some errant HIR interval set operations that +# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The +# issue here is that the HIR produced from the regex had out-of-order ranges. +# +# See: https://github.com/rust-lang/regex/issues/1103 +# Ref: https://github.com/rust-lang/regex/pull/1051 +# Ref: https://github.com/rust-lang/regex/pull/1102 +[[test]] +name = "hir-optimization-out-of-order-class" +regex = '^[[:alnum:]./-]+$' +haystack = "a-b" +matches = [[0, 3]] + +# This is a regression test for an improper reverse suffix optimization. This +# occurred when I "broadened" the applicability of the optimization to include +# multiple possible literal suffixes instead of only sticking to a non-empty +# longest common suffix. It turns out that, at least given how the reverse +# suffix optimization works, we need to stick to the longest common suffix for +# now. +# +# See: https://github.com/rust-lang/regex/issues/1110 +# See also: https://github.com/astral-sh/ruff/pull/7980 +[[test]] +name = 'improper-reverse-suffix-optimization' +regex = '(\\N\{[^}]+})|([{}])' +haystack = 'hiya \N{snowman} bye' +matches = [[[5, 16], [5, 16], []]] diff --git a/testdata/word-boundary-special.toml b/testdata/word-boundary-special.toml new file mode 100644 index 0000000000..2b5a2a0acf --- /dev/null +++ b/testdata/word-boundary-special.toml @@ -0,0 +1,687 @@ +# These tests are for the "special" word boundary assertions. That is, +# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty +# assertions for more niche use cases, but hitting those cases without these +# assertions is difficult. For example, \b{start-half} and \b{end-half} are +# used to implement the -w/--word-regexp flag in a grep program. 
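The comment above mentions -w/--word-regexp; the following sketch shows roughly how a grep-style tool might use the half assertions for that flag. This is hypothetical wrapper code, not part of this patch, and the pattern `-foo` is only an illustration:

use regex::Regex;

fn main() {
    // Wrap the user's pattern in half word boundary assertions. Unlike
    // wrapping with \b...\b, this never rejects a match just because the
    // user's pattern itself starts or ends with a non-word character.
    let user_pattern = "-foo";
    let word_re = Regex::new(&format!(
        r"\b{{start-half}}(?:{})\b{{end-half}}",
        user_pattern
    ))
    .unwrap();
    assert!(word_re.is_match("bar -foo baz"));

    // The full \b fails here: there is no word boundary between the
    // space and '-', since neither is a word character.
    let strict = Regex::new(&format!(r"\b(?:{})\b", user_pattern)).unwrap();
    assert!(!strict.is_match("bar -foo baz"));
}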
+ +# Tests for (?-u:\b{start}) + +[[test]] +name = "word-start-ascii-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-start-ascii-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[4, 4]] +unicode = false + +[[test]] +name = "word-start-ascii-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = false + +# Tests for (?-u:\b{end}) + +[[test]] +name = "word-end-ascii-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = false + +[[test]] +name = "word-end-ascii-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[1, 1]] +unicode = false + +# Tests for \b{start} + +[[test]] +name = "word-start-unicode-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = 
[[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end} + +[[test]] +name = "word-end-unicode-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Tests for (?-u:\b{start-half}) + +[[test]] +name = "word-start-half-ascii-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "word-start-half-ascii-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = false + +[[test]] +name = "word-start-half-ascii-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060-noutf8" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +[[test]] +name = "word-start-half-ascii-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-half-ascii-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-start-half-ascii-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-start-half-ascii-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0], [5, 5]] +unicode = false + +# Tests for (?-u:\b{end-half}) + +[[test]] +name = "word-end-half-ascii-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode 
= false + +[[test]] +name = "word-end-half-ascii-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = false + +[[test]] +name = "word-end-half-ascii-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-end-half-ascii-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-half-ascii-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-end-half-ascii-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-end-half-ascii-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[0, 0], [5, 5]] +unicode = false + +[[test]] +name = "word-end-half-ascii-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[1, 1], [5, 5]] +unicode = false + +# Tests for \b{start-half} + +[[test]] +name = "word-start-half-unicode-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = true + +[[test]] +name = "word-start-half-unicode-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = true + +[[test]] +name = "word-start-half-unicode-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-half-unicode-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [6, 6]] +unicode = true + +[[test]] +name = "word-start-half-unicode-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [8, 8]] +unicode = true + +[[test]] +name = "word-start-half-unicode-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end-half} + +[[test]] +name = "word-end-half-unicode-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-half-unicode-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = true + +[[test]] +name = "word-end-half-unicode-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-end-half-unicode-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] 
+name = "word-end-half-unicode-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-half-unicode-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [5, 5], [6, 6]] +unicode = true + +[[test]] +name = "word-end-half-unicode-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[4, 4], [8, 8]] +unicode = true + +[[test]] +name = "word-end-half-unicode-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-half-unicode-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Specialty tests. + +# Since \r is special cased in the start state computation (to deal with CRLF +# mode), this test ensures that the correct start state is computed when the +# pattern starts with a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-carriage" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\rabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Since \n is also special cased in the start state computation, this test +# ensures that the correct start state is computed when the pattern starts with +# a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-linefeed" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\nabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Like the carriage return test above, but with a custom line terminator. +[[test]] +name = "word-start-half-ascii-customlineterm" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC!abc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true +line-terminator = '!' diff --git a/tests/lib.rs b/tests/lib.rs index badd57455d..b3f69423d9 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -49,6 +49,7 @@ fn suite() -> anyhow::Result { load!("unicode"); load!("utf8"); load!("word-boundary"); + load!("word-boundary-special"); load!("fowler/basic"); load!("fowler/nullsubexpr"); load!("fowler/repetition");