diff --git a/.gitignore b/.gitignore
index fd3afa8a97..ece777a348 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,7 @@
 /Cargo.lock
 /regex_macros/target
 /regex_macros/Cargo.lock
+/regex_syntax/target
+/regex_syntax/Cargo.lock
+/bench-log
 .*.swp
diff --git a/Cargo.toml b/Cargo.toml
index 33bf5ead6c..734c4da39b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,8 +21,16 @@ path = "regex_macros/benches/bench_dynamic.rs"
 test = false
 bench = true
 
+[dependencies.regex-syntax]
+path = "regex_syntax"
+version = "*"
+
 [dev-dependencies]
 rand = "0.3"
 
 [features]
 pattern = []
+
+[profile.bench]
+opt-level = 3
+lto = true
diff --git a/regex_macros/src/lib.rs b/regex_macros/src/lib.rs
index 73e9705801..cd6c8d232d 100644
--- a/regex_macros/src/lib.rs
+++ b/regex_macros/src/lib.rs
@@ -36,10 +36,7 @@ use rustc::plugin::Registry;
 
 use regex::Regex;
 use regex::native::{
-    OneChar, CharClass, Any, Save, Jump, Split,
-    Match, EmptyBegin, EmptyEnd, EmptyWordBoundary,
-    Program, Dynamic, ExDynamic, Native,
-    FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, FLAG_NEGATED,
+    Inst, Program, Dynamic, ExDynamic, Native,
     simple_case_fold,
 };
 
@@ -79,7 +76,9 @@ fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree])
         // error is logged in 'parse' with cx.span_err
         None => return DummyResult::any(sp),
     };
-    let re = match Regex::new(&regex) {
+    // We use the largest possible size limit because this is happening at
+    // compile time. We trust the programmer.
+    let re = match Regex::with_size_limit(::std::usize::MAX, &regex) {
         Ok(re) => re,
         Err(err) => {
             cx.span_err(sp, &err.to_string());
@@ -121,11 +120,10 @@ impl<'a> NfaGen<'a> {
                 None => cx.expr_none(self.sp),
             }
         );
-        let prefix_anchor =
-            match self.prog.insts[1] {
-                EmptyBegin(flags) if flags & FLAG_MULTI == 0 => true,
-                _ => false,
-            };
+        let prefix_anchor = match self.prog.insts[1] {
+            Inst::StartText => true,
+            _ => false,
+        };
         let init_groups = self.vec_expr(0..num_cap_locs,
                                         &mut |cx, _| cx.expr_none(self.sp));
 
@@ -338,49 +336,55 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
         let arms = self.prog.insts.iter().enumerate().map(|(pc, inst)| {
             let nextpc = pc + 1;
             let body = match *inst {
-                EmptyBegin(flags) => {
-                    let cond =
-                        if flags & FLAG_MULTI > 0 {
-                            quote_expr!(self.cx,
-                                self.chars.is_begin()
-                                || self.chars.prev == Some('\n')
-                            )
-                        } else {
-                            quote_expr!(self.cx, self.chars.is_begin())
-                        };
+                Inst::StartLine => {
                     quote_expr!(self.cx, {
                         nlist.add_empty($pc);
-                        if $cond { self.add(nlist, $nextpc, &mut *groups) }
+                        if self.chars.is_begin() || self.chars.prev == Some('\n') {
+                            self.add(nlist, $nextpc, &mut *groups)
+                        }
                     })
                 }
-                EmptyEnd(flags) => {
-                    let cond =
-                        if flags & FLAG_MULTI > 0 {
-                            quote_expr!(self.cx,
-                                self.chars.is_end()
-                                || self.chars.cur == Some('\n')
-                            )
-                        } else {
-                            quote_expr!(self.cx, self.chars.is_end())
-                        };
+                Inst::StartText => {
                     quote_expr!(self.cx, {
                         nlist.add_empty($pc);
-                        if $cond { self.add(nlist, $nextpc, &mut *groups) }
+                        if self.chars.is_begin() {
+                            self.add(nlist, $nextpc, &mut *groups)
+                        }
                     })
                 }
-                EmptyWordBoundary(flags) => {
-                    let cond =
-                        if flags & FLAG_NEGATED > 0 {
-                            quote_expr!(self.cx, !self.chars.is_word_boundary())
-                        } else {
-                            quote_expr!(self.cx, self.chars.is_word_boundary())
-                        };
+                Inst::EndLine => {
+                    quote_expr!(self.cx, {
+                        nlist.add_empty($pc);
+                        if self.chars.is_end() || self.chars.cur == Some('\n') {
+                            self.add(nlist, $nextpc, &mut *groups)
+                        }
+                    })
+                }
+                Inst::EndText => {
+                    quote_expr!(self.cx, {
+                        nlist.add_empty($pc);
+                        if self.chars.is_end() {
+                            self.add(nlist, $nextpc, &mut *groups)
+                        }
+                    })
+                }
+                Inst::WordBoundary => {
                     quote_expr!(self.cx, {
                         nlist.add_empty($pc);
-                        if $cond { self.add(nlist, $nextpc, &mut *groups) }
+                        if self.chars.is_word_boundary() {
+                            self.add(nlist, $nextpc, &mut *groups)
+                        }
+                    })
+                }
+                Inst::NotWordBoundary => {
+                    quote_expr!(self.cx, {
+                        nlist.add_empty($pc);
+                        if !self.chars.is_word_boundary() {
+                            self.add(nlist, $nextpc, &mut *groups)
+                        }
                     })
                 }
-                Save(slot) => {
+                Inst::Save(slot) => {
                     let save = quote_expr!(self.cx, {
                         let old = groups[$slot];
                         groups[$slot] = Some(self.ic);
@@ -411,20 +415,20 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
                         })
                     }
                 }
-                Jump(to) => {
+                Inst::Jump(to) => {
                     quote_expr!(self.cx, {
                         nlist.add_empty($pc);
                         self.add(nlist, $to, &mut *groups);
                     })
                 }
-                Split(x, y) => {
+                Inst::Split(x, y) => {
                     quote_expr!(self.cx, {
                         nlist.add_empty($pc);
                         self.add(nlist, $x, &mut *groups);
                         self.add(nlist, $y, &mut *groups);
                     })
                 }
-                // For Match, OneChar, CharClass, Any
+                // For Match, OneChar, CharClass, Any, AnyNoNL
                 _ => quote_expr!(self.cx, nlist.add($pc, &*groups)),
             };
             self.arm_inst(pc, body)
@@ -439,7 +443,7 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
         let arms = self.prog.insts.iter().enumerate().map(|(pc, inst)| {
             let nextpc = pc + 1;
             let body = match *inst {
-                Match => {
+                Inst::Match => {
                     quote_expr!(self.cx, {
                         match self.which {
                             Exists => {
@@ -459,8 +463,8 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
                         }
                     })
                 }
-                OneChar(c, flags) => {
-                    if flags & FLAG_NOCASE > 0 {
+                Inst::OneChar { c, casei } => {
+                    if casei {
                         let upc = simple_case_fold(c);
                         quote_expr!(self.cx, {
                             let upc = self.chars.prev.map(simple_case_fold);
@@ -476,45 +480,37 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
                         })
                     }
                 }
-                CharClass(ref ranges, flags) => {
-                    let negate = flags & FLAG_NEGATED > 0;
-                    let casei = flags & FLAG_NOCASE > 0;
+                Inst::CharClass(ref cls) => {
+                    let ranges: Vec<(char, char)> =
+                        cls.iter().map(|r| (r.start, r.end)).collect();
+                    let mranges = self.match_class(&ranges);
                     let get_char =
-                        if casei {
+                        if cls.is_case_insensitive() {
                             quote_expr!(
                                 self.cx,
                                 simple_case_fold(self.chars.prev.unwrap()))
                         } else {
                             quote_expr!(self.cx, self.chars.prev.unwrap())
                         };
-                    let negcond =
-                        if negate {
-                            quote_expr!(self.cx, !found)
-                        } else {
-                            quote_expr!(self.cx, found)
-                        };
-                    let mranges = self.match_class(&ranges);
                     quote_expr!(self.cx, {
                         if self.chars.prev.is_some() {
                             let c = $get_char;
-                            let found = $mranges;
-                            if $negcond {
+                            if $mranges {
                                 self.add(nlist, $nextpc, caps);
                             }
                         }
                     })
                 }
-                Any(flags) => {
-                    if flags & FLAG_DOTNL > 0 {
-                        quote_expr!(self.cx, self.add(nlist, $nextpc, caps))
-                    } else {
-                        quote_expr!(self.cx, {
-                            if self.chars.prev != Some('\n') {
-                                self.add(nlist, $nextpc, caps)
-                            }
-                            ()
-                        })
-                    }
+                Inst::Any => {
+                    quote_expr!(self.cx, self.add(nlist, $nextpc, caps))
+                }
+                Inst::AnyNoNL => {
+                    quote_expr!(self.cx, {
+                        if self.chars.prev != Some('\n') {
+                            self.add(nlist, $nextpc, caps);
+                        }
+                        ()
+                    })
                 }
                 // EmptyBegin, EmptyEnd, EmptyWordBoundary, Save, Jump, Split
                 _ => self.empty_block(),
diff --git a/regex_macros/tests/tests.rs b/regex_macros/tests/tests.rs
index 11670ac8f1..ab1db14a0b 100644
--- a/regex_macros/tests/tests.rs
+++ b/regex_macros/tests/tests.rs
@@ -203,6 +203,8 @@ replace!(rep_named, replace_all,
          "w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3");
 replace!(rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t  trim me\t   \t",
          "", "trim me");
+replace!(rep_number_hypen, replace, r"(.)(.)", "ab", "$1-$2", "a-b");
+replace!(rep_number_underscore, replace, r"(.)(.)", "ab", "$1_$2", "a_b");
 
 macro_rules! noparse(
     ($name:ident, $re:expr) => (
@@ -219,7 +221,6 @@ macro_rules! noparse(
 
 noparse!(fail_double_repeat, "a**");
 noparse!(fail_no_repeat_arg, "*");
-noparse!(fail_no_repeat_arg_begin, "^*");
 noparse!(fail_incomplete_escape, "\\");
 noparse!(fail_class_incomplete, "[A-");
 noparse!(fail_class_not_closed, "[A");
@@ -235,8 +236,7 @@ noparse!(fail_bad_capture_name, "(?P<na-me>)");
 noparse!(fail_bad_flag, "(?a)a");
 noparse!(fail_empty_alt_before, "|a");
 noparse!(fail_empty_alt_after, "a|");
-noparse!(fail_counted_big_exact, "a{1001}");
-noparse!(fail_counted_big_min, "a{1001,}");
+noparse!(fail_too_big, "a{10000000}");
 noparse!(fail_counted_no_close, "a{1001");
 noparse!(fail_unfinished_cap, "(?");
 noparse!(fail_unfinished_escape, "\\");
diff --git a/regex_syntax/Cargo.toml b/regex_syntax/Cargo.toml
new file mode 100644
index 0000000000..48231a41ec
--- /dev/null
+++ b/regex_syntax/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "regex-syntax"
+version = "0.1.0"
+authors = ["The Rust Project Developers"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/rust-lang/regex"
+documentation = "http://doc.rust-lang.org/regex"
+homepage = "https://github.com/rust-lang/regex"
+description = "A regular expression parser (RE2 only)."
+
+[dev-dependencies]
+quickcheck = "*"
+rand = "*"
diff --git a/regex_syntax/src/lib.rs b/regex_syntax/src/lib.rs
new file mode 100644
index 0000000000..95eed3f5c3
--- /dev/null
+++ b/regex_syntax/src/lib.rs
@@ -0,0 +1,1162 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+This crate provides a regular expression parser and an abstract syntax for
+regular expressions. The abstract syntax is defined by the `Expr` type. The
+concrete syntax is enumerated in the
+[`regex`](../regex/index.html#syntax)
+crate documentation.
+
+Note that since this crate is first and foremost an implementation detail for
+the `regex` crate, it may experience more frequent breaking changes. It is
+exposed as a separate crate so that others may use it to do analysis on regular
+expressions or even build their own matching engine.
+
+# Example: parsing an expression
+
+Parsing a regular expression can be done with the `Expr::parse` function.
+
+```rust
+use regex_syntax::Expr;
+
+assert_eq!(Expr::parse(r"ab|yz").unwrap(), Expr::Alternate(vec![
+    Expr::Literal { chars: vec!['a', 'b'], casei: false },
+    Expr::Literal { chars: vec!['y', 'z'], casei: false },
+]));
+```
+
+# Example: inspecting an error
+
+The parser in this crate provides very detailed error values. For example,
+if an invalid character class range is given:
+
+```rust
+use regex_syntax::{Expr, ErrorKind};
+
+let err = Expr::parse(r"[z-a]").unwrap_err();
+assert_eq!(err.position(), 4);
+assert_eq!(err.kind(), &ErrorKind::InvalidClassRange {
+    start: 'z',
+    end: 'a',
+});
+```
+
+Or unbalanced parentheses:
+
+```rust
+use regex_syntax::{Expr, ErrorKind};
+
+let err = Expr::parse(r"ab(cd").unwrap_err();
+assert_eq!(err.position(), 2);
+assert_eq!(err.kind(), &ErrorKind::UnclosedParen);
+```
+*/
+
+#![deny(missing_docs)]
+
+#[cfg(test)] extern crate quickcheck;
+#[cfg(test)] extern crate rand;
+
+mod parser;
+mod unicode;
+
+use std::char;
+use std::cmp::{Ordering, max, min};
+use std::fmt;
+use std::iter::IntoIterator;
+use std::ops::Deref;
+use std::slice;
+use std::vec;
+
+use unicode::case_folding;
+
+use self::Expr::*;
+use self::Repeater::*;
+
+pub use parser::is_punct;
+
+/// A regular expression abstract syntax tree.
+///
+/// An `Expr` represents the abstract syntax of a regular expression.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Expr {
+    /// An empty regex (which never matches any text).
+    Empty,
+    /// A sequence of one or more literal characters to be matched.
+    Literal {
+        /// The characters.
+        chars: Vec<char>,
+        /// Whether to match case insensitively.
+        casei: bool,
+    },
+    /// Match any character.
+    AnyChar,
+    /// Match any character, excluding new line.
+    AnyCharNoNL,
+    /// A character class.
+    Class(CharClass),
+    /// Match the start of a line or beginning of input.
+    StartLine,
+    /// Match the end of a line or end of input.
+    EndLine,
+    /// Match the beginning of input.
+    StartText,
+    /// Match the end of input.
+    EndText,
+    /// Match a word boundary (word character on one side and a non-word
+    /// character on the other).
+    WordBoundary,
+    /// Match a position that is not a word boundary (word or non-word
+    /// characters on both sides).
+    NotWordBoundary,
+    /// A group, possibly non-capturing.
+    Group {
+        /// The expression inside the group.
+        e: Box<Expr>,
+        /// The capture index (starting at `1`) only for capturing groups.
+        i: Option<usize>,
+        /// The capture name, only for capturing named groups.
+        name: Option<String>,
+    },
+    /// A repeat operator (`?`, `*`, `+` or `{m,n}`).
+    Repeat {
+        /// The expression to be repeated. Limited to literals, `.`, classes
+        /// or grouped expressions.
+        e: Box<Expr>,
+        /// The type of repeat operator used.
+        r: Repeater,
+        /// Whether the repeat is greedy (match the most) or not (match the
+        /// least).
+        greedy: bool,
+    },
+    /// A concatenation of expressions. Must be matched one after the other.
+    ///
+    /// N.B. A concat expression can only appear at the top-level or
+    /// immediately inside a group expression.
+    Concat(Vec<Expr>),
+    /// An alternation of expressions. Only one must match.
+    ///
+    /// N.B. An alternate expression can only appear at the top-level or
+    /// immediately inside a group expression.
+    Alternate(Vec<Expr>),
+}
+
+type CaptureIndex = Option<usize>;
+
+type CaptureName = Option<String>;
+
+/// The type of a repeat operator expression.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Repeater {
+    /// Match zero or one (`?`).
+    ZeroOrOne,
+    /// Match zero or more (`*`).
+    ZeroOrMore,
+    /// Match one or more (`+`).
+    OneOrMore,
+    /// Match for at least `min` and at most `max` (`{m,n}`).
+    ///
+    /// When `max` is `None`, there is no upper bound on the number of matches.
+    Range {
+        /// Lower bound on the number of matches.
+        min: u32,
+        /// Optional upper bound on the number of matches.
+        max: Option<u32>,
+    },
+}
+
+/// A character class.
+///
+/// A character class has a canonical format that the parser guarantees. Its
+/// canonical format is defined by the following invariants:
+///
+/// 1. Given any Unicode scalar value, it is matched by *at most* one character
+///    range in a canonical character class.
+/// 2. Every adjacent character range is separated by at least one Unicode
+///    scalar value.
+/// 3. Given any pair of character ranges `r1` and `r2`, if
+///    `r1.end < r2.start`, then `r1` comes before `r2` in a canonical
+///    character class.
+///
+/// In sum, any `CharClass` produced by this crate's parser is a sorted
+/// sequence of non-overlapping ranges. This makes it possible to test whether
+/// a character is matched by a class with a binary search.
+///
+/// Additionally, a character class may be marked *case insensitive*. If it's
+/// case insensitive, then:
+///
+/// 1. Simple case folding has been applied to all ranges.
+/// 2. Simple case folding must be applied to a character before testing
+///    whether it matches the character class.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CharClass {
+    ranges: Vec<ClassRange>,
+    casei: bool,
+}
+
+/// A single inclusive range in a character class.
+///
+/// Since range boundaries are defined by Unicode scalar values, the boundaries
+/// can never be in the open interval `(0xD7FF, 0xE000)`. However, a range may
+/// *cover* codepoints that are not scalar values.
+///
+/// Note that this has a few convenient impls on `PartialEq` and `PartialOrd`
+/// for testing whether a character is contained inside a given range.
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
+pub struct ClassRange {
+    /// The start character of the range.
+    ///
+    /// This must be less than or equal to `end`.
+    pub start: char,
+
+    /// The end character of the range.
+    ///
+    /// This must be greater than or equal to `start`.
+    pub end: char,
+}
+
+impl Expr {
+    /// Parses a string into a regular expression syntax tree.
+    pub fn parse(s: &str) -> Result<Expr> {
+        parser::Parser::parse(s).map(|e| e.simplify())
+    }
+
+    /// Returns true iff the expression can be repeated by a quantifier.
+    fn can_repeat(&self) -> bool {
+        match *self {
+            Literal{..}
+            | AnyChar
+            | AnyCharNoNL
+            | Class(_)
+            | StartLine | EndLine | StartText | EndText
+            | WordBoundary | NotWordBoundary
+            | Group{..}
+            => true,
+            _ => false,
+        }
+    }
+
+    fn simplify(self) -> Expr {
+        fn combine_literals(es: &mut Vec<Expr>, e: Expr) {
+            match (es.pop(), e) {
+                (None, e) => es.push(e),
+                (Some(Literal { chars: mut chars1, casei: casei1 }),
+                 Literal { chars: chars2, casei: casei2 }) => {
+                    if casei1 == casei2 {
+                        chars1.extend(chars2);
+                        es.push(Literal { chars: chars1, casei: casei1 });
+                    } else {
+                        es.push(Literal { chars: chars1, casei: casei1 });
+                        es.push(Literal { chars: chars2, casei: casei2 });
+                    }
+                }
+                (Some(e1), e2) => {
+                    es.push(e1);
+                    es.push(e2);
+                }
+            }
+        }
+        match self {
+            Repeat { e, r, greedy } => Repeat {
+                e: Box::new(e.simplify()),
+                r: r,
+                greedy: greedy,
+            },
+            Group { e, i, name } => {
+                let e = e.simplify();
+                if i.is_none() && name.is_none() && e.can_repeat() {
+                    e
+                } else {
+                    Group { e: Box::new(e), i: i, name: name }
+                }
+            }
+            Concat(es) => {
+                let mut new_es = Vec::with_capacity(es.len());
+                for e in es {
+                    combine_literals(&mut new_es, e.simplify());
+                }
+                if new_es.len() == 1 {
+                    new_es.pop().unwrap()
+                } else {
+                    Concat(new_es)
+                }
+            }
+            Alternate(es) => Alternate(es.into_iter()
+                                         .map(|e| e.simplify())
+                                         .collect()),
+            e => e,
+        }
+    }
+}
+
+impl Deref for CharClass {
+    type Target = Vec<ClassRange>;
+    fn deref(&self) -> &Vec<ClassRange> { &self.ranges }
+}
+
+impl IntoIterator for CharClass {
+    type Item = ClassRange;
+    type IntoIter = vec::IntoIter<ClassRange>;
+    fn into_iter(self) -> vec::IntoIter<ClassRange> { self.ranges.into_iter() }
+}
+
+impl<'a> IntoIterator for &'a CharClass {
+    type Item = &'a ClassRange;
+    type IntoIter = slice::Iter<'a, ClassRange>;
+    fn into_iter(self) -> slice::Iter<'a, ClassRange> { self.iter() }
+}
+
+impl CharClass {
+    /// Create a new class from an existing set of ranges.
+    fn new(ranges: Vec<ClassRange>) -> CharClass {
+        CharClass { ranges: ranges, casei: false }
+    }
+
+    /// Create an empty class.
+    fn empty() -> CharClass {
+        CharClass::new(Vec::new())
+    }
+
+    /// Returns true if `c` is matched by this character class.
+    ///
+    /// If this character class is case insensitive, then simple case folding
+    /// is applied to `c` before checking for a match.
+    pub fn matches(&self, mut c: char) -> bool {
+        if self.is_case_insensitive() {
+            c = simple_case_fold(c)
+        }
+        self.binary_search_by(|range| c.partial_cmp(range).unwrap()).is_ok()
+    }
+
+    /// Returns true if this character class should be matched case
+    /// insensitively.
+    ///
+    /// When `true`, simple case folding has already been applied to the
+    /// class.
+    pub fn is_case_insensitive(&self) -> bool {
+        self.casei
+    }
+
+    /// Create a new empty class from this one.
+    ///
+    /// Namely, its capacity and case insensitive setting will be the same.
+    fn to_empty(&self) -> CharClass {
+        CharClass { ranges: Vec::with_capacity(self.len()), casei: self.casei }
+    }
+
+    /// Merge two classes and canonicalize them.
+    #[cfg(test)]
+    fn merge(mut self, other: CharClass) -> CharClass {
+        self.ranges.extend(other);
+        self.canonicalize()
+    }
+
+    /// Canonicalize any sequence of ranges.
+    ///
+    /// This is responsible for enforcing the canonical format invariants
+    /// as described on the docs for the `CharClass` type.
+    fn canonicalize(mut self) -> CharClass {
+        // TODO: Save some cycles here by checking if already canonicalized.
+        self.ranges.sort();
+        let mut ordered = self.to_empty(); // TODO: Do this in place?
+        for candidate in self {
+            // If the candidate overlaps with an existing range, then it must
+            // be the most recent range added because we process the candidates
+            // in order.
+            if let Some(or) = ordered.ranges.last_mut() {
+                if or.overlapping(candidate) {
+                    *or = or.merge(candidate);
+                    continue;
+                }
+            }
+            ordered.ranges.push(candidate);
+        }
+        ordered
+    }
+
+    /// Negates the character class.
+    ///
+    /// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
+    /// if and only if `c` does not match `self.negate()`.
+    ///
+    /// Note that this cannot be called on a character class that has had
+    /// case folding applied to it. (Because case folding turns on a flag
+    /// and doesn't store every possible matching character. Therefore,
+    /// its negation is tricky to get right. Turns out, we don't need it
+    /// anyway!)
+    fn negate(mut self) -> CharClass {
+        fn range(s: char, e: char) -> ClassRange { ClassRange::new(s, e) }
+
+        // Never allow negating of a class that has been case folded!
+        assert!(!self.casei);
+
+        if self.is_empty() { return self; }
+        self = self.canonicalize();
+        let mut inv = self.to_empty();
+        if self[0].start > '\x00' {
+            inv.ranges.push(range('\x00', dec_char(self[0].start)));
+        }
+        for win in self.windows(2) {
+            inv.ranges.push(range(inc_char(win[0].end),
+                                  dec_char(win[1].start)));
+        }
+        if self[self.len() - 1].end < char::MAX {
+            inv.ranges.push(range(inc_char(self[self.len() - 1].end),
+                                  char::MAX));
+        }
+        inv
+    }
+
+    /// Apply case folding to this character class.
+    ///
+    /// Once a class has been case folded, it cannot be negated.
+    fn case_fold(self) -> CharClass {
+        let mut folded = self.to_empty();
+        folded.casei = true;
+        for r in self {
+            // Applying case folding to a range is expensive because *every*
+            // character needs to be examined. Thus, we avoid that drudgery
+            // if no character in the current range is in our case folding
+            // table.
+            if r.needs_case_folding() {
+                folded.ranges.extend(r.case_fold());
+            } else {
+                folded.ranges.push(r);
+            }
+        }
+        folded.canonicalize()
+    }
+}
+
+impl ClassRange {
+    /// Create a new class range.
+    ///
+    /// If `end < start`, then the two values are swapped so that
+    /// the invariant `start <= end` is preserved.
+    fn new(start: char, end: char) -> ClassRange {
+        if start <= end {
+            ClassRange { start: start, end: end }
+        } else {
+            ClassRange { start: end, end: start }
+        }
+    }
+
+    /// Create a range of one character.
+    fn one(c: char) -> ClassRange {
+        ClassRange { start: c, end: c }
+    }
+
+    /// Returns true if and only if the two ranges are overlapping. Note that
+    /// since ranges are inclusive, `a-c` and `d-f` are overlapping!
+    fn overlapping(self, other: ClassRange) -> bool {
+        max(self.start, other.start) <= inc_char(min(self.end, other.end))
+    }
+
+    /// Creates a new range representing the union of `self` and `other`.
+    fn merge(self, other: ClassRange) -> ClassRange {
+        ClassRange {
+            start: min(self.start, other.start),
+            end: max(self.end, other.end),
+        }
+    }
+
+    /// Returns true if and only if this range contains a character that is
+    /// in the case folding table.
+    fn needs_case_folding(self) -> bool {
+        case_folding::C_plus_S_table
+        .binary_search_by(|&(c, _)| self.partial_cmp(&c).unwrap()).is_ok()
+    }
+
+    /// Apply case folding to this range.
+    ///
+    /// Since case folding might add characters such that the range is no
+    /// longer contiguous, this returns multiple class ranges. They are in
+    /// canonical order.
+    fn case_fold(self) -> Vec<ClassRange> {
+        let (s, e) = (self.start as u32, self.end as u32 + 1);
+        let folded = (s..e).filter_map(char::from_u32).map(simple_case_fold);
+        ClassRange::ranges(folded)
+    }
+
+    /// Turns a non-empty sequence of sorted characters into a sequence of
+    /// class ranges in canonical format/order.
+    fn ranges<I: Iterator<Item=char>>(mut chars: I) -> Vec<ClassRange> {
+        let mut ranges = Vec::with_capacity(100);
+        let mut start = chars.next().expect("non-empty char iterator");
+        let mut end = start;
+        for c in chars {
+            if c != inc_char(end) {
+                ranges.push(ClassRange::new(start, end));
+                start = c;
+            }
+            end = c;
+        }
+        ranges.push(ClassRange::new(start, end));
+        ranges
+    }
+}
+
+impl PartialEq<char> for ClassRange {
+    #[inline]
+    fn eq(&self, other: &char) -> bool {
+        self.start <= *other && *other <= self.end
+    }
+}
+
+impl PartialEq<ClassRange> for char {
+    #[inline]
+    fn eq(&self, other: &ClassRange) -> bool {
+        other.eq(self)
+    }
+}
+
+impl PartialOrd<char> for ClassRange {
+    #[inline]
+    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
+        Some(if self == other {
+            Ordering::Equal
+        } else if *other > self.end {
+            Ordering::Greater
+        } else {
+            Ordering::Less
+        })
+    }
+}
+
+impl PartialOrd<ClassRange> for char {
+    #[inline]
+    fn partial_cmp(&self, other: &ClassRange) -> Option<Ordering> {
+        other.partial_cmp(self).map(|o| o.reverse())
+    }
+}
+
+/// This implementation of `Display` will write a regular expression from the
+/// syntax tree. It does not write the original string parsed.
+// TODO(burntsushi): Write tests for the regex writer.
+impl fmt::Display for Expr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Empty => write!(f, ""),
+            Literal { ref chars, casei } => {
+                if casei { try!(write!(f, "(?i:")); }
+                for &c in chars {
+                    try!(write!(f, "{}", quote_char(c)));
+                }
+                if casei { try!(write!(f, ")")); }
+                Ok(())
+            }
+            AnyChar => write!(f, "(?s:.)"),
+            AnyCharNoNL => write!(f, "."),
+            Class(ref cls) => write!(f, "{}", cls),
+            StartLine => write!(f, "(?m:^)"),
+            EndLine => write!(f, "(?m:$)"),
+            StartText => write!(f, r"^"),
+            EndText => write!(f, r"$"),
+            WordBoundary => write!(f, r"\b"),
+            NotWordBoundary => write!(f, r"\B"),
+            Group { ref e, i: None, name: None } => write!(f, "(?:{})", e),
+            Group { ref e, name: None, .. } => write!(f, "({})", e),
+            Group { ref e, name: Some(ref n), .. } => {
+                write!(f, "(?P<{}>{})", n, e)
+            }
+            Repeat { ref e, r, greedy } => {
+                match &**e {
+                    &Literal { ref chars, .. } if chars.len() > 1 => {
+                        try!(write!(f, "(?:{}){}", e, r))
+                    }
+                    _ => try!(write!(f, "{}{}", e, r)),
+                }
+                if !greedy { try!(write!(f, "?")); }
+                Ok(())
+            }
+            Concat(ref es) => {
+                for e in es {
+                    try!(write!(f, "{}", e));
+                }
+                Ok(())
+            }
+            Alternate(ref es) => {
+                for (i, e) in es.iter().enumerate() {
+                    if i > 0 { try!(write!(f, "|")); }
+                    try!(write!(f, "{}", e));
+                }
+                Ok(())
+            }
+        }
+    }
+}
+
+impl fmt::Display for Repeater {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self { // in the format strings below, `{{` and `}}` are escaped literal braces
+            ZeroOrOne => write!(f, "?"),
+            ZeroOrMore => write!(f, "*"),
+            OneOrMore => write!(f, "+"),
+            Range { min: s, max: None } => write!(f, "{{{},}}", s), // renders `{s,}`
+            Range { min: s, max: Some(e) } if s == e => write!(f, "{{{}}}", s), // renders `{s}`
+            Range { min: s, max: Some(e) } => write!(f, "{{{}, {}}}", s, e), // renders `{s, e}`
+        }
+    }
+}
+
+impl fmt::Display for CharClass {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.casei {
+            try!(write!(f, "(?i:"));
+        }
+        try!(write!(f, "["));
+        for range in self.iter() {
+            try!(write!(f, "{}", range));
+        }
+        try!(write!(f, "]"));
+        if self.casei {
+            try!(write!(f, ")"));
+        }
+        Ok(())
+    }
+}
+
+impl fmt::Display for ClassRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}-{}", quote_char(self.start), quote_char(self.end))
+    }
+}
+
+/// An alias for computations that can return an `Error`.
+pub type Result<T> = ::std::result::Result<T, Error>;
+
+/// A parse error.
+///
+/// This includes details about the specific type of error and a rough
+/// approximation of where it occurred.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Error {
+    pos: usize,
+    surround: String,
+    kind: ErrorKind,
+}
+
+/// The specific type of parse error that can occur.
+#[derive(Clone, Debug, PartialEq)]
+pub enum ErrorKind {
+    /// A negation symbol is used twice in flag settings.
+    /// e.g., `(?-i-s)`.
+    DoubleFlagNegation,
+    /// The same capture name was used more than once.
+    /// e.g., `(?P<a>.)(?P<a>.)`.
+    DuplicateCaptureName(String),
+    /// An alternate is empty. e.g., `(|a)`.
+    EmptyAlternate,
+    /// A capture group name is empty. e.g., `(?P<>a)`.
+    EmptyCaptureName,
+    /// A negation symbol was not followed by any flags. e.g., `(?i-)`.
+    EmptyFlagNegation,
+    /// A group is empty. e.g., `()`.
+    EmptyGroup,
+    /// An invalid number was used in a counted repetition. e.g., `a{b}`.
+    InvalidBase10(String),
+    /// An invalid hexadecimal number was used in an escape sequence.
+    /// e.g., `\xAG`.
+    InvalidBase16(String),
+    /// An invalid capture name was used. e.g., `(?P<0a>b)`.
+    InvalidCaptureName(String),
+    /// An invalid class range was given. Specifically, when the start of the
+    /// range is greater than the end. e.g., `[z-a]`.
+    InvalidClassRange {
+        /// The first character specified in the range.
+        start: char,
+        /// The second character specified in the range.
+        end: char,
+    },
+    /// An escape sequence was used in a character class where it is not
+    /// allowed. e.g., `[a-\pN]` or `[\A]`.
+    InvalidClassEscape(Expr),
+    /// An invalid counted repetition min/max was given. e.g., `a{2,1}`.
+    InvalidRepeatRange {
+        /// The first number specified in the repetition.
+        min: u32,
+        /// The second number specified in the repetition.
+        max: u32,
+    },
+    /// An invalid Unicode scalar value was used in a long hexadecimal
+    /// sequence. e.g., `\x{D800}`.
+    InvalidScalarValue(u32),
+    /// An empty counted repetition operator. e.g., `a{}`.
+    MissingBase10,
+    /// A repetition operator was not applied to an expression. e.g., `*`.
+    RepeaterExpectsExpr,
+    /// A repetition operator was applied to an expression that cannot be
+    /// repeated. e.g., `a+*` or `a|*`.
+    RepeaterUnexpectedExpr(Expr),
+    /// A capture group name that is never closed. e.g., `(?P<a`.
+    UnclosedCaptureName(String),
+    /// An unclosed hexadecimal literal. e.g., `\x{a`.
+    UnclosedHex,
+    /// An unclosed parenthesis. e.g., `(a`.
+    UnclosedParen,
+    /// An unclosed counted repetition operator. e.g., `a{2`.
+    UnclosedRepeat,
+    /// An unclosed named Unicode class. e.g., `\p{Yi`.
+    UnclosedUnicodeName,
+    /// Saw end of regex before class was closed. e.g., `[a`.
+    UnexpectedClassEof,
+    /// Saw end of regex before escape sequence was closed. e.g., `\`.
+    UnexpectedEscapeEof,
+    /// Saw end of regex before flags were closed. e.g., `(?i`.
+    UnexpectedFlagEof,
+    /// Saw end of regex before two hexadecimal digits were seen. e.g., `\xA`.
+    UnexpectedTwoDigitHexEof,
+    /// Unopened parenthesis. e.g., `)`.
+    UnopenedParen,
+    /// Unrecognized escape sequence. e.g., `\q`.
+    UnrecognizedEscape(char),
+    /// Unrecognized flag. e.g., `(?a)`.
+    UnrecognizedFlag(char),
+    /// Unrecognized named Unicode class. e.g., `\p{Foo}`.
+    UnrecognizedUnicodeClass(String),
+}
+
+impl Error {
+    /// Returns an approximate *character* offset at which the error occurred.
+    ///
+    /// The character offset may be equal to the number of characters in the
+    /// string, in which case it should be interpreted as pointing to the end
+    /// of the regex.
+    pub fn position(&self) -> usize {
+        self.pos
+    }
+
+    /// Returns the type of the regex parse error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+}
+
+impl ErrorKind {
+    fn description(&self) -> &str {
+        use ErrorKind::*;
+        match *self {
+            DoubleFlagNegation => "double flag negation",
+            DuplicateCaptureName(_) => "duplicate capture name",
+            EmptyAlternate => "empty alternate",
+            EmptyCaptureName => "empty capture name",
+            EmptyFlagNegation => "flag negation without any flags",
+            EmptyGroup => "empty group (e.g., '()')",
+            InvalidBase10(_) => "invalid base 10 number",
+            InvalidBase16(_) => "invalid base 16 number",
+            InvalidCaptureName(_) => "invalid capture name",
+            InvalidClassRange{..} => "invalid character class range",
+            InvalidClassEscape(_) => "invalid escape sequence in class",
+            InvalidRepeatRange{..} => "invalid counted repetition range",
+            InvalidScalarValue(_) => "invalid Unicode scalar value",
+            MissingBase10 => "missing count in repetition operator",
+            RepeaterExpectsExpr => "repetition operator missing expression",
+            RepeaterUnexpectedExpr(_) => "expression cannot be repeated",
+            UnclosedCaptureName(_) => "unclosed capture group name",
+            UnclosedHex => "unclosed hexadecimal literal",
+            UnclosedParen => "unclosed parenthesis",
+            UnclosedRepeat => "unclosed counted repetition operator",
+            UnclosedUnicodeName => "unclosed Unicode class literal",
+            UnexpectedClassEof => "unexpected EOF in character class",
+            UnexpectedEscapeEof => "unexpected EOF in escape sequence",
+            UnexpectedFlagEof => "unexpected EOF in flags",
+            UnexpectedTwoDigitHexEof => "unexpected EOF in hex literal",
+            UnopenedParen => "unopened parenthesis",
+            UnrecognizedEscape(_) => "unrecognized escape sequence",
+            UnrecognizedFlag(_) => "unrecognized flag",
+            UnrecognizedUnicodeClass(_) => "unrecognized Unicode class name",
+        }
+    }
+}
+
+impl ::std::error::Error for Error {
+    fn description(&self) -> &str {
+        self.kind.description()
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Error parsing regex near '{}' at character offset {}: {}",
+               self.surround, self.pos, self.kind)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use ErrorKind::*;
+        match *self {
+            DoubleFlagNegation =>
+                write!(f, "Only one negation symbol is allowed in flags."),
+            DuplicateCaptureName(ref s) =>
+                write!(f, "Capture name '{}' is used more than once.", s),
+            EmptyAlternate =>
+                write!(f, "Alternations cannot be empty."),
+            EmptyCaptureName =>
+                write!(f, "Capture names cannot be empty."),
+            EmptyFlagNegation =>
+                write!(f, "Flag negation requires setting at least one flag."),
+            EmptyGroup =>
+                write!(f, "Empty regex groups (e.g., '()') are not allowed."),
+            InvalidBase10(ref s) =>
+                write!(f, "Not a valid base 10 number: '{}'", s),
+            InvalidBase16(ref s) =>
+                write!(f, "Not a valid base 16 number: '{}'", s),
+            InvalidCaptureName(ref s) =>
+                write!(f, "Invalid capture name: '{}'. Capture names must \
+                           consist of [_a-zA-Z0-9] and are not allowed to \
+                           start with with a number.", s),
+            InvalidClassRange { start, end } =>
+                write!(f, "Invalid character class range '{}-{}'. \
+                           Character class ranges must start with the smaller \
+                           character, but {} > {}", start, end, start, end),
+            InvalidClassEscape(ref e) =>
+                write!(f, "Invalid escape sequence in character \
+                           class: '{}'.", e),
+            InvalidRepeatRange { min, max } =>
+                write!(f, "Invalid counted repetition range: {{{}, {}}}. \
+                           Counted repetition ranges must start with the \
+                           minimum, but {} > {}", min, max, min, max),
+            InvalidScalarValue(c) =>
+                write!(f, "Number does not correspond to a Unicode scalar \
+                           value: '{}'.", c),
+            MissingBase10 =>
+                write!(f, "Missing maximum in counted reptition operator."),
+            RepeaterExpectsExpr =>
+                write!(f, "Missing expression for reptition operator."),
+            RepeaterUnexpectedExpr(ref e) =>
+                write!(f, "Invalid application of reptition operator to: \
+                          '{}'.", e),
+            UnclosedCaptureName(ref s) =>
+                write!(f, "Capture name group for '{}' is not closed. \
+                           (Missing a '>'.)", s),
+            UnclosedHex =>
+                write!(f, "Unclosed hexadecimal literal (missing a '}}')."),
+            UnclosedParen =>
+                write!(f, "Unclosed parenthesis."),
+            UnclosedRepeat =>
+                write!(f, "Unclosed counted repetition (missing a '}}')."),
+            UnclosedUnicodeName =>
+                write!(f, "Unclosed Unicode literal (missing a '}}')."),
+            UnexpectedClassEof =>
+                write!(f, "Character class was not closed before the end of \
+                           the regex (missing a ']')."),
+            UnexpectedEscapeEof =>
+                write!(f, "Started an escape sequence that didn't finish \
+                           before the end of the regex."),
+            UnexpectedFlagEof =>
+                write!(f, "Inline flag settings was not closed before the end \
+                           of the regex (missing a ')' or ':')."),
+            UnexpectedTwoDigitHexEof =>
+                write!(f, "Unexpected end of two digit hexadecimal literal."),
+            UnopenedParen =>
+                write!(f, "Unopened parenthesis."),
+            UnrecognizedEscape(c) =>
+                write!(f, "Unrecognized escape sequence: '\\{}'.", c),
+            UnrecognizedFlag(c) =>
+                write!(f, "Unrecognized flag: '{}'. \
+                           (Allowed flags: i, s, m, U, x.)", c),
+            UnrecognizedUnicodeClass(ref s) =>
+                write!(f, "Unrecognized Unicode class name: '{}'.", s),
+        }
+    }
+}
+
+/// Returns the Unicode *simple* case folding of `c`.
+///
+/// N.B. This is hidden because it really isn't the responsibility of this
+/// crate to do simple case folding. One hopes that either another crate or
+/// the standard library will be able to do this for us. In any case, we still
+/// expose it because it is used inside the various Regex engines.
+#[doc(hidden)]
+pub fn simple_case_fold(c: char) -> char {
+    match case_folding::C_plus_S_table.binary_search_by(|&(x, _)| x.cmp(&c)) {
+        Ok(i) => case_folding::C_plus_S_table[i].1,
+        Err(_) => c,
+    }
+}
+
+/// Escapes all regular expression meta characters in `text`.
+///
+/// The string returned may be safely used as a literal in a regular
+/// expression.
+pub fn quote(text: &str) -> String {
+    let mut quoted = String::with_capacity(text.len());
+    for c in text.chars() {
+        if parser::is_punct(c) {
+            quoted.push('\\');
+        }
+        quoted.push(c);
+    }
+    quoted
+}
+
+fn quote_char(c: char) -> String {
+    let mut s = String::new();
+    if parser::is_punct(c) {
+        s.push('\\');
+    }
+    s.push(c);
+    s
+}
+
+fn inc_char(c: char) -> char { // next Unicode scalar value after `c`
+    match c {
+        char::MAX => char::MAX, // saturates: there is no scalar value above `char::MAX`
+        '\u{D7FF}' => '\u{E000}', // hop over the surrogate gap U+D800..U+DFFF
+        c => char::from_u32(c as u32 + 1).unwrap(), // cannot fail: both inputs whose +1 is not a scalar are matched above
+    }
+}
+
+fn dec_char(c: char) -> char { // previous Unicode scalar value before `c`
+    match c {
+        '\x00' => '\x00', // saturates: there is no scalar value below U+0000
+        '\u{E000}' => '\u{D7FF}', // hop back over the surrogate gap U+D800..U+DFFF
+        c => char::from_u32(c as u32 - 1).unwrap(), // cannot fail: both inputs whose -1 is not a scalar are matched above
+    }
+}
+
+/// Returns true if and only if `c` is a word character.
+#[doc(hidden)]
+pub fn is_word_char(c: char) -> bool {
+    match c {
+        '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z'  => true,
+        _ => ::unicode::regex::PERLW.binary_search_by(|&(start, end)| {
+            if c >= start && c <= end {
+                Ordering::Equal
+            } else if start > c {
+                Ordering::Greater
+            } else {
+                Ordering::Less
+            }
+        }).is_ok(),
+    }
+}
+
+#[cfg(test)]
+mod properties;
+
+#[cfg(test)]
+mod tests {
+    use {CharClass, ClassRange};
+
+    fn class(ranges: &[(char, char)]) -> CharClass {
+        let ranges = ranges.iter().cloned()
+                           .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
+        CharClass::new(ranges)
+    }
+
+    fn classi(ranges: &[(char, char)]) -> CharClass {
+        let mut cls = class(ranges);
+        cls.casei = true;
+        cls
+    }
+
+    #[test]
+    fn class_canon_no_change() {
+        let cls = class(&[('a', 'c'), ('x', 'z')]);
+        assert_eq!(cls.clone().canonicalize(), cls);
+    }
+
+    #[test]
+    fn class_canon_unordered() {
+        let cls = class(&[('x', 'z'), ('a', 'c')]);
+        assert_eq!(cls.canonicalize(), class(&[
+            ('a', 'c'), ('x', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_overlap() {
+        let cls = class(&[('x', 'z'), ('w', 'y')]);
+        assert_eq!(cls.canonicalize(), class(&[
+            ('w', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_overlap_many() {
+        let cls = class(&[
+            ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
+            ('m', 'p'), ('l', 's'),
+        ]);
+        assert_eq!(cls.clone().canonicalize(), class(&[
+            ('a', 'j'), ('l', 's'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_overlap_many_case_fold() {
+        let cls = class(&[
+            ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
+            ('M', 'P'), ('L', 'S'), ('c', 'f'),
+        ]);
+        assert_eq!(cls.case_fold(), classi(&[
+            ('a', 'j'), ('l', 's'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_overlap_boundary() {
+        let cls = class(&[('x', 'z'), ('u', 'w')]);
+        assert_eq!(cls.canonicalize(), class(&[
+            ('u', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_extreme_edge_case() {
+        let cls = class(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
+        assert_eq!(cls.canonicalize(), class(&[
+            ('\x00', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_singles() {
+        let cls = class(&[('a', 'a'), ('b', 'b')]);
+        assert_eq!(cls.canonicalize(), class(&[('a', 'b')]));
+    }
+
+    #[test]
+    fn class_negate_single() {
+        let cls = class(&[('a', 'a')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_singles() {
+        let cls = class(&[('a', 'a'), ('b', 'b')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\x60'), ('\x63', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_multiples() {
+        let cls = class(&[('a', 'c'), ('x', 'z')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\x60'), ('\x64', '\x77'), ('\x7b', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_min_scalar() {
+        let cls = class(&[('\x00', 'a')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x62', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_max_scalar() {
+        let cls = class(&[('a', '\u{10FFFF}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\x60'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_everything() {
+        let cls = class(&[('\x00', '\u{10FFFF}')]);
+        assert_eq!(cls.negate(), class(&[]));
+    }
+
+    #[test]
+    fn class_negate_everything_sans_one() {
+        let cls = class(&[
+            ('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')
+        ]);
+        assert_eq!(cls.negate(), class(&[
+            ('\u{10FFFE}', '\u{10FFFE}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_surrogates_min() {
+        let cls = class(&[('\x00', '\u{D7FF}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\u{E000}', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_surrogates_min_edge() {
+        let cls = class(&[('\x00', '\u{D7FE}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\u{D7FF}', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_surrogates_max() {
+        let cls = class(&[('\u{E000}', '\u{10FFFF}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\u{D7FF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_surrogates_max_edge() {
+        let cls = class(&[('\u{E001}', '\u{10FFFF}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\u{E000}'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_retain_only_needed() {
+        let cls = class(&[('A', 'Z'), ('a', 'z')]);
+        assert_eq!(cls.case_fold(), classi(&[
+            ('a', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_az() {
+        let cls = class(&[('A', 'Z')]);
+        assert_eq!(cls.case_fold(), classi(&[
+            ('a', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_a_underscore() {
+        let cls = class(&[('A', 'A'), ('_', '_')]);
+        assert_eq!(cls.clone().canonicalize(), class(&[
+            ('A', 'A'), ('_', '_'),
+        ]));
+        assert_eq!(cls.case_fold(), classi(&[
+            ('_', '_'), ('a', 'a'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_a_equals() {
+        let cls = class(&[('A', 'A'), ('=', '=')]);
+        assert_eq!(cls.clone().canonicalize(), class(&[
+            ('=', '='), ('A', 'A'),
+        ]));
+        assert_eq!(cls.case_fold(), classi(&[
+            ('=', '='), ('a', 'a'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_no_folding_needed() {
+        let cls = class(&[('\x00', '\x10')]);
+        assert_eq!(cls.case_fold(), classi(&[
+            ('\x00', '\x10'),
+        ]));
+    }
+}
diff --git a/regex_syntax/src/parser.rs b/regex_syntax/src/parser.rs
new file mode 100644
index 0000000000..25020cc752
--- /dev/null
+++ b/regex_syntax/src/parser.rs
@@ -0,0 +1,2298 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::cmp::{max, min};
+
+use unicode::regex::UNICODE_CLASSES;
+
+use {
+    Expr, Repeater, CharClass, ClassRange, CaptureIndex, CaptureName,
+    Error, ErrorKind, Result,
+};
+
+/// Parser state.
+///
+/// Keeps the entire input in memory and maintains a cursor (char offset).
+///
+/// It also keeps an expression stack, which is responsible for managing
+/// grouped expressions and flag state.
+#[derive(Debug)]
+pub struct Parser {
+    chars: Vec<char>,
+    chari: usize,
+    stack: Vec<Build>,
+    caps: usize,
+    names: Vec<String>, // to check for duplicates
+    flags: Flags,
+}
+
+/// An ephemeral type for representing the expression stack.
+///
+/// Everything on the stack is either a regular expression or a marker
+/// indicating the opening of a group (possibly non-capturing). The opening
+/// of a group copies the current flag state, which is reset on the parser
+/// state once the group closes.
+#[derive(Debug)]
+enum Build {
+    Expr(Expr),
+    LeftParen {
+        i: CaptureIndex,
+        name: CaptureName,
+        chari: usize,
+        old_flags: Flags,
+    },
+}
+
+/// Flag state.
+#[derive(Clone, Copy, Debug)]
+struct Flags {
+    casei: bool,
+    multi: bool,
+    dotnl: bool,
+    swap_greed: bool,
+    ignore_space: bool,
+}
+
+// Primary expression parsing routines.
+impl Parser {
+    pub fn parse(s: &str) -> Result<Expr> {
+        Parser {
+            chars: s.chars().collect(),
+            chari: 0,
+            stack: vec![],
+            caps: 0,
+            names: vec![],
+            flags: Flags {
+                casei: false,
+                multi: false,
+                dotnl: false,
+                swap_greed: false,
+                ignore_space: false,
+            },
+        }.parse_expr()
+    }
+
+    // Top-level expression parser.
+    //
+    // Starts at the beginning of the input and consumes until either the end
+    // of input or an error.
+    fn parse_expr(mut self) -> Result<Expr> {
+        while !self.eof() {
+            let build_expr = match self.cur() {
+                '\\' => try!(self.parse_escape()),
+                '|' => { let e = try!(self.alternate()); self.bump(); e }
+                '?' => try!(self.parse_simple_repeat(Repeater::ZeroOrOne)),
+                '*' => try!(self.parse_simple_repeat(Repeater::ZeroOrMore)),
+                '+' => try!(self.parse_simple_repeat(Repeater::OneOrMore)),
+                '{' => try!(self.parse_counted_repeat()),
+                '[' => match self.maybe_parse_ascii() {
+                    None => try!(self.parse_class()),
+                    Some(cls) => Build::Expr(Expr::Class(cls)),
+                },
+                '^' => {
+                    if self.flags.multi {
+                        self.parse_one(Expr::StartLine)
+                    } else {
+                        self.parse_one(Expr::StartText)
+                    }
+                }
+                '$' => {
+                    if self.flags.multi {
+                        self.parse_one(Expr::EndLine)
+                    } else {
+                        self.parse_one(Expr::EndText)
+                    }
+                }
+                '.' => {
+                    if self.flags.dotnl {
+                        self.parse_one(Expr::AnyChar)
+                    } else {
+                        self.parse_one(Expr::AnyCharNoNL)
+                    }
+                }
+                '(' => try!(self.parse_group()),
+                ')' => {
+                    let (old_flags, e) = try!(self.close_paren());
+                    self.bump();
+                    self.flags = old_flags;
+                    e
+                }
+                _ => Build::Expr(Expr::Literal {
+                    chars: vec![self.bump()],
+                    casei: self.flags.casei,
+                }),
+            };
+            if !build_expr.is_empty() {
+                let build_expr = self.maybe_class_case_fold(build_expr);
+                self.stack.push(build_expr);
+            }
+        }
+        self.finish_concat()
+    }
+
+    // Parses an escape sequence, e.g., \Ax
+    //
+    // Start: `\`
+    // End:   `x`
+    fn parse_escape(&mut self) -> Result<Build> {
+        self.bump();
+        if self.eof() {
+            return Err(self.err(ErrorKind::UnexpectedEscapeEof));
+        }
+        let c = self.cur();
+        if is_punct(c) {
+            return Ok(Build::Expr(Expr::Literal {
+                chars: vec![self.bump()],
+                casei: self.flags.casei,
+            }));
+        }
+
+        fn lit(c: char) -> Build {
+            Build::Expr(Expr::Literal { chars: vec![c], casei: false })
+        }
+        match c {
+            'a' => { self.bump(); Ok(lit('\x07')) }
+            'f' => { self.bump(); Ok(lit('\x0C')) }
+            't' => { self.bump(); Ok(lit('\t')) }
+            'n' => { self.bump(); Ok(lit('\n')) }
+            'r' => { self.bump(); Ok(lit('\r')) }
+            'v' => { self.bump(); Ok(lit('\x0B')) }
+            'A' => { self.bump(); Ok(Build::Expr(Expr::StartText)) }
+            'z' => { self.bump(); Ok(Build::Expr(Expr::EndText)) }
+            'b' => { self.bump(); Ok(Build::Expr(Expr::WordBoundary)) }
+            'B' => { self.bump(); Ok(Build::Expr(Expr::NotWordBoundary)) }
+            '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => self.parse_octal(),
+            'x' => { self.bump(); self.parse_hex() }
+            'p'|'P' => {
+                self.bump();
+                self.parse_unicode_class(c == 'P')
+                    .map(|cls| Build::Expr(Expr::Class(cls)))
+            }
+            'd'|'s'|'w'|'D'|'S'|'W' => {
+                self.bump();
+                Ok(Build::Expr(Expr::Class(self.parse_perl_class(c))))
+            }
+            c => Err(self.err(ErrorKind::UnrecognizedEscape(c))),
+        }
+    }
+
+    // Parses a group, e.g., `(abc)`.
+    //
+    // Start: `(`
+    // End:   `a`
+    //
+    // A more interesting example, `(?P<foo>abc)`.
+    //
+    // Start: `(`
+    // End:   `a`
+    fn parse_group(&mut self) -> Result<Build> {
+        let chari = self.chari;
+        let mut name: CaptureName = None;
+        self.bump();
+        if self.bump_if("?P<") {
+            let n = try!(self.parse_group_name());
+            if self.names.iter().any(|n2| n2 == &n) {
+                return Err(self.err(ErrorKind::DuplicateCaptureName(n)));
+            }
+            self.names.push(n.clone());
+            name = Some(n);
+        } else if self.bump_if("?") {
+            // This can never be capturing. It's either setting flags for
+            // the current group, or it's opening a non-capturing group or
+            // it's opening a group with a specific set of flags (which is
+            // also non-capturing).
+            // Anything else is an error.
+            return self.parse_group_flags(chari);
+        }
+        self.caps = checkadd(self.caps, 1);
+        Ok(Build::LeftParen {
+            i: Some(self.caps),
+            name: name,
+            chari: chari,
+            old_flags: self.flags, // no flags changed if we're here
+        })
+    }
+
+    // Parses flags (inline or grouped), e.g., `(?s-i:abc)`.
+    //
+    // Start: `s`
+    // End:   `a`
+    //
+    // Another example, `(?s-i)a`.
+    //
+    // Start: `s`
+    // End:   `a`
+    fn parse_group_flags(&mut self, opening_chari: usize) -> Result<Build> {
+        let old_flags = self.flags;
+        let mut sign = true;
+        let mut saw_flag = false;
+        loop {
+            if self.eof() {
+                // e.g., (?i
+                return Err(self.err(ErrorKind::UnexpectedFlagEof));
+            }
+            match self.cur() {
+                'i' => { self.flags.casei = sign; saw_flag = true }
+                'm' => { self.flags.multi = sign; saw_flag = true }
+                's' => { self.flags.dotnl = sign; saw_flag = true }
+                'U' => { self.flags.swap_greed = sign; saw_flag = true }
+                'x' => { self.flags.ignore_space = sign; saw_flag = true }
+                '-' => {
+                    if !sign {
+                        // e.g., (?-i-s)
+                        return Err(self.err(ErrorKind::DoubleFlagNegation));
+                    }
+                    sign = false;
+                    saw_flag = false;
+                }
+                ')' => {
+                    if !saw_flag {
+                        // e.g., (?)
+                        return Err(self.err(ErrorKind::EmptyFlagNegation));
+                    }
+                    // At this point, we're just changing the flags inside
+                    // the current group, which means the old flags have
+                    // been saved elsewhere. Our modifications in place are
+                    // okey dokey!
+                    //
+                    // This particular flag expression only has a stateful
+                    // impact on a regex's AST, so nothing gets explicitly
+                    // added.
+                    self.bump();
+                    return Ok(Build::Expr(Expr::Empty));
+                }
+                ':' => {
+                    if !sign && !saw_flag {
+                        // e.g., (?i-:a)
+                        // Note that if there's no negation, it's OK not
+                        // to see flag, because you end up with a regular
+                        // non-capturing group: `(?:a)`.
+                        return Err(self.err(ErrorKind::EmptyFlagNegation));
+                    }
+                    self.bump();
+                    return Ok(Build::LeftParen {
+                        i: None,
+                        name: None,
+                        chari: opening_chari,
+                        old_flags: old_flags,
+                    });
+                }
+                // e.g., (?z:a)
+                c => return Err(self.err(ErrorKind::UnrecognizedFlag(c))),
+            }
+            self.bump();
+        }
+    }
+
+    // Parses a group name, e.g., `foo` in `(?P<foo>abc)`.
+    //
+    // Start: `f`
+    // End:   `a`
+    fn parse_group_name(&mut self) -> Result<String> {
+        let mut name = String::new();
+        while !self.eof() && !self.peek_is('>') {
+            name.push(self.bump());
+        }
+        if self.eof() {
+            // Input ended before the closing `>`, e.g., (?P<a
+            return Err(self.err(ErrorKind::UnclosedCaptureName(name)));
+        }
+        let all_valid = name.chars().all(is_valid_capture_char);
+        match name.chars().next() {
+            // Empty name, e.g., (?P<>a)
+            None => Err(self.err(ErrorKind::EmptyCaptureName)),
+            Some(c) if (c >= '0' && c <= '9') || !all_valid => {
+                // Invalid character, e.g., (?P<a#>x)
+                // Leading digit, e.g., (?P<1a>x)
+                Err(self.err(ErrorKind::InvalidCaptureName(name)))
+            }
+            _ => {
+                self.bump(); // consume the closing `>`
+                Ok(name)
+            }
+        }
+    }
+
+    // Parses a counted repetition operator, e.g., `a{2,4}?z`.
+    //
+    // Start: `{`
+    // End:   `z`
+    fn parse_counted_repeat(&mut self) -> Result<Build> {
+        let e = try!(self.pop(ErrorKind::RepeaterExpectsExpr)); // e.g., ({5}
+        if !e.can_repeat() {
+            // The popped expr cannot be repeated, e.g., a*{5}
+            return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(e)));
+        }
+        self.bump(); // skip the opening `{`
+        let min = try!(self.parse_decimal(|c| c != ',' && c != '}'));
+        let mut max_opt = Some(min); // `{n}` means min == max == n
+        if self.bump_if(',') {
+            if self.peek_is('}') {
+                max_opt = None; // `{n,}` has no upper bound
+            } else {
+                let max = try!(self.parse_decimal(|c| c != '}'));
+                if min > max {
+                    // Bounds are out of order, e.g., a{2,1}
+                    return Err(self.err(ErrorKind::InvalidRepeatRange {
+                        min: min,
+                        max: max,
+                    }));
+                }
+                max_opt = Some(max);
+            }
+        }
+        if !self.bump_if('}') {
+            Err(self.err(ErrorKind::UnclosedRepeat))
+        } else {
+            Ok(Build::Expr(Expr::Repeat {
+                e: Box::new(e),
+                r: Repeater::Range { min: min, max: max_opt },
+                greedy: !self.bump_if('?') ^ self.flags.swap_greed,
+            }))
+        }
+    }
+
+    // Parses a simple repetition operator, e.g., `a+?z`.
+    //
+    // Start: `+`
+    // End:   `z`
+    //
+    // N.B. "simple" in this context means "not min/max repetition",
+    // e.g., `a{1,2}`.
+    fn parse_simple_repeat(&mut self, rep: Repeater) -> Result<Build> {
+        let e = try!(self.pop(ErrorKind::RepeaterExpectsExpr)); // e.g., (*
+        if !e.can_repeat() {
+            // The popped expr cannot be repeated, e.g., a**
+            return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(e)));
+        }
+        self.bump(); // skip the repetition operator itself
+        Ok(Build::Expr(Expr::Repeat {
+            e: Box::new(e),
+            r: rep,
+            greedy: !self.bump_if('?') ^ self.flags.swap_greed,
+        }))
+    }
+
+    // Parses a decimal number from the input matched by `until`, e.g., `a{123,456}`.
+    //
+    // Start: `1`
+    // End:   `,` (where `until` is `|c| c != ','`)
+    fn parse_decimal<B: Bumpable>(&mut self, until: B) -> Result<u32> {
+        match self.bump_get(until) {
+            // Nothing was consumed, e.g., a{}
+            None => Err(self.err(ErrorKind::MissingBase10)),
+            Some(n) => {
+                // Not a number, e.g., a{xyz
+                // Does not fit in a `u32`, e.g., a{9999999999}
+                let n = n.trim();
+                u32::from_str_radix(n, 10)
+                    .map_err(|_| self.err(ErrorKind::InvalidBase10(n.into())))
+            }
+        }
+    }
+
+    // Parses an octal number, up to 3 digits, e.g., `a\123b`
+    //
+    // Start: `1`
+    // End:   `b`
+    fn parse_octal(&mut self) -> Result<Build> {
+        use std::char;
+        let mut i = 0; // counter for limiting octal to 3 digits.
+        let mut n = 0u32; // value of the octal digits accepted so far.
+        // Accumulate the value in the predicate instead of re-parsing the
+        // string from `bump_get`, which also holds anything skipped between
+        // digits when whitespace is ignored. At most 3 digits are accepted,
+        // so `n <= 0777` (511 decimal), always a valid Unicode scalar value.
+        self.bump_get(|c| {
+            i += 1;
+            let ok = i <= 3 && c >= '0' && c <= '7';
+            if ok { n = n * 8 + (c as u32 - '0' as u32); }
+            ok
+        }).expect("octal string"); // guaranteed at least 1 digit
+        Ok(Build::Expr(Expr::Literal {
+            chars: vec![char::from_u32(n).expect("Unicode scalar value")],
+            casei: self.flags.casei,
+        }))
+    }
+
+    // Parses a hex number, e.g., `a\x5ab`.
+    //
+    // Start: `5`
+    // End:   `b`
+    //
+    // And also, `a\x{2603}b`.
+    //
+    // Start: `{`
+    // End:   `b`
+    fn parse_hex(&mut self) -> Result<Build> {
+        if self.bump_if('{') {
+            self.parse_hex_many_digits() // braced form; `{` already consumed
+        } else {
+            self.parse_hex_two_digits() // bare form
+        }
+    }
+
+    // Parses a many-digit hex number, e.g., `a\x{2603}b`.
+    //
+    // Start: `2`
+    // End:   `b`
+    fn parse_hex_many_digits(&mut self) -> Result<Build> {
+        use std::char;
+
+        let s = self.bump_get(|c| c != '}').unwrap_or("".into());
+        let n = try!(u32::from_str_radix(&s, 16)
+                         .map_err(|_| self.err(ErrorKind::InvalidBase16(s))));
+        let c = try!(char::from_u32(n).ok_or_else( // build the error lazily
+                         || self.err(ErrorKind::InvalidScalarValue(n))));
+        if !self.bump_if('}') {
+            // Missing closing brace, e.g., a\x{d
+            return Err(self.err(ErrorKind::UnclosedHex));
+        }
+        Ok(Build::Expr(Expr::Literal {
+            chars: vec![c],
+            casei: self.flags.casei,
+        }))
+    }
+
+    // Parses a two-digit hex number, e.g., `a\x5ab`.
+    //
+    // Start: `5`
+    // End:   `b`
+    fn parse_hex_two_digits(&mut self) -> Result<Build> {
+        use std::char;
+
+        let mut i = 0;
+        let s = self.bump_get(|_| { i += 1; i <= 2 }).unwrap_or("".into());
+        if s.chars().count() < 2 {
+            // Count chars, not bytes: one multi-byte char is not two digits.
+            // e.g., a\x or a\xf
+            return Err(self.err(ErrorKind::UnexpectedTwoDigitHexEof));
+        }
+        let n = try!(u32::from_str_radix(&s, 16)
+                         .map_err(|_| self.err(ErrorKind::InvalidBase16(s))));
+        Ok(Build::Expr(Expr::Literal {
+            // Because 0...255 are all valid Unicode scalar values.
+            chars: vec![char::from_u32(n).expect("Unicode scalar value")],
+            casei: self.flags.casei,
+        }))
+    }
+
+    // Parses a character class, e.g., `[^a-zA-Z0-9]+`.
+    //
+    // Start: `[`
+    // End:   `+`
+    fn parse_class(&mut self) -> Result<Build> {
+        self.bump(); // skip the opening `[`
+        let negated = self.bump_if('^');
+        let mut class = CharClass::empty();
+        while self.bump_if('-') { // leading `-`s are literal
+            class.ranges.push(ClassRange::one('-'));
+        }
+        loop {
+            if self.eof() {
+                // The class was never closed, e.g., [a
+                return Err(self.err(ErrorKind::UnexpectedClassEof));
+            }
+            match self.cur() {
+                // If no ranges have been added, then `]` is the first
+                // character (sans, perhaps, the `^` symbol), so it should
+                // be interpreted as a `]` instead of a closing class bracket.
+                ']' if class.len() > 0 => { self.bump(); break }
+                '[' => match self.maybe_parse_ascii() { // e.g., [:alpha:]
+                    Some(class2) => class.ranges.extend(class2),
+                    None => {
+                        self.bump(); // just a literal `[`
+                        try!(self.parse_class_range(&mut class, '['))
+                    }
+                },
+                '\\' => match try!(self.parse_escape()) {
+                    Build::Expr(Expr::Class(class2)) => {
+                        class.ranges.extend(class2);
+                    }
+                    Build::Expr(Expr::Literal { chars, .. }) => {
+                        try!(self.parse_class_range(&mut class, chars[0]));
+                    }
+                    Build::Expr(e) => {
+                        let err = ErrorKind::InvalidClassEscape(e);
+                        return Err(self.err(err));
+                    }
+                    // Because `parse_escape` can never return `LeftParen`.
+                    _ => unreachable!(),
+                },
+                start => {
+                    self.bump();
+                    try!(self.parse_class_range(&mut class, start));
+                }
+            }
+        }
+        if negated {
+            class = class.negate();
+        }
+        Ok(Build::Expr(Expr::Class(class.canonicalize())))
+    }
+
+    // Parses a single range in a character class.
+    //
+    // Since this is a helper for `parse_class`, its signature sticks out.
+    // Namely, it requires the start character of the range and the char
+    // class to mutate.
+    //
+    // e.g., `[a-z]`
+    //
+    // Start: `-` (with start == `a`)
+    // End:   `]`
+    fn parse_class_range(&mut self, class: &mut CharClass, start: char)
+                        -> Result<()> {
+        if !self.bump_if('-') {
+            // Not a range, so just push a singleton range.
+            class.ranges.push(ClassRange::one(start));
+            return Ok(());
+        }
+        if self.eof() {
+            // Input ended right after the `-`, e.g., [a-
+            return Err(self.err(ErrorKind::UnexpectedClassEof));
+        }
+        if self.peek_is(']') {
+            // This is the end of the class, so we permit use of `-` as a
+            // regular char (just like we do in the beginning).
+            class.ranges.push(ClassRange::one(start));
+            class.ranges.push(ClassRange::one('-'));
+            return Ok(());
+        }
+
+        // We have a real range. We just need to parse its end literal and
+        // make sure it's a valid range.
+        let end = match self.cur() {
+            '\\' => match try!(self.parse_escape()) {
+                Build::Expr(Expr::Literal { chars, .. }) => chars[0],
+                Build::Expr(e) => {
+                    return Err(self.err(ErrorKind::InvalidClassEscape(e)));
+                }
+                // Because `parse_escape` can never return `LeftParen`.
+                _ => unreachable!(),
+            },
+            _ => self.bump(),
+        };
+        if end < start {
+            // The bounds are out of order, e.g., [z-a]
+            return Err(self.err(ErrorKind::InvalidClassRange {
+                start: start,
+                end: end,
+            }));
+        }
+        class.ranges.push(ClassRange::new(start, end));
+        Ok(())
+    }
+
+    // Parses an ASCII class, e.g., `[:alnum:]+`.
+    //
+    // Start: `[`
+    // End:   `+`
+    //
+    // Also supports negation, e.g., `[:^alnum:]`.
+    //
+    // This parsing routine is distinct from the others in that it doesn't
+    // actually report any errors. Namely, if it fails, then the parser should
+    // fall back to parsing a regular class.
+    //
+    // This method will only make progress in the parser if it succeeds.
+    // Otherwise, the input remains where it started.
+    fn maybe_parse_ascii(&mut self) -> Option<CharClass> {
+        fn parse(p: &mut Parser) -> Option<CharClass> {
+            p.bump(); // the `[`
+            if !p.bump_if(':') { return None; }
+            let negate = p.bump_if('^');
+            let name = match p.bump_get(|c| c != ':') {
+                None => return None, // nothing before the closing `:`
+                Some(name) => name,
+            };
+            if !p.bump_if(":]") { return None; }
+            ascii_class(&name).map(|c| if !negate { c } else { c.negate() })
+        }
+        let start = self.chari; // saved so a failed attempt can be undone
+        match parse(self) {
+            None => { self.chari = start; None } // rewind
+            result => result,
+        }
+    }
+
+    // Parses a Unicode class name, e.g., `a\pLb`.
+    //
+    // Start: `L`
+    // End:   `b`
+    //
+    // And also, `a\p{Greek}b`.
+    //
+    // Start: `{`
+    // End:   `b`
+    //
+    // `neg` is true when the class name is used with `\P`.
+    fn parse_unicode_class(&mut self, neg: bool) -> Result<CharClass> {
+        let name =
+            if self.bump_if('{') {
+                let n = self.bump_get(|c| c != '}').unwrap_or("".into());
+                if n.is_empty() || !self.bump_if('}') {
+                    // Empty or unclosed name, e.g., \p{} or \p{Greek
+                    return Err(self.err(ErrorKind::UnclosedUnicodeName));
+                }
+                n
+            } else {
+                if self.eof() {
+                    // Nothing follows the escape, e.g., \p
+                    return Err(self.err(ErrorKind::UnexpectedEscapeEof));
+                }
+                self.bump().to_string() // one-letter name, e.g., `L`
+            };
+        match unicode_class(&name) {
+            None => Err(self.err(ErrorKind::UnrecognizedUnicodeClass(name))),
+            Some(cls) => if neg { Ok(cls.negate()) } else { Ok(cls) },
+        }
+    }
+
+    // Parses a perl character class with Unicode support.
+    //
+    // `name` must be one of d, s, w, D, S, W. If not, this function panics.
+    // The upper case names yield the negation of their lower case class.
+    // No parser state is changed.
+    fn parse_perl_class(&mut self, name: char) -> CharClass {
+        use unicode::regex::{PERLD, PERLS, PERLW};
+        match name {
+            'd' => raw_class_to_expr(PERLD),
+            'D' => raw_class_to_expr(PERLD).negate(),
+            's' => raw_class_to_expr(PERLS),
+            'S' => raw_class_to_expr(PERLS).negate(),
+            'w' => raw_class_to_expr(PERLW),
+            'W' => raw_class_to_expr(PERLW).negate(),
+            _ => unreachable!(), // `name` was none of the six letters above
+        }
+    }
+
+    // Always bump to the next input and return the given expression as a
+    // `Build`.
+    //
+    // This is mostly for convenience when the surrounding context implies
+    // that the next character corresponds to the given expression.
+    fn parse_one(&mut self, e: Expr) -> Build {
+        self.bump(); // the char that `e` stands for; its value is not needed
+        Build::Expr(e)
+    }
+}
+
+// Auxiliary helper methods.
+impl Parser {
+    fn chars(&self) -> Chars {
+        Chars::new(&self.chars[self.chari..], self.flags.ignore_space)
+    }
+
+    fn bump(&mut self) -> char {
+        let c = self.cur(); // panics if there is no current char
+        self.chari = checkadd(self.chari, self.chars().next_count());
+        c
+    }
+
+    fn cur(&self) -> char { self.chars().next().unwrap() } // panics at EOF
+
+    fn eof(&self) -> bool { self.chars().next().is_none() }
+
+    fn bump_get<B: Bumpable>(&mut self, s: B) -> Option<String> {
+        let n = s.match_end(self);
+        if n == 0 {
+            None // no match: the position is left untouched
+        } else {
+            let end = checkadd(self.chari, n);
+            let s = self.chars[self.chari..end] // raw slice: keeps skipped chars
+                        .iter().cloned().collect::<String>();
+            self.chari = end;
+            Some(s)
+        }
+    }
+
+    fn bump_if<B: Bumpable>(&mut self, s: B) -> bool {
+        let n = s.match_end(self);
+        if n == 0 {
+            false // no match: the position is left untouched
+        } else {
+            self.chari = checkadd(self.chari, n);
+            true
+        }
+    }
+
+    fn peek_is<B: Bumpable>(&self, s: B) -> bool {
+        s.match_end(self) > 0 // like `bump_if`, but never advances
+    }
+
+    fn err(&self, kind: ErrorKind) -> Error {
+        self.errat(self.chari, kind) // report at the current position
+    }
+
+    fn errat(&self, pos: usize, kind: ErrorKind) -> Error {
+        Error { pos: pos, surround: self.windowat(pos), kind: kind }
+    }
+
+    fn windowat(&self, pos: usize) -> String {
+        let s = max(5, pos) - 5; // up to 5 chars before `pos`, clamped at 0
+        let e = min(self.chars.len(), checkadd(pos, 5)); // and up to 5 after
+        self.chars[s..e].iter().cloned().collect()
+    }
+
+    fn pop(&mut self, expected: ErrorKind) -> Result<Expr> {
+        match self.stack.pop() {
+            None | Some(Build::LeftParen{..}) => Err(self.err(expected)),
+            Some(Build::Expr(e)) => Ok(e),
+        }
+    }
+
+    // If the current context calls for case insensitivity and if the expr
+    // given is a character class, do case folding on it and return the new
+    // class.
+    //
+    // Otherwise, return the expression unchanged.
+    fn maybe_class_case_fold(&mut self, bexpr: Build) -> Build {
+        match bexpr {
+            Build::Expr(Expr::Class(cls)) => {
+                Build::Expr(Expr::Class(
+                    if self.flags.casei && !cls.casei {
+                        cls.case_fold()
+                    } else {
+                        cls
+                    }
+                ))
+            }
+            bexpr => bexpr,
+        }
+    }
+}
+
+struct Chars<'a> {
+    chars: &'a [char], // the input being iterated over
+    cur: usize, // index into `chars` of the next char to examine
+    ignore_space: bool, // when set, whitespace and `#` comments are skipped
+    in_comment: bool, // true from a `#` until the next `\n`
+}
+
+impl<'a> Iterator for Chars<'a> {
+    type Item = char;
+    fn next(&mut self) -> Option<char> {
+        self.skip(); // no-op unless `ignore_space` is set
+        if self.cur < self.chars.len() {
+            let c = self.chars[self.cur];
+            self.cur = checkadd(self.cur, 1);
+            Some(c)
+        } else {
+            None
+        }
+    }
+}
+
+impl<'a> Chars<'a> {
+    fn new(chars: &[char], ignore_space: bool) -> Chars {
+        Chars {
+            chars: chars,
+            cur: 0,
+            ignore_space: ignore_space,
+            in_comment: false,
+        }
+    }
+
+    fn skip(&mut self) {
+        if !self.ignore_space { return; }
+        while self.cur < self.chars.len() {
+            if !self.in_comment && self.c() == '#' {
+                self.in_comment = true; // a `#` opens a comment
+            } else if self.in_comment && self.c() == '\n' {
+                self.in_comment = false; // the `\n` itself is skipped below
+            }
+            if self.in_comment || self.c().is_whitespace() {
+                self.cur = checkadd(self.cur, 1);
+            } else {
+                break;
+            }
+        }
+    }
+
+    fn c(&self) -> char {
+        self.chars[self.cur] // callers check `cur < chars.len()` first
+    }
+
+    fn next_count(&mut self) -> usize {
+        self.next();
+        self.cur // chars consumed so far, skipped ones included
+    }
+}
+
+// Auxiliary methods for manipulating the expression stack.
+impl Parser {
+    // Called whenever an alternate (`|`) is found.
+    //
+    // This pops the expression stack until:
+    //
+    //  1. The stack is empty. Pushes an alternation with one arm.
+    //  2. An opening parenthesis is found. Leave the parenthesis
+    //     on the stack and push an alternation with one arm.
+    //  3. An alternate (`|`) is found. Pop the existing alternation,
+    //     add an arm and push the modified alternation.
+    //
+    // Each "arm" in the above corresponds to the concatenation of all
+    // popped expressions.
+    //
+    // In the first two cases, the stack is left in an invalid state
+    // because an alternation with one arm is not allowed. This
+    // particular state will be detected by `finish_concat` and an
+    // error will be reported.
+    //
+    // In none of the cases is an empty arm allowed. If an empty arm
+    // is found, an error is reported.
+    fn alternate(&mut self) -> Result<Build> {
+        let mut concat = vec![];
+        let alts = |es| Ok(Build::Expr(Expr::Alternate(es)));
+        loop {
+            match self.stack.pop() {
+                None => {
+                    if concat.is_empty() {
+                        // Empty first arm, e.g., |a
+                        return Err(self.err(ErrorKind::EmptyAlternate));
+                    }
+                    return alts(vec![rev_concat(concat)]);
+                }
+                Some(e @ Build::LeftParen{..}) => {
+                    if concat.is_empty() {
+                        // Empty first arm in a group, e.g., (|a)
+                        return Err(self.err(ErrorKind::EmptyAlternate));
+                    }
+                    self.stack.push(e);
+                    return alts(vec![rev_concat(concat)]);
+                }
+                Some(Build::Expr(Expr::Alternate(mut es))) => {
+                    if concat.is_empty() {
+                        // Empty arm between two `|`, e.g., a||
+                        return Err(self.err(ErrorKind::EmptyAlternate));
+                    }
+                    es.push(rev_concat(concat));
+                    return alts(es);
+                }
+                Some(Build::Expr(e)) => { concat.push(e); }
+            }
+        }
+    }
+
+    // Called whenever a closing parenthesis (`)`) is found.
+    //
+    // This pops the expression stack until:
+    //
+    //  1. The stack is empty. An error is reported because this
+    //     indicates an unopened parenthesis.
+    //  2. An opening parenthesis is found. Pop the opening parenthesis
+    //     and push a `Group` expression.
+    //  3. An alternate (`|`) is found. Pop the existing alternation
+    //     and add an arm to it in place. Pop one more item from the stack.
+    //     If the stack was empty, then report an unopened parenthesis
+    //     error, otherwise assume it is an opening parenthesis and
+    //     push a `Group` expression with the popped alternation.
+    //     (We can assume this is an opening parenthesis because an
+    //     alternation either corresponds to the entire Regex or it
+    //     corresponds to an entire group. This is guaranteed by the
+    //     `alternate` method.)
+    //
+    // Each "arm" in the above corresponds to the concatenation of all
+    // popped expressions.
+    //
+    // Neither empty arms nor empty groups are allowed.
+    fn close_paren(&mut self) -> Result<(Flags, Build)> {
+        let mut concat = vec![];
+        loop {
+            match self.stack.pop() {
+                // No group is open, e.g., )
+                None => return Err(self.err(ErrorKind::UnopenedParen)),
+                Some(Build::LeftParen { i, name, old_flags, .. }) => {
+                    if concat.is_empty() {
+                        // Nothing inside the group, e.g., ()
+                        return Err(self.err(ErrorKind::EmptyGroup));
+                    }
+                    return Ok((old_flags, Build::Expr(Expr::Group {
+                        e: Box::new(rev_concat(concat)),
+                        i: i,
+                        name: name,
+                    })));
+                }
+                Some(Build::Expr(Expr::Alternate(mut es))) => {
+                    if concat.is_empty() {
+                        // Empty last arm, e.g., (a|)
+                        return Err(self.err(ErrorKind::EmptyAlternate));
+                    }
+                    es.push(rev_concat(concat));
+                    match self.stack.pop() {
+                        // Alternation outside of any group, e.g., a|b)
+                        None => return Err(self.err(ErrorKind::UnopenedParen)),
+                        Some(Build::Expr(_)) => unreachable!(),
+                        Some(Build::LeftParen { i, name, old_flags, .. }) => {
+                            return Ok((old_flags, Build::Expr(Expr::Group {
+                                e: Box::new(Expr::Alternate(es)),
+                                i: i,
+                                name: name,
+                            })));
+                        }
+                    }
+                }
+                Some(Build::Expr(e)) => { concat.push(e); }
+            }
+        }
+    }
+
+    // Called only when the parser reaches the end of input.
+    //
+    // This pops the expression stack until:
+    //
+    //  1. The stack is empty. Return concatenation of popped
+    //     expressions. This concatenation may be empty!
+    //  2. An alternation is found. Pop the alternation and push
+    //     a new arm. Return the alternation as the entire Regex.
+    //
+    // If an opening parenthesis is popped, then an error is
+    // returned since it indicates an unclosed parenthesis.
+    fn finish_concat(&mut self) -> Result<Expr> {
+        let mut concat = vec![];
+        loop {
+            match self.stack.pop() {
+                None => { return Ok(rev_concat(concat)); }
+                Some(Build::LeftParen{ chari, ..}) => {
+                    // Reported at the paren's own `chari`, e.g., a(b
+                    return Err(self.errat(chari, ErrorKind::UnclosedParen));
+                }
+                Some(Build::Expr(Expr::Alternate(mut es))) => {
+                    if concat.is_empty() {
+                        // Empty last arm, e.g., a|
+                        return Err(self.err(ErrorKind::EmptyAlternate));
+                    }
+                    es.push(rev_concat(concat));
+                    return Ok(Expr::Alternate(es));
+                }
+                Some(Build::Expr(e)) => { concat.push(e); }
+            }
+        }
+    }
+}
+
+impl Build {
+    fn is_empty(&self) -> bool {
+        match *self {
+            Build::Expr(Expr::Empty) => true, // only the empty expression
+            _ => false,
+        }
+    }
+}
+
+// Make it ergonomic to conditionally bump the parser.
+// i.e., `bump_if('a')` or `bump_if("abc")`.
+trait Bumpable {
+    fn match_end(self, p: &Parser) -> usize; // chars to advance; 0 = no match
+}
+
+impl Bumpable for char {
+    fn match_end(self, p: &Parser) -> usize {
+        let mut chars = p.chars();
+        if chars.next().map(|c| c == self).unwrap_or(false) {
+            chars.cur // includes anything skipped before the matched char
+        } else {
+            0
+        }
+    }
+}
+
+impl<'a> Bumpable for &'a str {
+    fn match_end(self, p: &Parser) -> usize {
+        let mut search = self.chars();
+        let mut rest = p.chars();
+        let mut count = 0;
+        loop {
+            match (rest.next(), search.next()) {
+                (Some(c1), Some(c2)) if c1 == c2 => count = rest.cur,
+                (_, None) => return count, // all of `self` was matched
+                _ => return 0, // mismatch, or the input ran out first
+            }
+        }
+    }
+}
+
+impl<F: FnMut(char) -> bool> Bumpable for F {
+    fn match_end(mut self, p: &Parser) -> usize {
+        let mut chars = p.chars();
+        let mut count = 0;
+        while let Some(c) = chars.next() {
+            if !self(c) {
+                break // the first rejected char ends the match
+            }
+            count = chars.cur; // end of the last accepted char
+        }
+        count
+    }
+}
+
+// Builds the concatenation of `exprs`, which are given in reverse order.
+// No exprs yield `Empty`, one is returned as is; `Concat` needs 2 or more.
+fn rev_concat(mut exprs: Vec<Expr>) -> Expr {
+    match exprs.len() {
+        0 => Expr::Empty,
+        1 => exprs.pop().unwrap(),
+        _ => {
+            exprs.reverse();
+            Expr::Concat(exprs)
+        }
+    }
+}
+
+// Returns true iff the given character is allowed in a capture name.
+// Note that the first char of a capture name must not be numeric.
+fn is_valid_capture_char(c: char) -> bool {
+    c == '_' || (c >= '0' && c <= '9')
+    || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+}
+
+/// Returns true iff the given character has significance in a regex.
+#[doc(hidden)]
+pub fn is_punct(c: char) -> bool {
+    match c {
+        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
+        '[' | ']' | '{' | '}' | '^' | '$' => true,
+        _ => false,
+    }
+}
+
+fn checkadd(x: usize, y: usize) -> usize {
+    x.checked_add(y).expect("regex length overflow") // panics on overflow
+}
+
+fn unicode_class(name: &str) -> Option<CharClass> {
+    UNICODE_CLASSES.binary_search_by(|&(s, _)| s.cmp(name)).ok().map(|i| {
+        raw_class_to_expr(UNICODE_CLASSES[i].1) // ranges of the entry found
+    }) // `None` when no entry is called `name`
+}
+
+fn ascii_class(name: &str) -> Option<CharClass> {
+    ASCII_CLASSES.binary_search_by(|&(s, _)| s.cmp(name)).ok().map(|i| {
+        raw_class_to_expr(ASCII_CLASSES[i].1) // ranges of the entry found
+    }) // `None` when no entry is called `name`
+}
+
+fn raw_class_to_expr(raw: &[(char, char)]) -> CharClass {
+    let range = |&(s, e)| ClassRange { start: s, end: e }; // one per pair
+    CharClass::new(raw.iter().map(range).collect())
+}
+
+type Class = &'static [(char, char)]; // `(start, end)` char ranges
+type NamedClasses = &'static [(&'static str, Class)]; // `(name, class)`
+
+const ASCII_CLASSES: NamedClasses = &[
+    // Classes must be in alphabetical order so that bsearch works.
+    // [:alnum:]      alphanumeric (== [0-9A-Za-z])
+    // [:alpha:]      alphabetic (== [A-Za-z])
+    // [:ascii:]      ASCII (== [\x00-\x7F])
+    // [:blank:]      blank (== [\t ])
+    // [:cntrl:]      control (== [\x00-\x1F\x7F])
+    // [:digit:]      digits (== [0-9])
+    // [:graph:]      graphical (== [!-~])
+    // [:lower:]      lower case (== [a-z])
+    // [:print:]      printable (== [ -~] == [ [:graph:]])
+    // [:punct:]      punctuation (== [!-/:-@[-`{-~])
+    // [:space:]      whitespace (== [\t\n\v\f\r ])
+    // [:upper:]      upper case (== [A-Z])
+    // [:word:]       word characters (== [0-9A-Za-z_])
+    // [:xdigit:]     hex digit (== [0-9A-Fa-f])
+    // Taken from: http://golang.org/pkg/regexp/syntax/
+    ("alnum", &ALNUM),
+    ("alpha", &ALPHA),
+    ("ascii", &ASCII),
+    ("blank", &BLANK),
+    ("cntrl", &CNTRL),
+    ("digit", &DIGIT),
+    ("graph", &GRAPH),
+    ("lower", &LOWER),
+    ("print", &PRINT),
+    ("punct", &PUNCT),
+    ("space", &SPACE),
+    ("upper", &UPPER),
+    ("word", &WORD),
+    ("xdigit", &XDIGIT),
+];
+
+const ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
+const ALPHA: Class = &[('A', 'Z'), ('a', 'z')];
+const ASCII: Class = &[('\x00', '\x7F')];
+const BLANK: Class = &[(' ', ' '), ('\t', '\t')]; // space and tab
+const CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
+const DIGIT: Class = &[('0', '9')];
+const GRAPH: Class = &[('!', '~')];
+const LOWER: Class = &[('a', 'z')];
+const PRINT: Class = &[(' ', '~')]; // GRAPH plus the space char
+const PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
+const SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'),
+                       ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')]; // \v, \f
+const UPPER: Class = &[('A', 'Z')];
+const WORD: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z'), ('_', '_')];
+const XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')];
+
+#[cfg(test)]
+mod tests {
+    use { CharClass, ClassRange, Expr, Repeater, ErrorKind };
+    use unicode::regex::{PERLD, PERLS, PERLW};
+    use super::Parser;
+    use super::{LOWER, UPPER};
+
+    static YI: &'static [(char, char)] = &[
+        ('\u{a000}', '\u{a48c}'), ('\u{a490}', '\u{a4c6}'),
+    ];
+
+    fn p(s: &str) -> Expr { Parser::parse(s).unwrap() }
+    fn lit(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: false } }
+    fn liti(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: true } }
+    fn b<T>(v: T) -> Box<T> { Box::new(v) }
+    fn c(es: &[Expr]) -> Expr { Expr::Concat(es.to_vec()) }
+
+    fn class(ranges: &[(char, char)]) -> CharClass {
+        let ranges = ranges.iter().cloned()
+                           .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
+        CharClass::new(ranges)
+    }
+
+    fn classes(classes: &[&[(char, char)]]) -> CharClass {
+        let mut cls = CharClass::empty();
+        for &ranges in classes {
+            cls.ranges.extend(class(ranges));
+        }
+        cls.canonicalize()
+    }
+
+    #[test]
+    fn empty() {
+        assert_eq!(p(""), Expr::Empty);
+    }
+
+    #[test]
+    fn literal() {
+        assert_eq!(p("a"), lit('a'));
+    }
+
+    #[test]
+    fn literal_string() {
+        assert_eq!(p("ab"), Expr::Concat(vec![lit('a'), lit('b')]));
+    }
+
+    #[test]
+    fn start_literal() {
+        assert_eq!(p("^a"), Expr::Concat(vec![
+            Expr::StartText,
+            Expr::Literal { chars: vec!['a'], casei: false },
+        ]));
+    }
+
+    #[test]
+    fn repeat_zero_or_one_greedy() {
+        assert_eq!(p("a?"), Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::ZeroOrOne,
+            greedy: true,
+        });
+    }
+
+    #[test]
+    fn repeat_zero_or_one_greedy_concat() {
+        assert_eq!(p("ab?"), Expr::Concat(vec![
+            lit('a'),
+            Expr::Repeat {
+                e: b(lit('b')),
+                r: Repeater::ZeroOrOne,
+                greedy: true,
+            },
+        ]));
+    }
+
+    #[test]
+    fn repeat_zero_or_one_nongreedy() {
+        assert_eq!(p("a??"), Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::ZeroOrOne,
+            greedy: false,
+        });
+    }
+
+    #[test]
+    fn repeat_one_or_more_greedy() {
+        assert_eq!(p("a+"), Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::OneOrMore,
+            greedy: true,
+        });
+    }
+
+    // `a+?` is the non-greedy form of one-or-more.
+    #[test]
+    fn repeat_one_or_more_nongreedy() {
+        let expected = Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::OneOrMore,
+            greedy: false,
+        };
+        assert_eq!(p("a+?"), expected);
+    }
+
+    // `a*` is a greedy zero-or-more repetition of the literal `a`.
+    #[test]
+    fn repeat_zero_or_more_greedy() {
+        let expected = Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::ZeroOrMore,
+            greedy: true,
+        };
+        assert_eq!(p("a*"), expected);
+    }
+
+    // `a*?` is the non-greedy form of zero-or-more.
+    #[test]
+    fn repeat_zero_or_more_nongreedy() {
+        let expected = Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::ZeroOrMore,
+            greedy: false,
+        };
+        assert_eq!(p("a*?"), expected);
+    }
+
+    // `{5}` is a range repeater whose minimum and maximum are both 5.
+    #[test]
+    fn repeat_counted_exact() {
+        let exactly_five = Repeater::Range { min: 5, max: Some(5) };
+        assert_eq!(p("a{5}"), Expr::Repeat {
+            e: b(lit('a')),
+            r: exactly_five,
+            greedy: true,
+        });
+    }
+
+    // `{5,}` has a minimum of 5 and no maximum (`max: None`).
+    #[test]
+    fn repeat_counted_min() {
+        let five_or_more = Repeater::Range { min: 5, max: None };
+        assert_eq!(p("a{5,}"), Expr::Repeat {
+            e: b(lit('a')),
+            r: five_or_more,
+            greedy: true,
+        });
+    }
+
+    // `{5,10}` carries both bounds in the range repeater.
+    #[test]
+    fn repeat_counted_min_max() {
+        let five_to_ten = Repeater::Range { min: 5, max: Some(10) };
+        assert_eq!(p("a{5,10}"), Expr::Repeat {
+            e: b(lit('a')),
+            r: five_to_ten,
+            greedy: true,
+        });
+    }
+
+    // A `?` after `{5}` turns the counted repetition non-greedy.
+    #[test]
+    fn repeat_counted_exact_nongreedy() {
+        let exactly_five = Repeater::Range { min: 5, max: Some(5) };
+        assert_eq!(p("a{5}?"), Expr::Repeat {
+            e: b(lit('a')),
+            r: exactly_five,
+            greedy: false,
+        });
+    }
+
+    // A `?` after `{5,}` turns the open-ended repetition non-greedy.
+    #[test]
+    fn repeat_counted_min_nongreedy() {
+        let five_or_more = Repeater::Range { min: 5, max: None };
+        assert_eq!(p("a{5,}?"), Expr::Repeat {
+            e: b(lit('a')),
+            r: five_or_more,
+            greedy: false,
+        });
+    }
+
+    // A `?` after `{5,10}` turns the bounded repetition non-greedy.
+    #[test]
+    fn repeat_counted_min_max_nongreedy() {
+        let five_to_ten = Repeater::Range { min: 5, max: Some(10) };
+        assert_eq!(p("a{5,10}?"), Expr::Repeat {
+            e: b(lit('a')),
+            r: five_to_ten,
+            greedy: false,
+        });
+    }
+
+    // Spaces around the numbers and the comma inside `{...}` are accepted.
+    #[test]
+    fn repeat_counted_whitespace() {
+        let exact = Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::Range { min: 5, max: Some(5) },
+            greedy: true,
+        };
+        assert_eq!(p("a{ 5   }"), exact);
+
+        let bounded = Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::Range { min: 5, max: Some(10) },
+            greedy: true,
+        };
+        assert_eq!(p("a{ 5 , 10 }"), bounded);
+    }
+
+    // A plain parenthesized group is unnamed and gets capture index 1.
+    #[test]
+    fn group_literal() {
+        let expected = Expr::Group {
+            e: b(lit('a')),
+            i: Some(1),
+            name: None,
+        };
+        assert_eq!(p("(a)"), expected);
+    }
+
+    // The body of a group may itself be a concatenation.
+    #[test]
+    fn group_literal_concat() {
+        let body = c(&[lit('a'), lit('b')]);
+        assert_eq!(p("(ab)"), Expr::Group {
+            e: b(body),
+            i: Some(1),
+            name: None,
+        });
+    }
+
+    // `a|b` parses to an alternation with two branches.
+    #[test]
+    fn alt_two() {
+        let branches = vec![lit('a'), lit('b')];
+        assert_eq!(p("a|b"), Expr::Alternate(branches));
+    }
+
+    // Three branches end up in one flat `Expr::Alternate`.
+    #[test]
+    fn alt_many() {
+        let branches = vec![lit('a'), lit('b'), lit('c')];
+        assert_eq!(p("a|b|c"), Expr::Alternate(branches));
+    }
+
+    // Each branch of an alternation may be a concatenation.
+    #[test]
+    fn alt_many_concat() {
+        let ab = c(&[lit('a'), lit('b')]);
+        let bc = c(&[lit('b'), lit('c')]);
+        let cd = c(&[lit('c'), lit('d')]);
+        assert_eq!(p("ab|bc|cd"), Expr::Alternate(vec![ab, bc, cd]));
+    }
+
+    // An alternation inside parentheses becomes the body of the group.
+    #[test]
+    fn alt_group_two() {
+        let alt = Expr::Alternate(vec![lit('a'), lit('b')]);
+        assert_eq!(p("(a|b)"), Expr::Group {
+            e: b(alt),
+            i: Some(1),
+            name: None,
+        });
+    }
+
+    // A three-way alternation inside a capture group.
+    #[test]
+    fn alt_group_many() {
+        let alt = Expr::Alternate(vec![lit('a'), lit('b'), lit('c')]);
+        assert_eq!(p("(a|b|c)"), Expr::Group {
+            e: b(alt),
+            i: Some(1),
+            name: None,
+        });
+    }
+
+    // A grouped alternation whose branches are concatenations.
+    #[test]
+    fn alt_group_many_concat() {
+        let ab = c(&[lit('a'), lit('b')]);
+        let bc = c(&[lit('b'), lit('c')]);
+        let cd = c(&[lit('c'), lit('d')]);
+        assert_eq!(p("(ab|bc|cd)"), Expr::Group {
+            e: b(Expr::Alternate(vec![ab, bc, cd])),
+            i: Some(1),
+            name: None,
+        });
+    }
+
+    // Nested capture groups are numbered 1, 2, 3 from the outermost group
+    // inward in this pattern.
+    #[test]
+    fn alt_group_nested() {
+        let innermost = Expr::Group {
+            e: b(c(&[lit('c'), lit('d')])),
+            i: Some(3),
+            name: None,
+        };
+        let middle = Expr::Group {
+            e: b(Expr::Alternate(vec![c(&[lit('b'), lit('c')]), innermost])),
+            i: Some(2),
+            name: None,
+        };
+        let outermost = Expr::Group {
+            e: b(Expr::Alternate(vec![c(&[lit('a'), lit('b')]), middle])),
+            i: Some(1),
+            name: None,
+        };
+        assert_eq!(p("(ab|(bc|(cd)))"), outermost);
+    }
+
+    // `(?P<foo>...)` records the name and still gets a capture index.
+    #[test]
+    fn group_name() {
+        let expected = Expr::Group {
+            e: b(lit('a')),
+            i: Some(1),
+            name: Some("foo".into()),
+        };
+        assert_eq!(p("(?P<foo>a)"), expected);
+    }
+
+    // `(?:...)` yields a group with no capture index and no name.
+    #[test]
+    fn group_no_capture() {
+        let expected = Expr::Group {
+            e: b(lit('a')),
+            i: None,
+            name: None,
+        };
+        assert_eq!(p("(?:a)"), expected);
+    }
+
+    // `(?i:...)` is a non-capturing group whose body is parsed with the
+    // `i` flag on (the literal is the one built by `liti`).
+    #[test]
+    fn group_flags() {
+        let expected = Expr::Group {
+            e: b(liti('a')),
+            i: None,
+            name: None,
+        };
+        assert_eq!(p("(?i:a)"), expected);
+    }
+
+    // A flag set with `(?i:...)` does not leak past the closing paren: the
+    // trailing `a` is a plain `lit`.
+    #[test]
+    fn group_flags_returned() {
+        let folded_group = Expr::Group {
+            e: b(liti('a')),
+            i: None,
+            name: None,
+        };
+        let expected = Expr::Concat(vec![folded_group, lit('a')]);
+        assert_eq!(p("(?i:a)a"), expected);
+    }
+
+    // Clearing `i` inside `(?-i:...)` only affects the group body; the
+    // outer `(?i)` is still in force for the trailing `a`.
+    #[test]
+    fn group_flags_retained() {
+        let exact_group = Expr::Group {
+            e: b(lit('a')),
+            i: None,
+            name: None,
+        };
+        let expected = Expr::Concat(vec![exact_group, liti('a')]);
+        assert_eq!(p("(?i)(?-i:a)a"), expected);
+    }
+
+    // A bare `(?i)` emits no expression of its own; only the literal that
+    // follows it (built by `liti`) appears in the result.
+    #[test]
+    fn flags_inline() {
+        let ast = p("(?i)a");
+        assert_eq!(ast, liti('a'));
+    }
+
+    // Several flags can be set at once: with `is`, `a` becomes `liti('a')`
+    // and `.` becomes `Expr::AnyChar`.
+    #[test]
+    fn flags_inline_multiple() {
+        let expected = Expr::Concat(vec![liti('a'), Expr::AnyChar]);
+        assert_eq!(p("(?is)a."), expected);
+    }
+
+    // With `m` on, `^` is `StartLine`; after `(?-m)`, `$` is `EndText`.
+    #[test]
+    fn flags_inline_multiline() {
+        let expected = Expr::Concat(vec![Expr::StartLine, Expr::EndText]);
+        assert_eq!(p("(?m)^(?-m)$"), expected);
+    }
+
+    // With `U` on, the greediness of `*` and `*?` is swapped; `(?i-U)`
+    // restores the usual meaning while turning case-insensitivity on.
+    #[test]
+    fn flags_inline_swap_greed() {
+        let star = |inner: Expr, greedy: bool| Expr::Repeat {
+            e: b(inner),
+            r: Repeater::ZeroOrMore,
+            greedy: greedy,
+        };
+        let expected = Expr::Concat(vec![
+            star(lit('a'), false),
+            star(lit('a'), true),
+            star(liti('a'), true),
+            star(liti('a'), false),
+        ]);
+        assert_eq!(p("(?U)a*a*?(?i-U)a*a*?"), expected);
+    }
+
+    // `(?i-s)` keeps `i` on and turns `s` off, so the second `.` becomes
+    // `Expr::AnyCharNoNL`.
+    #[test]
+    fn flags_inline_multiple_negate_one() {
+        let expected = Expr::Concat(vec![
+            liti('a'),
+            Expr::AnyChar,
+            liti('a'),
+            Expr::AnyCharNoNL,
+        ]);
+        assert_eq!(p("(?is)a.(?i-s)a."), expected);
+    }
+
+    // `(?-i)` switches case-insensitivity back off for what follows.
+    #[test]
+    fn flags_inline_negate() {
+        let expected = Expr::Concat(vec![liti('a'), lit('a')]);
+        assert_eq!(p("(?i)a(?-i)a"), expected);
+    }
+
+    // A flag set by `(?i)` inside a capture group lasts only until that
+    // group closes.
+    #[test]
+    fn flags_group_inline() {
+        let group = Expr::Group {
+            e: b(c(&[lit('a'), liti('a')])),
+            i: Some(1),
+            name: None,
+        };
+        let expected = Expr::Concat(vec![group, lit('a')]);
+        assert_eq!(p("(a(?i)a)a"), expected);
+    }
+
+    // Clearing `i` inside a capture group does not affect the enclosing
+    // scope: the trailing `a` is still case-insensitive.
+    #[test]
+    fn flags_group_inline_retain() {
+        let group = Expr::Group {
+            e: b(lit('a')),
+            i: Some(1),
+            name: None,
+        };
+        let expected = Expr::Concat(vec![group, liti('a')]);
+        assert_eq!(p("(?i)((?-i)a)a"), expected);
+    }
+
+    // The single-letter escapes `\a \f \t \n \r \v` map to the control
+    // characters listed below.
+    #[test]
+    fn escape_simple() {
+        let expected = Expr::Concat(vec![
+            lit('\x07'),
+            lit('\x0C'),
+            lit('\t'),
+            lit('\n'),
+            lit('\r'),
+            lit('\x0B'),
+        ]);
+        assert_eq!(p(r"\a\f\t\n\r\v"), expected);
+    }
+
+    // `\A`, `\z`, `\b` and `\B` map to the four assertion expressions.
+    #[test]
+    fn escape_boundaries() {
+        let expected = Expr::Concat(vec![
+            Expr::StartText,
+            Expr::EndText,
+            Expr::WordBoundary,
+            Expr::NotWordBoundary,
+        ]);
+        assert_eq!(p(r"\A\z\b\B"), expected);
+    }
+
+    // Every escaped metacharacter parses to the corresponding literal.
+    #[test]
+    fn escape_punctuation() {
+        let ast = p(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$");
+        let expected = Expr::Concat(vec![
+            lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
+            lit('('), lit(')'), lit('|'), lit('['), lit(']'),
+            lit('{'), lit('}'), lit('^'), lit('$'),
+        ]);
+        assert_eq!(ast, expected);
+    }
+
+    // `\123` is the character `S`; in `\1234` only the first three digits
+    // form the escape and the `4` is a separate literal.
+    #[test]
+    fn escape_octal() {
+        let three_digits = p(r"\123");
+        assert_eq!(three_digits, lit('S'));
+        let four_digits = p(r"\1234");
+        assert_eq!(four_digits, Expr::Concat(vec![lit('S'), lit('4')]));
+    }
+
+    // `\x53` is `S`; an un-braced `\x` escape takes exactly two hex digits,
+    // so the `4` in `\x534` is a separate literal.
+    #[test]
+    fn escape_hex2() {
+        let two_digits = p(r"\x53");
+        assert_eq!(two_digits, lit('S'));
+        let three_digits = p(r"\x534");
+        assert_eq!(three_digits, Expr::Concat(vec![lit('S'), lit('4')]));
+    }
+
+    // The braced form `\x{...}` ends at `}` and can name code points past
+    // two hex digits, such as U+2603.
+    #[test]
+    fn escape_hex() {
+        let short = p(r"\x{53}");
+        assert_eq!(short, lit('S'));
+        let followed = p(r"\x{53}4");
+        assert_eq!(followed, Expr::Concat(vec![lit('S'), lit('4')]));
+        let snowman = p(r"\x{2603}");
+        assert_eq!(snowman, lit('\u{2603}'));
+    }
+
+    // `\p{Yi}` parses to the class built from the `YI` table.
+    #[test]
+    fn escape_unicode_name() {
+        let yi = class(YI);
+        assert_eq!(p(r"\p{Yi}"), Expr::Class(yi));
+    }
+
+    // The one-letter form `\pZ` parses to the class with the ranges below.
+    #[test]
+    fn escape_unicode_letter() {
+        let z = class(&[
+            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
+            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+        ]);
+        assert_eq!(p(r"\pZ"), Expr::Class(z));
+    }
+
+    // Under `(?i)`, `\p{Yi}` parses to the case-folded `YI` class.
+    #[test]
+    fn escape_unicode_name_case_fold() {
+        let folded = class(YI).case_fold();
+        assert_eq!(p(r"(?i)\p{Yi}"), Expr::Class(folded));
+    }
+
+    // Under `(?i)`, `\pZ` parses to the case-folded form of its class.
+    #[test]
+    fn escape_unicode_letter_case_fold() {
+        let z = class(&[
+            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
+            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+        ]);
+        assert_eq!(p(r"(?i)\pZ"), Expr::Class(z.case_fold()));
+    }
+
+    // The upper-case `\P{Yi}` parses to the negated `YI` class.
+    #[test]
+    fn escape_unicode_name_negate() {
+        let not_yi = class(YI).negate();
+        assert_eq!(p(r"\P{Yi}"), Expr::Class(not_yi));
+    }
+
+    // The upper-case `\PZ` parses to the negation of the `\pZ` class.
+    #[test]
+    fn escape_unicode_letter_negate() {
+        let z = class(&[
+            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
+            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+        ]);
+        assert_eq!(p(r"\PZ"), Expr::Class(z.negate()));
+    }
+
+    // Under `(?i)`, `\P{Yi}` is the `YI` class negated and then case-folded.
+    #[test]
+    fn escape_unicode_name_negate_case_fold() {
+        let expected = class(YI).negate().case_fold();
+        assert_eq!(p(r"(?i)\P{Yi}"), Expr::Class(expected));
+    }
+
+    // Under `(?i)`, `\PZ` is the `\pZ` class negated and then case-folded.
+    #[test]
+    fn escape_unicode_letter_negate_case_fold() {
+        let z = class(&[
+            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
+            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+        ]);
+        assert_eq!(p(r"(?i)\PZ"), Expr::Class(z.negate().case_fold()));
+    }
+
+    // `\d` parses to the class built from the `PERLD` table.
+    #[test]
+    fn escape_perl_d() {
+        let expected = class(PERLD);
+        assert_eq!(p(r"\d"), Expr::Class(expected));
+    }
+
+    // `\s` parses to the class built from the `PERLS` table.
+    #[test]
+    fn escape_perl_s() {
+        let expected = class(PERLS);
+        assert_eq!(p(r"\s"), Expr::Class(expected));
+    }
+
+    // `\w` parses to the class built from the `PERLW` table.
+    #[test]
+    fn escape_perl_w() {
+        let expected = class(PERLW);
+        assert_eq!(p(r"\w"), Expr::Class(expected));
+    }
+
+    // `\D` parses to the negated `PERLD` class.
+    #[test]
+    fn escape_perl_d_negate() {
+        let expected = class(PERLD).negate();
+        assert_eq!(p(r"\D"), Expr::Class(expected));
+    }
+
+    // `\S` parses to the negated `PERLS` class.
+    #[test]
+    fn escape_perl_s_negate() {
+        let expected = class(PERLS).negate();
+        assert_eq!(p(r"\S"), Expr::Class(expected));
+    }
+
+    // `\W` parses to the negated `PERLW` class.
+    #[test]
+    fn escape_perl_w_negate() {
+        let expected = class(PERLW).negate();
+        assert_eq!(p(r"\W"), Expr::Class(expected));
+    }
+
+    // Under `(?i)`, `\d` parses to the case-folded `PERLD` class.
+    #[test]
+    fn escape_perl_d_case_fold() {
+        let expected = class(PERLD).case_fold();
+        assert_eq!(p(r"(?i)\d"), Expr::Class(expected));
+    }
+
+    // Under `(?i)`, `\s` parses to the case-folded `PERLS` class.
+    #[test]
+    fn escape_perl_s_case_fold() {
+        let expected = class(PERLS).case_fold();
+        assert_eq!(p(r"(?i)\s"), Expr::Class(expected));
+    }
+
+    // Under `(?i)`, `\w` parses to the case-folded `PERLW` class.
+    #[test]
+    fn escape_perl_w_case_fold() {
+        let expected = class(PERLW).case_fold();
+        assert_eq!(p(r"(?i)\w"), Expr::Class(expected));
+    }
+
+    // Under `(?i)`, `\D` is the `PERLD` class negated and then case-folded.
+    #[test]
+    fn escape_perl_d_case_fold_negate() {
+        let expected = class(PERLD).negate().case_fold();
+        assert_eq!(p(r"(?i)\D"), Expr::Class(expected));
+    }
+
+    // Under `(?i)`, `\S` is the `PERLS` class negated and then case-folded.
+    #[test]
+    fn escape_perl_s_case_fold_negate() {
+        let expected = class(PERLS).negate().case_fold();
+        assert_eq!(p(r"(?i)\S"), Expr::Class(expected));
+    }
+
+    // Under `(?i)`, `\W` is the `PERLW` class negated and then case-folded.
+    #[test]
+    fn escape_perl_w_case_fold_negate() {
+        let expected = class(PERLW).negate().case_fold();
+        assert_eq!(p(r"(?i)\W"), Expr::Class(expected));
+    }
+
+    // A bracketed class with one member, written as a plain character, as
+    // an escape, or as a raw newline, is a single one-character range.
+    #[test]
+    fn class_singleton() {
+        let single = |ch: char| Expr::Class(class(&[(ch, ch)]));
+        assert_eq!(p(r"[a]"), single('a'));
+        assert_eq!(p(r"[\x00]"), single('\x00'));
+        assert_eq!(p(r"[\n]"), single('\n'));
+        assert_eq!(p("[\n]"), single('\n'));
+    }
+
+    // `[^x]` yields the ranges on either side of `x`, spanning from
+    // `\x00` up to `\u{10FFFF}`.
+    #[test]
+    fn class_singleton_negate() {
+        let not_a = class(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]);
+        assert_eq!(p(r"[^a]"), Expr::Class(not_a));
+
+        let not_nul = class(&[('\x01', '\u{10FFFF}')]);
+        assert_eq!(p(r"[^\x00]"), Expr::Class(not_nul));
+
+        let not_newline = class(&[('\x00', '\x09'), ('\x0b', '\u{10FFFF}')]);
+        assert_eq!(p(r"[^\n]"), Expr::Class(not_newline.clone()));
+        assert_eq!(p("[^\n]"), Expr::Class(not_newline));
+    }
+
+    // A class escape as the only member of a bracketed class yields that
+    // class unchanged.
+    #[test]
+    fn class_singleton_class() {
+        let digits = class(PERLD);
+        assert_eq!(p(r"[\d]"), Expr::Class(digits));
+        let yi = class(YI);
+        assert_eq!(p(r"[\p{Yi}]"), Expr::Class(yi));
+    }
+
+    // `[^\d]`, `[^\w]` and `[^\s]` are the negations of the Perl classes.
+    #[test]
+    fn class_singleton_class_negate() {
+        let negated = |table| Expr::Class(class(table).negate());
+        assert_eq!(p(r"[^\d]"), negated(PERLD));
+        assert_eq!(p(r"[^\w]"), negated(PERLW));
+        assert_eq!(p(r"[^\s]"), negated(PERLS));
+    }
+
+    // Negating an already-negated Perl class gives the original class.
+    #[test]
+    fn class_singleton_class_negate_negate() {
+        let plain = |table| Expr::Class(class(table));
+        assert_eq!(p(r"[^\D]"), plain(PERLD));
+        assert_eq!(p(r"[^\W]"), plain(PERLW));
+        assert_eq!(p(r"[^\S]"), plain(PERLS));
+    }
+
+    // Under `(?i)`, a lone class escape in brackets is case-folded.
+    #[test]
+    fn class_singleton_class_casei() {
+        let digits = class(PERLD).case_fold();
+        assert_eq!(p(r"(?i)[\d]"), Expr::Class(digits));
+        let yi = class(YI).case_fold();
+        assert_eq!(p(r"(?i)[\p{Yi}]"), Expr::Class(yi));
+    }
+
+    // Under `(?i)`, a negated bracketed Perl class is the table negated
+    // and then case-folded.
+    #[test]
+    fn class_singleton_class_negate_casei() {
+        let expected = |table| Expr::Class(class(table).negate().case_fold());
+        assert_eq!(p(r"(?i)[^\d]"), expected(PERLD));
+        assert_eq!(p(r"(?i)[^\w]"), expected(PERLW));
+        assert_eq!(p(r"(?i)[^\s]"), expected(PERLS));
+    }
+
+    // Under `(?i)`, a doubly negated Perl class equals the case-folded
+    // original class.
+    #[test]
+    fn class_singleton_class_negate_negate_casei() {
+        let folded = |table| Expr::Class(class(table).case_fold());
+        assert_eq!(p(r"(?i)[^\D]"), folded(PERLD));
+        assert_eq!(p(r"(?i)[^\W]"), folded(PERLW));
+        assert_eq!(p(r"(?i)[^\S]"), folded(PERLS));
+    }
+
+    // Two class escapes inside brackets are combined into one class.
+    #[test]
+    fn class_multiple_class() {
+        let combined = classes(&[PERLD, YI]);
+        assert_eq!(p(r"[\d\p{Yi}]"), Expr::Class(combined));
+    }
+
+    // A leading `^` negates the combination of both member classes.
+    #[test]
+    fn class_multiple_class_negate() {
+        let combined = classes(&[PERLD, YI]);
+        assert_eq!(p(r"[^\d\p{Yi}]"), Expr::Class(combined.negate()));
+    }
+
+    // `[^\D\P{Yi}]` is the negation of the merge of the two negated classes.
+    #[test]
+    fn class_multiple_class_negate_negate() {
+        let merged = CharClass::empty()
+            .merge(class(PERLD).negate())
+            .merge(class(YI).negate());
+        assert_eq!(p(r"[^\D\P{Yi}]"), Expr::Class(merged.negate()));
+    }
+
+    // Under `(?i)`, the combined class is case-folded.
+    #[test]
+    fn class_multiple_class_casei() {
+        let combined = classes(&[PERLD, YI]);
+        assert_eq!(p(r"(?i)[\d\p{Yi}]"), Expr::Class(combined.case_fold()));
+    }
+
+    // Under `(?i)`, the negated combined class is negated first and then
+    // case-folded.
+    #[test]
+    fn class_multiple_class_negate_casei() {
+        let expected = classes(&[PERLD, YI]).negate().case_fold();
+        assert_eq!(p(r"(?i)[^\d\p{Yi}]"), Expr::Class(expected));
+    }
+
+    // Under `(?i)`, `[^\D\P{Yi}]` is the merge of the two negated classes,
+    // negated and then case-folded.
+    #[test]
+    fn class_multiple_class_negate_negate_casei() {
+        let merged = CharClass::empty()
+            .merge(class(PERLD).negate())
+            .merge(class(YI).negate());
+        let expected = merged.negate().case_fold();
+        assert_eq!(p(r"(?i)[^\D\P{Yi}]"), Expr::Class(expected));
+    }
+
+    // A `-` that follows a class escape is a literal hyphen, not the start
+    // of a range, whether or not another character comes after it.
+    #[test]
+    fn class_class_hypen() {
+        let yi_hyphen = classes(&[&[('-', '-')], YI]);
+        assert_eq!(p(r"[\p{Yi}-]"), Expr::Class(yi_hyphen));
+
+        let yi_hyphen_a = classes(&[&[('-', '-')], &[('a', 'a')], YI]);
+        assert_eq!(p(r"[\p{Yi}-a]"), Expr::Class(yi_hyphen_a));
+    }
+
+    // A `]` directly after the opening `[` is a class member; elsewhere it
+    // closes the class, leaving any following `]` as a literal.
+    #[test]
+    fn class_brackets() {
+        let close_only = class(&[(']', ']')]);
+        assert_eq!(p("[]]"), Expr::Class(close_only));
+
+        let both = class(&[('[', '['), (']', ']')]);
+        assert_eq!(p("[][]"), Expr::Class(both));
+
+        let open_only = Expr::Class(class(&[('[', '[')]));
+        assert_eq!(p("[[]]"), c(&[open_only, lit(']')]));
+    }
+
+    // `[]-]` is a class of `-` and `]`; in `[-]]` the first `]` closes the
+    // class and the second is a literal.
+    #[test]
+    fn class_brackets_hypen() {
+        let hyphen_and_close = class(&[('-', '-'), (']', ']')]);
+        assert_eq!(p("[]-]"), Expr::Class(hyphen_and_close));
+
+        let hyphen_only = Expr::Class(class(&[('-', '-')]));
+        assert_eq!(p("[-]]"), c(&[hyphen_only, lit(']')]));
+    }
+
+    // Overlapping ranges (`a-f`, `d-h`) and adjacent ranges (`a-f`, `g-m`)
+    // are each merged into one range.
+    #[test]
+    fn class_overlapping() {
+        let a_to_h = class(&[('a', 'h')]);
+        assert_eq!(p("[a-fd-h]"), Expr::Class(a_to_h));
+        let a_to_m = class(&[('a', 'm')]);
+        assert_eq!(p("[a-fg-m]"), Expr::Class(a_to_m));
+    }
+
+    // `[:upper:]` is accepted both bare and nested inside another pair of
+    // brackets; both forms yield the `UPPER` class.
+    #[test]
+    fn ascii_class() {
+        let bare = p("[:upper:]");
+        assert_eq!(bare, Expr::Class(class(UPPER)));
+        let nested = p("[[:upper:]]");
+        assert_eq!(nested, Expr::Class(class(UPPER)));
+    }
+
+    // `[:abc:]` is parsed as an ordinary class containing `:`, `a`, `b`
+    // and `c`.
+    #[test]
+    fn ascii_class_not() {
+        let members = class(&[(':', ':'), ('a', 'c')]);
+        assert_eq!(p("[:abc:]"), Expr::Class(members));
+    }
+
+    // Two ASCII classes inside one bracketed class are combined.
+    #[test]
+    fn ascii_class_multiple() {
+        let letters = classes(&[UPPER, LOWER]);
+        assert_eq!(p("[[:lower:][:upper:]]"), Expr::Class(letters));
+    }
+
+    // `[:^upper:]` negates the ASCII class; wrapping that in `[^...]`
+    // negates it back to `UPPER`.
+    #[test]
+    fn ascii_class_negate() {
+        let not_upper = class(UPPER).negate();
+        assert_eq!(p("[[:^upper:]]"), Expr::Class(not_upper));
+        let upper = class(UPPER);
+        assert_eq!(p("[^[:^upper:]]"), Expr::Class(upper));
+    }
+
+    // Two negated ASCII classes are merged; a leading `^` then negates the
+    // merged result.
+    #[test]
+    fn ascii_class_negate_multiple() {
+        let not_lower = class(LOWER).negate();
+        let not_upper = class(UPPER).negate();
+        let merged = CharClass::empty().merge(not_lower).merge(not_upper);
+        let inverted = merged.clone().negate();
+        assert_eq!(p("[[:^lower:][:^upper:]]"), Expr::Class(merged));
+        assert_eq!(p("[^[:^lower:][:^upper:]]"), Expr::Class(inverted));
+    }
+
+    // Under `(?i)`, both spellings of the upper-case ASCII class yield the
+    // case-folded `UPPER` class.
+    #[test]
+    fn ascii_class_case_fold() {
+        let bare = p("(?i)[:upper:]");
+        assert_eq!(bare, Expr::Class(class(UPPER).case_fold()));
+        let nested = p("(?i)[[:upper:]]");
+        assert_eq!(nested, Expr::Class(class(UPPER).case_fold()));
+    }
+
+    // Under `(?i)`, the negated ASCII class is negated and then folded;
+    // the doubly negated form is just the folded `UPPER` class.
+    #[test]
+    fn ascii_class_negate_case_fold() {
+        let not_upper = class(UPPER).negate().case_fold();
+        assert_eq!(p("(?i)[[:^upper:]]"), Expr::Class(not_upper));
+        let upper = class(UPPER).case_fold();
+        assert_eq!(p("(?i)[^[:^upper:]]"), Expr::Class(upper));
+    }
+
+    // With `x` on, the spaces between literals are dropped.
+    #[test]
+    fn ignore_space_literal() {
+        let expected = c(&[lit('a'), lit('b'), lit('c')]);
+        assert_eq!(p("(?x) a b c"), expected);
+    }
+
+    // After `(?-x)` a space is a literal again.
+    #[test]
+    fn ignore_space_literal_off() {
+        let expected = c(&[
+            lit('a'), lit('b'), lit('c'), lit(' '), lit('a'),
+        ]);
+        assert_eq!(p("(?x) a b c(?-x) a"), expected);
+    }
+
+    // With `x` on, spaces and newlines inside a bracketed class are skipped,
+    // including those around `^` and `-`. The pattern literals below span
+    // several source lines on purpose; their whitespace is part of the test.
+    #[test]
+    fn ignore_space_class() {
+        assert_eq!(p("(?x)[a
+        - z
+]"), Expr::Class(class(&[('a', 'z')])));
+        assert_eq!(p("(?x)[  ^   a
+        - z
+]"), Expr::Class(class(&[('a', 'z')]).negate()));
+    }
+
+    // With `x` on, whitespace between the backslash and the escape letter
+    // is skipped: `\ d` is `\d`, and a backslash followed by a newline and
+    // indentation before `D` is `\D`.
+    #[test]
+    fn ignore_space_escape() {
+        assert_eq!(p(r"(?x)\ d"), Expr::Class(class(PERLD)));
+        assert_eq!(p(r"(?x)\
+                     D"), Expr::Class(class(PERLD).negate()));
+    }
+
+    // With `x` on, text from `#` to the end of the line is a comment: only
+    // the two named groups and their single-literal bodies remain.
+    #[test]
+    fn ignore_space_comments() {
+        assert_eq!(p(r"(?x)(?P<foo>
+    a # comment 1
+)(?P<bar>
+    z # comment 2
+)"), Expr::Concat(vec![
+        Expr::Group {
+            e: Box::new(lit('a')),
+            i: Some(1),
+            name: Some("foo".into()),
+        },
+        Expr::Group {
+            e: Box::new(lit('z')),
+            i: Some(2),
+            name: Some("bar".into()),
+        },
+    ]));
+    }
+
+    // Inside `(?-x:...)` a `#` is a literal again; once the group closes,
+    // `x` is back on and the trailing `# sweet` is skipped as a comment.
+    #[test]
+    fn ignore_space_comments_re_enable() {
+        assert_eq!(p(r"(?x)a # hi
+(?-x:#) # sweet"), Expr::Concat(vec![
+            lit('a'),
+            Expr::Group {
+                e: Box::new(lit('#')),
+                i: None,
+                name: None,
+            },
+        ]));
+    }
+
+    // Test every single possible error case.
+
+    macro_rules! test_err {
+        ($re:expr, $pos:expr, $kind:expr) => {{
+            let err = Parser::parse($re).unwrap_err();
+            assert_eq!($pos, err.pos);
+            assert_eq!($kind, err.kind);
+            assert!($re.contains(&err.surround));
+        }}
+    }
+
+    // A `*` right after `(` has no expression to repeat.
+    #[test]
+    fn error_repeat_no_expr_simple() {
+        let expected = ErrorKind::RepeaterExpectsExpr;
+        test_err!("(*", 1, expected);
+    }
+
+    // A counted repetition right after `(` has no expression to repeat.
+    #[test]
+    fn error_repeat_no_expr_counted() {
+        let expected = ErrorKind::RepeaterExpectsExpr;
+        test_err!("({5}", 1, expected);
+    }
+
+    // A counted repetition at the very start of the pattern is rejected.
+    #[test]
+    fn error_repeat_beginning_counted() {
+        let expected = ErrorKind::RepeaterExpectsExpr;
+        test_err!("{5}", 0, expected);
+    }
+
+    // `*` cannot follow an expression that is already a repetition, nor an
+    // alternation with nothing after the `|`; the error carries the
+    // offending expression.
+    #[test]
+    fn error_repeat_illegal_exprs_simple() {
+        let starred = Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::ZeroOrMore,
+            greedy: true,
+        };
+        test_err!("a**", 2, ErrorKind::RepeaterUnexpectedExpr(starred));
+
+        let dangling_alt = Expr::Alternate(vec![lit('a')]);
+        test_err!("a|*", 2, ErrorKind::RepeaterUnexpectedExpr(dangling_alt));
+    }
+
+    // The counted form `{5}` is rejected in the same two situations as a
+    // bare `*`: after a repetition and directly after `|`.
+    #[test]
+    fn error_repeat_illegal_exprs_counted() {
+        let starred = Expr::Repeat {
+            e: b(lit('a')),
+            r: Repeater::ZeroOrMore,
+            greedy: true,
+        };
+        test_err!("a*{5}", 2, ErrorKind::RepeaterUnexpectedExpr(starred));
+
+        let dangling_alt = Expr::Alternate(vec![lit('a')]);
+        test_err!("a|{5}", 2, ErrorKind::RepeaterUnexpectedExpr(dangling_alt));
+    }
+
+    // `{}` contains no number at all.
+    #[test]
+    fn error_repeat_empty_number() {
+        let expected = ErrorKind::MissingBase10;
+        test_err!("a{}", 2, expected);
+    }
+
+    // The pattern ends before the counted repetition is closed.
+    #[test]
+    fn error_repeat_eof() {
+        let expected = ErrorKind::UnclosedRepeat;
+        test_err!("a{5", 3, expected);
+    }
+
+    // Non-numeric text running to the end of the pattern is reported as an
+    // invalid base-10 number, in either position of the repetition.
+    #[test]
+    fn error_repeat_empty_number_eof() {
+        let expected = ErrorKind::InvalidBase10("xyz".into());
+        test_err!("a{xyz", 5, expected);
+        test_err!("a{12,xyz", 8, expected);
+    }
+
+    // The count `9999999999` is rejected as an invalid base-10 number in
+    // both the minimum and the maximum position.
+    #[test]
+    fn error_repeat_invalid_number() {
+        let expected = ErrorKind::InvalidBase10("9999999999".into());
+        test_err!("a{9999999999}", 12, expected);
+        test_err!("a{1,9999999999}", 14, expected);
+    }
+
+    // Digits followed by a stray letter are not a valid number.
+    #[test]
+    fn error_repeat_invalid_number_extra() {
+        let expected = ErrorKind::InvalidBase10("12x".into());
+        test_err!("a{12x}", 5, expected);
+        test_err!("a{1,12x}", 7, expected);
+    }
+
+    // A repetition whose minimum exceeds its maximum is rejected.
+    #[test]
+    fn error_repeat_invalid_range() {
+        let expected = ErrorKind::InvalidRepeatRange { min: 2, max: 1 };
+        test_err!("a{2,1}", 5, expected);
+    }
+
+    // An alternation may not start with an empty branch.
+    #[test]
+    fn error_alternate_empty() {
+        let expected = ErrorKind::EmptyAlternate;
+        test_err!("|a", 0, expected);
+    }
+
+    // An empty first branch is also rejected inside a group.
+    #[test]
+    fn error_alternate_empty_with_group() {
+        let expected = ErrorKind::EmptyAlternate;
+        test_err!("(|a)", 1, expected);
+    }
+
+    // Two `|` in a row leave an empty branch between them.
+    #[test]
+    fn error_alternate_empty_with_alternate() {
+        let expected = ErrorKind::EmptyAlternate;
+        test_err!("a||", 2, expected);
+    }
+
+    // A lone `)` has no matching `(`.
+    #[test]
+    fn error_close_paren_unopened_empty() {
+        let expected = ErrorKind::UnopenedParen;
+        test_err!(")", 0, expected);
+    }
+
+    // A `)` after some literals still has no matching `(`.
+    #[test]
+    fn error_close_paren_unopened() {
+        let expected = ErrorKind::UnopenedParen;
+        test_err!("ab)", 2, expected);
+    }
+
+    // A `)` after a top-level alternation has no matching `(`.
+    #[test]
+    fn error_close_paren_unopened_with_alt() {
+        let expected = ErrorKind::UnopenedParen;
+        test_err!("a|b)", 3, expected);
+    }
+
+    // Closing a group right after `|` leaves an empty final branch.
+    #[test]
+    fn error_close_paren_empty_alt() {
+        let expected = ErrorKind::EmptyAlternate;
+        test_err!("(a|)", 3, expected);
+    }
+
+    // `()` is rejected as an empty group.
+    #[test]
+    fn error_close_paren_empty_group() {
+        let expected = ErrorKind::EmptyGroup;
+        test_err!("()", 1, expected);
+    }
+
+    // A named group with no body is rejected as an empty group.
+    #[test]
+    fn error_close_paren_empty_group_with_name() {
+        let expected = ErrorKind::EmptyGroup;
+        test_err!("(?P<foo>)", 8, expected);
+    }
+
+    // A group left open at the end of the pattern is reported at the
+    // position of its `(`.
+    #[test]
+    fn error_finish_concat_unclosed() {
+        let expected = ErrorKind::UnclosedParen;
+        test_err!("ab(xy", 2, expected);
+    }
+
+    // A pattern ending in `|` has an empty final branch.
+    #[test]
+    fn error_finish_concat_empty_alt() {
+        let expected = ErrorKind::EmptyAlternate;
+        test_err!("a|", 2, expected);
+    }
+
+    // The capture name `a#` is rejected as invalid.
+    #[test]
+    fn error_group_name_invalid() {
+        let expected = ErrorKind::InvalidCaptureName("a#".into());
+        test_err!("(?P<a#>x)", 6, expected);
+    }
+
+    // The capture name `1a`, which starts with a digit, is rejected.
+    #[test]
+    fn error_group_name_invalid_leading() {
+        let expected = ErrorKind::InvalidCaptureName("1a".into());
+        test_err!("(?P<1a>a)", 6, expected);
+    }
+
+    // The pattern ends before the capture name is closed with `>`.
+    #[test]
+    fn error_group_name_unexpected_eof() {
+        let expected = ErrorKind::UnclosedCaptureName("a".into());
+        test_err!("(?P<a", 5, expected);
+    }
+
+    // `(?P<>...)` has an empty capture name.
+    #[test]
+    fn error_group_name_empty() {
+        let expected = ErrorKind::EmptyCaptureName;
+        test_err!("(?P<>a)", 4, expected);
+    }
+
+    // `z` is not a known flag letter.
+    #[test]
+    fn error_group_opts_unrecognized_flag() {
+        let expected = ErrorKind::UnrecognizedFlag('z');
+        test_err!("(?z:a)", 2, expected);
+    }
+
+    // The pattern ends in the middle of a flag group.
+    #[test]
+    fn error_group_opts_unexpected_eof() {
+        let expected = ErrorKind::UnexpectedFlagEof;
+        test_err!("(?i", 3, expected);
+    }
+
+    // A second `-` inside one flag group is rejected.
+    #[test]
+    fn error_group_opts_double_negation() {
+        let expected = ErrorKind::DoubleFlagNegation;
+        test_err!("(?-i-s:a)", 4, expected);
+    }
+
+    // A `-` with no flag after it is rejected.
+    #[test]
+    fn error_group_opts_empty_negation() {
+        let expected = ErrorKind::EmptyFlagNegation;
+        test_err!("(?i-:a)", 4, expected);
+    }
+
+    // `(?)` with no flags at all is reported as `EmptyFlagNegation`.
+    #[test]
+    fn error_group_opts_empty() {
+        let expected = ErrorKind::EmptyFlagNegation;
+        test_err!("(?)", 2, expected);
+    }
+
+    // A trailing backslash has nothing to escape.
+    #[test]
+    fn error_escape_unexpected_eof() {
+        let expected = ErrorKind::UnexpectedEscapeEof;
+        test_err!(r"\", 1, expected);
+    }
+
+    // `\m` is not a known escape sequence.
+    #[test]
+    fn error_escape_unrecognized() {
+        let expected = ErrorKind::UnrecognizedEscape('m');
+        test_err!(r"\m", 1, expected);
+    }
+
+    // `\x` with no digits after it runs into the end of the pattern.
+    #[test]
+    fn error_escape_hex2_eof0() {
+        let expected = ErrorKind::UnexpectedTwoDigitHexEof;
+        test_err!(r"\x", 2, expected);
+    }
+
+    // `\x` with only one digit after it runs into the end of the pattern.
+    #[test]
+    fn error_escape_hex2_eof1() {
+        let expected = ErrorKind::UnexpectedTwoDigitHexEof;
+        test_err!(r"\xA", 3, expected);
+    }
+
+    // `AG` is not a valid two-digit hexadecimal number.
+    #[test]
+    fn error_escape_hex2_invalid() {
+        let expected = ErrorKind::InvalidBase16("AG".into());
+        test_err!(r"\xAG", 4, expected);
+    }
+
+    // `\x{` followed by the end of the pattern is reported as an invalid
+    // (empty) base-16 number.
+    #[test]
+    fn error_escape_hex_eof0() {
+        let expected = ErrorKind::InvalidBase16("".into());
+        test_err!(r"\x{", 3, expected);
+    }
+
+    // A braced hex escape with digits but no closing `}` is unclosed.
+    #[test]
+    fn error_escape_hex_eof1() {
+        let expected = ErrorKind::UnclosedHex;
+        test_err!(r"\x{A", 4, expected);
+    }
+
+    // `AG` inside a braced hex escape is not valid hexadecimal.
+    #[test]
+    fn error_escape_hex_invalid() {
+        let expected = ErrorKind::InvalidBase16("AG".into());
+        test_err!(r"\x{AG}", 5, expected);
+    }
+
+    // 0xD800 parses as hex but is rejected as an invalid scalar value.
+    #[test]
+    fn error_escape_hex_invalid_scalar_value_surrogate() {
+        let expected = ErrorKind::InvalidScalarValue(0xD800);
+        test_err!(r"\x{D800}", 7, expected);
+    }
+
+    // 0x110000 parses as hex but is rejected as an invalid scalar value.
+    #[test]
+    fn error_escape_hex_invalid_scalar_value_high() {
+        let expected = ErrorKind::InvalidScalarValue(0x110000);
+        test_err!(r"\x{110000}", 9, expected);
+    }
+
+    // The digit string `9999999999` is rejected as an invalid base-16
+    // number.
+    #[test]
+    fn error_escape_hex_invalid_u32() {
+        let expected = ErrorKind::InvalidBase16("9999999999".into());
+        test_err!(r"\x{9999999999}", 13, expected);
+    }
+
+    // `\p{` without a closing `}` is unclosed, with or without a name.
+    #[test]
+    fn error_unicode_unclosed() {
+        let expected = ErrorKind::UnclosedUnicodeName;
+        test_err!(r"\p{", 3, expected);
+        test_err!(r"\p{Greek", 8, expected);
+    }
+
+    // `\p` at the end of the pattern is reported as an unexpected end of
+    // escape.
+    #[test]
+    fn error_unicode_no_letter() {
+        let expected = ErrorKind::UnexpectedEscapeEof;
+        test_err!(r"\p", 2, expected);
+    }
+
+    // `A` is not a recognized one-letter Unicode class.
+    #[test]
+    fn error_unicode_unknown_letter() {
+        let expected = ErrorKind::UnrecognizedUnicodeClass("A".into());
+        test_err!(r"\pA", 3, expected);
+    }
+
+    // `Yii` is not a recognized Unicode class name.
+    #[test]
+    fn error_unicode_unknown_name() {
+        let expected = ErrorKind::UnrecognizedUnicodeClass("Yii".into());
+        test_err!(r"\p{Yii}", 7, expected);
+    }
+
+    // The pattern ends right after `[` or `[^`.
+    #[test]
+    fn error_class_eof_empty() {
+        let expected = ErrorKind::UnexpectedClassEof;
+        test_err!("[", 1, expected);
+        test_err!("[^", 2, expected);
+    }
+
+    // The pattern ends after a class member but before the closing `]`.
+    #[test]
+    fn error_class_eof_non_empty() {
+        let expected = ErrorKind::UnexpectedClassEof;
+        test_err!("[a", 2, expected);
+        test_err!("[^a", 3, expected);
+    }
+
+    // The pattern ends in the middle of a range inside a class.
+    #[test]
+    fn error_class_eof_range() {
+        let expected = ErrorKind::UnexpectedClassEof;
+        test_err!("[a-", 3, expected);
+        test_err!("[^a-", 4, expected);
+        test_err!("[---", 4, expected);
+    }
+
+    // An unknown Unicode class letter is also rejected inside brackets.
+    #[test]
+    fn error_class_invalid_escape() {
+        let expected = ErrorKind::UnrecognizedUnicodeClass("A".into());
+        test_err!(r"[\pA]", 4, expected);
+    }
+
+    #[test]
+    fn error_class_valid_escape_not_allowed() {
+        test_err!(r"[\A]", 3, ErrorKind::InvalidClassEscape(Expr::StartText));
+    }
+
+    // Neither end of a range may be a class escape or an assertion escape.
+    #[test]
+    fn error_class_range_valid_escape_not_allowed() {
+        let digit_class = ErrorKind::InvalidClassEscape(
+            Expr::Class(class(PERLD)));
+        test_err!(r"[a-\d]", 5, digit_class);
+
+        let start_text = ErrorKind::InvalidClassEscape(Expr::StartText);
+        test_err!(r"[a-\A]", 5, start_text);
+        test_err!(r"[\A-a]", 3, start_text);
+    }
+
+    // A range whose start comes after its end is rejected.
+    #[test]
+    fn error_class_invalid_range() {
+        let expected = ErrorKind::InvalidClassRange { start: 'z', end: 'a' };
+        test_err!("[z-a]", 4, expected);
+    }
+
+    // `[]` and `[^]` are reported as an unexpected end of class at the end
+    // of the pattern.
+    #[test]
+    fn error_class_empty_range() {
+        let expected = ErrorKind::UnexpectedClassEof;
+        test_err!("[]", 2, expected);
+        test_err!("[^]", 3, expected);
+    }
+
+    // Using the same capture name twice is rejected.
+    #[test]
+    fn error_duplicate_capture_name() {
+        let expected = ErrorKind::DuplicateCaptureName("a".into());
+        test_err!("(?P<a>.)(?P<a>.)", 14, expected);
+    }
+}
diff --git a/regex_syntax/src/properties.rs b/regex_syntax/src/properties.rs
new file mode 100644
index 0000000000..38cbb02e73
--- /dev/null
+++ b/regex_syntax/src/properties.rs
@@ -0,0 +1,407 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use quickcheck::{Arbitrary, Gen, Testable, QuickCheck, StdGen};
+use rand::Rng;
+
+use {Expr, CharClass, ClassRange, Repeater, dec_char};
+
+/// Runs the property `t` through a `QuickCheck` runner configured with
+/// `tests(10_000)` and `max_tests(20_000)`. Shared by the class-invariant
+/// properties and `parser_never_panics` in this file.
+fn qc<T: Testable>(t: T) {
+    QuickCheck::new().tests(10_000).max_tests(20_000).quickcheck(t);
+}
+
+/// Turns `(char, char)` pairs into a `CharClass`, one `ClassRange` per pair.
+fn class(ranges: &[(char, char)]) -> CharClass {
+    let built = ranges.iter().map(|&(c1, c2)| ClassRange::new(c1, c2));
+    CharClass::new(built.collect())
+}
+
+// Test invariants for canonicalizing character classes.
+
+#[test]
+fn negate() { // property: negating twice equals canonicalizing
+    fn prop(ranges: Vec<(char, char)>) -> bool {
+        class(&ranges).canonicalize() == class(&ranges).negate().negate()
+    }
+    qc(prop as fn(Vec<(char, char)>) -> bool);
+}
+
+#[test]
+fn classes_are_sorted_and_nonoverlapping() {
+    // Walks adjacent pairs of the canonical ranges: each range must end
+    // strictly below `dec_char` of its successor's start.
+    fn prop(ranges: Vec<(char, char)>) -> bool {
+        let canon = class(&ranges).canonicalize();
+        canon.windows(2).all(|pair| pair[0].end < dec_char(pair[1].start))
+    }
+    qc(prop as fn(Vec<(char, char)>) -> bool);
+}
+
+#[test]
+fn valid_class_ranges() { // property: canonical ranges have start <= end
+    fn prop(ranges: Vec<(char, char)>) -> bool {
+        class(&ranges).canonicalize().into_iter().all(|r| r.start <= r.end)
+    }
+    qc(prop as fn(Vec<(char, char)>) -> bool);
+}
+
+/// A newtype around `String` used to generate "regex-like" Unicode strings.
+///
+/// Its `Arbitrary` impl deliberately biases generation toward characters
+/// that are special in regex syntax, to make test cases more interesting.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+struct RegexLikeString(String);
+
+impl Arbitrary for RegexLikeString {
+    /// Generates fewer than `g.size()` chars. Plain random Unicode strings
+    /// parse almost entirely as literals, so each char is taken from `SPECIAL`
+    /// when `gen_weighted_bool(3)` fires and from `g.gen()` otherwise.
+    fn arbitrary<G: Gen>(g: &mut G) -> RegexLikeString {
+        const SPECIAL: &'static [char] = &[
+            '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}',
+            '^', '$',
+        ];
+        let len = { let bound = g.size(); g.gen_range(0, bound) };
+        let mut text = String::with_capacity(len);
+        for _ in 0..len {
+            text.push(match g.gen_weighted_bool(3) {
+                true => *g.choose(SPECIAL).unwrap(),
+                false => g.gen(),
+            });
+        }
+        RegexLikeString(text)
+    }
+
+    /// Reuses the wrapped `String`'s shrinker and re-wraps each candidate.
+    fn shrink(&self) -> Box<Iterator<Item=RegexLikeString>> {
+        Box::new(self.0.shrink().map(RegexLikeString))
+    }
+}
+
+/// A newtype for generating short, non-empty strings of lowercase ASCII letters.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+struct SmallAscii(String);
+
+impl Arbitrary for SmallAscii {
+    /// Produces one to four letters, each a code point in `97..123` (a-z).
+    fn arbitrary<G: Gen>(g: &mut G) -> SmallAscii {
+        let len = g.gen_range(1, 5);
+        let letter = |_| ::std::char::from_u32(g.gen_range(97, 123)).unwrap();
+        SmallAscii((0..len).map(letter).collect())
+    }
+
+    /// Delegates to the inner `String`'s shrinker.
+    fn shrink(&self) -> Box<Iterator<Item=SmallAscii>> {
+        Box::new(self.0.shrink().map(SmallAscii))
+    }
+}
+
+#[test]
+fn parser_never_panics() { // the parse result itself is ignored
+    fn prop(s: RegexLikeString) -> bool {
+        let _ = Expr::parse(&s.0); true
+    }
+    qc(prop as fn(RegexLikeString) -> bool);
+}
+
+// Testing entire expressions.
+//
+// We only have one test at the moment, but the machinery could be useful
+// for other things.
+//
+// In particular, Russ Cox writes about testing regexes by comparing the
+// strings they match with other regex implementations. A fuzzer/shrinker
+// (which is what's implemented below) would be a great way to drive that
+// process. ---AG
+
+impl Arbitrary for Expr {
+    /// Generates via `gen_expr`, then renumbers captures and simplifies.
+    fn arbitrary<G: Gen>(g: &mut G) -> Expr {
+        fix_capture_indices(gen_expr(g, 0, ExprType::Anything)).simplify()
+    }
+
+    /// Shrinks per variant; each candidate is renumbered and simplified too.
+    fn shrink(&self) -> Box<Iterator<Item=Expr>> {
+        use Expr::*;
+
+        let nada = || Box::new(None.into_iter()); // "no shrinks" iterator
+        let es: Box<Iterator<Item=Expr>> = match *self {
+            Empty | AnyChar | AnyCharNoNL
+            | StartLine | EndLine | StartText | EndText
+            | WordBoundary | NotWordBoundary => nada(),
+            Literal { ref chars, .. } if chars.len() == 1 => nada(),
+            Literal { ref chars, casei } => {
+                Box::new((chars.clone(), casei)
+                         .shrink()
+                         .filter(|&(ref chars, _)| chars.len() > 0)
+                         .map(|(chars, casei)| {
+                             Literal { chars: chars, casei: casei }
+                         }))
+            }
+            Class(ref cls) => Box::new(cls.shrink().map(Class)),
+            Group { ref e, ref i, ref name } => { // bare shrinks, then re-wrapped
+                let (i, name) = (i.clone(), name.clone());
+                Box::new(e.clone().shrink()
+                          .chain(e.clone().shrink()
+                                  .map(move |e| Group {
+                                      e: Box::new(e),
+                                      i: i.clone(),
+                                      name: name.clone(),
+                                  })))
+            }
+            Repeat { ref e, ref r, greedy } => {
+                Box::new((*e.clone(), r.clone())
+                         .shrink()
+                         .filter(|&(ref e, _)| e.can_repeat()) // keep repeatable
+                         .map(move |(e, r)| Repeat {
+                             e: Box::new(e),
+                             r: r,
+                             greedy: greedy,
+                         }))
+            }
+            Concat(ref es) => { // a one-element result collapses to that element
+                Box::new(es.clone()
+                           .shrink()
+                           .filter(|es| es.len() > 0)
+                           .map(|mut es| if es.len() == 1 {
+                               es.pop().unwrap()
+                           } else {
+                               Concat(es)
+                           }))
+            }
+            Alternate(ref es) => { // same collapsing rule as the `Concat` arm
+                Box::new(es.clone()
+                           .shrink()
+                           .filter(|es| es.len() > 0)
+                           .map(|mut es| if es.len() == 1 {
+                               es.pop().unwrap()
+                           } else {
+                               Alternate(es)
+                           }))
+            }
+        };
+        Box::new(es.map(|e| fix_capture_indices(e).simplify()))
+    }
+}
+
+enum ExprType { // tells `gen_expr` which variants it may draw
+    NoSequences, // disallow concat/alternate
+    Anything, // no restriction from the type (depth may still limit)
+}
+
+/// Generates a random `Expr` by drawing a variant index from `1..ub`. Once
+/// `depth` reaches `g.size()` only leaf variants are drawn; otherwise groups
+/// and repeats are allowed, plus concat/alternate when `ty` is `Anything`.
+fn gen_expr<G: Gen>(g: &mut G, depth: u32, ty: ExprType) -> Expr {
+    use Expr::*;
+    let ub = if depth as usize >= g.size() {
+        11
+    } else {
+        match ty { ExprType::NoSequences => 13, ExprType::Anything => 15 }
+    };
+    match g.gen_range(1, ub) {
+        0 => Empty, // never drawn: the range starts at 1
+        1 => Literal {
+            chars: SmallAscii::arbitrary(g).0.chars().collect(),
+            casei: g.gen(),
+        },
+        2 => AnyChar,
+        3 => AnyCharNoNL,
+        4 => Class(CharClass::arbitrary(g)),
+        5 => StartLine,
+        6 => EndLine,
+        7 => StartText,
+        8 => EndText,
+        9 => WordBoundary,
+        10 => NotWordBoundary,
+        11 => gen_group_expr(g, depth + 1),
+        12 => Repeat {
+            e: Box::new(gen_repeatable_expr(g, depth + 1)),
+            r: Repeater::arbitrary(g),
+            greedy: bool::arbitrary(g),
+        },
+        13 => {
+            // Sequence members are generated with `NoSequences`.
+            let len = { let limit = g.size(); g.gen_range(2, limit) };
+            Concat((0..len)
+                   .map(|_| gen_expr(g, depth + 1, ExprType::NoSequences))
+                   .collect())
+        }
+        14 => {
+            let len = { let limit = g.size(); g.gen_range(2, limit) };
+            Alternate((0..len)
+                      .map(|_| gen_expr(g, depth + 1, ExprType::NoSequences))
+                      .collect())
+        }
+        _ => unreachable!()
+    }
+}
+
+fn gen_repeatable_expr<G: Gen>(g: &mut G, depth: u32) -> Expr {
+    use Expr::*; // operand generator for `Repeat` (called from `gen_expr`)
+    match g.gen_range(1, 6) {
+        0 => Empty, // never drawn: the range starts at 1
+        1 => Literal {
+            chars: vec![Arbitrary::arbitrary(g)], // exactly one arbitrary char
+            casei: g.gen(),
+        },
+        2 => AnyChar,
+        3 => AnyCharNoNL,
+        4 => Class(CharClass::arbitrary(g)),
+        5 => gen_group_expr(g, depth + 1),
+        _ => unreachable!(),
+    }
+}
+
+/// Wraps a random sub-expression in a group. A first coin flip yields a group
+/// with no index or name; otherwise `i` is `Some(0)` with an optional name.
+fn gen_group_expr<G: Gen>(g: &mut G, depth: u32) -> Expr {
+    let plain: bool = g.gen();
+    let (i, name) = match plain {
+        true => (None, None),
+        false if g.gen() => (Some(0), Some(SmallAscii::arbitrary(g).0)),
+        false => (Some(0), None),
+    };
+    let inner = gen_expr(g, depth + 1, ExprType::Anything);
+    Expr::Group {
+        e: Box::new(inner),
+        i: i,
+        name: name,
+    }
+}
+
+/// Renumbers every capturing group (`i: Some(_)`) with consecutive indices
+/// starting at 1, in the order `fix` reaches them, and clears a group's name
+/// when an earlier group already uses it.
+fn fix_capture_indices(e: Expr) -> Expr {
+    fn bx(e: Expr) -> Box<Expr> { Box::new(e) }
+    fn fix(e: Expr, capi: &mut usize, names: &mut Vec<String>) -> Expr {
+        use Expr::*;
+        match e {
+            Group { e, i: Some(_), name } => {
+                *capi += 1;
+                let i = *capi;
+                // Keep only the first occurrence of each capture name.
+                let name = name.and_then(|n| {
+                    if names.contains(&n) { return None; }
+                    names.push(n.clone());
+                    Some(n)
+                });
+                Group { e: bx(fix(*e, capi, names)), i: Some(i), name: name }
+            }
+            Group { e, i, name } => {
+                Group { e: bx(fix(*e, capi, names)), i: i, name: name }
+            }
+            Repeat { e, r, greedy } => {
+                Repeat { e: bx(fix(*e, capi, names)), r: r, greedy: greedy }
+            }
+            Concat(es) =>
+                Concat(es.into_iter().map(|x| fix(x, capi, names)).collect()),
+            Alternate(es) =>
+                Alternate(es.into_iter().map(|x| fix(x, capi, names)).collect()),
+            other => other,
+        }
+    }
+    fix(e, &mut 0, &mut vec![])
+}
+
+impl Arbitrary for Repeater {
+    /// Generates one of the four repeater kinds. A `Range` gets two arbitrary
+    /// bounds ordered so that `min <= max`; a coin flip leaves `max` as `None`.
+    fn arbitrary<G: Gen>(g: &mut G) -> Repeater {
+        use std::cmp::{max, min};
+        use Repeater::*;
+        match g.gen_range(0, 4) {
+            0 => ZeroOrOne,
+            1 => ZeroOrMore,
+            2 => OneOrMore,
+            3 => {
+                let (a, b) = (Arbitrary::arbitrary(g), Arbitrary::arbitrary(g));
+                let upper = if g.gen() { None } else { Some(max(a, b)) };
+                Range { min: min(a, b), max: upper }
+            },
+            _ => unreachable!(),
+        }
+    }
+
+    /// Shrinks the bounds of a `Range` (other kinds do not shrink), skipping
+    /// candidates with `min > max`, which `arbitrary` never generates.
+    fn shrink(&self) -> Box<Iterator<Item=Repeater>> {
+        use Repeater::*;
+        match *self {
+            ZeroOrOne | ZeroOrMore | OneOrMore => Box::new(None.into_iter()),
+            Range { min, max } => Box::new(
+                (min, max).shrink()
+                          .filter(|&(lo, hi)| hi.map_or(true, |hi| lo <= hi))
+                          .map(|(lo, hi)| Range { min: lo, max: hi })),
+        }
+    }
+}
+
+impl Arbitrary for CharClass {
+    /// Generates a canonicalized class from at least one range; on a coin
+    /// flip the result is additionally passed through `case_fold`.
+    fn arbitrary<G: Gen>(g: &mut G) -> CharClass {
+        let mut ranges: Vec<ClassRange> = Arbitrary::arbitrary(g);
+        if ranges.is_empty() { ranges.push(Arbitrary::arbitrary(g)); }
+        let cls = CharClass { ranges: ranges, casei: false }.canonicalize();
+        match g.gen() {
+            true => cls.case_fold(),
+            false => cls,
+        }
+    }
+
+    /// Shrinks the range list together with the `casei` flag, drops empty
+    /// range lists, and rebuilds each candidate: canonicalize, then
+    /// `case_fold` when the shrunk flag is set.
+    fn shrink(&self) -> Box<Iterator<Item=CharClass>> {
+        let parts = (self.ranges.clone(), self.casei);
+        let nonempty = parts.shrink().filter(|p| !p.0.is_empty());
+        Box::new(nonempty.map(|(ranges, casei)| {
+            let raw = CharClass { ranges: ranges, casei: casei };
+            let cls = raw.canonicalize();
+            if casei { cls.case_fold() } else { cls }
+        }))
+    }
+}
+
+impl Arbitrary for ClassRange {
+    /// Builds a range from two independently drawn letters (`97..123`, a-z).
+    fn arbitrary<G: Gen>(g: &mut G) -> ClassRange {
+        use std::char::from_u32;
+        let c1 = from_u32(g.gen_range(97, 123)).unwrap();
+        let c2 = from_u32(g.gen_range(97, 123)).unwrap();
+        ClassRange::new(c1, c2)
+    }
+
+    fn shrink(&self) -> Box<Iterator<Item=ClassRange>> {
+        let ends = (self.start, self.end);
+        Box::new(ends.shrink().map(|(s, e)| ClassRange::new(s, e)))
+    }
+}
+
+#[test]
+fn display_regex_roundtrips() {
+    // Property: printing an AST as a regex and parsing that text again must
+    // give back an equal AST. This relies crucially on regex simplification,
+    // so it tests `Expr::simplify` as much as the `Display` impl. The runner
+    // is configured like `qc`, plus an explicit `StdGen` of size 50.
+    fn prop(e: Expr) -> bool {
+        e == Expr::parse(&e.to_string()).unwrap()
+    }
+    QuickCheck::new()
+        .tests(10_000)
+        .max_tests(20_000)
+        .gen(StdGen::new(::rand::thread_rng(), 50))
+        .quickcheck(prop as fn(Expr) -> bool);
+}
diff --git a/src/unicode.rs b/regex_syntax/src/unicode.rs
similarity index 100%
rename from src/unicode.rs
rename to regex_syntax/src/unicode.rs
diff --git a/scripts/unicode.py b/scripts/unicode.py
index f734b78099..05bf78c9d5 100755
--- a/scripts/unicode.py
+++ b/scripts/unicode.py
@@ -20,7 +20,7 @@
 # Since this should not require frequent updates, we just store this
 # out-of-line and check the unicode.rs file into git.
 
-import fileinput, re, os, sys, operator
+import fileinput, re, os, sys
 
 preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
diff --git a/src/compile.rs b/src/compile.rs
index fd1cf27c62..413da3e9e8 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -14,61 +14,29 @@
 use self::Inst::*;
 
 use std::cmp;
-use std::iter::repeat;
-use parse;
-use parse::{Flags, FLAG_EMPTY};
-use parse::Ast::{
-    Nothing, Literal, Dot, AstClass, Begin, End, WordBoundary, Capture,
-    Cat, Alt, Rep,
-};
-use parse::Repeater::{ZeroOne, ZeroMore, OneMore};
+use syntax::{self, Expr, Repeater};
+use Error;
 
 pub type InstIdx = usize;
 
 /// An instruction, the underlying unit of a compiled regular expression
+#[allow(missing_docs)]
 #[derive(Debug, Clone)]
 pub enum Inst {
     /// When a Match instruction is executed, the current thread is successful.
     Match,
-
-    /// The OneChar instruction matches a literal character.
-    /// The flags indicate whether to do a case insensitive match.
-    OneChar(char, Flags),
-
-    /// The CharClass instruction tries to match one input character against
-    /// the range of characters given.
-    /// The flags indicate whether to do a case insensitive match.
-    CharClass(Vec<(char, char)>, Flags),
-
-    /// Matches any character except new lines.
-    /// The flags indicate whether to include the '\n' character.
-    Any(Flags),
-
-    /// Matches the beginning of the string, consumes no characters.
-    /// The flags indicate whether it matches if the preceding character
-    /// is a new line.
-    EmptyBegin(Flags),
-
-    /// Matches the end of the string, consumes no characters.
-    /// The flags indicate whether it matches if the proceeding character
-    /// is a new line.
-    EmptyEnd(Flags),
-
-    /// Matches a word boundary (\w on one side and \W \A or \z on the other),
-    /// and consumes no character.
-    /// The flags indicate whether this matches a word boundary or something
-    /// that isn't a word boundary.
-    EmptyWordBoundary(Flags),
-
-    /// Saves the current position in the input string to the Nth save slot.
+    OneChar { c: char, casei: bool },
+    CharClass(syntax::CharClass),
+    Any,
+    AnyNoNL,
+    StartLine,
+    EndLine,
+    StartText,
+    EndText,
+    WordBoundary,
+    NotWordBoundary,
     Save(usize),
-
-    /// Jumps to the instruction at the index given.
     Jump(InstIdx),
-
-    /// Jumps to the instruction at the first index given. If that leads to
-    /// a panic state, then the instruction at the second index given is
-    /// tried.
     Split(InstIdx, InstIdx),
 }
 
@@ -90,14 +58,15 @@ pub struct Program {
 
 impl Program {
     /// Compiles a Regex given its AST.
-    pub fn new(ast: parse::Ast) -> (Program, Vec<Option<String>>) {
+    pub fn new(ast: Expr, size: usize) -> Result<(Program, Vec<Option<String>>), Error> {
         let mut c = Compiler {
             insts: Vec::with_capacity(100),
-            names: Vec::with_capacity(10),
+            names: vec![None],
+            size_limit: size,
         };
 
         c.insts.push(Save(0));
-        c.compile(ast);
+        try!(c.compile(ast));
         c.insts.push(Save(1));
         c.insts.push(Match);
 
@@ -107,17 +76,17 @@ impl Program {
         let mut pre = String::with_capacity(5);
         for inst in c.insts[1..].iter() {
             match *inst {
-                OneChar(c, FLAG_EMPTY) => pre.push(c),
+                OneChar { c, casei: false } => pre.push(c),
                 _ => break
             }
         }
 
-        let Compiler { insts, names } = c;
+        let Compiler { insts, names, .. } = c;
         let prog = Program {
             insts: insts,
             prefix: pre,
         };
-        (prog, names)
+        Ok((prog, names))
     }
 
     /// Returns the total number of capture groups in the regular expression.
@@ -138,6 +107,7 @@ impl Program {
 struct Compiler {
     insts: Vec<Inst>,
     names: Vec<Option<String>>,
+    size_limit: usize,
 }
 
 // The compiler implemented here is extremely simple. Most of the complexity
@@ -145,83 +115,132 @@ struct Compiler {
 // The only tricky thing here is patching jump/split instructions to point to
 // the right instruction.
 impl Compiler {
-    fn compile(&mut self, ast: parse::Ast) {
+    /// Fails with `Error::CompiledTooBig` once the instruction list's byte
+    /// size (`len * size_of::<Inst>()`) exceeds `self.size_limit`.
+    fn check_size(&self) -> Result<(), Error> {
+        let bytes = self.insts.len() * ::std::mem::size_of::<Inst>();
+        if bytes <= self.size_limit { return Ok(()); }
+        Err(Error::CompiledTooBig(self.size_limit))
+    }
+
+    fn compile(&mut self, ast: Expr) -> Result<(), Error> {
         match ast {
-            Nothing => {},
-            Literal(c, flags) => self.push(OneChar(c, flags)),
-            Dot(nl) => self.push(Any(nl)),
-            AstClass(ranges, flags) => self.push(CharClass(ranges, flags)),
-            Begin(flags) => self.push(EmptyBegin(flags)),
-            End(flags) => self.push(EmptyEnd(flags)),
-            WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
-            Capture(cap, name, x) => {
-                let len = self.names.len();
-                if cap >= len {
-                    self.names.extend(repeat(None).take(10 + cap - len))
+            Expr::Empty => {},
+            Expr::Literal { chars, casei } => {
+                for c in chars {
+                    self.push(OneChar { c: c, casei: casei });
                 }
-                self.names[cap] = name;
-
-                self.push(Save(2 * cap));
-                self.compile(*x);
-                self.push(Save(2 * cap + 1));
             }
-            Cat(xs) => {
-                for x in xs.into_iter() {
-                    self.compile(x)
+            Expr::AnyChar => self.push(Any),
+            Expr::AnyCharNoNL => self.push(AnyNoNL),
+            Expr::Class(cls) => self.push(CharClass(cls)),
+            Expr::StartLine => self.push(StartLine),
+            Expr::EndLine => self.push(EndLine),
+            Expr::StartText => self.push(StartText),
+            Expr::EndText => self.push(EndText),
+            Expr::WordBoundary => self.push(WordBoundary),
+            Expr::NotWordBoundary => self.push(NotWordBoundary),
+            Expr::Group { e, i: None, name: None } => try!(self.compile(*e)),
+            Expr::Group { e, i, name } => {
+                let i = i.expect("capture index");
+                self.names.push(name);
+                self.push(Save(2 * i));
+                try!(self.compile(*e));
+                self.push(Save(2 * i + 1));
+            }
+            Expr::Concat(es) => {
+                for e in es {
+                    try!(self.compile(e));
                 }
             }
-            Alt(x, y) => {
+            Expr::Alternate(mut es) => {
+                // TODO: Don't use recursion here. ---AG
+                if es.len() == 0 {
+                    return Ok(());
+                }
+                let e1 = es.remove(0);
+                if es.len() == 0 {
+                    try!(self.compile(e1));
+                    return Ok(());
+                }
+                let e2 = Expr::Alternate(es); // this causes recursion
+
                 let split = self.empty_split(); // push: split 0, 0
                 let j1 = self.insts.len();
-                self.compile(*x);                // push: insts for x
+                try!(self.compile(e1));                // push: insts for x
                 let jmp = self.empty_jump();    // push: jmp 0
                 let j2 = self.insts.len();
-                self.compile(*y);                // push: insts for y
+                try!(self.compile(e2));                // push: insts for y
                 let j3 = self.insts.len();
 
                 self.set_split(split, j1, j2);  // split 0, 0 -> split j1, j2
                 self.set_jump(jmp, j3);         // jmp 0      -> jmp j3
             }
-            Rep(x, ZeroOne, g) => {
+            Expr::Repeat { e, r: Repeater::ZeroOrOne, greedy } => {
                 let split = self.empty_split();
                 let j1 = self.insts.len();
-                self.compile(*x);
+                try!(self.compile(*e));
                 let j2 = self.insts.len();
 
-                if g.is_greedy() {
+                if greedy {
                     self.set_split(split, j1, j2);
                 } else {
                     self.set_split(split, j2, j1);
                 }
             }
-            Rep(x, ZeroMore, g) => {
+            Expr::Repeat { e, r: Repeater::ZeroOrMore, greedy } => {
                 let j1 = self.insts.len();
                 let split = self.empty_split();
                 let j2 = self.insts.len();
-                self.compile(*x);
+                try!(self.compile(*e));
                 let jmp = self.empty_jump();
                 let j3 = self.insts.len();
 
                 self.set_jump(jmp, j1);
-                if g.is_greedy() {
+                if greedy {
                     self.set_split(split, j2, j3);
                 } else {
                     self.set_split(split, j3, j2);
                 }
             }
-            Rep(x, OneMore, g) => {
+            Expr::Repeat { e, r: Repeater::OneOrMore, greedy } => {
                 let j1 = self.insts.len();
-                self.compile(*x);
+                try!(self.compile(*e));
                 let split = self.empty_split();
                 let j2 = self.insts.len();
 
-                if g.is_greedy() {
+                if greedy {
                     self.set_split(split, j1, j2);
                 } else {
                     self.set_split(split, j2, j1);
                 }
             }
+            Expr::Repeat { e, r: Repeater::Range { min, max: None }, greedy } => {
+                let e = *e;
+                for _ in 0..min {
+                    try!(self.compile(e.clone()));
+                }
+                try!(self.compile(Expr::Repeat {
+                    e: Box::new(e),
+                    r: Repeater::ZeroOrMore,
+                    greedy: greedy,
+                }));
+            }
+            Expr::Repeat { e, r: Repeater::Range { min, max: Some(max) }, greedy } => {
+                let e = *e;
+                for _ in 0..min {
+                    try!(self.compile(e.clone()));
+                }
+                for _ in min..max {
+                    try!(self.compile(Expr::Repeat {
+                        e: Box::new(e.clone()),
+                        r: Repeater::ZeroOrOne,
+                        greedy: greedy,
+                    }));
+                }
+            }
         }
+        self.check_size()
     }
 
     /// Appends the given instruction to the program.
diff --git a/src/lib.rs b/src/lib.rs
index c5cbb9c126..d63c98dad4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -20,17 +20,17 @@
 //! details on the API, please see the documentation for the `Regex` type.
 //!
 //! # Usage
-//! 
+//!
 //! This crates is [on crates.io](https://crates.io/crates/regex) and can be
 //! used by adding `regex` to your dependencies in your project's `Cargo.toml`.
-//! 
+//!
 //! ```toml
 //! [dependencies]
 //! regex = "0.1.8"
 //! ```
-//! 
+//!
 //! and this to your crate root:
-//! 
+//!
 //! ```rust
 //! extern crate regex;
 //! ```
@@ -43,11 +43,8 @@
 //!
 //! ```rust
 //! use regex::Regex;
-//! let re = match Regex::new(r"^\d{4}-\d{2}-\d{2}$") {
-//!     Ok(re) => re,
-//!     Err(err) => panic!("{}", err),
-//! };
-//! assert_eq!(re.is_match("2014-01-01"), true);
+//! let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
+//! assert!(re.is_match("2014-01-01"));
 //! ```
 //!
 //! Notice the use of the `^` and `$` anchors. In this crate, every expression
@@ -55,8 +52,9 @@
 //! it to match anywhere in the text. Anchors can be used to ensure that the
 //! full text matches an expression.
 //!
-//! This example also demonstrates the utility of [raw
-//! strings](../reference.html#character-and-string-literals) in Rust, which
+//! This example also demonstrates the utility of
+//! [raw strings](http://doc.rust-lang.org/stable/reference.html#raw-string-literals)
+//! in Rust, which
 //! are just like regular strings except they are prefixed with an `r` and do
 //! not process any escape sequences. For example, `"\\d"` is the same
 //! expression as `r"\d"`.
@@ -81,7 +79,7 @@
 //!
 //! fn main() {
 //!     let re = regex!(r"^\d{4}-\d{2}-\d{2}$");
-//!     assert_eq!(re.is_match("2014-01-01"), true);
+//!     assert!(re.is_match("2014-01-01"));
 //! }
 //! ```
 //!
@@ -96,20 +94,9 @@
 //! expressions, but 100+ calls to `regex!` will probably result in a
 //! noticeably bigger binary.
 //!
-//! **NOTE**: This is implemented using a compiler plugin, which will not be
+//! **NOTE**: This is implemented using a compiler plugin, which is not
 //! available on the Rust 1.0 beta/stable channels. Therefore, you'll only
-//! be able to use `regex!` on the nightlies. If you want to retain the
-//! `regex!` macro, you can cheat and define this:
-//!
-//! ```rust
-//! macro_rules! regex(
-//!     ($s:expr) => (regex::Regex::new($s).unwrap());
-//! );
-//! ```
-//!
-//! But this just replaces native regexes with dynamic regexes under the hood.
-//! Moreover, this will cause your program to panic *at runtime* if an invalid
-//! regular expression is given.
+//! be able to use `regex!` on the nightlies.
 //!
 //! # Example: iterating over capture groups
 //!
@@ -159,6 +146,25 @@
 //! provides more flexibility than is seen here. (See the documentation for
 //! `Regex::replace` for more details.)
 //!
+//! Note that if your regex gets complicated, you can use the `x` flag to
+//! enable insignificant whitespace mode, which also lets you write comments:
+//!
+//! ```rust
+//! # extern crate regex; use regex::Regex;
+//! # fn main() {
+//! let re = Regex::new(r"(?x)
+//!   (?P<y>\d{4}) # the year
+//!   -
+//!   (?P<m>\d{2}) # the month
+//!   -
+//!   (?P<d>\d{2}) # the day
+//! ").unwrap();
+//! let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+//! let after = re.replace_all(before, "$m/$d/$y");
+//! assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
+//! # }
+//! ```
+//!
 //! # Pay for what you use
 //!
 //! With respect to searching text with a regular expression, there are three
@@ -180,15 +186,16 @@
 //! # Unicode
 //!
 //! This implementation executes regular expressions **only** on sequences of
-//! Unicode code points while exposing match locations as byte indices into the
-//! search string.
+//! Unicode scalar values while exposing match locations as byte indices into
+//! the search string.
 //!
-//! Currently, only naive case folding is supported. Namely, when matching
-//! case insensitively, the characters are first converted to their uppercase
-//! forms and then compared.
+//! Currently, only simple case folding is supported. Namely, when matching
+//! case insensitively, the characters are first mapped using the
+//! [simple case folding](ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt)
+//! mapping.
 //!
 //! Regular expressions themselves are also **only** interpreted as a sequence
-//! of Unicode code points. This means you can use Unicode characters
+//! of Unicode scalar values. This means you can use Unicode characters
 //! directly in your expression:
 //!
 //! ```rust
@@ -214,7 +221,11 @@
 //! # Syntax
 //!
 //! The syntax supported in this crate is almost in an exact correspondence
-//! with the syntax supported by RE2.
+//! with the syntax supported by RE2. It is documented below.
+//!
+//! Note that the regular expression parser and abstract syntax are exposed in
+//! a separate crate,
+//! [`regex-syntax`](../regex_syntax/index.html).
 //!
 //! ## Matching one character
 //!
@@ -294,6 +305,7 @@
 //! m     multi-line mode: ^ and $ match begin/end of line
 //! s     allow . to match \n
 //! U     swap the meaning of x* and x*?
+//! x     ignore whitespace and allow line comments (starting with `#`)
 //! </pre>
 //!
 //! Here's an example that matches case insensitively for only part of the
@@ -361,22 +373,19 @@
 //!
 //! # Untrusted input
 //!
-//! There are two factors to consider here: untrusted regular expressions and
-//! untrusted search text.
-//!
-//! Currently, there are no counter-measures in place to prevent a malicious
-//! user from writing an expression that may use a lot of resources. One such
-//! example is to repeat counted repetitions: `((a{100}){100}){100}` will try
-//! to repeat the `a` instruction `100^3` times. Essentially, this means it's
-//! very easy for an attacker to exhaust your system's memory if they are
-//! allowed to execute arbitrary regular expressions. A possible solution to
-//! this is to impose a hard limit on the size of a compiled expression, but it
-//! does not yet exist.
-//!
-//! The story is a bit better with untrusted search text, since this crate's
-//! implementation provides `O(nm)` search where `n` is the number of
-//! characters in the search text and `m` is the number of instructions in a
-//! compiled expression.
+//! This crate can handle both untrusted regular expressions and untrusted
+//! search text.
+//!
+//! Untrusted regular expressions are handled by capping the size of a compiled
+//! regular expression. (See `Regex::with_size_limit`.) Without this, it would
+//! be trivial for an attacker to exhaust your system's memory with expressions
+//! like `a{100}{100}{100}`.
+//!
+//! Untrusted search text is allowed because the matching engine(s) in this
+//! crate have time complexity `O(mn)` (with `m ~ regex` and `n ~ search
+//! text`), which means there's no way to cause exponential blow-up like with
+//! some other regular expression engines. (We pay for this by disallowing
+//! features like arbitrary look-ahead and back-references.)
 
 #![deny(missing_docs)]
 #![cfg_attr(test, deny(warnings))]
@@ -385,16 +394,17 @@
        html_favicon_url = "http://www.rust-lang.org/favicon.ico",
        html_root_url = "http://doc.rust-lang.org/regex/")]
 
-pub use parse::Error;
-pub use re::{Regex, Captures, SubCaptures, SubCapturesPos, SubCapturesNamed};
-pub use re::{FindCaptures, FindMatches};
-pub use re::{Replacer, NoExpand, RegexSplits, RegexSplitsN};
-pub use re::{quote, is_match};
+extern crate regex_syntax as syntax;
+
+pub use re::{
+    Regex, Error, Captures, SubCaptures, SubCapturesPos, SubCapturesNamed,
+    FindCaptures, FindMatches,
+    Replacer, NoExpand, RegexSplits, RegexSplitsN,
+    quote, is_match,
+};
 
 mod compile;
-mod parse;
 mod re;
-mod unicode;
 mod vm;
 
 /// The `native` module exists to support the `regex!` macro. Do not use.
@@ -416,17 +426,11 @@ pub mod native {
     // On the bright side, `rustdoc` lets us hide this from the public API
     // documentation.
     pub use compile::Program;
-    pub use compile::Inst::{
-        Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd,
-        EmptyWordBoundary, Save, Jump, Split,
-    };
-    pub use parse::{
-        FLAG_EMPTY, FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL,
-        FLAG_SWAP_GREED, FLAG_NEGATED,
-    };
+    pub use compile::Inst;
+    pub use syntax::simple_case_fold;
     pub use re::{ExDynamic, ExNative};
     pub use re::Regex::{Dynamic, Native};
-    pub use vm::{CharReader, find_prefix, simple_case_fold};
+    pub use vm::{CharReader, find_prefix};
     pub use vm::MatchKind::{self, Exists, Location, Submatches};
     pub use vm::StepState::{
         self, StepMatchEarlyReturn, StepMatch, StepContinue,
diff --git a/src/parse.rs b/src/parse.rs
deleted file mode 100644
index 6ec8362e49..0000000000
--- a/src/parse.rs
+++ /dev/null
@@ -1,1160 +0,0 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use std::char;
-use std::cmp;
-use std::fmt;
-
-/// Static data containing Unicode ranges for general categories and scripts.
-use unicode::regex::{UNICODE_CLASSES, PERLD, PERLS, PERLW};
-use vm::simple_case_fold;
-
-use self::Ast::*;
-use self::Repeater::*;
-use self::Greed::*;
-use self::BuildAst::*;
-
-/// The maximum number of repetitions allowed with the `{n,m}` syntax.
-static MAX_REPEAT: usize = 1000;
-
-/// Error corresponds to something that can go wrong while parsing
-/// a regular expression.
-///
-/// (Once an expression is compiled, it is not possible to produce an error
-/// via searching, splitting or replacing.)
-#[derive(Debug)]
-pub struct Error {
-    /// The *approximate* character index of where the error occurred.
-    pub pos: usize,
-    /// A message describing the error.
-    pub msg: String,
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "Regex syntax error near position {}: {}",
-               self.pos, self.msg)
-    }
-}
-
-/// Represents the abstract syntax of a regular expression.
-/// It is showable so that error messages resulting from a bug can provide
-/// useful information.
-/// It is cloneable so that expressions can be repeated for the counted
-/// repetition feature. (No other copying is done.)
-///
-/// Note that this representation prevents one from reproducing the regex as
-/// it was typed. (But it could be used to reproduce an equivalent regex.)
-#[derive(Debug, Clone)]
-pub enum Ast {
-    Nothing,
-    Literal(char, Flags),
-    Dot(Flags),
-    AstClass(Vec<(char, char)>, Flags),
-    Begin(Flags),
-    End(Flags),
-    WordBoundary(Flags),
-    Capture(usize, Option<String>, Box<Ast>),
-    // Represent concatenation as a flat vector to avoid blowing the
-    // stack in the compiler.
-    Cat(Vec<Ast>),
-    Alt(Box<Ast>, Box<Ast>),
-    Rep(Box<Ast>, Repeater, Greed),
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub enum Repeater {
-    ZeroOne,
-    ZeroMore,
-    OneMore,
-}
-
-#[derive(Debug, Clone)]
-pub enum Greed {
-    Greedy,
-    Ungreedy,
-}
-
-impl Copy for Greed {}
-
-impl Greed {
-    pub fn is_greedy(&self) -> bool {
-        match *self {
-            Greedy => true,
-            _ => false,
-        }
-    }
-
-    fn swap(self, swapped: bool) -> Greed {
-        if !swapped { return self }
-        match self {
-            Greedy => Ungreedy,
-            Ungreedy => Greedy,
-        }
-    }
-}
-
-/// BuildAst is a regrettable type that represents intermediate state for
-/// constructing an abstract syntax tree. Its central purpose is to facilitate
-/// parsing groups and alternations while also maintaining a stack of flag
-/// state.
-#[derive(Debug)]
-enum BuildAst {
-    Expr(Ast),
-    Paren(Flags, usize, String), // '('
-    Bar, // '|'
-}
-
-impl BuildAst {
-    fn paren(&self) -> bool {
-        match *self {
-            Paren(_, _, _) => true,
-            _ => false,
-        }
-    }
-
-    fn flags(&self) -> Flags {
-        match *self {
-            Paren(flags, _, _) => flags,
-            _ => panic!("Cannot get flags from {:?}", self),
-        }
-    }
-
-    fn capture(&self) -> Option<usize> {
-        match *self {
-            Paren(_, 0, _) => None,
-            Paren(_, c, _) => Some(c),
-            _ => panic!("Cannot get capture group from {:?}", self),
-        }
-    }
-
-    fn capture_name(&self) -> Option<String> {
-        match *self {
-            Paren(_, 0, _) => None,
-            Paren(_, _, ref name) => {
-                if name.len() == 0 {
-                    None
-                } else {
-                    Some(name.clone())
-                }
-            }
-            _ => panic!("Cannot get capture name from {:?}", self),
-        }
-    }
-
-    fn bar(&self) -> bool {
-        match *self {
-            Bar => true,
-            _ => false,
-        }
-    }
-
-    fn unwrap(self) -> Result<Ast, Error> {
-        match self {
-            Expr(x) => Ok(x),
-            _ => panic!("Tried to unwrap non-AST item: {:?}", self),
-        }
-    }
-}
-
-/// Flags represents all options that can be twiddled by a user in an
-/// expression.
-pub type Flags = u8;
-
-pub const FLAG_EMPTY:      u8 = 0;
-pub const FLAG_NOCASE:     u8 = 1 << 0; // i
-pub const FLAG_MULTI:      u8 = 1 << 1; // m
-pub const FLAG_DOTNL:      u8 = 1 << 2; // s
-pub const FLAG_SWAP_GREED: u8 = 1 << 3; // U
-pub const FLAG_NEGATED:    u8 = 1 << 4; // char class or not word boundary
-
-struct Parser {
-    // The input, parsed only as a sequence of UTF8 code points.
-    chars: Vec<char>,
-    // The index of the current character in the input.
-    chari: usize,
-    // The intermediate state representing the AST.
-    stack: Vec<BuildAst>,
-    // The current set of flags.
-    flags: Flags,
-    // The total number of capture groups.
-    // Incremented each time an opening left paren is seen (assuming it is
-    // opening a capture group).
-    caps: usize,
-    // A set of all capture group names used only to detect duplicates.
-    names: Vec<String>,
-}
-
-pub fn parse(s: &str) -> Result<Ast, Error> {
-    Parser {
-        chars: s.chars().collect(),
-        chari: 0,
-        stack: vec!(),
-        flags: FLAG_EMPTY,
-        caps: 0,
-        names: vec!(),
-    }.parse()
-}
-
-impl Parser {
-    fn parse(&mut self) -> Result<Ast, Error> {
-        if self.chars.len() == 0 {
-            return Ok(Nothing);
-        }
-        loop {
-            let c = self.cur();
-            match c {
-                '?' | '*' | '+' => try!(self.push_repeater(c)),
-                '\\' => {
-                    let ast = try!(self.parse_escape());
-                    if let AstClass(mut ranges, flags) = ast {
-                        if flags & FLAG_NOCASE > 0 {
-                            ranges = case_fold_and_combine_ranges(ranges);
-                        }
-                        self.push(AstClass(ranges, flags))
-                    } else {
-                        self.push(ast)
-                    }
-                }
-                '{' => try!(self.parse_counted()),
-                '[' => match self.try_parse_ascii() {
-                    None => try!(self.parse_class()),
-                    Some(class) => self.push(class),
-                },
-                '(' => {
-                    if self.peek_is(1, '?') {
-                        try!(self.expect('?'));
-                        try!(self.parse_group_opts());
-                    } else {
-                        self.caps += 1;
-                        self.stack.push(Paren(self.flags,
-                                              self.caps,
-                                              "".to_string()))
-                    }
-                }
-                ')' => {
-                    let catfrom = try!(
-                        self.pos_last(false, |x| x.paren() || x.bar()));
-                    try!(self.concat(catfrom));
-
-                    let altfrom = try!(self.pos_last(false, |x| x.paren()));
-                    // Before we smush the alternates together and pop off the
-                    // left paren, let's grab the old flags and see if we
-                    // need a capture.
-                    let (cap, cap_name, oldflags) = {
-                        let paren = &self.stack[altfrom-1];
-                        (paren.capture(), paren.capture_name(), paren.flags())
-                    };
-                    try!(self.alternate(altfrom));
-                    self.flags = oldflags;
-
-                    // If this was a capture, pop what we just pushed in
-                    // alternate and make it a capture.
-                    if cap.is_some() {
-                        let ast = try!(self.pop_ast());
-                        self.push(Capture(cap.unwrap(), cap_name, Box::new(ast)));
-                    }
-                }
-                '|' => {
-                    let catfrom = try!(
-                        self.pos_last(true, |x| x.paren() || x.bar()));
-                    try!(self.concat(catfrom));
-
-                    self.stack.push(Bar);
-                }
-                _ => try!(self.push_literal(c)),
-            }
-            if !self.next_char() {
-                break
-            }
-        }
-
-        // Try to improve error handling. At this point, there should be
-        // no remaining open parens.
-        if self.stack.iter().any(|x| x.paren()) {
-            return self.err("Unclosed parenthesis.")
-        }
-        let catfrom = try!(self.pos_last(true, |x| x.bar()));
-        try!(self.concat(catfrom));
-        try!(self.alternate(0));
-
-        assert!(self.stack.len() == 1);
-        self.pop_ast()
-    }
-
-    fn noteof(&mut self, expected: &str) -> Result<(), Error> {
-        match self.next_char() {
-            true => Ok(()),
-            false => {
-                self.err(&format!("Expected {:?} but got EOF.", expected))
-            }
-        }
-    }
-
-    fn expect(&mut self, expected: char) -> Result<(), Error> {
-        match self.next_char() {
-            true if self.cur() == expected => Ok(()),
-            true => self.err(&format!("Expected '{}' but got '{}'.",
-                                      expected, self.cur())),
-            false => {
-                self.err(&format!("Expected '{}' but got EOF.",
-                                  expected))
-            }
-        }
-    }
-
-    fn next_char(&mut self) -> bool {
-        self.chari += 1;
-        self.chari < self.chars.len()
-    }
-
-    fn pop_ast(&mut self) -> Result<Ast, Error> {
-        match self.stack.pop().unwrap().unwrap() {
-            Err(e) => Err(e),
-            Ok(ast) => Ok(ast),
-        }
-    }
-
-    fn push(&mut self, ast: Ast) {
-        self.stack.push(Expr(ast))
-    }
-
-    fn push_repeater(&mut self, c: char) -> Result<(), Error> {
-        if self.stack.len() == 0 {
-            return self.err(
-                "A repeat operator must be preceded by a valid expression.")
-        }
-        let rep: Repeater = match c {
-            '?' => ZeroOne, '*' => ZeroMore, '+' => OneMore,
-            _ => panic!("Not a valid repeater operator."),
-        };
-
-        match self.peek(1) {
-            Some('*') | Some('+') =>
-                return self.err(
-                    "Double repeat operators are not supported."),
-            _ => {},
-        }
-        let ast = match self.stack.pop().unwrap() { // checked empty stack ^^
-            Paren(_, _, _) | Bar | Expr(Nothing) | Expr(Rep(_, _, _)) =>
-                return self.err("A repreat operator must be preceded by a \
-                                 valid expression."),
-            Expr(Begin(_)) | Expr(End(_)) | Expr(WordBoundary(_)) =>
-                return self.err(
-                    "Repeat arguments cannot be empty width assertions."),
-            Expr(ast) => ast,
-        };
-        let greed = try!(self.get_next_greedy());
-        self.push(Rep(Box::new(ast), rep, greed));
-        Ok(())
-    }
-
-    fn push_literal(&mut self, c: char) -> Result<(), Error> {
-        let flags = self.flags;
-        match c {
-            '.' => {
-                self.push(Dot(flags))
-            }
-            '^' => {
-                self.push(Begin(flags))
-            }
-            '$' => {
-                self.push(End(flags))
-            }
-            _ => {
-                self.push(Literal(c, flags))
-            }
-        }
-        Ok(())
-    }
-
-    // Parses all forms of character classes.
-    // Assumes that '[' is the current character.
-    fn parse_class(&mut self) -> Result<(), Error> {
-        let negated =
-            if self.peek_is(1, '^') {
-                try!(self.expect('^'));
-                true
-            } else {
-                false
-            };
-        let mut ranges: Vec<(char, char)> = vec!();
-
-        while self.peek_is(1, '-') {
-            try!(self.expect('-'));
-            ranges.push(('-', '-'))
-        }
-        loop {
-            try!(self.noteof("a closing ']' or a non-empty character class)"));
-            let mut c = self.cur();
-            match c {
-                '[' =>
-                    match self.try_parse_ascii() {
-                        Some(AstClass(mut more_ranges, flags)) => {
-                            more_ranges = combine_ranges(more_ranges);
-                            if flags & FLAG_NEGATED > 0 {
-                                more_ranges = invert_ranges(more_ranges);
-                            }
-                            ranges.extend(more_ranges);
-                            continue
-                        }
-                        Some(ast) =>
-                            panic!("Expected Class AST but got '{:?}'", ast),
-                        // Just drop down and try to add as a regular character.
-                        None => {},
-                    },
-                '\\' => {
-                    match try!(self.parse_escape()) {
-                        AstClass(mut more_ranges, flags) => {
-                            more_ranges = combine_ranges(more_ranges);
-                            if flags & FLAG_NEGATED > 0 {
-                                more_ranges = invert_ranges(more_ranges);
-                            }
-                            ranges.extend(more_ranges);
-                            continue
-                        }
-                        Literal(c2, _) => c = c2, // process below
-                        Begin(_) | End(_) | WordBoundary(_) =>
-                            return self.err(
-                                "\\A, \\z, \\b and \\B are not valid escape \
-                                 sequences inside a character class."),
-                        ast => panic!("Unexpected AST item '{:?}'", ast),
-                    }
-                }
-                ']' if ranges.len() > 0 => {
-                    if self.flags & FLAG_NOCASE > 0 {
-                        ranges = case_fold_and_combine_ranges(ranges)
-                    } else {
-                        ranges = combine_ranges(ranges);
-                    }
-                    if negated {
-                        ranges = invert_ranges(ranges);
-                    }
-                    let flags = self.flags & FLAG_NOCASE;
-                    self.push(AstClass(ranges, flags));
-                    return Ok(())
-                }
-                _ => {}
-            }
-
-            if self.peek_is(1, '-') && !self.peek_is(2, ']') {
-                try!(self.expect('-'));
-                // The regex can't end here.
-                try!(self.noteof("not a ']'"));
-                // End the range with a single character or character escape.
-                let mut c2 = self.cur();
-                if c2 == '\\' {
-                    match try!(self.parse_escape()) {
-                        Literal(c3, _) => c2 = c3, // allow literal escapes below
-                        ast => return self.err(&format!(
-                            "Expected a literal, but got {:?}.", ast)),
-                    }
-                }
-                if c2 < c {
-                    return self.err(&format!(
-                        "Invalid character class range '{}-{}'", c, c2))
-                }
-                ranges.push((c, self.cur()))
-            } else {
-                ranges.push((c, c))
-            }
-        }
-    }
-
-    // Tries to parse an ASCII character class of the form [:name:].
-    // If successful, returns an AST character class corresponding to name
-    // and moves the parser to the final ']' character.
-    // If unsuccessful, no state is changed and None is returned.
-    // Assumes that '[' is the current character.
-    fn try_parse_ascii(&mut self) -> Option<Ast> {
-        if !self.peek_is(1, ':') {
-            return None
-        }
-        let closer =
-            match self.pos(']') {
-                Some(i) => i,
-                None => return None,
-            };
-        if self.chars[closer-1] != ':' {
-            return None
-        }
-        if closer - self.chari <= 3 {
-            return None
-        }
-        let mut name_start = self.chari + 2;
-        let negated =
-            if self.peek_is(2, '^') {
-                name_start += 1;
-                FLAG_NEGATED
-            } else {
-                FLAG_EMPTY
-            };
-        let name = self.slice(name_start, closer - 1);
-        match find_class(ASCII_CLASSES, &name) {
-            None => None,
-            Some(ranges) => {
-                self.chari = closer;
-                let flags = negated | (self.flags & FLAG_NOCASE);
-                Some(AstClass(combine_ranges(ranges), flags))
-            }
-        }
-    }
-
-    // Parses counted repetition. Supports:
-    // {n}, {n,}, {n,m}, {n}?, {n,}? and {n,m}?
-    // Assumes that '{' is the current character.
-    // Returns either an error or moves the parser to the final '}' character.
-    // (Or the '?' character if not greedy.)
-    fn parse_counted(&mut self) -> Result<(), Error> {
-        // Scan until the closing '}' and grab the stuff in {}.
-        let start = self.chari;
-        let closer =
-            match self.pos('}') {
-                Some(i) => i,
-                None => {
-                    return self.err(&format!("No closing brace for counted \
-                                              repetition starting at position \
-                                              {}.", start))
-                }
-            };
-        self.chari = closer;
-        let greed = try!(self.get_next_greedy());
-        let inner = self.chars[(start + 1)..closer].iter().cloned().collect::<String>();
-
-        // Parse the min and max values from the regex.
-        let (mut min, mut max): (usize, Option<usize>);
-        if !inner.contains(",") {
-            min = try!(self.parse_usize(&inner));
-            max = Some(min);
-        } else {
-            let pieces: Vec<&str> = inner.splitn(2, ',').collect();
-            let (smin, smax) = (pieces[0], pieces[1]);
-            if smin.len() == 0 {
-                return self.err("Max repetitions cannot be specified \
-                                    without min repetitions.")
-            }
-            min = try!(self.parse_usize(smin));
-            max =
-                if smax.len() == 0 {
-                    None
-                } else {
-                    Some(try!(self.parse_usize(smax)))
-                };
-        }
-
-        // Do some bounds checking and make sure max >= min.
-        if min > MAX_REPEAT {
-            return self.err(&format!(
-                "{} exceeds maximum allowed repetitions ({})",
-                min, MAX_REPEAT));
-        }
-        if max.is_some() {
-            let m = max.unwrap();
-            if m > MAX_REPEAT {
-                return self.err(&format!(
-                    "{} exceeds maximum allowed repetitions ({})",
-                    m, MAX_REPEAT));
-            }
-            if m < min {
-                return self.err(&format!(
-                    "Max repetitions ({}) cannot be smaller than min \
-                     repetitions ({}).", m, min));
-            }
-        }
-
-        // Now manipulate the AST be repeating elements.
-        if max.is_none() {
-            // Require N copies of what's on the stack and then repeat it.
-            let ast = try!(self.pop_ast());
-            for _ in 0..min {
-                self.push(ast.clone())
-            }
-            self.push(Rep(Box::new(ast), ZeroMore, greed));
-        } else {
-            // Require N copies of what's on the stack and then repeat it
-            // up to M times optionally.
-            let ast = try!(self.pop_ast());
-            for _ in 0..min {
-                self.push(ast.clone())
-            }
-            if let Some(max) = max {
-                for _ in min..max {
-                    self.push(Rep(Box::new(ast.clone()), ZeroOne, greed))
-                }
-            }
-            // It's possible that we popped something off the stack but
-            // never put anything back on it. To keep things simple, add
-            // a no-op expression.
-            if min == 0 && (max.is_none() || max == Some(0)) {
-                self.push(Nothing)
-            }
-        }
-        Ok(())
-    }
-
-    // Parses all escape sequences.
-    // Assumes that '\' is the current character.
-    fn parse_escape(&mut self) -> Result<Ast, Error> {
-        try!(self.noteof("an escape sequence following a '\\'"));
-
-        let c = self.cur();
-        if is_punct(c) {
-            return Ok(Literal(c, FLAG_EMPTY))
-        }
-        match c {
-            'a' => Ok(Literal('\x07', FLAG_EMPTY)),
-            'f' => Ok(Literal('\x0C', FLAG_EMPTY)),
-            't' => Ok(Literal('\t', FLAG_EMPTY)),
-            'n' => Ok(Literal('\n', FLAG_EMPTY)),
-            'r' => Ok(Literal('\r', FLAG_EMPTY)),
-            'v' => Ok(Literal('\x0B', FLAG_EMPTY)),
-            'A' => Ok(Begin(FLAG_EMPTY)),
-            'z' => Ok(End(FLAG_EMPTY)),
-            'b' => Ok(WordBoundary(FLAG_EMPTY)),
-            'B' => Ok(WordBoundary(FLAG_NEGATED)),
-            '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => Ok(try!(self.parse_octal())),
-            'x' => Ok(try!(self.parse_hex())),
-            'p' | 'P' => Ok(try!(self.parse_unicode_name())),
-            'd' | 'D' | 's' | 'S' | 'w' | 'W' => {
-                let ranges = perl_unicode_class(c);
-                let mut flags = self.flags & FLAG_NOCASE;
-                if c.is_uppercase() { flags |= FLAG_NEGATED }
-                Ok(AstClass(ranges, flags))
-            }
-            _ => {
-                self.err(&format!("Invalid escape sequence '\\\\{}'", c))
-            }
-        }
-    }
-
-    // Parses a Unicode character class name, either of the form \pF where
-    // F is a one letter Unicode class name or of the form \p{name} where
-    // name is the Unicode class name.
-    // Assumes that \p or \P has been read (and 'p' or 'P' is the current
-    // character).
-    fn parse_unicode_name(&mut self) -> Result<Ast, Error> {
-        let negated = if self.cur() == 'P' { FLAG_NEGATED } else { FLAG_EMPTY };
-        let mut name: String;
-        if self.peek_is(1, '{') {
-            try!(self.expect('{'));
-            let closer =
-                match self.pos('}') {
-                    Some(i) => i,
-                    None => return self.err(&format!(
-                        "Missing '}}' for unclosed '{{' at position {}",
-                        self.chari)),
-                };
-            if closer - self.chari + 1 == 0 {
-                return self.err("No Unicode class name found.")
-            }
-            name = self.slice(self.chari + 1, closer);
-            self.chari = closer;
-        } else {
-            if self.chari + 1 >= self.chars.len() {
-                return self.err("No single letter Unicode class name found.")
-            }
-            name = self.slice(self.chari + 1, self.chari + 2);
-            self.chari += 1;
-        }
-        match find_class(UNICODE_CLASSES, &name) {
-            None => {
-                return self.err(&format!("Could not find Unicode class '{}'",
-                                         name))
-            }
-            Some(ranges) => {
-                Ok(AstClass(ranges, negated | (self.flags & FLAG_NOCASE)))
-            }
-        }
-    }
-
-    // Parses an octal number, up to 3 digits.
-    // Assumes that \n has been read, where n is the first digit.
-    fn parse_octal(&mut self) -> Result<Ast, Error> {
-        let start = self.chari;
-        let mut end = start + 1;
-        let (d2, d3) = (self.peek(1), self.peek(2));
-        if d2 >= Some('0') && d2 <= Some('7') {
-            try!(self.noteof("expected octal character in [0-7]"));
-            end += 1;
-            if d3 >= Some('0') && d3 <= Some('7') {
-                try!(self.noteof("expected octal character in [0-7]"));
-                end += 1;
-            }
-        }
-        match from_str_radix_pos_integer(&self.slice(start, end), 8) {
-            Ok(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
-            Err(err) => self.err(&err),
-        }
-    }
-
-    // Parse a hex number. Either exactly two digits or anything in {}.
-    // Assumes that \x has been read.
-    fn parse_hex(&mut self) -> Result<Ast, Error> {
-        if !self.peek_is(1, '{') {
-            try!(self.expect('{'));
-            return self.parse_hex_two()
-        }
-        let start = self.chari + 2;
-        let closer =
-            match self.pos('}') {
-                None => {
-                    return self.err(&format!("Missing '}}' for unclosed \
-                                              '{{' at position {}", start))
-                }
-                Some(i) => i,
-            };
-        self.chari = closer;
-        self.parse_hex_digits(&self.slice(start, closer))
-    }
-
-    // Parses a two-digit hex number.
-    // Assumes that \xn has been read, where n is the first digit and is the
-    // current character.
-    // After return, parser will point at the second digit.
-    fn parse_hex_two(&mut self) -> Result<Ast, Error> {
-        let (start, end) = (self.chari, self.chari + 2);
-        let bad = self.slice(start - 2, self.chars.len());
-        try!(self.noteof(&format!("Invalid hex escape sequence '{}'", bad)));
-        self.parse_hex_digits(&self.slice(start, end))
-    }
-
-    // Parses `s` as a hexadecimal number.
-    fn parse_hex_digits(&self, s: &str) -> Result<Ast, Error> {
-        match from_str_radix_pos_integer(s, 16) {
-            Ok(n) => Ok(Literal(try!(self.char_from_u32(n)), FLAG_EMPTY)),
-            Err(err) => self.err(&err),
-        }
-    }
-
-    // Parses a named capture.
-    // Assumes that '(?P<' has been consumed and that the current character
-    // is '<'.
-    // When done, parser will be at the closing '>' character.
-    fn parse_named_capture(&mut self) -> Result<(), Error> {
-        try!(self.noteof("a capture name"));
-        let closer =
-            match self.pos('>') {
-                Some(i) => i,
-                None => return self.err("Capture name must end with '>'."),
-            };
-        if closer - self.chari == 0 {
-            return self.err("Capture names must have at least 1 character.")
-        }
-        let name = self.slice(self.chari, closer);
-        if !name.chars().all(is_valid_cap) {
-            return self.err(
-                "Capture names can only have underscores, letters and digits.")
-        }
-        if self.names.contains(&name) {
-            return self.err(&format!("Duplicate capture group name '{}'.", name))
-        }
-        self.names.push(name.clone());
-        self.chari = closer;
-        self.caps += 1;
-        self.stack.push(Paren(self.flags, self.caps, name));
-        Ok(())
-    }
-
-    // Parses non-capture groups and options.
-    // Assumes that '(?' has already been consumed and '?' is the current
-    // character.
-    fn parse_group_opts(&mut self) -> Result<(), Error> {
-        if self.peek_is(1, 'P') && self.peek_is(2, '<') {
-            try!(self.expect('P')); try!(self.expect('<'));
-            return self.parse_named_capture()
-        }
-        let start = self.chari;
-        let mut flags = self.flags;
-        let mut sign = 1;
-        let mut saw_flag = false;
-        loop {
-            try!(self.noteof("expected non-empty set of flags or closing ')'"));
-            match self.cur() {
-                'i' => { flags = flags | FLAG_NOCASE;     saw_flag = true},
-                'm' => { flags = flags | FLAG_MULTI;      saw_flag = true},
-                's' => { flags = flags | FLAG_DOTNL;      saw_flag = true},
-                'U' => { flags = flags | FLAG_SWAP_GREED; saw_flag = true},
-                '-' => {
-                    if sign < 0 {
-                        return self.err(&format!(
-                            "Cannot negate flags twice in '{}'.",
-                            self.slice(start, self.chari + 1)))
-                    }
-                    sign = -1;
-                    saw_flag = false;
-                    flags = flags ^ flags;
-                }
-                ':' | ')' => {
-                    if sign < 0 {
-                        if !saw_flag {
-                            return self.err(&format!(
-                                "A valid flag does not follow negation in '{}'",
-                                self.slice(start, self.chari + 1)))
-                        }
-                        flags = flags ^ flags;
-                    }
-                    if self.cur() == ':' {
-                        // Save the old flags with the opening paren.
-                        self.stack.push(Paren(self.flags, 0, "".to_string()));
-                    }
-                    self.flags = flags;
-                    return Ok(())
-                }
-                _ => return self.err(&format!(
-                    "Unrecognized flag '{}'.", self.cur())),
-            }
-        }
-    }
-
-    // Peeks at the next character and returns whether it's ungreedy or not.
-    // If it is, then the next character is consumed.
-    fn get_next_greedy(&mut self) -> Result<Greed, Error> {
-        Ok(if self.peek_is(1, '?') {
-            try!(self.expect('?'));
-            Ungreedy
-        } else {
-            Greedy
-        }.swap(self.flags & FLAG_SWAP_GREED > 0))
-    }
-
-    // Searches the stack (starting at the top) until it finds an expression
-    // for which `pred` returns true. The index of that expression in the
-    // stack is returned.
-    // If there's no match, then one of two things happens depending on the
-    // values of `allow_start`. When it's true, then `0` will be returned.
-    // Otherwise, an error will be returned.
-    // Generally, `allow_start` is only true when you're *not* expecting an
-    // opening parenthesis.
-    fn pos_last<P>(&self, allow_start: bool, pred: P) -> Result<usize, Error> where
-        P: FnMut(&BuildAst) -> bool,
-   {
-        let from = match self.stack.iter().rev().position(pred) {
-            Some(i) => i,
-            None => {
-                if allow_start {
-                    self.stack.len()
-                } else {
-                    return self.err("No matching opening parenthesis.")
-                }
-            }
-        };
-        // Adjust index since 'from' is for the reversed stack.
-        // Also, don't include the '(' or '|'.
-        Ok(self.stack.len() - from)
-    }
-
-    // concat starts at `from` in the parser's stack and concatenates all
-    // expressions up to the top of the stack. The resulting concatenation is
-    // then pushed on to the stack.
-    // Usually `from` corresponds to the position of an opening parenthesis,
-    // a '|' (alternation) or the start of the entire expression.
-    fn concat(&mut self, from: usize) -> Result<(), Error> {
-        let ast = try!(self.build_from(from, concat_flatten));
-        self.push(ast);
-        Ok(())
-    }
-
-    // concat starts at `from` in the parser's stack and alternates all
-    // expressions up to the top of the stack. The resulting alternation is
-    // then pushed on to the stack.
-    // Usually `from` corresponds to the position of an opening parenthesis
-    // or the start of the entire expression.
-    // This will also drop any opening parens or alternation bars found in
-    // the intermediate AST.
-    fn alternate(&mut self, mut from: usize) -> Result<(), Error> {
-        // Unlike in the concatenation case, we want 'build_from' to continue
-        // all the way to the opening left paren (so it will be popped off and
-        // thrown away). But be careful with overflow---we can't count on the
-        // open paren to be there.
-        if from > 0 { from = from - 1}
-        let ast = try!(self.build_from(from, |l,r| Alt(Box::new(l), Box::new(r))));
-        self.push(ast);
-        Ok(())
-    }
-
-    // build_from combines all AST elements starting at 'from' in the
-    // parser's stack using 'mk' to combine them. If any such element is not an
-    // AST then it is popped off the stack and ignored.
-    fn build_from<F>(&mut self, from: usize, mut mk: F) -> Result<Ast, Error> where
-        F: FnMut(Ast, Ast) -> Ast,
-    {
-        if from >= self.stack.len() {
-            return self.err("Empty group or alternate not allowed.")
-        }
-
-        let mut combined = try!(self.pop_ast());
-        let mut i = self.stack.len();
-        while i > from {
-            i = i - 1;
-            match self.stack.pop().unwrap() {
-                Expr(x) => combined = mk(x, combined),
-                _ => {},
-            }
-        }
-        Ok(combined)
-    }
-
-    fn parse_usize(&self, s: &str) -> Result<usize, Error> {
-        match s.parse::<usize>() {
-            Ok(i) => Ok(i),
-            Err(_) => {
-                self.err(&format!("Expected an unsigned integer but got '{}'.",
-                                  s))
-            }
-        }
-    }
-
-    fn char_from_u32(&self, n: u32) -> Result<char, Error> {
-        match char::from_u32(n) {
-            Some(c) => Ok(c),
-            None => {
-                self.err(&format!("Could not decode '{}' to unicode \
-                                   character.", n))
-            }
-        }
-    }
-
-    fn pos(&self, c: char) -> Option<usize> {
-        self.chars.iter()
-            .skip(self.chari).position(|&c2| c2 == c).map(|i| self.chari + i)
-    }
-
-    fn err<T>(&self, msg: &str) -> Result<T, Error> {
-        Err(Error {
-            pos: self.chari,
-            msg: msg.to_string(),
-        })
-    }
-
-    fn peek(&self, offset: usize) -> Option<char> {
-        if self.chari + offset >= self.chars.len() {
-            return None
-        }
-        Some(self.chars[self.chari + offset])
-    }
-
-    fn peek_is(&self, offset: usize, is: char) -> bool {
-        self.peek(offset) == Some(is)
-    }
-
-    fn cur(&self) -> char {
-        self.chars[self.chari]
-    }
-
-    fn slice(&self, start: usize, end: usize) -> String {
-        self.chars[start..end].iter().cloned().collect()
-    }
-}
-
-// Given an unordered collection of character ranges, combine_ranges returns
-// an ordered sequence of character ranges where no two ranges overlap. They
-// are ordered from least to greatest (using start position).
-fn combine_ranges(mut unordered: Vec<(char, char)>) -> Vec<(char, char)> {
-    // Returns true iff the two character classes overlap or share a boundary.
-    // e.g., ('a', 'g') and ('h', 'm') would return true.
-    fn should_merge((a, b): (char, char), (x, y): (char, char)) -> bool {
-        cmp::max(a, x) <= inc_char(cmp::min(b, y))
-    }
-
-    // This is currently O(n^2), but I think with sufficient cleverness,
-    // it can be reduced to O(n) **if necessary**.
-    unordered.sort();
-    let mut ordered: Vec<(char, char)> = Vec::with_capacity(unordered.len());
-    for (us, ue) in unordered.into_iter() {
-        let (mut us, mut ue) = (us, ue);
-        assert!(us <= ue);
-        let mut which: Option<usize> = None;
-        for (i, &(os, oe)) in ordered.iter().enumerate() {
-            if should_merge((us, ue), (os, oe)) {
-                us = cmp::min(us, os);
-                ue = cmp::max(ue, oe);
-                which = Some(i);
-                break
-            }
-        }
-        match which {
-            None => ordered.push((us, ue)),
-            Some(i) => ordered[i] = (us, ue),
-        }
-    }
-    ordered.sort();
-    ordered
-}
-
-// FIXME: Is there a clever way to do this by considering ranges rather than individual chars?
-// E.g. binary search for overlap with entries in unicode::case_folding::C_plus_S_table
-fn case_fold_and_combine_ranges(ranges: Vec<(char, char)>) -> Vec<(char, char)> {
-    if ranges.is_empty() {
-        return ranges
-    }
-    let mut chars: Vec<char> = ranges
-        .into_iter()
-        .flat_map(|(start, end)| start as u32 .. end as u32 + 1)
-        .filter_map(char::from_u32)
-        .map(simple_case_fold)
-        .collect();
-    chars.sort();
-    chars.dedup();
-    let mut chars = chars.into_iter();
-    let mut start = chars.next().unwrap();
-    let mut end = start;
-    let mut ranges = Vec::new();
-    for c in chars {
-        if c != inc_char(end) {
-            ranges.push((start, end));
-            start = c;
-        }
-        end = c;
-    }
-    ranges.push((start, end));
-    ranges
-}
-
-fn invert_ranges(ranges: Vec<(char, char)>) -> Vec<(char, char)> {
-    if ranges.is_empty() { return ranges; }
-
-    let mut inv = Vec::with_capacity(ranges.len());
-    if ranges[0].0 > '\x00' {
-        inv.push(('\x00', dec_char(ranges[0].0)));
-    }
-    for win in ranges.windows(2) {
-        let ((_, e1), (s2, _)) = (win[0], win[1]);
-        inv.push((inc_char(e1), dec_char(s2)));
-    }
-    if ranges[ranges.len() - 1].1 < char::MAX {
-        inv.push((inc_char(ranges[ranges.len() - 1].1), char::MAX));
-    }
-    inv
-}
-
-fn inc_char(c: char) -> char {
-    assert!(c < char::MAX);
-    match c {
-        '\u{D7FF}' => '\u{E000}',
-        c => char::from_u32(c as u32 + 1).unwrap(),
-    }
-}
-
-fn dec_char(c: char) -> char {
-    assert!(c > '\x00');
-    match c {
-        '\u{E000}' => '\u{D7FF}',
-        c => char::from_u32(c as u32 - 1).unwrap(),
-    }
-}
-
-// Constructs a Unicode friendly Perl character class from \d, \s or \w
-// (or any of their negated forms). Note that this does not handle negation.
-fn perl_unicode_class(which: char) -> Vec<(char, char)> {
-    match which {
-        'd' | 'D' => PERLD.to_vec(),
-        's' | 'S' => PERLS.to_vec(),
-        'w' | 'W' => PERLW.to_vec(),
-        _ => unreachable!(),
-    }
-}
-
-// Returns a concatenation of two expressions. This also guarantees that a
-// `Cat` expression will never be a direct child of another `Cat` expression.
-fn concat_flatten(x: Ast, y: Ast) -> Ast {
-    match (x, y) {
-        (Cat(mut xs), Cat(ys)) => { xs.extend(ys.into_iter()); Cat(xs) }
-        (Cat(mut xs), ast) => { xs.push(ast); Cat(xs) }
-        (ast, Cat(mut xs)) => { xs.insert(0, ast); Cat(xs) }
-        (ast1, ast2) => Cat(vec!(ast1, ast2)),
-    }
-}
-
-fn from_str_radix_pos_integer(s: &str, radix: u32) -> Result<u32, String> {
-    let mut num = 0;
-    for c in s.chars() {
-        match c.to_digit(radix) {
-            None => return Err(
-                format!("Could not parse '{}' as a hex number.", s)),
-            Some(n) => {
-                num *= radix;
-                num += n;
-            }
-        }
-    }
-    Ok(num)
-}
-
-pub fn is_punct(c: char) -> bool {
-    match c {
-        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
-        '[' | ']' | '{' | '}' | '^' | '$' => true,
-        _ => false,
-    }
-}
-
-fn is_valid_cap(c: char) -> bool {
-    c == '_' || (c >= '0' && c <= '9')
-    || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-}
-
-fn find_class(classes: NamedClasses, name: &str) -> Option<Vec<(char, char)>> {
-    match classes.binary_search_by(|&(s, _)| s.cmp(name)) {
-        Ok(i) => Some(classes[i].1.to_vec()),
-        Err(_) => None,
-    }
-}
-
-type Class = &'static [(char, char)];
-type NamedClasses = &'static [(&'static str, Class)];
-
-static ASCII_CLASSES: NamedClasses = &[
-    // Classes must be in alphabetical order so that bsearch works.
-    // [:alnum:]      alphanumeric (== [0-9A-Za-z])
-    // [:alpha:]      alphabetic (== [A-Za-z])
-    // [:ascii:]      ASCII (== [\x00-\x7F])
-    // [:blank:]      blank (== [\t ])
-    // [:cntrl:]      control (== [\x00-\x1F\x7F])
-    // [:digit:]      digits (== [0-9])
-    // [:graph:]      graphical (== [!-~])
-    // [:lower:]      lower case (== [a-z])
-    // [:print:]      printable (== [ -~] == [ [:graph:]])
-    // [:punct:]      punctuation (== [!-/:-@[-`{-~])
-    // [:space:]      whitespace (== [\t\n\v\f\r ])
-    // [:upper:]      upper case (== [A-Z])
-    // [:word:]       word characters (== [0-9A-Za-z_])
-    // [:xdigit:]     hex digit (== [0-9A-Fa-f])
-    // Taken from: http://golang.org/pkg/regex/syntax/
-    ("alnum", &ALNUM),
-    ("alpha", &ALPHA),
-    ("ascii", &ASCII),
-    ("blank", &BLANK),
-    ("cntrl", &CNTRL),
-    ("digit", &DIGIT),
-    ("graph", &GRAPH),
-    ("lower", &LOWER),
-    ("print", &PRINT),
-    ("punct", &PUNCT),
-    ("space", &SPACE),
-    ("upper", &UPPER),
-    ("word", &WORD),
-    ("xdigit", &XDIGIT),
-];
-
-const ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
-const ALPHA: Class = &[('A', 'Z'), ('a', 'z')];
-const ASCII: Class = &[('\x00', '\x7F')];
-const BLANK: Class = &[(' ', ' '), ('\t', '\t')];
-const CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
-const DIGIT: Class = &[('0', '9')];
-const GRAPH: Class = &[('!', '~')];
-const LOWER: Class = &[('a', 'z')];
-const PRINT: Class = &[(' ', '~')];
-const PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
-const SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'),
-                        ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')];
-const UPPER: Class = &[('A', 'Z')];
-const WORD: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z'), ('_', '_')];
-const XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')];
diff --git a/src/re.rs b/src/re.rs
index bfdcf5317a..5bf2c6b645 100644
--- a/src/re.rs
+++ b/src/re.rs
@@ -17,7 +17,7 @@ use std::str::pattern::{Pattern, Searcher, SearchStep};
 use std::str::FromStr;
 
 use compile::Program;
-use parse;
+use syntax;
 use vm;
 use vm::CaptureLocs;
 use vm::MatchKind::{self, Exists, Location, Submatches};
@@ -32,7 +32,7 @@ use self::Regex::*;
 pub fn quote(text: &str) -> String {
     let mut quoted = String::with_capacity(text.len());
     for c in text.chars() {
-        if parse::is_punct(c) {
+        if syntax::is_punct(c) {
             quoted.push('\\')
         }
         quoted.push(c);
@@ -47,10 +47,54 @@ pub fn quote(text: &str) -> String {
 ///
 /// To find submatches, split or replace text, you'll need to compile an
 /// expression first.
-pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
+pub fn is_match(regex: &str, text: &str) -> Result<bool, Error> {
     Regex::new(regex).map(|r| r.is_match(text))
 }
 
+/// An error that occurred during parsing or compiling a regular expression.
+#[derive(Debug)]
+pub enum Error {
+    /// A syntax error.
+    Syntax(syntax::Error),
+    /// The compiled program exceeded the set size limit.
+    /// The argument is the size limit imposed.
+    CompiledTooBig(usize),
+}
+
+impl ::std::error::Error for Error {
+    fn description(&self) -> &str {
+        match *self {
+            Error::Syntax(ref err) => err.description(),
+            Error::CompiledTooBig(_) => "compiled program too big",
+        }
+    }
+
+    fn cause(&self) -> Option<&::std::error::Error> {
+        match *self {
+            Error::Syntax(ref err) => Some(err),
+            _ => None,
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Syntax(ref err) => err.fmt(f),
+            Error::CompiledTooBig(limit) => {
+                write!(f, "Compiled regex exceeds size limit of {} bytes.",
+                       limit)
+            }
+        }
+    }
+}
+
+impl From<syntax::Error> for Error {
+    fn from(err: syntax::Error) -> Error {
+        Error::Syntax(err)
+    }
+}
+
 /// A compiled regular expression
 ///
 /// It is represented as either a sequence of bytecode instructions (dynamic)
@@ -159,9 +203,10 @@ impl fmt::Debug for Regex {
     }
 }
 
-/// Equality comparison is based on the original string. It is possible that different regular
-/// expressions have the same matching behavior, but are still compared unequal.  For example,
-/// `\d+` and `\d\d*` match the same set of strings, but are not considered equal.
+/// Equality comparison is based on the original string. It is possible that
+/// different regular expressions have the same matching behavior, but are
+/// still compared unequal. For example, `\d+` and `\d\d*` match the same set
+/// of strings, but are not considered equal.
 impl PartialEq for Regex {
     fn eq(&self, other: &Regex) -> bool {
         self.as_str() == other.as_str()
@@ -171,10 +216,10 @@ impl PartialEq for Regex {
 impl Eq for Regex {}
 
 impl FromStr for Regex {
-    type Err = parse::Error;
+    type Err = Error;
 
     /// Attempts to parse a string into a regular expression
-    fn from_str(s: &str) -> Result<Regex, parse::Error> {
+    fn from_str(s: &str) -> Result<Regex, Error> {
         Regex::new(s)
     }
 }
@@ -184,9 +229,20 @@ impl Regex {
     /// used repeatedly to search, split or replace text in a string.
     ///
     /// If an invalid expression is given, then an error is returned.
-    pub fn new(re: &str) -> Result<Regex, parse::Error> {
-        let ast = try!(parse::parse(re));
-        let (prog, names) = Program::new(ast);
+    pub fn new(re: &str) -> Result<Regex, Error> {
+        Regex::with_size_limit(10 * (1 << 20), re)
+    }
+
+    /// Compiles a dynamic regular expression with the given size limit.
+    ///
+    /// The size limit is applied to the size of the *compiled* data structure.
+    /// If the data structure exceeds the size given, then an error is
+    /// returned.
+    ///
+    /// The default size limit used in `new` is 10MB.
+    pub fn with_size_limit(size: usize, re: &str) -> Result<Regex, Error> {
+        let ast = try!(syntax::Expr::parse(re));
+        let (prog, names) = try!(Program::new(ast, size));
         Ok(Dynamic(ExDynamic {
             original: re.to_string(),
             names: names,
@@ -194,6 +250,7 @@ impl Regex {
         }))
     }
 
+
     /// Returns true if and only if the regex matches the string given.
     ///
     /// # Example
@@ -790,13 +847,19 @@ impl<'t> Captures<'t> {
     /// To write a literal `$` use `$$`.
     pub fn expand(&self, text: &str) -> String {
         // How evil can you get?
-        // FIXME: Don't use regexes for this. It's completely unnecessary.
-        let re = Regex::new(r"(^|[^$]|\b)\$(\d+|\w+)").unwrap();
+        let re = Regex::new(r"(?x)
+          (?P<before>^|\b|[^$]) # Ignore `$$name`.
+          \$
+          (?P<name> # Match the actual capture name. Can be...
+            [0-9]+  # A sequence of digits (for indexed captures), or...
+            |
+            [_a-zA-Z][_0-9a-zA-Z]* # A name for named captures.
+          )
+        ").unwrap();
         let text = re.replace_all(text, |refs: &Captures| -> String {
-            let pre = refs.at(1).unwrap_or("");
-            let name = refs.at(2).unwrap_or("");
-            format!("{}{}", pre,
-                    match name.parse::<usize>() {
+            let before = refs.name("before").unwrap_or("");
+            let name = refs.name("name").unwrap_or("");
+            format!("{}{}", before, match name.parse::<usize>() {
                 Err(_) => self.name(name).unwrap_or("").to_string(),
                 Ok(i) => self.at(i).unwrap_or("").to_string(),
             })
@@ -809,7 +872,7 @@ impl<'t> Captures<'t> {
     #[inline]
     pub fn len(&self) -> usize { self.locs.len() / 2 }
 
-    /// Returns if there are no captured groups.
+    /// Returns true if and only if there are no captured groups.
     #[inline]
     pub fn is_empty(&self) -> bool { self.len() == 0 }
 }
diff --git a/src/vm.rs b/src/vm.rs
index 84da8e5089..7fcd7fded8 100644
--- a/src/vm.rs
+++ b/src/vm.rs
@@ -36,18 +36,12 @@
 use self::MatchKind::*;
 use self::StepState::*;
 
-use std::cmp::{self, Ordering};
-use std::iter::repeat;
+use std::cmp;
 use std::mem;
 
 use compile::Program;
-use compile::Inst::{
-    Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd, EmptyWordBoundary,
-    Save, Jump, Split,
-};
-use parse::{FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, FLAG_NEGATED};
-use unicode::regex::PERLW;
-use unicode::case_folding;
+use compile::Inst::*;
+use syntax;
 
 pub type CaptureLocs = Vec<Option<usize>>;
 
@@ -122,18 +116,16 @@ impl<'r, 't> Nfa<'r, 't> {
         let ninsts = self.prog.insts.len();
         let mut clist = Threads::new(self.which, ninsts, ncaps);
         let mut nlist = Threads::new(self.which, ninsts, ncaps);
-
-        let mut groups = repeat(None).take(ncaps * 2).collect::<Vec<_>>();
+        let mut groups = vec![None; ncaps * 2];
 
         // Determine if the expression starts with a '^' so we can avoid
         // simulating .*?
         // Make sure multi-line mode isn't enabled for it, otherwise we can't
         // drop the initial .*?
-        let prefix_anchor =
-            match self.prog.insts[1] {
-                EmptyBegin(flags) if flags & FLAG_MULTI == 0 => true,
-                _ => false,
-            };
+        let prefix_anchor = match self.prog.insts[1] {
+            StartText => true,
+            _ => false,
+        };
 
         self.ic = self.start;
         let mut next_ic = self.chars.set(self.start);
@@ -224,30 +216,24 @@ impl<'r, 't> Nfa<'r, 't> {
                     }
                 }
             }
-            OneChar(c, flags) => {
-                if self.char_eq(flags & FLAG_NOCASE > 0, self.chars.prev, c) {
+            OneChar { c, casei } => {
+                if self.char_eq(casei, self.chars.prev, c) {
                     self.add(nlist, pc+1, caps);
                 }
             }
-            CharClass(ref ranges, flags) => {
-                if let Some(mut c) = self.chars.prev {
-                    let negate = flags & FLAG_NEGATED > 0;
-                    if flags & FLAG_NOCASE > 0 {
-                        c = simple_case_fold(c);
-                    }
-                    let found = ranges.binary_search_by(|&rc| class_cmp(c, rc)).is_ok();
-                    if found ^ negate {
-                        self.add(nlist, pc+1, caps);
-                    }
+            CharClass(ref cls) => {
+                if self.chars.prev.map(|c| cls.matches(c)).unwrap_or(false) {
+                    self.add(nlist, pc+1, caps);
                 }
             }
-            Any(flags) => {
-                if flags & FLAG_DOTNL > 0
-                   || !self.char_eq(false, self.chars.prev, '\n') {
+            Any => self.add(nlist, pc+1, caps),
+            AnyNoNL => {
+                if !self.char_eq(false, self.chars.prev, '\n') {
                     self.add(nlist, pc+1, caps)
                 }
             }
-            EmptyBegin(_) | EmptyEnd(_) | EmptyWordBoundary(_)
+            StartLine | EndLine | StartText | EndText
+            | WordBoundary | NotWordBoundary
             | Save(_) | Jump(_) | Split(_, _) => {},
         }
         StepContinue
@@ -272,28 +258,42 @@ impl<'r, 't> Nfa<'r, 't> {
         // We make a minor optimization by indicating that the state is "empty"
         // so that its capture groups are not filled in.
         match self.prog.insts[pc] {
-            EmptyBegin(flags) => {
-                let multi = flags & FLAG_MULTI > 0;
+            StartLine => {
                 nlist.add(pc, groups, true);
-                if self.chars.is_begin()
-                   || (multi && self.char_is(self.chars.prev, '\n')) {
-                    self.add(nlist, pc + 1, groups)
+                if self.chars.is_begin() || self.char_is(self.chars.prev, '\n') {
+                    self.add(nlist, pc + 1, groups);
+                }
+            }
+            StartText => {
+                nlist.add(pc, groups, true);
+                if self.chars.is_begin() {
+                    self.add(nlist, pc + 1, groups);
                 }
             }
-            EmptyEnd(flags) => {
-                let multi = flags & FLAG_MULTI > 0;
+            EndLine => {
                 nlist.add(pc, groups, true);
-                if self.chars.is_end()
-                   || (multi && self.char_is(self.chars.cur, '\n')) {
+                if self.chars.is_end() || self.char_is(self.chars.cur, '\n') {
                     self.add(nlist, pc + 1, groups)
                 }
             }
-            EmptyWordBoundary(flags) => {
+            EndText => {
                 nlist.add(pc, groups, true);
-                if self.chars.is_word_boundary() == !(flags & FLAG_NEGATED > 0) {
+                if self.chars.is_end() {
                     self.add(nlist, pc + 1, groups)
                 }
             }
+            WordBoundary => {
+                nlist.add(pc, groups, true);
+                if self.chars.is_word_boundary() {
+                    self.add(nlist, pc + 1, groups);
+                }
+            }
+            NotWordBoundary => {
+                nlist.add(pc, groups, true);
+                if !self.chars.is_word_boundary() {
+                    self.add(nlist, pc + 1, groups);
+                }
+            }
             Save(slot) => {
                 nlist.add(pc, groups, true);
                 match self.which {
@@ -321,7 +321,7 @@ impl<'r, 't> Nfa<'r, 't> {
                 self.add(nlist, x, groups);
                 self.add(nlist, y, groups);
             }
-            Match | OneChar(_, _) | CharClass(_, _) | Any(_) => {
+            Match | OneChar{..} | CharClass(_) | Any | AnyNoNL => {
                 nlist.add(pc, groups, false);
             }
         }
@@ -334,7 +334,7 @@ impl<'r, 't> Nfa<'r, 't> {
         match textc {
             None => false,
             Some(textc) => {
-                regc == textc || (casei && simple_case_fold(regc) == simple_case_fold(textc))
+                regc == textc || (casei && syntax::simple_case_fold(regc) == syntax::simple_case_fold(textc))
             }
         }
     }
@@ -425,17 +425,22 @@ impl<'t> CharReader<'t> {
     /// Returns true if and only if the current position is a word boundary.
     /// (Ignoring the range of the input to search.)
     pub fn is_word_boundary(&self) -> bool {
+        fn is_word(c: Option<char>) -> bool {
+            c.map(syntax::is_word_char).unwrap_or(false)
+        }
+
         if self.is_begin() {
-            return is_word(self.cur)
+            return is_word(self.cur);
         }
         if self.is_end() {
-            return is_word(self.prev)
+            return is_word(self.prev);
         }
         (is_word(self.cur) && !is_word(self.prev))
         || (is_word(self.prev) && !is_word(self.cur))
     }
 }
 
+#[derive(Clone)]
 struct Thread {
     pc: usize,
     groups: Vec<Option<usize>>,
@@ -457,12 +462,11 @@ impl Threads {
     //
     // See http://research.swtch.com/sparse for the deets.
     fn new(which: MatchKind, num_insts: usize, ncaps: usize) -> Threads {
+        let t = Thread { pc: 0, groups: vec![None; ncaps * 2] };
         Threads {
             which: which,
-            queue: (0..num_insts).map(|_| {
-                Thread {pc: 0, groups: repeat(None).take(ncaps * 2).collect() }
-            }).collect(),
-            sparse: repeat(0).take(num_insts).collect(),
+            queue: vec![t; num_insts],
+            sparse: vec![0; num_insts],
             size: 0,
         }
     }
@@ -508,58 +512,6 @@ impl Threads {
     }
 }
 
-/// Returns true if the character is a word character, according to the
-/// (Unicode friendly) Perl character class '\w'.
-/// Note that this is only use for testing word boundaries. The actual '\w'
-/// is encoded as a CharClass instruction.
-pub fn is_word(c: Option<char>) -> bool {
-    let c = match c {
-        None => return false,
-        Some(c) => c,
-    };
-    // Try the common ASCII case before invoking binary search.
-    match c {
-        '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z' => true,
-        _ => PERLW.binary_search_by(|&(start, end)| {
-            if c >= start && c <= end {
-                Ordering::Equal
-            } else if start > c {
-                Ordering::Greater
-            } else {
-                Ordering::Less
-            }
-        }).ok().is_some()
-    }
-}
-
-
-/// Returns the Unicode *simple* case folding of `c`.
-/// Uses the mappings with status C + S form Unicode’s `CaseFolding.txt`.
-/// This is not as “correct” as full case folding, but preserves the number of code points.
-pub fn simple_case_fold(c: char) -> char {
-    match case_folding::C_plus_S_table.binary_search_by(|&(x, _)| x.cmp(&c)) {
-        Ok(i) => case_folding::C_plus_S_table[i].1,
-        Err(_) => c
-    }
-}
-
-
-/// Given a character and a single character class range, return an ordering
-/// indicating whether the character is less than the start of the range,
-/// in the range (inclusive) or greater than the end of the range.
-///
-/// This function is meant to be used with a binary search.
-#[inline]
-fn class_cmp(textc: char, (start, end): (char, char)) -> Ordering {
-    if textc >= start && textc <= end {
-        Ordering::Equal
-    } else if start > textc {
-        Ordering::Greater
-    } else {
-        Ordering::Less
-    }
-}
-
 /// Returns the starting location of `needle` in `haystack`.
 /// If `needle` is not in `haystack`, then `None` is returned.
 ///