diff --git a/src/cli/lexer.rs b/src/cli/lexer.rs
new file mode 100644
index 000000000..9cc9bb237
--- /dev/null
+++ b/src/cli/lexer.rs
@@ -0,0 +1,3098 @@
+use crate::util::datetime::DateTime;
+use crate::util::duration::Duration;
+use std::convert::TryFrom;
+
+// based on src/Lexer.{h,cpp} in the Taskwarrior code
+
+const UUID_PATTERN: &[u8] = b"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; // `x` = hex digit, `-` = literal dash (see `is_uuid`)
+const UUID_MIN_LENGTH: usize = 8; // fewest leading pattern characters `is_uuid` accepts as a (partial) UUID
+const MINIMUM_MATCH_LEN: usize = 3; // not used in this part of the file -- NOTE(review): presumably a minimum abbreviation length; confirm at the use site
+const DATE_SUBELEMENTS: &[&str] = &[
+    "year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second",
+]; // names `is_dom` accepts after `date.` and after `annotations.<n>.entry.`
+
+#[derive(PartialEq, Debug, Clone, Copy)]
+enum Type { // the kind of a token; the recognizers return it alongside the token text
+    Uuid, // full or partial UUID (`is_uuid`)
+    Number, // decimal number, optionally with fraction and exponent (`is_number`)
+    Hex, // `0x` followed by hex digits (`is_hexnumber`)
+    String, // quoted string (`is_string`)
+    URL, // text starting with `http://` or `https://` (`is_url`)
+    Pair, // identifier, separator and optional value (`is_pair`)
+    Set, // comma-separated integers and integer ranges (`is_set`)
+    Separator, // the literal `--` (`is_separator`)
+    Tag, // `+name` or `-name` (`is_tag`)
+    Path, // slash-delimited path (`is_path`)
+    Substitution, // presumably produced by `is_substitution` (body not fully visible here)
+    Pattern, // presumably produced by `is_pattern` (body not visible here)
+    Op, // presumably produced by `is_operator` (body not visible here)
+    DOM, // DOM reference such as `rc.<word>` or `<id>.tags.<word>` (`is_dom`)
+    Identifier, // presumably produced by `is_identifier` (body not visible here)
+    Word, // presumably produced by `is_word` (body not visible here)
+    Date, // text accepted by `DateTime::parse` (`is_date`)
+    Duration, // text accepted by `Duration::parse` (`is_duration`)
+}
+
+struct Lexer {
+    text: String, // the input being tokenized
+    cursor: usize, // byte offset into `text` of the next unread character
+    eos: usize, // byte length of `text` as captured in `new`; the cursor value at end of input
+    attributes: Vec<String>, // names registered through `add_attribute`, matched by `is_dom`
+}
+
+// TaskWarrior uses some non-standard character definitions, so they are repeated verbatim here,
+// rather than defaulting to the unicode functions available on the char type.
+
+/// Reports whether `c` is whitespace -- horizontal or vertical -- per TaskWarrior's libshared.
+fn unicode_whitespace(c: char) -> bool {
+    unicode_vertical_whitespace(c) || unicode_horizontal_whitespace(c)
+}
+
+/// Reports whether `c` is horizontal whitespace, as defined in TaskWarrior's libshared.
+fn unicode_horizontal_whitespace(c: char) -> bool {
+    match u32::from(c) {
+        0x0009 => true, // horizontal tab
+        0x0020 => true, // space
+        0x00A0 => true, // no-break space
+        0x1680 => true, // ogham space mark
+        0x180E => true, // mongolian vowel separator
+        // U+2000 through U+200D are contiguous and all count:
+        //   U+2000 en quad             U+2001 em quad
+        //   U+2002 en space            U+2003 em space
+        //   U+2004 three-per-em space  U+2005 four-per-em space
+        //   U+2006 six-per-em space    U+2007 figure space
+        //   U+2008 punctuation space   U+2009 thin space
+        //   U+200A hair space          U+200B zero width space
+        //   U+200C zero width non-joiner
+        //   U+200D zero width joiner
+        0x2000..=0x200D => true,
+        0x202F => true, // narrow no-break space
+        0x205F => true, // medium mathematical space
+        0x2060 => true, // word joiner
+        0x3000 => true, // ideographic space
+        // everything else, including the vertical whitespace characters
+        // recognized by `unicode_vertical_whitespace`
+        _ => false,
+    }
+}
+
+/// Reports whether `c` is vertical whitespace, as defined in TaskWarrior's libshared.
+fn unicode_vertical_whitespace(c: char) -> bool {
+    match u32::from(c) {
+        // LF, VT, FF and CR are four consecutive control characters
+        0x000A..=0x000D => true,
+        0x0085 => true, // NEL, next line
+        0x2028 => true, // line separator
+        0x2029 => true, // paragraph separator
+        _ => false,
+    }
+}
+
+/// Reports whether `c` is one of the ASCII digits `0` through `9`.
+fn unicode_latin_digit(c: char) -> bool {
+    ('0'..='9').contains(&c)
+}
+
+/// Reports whether `c` is an ASCII letter, `a`-`z` or `A`-`Z`.
+fn unicode_latin_alpha(c: char) -> bool {
+    c.is_ascii_lowercase() || c.is_ascii_uppercase()
+}
+
+/// Replicates C's `isprint` ("C" locale): true for space and visible ASCII (0x20..=0x7E) only
+fn isprint(c: char) -> bool {
+    c == ' ' || c.is_ascii_graphic()
+}
+
+/// Reports whether `c` is punctuation: a printable ASCII character that is not a
+/// space, a digit, a letter, or one of `@`, `#`, `$`, `_`.
+fn is_punctuation(c: char) -> bool {
+    // characters explicitly carved out of the punctuation class
+    let exempt = match c {
+        ' ' | '@' | '#' | '$' | '_' => true,
+        _ => unicode_latin_digit(c) || unicode_latin_alpha(c),
+    };
+
+    isprint(c) && !exempt
+}
+
+/// Reports whether `c` is one of the single-character operators
+/// `+ - * / ( ) < > ^ ! % = ~`.
+fn is_single_char_operator(c: char) -> bool {
+    // every operator is a single ASCII character, so a search of this list suffices
+    const OPERATORS: &str = "+-*/()<>^!%=~";
+    OPERATORS.contains(c)
+}
+
+/// Reports whether `c` may begin an identifier: anything that is not whitespace,
+/// an ASCII digit, a single-character operator, or punctuation.
+fn is_identifier_start(c: char) -> bool {
+    let excluded = unicode_whitespace(c) || unicode_latin_digit(c);
+    let excluded = excluded || is_single_char_operator(c) || is_punctuation(c);
+    !excluded
+}
+
+/// Reports whether `c` may continue an identifier: not `:`, `=`, whitespace, or an operator.
+fn is_identifier_next(c: char) -> bool {
+    !(c == ':' || c == '=' || unicode_whitespace(c) || is_single_char_operator(c))
+}
+
+/// Reports whether a token boundary lies between the adjacent characters `left`
+/// and `right`; a NUL on the right always counts as one.
+fn is_boundary(left: char, right: char) -> bool {
+    // a change of character class (letter, digit, whitespace) across the pair
+    let class_change = unicode_latin_alpha(left) != unicode_latin_alpha(right)
+        || unicode_latin_digit(left) != unicode_latin_digit(right)
+        || unicode_whitespace(left) != unicode_whitespace(right);
+    right == '\0' || class_change || is_punctuation(left) || is_punctuation(right)
+}
+
+/// Reports whether `left`/`right` form a hard boundary: `right` is NUL, or either is a parenthesis.
+fn is_hard_boundary(left: char, right: char) -> bool {
+    right == '\0' || [left, right].iter().any(|&c| c == '(' || c == ')')
+}
+
+/// Reports whether `s` has a space, tab, or one of `( ) < > & ~`, so must have been shell-quoted
+fn was_quoted(s: &str) -> bool {
+    s.chars().any(|c| " \t()<>&~".contains(c))
+}
+
+/// Reports whether `c` is an ASCII hexadecimal digit (`0-9`, `a-f` or `A-F`).
+///
+/// Despite the name, no non-ASCII digits are accepted.
+fn is_unicode_hex_digit(c: char) -> bool {
+    c.is_ascii_hexdigit()
+}
+
+/// Interprets `hex` as a hexadecimal number and returns the character with that
+/// code point.
+///
+/// Returns `None` when `hex` contains a non-hexadecimal character, when the value
+/// does not fit in a `u32`, or when it is not a valid Unicode scalar value (for
+/// example a surrogate).  An empty string yields `Some('\0')`.
+fn hex_to_char(hex: &str) -> Option<char> {
+    let mut num = 0u32;
+    for c in hex.chars() {
+        // `to_digit(16)` accepts exactly 0-9, a-f and A-F
+        let digit = c.to_digit(16)?;
+        // A plain shift would silently drop high bits once more than eight
+        // digits have been read, mapping the input to an unrelated character;
+        // treat that overflow as a failure instead.
+        num = num.checked_mul(16)?.checked_add(digit)?;
+    }
+    char::try_from(num).ok()
+}
+
+/// Removes one pair of matching quote characters from around `s`.
+///
+/// When the first and last characters of `s` are the same character and that
+/// character appears in `quotes`, the text between them is returned.  A string
+/// consisting of nothing but one or two such quote characters becomes `""`.
+/// In every other case `s` comes back unchanged.
+fn dequote<'a, 'b>(s: &'a str, quotes: &'b str) -> &'a str {
+    // Unlike the C++ version this never modifies its argument; the result is a
+    // sub-slice of `s`.
+    let quote = match (s.chars().next(), s.chars().next_back()) {
+        (Some(first), Some(last)) if first == last && quotes.contains(first) => first,
+        _ => return s,
+    };
+
+    // The same character opens and closes the string, so both ends have the
+    // same encoded width.
+    let width = quote.len_utf8();
+    s.get(width..s.len().saturating_sub(width)).unwrap_or("")
+}
+
+fn read_word_quoted(text: &str, quotes: &str, cursor: usize) -> Option<(String, usize)> { // reads a quoted word that starts at byte offset `cursor` of `text`
+    let mut pos = cursor; // byte offset of the character currently being examined
+    let mut res = String::new(); // the word read so far: quotes kept, escapes decoded
+    let mut skipchars = 0; // characters still to pass over because an escape already used them
+
+    let mut chars = text.get(cursor..)?.chars(); // `None` when `cursor` is not a valid offset in `text`
+    let quote = chars.next();
+    if quote.is_none() {
+        return None;
+    }
+    let quote = quote.unwrap();
+    if !quotes.contains(quote) { // the word must open with one of the characters in `quotes`
+        return None;
+    }
+
+    res.push(quote);
+    pos += quote.len_utf8();
+
+    for c in chars {
+        if skipchars > 0 { // this character was already consumed by an earlier escape
+            skipchars -= 1;
+            pos += c.len_utf8();
+            continue;
+        }
+        if c == quote { // closing quote: the word is complete
+            res.push(c);
+            pos += quote.len_utf8();
+            return Some((res, pos)); // `pos` is now the offset just past the closing quote
+        }
+
+        if c == 'U' { // possibly a `U+XXXX` code point (four hex digits)
+            if let Some('+') = text.get(pos + 1..).unwrap().chars().next() { // `U` is one byte wide, so `pos + 1` is a char boundary
+                if let Some(hex) = text.get(pos + 2..pos + 6) {
+                    if let Some(c) = hex_to_char(hex) {
+                        res.push(c);
+                        skipchars += 5; // the `+` and the four hex digits
+                    } else {
+                        res.push('U'); // not valid hex: the `U` is an ordinary letter
+                    }
+                } else {
+                    res.push('U');
+                }
+            } else {
+                res.push('U');
+            }
+        } else if c == '\\' { // backslash escape
+            match text.get(pos + 1..).unwrap().chars().next() { // the character after the backslash
+                None => res.push(c), // a trailing backslash is kept as-is
+                Some('b') => res.push('\x08'),
+                Some('f') => res.push('\x0c'),
+                Some('n') => res.push('\x0a'),
+                Some('r') => res.push('\x0d'),
+                Some('t') => res.push('\x09'),
+                Some('v') => res.push('\x0b'),
+                Some('u') => { // possibly a `\uXXXX` code point (four hex digits)
+                    if let Some(hex) = text.get(pos + 2..pos + 6) {
+                        if let Some(c) = hex_to_char(hex) {
+                            res.push(c);
+                            skipchars += 4; // the four hex digits; the `u` is covered by the `+= 1` below
+                        } else {
+                            res.push('u')
+                        }
+                    } else {
+                        res.push('u')
+                    }
+                }
+                Some(c @ _) => res.push(c), // any other escaped character stands for itself
+            }
+            skipchars += 1; // the character after the backslash has been dealt with
+        } else {
+            res.push(c);
+        }
+
+        pos += c.len_utf8();
+    }
+
+    None // the input ended before a closing quote was found
+}
+
+fn read_word_unquoted(text: &str, cursor: usize) -> Option<(String, usize)> { // reads an unquoted word that starts at byte offset `cursor` of `text`
+    let mut pos = cursor; // byte offset of the character currently being examined
+    let mut res = String::new(); // the word read so far, escapes decoded
+    let mut prev = None; // the previous character seen, for the hard-boundary test
+    let mut skipchars = 0; // characters still to pass over because an escape already used them
+
+    for c in text.get(cursor..)?.chars() { // `None` when `cursor` is not a valid offset in `text`
+        if skipchars > 0 { // this character was already consumed by an earlier escape
+            skipchars -= 1;
+            pos += c.len_utf8();
+            prev = Some(c);
+            continue;
+        }
+        if unicode_whitespace(c) { // whitespace ends the word
+            break;
+        }
+        if let Some(p) = prev {
+            if is_hard_boundary(p, c) { // so does a hard boundary between the previous character and this one
+                break;
+            }
+        }
+
+        if c == 'U' { // possibly a `U+XXXX` code point (four hex digits)
+            if let Some('+') = text.get(pos + 1..).unwrap().chars().next() { // `U` is one byte wide, so `pos + 1` is a char boundary
+                if let Some(hex) = text.get(pos + 2..pos + 6) {
+                    if let Some(c) = hex_to_char(hex) {
+                        res.push(c);
+                        skipchars += 5; // the `+` and the four hex digits
+                    } else {
+                        res.push('U'); // not valid hex: the `U` is an ordinary letter
+                    }
+                } else {
+                    res.push('U');
+                }
+            } else {
+                res.push('U');
+            }
+        } else if c == '\\' { // backslash escape
+            match text.get(pos + 1..).unwrap().chars().next() { // the character after the backslash
+                None => res.push(c), // a trailing backslash is kept as-is
+                Some('b') => res.push('\x08'),
+                Some('f') => res.push('\x0c'),
+                Some('n') => res.push('\x0a'),
+                Some('r') => res.push('\x0d'),
+                Some('t') => res.push('\x09'),
+                Some('v') => res.push('\x0b'),
+                Some('u') => { // possibly a `\uXXXX` code point (four hex digits)
+                    if let Some(hex) = text.get(pos + 2..pos + 6) {
+                        if let Some(c) = hex_to_char(hex) {
+                            res.push(c);
+                            skipchars += 4; // the four hex digits; the `u` is covered by the `+= 1` below
+                        } else {
+                            res.push('u')
+                        }
+                    } else {
+                        res.push('u')
+                    }
+                }
+                Some(c @ _) => res.push(c), // any other escaped character stands for itself
+            }
+            skipchars += 1; // the character after the backslash has been dealt with
+        } else {
+            res.push(c);
+        }
+
+        pos += c.len_utf8();
+        prev = Some(c);
+    }
+
+    if pos != cursor { // at least one character was consumed
+        Some((res, pos)) // the decoded word and the byte offset just past it
+    } else {
+        None // nothing to read: end of input or whitespace right at `cursor`
+    }
+}
+
+/// Returns the number of leading characters that `s1` and `s2` have in common.
+fn common_length(s1: &str, s2: &str) -> usize {
+    s1.chars()
+        .zip(s2.chars())
+        .take_while(|(c1, c2)| c1 == c2)
+        .count() // count in place rather than collecting the pairs into a `Vec` first
+}
+
+#[derive(Debug, PartialEq)]
+pub struct DecomposedPair { // the parts of a pair, as split by `Lexer::decompose_pair`
+    name: String, // text before the separator, cut at the first `.` when one precedes the separator
+    modifier: String, // text between that `.` and the separator; empty when there is no such `.`
+    separator: String, // whichever of `::`, `:=`, `:` or `=` was found
+    value: String, // everything after the separator
+}
+
+impl Lexer {
+    /// Creates a lexer over `text`, positioned at the start, with no attributes registered.
+    pub fn new<S: Into<String>>(text: S) -> Lexer {
+        let owned: String = text.into();
+        Lexer {
+            eos: owned.len(), // byte length; read before `owned` is moved on the next line
+            text: owned,
+            cursor: 0,
+            attributes: Vec::new(),
+        }
+    }
+
+    pub fn add_attribute<S: Into<String>>(&mut self, attribute: S) { // registers one attribute name
+        self.attributes.push(attribute.into()); // kept in registration order; consulted by `is_dom`
+    }
+
+    /// Tokenizes `text` and returns only the token strings, dropping each token's type.
+    pub fn split<S: Into<String>>(text: S) -> Vec<String> {
+        Lexer::new(text).into_iter().map(|(token, _)| token).collect()
+    }
+
+    pub fn token(&mut self) -> Option<(String, Type)> { // returns the next token's text and type, advancing the cursor past it
+        // Eat whitespace
+        while let Some(c) = self.text[self.cursor..].chars().next() {
+            if unicode_whitespace(c) {
+                self.cursor += c.len_utf8();
+                continue;
+            }
+            break;
+        }
+
+        if self.cursor == self.eos { // only whitespace (or nothing) was left
+            return None;
+        }
+
+        // The sequence is specific, and must follow these rules:
+        //   - date < duration < uuid < identifier
+        //   - dom < uuid   NOTE(review): `is_uuid` is called before `is_dom` below -- confirm this rule
+        //   - uuid < hex < number
+        //   - url < pair < identifier
+        //   - hex < number
+        //   - separator < tag < operator
+        //   - path < substitution < pattern
+        //   - set < number
+        //   - word last
+        if let Some(r) = self.is_string("\"'") { // either quote character may delimit a string
+            return Some(r);
+        }
+        if let Some(r) = self.is_date() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_duration() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_url() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_pair() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_uuid(true) { // `true`: the UUID must be followed by end of input, whitespace or an operator
+            return Some(r);
+        }
+        if let Some(r) = self.is_set() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_dom() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_hexnumber() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_number() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_separator() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_tag() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_path() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_substitution() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_pattern() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_operator() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_identifier() {
+            return Some(r);
+        }
+        if let Some(r) = self.is_word() {
+            return Some(r);
+        }
+        None // no recognizer accepted the text at the cursor
+    }
+
+    pub fn decompose_pair(text: &str) -> Option<DecomposedPair> { // splits `name[.modifier]<separator>value`; `None` when `text` has no separator
+        let npos = usize::max_value(); // sentinel for "not found"; compares greater than any real offset
+        // offset of the first `.`: the candidate split between name and modifier
+        let dot = text.find(".").unwrap_or(npos);
+        // offset of the first `::` separator
+        let sep_defer = text.find("::").unwrap_or(npos);
+        // offset of the first `:=` separator
+        let sep_eval = text.find(":=").unwrap_or(npos);
+        // offset of the first `:` (this also matches the first character of `::` and `:=`)
+        let sep_colon = text.find(":").unwrap_or(npos);
+        // offset of the first `=` (this also matches the second character of `:=`)
+        let sep_equal = text.find("=").unwrap_or(npos);
+
+        let (sep, sep_end) = if sep_defer != npos // the earliest separator wins; on a tie the branch tested first wins
+            && sep_defer <= sep_eval
+            && sep_defer <= sep_colon
+            && sep_defer <= sep_equal
+        {
+            (sep_defer, sep_defer + 2) // `::`
+        } else if sep_eval != npos
+            && sep_eval <= sep_defer
+            && sep_eval <= sep_colon
+            && sep_eval <= sep_equal
+        {
+            (sep_eval, sep_eval + 2) // `:=`
+        } else if sep_colon != npos
+            && sep_colon <= sep_defer
+            && sep_colon <= sep_eval
+            && sep_colon <= sep_equal
+        {
+            (sep_colon, sep_colon + 1) // `:`
+        } else if sep_equal != npos
+            && sep_equal <= sep_defer
+            && sep_equal <= sep_eval
+            && sep_equal <= sep_colon
+        {
+            (sep_equal, sep_equal + 1) // `=`
+        } else {
+            return None; // no separator anywhere in `text`
+        };
+
+        let (name, modifier) = if dot != npos && dot < sep { // a `.` only counts when it comes before the separator
+            (
+                text.get(0..dot).unwrap().into(),
+                text.get(dot + 1..sep).unwrap().into(),
+            )
+        } else {
+            (text.get(0..sep).unwrap().into(), "".into())
+        };
+
+        let separator = text.get(sep..sep_end).unwrap().into();
+        let value = text.get(sep_end..).unwrap().into();
+
+        Some(DecomposedPair {
+            name,
+            modifier,
+            separator,
+            value,
+        })
+    }
+
+    // recognizers for the `token` method
+
+    /// Recognizes a string delimited by one of the characters in `quotes`; on success
+    /// the cursor moves past the closing quote.
+    fn is_string(&mut self, quotes: &str) -> Option<(String, Type)> {
+        let (word, end) = read_word_quoted(&self.text, quotes, self.cursor)?;
+        self.cursor = end;
+        Some((word, Type::String))
+    }
+
+    /// Recognizes a date at the cursor, as judged by `DateTime::parse`.
+    fn is_date(&mut self) -> Option<(String, Type)> {
+        let start = self.cursor;
+        self.cursor += DateTime::parse(&self.text[start..], "")?.1;
+        Some((self.text[start..self.cursor].into(), Type::Date))
+    }
+
+    /// Recognizes a duration at the cursor (per `Duration::parse`); operators are rejected first.
+    fn is_duration(&mut self) -> Option<(String, Type)> {
+        let start = self.cursor;
+        if self.is_operator().is_some() {
+            self.cursor = start; // the operator probe moved the cursor; undo that
+            return None;
+        }
+        let (_, len) = Duration::parse(&self.text[self.cursor..], "")?;
+        let end = self.cursor + len;
+        let token: String = self.text[self.cursor..end].into();
+        self.cursor = end;
+        Some((token, Type::Duration))
+    }
+
+    /// Recognizes a URL: text beginning with `http://` or `https://`, extending to the
+    /// next whitespace character or, failing that, to the end of the input.
+    fn is_url(&mut self) -> Option<(String, Type)> {
+        let rest = &self.text[self.cursor..];
+        if !["https://", "http://"].iter().any(|&scheme| rest.starts_with(scheme)) {
+            return None;
+        }
+        let (url, end) = match rest.find(unicode_whitespace) {
+            Some(len) => (&rest[..len], self.cursor + len),
+            None => (rest, self.eos),
+        };
+        self.cursor = end;
+        Some((url.into(), Type::URL))
+    }
+
+    fn is_pair(&mut self) -> Option<(String, Type)> { // recognizes an identifier, a separator and an optional value
+        let marker = self.cursor; // start of the pair: used for rollback and for slicing the token
+        if self.is_identifier().is_some() {
+            let separator = &self.text[self.cursor..];
+            if separator.starts_with("::") || separator.starts_with(":=") { // two-character separators first: both begin with `:`
+                self.cursor += 2;
+            } else if separator.starts_with(":") || separator.starts_with("=") {
+                self.cursor += 1;
+            } else {
+                self.cursor = marker; // an identifier with no separator is not a pair
+                return None;
+            }
+
+            // String, word, or nothing are all valid
+            let marker2 = self.cursor; // start of the value, just past the separator
+            if let Some((word, end)) = read_word_quoted(&self.text[..], "'\"", self.cursor) {
+                self.cursor = end;
+                return Some((
+                    format!("{}{}", &self.text[marker..marker2], word), // raw name and separator, then the value as returned by the reader
+                    Type::Pair,
+                ));
+            }
+            if let Some((word, end)) = read_word_unquoted(&self.text[..], self.cursor) {
+                self.cursor = end;
+                return Some((
+                    format!("{}{}", &self.text[marker..marker2], word),
+                    Type::Pair,
+                ));
+            }
+            if self.cursor == self.eos
+                || unicode_whitespace(self.text[self.cursor..].chars().next().unwrap()) // not at end of input here, so a character exists
+            {
+                return Some((self.text[marker..self.cursor].into(), Type::Pair)); // a pair with an empty value
+            }
+        }
+        self.cursor = marker; // not a pair: leave the cursor where it started
+        None
+    }
+
+    fn is_uuid(&mut self, end_boundary: bool) -> Option<(String, Type)> { // recognizes a full or partial UUID at the cursor
+        let mut i = 0; // number of characters matched against `UUID_PATTERN` so far
+        for c in self.text[self.cursor..].chars() {
+            if UUID_PATTERN[i] == b'x' { // the pattern wants a hex digit at this position
+                if !is_unicode_hex_digit(c) {
+                    break;
+                }
+            } else { // the pattern wants a literal dash at this position
+                if c != '-' {
+                    break;
+                }
+            }
+            i += 1;
+            if i >= UUID_PATTERN.len() { // the whole pattern has been matched
+                break;
+            }
+        }
+
+        if i < UUID_MIN_LENGTH { // too short to count even as a partial UUID
+            return None;
+        }
+
+        if end_boundary { // when requested, only end of input, whitespace or an operator may follow
+            let c = self.text[self.cursor + i..].chars().next(); // only ASCII hex digits and dashes were matched, so `i` is also a byte count
+            if let Some(c) = c {
+                if !unicode_whitespace(c) && !is_single_char_operator(c) {
+                    return None;
+                }
+            }
+        }
+
+        let token = self.text[self.cursor..self.cursor + i].into();
+        self.cursor += i;
+        Some((token, Type::Uuid))
+    }
+
+    fn is_set(&mut self) -> Option<(String, Type)> { // recognizes integers and `<integer>-<integer>` ranges joined by commas
+        let marker = self.cursor; // start of the candidate set, for rollback
+        let mut count = 0; // integers seen so far, counting both ends of a range
+        loop {
+            if self.is_integer().is_some() {
+                count += 1;
+                if self.is_literal("-", false, false) {
+                    if self.is_integer().is_some() {
+                        count += 1;
+                    } else {
+                        self.cursor = marker; // a dash with no integer after it
+                        return None;
+                    }
+                }
+            } else {
+                self.cursor = marker; // every element must begin with an integer
+                return None;
+            }
+            if !self.is_literal(",", false, false) { // no comma: the list has ended
+                break;
+            }
+        }
+
+        if count <= 1 { // a lone integer is not a set
+            self.cursor = marker;
+            return None;
+        }
+
+        // -1 is OK here since integers are ASCII
+        let last_char = self.text[self.cursor - 1..].chars().next().unwrap();
+
+        // look ahead a bit
+        match self.text[self.cursor..].chars().next() {
+            Some(c) if !unicode_whitespace(c) && !is_hard_boundary(last_char, c) => { // something is glued onto the end of the set
+                self.cursor = marker;
+                return None;
+            }
+            _ => (),
+        }
+
+        Some((self.text[marker..self.cursor].into(), Type::Set))
+    }
+
+    fn is_dom(&mut self) -> Option<(String, Type)> { // recognizes a DOM reference at the cursor
+        let marker = self.cursor; // start of the candidate token; every returned token is sliced from here
+
+        // rc. ...
+        if self.is_literal("rc.", false, false) && self.is_word().is_some() {
+            return Some((self.text[marker..self.cursor].into(), Type::DOM));
+        } else {
+            self.cursor = marker;
+        }
+
+        // Literals
+        if self.is_one_of(
+            &vec![
+                "tw.syncneeded",
+                "tw.program",
+                "tw.args",
+                "tw.width",
+                "tw.height",
+                "tw.version",
+                "context.program",
+                "context.args",
+                "context.width",
+                "context.height",
+                "system.version",
+                "system.os",
+            ],
+            false,
+            true,
+        ) {
+            return Some((self.text[marker..self.cursor].into(), Type::DOM));
+        }
+
+        // Optional:
+        //   <uuid>.
+        //   <id>.
+        if self.is_uuid(false).is_some() || self.is_integer().is_some() {
+            if !self.is_literal(".", false, false) { // a uuid or id prefix must be followed by a dot
+                self.cursor = marker;
+                return None;
+            }
+        }
+
+        // Any failure after this line should rollback to the checkpoint.
+        let checkpoint = self.cursor; // just past the optional `<uuid>.` / `<id>.` prefix
+
+        // [prefix]tags.<word>
+        if self.is_literal("tags", false, false)
+            && self.is_literal(".", false, false)
+            && self.is_word().is_some()
+        {
+            return Some((self.text[marker..self.cursor].into(), Type::DOM));
+        } else {
+            self.cursor = checkpoint;
+        }
+
+        // [prefix]attribute (bounded)
+        // (have to clone here to avoid double-borrowing self
+        let attributes = self.attributes.clone();
+        if self.is_one_of(&attributes, false, true) {
+            return Some((self.text[marker..self.cursor].into(), Type::DOM));
+        }
+
+        // [prefix]attribute. (unbounded)
+        if self.is_one_of(&attributes, false, false) {
+            if self.is_literal(".", false, false) {
+                let attribute = &self.text[checkpoint..self.cursor - 1]; // the attribute name just matched, without the trailing `.`
+                // if attribute type is 'date', then it has sub-elements.
+                if attribute == "date" && self.is_one_of(&DATE_SUBELEMENTS, false, true) { // NOTE(review): compares the attribute *name* with "date", not its type -- confirm
+                    return Some((self.text[marker..self.cursor].into(), Type::DOM));
+                }
+                self.cursor = checkpoint;
+            }
+            // Lookahead: !<alpha>
+            else if !self.text[marker..] // NOTE(review): inspects the character at `marker` (the start of the token), not the one after the attribute -- confirm
+                .chars()
+                .next()
+                .map_or(false, |c| unicode_latin_alpha(c))
+            {
+                return Some((self.text[marker..self.cursor].into(), Type::DOM));
+            }
+            self.cursor = checkpoint;
+        }
+
+        // [prefix]annotations.
+        if self.is_literal("annotations", true, false) && self.is_literal(".", false, false) {
+            if self.is_literal("count", false, false) { // annotations.count
+                return Some((self.text[marker..self.cursor].into(), Type::DOM));
+            }
+
+            if self.is_integer().is_some() { // annotations.<n>. followed by `description`, `entry` or `entry.<date sub-element>`
+                if self.is_literal(".", false, false) {
+                    if self.is_literal("description", false, true) {
+                        return Some((self.text[marker..self.cursor].into(), Type::DOM));
+                    } else if self.is_literal("entry", false, true) {
+                        return Some((self.text[marker..self.cursor].into(), Type::DOM));
+                    } else if self.is_literal("entry", false, false)
+                        && self.is_literal(".", false, false)
+                        && self.is_one_of(&DATE_SUBELEMENTS, false, true)
+                    {
+                        return Some((self.text[marker..self.cursor].into(), Type::DOM));
+                    }
+                }
+            } else {
+                self.cursor = checkpoint;
+            }
+        }
+
+        self.cursor = marker; // nothing matched: restore the cursor completely
+        None
+    }
+
+    /// Recognizes a hexadecimal number: `0x` followed by one or more hex digits.
+    fn is_hexnumber(&mut self) -> Option<(String, Type)> {
+        const PREFIX: &str = "0x";
+        let rest = &self.text[self.cursor..];
+        if !rest.starts_with(PREFIX) {
+            return None;
+        }
+
+        // hex digits are ASCII, so counting characters also counts bytes
+        let digits = rest[PREFIX.len()..]
+            .chars()
+            .take_while(|&c| is_unicode_hex_digit(c))
+            .count();
+        if digits == 0 {
+            return None;
+        }
+
+        let end = PREFIX.len() + digits;
+        self.cursor += end;
+        Some((rest[..end].into(), Type::Hex))
+    }
+
+    fn is_number(&mut self) -> Option<(String, Type)> { // recognizes a decimal number with optional fraction and exponent
+        let remainder = &self.text[self.cursor..];
+        let mut chars = remainder.char_indices().peekable();
+        let mut marker = 0; // byte offset, relative to the cursor, just past the characters accepted so far
+
+        // A hand-rolled regexp.  States are as follows:
+        //   \d \d* (. \d \d*)? ([eE] [+-]? \d \d* (.  \d \d*)?)?
+        // 0 1  2    3 4  5      6    7     8  9    10 11 12
+        let mut state = 0;
+
+        loop {
+            let c = match chars.peek() { // look at the next character without consuming it
+                Some((i, c)) => {
+                    marker = *i;
+                    Some(*c)
+                }
+                None => None,
+            };
+            match (state, c) { // an arm that sets `state` accepts the character; `break` stops in front of it
+                (0, Some(c)) if unicode_latin_digit(c) => state = 1,
+
+                (1, Some(c)) if unicode_latin_digit(c) => state = 2,
+                (1, Some(c)) if c == '.' => state = 3,
+                (1, Some(c)) if c == 'e' || c == 'E' => state = 6,
+                (1, _) => break,
+
+                (2, Some(c)) if unicode_latin_digit(c) => state = 2,
+                (2, Some(c)) if c == '.' => state = 3,
+                (2, Some(c)) if c == 'e' || c == 'E' => state = 6,
+                (2, _) => break,
+
+                (3, Some(c)) if unicode_latin_digit(c) => state = 4,
+                (3, Some(c)) if c == 'e' || c == 'E' => state = 6,
+                (3, _) => break,
+
+                (4, Some(c)) if unicode_latin_digit(c) => state = 5,
+                (4, Some(c)) if c == 'e' || c == 'E' => state = 6,
+                (4, _) => break,
+
+                (5, Some(c)) if unicode_latin_digit(c) => state = 5,
+                (5, Some(c)) if c == 'e' || c == 'E' => state = 6,
+                (5, _) => break,
+
+                (6, Some(c)) if unicode_latin_digit(c) => state = 8,
+                (6, Some(c)) if c == '-' || c == '+' => state = 7,
+                (6, _) => break,
+
+                (7, Some(c)) if unicode_latin_digit(c) => state = 8,
+                (7, _) => break,
+
+                (8, Some(c)) if unicode_latin_digit(c) => state = 9,
+                (8, Some(c)) if c == '.' => state = 10,
+                (8, _) => break,
+
+                (9, Some(c)) if unicode_latin_digit(c) => state = 9,
+                (9, Some(c)) if c == '.' => state = 10,
+                (9, _) => break,
+
+                (10, Some(c)) if unicode_latin_digit(c) => state = 11,
+                (10, _) => break,
+
+                (11, Some(c)) if unicode_latin_digit(c) => state = 11,
+                (11, _) => break,
+
+                _ => return None, // only reachable in state 0: the text does not start with a digit
+            };
+            if let Some((i, c)) = chars.next() { // consume the character that was just accepted
+                marker = i + c.len_utf8();
+            }
+        }
+        // lookahead: only end of input, whitespace or an operator may follow the number
+        if let Some((_, c)) = chars.peek() {
+            if !unicode_whitespace(*c) && !is_single_char_operator(*c) {
+                return None;
+            }
+        }
+        self.cursor += marker;
+        Some((remainder[..marker].into(), Type::Number))
+    }
+
+    /// Recognizes the separator token `--` at the cursor.  Returns `None`, leaving
+    /// the cursor alone, when the text at the cursor does not begin with `--`.
+    fn is_separator(&mut self) -> Option<(String, Type)> {
+        // A prefix test needs no allocation; going through `get` means a cursor
+        // that is not a valid offset yields `None` rather than a panic.
+        let at_separator = self.text.get(self.cursor..).map_or(false, |rest| rest.starts_with("--"));
+        if !at_separator {
+            return None;
+        }
+        self.cursor += 2;
+        Some(("--".into(), Type::Separator))
+    }
+
+    fn is_tag(&mut self) -> Option<(String, Type)> { // recognizes `+name` or `-name` at the cursor
+        let mut marker = self.cursor; // byte offset just past the part of the tag accepted so far
+
+        // Lookbehind: Assert ^ or preceded by whitespace, (, or ).
+        if marker > 0 {
+            // `get` fails when `cursor - 1` is not a char boundary, i.e. when the previous
+            // character is more than one byte wide; such a character is never ( or )
+            if let Some(lookbehind) = self.text.get(self.cursor - 1..) {
+                if let Some(c) = lookbehind.chars().next() {
+                    if !unicode_whitespace(c) && c != '(' && c != ')' {
+                        return None;
+                    }
+                }
+            } else {
+                return None; // NOTE(review): this also rejects a tag preceded by multi-byte whitespace such as U+00A0 -- confirm intended
+            }
+        }
+
+        let mut chars = self.text[marker..].chars();
+        if let Some(c) = chars.next() {
+            if c == '+' || c == '-' { // the sign that introduces a tag
+                marker += c.len_utf8();
+                if let Some(c) = chars.next() {
+                    if is_identifier_start(c) { // the name must begin like an identifier
+                        marker += c.len_utf8();
+                        while let Some(c) = chars.next() {
+                            if !is_identifier_next(c) { // the first character that cannot continue an identifier ends the tag
+                                break;
+                            }
+                            marker += c.len_utf8();
+                        }
+                        let token = self.text[self.cursor..marker].into();
+                        self.cursor = marker;
+                        return Some((token, Type::Tag));
+                    }
+                }
+            }
+        }
+
+        None // the cursor has not been moved
+    }
+
+    /// Matches an absolute path such as `/path/to/a/file`.
+    ///
+    /// A path is a sequence of `/`-introduced segments, where a segment is a non-empty run of
+    /// characters that are neither whitespace nor `/`. A trailing `/` is accepted; an empty
+    /// segment (`//`) ends the scan. The match only counts when more than three slashes were
+    /// consumed. On success the cursor is advanced past the path and the token is returned as
+    /// `Type::Path`; otherwise the cursor is left alone and `None` is returned.
+    fn is_path(&mut self) -> Option<(String, Type)> {
+        let start = self.cursor;
+        let mut marker = start;
+        let mut slash_count = 0;
+        let mut chars = self.text[start..].chars().peekable();
+
+        loop {
+            // Every segment must be introduced by a slash.
+            match chars.next() {
+                Some('/') => {
+                    marker += '/'.len_utf8();
+                    slash_count += 1;
+                }
+                _ => break,
+            }
+
+            // Measure the segment in bytes, not characters, so that `marker` remains a valid
+            // byte offset into `self.text` when the path contains multi-byte characters.
+            let mut segment_len = 0;
+            while let Some(&c) = chars.peek() {
+                if unicode_whitespace(c) || c == '/' {
+                    break;
+                }
+                segment_len += c.len_utf8();
+                chars.next();
+            }
+
+            // An empty segment (end of text, whitespace, or a doubled slash) ends the path.
+            if segment_len == 0 {
+                break;
+            }
+            marker += segment_len;
+        }
+
+        if marker > start && slash_count > 3 {
+            let token = self.text[start..marker].into();
+            self.cursor = marker;
+            return Some((token, Type::Path));
+        }
+
+        None
+    }
+
+    /// Matches a substitution of the form `/from/to/`, optionally followed by a `g` flag.
+    ///
+    /// The match must be followed by the end of the text or by whitespace. On success the cursor
+    /// is advanced past the substitution (and the flag, if present) and the raw text is returned
+    /// as `Type::Substitution`; otherwise the cursor is left alone and `None` is returned.
+    fn is_substitution(&mut self) -> Option<(String, Type)> {
+        let start = self.cursor;
+
+        let (_, pattern_end) = read_word_quoted(&self.text, "/", start)?;
+        // Step back over the middle `/` so it also opens the second quoted word.
+        let (_, close) = read_word_quoted(&self.text, "/", pattern_end - 1)?;
+
+        // Decide where the token stops from the (up to) two characters after the final `/`.
+        let mut trailing = self.text[close..].chars();
+        let stop = match (trailing.next(), trailing.next()) {
+            (None, _) => close,
+            (Some('g'), None) => close + 1,
+            (Some('g'), Some(after)) if unicode_whitespace(after) => close + 1,
+            (Some(first), _) if unicode_whitespace(first) => close,
+            _ => return None,
+        };
+
+        self.cursor = stop;
+        Some((self.text[start..stop].into(), Type::Substitution))
+    }
+
+    /// Matches a `/`-delimited pattern such as `/foo/`.
+    ///
+    /// The closing `/` must be followed by the end of the string (`self.eos`) or by whitespace.
+    /// On success the cursor is advanced past the pattern and the raw text is returned as
+    /// `Type::Pattern`; otherwise the cursor is left alone and `None` is returned.
+    fn is_pattern(&mut self) -> Option<(String, Type)> {
+        let start = self.cursor;
+        let (_, end) = read_word_quoted(&self.text, "/", start)?;
+
+        // Lookahead: unless the pattern ends the string, whitespace must follow it.
+        if end != self.eos {
+            let following = self.text[end..].chars().next().unwrap();
+            if !unicode_whitespace(following) {
+                return None;
+            }
+        }
+
+        self.cursor = end;
+        Some((self.text[start..end].into(), Type::Pattern))
+    }
+
+    /// Matches an operator at the cursor.
+    ///
+    /// Symbolic and `_custom_` operators match wherever they appear. The word operators only
+    /// match when they end the string (`self.eos`) or when `is_boundary` accepts the operator's
+    /// last character together with the character that follows it. On success the cursor is
+    /// advanced past the operator and it is returned as `Type::Op`; otherwise `None`.
+    fn is_operator(&mut self) -> Option<(String, Type)> {
+        // Operators that do not require a boundary afterward.  Longer operators are listed
+        // before their own prefixes so that e.g. `!==` is not cut short as `!=` or `!`.
+        const NO_BOUNDARY_NEEDED: &[&str] = &[
+            // custom stuff
+            "_hastag_", "_notag_", "_neg_", "_pos_",
+            // triple-char
+            "!==",
+            // double-char
+            "==", "!=", "<=", ">=", "||", "&&", "!~",
+            // single-char
+            "+", "-", "*", "/", "(", ")", "<", ">", "^", "!", "%", "=", "~",
+        ];
+        // Operators that require a boundary afterward.  The `!==` entry can never be reached
+        // here, because the list above already accepts it unconditionally.
+        const BOUNDARY_NEEDED: &[&str] = &["and", "xor", "!==", "or"];
+
+        let rest = &self.text[self.cursor..];
+
+        if let Some(op) = NO_BOUNDARY_NEEDED
+            .iter()
+            .find(|op| rest.starts_with(**op))
+        {
+            self.cursor += op.len();
+            return Some(((*op).into(), Type::Op));
+        }
+
+        for op in BOUNDARY_NEEDED {
+            if !rest.starts_with(*op) {
+                continue;
+            }
+            let width = op.len();
+            let ends_text = self.cursor + width == self.eos;
+            if ends_text
+                || is_boundary(
+                    rest[width - 1..].chars().next().unwrap(),
+                    rest[width..].chars().next().unwrap(),
+                )
+            {
+                self.cursor += width;
+                return Some(((*op).into(), Type::Op));
+            }
+        }
+
+        None
+    }
+
+    /// Matches an identifier: one `is_identifier_start` character followed by any number of
+    /// `is_identifier_next` characters.
+    ///
+    /// On success the cursor is advanced past the identifier and it is returned as
+    /// `Type::Identifier`. Returns `None`, leaving the cursor alone, when the text at the cursor
+    /// is empty, does not start an identifier, or the cursor is not a valid slice position.
+    fn is_identifier(&mut self) -> Option<(String, Type)> {
+        let rest = self.text.get(self.cursor..)?;
+        let mut chars = rest.chars();
+
+        let first = chars.next().filter(|c| is_identifier_start(*c))?;
+        let tail_len: usize = chars
+            .take_while(|c| is_identifier_next(*c))
+            .map(char::len_utf8)
+            .sum();
+
+        let start = self.cursor;
+        self.cursor += first.len_utf8() + tail_len;
+        Some((self.text.get(start..self.cursor)?.into(), Type::Identifier))
+    }
+
+    /// Matches a word: the longest non-empty run of characters that are neither whitespace nor
+    /// single-character operators.
+    ///
+    /// On success the cursor is advanced past the word and it is returned as `Type::Word`;
+    /// an empty run yields `None` and leaves the cursor alone.
+    fn is_word(&mut self) -> Option<(String, Type)> {
+        let start = self.cursor;
+        let width: usize = self.text[start..]
+            .chars()
+            .take_while(|&c| !unicode_whitespace(c) && !is_single_char_operator(c))
+            .map(char::len_utf8)
+            .sum();
+
+        if width == 0 {
+            return None;
+        }
+
+        self.cursor = start + width;
+        Some((self.text[start..self.cursor].into(), Type::Word))
+    }
+
+    // utilities that may modify self
+
+    /// Tries each of `options` in order with `is_literal` and stops at the first that matches.
+    ///
+    /// Returns `true` if some option matched (the cursor has then been advanced by
+    /// `is_literal`), and `false` if none did. `allow_abbreviations` and `end_boundary` are
+    /// passed through unchanged.
+    fn is_one_of<S: AsRef<str>>(
+        &mut self,
+        options: &[S],
+        allow_abbreviations: bool,
+        end_boundary: bool,
+    ) -> bool {
+        options
+            .iter()
+            .any(|option| self.is_literal(option.as_ref(), allow_abbreviations, end_boundary))
+    }
+
+    /// Matches `literal`, or an abbreviation of it, at the cursor.
+    ///
+    /// * Without `allow_abbreviations` the whole literal must be present.
+    /// * With `allow_abbreviations` at least `MINIMUM_MATCH_LEN` leading units of the literal
+    ///   must be present.
+    /// * With `end_boundary` the match must be followed by the end of the text, whitespace, or
+    ///   a single-character operator.
+    ///
+    /// Returns `true` and advances the cursor past the matched part on success; returns `false`
+    /// and leaves the cursor alone otherwise.
+    fn is_literal(&mut self, literal: &str, allow_abbreviations: bool, end_boundary: bool) -> bool {
+        // How much of the literal is shared with the text being parsed.
+        // NOTE(review): this value is used as a byte offset below -- confirm `common_length`
+        // counts bytes rather than characters.
+        let shared = common_length(literal, &self.text[self.cursor..]);
+
+        // The full literal is required unless abbreviations are allowed, in which case the
+        // minimum abbreviation length applies instead.
+        let required = if allow_abbreviations {
+            MINIMUM_MATCH_LEN
+        } else {
+            literal.len()
+        };
+        if shared < required {
+            return false;
+        }
+
+        if end_boundary {
+            let following = self.text[self.cursor + shared..].chars().next();
+            let blocked = match following {
+                Some(c) => !unicode_whitespace(c) && !is_single_char_operator(c),
+                None => false,
+            };
+            if blocked {
+                return false;
+            }
+        }
+
+        self.cursor += shared;
+        true
+    }
+
+    /// Matches a non-empty run of latin digits at the cursor.
+    ///
+    /// On success the cursor is advanced past the digits and they are returned as
+    /// `Type::Number`; with no leading digit, `None` is returned and the cursor is left alone.
+    fn is_integer(&mut self) -> Option<(String, Type)> {
+        let start = self.cursor;
+        let width: usize = self.text[start..]
+            .chars()
+            .take_while(|&c| unicode_latin_digit(c))
+            .map(char::len_utf8)
+            .sum();
+
+        if width == 0 {
+            return None;
+        }
+
+        self.cursor = start + width;
+        Some((self.text[start..self.cursor].into(), Type::Number))
+    }
+}
+
+/// Iterator adapter that owns a `Lexer` and yields its tokens one at a time.
+struct LexerIterator(Lexer);
+
+impl Iterator for LexerIterator {
+    type Item = (String, Type);
+
+    /// Yields whatever `Lexer::token` returns for the wrapped lexer.
+    fn next(&mut self) -> Option<(String, Type)> {
+        let LexerIterator(lexer) = self;
+        lexer.token()
+    }
+}
+
+/// Lets a `Lexer` be consumed directly by a `for` loop or by iterator adaptors.
+impl IntoIterator for Lexer {
+    type IntoIter = LexerIterator;
+    type Item = <LexerIterator as Iterator>::Item;
+
+    /// Wraps the lexer in a `LexerIterator`.
+    fn into_iter(self) -> LexerIterator {
+        LexerIterator(self)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    const NONE: Option<(String, Type)> = None;
+
+    // `is_punctuation`: one test per representative character.
+
+    #[test]
+    fn test_is_punctuation_comma() {
+        let ch = ',';
+        assert!(is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_slash() {
+        let ch = '/';
+        assert!(is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_at() {
+        let ch = '@';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_hash() {
+        let ch = '#';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_dollar() {
+        let ch = '$';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_underscore() {
+        let ch = '_';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_space() {
+        let ch = ' ';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_a() {
+        let ch = 'a';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_9() {
+        let ch = '9';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_latin() {
+        let ch = 'é';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_euro() {
+        let ch = '€';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_smile() {
+        let ch = '☺';
+        assert!(!is_punctuation(ch));
+    }
+
+    #[test]
+    fn test_is_punctuation_numeric() {
+        let ch = '¾';
+        assert!(!is_punctuation(ch));
+    }
+
+    /// Checks `is_boundary` against a table of (left, right, expected) cases.
+    #[test]
+    fn test_is_boundary() {
+        let cases = [
+            (' ', 'a', true),
+            ('a', ' ', true),
+            (' ', '+', true),
+            (' ', ',', true),
+            ('3', '4', false),
+            ('(', '(', true),
+            ('r', 'd', false),
+        ];
+        for &(left, right, expected) in cases.iter() {
+            assert_eq!(is_boundary(left, right), expected);
+        }
+    }
+
+    /// Checks `was_quoted` against a table of (input, expected) cases.
+    #[test]
+    fn test_was_quoted() {
+        let cases = [("", false), ("foo", false), ("a b", true), ("(a)", true)];
+        for &(input, expected) in cases.iter() {
+            assert_eq!(was_quoted(input), expected);
+        }
+    }
+
+    /// Checks `dequote` with ASCII quotes, a lone quote character, and a multibyte quote.
+    #[test]
+    fn test_dequote() {
+        let ascii_quotes = "'\"";
+        let cases = [
+            ("foo", ascii_quotes, "foo"),
+            ("'foo'", ascii_quotes, "foo"),
+            ("\"foo\"", ascii_quotes, "foo"),
+            ("'o\\'clock'", ascii_quotes, "o\\'clock"),
+            // single quote char
+            ("'", ascii_quotes, ""),
+            // multibyte quote char
+            ("éo\\'clocké", "é", "o\\'clock"),
+        ];
+        for &(input, quotes, expected) in cases.iter() {
+            assert_eq!(dequote(input, quotes), expected);
+        }
+    }
+
+    /// An empty input produces no tokens.
+    #[test]
+    fn test_token_empty() {
+        let mut lexer = Lexer::new("");
+        assert_eq!(lexer.token(), NONE);
+    }
+
+    /// A mixed expression is split into the expected sequence of typed tokens.
+    #[test]
+    fn test_token_tokens() {
+        let mut lexer = Lexer::new(
+            " one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4    foo.bar and '\\u20ac'",
+        );
+        let expected = [
+            ("one", Type::Identifier),
+            ("'two 'three''", Type::String),
+            ("+", Type::Op),
+            ("456", Type::Number),
+            ("-", Type::Op),
+            ("(", Type::Op),
+            ("1.3", Type::Number),
+            ("*", Type::Op),
+            ("2", Type::Number),
+            ("-", Type::Op),
+            ("0x12", Type::Hex),
+            (")", Type::Op),
+            ("1.2e-3.4", Type::Number),
+            ("foo.bar", Type::Identifier),
+            ("and", Type::Op),
+            ("'€'", Type::String),
+        ];
+        for &(text, kind) in expected.iter() {
+            assert_eq!(lexer.token(), Some((String::from(text), kind)));
+        }
+        assert_eq!(lexer.token(), None);
+    }
+
+    /// Every space-separated piece of the input is lexed as a number.
+    #[test]
+    fn test_token_short_numbers() {
+        let input = "1 12 123 1234 12345 123456 1234567 123.45e 12.34e+";
+        let mut lexer = Lexer::new(input);
+        for piece in input.split(' ') {
+            assert_eq!(lexer.token(), Some((String::from(piece), Type::Number)));
+        }
+        assert_eq!(lexer.token(), None);
+    }
+
+    // `read_word_quoted`: quoted words, with and without escapes.
+
+    #[test]
+    fn test_read_word_quoted_simple() {
+        let parsed = read_word_quoted("'one two'", "'\"", 0);
+        assert_eq!(parsed, Some(("'one two'".to_string(), 9)));
+    }
+
+    #[test]
+    fn test_read_word_quoted_unterminated() {
+        let parsed = read_word_quoted("'one two", "'\"", 0);
+        assert_eq!(parsed, None);
+    }
+
+    #[test]
+    fn test_read_word_quoted_backslash_u() {
+        let parsed = read_word_quoted("'pay \\u20a43'", "'\"", 0);
+        assert_eq!(parsed, Some(("'pay ₤3'".to_string(), 13)));
+    }
+
+    #[test]
+    fn test_read_word_quoted_u_plus() {
+        let parsed = read_word_quoted("\"pay U+20AC5\"", "'\"", 0);
+        assert_eq!(parsed, Some(("\"pay €5\"".to_string(), 13)));
+    }
+
+    // `read_word_unquoted`: plain words, escapes, and several words in one text.
+
+    #[test]
+    fn test_read_word_unquoted_simple() {
+        let parsed = read_word_unquoted("input", 0);
+        assert_eq!(parsed, Some(("input".to_string(), 5)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_escaped_space() {
+        let parsed = read_word_unquoted("one\\ two", 0);
+        assert_eq!(parsed, Some(("one two".to_string(), 8)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_escaped_quote() {
+        let parsed = read_word_unquoted("one\\\"two", 0);
+        assert_eq!(parsed, Some(("one\"two".to_string(), 8)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_escaped_newline() {
+        let parsed = read_word_unquoted("one\\ntwo", 0);
+        assert_eq!(parsed, Some(("one\x0atwo".to_string(), 8)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_escaped_backslash_u() {
+        let parsed = read_word_unquoted("pay\\u20a43", 0);
+        assert_eq!(parsed, Some(("pay₤3".to_string(), 10)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_incomplete_escaped_backslash_u() {
+        let parsed = read_word_unquoted("\\u203", 0);
+        assert_eq!(parsed, Some(("u203".to_string(), 5)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_nonhex_escaped_backslash_u() {
+        let parsed = read_word_unquoted("\\u2fghk", 0);
+        assert_eq!(parsed, Some(("u2fghk".to_string(), 7)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_escaped_u_plus() {
+        let parsed = read_word_unquoted("payU+20AC4", 0);
+        assert_eq!(parsed, Some(("pay€4".to_string(), 10)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_incomplete_u_plus() {
+        let parsed = read_word_unquoted("U+20A", 0);
+        assert_eq!(parsed, Some(("U+20A".to_string(), 5)));
+    }
+
+    #[test]
+    fn test_read_word_trailing_whitespace() {
+        let parsed = read_word_unquoted("one      ", 0);
+        assert_eq!(parsed, Some(("one".to_string(), 3)));
+    }
+
+    #[test]
+    fn test_read_word_unquoted_several_words() {
+        let text = "one 'two' three\\ four";
+
+        let first = read_word_unquoted(text, 0);
+        assert_eq!(first, Some(("one".to_string(), 3)));
+
+        let second = read_word_unquoted(text, 4);
+        assert_eq!(second, Some(("'two'".to_string(), 9)));
+
+        let third = read_word_unquoted(text, 10);
+        assert_eq!(third, Some(("three four".to_string(), 21)));
+    }
+
+    // `common_length`: shared-prefix length of two strings.
+
+    #[test]
+    fn test_common_length_empty() {
+        let shared = common_length("", "");
+        assert_eq!(shared, 0);
+    }
+
+    #[test]
+    fn test_common_length_match_one() {
+        let shared = common_length("a", "a");
+        assert_eq!(shared, 1);
+    }
+
+    #[test]
+    fn test_common_length_match_longer() {
+        let shared = common_length("abcde", "abcde");
+        assert_eq!(shared, 5);
+    }
+
+    #[test]
+    fn test_common_length_match_s2_short() {
+        let shared = common_length("abc", "");
+        assert_eq!(shared, 0);
+    }
+
+    #[test]
+    fn test_common_length_match_differ() {
+        let shared = common_length("abc", "def");
+        assert_eq!(shared, 0);
+    }
+
+    #[test]
+    fn test_common_length_match_s2_prefix() {
+        let shared = common_length("foobar", "foo");
+        assert_eq!(shared, 3);
+    }
+
+    #[test]
+    fn test_common_length_match_s1_prefix() {
+        let shared = common_length("foo", "foobar");
+        assert_eq!(shared, 3);
+    }
+
+    // `Lexer::is_string`: quoted strings, checking both the token and the cursor.
+
+    #[test]
+    fn test_is_string() {
+        let mut lexer = Lexer::new("'one'");
+        let token = lexer.is_string("'\"");
+        assert_eq!(token, Some(("'one'".into(), Type::String)));
+        assert_eq!(lexer.cursor, 5);
+    }
+
+    #[test]
+    fn test_is_string_negative() {
+        let mut lexer = Lexer::new("one");
+        let token = lexer.is_string("'\"");
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_string_empty() {
+        let mut lexer = Lexer::new("''");
+        let token = lexer.is_string("'\"");
+        assert_eq!(token, Some(("''".into(), Type::String)));
+        assert_eq!(lexer.cursor, 2);
+    }
+
+    #[test]
+    fn test_is_string_escape() {
+        let mut lexer = Lexer::new("'one\ttwo'");
+        let token = lexer.is_string("'\"");
+        assert_eq!(token, Some(("'one\ttwo'".into(), Type::String)));
+        assert_eq!(lexer.cursor, 9);
+    }
+
+    // `Lexer::is_date`: a year or epoch followed by various trailing text.
+
+    #[test]
+    fn test_is_date_year_eos() {
+        let mut lexer = Lexer::new("2015");
+        let token = lexer.is_date();
+        assert_eq!(token, Some(("2015".into(), Type::Date)));
+        assert_eq!(lexer.cursor, 4);
+    }
+
+    #[test]
+    fn test_is_date_epoch() {
+        let mut lexer = Lexer::new("315532800");
+        let token = lexer.is_date();
+        assert_eq!(token, Some(("315532800".into(), Type::Date)));
+        assert_eq!(lexer.cursor, 9);
+    }
+
+    #[test]
+    fn test_is_date_year_ws() {
+        let mut lexer = Lexer::new("2015  ");
+        let token = lexer.is_date();
+        assert_eq!(token, Some(("2015".into(), Type::Date)));
+        assert_eq!(lexer.cursor, 4);
+    }
+
+    #[test]
+    fn test_is_date_year_ident() {
+        let mut lexer = Lexer::new("2015abc");
+        let token = lexer.is_date();
+        assert_eq!(token, Some(("2015".into(), Type::Date)));
+        assert_eq!(lexer.cursor, 4);
+    }
+
+    #[test]
+    fn test_is_date_year_plus() {
+        let mut lexer = Lexer::new("2015+");
+        let token = lexer.is_date();
+        assert_eq!(token, Some(("2015".into(), Type::Date)));
+        assert_eq!(lexer.cursor, 4);
+    }
+
+    #[test]
+    fn test_is_date_year_minus() {
+        let mut lexer = Lexer::new("2015-xyz");
+        let token = lexer.is_date();
+        assert_eq!(token, Some(("2015-".into(), Type::Date)));
+        assert_eq!(lexer.cursor, 5);
+    }
+
+    // `Lexer::is_duration`: one positive and one negative case.
+
+    #[test]
+    fn test_is_duration_1w() {
+        let mut lexer = Lexer::new("1w");
+        let token = lexer.is_duration();
+        assert_eq!(token, Some(("1w".into(), Type::Duration)));
+        assert_eq!(lexer.cursor, 2);
+    }
+
+    #[test]
+    fn test_is_duration_op() {
+        let mut lexer = Lexer::new("!!");
+        let token = lexer.is_duration();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    // `Lexer::is_number`: integers, decimals, exponents, and the trailing lookahead.
+
+    #[test]
+    fn test_is_number_digit() {
+        let mut lexer = Lexer::new("3");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("3".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 1);
+    }
+
+    #[test]
+    fn test_is_number_integer() {
+        let mut lexer = Lexer::new("13");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("13".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 2);
+    }
+
+    #[test]
+    fn test_is_number_trailing_minus() {
+        let mut lexer = Lexer::new("13-");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("13".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 2);
+    }
+
+    #[test]
+    fn test_is_number_decimal() {
+        let mut lexer = Lexer::new("1.3");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("1.3".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    #[test]
+    fn test_is_number_multiple_decimal() {
+        let mut lexer = Lexer::new("1.3.4");
+        let token = lexer.is_number();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_number_decimal_no_digits() {
+        let mut lexer = Lexer::new("1.");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("1.".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 2);
+    }
+
+    #[test]
+    fn test_is_number_decimal_multi_digit() {
+        let mut lexer = Lexer::new("12.32");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("12.32".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 5);
+    }
+
+    #[test]
+    fn test_is_number_decimal_e_no_exponent() {
+        let mut lexer = Lexer::new("12.32e");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("12.32e".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 6);
+    }
+
+    #[test]
+    fn test_is_number_decimal_e_plus_no_exponent() {
+        let mut lexer = Lexer::new("12.32e+");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("12.32e+".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 7);
+    }
+
+    #[test]
+    fn test_is_number_decimal_e_integer_exponent() {
+        let mut lexer = Lexer::new("12.32e-12");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("12.32e-12".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 9);
+    }
+
+    #[test]
+    fn test_is_number_decimal_e_decimal_exponent() {
+        let mut lexer = Lexer::new("12.32e12.34");
+        let token = lexer.is_number();
+        assert_eq!(token, Some(("12.32e12.34".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 11);
+    }
+
+    #[test]
+    fn test_is_number_integer_invalid_lookahead() {
+        let mut lexer = Lexer::new("13a");
+        let token = lexer.is_number();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    // `Lexer::is_set`: comma-separated numbers and ranges.
+
+    #[test]
+    fn test_is_set_singletons() {
+        let mut lexer = Lexer::new("12,13");
+        let token = lexer.is_set();
+        assert_eq!(token, Some(("12,13".into(), Type::Set)));
+        assert_eq!(lexer.cursor, 5);
+    }
+
+    #[test]
+    fn test_is_set_ranges() {
+        let mut lexer = Lexer::new("12-13,19-200");
+        let token = lexer.is_set();
+        assert_eq!(token, Some(("12-13,19-200".into(), Type::Set)));
+        assert_eq!(lexer.cursor, 12);
+    }
+
+    #[test]
+    fn test_is_set_double_comma() {
+        let mut lexer = Lexer::new("12-13,,19-200");
+        let token = lexer.is_set();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_set_trailing_comma() {
+        let mut lexer = Lexer::new("12-13,");
+        let token = lexer.is_set();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_set_trailing_ws() {
+        let mut lexer = Lexer::new("12-13  ");
+        let token = lexer.is_set();
+        assert_eq!(token, Some(("12-13".into(), Type::Set)));
+        assert_eq!(lexer.cursor, 5);
+    }
+
+    #[test]
+    fn test_is_set_trailing_non_hard_boundary() {
+        let mut lexer = Lexer::new("12-13abc");
+        let token = lexer.is_set();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    /// `--` is recognized when the cursor sits directly on it.
+    #[test]
+    fn test_is_separator() {
+        let mut lexer = Lexer::new("  -- ");
+        lexer.cursor = 2;
+        let token = lexer.is_separator();
+        assert_eq!(token, Some(("--".into(), Type::Separator)));
+        assert_eq!(lexer.cursor, 4);
+    }
+
+    /// A single dash is not a separator.
+    #[test]
+    fn test_is_separator_negative() {
+        let mut lexer = Lexer::new("- ");
+        let token = lexer.is_separator();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    // `Lexer::is_tag`: the tag itself and the lookbehind on the preceding character.
+
+    #[test]
+    fn test_is_tag_plus() {
+        let mut lexer = Lexer::new("+foo");
+        let token = lexer.is_tag();
+        assert_eq!(token, Some(("+foo".into(), Type::Tag)));
+        assert_eq!(lexer.cursor, 4);
+    }
+
+    #[test]
+    fn test_is_tag_not_after_whitespace() {
+        let mut lexer = Lexer::new("x+y");
+        lexer.cursor = 1;
+        let token = lexer.is_tag();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 1);
+    }
+
+    #[test]
+    fn test_is_tag_after_whitespace() {
+        let mut lexer = Lexer::new(" +y");
+        lexer.cursor = 1;
+        let token = lexer.is_tag();
+        assert_eq!(token, Some(("+y".into(), Type::Tag)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    #[test]
+    fn test_is_tag_after_lparen() {
+        let mut lexer = Lexer::new("(+y");
+        lexer.cursor = 1;
+        let token = lexer.is_tag();
+        assert_eq!(token, Some(("+y".into(), Type::Tag)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    #[test]
+    fn test_is_tag_after_rparen() {
+        let mut lexer = Lexer::new(")+y");
+        lexer.cursor = 1;
+        let token = lexer.is_tag();
+        assert_eq!(token, Some(("+y".into(), Type::Tag)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    #[test]
+    fn test_is_tag_after_multibyte_char() {
+        let mut lexer = Lexer::new("€+y");
+        lexer.cursor = 3;
+        let token = lexer.is_tag();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    // `Lexer::is_url`: http/https URLs and a rejected scheme.
+
+    #[test]
+    fn test_is_url_http() {
+        let mut lexer = Lexer::new("http://foo.com/bar");
+        let token = lexer.is_url();
+        assert_eq!(token, Some(("http://foo.com/bar".into(), Type::URL)));
+        assert_eq!(lexer.cursor, 18);
+    }
+
+    #[test]
+    fn test_is_url_https() {
+        let mut lexer = Lexer::new("https://foo.com/bar");
+        let token = lexer.is_url();
+        assert_eq!(token, Some(("https://foo.com/bar".into(), Type::URL)));
+        assert_eq!(lexer.cursor, 19);
+    }
+
+    #[test]
+    fn test_is_url_ws() {
+        let mut lexer = Lexer::new("https://foo.com/bar  ");
+        let token = lexer.is_url();
+        assert_eq!(token, Some(("https://foo.com/bar".into(), Type::URL)));
+        assert_eq!(lexer.cursor, 19);
+    }
+
+    #[test]
+    fn test_is_url_with_ops() {
+        let mut lexer = Lexer::new("https://foo.com/bar()+-~");
+        let token = lexer.is_url();
+        assert_eq!(token, Some(("https://foo.com/bar()+-~".into(), Type::URL)));
+        assert_eq!(lexer.cursor, 24);
+    }
+
+    #[test]
+    fn test_is_url_negative() {
+        let mut lexer = Lexer::new("file://foo.com/bar");
+        let token = lexer.is_url();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    // `Lexer::is_pair`: each supported name/value separator, plus quoted values.
+
+    #[test]
+    fn test_is_pair_double_colon() {
+        let mut lexer = Lexer::new("foo::bar  ");
+        let token = lexer.is_pair();
+        assert_eq!(token, Some(("foo::bar".into(), Type::Pair)));
+        assert_eq!(lexer.cursor, 8);
+    }
+
+    #[test]
+    fn test_is_pair_colon_eq() {
+        let mut lexer = Lexer::new("foo:=bar  ");
+        let token = lexer.is_pair();
+        assert_eq!(token, Some(("foo:=bar".into(), Type::Pair)));
+        assert_eq!(lexer.cursor, 8);
+    }
+
+    #[test]
+    fn test_is_pair_colon() {
+        let mut lexer = Lexer::new("foo:bar  ");
+        let token = lexer.is_pair();
+        assert_eq!(token, Some(("foo:bar".into(), Type::Pair)));
+        assert_eq!(lexer.cursor, 7);
+    }
+
+    #[test]
+    fn test_is_pair_equal() {
+        let mut lexer = Lexer::new("foo=bar");
+        let token = lexer.is_pair();
+        assert_eq!(token, Some(("foo=bar".into(), Type::Pair)));
+        assert_eq!(lexer.cursor, 7);
+    }
+
+    #[test]
+    fn test_is_pair_quoted() {
+        let mut lexer = Lexer::new("foo='abc def'");
+        let token = lexer.is_pair();
+        assert_eq!(token, Some(("foo='abc def'".into(), Type::Pair)));
+        assert_eq!(lexer.cursor, 13);
+    }
+
+    #[test]
+    fn test_is_pair_quoted_escapes() {
+        let mut lexer = Lexer::new("foo='abc\\u20acdef'");
+        let token = lexer.is_pair();
+        assert_eq!(token, Some(("foo='abc€def'".into(), Type::Pair)));
+        assert_eq!(lexer.cursor, 18);
+    }
+
+    // `Lexer::is_uuid`: the argument switches the end-boundary check on or off.
+
+    #[test]
+    fn test_is_uuid_long_eof() {
+        let input = "ffffffff-ffff-ffff-ffff-ffffffffff";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.is_uuid(true);
+        assert_eq!(token, Some((input.into(), Type::Uuid)));
+        assert_eq!(lexer.cursor, 34);
+    }
+
+    #[test]
+    fn test_is_uuid_long_ws() {
+        let input = "ffffffff-ffff-ffff-ffff-ffffffffff  kjdf";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.is_uuid(true);
+        assert_eq!(token, Some((input[..34].into(), Type::Uuid)));
+        assert_eq!(lexer.cursor, 34);
+    }
+
+    #[test]
+    fn test_is_uuid_long_op() {
+        let input = "ffffffff-ffff-ffff-ffff-ffffffffff+";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.is_uuid(true);
+        assert_eq!(token, Some((input[..34].into(), Type::Uuid)));
+        assert_eq!(lexer.cursor, 34);
+    }
+
+    #[test]
+    fn test_is_uuid_long_bad_boundary() {
+        let input = "ffffffff-ffff-ffff-ffff-ffffffffff_";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.is_uuid(true);
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_uuid_long_bad_boundary_ignored() {
+        let input = "ffffffff-ffff-ffff-ffff-ffffffffff_";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.is_uuid(false);
+        assert_eq!(token, Some((input[..34].into(), Type::Uuid)));
+        assert_eq!(lexer.cursor, 34);
+    }
+
+    #[test]
+    fn test_is_uuid_too_short() {
+        let input = "ffffff";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.is_uuid(true);
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    // `Lexer::is_path`: slash-separated paths with more than three slashes.
+
+    #[test]
+    fn test_is_path_simple() {
+        let mut lexer = Lexer::new("/path/to/a/file");
+        let token = lexer.is_path();
+        assert_eq!(token, Some(("/path/to/a/file".into(), Type::Path)));
+        assert_eq!(lexer.cursor, 15);
+    }
+
+    #[test]
+    fn test_is_path_too_short() {
+        let mut lexer = Lexer::new("/a/file");
+        let token = lexer.is_path();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_path_trailing_slash() {
+        let mut lexer = Lexer::new("/path/to/a/dir/");
+        let token = lexer.is_path();
+        assert_eq!(token, Some(("/path/to/a/dir/".into(), Type::Path)));
+        assert_eq!(lexer.cursor, 15);
+    }
+
+    #[test]
+    fn test_is_path_double_slash() {
+        let mut lexer = Lexer::new("/a//file");
+        let token = lexer.is_path();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_path_no_initial_slash() {
+        let mut lexer = Lexer::new("a/path/to/a/file");
+        let token = lexer.is_path();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    // `Lexer::is_substitution`: `/from/to/` with an optional `g` flag and trailing lookahead.
+
+    #[test]
+    fn test_is_substitution_simple() {
+        let mut lexer = Lexer::new("/foo/bar/");
+        let token = lexer.is_substitution();
+        assert_eq!(token, Some(("/foo/bar/".into(), Type::Substitution)));
+        assert_eq!(lexer.cursor, 9);
+    }
+
+    #[test]
+    fn test_is_substitution_simple_ws() {
+        let mut lexer = Lexer::new("/foo/bar/  ");
+        let token = lexer.is_substitution();
+        assert_eq!(token, Some(("/foo/bar/".into(), Type::Substitution)));
+        assert_eq!(lexer.cursor, 9);
+    }
+
+    #[test]
+    fn test_is_substitution_simple_g() {
+        let mut lexer = Lexer::new("/foo/bar/g");
+        let token = lexer.is_substitution();
+        assert_eq!(token, Some(("/foo/bar/g".into(), Type::Substitution)));
+        assert_eq!(lexer.cursor, 10);
+    }
+
+    #[test]
+    fn test_is_substitution_simple_g_ws() {
+        let mut lexer = Lexer::new("/foo/bar/g  ");
+        let token = lexer.is_substitution();
+        assert_eq!(token, Some(("/foo/bar/g".into(), Type::Substitution)));
+        assert_eq!(lexer.cursor, 10);
+    }
+
+    #[test]
+    fn test_is_substitution_simple_not_g() {
+        let mut lexer = Lexer::new("/foo/bar/h");
+        let token = lexer.is_substitution();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_substitution_simple_not_g_op() {
+        let mut lexer = Lexer::new("/foo/bar/+");
+        let token = lexer.is_substitution();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_substitution_simple_g_but_not_ws() {
+        let mut lexer = Lexer::new("/foo/bar/ghi");
+        let token = lexer.is_substitution();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    #[test]
+    fn test_is_pattern_simple() {
+        let mut l = Lexer::new("/foo/");
+        assert_eq!(l.is_pattern(), Some(("/foo/".into(), Type::Pattern)));
+        assert_eq!(l.cursor, 5);
+    }
+
+    /// A literal backslash-`u20A4` sequence inside a pattern is expected to be returned as
+    /// written (all ten bytes), not replaced by the character it names.
+    #[test]
+    fn test_is_pattern_escaped() {
+        let mut lexer = Lexer::new("/f\\u20A4o/");
+        let token = lexer.is_pattern();
+        assert_eq!(token, Some(("/f\\u20A4o/".into(), Type::Pattern)));
+        assert_eq!(lexer.cursor, 10);
+    }
+
+    /// A newline and tab after the closing slash are expected to be left unconsumed.
+    #[test]
+    fn test_is_pattern_simple_trailing_ws() {
+        let mut lexer = Lexer::new("/foo/\n\t");
+        let token = lexer.is_pattern();
+        assert_eq!(token, Some(("/foo/".into(), Type::Pattern)));
+        assert_eq!(lexer.cursor, 5);
+    }
+
+    /// `_hastag_` is expected to be recognised as an operator.
+    #[test]
+    fn test_is_operator_hastag() {
+        let mut lexer = Lexer::new("_hastag_");
+        let token = lexer.is_operator();
+        assert_eq!(token, Some(("_hastag_".into(), Type::Op)));
+    }
+
+    /// `_notag_` is expected to be recognised as an operator.
+    #[test]
+    fn test_is_operator_notag() {
+        let mut lexer = Lexer::new("_notag_");
+        let token = lexer.is_operator();
+        assert_eq!(token, Some(("_notag_".into(), Type::Op)));
+    }
+
+    /// `_neg_` is expected to be recognised as an operator.
+    #[test]
+    fn test_is_operator_neg() {
+        let mut lexer = Lexer::new("_neg_");
+        let token = lexer.is_operator();
+        assert_eq!(token, Some(("_neg_".into(), Type::Op)));
+    }
+
+    /// The word `xor` is expected to be recognised as an operator.
+    #[test]
+    fn test_is_operator_xor() {
+        let mut lexer = Lexer::new("xor");
+        let token = lexer.is_operator();
+        assert_eq!(token, Some(("xor".into(), Type::Op)));
+    }
+
+    /// Empty input is expected to yield no identifier and leave the cursor untouched.
+    #[test]
+    fn test_is_identifier_empty() {
+        let mut lexer = Lexer::new("");
+        let token = lexer.is_identifier();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    /// A single multibyte character is expected to be accepted as an identifier.  The cursor
+    /// ends at 3 because `☺` occupies three bytes in UTF-8.
+    #[test]
+    fn test_is_identifier_multibyte_nonpunct_first_char() {
+        let mut lexer = Lexer::new("☺");
+        let token = lexer.is_identifier();
+        assert_eq!(token, Some(("☺".into(), Type::Identifier)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    /// Input that begins with a digit is expected not to match as an identifier.
+    #[test]
+    fn test_is_identifier_bad_first_char() {
+        let mut lexer = Lexer::new("1abc");
+        let token = lexer.is_identifier();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    /// The identifier is expected to end in front of the `:`, yielding just `a`.
+    #[test]
+    fn test_is_identifier_bad_next_char() {
+        let mut lexer = Lexer::new("a:bc");
+        let token = lexer.is_identifier();
+        assert_eq!(token, Some(("a".into(), Type::Identifier)));
+        assert_eq!(lexer.cursor, 1);
+    }
+
+    /// A plain alphabetic input is expected to be consumed whole as an identifier.
+    #[test]
+    fn test_is_identifier_ok() {
+        let mut lexer = Lexer::new("abc");
+        let token = lexer.is_identifier();
+        assert_eq!(token, Some(("abc".into(), Type::Identifier)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    /// A lone `+` is expected not to match as a word; the cursor must not move.
+    #[test]
+    fn test_is_word_no() {
+        let mut lexer = Lexer::new("+");
+        let token = lexer.is_word();
+        assert!(token.is_none());
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    /// Starting at offset 4 (just after the dot), the rest of the input is expected to be
+    /// returned as the word `PENDING`.
+    #[test]
+    fn test_is_word_pending() {
+        let mut lexer = Lexer::new("foo.PENDING");
+        lexer.cursor = 4;
+        let token = lexer.is_word();
+        assert_eq!(token, Some(("PENDING".into(), Type::Word)));
+        assert_eq!(lexer.cursor, 11);
+    }
+
+    /// A word that runs to the end of the input is expected to be consumed whole.
+    #[test]
+    fn test_is_word_to_eof() {
+        let mut lexer = Lexer::new("abc");
+        let token = lexer.is_word();
+        assert_eq!(token, Some(("abc".into(), Type::Word)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    /// With the cursor moved past the leading `--`, only `abc` is expected to be returned.
+    #[test]
+    fn test_is_word_nonzero_start() {
+        let mut lexer = Lexer::new("--abc");
+        lexer.cursor = 2;
+        let token = lexer.is_word();
+        assert_eq!(token, Some(("abc".into(), Type::Word)));
+        assert_eq!(lexer.cursor, 5);
+    }
+
+    /// The word is expected to end in front of the space.
+    #[test]
+    fn test_is_word_to_ws() {
+        let mut lexer = Lexer::new("abc def");
+        let token = lexer.is_word();
+        assert_eq!(token, Some(("abc".into(), Type::Word)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    /// The word is expected to end in front of the `*`.
+    #[test]
+    fn test_is_word_to_op() {
+        let mut lexer = Lexer::new("abc*def");
+        let token = lexer.is_word();
+        assert_eq!(token, Some(("abc".into(), Type::Word)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    /// `Lexer::split` is expected to return the token texts in order, with the surrounding
+    /// and separating spaces dropped.
+    #[test]
+    fn test_split_simple() {
+        let expected: Vec<String> = ["(", "A", "or", "B", ")"]
+            .iter()
+            .map(|piece| String::from(*piece))
+            .collect();
+        assert_eq!(Lexer::split(" ( A or B ) "), expected);
+    }
+
+    /// `Lexer::split` on input with no spaces between some tokens: adjacent operators and
+    /// `a+b` are expected to be separated, while the number `12.3e4` and the quoted string
+    /// `'c d'` each stay in one piece.
+    #[test]
+    fn test_split_confusing() {
+        let expected: Vec<String> = ["+", "-", "*", "a", "+", "b", "12.3e4", "'c d'"]
+            .iter()
+            .map(|piece| String::from(*piece))
+            .collect();
+        assert_eq!(Lexer::split("  +-* a+b 12.3e4 'c d'"), expected);
+    }
+
+    /// Exhaustively combines an optional modifier, each separator and a set of values (some
+    /// of which contain separator characters themselves) and checks that
+    /// `Lexer::decompose_pair` splits `name[.modifier]<separator><value>` back into exactly
+    /// those parts.
+    #[test]
+    fn test_decompose_pair_combos() {
+        let name = "name";
+        let modifiers = ["", "mod"];
+        let separators = [":", "=", "::", ":="];
+        let values = ["", "value", "a:b", "a::b", "a=b", "a:=b"];
+
+        for modifier in modifiers.iter() {
+            // the dot is only written when there is a modifier for it to introduce
+            let dot = if modifier.is_empty() { "" } else { "." };
+            for separator in separators.iter() {
+                for value in values.iter() {
+                    let input = format!("{}{}{}{}{}", name, dot, modifier, separator, value);
+                    let expected = DecomposedPair {
+                        name: name.into(),
+                        modifier: (*modifier).to_owned(),
+                        separator: (*separator).to_owned(),
+                        value: (*value).to_owned(),
+                    };
+                    assert_eq!(Lexer::decompose_pair(&input), Some(expected));
+                }
+            }
+        }
+    }
+
+    /// `is_one_of` on `Grumpy.`: with the third argument `true` the call is expected to fail
+    /// and leave the cursor alone; with it `false` the call is expected to succeed and
+    /// advance past `Grumpy` (the `.` stays unconsumed).
+    // NOTE(review): presumably the third flag requires the match to end at a token boundary
+    // -- confirm against `is_one_of`.
+    #[test]
+    fn test_is_one_of() {
+        let mut lexer = Lexer::new("Grumpy.");
+        let dwarves = vec![
+            "Sneezy", "Doc", "Bashful", "Grumpy", "Happy", "Sleepy", "Dopey",
+        ];
+
+        let first_try = lexer.is_one_of(&dwarves, false, true);
+        assert!(!first_try);
+        assert_eq!(lexer.cursor, 0);
+
+        let second_try = lexer.is_one_of(&dwarves, false, false);
+        assert!(second_try);
+        assert_eq!(lexer.cursor, 6);
+    }
+
+    /// Negative case (not a negative number): non-numeric input is expected not to match.
+    #[test]
+    fn test_is_integer_negative() {
+        let mut lexer = Lexer::new("one");
+        let token = lexer.is_integer();
+        assert_eq!(token, NONE);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    /// Positive case: an all-digit input is expected to be consumed whole as a Number.
+    #[test]
+    fn test_is_integer_positive() {
+        let mut lexer = Lexer::new("123");
+        let token = lexer.is_integer();
+        assert_eq!(token, Some(("123".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    /// The integer is expected to end in front of the `.`; the rest is left unconsumed.
+    #[test]
+    fn test_is_integer_trailing_dot() {
+        let mut lexer = Lexer::new("123.foo");
+        let token = lexer.is_integer();
+        assert_eq!(token, Some(("123".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 3);
+    }
+
+    /// Starting at offset 4, the digits between the two dots are expected to be returned and
+    /// the cursor to stop at the second dot.
+    #[test]
+    fn test_is_integer_not_at_start() {
+        let mut lexer = Lexer::new("abc.123.foo");
+        lexer.cursor = 4;
+        let token = lexer.is_integer();
+        assert_eq!(token, Some(("123".into(), Type::Number)));
+        assert_eq!(lexer.cursor, 7);
+    }
+
+    /// A literal that does not occur at the cursor is expected to be rejected without moving
+    /// the cursor.
+    #[test]
+    fn test_is_literal_no_match() {
+        let mut lexer = Lexer::new("one.two");
+        let matched = lexer.is_literal("zero", false, false);
+        assert!(!matched);
+        assert_eq!(lexer.cursor, 0);
+    }
+
+    /// Three consecutive literals are matched one after another; each successful call is
+    /// expected to advance the cursor by the length of the literal just matched.
+    #[test]
+    fn test_is_literal_multi() {
+        let mut lexer = Lexer::new("one.two");
+
+        let first = lexer.is_literal("one", false, false);
+        assert!(first);
+        assert_eq!(lexer.cursor, 3);
+
+        let dot = lexer.is_literal(".", false, false);
+        assert!(dot);
+        assert_eq!(lexer.cursor, 4);
+
+        // the last literal reaches the end of the input, and is matched with the third flag set
+        let last = lexer.is_literal("two", false, true);
+        assert!(last);
+        assert_eq!(lexer.cursor, 7);
+    }
+
+    /// The input `wonder` is a proper prefix of the literal `wonderful`: the match is
+    /// expected to fail when the second argument is `false` and to succeed, consuming all
+    /// six input bytes, when it is `true`.
+    #[test]
+    fn test_is_literal_abbrev() {
+        let mut lexer = Lexer::new("wonder");
+
+        let exact = lexer.is_literal("wonderful", false, false);
+        assert!(!exact);
+        assert_eq!(lexer.cursor, 0);
+
+        let abbreviated = lexer.is_literal("wonderful", true, false);
+        assert!(abbreviated);
+        assert_eq!(lexer.cursor, 6);
+    }
+
+    /// End-to-end tests: each case runs the lexer's iterator over a complete input and checks
+    /// the full list of (token text, token type) pairs it yields.
+    mod integ {
+        use super::super::*;
+
+        /// Lex `input` twice -- once as the entire text and once padded with a single space on
+        /// each side -- and assert that both runs yield exactly the `expected` tokens.
+        ///
+        /// Each lexer has the attributes "due", "tags" and "description" added before lexing.
+        fn lexer_test(input: &str, expected: Vec<(&str, Type)>) {
+            // isolated case: `input` is the whole text handed to the lexer
+            let mut lexer = Lexer::new(input);
+            lexer.add_attribute("due");
+            lexer.add_attribute("tags");
+            lexer.add_attribute("description");
+            let got: Vec<_> = lexer.into_iter().collect();
+            // borrow the token texts so they can be compared against the `&str` expectations
+            let got_strs: Vec<_> = got.iter().map(|(s, t)| (s.as_ref(), *t)).collect();
+            assert_eq!(got_strs, expected);
+
+            // embedded case: the same input surrounded by spaces must give the same tokens
+            let mut lexer = Lexer::new(format!(" {} ", input));
+            lexer.add_attribute("due");
+            lexer.add_attribute("tags");
+            lexer.add_attribute("description");
+            let got: Vec<_> = lexer.into_iter().collect();
+            let got_strs: Vec<_> = got.iter().map(|(s, t)| (s.as_ref(), *t)).collect();
+            assert_eq!(got_strs, expected);
+        }
+
+        // Pattern
+        // Slash-delimited text; an escaped slash or a quote inside must not end the token.
+        #[test]
+        fn test_pattern_foo() {
+            lexer_test("/foo/", vec![("/foo/", Type::Pattern)]);
+        }
+
+        #[test]
+        fn test_pattern_escaped_slash() {
+            lexer_test("/a\\/b/", vec![("/a\\/b/", Type::Pattern)]);
+        }
+
+        #[test]
+        fn test_pattern_quote() {
+            lexer_test("/'/", vec![("/'/", Type::Pattern)]);
+        }
+
+        // Substitution
+        // `/from/to/`, optionally followed by `g`, is expected to lex as a single token.
+        #[test]
+        fn test_subst_g() {
+            lexer_test("/from/to/g", vec![("/from/to/g", Type::Substitution)]);
+        }
+
+        #[test]
+        fn test_subst() {
+            lexer_test("/from/to/", vec![("/from/to/", Type::Substitution)]);
+        }
+
+        // Tag
+        // `+name` and `-name`; the name may begin with `@`.
+        #[test]
+        fn test_tag_simple() {
+            lexer_test("+tag", vec![("+tag", Type::Tag)]);
+        }
+
+        #[test]
+        fn test_tag_negative() {
+            lexer_test("-tag", vec![("-tag", Type::Tag)]);
+        }
+
+        #[test]
+        fn test_tag_at() {
+            lexer_test("+@tag", vec![("+@tag", Type::Tag)]);
+        }
+
+        // Path
+        // Multi-segment slash-separated input, with or without a trailing slash.
+        #[test]
+        fn test_path() {
+            lexer_test(
+                "/long/path/to/file.txt",
+                vec![("/long/path/to/file.txt", Type::Path)],
+            );
+        }
+
+        #[test]
+        fn test_path_dir() {
+            lexer_test(
+                "/long/path/to/dir/",
+                vec![("/long/path/to/dir/", Type::Path)],
+            );
+        }
+
+        // Word
+        // Dotted text that begins with a digit is expected to come back as one Word.
+        #[test]
+        fn test_1_foo_bar() {
+            lexer_test("1.foo.bar", vec![("1.foo.bar", Type::Word)]);
+        }
+
+        // Identifier
+        // Covers multibyte text as well as embedded digits, dots and underscores.
+        #[test]
+        fn test_foo() {
+            lexer_test("foo", vec![("foo", Type::Identifier)]);
+        }
+
+        #[test]
+        fn test_multibyte_ident() {
+            lexer_test("Çirçös", vec![("Çirçös", Type::Identifier)]);
+        }
+
+        #[test]
+        fn test_multibyte_nonpunctuation_single_char() {
+            lexer_test("☺", vec![("☺", Type::Identifier)]);
+        }
+
+        #[test]
+        fn test_name() {
+            lexer_test("name", vec![("name", Type::Identifier)]);
+        }
+
+        #[test]
+        fn test_f1() {
+            lexer_test("f1", vec![("f1", Type::Identifier)]);
+        }
+
+        #[test]
+        fn test_foo_dot_bar() {
+            lexer_test("foo.bar", vec![("foo.bar", Type::Identifier)]);
+        }
+
+        #[test]
+        fn test_long_with_underscore() {
+            lexer_test(
+                "a1a1a1a1_a1a1_a1a1_a1a1_a1a1a1a1a1a1",
+                vec![("a1a1a1a1_a1a1_a1a1_a1a1_a1a1a1a1a1a1", Type::Identifier)],
+            );
+        }
+
+        // Text that starts with 'or', which is an operator; the operator must not be split off
+        // and the whole text is expected to lex as one Identifier.
+        #[test]
+        fn test_starts_with_or() {
+            lexer_test("ordinary", vec![("ordinary", Type::Identifier)]);
+        }
+
+        // DOM
+        // Bare attribute names, references prefixed by a number or UUID, and `system.`/`rc.` names.
+        #[test]
+        fn test_due() {
+            lexer_test("due", vec![("due", Type::DOM)]);
+        }
+
+        #[test]
+        fn test_123_tags() {
+            lexer_test("123.tags", vec![("123.tags", Type::DOM)]);
+        }
+
+        #[test]
+        fn test_123_tags_pending() {
+            lexer_test("123.tags.PENDING", vec![("123.tags.PENDING", Type::DOM)]);
+        }
+
+        #[test]
+        fn test_123_description() {
+            lexer_test("123.description", vec![("123.description", Type::DOM)]);
+        }
+
+        #[test]
+        fn test_123_annotations_count() {
+            lexer_test(
+                "123.annotations.count",
+                vec![("123.annotations.count", Type::DOM)],
+            );
+        }
+
+        #[test]
+        fn test_123_annotations_1_description() {
+            lexer_test(
+                "123.annotations.1.description",
+                vec![("123.annotations.1.description", Type::DOM)],
+            );
+        }
+
+        #[test]
+        fn test_123_annotations_1_entry() {
+            lexer_test(
+                "123.annotations.1.entry",
+                vec![("123.annotations.1.entry", Type::DOM)],
+            );
+        }
+
+        #[test]
+        fn test_123_annotations_1_entry_year() {
+            lexer_test(
+                "123.annotations.1.entry.year",
+                vec![("123.annotations.1.entry.year", Type::DOM)],
+            );
+        }
+
+        #[test]
+        fn test_uuid_due() {
+            lexer_test(
+                "a360fc44-315c-4366-b70c-ea7e7520b749.due",
+                vec![("a360fc44-315c-4366-b70c-ea7e7520b749.due", Type::DOM)],
+            );
+        }
+
+        #[test]
+        fn test_numeric_uuid_due() {
+            lexer_test(
+                "12345678-1234-1234-1234-123456789012.due",
+                vec![("12345678-1234-1234-1234-123456789012.due", Type::DOM)],
+            );
+        }
+
+        #[test]
+        fn test_system_os() {
+            lexer_test("system.os", vec![("system.os", Type::DOM)]);
+        }
+
+        #[test]
+        fn test_rc_foo() {
+            lexer_test("rc.foo", vec![("rc.foo", Type::DOM)]);
+        }
+
+        // URL
+        // `http://` and `https://` addresses are expected to stay in one token.
+        #[test]
+        fn test_lexer_31() {
+            lexer_test(
+                "http://example.com",
+                vec![("http://example.com", Type::URL)],
+            );
+        }
+
+        #[test]
+        fn test_lexer_32() {
+            lexer_test(
+                "https://foo.example.com",
+                vec![("https://foo.example.com", Type::URL)],
+            );
+        }
+
+        // String
+        // The quotes stay in the token text.  In the expected values, backslash-escaped quotes
+        // and the `\uXXXX` / `U+XXXX` forms appear as the characters they denote.
+        #[test]
+        fn test_quoted_string() {
+            lexer_test("'one two'", vec![("'one two'", Type::String)]);
+        }
+
+        #[test]
+        fn test_double_quoted_string() {
+            lexer_test("\"three\"", vec![("\"three\"", Type::String)]);
+        }
+
+        #[test]
+        fn test_string_quoted_with_escapes() {
+            lexer_test("'\\''", vec![("'''", Type::String)]);
+        }
+
+        #[test]
+        fn test_string_quoted_quotes() {
+            lexer_test("\"\\\"\"", vec![("\"\"\"", Type::String)]);
+        }
+
+        #[test]
+        fn test_quoted_tabs() {
+            lexer_test("\"\tfoo\t\"", vec![("\"\tfoo\t\"", Type::String)]);
+        }
+
+        #[test]
+        fn test_multibyte_slash_u() {
+            lexer_test("\"\\u20A43\"", vec![("\"₤3\"", Type::String)]);
+        }
+
+        #[test]
+        fn test_multibyte_u_plus() {
+            lexer_test("\"U+20AC4\"", vec![("\"€4\"", Type::String)]);
+        }
+
+        // Number
+        // Integer, decimal and exponent forms are expected as Number; `0x...` as Hex.
+        #[test]
+        fn test_one() {
+            lexer_test("1", vec![("1", Type::Number)]);
+        }
+
+        #[test]
+        fn test_pi() {
+            lexer_test("3.14", vec![("3.14", Type::Number)]);
+        }
+
+        #[test]
+        fn test_avogadro() {
+            lexer_test("6.02217e23", vec![("6.02217e23", Type::Number)]);
+        }
+
+        #[test]
+        fn test_expo() {
+            lexer_test("1.2e-3.4", vec![("1.2e-3.4", Type::Number)]);
+        }
+
+        #[test]
+        fn test_hex() {
+            lexer_test("0x2f", vec![("0x2f", Type::Hex)]);
+        }
+
+        // Set (1,2,4-7,9)
+        // Comma-separated numbers and `a-b` ranges; trailing whitespace or `(` ends the token.
+        #[test]
+        fn test_set_pair() {
+            lexer_test("1,2", vec![("1,2", Type::Set)]);
+        }
+
+        #[test]
+        fn test_set_range() {
+            lexer_test("1-2", vec![("1-2", Type::Set)]);
+        }
+
+        #[test]
+        fn test_set_range_pair() {
+            lexer_test("1-2,4", vec![("1-2,4", Type::Set)]);
+        }
+
+        #[test]
+        fn test_set_range_pair_ws() {
+            lexer_test("1-2,4 ", vec![("1-2,4", Type::Set)]);
+        }
+
+        #[test]
+        fn test_set_range_pair_paren() {
+            lexer_test("1-2,4(", vec![("1-2,4", Type::Set), ("(", Type::Op)]);
+        }
+
+        #[test]
+        fn test_ranges_and_singletons() {
+            lexer_test("1-2,4,6-8", vec![("1-2,4,6-8", Type::Set)]);
+        }
+
+        #[test]
+        fn test_set_more_ranges_and_singletons() {
+            lexer_test("1-2,4,6-8,10-12", vec![("1-2,4,6-8,10-12", Type::Set)]);
+        }
+
+        // Pair
+        // `name[.modifier]` followed by `:`, `=` or `:=` and an optional, possibly quoted, value.
+        #[test]
+        fn test_name_colon_value() {
+            lexer_test("name:value", vec![("name:value", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_eq_value() {
+            lexer_test("name=value", vec![("name=value", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_colon_eq_value() {
+            lexer_test("name:=value", vec![("name:=value", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_dot_mod_colon_value() {
+            lexer_test("name.mod:value", vec![("name.mod:value", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_dot_mod_eq_value() {
+            lexer_test("name.mod=value", vec![("name.mod=value", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_colon() {
+            lexer_test("name:", vec![("name:", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_eq() {
+            lexer_test("name=", vec![("name=", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_dot_mod_colon() {
+            lexer_test("name.mod:", vec![("name.mod:", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_dot_mod_equal() {
+            lexer_test("name.mod=", vec![("name.mod=", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_pro_quoted() {
+            lexer_test("pro:'P 1'", vec![("pro:'P 1'", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_rc_colon_x() {
+            lexer_test("rc:x", vec![("rc:x", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_rc_dot_name_colon_value() {
+            lexer_test("rc.name:value", vec![("rc.name:value", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_rc_dot_name_eq_value() {
+            lexer_test("rc.name=value", vec![("rc.name=value", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_rc_dot_name_colon_eq_value() {
+            lexer_test("rc.name:=value", vec![("rc.name:=value", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_due_colon_eq_quoted() {
+            lexer_test("due:='eow - 2d'", vec![("due:='eow - 2d'", Type::Pair)]);
+        }
+
+        #[test]
+        fn test_name_colon_quoted_with_newline() {
+            lexer_test("name:'foo\nbar'", vec![("name:'foo\nbar'", Type::Pair)]);
+        }
+
+        // Operator - complete set
+        // One test per operator, each checked on its own.
+        #[test]
+        fn test_caret() {
+            lexer_test("^", vec![("^", Type::Op)]);
+        }
+
+        #[test]
+        fn test_bang() {
+            lexer_test("!", vec![("!", Type::Op)]);
+        }
+
+        #[test]
+        fn test_neg() {
+            lexer_test("_neg_", vec![("_neg_", Type::Op)]);
+        }
+
+        #[test]
+        fn test_pos() {
+            lexer_test("_pos_", vec![("_pos_", Type::Op)]);
+        }
+
+        #[test]
+        fn test_hastag() {
+            lexer_test("_hastag_", vec![("_hastag_", Type::Op)]);
+        }
+
+        #[test]
+        fn test_notag() {
+            lexer_test("_notag_", vec![("_notag_", Type::Op)]);
+        }
+
+        #[test]
+        fn test_star() {
+            lexer_test("*", vec![("*", Type::Op)]);
+        }
+
+        #[test]
+        fn test_slash() {
+            lexer_test("/", vec![("/", Type::Op)]);
+        }
+
+        #[test]
+        fn test_percent() {
+            lexer_test("%", vec![("%", Type::Op)]);
+        }
+
+        #[test]
+        fn test_plus() {
+            lexer_test("+", vec![("+", Type::Op)]);
+        }
+
+        #[test]
+        fn test_minus() {
+            lexer_test("-", vec![("-", Type::Op)]);
+        }
+
+        #[test]
+        fn test_leq() {
+            lexer_test("<=", vec![("<=", Type::Op)]);
+        }
+
+        #[test]
+        fn test_geq() {
+            lexer_test(">=", vec![(">=", Type::Op)]);
+        }
+
+        #[test]
+        fn test_gt() {
+            lexer_test(">", vec![(">", Type::Op)]);
+        }
+
+        #[test]
+        fn test_lt() {
+            lexer_test("<", vec![("<", Type::Op)]);
+        }
+
+        #[test]
+        fn test_eq() {
+            lexer_test("=", vec![("=", Type::Op)]);
+        }
+
+        #[test]
+        fn test_double_eq() {
+            lexer_test("==", vec![("==", Type::Op)]);
+        }
+
+        #[test]
+        fn test_not_eq() {
+            lexer_test("!=", vec![("!=", Type::Op)]);
+        }
+
+        #[test]
+        fn test_not_double_eq() {
+            lexer_test("!==", vec![("!==", Type::Op)]);
+        }
+
+        #[test]
+        fn test_tilde() {
+            lexer_test("~", vec![("~", Type::Op)]);
+        }
+
+        #[test]
+        fn test_not_tilde() {
+            lexer_test("!~", vec![("!~", Type::Op)]);
+        }
+
+        #[test]
+        fn test_and() {
+            lexer_test("and", vec![("and", Type::Op)]);
+        }
+
+        #[test]
+        fn test_or() {
+            lexer_test("or", vec![("or", Type::Op)]);
+        }
+
+        #[test]
+        fn test_xor() {
+            lexer_test("xor", vec![("xor", Type::Op)]);
+        }
+
+        #[test]
+        fn test_lparen() {
+            lexer_test("(", vec![("(", Type::Op)]);
+        }
+
+        #[test]
+        fn test_rparen() {
+            lexer_test(")", vec![(")", Type::Op)]);
+        }
+
+        // UUID
+        // Full UUIDs and truncated forms, down to just the first eight hex digits.
+        #[test]
+        fn test_uuid_ffs() {
+            lexer_test(
+                "ffffffff-ffff-ffff-ffff-ffffffffffff",
+                vec![("ffffffff-ffff-ffff-ffff-ffffffffffff", Type::Uuid)],
+            );
+        }
+
+        #[test]
+        fn test_uuid_00s() {
+            lexer_test(
+                "00000000-0000-0000-0000-0000000",
+                vec![("00000000-0000-0000-0000-0000000", Type::Uuid)],
+            );
+        }
+
+        #[test]
+        fn test_uuid_shorter() {
+            lexer_test(
+                "00000000-0000-0000-0000",
+                vec![("00000000-0000-0000-0000", Type::Uuid)],
+            );
+        }
+
+        #[test]
+        fn test_uuid_shorter_still() {
+            lexer_test(
+                "00000000-0000-0000",
+                vec![("00000000-0000-0000", Type::Uuid)],
+            );
+        }
+
+        #[test]
+        fn test_uuid_even_shorter() {
+            lexer_test("00000000-0000", vec![("00000000-0000", Type::Uuid)]);
+        }
+
+        #[test]
+        fn test_uuid_only_first_bit() {
+            lexer_test("00000000", vec![("00000000", Type::Uuid)]);
+        }
+
+        #[test]
+        fn test_real_uuid() {
+            lexer_test(
+                "a360fc44-315c-4366-b70c-ea7e7520b749",
+                vec![("a360fc44-315c-4366-b70c-ea7e7520b749", Type::Uuid)],
+            );
+        }
+
+        #[test]
+        fn test_real_uuid_shorter() {
+            lexer_test(
+                "a360fc44-315c-4366-b70c-ea7e752",
+                vec![("a360fc44-315c-4366-b70c-ea7e752", Type::Uuid)],
+            );
+        }
+
+        #[test]
+        fn test_real_uuid_shorter_still() {
+            lexer_test(
+                "a360fc44-315c-4366-b70c",
+                vec![("a360fc44-315c-4366-b70c", Type::Uuid)],
+            );
+        }
+
+        #[test]
+        fn test_real_uuid_even_shorter() {
+            lexer_test(
+                "a360fc44-315c-4366",
+                vec![("a360fc44-315c-4366", Type::Uuid)],
+            );
+        }
+
+        #[test]
+        fn test_real_uuid_naming_is_hard() {
+            lexer_test("a360fc44-315c", vec![("a360fc44-315c", Type::Uuid)]);
+        }
+
+        #[test]
+        fn test_real_uuid_only_first_bit() {
+            lexer_test("a360fc44", vec![("a360fc44", Type::Uuid)]);
+        }
+
+        // Date
+        // NOTE(review): presumably these inputs are recognised through `DateTime::parse`, which
+        // the lexer imports -- confirm against the lexer's date handling.
+        #[test]
+        fn test_year_week() {
+            lexer_test("2015-W01", vec![("2015-W01", Type::Date)]);
+        }
+
+        #[test]
+        fn test_year_month_day() {
+            lexer_test("2015-02-17", vec![("2015-02-17", Type::Date)]);
+        }
+
+        #[test]
+        fn test_timestamp() {
+            lexer_test(
+                "2013-11-29T22:58:00Z",
+                vec![("2013-11-29T22:58:00Z", Type::Date)],
+            );
+        }
+
+        #[test]
+        fn test_abbrev_timestamp() {
+            lexer_test("20131129T225800Z", vec![("20131129T225800Z", Type::Date)]);
+        }
+
+        // NOTE(review): the function name looks like a typo for `test_9th`; the input is "9th".
+        #[test]
+        fn test_9thn() {
+            lexer_test("9th", vec![("9th", Type::Date)]);
+        }
+
+        #[test]
+        fn test_10th() {
+            lexer_test("10th", vec![("10th", Type::Date)]);
+        }
+
+        #[test]
+        fn test_today() {
+            lexer_test("today", vec![("today", Type::Date)]);
+        }
+
+        // Duration
+        // Unit names, number+unit forms (including ones with an embedded space) and ISO-8601
+        // style `P...` values.
+        #[test]
+        fn test_year() {
+            lexer_test("year", vec![("year", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_4weeks() {
+            lexer_test("4weeks", vec![("4weeks", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_pt23h() {
+            lexer_test("PT23H", vec![("PT23H", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_1second() {
+            lexer_test("1second", vec![("1second", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_1s() {
+            lexer_test("1s", vec![("1s", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_1minute() {
+            lexer_test("1minute", vec![("1minute", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_2hour() {
+            lexer_test("2hour", vec![("2hour", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_3_days() {
+            lexer_test("3 days", vec![("3 days", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_4w() {
+            lexer_test("4w", vec![("4w", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_5mo() {
+            lexer_test("5mo", vec![("5mo", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_6_years() {
+            lexer_test("6 years", vec![("6 years", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_p1y() {
+            lexer_test("P1Y", vec![("P1Y", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_pt1h() {
+            lexer_test("PT1H", vec![("PT1H", Type::Duration)]);
+        }
+
+        #[test]
+        fn test_p_full() {
+            lexer_test("P1Y1M1DT1H1M1S", vec![("P1Y1M1DT1H1M1S", Type::Duration)]);
+        }
+
+        // Misc
+        // The `--` separator: alone, surrounded by whitespace, and directly between two numbers.
+        #[test]
+        fn test_separator() {
+            lexer_test("--", vec![("--", Type::Separator)]);
+        }
+
+        #[test]
+        fn test_separator_ws() {
+            lexer_test("  --  ", vec![("--", Type::Separator)]);
+        }
+
+        #[test]
+        fn test_separator_boundaries() {
+            lexer_test(
+                "123--123  ",
+                vec![
+                    ("123", Type::Number),
+                    ("--", Type::Separator),
+                    ("123", Type::Number),
+                ],
+            );
+        }
+
+        // Expression
+        // due:eom-2w
+        // due < eom + 1w + 1d
+        // ( /pattern/ or 8ad2e3db-914d-4832-b0e6-72fa04f6e331,3b6218f9-726a-44fc-aa63-889ff52be442 )
+        // (the three inputs listed above are not exercised by the tests that follow)
+        #[test]
+        fn test_expression() {
+            lexer_test(
+                "(1+2)",
+                vec![
+                    ("(", Type::Op),
+                    ("1", Type::Number),
+                    ("+", Type::Op),
+                    ("2", Type::Number),
+                    (")", Type::Op),
+                ],
+            );
+        }
+
+        #[test]
+        fn test_expression_dom_tilde() {
+            lexer_test(
+                "description~pattern",
+                vec![
+                    ("description", Type::DOM),
+                    ("~", Type::Op),
+                    ("pattern", Type::Identifier),
+                ],
+            );
+        }
+
+        #[test]
+        fn test_expression_paren_tag() {
+            lexer_test(
+                "(+tag)",
+                vec![("(", Type::Op), ("+tag", Type::Tag), (")", Type::Op)],
+            );
+        }
+
+        #[test]
+        fn test_expression_paren_name_value() {
+            lexer_test(
+                "(name:value)",
+                vec![("(", Type::Op), ("name:value", Type::Pair), (")", Type::Op)],
+            );
+        }
+    }
+}
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
new file mode 100644
index 000000000..a5464ca53
--- /dev/null
+++ b/src/cli/mod.rs
@@ -0,0 +1 @@
+mod lexer;
diff --git a/src/lib.rs b/src/lib.rs
index 475ea1d6c..c84a35f98 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,7 @@
 #[macro_use]
 extern crate failure;
 
+mod cli;
 mod errors;
 mod operation;
 mod replica;
@@ -13,6 +14,7 @@ mod task;
 mod taskdb;
 pub mod taskstorage;
 mod tdb2;
+mod util;
 
 pub use operation::Operation;
 pub use replica::Replica;
diff --git a/src/tdb2/ff4.rs b/src/tdb2/ff4.rs
index fd554ed6e..ed64f1a94 100644
--- a/src/tdb2/ff4.rs
+++ b/src/tdb2/ff4.rs
@@ -1,7 +1,7 @@
 use std::str;
 
-use super::pig::Pig;
 use crate::task::{Task, TaskBuilder};
+use crate::util::pig::Pig;
 use failure::Fallible;
 
 /// Rust implementation of part of utf8_codepoint from Taskwarrior's src/utf8.cpp
diff --git a/src/tdb2/mod.rs b/src/tdb2/mod.rs
index 7b39d986c..009b741e9 100644
--- a/src/tdb2/mod.rs
+++ b/src/tdb2/mod.rs
@@ -2,7 +2,6 @@
 //! support for the data structure as a  compatibility layer.
 
 mod ff4;
-mod pig;
 
 use self::ff4::parse_ff4;
 use crate::task::Task;
diff --git a/src/util/datetime.rs b/src/util/datetime.rs
new file mode 100644
index 000000000..ff7ca0340
--- /dev/null
+++ b/src/util/datetime.rs
@@ -0,0 +1,39 @@
+//! A re-implementation of the "Datetime" parsing utility from the Taskwarrior
+//! source.
+
+// TODO: this module is only a stub: `parse` recognizes a fixed list of literal inputs
+
+/// Placeholder date/time value; it carries no data.
+pub(crate) struct DateTime {}
+
+impl DateTime {
+    /// Find the longest prefix of `input` that is a recognised datestamp.
+    ///
+    /// Returns the value together with the number of bytes of `input` the prefix covers, or
+    /// `None` when no prefix matches.  This is a stub: a prefix is recognised only if it is
+    /// one of the literals in `KNOWN`, and `format` is not consulted.
+    pub(crate) fn parse<S: AsRef<str>>(
+        input: S,
+        format: &'static str,
+    ) -> Option<(DateTime, usize)> {
+        const KNOWN: &[&str] = &[
+            "2015",
+            "2015-",
+            "9th",
+            "10th",
+            "2015-W01",
+            "2015-02-17",
+            "2013-11-29T22:58:00Z",
+            "315532800",
+            "20131129T225800Z",
+            "today",
+        ];
+
+        let text = input.as_ref();
+        // Longest candidate first.  `get` yields `None` for a length that would split a
+        // multi-byte character, so such candidates are simply skipped.
+        (1..=text.len())
+            .rev()
+            .filter_map(|len| text.get(..len))
+            .find(|prefix| KNOWN.contains(prefix))
+            .map(|prefix| (DateTime {}, prefix.len()))
+    }
+}
diff --git a/src/util/duration.rs b/src/util/duration.rs
new file mode 100644
index 000000000..e7ec7fb32
--- /dev/null
+++ b/src/util/duration.rs
@@ -0,0 +1,44 @@
+//! A re-implementation of the "Duration" parsing utility from the Taskwarrior
+//! source.
+
+// TODO: this module is not yet implemented
+
+pub(crate) struct Duration {} // field-less placeholder until this module is implemented
+
+impl Duration {
+    /// Stub parser: returns the longest prefix of `input` that equals one of the hard-coded
+    /// sample durations, with its byte length; `None` if no prefix matches. `format` is unused.
+    pub(crate) fn parse<S: AsRef<str>>(
+        input: S,
+        format: &'static str,
+    ) -> Option<(Duration, usize)> {
+        let input = input.as_ref();
+        let mut len = input.len();
+
+        // try the whole string first, then drop one trailing byte at a time until a match
+        while len > 0 {
+            if let Some(str) = input.get(..len) { // `get` yields None off a char boundary
+                match str {
+                    "1w" => return Some((Duration {}, len)),
+                    "4w" => return Some((Duration {}, len)),
+                    "4weeks" => return Some((Duration {}, len)),
+                    "5mo" => return Some((Duration {}, len)),
+                    "6 years" => return Some((Duration {}, len)),
+                    "3 days" => return Some((Duration {}, len)),
+                    "1minute" => return Some((Duration {}, len)),
+                    "2hour" => return Some((Duration {}, len)),
+                    "1s" => return Some((Duration {}, len)),
+                    "1second" => return Some((Duration {}, len)),
+                    "PT23H" => return Some((Duration {}, len)),
+                    "PT1H" => return Some((Duration {}, len)),
+                    "P1Y" => return Some((Duration {}, len)),
+                    "P1Y1M1DT1H1M1S" => return Some((Duration {}, len)),
+                    "year" => return Some((Duration {}, len)),
+                    _ => (),
+                }
+            }
+            len -= 1; // no match at this length; retry with a shorter prefix
+        }
+        None
+    }
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
new file mode 100644
index 000000000..efbbc7fb5
--- /dev/null
+++ b/src/util/mod.rs
@@ -0,0 +1,3 @@
+pub(crate) mod datetime;
+pub(crate) mod duration;
+pub(crate) mod pig;
diff --git a/src/tdb2/pig.rs b/src/util/pig.rs
similarity index 99%
rename from src/tdb2/pig.rs
rename to src/util/pig.rs
index a3d837629..08a883639 100644
--- a/src/tdb2/pig.rs
+++ b/src/util/pig.rs
@@ -3,7 +3,7 @@
 
 use failure::Fallible;
 
-pub struct Pig<'a> {
+pub(crate) struct Pig<'a> {
     input: &'a [u8],
     cursor: usize,
 }