From d9bcbdee0ad6092bae20ed699539a53b53191e5c Mon Sep 17 00:00:00 2001
From: Paul Beckingham
Date: Mon, 22 Jun 2015 21:34:57 -0400
Subject: [PATCH] Lexer: Added ::isContiguous for word-like matching

---
 src/Lexer.cpp | 36 +++++++++++++++++++++++++++++-------
 src/Lexer.h   |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/src/Lexer.cpp b/src/Lexer.cpp
index 2fceb04a4..b1ea50f5d 100644
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -746,9 +746,9 @@ bool Lexer::isPair (std::string& token, Lexer::Type& type)
       {
         _cursor++;
 
-        if (isString (ignoredToken, ignoredType, '\'') ||
-            isString (ignoredToken, ignoredType, '"')  ||
-            isWord   (ignoredToken, ignoredType))
+        if (isString     (ignoredToken, ignoredType, '\'') ||
+            isString     (ignoredToken, ignoredType, '"')  ||
+            isContiguous (ignoredToken, ignoredType))
         {
           token = _text.substr (marker, _cursor - marker);
           type = Lexer::Type::pair;
@@ -763,10 +763,10 @@ bool Lexer::isPair (std::string& token, Lexer::Type& type)
       {
         _cursor++;
 
-        if (isString (ignoredToken, ignoredType, '\'') ||
-            isString (ignoredToken, ignoredType, '"')  ||
-            isWord   (ignoredToken, ignoredType)       ||
-            _eos == _cursor                            ||
+        if (isString     (ignoredToken, ignoredType, '\'') ||
+            isString     (ignoredToken, ignoredType, '"')  ||
+            isContiguous (ignoredToken, ignoredType)       ||
+            _eos == _cursor                                ||
             _text[_cursor] == ' ')
         {
           token = _text.substr (marker, _cursor - marker);
@@ -1161,6 +1161,28 @@ bool Lexer::isWord (std::string& token, Lexer::Type& type)
   return false;
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// Lexer::Type::word
+//   [^\s]+   (a maximal run of non-whitespace, starting at _cursor)
+bool Lexer::isContiguous (std::string& token, Lexer::Type& type)
+{
+  std::size_t marker = _cursor;          // Scan ahead; _cursor moves only on a match.
+
+  while (_text[marker] &&                // NUL ends the scan (std::string yields it at size ()).
+         ! isWhitespace (_text[marker]))
+    utf8_next_char (_text, marker);      // Advances marker, presumably one UTF-8 character.
+
+  if (marker > _cursor)                  // Something was consumed.
+  {
+    token = _text.substr (_cursor, marker - _cursor);
+    type = Lexer::Type::word;
+    _cursor = marker;
+    return true;
+  }
+
+  return false;                          // No match: token, type and _cursor are untouched.
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Static
 std::string Lexer::typeToString (Lexer::Type type)
diff --git a/src/Lexer.h b/src/Lexer.h
index 139ea3ff9..2bacdd727 100644
--- a/src/Lexer.h
+++ b/src/Lexer.h
@@ -102,6 +102,7 @@ public:
   bool isDOM (std::string&, Lexer::Type&);
   bool isIdentifier (std::string&, Lexer::Type&);
   bool isWord (std::string&, Lexer::Type&);
+  bool isContiguous (std::string&, Lexer::Type&);
 
 private:
   std::string _text;