From a86edaa6b2939a5c26215b167e1de2b81064e49d Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Fri, 10 Jul 2015 11:24:57 -0400 Subject: [PATCH] Lexer: Converted ::isPair to use ::readWord - With ::isPair using ::readWord, attribute values may now contain escaped entities such as \t, \uNNNN, and U+NNNN. - Removed distinct handling for name, rc and rc.name - all generic now. --- src/Lexer.cpp | 60 +++++++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/src/Lexer.cpp b/src/Lexer.cpp index f7a158e9c..cb303f253 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -382,7 +382,6 @@ bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string& return false; */ - if (quotes.find (_text[marker]) != std::string::npos) { int quote = _text[marker]; @@ -728,9 +727,8 @@ bool Lexer::isURL (std::string& token, Lexer::Type& type) //////////////////////////////////////////////////////////////////////////////// // Lexer::Type::pair -// : [ | ] -// = [ | ] -// := [ | ] +// [ | ] +// separator '::' | ':=' | ':' | '=' bool Lexer::isPair (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -739,41 +737,27 @@ bool Lexer::isPair (std::string& token, Lexer::Type& type) Lexer::Type ignoredType; if (isIdentifier (ignoredToken, ignoredType)) { - // Look for rc.name{:=,=,:}value first, because '=' is allowed. - if (ignoredToken == "rc" || - ignoredToken.substr (0, 3) == "rc.") + // Look for a valid separator. 
+ std::string separator = _text.substr (_cursor, 2); + if (separator == "::" || separator == ":=") + _cursor += 2; + else if (separator[0] == ':' || separator[0] == '=') + _cursor++; + else { - if (_eos - _cursor > 1 && - (_text[_cursor] == ':' || - _text[_cursor] == '=')) - { - _cursor++; - - if (isString (ignoredToken, ignoredType, "'\"") || - isContiguous (ignoredToken, ignoredType)) - { - token = _text.substr (marker, _cursor - marker); - type = Lexer::Type::pair; - return true; - } - } + _cursor = marker; + return false; } - if (_eos - _cursor >= 1 && - (_text[_cursor] == ':' || - _text[_cursor] == '=')) + // String, word or nothing are all valid. + if (readWord (_text, "'\"", _cursor, ignoredToken) || + readWord (_text, _cursor, ignoredToken) || + isEOS () || + isWhitespace (_text[_cursor])) { - _cursor++; - - if (isString (ignoredToken, ignoredType, "'\"") || - isContiguous (ignoredToken, ignoredType) || - _eos == _cursor || - _text[_cursor] == ' ') - { - token = _text.substr (marker, _cursor - marker); - type = Lexer::Type::pair; - return true; - } + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::pair; + return true; } } @@ -841,7 +825,7 @@ bool Lexer::isSet (std::string& token, Lexer::Type& type) //////////////////////////////////////////////////////////////////////////////// // Lexer::Type::tag -// ^ | '(' | ')' | +// ^ | '(' | ')' | // [ +|- ] [ ]* bool Lexer::isTag (std::string& token, Lexer::Type& type) { @@ -922,7 +906,7 @@ bool Lexer::isPath (std::string& token, Lexer::Type& type) //////////////////////////////////////////////////////////////////////////////// // Lexer::Type::substitution -// / / / [g] | +// / / / [g] | bool Lexer::isSubstitution (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -955,7 +939,7 @@ bool Lexer::isSubstitution (std::string& token, Lexer::Type& type) //////////////////////////////////////////////////////////////////////////////// // Lexer::Type::pattern -// / / | +// / / | 
bool Lexer::isPattern (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor;