From f5571c80c68fefd9cbbe74958ee98a4189b1af68 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Sun, 26 Jul 2015 12:22:35 -0400 Subject: [PATCH] Lexer: Rewrote ::isDOM to perform exact matching, not pattern matching --- src/Lexer.cpp | 163 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 131 insertions(+), 32 deletions(-) diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 1463e2909..1ac8625fd 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -994,61 +994,160 @@ bool Lexer::isOperator (std::string& token, Lexer::Type& type) //////////////////////////////////////////////////////////////////////////////// // Lexer::Type::dom // [ <isUUID> | <isDigit>+ . ] <isIdentifier> [ . <isIdentifier> ]* +// +// Configuration: +// rc.<name> +// +// System: +// context.program +// context.args +// context.width +// context.height +// system.version +// system.os +// +// Relative or absolute attribute: +// <attribute> +// <id>.<attribute> +// <uuid>.<attribute> +// +// Single tag: +// tags.<word> +// +// Date type: +// <date>.year +// <date>.month +// <date>.day +// <date>.week +// <date>.weekday +// <date>.julian +// <date>.hour +// <date>.minute +// <date>.second +// +// Annotations (entry is a date): +// annotations.<N>.entry +// annotations.<N>.description +// bool Lexer::isDOM (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; + if (isOneOf ({"context.program", + "context.args", + "context.width", + "context.height", + "system.version", + "system.os"}, true)) + { + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::dom; + return true; + } + + // Optional: + // <uuid>. + // <id>. std::string extractedToken; Lexer::Type extractedType; - if (isUUID (extractedToken, extractedType)) + if (isUUID (extractedToken, extractedType, false) || + isInteger (extractedToken, extractedType)) { - if (_text[_cursor] == '.') - ++_cursor; - else + if (! 
isLiteral (".", false)) { _cursor = marker; return false; } } - else - { - if (isDigit (_text[_cursor])) - { - ++_cursor; - while (isDigit (_text[_cursor])) - ++_cursor; - if (_text[_cursor] == '.') - ++_cursor; - else + // Any failure after this line should rollback to the checkpoint. + std::size_t checkpoint = _cursor; + + // [prefix]tags. + std::string partialToken; + Lexer::Type partialType; + if (isLiteral ("tags.", false) && + isWord (partialToken, partialType)) + { + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::dom; + return true; + } + else + _cursor = checkpoint; + + // [prefix]attribute + if (isOneOf (attributes, true)) + { + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::dom; + return true; + } + else + _cursor = checkpoint; + + // [prefix]attribute + if (isOneOf (attributes, false)) + { + if (isLiteral (".", false)) + { + std::string attribute = _text.substr (checkpoint, _cursor - checkpoint - 1); + + // if attribute type is 'date' + if (attributes[attribute] == "date" && + isOneOf ({"year", "month", "day", + "week", "weekday", + "julian", + "hour", "minute", "second"}, true)) { - _cursor = marker; - return false; + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::dom; + return true; } } + else + { + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::dom; + return true; + } } + else + _cursor = checkpoint; - if (! isOperator (extractedToken, extractedType) && - isIdentifier (extractedToken, extractedType)) + // [prefix]annotations. + if (isLiteral ("annotations.", false)) { - while (1) + std::string extractedToken; + Lexer::Type extractedType; + if (isInteger (extractedToken, extractedType)) { - if (_text[_cursor] == '.') - ++_cursor; - else - break; - - if (isOperator (extractedToken, extractedType) || - ! 
isIdentifier (extractedToken, extractedType)) + if (isLiteral (".", false)) { - _cursor = marker; - return false; + if (isLiteral ("description", true)) + { + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::dom; + return true; + } + else if (isLiteral ("entry", true)) + { + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::dom; + return true; + } + else if (isLiteral ("entry.", false) && + isOneOf ({"year", "month", "day", + "week", "weekday", + "julian", + "hour", "minute", "second"}, true)) + { + token = _text.substr (marker, _cursor - marker); + type = Lexer::Type::dom; + return true; + } } } - - type = Lexer::Type::dom; - token = _text.substr (marker, _cursor - marker); - return true; } _cursor = marker;