Lexer: Converted ::isPair to use ::readWord

- With ::isPair using ::readWord, attribute values may now contain escaped
  entities such as \t, \uNNNN, and U+NNNN.
- Removed the separate handling of <name><sep><value>, rc<sep><value> and
  rc.<name><sep><value>; all three forms now share one generic code path.
This commit is contained in:
Paul Beckingham 2015-07-10 11:24:57 -04:00
parent 45aaa530ac
commit a86edaa6b2

View file

@ -382,7 +382,6 @@ bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string&
return false; return false;
*/ */
if (quotes.find (_text[marker]) != std::string::npos) if (quotes.find (_text[marker]) != std::string::npos)
{ {
int quote = _text[marker]; int quote = _text[marker];
@ -728,9 +727,8 @@ bool Lexer::isURL (std::string& token, Lexer::Type& type)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::pair // Lexer::Type::pair
// <identifier> : [ <string> | <word> ] // <identifier> <separator> [ <string> | <word> ]
// <identifier> = [ <string> | <word> ] // separator '::' | ':=' | ':' | '='
// <identifier> := [ <string> | <word> ]
bool Lexer::isPair (std::string& token, Lexer::Type& type) bool Lexer::isPair (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -739,43 +737,29 @@ bool Lexer::isPair (std::string& token, Lexer::Type& type)
Lexer::Type ignoredType; Lexer::Type ignoredType;
if (isIdentifier (ignoredToken, ignoredType)) if (isIdentifier (ignoredToken, ignoredType))
{ {
// Look for rc.name{:=,=,:}value first, because '=' is allowed. // Look for a valid separator.
if (ignoredToken == "rc" || std::string separator = _text.substr (_cursor, 2);
ignoredToken.substr (0, 3) == "rc.") if (separator == "::" || separator == ":=")
{ _cursor += 2;
if (_eos - _cursor > 1 && else if (separator[0] == ':' || separator[0] == '=')
(_text[_cursor] == ':' ||
_text[_cursor] == '='))
{
_cursor++; _cursor++;
else
{
_cursor = marker;
return false;
}
if (isString (ignoredToken, ignoredType, "'\"") || // String, word or nothing are all valid.
isContiguous (ignoredToken, ignoredType)) if (readWord (_text, "'\"", _cursor, ignoredToken) ||
readWord (_text, _cursor, ignoredToken) ||
isEOS () ||
isWhitespace (_text[_cursor]))
{ {
token = _text.substr (marker, _cursor - marker); token = _text.substr (marker, _cursor - marker);
type = Lexer::Type::pair; type = Lexer::Type::pair;
return true; return true;
} }
} }
}
if (_eos - _cursor >= 1 &&
(_text[_cursor] == ':' ||
_text[_cursor] == '='))
{
_cursor++;
if (isString (ignoredToken, ignoredType, "'\"") ||
isContiguous (ignoredToken, ignoredType) ||
_eos == _cursor ||
_text[_cursor] == ' ')
{
token = _text.substr (marker, _cursor - marker);
type = Lexer::Type::pair;
return true;
}
}
}
_cursor = marker; _cursor = marker;
return false; return false;
@ -841,7 +825,7 @@ bool Lexer::isSet (std::string& token, Lexer::Type& type)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::tag // Lexer::Type::tag
// ^ | '(' | ')' | <isWhiteSpace> // ^ | '(' | ')' | <isWhitespace>
// [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]* // [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]*
bool Lexer::isTag (std::string& token, Lexer::Type& type) bool Lexer::isTag (std::string& token, Lexer::Type& type)
{ {
@ -922,7 +906,7 @@ bool Lexer::isPath (std::string& token, Lexer::Type& type)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::substitution // Lexer::Type::substitution
// / <unquoted-string> / <unquoted-string> / [g] <EOS> | <isWhiteSpace> // / <unquoted-string> / <unquoted-string> / [g] <EOS> | <isWhitespace>
bool Lexer::isSubstitution (std::string& token, Lexer::Type& type) bool Lexer::isSubstitution (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -955,7 +939,7 @@ bool Lexer::isSubstitution (std::string& token, Lexer::Type& type)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::pattern // Lexer::Type::pattern
// / <unquoted-string> / <EOS> | <isWhiteSpace> // / <unquoted-string> / <EOS> | <isWhitespace>
bool Lexer::isPattern (std::string& token, Lexer::Type& type) bool Lexer::isPattern (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;