Lexer, Duration

- Merged libexpr code.
This commit is contained in:
Paul Beckingham 2014-01-02 00:55:53 -05:00
parent 9c5adc432c
commit 9bfe40fac7
5 changed files with 913 additions and 1 deletions

View file

@ -14,15 +14,17 @@ set (task_SRCS A3.cpp A3.h
DOM.cpp DOM.h
Date.cpp Date.h
Directory.cpp Directory.h
OldDuration.cpp OldDuration.h
Duration.cpp Duration.h
E9.cpp E9.h
File.cpp File.h
Hooks.cpp Hooks.h
ISO8601.cpp ISO8601.h
JSON.cpp JSON.h
Lexer.cpp Lexer.h
LRParser.cpp LRParser.h
Msg.cpp Msg.h
Nibbler.cpp Nibbler.h
OldDuration.cpp OldDuration.h
Parser.cpp Parser.h
Path.cpp Path.h
RX.cpp RX.h

167
src/Duration.cpp Normal file
View file

@ -0,0 +1,167 @@
////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2006 - 2014, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <cmake.h>
#include <stdlib.h>
#include <Nibbler.h>
#include <Lexer.h>
#include <Duration.h>
// Lengths of the basic time units, expressed in seconds.
#define DAY    86400
#define HOUR    3600
#define MINUTE    60
#define SECOND     1

// Table of recognized duration unit names and the number of seconds that one
// of each unit represents.  Months, quarters and years are fixed-length
// approximations (30, 91 and 365 days respectively).
static struct
{
  std::string unit;
  int seconds;
} durations[] =
{
  // Entries are grouped by first character.  Within a group, a longer
  // spelling always appears before any shorter spelling that is a prefix of
  // it (e.g. "days" before "day" before "d"), so that a first-match scan such
  // as Nibbler::getOneOf yields the maximal match.
  {"annual",     365 * DAY},
  {"biannual",   730 * DAY},
  {"bimonthly",   61 * DAY},
  {"biweekly",    14 * DAY},
  {"biyearly",   730 * DAY},
  {"daily",        1 * DAY},
  {"days",         1 * DAY},
  {"day",          1 * DAY},
  {"d",            1 * DAY},
  {"fortnight",   14 * DAY},
  {"hours",        1 * HOUR},
  {"hour",         1 * HOUR},
  {"hrs",          1 * HOUR},   // Deprecate
  {"hr",           1 * HOUR},   // Deprecate
  {"h",            1 * HOUR},
  {"minutes",      1 * MINUTE},
  {"minute",       1 * MINUTE},
  {"mins",         1 * MINUTE}, // Deprecate
  {"min",          1 * MINUTE},
  {"monthly",     30 * DAY},
  {"months",      30 * DAY},
  {"month",       30 * DAY},
  {"mnths",       30 * DAY},    // Deprecate
  {"mths",        30 * DAY},    // Deprecate
  {"mth",         30 * DAY},    // Deprecate
  {"mos",         30 * DAY},    // Deprecate
  {"mo",          30 * DAY},
  {"quarterly",   91 * DAY},
  {"quarters",    91 * DAY},
  {"quarter",     91 * DAY},
  {"qrtrs",       91 * DAY},    // Deprecate
  {"qtrs",        91 * DAY},    // Deprecate
  {"qtr",         91 * DAY},    // Deprecate
  {"q",           91 * DAY},
  {"semiannual", 183 * DAY},
  {"sennight",     7 * DAY},    // Seven nights, i.e. one week (not a fortnight).
  {"seconds",      1 * SECOND},
  {"second",       1 * SECOND},
  {"secs",         1 * SECOND}, // Deprecate
  {"sec",          1 * SECOND}, // Deprecate
  {"s",            1 * SECOND},
  {"weekdays",     1 * DAY},
  {"weekly",       7 * DAY},
  {"weeks",        7 * DAY},
  {"week",         7 * DAY},
  {"wks",          7 * DAY},    // Deprecate
  {"wk",           7 * DAY},    // Deprecate
  {"w",            7 * DAY},
  {"yearly",     365 * DAY},
  {"years",      365 * DAY},
  {"year",       365 * DAY},
  {"yrs",        365 * DAY},    // Deprecate
  {"yr",         365 * DAY},    // Deprecate
  {"y",          365 * DAY},
};

// Number of entries in the durations table.
#define NUM_DURATIONS (sizeof (durations) / sizeof (durations[0]))
////////////////////////////////////////////////////////////////////////////////
// Default constructor: a new Duration starts out holding zero seconds.
Duration::Duration () : _secs (0) {}
////////////////////////////////////////////////////////////////////////////////
// Destructor.  Intentionally empty: nothing is released here.
Duration::~Duration ()
{
}
////////////////////////////////////////////////////////////////////////////////
// Conversion to time_t: yields the stored number of seconds.
Duration::operator time_t () const
{
  const time_t stored = _secs;
  return stored;
}
////////////////////////////////////////////////////////////////////////////////
// Attempts to read a duration - an optional number followed by one of the unit
// names in the durations table - from 'input', beginning at offset 'start'.
//
//   input  The text to examine.
//   start  On entry, the offset at which to begin.  On success it is advanced
//          by the Nibbler cursor position, i.e. past the text consumed.  On
//          failure it is left unchanged.
//
// Returns true and stores the result (in seconds) in _secs when a duration was
// recognized and is followed by end-of-input or whitespace; otherwise returns
// false and leaves _secs untouched.  A missing number counts as a quantity of
// one.
bool Duration::parse (const std::string& input, std::string::size_type& start)
{
  std::string::size_type original_start = start;
  Nibbler n (input.substr (start));

  // Candidate unit names, in table order.
  std::vector <std::string> units;
  units.reserve (NUM_DURATIONS);
  for (size_t i = 0; i < NUM_DURATIONS; ++i)
    units.push_back (durations[i].unit);

  std::string number;
  std::string unit;

  // First try number, whitespace, unit.  If any step of that chain fails, try
  // to read a unit from wherever the Nibbler cursor was left.
  if ((n.getNumber (number) && n.skipWS () && n.getOneOf (units, unit)) ||
      n.getOneOf (units, unit))
  {
    // The unit must not run straight into further non-whitespace text.
    if (n.depleted () ||
        Lexer::is_ws (n.next ()))
    {
      start = original_start + n.cursor ();
      double quantity = (number == "")
                      ? 1.0
                      : strtod (number.c_str (), NULL);

      // Linear lookup - should be logarithmic.
      for (size_t i = 0; i < NUM_DURATIONS; ++i)
      {
        if (durations[i].unit == unit)
        {
          // Convert straight to time_t.  Going through int (as was done
          // previously) is out of range for long durations such as "100y".
          _secs = static_cast <time_t> (quantity * static_cast <double> (durations[i].seconds));
          return true;
        }
      }
    }
  }

  return false;
}
////////////////////////////////////////////////////////////////////////////////
void Duration::clear ()
{
_secs = 0;
}
////////////////////////////////////////////////////////////////////////////////

49
src/Duration.h Normal file
View file

@ -0,0 +1,49 @@
////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2006 - 2014, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_DURATION
#define INCLUDED_DURATION
#include <string>
#include <time.h>
// A length of time, held as a whole number of seconds.  A value is obtained by
// parsing text with parse () and is read back through the time_t conversion.
class Duration
{
public:
Duration (); // Default constructor; the stored value starts at zero
~Duration (); // Destructor
// Copy construction and copy assignment are declared but have no definition,
// so code that copies a Duration will fail to link.
Duration (const Duration&); // Unimplemented
Duration& operator= (const Duration&); // Unimplemented
// Yields the stored number of seconds.
operator time_t () const;
// Parses a duration from the given string, beginning at the given offset.
// Returns true on success, in which case the offset has been advanced.
bool parse (const std::string&, std::string::size_type&);
// Resets the stored value to zero.
void clear ();
protected:
// The duration, in seconds.
time_t _secs;
};
#endif
////////////////////////////////////////////////////////////////////////////////

600
src/Lexer.cpp Normal file
View file

@ -0,0 +1,600 @@
////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2013 - 2014, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <utf8.h>
#include <ISO8601.h>
#include <Duration.h>
#include <Lexer.h>
////////////////////////////////////////////////////////////////////////////////
// Constructs a lexer over a copy of 'input'.  The four look-ahead slots start
// out as 32 (an ASCII space) and are then filled by shifting four times.
Lexer::Lexer (const std::string& input)
: _input (input)
, _i (0)
, _n0 (32)
, _n1 (32)
, _n2 (32)
, _n3 (32)
, _ambiguity (true)
{
  // Read 4 chars in preparation.  Even if there are < 4.  Take a deep breath.
  for (int primed = 0; primed < 4; ++primed)
    shift ();
}
////////////////////////////////////////////////////////////////////////////////
// Destructor.  Intentionally empty: nothing is released here.
Lexer::~Lexer ()
{
}
////////////////////////////////////////////////////////////////////////////////
// Walk the input string, looking for transitions.
//
// Extracts the next token, starting at the current look-ahead position.  The
// function is a state machine: 'type' doubles as the current state and, on
// return, as the type of the token produced.
//
//   token  Receives the token text.  For quoted strings the enclosing quotes
//          are not stored, and escape sequences are stored decoded.
//   type   Receives the token type.
//
// Returns true when a token was produced.  Returns false when the input ran
// out before any token text was collected.
//
// Throws std::string for a character or state that no branch handles.
bool Lexer::token (std::string& token, Type& type)
{
  // Start with nothing.
  token = "";

  // Different types of matching quote: ', ".  Zero means "not inside quotes".
  int quote = 0;

  type = typeNone;
  while (_n0)
  {
    switch (type)
    {
    case typeNone:
      if (is_ws (_n0))
        shift ();
      else if (_n0 == '"' || _n0 == '\'')
      {
        type = typeString;
        quote = _n0;
        shift ();
      }
      else if (_n0 == '0' &&
               _n1 == 'x' &&
               is_hex_digit (_n2))
      {
        // "0x" plus the first hex digit.
        type = typeHex;
        token += utf8_character (_n0);
        shift ();
        token += utf8_character (_n0);
        shift ();
        token += utf8_character (_n0);
        shift ();
      }
      else if (is_dec_digit (_n0))
      {
        // Speculatively try a date and duration parse.  Longest wins.
        //
        // NOTE(review): the offset used here presumes that _i is exactly four
        // bytes beyond the position of _n0.  That looks doubtful near the end
        // of the input and with multi-byte look-ahead characters - confirm
        // against utf8_next_char.
        const std::string remainder = _input.substr (_i < 4 ? 0 : _i - 4);

        std::string::size_type iso_i = 0;
        std::string iso_token;
        ISO8601d iso;
        iso.ambiguity (_ambiguity);
        if (iso.parse (remainder, iso_i))
          iso_token = remainder.substr (0, iso_i);

        std::string::size_type dur_i = 0;
        std::string dur_token;
        Duration dur;
        if (dur.parse (remainder, dur_i))
          dur_token = remainder.substr (0, dur_i);

        if (iso_token.length () > dur_token.length ())
        {
          while (iso_i--) shift ();
          token = iso_token;
          type = typeDate;
          return true;
        }
        else if (dur_token.length () > iso_token.length ())
        {
          while (dur_i--) shift ();
          token = dur_token;
          type = typeDuration;
          return true;
        }

        // Neither parse won (both failed, or a tie): plain number.
        type = typeNumber;
        token += utf8_character (_n0);
        shift ();
      }
      else if (_n0 == '.' && is_dec_digit (_n1))
      {
        type = typeDecimal;
        token += utf8_character (_n0);
        shift ();
      }
      else if (is_triple_op (_n0, _n1, _n2))
      {
        type = typeOperator;
        token += utf8_character (_n0);
        shift ();
        token += utf8_character (_n0);
        shift ();
        token += utf8_character (_n0);
        shift ();
        return true;
      }
      else if (is_double_op (_n0, _n1))
      {
        type = typeOperator;
        token += utf8_character (_n0);
        shift ();
        token += utf8_character (_n0);
        shift ();
        return true;
      }
      else if (is_single_op (_n0))
      {
        type = typeOperator;
        token += utf8_character (_n0);
        shift ();
        return true;
      }
      else if (_n0 == '\\')
      {
        type = typeIdentifierEscape;
        shift ();
      }
      else if (is_ident_start (_n0))
      {
        // Speculatively try a period and duration parse.  Longest wins.  Both
        // outcomes are reported as typeDuration.
        //
        // NOTE(review): same offset assumption as in the digit branch above.
        const std::string remainder = _input.substr (_i < 4 ? 0 : _i - 4);

        std::string::size_type iso_i = 0;
        std::string iso_token;
        ISO8601p iso;
        if (iso.parse (remainder, iso_i))
          iso_token = remainder.substr (0, iso_i);

        std::string::size_type dur_i = 0;
        std::string dur_token;
        Duration dur;
        if (dur.parse (remainder, dur_i))
          dur_token = remainder.substr (0, dur_i);

        if (iso_token.length () > dur_token.length ())
        {
          while (iso_i--) shift ();
          token = iso_token;
          type = typeDuration;
          return true;
        }
        else if (dur_token.length () > iso_token.length ())
        {
          while (dur_i--) shift ();
          token = dur_token;
          type = typeDuration;
          return true;
        }

        type = typeIdentifier;
        token += utf8_character (_n0);
        shift ();
      }
      else
        throw std::string ("Unexpected error 1");
      break;

    case typeString:
      if (_n0 == quote)
      {
        // Closing quote: consume it and finish.
        shift ();
        quote = 0;
        return true;
      }
      else if (_n0 == '\\')
      {
        type = typeEscape;
        shift ();
      }
      else
      {
        token += utf8_character (_n0);
        shift ();
      }
      break;

    case typeIdentifier:
      if (is_ident (_n0))
      {
        token += utf8_character (_n0);
        shift ();
      }
      else
      {
        return true;
      }
      break;

    case typeIdentifierEscape:
      if (_n0 == 'u')
      {
        type = typeEscapeUnicode;
        shift ();
      }
      else
      {
        // Any other escaped character is taken literally as the start of an
        // identifier.  Without this branch nothing was consumed and the loop
        // never terminated.
        token += utf8_character (_n0);
        type = typeIdentifier;
        shift ();
      }
      break;

    case typeEscape:
      if (_n0 == 'x')
      {
        type = typeEscapeHex;
        shift ();
      }
      else if (_n0 == 'u')
      {
        type = typeEscapeUnicode;
        shift ();
      }
      else
      {
        // Encode the decoded code point, rather than appending the int as a
        // single (possibly truncated) char.
        token += utf8_character (decode_escape (_n0));
        type = quote ? typeString : typeIdentifier;
        shift ();
      }
      break;

    case typeEscapeHex:
      if (is_hex_digit (_n0) && is_hex_digit (_n1))
      {
        token += utf8_character (hex_to_int (_n0, _n1));
        type = quote ? typeString : typeIdentifier;
        shift ();
        shift ();
      }
      else
      {
        type = quote ? typeString : typeIdentifier;
        shift ();
        quote = 0;
        return true;
      }
      break;

    case typeEscapeUnicode:
      if (is_hex_digit (_n0) &&
          is_hex_digit (_n1) &&
          is_hex_digit (_n2) &&
          is_hex_digit (_n3))
      {
        token += utf8_character (hex_to_int (_n0, _n1, _n2, _n3));
        shift ();
        shift ();
        shift ();
        shift ();
        type = quote ? typeString : typeIdentifier;
      }
      else if (_n0 == quote)
      {
        type = typeString;
        shift ();
        quote = 0;
        return true;
      }
      else
      {
        // Malformed \u sequence: resume the enclosing state, which processes
        // the current character itself, so progress is guaranteed.
        type = quote ? typeString : typeIdentifier;
      }
      // This break was missing, so control fell into the typeNumber case.
      break;

    case typeNumber:
      if (is_dec_digit (_n0))
      {
        token += utf8_character (_n0);
        shift ();
      }
      else if (_n0 == '.')
      {
        type = typeDecimal;
        token += utf8_character (_n0);
        shift ();
      }
      else if (_n0 == 'e' || _n0 == 'E')
      {
        type = typeExponentIndicator;
        token += utf8_character (_n0);
        shift ();
      }
      else
      {
        return true;
      }
      break;

    case typeDecimal:
      if (is_dec_digit (_n0))
      {
        token += utf8_character (_n0);
        shift ();
      }
      else if (_n0 == 'e' || _n0 == 'E')
      {
        type = typeExponentIndicator;
        token += utf8_character (_n0);
        shift ();
      }
      else
      {
        return true;
      }
      break;

    case typeExponentIndicator:
      if (_n0 == '+' || _n0 == '-')
      {
        token += utf8_character (_n0);
        shift ();
      }
      else if (is_dec_digit (_n0))
      {
        type = typeExponent;
        token += utf8_character (_n0);
        shift ();
      }
      else
      {
        // Nothing usable follows the exponent indicator.  Stop here, exactly
        // as happens when the input ends at this point, instead of looping
        // forever without consuming anything.
        return true;
      }
      break;

    case typeExponent:
      if (is_dec_digit (_n0))
      {
        token += utf8_character (_n0);
        shift ();
      }
      else if (_n0 == '.')
      {
        token += utf8_character (_n0);
        shift ();
      }
      else
      {
        type = typeDecimal;
        return true;
      }
      break;

    case typeHex:
      if (is_hex_digit (_n0))
      {
        token += utf8_character (_n0);
        shift ();
      }
      else
      {
        return true;
      }
      break;

    default:
      throw std::string ("Unexpected error 2");
      break;
    }

    // Fence post: the input is exhausted, so return whatever was collected.
    if (!_n0 && token != "")
      return true;
  }

  return false;
}
////////////////////////////////////////////////////////////////////////////////
void Lexer::ambiguity (bool value)
{
_ambiguity = value;
}
////////////////////////////////////////////////////////////////////////////////
// Returns a printable name for a token type.
//
//   type  The token type to describe.
//
// Every enumerator of Lexer::Type has its own name.  A value that matches no
// enumerator yields "Unknown", where previously control ran off the end of the
// function without returning a value.
const std::string Lexer::type_name (const Type& type)
{
  switch (type)
  {
  case Lexer::typeNone:              return "None";
  case Lexer::typeString:            return "String";
  case Lexer::typeIdentifier:        return "Identifier";
  case Lexer::typeIdentifierEscape:  return "IdentifierEscape";
  case Lexer::typeNumber:            return "Number";
  case Lexer::typeDecimal:           return "Decimal";
  case Lexer::typeExponentIndicator: return "ExponentIndicator";
  case Lexer::typeExponent:          return "Exponent";
  case Lexer::typeHex:               return "Hex";
  case Lexer::typeOperator:          return "Operator";
  case Lexer::typeEscape:            return "Escape";
  case Lexer::typeEscapeHex:         return "EscapeHex";
  case Lexer::typeEscapeUnicode:     return "EscapeUnicode";
  case Lexer::typeDate:              return "Date";
  case Lexer::typeDuration:          return "Duration";
  }

  // Deliberately placed after the switch rather than in a 'default' label, so
  // the compiler can still warn when a new enumerator is not handled above.
  return "Unknown";
}
////////////////////////////////////////////////////////////////////////////////
// Complete Unicode whitespace list.
//
// http://en.wikipedia.org/wiki/Whitespace_character
// Updated 2013-11-18
//
// Returns true when code point 'c' is one of the listed whitespace characters.
bool Lexer::is_ws (int c)
{
  // Contiguous run: HT, LF, VT, FF, CR.
  if (c >= 0x0009 && c <= 0x000D)
    return true;

  // Contiguous run: en quad, em quad, en space, em space, three-per-em space,
  // four-per-em space, six-per-em space, figure space, punctuation space,
  // thin space, hair space.
  if (c >= 0x2000 && c <= 0x200A)
    return true;

  switch (c)
  {
  case 0x0020:  // space
  case 0x0085:  // NEL, next line
  case 0x00A0:  // no-break space
  case 0x1680:  // ogham space mark
  case 0x180E:  // mongolian vowel separator
  case 0x2028:  // line separator
  case 0x2029:  // paragraph separator
  case 0x202F:  // narrow no-break space
  case 0x205F:  // medium mathematical space
  case 0x3000:  // ideographic space
    return true;

  default:
    return false;
  }
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 'c' is a comma or a period.
bool Lexer::is_punct (int c) const
{
  return c == ',' || c == '.';
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 'c' is an ASCII decimal digit or a period.
bool Lexer::is_num (int c) const
{
  const bool digit  = ('0' <= c && c <= '9');
  const bool period = (c == '.');
  return digit || period;
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 'c' may begin an identifier: any character that is not
// null, not whitespace and not a decimal digit.
bool Lexer::is_ident_start (int c) const
{
  // The null character never starts an identifier.
  if (! c)
    return false;

  if (is_ws (c))
    return false;

  return ! is_dec_digit (c);
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 'c' may continue an identifier: any character that is not
// null, not whitespace and not a single-character operator.
bool Lexer::is_ident (int c) const
{
  // The null character never continues an identifier.
  if (! c)
    return false;

  if (is_ws (c))
    return false;

  return ! is_single_op (c);
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when the three characters spell a three-character operator:
// "and" or "xor".
bool Lexer::is_triple_op (int c0, int c1, int c2) const
{
  const bool spells_and = (c0 == 'a' && c1 == 'n' && c2 == 'd');
  const bool spells_xor = (c0 == 'x' && c1 == 'o' && c2 == 'r');
  return spells_and || spells_xor;
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when the two characters spell a two-character operator:
// ==  !=  <=  >=  or  ||  &&  !~
bool Lexer::is_double_op (int c0, int c1) const
{
  switch (c0)
  {
  case '=':
  case '<':
  case '>':
    return c1 == '=';           // ==  <=  >=

  case '!':
    return c1 == '=' ||         // !=
           c1 == '~';           // !~

  case 'o':
    return c1 == 'r';           // or

  case '|':
    return c1 == '|';           // ||

  case '&':
    return c1 == '&';           // &&

  default:
    return false;
  }
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 'c' is a single-character operator:
// +  -  *  /  (  )  <  >  ^  !  %  =  ~
bool Lexer::is_single_op (int c) const
{
  switch (c)
  {
  case '+':
  case '-':
  case '*':
  case '/':
  case '(':
  case ')':
  case '<':
  case '>':
  case '^':
  case '!':
  case '%':
  case '=':
  case '~':
    return true;

  default:
    return false;
  }
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 'c' is an ASCII decimal digit, '0' through '9'.
bool Lexer::is_dec_digit (int c) const
{
  if (c < '0')
    return false;

  return c <= '9';
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 'c' is an ASCII hexadecimal digit: 0-9, a-f or A-F.
bool Lexer::is_hex_digit (int c) const
{
  if ('0' <= c && c <= '9') return true;
  if ('a' <= c && c <= 'f') return true;
  if ('A' <= c && c <= 'F') return true;
  return false;
}
////////////////////////////////////////////////////////////////////////////////
// Maps the character that follows a backslash to the character it denotes.
// The letters b, f, n, r, t and v become the matching control codes; every
// other character, including the quotes and the backslash, denotes itself.
int Lexer::decode_escape (int c) const
{
  if (c == 'b') return 0x08;  // backspace
  if (c == 'f') return 0x0C;  // form feed
  if (c == 'n') return 0x0A;  // line feed
  if (c == 'r') return 0x0D;  // carriage return
  if (c == 't') return 0x09;  // horizontal tab
  if (c == 'v') return 0x0B;  // vertical tab

  // ', " and \ (0x27, 0x22, 0x5C) along with everything else: unchanged.
  return c;
}
////////////////////////////////////////////////////////////////////////////////
// Converts one hexadecimal digit character to its value, 0 through 15.  There
// is no validation: anything that is not 0-9 or a-f is treated as if it were
// an upper-case digit.
int Lexer::hex_to_int (int c) const
{
  if ('0' <= c && c <= '9')
    return c - '0';

  if ('a' <= c && c <= 'f')
    return c - 'a' + 10;

  return c - 'A' + 10;
}
////////////////////////////////////////////////////////////////////////////////
// Converts two hexadecimal digit characters, most significant first, to the
// value they spell.
int Lexer::hex_to_int (int c0, int c1) const
{
  const int high = hex_to_int (c0) << 4;
  const int low  = hex_to_int (c1);
  return high + low;
}
////////////////////////////////////////////////////////////////////////////////
// Converts four hexadecimal digit characters, most significant first, to the
// value they spell.
int Lexer::hex_to_int (int c0, int c1, int c2, int c3) const
{
  const int nibble3 = hex_to_int (c0) << 12;
  const int nibble2 = hex_to_int (c1) << 8;
  const int nibble1 = hex_to_int (c2) << 4;
  const int nibble0 = hex_to_int (c3);
  return nibble3 + nibble2 + nibble1 + nibble0;
}
////////////////////////////////////////////////////////////////////////////////
// Advances the four-slot look-ahead window by one position: each slot takes
// the value of the slot after it, and the last slot is refilled by calling
// utf8_next_char on _input with _i.  The order of the assignments matters.
void Lexer::shift ()
{
_n0 = _n1;
_n1 = _n2;
_n2 = _n3;
// presumably utf8_next_char returns the next code point and advances _i -
// verify against utf8.h.
_n3 = utf8_next_char (_input, _i);
// Debugging aid, left disabled.
//std::cout << "# shift [" << (char) _n0 << (char) _n1 << (char) _n2 << (char) _n3 << "]\n";
}
////////////////////////////////////////////////////////////////////////////////

94
src/Lexer.h Normal file
View file

@ -0,0 +1,94 @@
////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2013 - 2014, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_LEXER
#define INCLUDED_LEXER
#include <vector>
#include <string>
// Splits an input string into typed tokens, one per call to token ().  The
// lexer looks at up to four characters at a time (_n0 through _n3).
class Lexer
{
public:
// Token types.  The enumerators marked "Intermediate" are also used by
// token () as working states while a token is being assembled.
enum Type
{
typeNone = 0,
typeString,
typeIdentifier,
typeIdentifierEscape, // Intermediate
typeEscape, // Intermediate
typeEscapeHex, // Intermediate
typeEscapeUnicode, // Intermediate
typeNumber,
typeDecimal,
typeExponentIndicator, // Intermediate
typeExponent, // Intermediate
typeHex,
typeOperator,
typeDate,
typeDuration,
};
// Constructs a lexer over the given input text.
Lexer (const std::string&);
virtual ~Lexer ();
Lexer (const Lexer&); // Not implemented.
Lexer& operator= (const Lexer&); // Not implemented.
bool operator== (const Lexer&); // Not implemented.
// Extracts the next token and its type; returns false when none was produced.
bool token (std::string&, Type&);
// Sets the flag that token () passes on to the date parser.
void ambiguity (bool);
// Returns a printable name for a token type.
static const std::string type_name (const Type&);
// Returns true when the given code point is a whitespace character.
static bool is_ws (int);
private:
// Character classification helpers.
bool is_punct (int) const;
bool is_num (int) const;
bool is_ident_start (int) const;
bool is_ident (int) const;
bool is_triple_op (int, int, int) const;
bool is_double_op (int, int) const;
bool is_single_op (int) const;
bool is_dec_digit (int) const;
bool is_hex_digit (int) const;
// Escape and hexadecimal decoding helpers.
int decode_escape (int) const;
int hex_to_int (int) const;
int hex_to_int (int, int) const;
int hex_to_int (int, int, int, int) const;
// Advances the look-ahead window by one character.
void shift ();
private:
// The text being lexed.
const std::string _input;
// Read position within _input, advanced as characters are read.
std::string::size_type _i;
// Look-ahead window: _n0 is the current character, _n1.._n3 the ones after it.
int _n0;
int _n1;
int _n2;
int _n3;
// Flag handed to the date parser; set through ambiguity ().
bool _ambiguity;
};
#endif
////////////////////////////////////////////////////////////////////////////////