From d2629584d593fa6247fefeeed2234f483564a834 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Thu, 14 Jan 2016 22:04:44 -0500 Subject: [PATCH] =?UTF-8?q?Lexer:=20Migrated=20from=20::isDigit=20to?= =?UTF-8?q?=20unicodeLatinDigit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Lexer.cpp | 33 ++++++++++++--------------------- src/Lexer.h | 1 - 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 5cb0040c..d0914659 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -84,15 +84,6 @@ const std::string Lexer::typeName (const Lexer::Type& type) return "unknown"; } -//////////////////////////////////////////////////////////////////////////////// -// Digits 0-9. -// -// TODO This list should be derived from the Unicode database. -bool Lexer::isDigit (int c) -{ - return c >= 0x30 && c <= 0x39; -} - //////////////////////////////////////////////////////////////////////////////// // Digits 0-9 a-f A-F. 
bool Lexer::isHexDigit (int c) @@ -112,19 +103,19 @@ bool Lexer::isNumber (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; - if (isDigit (_text[marker])) + if (unicodeLatinDigit (_text[marker])) { ++marker; - while (isDigit (_text[marker])) + while (unicodeLatinDigit (_text[marker])) utf8_next_char (_text, marker); if (_text[marker] == '.') { ++marker; - if (isDigit (_text[marker])) + if (unicodeLatinDigit (_text[marker])) { ++marker; - while (isDigit (_text[marker])) + while (unicodeLatinDigit (_text[marker])) utf8_next_char (_text, marker); } } @@ -138,19 +129,19 @@ bool Lexer::isNumber (std::string& token, Lexer::Type& type) _text[marker] == '-') ++marker; - if (isDigit (_text[marker])) + if (unicodeLatinDigit (_text[marker])) { ++marker; - while (isDigit (_text[marker])) + while (unicodeLatinDigit (_text[marker])) utf8_next_char (_text, marker); if (_text[marker] == '.') { ++marker; - if (isDigit (_text[marker])) + if (unicodeLatinDigit (_text[marker])) { ++marker; - while (isDigit (_text[marker])) + while (unicodeLatinDigit (_text[marker])) utf8_next_char (_text, marker); } } @@ -180,10 +171,10 @@ bool Lexer::isInteger (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; - if (isDigit (_text[marker])) + if (unicodeLatinDigit (_text[marker])) { ++marker; - while (isDigit (_text[marker])) + while (unicodeLatinDigit (_text[marker])) utf8_next_char (_text, marker); token = _text.substr (_cursor, marker - _cursor); @@ -242,7 +233,7 @@ bool Lexer::isBoundary (int left, int right) // XOR if (unicodeLatinAlpha (left) != unicodeLatinAlpha (right)) return true; - if (isDigit (left) != isDigit (right)) return true; + if (unicodeLatinDigit (left) != unicodeLatinDigit (right)) return true; if (unicodeWhitespace (left) != unicodeWhitespace (right)) return true; // OR @@ -277,7 +268,7 @@ bool Lexer::isPunctuation (int c) c != '#' && c != '$' && c != '_' && - ! isDigit (c) && + ! unicodeLatinDigit (c) && ! 
unicodeLatinAlpha (c); } diff --git a/src/Lexer.h b/src/Lexer.h index 1868cb3a..48e3cbdb 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -49,7 +49,6 @@ public: // Static helpers. static const std::string typeName (const Lexer::Type&); - static bool isDigit (int); static bool isHexDigit (int); static bool isSingleCharOperator (int); static bool isDoubleCharOperator (int, int, int);