diff --git a/src/common/unicode.cpp b/src/common/unicode.cpp index 5ee37122..3d107f46 100644 --- a/src/common/unicode.cpp +++ b/src/common/unicode.cpp @@ -35,7 +35,7 @@ // Static // // TODO This list should be derived from the Unicode database. -bool unicodeWhitespace (int c) +bool unicodeWhitespace (unsigned int c) { return (c == 0x0020 || // space Common Separator, space c == 0x0009 || // Common Other, control HT, Horizontal Tab diff --git a/test/.gitignore b/test/.gitignore index 0b562dd4..0b2c1586 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -10,4 +10,5 @@ rules.t rx.t table.t text.t +unicode.t utf8.t diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 575b2fb2..cd63896e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -9,7 +9,7 @@ include_directories (${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/test ${TIMEW_INCLUDE_DIRS}) -set (test_SRCS color.t fs.t grammar.t lexer.t list.t lr0.t pig.t rules.t rx.t table.t text.t utf8.t) +set (test_SRCS color.t fs.t grammar.t lexer.t list.t lr0.t pig.t rules.t rx.t table.t text.t unicode.t utf8.t) add_custom_target (test ./run_all --verbose DEPENDS ${test_SRCS} timew_executable diff --git a/test/unicode.t.cpp b/test/unicode.t.cpp new file mode 100644 index 00000000..4b9b675d --- /dev/null +++ b/test/unicode.t.cpp @@ -0,0 +1,68 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright 2013 - 2016, Göteborg Bit Factory. 
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// http://www.opensource.org/licenses/mit-license.php
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <cmake.h>    // NOTE(review): the three header names were missing here; restored per project convention - confirm.
+#include <unicode.h>  // presumably declares unicodeWhitespace - confirm.
+#include <test.h>     // presumably declares UnitTest - confirm.
+
+////////////////////////////////////////////////////////////////////////////////
+int main (int, char**)  // Command-line arguments are unused.
+{
+  UnitTest t (27);  // 27 = 1 notok + 26 ok calls below; presumably the planned test count - keep in sync.
+
+  // White space detection: U+0041 is the negative control; every code point after it is expected to be whitespace.
+  t.notok (unicodeWhitespace (0x0041), "U+0041 (A) ! unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x0020), "U+0020 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x0009), "U+0009 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x000A), "U+000A unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x000B), "U+000B unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x000C), "U+000C unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x000D), "U+000D unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x0085), "U+0085 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x00A0), "U+00A0 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x1680), "U+1680 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x180E), "U+180E unicodeWhitespace");  // NOTE(review): U+180E lost White_Space in Unicode 6.3 - confirm it should still match.
+  t.ok    (unicodeWhitespace (0x2000), "U+2000 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2001), "U+2001 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2002), "U+2002 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2003), "U+2003 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2004), "U+2004 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2005), "U+2005 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2006), "U+2006 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2007), "U+2007 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2008), "U+2008 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2009), "U+2009 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x200A), "U+200A unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2028), "U+2028 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x2029), "U+2029 unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x202F), "U+202F unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x205F), "U+205F unicodeWhitespace");
+  t.ok    (unicodeWhitespace (0x3000), "U+3000 unicodeWhitespace");
+
+  return 0;  // Always 0 here; how failed checks are reported is up to UnitTest (not visible in this file).
+}
+
+////////////////////////////////////////////////////////////////////////////////