From 01e589a172881e9079f32c52ceccf8c1c23ee276 Mon Sep 17 00:00:00 2001
From: Paul Beckingham
Date: Wed, 16 Jan 2013 18:35:37 -0500
Subject: [PATCH] I18N - One does not simply std::string::substr a UTF8 string, like a bloody caveman. Implemented utf8_substr, and added unit tests.

---
 ChangeLog                    |  2 ++
 src/Config.cpp               |  4 ++-
 src/commands/CmdCalendar.cpp | 29 +++++++++---------
 src/utf8.cpp                 | 26 ++++++++++++++++
 src/utf8.h                   |  1 +
 test/.gitignore              |  1 +
 test/CMakeLists.txt          |  3 +-
 test/utf8.t.cpp              | 58 ++++++++++++++++++++++++++++++++++++
 test/{utf8.t => utf8_tw.t}   |  0
 9 files changed, 108 insertions(+), 16 deletions(-)
 create mode 100644 test/utf8.t.cpp
 rename test/{utf8.t => utf8_tw.t} (100%)

diff --git a/ChangeLog b/ChangeLog
index 6740e2a5a..15488c852 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -81,6 +81,8 @@ Bugs
   + Fix a bug where 'print.empty.columns=no' resulted in never printing the
     project column.
   + Integrated latest UTF8 character width calculations from Markus Kuhn.
+  + Fixed bug where localized UTF8 strings were not properly substr'd (thanks to
+    Fidel Mato).
 
 ------ old releases ------------------------------
 
diff --git a/src/Config.cpp b/src/Config.cpp
index 1f0eda8a1..93689d3a6 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -113,7 +113,9 @@ std::string Config::_defaults =
   "dateformat.info=m/d/Y H:N:S # Preferred display date format for information\n"
   "dateformat.report=m/d/Y # Preferred display date format for reports\n"
   "dateformat.annotation=m/d/Y # Preferred display date format for annotations\n"
-  "weekstart=Sunday # Sunday or Monday only\n"
+  "weekstart="
+  STRING_DATE_SUNDAY_LONG
+  " # Sunday or Monday only\n"
   "displayweeknumber=yes # Show week numbers on calendar\n"
   "due=7 # Task is considered due in 7 days\n"
   "\n"
diff --git a/src/commands/CmdCalendar.cpp b/src/commands/CmdCalendar.cpp
index e65d0a790..89dc147ac 100644
--- a/src/commands/CmdCalendar.cpp
+++ b/src/commands/CmdCalendar.cpp
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -429,24 +430,24 @@ std::string CmdCalendar::renderMonths (
     if (weekStart == 1)
     {
       view.add (Column::factory ("string.right", " "));
-      view.add (Column::factory ("string.right", Date::dayName (1).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (2).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (3).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (4).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (5).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (6).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (0).substr (0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (1), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (2), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (3), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (4), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (5), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (6), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (0), 0, 2)));
     }
     else
     {
       view.add (Column::factory ("string.right", " "));
-      view.add (Column::factory ("string.right", Date::dayName (0).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (1).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (2).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (3).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (4).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (5).substr (0, 2)));
-      view.add (Column::factory ("string.right", Date::dayName (6).substr (0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (0), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (1), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (2), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (3), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (4), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (5), 0, 2)));
+      view.add (Column::factory ("string.right", utf8_substr (Date::dayName (6), 0, 2)));
     }
   }
 
diff --git a/src/utf8.cpp b/src/utf8.cpp
index 4ff781045..8c6b8d7f4 100644
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@@ -231,3 +231,29 @@ unsigned int utf8_text_length (const std::string& str)
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+const std::string utf8_substr (
+  const std::string& input,
+  unsigned int start,
+  unsigned int length /*=0*/)
+{
+  // Find the starting index.
+  std::string::size_type index_start = 0;
+  for (unsigned int i = 0; i < start; i++)
+    utf8_next_char (input, index_start);
+
+  std::string result;
+  if (length)
+  {
+    std::string::size_type index_end = index_start;
+    for (unsigned int i = 0; i < length; i++)
+      utf8_next_char (input, index_end);
+
+    result = input.substr (index_start, index_end - index_start);
+  }
+  else
+    result = input.substr (index_start);
+
+  return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
diff --git a/src/utf8.h b/src/utf8.h
index 4be0734e3..3b67f2ab8 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -37,6 +37,7 @@ std::string utf8_character (unsigned int);
 int utf8_sequence (unsigned int);
 unsigned int utf8_length (const std::string&);
 unsigned int utf8_text_length (const std::string&);
+const std::string utf8_substr (const std::string&, unsigned int, unsigned int length = 0);
 
 #endif
 
diff --git a/test/.gitignore b/test/.gitignore
index b4b29c42e..4195acc81 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -21,6 +21,7 @@ taskmod.t
 tdb2.t
 text.t
 uri.t
+utf8.t
 util.t
 view.t
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ca38fbbab..3e009d749 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -8,7 +8,8 @@ include_directories (${CMAKE_SOURCE_DIR}
 
 set (test_SRCS autocomplete.t color.t config.t date.t directory.t dom.t
                duration.t file.t i18n.t json.t list.t nibbler.t path.t rx.t
-               t.t t2.t taskmod.t tdb2.t text.t uri.t util.t view.t json_test)
+               t.t t2.t taskmod.t tdb2.t text.t uri.t utf8.t util.t view.t
+               json_test)
 
 message ("-- Configuring run_all")
 set (TESTBLOB "*.t")
diff --git a/test/utf8.t.cpp b/test/utf8.t.cpp
new file mode 100644
index 000000000..0fdc1c998
--- /dev/null
+++ b/test/utf8.t.cpp
@@ -0,0 +1,58 @@
+////////////////////////////////////////////////////////////////////////////////
+// taskwarrior - a command line task list manager.
+//
+// Copyright 2006-2012, Paul Beckingham, Federico Hernandez.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// http://www.opensource.org/licenses/mit-license.php
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include
+#include
+#include
+
+////////////////////////////////////////////////////////////////////////////////
+int main (int argc, char** argv)
+{
+  UnitTest t (4);
+
+  std::string ascii_text = "This is a test";
+  std::string utf8_text = "más sábado miércoles";
+
+  // TODO unsigned int utf8_codepoint (const std::string&);
+  // TODO unsigned int utf8_next_char (const std::string&, std::string::size_type&);
+  // TODO std::string utf8_character (unsigned int);
+  // TODO int utf8_sequence (unsigned int);
+
+  // unsigned int utf8_length (const std::string&);
+  t.is (utf8_length (ascii_text), 14, "ASCII utf8_length");
+  t.is (utf8_length (utf8_text), 20, "UTF8 utf8_length");
+
+  // TODO unsigned int utf8_text_length (const std::string&);
+
+  // const std::string utf8_substr (const std::string&, unsigned int, unsigned int length = 0);
+  t.is (utf8_substr (ascii_text, 0, 2), "Th", "ASCII utf8_substr");
+  t.is (utf8_substr (utf8_text, 0, 2), "má", "UTF8 utf8_substr");
+
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
diff --git a/test/utf8.t b/test/utf8_tw.t
similarity index 100%
rename from test/utf8.t
rename to test/utf8_tw.t