From b0ff1ff55b7217105261b110184b8177a5230418 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Sun, 1 May 2011 11:10:32 -0400 Subject: [PATCH] I18N - Renamed text.cpp/characters to utf8.cpp/utf8_length, which is more appropriate. --- src/Table.cpp | 13 +++++++------ src/View.cpp | 3 ++- src/text.cpp | 27 ++++----------------------- src/text.h | 3 --- src/utf8.cpp | 19 +++++++++++++++++++ src/utf8.h | 2 ++ test/text.t.cpp | 11 ++++++----- 7 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/Table.cpp b/src/Table.cpp index e630fc098..19fde779c 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -52,6 +52,7 @@ #include "Duration.h" #include "Timer.h" #include "text.h" +#include "utf8.h" #include "util.h" #include "Context.h" @@ -108,7 +109,7 @@ void Table::setTableDashedUnderline () int Table::addColumn (const std::string& col) { mSpecifiedWidth.push_back (minimum); - mMaxDataWidth.push_back (col == "" ? 1 : characters (col)); + mMaxDataWidth.push_back (col == "" ? 1 : utf8_length (col)); mCalculatedWidth.push_back (0); mColumnPadding.push_back (0); @@ -193,11 +194,11 @@ void Table::addCell (const int row, const int col, const std::string& data) std::vector lines; split (lines, data, "\n"); for (unsigned int i = 0; i < lines.size (); ++i) - if (characters (lines[i]) > length) - length = characters (lines[i]); + if (utf8_length (lines[i]) > length) + length = utf8_length (lines[i]); } else - length = characters (data); + length = utf8_length (data); // Automatically maintain max width. mMaxDataWidth[col] = max (mMaxDataWidth[col], length); @@ -447,7 +448,7 @@ const std::string Table::formatHeader ( std::string data = mColumns[col]; Color c = getHeaderUnderline (col); - int gap = width - characters (data); + int gap = width - utf8_length (data); std::string pad = std::string (padding, ' '); @@ -530,7 +531,7 @@ void Table::formatCell ( for (size_t chunk = 0; chunk < chunks.size (); ++chunk) { // Place the data within the available space - justify. 
- int gap = width - characters (chunks[chunk]); + int gap = width - utf8_length (chunks[chunk]); preJust = ""; postJust = ""; diff --git a/src/View.cpp b/src/View.cpp index 2a4bf7783..35f84e1d6 100644 --- a/src/View.cpp +++ b/src/View.cpp @@ -27,6 +27,7 @@ #include #include +#include #include //////////////////////////////////////////////////////////////////////////////// @@ -96,7 +97,7 @@ std::string View::render (std::vector & data, std::vector & sequence) for (i = _columns.begin (); i != _columns.end (); ++i) { // Headers factor in to width calculations. - int global_min = characters ((*i)->getLabel ()); + int global_min = utf8_length ((*i)->getLabel ()); int global_ideal = global_min; std::vector ::iterator d; diff --git a/src/text.cpp b/src/text.cpp index badea65e4..74566671e 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -36,6 +36,7 @@ #include "Context.h" #include "util.h" #include "text.h" +#include "utf8.h" extern Context context; @@ -229,7 +230,7 @@ void extractLine (std::string& text, std::string& line, int length) // Special case: no \n, and less than length characters total. // special case: text.find ("\n") == std::string::npos && text.length () < length - if (eol == std::string::npos && characters (text) <= length) + if (eol == std::string::npos && utf8_length (text) <= length) { line = text; text = ""; @@ -635,26 +636,6 @@ int strippedLength (const std::string& input) return count; } -//////////////////////////////////////////////////////////////////////////////// -// UTF8 -int characters (const std::string& str) -{ - int byteLength = str.length (); - int charLength = byteLength; - const char* data = str.data (); - - // decrement the number of bytes for each byte that matches 0b10?????? 
- // this way only the first byte of any utf8 sequence is counted - for (int i = 0; i < byteLength; i++) - { - // extract the two MSB and check whether they are 10 - if ((data[i] & 0xC0) == 0x80) - charLength--; - } - - return charLength; -} - //////////////////////////////////////////////////////////////////////////////// // Truncates a long line, and include a two-character ellipsis. std::string cutOff (const std::string& str, std::string::size_type len) @@ -728,7 +709,7 @@ std::string leftJustify (const int input, const int width) //////////////////////////////////////////////////////////////////////////////// std::string leftJustify (const std::string& input, const int width) { - return input + std::string (width - characters (input), ' '); + return input + std::string (width - utf8_length (input), ' '); } //////////////////////////////////////////////////////////////////////////////// @@ -742,7 +723,7 @@ std::string rightJustify (const int input, const int width) //////////////////////////////////////////////////////////////////////////////// std::string rightJustify (const std::string& input, const int width) { - return std::string (width - characters (input), ' ') + input; + return std::string (width - utf8_length (input), ' ') + input; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/text.h b/src/text.h index 882a8b2e8..669e80464 100644 --- a/src/text.h +++ b/src/text.h @@ -71,8 +71,5 @@ std::string leftJustify (const std::string&, const int); std::string rightJustify (const int, const int); std::string rightJustify (const std::string&, const int); -// UTF-8 aware. 
- int characters (const std::string&); - #endif //////////////////////////////////////////////////////////////////////////////// diff --git a/src/utf8.cpp b/src/utf8.cpp index 0e3bee35f..ae81c8207 100644 --- a/src/utf8.cpp +++ b/src/utf8.cpp @@ -166,3 +166,22 @@ int utf8_sequence (unsigned int character) } //////////////////////////////////////////////////////////////////////////////// +int utf8_length (const std::string& str) +{ + int byteLength = str.length (); + int charLength = byteLength; + const char* data = str.data (); + + // Start with the byte count, then subtract one for every UTF-8 continuation + // byte (0b10??????), so that only the lead byte of each sequence is counted. + for (int i = 0; i < byteLength; i++) + { + // Mask the two most significant bits; 0b10 identifies a continuation byte. + if ((data[i] & 0xC0) == 0x80) + charLength--; + } + + return charLength; +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/utf8.h b/src/utf8.h index 09aa49561..7f5ac618b 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -33,6 +33,8 @@ unsigned int utf8_codepoint (const std::string&); unsigned int utf8_next_char (const std::string&, std::string::size_type&); std::string utf8_character (unsigned int); int utf8_sequence (unsigned int); +int utf8_length (const std::string&); + #endif //////////////////////////////////////////////////////////////////////////////// diff --git a/test/text.t.cpp b/test/text.t.cpp index 23b7bea1d..3361c1267 100644 --- a/test/text.t.cpp +++ b/test/text.t.cpp @@ -27,6 +27,7 @@ #include #include "main.h" #include "text.h" +#include "utf8.h" #include "test.h" Context context; @@ -387,11 +388,6 @@ int main (int argc, char** argv) t.is (strippedLength (std::string ("\033[0m")), 0, "strippedLength ^[[0m -> 0"); t.is (strippedLength (std::string ("\033[1m\033[0m")), 0, "strippedLength ^[[1m^[[0m -> 0"); - // int characters (const std::string&); - t.is (characters ("Çirçös"), 6, "characters (Çirçös) == 6"); - t.is (characters ("ツネナラム"), 
5, "characters (ツネナラム) == 5"); - t.is (characters ("Zwölf Boxkämpfer"), 16, "characters (Zwölf Boxkämpfer) == 16"); - // std::string format (char); t.is (format ('A'), "A", "format ('A') -> A"); @@ -438,6 +434,11 @@ int main (int argc, char** argv) t.is (rightJustify ("foo", 5), " foo", "rightJustify foo,5 -> ' foo'"); t.is (rightJustify ("föo", 5), " föo", "rightJustify föo,5 -> ' föo'"); + // int utf8_length (const std::string&); + t.is (utf8_length ("Çirçös"), 6, "utf8_length (Çirçös) == 6"); + t.is (utf8_length ("ツネナラム"), 5, "utf8_length (ツネナラム) == 5"); + t.is (utf8_length ("Zwölf Boxkämpfer"), 16, "utf8_length (Zwölf Boxkämpfer) == 16"); + return 0; }