From 291818c33da9cda1ca22c663bae834607c212734 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Fri, 13 May 2011 18:01:02 -0400 Subject: [PATCH] i18n - Added function utf8_text_length which calculates the length of text in characters, not bytes, and excludes color control codes. --- src/text.cpp | 6 +++--- src/utf8.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/utf8.h | 1 + 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/text.cpp b/src/text.cpp index 8a26426c2..3e4eb1dcc 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -254,7 +254,7 @@ void extractLine (std::string& text, std::string& line, int length) // Special case: no \n, and less than length characters total. // special case: text.find ("\n") == std::string::npos && text.length () < length - if (eol == std::string::npos && utf8_length (text) <= length) + if (eol == std::string::npos && utf8_text_length (text) <= length) { line = text; text = ""; @@ -730,7 +730,7 @@ std::string leftJustify (const int input, const int width) //////////////////////////////////////////////////////////////////////////////// std::string leftJustify (const std::string& input, const int width) { - return input + std::string (width - utf8_length (input), ' '); + return input + std::string (width - utf8_text_length (input), ' '); } //////////////////////////////////////////////////////////////////////////////// @@ -744,7 +744,7 @@ std::string rightJustify (const int input, const int width) //////////////////////////////////////////////////////////////////////////////// std::string rightJustify (const std::string& input, const int width) { - return std::string (width - utf8_length (input), ' ') + input; + return std::string (width - utf8_text_length (input), ' ') + input; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/utf8.cpp b/src/utf8.cpp index ae81c8207..2cd81d60a 100644 --- a/src/utf8.cpp +++ b/src/utf8.cpp @@ -185,3 +185,41 @@ int utf8_length (const std::string& str) } //////////////////////////////////////////////////////////////////////////////// +int utf8_text_length (const std::string& str) +{ + int byteLength = str.length (); + int charLength = byteLength; + const char* data = str.data (); + bool in_color = false; + + // Decrement the number of bytes for each byte that matches 0b10?????? + // this way only the first byte of any utf8 sequence is counted. + for (int i = 0; i < byteLength; i++) + { + if (in_color) + { + if (data[i] == 'm') + in_color = false; + + --charLength; + } + else + { + if (data[i] == 033) + { + in_color = true; + --charLength; + } + else + { + // Extract the first two bits and check whether they are 10 + if ((data[i] & 0xC0) == 0x80) + --charLength; + } + } + } + + return charLength; +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/utf8.h b/src/utf8.h index 7f5ac618b..c76b08210 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -34,6 +34,7 @@ unsigned int utf8_next_char (const std::string&, std::string::size_type&); std::string utf8_character (unsigned int); int utf8_sequence (unsigned int); int utf8_length (const std::string&); +int utf8_text_length (const std::string&); #endif