I18N

- Renamed text.cpp/characters to utf8.cpp/utf8_length, which is more appropriate.
2025-06-26 10:54:26 +02:00 · 2011-05-01 11:10:32 -04:00 · 2011-05-01 11:10:32 -04:00 · b0ff1ff55b
commit b0ff1ff55b
parent 0b3281d01d
7 changed files with 40 additions and 38 deletions
--- a/src/Table.cpp
+++ b/src/Table.cpp
@ -52,6 +52,7 @@
 #include "Duration.h"
 #include "Timer.h"
 #include "text.h"
+#include "utf8.h"
 #include "util.h"
 #include "Context.h"

@ -108,7 +109,7 @@ void Table::setTableDashedUnderline ()
 int Table::addColumn (const std::string& col)
 {
  mSpecifiedWidth.push_back (minimum);
-  mMaxDataWidth.push_back (col == "" ? 1 : characters (col));
+  mMaxDataWidth.push_back (col == "" ? 1 : utf8_length (col));
  mCalculatedWidth.push_back (0);
  mColumnPadding.push_back (0);

@ -193,11 +194,11 @@ void Table::addCell (const int row, const int col, const std::string& data)
    std::vector <std::string> lines;
    split (lines, data, "\n");
    for (unsigned int i = 0; i < lines.size (); ++i)
-      if (characters (lines[i]) > length)
-        length = characters (lines[i]);
+      if (utf8_length (lines[i]) > length)
+        length = utf8_length (lines[i]);
  }
  else
-    length = characters (data);
+    length = utf8_length (data);

  // Automatically maintain max width.
  mMaxDataWidth[col] = max (mMaxDataWidth[col], length);
@ -447,7 +448,7 @@ const std::string Table::formatHeader (

  std::string data = mColumns[col];
  Color c = getHeaderUnderline (col);
-  int gap = width - characters (data);
+  int gap = width - utf8_length (data);

  std::string pad = std::string (padding, ' ');

@ -530,7 +531,7 @@ void Table::formatCell (
  for (size_t chunk = 0; chunk < chunks.size (); ++chunk)
  {
    // Place the data within the available space - justify.
-    int gap = width - characters (chunks[chunk]);
+    int gap = width - utf8_length (chunks[chunk]);

    preJust = "";
    postJust = "";
--- a/src/View.cpp
+++ b/src/View.cpp
@ -27,6 +27,7 @@

 #include <View.h>
 #include <text.h>
+#include <utf8.h>
 #include <main.h>

 ////////////////////////////////////////////////////////////////////////////////
@ -96,7 +97,7 @@ std::string View::render (std::vector <Task>& data, std::vector <int>& sequence)
  for (i = _columns.begin (); i != _columns.end (); ++i)
  {
    // Headers factor in to width calculations.
-    int global_min = characters ((*i)->getLabel ());
+    int global_min = utf8_length ((*i)->getLabel ());
    int global_ideal = global_min;

    std::vector <Task>::iterator d;
--- a/src/text.cpp
+++ b/src/text.cpp
@ -36,6 +36,7 @@
 #include "Context.h"
 #include "util.h"
 #include "text.h"
+#include "utf8.h"

 extern Context context;

@ -229,7 +230,7 @@ void extractLine (std::string& text, std::string& line, int length)

  // Special case: no \n, and less than length characters total.
  // special case: text.find ("\n") == std::string::npos && text.length () < length
-  if (eol == std::string::npos && characters (text) <= length)
+  if (eol == std::string::npos && utf8_length (text) <= length)
  {
    line = text;
    text = "";
@ -635,26 +636,6 @@ int strippedLength (const std::string& input)
  return count;
 }

-////////////////////////////////////////////////////////////////////////////////
-// UTF8
-int characters (const std::string& str)
-{
-  int byteLength = str.length ();
-  int charLength = byteLength;
-  const char* data = str.data ();
-
-  // decrement the number of bytes for each byte that matches 0b10??????
-  // this way only the first byte of any utf8 sequence is counted
-  for (int i = 0; i < byteLength; i++)
-  {
-    // extract the two MSB and check whether they are 10
-    if ((data[i] & 0xC0) == 0x80)
-      charLength--;
-  }
-
-  return charLength;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // Truncates a long line, and include a two-character ellipsis.
 std::string cutOff (const std::string& str, std::string::size_type len)
@ -728,7 +709,7 @@ std::string leftJustify (const int input, const int width)
 ////////////////////////////////////////////////////////////////////////////////
 std::string leftJustify (const std::string& input, const int width)
 {
-  return input + std::string (width - characters (input), ' ');
+  return input + std::string (width - utf8_length (input), ' ');
 }

 ////////////////////////////////////////////////////////////////////////////////
@ -742,7 +723,7 @@ std::string rightJustify (const int input, const int width)
 ////////////////////////////////////////////////////////////////////////////////
 std::string rightJustify (const std::string& input, const int width)
 {
-  return std::string (width - characters (input), ' ') + input;
+  return std::string (width - utf8_length (input), ' ') + input;
 }

 ////////////////////////////////////////////////////////////////////////////////
--- a/src/text.h
+++ b/src/text.h
@ -71,8 +71,5 @@ std::string leftJustify (const std::string&, const int);
 std::string rightJustify (const int, const int);
 std::string rightJustify (const std::string&, const int);

-// UTF-8 aware.
-int characters (const std::string&);
-
 #endif
 ////////////////////////////////////////////////////////////////////////////////
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@ -166,3 +166,22 @@ int utf8_sequence (unsigned int character)
 }

 ////////////////////////////////////////////////////////////////////////////////
+int utf8_length (const std::string& str)
+{
+  int byteLength = str.length ();
+  int charLength = byteLength;
+  const char* data = str.data ();
+
+  // Count characters by starting from the byte count and subtracting one for
+  // each byte matching 0b10??????, so only the first byte of a sequence counts.
+  for (int i = 0; i < byteLength; i++)
+  {
+    // Mask the two most significant bits and check whether they are 10.
+    if ((data[i] & 0xC0) == 0x80)
+      charLength--;
+  }
+
+  return charLength;
+}
+
+////////////////////////////////////////////////////////////////////////////////
--- a/src/utf8.h
+++ b/src/utf8.h
@ -33,6 +33,8 @@ unsigned int utf8_codepoint (const std::string&);
 unsigned int utf8_next_char (const std::string&, std::string::size_type&);
 std::string utf8_character (unsigned int);
 int utf8_sequence (unsigned int);
+int utf8_length (const std::string&);
+

 #endif
 ////////////////////////////////////////////////////////////////////////////////
--- a/test/text.t.cpp
+++ b/test/text.t.cpp
@ -27,6 +27,7 @@
 #include <iostream>
 #include "main.h"
 #include "text.h"
+#include "utf8.h"
 #include "test.h"

 Context context;
@ -387,11 +388,6 @@ int main (int argc, char** argv)
  t.is (strippedLength (std::string ("\033[0m")),                           0, "strippedLength ^[[0m                        -> 0");
  t.is (strippedLength (std::string ("\033[1m\033[0m")),                    0, "strippedLength ^[[1m^[[0m                   -> 0");

-  // int characters (const std::string&);
-  t.is (characters ("Çirçös"),            6, "characters (Çirçös) == 6");
-  t.is (characters ("ツネナラム"),        5, "characters (ツネナラム) == 5");
-  t.is (characters ("Zwölf Boxkämpfer"), 16, "characters (Zwölf Boxkämpfer) == 16");
-
  // std::string format (char);
  t.is (format ('A'), "A", "format ('A') -> A");

@ -438,6 +434,11 @@ int main (int argc, char** argv)
  t.is (rightJustify ("foo", 5), "  foo", "rightJustify foo,5 -> '  foo'");
  t.is (rightJustify ("föo", 5), "  föo", "rightJustify föo,5 -> '  föo'");

+  // int utf8_length (const std::string&);
+  t.is (utf8_length ("Çirçös"),            6, "utf8_length (Çirçös) == 6");
+  t.is (utf8_length ("ツネナラム"),        5, "utf8_length (ツネナラム) == 5");
+  t.is (utf8_length ("Zwölf Boxkämpfer"), 16, "utf8_length (Zwölf Boxkämpfer) == 16");
+
  return 0;
 }