From b0ff1ff55b7217105261b110184b8177a5230418 Mon Sep 17 00:00:00 2001
From: Paul Beckingham <paul@beckingham.net>
Date: Sun, 1 May 2011 11:10:32 -0400
Subject: [PATCH] I18N

- Moved the characters function from text.cpp to utf8.cpp and renamed it
  utf8_length, which is a more appropriate location and name.
---
 src/Table.cpp   | 13 +++++++------
 src/View.cpp    |  3 ++-
 src/text.cpp    | 27 ++++-----------------------
 src/text.h      |  3 ---
 src/utf8.cpp    | 19 +++++++++++++++++++
 src/utf8.h      |  2 ++
 test/text.t.cpp | 11 ++++++-----
 7 files changed, 40 insertions(+), 38 deletions(-)
diff --git a/src/Table.cpp b/src/Table.cpp
index e630fc098..19fde779c 100644
--- a/src/Table.cpp
+++ b/src/Table.cpp
@@ -52,6 +52,7 @@
 #include "Duration.h"
 #include "Timer.h"
 #include "text.h"
+#include "utf8.h"
 #include "util.h"
 #include "Context.h"
 
@@ -108,7 +109,7 @@ void Table::setTableDashedUnderline ()
 int Table::addColumn (const std::string& col)
 {
   mSpecifiedWidth.push_back (minimum);
-  mMaxDataWidth.push_back (col == "" ? 1 : characters (col));
+  mMaxDataWidth.push_back (col == "" ? 1 : utf8_length (col));
   mCalculatedWidth.push_back (0);
   mColumnPadding.push_back (0);
 
@@ -193,11 +194,11 @@ void Table::addCell (const int row, const int col, const std::string& data)
     std::vector <std::string> lines;
     split (lines, data, "\n");
     for (unsigned int i = 0; i < lines.size (); ++i)
-      if (characters (lines[i]) > length)
-        length = characters (lines[i]);
+      if (utf8_length (lines[i]) > length)
+        length = utf8_length (lines[i]);
   }
   else
-    length = characters (data);
+    length = utf8_length (data);
 
   // Automatically maintain max width.
   mMaxDataWidth[col] = max (mMaxDataWidth[col], length);
@@ -447,7 +448,7 @@ const std::string Table::formatHeader (
 
   std::string data = mColumns[col];
   Color c = getHeaderUnderline (col);
-  int gap = width - characters (data);
+  int gap = width - utf8_length (data);
 
   std::string pad = std::string (padding, ' ');
 
@@ -530,7 +531,7 @@ void Table::formatCell (
   for (size_t chunk = 0; chunk < chunks.size (); ++chunk)
   {
     // Place the data within the available space - justify.
-    int gap = width - characters (chunks[chunk]);
+    int gap = width - utf8_length (chunks[chunk]);
 
     preJust = "";
     postJust = "";
diff --git a/src/View.cpp b/src/View.cpp
index 2a4bf7783..35f84e1d6 100644
--- a/src/View.cpp
+++ b/src/View.cpp
@@ -27,6 +27,7 @@
 
 #include <View.h>
 #include <text.h>
+#include <utf8.h>
 #include <main.h>
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -96,7 +97,7 @@ std::string View::render (std::vector <Task>& data, std::vector <int>& sequence)
   for (i = _columns.begin (); i != _columns.end (); ++i)
   {
     // Headers factor in to width calculations.
-    int global_min = characters ((*i)->getLabel ());
+    int global_min = utf8_length ((*i)->getLabel ());
     int global_ideal = global_min;
 
     std::vector <Task>::iterator d;
diff --git a/src/text.cpp b/src/text.cpp
index badea65e4..74566671e 100644
--- a/src/text.cpp
+++ b/src/text.cpp
@@ -36,6 +36,7 @@
 #include "Context.h"
 #include "util.h"
 #include "text.h"
+#include "utf8.h"
 
 extern Context context;
 
@@ -229,7 +230,7 @@ void extractLine (std::string& text, std::string& line, int length)
 
   // Special case: no \n, and less than length characters total.
   // special case: text.find ("\n") == std::string::npos && text.length () < length
-  if (eol == std::string::npos && characters (text) <= length)
+  if (eol == std::string::npos && utf8_length (text) <= length)
   {
     line = text;
     text = "";
@@ -635,26 +636,6 @@ int strippedLength (const std::string& input)
   return count;
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// UTF8
-int characters (const std::string& str)
-{
-  int byteLength = str.length ();
-  int charLength = byteLength;
-  const char* data = str.data ();
-
-  // decrement the number of bytes for each byte that matches 0b10??????
-  // this way only the first byte of any utf8 sequence is counted
-  for (int i = 0; i < byteLength; i++)
-  {
-    // extract the two MSB and check whether they are 10
-    if ((data[i] & 0xC0) == 0x80)
-      charLength--;
-  }
-
-  return charLength;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // Truncates a long line, and include a two-character ellipsis.
 std::string cutOff (const std::string& str, std::string::size_type len)
@@ -728,7 +709,7 @@ std::string leftJustify (const int input, const int width)
 ////////////////////////////////////////////////////////////////////////////////
 std::string leftJustify (const std::string& input, const int width)
 {
-  return input + std::string (width - characters (input), ' ');
+  return input + std::string (width - utf8_length (input), ' ');
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -742,7 +723,7 @@ std::string rightJustify (const int input, const int width)
 ////////////////////////////////////////////////////////////////////////////////
 std::string rightJustify (const std::string& input, const int width)
 {
-  return std::string (width - characters (input), ' ') + input;
+  return std::string (width - utf8_length (input), ' ') + input;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/text.h b/src/text.h
index 882a8b2e8..669e80464 100644
--- a/src/text.h
+++ b/src/text.h
@@ -71,8 +71,5 @@ std::string leftJustify (const std::string&, const int);
 std::string rightJustify (const int, const int);
 std::string rightJustify (const std::string&, const int);
 
-// UTF-8 aware.
-int characters (const std::string&);
-
 #endif
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/utf8.cpp b/src/utf8.cpp
index 0e3bee35f..ae81c8207 100644
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@@ -166,3 +166,22 @@ int utf8_sequence (unsigned int character)
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+int utf8_length (const std::string& str)
+{
+  int byteLength = str.length ();
+  int charLength = byteLength;
+  const char* data = str.data ();
+
+  // Start from the byte count and subtract one for every byte of the form
+  // 0b10??????, so that only the first byte of each UTF-8 sequence is counted.
+  for (int i = 0; i < byteLength; i++)
+  {
+    // Mask the two most significant bits and test whether they are 10.
+    if ((data[i] & 0xC0) == 0x80)
+      charLength--;
+  }
+
+  return charLength;
+}
+
+////////////////////////////////////////////////////////////////////////////////
diff --git a/src/utf8.h b/src/utf8.h
index 09aa49561..7f5ac618b 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -33,6 +33,8 @@ unsigned int utf8_codepoint (const std::string&);
 unsigned int utf8_next_char (const std::string&, std::string::size_type&);
 std::string utf8_character (unsigned int);
 int utf8_sequence (unsigned int);
+int utf8_length (const std::string&);
+
 
 #endif
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/test/text.t.cpp b/test/text.t.cpp
index 23b7bea1d..3361c1267 100644
--- a/test/text.t.cpp
+++ b/test/text.t.cpp
@@ -27,6 +27,7 @@
 #include <iostream>
 #include "main.h"
 #include "text.h"
+#include "utf8.h"
 #include "test.h"
 
 Context context;
@@ -387,11 +388,6 @@ int main (int argc, char** argv)
   t.is (strippedLength (std::string ("\033[0m")),                           0, "strippedLength ^[[0m                        -> 0");
   t.is (strippedLength (std::string ("\033[1m\033[0m")),                    0, "strippedLength ^[[1m^[[0m                   -> 0");
 
-  // int characters (const std::string&);
-  t.is (characters ("Çirçös"),            6, "characters (Çirçös) == 6");
-  t.is (characters ("ツネナラム"),        5, "characters (ツネナラム) == 5");
-  t.is (characters ("Zwölf Boxkämpfer"), 16, "characters (Zwölf Boxkämpfer) == 16");
-
   // std::string format (char);
   t.is (format ('A'), "A", "format ('A') -> A");
 
@@ -438,6 +434,11 @@ int main (int argc, char** argv)
   t.is (rightJustify ("foo", 5), "  foo", "rightJustify foo,5 -> '  foo'");
   t.is (rightJustify ("föo", 5), "  föo", "rightJustify föo,5 -> '  föo'");
 
+  // int utf8_length (const std::string&);
+  t.is (utf8_length ("Çirçös"),            6, "utf8_length (Çirçös) == 6");
+  t.is (utf8_length ("ツネナラム"),        5, "utf8_length (ツネナラム) == 5");
+  t.is (utf8_length ("Zwölf Boxkämpfer"), 16, "utf8_length (Zwölf Boxkämpfer) == 16");
+
   return 0;
 }