Code Cleanup

- Relocated the Table::getCharLength() from Johannes to text.cpp/characters()
  because it is a general-purpose function, and will be the start of the UTF8
  conversion of all text.cpp code.
- Added unit tests for characters().
This commit is contained in:
Paul Beckingham 2010-08-01 13:05:53 -04:00
parent 6e1aa42d1a
commit 2f1c582d7d
5 changed files with 35 additions and 28 deletions

View file

@ -620,3 +620,22 @@ int strippedLength (const std::string& input)
}
////////////////////////////////////////////////////////////////////////////////
int characters (const std::string& str)
{
  // Returns the number of bytes in str that are not UTF8 continuation bytes.
  // A continuation byte has the bit pattern 0b10??????, so skipping those
  // leaves exactly one counted byte (the first) per UTF8 sequence.
  int count = 0;

  for (std::string::const_iterator it = str.begin (); it != str.end (); ++it)
  {
    // Isolate the two most significant bits; 10 marks a continuation byte.
    const bool continuation = ((*it & 0xC0) == 0x80);

    if (! continuation)
      ++count;
  }

  return count;
}
////////////////////////////////////////////////////////////////////////////////