From 7cba0b71856dbabe8ee7d6a4345bf1fb63d9ad6c Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Tue, 2 Jun 2015 15:41:00 -0400 Subject: [PATCH] Text: Added ::extractLine and ::wrapText. --- src/text.cpp | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/text.h | 2 + 2 files changed, 129 insertions(+) diff --git a/src/text.cpp b/src/text.cpp index 42f62f6..27e20da 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -30,9 +30,136 @@ #include #include #include +#include +#include static void replace_positional (std::string&, const std::string&, const std::string&); +/////////////////////////////////////////////////////////////////////////////// +void wrapText ( + std::vector & lines, + const std::string& text, + const int width, + bool hyphenate) +{ + std::string line; + unsigned int offset = 0; + while (extractLine (line, text, width, hyphenate, offset)) + lines.push_back (line); +} + +//////////////////////////////////////////////////////////////////////////////// +// Walk the input text looking for a break point. A break point is one of: +// - EOS +// - \n +// - last space before 'length' characters +// - last punctuation (, ; . :) before 'length' characters, even if not +// followed by a space +// - first 'length' characters +// +// text "one two three\n four" +// bytes 0123456789012 3456789 +// characters 1234567890a23 4567890 +// +// leading_ws +// ws ^ ^ ^^ +// punct +// break ^ +bool extractLine ( + std::string& line, + const std::string& text, + int width, + bool hyphenate, + unsigned int& offset) +{ + // Terminate processing. + // Note: bytes vs bytes. + if (offset >= text.length ()) + return false; + + std::string::size_type last_last_bytes = offset; + std::string::size_type last_bytes = offset; + std::string::size_type bytes = offset; + unsigned int last_ws = 0; + int character; + int char_width = 0; + int line_width = 0; + while (1) + { + last_last_bytes = last_bytes; + last_bytes = bytes; + character = utf8_next_char (text, bytes); + + if (character == 0 || + character == '\n') + { + line = text.substr (offset, last_bytes - offset); + offset = bytes; + break; + } + else if (character == ' ') + last_ws = last_bytes; + + char_width = mk_wcwidth (character); + if (line_width + char_width > width) + { + int last_last_character = text[last_last_bytes]; + int last_character = text[last_bytes]; + + // [case 1] one| two --> last_last != 32, last == 32, ws == 0 + if (last_last_character != ' ' && + last_character == ' ') + { + line = text.substr (offset, last_bytes - offset); + offset = last_bytes + 1; + break; + } + + // [case 2] one |two --> last_last == 32, last != 32, ws != 0 + else if (last_last_character == ' ' && + last_character != ' ' && + last_ws != 0) + { + line = text.substr (offset, last_bytes - offset - 1); + offset = last_bytes; + break; + } + + else if (last_last_character != ' ' && + last_character != ' ') + { + // [case 3] one t|wo --> last_last != 32, last != 32, ws != 0 + if (last_ws != 0) + { + line = text.substr (offset, last_ws - offset); + offset = last_ws + 1; + break; + } + // [case 4] on|e two --> last_last != 32, last != 32, ws == 0 + else + { + if (hyphenate) + { + line = text.substr (offset, last_bytes - offset - 1) + "-"; + offset = last_last_bytes; + } + else + { + line = text.substr (offset, last_bytes - offset); + offset = last_bytes; + } + } + + break; + } + } + + line_width += char_width; + } + + return true; +} + //////////////////////////////////////////////////////////////////////////////// void split ( std::vector& results, diff --git a/src/text.h b/src/text.h index a67a2a6..6be2edb 100644 --- a/src/text.h +++ b/src/text.h @@ -30,9 +30,11 @@ #include #include +void wrapText (std::vector &, const std::string&, const int, bool); std::string trimLeft (const std::string& in, const std::string& t = " "); std::string trimRight (const std::string& in, const std::string& t = " "); std::string trim (const std::string& in, const std::string& t = " "); +bool extractLine (std::string&, const std::string&, int, bool, unsigned int&); void split (std::vector&, const std::string&, const char); std::string lowerCase (const std::string&); bool compare (const std::string&, const std::string&, bool sensitive = true);