From 029f3af578e6b9f4d13977b04f9a1239b8a58542 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Tue, 23 Feb 2016 21:05:53 -0500 Subject: [PATCH] TW-1709: Parsing bug when doing "task undo" - Thanks to Scott Kostyshak. --- ChangeLog | 2 + src/text.cpp | 136 +++++++++++++++++++++------------------------------ 2 files changed, 58 insertions(+), 80 deletions(-) diff --git a/ChangeLog b/ChangeLog index fd7c472e8..d6c390ccf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -35,6 +35,8 @@ - TW-1704 Use Task::identifier to reference the Task in the output - TW-1705 Directories in .task/hooks should not be reported as invalid hooks (thanks to Tomas Babej). +- TW-1709 Parsing bug when doing "task undo" + (thanks to Scott Kostyshak). - TW-1710 Setting wait date on status:completed / status:deleted (thanks to Daniel Shahaf). - TW-1714 Starting recurring task starts all recurrences diff --git a/src/text.cpp b/src/text.cpp index 80d0f9509..91bd266e6 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -225,22 +225,7 @@ int longestLine (const std::string& input) } //////////////////////////////////////////////////////////////////////////////// -// Walk the input text looking for a break point. A break point is one of: -// - EOS -// - \n -// - last space before 'length' characters -// - last punctuation (, ; . :) before 'length' characters, even if not -// followed by a space -// - first 'length' characters -// -// text "one two three\n four" -// bytes 0123456789012 3456789 -// characters 1234567890a23 4567890 -// -// leading_ws -// ws ^ ^ ^^ -// punct -// break ^ +// Break UTF8 text into chunks no more than width characters. bool extractLine ( std::string& line, const std::string& text, @@ -249,91 +234,82 @@ bool extractLine ( unsigned int& offset) { // Terminate processing. - // Note: bytes vs bytes. if (offset >= text.length ()) return false; - std::string::size_type last_last_bytes = offset; - std::string::size_type last_bytes = offset; - std::string::size_type bytes = offset; - unsigned int last_ws = 0; - int character; - int char_width = 0; - int line_width = 0; - while (1) + int line_length {0}; + int character {0}; + std::string::size_type lastWordEnd {std::string::npos}; + bool something {false}; + std::string::size_type cursor {offset}; + std::string::size_type prior_cursor {offset}; + while ((character = utf8_next_char (text, cursor))) { - last_last_bytes = last_bytes; - last_bytes = bytes; - character = utf8_next_char (text, bytes); - - if (character == 0 || - character == '\n') + // Premature EOL. + if (character == '\n') { - line = text.substr (offset, last_bytes - offset); - offset = bytes; - break; + line = text.substr (offset, line_length); + offset = cursor; + return true; } - else if (character == ' ') - last_ws = last_bytes; - char_width = mk_wcwidth (character); - if (line_width + char_width > width) + if (! Lexer::isWhitespace (character)) { - int last_last_character = text[last_last_bytes]; - int last_character = text[last_bytes]; + something = true; + if (! text[cursor] || Lexer::isWhitespace (text[cursor])) + lastWordEnd = prior_cursor; + } - // [case 1] one| two --> last_last != 32, last == 32, ws == 0 - if (last_last_character != ' ' && - last_character == ' ') + line_length += mk_wcwidth (character); + + if (line_length >= width) + { + // Backtrack to previous word end. + if (lastWordEnd != std::string::npos) { - line = text.substr (offset, last_bytes - offset); - offset = last_bytes + 1; - break; + // Eat one WS after lastWordEnd. + std::string::size_type lastBreak = lastWordEnd; + utf8_next_char (text, lastBreak); + + // Position offset at following char. + std::string::size_type nextStart = lastBreak; + utf8_next_char (text, nextStart); + + line = text.substr (offset, lastBreak - offset); + offset = nextStart; + return true; } - // [case 2] one |two --> last_last == 32, last != 32, ws != 0 - else if (last_last_character == ' ' && - last_character != ' ' && - last_ws != 0) + // No backtrack, possible hyphenation. + else if (hyphenate) { - line = text.substr (offset, last_bytes - offset - 1); - offset = last_bytes; - break; + line = text.substr (offset, prior_cursor - offset) + "-"; + offset = prior_cursor; + return true; } - else if (last_last_character != ' ' && - last_character != ' ') + // No hyphenation, just truncation. + else { - // [case 3] one t|wo --> last_last != 32, last != 32, ws != 0 - if (last_ws != 0) - { - line = text.substr (offset, last_ws - offset); - offset = last_ws + 1; - break; - } - // [case 4] on|e two --> last_last != 32, last != 32, ws == 0 - else - { - if (hyphenate) - { - line = text.substr (offset, last_bytes - offset - 1) + "-"; - offset = last_last_bytes; - } - else - { - line = text.substr (offset, last_bytes - offset); - offset = last_bytes; - } - } - - break; + line = text.substr (offset, prior_cursor - offset); + offset = cursor; + return true; } } - line_width += char_width; + // Hindsight. + prior_cursor = cursor; } - return true; + // Residual text. + if (something) + { + line = text.substr (offset, cursor - offset); + offset = cursor; + return true; + } + + return false; } ////////////////////////////////////////////////////////////////////////////////