From 36ed70ad93d37b952e1876b62ad4d16711f5f88a Mon Sep 17 00:00:00 2001
From: Ralph Bean <ralph.bean@gmail.com>
Date: Sat, 7 Jan 2012 12:31:08 -0500
Subject: [PATCH] Exports

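- Provided sample sqlite3 export script in Python, to serve as a
  starting point for anyone wanting to migrate taskwarrior data into
  a SQL database.  Illustrates JSON parsing and separation of the
  relational data.
- Rewrote extractLine in src/text.cpp to walk the text one UTF8
  character at a time, improving text wrapping of UTF8 text.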

Signed-off-by: Paul Beckingham <paul@beckingham.net>
---
 ChangeLog                     |   1 +
 DEVELOPER                     |   7 +-
 scripts/add-ons/export-sql.py | 171 ++++++++++++++++++++++++++++++++++
 src/text.cpp                  | 103 +++++++++++++-------
 src/utf8.cpp                  |   2 +
 test/text.t.cpp               |   3 +-
 6 files changed, 244 insertions(+), 43 deletions(-)
 create mode 100755 scripts/add-ons/export-sql.py

diff --git a/ChangeLog b/ChangeLog
index 08776414c..a7a993a13 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -40,6 +40,7 @@
    output is enclosed by '[...]'.
  + The duration 'm' is now interpreted as 'months', not 'minutes'.
  + Urgency now has an 'age' component.
+ + Improved text wrapping of UTF8 text.
  
 
 # Tracked Features, sorted by ID.
diff --git a/DEVELOPER b/DEVELOPER
index 4350a978b..4ef6fc0c7 100644
--- a/DEVELOPER
+++ b/DEVELOPER
@@ -11,11 +11,6 @@ Deprecated Code
 New Code Needs
   This is code that needs to be written, usually down at the C++ function level.
 
-  - text.cpp extractLines needs to be rewritten in a UTF8-aware and color-code
-    sensitive manner.
-  - Need export_sql.yy script.  Any language.  This would have value as an
-    example, or template script serving as a starting-point for anyone who
-    needed this format.
   - Need export_viz.yy script.  Any language.  This would have value as an
     example, or template script serving as a starting-point for anyone who
     needed this format.
@@ -135,6 +130,8 @@ Current Codebase Condition
 
 ---
 
+2012-01-10 Removed entry for extractLines, which was rewritten.
+2012-01-07 Removed entry for export-sql.yy.  Example exists now.
 2011-12-31 Added note about bad unit tests at EOY.
 2011-12-23 Removed entry for tree-indentation function.
 2011-10-16 Removed obsolete entries, added test suite description.
diff --git a/scripts/add-ons/export-sql.py b/scripts/add-ons/export-sql.py
new file mode 100755
index 000000000..d587a415e
--- /dev/null
+++ b/scripts/add-ons/export-sql.py
@@ -0,0 +1,171 @@
+#! /usr/bin/python
+###############################################################################
+# taskwarrior - a command line task list manager.
+#
+# Copyright 2006-2012, Paul Beckingham, Federico Hernandez.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# http://www.opensource.org/licenses/mit-license.php
+#
+###############################################################################
+"""
+export-sql.py -- Export the taskwarrior database as a series of SQL commands.
+
+Example usage::
+
+    $ ./export-sql.py | sqlite3 mytasks.db
+    $ /usr/bin/sqlite3 mytasks.db "select * from annotations;"
+
+This script has only been tested with sqlite3, but in theory, it could be
+easily modified to support mysql, postgres or whatever you choose.
+
+Author:  Ralph Bean
+"""
+
+import sys
+import commands
+import json
+
+from datetime import datetime
+
+# Note that you may want to modify the field sizes to suit your usage.
+table_definitions = """
+CREATE TABLE tasks (
+    uuid VARCHAR(255) NOT NULL,
+    description VARCHAR(255) NOT NULL,
+    entry DATETIME NOT NULL,
+    end DATETIME,
+    priority VARCHAR(32),
+    project VARCHAR(32),
+    status VARCHAR(32),
+    PRIMARY KEY (uuid)
+);
+
+CREATE TABLE annotations (
+    uuid VARCHAR(255) NOT NULL,
+    description VARCHAR(255) NOT NULL,
+    entry DATETIME NOT NULL,
+    FOREIGN KEY(uuid) REFERENCES tasks(uuid)
+);
+"""
+
+
+replacements = {
+    '"': '&dquot;',
+    "'": '&quot;',
+    '[': '&open;',
+    ']': '&close;',
+    '/': '\\/',
+}
+
+
+def escape(s):
+    """ Escape a string in the taskwarrior style """
+
+    for unsafe, safe in replacements.iteritems():
+        s = s.replace(unsafe, safe)
+    return s
+
+
+# A lookup table for how to convert various values by type to SQL
+conversion_lookup = {
+    # Tack on an extra set of quotes
+    unicode: lambda v: "'%s'" % escape(v),
+    # Do the same as for unicode
+    str: lambda v: convert(unicode(v)),
+    # Convert to ISO format and do the same as for unicode
+    datetime: lambda v: convert(v.isoformat(' ')),
+    # Replace python None with SQL NULL
+    type(None): lambda v: 'NULL',
+}
+
+# Compose a value with its corresponding function in conversion_lookup
+convert = lambda v: conversion_lookup.get(type(v), lambda v: v)(v)
+
+
+def parse_datetime(task):
+    """ Parse the datetime strings given to us by `task export` """
+
+    for key in ['entry', 'end']:
+        if key in task:
+            task[key] = datetime.strptime(task[key], "%Y%m%dT%H%M%SZ")
+    return task
+
+
+def to_sql(task):
+    """ Create a list of SQL INSERT statements out of a task python dict """
+
+    def make_annotation(annot):
+        """ Create a single SQL INSERT statement for an annotation """
+
+        annot['uuid'] = task['uuid']
+        template = "{uuid}, {description}, {entry}"
+        annot = dict(zip(annot.keys(), map(convert, annot.values())))
+        values = template.format(**annot)
+        return "INSERT INTO \"annotations\" VALUES(%s)" % values
+
+    template = "{uuid}, {description}, {entry}, {end}, " + \
+           "{priority}, {project}, {status}"
+
+    nullables = ['end', 'priority', 'project', 'status']
+    defaults = dict([(key, None) for key in nullables])
+    defaults['annotations'] = []
+    defaults.update(task)
+
+    defaults = dict(zip(defaults.keys(), map(convert, defaults.values())))
+
+    values = template.format(**defaults)
+    annotations = map(make_annotation, defaults['annotations'])
+
+    return ["INSERT INTO \"tasks\" VALUES(%s)" % values] + annotations
+
+
+def main():
+    """ Return a list of SQL statements. """
+
+    # Use the taskwarrior 2.0+ export command to filter and return JSON
+    command = "task export " + " ".join(sys.argv[1:])
+
+    # Load each task from json to a python dict
+    tasks = map(json.loads, commands.getoutput(command).split(",\n"))
+
+    # Mangle datetime strings into python datetime objects
+    tasks = map(parse_datetime, tasks)
+
+    # Produce formatted SQL statements for each task
+    inserts = sum(map(to_sql, tasks), [])
+
+    return inserts
+
+
+if __name__ == '__main__':
+    # Get the INSERT statements
+    lines = main()
+
+    # Combine them with semicolons
+    sql = table_definitions + ";\n".join(lines) + ';'
+
+    # Print them out, decorated with sqlite3 trappings
+    print """
+BEGIN TRANSACTION;
+{sql}
+COMMIT;""".format(sql=sql)
+
+###############################################################################
diff --git a/src/text.cpp b/src/text.cpp
index 17b0c9f93..c78addd37 100644
--- a/src/text.cpp
+++ b/src/text.cpp
@@ -25,7 +25,6 @@
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-
 #define L10N                                           // Localization complete.
 
 #include <algorithm>
@@ -318,66 +317,98 @@ int longestLine (const std::string& input)
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+// Walk the input text looking for a break point.  A break point is one of:
+//   - EOS
+//   - \n
+//   - last space before 'length' characters
+//   - first 'length' characters
 void extractLine (
   std::string& text,
   std::string& line,
   int length,
   bool hyphenate)
 {
-  size_t eol = text.find ("\n");
-
-  // Special case: found \n in first length characters.
-  if (eol != std::string::npos && eol < (unsigned) length)
+  std::string::size_type bytes      = 0;
+  std::string::size_type previous   = std::string::npos;
+  std::string::size_type last_space = std::string::npos;
+  int character;
+  int chars;
+  for (chars = 0; chars < length; ++chars)
   {
-    line = text.substr (0, eol); // strip \n
-    text = text.substr (eol + 1);
-    return;
+    previous = bytes;
+    character = utf8_next_char (text, bytes);
+
+    // Record last seen space.
+    if (character == ' ')
+      last_space = previous;
+
+    // Newline is an early break point.
+    if (character == '\n')
+    {
+      line = text.substr (0, bytes - 1);
+      text = text.substr (bytes);
+      return;
+    }
+
+    // EOS is an early break point.
+    if (character == 0)
+    {
+      line = text;
+      text = "";
+      return;
+    }
   }
 
-  // Special case: no \n, and less than length characters total.
-  // special case: text.find ("\n") == std::string::npos && text.length () < length
-  if (eol == std::string::npos && utf8_text_length (text) <= length)
+  // Case where EOS was not quite reached.
+  //  012345
+  // |eleven|\0
+  if (text[bytes] == '\0')
   {
     line = text;
     text = "";
     return;
   }
 
-  // Safe to ASSERT text.length () > length
-
-  // Look for the last space prior to length
-  eol = length;
-  while (eol && text[eol] != ' ' && text[eol] != '\n')
-    --eol;
-
-  // If a space was found, break there.
-  if (eol)
+  // Case where a word ends at the right margin.
+  //  012345
+  // |eleven|_
+  if (text[bytes] == ' ')
   {
-    line = text.substr (0, eol);
-    text = text.substr (eol + 1);
+    line = text.substr (0, bytes);
+    text = text.substr (bytes + 1);
+    return;
   }
 
-  // If no space was found, hyphenate.
+  // Case where a word straddles the margin, but there is an earlier space
+  // to break on.
+  //  012345
+  // |one_tw|o
+  if (last_space != std::string::npos)
+  {
+    line = text.substr (0, last_space);
+    text = text.substr (last_space + 1);
+    return;
+  }
+
+  // Case where a word needs to be split, and there is no last_space.
+  // Hyphenate only if length > 1; a lone "-" would consume no input.
+  //  012345
+  // |fiftee|n
+  // |fifte-|en
   else
   {
-    if (length > 1)
+    if (hyphenate && length > 1)
     {
-      if (hyphenate)
-      {
-        line = text.substr (0, length - 1) + "-";
-        text = text.substr (length - 1);
-      }
-      else
-      {
-        line = text.substr (0, length);
-        text = text.substr (length);
-      }
+      line = text.substr (0, previous) + "-";
+      text = text.substr (previous);
     }
     else
     {
-      line = text.substr (0, 1);
-      text = text.substr (length);
+      line = text.substr (0, bytes);
+      text = text.substr (bytes);
     }
+
+    return;
   }
 }
 
diff --git a/src/utf8.cpp b/src/utf8.cpp
index 0e950c40a..cef0bafb9 100644
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@@ -76,6 +76,8 @@ unsigned int utf8_codepoint (const std::string& input)
 
 ////////////////////////////////////////////////////////////////////////////////
 // Iterates along a UTF8 string.
+//   - argument i counts bytes advanced through the string
+//   - returns the next character
 unsigned int utf8_next_char (const std::string& input, std::string::size_type& i)
 {
   // How many bytes in the sequence?
diff --git a/test/text.t.cpp b/test/text.t.cpp
index 438bed3e1..71e71c505 100644
--- a/test/text.t.cpp
+++ b/test/text.t.cpp
@@ -37,7 +37,6 @@ Context context;
 int main (int argc, char** argv)
 {
   UnitTest t (262);
-
   // void wrapText (std::vector <std::string>& lines, const std::string& text, const int width, bool hyphenate)
   std::string text = "This is a test of the line wrapping code.";
   std::vector <std::string> lines;
@@ -66,7 +65,7 @@ int main (int argc, char** argv)
   text = "This ☺ is a test of utf8 line extraction.";
   std::string line;
   extractLine (text, line, 7, true);
-  t.is (line, "line 1", "extractLine 7 'This ☺ is a test of utf8 line extraction.' -> 'This ☺'");
+  t.is (line, "This ☺", "extractLine 7 'This ☺ is a test of utf8 line extraction.' -> 'This ☺'");
 
   // void extractLine (std::string& text, std::string& line, int length)
   text = "line 1\nlengthy second line that exceeds width";