From 36ed70ad93d37b952e1876b62ad4d16711f5f88a Mon Sep 17 00:00:00 2001 From: Ralph Bean Date: Sat, 7 Jan 2012 12:31:08 -0500 Subject: [PATCH] Exports - Provided sample sqlite3 export script in Python, to serve as a starting point for anyone wanting to migrate taskwarrior data into a SQL database. Illustrates JSON parsing and separation of the relational data. Signed-off-by: Paul Beckingham --- ChangeLog | 1 + DEVELOPER | 7 +- scripts/add-ons/export-sql.py | 171 ++++++++++++++++++++++++++++++++++ src/text.cpp | 103 +++++++++++++------- src/utf8.cpp | 2 + test/text.t.cpp | 3 +- 6 files changed, 244 insertions(+), 43 deletions(-) create mode 100755 scripts/add-ons/export-sql.py diff --git a/ChangeLog b/ChangeLog index 08776414c..a7a993a13 100644 --- a/ChangeLog +++ b/ChangeLog @@ -40,6 +40,7 @@ output is enclosed by '[...]'. + The duration 'm' is now interpreted as 'months', not 'minutes'. + Urgency now has an 'age' component. + + Improved text wrapping of UTF8 text. # Tracked Features, sorted by ID. diff --git a/DEVELOPER b/DEVELOPER index 4350a978b..4ef6fc0c7 100644 --- a/DEVELOPER +++ b/DEVELOPER @@ -11,11 +11,6 @@ Deprecated Code New Code Needs This is code that needs to be written, usually down at the C++ function level. - - text.cpp extractLines needs to be rewritten in a UTF8-aware and color-code - sensitive manner. - - Need export_sql.yy script. Any language. This would have value as an - example, or template script serving as a starting-point for anyone who - needed this format. - Need export_viz.yy script. Any language. This would have value as an example, or template script serving as a starting-point for anyone who needed this format. @@ -135,6 +130,8 @@ Current Codebase Condition --- +2012-01-10 Removed entry for extractLines, which was rewritten. +2012-01-07 Removed entry for export-sql.yy. Example exists now. 2011-12-31 Added note about bad unit tests at EOY. 2011-12-23 Removed entry for tree-indentation function. 
# -- Patch residue that preceded the script in the collapsed paste (verbatim) --
# 2011-10-16 Removed obsolete entries, added test suite description.
# diff --git a/scripts/add-ons/export-sql.py b/scripts/add-ons/export-sql.py
# new file mode 100755
# index 000000000..d587a415e
# --- /dev/null
# +++ b/scripts/add-ons/export-sql.py
# @@ -0,0 +1,171 @@
#! /usr/bin/python
###############################################################################
# taskwarrior - a command line task list manager.
#
# Copyright 2006-2012, Paul Beckingham, Federico Hernandez.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# http://www.opensource.org/licenses/mit-license.php
#
###############################################################################
"""
export-sql.py -- Export the taskwarrior database as a series of SQL commands.

Example usage::

    $ ./export-sql.py | sqlite3 mytasks.db
    $ /usr/bin/sqlite3 mytasks.db "select * from annotations;"

Any command line arguments are handed to ``task export`` as a filter.

This script has only been tested with sqlite3, but in theory, it could be
easily modified to support mysql, postgres or whatever you choose.

The code runs unchanged on Python 2.6+ and Python 3.

Author:  Ralph Bean
"""

import json
import subprocess
import sys

from datetime import datetime

# The text type of the running interpreter (``unicode`` on 2.x, ``str`` on 3.x).
_text_type = type(u"")

# Timestamp layout used by `task export`, e.g. 20120107T173108Z.
DATE_FORMAT = "%Y%m%dT%H%M%SZ"

# Note that you may want to modify the field sizes to suit your usage.
table_definitions = """
CREATE TABLE tasks (
    uuid VARCHAR(255) NOT NULL,
    description VARCHAR(255) NOT NULL,
    entry DATETIME NOT NULL,
    end DATETIME,
    priority VARCHAR(32),
    project VARCHAR(32),
    status VARCHAR(32),
    PRIMARY KEY (uuid)
);

CREATE TABLE annotations (
    uuid VARCHAR(255) NOT NULL,
    description VARCHAR(255) NOT NULL,
    entry DATETIME NOT NULL,
    FOREIGN KEY(uuid) REFERENCES tasks(uuid)
);
"""


# Characters that must not appear verbatim inside a quoted SQL value, and
# what each one is replaced with.
replacements = {
    '"': '&dquot;',
    "'": '"',
    '[': '&open;',
    ']': '&close;',
    '/': '\\/',
}


def escape(s):
    """ Escape a string in the taskwarrior style.

    Every character of ``s`` that is a key of ``replacements`` is swapped for
    its replacement; all other characters are kept.

    The mapping is applied in a single pass over the input.  Chaining
    ``str.replace`` calls would make the result depend on dictionary order,
    because the replacement for ``'`` is ``"``, which is itself a key.
    """

    return "".join([replacements.get(char, char) for char in s])


# A lookup table for how to convert various values by type to SQL
conversion_lookup = {
    # Escape the text and wrap it in single quotes
    _text_type: lambda v: "'%s'" % escape(v),
    # Decode byte strings as UTF-8, then treat them as text
    bytes: lambda v: convert(v.decode("utf-8")),
    # Convert to ISO format and then treat it as text
    datetime: lambda v: convert(v.isoformat(' ')),
    # Replace python None with SQL NULL
    type(None): lambda v: 'NULL',
}


def convert(v):
    """ Render ``v`` as a SQL literal using ``conversion_lookup``.

    The lookup is on the exact type of ``v``; values whose type has no entry
    (numbers, lists, ...) are returned unchanged.
    """

    return conversion_lookup.get(type(v), lambda value: value)(v)


def parse_datetime(task):
    """ Parse the datetime strings given to us by `task export`.

    The 'entry' and 'end' values of the task, and the 'entry' value of each
    of its annotations, are replaced in place by ``datetime`` objects so that
    all timestamps end up in the same format in the database.  Values that
    are already ``datetime`` objects are left alone.  Returns ``task``.
    """

    def parse_keys(record, keys):
        for key in keys:
            if key in record and not isinstance(record[key], datetime):
                record[key] = datetime.strptime(record[key], DATE_FORMAT)

    parse_keys(task, ['entry', 'end'])
    for annotation in task.get('annotations') or []:
        parse_keys(annotation, ['entry'])
    return task


def to_sql(task):
    """ Create a list of SQL INSERT statements out of a task python dict.

    The first statement inserts the task itself; it is followed by one
    statement per annotation.  ``task`` is not modified.
    """

    def convert_values(record):
        """ Return a copy of ``record`` with every value run through convert """
        return dict((key, convert(value)) for key, value in record.items())

    def make_annotation(annot):
        """ Create the SQL INSERT statement for one annotation """

        # Work on a copy so the caller's annotation dict is left untouched.
        row = dict(annot)
        row['uuid'] = task['uuid']
        values = u"{uuid}, {description}, {entry}".format(**convert_values(row))
        return "INSERT INTO \"annotations\" VALUES(%s)" % values

    # Text (not byte string) templates: on Python 2 a byte string template
    # cannot be formatted with non-ASCII unicode values.
    template = u"{uuid}, {description}, {entry}, {end}, " + \
               u"{priority}, {project}, {status}"

    # Optional task attributes default to None, which converts to SQL NULL.
    nullables = ['end', 'priority', 'project', 'status']
    row = dict.fromkeys(nullables)
    row.update(task)

    values = template.format(**convert_values(row))
    annotations = [make_annotation(a) for a in row.get('annotations') or []]

    return ["INSERT INTO \"tasks\" VALUES(%s)" % values] + annotations


def run_task_export(args):
    """ Run `task export` with the filter ``args`` and return its output.

    The command is started from an argument list, without a shell, so the
    filter terms reach taskwarrior exactly as given.  Only stdout is read;
    it is decoded as UTF-8.
    """

    process = subprocess.Popen(["task", "export"] + list(args),
                               stdout=subprocess.PIPE)
    return process.communicate()[0].decode("utf-8")


def parse_export(text):
    """ Turn the text printed by `task export` into a list of task dicts.

    Accepts both a JSON array and a bare sequence of JSON objects separated
    by commas.  Empty output gives an empty list.
    """

    text = text.strip()
    if not text:
        return []
    if not text.startswith("["):
        # Bare comma-separated objects: wrap them to form a JSON array.
        text = "[" + text.rstrip(",") + "]"
    return json.loads(text)


def main(argv=None):
    """ Return a list of SQL statements.

    ``argv`` is the list of filter terms for `task export`; it defaults to
    the command line arguments of the script.
    """

    if argv is None:
        argv = sys.argv[1:]

    # Use the taskwarrior 2.0+ export command to filter and return JSON,
    # and load each task from json to a python dict
    tasks = parse_export(run_task_export(argv))

    # Produce formatted SQL statements for each task, after mangling the
    # datetime strings into python datetime objects
    inserts = []
    for task in tasks:
        inserts.extend(to_sql(parse_datetime(task)))

    return inserts


if __name__ == '__main__':
    # Get the INSERT statements
    lines = main()

    # Terminate each of them with a semicolon
    sql = table_definitions + "".join([line + ";\n" for line in lines])

    # Decorate them with sqlite3 trappings
    output = "\nBEGIN TRANSACTION;\n" + sql + "COMMIT;\n"

    # Write UTF-8 bytes explicitly: when stdout is a pipe (the documented
    # usage) the interpreter may otherwise fall back to an ASCII encoding.
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    stream.write(output.encode("utf-8"))

###############################################################################
# -- Patch residue that followed the script in the collapsed paste (verbatim) --
# diff --git a/src/text.cpp b/src/text.cpp
# index 17b0c9f93..c78addd37 100644
# --- a/src/text.cpp
# +++ b/src/text.cpp
# @@ -25,7 +25,6 @@
#  //
#  ////////////////////////////////////////////////////////////////////////////////
# -
#  #define L10N // Localization
complete. #include @@ -318,66 +317,98 @@ int longestLine (const std::string& input) } //////////////////////////////////////////////////////////////////////////////// +// Walk the input text looking for a break point. A break point is one of: +// - EOS +// - \n +// - last space before 'length' characters +// - first 'length' characters void extractLine ( std::string& text, std::string& line, int length, bool hyphenate) { - size_t eol = text.find ("\n"); - - // Special case: found \n in first length characters. - if (eol != std::string::npos && eol < (unsigned) length) + std::string::size_type bytes = 0; + std::string::size_type previous = std::string::npos; + std::string::size_type last_space = std::string::npos; + int character; + int chars; + for (chars = 0; chars < length; ++chars) { - line = text.substr (0, eol); // strip \n - text = text.substr (eol + 1); - return; + previous = bytes; + character = utf8_next_char (text, bytes); + + // Record last seen space. + if (character == ' ') + last_space = previous; + + // Newline is an early break point. + if (character == '\n') + { + line = text.substr (0, bytes - 1); + text = text.substr (bytes); + return; + } + + // EOS is an early break point. + if (character == 0) + { + line = text; + text = ""; + return; + } } - // Special case: no \n, and less than length characters total. - // special case: text.find ("\n") == std::string::npos && text.length () < length - if (eol == std::string::npos && utf8_text_length (text) <= length) + // Case where EOS was not quite reached. + // 012345 + // |eleven|\0 + if (text[bytes] == '\0') { line = text; text = ""; return; } - // Safe to ASSERT text.length () > length - - // Look for the last space prior to length - eol = length; - while (eol && text[eol] != ' ' && text[eol] != '\n') - --eol; - - // If a space was found, break there. - if (eol) + // Case where a word ends at the right margin. 
+ // 012345 + // |eleven|_ + if (text[bytes] == ' ') { - line = text.substr (0, eol); - text = text.substr (eol + 1); + line = text.substr (0, bytes); + text = text.substr (bytes + 1); + return; } - // If no space was found, hyphenate. + // Case where a word straddles the margin, but there is an earlier space + // to break on. + // 012345 + // |one_tw|o + if (last_space != std::string::npos) + { + line = text.substr (0, last_space); + text = text.substr (last_space + 1); + return; + } + + // Case where a word needs to be split, and there is no last_space. + // Hyphenation becomes the issue. + // 012345 + // |fiftee|n + // |fifte-|en else { - if (length > 1) + if (hyphenate) { - if (hyphenate) - { - line = text.substr (0, length - 1) + "-"; - text = text.substr (length - 1); - } - else - { - line = text.substr (0, length); - text = text.substr (length); - } + line = text.substr (0, previous) + "-"; + text = text.substr (previous); } else { - line = text.substr (0, 1); - text = text.substr (length); + line = text.substr (0, bytes); + text = text.substr (bytes); } + + return; } } diff --git a/src/utf8.cpp b/src/utf8.cpp index 0e950c40a..cef0bafb9 100644 --- a/src/utf8.cpp +++ b/src/utf8.cpp @@ -76,6 +76,8 @@ unsigned int utf8_codepoint (const std::string& input) //////////////////////////////////////////////////////////////////////////////// // Iterates along a UTF8 string. +// - argument i counts bytes advanced through the string +// - returns the next character unsigned int utf8_next_char (const std::string& input, std::string::size_type& i) { // How many bytes in the sequence? 
diff --git a/test/text.t.cpp b/test/text.t.cpp index 438bed3e1..71e71c505 100644 --- a/test/text.t.cpp +++ b/test/text.t.cpp @@ -37,7 +37,6 @@ Context context; int main (int argc, char** argv) { UnitTest t (262); - // void wrapText (std::vector & lines, const std::string& text, const int width, bool hyphenate) std::string text = "This is a test of the line wrapping code."; std::vector lines; @@ -66,7 +65,7 @@ int main (int argc, char** argv) text = "This ☺ is a test of utf8 line extraction."; std::string line; extractLine (text, line, 7, true); - t.is (line, "line 1", "extractLine 7 'This ☺ is a test of utf8 line extraction.' -> 'This ☺'"); + t.is (line, "This ☺", "extractLine 7 'This ☺ is a test of utf8 line extraction.' -> 'This ☺'"); // void extractLine (std::string& text, std::string& line, int length) text = "line 1\nlengthy second line that exceeds width";