mirror of
https://github.com/GothenburgBitFactory/taskwarrior.git
synced 2025-06-26 10:54:26 +02:00

- Provided sample sqlite3 export script in Python, to serve as a starting point for anyone wanting to migrate taskwarrior data into a SQL database. Illustrates JSON parsing and separation of the relational data. Signed-off-by: Paul Beckingham <paul@beckingham.net>
233 lines
6.8 KiB
C++
233 lines
6.8 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
// taskwarrior - a command line task list manager.
|
|
//
|
|
// Copyright 2006-2012, Paul Beckingham, Federico Hernandez.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included
|
|
// in all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
//
|
|
// http://www.opensource.org/licenses/mit-license.php
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
#define L10N // Localization complete.
|
|
|
|
#include <string>
|
|
#include <utf8.h>
|
|
#include <i18n.h>
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Converts '0' -> 0
|
|
// '9' -> 9
|
|
// 'a'/'A' -> 10
|
|
// 'f'/'F' -> 15
|
|
#define XDIGIT(x) ((x) >= '0' && (x) <= '9' ? ((x) - '0') : \
|
|
(x) >= 'a' && (x) <= 'f' ? ((x) + 10 - 'a') : \
|
|
(x) >= 'A' && (x) <= 'F' ? ((x) + 10 - 'A') : 0)
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Note: Assumes 4-digit hex codepoints:
|
|
// xxxx
|
|
// \uxxxx
|
|
// U+xxxx
|
|
unsigned int utf8_codepoint (const std::string& input)
|
|
{
|
|
unsigned int codepoint = 0;
|
|
int length = input.length ();
|
|
|
|
// U+xxxx, \uxxxx
|
|
if (length >= 6 &&
|
|
((input[0] == 'U' && input[1] == '+') ||
|
|
(input[0] == '\\' && input[1] == 'u')))
|
|
{
|
|
codepoint = XDIGIT (input[2]) << 12 |
|
|
XDIGIT (input[3]) << 8 |
|
|
XDIGIT (input[4]) << 4 |
|
|
XDIGIT (input[5]);
|
|
}
|
|
else if (length >= 4)
|
|
{
|
|
codepoint = XDIGIT (input[0]) << 12 |
|
|
XDIGIT (input[1]) << 8 |
|
|
XDIGIT (input[2]) << 4 |
|
|
XDIGIT (input[3]);
|
|
}
|
|
else
|
|
throw std::string (STRING_UTF8_INVALID_CP_REP);
|
|
|
|
return codepoint;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Iterates along a UTF8 string.
|
|
// - argument i counts bytes advanced through the string
|
|
// - returns the next character
|
|
unsigned int utf8_next_char (const std::string& input, std::string::size_type& i)
|
|
{
|
|
// How many bytes in the sequence?
|
|
int length = utf8_sequence (input[i]);
|
|
|
|
i += length;
|
|
|
|
// 0xxxxxxx -> 0xxxxxxx
|
|
if (length == 1)
|
|
return input[i - 1];
|
|
|
|
// 110yyyyy 10xxxxxx -> 00000yyy yyxxxxxx
|
|
if (length == 2)
|
|
return ((input[i - 2] & 0x1F) << 6) +
|
|
(input[i - 1] & 0x3F);
|
|
|
|
// 1110zzzz 10yyyyyy 10xxxxxx -> zzzzyyyy yyxxxxxx
|
|
if (length == 3)
|
|
return ((input[i - 3] & 0xF) << 12) +
|
|
((input[i - 2] & 0x3F) << 6) +
|
|
(input[i - 1] & 0x3F);
|
|
|
|
// 11110www 10zzzzzz 10yyyyyy 10xxxxxx -> 000wwwzz zzzzyyyy yyxxxxxx
|
|
if (length == 4)
|
|
return ((input[i - 4] & 0x7) << 18) +
|
|
((input[i - 3] & 0x3F) << 12) +
|
|
((input[i - 2] & 0x3F) << 6) +
|
|
(input[i - 1] & 0x3F);
|
|
|
|
// Default: pretend as though it's a single character.
|
|
// TODO Or should this throw?
|
|
return input[i - 1];
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// http://en.wikipedia.org/wiki/UTF-8
|
|
std::string utf8_character (unsigned int codepoint)
|
|
{
|
|
char sequence[5];
|
|
|
|
// 0xxxxxxx -> 0xxxxxxx
|
|
if (codepoint < 0x80)
|
|
{
|
|
sequence[0] = codepoint;
|
|
sequence[1] = 0;
|
|
}
|
|
|
|
// 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx
|
|
else if (codepoint < 0x800)
|
|
{
|
|
sequence[0] = 0xC0 | (codepoint & 0x7C0) >> 6;
|
|
sequence[1] = 0x80 | (codepoint & 0x3F);
|
|
sequence[2] = 0;
|
|
}
|
|
|
|
// zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
|
|
else if (codepoint < 0x10000)
|
|
{
|
|
sequence[0] = 0xE0 | (codepoint & 0xF000) >> 12;
|
|
sequence[1] = 0x80 | (codepoint & 0xFC0) >> 6;
|
|
sequence[2] = 0x80 | (codepoint & 0x3F);
|
|
sequence[3] = 0;
|
|
}
|
|
|
|
// 000wwwzz zzzzyyyy yyxxxxxx -> 11110www 10zzzzzz 10yyyyyy 10xxxxxx
|
|
else if (codepoint < 0x110000)
|
|
{
|
|
sequence[0] = 0xF0 | (codepoint & 0x1C0000) >> 18;
|
|
sequence[1] = 0x80 | (codepoint & 0x03F000) >> 12;
|
|
sequence[2] = 0x80 | (codepoint & 0x0FC0) >> 6;
|
|
sequence[3] = 0x80 | (codepoint & 0x3F);
|
|
sequence[4] = 0;
|
|
}
|
|
else
|
|
throw std::string (STRING_UTF8_INVALID_CP);
|
|
|
|
sequence[4] = '\0';
|
|
return std::string (sequence);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
int utf8_sequence (unsigned int character)
|
|
{
|
|
if ((character & 0xE0) == 0xC0)
|
|
return 2;
|
|
|
|
if ((character & 0xF0) == 0xE0)
|
|
return 3;
|
|
|
|
if ((character & 0xF8) == 0xF0)
|
|
return 4;
|
|
|
|
return 1;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
int utf8_length (const std::string& str)
|
|
{
|
|
int byteLength = str.length ();
|
|
int charLength = byteLength;
|
|
const char* data = str.data ();
|
|
|
|
// Decrement the number of bytes for each byte that matches 0b10??????
|
|
// this way only the first byte of any utf8 sequence is counted.
|
|
for (int i = 0; i < byteLength; i++)
|
|
{
|
|
// Extract the first two bits and check whether they are 10
|
|
if ((data[i] & 0xC0) == 0x80)
|
|
charLength--;
|
|
}
|
|
|
|
return charLength;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
int utf8_text_length (const std::string& str)
|
|
{
|
|
int byteLength = str.length ();
|
|
int charLength = byteLength;
|
|
const char* data = str.data ();
|
|
bool in_color = false;
|
|
|
|
// Decrement the number of bytes for each byte that matches 0b10??????
|
|
// this way only the first byte of any utf8 sequence is counted.
|
|
for (int i = 0; i < byteLength; i++)
|
|
{
|
|
if (in_color)
|
|
{
|
|
if (data[i] == 'm')
|
|
in_color = false;
|
|
|
|
--charLength;
|
|
}
|
|
else
|
|
{
|
|
if (data[i] == 033)
|
|
{
|
|
in_color = true;
|
|
--charLength;
|
|
}
|
|
else
|
|
{
|
|
// Extract the first two bits and check whether they are 10
|
|
if ((data[i] & 0xC0) == 0x80)
|
|
--charLength;
|
|
}
|
|
}
|
|
}
|
|
|
|
return charLength;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|