Code Migration

- Migrated taskd JSON parser into task, to provide encode/decode
  capability to Task::composeJSON.
- Migrated taskd utf8 code, replacing old unused code.
- Added unit tests for JSON.
- Migrated Tree updates from taskd.
This commit is contained in:
Paul Beckingham 2011-01-22 23:33:47 -05:00
parent 2f4e0d9e17
commit 57c1983e07
13 changed files with 717 additions and 607 deletions

View file

@ -5,8 +5,8 @@ set (task_SRCS API.cpp API.h Att.cpp Att.h Cmd.cpp Cmd.h Color.cpp Color.h
Config.cpp Config.h Context.cpp Context.h Date.cpp Date.h
Directory.cpp Directory.h Duration.cpp Duration.h File.cpp
File.h Filter.cpp Filter.h Grid.cpp Grid.h Hooks.cpp Hooks.h
Keymap.cpp Keymap.h Lisp.cpp Lisp.h Location.cpp Location.h
Nibbler.cpp Nibbler.h Path.cpp Path.h Permission.cpp
JSON.cpp JSON.h Keymap.cpp Keymap.h Lisp.cpp Lisp.h Location.cpp
Location.h Nibbler.cpp Nibbler.h Path.cpp Path.h Permission.cpp
Permission.h Record.cpp Record.h Rectangle.cpp Rectangle.h
Sensor.cpp Sensor.h Sequence.cpp Sequence.h StringTable.cpp
StringTable.h Subst.cpp Subst.h TDB.cpp TDB.h Table.cpp Table.h
@ -17,7 +17,7 @@ set (task_SRCS API.cpp API.h Att.cpp Att.h Cmd.cpp Cmd.h Color.cpp Color.h
command.cpp custom.cpp dependency.cpp diag.cpp edit.cpp
export.cpp history.cpp i18n.h import.cpp interactive.cpp
recur.cpp report.cpp rules.cpp rx.cpp rx.h text.cpp text.h
util.cpp util.h Uri.cpp Uri.h)
utf8.cpp utf8.h util.cpp util.h Uri.cpp Uri.h)
add_library (task STATIC ${task_SRCS})
add_executable (task_executable main.cpp)

338
src/JSON.cpp Normal file
View file

@ -0,0 +1,338 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2010 - 2011, Paul Beckingham, Federico Hernandez.
// All rights reserved.
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software
// Foundation; either version 2 of the License, or (at your option) any later
// version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the
//
// Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor,
// Boston, MA
// 02110-1301
// USA
//
////////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <sstream>
#include <utf8.h>
#include <JSON.h>
////////////////////////////////////////////////////////////////////////////////
// Default constructor.  Performs no parsing; it only constructs the 'root'
// Tree member with the argument "root" (presumably the node's name - confirm
// against Tree's constructor).
JSON::JSON ()
: root ("root")
{
}
////////////////////////////////////////////////////////////////////////////////
// Parsing constructor.  Wraps 'input' in a Nibbler and hands it to parseObject,
// which populates the tree below 'root'.
//
// Throws std::string ("Syntax error in request.") when parseObject rejects the
// text.  Note that nothing here verifies that the whole input was consumed.
JSON::JSON (const std::string& input)
: root ("root")
{
  Nibbler source (input);

  const bool parsed = parseObject (&root, source);
  if (parsed)
    return;

  throw std::string ("Syntax error in request.");
}
////////////////////////////////////////////////////////////////////////////////
// Destructor.  The body is empty; the only data member, 'root', is held by
// value and is therefore destroyed automatically.
JSON::~JSON ()
{
}
////////////////////////////////////////////////////////////////////////////////
// \n -> "\\n"
// \t -> "\\t"
// Escapes a raw string for embedding inside a JSON string value.
//
// The characters  "  \  /  and the control characters \b \f \n \r \t are
// replaced by their two-character backslash escapes.  Every other byte,
// including other control characters and multi-byte UTF8 sequences, is copied
// through unchanged.
std::string JSON::encode (const std::string& input)
{
  std::string escaped;

  for (std::string::const_iterator c = input.begin (); c != input.end (); ++c)
  {
    const char ch = *c;

    // Simple translations.
    if      (ch == '"')  escaped.append ("\\\"");
    else if (ch == '\\') escaped.append ("\\\\");
    else if (ch == '/')  escaped.append ("\\/");
    else if (ch == '\b') escaped.append ("\\b");
    else if (ch == '\f') escaped.append ("\\f");
    else if (ch == '\n') escaped.append ("\\n");
    else if (ch == '\r') escaped.append ("\\r");
    else if (ch == '\t') escaped.append ("\\t");

    // Anything else is copied verbatim.
    else                 escaped.push_back (ch);
  }

  return escaped;
}
////////////////////////////////////////////////////////////////////////////////
// Reverses JSON::encode: converts backslash escapes in 'input' back to the
// characters they stand for.
//
//   \"  \\  \/  \b  \f  \n  \r  \t   -> the corresponding single character
//   \uXXXX                           -> the UTF8 encoding of codepoint XXXX
//   \<anything else>                 -> copied through unchanged (both chars)
//
// A lone backslash at the very end of the input is copied through as-is.
// The \u case relies on utf8_codepoint / utf8_character, both of which signal
// errors by throwing std::string; those exceptions are not caught here.
std::string JSON::decode (const std::string& input)
{
  std::string output;
  const std::string::size_type length = input.length ();

  for (std::string::size_type i = 0; i < length; ++i)
  {
    // Ordinary character: copy and move on.
    if (input[i] != '\\')
    {
      output += input[i];
      continue;
    }

    // A backslash that ends the input has nothing to escape.  Keep it rather
    // than reading the string terminator and appending a stray NUL.
    if (i + 1 >= length)
    {
      output += '\\';
      break;
    }

    // Step onto the character that follows the backslash.
    ++i;
    switch (input[i])
    {
    // Simple translations.
    case '"':  output += '"';  break;
    case '\\': output += '\\'; break;
    case '/':  output += '/';  break;
    case 'b':  output += '\b'; break;
    case 'f':  output += '\f'; break;
    case 'n':  output += '\n'; break;
    case 'r':  output += '\r'; break;
    case 't':  output += '\t'; break;

    // Compose a UTF8 unicode character.  ++i steps onto the first of the four
    // hex digits; i += 3 then leaves i on the last one, so that the loop's own
    // ++i moves past the whole escape.
    case 'u':
      output += utf8_character (utf8_codepoint (input.substr (++i)));
      i += 3;
      break;

    // If it is an unrecognized sequence, pass both characters through.
    default:
      output += '\\';
      output += input[i];
      break;
    }
  }

  return output;
}
////////////////////////////////////////////////////////////////////////////////
// Returns a pointer to the 'root' member, the top of the parsed tree.  The
// pointer refers to storage inside this JSON object, so it must not be used
// after the JSON object is destroyed, and must not be deleted by the caller.
Tree* JSON::tree ()
{
return &root;
}
////////////////////////////////////////////////////////////////////////////////
// object
// {}
// { pair , ... }
// Parses a JSON object at the current position of 'nibbler' and attaches one
// branch per pair to 't'.
//
// Parsing is done on a private copy of the Nibbler; the caller's 'nibbler' is
// only updated, and 't' only tagged with type "collection", when the closing
// brace has been found.  Returns false otherwise.  Branches added to 't' before
// a failure was detected are left in place.
bool JSON::parseObject (Tree* t, Nibbler& nibbler)
{
  Nibbler n (nibbler);
  n.skipWS ();

  // Not an object at all.
  if (! n.skip ('{'))
    return false;

  n.skipWS ();

  // The first pair is optional, because "{}" is a valid object.
  Tree* node = new Tree ("node");
  if (! parsePair (node, n))
  {
    delete node;
  }
  else
  {
    t->addBranch (node);
    n.skipWS ();

    // After a comma, however, another pair is mandatory.
    while (n.skip (','))
    {
      n.skipWS ();

      node = new Tree ("node");
      if (! parsePair (node, n))
      {
        delete node;
        return false;
      }

      t->addBranch (node);
      n.skipWS ();
    }
  }

  // Unterminated object.
  if (! n.skip ('}'))
    return false;

  // Success: commit the consumed input back to the caller.
  n.skipWS ();
  nibbler = n;
  t->attribute ("type", "collection");
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// pair
// string : value
// Parses   "key" : value   at the current position of 'nibbler'.
//
// The value is parsed into 't' by parseValue, and 't' is then renamed to the
// key.  The caller's 'nibbler' is advanced only if the whole pair parsed;
// otherwise false is returned and 'nibbler' is untouched.
bool JSON::parsePair (Tree* t, Nibbler& nibbler)
{
  Nibbler n (nibbler);

  // The key must be a double-quoted string.
  std::string key;
  if (! n.getQuoted ('"', key))
    return false;

  // Then a colon, with optional whitespace either side.
  n.skipWS ();
  if (! n.skip (':'))
    return false;

  n.skipWS ();
  if (! parseValue (t, n))
    return false;

  nibbler = n;
  t->name (key);
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// array
// []
// [ value , ... ]
// Parses a JSON array at the current position of 'nibbler' and attaches one
// branch per element to 't'.
//
// Parsing is done on a private copy of the Nibbler; the caller's 'nibbler' is
// only updated, and 't' only tagged with type "list", when the closing bracket
// has been found.  Returns false otherwise.  Branches added to 't' before a
// failure was detected are left in place.
bool JSON::parseArray (Tree* t, Nibbler& nibbler)
{
  Nibbler n (nibbler);
  n.skipWS ();

  // Not an array at all.
  if (! n.skip ('['))
    return false;

  n.skipWS ();

  // The first element is optional, because "[]" is a valid array.
  Tree* node = new Tree ("node");
  if (! parseValue (node, n))
  {
    delete node;
  }
  else
  {
    t->addBranch (node);
    n.skipWS ();

    // After a comma, however, another element is mandatory.
    while (n.skip (','))
    {
      n.skipWS ();

      node = new Tree ("node");
      if (! parseValue (node, n))
      {
        delete node;
        return false;
      }

      t->addBranch (node);
      n.skipWS ();
    }
  }

  // Unterminated array.
  if (! n.skip (']'))
    return false;

  // Success: commit the consumed input back to the caller.
  n.skipWS ();
  nibbler = n;
  t->attribute ("type", "list");
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// value
// string
// number
// object
// array
// true
// false
// null
// Parses any JSON value at the current position of 'nibbler' into 't'.
//
// The alternatives are tried in a fixed order: string, number, object, array,
// and finally the bare literals true, false and null.  The first one that
// matches wins.
//
// A literal is recorded on 't' as type "literal" with the literal's own text as
// the value, so that true, false and null remain distinguishable in the tree
// rather than leaving a node with no attributes.
//
// Returns true if a value was parsed, false otherwise.
bool JSON::parseValue (Tree* t, Nibbler& nibbler)
{
  if (parseString (t, nibbler) ||
      parseNumber (t, nibbler) ||
      parseObject (t, nibbler) ||
      parseArray  (t, nibbler))
  {
    return true;
  }

  static const char* const literals[] = {"true", "false", "null"};
  for (unsigned int i = 0; i < sizeof (literals) / sizeof (literals[0]); ++i)
  {
    if (nibbler.getLiteral (literals[i]))
    {
      t->attribute ("type", "literal");
      t->attribute ("value", std::string (literals[i]));
      return true;
    }
  }

  return false;
}
////////////////////////////////////////////////////////////////////////////////
// string
// ""
// " chars "
//
// chars
// char
// char chars
//
// char
// any-Unicode-character-except-"-or-\-or-control-character
// \"
// \\ [extra text to de-confuse gcc]
// \/
// \b
// \f
// \n
// \r
// \t
// \u four-hex-digits
// Parses a double-quoted string at the current position of 'nibbler'.
//
// On success 't' is tagged with type "string" and its "value" attribute is set
// to the text handed back by Nibbler::getQuoted (called with its third argument
// false).  No JSON::decode step is applied here.  Returns false, leaving 't'
// alone, when no quoted string is found.
bool JSON::parseString (Tree* t, Nibbler& nibbler)
{
  std::string text;
  if (! nibbler.getQuoted ('"', text, false))
    return false;

  t->attribute ("type", "string");
  t->attribute ("value", text);
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// number
// int frac exp
// int frac
// int exp
// int
// Parses a number at the current position of 'nibbler'.
//
// Nibbler::getNumber (double) is tried first and Nibbler::getInt second.
// Whichever succeeds, 't' is tagged with type "number" and its "value"
// attribute is set from the parsed quantity.  Returns false, leaving 't' alone,
// when neither matches.
bool JSON::parseNumber (Tree* t, Nibbler& nibbler)
{
  double real;
  if (nibbler.getNumber (real))
  {
    t->attribute ("type", "number");
    t->attribute ("value", real);
    return true;
  }

  int whole;
  if (nibbler.getInt (whole))
  {
    t->attribute ("type", "number");
    t->attribute ("value", whole);
    return true;
  }

  return false;
}
////////////////////////////////////////////////////////////////////////////////

61
src/JSON.h Normal file
View file

@ -0,0 +1,61 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2010 - 2011, Paul Beckingham, Federico Hernandez.
// All rights reserved.
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software
// Foundation; either version 2 of the License, or (at your option) any later
// version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the
//
// Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor,
// Boston, MA
// 02110-1301
// USA
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_JSON
#define INCLUDED_JSON
#include <string>
#include <Tree.h>
#include <Nibbler.h>
// JSON support: parses a JSON object into a Tree, and provides static helpers
// for escaping (encode) and unescaping (decode) JSON string contents.
//
// Typical use, as exercised by test/json.t.cpp:
//
//   JSON j (text);          // throws std::string on a syntax error
//   j.tree ()->dump ();
class JSON
{
public:
JSON (); // Default constructor
JSON (const std::string&); // Constructor
// NOTE(review): the copy constructor and assignment operator are declared
// here, but no definition of either appears in the JSON.cpp added by this
// commit.  Confirm whether that is intentional (to prevent copying) or whether
// definitions are missing.
JSON (const JSON&); // Copy constructor
JSON& operator= (const JSON&); // Assignment operator
~JSON (); // Destructor
// Escape / unescape the contents of a JSON string.  Both are static and need
// no JSON instance.
static std::string encode (const std::string&);
static std::string decode (const std::string&);
// Access to the parsed tree.  The returned pointer refers to the 'root' member
// of this object.
Tree* tree ();
private:
// Recursive descent parser, one method per grammar production.  Each takes the
// Tree node to populate and the Nibbler to read from, and returns true when
// the production matched.
bool parseObject (Tree*, Nibbler&);
bool parsePair (Tree*, Nibbler&);
bool parseArray (Tree*, Nibbler&);
bool parseValue (Tree*, Nibbler&);
bool parseString (Tree*, Nibbler&);
bool parseNumber (Tree*, Nibbler&);
private:
// Top of the parsed tree; constructed with the argument "root".
Tree root;
};
#endif
////////////////////////////////////////////////////////////////////////////////

View file

@ -26,11 +26,11 @@
////////////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <ctype.h>
#include "Nibbler.h"
#include "rx.h"
#include <inttypes.h>
#include <Nibbler.h>
#include <rx.h>
const char* c_digits = "0123456789";
@ -283,7 +283,7 @@ bool Nibbler::getInt (int& result)
if (i > mCursor)
{
result = atoi (mInput.substr (mCursor, i - mCursor).c_str ());
result = strtoimax (mInput.substr (mCursor, i - mCursor).c_str (), NULL, 10);
mCursor = i;
return true;
}
@ -328,7 +328,7 @@ bool Nibbler::getUnsignedInt (int& result)
if (i > mCursor)
{
result = atoi (mInput.substr (mCursor, i - mCursor).c_str ());
result = strtoimax (mInput.substr (mCursor, i - mCursor).c_str (), NULL, 10);
mCursor = i;
return true;
}
@ -392,7 +392,7 @@ bool Nibbler::getNumber (double& result)
while (i < mLength && isdigit (mInput[i]))
++i;
result = atof (mInput.substr (mCursor, i - mCursor).c_str ());
result = strtof (mInput.substr (mCursor, i - mCursor).c_str (), NULL);
mCursor = i;
return true;
}
@ -400,7 +400,7 @@ bool Nibbler::getNumber (double& result)
return false;
}
result = atof (mInput.substr (mCursor, i - mCursor).c_str ());
result = strtof (mInput.substr (mCursor, i - mCursor).c_str (), NULL);
mCursor = i;
return true;
}

View file

@ -47,6 +47,10 @@ public:
bool getUntilEOL (std::string&);
bool getUntilEOS (std::string&);
/*
bool getAllOneOf (const std::string&, std::string&);
*/
bool getQuoted (char, std::string&, bool quote = false);
bool getInt (int&);
bool getHex (int&);

View file

@ -27,9 +27,8 @@
#include <algorithm>
#include <iostream>
#include <sstream>
#include "text.h"
#include "Tree.h"
#include <text.h>
#include <Tree.h>
////////////////////////////////////////////////////////////////////////////////
// - Tree, Branch and Node are synonymous.
@ -118,6 +117,12 @@ int Tree::branches ()
return _branches.size ();
}
////////////////////////////////////////////////////////////////////////////////
// Setter for the node name: replaces _name with the supplied string.
void Tree::name (const std::string& name)
{
_name = name;
}
////////////////////////////////////////////////////////////////////////////////
std::string Tree::name () const
{
@ -135,9 +140,14 @@ void Tree::attribute (const std::string& name, const std::string& value)
// Accessor for attributes.
void Tree::attribute (const std::string& name, const int value)
{
std::stringstream s;
s << value;
_attributes[name] = s.str ();
_attributes[name] = format (value);
}
////////////////////////////////////////////////////////////////////////////////
// Accessor for attributes (double overload).  Stores the value under 'name' in
// _attributes as the text produced by format (value, 1, 8).
// NOTE(review): the meaning of the 1 and 8 arguments (presumably a width and a
// precision) is defined by format () elsewhere - confirm before relying on it.
void Tree::attribute (const std::string& name, const double value)
{
_attributes[name] = format (value, 1, 8);
}
////////////////////////////////////////////////////////////////////////////////

View file

@ -47,9 +47,11 @@ public:
void replaceBranch (Tree*, Tree*);
int branches ();
void name (const std::string&);
std::string name () const;
void attribute (const std::string&, const std::string&);
void attribute (const std::string&, const int);
void attribute (const std::string&, const double);
std::string attribute (const std::string&);
void removeAttribute (const std::string&);
int attributes () const;

View file

@ -1,486 +1,7 @@
#ifdef NOPE
/*
Basic UTF-8 manipulation routines
by Jeff Bezanson
placed in the public domain Fall 2005
This code is designed to provide the utilities you need to manipulate
UTF-8 as an internal string encoding. These functions do not perform the
error checking normally needed when handling UTF-8 data, so if you happen
to be from the Unicode Consortium you will want to flay me alive.
I do this because error checking can be performed at the boundaries (I/O),
with these routines reserved for higher performance on data known to be
valid.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#ifdef WIN32
#include <malloc.h>
#else
#include <alloca.h>
#endif
#include "utf8.h"
static const u_int32_t offsetsFromUTF8[6] = {
0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL
};
static const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/* returns length of next utf-8 sequence */
int u8_seqlen(char *s)
{
return trailingBytesForUTF8[(unsigned int)(unsigned char)s[0]] + 1;
}
/* conversions without error checking
only works for valid UTF-8, i.e. no 5- or 6-byte sequences
srcsz = source size in bytes, or -1 if 0-terminated
sz = dest size in # of wide characters
returns # characters converted
dest will always be L'\0'-terminated, even if there isn't enough room
for all the characters.
if sz = srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
*/
int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz)
{
u_int32_t ch;
char *src_end = src + srcsz;
int nb;
int i=0;
while (i < sz-1) {
nb = trailingBytesForUTF8[(unsigned char)*src];
if (srcsz == -1) {
if (*src == 0)
goto done_toucs;
}
else {
if (src + nb >= src_end)
goto done_toucs;
}
ch = 0;
switch (nb) {
/* these fall through deliberately */
case 3: ch += (unsigned char)*src++; ch <<= 6;
case 2: ch += (unsigned char)*src++; ch <<= 6;
case 1: ch += (unsigned char)*src++; ch <<= 6;
case 0: ch += (unsigned char)*src++;
}
ch -= offsetsFromUTF8[nb];
dest[i++] = ch;
}
done_toucs:
dest[i] = 0;
return i;
}
/* srcsz = number of source characters, or -1 if 0-terminated
sz = size of dest buffer in bytes
returns # characters converted
dest will only be '\0'-terminated if there is enough space. this is
for consistency; imagine there are 2 bytes of space left, but the next
character requires 3 bytes. in this case we could NUL-terminate, but in
general we can't when there's insufficient space. therefore this function
only NUL-terminates if all the characters fit, and there's space for
the NUL as well.
the destination string will never be bigger than the source string.
*/
int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz)
{
u_int32_t ch;
int i = 0;
char *dest_end = dest + sz;
while (srcsz<0 ? src[i]!=0 : i < srcsz) {
ch = src[i];
if (ch < 0x80) {
if (dest >= dest_end)
return i;
*dest++ = (char)ch;
}
else if (ch < 0x800) {
if (dest >= dest_end-1)
return i;
*dest++ = (ch>>6) | 0xC0;
*dest++ = (ch & 0x3F) | 0x80;
}
else if (ch < 0x10000) {
if (dest >= dest_end-2)
return i;
*dest++ = (ch>>12) | 0xE0;
*dest++ = ((ch>>6) & 0x3F) | 0x80;
*dest++ = (ch & 0x3F) | 0x80;
}
else if (ch < 0x110000) {
if (dest >= dest_end-3)
return i;
*dest++ = (ch>>18) | 0xF0;
*dest++ = ((ch>>12) & 0x3F) | 0x80;
*dest++ = ((ch>>6) & 0x3F) | 0x80;
*dest++ = (ch & 0x3F) | 0x80;
}
i++;
}
if (dest < dest_end)
*dest = '\0';
return i;
}
int u8_wc_toutf8(char *dest, u_int32_t ch)
{
if (ch < 0x80) {
dest[0] = (char)ch;
return 1;
}
if (ch < 0x800) {
dest[0] = (ch>>6) | 0xC0;
dest[1] = (ch & 0x3F) | 0x80;
return 2;
}
if (ch < 0x10000) {
dest[0] = (ch>>12) | 0xE0;
dest[1] = ((ch>>6) & 0x3F) | 0x80;
dest[2] = (ch & 0x3F) | 0x80;
return 3;
}
if (ch < 0x110000) {
dest[0] = (ch>>18) | 0xF0;
dest[1] = ((ch>>12) & 0x3F) | 0x80;
dest[2] = ((ch>>6) & 0x3F) | 0x80;
dest[3] = (ch & 0x3F) | 0x80;
return 4;
}
return 0;
}
/* charnum => byte offset */
int u8_offset(char *str, int charnum)
{
int offs=0;
while (charnum > 0 && str[offs]) {
(void)(isutf(str[++offs]) || isutf(str[++offs]) ||
isutf(str[++offs]) || ++offs);
charnum--;
}
return offs;
}
/* byte offset => charnum */
int u8_charnum(char *s, int offset)
{
int charnum = 0, offs=0;
while (offs < offset && s[offs]) {
(void)(isutf(s[++offs]) || isutf(s[++offs]) ||
isutf(s[++offs]) || ++offs);
charnum++;
}
return charnum;
}
/* number of characters */
int u8_strlen(char *s)
{
int count = 0;
int i = 0;
while (u8_nextchar(s, &i) != 0)
count++;
return count;
}
/* reads the next utf-8 sequence out of a string, updating an index */
u_int32_t u8_nextchar(char *s, int *i)
{
u_int32_t ch = 0;
int sz = 0;
do {
ch <<= 6;
ch += (unsigned char)s[(*i)++];
sz++;
} while (s[*i] && !isutf(s[*i]));
ch -= offsetsFromUTF8[sz-1];
return ch;
}
void u8_inc(char *s, int *i)
{
(void)(isutf(s[++(*i)]) || isutf(s[++(*i)]) ||
isutf(s[++(*i)]) || ++(*i));
}
void u8_dec(char *s, int *i)
{
(void)(isutf(s[--(*i)]) || isutf(s[--(*i)]) ||
isutf(s[--(*i)]) || --(*i));
}
int octal_digit(char c)
{
return (c >= '0' && c <= '7');
}
int hex_digit(char c)
{
return ((c >= '0' && c <= '9') ||
(c >= 'A' && c <= 'F') ||
(c >= 'a' && c <= 'f'));
}
/* assumes that src points to the character after a backslash
returns number of input characters processed */
int u8_read_escape_sequence(char *str, u_int32_t *dest)
{
u_int32_t ch;
char digs[9]="\0\0\0\0\0\0\0\0";
int dno=0, i=1;
ch = (u_int32_t)str[0]; /* take literal character */
if (str[0] == 'n')
ch = L'\n';
else if (str[0] == 't')
ch = L'\t';
else if (str[0] == 'r')
ch = L'\r';
else if (str[0] == 'b')
ch = L'\b';
else if (str[0] == 'f')
ch = L'\f';
else if (str[0] == 'v')
ch = L'\v';
else if (str[0] == 'a')
ch = L'\a';
else if (octal_digit(str[0])) {
i = 0;
do {
digs[dno++] = str[i++];
} while (octal_digit(str[i]) && dno < 3);
ch = strtol(digs, NULL, 8);
}
else if (str[0] == 'x') {
while (hex_digit(str[i]) && dno < 2) {
digs[dno++] = str[i++];
}
if (dno > 0)
ch = strtol(digs, NULL, 16);
}
else if (str[0] == 'u') {
while (hex_digit(str[i]) && dno < 4) {
digs[dno++] = str[i++];
}
if (dno > 0)
ch = strtol(digs, NULL, 16);
}
else if (str[0] == 'U') {
while (hex_digit(str[i]) && dno < 8) {
digs[dno++] = str[i++];
}
if (dno > 0)
ch = strtol(digs, NULL, 16);
}
*dest = ch;
return i;
}
/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
example: u8_unescape(mybuf, 256, "hello\\u220e")
note the double backslash is needed if called on a C string literal */
int u8_unescape(char *buf, int sz, char *src)
{
int c=0, amt;
u_int32_t ch;
char temp[4];
while (*src && c < sz) {
if (*src == '\\') {
src++;
amt = u8_read_escape_sequence(src, &ch);
}
else {
ch = (u_int32_t)*src;
amt = 1;
}
src += amt;
amt = u8_wc_toutf8(temp, ch);
if (amt > sz-c)
break;
memcpy(&buf[c], temp, amt);
c += amt;
}
if (c < sz)
buf[c] = '\0';
return c;
}
int u8_escape_wchar(char *buf, int sz, u_int32_t ch)
{
if (ch == L'\n')
return snprintf(buf, sz, "\\n");
else if (ch == L'\t')
return snprintf(buf, sz, "\\t");
else if (ch == L'\r')
return snprintf(buf, sz, "\\r");
else if (ch == L'\b')
return snprintf(buf, sz, "\\b");
else if (ch == L'\f')
return snprintf(buf, sz, "\\f");
else if (ch == L'\v')
return snprintf(buf, sz, "\\v");
else if (ch == L'\a')
return snprintf(buf, sz, "\\a");
else if (ch == L'\\')
return snprintf(buf, sz, "\\\\");
else if (ch < 32 || ch == 0x7f)
return snprintf(buf, sz, "\\x%hhX", (unsigned char)ch);
else if (ch > 0xFFFF)
return snprintf(buf, sz, "\\U%.8X", (u_int32_t)ch);
else if (ch >= 0x80 && ch <= 0xFFFF)
return snprintf(buf, sz, "\\u%.4hX", (unsigned short)ch);
return snprintf(buf, sz, "%c", (char)ch);
}
int u8_escape(char *buf, int sz, char *src, int escape_quotes)
{
int c=0, i=0, amt;
while (src[i] && c < sz) {
if (escape_quotes && src[i] == '"') {
amt = snprintf(buf, sz - c, "\\\"");
i++;
}
else {
amt = u8_escape_wchar(buf, sz - c, u8_nextchar(src, &i));
}
c += amt;
buf += amt;
}
if (c < sz)
*buf = '\0';
return c;
}
char *u8_strchr(char *s, u_int32_t ch, int *charn)
{
int i = 0, lasti=0;
u_int32_t c;
*charn = 0;
while (s[i]) {
c = u8_nextchar(s, &i);
if (c == ch) {
return &s[lasti];
}
lasti = i;
(*charn)++;
}
return NULL;
}
char *u8_memchr(char *s, u_int32_t ch, size_t sz, int *charn)
{
int i = 0, lasti=0;
u_int32_t c;
int csz;
*charn = 0;
while (i < sz) {
c = csz = 0;
do {
c <<= 6;
c += (unsigned char)s[i++];
csz++;
} while (i < sz && !isutf(s[i]));
c -= offsetsFromUTF8[csz-1];
if (c == ch) {
return &s[lasti];
}
lasti = i;
(*charn)++;
}
return NULL;
}
int u8_is_locale_utf8(char *locale)
{
/* this code based on libutf8 */
const char* cp = locale;
for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++) {
if (*cp == '.') {
const char* encoding = ++cp;
for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++)
;
if ((cp-encoding == 5 && !strncmp(encoding, "UTF-8", 5))
|| (cp-encoding == 4 && !strncmp(encoding, "utf8", 4)))
return 1; /* it's UTF-8 */
break;
}
}
return 0;
}
int u8_vprintf(char *fmt, va_list ap)
{
int cnt, sz=0;
char *buf;
u_int32_t *wcs;
sz = 512;
buf = (char*)alloca(sz);
try_print:
cnt = vsnprintf(buf, sz, fmt, ap);
if (cnt >= sz) {
buf = (char*)alloca(cnt - sz + 1);
sz = cnt + 1;
goto try_print;
}
wcs = (u_int32_t*)alloca((cnt+1) * sizeof(u_int32_t));
cnt = u8_toucs(wcs, cnt+1, buf, cnt);
printf("%ls", (wchar_t*)wcs);
return cnt;
}
int u8_printf(char *fmt, ...)
{
int cnt;
va_list args;
va_start(args, fmt);
cnt = u8_vprintf(fmt, args);
va_end(args);
return cnt;
}
#endif
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006 - 2011, Paul Beckingham, Federico Hernandez.
// Copyright 2010 - 2011, Paul Beckingham, Federico Hernandez.
// All rights reserved.
//
// This program is free software; you can redistribute it and/or modify it under
@ -504,42 +25,145 @@ int u8_printf(char *fmt, ...)
//
////////////////////////////////////////////////////////////////////////////////
#include "utf8.h"
static const u_int32_t offsetsFromUTF8[6] =
{
0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL,
};
#include <string>
#include <utf8.h>
////////////////////////////////////////////////////////////////////////////////
// number of characters in a string.
int utf8_length (const std::string& s)
{
int count = 0;
int i = 0;
while (utf8_nextchar (s.c_str (), &i) != 0)
count++;
return count;
}
// Converts '0' -> 0
// '9' -> 9
// 'a'/'A' -> 10
// 'f'/'F' -> 15
#define XDIGIT(x) ((x) >= '0' && (x) <= '9' ? ((x) - '0') : \
(x) >= 'a' && (x) <= 'f' ? ((x) + 10 - 'a') : \
(x) >= 'A' && (x) <= 'F' ? ((x) + 10 - 'A') : 0)
////////////////////////////////////////////////////////////////////////////////
// reads the next utf-8 sequence out of a string, updating an index.
u_int32_t utf8_nextchar (const char* s, int* i)
// Note: Assumes 4-digit hex codepoints:
// xxxx
// \uxxxx
// U+xxxx
unsigned int utf8_codepoint (const std::string& input)
{
u_int32_t ch = 0;
int sz = 0;
unsigned int codepoint = 0;
int length = input.length ();
do
// U+xxxx, \uxxxx
if (length >= 6 &&
((input[0] == 'U' && input[1] == '+') ||
(input[0] == '\\' && input[1] == 'u')))
{
ch <<= 6;
ch += (unsigned char) s[(*i)++];
sz++;
codepoint = XDIGIT (input[2]) << 12 |
XDIGIT (input[3]) << 8 |
XDIGIT (input[4]) << 4 |
XDIGIT (input[5]);
}
while (s[*i] && ! isutf (s[*i]));
else if (length >= 4)
{
codepoint = XDIGIT (input[0]) << 12 |
XDIGIT (input[1]) << 8 |
XDIGIT (input[2]) << 4 |
XDIGIT (input[3]);
}
else
throw std::string ("Invalid codepoint representation.");
return ch - offsetsFromUTF8[sz - 1];
return codepoint;
}
////////////////////////////////////////////////////////////////////////////////
// Iterates along a UTF8 string.
//
// Decodes the UTF8 sequence that starts at byte offset 'i' of 'input', advances
// 'i' past the bytes consumed, and returns the decoded codepoint.
//
// The sequence length is taken from the lead byte via utf8_sequence.  A byte
// that does not start a 2-, 3- or 4-byte sequence, or a lead byte that promises
// more bytes than remain in 'input', is consumed on its own and returned as its
// unsigned byte value.  Continuation bytes are not validated.
//
// Precondition: i <= input.length ().
unsigned int utf8_next_char (const std::string& input, std::string::size_type& i)
{
  // How many bytes in the sequence?
  const int length = utf8_sequence (input[i]);
  const std::string::size_type remaining = input.length () - i;

  // 0xxxxxxx -> 0xxxxxxx
  // Also the fallback for anything that cannot be decoded as a multi-byte
  // sequence.  The cast prevents a byte >= 0x80 from being sign-extended where
  // plain char is signed.
  if (length < 2 ||
      length > 4 ||
      remaining < static_cast <std::string::size_type> (length))
    return static_cast <unsigned char> (input[i++]);

  // Each byte is read in its own statement.  Several i++ inside a single
  // expression are unsequenced, so the byte order would not be guaranteed.
  const unsigned int b0 = static_cast <unsigned char> (input[i++]);
  const unsigned int b1 = static_cast <unsigned char> (input[i++]);

  // 110yyyyy 10xxxxxx -> 00000yyy yyxxxxxx
  if (length == 2)
    return ((b0 & 0x1F) << 6) +
            (b1 & 0x3F);

  const unsigned int b2 = static_cast <unsigned char> (input[i++]);

  // 1110zzzz 10yyyyyy 10xxxxxx -> zzzzyyyy yyxxxxxx
  if (length == 3)
    return ((b0 & 0xF)  << 12) +
           ((b1 & 0x3F) << 6)  +
            (b2 & 0x3F);

  const unsigned int b3 = static_cast <unsigned char> (input[i++]);

  // 11110www 10zzzzzz 10yyyyyy 10xxxxxx -> 000wwwzz zzzzyyyy yyxxxxxx
  return ((b0 & 0x7)  << 18) +
         ((b1 & 0x3F) << 12) +
         ((b2 & 0x3F) << 6)  +
          (b3 & 0x3F);
}
////////////////////////////////////////////////////////////////////////////////
// http://en.wikipedia.org/wiki/UTF-8
// Encodes a single Unicode codepoint as a UTF8 byte sequence.
//
//   codepoint < 0x80      -> 1 byte   0xxxxxxx
//   codepoint < 0x800     -> 2 bytes  110yyyyy 10xxxxxx
//   codepoint < 0x10000   -> 3 bytes  1110zzzz 10yyyyyy 10xxxxxx
//   codepoint < 0x110000  -> 4 bytes  11110www 10zzzzzz 10yyyyyy 10xxxxxx
//
// Throws std::string ("Invalid Unicode codepoint.") for anything larger.
//
// The result is built from a NUL-terminated buffer, so codepoint 0 yields an
// empty string.
std::string utf8_character (unsigned int codepoint)
{
  if (codepoint >= 0x110000)
    throw std::string ("Invalid Unicode codepoint.");

  // Zero-filled, so whichever branch runs leaves a terminated sequence behind.
  char bytes[5] = {0, 0, 0, 0, 0};

  if (codepoint < 0x80)
  {
    bytes[0] = codepoint;
  }
  else if (codepoint < 0x800)
  {
    bytes[0] = 0xC0 |  (codepoint >> 6);
    bytes[1] = 0x80 |  (codepoint        & 0x3F);
  }
  else if (codepoint < 0x10000)
  {
    bytes[0] = 0xE0 |  (codepoint >> 12);
    bytes[1] = 0x80 | ((codepoint >> 6)  & 0x3F);
    bytes[2] = 0x80 |  (codepoint        & 0x3F);
  }
  else
  {
    bytes[0] = 0xF0 |  (codepoint >> 18);
    bytes[1] = 0x80 | ((codepoint >> 12) & 0x3F);
    bytes[2] = 0x80 | ((codepoint >> 6)  & 0x3F);
    bytes[3] = 0x80 |  (codepoint        & 0x3F);
  }

  return std::string (bytes);
}
////////////////////////////////////////////////////////////////////////////////
// Given the first byte of a UTF8 sequence, returns the number of bytes in that
// sequence: 2 for 110xxxxx, 3 for 1110xxxx, 4 for 11110xxx, and 1 for anything
// else (plain ASCII, continuation bytes, and invalid lead bytes alike).
//
// Only the low 8 bits of 'character' are examined.
int utf8_sequence (unsigned int character)
{
  const unsigned int byte = character & 0xFF;

  if ((byte >> 5) == 0x06)   // 110xxxxx
    return 2;

  if ((byte >> 4) == 0x0E)   // 1110xxxx
    return 3;

  if ((byte >> 3) == 0x1E)   // 11110xxx
    return 4;

  return 1;
}
////////////////////////////////////////////////////////////////////////////////

View file

@ -1,81 +1,7 @@
#ifdef NOPE
#include <stdarg.h>
/* is c the start of a utf8 sequence? */
#define isutf(c) (((c)&0xC0)!=0x80)
/* convert UTF-8 data to wide character */
int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz);
/* the opposite conversion */
int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz);
/* single character to UTF-8 */
int u8_wc_toutf8(char *dest, u_int32_t ch);
/* character number to byte offset */
int u8_offset(char *str, int charnum);
/* byte offset to character number */
int u8_charnum(char *s, int offset);
/* return next character, updating an index variable */
u_int32_t u8_nextchar(char *s, int *i);
/* move to next character */
void u8_inc(char *s, int *i);
/* move to previous character */
void u8_dec(char *s, int *i);
/* returns length of next utf-8 sequence */
int u8_seqlen(char *s);
/* assuming src points to the character after a backslash, read an
escape sequence, storing the result in dest and returning the number of
input characters processed */
int u8_read_escape_sequence(char *src, u_int32_t *dest);
/* given a wide character, convert it to an ASCII escape sequence stored in
buf, where buf is "sz" bytes. returns the number of characters output. */
int u8_escape_wchar(char *buf, int sz, u_int32_t ch);
/* convert a string "src" containing escape sequences to UTF-8 */
int u8_unescape(char *buf, int sz, char *src);
/* convert UTF-8 "src" to ASCII with escape sequences.
if escape_quotes is nonzero, quote characters will be preceded by
backslashes as well. */
int u8_escape(char *buf, int sz, char *src, int escape_quotes);
/* utility predicates used by the above */
int octal_digit(char c);
int hex_digit(char c);
/* return a pointer to the first occurrence of ch in s, or NULL if not
found. character index of found character returned in *charn. */
char *u8_strchr(char *s, u_int32_t ch, int *charn);
/* same as the above, but searches a buffer of a given size instead of
a NUL-terminated string. */
char *u8_memchr(char *s, u_int32_t ch, size_t sz, int *charn);
/* count the number of characters in a UTF-8 string */
int u8_strlen(char *s);
int u8_is_locale_utf8(char *locale);
/* printf where the format string and arguments may be in UTF-8.
you can avoid this function and just use ordinary printf() if the current
locale is UTF-8. */
int u8_vprintf(char *fmt, va_list ap);
int u8_printf(char *fmt, ...);
#endif
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006 - 2011, Paul Beckingham, Federico Hernandez.
// Copyright 2010 - 2011, Paul Beckingham, Federico Hernandez.
// All rights reserved.
//
// This program is free software; you can redistribute it and/or modify it under
@ -103,11 +29,10 @@ int u8_printf(char *fmt, ...);
#include <string>
// is c the start of a utf8 sequence?
#define isutf(c) (((c)&0xC0)!=0x80)
int utf8_length (const std::string&);
u_int32_t utf8_nextchar (const char*, int*);
unsigned int utf8_codepoint (const std::string&);
unsigned int utf8_next_char (const std::string&, std::string::size_type&);
std::string utf8_character (unsigned int);
int utf8_sequence (unsigned int);
#endif
////////////////////////////////////////////////////////////////////////////////

1
test/.gitignore vendored
View file

@ -11,6 +11,7 @@ duration.t
file.t
filt.t
grid.t
json.t
lisp.t
list.t
nibbler.t

View file

@ -6,7 +6,7 @@ include_directories (${CMAKE_SOURCE_DIR}/src
set (test_SRCS date.t t.t tdb.t duration.t t.benchmark.t text.t autocomplete.t seq.t
record.t att.t stringtable.t subst.t nibbler.t filt.t cmd.t config.t
util.t color.t list.t path.t file.t grid.t directory.t rx.t taskmod.t
lisp.t rectangle.t sensor.t tree.t tree2.t uri.t)
lisp.t rectangle.t sensor.t tree.t tree2.t uri.t json.t)
add_custom_target (test ./run_all DEPENDS ${test_SRCS}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/test)

145
test/json.t.cpp Normal file
View file

@ -0,0 +1,145 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006 - 2011, Paul Beckingham.
// All rights reserved.
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software
// Foundation; either version 2 of the License, or (at your option) any later
// version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the
//
// Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor,
// Boston, MA
// 02110-1301
// USA
//
////////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <JSON.h>
#include <test.h>
#include <Context.h>
Context context;
////////////////////////////////////////////////////////////////////////////////
// Unit tests for JSON.
//
// The first part parses a series of sample documents and dumps the resulting
// trees to stdout for inspection; a parse failure throws std::string, which is
// reported through t.diag.  The second part is the 14 counted tests, covering
// JSON::encode and JSON::decode.
int main (int argc, char** argv)
{
  UnitTest t (14);

  try
  {
    // Basic parsing tests.
    std::string input = "{}";
    std::cout << "-- j1 -------------------\n"
              << "input: " << input << "\n";
    JSON j1 (input);
    j1.tree ()->dump ();

    input = "{\"name\":123}";
    std::cout << "-- j2 -------------------\n"
              << "input: " << input << "\n";
    JSON j2 (input);
    j2.tree ()->dump ();

    input = "{\"name\":123, \"array\":[1,2,3.4], \"map\":{\"m1\":\"v1\", \"m2\":\"v2\"}}";
    std::cout << "-- j3 -------------------\n"
              << "input: " << input << "\n";
    JSON j3 (input);
    j3.tree ()->dump ();

    // Sample ticket as a parsing test.
    input = "{\n"
            "\"ticket\": { \"type\":\"add\", \"client\":\"taskwarrior 2.x\"},\n"
            "\"auth\": { \"user\":\"paul\", \"org\":\"gbf\", \"key\":\".........\",\n"
            " \"locale\":\"en-US\" },\n"
            "\n"
            "\"add\": { \"description\":\"Wash the dog\",\n"
            " \"project\":\"home\",\n"
            " \"due\":\"20101101T000000Z\" }\n"
            "}";
    std::cout << "-- j4 -------------------\n"
              << "input: " << input << "\n";
    JSON j4 (input);
    j4.tree ()->dump ();
    std::cout << "-------------------------\n";

    // Regular unit tests.
    t.is (JSON::encode ("1\b2"), "1\\b2", "JSON::encode \\b -> \\\\b");
    t.is (JSON::decode ("1\\b2"), "1\b2", "JSON::decode \\\\b -> \\b");
    t.is (JSON::encode ("1\n2"), "1\\n2", "JSON::encode \\n -> \\\\n");
    t.is (JSON::decode ("1\\n2"), "1\n2", "JSON::decode \\\\n -> \\n");
    t.is (JSON::encode ("1\r2"), "1\\r2", "JSON::encode \\r -> \\\\r");
    t.is (JSON::decode ("1\\r2"), "1\r2", "JSON::decode \\\\r -> \\r");
    t.is (JSON::encode ("1\t2"), "1\\t2", "JSON::encode \\t -> \\\\t");
    t.is (JSON::decode ("1\\t2"), "1\t2", "JSON::decode \\\\t -> \\t");
    t.is (JSON::encode ("1\\2"), "1\\\\2", "JSON::encode \\ -> \\\\");
    t.is (JSON::decode ("1\\\\2"), "1\\2", "JSON::decode \\\\ -> \\");
    t.is (JSON::encode ("1\x2"), "1\x2", "JSON::encode \\x -> \\x (NOP)");
    t.is (JSON::decode ("1\x2"), "1\x2", "JSON::decode \\x -> \\x (NOP)");
    t.is (JSON::encode ("1€2"), "1€2", "JSON::encode € -> €");
    t.is (JSON::decode ("1\\u20ac2"), "1€2", "JSON::decode \\u20ac -> €");

    /*
    {
    "ticket":
    {
    "type":"synch",
    "client":"taskd-test-suite 1.0"
    },
    "synch":
    {
    "user":
    {
    "data":
    [
    {
    "uuid":"11111111-1111-1111-1111-111111111111",
    "status":"pending",
    "description":"This is a test",
    "entry":"20110111T124000Z"
    }
    ],
    "synch":"key"
    }
    },
    "auth":
    {
    "org":"gbf",
    "user":"Paul Beckingham",
    "key":"K",
    "locale":"en-US"
    }
    }
    */

    // The document above, on one line.  The banner names j5, so that this dump
    // cannot be confused with the j4 dump earlier in the output.
    input = "{\"ticket\":{\"type\":\"synch\",\"client\":\"taskd-test-suite 1.0\"},\"synch\":{\"user\":{\"data\":[{\"uuid\":\"11111111-1111-1111-1111-111111111111\",\"status\":\"pending\",\"description\":\"This is a test\",\"entry\":\"20110111T124000Z\"}],\"synch\":\"key\"}},\"auth\":{\"org\":\"gbf\",\"user\":\"Paul Beckingham\",\"key\":\"K\",\"locale\":\"en-US\"}}";
    std::cout << "-- j5 -------------------\n"
              << "input: " << input << "\n";
    JSON j5 (input);
    j5.tree ()->dump ();
  }

  // Parse errors arrive as std::string; report them as diagnostics.
  catch (std::string& e) {t.diag (e);}

  return 0;
}
////////////////////////////////////////////////////////////////////////////////

View file

@ -244,7 +244,7 @@ int main (int argc, char** argv)
t.diag ("Nibbler::getNumber");
n = Nibbler ("-1.234 2.3e4");
t.ok (n.getNumber (d), "'-1.234 2.3e4' : getNumber () -> true");
t.is (d, -1.234, "'-1.234 2.3e4' : getNumber () -> '-1.234'");
t.is (d, -1.234, 0.000001, "'-1.234 2.3e4' : getNumber () -> '-1.234'");
t.ok (n.skip (' '), " ' 2.3e4' : skip (' ') -> true");
t.ok (n.getNumber (d), " '2.3e4' : getNumber () -> true");
t.is (d, 2.3e4, " '2.3e4' : getNumber () -> '2.3e4'");