diff --git a/ChangeLog b/ChangeLog index bf53c4bf2..407e21d79 100644 --- a/ChangeLog +++ b/ChangeLog @@ -31,6 +31,7 @@ - TW-1733 taskwarrior 2.5.0 can not compile FreeBSD 10.1 (thanks to ribbon). - TW-1735 context with no subcommand should do something (thanks to Simon Michael). +- TW-1736 Error on detection of BOM in files. - TW-1738 add defined languages JAPANESE (thanks to ribbon). - TW-1741 Warning "ignoring return value of ‘int ftruncate" while doing make on xubuntu15.10 (thanks to Sunil Joshi). diff --git a/doc/man/task.1.in b/doc/man/task.1.in index 4622fced0..cddf6b6d3 100644 --- a/doc/man/task.1.in +++ b/doc/man/task.1.in @@ -1212,6 +1212,9 @@ intact, so you can use: $ task add project:\\'Three Word Project\\' description .RE +Taskwarrior supports Unicode using only the UTF8 encoding, with no Byte Order +Marks in the data files. + .SH CONFIGURATION FILE AND OVERRIDE OPTIONS Taskwarrior stores its configuration in a file in the user's home directory: ~/.taskrc. The default configuration file can be overridden with: diff --git a/src/FS.cpp b/src/FS.cpp index f7589d907..88e4d3f69 100644 --- a/src/FS.cpp +++ b/src/FS.cpp @@ -375,6 +375,17 @@ bool File::remove () const return unlink (_data.c_str ()) == 0 ? true : false; } +//////////////////////////////////////////////////////////////////////////////// +std::string File::removeBOM (const std::string& input) +{ + if (input[0] && input[0] == '\xEF' && + input[1] && input[1] == '\xBB' && + input[2] && input[2] == '\xBF') + return input.substr (3); + + return input; +} + //////////////////////////////////////////////////////////////////////////////// bool File::open () { @@ -457,10 +468,20 @@ void File::read (std::string& contents) std::ifstream in (_data.c_str ()); if (in.good ()) { + bool first = true; std::string line; line.reserve (512 * 1024); while (getline (in, line)) + { + // Detect forbidden BOM on first line. + if (first) + { + line = File::removeBOM (line); + first = false; + } + contents += line + "\n"; + } in.close (); } @@ -475,10 +496,20 @@ void File::read (std::vector & contents) std::ifstream in (_data.c_str ()); if (in.good ()) { + bool first = true; std::string line; line.reserve (512 * 1024); while (getline (in, line)) + { + // Detect forbidden BOM on first line. + if (first) + { + line = File::removeBOM (line); + first = false; + } + contents.push_back (line); + } in.close (); } @@ -627,10 +658,20 @@ bool File::read (const std::string& name, std::string& contents) std::ifstream in (name.c_str ()); if (in.good ()) { + bool first = true; std::string line; line.reserve (1024); while (getline (in, line)) + { + // Detect forbidden BOM on first line. + if (first) + { + line = File::removeBOM (line); + first = false; + } + contents += line + "\n"; + } in.close (); return true; @@ -647,10 +688,20 @@ bool File::read (const std::string& name, std::vector & contents) std::ifstream in (name.c_str ()); if (in.good ()) { + bool first = true; std::string line; line.reserve (1024); while (getline (in, line)) + { + // Detect forbidden BOM on first line. + if (first) + { + line = File::removeBOM (line); + first = false; + } + contents.push_back (line); + } in.close (); return true; diff --git a/src/FS.h b/src/FS.h index bd7a8a077..db873ecf4 100644 --- a/src/FS.h +++ b/src/FS.h @@ -107,6 +107,7 @@ public: static bool write (const std::string&, const std::string&); static bool write (const std::string&, const std::vector &, bool addNewlines = true); static bool remove (const std::string&); + static std::string removeBOM (const std::string&); private: FILE* _fh; diff --git a/test/fs.t.cpp b/test/fs.t.cpp index 5554bd40a..2a573c545 100644 --- a/test/fs.t.cpp +++ b/test/fs.t.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -35,7 +36,7 @@ Context context; int main (int, char**) { - UnitTest t (112); + UnitTest t (116); // Ensure environment has no influence. unsetenv ("TASKDATA"); @@ -291,6 +292,19 @@ int main (int, char**) tmp.remove (); t.notok (tmp.exists (), "tmp dir removed."); + // File::removeBOM + std::string line = "Should not be modified."; + t.is (File::removeBOM (line), line, "File::removeBOM 'Should not be modified' --> 'Should not be modified'"); + + line = "no"; + t.is (File::removeBOM (line), line, "File::removeBOM 'no' --> 'no'"); + + line = ""; + t.is (File::removeBOM (line), line, "File::removeBOM '' --> ''"); + + line = {'\xEF', '\xBB', '\xBF', 'F', 'o', 'o'}; + t.is (File::removeBOM (line), "Foo", "File::removeBOM 'Foo' --> 'Foo'"); + return 0; }