L10N Utility

- Combined three l10n utility scripts into one, which can now do the following:
  - Automatically locate all the localized string files.
  - Look for missing strings in the localized string files.
  - Point out strings that are not used in the code.
  - Count strings.
  - Warn when strings are unchanged across languages.
  - Exit with a status code, so it can be used in unit tests.
- Thanks to Wim Schuermann and Fidel Mato for providing code and ideas.
This commit is contained in:
Paul Beckingham 2013-04-14 11:29:46 -04:00
parent 09f577536a
commit f0048395fb
4 changed files with 176 additions and 116 deletions

172
scripts/utils/l10n Executable file
View file

@ -0,0 +1,172 @@
#! /usr/bin/env python -tt
################################################################################
## taskwarrior - a command line task list manager.
##
## Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to deal
## in the Software without restriction, including without limitation the rights
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
## copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included
## in all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
## OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
##
## http://www.opensource.org/licenses/mit-license.php
##
################################################################################
from __future__ import print_function
from __future__ import unicode_literals
import os
import sys
import argparse
import re
import fnmatch
def find_localizations(source):
    '''Collects the paths of all localization headers below `source`.

    A localization header is any file whose name matches the shell-style
    pattern [a-z][a-z]-[A-Z][A-Z].h (for example en-US.h).  Symbolic links to
    directories are not followed.

    Returns a list of paths, each one joined onto the directory in which the
    file was found.  The list is empty when nothing matches.
    '''
    pattern = '[a-z][a-z]-[A-Z][A-Z].h'
    matches = []
    for directory, _subdirs, names in os.walk(source):
        for name in fnmatch.filter(names, pattern):
            matches.append(os.path.join(directory, name))
    return matches
def read_file(translations, file):
    '''Loads the localized strings defined in `file` into `translations`.

    Every '#define STRING_xxx "text"' found in the file (the text may follow
    on a continuation line after a backslash) is stored as
    translations[file]['STRING_xxx'] = 'text'.  Any previous entry for `file`
    is replaced, and a later definition of the same ID overrides an earlier
    one.
    '''
    define_re = re.compile(r'^\s*#define\s+(STRING_[^\s]+)(\s|\\)+"([^"]*)"',
                           re.MULTILINE)

    # Register the file before opening it, so it has an (empty) entry even if
    # reading fails.
    entries = translations[file] = {}
    with open(file, 'r') as fh:
        text = fh.read()

    for found in define_re.finditer(text):
        entries[found.group(1)] = found.group(3)
def is_present(translations, file, string):
    '''Reports whether the string ID `string` has a definition in `file`.

    `translations` maps each file name to a dict of its string IDs; `file`
    must already be a key of it.
    '''
    defined_ids = translations[file]
    return string in defined_ids
def used_in_source(source, string):
    '''Determines if the string is used in the source.

    Runs 'git grep' for `string` below `source` and discards every output
    line that mentions a localization header name (xx-YY.h), so that the
    definitions of the string do not count as a use of it.

    Returns True when at least one other line was reported, and False
    otherwise -- including when git cannot be started or reports an error.
    '''
    # Local import: the file's import block does not provide subprocess.
    import subprocess

    # An argument list (no shell) keeps the path and the string ID from being
    # word-split or interpreted, and keeps the xx-YY.h pattern from being
    # glob-expanded against header files in the current directory.
    try:
        proc = subprocess.Popen(['git', 'grep', '-e', string, '--', source],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        output = proc.communicate()[0]
    except OSError:
        return False

    # Same expression the former 'grep -v' used; '.' deliberately stays
    # unescaped so the filter accepts exactly what it did before.
    l10n_header = re.compile(br'[a-z][a-z]-[A-Z][A-Z].h')
    return any(not l10n_header.search(line) for line in output.splitlines())
def is_translated(translations, file, string):
    '''Reports whether `string` in `file` differs from the base localization.

    Returns False -- meaning translation work is still needed -- when the
    string is absent from `file`, or when its text is identical to the text
    in the base file.  Returns True for the base file itself, for strings the
    base file does not define, and for strings whose text differs from the
    base.  The base file name is taken from the module-level `base`.
    '''
    # The base file is the reference and never needs translating.
    if file == base:
        return True

    reference = translations[base]
    if string not in reference:
        return True

    localized = translations[file]
    if string not in localized:
        return False

    return localized[string] != reference[string]
def main(args):
    '''Processes all the localized files.

    Reads every file in args.files, forms the union of the string IDs they
    define and, unless args.quiet is set, prints a table with one row per
    string ID and one column per file, followed by a per-file total.  Rows
    are shown only for IDs missing from some file, or for every ID when
    args.all is set.  With args.search, a displayed ID that is not found in
    the source tree args.source also counts as an error.

    This function does not return: it always ends with sys.exit(), using
    status 1 if no strings were found, an ID is missing from a file, or
    (with args.search) a displayed ID is unused, and status 0 otherwise.

    Raises Exception if a file listed in args.files does not exist.
    '''
    errors = 0

    translations = {}
    for file in args.files:
        # Verify all files exist.
        if not os.path.exists(file):
            raise Exception("Localized file '%s' not readable." % file)

        read_file(translations, file)

    # Union of the string IDs defined in any of the files.
    strings = set()
    for file in translations:
        strings.update(translations[file])

    if not strings:
        # There is nothing to tabulate.  Fail here with a proper status
        # instead of letting max() below raise ValueError on an empty set.
        if not args.quiet:
            print("There are no localized strings found.")
        sys.exit(1)

    # Get length of longest string ID.
    longest_string = len(max(strings, key=len))

    # Display info.
    if not args.quiet:
        print('Scanning in', args.source)
        print()

    # A real list rather than a lazy map object: it is joined and measured
    # more than once, which a one-shot iterator does not survive on Python 3.
    files = [os.path.basename(x) for x in args.files]
    rule = '%s %s' % ('-' * longest_string, ' '.join(['-------'] * len(files)))

    # Print header line.
    if not args.quiet:
        print('%-*s %s' % (longest_string, 'String ID', ' '.join(files)))
        print(rule)

    for string in sorted(strings):
        # Build one status cell per file for this string ID.
        line = ''
        line_errors = 0
        for file in args.files:
            if not is_present(translations, file, string):
                line += ' Missing'
                line_errors = 1
            elif is_translated(translations, file, string):
                line += ' Ok '
            else:
                line += ' TODO '

        if args.all or line_errors != 0:
            # The source tree is searched only for rows that are displayed;
            # an unused string turns the row into an error.
            if args.search and not used_in_source(args.source, string):
                line_errors = 1

            if not args.quiet:
                print('%-*s' % (longest_string, string), line, sep='')

        if line_errors:
            errors = 1

    if not args.quiet:
        print(rule)
        print('%-*s' % (longest_string, 'Total'), end='')
        for file in args.files:
            print('%8d' % len(translations[file]), end='')
        print()

    sys.exit(errors)
if __name__ == "__main__":
    # Command-line entry point: parse the options, locate the localization
    # files below --source, choose en-US.h as the base translation and hand
    # over to main(), which ends the process with its own exit status.
    usage="""Utility for checking localized string status across translations."""
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument('--source', action='store', required=True, help='The source code tree.')
    parser.add_argument('--all', action='store_true', help='Show all string IDs.')
    parser.add_argument('--search', action='store_true', help='Search source for use.')
    parser.add_argument('--quiet', action='store_true', help='Produces no output.')
    args = parser.parse_args()

    try:
        args.files = find_localizations(args.source)

        # A list comprehension, because the result of filter() cannot be
        # indexed on Python 3.  A missing base file is reported as an error
        # instead of surfacing as an uncaught IndexError.
        candidates = [x for x in args.files if x.endswith('en-US.h')]
        if not candidates:
            raise Exception("No en-US.h base localization found in '%s'." % args.source)
        base = candidates[0]

        main(args)
    except Exception as msg:
        print('Error:', msg)
        # Report the failure through the exit status too, so callers such as
        # unit tests can detect it.  (sys.exit() inside main() raises
        # SystemExit, which is not an Exception and so is not caught here.)
        sys.exit(1)

View file

@ -1,59 +0,0 @@
#!/usr/bin/env python
################################################################################
## taskwarrior - a command line task list manager.
##
## Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to deal
## in the Software without restriction, including without limitation the rights
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
## copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included
## in all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
## OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
##
## http://www.opensource.org/licenses/mit-license.php
##
################################################################################
import sys
import re
# Compares the STRING_* defines of two or more localization files and lists,
# for each file, the IDs that some other file defines but this one does not.
if len(sys.argv) < 3:
    # A usage error is a failure: sys.exit() with a message prints it on
    # stderr and ends the process with status 1 instead of 0.
    sys.exit("Usage: %s file1 file2 ..." % sys.argv[0])

# translations: file name -> set of string IDs defined with non-empty text.
# missing:      file name -> set of IDs defined elsewhere but not in that file.
translations = {}
missing = {}

# '\s+' after '#define' so that more than one blank between the directive and
# the ID is accepted as well.
define_re = re.compile(r'^\s*#define\s+(STRING_[^\s]+)(\s|\\)+"([^"]+)"',
                       re.MULTILINE)

for file in sys.argv[1:]:
    with open(file, 'r') as f:
        # Treat empty strings ("") as a missing entry - because they are just that.
        translations[file] = set(m.group(1) for m in define_re.finditer(f.read()))
    missing[file] = set()

# An ID is missing from a file when any file defines it and this one does not.
all_ids = set().union(*translations.values())
for file in translations:
    missing[file] = all_ids - translations[file]

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
for file in missing:
    if missing[file]:
        print("--- %s --- missing defines:" % file)
        for i in sorted(missing[file]):
            print(i)
    else:
        print("--- %s --- is ok." % file)

View file

@ -1,57 +0,0 @@
#! /usr/bin/perl
################################################################################
## taskwarrior - a command line task list manager.
##
## Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to deal
## in the Software without restriction, including without limitation the rights
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
## copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included
## in all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
## OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
##
## http://www.opensource.org/licenses/mit-license.php
##
################################################################################
use strict;
use warnings;
# Reports every STRING_* define of a localization file that is not referenced
# anywhere in the git work tree outside of the localization headers.

# Use the en-US.h file, or whatever was specified on the command line.
my $file = scalar @ARGV ? shift @ARGV : 'en-US.h';

# Find all the defined strings.  An unreadable file is a hard error; silently
# skipping it would print nothing and look like "every string is used".
open my $fh, '<', $file
  or die "Cannot open '$file': $!\n";

my @strings;
while (my $line = <$fh>)
{
  push @strings, $1 if $line =~ /\#define\s+(STRING_\S+)/;
}
close $fh;

# Lines of 'git grep' output that mention a localization header (xx-YY.h) are
# ignored, so the definitions themselves do not count as a use.  This is the
# expression the former 'grep -v' filter used, '.' left unescaped on purpose.
my $l10n_header = qr/[a-z][a-z]-[A-Z][A-Z].h/;

# Recursively search for all the defined strings, reporting those that are not
# used.
for my $string (@strings)
{
  # List-form pipe open runs git without a shell: the string ID is passed
  # verbatim, and the header pattern can no longer be glob-expanded against
  # the header files in the current directory.
  my @uses;
  if (open my $grep, '-|', 'git', 'grep', '-e', $string)
  {
    @uses = grep { $_ !~ $l10n_header } <$grep>;

    # git grep exits non-zero when nothing matches, so the close status is
    # deliberately not treated as an error.
    close $grep;
  }

  print "$string is not used\n"
    unless @uses;
}
exit 0;
################################################################################