#!/usr/bin/python # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # import argparse import json import os import re import sys parser = argparse.ArgumentParser(description='Compares two JSON translation ' 'files, as used by the Apache Guacamole web application, listing ' 'the strings which appear to be missing or incorrect.') parser.add_argument('--no-missing', dest='check_missing', action='store_false', help='Disables checking for strings which are present in ORIGINAL but ' 'are missing from TRANSLATED. Assuming ORIGINAL represents the set of ' 'strings actually used by the web application, these strings are ' 'those which are missing and need to be defined for the translation ' 'to be complete. By default, the comparison will check for missing ' 'translations.') parser.add_argument('--no-unused', dest='check_unused', action='store_false', help='Disables checking for strings which are present in TRANSLATED ' 'but not in ORIGINAL. Assuming ORIGINAL represents the set of strings ' 'actually used by the web application, these strings are those which ' 'are defined by the translation but unused. By default, the ' 'comparison will check for unused translations.') parser.add_argument('--check-copied', action='store_true', help='Enables ' 'checking for strings defined in TRANSLATED which are identical to ' 'the corresponding strings in ORIGINAL. Such strings may have been ' 'incorrectly copied verbatim from the original without being ' 'translated at all. It is also possible that both languages simply ' 'use the same text for that string, and the string is correct. As ' 'this test can produce false positives, it is disabled by default.') parser.add_argument('ORIGINAL', nargs='?', help='The JSON file which should ' 'be used as the basis for comparison. This should be JSON which can ' 'be expected to contain every string used by the web application and ' 'no others. Typically, this will be the primary, original language of ' 'the web application. In the case of Apache Guacamole, this should be ' 'English. If omitted, the file "en.json" within the same directory ' 'as TRANSLATED will be used by default.') parser.add_argument('TRANSLATED', help='The JSON file which should be ' 'compared against ORIGINAL. This should be the JSON which has been ' 'translated from ORIGINAL, and thus should contain the same set of ' 'strings if the translation is complete.') args = parser.parse_args() def flatten_strings(translation, prefix=u''): """Reads all translation strings from the given JSON, taking into account namespacing, flattening nested namespaces into a single set of key/value pairs. For example, the following call: flatten_strings({ u'TOP' : { u'LETTERS' : { u'A' : u'A', u'B' : u'B' }, u'NUMBERS' : { u'ONE' : u'1', u'TWO' : u'2', u'THREE' : u'3' } } }) would return: { u'TOP.LETTERS.A' : u'A', u'TOP.LETTERS.B' : u'B', u'TOP.NUMBERS.ONE' : u'1', u'TOP.NUMBERS.TWO' : u'2', u'TOP.NUMBERS.THREE' : u'3' } Parameters ---------- translation : dict or unicode The dict object to read translation strings from, where each key is a translation key or namespace and each value is a translation string or a dict containing the translations nested within that namespace. this object is simply a Unicode string, it will be assumed to be the value of a translation string, and the prefix provided will be assumed to be the name. prefix : unicode, optional The namespace prefix to apply to all translation strings within the given object, if any. This parameter is optional. If omitted, an empty string will be used. Returns ------- dict An dict whose properties are the names of all translation strings contained within the given object. """ strings = {} # If the provided object is a string, the prefix is the string name if isinstance(translation, unicode): strings[prefix] = translation return strings # Otherwise, if the prefix is non-empty, append a period for children if prefix: prefix += u'.' # For each property of the given object, read all string names for key, child in translation.items(): # Add all string names within the child under its prefix for flattened, value in flatten_strings(child, prefix + key).items(): strings[flattened] = value return strings class Translation: """A set of namespaced translation strings read from a JSON file, as supported by angular-translate and used by Apache Guacamole. Attributes ---------- lang_key : unicode The unique key identifying the JSON translation file and the language within that file. This will simply be the filename without the ".json" extension. lang_name : unicode The name of the language as defined within the JSON translation file by the special "NAME" key. Not all translations will define a "NAME", as some translations (those provided by Guacamole extensions) are used as overlays for the base translation for that language defined at the web application level. If no "NAME" key is present, `lang_name` will be `None`. strings : dict The flattened set of translation key/value pairs. Each key will contain all applicable namespaces, separated by periods, as produced by `flatten_strings()`. There will be no nested keys. """ def __init__(self, path): """ Parses the details and contents of the JSON translation file at the given path. Parameters ---------- path : str The path to the JSON file containing the translation to be read. """ json_data = open(path).read() filename = os.path.basename(path) self.lang_key = os.path.splitext(filename)[0] self.strings = flatten_strings(json.loads(json_data)) self.lang_name = self.strings.get(u'NAME', None) def get_missing(self, expected): """Returns a list of translation keys which are present in the given translation but missing from this translation. Parameters ---------- expected : Translation The translation to compare this translation against. Returns ------- list A list of translation keys which are present in the given translation but are NOT present in this translation. """ return [ key for key in expected.strings if not key in self.strings ] def get_identical(self, other): """Returns a list of translation keys which map to the same exact value in both this translation and the given translation. Parameters ---------- other : Translation The translation to compare this translation against. Returns ------- list A list of translation keys which map to the same exact value in both translations. """ return [ key for key, value in self.strings.items() if key in other.strings and other.strings[key] == value ] # # Translation keys which are expected to always be inherited from the base # translation and thus should be missing from all translations # expected_missing = { u'APP.NAME', u'APP.VERSION' } # # Regular expression which matches strings that are expected to be copied # verbatim # expected_copied = re.compile('|'.join([ '^$', # Empty string '^@:', # References to other strings '^\\d+$', # Numbers '^(VNC|RDP|SSH|SFTP|Telnet)$', # Protocol names '^(Apache )?Guacamole$' # Guacamole itself ])) # # Read provided input files # orig = Translation(args.ORIGINAL or '{}/en.json'.format(os.path.dirname(args.TRANSLATED))) trans = Translation(args.TRANSLATED) print u'Original language: {} ({})'.format(orig.lang_key, orig.lang_name) print u'Translation language: {} ({})'.format(trans.lang_key, trans.lang_name) # Ignore keys that are expected to be missing orig.strings = { key:value for key, value in orig.strings.items() if key not in expected_missing } # # Perform requested tests # missing = trans.get_missing(orig) if args.check_missing else [] unused = orig.get_missing(trans) if args.check_unused else [] copied = orig.get_identical(trans) if args.check_copied else [] # Exclude keys which are expected to be copied copied = [ key for key in copied if not expected_copied.match(orig.strings[key]) ] # # Group any errors encountered by type # if missing: print('\nThe following strings are missing from the translation and ' 'should be added:\n') for name in sorted(missing): print ' {}'.format(name) if unused: print('\nThe following strings are either NOT defined for the original ' 'language or are expected to be inherited from the original ' 'language and should be removed:\n') for name in sorted(unused): print ' {}'.format(name) if copied: print('\nThe following strings are identical to the original language ' 'and MIGHT be untranslated:\n') for name in sorted(copied): print ' {}'.format(name) # # Count total number of errors and summarize result # errors = len(missing) + len(unused) + len(copied) if errors: print '\n{} error(s) total.'.format(errors) sys.exit(1) print '\nCheck completed successfully. No errors.'