310 lines
10 KiB
Python
Executable File
310 lines
10 KiB
Python
Executable File
#!/usr/bin/python
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
parser = argparse.ArgumentParser(description='Compares two JSON translation '
|
|
'files, as used by the Apache Guacamole web application, listing '
|
|
'the strings which appear to be missing or incorrect.')
|
|
|
|
parser.add_argument('--no-missing', dest='check_missing', action='store_false',
|
|
help='Disables checking for strings which are present in ORIGINAL but '
|
|
'are missing from TRANSLATED. Assuming ORIGINAL represents the set of '
|
|
'strings actually used by the web application, these strings are '
|
|
'those which are missing and need to be defined for the translation '
|
|
'to be complete. By default, the comparison will check for missing '
|
|
'translations.')
|
|
|
|
parser.add_argument('--no-unused', dest='check_unused', action='store_false',
|
|
help='Disables checking for strings which are present in TRANSLATED '
|
|
'but not in ORIGINAL. Assuming ORIGINAL represents the set of strings '
|
|
'actually used by the web application, these strings are those which '
|
|
'are defined by the translation but unused. By default, the '
|
|
'comparison will check for unused translations.')
|
|
|
|
parser.add_argument('--check-copied', action='store_true', help='Enables '
|
|
'checking for strings defined in TRANSLATED which are identical to '
|
|
'the corresponding strings in ORIGINAL. Such strings may have been '
|
|
'incorrectly copied verbatim from the original without being '
|
|
'translated at all. It is also possible that both languages simply '
|
|
'use the same text for that string, and the string is correct. As '
|
|
'this test can produce false positives, it is disabled by default.')
|
|
|
|
parser.add_argument('ORIGINAL', nargs='?', help='The JSON file which should '
|
|
'be used as the basis for comparison. This should be JSON which can '
|
|
'be expected to contain every string used by the web application and '
|
|
'no others. Typically, this will be the primary, original language of '
|
|
'the web application. In the case of Apache Guacamole, this should be '
|
|
'English. If omitted, the file "en.json" within the same directory '
|
|
'as TRANSLATED will be used by default.')
|
|
|
|
parser.add_argument('TRANSLATED', help='The JSON file which should be '
|
|
'compared against ORIGINAL. This should be the JSON which has been '
|
|
'translated from ORIGINAL, and thus should contain the same set of '
|
|
'strings if the translation is complete.')
|
|
|
|
args = parser.parse_args()
|
|
|
|
def flatten_strings(translation, prefix=u''):
|
|
"""Reads all translation strings from the given JSON, taking into account
|
|
namespacing, flattening nested namespaces into a single set of key/value
|
|
pairs.
|
|
|
|
For example, the following call:
|
|
|
|
flatten_strings({
|
|
u'TOP' : {
|
|
u'LETTERS' : {
|
|
u'A' : u'A',
|
|
u'B' : u'B'
|
|
},
|
|
u'NUMBERS' : {
|
|
u'ONE' : u'1',
|
|
u'TWO' : u'2',
|
|
u'THREE' : u'3'
|
|
}
|
|
}
|
|
})
|
|
|
|
would return:
|
|
|
|
{
|
|
u'TOP.LETTERS.A' : u'A',
|
|
u'TOP.LETTERS.B' : u'B',
|
|
u'TOP.NUMBERS.ONE' : u'1',
|
|
u'TOP.NUMBERS.TWO' : u'2',
|
|
u'TOP.NUMBERS.THREE' : u'3'
|
|
}
|
|
|
|
Parameters
|
|
----------
|
|
translation : dict or unicode
|
|
The dict object to read translation strings from, where each key is a
|
|
translation key or namespace and each value is a translation string or
|
|
a dict containing the translations nested within that namespace.
|
|
this object is simply a Unicode string, it will be assumed to be the
|
|
value of a translation string, and the prefix provided will be assumed
|
|
to be the name.
|
|
|
|
prefix : unicode, optional
|
|
The namespace prefix to apply to all translation strings within the
|
|
given object, if any. This parameter is optional. If omitted, an empty
|
|
string will be used.
|
|
|
|
Returns
|
|
-------
|
|
dict
|
|
An dict whose properties are the names of all translation strings
|
|
contained within the given object.
|
|
|
|
"""
|
|
|
|
strings = {}
|
|
|
|
# If the provided object is a string, the prefix is the string name
|
|
if isinstance(translation, unicode):
|
|
strings[prefix] = translation
|
|
return strings
|
|
|
|
# Otherwise, if the prefix is non-empty, append a period for children
|
|
if prefix:
|
|
prefix += u'.'
|
|
|
|
# For each property of the given object, read all string names
|
|
for key, child in translation.items():
|
|
|
|
# Add all string names within the child under its prefix
|
|
for flattened, value in flatten_strings(child, prefix + key).items():
|
|
strings[flattened] = value
|
|
|
|
return strings
|
|
|
|
class Translation:
|
|
"""A set of namespaced translation strings read from a JSON file, as
|
|
supported by angular-translate and used by Apache Guacamole.
|
|
|
|
Attributes
|
|
----------
|
|
lang_key : unicode
|
|
The unique key identifying the JSON translation file and the language
|
|
within that file. This will simply be the filename without the ".json"
|
|
extension.
|
|
lang_name : unicode
|
|
The name of the language as defined within the JSON translation file by
|
|
the special "NAME" key. Not all translations will define a "NAME", as
|
|
some translations (those provided by Guacamole extensions) are used as
|
|
overlays for the base translation for that language defined at the web
|
|
application level. If no "NAME" key is present, `lang_name` will be
|
|
`None`.
|
|
strings : dict
|
|
The flattened set of translation key/value pairs. Each key will contain
|
|
all applicable namespaces, separated by periods, as produced by
|
|
`flatten_strings()`. There will be no nested keys.
|
|
|
|
"""
|
|
|
|
|
|
def __init__(self, path):
|
|
"""
|
|
Parses the details and contents of the JSON translation file at the
|
|
given path.
|
|
|
|
Parameters
|
|
----------
|
|
path : str
|
|
The path to the JSON file containing the translation to be read.
|
|
|
|
"""
|
|
|
|
json_data = open(path).read()
|
|
filename = os.path.basename(path)
|
|
|
|
self.lang_key = os.path.splitext(filename)[0]
|
|
self.strings = flatten_strings(json.loads(json_data))
|
|
self.lang_name = self.strings.get(u'NAME', None)
|
|
|
|
def get_missing(self, expected):
|
|
"""Returns a list of translation keys which are present in the given
|
|
translation but missing from this translation.
|
|
|
|
Parameters
|
|
----------
|
|
expected : Translation
|
|
The translation to compare this translation against.
|
|
|
|
Returns
|
|
-------
|
|
list
|
|
A list of translation keys which are present in the given
|
|
translation but are NOT present in this translation.
|
|
|
|
"""
|
|
return [ key for key in expected.strings if not key in self.strings ]
|
|
|
|
def get_identical(self, other):
|
|
"""Returns a list of translation keys which map to the same exact value
|
|
in both this translation and the given translation.
|
|
|
|
Parameters
|
|
----------
|
|
other : Translation
|
|
The translation to compare this translation against.
|
|
|
|
Returns
|
|
-------
|
|
list
|
|
A list of translation keys which map to the same exact value in
|
|
both translations.
|
|
|
|
"""
|
|
return [ key for key, value in self.strings.items()
|
|
if key in other.strings and other.strings[key] == value ]
|
|
|
|
#
|
|
# Translation keys which are expected to always be inherited from the base
|
|
# translation and thus should be missing from all translations
|
|
#
|
|
|
|
expected_missing = {
|
|
u'APP.NAME',
|
|
u'APP.VERSION'
|
|
}
|
|
|
|
#
|
|
# Regular expression which matches strings that are expected to be copied
|
|
# verbatim
|
|
#
|
|
|
|
expected_copied = re.compile('|'.join([
|
|
'^$', # Empty string
|
|
'^@:', # References to other strings
|
|
'^\\d+$', # Numbers
|
|
'^(VNC|RDP|SSH|SFTP|Telnet)$', # Protocol names
|
|
'^(Apache )?Guacamole$' # Guacamole itself
|
|
]))
|
|
|
|
#
|
|
# Read provided input files
|
|
#
|
|
|
|
orig = Translation(args.ORIGINAL
|
|
or '{}/en.json'.format(os.path.dirname(args.TRANSLATED)))
|
|
|
|
trans = Translation(args.TRANSLATED)
|
|
|
|
print u'Original language: {} ({})'.format(orig.lang_key, orig.lang_name)
|
|
print u'Translation language: {} ({})'.format(trans.lang_key, trans.lang_name)
|
|
|
|
# Ignore keys that are expected to be missing
|
|
orig.strings = { key:value for key, value in orig.strings.items()
|
|
if key not in expected_missing }
|
|
|
|
#
|
|
# Perform requested tests
|
|
#
|
|
|
|
missing = trans.get_missing(orig) if args.check_missing else []
|
|
unused = orig.get_missing(trans) if args.check_unused else []
|
|
copied = orig.get_identical(trans) if args.check_copied else []
|
|
|
|
# Exclude keys which are expected to be copied
|
|
copied = [ key for key in copied
|
|
if not expected_copied.match(orig.strings[key]) ]
|
|
|
|
#
|
|
# Group any errors encountered by type
|
|
#
|
|
|
|
if missing:
|
|
print('\nThe following strings are missing from the translation and '
|
|
'should be added:\n')
|
|
for name in sorted(missing):
|
|
print ' {}'.format(name)
|
|
|
|
if unused:
|
|
print('\nThe following strings are either NOT defined for the original '
|
|
'language or are expected to be inherited from the original '
|
|
'language and should be removed:\n')
|
|
for name in sorted(unused):
|
|
print ' {}'.format(name)
|
|
|
|
if copied:
|
|
print('\nThe following strings are identical to the original language '
|
|
'and MIGHT be untranslated:\n')
|
|
for name in sorted(copied):
|
|
print ' {}'.format(name)
|
|
|
|
#
|
|
# Count total number of errors and summarize result
|
|
#
|
|
|
|
errors = len(missing) + len(unused) + len(copied)
|
|
|
|
if errors:
|
|
print '\n{} error(s) total.'.format(errors)
|
|
sys.exit(1)
|
|
|
|
print '\nCheck completed successfully. No errors.'
|
|
|