upstream/kallithea Commit - r8184:93dabafa

scripts/i18n: add command 'normalized-diff'...

Thomas De Schampheleire -

r8184:93dabafa default

parent child

scripts/i18n

0 +8 0

             #!/usr/bin/env python3
             # -*- coding: utf-8 -*-
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU General Public License as published by
             # the Free Software Foundation, either version 3 of the License, or
             # (at your option) any later version.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+            import sys
             import click
             import i18n_utils
             """
             Tool for maintenance of .po and .pot files
             Normally, the i18n-related files contain for each translatable string a
             reference to all the source code locations where this string is found. This
             meta data is useful for translators to assess how strings are used, but is not
             relevant for normal development nor for running Kallithea. Such meta data, or
             derived data like kallithea.pot, will inherently be outdated, and create
             unnecessary churn and repository growth, making it harder to spot actual and
             important changes.
             """
             @click.group()
             @click.option('--debug/--no-debug', default=False)
             def cli(debug):
                 # Switch on debug output in i18n_utils when --debug was given.
                 if debug:
                     i18n_utils.do_debug = True
             @cli.command()
             @click.argument('po_files', nargs=-1)
             def normalize_po_files(po_files):
                 """Normalize the specified .po and .pot files.
                 Only actual translations and essential headers will be preserved.
                 """
                 # Every given path is handed to the helper with stripping enabled.
                 for path in po_files:
                     i18n_utils._normalize_po_file(path, strip=True)
+            @cli.command()
+            @click.argument('file1')
+            @click.argument('file2')
+            def normalized_diff(file1, file2):
+                """Compare two files while transparently normalizing them."""
+                sys.exit(i18n_utils._normalized_diff(file1, file2, strip=True))
             # Invoke the click command group only when this file is run as a script.
             if __name__ == '__main__':
                 cli()

scripts/i18n_utils.py

0 +18 0

             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU General Public License as published by
             # the Free Software Foundation, either version 3 of the License, or
             # (at your option) any later version.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             from __future__ import print_function
             import os
             import re
+            import shutil
             import subprocess
+            import tempfile
             do_debug = False  # set from scripts/i18n --debug
             def debug(*args, **kwargs):
                 """Forward all arguments to print(), but only while do_debug is true."""
                 if not do_debug:
                     return
                 print(*args, **kwargs)
             def runcmd(cmd, *args, **kwargs):
                 """Run cmd through subprocess.check_call, reporting it via debug() first.

                 cmd is a sequence of strings; any further arguments are passed on to
                 subprocess.check_call unchanged. A non-zero exit status surfaces as
                 subprocess.CalledProcessError.
                 """
                 command_line = ' '.join(cmd)
                 debug('... Executing command: %s' % command_line)
                 subprocess.check_call(cmd, *args, **kwargs)
             # Matches boilerplate comment lines found before the header entry: either
             # the single line "# Translations template for Kallithea." or the three-line
             # block "# FIRST AUTHOR <EMAIL@ADDRESS>, <digits>." / "#" / "#, fuzzy".
             # re.VERBOSE ignores the whitespace in the pattern, hence the explicit [ ].
             header_comment_strip_re = re.compile(r'''
                 ^
                 [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
                 |
                 ^
                 [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
                 [#] \n
                 [#],[ ]fuzzy \n
                 ''',
                 re.MULTILINE|re.VERBOSE)
             # Matches a quoted header field line, case-insensitively, for the field
             # names listed in the group. The value runs up to the first literal "\n"
             # escape followed by a closing quote; since [^\\]* also matches quotes and
             # newlines, a value wrapped over several quoted lines is matched as a whole.
             header_normalize_re = re.compile(r'''
                 ^ "
                 (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
                 [ ][^\\]*\\n
                 " \n
                 ''',
                 re.MULTILINE|re.IGNORECASE|re.VERBOSE)
             def _normalize_po(raw_content):
                 r"""Return the text of a .po/.pot file reduced to its essential content.

                 raw_content is the complete file content as one string. The result is
                 built from blank-line separated chunks:

                 - the text before the header entry (the first line that is exactly
                   msgid ""), minus whatever header_comment_strip_re matches;
                 - the header entry itself, minus the fields header_normalize_re matches;
                 - every following chunk, with empty lines and lines starting with '#'
                   removed. A chunk is dropped entirely when it carries a "#, fuzzy"
                   flag line, when nothing is left after removing comments, or when it
                   starts with msgid and every line from its first msgstr line onwards
                   ends with ' ""' (no actual translation).

                 >>> print(_normalize_po(r'''
                 ... # header comment
                 ...
                 ...
                 ... # comment before header
                 ... msgid ""
                 ... msgstr "yada"
                 ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
                 ... "MIME-Version: "
                 ... "1.0\n"
                 ... "Last-Translator: Jabba"
                 ... "the Hutt\n"
                 ... "X-Generator: Weblate 1.2.3\n"
                 ...
                 ... # comment, but not in header
                 ... msgid "None"
                 ... msgstr "Ingen"
                 ...
                 ...
                 ... line 2
                 ... # third comment
                 ...
                 ... msgid "Special"
                 ... msgstr ""
                 ...
                 ... msgid "Specialist"
                 ... # odd comment
                 ... msgstr ""
                 ... "Expert"
                 ...
                 ... # crazy fuzzy auto translation by msgmerge, using foo for bar
                 ... #, fuzzy
                 ... #| msgid "some foo string"
                 ... msgid "some bar string."
                 ... msgstr "translation of foo string"
                 ...
                 ... msgid "%d minute"
                 ... msgid_plural "%d minutes"
                 ... msgstr[0] "minut"
                 ... msgstr[1] "minutter"
                 ... msgstr[2] ""
                 ...
                 ... msgid "%d year"
                 ... msgid_plural "%d years"
                 ... msgstr[0] ""
                 ... msgstr[1] ""
                 ...
                 ... # last comment
                 ... ''') + '^^^')
                 # header comment
                 <BLANKLINE>
                 <BLANKLINE>
                 # comment before header
                 <BLANKLINE>
                 msgid ""
                 msgstr "yada"
                 "MIME-Version: "
                 "1.0\n"
                 <BLANKLINE>
                 msgid "None"
                 msgstr "Ingen"
                 <BLANKLINE>
                 line 2
                 <BLANKLINE>
                 msgid "Specialist"
                 msgstr ""
                 "Expert"
                 <BLANKLINE>
                 msgid "%d minute"
                 msgid_plural "%d minutes"
                 msgstr[0] "minut"
                 msgstr[1] "minutter"
                 msgstr[2] ""
                 ^^^
                 """
                 # find() returns -1 when there is no header entry, so header_start is 0 then
                 header_start = raw_content.find('\nmsgid ""\n') + 1
                 # the header ends at the first blank line; without one it runs to the end
                 header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
                 chunks = [
                     header_comment_strip_re.sub('', raw_content[0:header_start])
                         .strip(),
                     '',
                     header_normalize_re.sub('', raw_content[header_start:header_end])
                         .strip(),
                     '']  # preserve normalized header
                 # all chunks are separated by empty line
                 for raw_chunk in raw_content[header_end:].split('\n\n'):
                     # The flag line might be like "#, fuzzy, python-format". It is often the
                     # very first line of the chunk, where no newline precedes it, so check
                     # the start of the chunk as well as the start of any later line.
                     if raw_chunk.startswith('#, fuzzy') or '\n#, fuzzy' in raw_chunk:
                         continue  # drop crazy auto translation that is worse than useless
                     # strip all comment lines from chunk
                     chunk_lines = [
                         line
                         for line in raw_chunk.splitlines()
                         if line
                         and not line.startswith('#')
                     ]
                     if not chunk_lines:
                         continue
                     # check lines starting from first msgstr, skip chunk if no translation lines
                     msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
                     if (
                         chunk_lines[0].startswith('msgid') and
                         msgstr_i and
                         all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
                     ):  # skip translation chunks that don't have any actual translations
                         continue
                     chunks.append('\n'.join(chunk_lines) + '\n')
                 return '\n'.join(chunks)
             def _normalize_po_file(po_file, strip=False):
                 """Rewrite po_file with its normalized content when strip is true.

                 The normalized text is first written to po_file + '.tmp', which is then
                 renamed over po_file. With strip false nothing is read or written.
                 """
                 if not strip:
                     return
                 tmp_path = po_file + '.tmp'
                 with open(po_file, 'r') as infile:
                     with open(tmp_path, 'w') as outfile:
                         outfile.write(_normalize_po(infile.read()))
                 os.rename(tmp_path, po_file)
+            def _normalized_diff(file1, file2, strip=False):
+                # Create temporary copies of both files
+                temp1 = tempfile.NamedTemporaryFile(prefix=os.path.basename(file1))
+                temp2 = tempfile.NamedTemporaryFile(prefix=os.path.basename(file2))
+                debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
+                shutil.copyfile(file1, temp1.name)
+                shutil.copyfile(file2, temp2.name)
+                # Normalize them in place
+                _normalize_po_file(temp1.name, strip=strip)
+                _normalize_po_file(temp2.name, strip=strip)
+                # Now compare
+                try:
+                    runcmd(['diff', '-u', temp1.name, temp2.name])
+                except subprocess.CalledProcessError as e:
+                    return e.returncode

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages