upstream/kallithea Commit - r8184:93dabafa

scripts/i18n: add command 'normalized-diff'...

Thomas De Schampheleire -

r8184:93dabafa default

parent child

scripts/i18n

0 +8 0

              #!/usr/bin/env python3
              # -*- coding: utf-8 -*-
              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation, either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+             import sys
              import click
              import i18n_utils
              """
              Tool for maintenance of .po and .pot files
              Normally, the i18n-related files contain for each translatable string a
              reference to all the source code locations where this string is found. This
              meta data is useful for translators to assess how strings are used, but is not
              relevant for normal development nor for running Kallithea. Such meta data, or
              derived data like kallithea.pot, will inherently be outdated, and create
              unnecessary churn and repository growth, making it harder to spot actual and
              important changes.
              """
              @click.group()
              @click.option('--debug/--no-debug', default=False)
              def cli(debug):
                  # Root command group shared by all subcommands. Documented with
                  # comments rather than a docstring so the --help output is unchanged.
                  if not debug:
                      return
                  # --debug switches on the diagnostic output printed by i18n_utils.debug()
                  i18n_utils.do_debug = True
              @cli.command()
              @click.argument('po_files', nargs=-1)
              def normalize_po_files(po_files):
                  """Normalize the specified .po and .pot files.
                  Only actual translations and essential headers will be preserved.
                  """
                  # Each file is rewritten in place; without strip=True the helper
                  # would leave the file untouched.
                  for path in po_files:
                      i18n_utils._normalize_po_file(path, strip=True)
+             @cli.command()
+             @click.argument('file1')
+             @click.argument('file2')
+             def normalized_diff(file1, file2):
+                 """Compare two files while transparently normalizing them."""
+                 sys.exit(i18n_utils._normalized_diff(file1, file2, strip=True))
              # Dispatch to the click command group only when run as a script, not on import.
              if __name__ == '__main__':
                  cli()

scripts/i18n_utils.py

0 +18 0

              # This program is free software: you can redistribute it and/or modify
              # it under the terms of the GNU General Public License as published by
              # the Free Software Foundation, either version 3 of the License, or
              # (at your option) any later version.
              #
              # This program is distributed in the hope that it will be useful,
              # but WITHOUT ANY WARRANTY; without even the implied warranty of
              # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
              # GNU General Public License for more details.
              #
              # You should have received a copy of the GNU General Public License
              # along with this program.  If not, see <http://www.gnu.org/licenses/>.
              from __future__ import print_function
              import os
              import re
+             import shutil
              import subprocess
+             import tempfile
              do_debug = False  # set from scripts/i18n --debug
              def debug(*args, **kwargs):
                  """Forward all arguments to print(), but only while do_debug is true."""
                  if not do_debug:
                      return
                  print(*args, **kwargs)
              def runcmd(cmd, *args, **kwargs):
                  """Run cmd (a list of argument strings) via subprocess.check_call.

                  The command line is reported through debug() first. Extra positional
                  and keyword arguments are handed to check_call unchanged, and a
                  non-zero exit status surfaces as subprocess.CalledProcessError.
                  """
                  command_line = ' '.join(cmd)
                  debug('... Executing command: %s' % command_line)
                  subprocess.check_call(cmd, *args, **kwargs)
              header_comment_strip_re = re.compile(r'''
                  ^
                  [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
                  |
                  ^
                  [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
                  [#] \n
                  [#],[ ]fuzzy \n
                  ''',
                  re.MULTILINE|re.VERBOSE)
              header_normalize_re = re.compile(r'''
                  ^ "
                  (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
                  [ ][^\\]*\\n
                  " \n
                  ''',
                  re.MULTILINE|re.IGNORECASE|re.VERBOSE)
              def _normalize_po(raw_content):
                  r"""
                  >>> print(_normalize_po(r'''
                  ... # header comment
                  ...
                  ...
                  ... # comment before header
                  ... msgid ""
                  ... msgstr "yada"
                  ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
                  ... "MIME-Version: "
                  ... "1.0\n"
                  ... "Last-Translator: Jabba"
                  ... "the Hutt\n"
                  ... "X-Generator: Weblate 1.2.3\n"
                  ...
                  ... # comment, but not in header
                  ... msgid "None"
                  ... msgstr "Ingen"
                  ...
                  ...
                  ... line 2
                  ... # third comment
                  ...
                  ... msgid "Special"
                  ... msgstr ""
                  ...
                  ... msgid "Specialist"
                  ... # odd comment
                  ... msgstr ""
                  ... "Expert"
                  ...
                  ... # crazy fuzzy auto translation by msgmerge, using foo for bar
                  ... #, fuzzy
                  ... #| msgid "some foo string"
                  ... msgid "some bar string."
                  ... msgstr "translation of foo string"
                  ...
                  ... msgid "%d minute"
                  ... msgid_plural "%d minutes"
                  ... msgstr[0] "minut"
                  ... msgstr[1] "minutter"
                  ... msgstr[2] ""
                  ...
                  ... msgid "%d year"
                  ... msgid_plural "%d years"
                  ... msgstr[0] ""
                  ... msgstr[1] ""
                  ...
                  ... # last comment
                  ... ''') + '^^^')
                  # header comment
                  <BLANKLINE>
                  <BLANKLINE>
                  # comment before header
                  <BLANKLINE>
                  msgid ""
                  msgstr "yada"
                  "MIME-Version: "
                  "1.0\n"
                  <BLANKLINE>
                  msgid "None"
                  msgstr "Ingen"
                  <BLANKLINE>
                  line 2
                  <BLANKLINE>
                  msgid "Specialist"
                  msgstr ""
                  "Expert"
                  <BLANKLINE>
                  msgid "%d minute"
                  msgid_plural "%d minutes"
                  msgstr[0] "minut"
                  msgstr[1] "minutter"
                  msgstr[2] ""
                  ^^^
                  """
                  header_start = raw_content.find('\nmsgid ""\n') + 1
                  header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
                  chunks = [
                      header_comment_strip_re.sub('', raw_content[0:header_start])
                          .strip(),
                      '',
                      header_normalize_re.sub('', raw_content[header_start:header_end])
                          .strip(),
                      '']  # preserve normalized header
                  # all chunks are separated by empty line
                  for raw_chunk in raw_content[header_end:].split('\n\n'):
                      if '\n#, fuzzy' in raw_chunk:  # might be like "#, fuzzy, python-format"
                          continue  # drop crazy auto translation that is worse than useless
                      # strip all comment lines from chunk
                      chunk_lines = [
                          line
                          for line in raw_chunk.splitlines()
                          if line
                          and not line.startswith('#')
                      ]
                      if not chunk_lines:
                          continue
                      # check lines starting from first msgstr, skip chunk if no translation lines
                      msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
                      if (
                          chunk_lines[0].startswith('msgid') and
                          msgstr_i and
                          all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
                      ):  # skip translation chunks that doesn't have any actual translations
                          continue
                      chunks.append('\n'.join(chunk_lines) + '\n')
                  return '\n'.join(chunks)
              def _normalize_po_file(po_file, strip=False):
                  if strip:
                      po_tmp = po_file + '.tmp'
                      with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
                          raw_content = src.read()
                          normalized_content = _normalize_po(raw_content)
                          dest.write(normalized_content)
                      os.rename(po_tmp, po_file)
+             def _normalized_diff(file1, file2, strip=False):
+                 # Create temporary copies of both files
+                 temp1 = tempfile.NamedTemporaryFile(prefix=os.path.basename(file1))
+                 temp2 = tempfile.NamedTemporaryFile(prefix=os.path.basename(file2))
+                 debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
+                 shutil.copyfile(file1, temp1.name)
+                 shutil.copyfile(file2, temp2.name)
+                 # Normalize them in place
+                 _normalize_po_file(temp1.name, strip=strip)
+                 _normalize_po_file(temp2.name, strip=strip)
+                 # Now compare
+                 try:
+                     runcmd(['diff', '-u', temp1.name, temp2.name])
+                 except subprocess.CalledProcessError as e:
+                     return e.returncode

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages