# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
import re
import shutil
import subprocess
import tempfile


do_debug = False  # set from scripts/i18n --debug


def debug(*args, **kwargs):
    """Forward the arguments to print(), but only when do_debug is true."""
    if do_debug:
        print(*args, **kwargs)


def runcmd(cmd, *args, **kwargs):
    """Run the command list cmd with subprocess.check_call.

    The command is logged through debug() first. Extra positional and keyword
    arguments are passed on to check_call unchanged, and a non-zero exit
    status surfaces as subprocess.CalledProcessError.
    """
    debug('... Executing command: %s' % ' '.join(cmd))
    subprocess.check_call(cmd, *args, **kwargs)


# Boilerplate lines to remove from the comment block that precedes the header
# entry: the template title, the placeholder author line (with or without the
# "<EMAIL@ADDRESS>" placeholder) and "fuzzy" markers on the header.
header_comment_strip_re = re.compile(r'''
    ^
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
    |
    ^
    [#][ ]FIRST[ ]AUTHOR[ ](?:<EMAIL@ADDRESS>)?,[ ]\d+[.] \n
    (?:[#] \n)?
    |
    ^
    (?:[#] \n)?
    [#],[ ]fuzzy \n
    |
    ^
    [#][ ][#],[ ]fuzzy \n
    ''',
    re.MULTILINE|re.VERBOSE)


# Volatile header fields (dates, tool and translator identification) that are
# removed from the header entry. The value may continue over several quoted
# lines; it ends at the first literal backslash-n sequence.
header_normalize_re = re.compile(r'''
    ^ "
    (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version)
    : [ ][^\\]*\\n
    " \n
    ''',
    re.MULTILINE|re.IGNORECASE|re.VERBOSE)


def _normalize_po(raw_content):
    r"""Return raw_content (the text of a PO/POT file) in normalized form.

    The comment block before the header entry is kept, minus the boilerplate
    matched by header_comment_strip_re. The header entry is kept, minus the
    fields matched by header_normalize_re. In every following chunk (chunks
    are separated by an empty line), comment lines are removed; chunks flagged
    as fuzzy and message chunks without any actual translation are dropped
    entirely.

    >>> print(_normalize_po(r'''
    ... # header comment
    ...
    ...
    ... # comment before header
    ... msgid ""
    ... msgstr "yada"
    ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
    ... "MIME-Version: "
    ... "1.0\n"
    ... "Last-Translator: Jabba"
    ... "the Hutt\n"
    ... "X-Generator: Weblate 1.2.3\n"
    ...
    ... # comment, but not in header
    ... msgid "None"
    ... msgstr "Ingen"
    ...
    ...
    ... line 2
    ... # third comment
    ...
    ... msgid "Special"
    ... msgstr ""
    ...
    ... msgid "Specialist"
    ... # odd comment
    ... msgstr ""
    ... "Expert"
    ...
    ... # crazy fuzzy auto translation by msgmerge, using foo for bar
    ... #, fuzzy
    ... #| msgid "some foo string"
    ... msgid "some bar string."
    ... msgstr "translation of foo string"
    ...
    ... msgid "%d minute"
    ... msgid_plural "%d minutes"
    ... msgstr[0] "minut"
    ... msgstr[1] "minutter"
    ... msgstr[2] ""
    ...
    ... msgid "%d year"
    ... msgid_plural "%d years"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ...
    ... # last comment
    ... ''') + '^^^')
    # header comment
    <BLANKLINE>
    <BLANKLINE>
    # comment before header
    <BLANKLINE>
    msgid ""
    msgstr "yada"
    "MIME-Version: "
    "1.0\n"
    <BLANKLINE>
    msgid "None"
    msgstr "Ingen"
    <BLANKLINE>
    line 2
    <BLANKLINE>
    msgid "Specialist"
    msgstr ""
    "Expert"
    <BLANKLINE>
    msgid "%d minute"
    msgid_plural "%d minutes"
    msgstr[0] "minut"
    msgstr[1] "minutter"
    msgstr[2] ""
    ^^^
    """
    # The header entry is the first 'msgid ""' line; if there is none, find()
    # gives -1 and the header is taken to start at offset 0.
    header_start = raw_content.find('\nmsgid ""\n') + 1
    # The header runs up to the first empty line, or to the end of the content.
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)

    chunks = [
        header_comment_strip_re.sub('', raw_content[0:header_start])
        .strip(),
        '',
        header_normalize_re.sub('', raw_content[header_start:header_end])
        .replace(
            r'"Content-Type: text/plain; charset=utf-8\n"',
            r'"Content-Type: text/plain; charset=UTF-8\n"')  # maintain msgmerge casing
        .strip(),
        '']  # preserve normalized header

    # all chunks are separated by empty line
    for raw_chunk in raw_content[header_end:].split('\n\n'):
        # The flag line might be like "#, fuzzy, python-format". It can also be
        # the very first line of the chunk, where no newline precedes it.
        if raw_chunk.startswith('#, fuzzy') or '\n#, fuzzy' in raw_chunk:
            continue  # drop crazy auto translation that is worse than useless

        # strip all comment lines from chunk
        chunk_lines = [
            line
            for line in raw_chunk.splitlines()
            if line
            and not line.startswith('#')
        ]
        if not chunk_lines:
            continue

        # check lines starting from first msgstr, skip chunk if no translation lines
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
        if (
            chunk_lines[0].startswith('msgid') and
            msgstr_i and
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
        ):  # skip translation chunks that don't have any actual translations
            continue

        chunks.append('\n'.join(chunk_lines) + '\n')

    return '\n'.join(chunks)


def _normalize_po_file(po_file, merge_pot_file=None, strip=False):
    """Normalize the PO file at path po_file in place.

    If merge_pot_file is given, msgmerge is run first to update po_file from
    that template. If strip is true, the file content is then rewritten with
    the result of _normalize_po().
    """
    if merge_pot_file:
        runcmd(['msgmerge', '--width=76', '--backup=none', '--previous',
                '--no-fuzzy-matching', '--update', po_file, '-q', merge_pot_file])
    if strip:
        po_tmp = po_file + '.tmp'
        # Read and write as UTF-8 instead of the locale's default encoding;
        # the normalized header declares charset=UTF-8.
        with open(po_file, 'r', encoding='utf-8') as src, \
                open(po_tmp, 'w', encoding='utf-8') as dest:
            raw_content = src.read()
            normalized_content = _normalize_po(raw_content)
            dest.write(normalized_content)
        # os.replace overwrites an existing destination on all platforms.
        os.replace(po_tmp, po_file)


def _normalized_diff(file1, file2, merge_pot_file=None, strip=False):
    """Show a unified diff of normalized copies of file1 and file2.

    Both files are copied to temporary files, which are normalized with
    _normalize_po_file() (merge_pot_file and strip are passed on) and then
    compared with 'diff -u'. The input files themselves are not modified.

    Returns None if diff exits with status 0, otherwise the non-zero exit
    status of diff. Failures while normalizing are not caught.
    """
    # Create temporary copies of both files; they are closed when the block exits
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
            tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
        debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
        shutil.copyfile(file1, temp1.name)
        shutil.copyfile(file2, temp2.name)
        # Normalize them in place
        _normalize_po_file(temp1.name, merge_pot_file=merge_pot_file, strip=strip)
        _normalize_po_file(temp2.name, merge_pot_file=merge_pot_file, strip=strip)
        # Now compare
        try:
            runcmd(['diff', '-u', temp1.name, temp2.name])
        except subprocess.CalledProcessError as e:
            return e.returncode
    return None