upstream/kallithea Files · scripts/i18n_utils.py

deps: also report unseen known violations

Mads Kiilerich - - Load All Authors

File last commit:

r8469:6bde1c0a default


                r8600:c7663810

default

Download file

             i18n_utils.py
        
                    195 lines
            
             | 5.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / scripts / i18n_utils.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Thomas De Schampheleire
    
scripts/i18n: introduce new i18n maintenance script...

              r8182
            
      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU General Public License as published by

      # the Free Software Foundation, either version 3 of the License, or

      # (at your option) any later version.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

        Thomas De Schampheleire
    
scripts/i18n: add command 'normalize-po-files'...

              r8183
            
      import os

      import re

        Thomas De Schampheleire
    
scripts/i18n: add command 'normalized-diff'...

              r8184
            
      import shutil

        Thomas De Schampheleire
    
scripts/i18n: introduce new i18n maintenance script...

              r8182
            
      import subprocess

        Thomas De Schampheleire
    
scripts/i18n: add command 'normalized-diff'...

              r8184
            
      import tempfile

        Thomas De Schampheleire
    
scripts/i18n: introduce new i18n maintenance script...

              r8182
            
      do_debug = False  # set from scripts/i18n --debug


      def debug(*args, **kwargs):
          """Print like print(), but only when the module-level do_debug is true.

          All positional and keyword arguments are forwarded to print() unchanged.
          """
          if not do_debug:
              return
          print(*args, **kwargs)

      def runcmd(cmd, *args, **kwargs):
          """Run the command given as a list of strings, logging it via debug().

          Extra positional and keyword arguments are handed on to
          subprocess.check_call, which raises subprocess.CalledProcessError if
          the command exits with a non-zero status.
          """
          command_line = ' '.join(cmd)
          debug('... Executing command: %s' % command_line)
          subprocess.check_call(cmd, *args, **kwargs)

        Thomas De Schampheleire
    
scripts/i18n: add command 'normalize-po-files'...

              r8183
            
      # Boilerplate lines to delete from the comment block above the header
      # entry. The alternatives are, in order:
      # - the "# Translations template for Kallithea." title line
      # - the "# FIRST AUTHOR <EMAIL@ADDRESS>, <digits>." placeholder line,
      #   together with a bare "#" line directly after it (if any)
      # - a "#, fuzzy" flag line, together with a bare "#" line directly
      #   before it (if any)
      # - a "# #, fuzzy" line
      header_comment_strip_re = re.compile(
          r'''
          ^
          [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
          |
          ^
          [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
          (?:[#] \n)?
          |
          ^
          (?:[#] \n)?
          [#],[ ]fuzzy \n
          |
          ^
          [#][ ][#],[ ]fuzzy \n
          ''',
          flags=re.MULTILINE | re.VERBOSE,
      )

      # Header fields that are removed from the header entry. A match starts at
      # a quoted line beginning with one of the listed field names (matched
      # case-insensitively) and runs up to the first line that ends with a
      # literal backslash-n before the closing quote - the value may thus be
      # wrapped over several quoted lines.
      header_normalize_re = re.compile(
          r'''
          ^ "
          (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
          [ ][^\\]*\\n
          " \n
          ''',
          flags=re.MULTILINE | re.IGNORECASE | re.VERBOSE,
      )

      def _normalize_po(raw_content):
          r"""Return the text of a .po/.pot file with noise removed.

          The text is handled in three parts:

          - everything before the header entry (the first line that is exactly
            'msgid ""' and is preceded by a newline): known boilerplate comment
            lines are removed with header_comment_strip_re, the rest is kept
          - the header entry, up to the first empty line: the fields matched by
            header_normalize_re are removed, and the charset in a utf-8
            Content-Type line is upper-cased
          - the remaining chunks, separated by empty lines: chunks flagged as
            fuzzy are dropped, comment lines are removed, and chunks that start
            with msgid but have only empty msgstr lines are dropped

          The kept parts are joined with one empty line between them.

          >>> print(_normalize_po(r'''
          ... # header comment
          ...
          ...
          ... # comment before header
          ... msgid ""
          ... msgstr "yada"
          ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
          ... "MIME-Version: "
          ... "1.0\n"
          ... "Last-Translator: Jabba"
          ... "the Hutt\n"
          ... "X-Generator: Weblate 1.2.3\n"
          ...
          ... # comment, but not in header
          ... msgid "None"
          ... msgstr "Ingen"
          ...
          ...
          ... line 2
          ... # third comment
          ...
          ... msgid "Special"
          ... msgstr ""
          ...
          ... msgid "Specialist"
          ... # odd comment
          ... msgstr ""
          ... "Expert"
          ...
          ... # crazy fuzzy auto translation by msgmerge, using foo for bar
          ... #, fuzzy
          ... #| msgid "some foo string"
          ... msgid "some bar string."
          ... msgstr "translation of foo string"
          ...
          ... #, fuzzy
          ... msgid "fuzzy flag on the first line of the chunk"
          ... msgstr "must be dropped too"
          ...
          ... msgid "%d minute"
          ... msgid_plural "%d minutes"
          ... msgstr[0] "minut"
          ... msgstr[1] "minutter"
          ... msgstr[2] ""
          ...
          ... msgid "%d year"
          ... msgid_plural "%d years"
          ... msgstr[0] ""
          ... msgstr[1] ""
          ...
          ... # last comment
          ... ''') + '^^^')
          # header comment
          <BLANKLINE>
          <BLANKLINE>
          # comment before header
          <BLANKLINE>
          msgid ""
          msgstr "yada"
          "MIME-Version: "
          "1.0\n"
          <BLANKLINE>
          msgid "None"
          msgstr "Ingen"
          <BLANKLINE>
          line 2
          <BLANKLINE>
          msgid "Specialist"
          msgstr ""
          "Expert"
          <BLANKLINE>
          msgid "%d minute"
          msgid_plural "%d minutes"
          msgstr[0] "minut"
          msgstr[1] "minutter"
          msgstr[2] ""
          ^^^
          """
          # find() gives -1 when there is no match, so "+ 1" makes header_start 0
          # in that case; for header_end, "+ 1 or len(...)" falls back to the end
          # of the content when no empty line follows the header
          header_start = raw_content.find('\nmsgid ""\n') + 1
          header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
          chunks = [
              header_comment_strip_re.sub('', raw_content[0:header_start])
                  .strip(),
              '',
              header_normalize_re.sub('', raw_content[header_start:header_end])
                  .replace(
                      r'"Content-Type: text/plain; charset=utf-8\n"',
                      r'"Content-Type: text/plain; charset=UTF-8\n"')  # maintain msgmerge casing
                  .strip(),
              '']  # preserve normalized header
          # all chunks are separated by empty line
          for raw_chunk in raw_content[header_end:].split('\n\n'):
              # The flag line might be like "#, fuzzy, python-format". Splitting on
              # the empty line leaves no newline in front of a chunk's first line,
              # so the flag must be looked for there as well as on later lines.
              if raw_chunk.startswith('#, fuzzy') or '\n#, fuzzy' in raw_chunk:
                  continue  # drop crazy auto translation that is worse than useless
              # strip all comment lines from chunk
              chunk_lines = [
                  line
                  for line in raw_chunk.splitlines()
                  if line
                  and not line.startswith('#')
              ]
              if not chunk_lines:
                  continue
              # check lines starting from first msgstr, skip chunk if no translation lines
              msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
              if (
                  chunk_lines[0].startswith('msgid') and
                  msgstr_i and
                  all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
              ):  # skip translation chunks that don't have any actual translations
                  continue
              chunks.append('\n'.join(chunk_lines) + '\n')
          return '\n'.join(chunks)

        Mads Kiilerich
    
scripts/i18n: introduce --merge-pot-file to control normalization...

              r8186
            
      def _normalize_po_file(po_file, merge_pot_file=None, strip=False):

          if merge_pot_file:

              runcmd(['msgmerge', '--width=76', '--backup=none', '--previous',

                      '--update', po_file, '-q', merge_pot_file])

        Thomas De Schampheleire
    
scripts/i18n: add command 'normalize-po-files'...

              r8183
            
          if strip:

              po_tmp = po_file + '.tmp'

              with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:

                  raw_content = src.read()

                  normalized_content = _normalize_po(raw_content)

                  dest.write(normalized_content)

              os.rename(po_tmp, po_file)

        Thomas De Schampheleire
    
scripts/i18n: add command 'normalized-diff'...

              r8184
            
        Mads Kiilerich
    
scripts/i18n: introduce --merge-pot-file to control normalization...

              r8186
            
      def _normalized_diff(file1, file2, merge_pot_file=None, strip=False):
          """Run 'diff -u' on normalized temporary copies of file1 and file2.

          Both files are copied to temporary files, which are normalized with
          _normalize_po_file using the given merge_pot_file and strip settings.
          The original files are not modified.

          Returns None if the diff command exits with status 0, otherwise the
          non-zero exit status of the diff command.
          """
          # Create temporary copies of both files. The "with" block makes sure
          # they are closed (and thereby removed) on all paths out of the
          # function, also when copying or normalizing fails.
          with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
                  tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
              debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
              shutil.copyfile(file1, temp1.name)
              shutil.copyfile(file2, temp2.name)
              # Normalize them in place
              _normalize_po_file(temp1.name, merge_pot_file=merge_pot_file, strip=strip)
              _normalize_po_file(temp2.name, merge_pot_file=merge_pot_file, strip=strip)
              # Now compare
              try:
                  runcmd(['diff', '-u', temp1.name, temp2.name])
              except subprocess.CalledProcessError as e:
                  return e.returncode
              return None

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Thomas De Schampheleire scripts/i18n: introduce new i18n maintenance script...	r8182	# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU General Public License as published by
		# the Free Software Foundation, either version 3 of the License, or
		# (at your option) any later version.
		#
		# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
		#
		# You should have received a copy of the GNU General Public License
		# along with this program. If not, see <http://www.gnu.org/licenses/>.

Thomas De Schampheleire scripts/i18n: add command 'normalize-po-files'...	r8183	import os
		import re
Thomas De Schampheleire scripts/i18n: add command 'normalized-diff'...	r8184	import shutil
Thomas De Schampheleire scripts/i18n: introduce new i18n maintenance script...	r8182	import subprocess
Thomas De Schampheleire scripts/i18n: add command 'normalized-diff'...	r8184	import tempfile
Thomas De Schampheleire scripts/i18n: introduce new i18n maintenance script...	r8182

		do_debug = False # set from scripts/i18n --debug

		def debug(*args, **kwargs):
		if do_debug:
		print(*args, **kwargs)

		def runcmd(cmd, *args, **kwargs):
		debug('... Executing command: %s' % ' '.join(cmd))
		subprocess.check_call(cmd, *args, **kwargs)
Thomas De Schampheleire scripts/i18n: add command 'normalize-po-files'...	r8183
		header_comment_strip_re = re.compile(r'''
		^
		[#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
		|
		^
		[#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
Mads Kiilerich i18n: better stripping of header comment for new translations...	r8202	(?:[#] \n)?
		|
		^
		(?:[#] \n)?
Thomas De Schampheleire scripts/i18n: add command 'normalize-po-files'...	r8183	[#],[ ]fuzzy \n
Mads Kiilerich i18n: also strip '# #, fuzzy' from header comment - it might appear when verifying branches are in sync	r8203	|
		^
		[#][ ][#],[ ]fuzzy \n
Thomas De Schampheleire scripts/i18n: add command 'normalize-po-files'...	r8183	''',
		re.MULTILINE|re.VERBOSE)

		header_normalize_re = re.compile(r'''
		^ "
		(POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
		[ ][^\\]*\\n
		" \n
		''',
		re.MULTILINE|re.IGNORECASE|re.VERBOSE)

		def _normalize_po(raw_content):
		r"""
		>>> print(_normalize_po(r'''
		... # header comment
		...
		...
		... # comment before header
		... msgid ""
		... msgstr "yada"
		... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
		... "MIME-Version: "
		... "1.0\n"
		... "Last-Translator: Jabba"
		... "the Hutt\n"
		... "X-Generator: Weblate 1.2.3\n"
		...
		... # comment, but not in header
		... msgid "None"
		... msgstr "Ingen"
		...
		...
		... line 2
		... # third comment
		...
		... msgid "Special"
		... msgstr ""
		...
		... msgid "Specialist"
		... # odd comment
		... msgstr ""
		... "Expert"
		...
		... # crazy fuzzy auto translation by msgmerge, using foo for bar
		... #, fuzzy
		... #| msgid "some foo string"
		... msgid "some bar string."
		... msgstr "translation of foo string"
		...
		... msgid "%d minute"
		... msgid_plural "%d minutes"
		... msgstr[0] "minut"
		... msgstr[1] "minutter"
		... msgstr[2] ""
		...
		... msgid "%d year"
		... msgid_plural "%d years"
		... msgstr[0] ""
		... msgstr[1] ""
		...
		... # last comment
		... ''') + '^^^')
		# header comment
		<BLANKLINE>
		<BLANKLINE>
		# comment before header
		<BLANKLINE>
		msgid ""
		msgstr "yada"
		"MIME-Version: "
		"1.0\n"
		<BLANKLINE>
		msgid "None"
		msgstr "Ingen"
		<BLANKLINE>
		line 2
		<BLANKLINE>
		msgid "Specialist"
		msgstr ""
		"Expert"
		<BLANKLINE>
		msgid "%d minute"
		msgid_plural "%d minutes"
		msgstr[0] "minut"
		msgstr[1] "minutter"
		msgstr[2] ""
		^^^
		"""
		header_start = raw_content.find('\nmsgid ""\n') + 1
		header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
		chunks = [
		header_comment_strip_re.sub('', raw_content[0:header_start])
		.strip(),
		'',
		header_normalize_re.sub('', raw_content[header_start:header_end])
Mads Kiilerich scripts/i18n: also normalize casing of UTF-8 in Content-Type...	r8187	.replace(
		r'"Content-Type: text/plain; charset=utf-8\n"',
		r'"Content-Type: text/plain; charset=UTF-8\n"') # maintain msgmerge casing
Thomas De Schampheleire scripts/i18n: add command 'normalize-po-files'...	r8183	.strip(),
		''] # preserve normalized header
		# all chunks are separated by empty line
		for raw_chunk in raw_content[header_end:].split('\n\n'):
		if '\n#, fuzzy' in raw_chunk: # might be like "#, fuzzy, python-format"
		continue # drop crazy auto translation that is worse than useless
		# strip all comment lines from chunk
		chunk_lines = [
		line
		for line in raw_chunk.splitlines()
		if line
		and not line.startswith('#')
		]
		if not chunk_lines:
		continue
		# check lines starting from first msgstr, skip chunk if no translation lines
		msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
		if (
		chunk_lines[0].startswith('msgid') and
		msgstr_i and
		all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
		): # skip translation chunks that doesn't have any actual translations
		continue
		chunks.append('\n'.join(chunk_lines) + '\n')
		return '\n'.join(chunks)

Mads Kiilerich scripts/i18n: introduce --merge-pot-file to control normalization...	r8186	def _normalize_po_file(po_file, merge_pot_file=None, strip=False):
		if merge_pot_file:
		runcmd(['msgmerge', '--width=76', '--backup=none', '--previous',
		'--update', po_file, '-q', merge_pot_file])
Thomas De Schampheleire scripts/i18n: add command 'normalize-po-files'...	r8183	if strip:
		po_tmp = po_file + '.tmp'
		with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
		raw_content = src.read()
		normalized_content = _normalize_po(raw_content)
		dest.write(normalized_content)
		os.rename(po_tmp, po_file)
Thomas De Schampheleire scripts/i18n: add command 'normalized-diff'...	r8184
Mads Kiilerich scripts/i18n: introduce --merge-pot-file to control normalization...	r8186	def _normalized_diff(file1, file2, merge_pot_file=None, strip=False):
Thomas De Schampheleire scripts/i18n: add command 'normalized-diff'...	r8184	# Create temporary copies of both files
		temp1 = tempfile.NamedTemporaryFile(prefix=os.path.basename(file1))
		temp2 = tempfile.NamedTemporaryFile(prefix=os.path.basename(file2))
		debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
		shutil.copyfile(file1, temp1.name)
		shutil.copyfile(file2, temp2.name)
		# Normalize them in place
Mads Kiilerich scripts/i18n: introduce --merge-pot-file to control normalization...	r8186	_normalize_po_file(temp1.name, merge_pot_file=merge_pot_file, strip=strip)
		_normalize_po_file(temp2.name, merge_pot_file=merge_pot_file, strip=strip)
Thomas De Schampheleire scripts/i18n: add command 'normalized-diff'...	r8184	# Now compare
		try:
		runcmd(['diff', '-u', temp1.name, temp2.name])
		except subprocess.CalledProcessError as e:
		return e.returncode