upstream/kallithea Commit - r8183:ae9d205f

scripts/i18n: add command 'normalize-po-files'...

Thomas De Schampheleire -

r8183:ae9d205f default

parent child

scripts/i18n

0 +19 0

              """
              Tool for maintenance of .po and .pot files
+             Normally, the i18n-related files contain for each translatable string a
+             reference to all the source code locations where this string is found. This
+             meta data is useful for translators to assess how strings are used, but is not
+             relevant for normal development nor for running Kallithea. Such meta data, or
+             derived data like kallithea.pot, will inherently be outdated, and create
+             unnecessary churn and repository growth, making it harder to spot actual and
+             important changes.
              """
              @click.group()
                      i18n_utils.do_debug = True
                  pass
+             @cli.command()
+             @click.argument('po_files', nargs=-1)
+             def normalize_po_files(po_files):
+                 """Normalize the specified .po and .pot files.
+                 Only actual translations and essential headers will be preserved.
+                 """
+                 # nargs=-1 collects every path given on the command line into po_files.
+                 for po_file in po_files:
+                     # strip=True is what makes the helper actually rewrite the file.
+                     i18n_utils._normalize_po_file(po_file, strip=True)
              if __name__ == '__main__':
                  cli()

scripts/i18n_utils.py

0 +140 0

		@@ -13,6 +13,8 b''
13	13
14	14	from __future__ import print_function
15	15
	16	import os
	17	import re
16	18	import subprocess
17	19
18	20
		@@ -25,3 +27,141 b' def debug(args, *kwargs):'
def runcmd(cmd, *args, **kwargs):
    """Log and run an external command.

    ``cmd`` is the argument list of the command. Any additional positional
    and keyword arguments are forwarded unchanged to
    ``subprocess.check_call``, which raises ``CalledProcessError`` when the
    command exits with a non-zero status.
    """
    debug('... Executing command: %s' % ' '.join(cmd))
    subprocess.check_call(cmd, *args, **kwargs)
	30
	31	header_comment_strip_re = re.compile(r'''
	32	^
	33	[#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
	34	\|
	35	^
	36	[#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
	37	[#] \n
	38	[#],[ ]fuzzy \n
	39	''',
	40	re.MULTILINE\|re.VERBOSE)
	41
	42	header_normalize_re = re.compile(r'''
	43	^ "
	44	(POT-Creation-Date\|PO-Revision-Date\|Last-Translator\|Language-Team\|X-Generator\|Generated-By\|Project-Id-Version):
	45	[ ][^\\]*\\n
	46	" \n
	47	''',
	48	re.MULTILINE\|re.IGNORECASE\|re.VERBOSE)
	49
	50	def _normalize_po(raw_content):
	51	r"""
	52	>>> print(_normalize_po(r'''
	53	... # header comment
	54	...
	55	...
	56	... # comment before header
	57	... msgid ""
	58	... msgstr "yada"
	59	... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
	60	... "MIME-Version: "
	61	... "1.0\n"
	62	... "Last-Translator: Jabba"
	63	... "the Hutt\n"
	64	... "X-Generator: Weblate 1.2.3\n"
	65	...
	66	... # comment, but not in header
	67	... msgid "None"
	68	... msgstr "Ingen"
	69	...
	70	...
	71	... line 2
	72	... # third comment
	73	...
	74	... msgid "Special"
	75	... msgstr ""
	76	...
	77	... msgid "Specialist"
	78	... # odd comment
	79	... msgstr ""
	80	... "Expert"
	81	...
	82	... # crazy fuzzy auto translation by msgmerge, using foo for bar
	83	... #, fuzzy
	84	... #\| msgid "some foo string"
	85	... msgid "some bar string."
	86	... msgstr "translation of foo string"
	87	...
	88	... msgid "%d minute"
	89	... msgid_plural "%d minutes"
	90	... msgstr[0] "minut"
	91	... msgstr[1] "minutter"
	92	... msgstr[2] ""
	93	...
	94	... msgid "%d year"
	95	... msgid_plural "%d years"
	96	... msgstr[0] ""
	97	... msgstr[1] ""
	98	...
	99	... # last comment
	100	... ''') + '^^^')
	101	# header comment
	102	<BLANKLINE>
	103	<BLANKLINE>
	104	# comment before header
	105	<BLANKLINE>
	106	msgid ""
	107	msgstr "yada"
	108	"MIME-Version: "
	109	"1.0\n"
	110	<BLANKLINE>
	111	msgid "None"
	112	msgstr "Ingen"
	113	<BLANKLINE>
	114	line 2
	115	<BLANKLINE>
	116	msgid "Specialist"
	117	msgstr ""
	118	"Expert"
	119	<BLANKLINE>
	120	msgid "%d minute"
	121	msgid_plural "%d minutes"
	122	msgstr[0] "minut"
	123	msgstr[1] "minutter"
	124	msgstr[2] ""
	125	^^^
	126	"""
	127	header_start = raw_content.find('\nmsgid ""\n') + 1
	128	header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
	129	chunks = [
	130	header_comment_strip_re.sub('', raw_content[0:header_start])
	131	.strip(),
	132	'',
	133	header_normalize_re.sub('', raw_content[header_start:header_end])
	134	.strip(),
	135	''] # preserve normalized header
	136	# all chunks are separated by empty line
	137	for raw_chunk in raw_content[header_end:].split('\n\n'):
	138	if '\n#, fuzzy' in raw_chunk: # might be like "#, fuzzy, python-format"
	139	continue # drop crazy auto translation that is worse than useless
	140	# strip all comment lines from chunk
	141	chunk_lines = [
	142	line
	143	for line in raw_chunk.splitlines()
	144	if line
	145	and not line.startswith('#')
	146	]
	147	if not chunk_lines:
	148	continue
	149	# check lines starting from first msgstr, skip chunk if no translation lines
	150	msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
	151	if (
	152	chunk_lines[0].startswith('msgid') and
	153	msgstr_i and
	154	all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
	155	): # skip translation chunks that doesn't have any actual translations
	156	continue
	157	chunks.append('\n'.join(chunk_lines) + '\n')
	158	return '\n'.join(chunks)
	159
	160	def _normalize_po_file(po_file, strip=False):
	161	if strip:
	162	po_tmp = po_file + '.tmp'
	163	with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
	164	raw_content = src.read()
	165	normalized_content = _normalize_po(raw_content)
	166	dest.write(normalized_content)
	167	os.rename(po_tmp, po_file)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages