upstream/kallithea Commit - r8183:ae9d205f

scripts/i18n: add command 'normalize-po-files'...

Thomas De Schampheleire -

r8183:ae9d205f default

parent child

scripts/i18n

0 +19 0

@@ -21,6 +21,14 b' import i18n_utils'
21		21
22	"""	22	"""
23	Tool for maintenance of .po and .pot files	23	Tool for maintenance of .po and .pot files
		24
		25	Normally, the i18n-related files contain for each translatable string a
		26	reference to all the source code locations where this string is found. This
		27	meta data is useful for translators to assess how strings are used, but is not
		28	relevant for normal development nor for running Kallithea. Such meta data, or
		29	derived data like kallithea.pot, will inherently be outdated, and create
		30	unnecessary churn and repository growth, making it harder to spot actual and
		31	important changes.
24	"""	32	"""
25		33
26	@click.group()	34	@click.group()
@@ -30,5 +38,16 b' def cli(debug):'
30	i18n_utils.do_debug = True	38	i18n_utils.do_debug = True
31	pass	39	pass
32		40
		41	@cli.command()
		42	@click.argument('po_files', nargs=-1)
		43	def normalize_po_files(po_files):
		44	"""Normalize the specified .po and .pot files.
		45
		46	Only actual translations and essential headers will be preserved.
		47	"""
		48	for po_file in po_files:
		49	i18n_utils._normalize_po_file(po_file, strip=True)
		50
		51
33	if __name__ == '__main__':	52	if __name__ == '__main__':
34	cli()	53	cli()

scripts/i18n_utils.py

0 +140 0

@@ -13,6 +13,8 b''
13		13
14	from __future__ import print_function	14	from __future__ import print_function
15		15
		16	import os
		17	import re
16	import subprocess	18	import subprocess
17		19
18		20
@@ -25,3 +27,141 b' def debug(*args, **kwargs):'
25	def runcmd(cmd, *args, **kwargs):	27	def runcmd(cmd, *args, **kwargs):
26	debug('... Executing command: %s' % ' '.join(cmd))	28	debug('... Executing command: %s' % ' '.join(cmd))
27	subprocess.check_call(cmd, *args, **kwargs)	29	subprocess.check_call(cmd, *args, **kwargs)
		30
		31	header_comment_strip_re = re.compile(r'''
		32	^
		33	[#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
		34	|
		35	^
		36	[#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
		37	[#] \n
		38	[#],[ ]fuzzy \n
		39	''',
		40	re.MULTILINE|re.VERBOSE)
		41
		42	header_normalize_re = re.compile(r'''
		43	^ "
		44	(POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
		45	[ ][^\\]*\\n
		46	" \n
		47	''',
		48	re.MULTILINE|re.IGNORECASE|re.VERBOSE)
		49
		50	def _normalize_po(raw_content):
		51	r"""
		52	>>> print(_normalize_po(r'''
		53	... # header comment
		54	...
		55	...
		56	... # comment before header
		57	... msgid ""
		58	... msgstr "yada"
		59	... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
		60	... "MIME-Version: "
		61	... "1.0\n"
		62	... "Last-Translator: Jabba"
		63	... "the Hutt\n"
		64	... "X-Generator: Weblate 1.2.3\n"
		65	...
		66	... # comment, but not in header
		67	... msgid "None"
		68	... msgstr "Ingen"
		69	...
		70	...
		71	... line 2
		72	... # third comment
		73	...
		74	... msgid "Special"
		75	... msgstr ""
		76	...
		77	... msgid "Specialist"
		78	... # odd comment
		79	... msgstr ""
		80	... "Expert"
		81	...
		82	... # crazy fuzzy auto translation by msgmerge, using foo for bar
		83	... #, fuzzy
		84	... #| msgid "some foo string"
		85	... msgid "some bar string."
		86	... msgstr "translation of foo string"
		87	...
		88	... msgid "%d minute"
		89	... msgid_plural "%d minutes"
		90	... msgstr[0] "minut"
		91	... msgstr[1] "minutter"
		92	... msgstr[2] ""
		93	...
		94	... msgid "%d year"
		95	... msgid_plural "%d years"
		96	... msgstr[0] ""
		97	... msgstr[1] ""
		98	...
		99	... # last comment
		100	... ''') + '^^^')
		101	# header comment
		102	<BLANKLINE>
		103	<BLANKLINE>
		104	# comment before header
		105	<BLANKLINE>
		106	msgid ""
		107	msgstr "yada"
		108	"MIME-Version: "
		109	"1.0\n"
		110	<BLANKLINE>
		111	msgid "None"
		112	msgstr "Ingen"
		113	<BLANKLINE>
		114	line 2
		115	<BLANKLINE>
		116	msgid "Specialist"
		117	msgstr ""
		118	"Expert"
		119	<BLANKLINE>
		120	msgid "%d minute"
		121	msgid_plural "%d minutes"
		122	msgstr[0] "minut"
		123	msgstr[1] "minutter"
		124	msgstr[2] ""
		125	^^^
		126	"""
		127	header_start = raw_content.find('\nmsgid ""\n') + 1
		128	header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
		129	chunks = [
		130	header_comment_strip_re.sub('', raw_content[0:header_start])
		131	.strip(),
		132	'',
		133	header_normalize_re.sub('', raw_content[header_start:header_end])
		134	.strip(),
		135	''] # preserve normalized header
		136	# all chunks are separated by empty line
		137	for raw_chunk in raw_content[header_end:].split('\n\n'):
		138	if '\n#, fuzzy' in raw_chunk: # might be like "#, fuzzy, python-format"
		139	continue # drop crazy auto translation that is worse than useless
		140	# strip all comment lines from chunk
		141	chunk_lines = [
		142	line
		143	for line in raw_chunk.splitlines()
		144	if line
		145	and not line.startswith('#')
		146	]
		147	if not chunk_lines:
		148	continue
		149	# check lines starting from first msgstr, skip chunk if no translation lines
		150	msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
		151	if (
		152	chunk_lines[0].startswith('msgid') and
		153	msgstr_i and
		154	all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
		155	): # skip translation chunks that doesn't have any actual translations
		156	continue
		157	chunks.append('\n'.join(chunk_lines) + '\n')
		158	return '\n'.join(chunks)
		159
		160	def _normalize_po_file(po_file, strip=False):
		161	if strip:
		162	po_tmp = po_file + '.tmp'
		163	with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
		164	raw_content = src.read()
		165	normalized_content = _normalize_po(raw_content)
		166	dest.write(normalized_content)
		167	os.rename(po_tmp, po_file)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages