##// END OF EJS Templates
scripts/i18n: add command 'normalize-po-files'...
Thomas De Schampheleire -
r8183:ae9d205f default
parent child Browse files
Show More
@@ -21,6 +21,14 b' import i18n_utils'
21 21
22 22 """
23 23 Tool for maintenance of .po and .pot files
24
25 Normally, the i18n-related files contain for each translatable string a
26 reference to all the source code locations where this string is found. This
27 meta data is useful for translators to assess how strings are used, but is not
28 relevant for normal development nor for running Kallithea. Such meta data, or
29 derived data like kallithea.pot, will inherently be outdated, and create
30 unnecessary churn and repository growth, making it harder to spot actual and
31 important changes.
24 32 """
25 33
26 34 @click.group()
@@ -30,5 +38,16 b' def cli(debug):'
30 38 i18n_utils.do_debug = True
31 39 pass
32 40
@cli.command()
@click.argument('po_files', nargs=-1)
def normalize_po_files(po_files):
    """Normalize the specified .po and .pot files.

    Only actual translations and essential headers will be preserved.
    """
    # 'po_files' is declared with nargs=-1, so it holds every path given on
    # the command line (possibly none, in which case nothing happens).
    for path in po_files:
        # strip=True asks the helper to rewrite the file in normalized form;
        # the helper does nothing when strip is left at its default.
        i18n_utils._normalize_po_file(path, strip=True)
50
51
33 52 if __name__ == '__main__':
34 53 cli()
@@ -13,6 +13,8 b''
13 13
14 14 from __future__ import print_function
15 15
16 import os
17 import re
16 18 import subprocess
17 19
18 20
@@ -25,3 +27,141 b' def debug(*args, **kwargs):'
25 27 def runcmd(cmd, *args, **kwargs):
26 28 debug('... Executing command: %s' % ' '.join(cmd))
27 29 subprocess.check_call(cmd, *args, **kwargs)
30
31 header_comment_strip_re = re.compile(r'''
32 ^
33 [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
34 |
35 ^
36 [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
37 [#] \n
38 [#],[ ]fuzzy \n
39 ''',
40 re.MULTILINE|re.VERBOSE)
41
42 header_normalize_re = re.compile(r'''
43 ^ "
44 (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
45 [ ][^\\]*\\n
46 " \n
47 ''',
48 re.MULTILINE|re.IGNORECASE|re.VERBOSE)
49
50 def _normalize_po(raw_content):
51 r"""
52 >>> print(_normalize_po(r'''
53 ... # header comment
54 ...
55 ...
56 ... # comment before header
57 ... msgid ""
58 ... msgstr "yada"
59 ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
60 ... "MIME-Version: "
61 ... "1.0\n"
62 ... "Last-Translator: Jabba"
63 ... "the Hutt\n"
64 ... "X-Generator: Weblate 1.2.3\n"
65 ...
66 ... # comment, but not in header
67 ... msgid "None"
68 ... msgstr "Ingen"
69 ...
70 ...
71 ... line 2
72 ... # third comment
73 ...
74 ... msgid "Special"
75 ... msgstr ""
76 ...
77 ... msgid "Specialist"
78 ... # odd comment
79 ... msgstr ""
80 ... "Expert"
81 ...
82 ... # crazy fuzzy auto translation by msgmerge, using foo for bar
83 ... #, fuzzy
84 ... #| msgid "some foo string"
85 ... msgid "some bar string."
86 ... msgstr "translation of foo string"
87 ...
88 ... msgid "%d minute"
89 ... msgid_plural "%d minutes"
90 ... msgstr[0] "minut"
91 ... msgstr[1] "minutter"
92 ... msgstr[2] ""
93 ...
94 ... msgid "%d year"
95 ... msgid_plural "%d years"
96 ... msgstr[0] ""
97 ... msgstr[1] ""
98 ...
99 ... # last comment
100 ... ''') + '^^^')
101 # header comment
102 <BLANKLINE>
103 <BLANKLINE>
104 # comment before header
105 <BLANKLINE>
106 msgid ""
107 msgstr "yada"
108 "MIME-Version: "
109 "1.0\n"
110 <BLANKLINE>
111 msgid "None"
112 msgstr "Ingen"
113 <BLANKLINE>
114 line 2
115 <BLANKLINE>
116 msgid "Specialist"
117 msgstr ""
118 "Expert"
119 <BLANKLINE>
120 msgid "%d minute"
121 msgid_plural "%d minutes"
122 msgstr[0] "minut"
123 msgstr[1] "minutter"
124 msgstr[2] ""
125 ^^^
126 """
127 header_start = raw_content.find('\nmsgid ""\n') + 1
128 header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
129 chunks = [
130 header_comment_strip_re.sub('', raw_content[0:header_start])
131 .strip(),
132 '',
133 header_normalize_re.sub('', raw_content[header_start:header_end])
134 .strip(),
135 ''] # preserve normalized header
136 # all chunks are separated by empty line
137 for raw_chunk in raw_content[header_end:].split('\n\n'):
138 if '\n#, fuzzy' in raw_chunk: # might be like "#, fuzzy, python-format"
139 continue # drop crazy auto translation that is worse than useless
140 # strip all comment lines from chunk
141 chunk_lines = [
142 line
143 for line in raw_chunk.splitlines()
144 if line
145 and not line.startswith('#')
146 ]
147 if not chunk_lines:
148 continue
149 # check lines starting from first msgstr, skip chunk if no translation lines
150 msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
151 if (
152 chunk_lines[0].startswith('msgid') and
153 msgstr_i and
154 all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
155 ): # skip translation chunks that doesn't have any actual translations
156 continue
157 chunks.append('\n'.join(chunk_lines) + '\n')
158 return '\n'.join(chunks)
159
def _normalize_po_file(po_file, strip=False):
    """Rewrite po_file with its normalized content when strip is true.

    The normalized content is first written to a sibling file named
    po_file + '.tmp', which is then renamed over po_file. When strip is
    false, the file is left untouched.
    """
    if not strip:
        return
    tmp_path = po_file + '.tmp'
    with open(po_file, 'r') as original, open(tmp_path, 'w') as rewritten:
        rewritten.write(_normalize_po(original.read()))
    # both files are closed at this point; move the result into place
    os.rename(tmp_path, po_file)
General Comments 0
You need to be logged in to leave comments. Login now