##// END OF EJS Templates
scripts/i18n: add command 'normalize-po-files'...
Thomas De Schampheleire -
r8183:ae9d205f default
parent child Browse files
Show More
@@ -1,34 +1,53 b''
1 #!/usr/bin/env python3
1 #!/usr/bin/env python3
2
2
3 # -*- coding: utf-8 -*-
3 # -*- coding: utf-8 -*-
4 # This program is free software: you can redistribute it and/or modify
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
7 # (at your option) any later version.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU General Public License
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
17 import click
17 import click
18
18
19 import i18n_utils
19 import i18n_utils
20
20
21
21
22 """
22 """
23 Tool for maintenance of .po and .pot files
23 Tool for maintenance of .po and .pot files
24
25 Normally, the i18n-related files contain for each translatable string a
26 reference to all the source code locations where this string is found. This
27 meta data is useful for translators to assess how strings are used, but is not
28 relevant for normal development nor for running Kallithea. Such meta data, or
29 derived data like kallithea.pot, will inherently be outdated, and create
30 unnecessary churn and repository growth, making it harder to spot actual and
31 important changes.
24 """
32 """
25
33
@click.group()
@click.option('--debug/--no-debug', default=False)
def cli(debug):
    # Root command group of the i18n maintenance tool; sub-commands attach
    # themselves with @cli.command(). Described in a comment rather than a
    # docstring so that no new help text is attached to the group.
    if not debug:
        return
    # --debug was given: turn on the tracing output in i18n_utils.
    i18n_utils.do_debug = True
32
40
@cli.command()
@click.argument('po_files', nargs=-1)
def normalize_po_files(po_files):
    """Normalize the specified .po and .pot files.

    Only actual translations and essential headers will be preserved.
    """
    # nargs=-1 collects any number of paths; each file is rewritten in place
    # with stripping enabled.
    for path in po_files:
        i18n_utils._normalize_po_file(path, strip=True)
50
51
33 if __name__ == '__main__':
52 if __name__ == '__main__':
34 cli()
53 cli()
@@ -1,27 +1,167 b''
1 # This program is free software: you can redistribute it and/or modify
1 # This program is free software: you can redistribute it and/or modify
2 # it under the terms of the GNU General Public License as published by
2 # it under the terms of the GNU General Public License as published by
3 # the Free Software Foundation, either version 3 of the License, or
3 # the Free Software Foundation, either version 3 of the License, or
4 # (at your option) any later version.
4 # (at your option) any later version.
5 #
5 #
6 # This program is distributed in the hope that it will be useful,
6 # This program is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 # GNU General Public License for more details.
9 # GNU General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU General Public License
11 # You should have received a copy of the GNU General Public License
12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
13
14 from __future__ import print_function
14 from __future__ import print_function
15
15
16 import os
17 import re
16 import subprocess
18 import subprocess
17
19
18
20
do_debug = False # set from scripts/i18n --debug


def debug(*args, **kwargs):
    """Forward all arguments to print(), but only while do_debug is true.

    When the module-level do_debug flag is false the call is a silent no-op.
    """
    if not do_debug:
        return
    print(*args, **kwargs)
24
26
def runcmd(cmd, *args, **kwargs):
    """Run an external command, tracing it first when debugging is enabled.

    cmd is a sequence of strings (it is joined with spaces for the trace
    message). Any further positional and keyword arguments are handed to
    subprocess.check_call unchanged, so a failing command raises whatever
    check_call raises. Nothing is returned.
    """
    command_line = ' '.join(cmd)
    debug('... Executing command: %s' % command_line)
    subprocess.check_call(cmd, *args, **kwargs)
30
31 header_comment_strip_re = re.compile(r'''
32 ^
33 [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
34 |
35 ^
36 [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
37 [#] \n
38 [#],[ ]fuzzy \n
39 ''',
40 re.MULTILINE|re.VERBOSE)
41
42 header_normalize_re = re.compile(r'''
43 ^ "
44 (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
45 [ ][^\\]*\\n
46 " \n
47 ''',
48 re.MULTILINE|re.IGNORECASE|re.VERBOSE)
49
50 def _normalize_po(raw_content):
51 r"""
52 >>> print(_normalize_po(r'''
53 ... # header comment
54 ...
55 ...
56 ... # comment before header
57 ... msgid ""
58 ... msgstr "yada"
59 ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
60 ... "MIME-Version: "
61 ... "1.0\n"
62 ... "Last-Translator: Jabba"
63 ... "the Hutt\n"
64 ... "X-Generator: Weblate 1.2.3\n"
65 ...
66 ... # comment, but not in header
67 ... msgid "None"
68 ... msgstr "Ingen"
69 ...
70 ...
71 ... line 2
72 ... # third comment
73 ...
74 ... msgid "Special"
75 ... msgstr ""
76 ...
77 ... msgid "Specialist"
78 ... # odd comment
79 ... msgstr ""
80 ... "Expert"
81 ...
82 ... # crazy fuzzy auto translation by msgmerge, using foo for bar
83 ... #, fuzzy
84 ... #| msgid "some foo string"
85 ... msgid "some bar string."
86 ... msgstr "translation of foo string"
87 ...
88 ... msgid "%d minute"
89 ... msgid_plural "%d minutes"
90 ... msgstr[0] "minut"
91 ... msgstr[1] "minutter"
92 ... msgstr[2] ""
93 ...
94 ... msgid "%d year"
95 ... msgid_plural "%d years"
96 ... msgstr[0] ""
97 ... msgstr[1] ""
98 ...
99 ... # last comment
100 ... ''') + '^^^')
101 # header comment
102 <BLANKLINE>
103 <BLANKLINE>
104 # comment before header
105 <BLANKLINE>
106 msgid ""
107 msgstr "yada"
108 "MIME-Version: "
109 "1.0\n"
110 <BLANKLINE>
111 msgid "None"
112 msgstr "Ingen"
113 <BLANKLINE>
114 line 2
115 <BLANKLINE>
116 msgid "Specialist"
117 msgstr ""
118 "Expert"
119 <BLANKLINE>
120 msgid "%d minute"
121 msgid_plural "%d minutes"
122 msgstr[0] "minut"
123 msgstr[1] "minutter"
124 msgstr[2] ""
125 ^^^
126 """
127 header_start = raw_content.find('\nmsgid ""\n') + 1
128 header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
129 chunks = [
130 header_comment_strip_re.sub('', raw_content[0:header_start])
131 .strip(),
132 '',
133 header_normalize_re.sub('', raw_content[header_start:header_end])
134 .strip(),
135 ''] # preserve normalized header
136 # all chunks are separated by empty line
137 for raw_chunk in raw_content[header_end:].split('\n\n'):
138 if '\n#, fuzzy' in raw_chunk: # might be like "#, fuzzy, python-format"
139 continue # drop crazy auto translation that is worse than useless
140 # strip all comment lines from chunk
141 chunk_lines = [
142 line
143 for line in raw_chunk.splitlines()
144 if line
145 and not line.startswith('#')
146 ]
147 if not chunk_lines:
148 continue
149 # check lines starting from first msgstr, skip chunk if no translation lines
150 msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
151 if (
152 chunk_lines[0].startswith('msgid') and
153 msgstr_i and
154 all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
155 ): # skip translation chunks that doesn't have any actual translations
156 continue
157 chunks.append('\n'.join(chunk_lines) + '\n')
158 return '\n'.join(chunks)
159
160 def _normalize_po_file(po_file, strip=False):
161 if strip:
162 po_tmp = po_file + '.tmp'
163 with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
164 raw_content = src.read()
165 normalized_content = _normalize_po(raw_content)
166 dest.write(normalized_content)
167 os.rename(po_tmp, po_file)
General Comments 0
You need to be logged in to leave comments. Login now