##// END OF EJS Templates
scripts/i18n: add command 'normalized-diff'...
Thomas De Schampheleire -
r8184:93dabafa default
parent child Browse files
Show More
@@ -1,53 +1,61 b''
1 #!/usr/bin/env python3
1 #!/usr/bin/env python3
2
2
3 # -*- coding: utf-8 -*-
3 # -*- coding: utf-8 -*-
4 # This program is free software: you can redistribute it and/or modify
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
7 # (at your option) any later version.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU General Public License
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
17 import sys
18
17 import click
19 import click
18
20
19 import i18n_utils
21 import i18n_utils
20
22
21
23
22 """
24 """
23 Tool for maintenance of .po and .pot files
25 Tool for maintenance of .po and .pot files
24
26
25 Normally, the i18n-related files contain for each translatable string a
27 Normally, the i18n-related files contain for each translatable string a
26 reference to all the source code locations where this string is found. This
28 reference to all the source code locations where this string is found. This
27 meta data is useful for translators to assess how strings are used, but is not
29 meta data is useful for translators to assess how strings are used, but is not
28 relevant for normal development nor for running Kallithea. Such meta data, or
30 relevant for normal development nor for running Kallithea. Such meta data, or
29 derived data like kallithea.pot, will inherently be outdated, and create
31 derived data like kallithea.pot, will inherently be outdated, and create
30 unnecessary churn and repository growth, making it harder to spot actual and
32 unnecessary churn and repository growth, making it harder to spot actual and
31 important changes.
33 important changes.
32 """
34 """
33
35
@click.group()
@click.option('--debug/--no-debug', default=False)
def cli(debug):
    """Maintenance tool for .po and .pot translation files."""
    # The flag is forwarded to i18n_utils as a module-level switch; it is only
    # ever turned on here, never reset to False.
    if debug:
        i18n_utils.do_debug = True
40
42
@cli.command()
@click.argument('po_files', nargs=-1)
def normalize_po_files(po_files):
    """Normalize the specified .po and .pot files.

    Only actual translations and essential headers will be preserved.
    """
    # Each file is rewritten in place; strip=True is what makes
    # _normalize_po_file actually modify the file.
    for po_file in po_files:
        i18n_utils._normalize_po_file(po_file, strip=True)
50
52
@cli.command()
@click.argument('file1')
@click.argument('file2')
def normalized_diff(file1, file2):
    """Compare two files while transparently normalizing them.

    The process exit status is whatever i18n_utils._normalized_diff returns.
    """
    sys.exit(i18n_utils._normalized_diff(file1, file2, strip=True))
51
59
# Run the click command group only when executed as a script.
if __name__ == '__main__':
    cli()
@@ -1,167 +1,185 b''
1 # This program is free software: you can redistribute it and/or modify
1 # This program is free software: you can redistribute it and/or modify
2 # it under the terms of the GNU General Public License as published by
2 # it under the terms of the GNU General Public License as published by
3 # the Free Software Foundation, either version 3 of the License, or
3 # the Free Software Foundation, either version 3 of the License, or
4 # (at your option) any later version.
4 # (at your option) any later version.
5 #
5 #
6 # This program is distributed in the hope that it will be useful,
6 # This program is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 # GNU General Public License for more details.
9 # GNU General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU General Public License
11 # You should have received a copy of the GNU General Public License
12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
13
14 from __future__ import print_function
14 from __future__ import print_function
15
15
16 import os
16 import os
17 import re
17 import re
18 import shutil
18 import subprocess
19 import subprocess
20 import tempfile
19
21
20
22
do_debug = False  # set from scripts/i18n --debug


def debug(*args, **kwargs):
    """Print the arguments via print(), but only when do_debug is true.

    All positional and keyword arguments are passed to print() unchanged.
    """
    if do_debug:
        print(*args, **kwargs)
26
28
def runcmd(cmd, *args, **kwargs):
    """Run an external command, logging it first when debugging is enabled.

    cmd is a list of strings (it is joined with spaces for the debug message).
    Extra arguments are passed on to subprocess.check_call, which raises
    subprocess.CalledProcessError when the command exits with non-zero status.
    """
    debug('... Executing command: %s' % ' '.join(cmd))
    subprocess.check_call(cmd, *args, **kwargs)
30
32
# Matches boilerplate comment lines found before the header entry: either the
# "Translations template" title line, or the three-line "FIRST AUTHOR"
# placeholder block ending in "#, fuzzy". The pattern is in verbose mode, so
# literal spaces and '#' are written as character classes.
header_comment_strip_re = re.compile(r'''
    ^
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
    |
    ^
    [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
    [#] \n
    [#],[ ]fuzzy \n
    ''',
    re.MULTILINE | re.VERBOSE)
41
43
# Matches volatile header fields (dates, translator, tool versions, ...) in
# the header entry, case-insensitively. A field starts with '"Name: ' at the
# beginning of a line and runs up to the first literal backslash-n followed by
# '"' and a newline; since [^\\]* also matches quotes and newlines, a value
# continued on following quoted lines is matched as a whole.
header_normalize_re = re.compile(r'''
    ^ "
    (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
    [ ][^\\]*\\n
    " \n
    ''',
    re.MULTILINE | re.IGNORECASE | re.VERBOSE)
49
51
def _normalize_po(raw_content):
    r"""Return the content of a .po/.pot file in normalized form.

    Boilerplate header comments and volatile header fields are removed, as are
    comment lines inside entries, entries marked fuzzy, and entries without
    any actual translation. The remaining chunks are joined with empty lines.

    >>> print(_normalize_po(r'''
    ... # header comment
    ...
    ...
    ... # comment before header
    ... msgid ""
    ... msgstr "yada"
    ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
    ... "MIME-Version: "
    ... "1.0\n"
    ... "Last-Translator: Jabba"
    ... "the Hutt\n"
    ... "X-Generator: Weblate 1.2.3\n"
    ...
    ... # comment, but not in header
    ... msgid "None"
    ... msgstr "Ingen"
    ...
    ...
    ... line 2
    ... # third comment
    ...
    ... msgid "Special"
    ... msgstr ""
    ...
    ... msgid "Specialist"
    ... # odd comment
    ... msgstr ""
    ... "Expert"
    ...
    ... # crazy fuzzy auto translation by msgmerge, using foo for bar
    ... #, fuzzy
    ... #| msgid "some foo string"
    ... msgid "some bar string."
    ... msgstr "translation of foo string"
    ...
    ... msgid "%d minute"
    ... msgid_plural "%d minutes"
    ... msgstr[0] "minut"
    ... msgstr[1] "minutter"
    ... msgstr[2] ""
    ...
    ... msgid "%d year"
    ... msgid_plural "%d years"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ...
    ... # last comment
    ... ''') + '^^^')
    # header comment
    <BLANKLINE>
    <BLANKLINE>
    # comment before header
    <BLANKLINE>
    msgid ""
    msgstr "yada"
    "MIME-Version: "
    "1.0\n"
    <BLANKLINE>
    msgid "None"
    msgstr "Ingen"
    <BLANKLINE>
    line 2
    <BLANKLINE>
    msgid "Specialist"
    msgstr ""
    "Expert"
    <BLANKLINE>
    msgid "%d minute"
    msgid_plural "%d minutes"
    msgstr[0] "minut"
    msgstr[1] "minutter"
    msgstr[2] ""
    ^^^
    """
    # The header entry starts at the first line that is exactly 'msgid ""'.
    # If there is no such line, find() gives -1 and the header starts at 0.
    header_start = raw_content.find('\nmsgid ""\n') + 1
    # The header ends at the first empty line after its start; when there is
    # none, find() + 1 is 0 (falsy) and the header runs to the end of content.
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
    chunks = [
        header_comment_strip_re.sub('', raw_content[0:header_start])
            .strip(),
        '',
        header_normalize_re.sub('', raw_content[header_start:header_end])
            .strip(),
        '']  # preserve normalized header
    # all chunks are separated by empty line
    for raw_chunk in raw_content[header_end:].split('\n\n'):
        if '\n#, fuzzy' in raw_chunk:  # might be like "#, fuzzy, python-format"
            continue  # drop crazy auto translation that is worse than useless
        # strip all comment lines (and empty lines) from chunk
        chunk_lines = [
            line
            for line in raw_chunk.splitlines()
            if line
            and not line.startswith('#')
        ]
        if not chunk_lines:
            continue
        # check lines starting from first msgstr, skip chunk if no translation lines
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
        if (
            chunk_lines[0].startswith('msgid') and
            msgstr_i and
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
        ):  # skip translation chunks that don't have any actual translations
            continue
        chunks.append('\n'.join(chunk_lines) + '\n')
    return '\n'.join(chunks)
159
161
160 def _normalize_po_file(po_file, strip=False):
162 def _normalize_po_file(po_file, strip=False):
161 if strip:
163 if strip:
162 po_tmp = po_file + '.tmp'
164 po_tmp = po_file + '.tmp'
163 with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
165 with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
164 raw_content = src.read()
166 raw_content = src.read()
165 normalized_content = _normalize_po(raw_content)
167 normalized_content = _normalize_po(raw_content)
166 dest.write(normalized_content)
168 dest.write(normalized_content)
167 os.rename(po_tmp, po_file)
169 os.rename(po_tmp, po_file)
170
def _normalized_diff(file1, file2, strip=False):
    """Run 'diff -u' on normalized temporary copies of file1 and file2.

    The input files themselves are not modified. strip is passed on to
    _normalize_po_file for both copies.

    Returns 0 when diff exits successfully, otherwise diff's non-zero exit
    status.
    """
    # Create temporary copies of both files; the context managers make sure
    # they are closed (and thereby removed) on every exit path.
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
            tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
        debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
        shutil.copyfile(file1, temp1.name)
        shutil.copyfile(file2, temp2.name)
        # Normalize them in place
        _normalize_po_file(temp1.name, strip=strip)
        _normalize_po_file(temp2.name, strip=strip)
        # Now compare
        try:
            runcmd(['diff', '-u', temp1.name, temp2.name])
        except subprocess.CalledProcessError as e:
            return e.returncode
        return 0
General Comments 0
You need to be logged in to leave comments. Login now