##// END OF EJS Templates
scripts/i18n: add command 'normalized-diff'...
Thomas De Schampheleire -
r8184:93dabafa default
parent child Browse files
Show More
@@ -1,53 +1,61 b''
1 1 #!/usr/bin/env python3
2 2
3 3 # -*- coding: utf-8 -*-
4 4 # This program is free software: you can redistribute it and/or modify
5 5 # it under the terms of the GNU General Public License as published by
6 6 # the Free Software Foundation, either version 3 of the License, or
7 7 # (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16
17 import sys
18
17 19 import click
18 20
19 21 import i18n_utils
20 22
21 23
22 24 """
23 25 Tool for maintenance of .po and .pot files
24 26
25 27 Normally, the i18n-related files contain for each translatable string a
26 28 reference to all the source code locations where this string is found. This
27 29 meta data is useful for translators to assess how strings are used, but is not
28 30 relevant for normal development nor for running Kallithea. Such meta data, or
29 31 derived data like kallithea.pot, will inherently be outdated, and create
30 32 unnecessary churn and repository growth, making it harder to spot actual and
31 33 important changes.
32 34 """
33 35
@click.group()
@click.option('--debug/--no-debug', default=False)
def cli(debug):
    # Command group entry point: forward the --debug flag to i18n_utils so
    # that its debug() helper starts printing. The flag is only ever switched
    # on here; --no-debug leaves the module default untouched.
    if debug:
        i18n_utils.do_debug = True
40 42
@cli.command()
@click.argument('po_files', nargs=-1)
def normalize_po_files(po_files):
    """Normalize the specified .po and .pot files.

    Only actual translations and essential headers will be preserved.
    """
    # Each file named on the command line is rewritten in place.
    for path in po_files:
        i18n_utils._normalize_po_file(path, strip=True)
50 52
@cli.command()
@click.argument('file1')
@click.argument('file2')
def normalized_diff(file1, file2):
    """Compare two files while transparently normalizing them."""
    # Whatever the helper returns becomes the exit status of the process.
    exit_status = i18n_utils._normalized_diff(file1, file2, strip=True)
    sys.exit(exit_status)
51 59
if __name__ == '__main__':
    # Dispatch to the click command group only when run as a script.
    cli()
@@ -1,167 +1,185 b''
1 1 # This program is free software: you can redistribute it and/or modify
2 2 # it under the terms of the GNU General Public License as published by
3 3 # the Free Software Foundation, either version 3 of the License, or
4 4 # (at your option) any later version.
5 5 #
6 6 # This program is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 9 # GNU General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU General Public License
12 12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
13 13
14 14 from __future__ import print_function
15 15
16 16 import os
17 17 import re
18 import shutil
18 19 import subprocess
20 import tempfile
19 21
20 22
do_debug = False  # set from scripts/i18n --debug


def debug(*args, **kwargs):
    """Print like print(), but only while the module flag do_debug is true."""
    if not do_debug:
        return
    print(*args, **kwargs)
26 28
def runcmd(cmd, *args, **kwargs):
    """Run the command list cmd, logging it first when debugging is on.

    Extra positional and keyword arguments are handed to
    subprocess.check_call unchanged, so a non-zero exit status surfaces as
    subprocess.CalledProcessError.
    """
    command_line = ' '.join(cmd)
    debug('... Executing command: %s' % command_line)
    subprocess.check_call(cmd, *args, **kwargs)
30 32
31 33 header_comment_strip_re = re.compile(r'''
32 34 ^
33 35 [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
34 36 |
35 37 ^
36 38 [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
37 39 [#] \n
38 40 [#],[ ]fuzzy \n
39 41 ''',
40 42 re.MULTILINE|re.VERBOSE)
41 43
42 44 header_normalize_re = re.compile(r'''
43 45 ^ "
44 46 (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
45 47 [ ][^\\]*\\n
46 48 " \n
47 49 ''',
48 50 re.MULTILINE|re.IGNORECASE|re.VERBOSE)
49 51
50 52 def _normalize_po(raw_content):
51 53 r"""
52 54 >>> print(_normalize_po(r'''
53 55 ... # header comment
54 56 ...
55 57 ...
56 58 ... # comment before header
57 59 ... msgid ""
58 60 ... msgstr "yada"
59 61 ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
60 62 ... "MIME-Version: "
61 63 ... "1.0\n"
62 64 ... "Last-Translator: Jabba"
63 65 ... "the Hutt\n"
64 66 ... "X-Generator: Weblate 1.2.3\n"
65 67 ...
66 68 ... # comment, but not in header
67 69 ... msgid "None"
68 70 ... msgstr "Ingen"
69 71 ...
70 72 ...
71 73 ... line 2
72 74 ... # third comment
73 75 ...
74 76 ... msgid "Special"
75 77 ... msgstr ""
76 78 ...
77 79 ... msgid "Specialist"
78 80 ... # odd comment
79 81 ... msgstr ""
80 82 ... "Expert"
81 83 ...
82 84 ... # crazy fuzzy auto translation by msgmerge, using foo for bar
83 85 ... #, fuzzy
84 86 ... #| msgid "some foo string"
85 87 ... msgid "some bar string."
86 88 ... msgstr "translation of foo string"
87 89 ...
88 90 ... msgid "%d minute"
89 91 ... msgid_plural "%d minutes"
90 92 ... msgstr[0] "minut"
91 93 ... msgstr[1] "minutter"
92 94 ... msgstr[2] ""
93 95 ...
94 96 ... msgid "%d year"
95 97 ... msgid_plural "%d years"
96 98 ... msgstr[0] ""
97 99 ... msgstr[1] ""
98 100 ...
99 101 ... # last comment
100 102 ... ''') + '^^^')
101 103 # header comment
102 104 <BLANKLINE>
103 105 <BLANKLINE>
104 106 # comment before header
105 107 <BLANKLINE>
106 108 msgid ""
107 109 msgstr "yada"
108 110 "MIME-Version: "
109 111 "1.0\n"
110 112 <BLANKLINE>
111 113 msgid "None"
112 114 msgstr "Ingen"
113 115 <BLANKLINE>
114 116 line 2
115 117 <BLANKLINE>
116 118 msgid "Specialist"
117 119 msgstr ""
118 120 "Expert"
119 121 <BLANKLINE>
120 122 msgid "%d minute"
121 123 msgid_plural "%d minutes"
122 124 msgstr[0] "minut"
123 125 msgstr[1] "minutter"
124 126 msgstr[2] ""
125 127 ^^^
126 128 """
127 129 header_start = raw_content.find('\nmsgid ""\n') + 1
128 130 header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
129 131 chunks = [
130 132 header_comment_strip_re.sub('', raw_content[0:header_start])
131 133 .strip(),
132 134 '',
133 135 header_normalize_re.sub('', raw_content[header_start:header_end])
134 136 .strip(),
135 137 ''] # preserve normalized header
136 138 # all chunks are separated by empty line
137 139 for raw_chunk in raw_content[header_end:].split('\n\n'):
138 140 if '\n#, fuzzy' in raw_chunk: # might be like "#, fuzzy, python-format"
139 141 continue # drop crazy auto translation that is worse than useless
140 142 # strip all comment lines from chunk
141 143 chunk_lines = [
142 144 line
143 145 for line in raw_chunk.splitlines()
144 146 if line
145 147 and not line.startswith('#')
146 148 ]
147 149 if not chunk_lines:
148 150 continue
149 151 # check lines starting from first msgstr, skip chunk if no translation lines
150 152 msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
151 153 if (
152 154 chunk_lines[0].startswith('msgid') and
153 155 msgstr_i and
154 156 all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
155 157 ): # skip translation chunks that doesn't have any actual translations
156 158 continue
157 159 chunks.append('\n'.join(chunk_lines) + '\n')
158 160 return '\n'.join(chunks)
159 161
def _normalize_po_file(po_file, strip=False):
    """Rewrite po_file in place with its normalized content.

    Nothing happens unless strip is true. The normalized text is first
    written to a sibling file named po_file + '.tmp', which is then renamed
    over po_file.
    """
    if not strip:
        return
    po_tmp = po_file + '.tmp'
    with open(po_file, 'r') as src:
        with open(po_tmp, 'w') as dest:
            dest.write(_normalize_po(src.read()))
    # Both files are closed at this point; replace the original.
    os.rename(po_tmp, po_file)
170
def _normalized_diff(file1, file2, strip=False):
    """Run 'diff -u' on normalized temporary copies of file1 and file2.

    The originals are never modified: each file is copied to a temporary
    file, the copy is normalized in place (only when strip is true, see
    _normalize_po_file) and the two copies are compared.

    Returns 0 when diff exits successfully, otherwise the non-zero exit
    status that diff reported. The value is suitable for sys.exit().
    """
    # Create temporary copies of both files. The context managers make sure
    # the temporary files are closed and removed when the function returns,
    # instead of leaving that to garbage collection.
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1:
        with tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
            debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
            shutil.copyfile(file1, temp1.name)
            shutil.copyfile(file2, temp2.name)
            # Normalize them in place
            _normalize_po_file(temp1.name, strip=strip)
            _normalize_po_file(temp2.name, strip=strip)
            # Now compare; runcmd raises when diff exits with non-zero status
            try:
                runcmd(['diff', '-u', temp1.name, temp2.name])
            except subprocess.CalledProcessError as e:
                return e.returncode
            return 0
General Comments 0
You need to be logged in to leave comments. Login now