##// END OF EJS Templates
scripts/i18n: also normalize casing of UTF-8 in Content-Type...
Mads Kiilerich -
r8187:30e137b4 default
parent child Browse files
Show More
@@ -1,188 +1,191 b''
1 # This program is free software: you can redistribute it and/or modify
1 # This program is free software: you can redistribute it and/or modify
2 # it under the terms of the GNU General Public License as published by
2 # it under the terms of the GNU General Public License as published by
3 # the Free Software Foundation, either version 3 of the License, or
3 # the Free Software Foundation, either version 3 of the License, or
4 # (at your option) any later version.
4 # (at your option) any later version.
5 #
5 #
6 # This program is distributed in the hope that it will be useful,
6 # This program is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 # GNU General Public License for more details.
9 # GNU General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU General Public License
11 # You should have received a copy of the GNU General Public License
12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
13
14 from __future__ import print_function
14 from __future__ import print_function
15
15
16 import os
16 import os
17 import re
17 import re
18 import shutil
18 import shutil
19 import subprocess
19 import subprocess
20 import tempfile
20 import tempfile
21
21
22
22
do_debug = False  # set from scripts/i18n --debug

def debug(*args, **kwargs):
    """Print like ``print()``, but only when the module flag ``do_debug`` is true.

    All positional and keyword arguments are passed through to ``print``
    unchanged; when ``do_debug`` is false the call does nothing.
    """
    if do_debug:
        print(*args, **kwargs)
28
28
def runcmd(cmd, *args, **kwargs):
    """Run the external command ``cmd`` and fail if it exits non-zero.

    ``cmd`` is a list of strings (program followed by its arguments); it is
    logged through ``debug`` before being executed. Any further positional
    and keyword arguments are passed on to ``subprocess.check_call``.

    Raises ``subprocess.CalledProcessError`` if the command returns a
    non-zero exit status.
    """
    debug('... Executing command: %s' % ' '.join(cmd))
    subprocess.check_call(cmd, *args, **kwargs)
32
32
# Matches boilerplate comment lines: either the single line
# "# Translations template for Kallithea." or the three-line group
# "# FIRST AUTHOR <EMAIL@ADDRESS>, <digits>." / "#" / "#, fuzzy".
# "[#]" and "[ ]" are used because a bare "#" or space has special meaning
# (comment / ignored) in re.VERBOSE patterns.
header_comment_strip_re = re.compile(r'''
    ^
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
    |
    ^
    [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
    [#] \n
    [#],[ ]fuzzy \n
    ''',
    re.MULTILINE|re.VERBOSE)

# Matches a complete quoted header field (case-insensitively) for one of the
# listed field names. "[^\\]*" consumes everything up to the first backslash,
# so a value that is wrapped over several quoted lines is matched up to and
# including the line that ends with the literal "\n" escape.
header_normalize_re = re.compile(r'''
    ^ "
    (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
    [ ][^\\]*\\n
    " \n
    ''',
    re.MULTILINE|re.IGNORECASE|re.VERBOSE)
51
51
def _normalize_po(raw_content):
    r"""Return the .po file text ``raw_content`` in normalized form.

    The text before the header entry has the boilerplate matched by
    ``header_comment_strip_re`` removed. In the header entry, the fields
    matched by ``header_normalize_re`` are dropped and a lower-case
    ``charset=utf-8`` Content-Type is rewritten to ``charset=UTF-8``. In the
    entries after the header, comment lines are removed, and entries that are
    marked fuzzy or have no translation at all are dropped.

    >>> print(_normalize_po(r'''
    ... # header comment
    ...
    ...
    ... # comment before header
    ... msgid ""
    ... msgstr "yada"
    ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
    ... "MIME-Version: "
    ... "1.0\n"
    ... "Last-Translator: Jabba"
    ... "the Hutt\n"
    ... "X-Generator: Weblate 1.2.3\n"
    ...
    ... # comment, but not in header
    ... msgid "None"
    ... msgstr "Ingen"
    ...
    ...
    ... line 2
    ... # third comment
    ...
    ... msgid "Special"
    ... msgstr ""
    ...
    ... msgid "Specialist"
    ... # odd comment
    ... msgstr ""
    ... "Expert"
    ...
    ... # crazy fuzzy auto translation by msgmerge, using foo for bar
    ... #, fuzzy
    ... #| msgid "some foo string"
    ... msgid "some bar string."
    ... msgstr "translation of foo string"
    ...
    ... msgid "%d minute"
    ... msgid_plural "%d minutes"
    ... msgstr[0] "minut"
    ... msgstr[1] "minutter"
    ... msgstr[2] ""
    ...
    ... msgid "%d year"
    ... msgid_plural "%d years"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ...
    ... # last comment
    ... ''') + '^^^')
    # header comment
    <BLANKLINE>
    <BLANKLINE>
    # comment before header
    <BLANKLINE>
    msgid ""
    msgstr "yada"
    "MIME-Version: "
    "1.0\n"
    <BLANKLINE>
    msgid "None"
    msgstr "Ingen"
    <BLANKLINE>
    line 2
    <BLANKLINE>
    msgid "Specialist"
    msgstr ""
    "Expert"
    <BLANKLINE>
    msgid "%d minute"
    msgid_plural "%d minutes"
    msgstr[0] "minut"
    msgstr[1] "minutter"
    msgstr[2] ""
    ^^^
    """
    # The header entry starts at the first line that is exactly 'msgid ""'.
    # If there is none, find() gives -1 and the "+ 1" makes the start 0, so
    # the part before the header is empty.
    header_start = raw_content.find('\nmsgid ""\n') + 1
    # The header ends at the first empty line after its start; if there is no
    # empty line ("find() + 1" is 0), it extends to the end of the content.
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
    chunks = [
        header_comment_strip_re.sub('', raw_content[0:header_start])
            .strip(),
        '',
        header_normalize_re.sub('', raw_content[header_start:header_end])
            .replace(
                r'"Content-Type: text/plain; charset=utf-8\n"',
                r'"Content-Type: text/plain; charset=UTF-8\n"')  # maintain msgmerge casing
            .strip(),
        '']  # preserve normalized header
    # all chunks are separated by empty line
    for raw_chunk in raw_content[header_end:].split('\n\n'):
        if '\n#, fuzzy' in raw_chunk:  # might be like "#, fuzzy, python-format"
            continue  # drop crazy auto translation that is worse than useless
        # strip all comment lines from chunk
        chunk_lines = [
            line
            for line in raw_chunk.splitlines()
            if line
            and not line.startswith('#')
        ]
        if not chunk_lines:
            continue
        # check lines starting from first msgstr, skip chunk if no translation lines
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
        if (
            chunk_lines[0].startswith('msgid') and
            msgstr_i and
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
        ):  # skip translation chunks that don't have any actual translations
            continue
        chunks.append('\n'.join(chunk_lines) + '\n')
    return '\n'.join(chunks)
164
162 def _normalize_po_file(po_file, merge_pot_file=None, strip=False):
165 def _normalize_po_file(po_file, merge_pot_file=None, strip=False):
163 if merge_pot_file:
166 if merge_pot_file:
164 runcmd(['msgmerge', '--width=76', '--backup=none', '--previous',
167 runcmd(['msgmerge', '--width=76', '--backup=none', '--previous',
165 '--update', po_file, '-q', merge_pot_file])
168 '--update', po_file, '-q', merge_pot_file])
166 if strip:
169 if strip:
167 po_tmp = po_file + '.tmp'
170 po_tmp = po_file + '.tmp'
168 with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
171 with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
169 raw_content = src.read()
172 raw_content = src.read()
170 normalized_content = _normalize_po(raw_content)
173 normalized_content = _normalize_po(raw_content)
171 dest.write(normalized_content)
174 dest.write(normalized_content)
172 os.rename(po_tmp, po_file)
175 os.rename(po_tmp, po_file)
173
176
174 def _normalized_diff(file1, file2, merge_pot_file=None, strip=False):
def _normalized_diff(file1, file2, merge_pot_file=None, strip=False):
    """Show a unified diff of normalized copies of ``file1`` and ``file2``.

    Both files are copied to temporary files, the copies are normalized with
    ``_normalize_po_file`` (passing ``merge_pot_file`` and ``strip`` through),
    and ``diff -u`` is run on the results. The input files are not modified.

    Returns the non-zero exit status of ``diff`` if it reports differences
    or fails, and None if ``diff`` exits with status 0.
    """
    # Create temporary copies of both files; the context managers make sure
    # they are closed and removed again when done.
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
            tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
        debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
        shutil.copyfile(file1, temp1.name)
        shutil.copyfile(file2, temp2.name)
        # Normalize them in place
        _normalize_po_file(temp1.name, merge_pot_file=merge_pot_file, strip=strip)
        _normalize_po_file(temp2.name, merge_pot_file=merge_pot_file, strip=strip)
        # Now compare
        try:
            runcmd(['diff', '-u', temp1.name, temp2.name])
        except subprocess.CalledProcessError as e:
            return e.returncode
General Comments 0
You need to be logged in to leave comments. Login now