##// END OF EJS Templates
i18n: also strip '# #, fuzzy' from header comment - it might appear when verifying branches are in sync
Mads Kiilerich -
r8203:19735bc6 default
parent child Browse files
Show More
@@ -1,194 +1,197 b''
1 # This program is free software: you can redistribute it and/or modify
1 # This program is free software: you can redistribute it and/or modify
2 # it under the terms of the GNU General Public License as published by
2 # it under the terms of the GNU General Public License as published by
3 # the Free Software Foundation, either version 3 of the License, or
3 # the Free Software Foundation, either version 3 of the License, or
4 # (at your option) any later version.
4 # (at your option) any later version.
5 #
5 #
6 # This program is distributed in the hope that it will be useful,
6 # This program is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 # GNU General Public License for more details.
9 # GNU General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU General Public License
11 # You should have received a copy of the GNU General Public License
12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
12 # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
13
14 from __future__ import print_function
14 from __future__ import print_function
15
15
16 import os
16 import os
17 import re
17 import re
18 import shutil
18 import shutil
19 import subprocess
19 import subprocess
20 import tempfile
20 import tempfile
21
21
22
22
# Module-wide verbosity switch; set from scripts/i18n --debug
do_debug = False

def debug(*args, **kwargs):
    """Print the given arguments, but only when ``do_debug`` is enabled.

    Positional and keyword arguments are forwarded unchanged to ``print``.
    Nothing is printed (and nothing is returned) while ``do_debug`` is false.
    """
    if do_debug:
        print(*args, **kwargs)
28
28
def runcmd(cmd, *args, **kwargs):
    """Run an external command, logging it first when debugging is enabled.

    ``cmd`` is the argument list of the command; it is joined with spaces
    for the debug message only. ``cmd`` and any extra positional and keyword
    arguments are handed to ``subprocess.check_call``, so a non-zero exit
    status raises ``subprocess.CalledProcessError``.
    """
    debug('... Executing command: %s' % ' '.join(cmd))
    subprocess.check_call(cmd, *args, **kwargs)
32
32
# Matches boilerplate lines in the comment block that precedes the PO header
# entry, so they can be removed before comparing files:
# - the "Translations template for Kallithea." title line
# - the "FIRST AUTHOR <EMAIL@ADDRESS>, <year>." placeholder, optionally
#   followed by a bare "#" line
# - a "#, fuzzy" flag line, optionally preceded by a bare "#" line
# - a "# #, fuzzy" line - it might appear when verifying branches are in sync
# The pattern is written in verbose mode, so literal spaces and '#' are
# spelled as character classes.
header_comment_strip_re = re.compile(r'''
    ^
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
    |
    ^
    [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
    (?:[#] \n)?
    |
    ^
    (?:[#] \n)?
    [#],[ ]fuzzy \n
    |
    ^
    [#][ ][#],[ ]fuzzy \n
    ''',
    re.MULTILINE|re.VERBOSE)
46
49
# Matches complete PO header fields whose values vary between tools and runs
# (dates, translator, generator, project version, ...), so that they can be
# dropped from the header. A field value may continue over several quoted
# lines: everything up to the first literal backslash-n, closing quote and
# newline is consumed. Field names are matched case-insensitively.
header_normalize_re = re.compile(r'''
    ^ "
    (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
    [ ][^\\]*\\n
    " \n
    ''',
    re.MULTILINE|re.IGNORECASE|re.VERBOSE)
54
57
def _normalize_po(raw_content):
    r"""Return PO file content reduced to a canonical, comparable form.

    The comment block before the header entry is kept, minus the boilerplate
    matched by ``header_comment_strip_re``. Volatile header fields matched by
    ``header_normalize_re`` are dropped. In the remaining entries, comment
    lines are removed, and fuzzy entries and entries without any actual
    translation are dropped entirely.

    >>> print(_normalize_po(r'''
    ... # header comment
    ...
    ...
    ... # comment before header
    ... msgid ""
    ... msgstr "yada"
    ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
    ... "MIME-Version: "
    ... "1.0\n"
    ... "Last-Translator: Jabba"
    ... "the Hutt\n"
    ... "X-Generator: Weblate 1.2.3\n"
    ...
    ... # comment, but not in header
    ... msgid "None"
    ... msgstr "Ingen"
    ...
    ...
    ... line 2
    ... # third comment
    ...
    ... msgid "Special"
    ... msgstr ""
    ...
    ... msgid "Specialist"
    ... # odd comment
    ... msgstr ""
    ... "Expert"
    ...
    ... # crazy fuzzy auto translation by msgmerge, using foo for bar
    ... #, fuzzy
    ... #| msgid "some foo string"
    ... msgid "some bar string."
    ... msgstr "translation of foo string"
    ...
    ... msgid "%d minute"
    ... msgid_plural "%d minutes"
    ... msgstr[0] "minut"
    ... msgstr[1] "minutter"
    ... msgstr[2] ""
    ...
    ... msgid "%d year"
    ... msgid_plural "%d years"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ...
    ... # last comment
    ... ''') + '^^^')
    # header comment
    <BLANKLINE>
    <BLANKLINE>
    # comment before header
    <BLANKLINE>
    msgid ""
    msgstr "yada"
    "MIME-Version: "
    "1.0\n"
    <BLANKLINE>
    msgid "None"
    msgstr "Ingen"
    <BLANKLINE>
    line 2
    <BLANKLINE>
    msgid "Specialist"
    msgstr ""
    "Expert"
    <BLANKLINE>
    msgid "%d minute"
    msgid_plural "%d minutes"
    msgstr[0] "minut"
    msgstr[1] "minutter"
    msgstr[2] ""
    ^^^
    """
    # The header entry starts at the first line that is exactly 'msgid ""'
    # and runs up to the first empty line after it (or to the end of content).
    header_start = raw_content.find('\nmsgid ""\n') + 1
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
    chunks = [
        header_comment_strip_re.sub('', raw_content[0:header_start])
        .strip(),
        '',
        header_normalize_re.sub('', raw_content[header_start:header_end])
        .replace(
            r'"Content-Type: text/plain; charset=utf-8\n"',
            r'"Content-Type: text/plain; charset=UTF-8\n"')  # maintain msgmerge casing
        .strip(),
        '']  # preserve normalized header
    # all chunks are separated by empty line
    for raw_chunk in raw_content[header_end:].split('\n\n'):
        if '\n#, fuzzy' in raw_chunk:  # might be like "#, fuzzy, python-format"
            continue  # drop crazy auto translation that is worse than useless
        # strip all comment lines from chunk
        chunk_lines = [
            line
            for line in raw_chunk.splitlines()
            if line
            and not line.startswith('#')
        ]
        if not chunk_lines:
            continue
        # check lines starting from first msgstr, skip chunk if no translation lines
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
        if (
            chunk_lines[0].startswith('msgid') and
            msgstr_i and
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
        ):  # skip translation chunks that don't have any actual translations
            continue
        chunks.append('\n'.join(chunk_lines) + '\n')
    return '\n'.join(chunks)
167
170
def _normalize_po_file(po_file, merge_pot_file=None, strip=False):
    """Normalize the PO file at path ``po_file`` in place.

    If ``merge_pot_file`` is given, ``msgmerge --update`` is run first to
    merge that template into ``po_file``; a failing command raises
    ``subprocess.CalledProcessError`` (via ``runcmd``).

    If ``strip`` is true, the file content is passed through
    ``_normalize_po``. The result is written to ``po_file + '.tmp'`` and
    then renamed over ``po_file``.
    """
    if merge_pot_file:
        runcmd(['msgmerge', '--width=76', '--backup=none', '--previous',
                '--update', po_file, '-q', merge_pot_file])
    if strip:
        po_tmp = po_file + '.tmp'
        with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
            raw_content = src.read()
            normalized_content = _normalize_po(raw_content)
            dest.write(normalized_content)
        # Replace the original only after the normalized copy is fully written
        os.rename(po_tmp, po_file)
179
182
def _normalized_diff(file1, file2, merge_pot_file=None, strip=False):
    """Diff two PO files after normalizing temporary copies of them.

    Both files are copied to temporary files, which are normalized with
    ``_normalize_po_file`` (passing on ``merge_pot_file`` and ``strip``) and
    then compared with ``diff -u``. The original files are not modified.

    Returns the exit status of ``diff`` if it is non-zero (the files differ
    or ``diff`` failed), otherwise ``None``.
    """
    # Create temporary copies of both files; the context managers make sure
    # they are closed (and thus removed) even if normalization fails.
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
            tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
        debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
        shutil.copyfile(file1, temp1.name)
        shutil.copyfile(file2, temp2.name)
        # Normalize them in place
        _normalize_po_file(temp1.name, merge_pot_file=merge_pot_file, strip=strip)
        _normalize_po_file(temp2.name, merge_pot_file=merge_pot_file, strip=strip)
        # Now compare
        try:
            runcmd(['diff', '-u', temp1.name, temp2.name])
        except subprocess.CalledProcessError as e:
            return e.returncode
    return None
General Comments 0
You need to be logged in to leave comments. Login now