##// END OF EJS Templates
i18n: prevent msgmerge fuzzy matching - it is too random
Mads Kiilerich -
r8776:36a36ebd stable
parent child Browse files
Show More
@@ -1,195 +1,195 b''
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
13
13
14 import os
14 import os
15 import re
15 import re
16 import shutil
16 import shutil
17 import subprocess
17 import subprocess
18 import tempfile
18 import tempfile
19
19
20
20
do_debug = False  # set from scripts/i18n --debug

def debug(*args, **kwargs):
    """Print a diagnostic message, but only when ``do_debug`` is enabled.

    All positional and keyword arguments are forwarded unchanged to
    ``print()``. While the module-level ``do_debug`` flag is false the call
    does nothing.
    """
    if do_debug:
        print(*args, **kwargs)
26
26
def runcmd(cmd, *args, **kwargs):
    """Run an external command and fail loudly if it does not succeed.

    ``cmd`` is a sequence of strings (program followed by its arguments); it
    is logged through ``debug()`` and then handed to
    ``subprocess.check_call`` together with any extra positional and keyword
    arguments.

    Raises ``subprocess.CalledProcessError`` when the command exits with a
    non-zero status.
    """
    debug('... Executing command: %s' % ' '.join(cmd))
    subprocess.check_call(cmd, *args, **kwargs)
30
30
# Boilerplate lines to remove from the comment block that precedes the PO
# header entry: the "Translations template" title, the "FIRST AUTHOR"
# placeholder (with an optional following bare "#" line), and "#, fuzzy"
# markers (optionally preceded by a bare "#" line, or commented out as
# "# #, fuzzy").
# Every literal "#" and space is written as a character class because
# re.VERBOSE would otherwise treat them as comment start / ignorable layout.
header_comment_strip_re = re.compile(r'''
    ^
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
    |
    ^
    [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
    (?:[#] \n)?
    |
    ^
    (?:[#] \n)?
    [#],[ ]fuzzy \n
    |
    ^
    [#][ ][#],[ ]fuzzy \n
    ''',
    re.MULTILINE|re.VERBOSE)
47
47
# Header fields whose values vary between tools and runs (dates, translator,
# team, generator, project version). Each match is one complete quoted
# header string: from the opening '"' at the start of a line, through the
# first literal backslash-n escape, to the closing '"' and the newline.
# The value part excludes backslashes only, so it may span several physical
# lines when the field has been wrapped.
header_normalize_re = re.compile(r'''
    ^ "
    (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
    [ ][^\\]*\\n
    " \n
    ''',
    re.MULTILINE|re.IGNORECASE|re.VERBOSE)
55
55
def _normalize_po(raw_content):
    r"""Return the PO file text ``raw_content`` reduced to a canonical form.

    The comment block before the header entry loses its boilerplate lines,
    volatile header fields are removed, and every following chunk is stripped
    of blank and comment lines. Chunks marked fuzzy and translation chunks
    whose msgstr lines are all empty are dropped entirely.

    >>> print(_normalize_po(r'''
    ... # header comment
    ...
    ...
    ... # comment before header
    ... msgid ""
    ... msgstr "yada"
    ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
    ... "MIME-Version: "
    ... "1.0\n"
    ... "Last-Translator: Jabba"
    ... "the Hutt\n"
    ... "X-Generator: Weblate 1.2.3\n"
    ...
    ... # comment, but not in header
    ... msgid "None"
    ... msgstr "Ingen"
    ...
    ...
    ... line 2
    ... # third comment
    ...
    ... msgid "Special"
    ... msgstr ""
    ...
    ... msgid "Specialist"
    ... # odd comment
    ... msgstr ""
    ... "Expert"
    ...
    ... # crazy fuzzy auto translation by msgmerge, using foo for bar
    ... #, fuzzy
    ... #| msgid "some foo string"
    ... msgid "some bar string."
    ... msgstr "translation of foo string"
    ...
    ... msgid "%d minute"
    ... msgid_plural "%d minutes"
    ... msgstr[0] "minut"
    ... msgstr[1] "minutter"
    ... msgstr[2] ""
    ...
    ... msgid "%d year"
    ... msgid_plural "%d years"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ...
    ... # last comment
    ... ''') + '^^^')
    # header comment
    <BLANKLINE>
    <BLANKLINE>
    # comment before header
    <BLANKLINE>
    msgid ""
    msgstr "yada"
    "MIME-Version: "
    "1.0\n"
    <BLANKLINE>
    msgid "None"
    msgstr "Ingen"
    <BLANKLINE>
    line 2
    <BLANKLINE>
    msgid "Specialist"
    msgstr ""
    "Expert"
    <BLANKLINE>
    msgid "%d minute"
    msgid_plural "%d minutes"
    msgstr[0] "minut"
    msgstr[1] "minutter"
    msgstr[2] ""
    ^^^
    """
    # The header entry is the first 'msgid ""' at the start of a line; when
    # it is not found, find() gives -1 and the header is taken to start at 0.
    header_start = raw_content.find('\nmsgid ""\n') + 1
    # The header runs to the first empty line, or to the end of the content.
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
    chunks = [
        header_comment_strip_re.sub('', raw_content[0:header_start])
            .strip(),
        '',
        header_normalize_re.sub('', raw_content[header_start:header_end])
            .replace(
                r'"Content-Type: text/plain; charset=utf-8\n"',
                r'"Content-Type: text/plain; charset=UTF-8\n"')  # maintain msgmerge casing
            .strip(),
        '']  # preserve normalized header
    # all chunks are separated by empty line
    for raw_chunk in raw_content[header_end:].split('\n\n'):
        # The flag line might be like "#, fuzzy, python-format". A newline is
        # prepended before searching because chunks produced by the split do
        # not start with one, so a flag on the very first line of a chunk
        # would otherwise go unnoticed.
        if '\n#, fuzzy' in '\n' + raw_chunk:
            continue  # drop crazy auto translation that is worse than useless
        # strip all blank and comment lines from chunk
        chunk_lines = [
            line
            for line in raw_chunk.splitlines()
            if line
            and not line.startswith('#')
        ]
        if not chunk_lines:
            continue
        # check lines starting from first msgstr, skip chunk if no translation lines
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
        if (
            chunk_lines[0].startswith('msgid') and
            msgstr_i and
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
        ):  # skip translation chunks that don't have any actual translations
            continue
        chunks.append('\n'.join(chunk_lines) + '\n')
    return '\n'.join(chunks)
168
168
169 def _normalize_po_file(po_file, merge_pot_file=None, strip=False):
169 def _normalize_po_file(po_file, merge_pot_file=None, strip=False):
170 if merge_pot_file:
170 if merge_pot_file:
171 runcmd(['msgmerge', '--width=76', '--backup=none', '--previous',
171 runcmd(['msgmerge', '--width=76', '--backup=none', '--previous', '--no-fuzzy-matching',
172 '--update', po_file, '-q', merge_pot_file])
172 '--update', po_file, '-q', merge_pot_file])
173 if strip:
173 if strip:
174 po_tmp = po_file + '.tmp'
174 po_tmp = po_file + '.tmp'
175 with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
175 with open(po_file, 'r') as src, open(po_tmp, 'w') as dest:
176 raw_content = src.read()
176 raw_content = src.read()
177 normalized_content = _normalize_po(raw_content)
177 normalized_content = _normalize_po(raw_content)
178 dest.write(normalized_content)
178 dest.write(normalized_content)
179 os.rename(po_tmp, po_file)
179 os.rename(po_tmp, po_file)
180
180
def _normalized_diff(file1, file2, merge_pot_file=None, strip=False):
    """Show a unified diff of two PO files after normalizing copies of both.

    ``file1`` and ``file2`` are copied to temporary files, each copy is
    passed through ``_normalize_po_file`` with the given ``merge_pot_file``
    and ``strip`` options, and the copies are compared with ``diff -u``.
    The originals are never modified.

    Returns the exit status of ``diff`` when it is non-zero, and None when
    ``diff`` exits successfully.
    """
    # The context managers close and remove both temporary files on every
    # exit path instead of leaving cleanup to garbage collection.
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
            tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
        debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
        # Create temporary copies of both files
        shutil.copyfile(file1, temp1.name)
        shutil.copyfile(file2, temp2.name)
        # Normalize them in place
        _normalize_po_file(temp1.name, merge_pot_file=merge_pot_file, strip=strip)
        _normalize_po_file(temp2.name, merge_pot_file=merge_pot_file, strip=strip)
        # Now compare
        try:
            runcmd(['diff', '-u', temp1.name, temp2.name])
        except subprocess.CalledProcessError as e:
            return e.returncode
    return None
General Comments 0
You need to be logged in to leave comments. Login now