Show More
@@ -1,53 +1,61 b'' | |||
|
1 | 1 | #!/usr/bin/env python3 |
|
2 | 2 | |
|
3 | 3 | # -*- coding: utf-8 -*- |
|
4 | 4 | # This program is free software: you can redistribute it and/or modify |
|
5 | 5 | # it under the terms of the GNU General Public License as published by |
|
6 | 6 | # the Free Software Foundation, either version 3 of the License, or |
|
7 | 7 | # (at your option) any later version. |
|
8 | 8 | # |
|
9 | 9 | # This program is distributed in the hope that it will be useful, |
|
10 | 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11 | 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
12 | 12 | # GNU General Public License for more details. |
|
13 | 13 | # |
|
14 | 14 | # You should have received a copy of the GNU General Public License |
|
15 | 15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
16 | 16 | |
|
17 | import sys | |
|
18 | ||
|
17 | 19 | import click |
|
18 | 20 | |
|
19 | 21 | import i18n_utils |
|
20 | 22 | |
|
21 | 23 | |
|
22 | 24 | """ |
|
23 | 25 | Tool for maintenance of .po and .pot files |
|
24 | 26 | |
|
25 | 27 | Normally, the i18n-related files contain for each translatable string a |
|
26 | 28 | reference to all the source code locations where this string is found. This |
|
27 | 29 | meta data is useful for translators to assess how strings are used, but is not |
|
28 | 30 | relevant for normal development nor for running Kallithea. Such meta data, or |
|
29 | 31 | derived data like kallithea.pot, will inherently be outdated, and create |
|
30 | 32 | unnecessary churn and repository growth, making it harder to spot actual and |
|
31 | 33 | important changes. |
|
32 | 34 | """ |
|
33 | 35 | |
|
# Root command group; the sub-commands are registered on it with @cli.command().
@click.group()
@click.option('--debug/--no-debug', default=False)
def cli(debug):
    # Debug output is only ever switched on here: with --no-debug (the
    # default) the flag in i18n_utils is left exactly as it was.
    if debug:
        i18n_utils.do_debug = True
|
40 | 42 | |
|
@cli.command()
@click.argument('po_files', nargs=-1)
def normalize_po_files(po_files):
    """Normalize the specified .po and .pot files.

    Only actual translations and essential headers will be preserved.
    """
    # Hand every path given on the command line to the helper, one at a time.
    for path in po_files:
        i18n_utils._normalize_po_file(path, strip=True)
|
50 | 52 | |
|
@cli.command()
@click.argument('file1')
@click.argument('file2')
def normalized_diff(file1, file2):
    """Compare two files while transparently normalizing them."""
    # Whatever the helper returns is used as the process exit status.
    status = i18n_utils._normalized_diff(file1, file2, strip=True)
    sys.exit(status)
|
51 | 59 | |
|
# Invoke the click command group only when this file is run as a script.
if __name__ == '__main__':
    cli()
@@ -1,167 +1,185 b'' | |||
|
1 | 1 | # This program is free software: you can redistribute it and/or modify |
|
2 | 2 | # it under the terms of the GNU General Public License as published by |
|
3 | 3 | # the Free Software Foundation, either version 3 of the License, or |
|
4 | 4 | # (at your option) any later version. |
|
5 | 5 | # |
|
6 | 6 | # This program is distributed in the hope that it will be useful, |
|
7 | 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
8 | 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
9 | 9 | # GNU General Public License for more details. |
|
10 | 10 | # |
|
11 | 11 | # You should have received a copy of the GNU General Public License |
|
12 | 12 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
13 | 13 | |
|
14 | 14 | from __future__ import print_function |
|
15 | 15 | |
|
16 | 16 | import os |
|
17 | 17 | import re |
|
18 | import shutil | |
|
18 | 19 | import subprocess |
|
20 | import tempfile | |
|
19 | 21 | |
|
20 | 22 | |
|
do_debug = False # set from scripts/i18n --debug


def debug(*args, **kwargs):
    """Pass all arguments on to print(), but only while do_debug is true."""
    if not do_debug:
        return
    print(*args, **kwargs)
|
26 | 28 | |
|
def runcmd(cmd, *args, **kwargs):
    """Run cmd with subprocess.check_call, announcing it via debug() first.

    cmd is a sequence of strings (it is joined with spaces for the debug
    message). Any further positional and keyword arguments are handed to
    check_call unchanged; a non-zero exit status of the command surfaces as
    subprocess.CalledProcessError.
    """
    command_line = ' '.join(cmd)
    debug('... Executing command: %s' % command_line)
    subprocess.check_call(cmd, *args, **kwargs)
|
30 | 32 | |
|
# Boilerplate comment lines that are removed from the text preceding the
# header entry: the "Translations template for Kallithea." line and the
# "FIRST AUTHOR ..." / "#" / "#, fuzzy" placeholder block.
header_comment_strip_re = re.compile(r'''
    ^
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
    |
    ^
    [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
    [#] \n
    [#],[ ]fuzzy \n
    ''',
    re.MULTILINE|re.VERBOSE)

# Header fields that are removed from the header entry. "[^\\]*" also spans
# quote/newline characters, so a field value continued on following string
# lines is removed up to its terminating literal "\n".
header_normalize_re = re.compile(r'''
    ^ "
    (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
    [ ][^\\]*\\n
    " \n
    ''',
    re.MULTILINE|re.IGNORECASE|re.VERBOSE)

def _normalize_po(raw_content):
    r"""
    Return the text of a .po/.pot file in normalized form.

    The text before the header entry is kept, minus the boilerplate matched
    by header_comment_strip_re. The header entry is kept, minus the fields
    matched by header_normalize_re. Every following chunk (chunks are
    separated by an empty line) loses its comment lines and is dropped
    entirely when it is flagged fuzzy or when none of its msgstr lines carry
    a translation.

    >>> print(_normalize_po(r'''
    ... # header comment
    ...
    ...
    ... # comment before header
    ... msgid ""
    ... msgstr "yada"
    ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
    ... "MIME-Version: "
    ... "1.0\n"
    ... "Last-Translator: Jabba"
    ... "the Hutt\n"
    ... "X-Generator: Weblate 1.2.3\n"
    ...
    ... # comment, but not in header
    ... msgid "None"
    ... msgstr "Ingen"
    ...
    ...
    ... line 2
    ... # third comment
    ...
    ... msgid "Special"
    ... msgstr ""
    ...
    ... msgid "Specialist"
    ... # odd comment
    ... msgstr ""
    ... "Expert"
    ...
    ... # crazy fuzzy auto translation by msgmerge, using foo for bar
    ... #, fuzzy
    ... #| msgid "some foo string"
    ... msgid "some bar string."
    ... msgstr "translation of foo string"
    ...
    ... msgid "%d minute"
    ... msgid_plural "%d minutes"
    ... msgstr[0] "minut"
    ... msgstr[1] "minutter"
    ... msgstr[2] ""
    ...
    ... msgid "%d year"
    ... msgid_plural "%d years"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ...
    ... # last comment
    ... ''') + '^^^')
    # header comment
    <BLANKLINE>
    <BLANKLINE>
    # comment before header
    <BLANKLINE>
    msgid ""
    msgstr "yada"
    "MIME-Version: "
    "1.0\n"
    <BLANKLINE>
    msgid "None"
    msgstr "Ingen"
    <BLANKLINE>
    line 2
    <BLANKLINE>
    msgid "Specialist"
    msgstr ""
    "Expert"
    <BLANKLINE>
    msgid "%d minute"
    msgid_plural "%d minutes"
    msgstr[0] "minut"
    msgstr[1] "minutter"
    msgstr[2] ""
    ^^^
    """
    # find() yields -1 when there is no '\nmsgid ""\n'; the "+ 1" then makes
    # the header start at offset 0.
    header_start = raw_content.find('\nmsgid ""\n') + 1
    # Without an empty line after the header, the header runs to the end.
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
    chunks = [
        header_comment_strip_re.sub('', raw_content[0:header_start])
            .strip(),
        '',
        header_normalize_re.sub('', raw_content[header_start:header_end])
            .strip(),
        '']  # preserve normalized header
    # all chunks are separated by empty line
    for raw_chunk in raw_content[header_end:].split('\n\n'):
        # The flag line might be like "#, fuzzy, python-format". Splitting on
        # the empty line leaves no leading newline on a chunk, so a flag on
        # the very first line has to be checked for separately.
        if raw_chunk.startswith('#, fuzzy') or '\n#, fuzzy' in raw_chunk:
            continue  # drop crazy auto translation that is worse than useless
        # strip all comment lines from chunk
        chunk_lines = [
            line
            for line in raw_chunk.splitlines()
            if line
            and not line.startswith('#')
        ]
        if not chunk_lines:
            continue
        # check lines starting from first msgstr, skip chunk if no translation lines
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
        if (
            chunk_lines[0].startswith('msgid') and
            msgstr_i and
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
        ):  # skip translation chunks that don't have any actual translations
            continue
        chunks.append('\n'.join(chunk_lines) + '\n')
    return '\n'.join(chunks)
|
159 | 161 | |
|
def _normalize_po_file(po_file, strip=False):
    """Rewrite po_file in place with its normalized content.

    Nothing happens unless strip is true. Otherwise the file is read, run
    through _normalize_po(), written to "<po_file>.tmp" and that file is
    then renamed over po_file.

    The source is read and normalized before the temporary file is created,
    and the temporary file is removed again if writing or renaming fails, so
    a failure does not leave a stray ".tmp" file behind.
    """
    if not strip:
        return
    with open(po_file, 'r') as src:
        raw_content = src.read()
    normalized_content = _normalize_po(raw_content)
    po_tmp = po_file + '.tmp'
    try:
        with open(po_tmp, 'w') as dest:
            dest.write(normalized_content)
        os.rename(po_tmp, po_file)
    finally:
        # After a successful rename the temporary name is gone; it only
        # still exists here when writing or renaming went wrong.
        if os.path.exists(po_tmp):
            os.remove(po_tmp)
|
170 | ||
|
def _normalized_diff(file1, file2, strip=False):
    """Run "diff -u" on normalized temporary copies of file1 and file2.

    Both files are copied to temporary files, the copies are passed to
    _normalize_po_file() with the given strip value, and the copies are then
    compared. file1 and file2 themselves are not modified.

    Returns 0 when the diff command succeeds, otherwise the non-zero exit
    status reported through subprocess.CalledProcessError.
    """
    # Create temporary copies of both files. The context managers close (and
    # thereby delete) them on every exit path instead of leaving the cleanup
    # to garbage collection.
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
            tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
        debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
        shutil.copyfile(file1, temp1.name)
        shutil.copyfile(file2, temp2.name)
        # Normalize them in place
        _normalize_po_file(temp1.name, strip=strip)
        _normalize_po_file(temp2.name, strip=strip)
        # Now compare
        try:
            runcmd(['diff', '-u', temp1.name, temp2.name])
        except subprocess.CalledProcessError as e:
            return e.returncode
        # Explicit success status, so callers always get an int.
        return 0
General Comments 0
You need to be logged in to leave comments.
Login now