Show More
@@ -21,6 +21,14 b' import i18n_utils' | |||||
21 |
|
21 | |||
22 | """ |
|
22 | """ | |
23 | Tool for maintenance of .po and .pot files |
|
23 | Tool for maintenance of .po and .pot files | |
|
24 | ||||
|
25 | Normally, the i18n-related files contain for each translatable string a | |||
|
26 | reference to all the source code locations where this string is found. This | |||
|
27 | meta data is useful for translators to assess how strings are used, but is not | |||
|
28 | relevant for normal development nor for running Kallithea. Such meta data, or | |||
|
29 | derived data like kallithea.pot, will inherently be outdated, and create | |||
|
30 | unnecessary churn and repository growth, making it harder to spot actual and | |||
|
31 | important changes. | |||
24 | """ |
|
32 | """ | |
25 |
|
33 | |||
26 | @click.group() |
|
34 | @click.group() | |
@@ -30,5 +38,16 b' def cli(debug):' | |||||
30 | i18n_utils.do_debug = True |
|
38 | i18n_utils.do_debug = True | |
31 | pass |
|
39 | pass | |
32 |
|
40 | |||
|
@cli.command()
@click.argument('po_files', nargs=-1)
def normalize_po_files(po_files):
    """Normalize the specified .po and .pot files.

    Only actual translations and essential headers will be preserved.
    """
    # The docstring above doubles as the command's help text, so it is kept
    # verbatim. Each given path is handed to the helper with stripping enabled.
    for path in po_files:
        i18n_utils._normalize_po_file(path, strip=True)
|
50 | ||||
|
51 | ||||
if __name__ == '__main__':
    # Run the click command group when this file is executed as a script.
    cli()
@@ -13,6 +13,8 b'' | |||||
13 |
|
13 | |||
14 | from __future__ import print_function |
|
14 | from __future__ import print_function | |
15 |
|
15 | |||
|
16 | import os | |||
|
17 | import re | |||
16 | import subprocess |
|
18 | import subprocess | |
17 |
|
19 | |||
18 |
|
20 | |||
@@ -25,3 +27,141 b' def debug(*args, **kwargs):' | |||||
def runcmd(cmd, *args, **kwargs):
    """Report the command through debug() and then execute it.

    cmd is a sequence of strings; it is joined with spaces for the debug
    message only. All further positional and keyword arguments are passed
    through to subprocess.check_call, which raises CalledProcessError when
    the command exits with a non-zero status.
    """
    command_line = ' '.join(cmd)
    debug('... Executing command: %s' % command_line)
    subprocess.check_call(cmd, *args, **kwargs)
|
30 | ||||
|
31 | header_comment_strip_re = re.compile(r''' | |||
|
32 | ^ | |||
|
33 | [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n | |||
|
34 | | | |||
|
35 | ^ | |||
|
36 | [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n | |||
|
37 | [#] \n | |||
|
38 | [#],[ ]fuzzy \n | |||
|
39 | ''', | |||
|
40 | re.MULTILINE|re.VERBOSE) | |||
|
41 | ||||
|
42 | header_normalize_re = re.compile(r''' | |||
|
43 | ^ " | |||
|
44 | (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version): | |||
|
45 | [ ][^\\]*\\n | |||
|
46 | " \n | |||
|
47 | ''', | |||
|
48 | re.MULTILINE|re.IGNORECASE|re.VERBOSE) | |||
|
49 | ||||
|
50 | def _normalize_po(raw_content): | |||
|
51 | r""" | |||
|
52 | >>> print(_normalize_po(r''' | |||
|
53 | ... # header comment | |||
|
54 | ... | |||
|
55 | ... | |||
|
56 | ... # comment before header | |||
|
57 | ... msgid "" | |||
|
58 | ... msgstr "yada" | |||
|
59 | ... "POT-Creation-Date: 2019-05-04 21:13+0200\n" | |||
|
60 | ... "MIME-Version: " | |||
|
61 | ... "1.0\n" | |||
|
62 | ... "Last-Translator: Jabba" | |||
|
63 | ... "the Hutt\n" | |||
|
64 | ... "X-Generator: Weblate 1.2.3\n" | |||
|
65 | ... | |||
|
66 | ... # comment, but not in header | |||
|
67 | ... msgid "None" | |||
|
68 | ... msgstr "Ingen" | |||
|
69 | ... | |||
|
70 | ... | |||
|
71 | ... line 2 | |||
|
72 | ... # third comment | |||
|
73 | ... | |||
|
74 | ... msgid "Special" | |||
|
75 | ... msgstr "" | |||
|
76 | ... | |||
|
77 | ... msgid "Specialist" | |||
|
78 | ... # odd comment | |||
|
79 | ... msgstr "" | |||
|
80 | ... "Expert" | |||
|
81 | ... | |||
|
82 | ... # crazy fuzzy auto translation by msgmerge, using foo for bar | |||
|
83 | ... #, fuzzy | |||
|
84 | ... #| msgid "some foo string" | |||
|
85 | ... msgid "some bar string." | |||
|
86 | ... msgstr "translation of foo string" | |||
|
87 | ... | |||
|
88 | ... msgid "%d minute" | |||
|
89 | ... msgid_plural "%d minutes" | |||
|
90 | ... msgstr[0] "minut" | |||
|
91 | ... msgstr[1] "minutter" | |||
|
92 | ... msgstr[2] "" | |||
|
93 | ... | |||
|
94 | ... msgid "%d year" | |||
|
95 | ... msgid_plural "%d years" | |||
|
96 | ... msgstr[0] "" | |||
|
97 | ... msgstr[1] "" | |||
|
98 | ... | |||
|
99 | ... # last comment | |||
|
100 | ... ''') + '^^^') | |||
|
101 | # header comment | |||
|
102 | <BLANKLINE> | |||
|
103 | <BLANKLINE> | |||
|
104 | # comment before header | |||
|
105 | <BLANKLINE> | |||
|
106 | msgid "" | |||
|
107 | msgstr "yada" | |||
|
108 | "MIME-Version: " | |||
|
109 | "1.0\n" | |||
|
110 | <BLANKLINE> | |||
|
111 | msgid "None" | |||
|
112 | msgstr "Ingen" | |||
|
113 | <BLANKLINE> | |||
|
114 | line 2 | |||
|
115 | <BLANKLINE> | |||
|
116 | msgid "Specialist" | |||
|
117 | msgstr "" | |||
|
118 | "Expert" | |||
|
119 | <BLANKLINE> | |||
|
120 | msgid "%d minute" | |||
|
121 | msgid_plural "%d minutes" | |||
|
122 | msgstr[0] "minut" | |||
|
123 | msgstr[1] "minutter" | |||
|
124 | msgstr[2] "" | |||
|
125 | ^^^ | |||
|
126 | """ | |||
|
127 | header_start = raw_content.find('\nmsgid ""\n') + 1 | |||
|
128 | header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content) | |||
|
129 | chunks = [ | |||
|
130 | header_comment_strip_re.sub('', raw_content[0:header_start]) | |||
|
131 | .strip(), | |||
|
132 | '', | |||
|
133 | header_normalize_re.sub('', raw_content[header_start:header_end]) | |||
|
134 | .strip(), | |||
|
135 | ''] # preserve normalized header | |||
|
136 | # all chunks are separated by empty line | |||
|
137 | for raw_chunk in raw_content[header_end:].split('\n\n'): | |||
|
138 | if '\n#, fuzzy' in raw_chunk: # might be like "#, fuzzy, python-format" | |||
|
139 | continue # drop crazy auto translation that is worse than useless | |||
|
140 | # strip all comment lines from chunk | |||
|
141 | chunk_lines = [ | |||
|
142 | line | |||
|
143 | for line in raw_chunk.splitlines() | |||
|
144 | if line | |||
|
145 | and not line.startswith('#') | |||
|
146 | ] | |||
|
147 | if not chunk_lines: | |||
|
148 | continue | |||
|
149 | # check lines starting from first msgstr, skip chunk if no translation lines | |||
|
150 | msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')] | |||
|
151 | if ( | |||
|
152 | chunk_lines[0].startswith('msgid') and | |||
|
153 | msgstr_i and | |||
|
154 | all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:]) | |||
|
155 | ): # skip translation chunks that doesn't have any actual translations | |||
|
156 | continue | |||
|
157 | chunks.append('\n'.join(chunk_lines) + '\n') | |||
|
158 | return '\n'.join(chunks) | |||
|
159 | ||||
|
160 | def _normalize_po_file(po_file, strip=False): | |||
|
161 | if strip: | |||
|
162 | po_tmp = po_file + '.tmp' | |||
|
163 | with open(po_file, 'r') as src, open(po_tmp, 'w') as dest: | |||
|
164 | raw_content = src.read() | |||
|
165 | normalized_content = _normalize_po(raw_content) | |||
|
166 | dest.write(normalized_content) | |||
|
167 | os.rename(po_tmp, po_file) |
General Comments 0
You need to be logged in to leave comments.
Login now