##// END OF EJS Templates
polib: update to latest release 1.0.7 (upstream rev d75ce6dbbc2a)...
Augie Fackler -
r40221:19fc5a98 default
parent child Browse files
Show More
@@ -1,246 +1,238
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-translation.py - check Mercurial specific translation problems
4 4 from __future__ import absolute_import
5 5
6 6 import re
7 7
8 8 import polib
9 9
10 10 scanners = []
11 11 checkers = []
12 12
13 13 def scanner():
14 14 def decorator(func):
15 15 scanners.append(func)
16 16 return func
17 17 return decorator
18 18
19 19 def levelchecker(level, msgidpat):
20 20 def decorator(func):
21 21 if msgidpat:
22 22 match = re.compile(msgidpat).search
23 23 else:
24 24 match = lambda msgid: True
25 25 checkers.append((func, level))
26 26 func.match = match
27 27 return func
28 28 return decorator
29 29
30 30 def match(checker, pe):
31 31 """Examine whether POEntry "pe" is target of specified checker or not
32 32 """
33 33 if not checker.match(pe.msgid):
34 34 return
35 35 # examine suppression by translator comment
36 36 nochecker = 'no-%s-check' % checker.__name__
37 37 for tc in pe.tcomment.split():
38 38 if nochecker == tc:
39 39 return
40 40 return True
41 41
42 42 ####################
43 43
44 44 def fatalchecker(msgidpat=None):
45 45 return levelchecker('fatal', msgidpat)
46 46
47 47 @fatalchecker(r'\$\$')
48 48 def promptchoice(pe):
49 49 """Check translation of the string given to "ui.promptchoice()"
50 50
51 51 >>> pe = polib.POEntry(
52 52 ... msgid ='prompt$$missing &sep$$missing &amp$$followed by &none',
53 53 ... msgstr='prompt missing &sep$$missing amp$$followed by none&')
54 54 >>> match(promptchoice, pe)
55 55 True
56 56 >>> for e in promptchoice(pe): print(e)
57 57 number of choices differs between msgid and msgstr
58 58 msgstr has invalid choice missing '&'
59 59 msgstr has invalid '&' followed by none
60 60 """
61 61 idchoices = [c.rstrip(' ') for c in pe.msgid.split('$$')[1:]]
62 62 strchoices = [c.rstrip(' ') for c in pe.msgstr.split('$$')[1:]]
63 63
64 64 if len(idchoices) != len(strchoices):
65 65 yield "number of choices differs between msgid and msgstr"
66 66
67 67 indices = [(c, c.find('&')) for c in strchoices]
68 68 if [c for c, i in indices if i == -1]:
69 69 yield "msgstr has invalid choice missing '&'"
70 70 if [c for c, i in indices if len(c) == i + 1]:
71 71 yield "msgstr has invalid '&' followed by none"
72 72
73 73 deprecatedpe = None
74 74 @scanner()
75 75 def deprecatedsetup(pofile):
76 76 pes = [p for p in pofile if p.msgid == '(DEPRECATED)' and p.msgstr]
77 77 if len(pes):
78 78 global deprecatedpe
79 79 deprecatedpe = pes[0]
80 80
81 81 @fatalchecker(r'\(DEPRECATED\)')
82 82 def deprecated(pe):
83 83 """Check for DEPRECATED
84 84 >>> ped = polib.POEntry(
85 85 ... msgid = '(DEPRECATED)',
86 86 ... msgstr= '(DETACERPED)')
87 87 >>> deprecatedsetup([ped])
88 88 >>> pe = polib.POEntry(
89 89 ... msgid = 'Something (DEPRECATED)',
90 90 ... msgstr= 'something (DEPRECATED)')
91 91 >>> match(deprecated, pe)
92 92 True
93 93 >>> for e in deprecated(pe): print(e)
94 94 >>> pe = polib.POEntry(
95 95 ... msgid = 'Something (DEPRECATED)',
96 96 ... msgstr= 'something (DETACERPED)')
97 97 >>> match(deprecated, pe)
98 98 True
99 99 >>> for e in deprecated(pe): print(e)
100 100 >>> pe = polib.POEntry(
101 101 ... msgid = 'Something (DEPRECATED)',
102 102 ... msgstr= 'something')
103 103 >>> match(deprecated, pe)
104 104 True
105 105 >>> for e in deprecated(pe): print(e)
106 106 msgstr inconsistently translated (DEPRECATED)
107 107 >>> pe = polib.POEntry(
108 108 ... msgid = 'Something (DEPRECATED, foo bar)',
109 109 ... msgstr= 'something (DETACERPED, foo bar)')
110 110 >>> match(deprecated, pe)
111 111 """
112 112 if not ('(DEPRECATED)' in pe.msgstr or
113 113 (deprecatedpe and
114 114 deprecatedpe.msgstr in pe.msgstr)):
115 115 yield "msgstr inconsistently translated (DEPRECATED)"
116 116
117 117 ####################
118 118
119 119 def warningchecker(msgidpat=None):
120 120 return levelchecker('warning', msgidpat)
121 121
122 122 @warningchecker()
123 123 def taildoublecolons(pe):
124 124 """Check equality of tail '::'-ness between msgid and msgstr
125 125
126 126 >>> pe = polib.POEntry(
127 127 ... msgid ='ends with ::',
128 128 ... msgstr='ends with ::')
129 129 >>> for e in taildoublecolons(pe): print(e)
130 130 >>> pe = polib.POEntry(
131 131 ... msgid ='ends with ::',
132 132 ... msgstr='ends without double-colons')
133 133 >>> for e in taildoublecolons(pe): print(e)
134 134 tail '::'-ness differs between msgid and msgstr
135 135 >>> pe = polib.POEntry(
136 136 ... msgid ='ends without double-colons',
137 137 ... msgstr='ends with ::')
138 138 >>> for e in taildoublecolons(pe): print(e)
139 139 tail '::'-ness differs between msgid and msgstr
140 140 """
141 141 if pe.msgid.endswith('::') != pe.msgstr.endswith('::'):
142 142 yield "tail '::'-ness differs between msgid and msgstr"
143 143
144 144 @warningchecker()
145 145 def indentation(pe):
146 146 """Check equality of initial indentation between msgid and msgstr
147 147
148 148 This may report unexpected warnings, because this check is not aware
149 149 of the syntax of the rst document or the context of msgstr.
150 150
151 151 >>> pe = polib.POEntry(
152 152 ... msgid =' indented text',
153 153 ... msgstr=' narrowed indentation')
154 154 >>> for e in indentation(pe): print(e)
155 155 initial indentation width differs betweeen msgid and msgstr
156 156 """
157 157 idindent = len(pe.msgid) - len(pe.msgid.lstrip())
158 158 strindent = len(pe.msgstr) - len(pe.msgstr.lstrip())
159 159 if idindent != strindent:
160 160 yield "initial indentation width differs betweeen msgid and msgstr"
161 161
162 162 ####################
163 163
164 164 def check(pofile, fatal=True, warning=False):
165 165 targetlevel = { 'fatal': fatal, 'warning': warning }
166 166 targetcheckers = [(checker, level)
167 167 for checker, level in checkers
168 168 if targetlevel[level]]
169 169 if not targetcheckers:
170 170 return []
171 171
172 172 detected = []
173 173 for checker in scanners:
174 174 checker(pofile)
175 175 for pe in pofile.translated_entries():
176 176 errors = []
177 177 for checker, level in targetcheckers:
178 178 if match(checker, pe):
179 179 errors.extend((level, checker.__name__, error)
180 180 for error in checker(pe))
181 181 if errors:
182 182 detected.append((pe, errors))
183 183 return detected
184 184
185 185 ########################################
186 186
187 187 if __name__ == "__main__":
188 188 import sys
189 189 import optparse
190 190
191 191 optparser = optparse.OptionParser("""%prog [options] pofile ...
192 192
193 193 This checks Mercurial specific translation problems in specified
194 194 '*.po' files.
195 195
196 196 Each detected problems are shown in the format below::
197 197
198 198 filename:linenum:type(checker): problem detail .....
199 199
200 200 "type" is "fatal" or "warning". "checker" is the name of the function
201 201 detecting corresponded error.
202 202
203 203 Checking by checker "foo" on the specific msgstr can be suppressed by
204 204 the "translator comment" like below. Multiple "no-xxxx-check" should
205 205 be separated by whitespaces::
206 206
207 207 # no-foo-check
208 208 msgid = "....."
209 209 msgstr = "....."
210 210 """)
211 211 optparser.add_option("", "--warning",
212 212 help="show also warning level problems",
213 213 action="store_true")
214 214 optparser.add_option("", "--doctest",
215 215 help="run doctest of this tool, instead of check",
216 216 action="store_true")
217 217 (options, args) = optparser.parse_args()
218 218
219 219 if options.doctest:
220 220 import os
221 221 if 'TERM' in os.environ:
222 222 del os.environ['TERM']
223 223 import doctest
224 224 failures, tests = doctest.testmod()
225 225 sys.exit(failures and 1 or 0)
226 226
227 # replace polib._POFileParser to show linenum of problematic msgstr
228 class ExtPOFileParser(polib._POFileParser):
229 def process(self, symbol, linenum):
230 super(ExtPOFileParser, self).process(symbol, linenum)
231 if symbol == 'MS': # msgstr
232 self.current_entry.linenum = linenum
233 polib._POFileParser = ExtPOFileParser
234
235 227 detected = []
236 228 warning = options.warning
237 229 for f in args:
238 230 detected.extend((f, pe, errors)
239 231 for pe, errors in check(polib.pofile(f),
240 232 warning=warning))
241 233 if detected:
242 234 for f, pe, errors in detected:
243 235 for level, checker, error in errors:
244 236 sys.stderr.write('%s:%d:%s(%s): %s\n'
245 237 % (f, pe.linenum, level, checker, error))
246 238 sys.exit(1)
This diff has been collapsed as it changes many lines, (776 lines changed) Show them Hide them
@@ -1,1554 +1,1838
1 # -*- coding: utf-8 -*-
2 1 # no-check-code
2 # -* coding: utf-8 -*-
3 3 #
4 4 # License: MIT (see LICENSE file provided)
5 5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6 6
7 7 """
8 8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
9 9 mo files). You can load existing files, iterate through its entries, add,
10 10 modify entries, comments or metadata, etc. or create new po files from scratch.
11 11
12 12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
13 13 :func:`~polib.mofile` convenience functions.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
19 __version__ = '0.6.4'
19 __version__ = '1.0.7'
20 20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
21 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
21 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
22 22
23 23 import array
24 24 import codecs
25 25 import os
26 26 import re
27 27 import struct
28 28 import sys
29 29 import textwrap
30 import types
30
31 try:
32 import io
33 except ImportError:
34 # replacement of io.open() for python < 2.6
35 # we use codecs instead
36 class io(object):
37 @staticmethod
38 def open(fpath, mode='r', encoding=None):
39 return codecs.open(fpath, mode, encoding)
31 40
32 41
33 42 # the default encoding to use when encoding cannot be detected
34 43 default_encoding = 'utf-8'
35 44
45 # python 2/3 compatibility helpers {{{
46
47
48 if sys.version_info[:2] < (3, 0):
49 PY3 = False
50 text_type = unicode
51
52 def b(s):
53 return s
54
55 def u(s):
56 return unicode(s, "unicode_escape")
57
58 else:
59 PY3 = True
60 text_type = str
61
62 def b(s):
63 return s.encode("latin-1")
64
65 def u(s):
66 return s
67 # }}}
36 68 # _pofile_or_mofile {{{
37 69
70
38 71 def _pofile_or_mofile(f, type, **kwargs):
39 72 """
40 73 Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
41 74 honor the DRY concept.
42 75 """
43 76 # get the file encoding
44 77 enc = kwargs.get('encoding')
45 78 if enc is None:
46 79 enc = detect_encoding(f, type == 'mofile')
47 80
48 81 # parse the file
49 82 kls = type == 'pofile' and _POFileParser or _MOFileParser
50 83 parser = kls(
51 84 f,
52 85 encoding=enc,
53 check_for_duplicates=kwargs.get('check_for_duplicates', False)
86 check_for_duplicates=kwargs.get('check_for_duplicates', False),
87 klass=kwargs.get('klass')
54 88 )
55 89 instance = parser.parse()
56 90 instance.wrapwidth = kwargs.get('wrapwidth', 78)
57 91 return instance
92 # }}}
93 # _is_file {{{
58 94
95
96 def _is_file(filename_or_contents):
97 """
98 Safely returns the value of os.path.exists(filename_or_contents).
99
100 Arguments:
101
102 ``filename_or_contents``
103 either a filename, or a string holding the contents of some file.
104 In the latter case, this function will always return False.
105 """
106 try:
107 return os.path.exists(filename_or_contents)
108 except (ValueError, UnicodeEncodeError):
109 return False
59 110 # }}}
60 111 # function pofile() {{{
61 112
113
62 114 def pofile(pofile, **kwargs):
63 115 """
64 116 Convenience function that parses the po or pot file ``pofile`` and returns
65 117 a :class:`~polib.POFile` instance.
66 118
67 119 Arguments:
68 120
69 121 ``pofile``
70 122 string, full or relative path to the po/pot file or its content (data).
71 123
72 124 ``wrapwidth``
73 125 integer, the wrap width, only useful when the ``-w`` option was passed
74 126 to xgettext (optional, default: ``78``).
75 127
76 128 ``encoding``
77 129 string, the encoding to use (e.g. "utf-8") (default: ``None``, the
78 130 encoding will be auto-detected).
79 131
80 132 ``check_for_duplicates``
81 133 whether to check for duplicate entries when adding entries to the
82 134 file (optional, default: ``False``).
135
136 ``klass``
137 class which is used to instantiate the return value (optional,
138 default: ``None``, the return value will be a :class:`~polib.POFile`
139 instance).
83 140 """
84 141 return _pofile_or_mofile(pofile, 'pofile', **kwargs)
85
86 142 # }}}
87 143 # function mofile() {{{
88 144
145
89 146 def mofile(mofile, **kwargs):
90 147 """
91 148 Convenience function that parses the mo file ``mofile`` and returns a
92 149 :class:`~polib.MOFile` instance.
93 150
94 151 Arguments:
95 152
96 153 ``mofile``
97 154 string, full or relative path to the mo file or its content (data).
98 155
99 156 ``wrapwidth``
100 157 integer, the wrap width, only useful when the ``-w`` option was passed
101 158 to xgettext to generate the po file that was used to format the mo file
102 159 (optional, default: ``78``).
103 160
104 161 ``encoding``
105 162 string, the encoding to use (e.g. "utf-8") (default: ``None``, the
106 163 encoding will be auto-detected).
107 164
108 165 ``check_for_duplicates``
109 166 whether to check for duplicate entries when adding entries to the
110 167 file (optional, default: ``False``).
168
169 ``klass``
170 class which is used to instantiate the return value (optional,
171 default: ``None``, the return value will be a :class:`~polib.MOFile`
172 instance).
111 173 """
112 174 return _pofile_or_mofile(mofile, 'mofile', **kwargs)
113
114 175 # }}}
115 176 # function detect_encoding() {{{
116 177
178
117 179 def detect_encoding(file, binary_mode=False):
118 180 """
119 181 Try to detect the encoding used by the ``file``. The ``file`` argument can
120 182 be a PO or MO file path or a string containing the contents of the file.
121 183 If the encoding cannot be detected, the function will return the value of
122 184 ``default_encoding``.
123 185
124 186 Arguments:
125 187
126 188 ``file``
127 189 string, full or relative path to the po/mo file or its content.
128 190
129 191 ``binary_mode``
130 192 boolean, set this to True if ``file`` is a mo file.
131 193 """
132 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
194 PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
195 rxt = re.compile(u(PATTERN))
196 rxb = re.compile(b(PATTERN))
133 197
134 198 def charset_exists(charset):
135 199 """Check whether ``charset`` is valid or not."""
136 200 try:
137 201 codecs.lookup(charset)
138 202 except LookupError:
139 203 return False
140 204 return True
141 205
142 if not os.path.exists(file):
143 match = rx.search(file)
206 if not _is_file(file):
207 match = rxt.search(file)
144 208 if match:
145 209 enc = match.group(1).strip()
146 210 if charset_exists(enc):
147 211 return enc
148 212 else:
149 if binary_mode:
213 # For PY3, always treat as binary
214 if binary_mode or PY3:
150 215 mode = 'rb'
216 rx = rxb
151 217 else:
152 218 mode = 'r'
219 rx = rxt
153 220 f = open(file, mode)
154 221 for l in f.readlines():
155 222 match = rx.search(l)
156 223 if match:
157 224 f.close()
158 225 enc = match.group(1).strip()
226 if not isinstance(enc, text_type):
227 enc = enc.decode('utf-8')
159 228 if charset_exists(enc):
160 229 return enc
161 230 f.close()
162 231 return default_encoding
163
164 232 # }}}
165 233 # function escape() {{{
166 234
235
167 236 def escape(st):
168 237 """
169 238 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
170 239 the given string ``st`` and returns it.
171 240 """
172 241 return st.replace('\\', r'\\')\
173 242 .replace('\t', r'\t')\
174 243 .replace('\r', r'\r')\
175 244 .replace('\n', r'\n')\
176 245 .replace('\"', r'\"')
177
178 246 # }}}
179 247 # function unescape() {{{
180 248
249
181 250 def unescape(st):
182 251 """
183 252 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
184 253 the given string ``st`` and returns it.
185 254 """
186 255 def unescape_repl(m):
187 256 m = m.group(1)
188 257 if m == 'n':
189 258 return '\n'
190 259 if m == 't':
191 260 return '\t'
192 261 if m == 'r':
193 262 return '\r'
194 263 if m == '\\':
195 264 return '\\'
196 265 return m # handles escaped double quote
197 266 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
198
199 267 # }}}
200 268 # class _BaseFile {{{
201 269
270
202 271 class _BaseFile(list):
203 272 """
204 273 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
205 274 classes. This class should **not** be instantiated directly.
206 275 """
207 276
208 277 def __init__(self, *args, **kwargs):
209 278 """
210 279 Constructor, accepts the following keyword arguments:
211 280
212 281 ``pofile``
213 282 string, the path to the po or mo file, or its content as a string.
214 283
215 284 ``wrapwidth``
216 285 integer, the wrap width, only useful when the ``-w`` option was
217 286 passed to xgettext (optional, default: ``78``).
218 287
219 288 ``encoding``
220 289 string, the encoding to use, defaults to ``default_encoding``
221 290 global variable (optional).
222 291
223 292 ``check_for_duplicates``
224 293 whether to check for duplicate entries when adding entries to the
225 294 file, (optional, default: ``False``).
226 295 """
227 296 list.__init__(self)
228 297 # the opened file handle
229 298 pofile = kwargs.get('pofile', None)
230 if pofile and os.path.exists(pofile):
299 if pofile and _is_file(pofile):
231 300 self.fpath = pofile
232 301 else:
233 302 self.fpath = kwargs.get('fpath')
234 303 # the width at which lines should be wrapped
235 304 self.wrapwidth = kwargs.get('wrapwidth', 78)
236 305 # the file encoding
237 306 self.encoding = kwargs.get('encoding', default_encoding)
238 307 # whether to check for duplicate entries or not
239 308 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
240 309 # header
241 310 self.header = ''
242 311 # both po and mo files have metadata
243 312 self.metadata = {}
244 313 self.metadata_is_fuzzy = 0
245 314
246 315 def __unicode__(self):
247 316 """
248 317 Returns the unicode representation of the file.
249 318 """
250 319 ret = []
251 320 entries = [self.metadata_as_entry()] + \
252 321 [e for e in self if not e.obsolete]
253 322 for entry in entries:
254 323 ret.append(entry.__unicode__(self.wrapwidth))
255 324 for entry in self.obsolete_entries():
256 325 ret.append(entry.__unicode__(self.wrapwidth))
257 ret = '\n'.join(ret)
326 ret = u('\n').join(ret)
258 327
259 if type(ret) != types.UnicodeType:
260 return unicode(ret, self.encoding)
328 assert isinstance(ret, text_type)
329 #if type(ret) != text_type:
330 # return unicode(ret, self.encoding)
261 331 return ret
262 332
333 if PY3:
334 def __str__(self):
335 return self.__unicode__()
336 else:
263 337 def __str__(self):
264 338 """
265 339 Returns the string representation of the file.
266 340 """
267 341 return unicode(self).encode(self.encoding)
268 342
269 343 def __contains__(self, entry):
270 344 """
271 Overriden ``list`` method to implement the membership test (in and
345 Overridden ``list`` method to implement the membership test (in and
272 346 not in).
273 347 The method considers that an entry is in the file if it finds an entry
274 that has the same msgid (the test is **case sensitive**).
348 that has the same msgid (the test is **case sensitive**) and the same
349 msgctxt (or none for both entries).
275 350
276 351 Argument:
277 352
278 353 ``entry``
279 354 an instance of :class:`~polib._BaseEntry`.
280 355 """
281 return self.find(entry.msgid, by='msgid') is not None
356 return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
357 is not None
282 358
283 359 def __eq__(self, other):
284 return unicode(self) == unicode(other)
360 return str(self) == str(other)
285 361
286 362 def append(self, entry):
287 363 """
288 Overriden method to check for duplicates entries, if a user tries to
364 Overridden method to check for duplicate entries, if a user tries to
289 365 add an entry that is already in the file, the method will raise a
290 366 ``ValueError`` exception.
291 367
292 368 Argument:
293 369
294 370 ``entry``
295 371 an instance of :class:`~polib._BaseEntry`.
296 372 """
297 373 if self.check_for_duplicates and entry in self:
298 374 raise ValueError('Entry "%s" already exists' % entry.msgid)
299 375 super(_BaseFile, self).append(entry)
300 376
301 377 def insert(self, index, entry):
302 378 """
303 Overriden method to check for duplicates entries, if a user tries to
379 Overridden method to check for duplicate entries, if a user tries to
304 380 add an entry that is already in the file, the method will raise a
305 381 ``ValueError`` exception.
306 382
307 383 Arguments:
308 384
309 385 ``index``
310 386 index at which the entry should be inserted.
311 387
312 388 ``entry``
313 389 an instance of :class:`~polib._BaseEntry`.
314 390 """
315 391 if self.check_for_duplicates and entry in self:
316 392 raise ValueError('Entry "%s" already exists' % entry.msgid)
317 393 super(_BaseFile, self).insert(index, entry)
318 394
319 395 def metadata_as_entry(self):
320 396 """
321 397 Returns the file metadata as a :class:`~polib.POEntry` instance.
322 398 """
323 399 e = POEntry(msgid='')
324 400 mdata = self.ordered_metadata()
325 401 if mdata:
326 402 strs = []
327 403 for name, value in mdata:
328 404 # Strip whitespace off each line in a multi-line entry
329 405 strs.append('%s: %s' % (name, value))
330 406 e.msgstr = '\n'.join(strs) + '\n'
331 407 if self.metadata_is_fuzzy:
332 408 e.flags.append('fuzzy')
333 409 return e
334 410
335 def save(self, fpath=None, repr_method='__str__'):
411 def save(self, fpath=None, repr_method='__unicode__'):
336 412 """
337 413 Saves the po file to ``fpath``.
338 414 If it is an existing file and no ``fpath`` is provided, then the
339 415 existing file is rewritten with the modified data.
340 416
341 417 Keyword arguments:
342 418
343 419 ``fpath``
344 420 string, full or relative path to the file.
345 421
346 422 ``repr_method``
347 423 string, the method to use for output.
348 424 """
349 425 if self.fpath is None and fpath is None:
350 426 raise IOError('You must provide a file path to save() method')
351 427 contents = getattr(self, repr_method)()
352 428 if fpath is None:
353 429 fpath = self.fpath
354 430 if repr_method == 'to_binary':
355 431 fhandle = open(fpath, 'wb')
356 432 else:
357 fhandle = codecs.open(fpath, 'w', self.encoding)
358 if type(contents) != types.UnicodeType:
433 fhandle = io.open(fpath, 'w', encoding=self.encoding)
434 if not isinstance(contents, text_type):
359 435 contents = contents.decode(self.encoding)
360 436 fhandle.write(contents)
361 437 fhandle.close()
362 438 # set the file path if not set
363 439 if self.fpath is None and fpath:
364 440 self.fpath = fpath
365 441
366 442 def find(self, st, by='msgid', include_obsolete_entries=False,
367 443 msgctxt=False):
368 444 """
369 445 Find the entry which msgid (or property identified by the ``by``
370 446 argument) matches the string ``st``.
371 447
372 448 Keyword arguments:
373 449
374 450 ``st``
375 451 string, the string to search for.
376 452
377 453 ``by``
378 454 string, the property to use for comparison (default: ``msgid``).
379 455
380 456 ``include_obsolete_entries``
381 457 boolean, whether to also search in entries that are obsolete.
382 458
383 459 ``msgctxt``
384 string, allows to specify a specific message context for the
460 string, allows specifying a specific message context for the
385 461 search.
386 462 """
387 463 if include_obsolete_entries:
388 464 entries = self[:]
389 465 else:
390 466 entries = [e for e in self if not e.obsolete]
391 467 for e in entries:
392 468 if getattr(e, by) == st:
393 if msgctxt and e.msgctxt != msgctxt:
469 if msgctxt is not False and e.msgctxt != msgctxt:
394 470 continue
395 471 return e
396 472 return None
397 473
398 474 def ordered_metadata(self):
399 475 """
400 476 Convenience method that returns an ordered version of the metadata
401 477 dictionary. The return value is list of tuples (metadata name,
402 478 metadata_value).
403 479 """
404 480 # copy the dict first
405 481 metadata = self.metadata.copy()
406 482 data_order = [
407 483 'Project-Id-Version',
408 484 'Report-Msgid-Bugs-To',
409 485 'POT-Creation-Date',
410 486 'PO-Revision-Date',
411 487 'Last-Translator',
412 488 'Language-Team',
413 489 'MIME-Version',
414 490 'Content-Type',
415 'Content-Transfer-Encoding'
491 'Content-Transfer-Encoding',
492 'Language',
493 'Plural-Forms'
416 494 ]
417 495 ordered_data = []
418 496 for data in data_order:
419 497 try:
420 498 value = metadata.pop(data)
421 499 ordered_data.append((data, value))
422 500 except KeyError:
423 501 pass
424 502 # the rest of the metadata will be alphabetically ordered since there
425 503 # are no specs for this AFAIK
426 keys = metadata.keys()
427 keys.sort()
428 for data in keys:
504 for data in sorted(metadata.keys()):
429 505 value = metadata[data]
430 506 ordered_data.append((data, value))
431 507 return ordered_data
432 508
433 509 def to_binary(self):
434 510 """
435 511 Return the binary representation of the file.
436 512 """
437 513 offsets = []
438 514 entries = self.translated_entries()
515
439 516 # the keys are sorted in the .mo file
440 517 def cmp(_self, other):
441 518 # msgfmt compares entries with msgctxt if it exists
442 if _self.msgctxt:
443 self_msgid = _self.msgctxt
444 else:
445 self_msgid = _self.msgid
446
447 if other.msgctxt:
448 other_msgid = other.msgctxt
449 else:
450 other_msgid = other.msgid
519 self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
520 other_msgid = other.msgctxt and other.msgctxt or other.msgid
451 521 if self_msgid > other_msgid:
452 522 return 1
453 523 elif self_msgid < other_msgid:
454 524 return -1
455 525 else:
456 526 return 0
457 527 # add metadata entry
458 entries.sort(cmp)
528 entries.sort(key=lambda o: o.msgctxt or o.msgid)
459 529 mentry = self.metadata_as_entry()
460 530 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
461 531 entries = [mentry] + entries
462 532 entries_len = len(entries)
463 ids, strs = '', ''
533 ids, strs = b(''), b('')
464 534 for e in entries:
465 535 # For each string, we need size and file offset. Each string is
466 536 # NUL terminated; the NUL does not count into the size.
467 msgid = ''
537 msgid = b('')
468 538 if e.msgctxt:
469 539 # Contexts are stored by storing the concatenation of the
470 540 # context, a <EOT> byte, and the original string
471 541 msgid = self._encode(e.msgctxt + '\4')
472 542 if e.msgid_plural:
473 indexes = e.msgstr_plural.keys()
474 indexes.sort()
475 543 msgstr = []
476 for index in indexes:
544 for index in sorted(e.msgstr_plural.keys()):
477 545 msgstr.append(e.msgstr_plural[index])
478 546 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
479 547 msgstr = self._encode('\0'.join(msgstr))
480 548 else:
481 549 msgid += self._encode(e.msgid)
482 550 msgstr = self._encode(e.msgstr)
483 551 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
484 ids += msgid + '\0'
485 strs += msgstr + '\0'
552 ids += msgid + b('\0')
553 strs += msgstr + b('\0')
486 554
487 555 # The header is 7 32-bit unsigned integers.
488 556 keystart = 7*4+16*entries_len
489 557 # and the values start after the keys
490 558 valuestart = keystart + len(ids)
491 559 koffsets = []
492 560 voffsets = []
493 561 # The string table first has the list of keys, then the list of values.
494 562 # Each entry has first the size of the string, then the file offset.
495 563 for o1, l1, o2, l2 in offsets:
496 564 koffsets += [l1, o1+keystart]
497 565 voffsets += [l2, o2+valuestart]
498 566 offsets = koffsets + voffsets
499 # check endianness for magic number
500 if struct.pack('@h', 1) == struct.pack('<h', 1):
501 magic_number = MOFile.LITTLE_ENDIAN
502 else:
503 magic_number = MOFile.BIG_ENDIAN
504 567
505 568 output = struct.pack(
506 569 "Iiiiiii",
507 magic_number, # Magic number
508 0, # Version
509 entries_len, # # of entries
510 7*4, # start of key index
511 7*4+entries_len*8, # start of value index
512 0, keystart # size and offset of hash table
513 # Important: we don't use hash tables
570 # Magic number
571 MOFile.MAGIC,
572 # Version
573 0,
574 # number of entries
575 entries_len,
576 # start of key index
577 7 * 4,
578 # start of value index
579 7 * 4 + entries_len * 8,
580 # size and offset of hash table, we don't use hash tables
581 0, keystart
582
514 583 )
584 if PY3 and sys.version_info.minor > 1: # python 3.2 or superior
585 output += array.array("i", offsets).tobytes()
586 else:
515 587 output += array.array("i", offsets).tostring()
516 588 output += ids
517 589 output += strs
518 590 return output
519 591
520 592 def _encode(self, mixed):
521 593 """
522 594 Encodes the given ``mixed`` argument with the file encoding if and
523 595 only if it's a unicode string and returns the encoded string.
524 596 """
525 if type(mixed) == types.UnicodeType:
526 return mixed.encode(self.encoding)
597 if isinstance(mixed, text_type):
598 mixed = mixed.encode(self.encoding)
527 599 return mixed
528
529 600 # }}}
530 601 # class POFile {{{
531 602
603
532 604 class POFile(_BaseFile):
533 605 """
534 606 Po (or Pot) file reader/writer.
535 607 This class inherits the :class:`~polib._BaseFile` class and, by extension,
536 608 the python ``list`` type.
537 609 """
538 610
539 611 def __unicode__(self):
540 612 """
541 613 Returns the unicode representation of the po file.
542 614 """
543 615 ret, headers = '', self.header.split('\n')
544 616 for header in headers:
545 if header[:1] in [',', ':']:
617 if not len(header):
618 ret += "#\n"
619 elif header[:1] in [',', ':']:
546 620 ret += '#%s\n' % header
547 621 else:
548 622 ret += '# %s\n' % header
549 623
550 if type(ret) != types.UnicodeType:
551 ret = unicode(ret, self.encoding)
624 if not isinstance(ret, text_type):
625 ret = ret.decode(self.encoding)
552 626
553 627 return ret + _BaseFile.__unicode__(self)
554 628
555 629 def save_as_mofile(self, fpath):
556 630 """
557 631 Saves the binary representation of the file to given ``fpath``.
558 632
559 633 Keyword argument:
560 634
561 635 ``fpath``
562 636 string, full or relative path to the mo file.
563 637 """
564 638 _BaseFile.save(self, fpath, 'to_binary')
565 639
566 640 def percent_translated(self):
567 641 """
568 642 Convenience method that returns the percentage of translated
569 643 messages.
570 644 """
571 645 total = len([e for e in self if not e.obsolete])
572 646 if total == 0:
573 647 return 100
574 648 translated = len(self.translated_entries())
575 return int((100.00 / float(total)) * translated)
649 return int(translated * 100 / float(total))
576 650
577 651 def translated_entries(self):
578 652 """
579 653 Convenience method that returns the list of translated entries.
580 654 """
581 655 return [e for e in self if e.translated()]
582 656
583 657 def untranslated_entries(self):
584 658 """
585 659 Convenience method that returns the list of untranslated entries.
586 660 """
587 return [e for e in self if not e.translated() and not e.obsolete \
661 return [e for e in self if not e.translated() and not e.obsolete
588 662 and not 'fuzzy' in e.flags]
589 663
590 664 def fuzzy_entries(self):
591 665 """
592 666 Convenience method that returns the list of fuzzy entries.
593 667 """
594 668 return [e for e in self if 'fuzzy' in e.flags]
595 669
596 670 def obsolete_entries(self):
597 671 """
598 672 Convenience method that returns the list of obsolete entries.
599 673 """
600 674 return [e for e in self if e.obsolete]
601 675
602 676 def merge(self, refpot):
603 677 """
604 678 Convenience method that merges the current pofile with the pot file
605 679 provided. It behaves exactly as the gettext msgmerge utility:
606 680
607 681 * comments of this file will be preserved, but extracted comments and
608 682 occurrences will be discarded;
609 683 * any translations or comments in the file will be discarded, however,
610 684 dot comments and file positions will be preserved;
611 685 * the fuzzy flags are preserved.
612 686
613 687 Keyword argument:
614 688
615 689 ``refpot``
616 690 object POFile, the reference catalog.
617 691 """
692 # Store entries in dict/set for faster access
693 self_entries = dict((entry.msgid, entry) for entry in self)
694 refpot_msgids = set(entry.msgid for entry in refpot)
695 # Merge entries that are in the refpot
618 696 for entry in refpot:
619 e = self.find(entry.msgid, include_obsolete_entries=True)
697 e = self_entries.get(entry.msgid)
620 698 if e is None:
621 699 e = POEntry()
622 700 self.append(e)
623 701 e.merge(entry)
624 702 # ok, now we must "obsolete" entries that are not in the refpot anymore
625 703 for entry in self:
626 if refpot.find(entry.msgid) is None:
704 if entry.msgid not in refpot_msgids:
627 705 entry.obsolete = True
628
629 706 # }}}
630 707 # class MOFile {{{
631 708
709
632 710 class MOFile(_BaseFile):
633 711 """
634 712 Mo file reader/writer.
635 713 This class inherits the :class:`~polib._BaseFile` class and, by
636 714 extension, the python ``list`` type.
637 715 """
638 BIG_ENDIAN = 0xde120495
639 LITTLE_ENDIAN = 0x950412de
716 MAGIC = 0x950412de
717 MAGIC_SWAPPED = 0xde120495
640 718
641 719 def __init__(self, *args, **kwargs):
642 720 """
643 721 Constructor, accepts all keywords arguments accepted by
644 722 :class:`~polib._BaseFile` class.
645 723 """
646 724 _BaseFile.__init__(self, *args, **kwargs)
647 725 self.magic_number = None
648 726 self.version = 0
649 727
650 728 def save_as_pofile(self, fpath):
651 729 """
652 730 Saves the mofile as a pofile to ``fpath``.
653 731
654 732 Keyword argument:
655 733
656 734 ``fpath``
657 735 string, full or relative path to the file.
658 736 """
659 737 _BaseFile.save(self, fpath)
660 738
661 739 def save(self, fpath=None):
662 740 """
663 741 Saves the mofile to ``fpath``.
664 742
665 743 Keyword argument:
666 744
667 745 ``fpath``
668 746 string, full or relative path to the file.
669 747 """
670 748 _BaseFile.save(self, fpath, 'to_binary')
671 749
672 750 def percent_translated(self):
673 751 """
674 752 Convenience method to keep the same interface with POFile instances.
675 753 """
676 754 return 100
677 755
678 756 def translated_entries(self):
679 757 """
680 758 Convenience method to keep the same interface with POFile instances.
681 759 """
682 760 return self
683 761
684 762 def untranslated_entries(self):
685 763 """
686 764 Convenience method to keep the same interface with POFile instances.
687 765 """
688 766 return []
689 767
690 768 def fuzzy_entries(self):
691 769 """
692 770 Convenience method to keep the same interface with POFile instances.
693 771 """
694 772 return []
695 773
696 774 def obsolete_entries(self):
697 775 """
698 776 Convenience method to keep the same interface with POFile instances.
699 777 """
700 778 return []
701
702 779 # }}}
703 780 # class _BaseEntry {{{
704 781
782
705 783 class _BaseEntry(object):
706 784 """
707 785 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
708 786 This class should **not** be instanciated directly.
709 787 """
710 788
711 789 def __init__(self, *args, **kwargs):
712 790 """
713 791 Constructor, accepts the following keyword arguments:
714 792
715 793 ``msgid``
716 794 string, the entry msgid.
717 795
718 796 ``msgstr``
719 797 string, the entry msgstr.
720 798
721 799 ``msgid_plural``
722 800 string, the entry msgid_plural.
723 801
724 802 ``msgstr_plural``
725 803 list, the entry msgstr_plural lines.
726 804
727 805 ``msgctxt``
728 806 string, the entry context (msgctxt).
729 807
730 808 ``obsolete``
731 809 bool, whether the entry is "obsolete" or not.
732 810
733 811 ``encoding``
734 812 string, the encoding to use, defaults to ``default_encoding``
735 813 global variable (optional).
736 814 """
737 815 self.msgid = kwargs.get('msgid', '')
738 816 self.msgstr = kwargs.get('msgstr', '')
739 817 self.msgid_plural = kwargs.get('msgid_plural', '')
740 818 self.msgstr_plural = kwargs.get('msgstr_plural', {})
741 819 self.msgctxt = kwargs.get('msgctxt', None)
742 820 self.obsolete = kwargs.get('obsolete', False)
743 821 self.encoding = kwargs.get('encoding', default_encoding)
744 822
745 823 def __unicode__(self, wrapwidth=78):
746 824 """
747 825 Returns the unicode representation of the entry.
748 826 """
749 827 if self.obsolete:
750 828 delflag = '#~ '
751 829 else:
752 830 delflag = ''
753 831 ret = []
754 832 # write the msgctxt if any
755 833 if self.msgctxt is not None:
756 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
834 ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
835 wrapwidth)
757 836 # write the msgid
758 837 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
759 838 # write the msgid_plural if any
760 839 if self.msgid_plural:
761 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
840 ret += self._str_field("msgid_plural", delflag, "",
841 self.msgid_plural, wrapwidth)
762 842 if self.msgstr_plural:
763 843 # write the msgstr_plural if any
764 844 msgstrs = self.msgstr_plural
765 845 keys = list(msgstrs)
766 846 keys.sort()
767 847 for index in keys:
768 848 msgstr = msgstrs[index]
769 849 plural_index = '[%s]' % index
770 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
850 ret += self._str_field("msgstr", delflag, plural_index, msgstr,
851 wrapwidth)
771 852 else:
772 853 # otherwise write the msgstr
773 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
854 ret += self._str_field("msgstr", delflag, "", self.msgstr,
855 wrapwidth)
774 856 ret.append('')
775 ret = '\n'.join(ret)
776
777 if type(ret) != types.UnicodeType:
778 return unicode(ret, self.encoding)
857 ret = u('\n').join(ret)
779 858 return ret
780 859
860 if PY3:
861 def __str__(self):
862 return self.__unicode__()
863 else:
781 864 def __str__(self):
782 865 """
783 866 Returns the string representation of the entry.
784 867 """
785 868 return unicode(self).encode(self.encoding)
786 869
787 870 def __eq__(self, other):
788 return unicode(self) == unicode(other)
871 return str(self) == str(other)
789 872
790 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
873 def _str_field(self, fieldname, delflag, plural_index, field,
874 wrapwidth=78):
791 875 lines = field.splitlines(True)
792 876 if len(lines) > 1:
793 877 lines = [''] + lines # start with initial empty line
794 878 else:
795 879 escaped_field = escape(field)
796 880 specialchars_count = 0
797 881 for c in ['\\', '\n', '\r', '\t', '"']:
798 882 specialchars_count += field.count(c)
799 883 # comparison must take into account fieldname length + one space
800 884 # + 2 quotes (eg. msgid "<string>")
801 885 flength = len(fieldname) + 3
802 886 if plural_index:
803 887 flength += len(plural_index)
804 888 real_wrapwidth = wrapwidth - flength + specialchars_count
805 889 if wrapwidth > 0 and len(field) > real_wrapwidth:
806 890 # Wrap the line but take field name into account
807 lines = [''] + [unescape(item) for item in textwrap.wrap(
891 lines = [''] + [unescape(item) for item in wrap(
808 892 escaped_field,
809 893 wrapwidth - 2, # 2 for quotes ""
810 894 drop_whitespace=False,
811 895 break_long_words=False
812 896 )]
813 897 else:
814 898 lines = [field]
815 899 if fieldname.startswith('previous_'):
816 900 # quick and dirty trick to get the real field name
817 901 fieldname = fieldname[9:]
818 902
819 903 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
820 904 escape(lines.pop(0)))]
821 for mstr in lines:
822 ret.append('%s"%s"' % (delflag, escape(mstr)))
905 for line in lines:
906 ret.append('%s"%s"' % (delflag, escape(line)))
823 907 return ret
824
825 908 # }}}
826 909 # class POEntry {{{
827 910
911
828 912 class POEntry(_BaseEntry):
829 913 """
830 914 Represents a po file entry.
831 915 """
832 916
833 917 def __init__(self, *args, **kwargs):
834 918 """
835 919 Constructor, accepts the following keyword arguments:
836 920
837 921 ``comment``
838 922 string, the entry comment.
839 923
840 924 ``tcomment``
841 925 string, the entry translator comment.
842 926
843 927 ``occurrences``
844 928 list, the entry occurrences.
845 929
846 930 ``flags``
847 931 list, the entry flags.
848 932
849 933 ``previous_msgctxt``
850 934 string, the entry previous context.
851 935
852 936 ``previous_msgid``
853 937 string, the entry previous msgid.
854 938
855 939 ``previous_msgid_plural``
856 940 string, the entry previous msgid_plural.
941
942 ``linenum``
943 integer, the line number of the entry
857 944 """
858 945 _BaseEntry.__init__(self, *args, **kwargs)
859 946 self.comment = kwargs.get('comment', '')
860 947 self.tcomment = kwargs.get('tcomment', '')
861 948 self.occurrences = kwargs.get('occurrences', [])
862 949 self.flags = kwargs.get('flags', [])
863 950 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
864 951 self.previous_msgid = kwargs.get('previous_msgid', None)
865 952 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
953 self.linenum = kwargs.get('linenum', None)
866 954
867 955 def __unicode__(self, wrapwidth=78):
868 956 """
869 957 Returns the unicode representation of the entry.
870 958 """
871 959 if self.obsolete:
872 960 return _BaseEntry.__unicode__(self, wrapwidth)
873 961
874 962 ret = []
875 963 # comments first, if any (with text wrapping as xgettext does)
876 964 comments = [('comment', '#. '), ('tcomment', '# ')]
877 965 for c in comments:
878 966 val = getattr(self, c[0])
879 967 if val:
880 968 for comment in val.split('\n'):
881 969 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
882 ret += textwrap.wrap(
970 ret += wrap(
883 971 comment,
884 972 wrapwidth,
885 973 initial_indent=c[1],
886 974 subsequent_indent=c[1],
887 975 break_long_words=False
888 976 )
889 977 else:
890 978 ret.append('%s%s' % (c[1], comment))
891 979
892 980 # occurrences (with text wrapping as xgettext does)
893 981 if self.occurrences:
894 982 filelist = []
895 983 for fpath, lineno in self.occurrences:
896 984 if lineno:
897 985 filelist.append('%s:%s' % (fpath, lineno))
898 986 else:
899 987 filelist.append(fpath)
900 988 filestr = ' '.join(filelist)
901 989 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
902 990 # textwrap split words that contain hyphen, this is not
903 991 # what we want for filenames, so the dirty hack is to
904 992 # temporally replace hyphens with a char that a file cannot
905 993 # contain, like "*"
906 ret += [l.replace('*', '-') for l in textwrap.wrap(
994 ret += [l.replace('*', '-') for l in wrap(
907 995 filestr.replace('-', '*'),
908 996 wrapwidth,
909 997 initial_indent='#: ',
910 998 subsequent_indent='#: ',
911 999 break_long_words=False
912 1000 )]
913 1001 else:
914 1002 ret.append('#: ' + filestr)
915 1003
916 1004 # flags (TODO: wrapping ?)
917 1005 if self.flags:
918 1006 ret.append('#, %s' % ', '.join(self.flags))
919 1007
920 1008 # previous context and previous msgid/msgid_plural
921 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
1009 fields = ['previous_msgctxt', 'previous_msgid',
1010 'previous_msgid_plural']
922 1011 for f in fields:
923 1012 val = getattr(self, f)
924 1013 if val:
925 1014 ret += self._str_field(f, "#| ", "", val, wrapwidth)
926 1015
927 1016 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
928 ret = '\n'.join(ret)
1017 ret = u('\n').join(ret)
929 1018
930 if type(ret) != types.UnicodeType:
931 return unicode(ret, self.encoding)
1019 assert isinstance(ret, text_type)
1020 #if type(ret) != types.UnicodeType:
1021 # return unicode(ret, self.encoding)
932 1022 return ret
933 1023
934 1024 def __cmp__(self, other):
935 1025 """
936 1026 Called by comparison operations if rich comparison is not defined.
937 1027 """
938 def compare_occurrences(a, b):
939 """
940 Compare an entry occurrence with another one.
941 """
942 if a[0] != b[0]:
943 return a[0] < b[0]
944 if a[1] != b[1]:
945 return a[1] < b[1]
946 return 0
947 1028
948 1029 # First: Obsolete test
949 1030 if self.obsolete != other.obsolete:
950 1031 if self.obsolete:
951 1032 return -1
952 1033 else:
953 1034 return 1
954 1035 # Work on a copy to protect original
955 occ1 = self.occurrences[:]
956 occ2 = other.occurrences[:]
957 # Sorting using compare method
958 occ1.sort(compare_occurrences)
959 occ2.sort(compare_occurrences)
960 # Comparing sorted occurrences
1036 occ1 = sorted(self.occurrences[:])
1037 occ2 = sorted(other.occurrences[:])
961 1038 pos = 0
962 1039 for entry1 in occ1:
963 1040 try:
964 1041 entry2 = occ2[pos]
965 1042 except IndexError:
966 1043 return 1
967 1044 pos = pos + 1
968 1045 if entry1[0] != entry2[0]:
969 1046 if entry1[0] > entry2[0]:
970 1047 return 1
971 1048 else:
972 1049 return -1
973 1050 if entry1[1] != entry2[1]:
974 1051 if entry1[1] > entry2[1]:
975 1052 return 1
976 1053 else:
977 1054 return -1
1055 # Compare msgid_plural if set
1056 if self.msgid_plural:
1057 if not other.msgid_plural:
1058 return 1
1059 for pos in self.msgid_plural:
1060 if pos not in other.msgid_plural:
1061 return 1
1062 if self.msgid_plural[pos] > other.msgid_plural[pos]:
1063 return 1
1064 if self.msgid_plural[pos] < other.msgid_plural[pos]:
1065 return -1
978 1066 # Finally: Compare message ID
979 if self.msgid > other.msgid: return 1
980 else: return -1
1067 if self.msgid > other.msgid:
1068 return 1
1069 elif self.msgid < other.msgid:
1070 return -1
1071 return 0
1072
1073 def __gt__(self, other):
1074 return self.__cmp__(other) > 0
1075
1076 def __lt__(self, other):
1077 return self.__cmp__(other) < 0
1078
1079 def __ge__(self, other):
1080 return self.__cmp__(other) >= 0
1081
1082 def __le__(self, other):
1083 return self.__cmp__(other) <= 0
1084
1085 def __eq__(self, other):
1086 return self.__cmp__(other) == 0
1087
1088 def __ne__(self, other):
1089 return self.__cmp__(other) != 0
981 1090
982 1091 def translated(self):
983 1092 """
984 1093 Returns ``True`` if the entry has been translated or ``False``
985 1094 otherwise.
986 1095 """
987 1096 if self.obsolete or 'fuzzy' in self.flags:
988 1097 return False
989 1098 if self.msgstr != '':
990 1099 return True
991 1100 if self.msgstr_plural:
992 1101 for pos in self.msgstr_plural:
993 1102 if self.msgstr_plural[pos] == '':
994 1103 return False
995 1104 return True
996 1105 return False
997 1106
998 1107 def merge(self, other):
999 1108 """
1000 1109 Merge the current entry with the given pot entry.
1001 1110 """
1002 1111 self.msgid = other.msgid
1003 1112 self.msgctxt = other.msgctxt
1004 1113 self.occurrences = other.occurrences
1005 1114 self.comment = other.comment
1006 1115 fuzzy = 'fuzzy' in self.flags
1007 1116 self.flags = other.flags[:] # clone flags
1008 1117 if fuzzy:
1009 1118 self.flags.append('fuzzy')
1010 1119 self.msgid_plural = other.msgid_plural
1011 1120 self.obsolete = other.obsolete
1012 1121 self.previous_msgctxt = other.previous_msgctxt
1013 1122 self.previous_msgid = other.previous_msgid
1014 1123 self.previous_msgid_plural = other.previous_msgid_plural
1015 1124 if other.msgstr_plural:
1016 1125 for pos in other.msgstr_plural:
1017 1126 try:
1018 1127 # keep existing translation at pos if any
1019 1128 self.msgstr_plural[pos]
1020 1129 except KeyError:
1021 1130 self.msgstr_plural[pos] = ''
1022 1131
1132 def __hash__(self):
1133 return hash((self.msgid, self.msgstr))
1023 1134 # }}}
1024 1135 # class MOEntry {{{
1025 1136
1137
1026 1138 class MOEntry(_BaseEntry):
1027 1139 """
1028 1140 Represents a mo file entry.
1029 1141 """
1030 pass
1142 def __init__(self, *args, **kwargs):
1143 """
1144 Constructor, accepts the following keyword arguments,
1145 for consistency with :class:`~polib.POEntry`:
1146
1147 ``comment``
1148 ``tcomment``
1149 ``occurrences``
1150 ``flags``
1151 ``previous_msgctxt``
1152 ``previous_msgid``
1153 ``previous_msgid_plural``
1154
1155 Note: even though these keyword arguments are accepted,
1156 they hold no real meaning in the context of MO files
1157 and are simply ignored.
1158 """
1159 _BaseEntry.__init__(self, *args, **kwargs)
1160 self.comment = ''
1161 self.tcomment = ''
1162 self.occurrences = []
1163 self.flags = []
1164 self.previous_msgctxt = None
1165 self.previous_msgid = None
1166 self.previous_msgid_plural = None
1167
1168 def __hash__(self):
1169 return hash((self.msgid, self.msgstr))
1031 1170
1032 1171 # }}}
1033 1172 # class _POFileParser {{{
1034 1173
1174
1035 1175 class _POFileParser(object):
1036 1176 """
1037 1177 A finite state machine to parse efficiently and correctly po
1038 1178 file format.
1039 1179 """
1040 1180
1041 1181 def __init__(self, pofile, *args, **kwargs):
1042 1182 """
1043 1183 Constructor.
1044 1184
1045 1185 Keyword arguments:
1046 1186
1047 1187 ``pofile``
1048 1188 string, path to the po file or its content
1049 1189
1050 1190 ``encoding``
1051 1191 string, the encoding to use, defaults to ``default_encoding``
1052 1192 global variable (optional).
1053 1193
1054 1194 ``check_for_duplicates``
1055 1195 whether to check for duplicate entries when adding entries to the
1056 1196 file (optional, default: ``False``).
1057 1197 """
1058 1198 enc = kwargs.get('encoding', default_encoding)
1059 if os.path.exists(pofile):
1199 if _is_file(pofile):
1060 1200 try:
1061 self.fhandle = codecs.open(pofile, 'rU', enc)
1201 self.fhandle = io.open(pofile, 'rt', encoding=enc)
1062 1202 except LookupError:
1063 1203 enc = default_encoding
1064 self.fhandle = codecs.open(pofile, 'rU', enc)
1204 self.fhandle = io.open(pofile, 'rt', encoding=enc)
1065 1205 else:
1066 1206 self.fhandle = pofile.splitlines()
1067 1207
1068 self.instance = POFile(
1208 klass = kwargs.get('klass')
1209 if klass is None:
1210 klass = POFile
1211 self.instance = klass(
1069 1212 pofile=pofile,
1070 1213 encoding=enc,
1071 1214 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1072 1215 )
1073 1216 self.transitions = {}
1074 self.current_entry = POEntry()
1075 self.current_state = 'ST'
1217 self.current_line = 0
1218 self.current_entry = POEntry(linenum=self.current_line)
1219 self.current_state = 'st'
1076 1220 self.current_token = None
1077 1221 # two memo flags used in handlers
1078 1222 self.msgstr_index = 0
1079 1223 self.entry_obsolete = 0
1080 1224 # Configure the state machine, by adding transitions.
1081 1225 # Signification of symbols:
1082 1226 # * ST: Beginning of the file (start)
1083 1227 # * HE: Header
1084 1228 # * TC: a translation comment
1085 1229 # * GC: a generated comment
1086 # * OC: a file/line occurence
1230 # * OC: a file/line occurrence
1087 1231 # * FL: a flags line
1088 1232 # * CT: a message context
1089 1233 # * PC: a previous msgctxt
1090 1234 # * PM: a previous msgid
1091 1235 # * PP: a previous msgid_plural
1092 1236 # * MI: a msgid
1093 1237 # * MP: a msgid plural
1094 1238 # * MS: a msgstr
1095 1239 # * MX: a msgstr plural
1096 1240 # * MC: a msgid or msgstr continuation line
1097 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1098 'MS', 'MP', 'MX', 'MI']
1241 all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
1242 'ms', 'mp', 'mx', 'mi']
1099 1243
1100 self.add('TC', ['ST', 'HE'], 'HE')
1101 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1102 'MP', 'MX', 'MI'], 'TC')
1103 self.add('GC', all, 'GC')
1104 self.add('OC', all, 'OC')
1105 self.add('FL', all, 'FL')
1106 self.add('PC', all, 'PC')
1107 self.add('PM', all, 'PM')
1108 self.add('PP', all, 'PP')
1109 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1110 'PP', 'MS', 'MX'], 'CT')
1111 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1112 'PM', 'PP', 'MS', 'MX'], 'MI')
1113 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1114 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1115 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1116 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1244 self.add('tc', ['st', 'he'], 'he')
1245 self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
1246 'mp', 'mx', 'mi'], 'tc')
1247 self.add('gc', all, 'gc')
1248 self.add('oc', all, 'oc')
1249 self.add('fl', all, 'fl')
1250 self.add('pc', all, 'pc')
1251 self.add('pm', all, 'pm')
1252 self.add('pp', all, 'pp')
1253 self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
1254 'pp', 'ms', 'mx'], 'ct')
1255 self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
1256 'pm', 'pp', 'ms', 'mx'], 'mi')
1257 self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
1258 self.add('ms', ['mi', 'mp', 'tc'], 'ms')
1259 self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
1260 self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
1117 1261
1118 1262 def parse(self):
1119 1263 """
1120 1264 Run the state machine, parse the file line by line and call process()
1121 1265 with the current matched symbol.
1122 1266 """
1123 i = 0
1124 1267
1125 1268 keywords = {
1126 'msgctxt': 'CT',
1127 'msgid': 'MI',
1128 'msgstr': 'MS',
1129 'msgid_plural': 'MP',
1269 'msgctxt': 'ct',
1270 'msgid': 'mi',
1271 'msgstr': 'ms',
1272 'msgid_plural': 'mp',
1130 1273 }
1131 1274 prev_keywords = {
1132 'msgid_plural': 'PP',
1133 'msgid': 'PM',
1134 'msgctxt': 'PC',
1275 'msgid_plural': 'pp',
1276 'msgid': 'pm',
1277 'msgctxt': 'pc',
1135 1278 }
1136
1279 tokens = []
1137 1280 for line in self.fhandle:
1138 i += 1
1281 self.current_line += 1
1139 1282 line = line.strip()
1140 1283 if line == '':
1141 1284 continue
1142 1285
1143 1286 tokens = line.split(None, 2)
1144 1287 nb_tokens = len(tokens)
1145 1288
1289 if tokens[0] == '#~|':
1290 continue
1291
1146 1292 if tokens[0] == '#~' and nb_tokens > 1:
1147 1293 line = line[3:].strip()
1148 1294 tokens = tokens[1:]
1149 1295 nb_tokens -= 1
1150 1296 self.entry_obsolete = 1
1151 1297 else:
1152 1298 self.entry_obsolete = 0
1153 1299
1154 1300 # Take care of keywords like
1155 1301 # msgid, msgid_plural, msgctxt & msgstr.
1156 1302 if tokens[0] in keywords and nb_tokens > 1:
1157 1303 line = line[len(tokens[0]):].lstrip()
1304 if re.search(r'([^\\]|^)"', line[1:-1]):
1305 raise IOError('Syntax error in po file %s (line %s): '
1306 'unescaped double quote found' %
1307 (self.instance.fpath, self.current_line))
1158 1308 self.current_token = line
1159 self.process(keywords[tokens[0]], i)
1309 self.process(keywords[tokens[0]])
1160 1310 continue
1161 1311
1162 1312 self.current_token = line
1163 1313
1164 if tokens[0] == '#:' and nb_tokens > 1:
1314 if tokens[0] == '#:':
1315 if nb_tokens <= 1:
1316 continue
1165 1317 # we are on a occurrences line
1166 self.process('OC', i)
1318 self.process('oc')
1167 1319
1168 1320 elif line[:1] == '"':
1169 1321 # we are on a continuation line
1170 self.process('MC', i)
1322 if re.search(r'([^\\]|^)"', line[1:-1]):
1323 raise IOError('Syntax error in po file %s (line %s): '
1324 'unescaped double quote found' %
1325 (self.instance.fpath, self.current_line))
1326 self.process('mc')
1171 1327
1172 1328 elif line[:7] == 'msgstr[':
1173 1329 # we are on a msgstr plural
1174 self.process('MX', i)
1330 self.process('mx')
1175 1331
1176 elif tokens[0] == '#,' and nb_tokens > 1:
1332 elif tokens[0] == '#,':
1333 if nb_tokens <= 1:
1334 continue
1177 1335 # we are on a flags line
1178 self.process('FL', i)
1336 self.process('fl')
1179 1337
1180 elif tokens[0] == '#':
1181 if line == '#': line += ' '
1338 elif tokens[0] == '#' or tokens[0].startswith('##'):
1339 if line == '#':
1340 line += ' '
1182 1341 # we are on a translator comment line
1183 self.process('TC', i)
1342 self.process('tc')
1184 1343
1185 elif tokens[0] == '#.' and nb_tokens > 1:
1344 elif tokens[0] == '#.':
1345 if nb_tokens <= 1:
1346 continue
1186 1347 # we are on a generated comment line
1187 self.process('GC', i)
1348 self.process('gc')
1188 1349
1189 1350 elif tokens[0] == '#|':
1190 if nb_tokens < 2:
1191 self.process('??', i)
1192 continue
1351 if nb_tokens <= 1:
1352 raise IOError('Syntax error in po file %s (line %s)' %
1353 (self.instance.fpath, self.current_line))
1193 1354
1194 1355 # Remove the marker and any whitespace right after that.
1195 1356 line = line[2:].lstrip()
1196 1357 self.current_token = line
1197 1358
1198 1359 if tokens[1].startswith('"'):
1199 1360 # Continuation of previous metadata.
1200 self.process('MC', i)
1361 self.process('mc')
1201 1362 continue
1202 1363
1203 1364 if nb_tokens == 2:
1204 1365 # Invalid continuation line.
1205 self.process('??', i)
1366 raise IOError('Syntax error in po file %s (line %s): '
1367 'invalid continuation line' %
1368 (self.instance.fpath, self.current_line))
1206 1369
1207 1370 # we are on a "previous translation" comment line,
1208 1371 if tokens[1] not in prev_keywords:
1209 1372 # Unknown keyword in previous translation comment.
1210 self.process('??', i)
1373 raise IOError('Syntax error in po file %s (line %s): '
1374 'unknown keyword %s' %
1375 (self.instance.fpath, self.current_line,
1376 tokens[1]))
1211 1377
1212 1378 # Remove the keyword and any whitespace
1213 1379 # between it and the starting quote.
1214 1380 line = line[len(tokens[1]):].lstrip()
1215 1381 self.current_token = line
1216 self.process(prev_keywords[tokens[1]], i)
1382 self.process(prev_keywords[tokens[1]])
1217 1383
1218 1384 else:
1219 self.process('??', i)
1385 raise IOError('Syntax error in po file %s (line %s)' %
1386 (self.instance.fpath, self.current_line))
1220 1387
1221 if self.current_entry:
1388 if self.current_entry and len(tokens) > 0 and \
1389 not tokens[0].startswith('#'):
1222 1390 # since entries are added when another entry is found, we must add
1223 # the last entry here (only if there are lines)
1391 # the last entry here (only if there are lines). Trailing comments
1392 # are ignored
1224 1393 self.instance.append(self.current_entry)
1394
1225 1395 # before returning the instance, check if there's metadata and if
1226 1396 # so extract it in a dict
1227 firstentry = self.instance[0]
1228 if firstentry.msgid == '': # metadata found
1397 metadataentry = self.instance.find('')
1398 if metadataentry: # metadata found
1229 1399 # remove the entry
1230 firstentry = self.instance.pop(0)
1231 self.instance.metadata_is_fuzzy = firstentry.flags
1400 self.instance.remove(metadataentry)
1401 self.instance.metadata_is_fuzzy = metadataentry.flags
1232 1402 key = None
1233 for msg in firstentry.msgstr.splitlines():
1403 for msg in metadataentry.msgstr.splitlines():
1234 1404 try:
1235 1405 key, val = msg.split(':', 1)
1236 1406 self.instance.metadata[key] = val.strip()
1237 except:
1407 except (ValueError, KeyError):
1238 1408 if key is not None:
1239 1409 self.instance.metadata[key] += '\n'+ msg.strip()
1240 1410 # close opened file
1241 if isinstance(self.fhandle, file):
1411 if not isinstance(self.fhandle, list): # must be file
1242 1412 self.fhandle.close()
1243 1413 return self.instance
1244 1414
1245 1415 def add(self, symbol, states, next_state):
1246 1416 """
1247 1417 Add a transition to the state machine.
1248 1418
1249 1419 Keywords arguments:
1250 1420
1251 1421 ``symbol``
1252 1422 string, the matched token (two chars symbol).
1253 1423
1254 1424 ``states``
1255 1425 list, a list of states (two chars symbols).
1256 1426
1257 1427 ``next_state``
1258 1428 the next state the fsm will have after the action.
1259 1429 """
1260 1430 for state in states:
1261 action = getattr(self, 'handle_%s' % next_state.lower())
1431 action = getattr(self, 'handle_%s' % next_state)
1262 1432 self.transitions[(symbol, state)] = (action, next_state)
1263 1433
1264 def process(self, symbol, linenum):
1434 def process(self, symbol):
1265 1435 """
1266 1436 Process the transition corresponding to the current state and the
1267 1437 symbol provided.
1268 1438
1269 1439 Keywords arguments:
1270 1440
1271 1441 ``symbol``
1272 1442 string, the matched token (two chars symbol).
1273 1443
1274 1444 ``linenum``
1275 1445 integer, the current line number of the parsed file.
1276 1446 """
1277 1447 try:
1278 1448 (action, state) = self.transitions[(symbol, self.current_state)]
1279 1449 if action():
1280 1450 self.current_state = state
1281 except Exception as exc:
1282 raise IOError('Syntax error in po file (line %s)' % linenum)
1451 except Exception:
1452 raise IOError('Syntax error in po file (line %s)' %
1453 self.current_line)
1283 1454
1284 1455 # state handlers
1285 1456
1286 1457 def handle_he(self):
1287 1458 """Handle a header comment."""
1288 1459 if self.instance.header != '':
1289 1460 self.instance.header += '\n'
1290 1461 self.instance.header += self.current_token[2:]
1291 1462 return 1
1292 1463
1293 1464 def handle_tc(self):
1294 1465 """Handle a translator comment."""
1295 if self.current_state in ['MC', 'MS', 'MX']:
1466 if self.current_state in ['mc', 'ms', 'mx']:
1296 1467 self.instance.append(self.current_entry)
1297 self.current_entry = POEntry()
1468 self.current_entry = POEntry(linenum=self.current_line)
1298 1469 if self.current_entry.tcomment != '':
1299 1470 self.current_entry.tcomment += '\n'
1300 self.current_entry.tcomment += self.current_token[2:]
1471 tcomment = self.current_token.lstrip('#')
1472 if tcomment.startswith(' '):
1473 tcomment = tcomment[1:]
1474 self.current_entry.tcomment += tcomment
1301 1475 return True
1302 1476
1303 1477 def handle_gc(self):
1304 1478 """Handle a generated comment."""
1305 if self.current_state in ['MC', 'MS', 'MX']:
1479 if self.current_state in ['mc', 'ms', 'mx']:
1306 1480 self.instance.append(self.current_entry)
1307 self.current_entry = POEntry()
1481 self.current_entry = POEntry(linenum=self.current_line)
1308 1482 if self.current_entry.comment != '':
1309 1483 self.current_entry.comment += '\n'
1310 1484 self.current_entry.comment += self.current_token[3:]
1311 1485 return True
1312 1486
1313 1487 def handle_oc(self):
1314 """Handle a file:num occurence."""
1315 if self.current_state in ['MC', 'MS', 'MX']:
1488 """Handle a file:num occurrence."""
1489 if self.current_state in ['mc', 'ms', 'mx']:
1316 1490 self.instance.append(self.current_entry)
1317 self.current_entry = POEntry()
1491 self.current_entry = POEntry(linenum=self.current_line)
1318 1492 occurrences = self.current_token[3:].split()
1319 1493 for occurrence in occurrences:
1320 1494 if occurrence != '':
1321 1495 try:
1322 1496 fil, line = occurrence.split(':')
1323 1497 if not line.isdigit():
1324 1498 fil = fil + line
1325 1499 line = ''
1326 1500 self.current_entry.occurrences.append((fil, line))
1327 except:
1501 except (ValueError, AttributeError):
1328 1502 self.current_entry.occurrences.append((occurrence, ''))
1329 1503 return True
1330 1504
1331 1505 def handle_fl(self):
1332 1506 """Handle a flags line."""
1333 if self.current_state in ['MC', 'MS', 'MX']:
1507 if self.current_state in ['mc', 'ms', 'mx']:
1334 1508 self.instance.append(self.current_entry)
1335 self.current_entry = POEntry()
1336 self.current_entry.flags += self.current_token[3:].split(', ')
1509 self.current_entry = POEntry(linenum=self.current_line)
1510 self.current_entry.flags += [c.strip() for c in
1511 self.current_token[3:].split(',')]
1337 1512 return True
1338 1513
1339 1514 def handle_pp(self):
1340 1515 """Handle a previous msgid_plural line."""
1341 if self.current_state in ['MC', 'MS', 'MX']:
1516 if self.current_state in ['mc', 'ms', 'mx']:
1342 1517 self.instance.append(self.current_entry)
1343 self.current_entry = POEntry()
1518 self.current_entry = POEntry(linenum=self.current_line)
1344 1519 self.current_entry.previous_msgid_plural = \
1345 1520 unescape(self.current_token[1:-1])
1346 1521 return True
1347 1522
1348 1523 def handle_pm(self):
1349 1524 """Handle a previous msgid line."""
1350 if self.current_state in ['MC', 'MS', 'MX']:
1525 if self.current_state in ['mc', 'ms', 'mx']:
1351 1526 self.instance.append(self.current_entry)
1352 self.current_entry = POEntry()
1527 self.current_entry = POEntry(linenum=self.current_line)
1353 1528 self.current_entry.previous_msgid = \
1354 1529 unescape(self.current_token[1:-1])
1355 1530 return True
1356 1531
1357 1532 def handle_pc(self):
1358 1533 """Handle a previous msgctxt line."""
1359 if self.current_state in ['MC', 'MS', 'MX']:
1534 if self.current_state in ['mc', 'ms', 'mx']:
1360 1535 self.instance.append(self.current_entry)
1361 self.current_entry = POEntry()
1536 self.current_entry = POEntry(linenum=self.current_line)
1362 1537 self.current_entry.previous_msgctxt = \
1363 1538 unescape(self.current_token[1:-1])
1364 1539 return True
1365 1540
1366 1541 def handle_ct(self):
1367 1542 """Handle a msgctxt."""
1368 if self.current_state in ['MC', 'MS', 'MX']:
1543 if self.current_state in ['mc', 'ms', 'mx']:
1369 1544 self.instance.append(self.current_entry)
1370 self.current_entry = POEntry()
1545 self.current_entry = POEntry(linenum=self.current_line)
1371 1546 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
1372 1547 return True
1373 1548
1374 1549 def handle_mi(self):
1375 1550 """Handle a msgid."""
1376 if self.current_state in ['MC', 'MS', 'MX']:
1551 if self.current_state in ['mc', 'ms', 'mx']:
1377 1552 self.instance.append(self.current_entry)
1378 self.current_entry = POEntry()
1553 self.current_entry = POEntry(linenum=self.current_line)
1379 1554 self.current_entry.obsolete = self.entry_obsolete
1380 1555 self.current_entry.msgid = unescape(self.current_token[1:-1])
1381 1556 return True
1382 1557
1383 1558 def handle_mp(self):
1384 1559 """Handle a msgid plural."""
1385 1560 self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
1386 1561 return True
1387 1562
1388 1563 def handle_ms(self):
1389 1564 """Handle a msgstr."""
1390 1565 self.current_entry.msgstr = unescape(self.current_token[1:-1])
1391 1566 return True
1392 1567
1393 1568 def handle_mx(self):
1394 1569 """Handle a msgstr plural."""
1395 index, value = self.current_token[7], self.current_token[11:-1]
1396 self.current_entry.msgstr_plural[index] = unescape(value)
1397 self.msgstr_index = index
1570 index = self.current_token[7]
1571 value = self.current_token[self.current_token.find('"') + 1:-1]
1572 self.current_entry.msgstr_plural[int(index)] = unescape(value)
1573 self.msgstr_index = int(index)
1398 1574 return True
1399 1575
1400 1576 def handle_mc(self):
1401 1577 """Handle a msgid or msgstr continuation line."""
1402 1578 token = unescape(self.current_token[1:-1])
1403 if self.current_state == 'CT':
1404 typ = 'msgctxt'
1579 if self.current_state == 'ct':
1405 1580 self.current_entry.msgctxt += token
1406 elif self.current_state == 'MI':
1407 typ = 'msgid'
1581 elif self.current_state == 'mi':
1408 1582 self.current_entry.msgid += token
1409 elif self.current_state == 'MP':
1410 typ = 'msgid_plural'
1583 elif self.current_state == 'mp':
1411 1584 self.current_entry.msgid_plural += token
1412 elif self.current_state == 'MS':
1413 typ = 'msgstr'
1585 elif self.current_state == 'ms':
1414 1586 self.current_entry.msgstr += token
1415 elif self.current_state == 'MX':
1416 typ = 'msgstr[%s]' % self.msgstr_index
1587 elif self.current_state == 'mx':
1417 1588 self.current_entry.msgstr_plural[self.msgstr_index] += token
1418 elif self.current_state == 'PP':
1419 typ = 'previous_msgid_plural'
1420 token = token[3:]
1589 elif self.current_state == 'pp':
1421 1590 self.current_entry.previous_msgid_plural += token
1422 elif self.current_state == 'PM':
1423 typ = 'previous_msgid'
1424 token = token[3:]
1591 elif self.current_state == 'pm':
1425 1592 self.current_entry.previous_msgid += token
1426 elif self.current_state == 'PC':
1427 typ = 'previous_msgctxt'
1428 token = token[3:]
1593 elif self.current_state == 'pc':
1429 1594 self.current_entry.previous_msgctxt += token
1430 1595 # don't change the current state
1431 1596 return False
1432
1433 1597 # }}}
1434 1598 # class _MOFileParser {{{
1435 1599
1600
1436 1601 class _MOFileParser(object):
1437 1602 """
1438 1603 A class to parse binary mo files.
1439 1604 """
1440 1605
1441 1606 def __init__(self, mofile, *args, **kwargs):
1442 1607 """
1443 1608 Constructor.
1444 1609
1445 1610 Keyword arguments:
1446 1611
1447 1612 ``mofile``
1448 1613 string, path to the mo file or its content
1449 1614
1450 1615 ``encoding``
1451 1616 string, the encoding to use, defaults to ``default_encoding``
1452 1617 global variable (optional).
1453 1618
1454 1619 ``check_for_duplicates``
1455 1620 whether to check for duplicate entries when adding entries to the
1456 1621 file (optional, default: ``False``).
1457 1622 """
1458 1623 self.fhandle = open(mofile, 'rb')
1459 self.instance = MOFile(
1624
1625 klass = kwargs.get('klass')
1626 if klass is None:
1627 klass = MOFile
1628 self.instance = klass(
1460 1629 fpath=mofile,
1461 1630 encoding=kwargs.get('encoding', default_encoding),
1462 1631 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1463 1632 )
1464 1633
1634 def __del__(self):
1635 """
1636 Make sure the file is closed, this prevents warnings on unclosed file
1637 when running tests with python >= 3.2.
1638 """
1639 if self.fhandle:
1640 self.fhandle.close()
1641
1465 1642 def parse(self):
1466 1643 """
1467 1644 Build the instance with the file handle provided in the
1468 1645 constructor.
1469 1646 """
1470 1647 # parse magic number
1471 1648 magic_number = self._readbinary('<I', 4)
1472 if magic_number == MOFile.LITTLE_ENDIAN:
1649 if magic_number == MOFile.MAGIC:
1473 1650 ii = '<II'
1474 elif magic_number == MOFile.BIG_ENDIAN:
1651 elif magic_number == MOFile.MAGIC_SWAPPED:
1475 1652 ii = '>II'
1476 1653 else:
1477 1654 raise IOError('Invalid mo file, magic number is incorrect !')
1478 1655 self.instance.magic_number = magic_number
1479 1656 # parse the version number and the number of strings
1480 self.instance.version, numofstrings = self._readbinary(ii, 8)
1657 version, numofstrings = self._readbinary(ii, 8)
1658 # from MO file format specs: "A program seeing an unexpected major
1659 # revision number should stop reading the MO file entirely"
1660 if version not in (0, 1):
1661 raise IOError('Invalid mo file, unexpected major revision number')
1662 self.instance.version = version
1481 1663 # original strings and translation strings hash table offset
1482 1664 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1483 1665 # move to msgid hash table and read length and offset of msgids
1484 1666 self.fhandle.seek(msgids_hash_offset)
1485 1667 msgids_index = []
1486 1668 for i in range(numofstrings):
1487 1669 msgids_index.append(self._readbinary(ii, 8))
1488 1670 # move to msgstr hash table and read length and offset of msgstrs
1489 1671 self.fhandle.seek(msgstrs_hash_offset)
1490 1672 msgstrs_index = []
1491 1673 for i in range(numofstrings):
1492 1674 msgstrs_index.append(self._readbinary(ii, 8))
1493 1675 # build entries
1676 encoding = self.instance.encoding
1494 1677 for i in range(numofstrings):
1495 1678 self.fhandle.seek(msgids_index[i][1])
1496 1679 msgid = self.fhandle.read(msgids_index[i][0])
1680
1497 1681 self.fhandle.seek(msgstrs_index[i][1])
1498 1682 msgstr = self.fhandle.read(msgstrs_index[i][0])
1499 if i == 0: # metadata
1500 raw_metadata, metadata = msgstr.split('\n'), {}
1683 if i == 0 and not msgid: # metadata
1684 raw_metadata, metadata = msgstr.split(b('\n')), {}
1501 1685 for line in raw_metadata:
1502 tokens = line.split(':', 1)
1503 if tokens[0] != '':
1686 tokens = line.split(b(':'), 1)
1687 if tokens[0] != b(''):
1504 1688 try:
1505 metadata[tokens[0]] = tokens[1].strip()
1689 k = tokens[0].decode(encoding)
1690 v = tokens[1].decode(encoding)
1691 metadata[k] = v.strip()
1506 1692 except IndexError:
1507 metadata[tokens[0]] = ''
1693 metadata[k] = u('')
1508 1694 self.instance.metadata = metadata
1509 1695 continue
1510 1696 # test if we have a plural entry
1511 msgid_tokens = msgid.split('\0')
1697 msgid_tokens = msgid.split(b('\0'))
1512 1698 if len(msgid_tokens) > 1:
1513 1699 entry = self._build_entry(
1514 1700 msgid=msgid_tokens[0],
1515 1701 msgid_plural=msgid_tokens[1],
1516 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
1702 msgstr_plural=dict((k, v) for k, v in
1703 enumerate(msgstr.split(b('\0'))))
1517 1704 )
1518 1705 else:
1519 1706 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1520 1707 self.instance.append(entry)
1521 1708 # close opened file
1522 1709 self.fhandle.close()
1523 1710 return self.instance
1524 1711
1525 1712 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1526 1713 msgstr_plural=None):
1527 msgctxt_msgid = msgid.split('\x04')
1714 msgctxt_msgid = msgid.split(b('\x04'))
1715 encoding = self.instance.encoding
1528 1716 if len(msgctxt_msgid) > 1:
1529 1717 kwargs = {
1530 'msgctxt': msgctxt_msgid[0],
1531 'msgid' : msgctxt_msgid[1],
1718 'msgctxt': msgctxt_msgid[0].decode(encoding),
1719 'msgid': msgctxt_msgid[1].decode(encoding),
1532 1720 }
1533 1721 else:
1534 kwargs = {'msgid': msgid}
1722 kwargs = {'msgid': msgid.decode(encoding)}
1535 1723 if msgstr:
1536 kwargs['msgstr'] = msgstr
1724 kwargs['msgstr'] = msgstr.decode(encoding)
1537 1725 if msgid_plural:
1538 kwargs['msgid_plural'] = msgid_plural
1726 kwargs['msgid_plural'] = msgid_plural.decode(encoding)
1539 1727 if msgstr_plural:
1728 for k in msgstr_plural:
1729 msgstr_plural[k] = msgstr_plural[k].decode(encoding)
1540 1730 kwargs['msgstr_plural'] = msgstr_plural
1541 1731 return MOEntry(**kwargs)
1542 1732
1543 1733 def _readbinary(self, fmt, numbytes):
1544 1734 """
1545 1735 Private method that unpack n bytes of data using format <fmt>.
1546 1736 It returns a tuple or a mixed value if the tuple length is 1.
1547 1737 """
1548 1738 bytes = self.fhandle.read(numbytes)
1549 1739 tup = struct.unpack(fmt, bytes)
1550 1740 if len(tup) == 1:
1551 1741 return tup[0]
1552 1742 return tup
1743 # }}}
1744 # class TextWrapper {{{
1745
1746
1747 class TextWrapper(textwrap.TextWrapper):
1748 """
1749 Subclass of textwrap.TextWrapper that backport the
1750 drop_whitespace option.
1751 """
1752 def __init__(self, *args, **kwargs):
1753 drop_whitespace = kwargs.pop('drop_whitespace', True)
1754 textwrap.TextWrapper.__init__(self, *args, **kwargs)
1755 self.drop_whitespace = drop_whitespace
1756
1757 def _wrap_chunks(self, chunks):
1758 """_wrap_chunks(chunks : [string]) -> [string]
1759
1760 Wrap a sequence of text chunks and return a list of lines of
1761 length 'self.width' or less. (If 'break_long_words' is false,
1762 some lines may be longer than this.) Chunks correspond roughly
1763 to words and the whitespace between them: each chunk is
1764 indivisible (modulo 'break_long_words'), but a line break can
1765 come between any two chunks. Chunks should not have internal
1766 whitespace; ie. a chunk is either all whitespace or a "word".
1767 Whitespace chunks will be removed from the beginning and end of
1768 lines, but apart from that whitespace is preserved.
1769 """
1770 lines = []
1771 if self.width <= 0:
1772 raise ValueError("invalid width %r (must be > 0)" % self.width)
1773
1774 # Arrange in reverse order so items can be efficiently popped
1775 # from a stack of chucks.
1776 chunks.reverse()
1777
1778 while chunks:
1779
1780 # Start the list of chunks that will make up the current line.
1781 # cur_len is just the length of all the chunks in cur_line.
1782 cur_line = []
1783 cur_len = 0
1784
1785 # Figure out which static string will prefix this line.
1786 if lines:
1787 indent = self.subsequent_indent
1788 else:
1789 indent = self.initial_indent
1790
1791 # Maximum width for this line.
1792 width = self.width - len(indent)
1793
1794 # First chunk on line is whitespace -- drop it, unless this
1795 # is the very beginning of the text (ie. no lines started yet).
1796 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
1797 del chunks[-1]
1798
1799 while chunks:
1800 l = len(chunks[-1])
1801
1802 # Can at least squeeze this chunk onto the current line.
1803 if cur_len + l <= width:
1804 cur_line.append(chunks.pop())
1805 cur_len += l
1806
1807 # Nope, this line is full.
1808 else:
1809 break
1810
1811 # The current line is full, and the next chunk is too big to
1812 # fit on *any* line (not just this one).
1813 if chunks and len(chunks[-1]) > width:
1814 self._handle_long_word(chunks, cur_line, cur_len, width)
1815
1816 # If the last chunk on this line is all whitespace, drop it.
1817 if self.drop_whitespace and cur_line and not cur_line[-1].strip():
1818 del cur_line[-1]
1819
1820 # Convert current line back to a string and store it in list
1821 # of all lines (return value).
1822 if cur_line:
1823 lines.append(indent + ''.join(cur_line))
1824
1825 return lines
1826 # }}}
1827 # function wrap() {{{
1828
1829
1830 def wrap(text, width=70, **kwargs):
1831 """
1832 Wrap a single paragraph of text, returning a list of wrapped lines.
1833 """
1834 if sys.version_info < (2, 6):
1835 return TextWrapper(width=width, **kwargs).wrap(text)
1836 return textwrap.wrap(text, width=width, **kwargs)
1553 1837
1554 1838 # }}}
General Comments 0
You need to be logged in to leave comments. Login now