##// END OF EJS Templates
i18n: drop a py25 conditional...
Matt Harbison -
r32889:a7310a47 default
parent child Browse files
Show More
@@ -1,1648 +1,1554 b''
1 1 # -*- coding: utf-8 -*-
2 2 # no-check-code
3 3 #
4 4 # License: MIT (see LICENSE file provided)
5 5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6 6
7 7 """
8 8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
9 9 mo files). You can load existing files, iterate through its entries, add,
10 10 modify entries, comments or metadata, etc. or create new po files from scratch.
11 11
12 12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
13 13 :func:`~polib.mofile` convenience functions.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
__author__ = 'David Jean Louis <izimobil@gmail.com>'
__version__ = '0.6.4'
# Public API of the module. ``default_encoding`` is the module-level setting
# callers may override; the list previously named ``detect_encoding`` twice.
__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
           'default_encoding', 'escape', 'unescape', 'detect_encoding']
22 22
23 23 import array
24 24 import codecs
25 25 import os
26 26 import re
27 27 import struct
28 28 import sys
29 29 import textwrap
30 30 import types
31 31
32 32
# fallback encoding, used whenever the encoding of a file cannot be detected
default_encoding = 'utf-8'
35 35
36 36 # _pofile_or_mofile {{{
37 37
def _pofile_or_mofile(f, type, **kwargs):
    """
    Shared implementation of :func:`polib.pofile` and :func:`polib.mofile`.

    ``f`` is handed to the parser as is, ``type`` is either ``'pofile'`` or
    ``'mofile'`` and selects the parser class. The recognized keyword
    arguments are ``encoding``, ``check_for_duplicates`` and ``wrapwidth``.
    """
    # an explicit encoding wins, otherwise sniff it from the file
    encoding = kwargs.get('encoding')
    if encoding is None:
        encoding = detect_encoding(f, type == 'mofile')

    # pick the parser matching the requested file type
    if type == 'pofile':
        parser_class = _POFileParser
    else:
        parser_class = _MOFileParser
    check_dups = kwargs.get('check_for_duplicates', False)
    parser = parser_class(f, encoding=encoding,
                          check_for_duplicates=check_dups)

    result = parser.parse()
    result.wrapwidth = kwargs.get('wrapwidth', 78)
    return result
58 58
59 59 # }}}
60 60 # function pofile() {{{
61 61
def pofile(pofile, **kwargs):
    """
    Parse the po or pot file ``pofile`` and return the resulting
    :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content
        (data).

    ``wrapwidth``
        integer, the wrap width stored on the returned instance, only useful
        when the ``-w`` option was passed to xgettext (optional, default:
        ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8"). When omitted or ``None``
        the encoding is auto-detected.

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).
    """
    parsed = _pofile_or_mofile(pofile, 'pofile', **kwargs)
    return parsed
85 85
86 86 # }}}
87 87 # function mofile() {{{
88 88
def mofile(mofile, **kwargs):
    """
    Parse the mo file ``mofile`` and return the resulting
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (data).

    ``wrapwidth``
        integer, the wrap width stored on the returned instance, only useful
        when the ``-w`` option was passed to xgettext to generate the po file
        that was used to format the mo file (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8"). When omitted or ``None``
        the encoding is auto-detected.

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).
    """
    parsed = _pofile_or_mofile(mofile, 'mofile', **kwargs)
    return parsed
113 113
114 114 # }}}
115 115 # function detect_encoding() {{{
116 116
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file (the file is then
        opened with mode ``'rb'`` instead of ``'r'``).

    Returns the charset named by the first ``Content-Type`` declaration whose
    charset is known to :mod:`codecs`.
    """
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    if not os.path.exists(file):
        # not an existing path: ``file`` is the content itself, only the
        # first declaration found is considered
        match = rx.search(file)
        if match:
            enc = match.group(1).strip()
            if charset_exists(enc):
                return enc
    else:
        mode = 'rb' if binary_mode else 'r'
        # the ``with`` block guarantees the handle is closed exactly once,
        # including when reading raises; lines are consumed lazily so the
        # scan stops at the first usable declaration
        with open(file, mode) as f:
            for line in f:
                match = rx.search(line)
                if match:
                    enc = match.group(1).strip()
                    if charset_exists(enc):
                        return enc
                    # unknown charset: keep looking at the following lines
    return default_encoding
163 163
164 164 # }}}
165 165 # function escape() {{{
166 166
def escape(st):
    """
    Return ``st`` with backslash, tab, carriage return, newline and double
    quote characters replaced by their backslash escape sequences.
    """
    # the backslash must be handled first so that the backslashes introduced
    # by the other replacements are not escaped a second time
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('"', r'\"'),
    )
    escaped = st
    for raw, quoted in replacements:
        escaped = escaped.replace(raw, quoted)
    return escaped
177 177
178 178 # }}}
179 179 # function unescape() {{{
180 180
def unescape(st):
    """
    Return ``st`` with the escape sequences for backslash, tab, newline,
    carriage return and double quote replaced by the characters they stand
    for.
    """
    # escape letters that map to a different character; an escaped backslash
    # or double quote simply maps to itself
    specials = {'n': '\n', 't': '\t', 'r': '\r'}

    def unescape_repl(match):
        char = match.group(1)
        return specials.get(char, char)

    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
198 198
199 199 # }}}
200 200 # class _BaseFile {{{
201 201
202 202 class _BaseFile(list):
203 203 """
204 204 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
205 205 classes. This class should **not** be instanciated directly.
206 206 """
207 207
208 208 def __init__(self, *args, **kwargs):
209 209 """
210 210 Constructor, accepts the following keyword arguments:
211 211
212 212 ``pofile``
213 213 string, the path to the po or mo file, or its content as a string.
214 214
215 215 ``wrapwidth``
216 216 integer, the wrap width, only useful when the ``-w`` option was
217 217 passed to xgettext (optional, default: ``78``).
218 218
219 219 ``encoding``
220 220 string, the encoding to use, defaults to ``default_encoding``
221 221 global variable (optional).
222 222
223 223 ``check_for_duplicates``
224 224 whether to check for duplicate entries when adding entries to the
225 225 file, (optional, default: ``False``).
226 226 """
227 227 list.__init__(self)
228 228 # the opened file handle
229 229 pofile = kwargs.get('pofile', None)
230 230 if pofile and os.path.exists(pofile):
231 231 self.fpath = pofile
232 232 else:
233 233 self.fpath = kwargs.get('fpath')
234 234 # the width at which lines should be wrapped
235 235 self.wrapwidth = kwargs.get('wrapwidth', 78)
236 236 # the file encoding
237 237 self.encoding = kwargs.get('encoding', default_encoding)
238 238 # whether to check for duplicate entries or not
239 239 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
240 240 # header
241 241 self.header = ''
242 242 # both po and mo files have metadata
243 243 self.metadata = {}
244 244 self.metadata_is_fuzzy = 0
245 245
246 246 def __unicode__(self):
247 247 """
248 248 Returns the unicode representation of the file.
249 249 """
250 250 ret = []
251 251 entries = [self.metadata_as_entry()] + \
252 252 [e for e in self if not e.obsolete]
253 253 for entry in entries:
254 254 ret.append(entry.__unicode__(self.wrapwidth))
255 255 for entry in self.obsolete_entries():
256 256 ret.append(entry.__unicode__(self.wrapwidth))
257 257 ret = '\n'.join(ret)
258 258
259 259 if type(ret) != types.UnicodeType:
260 260 return unicode(ret, self.encoding)
261 261 return ret
262 262
263 263 def __str__(self):
264 264 """
265 265 Returns the string representation of the file.
266 266 """
267 267 return unicode(self).encode(self.encoding)
268 268
269 269 def __contains__(self, entry):
270 270 """
271 271 Overriden ``list`` method to implement the membership test (in and
272 272 not in).
273 273 The method considers that an entry is in the file if it finds an entry
274 274 that has the same msgid (the test is **case sensitive**).
275 275
276 276 Argument:
277 277
278 278 ``entry``
279 279 an instance of :class:`~polib._BaseEntry`.
280 280 """
281 281 return self.find(entry.msgid, by='msgid') is not None
282 282
283 283 def __eq__(self, other):
284 284 return unicode(self) == unicode(other)
285 285
286 286 def append(self, entry):
287 287 """
288 288 Overriden method to check for duplicates entries, if a user tries to
289 289 add an entry that is already in the file, the method will raise a
290 290 ``ValueError`` exception.
291 291
292 292 Argument:
293 293
294 294 ``entry``
295 295 an instance of :class:`~polib._BaseEntry`.
296 296 """
297 297 if self.check_for_duplicates and entry in self:
298 298 raise ValueError('Entry "%s" already exists' % entry.msgid)
299 299 super(_BaseFile, self).append(entry)
300 300
301 301 def insert(self, index, entry):
302 302 """
303 303 Overriden method to check for duplicates entries, if a user tries to
304 304 add an entry that is already in the file, the method will raise a
305 305 ``ValueError`` exception.
306 306
307 307 Arguments:
308 308
309 309 ``index``
310 310 index at which the entry should be inserted.
311 311
312 312 ``entry``
313 313 an instance of :class:`~polib._BaseEntry`.
314 314 """
315 315 if self.check_for_duplicates and entry in self:
316 316 raise ValueError('Entry "%s" already exists' % entry.msgid)
317 317 super(_BaseFile, self).insert(index, entry)
318 318
319 319 def metadata_as_entry(self):
320 320 """
321 321 Returns the file metadata as a :class:`~polib.POFile` instance.
322 322 """
323 323 e = POEntry(msgid='')
324 324 mdata = self.ordered_metadata()
325 325 if mdata:
326 326 strs = []
327 327 for name, value in mdata:
328 328 # Strip whitespace off each line in a multi-line entry
329 329 strs.append('%s: %s' % (name, value))
330 330 e.msgstr = '\n'.join(strs) + '\n'
331 331 if self.metadata_is_fuzzy:
332 332 e.flags.append('fuzzy')
333 333 return e
334 334
335 335 def save(self, fpath=None, repr_method='__str__'):
336 336 """
337 337 Saves the po file to ``fpath``.
338 338 If it is an existing file and no ``fpath`` is provided, then the
339 339 existing file is rewritten with the modified data.
340 340
341 341 Keyword arguments:
342 342
343 343 ``fpath``
344 344 string, full or relative path to the file.
345 345
346 346 ``repr_method``
347 347 string, the method to use for output.
348 348 """
349 349 if self.fpath is None and fpath is None:
350 350 raise IOError('You must provide a file path to save() method')
351 351 contents = getattr(self, repr_method)()
352 352 if fpath is None:
353 353 fpath = self.fpath
354 354 if repr_method == 'to_binary':
355 355 fhandle = open(fpath, 'wb')
356 356 else:
357 357 fhandle = codecs.open(fpath, 'w', self.encoding)
358 358 if type(contents) != types.UnicodeType:
359 359 contents = contents.decode(self.encoding)
360 360 fhandle.write(contents)
361 361 fhandle.close()
362 362 # set the file path if not set
363 363 if self.fpath is None and fpath:
364 364 self.fpath = fpath
365 365
366 366 def find(self, st, by='msgid', include_obsolete_entries=False,
367 367 msgctxt=False):
368 368 """
369 369 Find the entry which msgid (or property identified by the ``by``
370 370 argument) matches the string ``st``.
371 371
372 372 Keyword arguments:
373 373
374 374 ``st``
375 375 string, the string to search for.
376 376
377 377 ``by``
378 378 string, the property to use for comparison (default: ``msgid``).
379 379
380 380 ``include_obsolete_entries``
381 381 boolean, whether to also search in entries that are obsolete.
382 382
383 383 ``msgctxt``
384 384 string, allows to specify a specific message context for the
385 385 search.
386 386 """
387 387 if include_obsolete_entries:
388 388 entries = self[:]
389 389 else:
390 390 entries = [e for e in self if not e.obsolete]
391 391 for e in entries:
392 392 if getattr(e, by) == st:
393 393 if msgctxt and e.msgctxt != msgctxt:
394 394 continue
395 395 return e
396 396 return None
397 397
398 398 def ordered_metadata(self):
399 399 """
400 400 Convenience method that returns an ordered version of the metadata
401 401 dictionary. The return value is list of tuples (metadata name,
402 402 metadata_value).
403 403 """
404 404 # copy the dict first
405 405 metadata = self.metadata.copy()
406 406 data_order = [
407 407 'Project-Id-Version',
408 408 'Report-Msgid-Bugs-To',
409 409 'POT-Creation-Date',
410 410 'PO-Revision-Date',
411 411 'Last-Translator',
412 412 'Language-Team',
413 413 'MIME-Version',
414 414 'Content-Type',
415 415 'Content-Transfer-Encoding'
416 416 ]
417 417 ordered_data = []
418 418 for data in data_order:
419 419 try:
420 420 value = metadata.pop(data)
421 421 ordered_data.append((data, value))
422 422 except KeyError:
423 423 pass
424 424 # the rest of the metadata will be alphabetically ordered since there
425 425 # are no specs for this AFAIK
426 426 keys = metadata.keys()
427 427 keys.sort()
428 428 for data in keys:
429 429 value = metadata[data]
430 430 ordered_data.append((data, value))
431 431 return ordered_data
432 432
433 433 def to_binary(self):
434 434 """
435 435 Return the binary representation of the file.
436 436 """
437 437 offsets = []
438 438 entries = self.translated_entries()
439 439 # the keys are sorted in the .mo file
440 440 def cmp(_self, other):
441 441 # msgfmt compares entries with msgctxt if it exists
442 442 if _self.msgctxt:
443 443 self_msgid = _self.msgctxt
444 444 else:
445 445 self_msgid = _self.msgid
446 446
447 447 if other.msgctxt:
448 448 other_msgid = other.msgctxt
449 449 else:
450 450 other_msgid = other.msgid
451 451 if self_msgid > other_msgid:
452 452 return 1
453 453 elif self_msgid < other_msgid:
454 454 return -1
455 455 else:
456 456 return 0
457 457 # add metadata entry
458 458 entries.sort(cmp)
459 459 mentry = self.metadata_as_entry()
460 460 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
461 461 entries = [mentry] + entries
462 462 entries_len = len(entries)
463 463 ids, strs = '', ''
464 464 for e in entries:
465 465 # For each string, we need size and file offset. Each string is
466 466 # NUL terminated; the NUL does not count into the size.
467 467 msgid = ''
468 468 if e.msgctxt:
469 469 # Contexts are stored by storing the concatenation of the
470 470 # context, a <EOT> byte, and the original string
471 471 msgid = self._encode(e.msgctxt + '\4')
472 472 if e.msgid_plural:
473 473 indexes = e.msgstr_plural.keys()
474 474 indexes.sort()
475 475 msgstr = []
476 476 for index in indexes:
477 477 msgstr.append(e.msgstr_plural[index])
478 478 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
479 479 msgstr = self._encode('\0'.join(msgstr))
480 480 else:
481 481 msgid += self._encode(e.msgid)
482 482 msgstr = self._encode(e.msgstr)
483 483 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
484 484 ids += msgid + '\0'
485 485 strs += msgstr + '\0'
486 486
487 487 # The header is 7 32-bit unsigned integers.
488 488 keystart = 7*4+16*entries_len
489 489 # and the values start after the keys
490 490 valuestart = keystart + len(ids)
491 491 koffsets = []
492 492 voffsets = []
493 493 # The string table first has the list of keys, then the list of values.
494 494 # Each entry has first the size of the string, then the file offset.
495 495 for o1, l1, o2, l2 in offsets:
496 496 koffsets += [l1, o1+keystart]
497 497 voffsets += [l2, o2+valuestart]
498 498 offsets = koffsets + voffsets
499 499 # check endianness for magic number
500 500 if struct.pack('@h', 1) == struct.pack('<h', 1):
501 501 magic_number = MOFile.LITTLE_ENDIAN
502 502 else:
503 503 magic_number = MOFile.BIG_ENDIAN
504 504
505 505 output = struct.pack(
506 506 "Iiiiiii",
507 507 magic_number, # Magic number
508 508 0, # Version
509 509 entries_len, # # of entries
510 510 7*4, # start of key index
511 511 7*4+entries_len*8, # start of value index
512 512 0, keystart # size and offset of hash table
513 513 # Important: we don't use hash tables
514 514 )
515 515 output += array.array("i", offsets).tostring()
516 516 output += ids
517 517 output += strs
518 518 return output
519 519
520 520 def _encode(self, mixed):
521 521 """
522 522 Encodes the given ``mixed`` argument with the file encoding if and
523 523 only if it's an unicode string and returns the encoded string.
524 524 """
525 525 if type(mixed) == types.UnicodeType:
526 526 return mixed.encode(self.encoding)
527 527 return mixed
528 528
529 529 # }}}
530 530 # class POFile {{{
531 531
class POFile(_BaseFile):
    """
    Reader/writer for po (or pot) files.
    Being a :class:`~polib._BaseFile`, an instance is also a python ``list``
    holding the entries of the file.
    """

    def __unicode__(self):
        """
        Returns the unicode representation of the po file: the header
        rendered as comment lines followed by the entries.
        """
        comment_lines = []
        for line in self.header.split('\n'):
            # header lines starting with ',' or ':' are glued to the '#'
            if line[:1] in [',', ':']:
                comment_lines.append('#%s\n' % line)
            else:
                comment_lines.append('# %s\n' % line)
        text = ''.join(comment_lines)

        if type(text) != types.UnicodeType:
            text = unicode(text, self.encoding)

        return text + _BaseFile.__unicode__(self)

    def save_as_mofile(self, fpath):
        """
        Writes the binary (mo) representation of the file to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the mo file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Returns the percentage (as an integer) of translated messages among
        the non obsolete entries; ``100`` when there is no such entry.
        """
        active = [e for e in self if not e.obsolete]
        total = len(active)
        if total == 0:
            return 100
        done = len(self.translated_entries())
        return int((100.00 / float(total)) * done)

    def translated_entries(self):
        """
        Returns the list of entries that are translated.
        """
        return [entry for entry in self if entry.translated()]

    def untranslated_entries(self):
        """
        Returns the list of entries that are neither translated, nor
        obsolete, nor flagged fuzzy.
        """
        return [entry for entry in self
                if not (entry.translated() or entry.obsolete
                        or 'fuzzy' in entry.flags)]

    def fuzzy_entries(self):
        """
        Returns the list of entries carrying the ``fuzzy`` flag.
        """
        return [entry for entry in self if 'fuzzy' in entry.flags]

    def obsolete_entries(self):
        """
        Returns the list of entries marked obsolete.
        """
        return [entry for entry in self if entry.obsolete]

    def merge(self, refpot):
        """
        Merges the current pofile with the reference catalog ``refpot``.

        Every entry of ``refpot`` is merged (see :meth:`POEntry.merge`) into
        the entry of this file having the same msgid, obsolete entries
        included; a new entry is appended when there is none. Afterwards,
        every entry of this file whose msgid cannot be found in ``refpot``
        is marked obsolete.

        Keyword argument:

        ``refpot``
            object POFile, the reference catalog.
        """
        for ref_entry in refpot:
            target = self.find(ref_entry.msgid, include_obsolete_entries=True)
            if target is None:
                # unknown message: start from a blank entry
                target = POEntry()
                self.append(target)
            target.merge(ref_entry)
        # ok, now we must "obsolete" entries that are not in the refpot anymore
        for own_entry in self:
            if refpot.find(own_entry.msgid) is None:
                own_entry.obsolete = True
628 628
629 629 # }}}
630 630 # class MOFile {{{
631 631
class MOFile(_BaseFile):
    """
    Reader/writer for mo files.
    Being a :class:`~polib._BaseFile`, an instance is also a python ``list``
    holding the entries of the file.
    """
    # magic numbers identifying the byte order of a mo file
    BIG_ENDIAN = 0xde120495
    LITTLE_ENDIAN = 0x950412de

    def __init__(self, *args, **kwargs):
        """
        Constructor, takes the same keyword arguments as the
        :class:`~polib._BaseFile` class.
        """
        super(MOFile, self).__init__(*args, **kwargs)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Writes the mofile to ``fpath`` in po format.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        # go straight to the base implementation: MOFile.save() is binary
        super(MOFile, self).save(fpath)

    def save(self, fpath=None):
        """
        Writes the mofile to ``fpath`` in binary format.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        super(MOFile, self).save(fpath, 'to_binary')

    def percent_translated(self):
        """
        Always ``100``; provided to offer the same interface as POFile.
        """
        return 100

    def translated_entries(self):
        """
        Returns the file itself; provided to offer the same interface as
        POFile.
        """
        return self

    def untranslated_entries(self):
        """
        Always an empty list; provided to offer the same interface as POFile.
        """
        return []

    def fuzzy_entries(self):
        """
        Always an empty list; provided to offer the same interface as POFile.
        """
        return []

    def obsolete_entries(self):
        """
        Always an empty list; provided to offer the same interface as POFile.
        """
        return []
701 701
702 702 # }}}
703 703 # class _BaseEntry {{{
704 704
class _BaseEntry(object):
    """
    Shared base of the :class:`~polib.POEntry` and :class:`~polib.MOEntry`
    classes; it is not meant to be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``msgid``
            string, the entry msgid.

        ``msgstr``
            string, the entry msgstr.

        ``msgid_plural``
            string, the entry msgid_plural.

        ``msgstr_plural``
            the entry msgstr_plural strings, indexed by plural index
            (default: an empty dict).

        ``msgctxt``
            string, the entry context (msgctxt).

        ``obsolete``
            bool, whether the entry is "obsolete" or not.

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).
        """
        get = kwargs.get
        self.msgid = get('msgid', '')
        self.msgstr = get('msgstr', '')
        self.msgid_plural = get('msgid_plural', '')
        self.msgstr_plural = get('msgstr_plural', {})
        self.msgctxt = get('msgctxt', None)
        self.obsolete = get('obsolete', False)
        self.encoding = get('encoding', default_encoding)

    def __unicode__(self, wrapwidth=78):
        """
        Returns the unicode representation of the entry, every line being
        prefixed with ``#~ `` when the entry is obsolete.
        """
        prefix = '#~ ' if self.obsolete else ''
        out = []
        # msgctxt comes first, when there is one
        if self.msgctxt is not None:
            out.extend(self._str_field("msgctxt", prefix, "", self.msgctxt,
                                       wrapwidth))
        # then the msgid
        out.extend(self._str_field("msgid", prefix, "", self.msgid,
                                   wrapwidth))
        # and the msgid_plural, if any
        if self.msgid_plural:
            out.extend(self._str_field("msgid_plural", prefix, "",
                                       self.msgid_plural, wrapwidth))
        if self.msgstr_plural:
            # one msgstr[N] per plural form, by increasing index
            for index in sorted(self.msgstr_plural):
                out.extend(self._str_field("msgstr", prefix, '[%s]' % index,
                                           self.msgstr_plural[index],
                                           wrapwidth))
        else:
            # a plain msgstr otherwise
            out.extend(self._str_field("msgstr", prefix, "", self.msgstr,
                                       wrapwidth))
        out.append('')
        text = '\n'.join(out)

        if type(text) != types.UnicodeType:
            return unicode(text, self.encoding)
        return text

    def __str__(self):
        """
        Returns the string representation of the entry.
        """
        return unicode(self).encode(self.encoding)

    def __eq__(self, other):
        """
        Two entries are equal when their unicode representations are equal.
        """
        return unicode(self) == unicode(other)

    def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
        """
        Returns the list of lines rendering ``field`` as a po field named
        ``fieldname`` (a ``previous_`` prefix is dropped from the name).

        ``delflag`` is prepended to every line and ``plural_index`` is
        appended to the field name. A value made of several lines, or a
        value too long for ``wrapwidth``, is rendered as an empty first
        string followed by one quoted string per chunk.
        """
        chunks = field.splitlines(True)
        if len(chunks) > 1:
            chunks.insert(0, '')  # start with initial empty line
        else:
            escaped_field = escape(field)
            specialchars_count = sum(
                field.count(char) for char in ['\\', '\n', '\r', '\t', '"'])
            # comparison must take into account fieldname length + one space
            # + 2 quotes (eg. msgid "<string>")
            flength = len(fieldname) + 3
            if plural_index:
                flength += len(plural_index)
            real_wrapwidth = wrapwidth - flength + specialchars_count
            if wrapwidth > 0 and len(field) > real_wrapwidth:
                # Wrap the line but take field name into account
                wrapped = textwrap.wrap(
                    escaped_field,
                    wrapwidth - 2,  # 2 for quotes ""
                    drop_whitespace=False,
                    break_long_words=False
                )
                chunks = [''] + [unescape(piece) for piece in wrapped]
            else:
                chunks = [field]
        if fieldname.startswith('previous_'):
            # quick and dirty trick to get the real field name
            fieldname = fieldname[9:]

        first, rest = chunks[0], chunks[1:]
        rendered = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                     escape(first))]
        rendered.extend('%s"%s"' % (delflag, escape(piece)) for piece in rest)
        return rendered
824 824
825 825 # }}}
826 826 # class POEntry {{{
827 827
828 828 class POEntry(_BaseEntry):
829 829 """
830 830 Represents a po file entry.
831 831 """
832 832
833 833 def __init__(self, *args, **kwargs):
834 834 """
835 835 Constructor, accepts the following keyword arguments:
836 836
837 837 ``comment``
838 838 string, the entry comment.
839 839
840 840 ``tcomment``
841 841 string, the entry translator comment.
842 842
843 843 ``occurrences``
844 844 list, the entry occurrences.
845 845
846 846 ``flags``
847 847 list, the entry flags.
848 848
849 849 ``previous_msgctxt``
850 850 string, the entry previous context.
851 851
852 852 ``previous_msgid``
853 853 string, the entry previous msgid.
854 854
855 855 ``previous_msgid_plural``
856 856 string, the entry previous msgid_plural.
857 857 """
858 858 _BaseEntry.__init__(self, *args, **kwargs)
859 859 self.comment = kwargs.get('comment', '')
860 860 self.tcomment = kwargs.get('tcomment', '')
861 861 self.occurrences = kwargs.get('occurrences', [])
862 862 self.flags = kwargs.get('flags', [])
863 863 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
864 864 self.previous_msgid = kwargs.get('previous_msgid', None)
865 865 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
866 866
867 867 def __unicode__(self, wrapwidth=78):
868 868 """
869 869 Returns the unicode representation of the entry.
870 870 """
871 871 if self.obsolete:
872 872 return _BaseEntry.__unicode__(self, wrapwidth)
873 873
874 874 ret = []
875 875 # comments first, if any (with text wrapping as xgettext does)
876 876 comments = [('comment', '#. '), ('tcomment', '# ')]
877 877 for c in comments:
878 878 val = getattr(self, c[0])
879 879 if val:
880 880 for comment in val.split('\n'):
881 881 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
882 ret += wrap(
882 ret += textwrap.wrap(
883 883 comment,
884 884 wrapwidth,
885 885 initial_indent=c[1],
886 886 subsequent_indent=c[1],
887 887 break_long_words=False
888 888 )
889 889 else:
890 890 ret.append('%s%s' % (c[1], comment))
891 891
892 892 # occurrences (with text wrapping as xgettext does)
893 893 if self.occurrences:
894 894 filelist = []
895 895 for fpath, lineno in self.occurrences:
896 896 if lineno:
897 897 filelist.append('%s:%s' % (fpath, lineno))
898 898 else:
899 899 filelist.append(fpath)
900 900 filestr = ' '.join(filelist)
901 901 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
902 902 # textwrap split words that contain hyphen, this is not
903 903 # what we want for filenames, so the dirty hack is to
904 904 # temporally replace hyphens with a char that a file cannot
905 905 # contain, like "*"
906 ret += [l.replace('*', '-') for l in wrap(
906 ret += [l.replace('*', '-') for l in textwrap.wrap(
907 907 filestr.replace('-', '*'),
908 908 wrapwidth,
909 909 initial_indent='#: ',
910 910 subsequent_indent='#: ',
911 911 break_long_words=False
912 912 )]
913 913 else:
914 914 ret.append('#: ' + filestr)
915 915
916 916 # flags (TODO: wrapping ?)
917 917 if self.flags:
918 918 ret.append('#, %s' % ', '.join(self.flags))
919 919
920 920 # previous context and previous msgid/msgid_plural
921 921 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
922 922 for f in fields:
923 923 val = getattr(self, f)
924 924 if val:
925 925 ret += self._str_field(f, "#| ", "", val, wrapwidth)
926 926
927 927 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
928 928 ret = '\n'.join(ret)
929 929
930 930 if type(ret) != types.UnicodeType:
931 931 return unicode(ret, self.encoding)
932 932 return ret
933 933
934 934 def __cmp__(self, other):
935 935 """
936 936 Called by comparison operations if rich comparison is not defined.
937 937 """
938 938 def compare_occurrences(a, b):
939 939 """
940 940 Compare an entry occurrence with another one.
941 941 """
942 942 if a[0] != b[0]:
943 943 return a[0] < b[0]
944 944 if a[1] != b[1]:
945 945 return a[1] < b[1]
946 946 return 0
947 947
948 948 # First: Obsolete test
949 949 if self.obsolete != other.obsolete:
950 950 if self.obsolete:
951 951 return -1
952 952 else:
953 953 return 1
954 954 # Work on a copy to protect original
955 955 occ1 = self.occurrences[:]
956 956 occ2 = other.occurrences[:]
957 957 # Sorting using compare method
958 958 occ1.sort(compare_occurrences)
959 959 occ2.sort(compare_occurrences)
960 960 # Comparing sorted occurrences
961 961 pos = 0
962 962 for entry1 in occ1:
963 963 try:
964 964 entry2 = occ2[pos]
965 965 except IndexError:
966 966 return 1
967 967 pos = pos + 1
968 968 if entry1[0] != entry2[0]:
969 969 if entry1[0] > entry2[0]:
970 970 return 1
971 971 else:
972 972 return -1
973 973 if entry1[1] != entry2[1]:
974 974 if entry1[1] > entry2[1]:
975 975 return 1
976 976 else:
977 977 return -1
978 978 # Finally: Compare message ID
979 979 if self.msgid > other.msgid: return 1
980 980 else: return -1
981 981
def translated(self):
    """
    Tell whether this entry carries a usable translation.

    An obsolete or fuzzy entry is never considered translated.
    Otherwise the entry is translated when ``msgstr`` is not empty, or
    when it has plural forms and none of them is an empty string.

    Returns ``True`` or ``False``.
    """
    if self.obsolete:
        return False
    if 'fuzzy' in self.flags:
        return False
    if self.msgstr != '':
        return True
    plurals = self.msgstr_plural
    if not plurals:
        # neither a singular nor a plural translation
        return False
    # every plural form must be filled in
    return all(plurals[index] != '' for index in plurals)
997 997
def merge(self, other):
    """
    Merge the current entry with the given pot entry.

    Every descriptive field of ``other`` replaces the one of this
    entry, while the translations already present are kept. A ``fuzzy``
    flag set on this entry survives the merge, and plural indexes that
    only exist in ``other`` are added with an empty translation.
    """
    # remember the fuzzy state before the flags get overwritten
    was_fuzzy = 'fuzzy' in self.flags

    # NOTE: occurrences are shared with ``other`` (not copied)
    for name in ('msgid', 'msgctxt', 'occurrences', 'comment'):
        setattr(self, name, getattr(other, name))

    self.flags = other.flags[:]  # clone flags
    if was_fuzzy:
        self.flags.append('fuzzy')

    for name in ('msgid_plural', 'obsolete', 'previous_msgctxt',
                 'previous_msgid', 'previous_msgid_plural'):
        setattr(self, name, getattr(other, name))

    if not other.msgstr_plural:
        return
    for index in other.msgstr_plural:
        # keep existing translation at index if any
        if index not in self.msgstr_plural:
            self.msgstr_plural[index] = ''
1022 1022
1023 1023 # }}}
1024 1024 # class MOEntry {{{
1025 1025
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    It adds nothing to ``_BaseEntry``; it only gives mo entries a type
    of their own.
    """
1031 1031
1032 1032 # }}}
1033 1033 # class _POFileParser {{{
1034 1034
1035 1035 class _POFileParser(object):
1036 1036 """
1037 1037 A finite state machine to parse efficiently and correctly po
1038 1038 file format.
1039 1039 """
1040 1040
1041 1041 def __init__(self, pofile, *args, **kwargs):
1042 1042 """
1043 1043 Constructor.
1044 1044
1045 1045 Keyword arguments:
1046 1046
1047 1047 ``pofile``
1048 1048 string, path to the po file or its content
1049 1049
1050 1050 ``encoding``
1051 1051 string, the encoding to use, defaults to ``default_encoding``
1052 1052 global variable (optional).
1053 1053
1054 1054 ``check_for_duplicates``
1055 1055 whether to check for duplicate entries when adding entries to the
1056 1056 file (optional, default: ``False``).
1057 1057 """
1058 1058 enc = kwargs.get('encoding', default_encoding)
1059 1059 if os.path.exists(pofile):
1060 1060 try:
1061 1061 self.fhandle = codecs.open(pofile, 'rU', enc)
1062 1062 except LookupError:
1063 1063 enc = default_encoding
1064 1064 self.fhandle = codecs.open(pofile, 'rU', enc)
1065 1065 else:
1066 1066 self.fhandle = pofile.splitlines()
1067 1067
1068 1068 self.instance = POFile(
1069 1069 pofile=pofile,
1070 1070 encoding=enc,
1071 1071 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1072 1072 )
1073 1073 self.transitions = {}
1074 1074 self.current_entry = POEntry()
1075 1075 self.current_state = 'ST'
1076 1076 self.current_token = None
1077 1077 # two memo flags used in handlers
1078 1078 self.msgstr_index = 0
1079 1079 self.entry_obsolete = 0
1080 1080 # Configure the state machine, by adding transitions.
1081 1081 # Signification of symbols:
1082 1082 # * ST: Beginning of the file (start)
1083 1083 # * HE: Header
1084 1084 # * TC: a translation comment
1085 1085 # * GC: a generated comment
1086 1086 # * OC: a file/line occurence
1087 1087 # * FL: a flags line
1088 1088 # * CT: a message context
1089 1089 # * PC: a previous msgctxt
1090 1090 # * PM: a previous msgid
1091 1091 # * PP: a previous msgid_plural
1092 1092 # * MI: a msgid
1093 1093 # * MP: a msgid plural
1094 1094 # * MS: a msgstr
1095 1095 # * MX: a msgstr plural
1096 1096 # * MC: a msgid or msgstr continuation line
1097 1097 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1098 1098 'MS', 'MP', 'MX', 'MI']
1099 1099
1100 1100 self.add('TC', ['ST', 'HE'], 'HE')
1101 1101 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1102 1102 'MP', 'MX', 'MI'], 'TC')
1103 1103 self.add('GC', all, 'GC')
1104 1104 self.add('OC', all, 'OC')
1105 1105 self.add('FL', all, 'FL')
1106 1106 self.add('PC', all, 'PC')
1107 1107 self.add('PM', all, 'PM')
1108 1108 self.add('PP', all, 'PP')
1109 1109 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1110 1110 'PP', 'MS', 'MX'], 'CT')
1111 1111 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1112 1112 'PM', 'PP', 'MS', 'MX'], 'MI')
1113 1113 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1114 1114 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1115 1115 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1116 1116 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1117 1117
1118 1118 def parse(self):
1119 1119 """
1120 1120 Run the state machine, parse the file line by line and call process()
1121 1121 with the current matched symbol.
1122 1122 """
1123 1123 i = 0
1124 1124
1125 1125 keywords = {
1126 1126 'msgctxt': 'CT',
1127 1127 'msgid': 'MI',
1128 1128 'msgstr': 'MS',
1129 1129 'msgid_plural': 'MP',
1130 1130 }
1131 1131 prev_keywords = {
1132 1132 'msgid_plural': 'PP',
1133 1133 'msgid': 'PM',
1134 1134 'msgctxt': 'PC',
1135 1135 }
1136 1136
1137 1137 for line in self.fhandle:
1138 1138 i += 1
1139 1139 line = line.strip()
1140 1140 if line == '':
1141 1141 continue
1142 1142
1143 1143 tokens = line.split(None, 2)
1144 1144 nb_tokens = len(tokens)
1145 1145
1146 1146 if tokens[0] == '#~' and nb_tokens > 1:
1147 1147 line = line[3:].strip()
1148 1148 tokens = tokens[1:]
1149 1149 nb_tokens -= 1
1150 1150 self.entry_obsolete = 1
1151 1151 else:
1152 1152 self.entry_obsolete = 0
1153 1153
1154 1154 # Take care of keywords like
1155 1155 # msgid, msgid_plural, msgctxt & msgstr.
1156 1156 if tokens[0] in keywords and nb_tokens > 1:
1157 1157 line = line[len(tokens[0]):].lstrip()
1158 1158 self.current_token = line
1159 1159 self.process(keywords[tokens[0]], i)
1160 1160 continue
1161 1161
1162 1162 self.current_token = line
1163 1163
1164 1164 if tokens[0] == '#:' and nb_tokens > 1:
1165 1165 # we are on a occurrences line
1166 1166 self.process('OC', i)
1167 1167
1168 1168 elif line[:1] == '"':
1169 1169 # we are on a continuation line
1170 1170 self.process('MC', i)
1171 1171
1172 1172 elif line[:7] == 'msgstr[':
1173 1173 # we are on a msgstr plural
1174 1174 self.process('MX', i)
1175 1175
1176 1176 elif tokens[0] == '#,' and nb_tokens > 1:
1177 1177 # we are on a flags line
1178 1178 self.process('FL', i)
1179 1179
1180 1180 elif tokens[0] == '#':
1181 1181 if line == '#': line += ' '
1182 1182 # we are on a translator comment line
1183 1183 self.process('TC', i)
1184 1184
1185 1185 elif tokens[0] == '#.' and nb_tokens > 1:
1186 1186 # we are on a generated comment line
1187 1187 self.process('GC', i)
1188 1188
1189 1189 elif tokens[0] == '#|':
1190 1190 if nb_tokens < 2:
1191 1191 self.process('??', i)
1192 1192 continue
1193 1193
1194 1194 # Remove the marker and any whitespace right after that.
1195 1195 line = line[2:].lstrip()
1196 1196 self.current_token = line
1197 1197
1198 1198 if tokens[1].startswith('"'):
1199 1199 # Continuation of previous metadata.
1200 1200 self.process('MC', i)
1201 1201 continue
1202 1202
1203 1203 if nb_tokens == 2:
1204 1204 # Invalid continuation line.
1205 1205 self.process('??', i)
1206 1206
1207 1207 # we are on a "previous translation" comment line,
1208 1208 if tokens[1] not in prev_keywords:
1209 1209 # Unknown keyword in previous translation comment.
1210 1210 self.process('??', i)
1211 1211
1212 1212 # Remove the keyword and any whitespace
1213 1213 # between it and the starting quote.
1214 1214 line = line[len(tokens[1]):].lstrip()
1215 1215 self.current_token = line
1216 1216 self.process(prev_keywords[tokens[1]], i)
1217 1217
1218 1218 else:
1219 1219 self.process('??', i)
1220 1220
1221 1221 if self.current_entry:
1222 1222 # since entries are added when another entry is found, we must add
1223 1223 # the last entry here (only if there are lines)
1224 1224 self.instance.append(self.current_entry)
1225 1225 # before returning the instance, check if there's metadata and if
1226 1226 # so extract it in a dict
1227 1227 firstentry = self.instance[0]
1228 1228 if firstentry.msgid == '': # metadata found
1229 1229 # remove the entry
1230 1230 firstentry = self.instance.pop(0)
1231 1231 self.instance.metadata_is_fuzzy = firstentry.flags
1232 1232 key = None
1233 1233 for msg in firstentry.msgstr.splitlines():
1234 1234 try:
1235 1235 key, val = msg.split(':', 1)
1236 1236 self.instance.metadata[key] = val.strip()
1237 1237 except:
1238 1238 if key is not None:
1239 1239 self.instance.metadata[key] += '\n'+ msg.strip()
1240 1240 # close opened file
1241 1241 if isinstance(self.fhandle, file):
1242 1242 self.fhandle.close()
1243 1243 return self.instance
1244 1244
1245 1245 def add(self, symbol, states, next_state):
1246 1246 """
1247 1247 Add a transition to the state machine.
1248 1248
1249 1249 Keywords arguments:
1250 1250
1251 1251 ``symbol``
1252 1252 string, the matched token (two chars symbol).
1253 1253
1254 1254 ``states``
1255 1255 list, a list of states (two chars symbols).
1256 1256
1257 1257 ``next_state``
1258 1258 the next state the fsm will have after the action.
1259 1259 """
1260 1260 for state in states:
1261 1261 action = getattr(self, 'handle_%s' % next_state.lower())
1262 1262 self.transitions[(symbol, state)] = (action, next_state)
1263 1263
1264 1264 def process(self, symbol, linenum):
1265 1265 """
1266 1266 Process the transition corresponding to the current state and the
1267 1267 symbol provided.
1268 1268
1269 1269 Keywords arguments:
1270 1270
1271 1271 ``symbol``
1272 1272 string, the matched token (two chars symbol).
1273 1273
1274 1274 ``linenum``
1275 1275 integer, the current line number of the parsed file.
1276 1276 """
1277 1277 try:
1278 1278 (action, state) = self.transitions[(symbol, self.current_state)]
1279 1279 if action():
1280 1280 self.current_state = state
1281 1281 except Exception as exc:
1282 1282 raise IOError('Syntax error in po file (line %s)' % linenum)
1283 1283
1284 1284 # state handlers
1285 1285
1286 1286 def handle_he(self):
1287 1287 """Handle a header comment."""
1288 1288 if self.instance.header != '':
1289 1289 self.instance.header += '\n'
1290 1290 self.instance.header += self.current_token[2:]
1291 1291 return 1
1292 1292
1293 1293 def handle_tc(self):
1294 1294 """Handle a translator comment."""
1295 1295 if self.current_state in ['MC', 'MS', 'MX']:
1296 1296 self.instance.append(self.current_entry)
1297 1297 self.current_entry = POEntry()
1298 1298 if self.current_entry.tcomment != '':
1299 1299 self.current_entry.tcomment += '\n'
1300 1300 self.current_entry.tcomment += self.current_token[2:]
1301 1301 return True
1302 1302
1303 1303 def handle_gc(self):
1304 1304 """Handle a generated comment."""
1305 1305 if self.current_state in ['MC', 'MS', 'MX']:
1306 1306 self.instance.append(self.current_entry)
1307 1307 self.current_entry = POEntry()
1308 1308 if self.current_entry.comment != '':
1309 1309 self.current_entry.comment += '\n'
1310 1310 self.current_entry.comment += self.current_token[3:]
1311 1311 return True
1312 1312
1313 1313 def handle_oc(self):
1314 1314 """Handle a file:num occurence."""
1315 1315 if self.current_state in ['MC', 'MS', 'MX']:
1316 1316 self.instance.append(self.current_entry)
1317 1317 self.current_entry = POEntry()
1318 1318 occurrences = self.current_token[3:].split()
1319 1319 for occurrence in occurrences:
1320 1320 if occurrence != '':
1321 1321 try:
1322 1322 fil, line = occurrence.split(':')
1323 1323 if not line.isdigit():
1324 1324 fil = fil + line
1325 1325 line = ''
1326 1326 self.current_entry.occurrences.append((fil, line))
1327 1327 except:
1328 1328 self.current_entry.occurrences.append((occurrence, ''))
1329 1329 return True
1330 1330
1331 1331 def handle_fl(self):
1332 1332 """Handle a flags line."""
1333 1333 if self.current_state in ['MC', 'MS', 'MX']:
1334 1334 self.instance.append(self.current_entry)
1335 1335 self.current_entry = POEntry()
1336 1336 self.current_entry.flags += self.current_token[3:].split(', ')
1337 1337 return True
1338 1338
1339 1339 def handle_pp(self):
1340 1340 """Handle a previous msgid_plural line."""
1341 1341 if self.current_state in ['MC', 'MS', 'MX']:
1342 1342 self.instance.append(self.current_entry)
1343 1343 self.current_entry = POEntry()
1344 1344 self.current_entry.previous_msgid_plural = \
1345 1345 unescape(self.current_token[1:-1])
1346 1346 return True
1347 1347
1348 1348 def handle_pm(self):
1349 1349 """Handle a previous msgid line."""
1350 1350 if self.current_state in ['MC', 'MS', 'MX']:
1351 1351 self.instance.append(self.current_entry)
1352 1352 self.current_entry = POEntry()
1353 1353 self.current_entry.previous_msgid = \
1354 1354 unescape(self.current_token[1:-1])
1355 1355 return True
1356 1356
1357 1357 def handle_pc(self):
1358 1358 """Handle a previous msgctxt line."""
1359 1359 if self.current_state in ['MC', 'MS', 'MX']:
1360 1360 self.instance.append(self.current_entry)
1361 1361 self.current_entry = POEntry()
1362 1362 self.current_entry.previous_msgctxt = \
1363 1363 unescape(self.current_token[1:-1])
1364 1364 return True
1365 1365
1366 1366 def handle_ct(self):
1367 1367 """Handle a msgctxt."""
1368 1368 if self.current_state in ['MC', 'MS', 'MX']:
1369 1369 self.instance.append(self.current_entry)
1370 1370 self.current_entry = POEntry()
1371 1371 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
1372 1372 return True
1373 1373
1374 1374 def handle_mi(self):
1375 1375 """Handle a msgid."""
1376 1376 if self.current_state in ['MC', 'MS', 'MX']:
1377 1377 self.instance.append(self.current_entry)
1378 1378 self.current_entry = POEntry()
1379 1379 self.current_entry.obsolete = self.entry_obsolete
1380 1380 self.current_entry.msgid = unescape(self.current_token[1:-1])
1381 1381 return True
1382 1382
1383 1383 def handle_mp(self):
1384 1384 """Handle a msgid plural."""
1385 1385 self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
1386 1386 return True
1387 1387
1388 1388 def handle_ms(self):
1389 1389 """Handle a msgstr."""
1390 1390 self.current_entry.msgstr = unescape(self.current_token[1:-1])
1391 1391 return True
1392 1392
1393 1393 def handle_mx(self):
1394 1394 """Handle a msgstr plural."""
1395 1395 index, value = self.current_token[7], self.current_token[11:-1]
1396 1396 self.current_entry.msgstr_plural[index] = unescape(value)
1397 1397 self.msgstr_index = index
1398 1398 return True
1399 1399
1400 1400 def handle_mc(self):
1401 1401 """Handle a msgid or msgstr continuation line."""
1402 1402 token = unescape(self.current_token[1:-1])
1403 1403 if self.current_state == 'CT':
1404 1404 typ = 'msgctxt'
1405 1405 self.current_entry.msgctxt += token
1406 1406 elif self.current_state == 'MI':
1407 1407 typ = 'msgid'
1408 1408 self.current_entry.msgid += token
1409 1409 elif self.current_state == 'MP':
1410 1410 typ = 'msgid_plural'
1411 1411 self.current_entry.msgid_plural += token
1412 1412 elif self.current_state == 'MS':
1413 1413 typ = 'msgstr'
1414 1414 self.current_entry.msgstr += token
1415 1415 elif self.current_state == 'MX':
1416 1416 typ = 'msgstr[%s]' % self.msgstr_index
1417 1417 self.current_entry.msgstr_plural[self.msgstr_index] += token
1418 1418 elif self.current_state == 'PP':
1419 1419 typ = 'previous_msgid_plural'
1420 1420 token = token[3:]
1421 1421 self.current_entry.previous_msgid_plural += token
1422 1422 elif self.current_state == 'PM':
1423 1423 typ = 'previous_msgid'
1424 1424 token = token[3:]
1425 1425 self.current_entry.previous_msgid += token
1426 1426 elif self.current_state == 'PC':
1427 1427 typ = 'previous_msgctxt'
1428 1428 token = token[3:]
1429 1429 self.current_entry.previous_msgctxt += token
1430 1430 # don't change the current state
1431 1431 return False
1432 1432
1433 1433 # }}}
1434 1434 # class _MOFileParser {{{
1435 1435
1436 1436 class _MOFileParser(object):
1437 1437 """
1438 1438 A class to parse binary mo files.
1439 1439 """
1440 1440
1441 1441 def __init__(self, mofile, *args, **kwargs):
1442 1442 """
1443 1443 Constructor.
1444 1444
1445 1445 Keyword arguments:
1446 1446
1447 1447 ``mofile``
1448 1448 string, path to the mo file or its content
1449 1449
1450 1450 ``encoding``
1451 1451 string, the encoding to use, defaults to ``default_encoding``
1452 1452 global variable (optional).
1453 1453
1454 1454 ``check_for_duplicates``
1455 1455 whether to check for duplicate entries when adding entries to the
1456 1456 file (optional, default: ``False``).
1457 1457 """
1458 1458 self.fhandle = open(mofile, 'rb')
1459 1459 self.instance = MOFile(
1460 1460 fpath=mofile,
1461 1461 encoding=kwargs.get('encoding', default_encoding),
1462 1462 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1463 1463 )
1464 1464
1465 1465 def parse(self):
1466 1466 """
1467 1467 Build the instance with the file handle provided in the
1468 1468 constructor.
1469 1469 """
1470 1470 # parse magic number
1471 1471 magic_number = self._readbinary('<I', 4)
1472 1472 if magic_number == MOFile.LITTLE_ENDIAN:
1473 1473 ii = '<II'
1474 1474 elif magic_number == MOFile.BIG_ENDIAN:
1475 1475 ii = '>II'
1476 1476 else:
1477 1477 raise IOError('Invalid mo file, magic number is incorrect !')
1478 1478 self.instance.magic_number = magic_number
1479 1479 # parse the version number and the number of strings
1480 1480 self.instance.version, numofstrings = self._readbinary(ii, 8)
1481 1481 # original strings and translation strings hash table offset
1482 1482 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1483 1483 # move to msgid hash table and read length and offset of msgids
1484 1484 self.fhandle.seek(msgids_hash_offset)
1485 1485 msgids_index = []
1486 1486 for i in range(numofstrings):
1487 1487 msgids_index.append(self._readbinary(ii, 8))
1488 1488 # move to msgstr hash table and read length and offset of msgstrs
1489 1489 self.fhandle.seek(msgstrs_hash_offset)
1490 1490 msgstrs_index = []
1491 1491 for i in range(numofstrings):
1492 1492 msgstrs_index.append(self._readbinary(ii, 8))
1493 1493 # build entries
1494 1494 for i in range(numofstrings):
1495 1495 self.fhandle.seek(msgids_index[i][1])
1496 1496 msgid = self.fhandle.read(msgids_index[i][0])
1497 1497 self.fhandle.seek(msgstrs_index[i][1])
1498 1498 msgstr = self.fhandle.read(msgstrs_index[i][0])
1499 1499 if i == 0: # metadata
1500 1500 raw_metadata, metadata = msgstr.split('\n'), {}
1501 1501 for line in raw_metadata:
1502 1502 tokens = line.split(':', 1)
1503 1503 if tokens[0] != '':
1504 1504 try:
1505 1505 metadata[tokens[0]] = tokens[1].strip()
1506 1506 except IndexError:
1507 1507 metadata[tokens[0]] = ''
1508 1508 self.instance.metadata = metadata
1509 1509 continue
1510 1510 # test if we have a plural entry
1511 1511 msgid_tokens = msgid.split('\0')
1512 1512 if len(msgid_tokens) > 1:
1513 1513 entry = self._build_entry(
1514 1514 msgid=msgid_tokens[0],
1515 1515 msgid_plural=msgid_tokens[1],
1516 1516 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
1517 1517 )
1518 1518 else:
1519 1519 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1520 1520 self.instance.append(entry)
1521 1521 # close opened file
1522 1522 self.fhandle.close()
1523 1523 return self.instance
1524 1524
1525 1525 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1526 1526 msgstr_plural=None):
1527 1527 msgctxt_msgid = msgid.split('\x04')
1528 1528 if len(msgctxt_msgid) > 1:
1529 1529 kwargs = {
1530 1530 'msgctxt': msgctxt_msgid[0],
1531 1531 'msgid' : msgctxt_msgid[1],
1532 1532 }
1533 1533 else:
1534 1534 kwargs = {'msgid': msgid}
1535 1535 if msgstr:
1536 1536 kwargs['msgstr'] = msgstr
1537 1537 if msgid_plural:
1538 1538 kwargs['msgid_plural'] = msgid_plural
1539 1539 if msgstr_plural:
1540 1540 kwargs['msgstr_plural'] = msgstr_plural
1541 1541 return MOEntry(**kwargs)
1542 1542
1543 1543 def _readbinary(self, fmt, numbytes):
1544 1544 """
1545 1545 Private method that unpack n bytes of data using format <fmt>.
1546 1546 It returns a tuple or a mixed value if the tuple length is 1.
1547 1547 """
1548 1548 bytes = self.fhandle.read(numbytes)
1549 1549 tup = struct.unpack(fmt, bytes)
1550 1550 if len(tup) == 1:
1551 1551 return tup[0]
1552 1552 return tup
1553 1553
1554 1554 # }}}
1555 # class TextWrapper {{{
1556
class TextWrapper(textwrap.TextWrapper):
    """
    textwrap.TextWrapper variant that understands the ``drop_whitespace``
    option, for the python versions where the base class does not.
    """
    def __init__(self, *args, **kwargs):
        strip_ws = True
        if 'drop_whitespace' in kwargs:
            # the base constructor must not see the option
            strip_ws = kwargs.pop('drop_whitespace')
        textwrap.TextWrapper.__init__(self, *args, **kwargs)
        self.drop_whitespace = strip_ws

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Turn a sequence of text chunks into a list of lines that are at
        most 'self.width' characters long (lines may be longer when
        'break_long_words' is false). A chunk is either a "word" or a run
        of whitespace and is never split (modulo 'break_long_words');
        lines only break between two chunks. When 'drop_whitespace' is
        set, whitespace chunks are removed from the beginning and the end
        of the lines; any other whitespace is preserved.
        """
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        wrapped = []

        # The chunks are consumed from the end of the list, which makes
        # popping them cheap.
        chunks.reverse()

        while chunks:
            # chunks of the line being built and their cumulated length
            pieces = []
            used = 0

            # only the very first line gets the initial indent
            prefix = self.subsequent_indent if wrapped else self.initial_indent

            # room left for the chunks once the indent is accounted for
            room = self.width - len(prefix)

            # a line never starts with whitespace, except the first one
            if self.drop_whitespace and wrapped and chunks[-1].strip() == '':
                del chunks[-1]

            # take every chunk that still fits on the line
            while chunks and used + len(chunks[-1]) <= room:
                piece = chunks.pop()
                pieces.append(piece)
                used += len(piece)

            # the next chunk does not fit on *any* line: let the base class
            # decide what to do with it
            if chunks and len(chunks[-1]) > room:
                self._handle_long_word(chunks, pieces, used, room)

            # a line never ends with whitespace either
            if self.drop_whitespace and pieces and pieces[-1].strip() == '':
                pieces.pop()

            # store the finished line, if anything is left of it
            if pieces:
                wrapped.append(prefix + ''.join(pieces))

        return wrapped
1636
1637 # }}}
1638 # function wrap() {{{
1639
def wrap(text, width=70, **kwargs):
    """
    Wrap a single paragraph of text and return the list of wrapped lines.

    The standard ``textwrap.wrap`` is used whenever possible; python
    versions older than 2.6 go through the local ``TextWrapper`` class.
    """
    needs_backport = sys.version_info < (2, 6)
    if not needs_backport:
        return textwrap.wrap(text, width=width, **kwargs)
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)
1647
1648 #}}}
General Comments 0
You need to be logged in to leave comments. Login now