##// END OF EJS Templates
polib: update to latest release 1.0.7 (upstream rev d75ce6dbbc2a)...
Augie Fackler -
r40221:19fc5a98 default
parent child Browse files
Show More
@@ -224,14 +224,6 be separated by whitespaces::
224 224 failures, tests = doctest.testmod()
225 225 sys.exit(failures and 1 or 0)
226 226
227 # replace polib._POFileParser to show linenum of problematic msgstr
228 class ExtPOFileParser(polib._POFileParser):
229 def process(self, symbol, linenum):
230 super(ExtPOFileParser, self).process(symbol, linenum)
231 if symbol == 'MS': # msgstr
232 self.current_entry.linenum = linenum
233 polib._POFileParser = ExtPOFileParser
234
235 227 detected = []
236 228 warning = options.warning
237 229 for f in args:
This diff has been collapsed as it changes many lines, (776 lines changed) Show them Hide them
@@ -1,5 +1,5
1 # -*- coding: utf-8 -*-
2 1 # no-check-code
2 # -* coding: utf-8 -*-
3 3 #
4 4 # License: MIT (see LICENSE file provided)
5 5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
@@ -16,9 +16,9 modify entries, comments or metadata, et
16 16 from __future__ import absolute_import
17 17
18 18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
19 __version__ = '0.6.4'
19 __version__ = '1.0.7'
20 20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
21 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
21 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
22 22
23 23 import array
24 24 import codecs
@@ -27,14 +27,47 import re
27 27 import struct
28 28 import sys
29 29 import textwrap
30 import types
30
31 try:
32 import io
33 except ImportError:
34 # replacement of io.open() for python < 2.6
35 # we use codecs instead
36 class io(object):
37 @staticmethod
38 def open(fpath, mode='r', encoding=None):
39 return codecs.open(fpath, mode, encoding)
31 40
32 41
33 42 # the default encoding to use when encoding cannot be detected
34 43 default_encoding = 'utf-8'
35 44
45 # python 2/3 compatibility helpers {{{
46
47
48 if sys.version_info[:2] < (3, 0):
49 PY3 = False
50 text_type = unicode
51
52 def b(s):
53 return s
54
55 def u(s):
56 return unicode(s, "unicode_escape")
57
58 else:
59 PY3 = True
60 text_type = str
61
62 def b(s):
63 return s.encode("latin-1")
64
65 def u(s):
66 return s
67 # }}}
36 68 # _pofile_or_mofile {{{
37 69
70
38 71 def _pofile_or_mofile(f, type, **kwargs):
39 72 """
40 73 Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
@@ -50,15 +83,34 def _pofile_or_mofile(f, type, **kwargs)
50 83 parser = kls(
51 84 f,
52 85 encoding=enc,
53 check_for_duplicates=kwargs.get('check_for_duplicates', False)
86 check_for_duplicates=kwargs.get('check_for_duplicates', False),
87 klass=kwargs.get('klass')
54 88 )
55 89 instance = parser.parse()
56 90 instance.wrapwidth = kwargs.get('wrapwidth', 78)
57 91 return instance
92 # }}}
93 # _is_file {{{
58 94
95
96 def _is_file(filename_or_contents):
97 """
98 Safely returns the value of os.path.exists(filename_or_contents).
99
100 Arguments:
101
102 ``filename_or_contents``
103 either a filename, or a string holding the contents of some file.
104 In the latter case, this function will always return False.
105 """
106 try:
107 return os.path.exists(filename_or_contents)
108 except (ValueError, UnicodeEncodeError):
109 return False
59 110 # }}}
60 111 # function pofile() {{{
61 112
113
62 114 def pofile(pofile, **kwargs):
63 115 """
64 116 Convenience function that parses the po or pot file ``pofile`` and returns
@@ -80,12 +132,17 def pofile(pofile, **kwargs):
80 132 ``check_for_duplicates``
81 133 whether to check for duplicate entries when adding entries to the
82 134 file (optional, default: ``False``).
135
136 ``klass``
137 class which is used to instantiate the return value (optional,
138 default: ``None``, the return value will be a :class:`~polib.POFile`
139 instance).
83 140 """
84 141 return _pofile_or_mofile(pofile, 'pofile', **kwargs)
85
86 142 # }}}
87 143 # function mofile() {{{
88 144
145
89 146 def mofile(mofile, **kwargs):
90 147 """
91 148 Convenience function that parses the mo file ``mofile`` and returns a
@@ -108,12 +165,17 def mofile(mofile, **kwargs):
108 165 ``check_for_duplicates``
109 166 whether to check for duplicate entries when adding entries to the
110 167 file (optional, default: ``False``).
168
169 ``klass``
170 class which is used to instantiate the return value (optional,
171 default: ``None``, the return value will be a :class:`~polib.POFile`
172 instance).
111 173 """
112 174 return _pofile_or_mofile(mofile, 'mofile', **kwargs)
113
114 175 # }}}
115 176 # function detect_encoding() {{{
116 177
178
117 179 def detect_encoding(file, binary_mode=False):
118 180 """
119 181 Try to detect the encoding used by the ``file``. The ``file`` argument can
@@ -129,7 +191,9 def detect_encoding(file, binary_mode=Fa
129 191 ``binary_mode``
130 192 boolean, set this to True if ``file`` is a mo file.
131 193 """
132 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
194 PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
195 rxt = re.compile(u(PATTERN))
196 rxb = re.compile(b(PATTERN))
133 197
134 198 def charset_exists(charset):
135 199 """Check whether ``charset`` is valid or not."""
@@ -139,31 +203,36 def detect_encoding(file, binary_mode=Fa
139 203 return False
140 204 return True
141 205
142 if not os.path.exists(file):
143 match = rx.search(file)
206 if not _is_file(file):
207 match = rxt.search(file)
144 208 if match:
145 209 enc = match.group(1).strip()
146 210 if charset_exists(enc):
147 211 return enc
148 212 else:
149 if binary_mode:
213 # For PY3, always treat as binary
214 if binary_mode or PY3:
150 215 mode = 'rb'
216 rx = rxb
151 217 else:
152 218 mode = 'r'
219 rx = rxt
153 220 f = open(file, mode)
154 221 for l in f.readlines():
155 222 match = rx.search(l)
156 223 if match:
157 224 f.close()
158 225 enc = match.group(1).strip()
226 if not isinstance(enc, text_type):
227 enc = enc.decode('utf-8')
159 228 if charset_exists(enc):
160 229 return enc
161 230 f.close()
162 231 return default_encoding
163
164 232 # }}}
165 233 # function escape() {{{
166 234
235
167 236 def escape(st):
168 237 """
169 238 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
@@ -174,10 +243,10 def escape(st):
174 243 .replace('\r', r'\r')\
175 244 .replace('\n', r'\n')\
176 245 .replace('\"', r'\"')
177
178 246 # }}}
179 247 # function unescape() {{{
180 248
249
181 250 def unescape(st):
182 251 """
183 252 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
@@ -195,10 +264,10 def unescape(st):
195 264 return '\\'
196 265 return m # handles escaped double quote
197 266 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
198
199 267 # }}}
200 268 # class _BaseFile {{{
201 269
270
202 271 class _BaseFile(list):
203 272 """
204 273 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
@@ -227,7 +296,7 class _BaseFile(list):
227 296 list.__init__(self)
228 297 # the opened file handle
229 298 pofile = kwargs.get('pofile', None)
230 if pofile and os.path.exists(pofile):
299 if pofile and _is_file(pofile):
231 300 self.fpath = pofile
232 301 else:
233 302 self.fpath = kwargs.get('fpath')
@@ -254,12 +323,17 class _BaseFile(list):
254 323 ret.append(entry.__unicode__(self.wrapwidth))
255 324 for entry in self.obsolete_entries():
256 325 ret.append(entry.__unicode__(self.wrapwidth))
257 ret = '\n'.join(ret)
326 ret = u('\n').join(ret)
258 327
259 if type(ret) != types.UnicodeType:
260 return unicode(ret, self.encoding)
328 assert isinstance(ret, text_type)
329 #if type(ret) != text_type:
330 # return unicode(ret, self.encoding)
261 331 return ret
262 332
333 if PY3:
334 def __str__(self):
335 return self.__unicode__()
336 else:
263 337 def __str__(self):
264 338 """
265 339 Returns the string representation of the file.
@@ -268,24 +342,26 class _BaseFile(list):
268 342
269 343 def __contains__(self, entry):
270 344 """
271 Overriden ``list`` method to implement the membership test (in and
345 Overridden ``list`` method to implement the membership test (in and
272 346 not in).
273 347 The method considers that an entry is in the file if it finds an entry
274 that has the same msgid (the test is **case sensitive**).
348 that has the same msgid (the test is **case sensitive**) and the same
349 msgctxt (or none for both entries).
275 350
276 351 Argument:
277 352
278 353 ``entry``
279 354 an instance of :class:`~polib._BaseEntry`.
280 355 """
281 return self.find(entry.msgid, by='msgid') is not None
356 return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
357 is not None
282 358
283 359 def __eq__(self, other):
284 return unicode(self) == unicode(other)
360 return str(self) == str(other)
285 361
286 362 def append(self, entry):
287 363 """
288 Overriden method to check for duplicates entries, if a user tries to
364 Overridden method to check for duplicate entries, if a user tries to
289 365 add an entry that is already in the file, the method will raise a
290 366 ``ValueError`` exception.
291 367
@@ -300,7 +376,7 class _BaseFile(list):
300 376
301 377 def insert(self, index, entry):
302 378 """
303 Overriden method to check for duplicates entries, if a user tries to
379 Overridden method to check for duplicate entries, if a user tries to
304 380 add an entry that is already in the file, the method will raise a
305 381 ``ValueError`` exception.
306 382
@@ -332,7 +408,7 class _BaseFile(list):
332 408 e.flags.append('fuzzy')
333 409 return e
334 410
335 def save(self, fpath=None, repr_method='__str__'):
411 def save(self, fpath=None, repr_method='__unicode__'):
336 412 """
337 413 Saves the po file to ``fpath``.
338 414 If it is an existing file and no ``fpath`` is provided, then the
@@ -354,8 +430,8 class _BaseFile(list):
354 430 if repr_method == 'to_binary':
355 431 fhandle = open(fpath, 'wb')
356 432 else:
357 fhandle = codecs.open(fpath, 'w', self.encoding)
358 if type(contents) != types.UnicodeType:
433 fhandle = io.open(fpath, 'w', encoding=self.encoding)
434 if not isinstance(contents, text_type):
359 435 contents = contents.decode(self.encoding)
360 436 fhandle.write(contents)
361 437 fhandle.close()
@@ -381,7 +457,7 class _BaseFile(list):
381 457 boolean, whether to also search in entries that are obsolete.
382 458
383 459 ``msgctxt``
384 string, allows to specify a specific message context for the
460 string, allows specifying a specific message context for the
385 461 search.
386 462 """
387 463 if include_obsolete_entries:
@@ -390,7 +466,7 class _BaseFile(list):
390 466 entries = [e for e in self if not e.obsolete]
391 467 for e in entries:
392 468 if getattr(e, by) == st:
393 if msgctxt and e.msgctxt != msgctxt:
469 if msgctxt is not False and e.msgctxt != msgctxt:
394 470 continue
395 471 return e
396 472 return None
@@ -412,7 +488,9 class _BaseFile(list):
412 488 'Language-Team',
413 489 'MIME-Version',
414 490 'Content-Type',
415 'Content-Transfer-Encoding'
491 'Content-Transfer-Encoding',
492 'Language',
493 'Plural-Forms'
416 494 ]
417 495 ordered_data = []
418 496 for data in data_order:
@@ -423,9 +501,7 class _BaseFile(list):
423 501 pass
424 502 # the rest of the metadata will be alphabetically ordered since there
425 503 # are no specs for this AFAIK
426 keys = metadata.keys()
427 keys.sort()
428 for data in keys:
504 for data in sorted(metadata.keys()):
429 505 value = metadata[data]
430 506 ordered_data.append((data, value))
431 507 return ordered_data
@@ -436,18 +512,12 class _BaseFile(list):
436 512 """
437 513 offsets = []
438 514 entries = self.translated_entries()
515
439 516 # the keys are sorted in the .mo file
440 517 def cmp(_self, other):
441 518 # msgfmt compares entries with msgctxt if it exists
442 if _self.msgctxt:
443 self_msgid = _self.msgctxt
444 else:
445 self_msgid = _self.msgid
446
447 if other.msgctxt:
448 other_msgid = other.msgctxt
449 else:
450 other_msgid = other.msgid
519 self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
520 other_msgid = other.msgctxt and other.msgctxt or other.msgid
451 521 if self_msgid > other_msgid:
452 522 return 1
453 523 elif self_msgid < other_msgid:
@@ -455,25 +525,23 class _BaseFile(list):
455 525 else:
456 526 return 0
457 527 # add metadata entry
458 entries.sort(cmp)
528 entries.sort(key=lambda o: o.msgctxt or o.msgid)
459 529 mentry = self.metadata_as_entry()
460 530 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
461 531 entries = [mentry] + entries
462 532 entries_len = len(entries)
463 ids, strs = '', ''
533 ids, strs = b(''), b('')
464 534 for e in entries:
465 535 # For each string, we need size and file offset. Each string is
466 536 # NUL terminated; the NUL does not count into the size.
467 msgid = ''
537 msgid = b('')
468 538 if e.msgctxt:
469 539 # Contexts are stored by storing the concatenation of the
470 540 # context, a <EOT> byte, and the original string
471 541 msgid = self._encode(e.msgctxt + '\4')
472 542 if e.msgid_plural:
473 indexes = e.msgstr_plural.keys()
474 indexes.sort()
475 543 msgstr = []
476 for index in indexes:
544 for index in sorted(e.msgstr_plural.keys()):
477 545 msgstr.append(e.msgstr_plural[index])
478 546 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
479 547 msgstr = self._encode('\0'.join(msgstr))
@@ -481,8 +549,8 class _BaseFile(list):
481 549 msgid += self._encode(e.msgid)
482 550 msgstr = self._encode(e.msgstr)
483 551 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
484 ids += msgid + '\0'
485 strs += msgstr + '\0'
552 ids += msgid + b('\0')
553 strs += msgstr + b('\0')
486 554
487 555 # The header is 7 32-bit unsigned integers.
488 556 keystart = 7*4+16*entries_len
@@ -496,22 +564,26 class _BaseFile(list):
496 564 koffsets += [l1, o1+keystart]
497 565 voffsets += [l2, o2+valuestart]
498 566 offsets = koffsets + voffsets
499 # check endianness for magic number
500 if struct.pack('@h', 1) == struct.pack('<h', 1):
501 magic_number = MOFile.LITTLE_ENDIAN
502 else:
503 magic_number = MOFile.BIG_ENDIAN
504 567
505 568 output = struct.pack(
506 569 "Iiiiiii",
507 magic_number, # Magic number
508 0, # Version
509 entries_len, # # of entries
510 7*4, # start of key index
511 7*4+entries_len*8, # start of value index
512 0, keystart # size and offset of hash table
513 # Important: we don't use hash tables
570 # Magic number
571 MOFile.MAGIC,
572 # Version
573 0,
574 # number of entries
575 entries_len,
576 # start of key index
577 7 * 4,
578 # start of value index
579 7 * 4 + entries_len * 8,
580 # size and offset of hash table, we don't use hash tables
581 0, keystart
582
514 583 )
584 if PY3 and sys.version_info.minor > 1: # python 3.2 or superior
585 output += array.array("i", offsets).tobytes()
586 else:
515 587 output += array.array("i", offsets).tostring()
516 588 output += ids
517 589 output += strs
@@ -522,13 +594,13 class _BaseFile(list):
522 594 Encodes the given ``mixed`` argument with the file encoding if and
523 595 only if it's a unicode string and returns the encoded string.
524 596 """
525 if type(mixed) == types.UnicodeType:
526 return mixed.encode(self.encoding)
597 if isinstance(mixed, text_type):
598 mixed = mixed.encode(self.encoding)
527 599 return mixed
528
529 600 # }}}
530 601 # class POFile {{{
531 602
603
532 604 class POFile(_BaseFile):
533 605 """
534 606 Po (or Pot) file reader/writer.
@@ -542,13 +614,15 class POFile(_BaseFile):
542 614 """
543 615 ret, headers = '', self.header.split('\n')
544 616 for header in headers:
545 if header[:1] in [',', ':']:
617 if not len(header):
618 ret += "#\n"
619 elif header[:1] in [',', ':']:
546 620 ret += '#%s\n' % header
547 621 else:
548 622 ret += '# %s\n' % header
549 623
550 if type(ret) != types.UnicodeType:
551 ret = unicode(ret, self.encoding)
624 if not isinstance(ret, text_type):
625 ret = ret.decode(self.encoding)
552 626
553 627 return ret + _BaseFile.__unicode__(self)
554 628
@@ -572,7 +646,7 class POFile(_BaseFile):
572 646 if total == 0:
573 647 return 100
574 648 translated = len(self.translated_entries())
575 return int((100.00 / float(total)) * translated)
649 return int(translated * 100 / float(total))
576 650
577 651 def translated_entries(self):
578 652 """
@@ -584,7 +658,7 class POFile(_BaseFile):
584 658 """
585 659 Convenience method that returns the list of untranslated entries.
586 660 """
587 return [e for e in self if not e.translated() and not e.obsolete \
661 return [e for e in self if not e.translated() and not e.obsolete
588 662 and not 'fuzzy' in e.flags]
589 663
590 664 def fuzzy_entries(self):
@@ -615,28 +689,32 class POFile(_BaseFile):
615 689 ``refpot``
616 690 object POFile, the reference catalog.
617 691 """
692 # Store entries in dict/set for faster access
693 self_entries = dict((entry.msgid, entry) for entry in self)
694 refpot_msgids = set(entry.msgid for entry in refpot)
695 # Merge entries that are in the refpot
618 696 for entry in refpot:
619 e = self.find(entry.msgid, include_obsolete_entries=True)
697 e = self_entries.get(entry.msgid)
620 698 if e is None:
621 699 e = POEntry()
622 700 self.append(e)
623 701 e.merge(entry)
624 702 # ok, now we must "obsolete" entries that are not in the refpot anymore
625 703 for entry in self:
626 if refpot.find(entry.msgid) is None:
704 if entry.msgid not in refpot_msgids:
627 705 entry.obsolete = True
628
629 706 # }}}
630 707 # class MOFile {{{
631 708
709
632 710 class MOFile(_BaseFile):
633 711 """
634 712 Mo file reader/writer.
635 713 This class inherits the :class:`~polib._BaseFile` class and, by
636 714 extension, the python ``list`` type.
637 715 """
638 BIG_ENDIAN = 0xde120495
639 LITTLE_ENDIAN = 0x950412de
716 MAGIC = 0x950412de
717 MAGIC_SWAPPED = 0xde120495
640 718
641 719 def __init__(self, *args, **kwargs):
642 720 """
@@ -698,10 +776,10 class MOFile(_BaseFile):
698 776 Convenience method to keep the same interface with POFile instances.
699 777 """
700 778 return []
701
702 779 # }}}
703 780 # class _BaseEntry {{{
704 781
782
705 783 class _BaseEntry(object):
706 784 """
707 785 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
@@ -753,12 +831,14 class _BaseEntry(object):
753 831 ret = []
754 832 # write the msgctxt if any
755 833 if self.msgctxt is not None:
756 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
834 ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
835 wrapwidth)
757 836 # write the msgid
758 837 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
759 838 # write the msgid_plural if any
760 839 if self.msgid_plural:
761 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
840 ret += self._str_field("msgid_plural", delflag, "",
841 self.msgid_plural, wrapwidth)
762 842 if self.msgstr_plural:
763 843 # write the msgstr_plural if any
764 844 msgstrs = self.msgstr_plural
@@ -767,17 +847,20 class _BaseEntry(object):
767 847 for index in keys:
768 848 msgstr = msgstrs[index]
769 849 plural_index = '[%s]' % index
770 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
850 ret += self._str_field("msgstr", delflag, plural_index, msgstr,
851 wrapwidth)
771 852 else:
772 853 # otherwise write the msgstr
773 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
854 ret += self._str_field("msgstr", delflag, "", self.msgstr,
855 wrapwidth)
774 856 ret.append('')
775 ret = '\n'.join(ret)
776
777 if type(ret) != types.UnicodeType:
778 return unicode(ret, self.encoding)
857 ret = u('\n').join(ret)
779 858 return ret
780 859
860 if PY3:
861 def __str__(self):
862 return self.__unicode__()
863 else:
781 864 def __str__(self):
782 865 """
783 866 Returns the string representation of the entry.
@@ -785,9 +868,10 class _BaseEntry(object):
785 868 return unicode(self).encode(self.encoding)
786 869
787 870 def __eq__(self, other):
788 return unicode(self) == unicode(other)
871 return str(self) == str(other)
789 872
790 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
873 def _str_field(self, fieldname, delflag, plural_index, field,
874 wrapwidth=78):
791 875 lines = field.splitlines(True)
792 876 if len(lines) > 1:
793 877 lines = [''] + lines # start with initial empty line
@@ -804,7 +888,7 class _BaseEntry(object):
804 888 real_wrapwidth = wrapwidth - flength + specialchars_count
805 889 if wrapwidth > 0 and len(field) > real_wrapwidth:
806 890 # Wrap the line but take field name into account
807 lines = [''] + [unescape(item) for item in textwrap.wrap(
891 lines = [''] + [unescape(item) for item in wrap(
808 892 escaped_field,
809 893 wrapwidth - 2, # 2 for quotes ""
810 894 drop_whitespace=False,
@@ -818,13 +902,13 class _BaseEntry(object):
818 902
819 903 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
820 904 escape(lines.pop(0)))]
821 for mstr in lines:
822 ret.append('%s"%s"' % (delflag, escape(mstr)))
905 for line in lines:
906 ret.append('%s"%s"' % (delflag, escape(line)))
823 907 return ret
824
825 908 # }}}
826 909 # class POEntry {{{
827 910
911
828 912 class POEntry(_BaseEntry):
829 913 """
830 914 Represents a po file entry.
@@ -854,6 +938,9 class POEntry(_BaseEntry):
854 938
855 939 ``previous_msgid_plural``
856 940 string, the entry previous msgid_plural.
941
942 ``linenum``
943 integer, the line number of the entry
857 944 """
858 945 _BaseEntry.__init__(self, *args, **kwargs)
859 946 self.comment = kwargs.get('comment', '')
@@ -863,6 +950,7 class POEntry(_BaseEntry):
863 950 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
864 951 self.previous_msgid = kwargs.get('previous_msgid', None)
865 952 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
953 self.linenum = kwargs.get('linenum', None)
866 954
867 955 def __unicode__(self, wrapwidth=78):
868 956 """
@@ -879,7 +967,7 class POEntry(_BaseEntry):
879 967 if val:
880 968 for comment in val.split('\n'):
881 969 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
882 ret += textwrap.wrap(
970 ret += wrap(
883 971 comment,
884 972 wrapwidth,
885 973 initial_indent=c[1],
@@ -903,7 +991,7 class POEntry(_BaseEntry):
903 991 # what we want for filenames, so the dirty hack is to
904 992 # temporarily replace hyphens with a char that a file cannot
905 993 # contain, like "*"
906 ret += [l.replace('*', '-') for l in textwrap.wrap(
994 ret += [l.replace('*', '-') for l in wrap(
907 995 filestr.replace('-', '*'),
908 996 wrapwidth,
909 997 initial_indent='#: ',
@@ -918,32 +1006,25 class POEntry(_BaseEntry):
918 1006 ret.append('#, %s' % ', '.join(self.flags))
919 1007
920 1008 # previous context and previous msgid/msgid_plural
921 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
1009 fields = ['previous_msgctxt', 'previous_msgid',
1010 'previous_msgid_plural']
922 1011 for f in fields:
923 1012 val = getattr(self, f)
924 1013 if val:
925 1014 ret += self._str_field(f, "#| ", "", val, wrapwidth)
926 1015
927 1016 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
928 ret = '\n'.join(ret)
1017 ret = u('\n').join(ret)
929 1018
930 if type(ret) != types.UnicodeType:
931 return unicode(ret, self.encoding)
1019 assert isinstance(ret, text_type)
1020 #if type(ret) != types.UnicodeType:
1021 # return unicode(ret, self.encoding)
932 1022 return ret
933 1023
934 1024 def __cmp__(self, other):
935 1025 """
936 1026 Called by comparison operations if rich comparison is not defined.
937 1027 """
938 def compare_occurrences(a, b):
939 """
940 Compare an entry occurrence with another one.
941 """
942 if a[0] != b[0]:
943 return a[0] < b[0]
944 if a[1] != b[1]:
945 return a[1] < b[1]
946 return 0
947 1028
948 1029 # First: Obsolete test
949 1030 if self.obsolete != other.obsolete:
@@ -952,12 +1033,8 class POEntry(_BaseEntry):
952 1033 else:
953 1034 return 1
954 1035 # Work on a copy to protect original
955 occ1 = self.occurrences[:]
956 occ2 = other.occurrences[:]
957 # Sorting using compare method
958 occ1.sort(compare_occurrences)
959 occ2.sort(compare_occurrences)
960 # Comparing sorted occurrences
1036 occ1 = sorted(self.occurrences[:])
1037 occ2 = sorted(other.occurrences[:])
961 1038 pos = 0
962 1039 for entry1 in occ1:
963 1040 try:
@@ -975,9 +1052,41 class POEntry(_BaseEntry):
975 1052 return 1
976 1053 else:
977 1054 return -1
1055 # Compare msgid_plural if set
1056 if self.msgid_plural:
1057 if not other.msgid_plural:
1058 return 1
1059 for pos in self.msgid_plural:
1060 if pos not in other.msgid_plural:
1061 return 1
1062 if self.msgid_plural[pos] > other.msgid_plural[pos]:
1063 return 1
1064 if self.msgid_plural[pos] < other.msgid_plural[pos]:
1065 return -1
978 1066 # Finally: Compare message ID
979 if self.msgid > other.msgid: return 1
980 else: return -1
1067 if self.msgid > other.msgid:
1068 return 1
1069 elif self.msgid < other.msgid:
1070 return -1
1071 return 0
1072
1073 def __gt__(self, other):
1074 return self.__cmp__(other) > 0
1075
1076 def __lt__(self, other):
1077 return self.__cmp__(other) < 0
1078
1079 def __ge__(self, other):
1080 return self.__cmp__(other) >= 0
1081
1082 def __le__(self, other):
1083 return self.__cmp__(other) <= 0
1084
1085 def __eq__(self, other):
1086 return self.__cmp__(other) == 0
1087
1088 def __ne__(self, other):
1089 return self.__cmp__(other) != 0
981 1090
982 1091 def translated(self):
983 1092 """
@@ -1020,18 +1129,49 class POEntry(_BaseEntry):
1020 1129 except KeyError:
1021 1130 self.msgstr_plural[pos] = ''
1022 1131
1132 def __hash__(self):
1133 return hash((self.msgid, self.msgstr))
1023 1134 # }}}
1024 1135 # class MOEntry {{{
1025 1136
1137
1026 1138 class MOEntry(_BaseEntry):
1027 1139 """
1028 1140 Represents a mo file entry.
1029 1141 """
1030 pass
1142 def __init__(self, *args, **kwargs):
1143 """
1144 Constructor, accepts the following keyword arguments,
1145 for consistency with :class:`~polib.POEntry`:
1146
1147 ``comment``
1148 ``tcomment``
1149 ``occurrences``
1150 ``flags``
1151 ``previous_msgctxt``
1152 ``previous_msgid``
1153 ``previous_msgid_plural``
1154
1155 Note: even though these keyword arguments are accepted,
1156 they hold no real meaning in the context of MO files
1157 and are simply ignored.
1158 """
1159 _BaseEntry.__init__(self, *args, **kwargs)
1160 self.comment = ''
1161 self.tcomment = ''
1162 self.occurrences = []
1163 self.flags = []
1164 self.previous_msgctxt = None
1165 self.previous_msgid = None
1166 self.previous_msgid_plural = None
1167
1168 def __hash__(self):
1169 return hash((self.msgid, self.msgstr))
1031 1170
1032 1171 # }}}
1033 1172 # class _POFileParser {{{
1034 1173
1174
1035 1175 class _POFileParser(object):
1036 1176 """
1037 1177 A finite state machine to parse efficiently and correctly po
@@ -1056,23 +1196,27 class _POFileParser(object):
1056 1196 file (optional, default: ``False``).
1057 1197 """
1058 1198 enc = kwargs.get('encoding', default_encoding)
1059 if os.path.exists(pofile):
1199 if _is_file(pofile):
1060 1200 try:
1061 self.fhandle = codecs.open(pofile, 'rU', enc)
1201 self.fhandle = io.open(pofile, 'rt', encoding=enc)
1062 1202 except LookupError:
1063 1203 enc = default_encoding
1064 self.fhandle = codecs.open(pofile, 'rU', enc)
1204 self.fhandle = io.open(pofile, 'rt', encoding=enc)
1065 1205 else:
1066 1206 self.fhandle = pofile.splitlines()
1067 1207
1068 self.instance = POFile(
1208 klass = kwargs.get('klass')
1209 if klass is None:
1210 klass = POFile
1211 self.instance = klass(
1069 1212 pofile=pofile,
1070 1213 encoding=enc,
1071 1214 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1072 1215 )
1073 1216 self.transitions = {}
1074 self.current_entry = POEntry()
1075 self.current_state = 'ST'
1217 self.current_line = 0
1218 self.current_entry = POEntry(linenum=self.current_line)
1219 self.current_state = 'st'
1076 1220 self.current_token = None
1077 1221 # two memo flags used in handlers
1078 1222 self.msgstr_index = 0
@@ -1083,7 +1227,7 class _POFileParser(object):
1083 1227 # * HE: Header
1084 1228 # * TC: a translation comment
1085 1229 # * GC: a generated comment
1086 # * OC: a file/line occurence
1230 # * OC: a file/line occurrence
1087 1231 # * FL: a flags line
1088 1232 # * CT: a message context
1089 1233 # * PC: a previous msgctxt
@@ -1094,48 +1238,47 class _POFileParser(object):
1094 1238 # * MS: a msgstr
1095 1239 # * MX: a msgstr plural
1096 1240 # * MC: a msgid or msgstr continuation line
1097 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1098 'MS', 'MP', 'MX', 'MI']
1241 all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
1242 'ms', 'mp', 'mx', 'mi']
1099 1243
1100 self.add('TC', ['ST', 'HE'], 'HE')
1101 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1102 'MP', 'MX', 'MI'], 'TC')
1103 self.add('GC', all, 'GC')
1104 self.add('OC', all, 'OC')
1105 self.add('FL', all, 'FL')
1106 self.add('PC', all, 'PC')
1107 self.add('PM', all, 'PM')
1108 self.add('PP', all, 'PP')
1109 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1110 'PP', 'MS', 'MX'], 'CT')
1111 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1112 'PM', 'PP', 'MS', 'MX'], 'MI')
1113 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1114 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1115 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1116 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1244 self.add('tc', ['st', 'he'], 'he')
1245 self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
1246 'mp', 'mx', 'mi'], 'tc')
1247 self.add('gc', all, 'gc')
1248 self.add('oc', all, 'oc')
1249 self.add('fl', all, 'fl')
1250 self.add('pc', all, 'pc')
1251 self.add('pm', all, 'pm')
1252 self.add('pp', all, 'pp')
1253 self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
1254 'pp', 'ms', 'mx'], 'ct')
1255 self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
1256 'pm', 'pp', 'ms', 'mx'], 'mi')
1257 self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
1258 self.add('ms', ['mi', 'mp', 'tc'], 'ms')
1259 self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
1260 self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
1117 1261
1118 1262 def parse(self):
1119 1263 """
1120 1264 Run the state machine, parse the file line by line and call process()
1121 1265 with the current matched symbol.
1122 1266 """
1123 i = 0
1124 1267
1125 1268 keywords = {
1126 'msgctxt': 'CT',
1127 'msgid': 'MI',
1128 'msgstr': 'MS',
1129 'msgid_plural': 'MP',
1269 'msgctxt': 'ct',
1270 'msgid': 'mi',
1271 'msgstr': 'ms',
1272 'msgid_plural': 'mp',
1130 1273 }
1131 1274 prev_keywords = {
1132 'msgid_plural': 'PP',
1133 'msgid': 'PM',
1134 'msgctxt': 'PC',
1275 'msgid_plural': 'pp',
1276 'msgid': 'pm',
1277 'msgctxt': 'pc',
1135 1278 }
1136
1279 tokens = []
1137 1280 for line in self.fhandle:
1138 i += 1
1281 self.current_line += 1
1139 1282 line = line.strip()
1140 1283 if line == '':
1141 1284 continue
@@ -1143,6 +1286,9 class _POFileParser(object):
1143 1286 tokens = line.split(None, 2)
1144 1287 nb_tokens = len(tokens)
1145 1288
1289 if tokens[0] == '#~|':
1290 continue
1291
1146 1292 if tokens[0] == '#~' and nb_tokens > 1:
1147 1293 line = line[3:].strip()
1148 1294 tokens = tokens[1:]
@@ -1155,41 +1301,56 class _POFileParser(object):
1155 1301 # msgid, msgid_plural, msgctxt & msgstr.
1156 1302 if tokens[0] in keywords and nb_tokens > 1:
1157 1303 line = line[len(tokens[0]):].lstrip()
1304 if re.search(r'([^\\]|^)"', line[1:-1]):
1305 raise IOError('Syntax error in po file %s (line %s): '
1306 'unescaped double quote found' %
1307 (self.instance.fpath, self.current_line))
1158 1308 self.current_token = line
1159 self.process(keywords[tokens[0]], i)
1309 self.process(keywords[tokens[0]])
1160 1310 continue
1161 1311
1162 1312 self.current_token = line
1163 1313
1164 if tokens[0] == '#:' and nb_tokens > 1:
1314 if tokens[0] == '#:':
1315 if nb_tokens <= 1:
1316 continue
1165 1317 # we are on a occurrences line
1166 self.process('OC', i)
1318 self.process('oc')
1167 1319
1168 1320 elif line[:1] == '"':
1169 1321 # we are on a continuation line
1170 self.process('MC', i)
1322 if re.search(r'([^\\]|^)"', line[1:-1]):
1323 raise IOError('Syntax error in po file %s (line %s): '
1324 'unescaped double quote found' %
1325 (self.instance.fpath, self.current_line))
1326 self.process('mc')
1171 1327
1172 1328 elif line[:7] == 'msgstr[':
1173 1329 # we are on a msgstr plural
1174 self.process('MX', i)
1330 self.process('mx')
1175 1331
1176 elif tokens[0] == '#,' and nb_tokens > 1:
1332 elif tokens[0] == '#,':
1333 if nb_tokens <= 1:
1334 continue
1177 1335 # we are on a flags line
1178 self.process('FL', i)
1336 self.process('fl')
1179 1337
1180 elif tokens[0] == '#':
1181 if line == '#': line += ' '
1338 elif tokens[0] == '#' or tokens[0].startswith('##'):
1339 if line == '#':
1340 line += ' '
1182 1341 # we are on a translator comment line
1183 self.process('TC', i)
1342 self.process('tc')
1184 1343
1185 elif tokens[0] == '#.' and nb_tokens > 1:
1344 elif tokens[0] == '#.':
1345 if nb_tokens <= 1:
1346 continue
1186 1347 # we are on a generated comment line
1187 self.process('GC', i)
1348 self.process('gc')
1188 1349
1189 1350 elif tokens[0] == '#|':
1190 if nb_tokens < 2:
1191 self.process('??', i)
1192 continue
1351 if nb_tokens <= 1:
1352 raise IOError('Syntax error in po file %s (line %s)' %
1353 (self.instance.fpath, self.current_line))
1193 1354
1194 1355 # Remove the marker and any whitespace right after that.
1195 1356 line = line[2:].lstrip()
@@ -1197,48 +1358,57 class _POFileParser(object):
1197 1358
1198 1359 if tokens[1].startswith('"'):
1199 1360 # Continuation of previous metadata.
1200 self.process('MC', i)
1361 self.process('mc')
1201 1362 continue
1202 1363
1203 1364 if nb_tokens == 2:
1204 1365 # Invalid continuation line.
1205 self.process('??', i)
1366 raise IOError('Syntax error in po file %s (line %s): '
1367 'invalid continuation line' %
1368 (self.instance.fpath, self.current_line))
1206 1369
1207 1370 # we are on a "previous translation" comment line,
1208 1371 if tokens[1] not in prev_keywords:
1209 1372 # Unknown keyword in previous translation comment.
1210 self.process('??', i)
1373 raise IOError('Syntax error in po file %s (line %s): '
1374 'unknown keyword %s' %
1375 (self.instance.fpath, self.current_line,
1376 tokens[1]))
1211 1377
1212 1378 # Remove the keyword and any whitespace
1213 1379 # between it and the starting quote.
1214 1380 line = line[len(tokens[1]):].lstrip()
1215 1381 self.current_token = line
1216 self.process(prev_keywords[tokens[1]], i)
1382 self.process(prev_keywords[tokens[1]])
1217 1383
1218 1384 else:
1219 self.process('??', i)
1385 raise IOError('Syntax error in po file %s (line %s)' %
1386 (self.instance.fpath, self.current_line))
1220 1387
1221 if self.current_entry:
1388 if self.current_entry and len(tokens) > 0 and \
1389 not tokens[0].startswith('#'):
1222 1390 # since entries are added when another entry is found, we must add
1223 # the last entry here (only if there are lines)
1391 # the last entry here (only if there are lines). Trailing comments
1392 # are ignored
1224 1393 self.instance.append(self.current_entry)
1394
1225 1395 # before returning the instance, check if there's metadata and if
1226 1396 # so extract it in a dict
1227 firstentry = self.instance[0]
1228 if firstentry.msgid == '': # metadata found
1397 metadataentry = self.instance.find('')
1398 if metadataentry: # metadata found
1229 1399 # remove the entry
1230 firstentry = self.instance.pop(0)
1231 self.instance.metadata_is_fuzzy = firstentry.flags
1400 self.instance.remove(metadataentry)
1401 self.instance.metadata_is_fuzzy = metadataentry.flags
1232 1402 key = None
1233 for msg in firstentry.msgstr.splitlines():
1403 for msg in metadataentry.msgstr.splitlines():
1234 1404 try:
1235 1405 key, val = msg.split(':', 1)
1236 1406 self.instance.metadata[key] = val.strip()
1237 except:
1407 except (ValueError, KeyError):
1238 1408 if key is not None:
1239 1409 self.instance.metadata[key] += '\n'+ msg.strip()
1240 1410 # close opened file
1241 if isinstance(self.fhandle, file):
1411 if not isinstance(self.fhandle, list): # must be file
1242 1412 self.fhandle.close()
1243 1413 return self.instance
1244 1414
@@ -1258,10 +1428,10 class _POFileParser(object):
1258 1428 the next state the fsm will have after the action.
1259 1429 """
1260 1430 for state in states:
1261 action = getattr(self, 'handle_%s' % next_state.lower())
1431 action = getattr(self, 'handle_%s' % next_state)
1262 1432 self.transitions[(symbol, state)] = (action, next_state)
1263 1433
1264 def process(self, symbol, linenum):
1434 def process(self, symbol):
1265 1435 """
1266 1436 Process the transition corresponding to the current state and the
1267 1437 symbol provided.
@@ -1278,8 +1448,9 class _POFileParser(object):
1278 1448 (action, state) = self.transitions[(symbol, self.current_state)]
1279 1449 if action():
1280 1450 self.current_state = state
1281 except Exception as exc:
1282 raise IOError('Syntax error in po file (line %s)' % linenum)
1451 except Exception:
1452 raise IOError('Syntax error in po file (line %s)' %
1453 self.current_line)
1283 1454
1284 1455 # state handlers
1285 1456
@@ -1292,29 +1463,32 class _POFileParser(object):
1292 1463
1293 1464 def handle_tc(self):
1294 1465 """Handle a translator comment."""
1295 if self.current_state in ['MC', 'MS', 'MX']:
1466 if self.current_state in ['mc', 'ms', 'mx']:
1296 1467 self.instance.append(self.current_entry)
1297 self.current_entry = POEntry()
1468 self.current_entry = POEntry(linenum=self.current_line)
1298 1469 if self.current_entry.tcomment != '':
1299 1470 self.current_entry.tcomment += '\n'
1300 self.current_entry.tcomment += self.current_token[2:]
1471 tcomment = self.current_token.lstrip('#')
1472 if tcomment.startswith(' '):
1473 tcomment = tcomment[1:]
1474 self.current_entry.tcomment += tcomment
1301 1475 return True
1302 1476
1303 1477 def handle_gc(self):
1304 1478 """Handle a generated comment."""
1305 if self.current_state in ['MC', 'MS', 'MX']:
1479 if self.current_state in ['mc', 'ms', 'mx']:
1306 1480 self.instance.append(self.current_entry)
1307 self.current_entry = POEntry()
1481 self.current_entry = POEntry(linenum=self.current_line)
1308 1482 if self.current_entry.comment != '':
1309 1483 self.current_entry.comment += '\n'
1310 1484 self.current_entry.comment += self.current_token[3:]
1311 1485 return True
1312 1486
1313 1487 def handle_oc(self):
1314 """Handle a file:num occurence."""
1315 if self.current_state in ['MC', 'MS', 'MX']:
1488 """Handle a file:num occurrence."""
1489 if self.current_state in ['mc', 'ms', 'mx']:
1316 1490 self.instance.append(self.current_entry)
1317 self.current_entry = POEntry()
1491 self.current_entry = POEntry(linenum=self.current_line)
1318 1492 occurrences = self.current_token[3:].split()
1319 1493 for occurrence in occurrences:
1320 1494 if occurrence != '':
@@ -1324,58 +1498,59 class _POFileParser(object):
1324 1498 fil = fil + line
1325 1499 line = ''
1326 1500 self.current_entry.occurrences.append((fil, line))
1327 except:
1501 except (ValueError, AttributeError):
1328 1502 self.current_entry.occurrences.append((occurrence, ''))
1329 1503 return True
1330 1504
1331 1505 def handle_fl(self):
1332 1506 """Handle a flags line."""
1333 if self.current_state in ['MC', 'MS', 'MX']:
1507 if self.current_state in ['mc', 'ms', 'mx']:
1334 1508 self.instance.append(self.current_entry)
1335 self.current_entry = POEntry()
1336 self.current_entry.flags += self.current_token[3:].split(', ')
1509 self.current_entry = POEntry(linenum=self.current_line)
1510 self.current_entry.flags += [c.strip() for c in
1511 self.current_token[3:].split(',')]
1337 1512 return True
1338 1513
1339 1514 def handle_pp(self):
1340 1515 """Handle a previous msgid_plural line."""
1341 if self.current_state in ['MC', 'MS', 'MX']:
1516 if self.current_state in ['mc', 'ms', 'mx']:
1342 1517 self.instance.append(self.current_entry)
1343 self.current_entry = POEntry()
1518 self.current_entry = POEntry(linenum=self.current_line)
1344 1519 self.current_entry.previous_msgid_plural = \
1345 1520 unescape(self.current_token[1:-1])
1346 1521 return True
1347 1522
1348 1523 def handle_pm(self):
1349 1524 """Handle a previous msgid line."""
1350 if self.current_state in ['MC', 'MS', 'MX']:
1525 if self.current_state in ['mc', 'ms', 'mx']:
1351 1526 self.instance.append(self.current_entry)
1352 self.current_entry = POEntry()
1527 self.current_entry = POEntry(linenum=self.current_line)
1353 1528 self.current_entry.previous_msgid = \
1354 1529 unescape(self.current_token[1:-1])
1355 1530 return True
1356 1531
1357 1532 def handle_pc(self):
1358 1533 """Handle a previous msgctxt line."""
1359 if self.current_state in ['MC', 'MS', 'MX']:
1534 if self.current_state in ['mc', 'ms', 'mx']:
1360 1535 self.instance.append(self.current_entry)
1361 self.current_entry = POEntry()
1536 self.current_entry = POEntry(linenum=self.current_line)
1362 1537 self.current_entry.previous_msgctxt = \
1363 1538 unescape(self.current_token[1:-1])
1364 1539 return True
1365 1540
1366 1541 def handle_ct(self):
1367 1542 """Handle a msgctxt."""
1368 if self.current_state in ['MC', 'MS', 'MX']:
1543 if self.current_state in ['mc', 'ms', 'mx']:
1369 1544 self.instance.append(self.current_entry)
1370 self.current_entry = POEntry()
1545 self.current_entry = POEntry(linenum=self.current_line)
1371 1546 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
1372 1547 return True
1373 1548
1374 1549 def handle_mi(self):
1375 1550 """Handle a msgid."""
1376 if self.current_state in ['MC', 'MS', 'MX']:
1551 if self.current_state in ['mc', 'ms', 'mx']:
1377 1552 self.instance.append(self.current_entry)
1378 self.current_entry = POEntry()
1553 self.current_entry = POEntry(linenum=self.current_line)
1379 1554 self.current_entry.obsolete = self.entry_obsolete
1380 1555 self.current_entry.msgid = unescape(self.current_token[1:-1])
1381 1556 return True
@@ -1392,47 +1567,37 class _POFileParser(object):
1392 1567
1393 1568 def handle_mx(self):
1394 1569 """Handle a msgstr plural."""
1395 index, value = self.current_token[7], self.current_token[11:-1]
1396 self.current_entry.msgstr_plural[index] = unescape(value)
1397 self.msgstr_index = index
1570 index = self.current_token[7]
1571 value = self.current_token[self.current_token.find('"') + 1:-1]
1572 self.current_entry.msgstr_plural[int(index)] = unescape(value)
1573 self.msgstr_index = int(index)
1398 1574 return True
1399 1575
1400 1576 def handle_mc(self):
1401 1577 """Handle a msgid or msgstr continuation line."""
1402 1578 token = unescape(self.current_token[1:-1])
1403 if self.current_state == 'CT':
1404 typ = 'msgctxt'
1579 if self.current_state == 'ct':
1405 1580 self.current_entry.msgctxt += token
1406 elif self.current_state == 'MI':
1407 typ = 'msgid'
1581 elif self.current_state == 'mi':
1408 1582 self.current_entry.msgid += token
1409 elif self.current_state == 'MP':
1410 typ = 'msgid_plural'
1583 elif self.current_state == 'mp':
1411 1584 self.current_entry.msgid_plural += token
1412 elif self.current_state == 'MS':
1413 typ = 'msgstr'
1585 elif self.current_state == 'ms':
1414 1586 self.current_entry.msgstr += token
1415 elif self.current_state == 'MX':
1416 typ = 'msgstr[%s]' % self.msgstr_index
1587 elif self.current_state == 'mx':
1417 1588 self.current_entry.msgstr_plural[self.msgstr_index] += token
1418 elif self.current_state == 'PP':
1419 typ = 'previous_msgid_plural'
1420 token = token[3:]
1589 elif self.current_state == 'pp':
1421 1590 self.current_entry.previous_msgid_plural += token
1422 elif self.current_state == 'PM':
1423 typ = 'previous_msgid'
1424 token = token[3:]
1591 elif self.current_state == 'pm':
1425 1592 self.current_entry.previous_msgid += token
1426 elif self.current_state == 'PC':
1427 typ = 'previous_msgctxt'
1428 token = token[3:]
1593 elif self.current_state == 'pc':
1429 1594 self.current_entry.previous_msgctxt += token
1430 1595 # don't change the current state
1431 1596 return False
1432
1433 1597 # }}}
1434 1598 # class _MOFileParser {{{
1435 1599
1600
1436 1601 class _MOFileParser(object):
1437 1602 """
1438 1603 A class to parse binary mo files.
@@ -1456,12 +1621,24 class _MOFileParser(object):
1456 1621 file (optional, default: ``False``).
1457 1622 """
1458 1623 self.fhandle = open(mofile, 'rb')
1459 self.instance = MOFile(
1624
1625 klass = kwargs.get('klass')
1626 if klass is None:
1627 klass = MOFile
1628 self.instance = klass(
1460 1629 fpath=mofile,
1461 1630 encoding=kwargs.get('encoding', default_encoding),
1462 1631 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1463 1632 )
1464 1633
1634 def __del__(self):
1635 """
1636 Make sure the file is closed, this prevents warnings on unclosed file
1637 when running tests with python >= 3.2.
1638 """
1639 if self.fhandle:
1640 self.fhandle.close()
1641
1465 1642 def parse(self):
1466 1643 """
1467 1644 Build the instance with the file handle provided in the
@@ -1469,15 +1646,20 class _MOFileParser(object):
1469 1646 """
1470 1647 # parse magic number
1471 1648 magic_number = self._readbinary('<I', 4)
1472 if magic_number == MOFile.LITTLE_ENDIAN:
1649 if magic_number == MOFile.MAGIC:
1473 1650 ii = '<II'
1474 elif magic_number == MOFile.BIG_ENDIAN:
1651 elif magic_number == MOFile.MAGIC_SWAPPED:
1475 1652 ii = '>II'
1476 1653 else:
1477 1654 raise IOError('Invalid mo file, magic number is incorrect !')
1478 1655 self.instance.magic_number = magic_number
1479 1656 # parse the version number and the number of strings
1480 self.instance.version, numofstrings = self._readbinary(ii, 8)
1657 version, numofstrings = self._readbinary(ii, 8)
1658 # from MO file format specs: "A program seeing an unexpected major
1659 # revision number should stop reading the MO file entirely"
1660 if version not in (0, 1):
1661 raise IOError('Invalid mo file, unexpected major revision number')
1662 self.instance.version = version
1481 1663 # original strings and translation strings hash table offset
1482 1664 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1483 1665 # move to msgid hash table and read length and offset of msgids
@@ -1491,29 +1673,34 class _MOFileParser(object):
1491 1673 for i in range(numofstrings):
1492 1674 msgstrs_index.append(self._readbinary(ii, 8))
1493 1675 # build entries
1676 encoding = self.instance.encoding
1494 1677 for i in range(numofstrings):
1495 1678 self.fhandle.seek(msgids_index[i][1])
1496 1679 msgid = self.fhandle.read(msgids_index[i][0])
1680
1497 1681 self.fhandle.seek(msgstrs_index[i][1])
1498 1682 msgstr = self.fhandle.read(msgstrs_index[i][0])
1499 if i == 0: # metadata
1500 raw_metadata, metadata = msgstr.split('\n'), {}
1683 if i == 0 and not msgid: # metadata
1684 raw_metadata, metadata = msgstr.split(b('\n')), {}
1501 1685 for line in raw_metadata:
1502 tokens = line.split(':', 1)
1503 if tokens[0] != '':
1686 tokens = line.split(b(':'), 1)
1687 if tokens[0] != b(''):
1504 1688 try:
1505 metadata[tokens[0]] = tokens[1].strip()
1689 k = tokens[0].decode(encoding)
1690 v = tokens[1].decode(encoding)
1691 metadata[k] = v.strip()
1506 1692 except IndexError:
1507 metadata[tokens[0]] = ''
1693 metadata[k] = u('')
1508 1694 self.instance.metadata = metadata
1509 1695 continue
1510 1696 # test if we have a plural entry
1511 msgid_tokens = msgid.split('\0')
1697 msgid_tokens = msgid.split(b('\0'))
1512 1698 if len(msgid_tokens) > 1:
1513 1699 entry = self._build_entry(
1514 1700 msgid=msgid_tokens[0],
1515 1701 msgid_plural=msgid_tokens[1],
1516 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
1702 msgstr_plural=dict((k, v) for k, v in
1703 enumerate(msgstr.split(b('\0'))))
1517 1704 )
1518 1705 else:
1519 1706 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
@@ -1524,19 +1711,22 class _MOFileParser(object):
1524 1711
1525 1712 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1526 1713 msgstr_plural=None):
1527 msgctxt_msgid = msgid.split('\x04')
1714 msgctxt_msgid = msgid.split(b('\x04'))
1715 encoding = self.instance.encoding
1528 1716 if len(msgctxt_msgid) > 1:
1529 1717 kwargs = {
1530 'msgctxt': msgctxt_msgid[0],
1531 'msgid' : msgctxt_msgid[1],
1718 'msgctxt': msgctxt_msgid[0].decode(encoding),
1719 'msgid': msgctxt_msgid[1].decode(encoding),
1532 1720 }
1533 1721 else:
1534 kwargs = {'msgid': msgid}
1722 kwargs = {'msgid': msgid.decode(encoding)}
1535 1723 if msgstr:
1536 kwargs['msgstr'] = msgstr
1724 kwargs['msgstr'] = msgstr.decode(encoding)
1537 1725 if msgid_plural:
1538 kwargs['msgid_plural'] = msgid_plural
1726 kwargs['msgid_plural'] = msgid_plural.decode(encoding)
1539 1727 if msgstr_plural:
1728 for k in msgstr_plural:
1729 msgstr_plural[k] = msgstr_plural[k].decode(encoding)
1540 1730 kwargs['msgstr_plural'] = msgstr_plural
1541 1731 return MOEntry(**kwargs)
1542 1732
@@ -1550,5 +1740,99 class _MOFileParser(object):
1550 1740 if len(tup) == 1:
1551 1741 return tup[0]
1552 1742 return tup
1743 # }}}
1744 # class TextWrapper {{{
1745
1746
1747 class TextWrapper(textwrap.TextWrapper):
1748 """
1749 Subclass of textwrap.TextWrapper that backport the
1750 drop_whitespace option.
1751 """
1752 def __init__(self, *args, **kwargs):
1753 drop_whitespace = kwargs.pop('drop_whitespace', True)
1754 textwrap.TextWrapper.__init__(self, *args, **kwargs)
1755 self.drop_whitespace = drop_whitespace
1756
1757 def _wrap_chunks(self, chunks):
1758 """_wrap_chunks(chunks : [string]) -> [string]
1759
1760 Wrap a sequence of text chunks and return a list of lines of
1761 length 'self.width' or less. (If 'break_long_words' is false,
1762 some lines may be longer than this.) Chunks correspond roughly
1763 to words and the whitespace between them: each chunk is
1764 indivisible (modulo 'break_long_words'), but a line break can
1765 come between any two chunks. Chunks should not have internal
1766 whitespace; ie. a chunk is either all whitespace or a "word".
1767 Whitespace chunks will be removed from the beginning and end of
1768 lines, but apart from that whitespace is preserved.
1769 """
1770 lines = []
1771 if self.width <= 0:
1772 raise ValueError("invalid width %r (must be > 0)" % self.width)
1773
1774 # Arrange in reverse order so items can be efficiently popped
1775 # from a stack of chucks.
1776 chunks.reverse()
1777
1778 while chunks:
1779
1780 # Start the list of chunks that will make up the current line.
1781 # cur_len is just the length of all the chunks in cur_line.
1782 cur_line = []
1783 cur_len = 0
1784
1785 # Figure out which static string will prefix this line.
1786 if lines:
1787 indent = self.subsequent_indent
1788 else:
1789 indent = self.initial_indent
1790
1791 # Maximum width for this line.
1792 width = self.width - len(indent)
1793
1794 # First chunk on line is whitespace -- drop it, unless this
1795 # is the very beginning of the text (ie. no lines started yet).
1796 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
1797 del chunks[-1]
1798
1799 while chunks:
1800 l = len(chunks[-1])
1801
1802 # Can at least squeeze this chunk onto the current line.
1803 if cur_len + l <= width:
1804 cur_line.append(chunks.pop())
1805 cur_len += l
1806
1807 # Nope, this line is full.
1808 else:
1809 break
1810
1811 # The current line is full, and the next chunk is too big to
1812 # fit on *any* line (not just this one).
1813 if chunks and len(chunks[-1]) > width:
1814 self._handle_long_word(chunks, cur_line, cur_len, width)
1815
1816 # If the last chunk on this line is all whitespace, drop it.
1817 if self.drop_whitespace and cur_line and not cur_line[-1].strip():
1818 del cur_line[-1]
1819
1820 # Convert current line back to a string and store it in list
1821 # of all lines (return value).
1822 if cur_line:
1823 lines.append(indent + ''.join(cur_line))
1824
1825 return lines
1826 # }}}
1827 # function wrap() {{{
1828
1829
1830 def wrap(text, width=70, **kwargs):
1831 """
1832 Wrap a single paragraph of text, returning a list of wrapped lines.
1833 """
1834 if sys.version_info < (2, 6):
1835 return TextWrapper(width=width, **kwargs).wrap(text)
1836 return textwrap.wrap(text, width=width, **kwargs)
1553 1837
1554 1838 # }}}
General Comments 0
You need to be logged in to leave comments. Login now