##// END OF EJS Templates
polib: update to latest release 1.0.7 (upstream rev d75ce6dbbc2a)...
Augie Fackler -
r40221:19fc5a98 default
parent child Browse files
Show More
@@ -224,14 +224,6 b' be separated by whitespaces::'
224 failures, tests = doctest.testmod()
224 failures, tests = doctest.testmod()
225 sys.exit(failures and 1 or 0)
225 sys.exit(failures and 1 or 0)
226
226
227 # replace polib._POFileParser to show linenum of problematic msgstr
228 class ExtPOFileParser(polib._POFileParser):
229 def process(self, symbol, linenum):
230 super(ExtPOFileParser, self).process(symbol, linenum)
231 if symbol == 'MS': # msgstr
232 self.current_entry.linenum = linenum
233 polib._POFileParser = ExtPOFileParser
234
235 detected = []
227 detected = []
236 warning = options.warning
228 warning = options.warning
237 for f in args:
229 for f in args:
This diff has been collapsed as it changes many lines, (818 lines changed) Show them Hide them
@@ -1,5 +1,5 b''
1 # -*- coding: utf-8 -*-
2 # no-check-code
1 # no-check-code
2 # -*- coding: utf-8 -*-
3 #
3 #
4 # License: MIT (see LICENSE file provided)
4 # License: MIT (see LICENSE file provided)
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
@@ -15,10 +15,10 b' modify entries, comments or metadata, et'
15
15
16 from __future__ import absolute_import
16 from __future__ import absolute_import
17
17
18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
19 __version__ = '0.6.4'
19 __version__ = '1.0.7'
20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
21 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
21 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
22
22
23 import array
23 import array
24 import codecs
24 import codecs
@@ -27,14 +27,47 b' import re'
27 import struct
27 import struct
28 import sys
28 import sys
29 import textwrap
29 import textwrap
30 import types
30
31 try:
32 import io
33 except ImportError:
34 # replacement of io.open() for python < 2.6
35 # we use codecs instead
36 class io(object):
37 @staticmethod
38 def open(fpath, mode='r', encoding=None):
39 return codecs.open(fpath, mode, encoding)
31
40
32
41
33 # the default encoding to use when encoding cannot be detected
42 # the default encoding to use when encoding cannot be detected
34 default_encoding = 'utf-8'
43 default_encoding = 'utf-8'
35
44
45 # python 2/3 compatibility helpers {{{
46
47
48 if sys.version_info[:2] < (3, 0):
49 PY3 = False
50 text_type = unicode
51
52 def b(s):
53 return s
54
55 def u(s):
56 return unicode(s, "unicode_escape")
57
58 else:
59 PY3 = True
60 text_type = str
61
62 def b(s):
63 return s.encode("latin-1")
64
65 def u(s):
66 return s
67 # }}}
36 # _pofile_or_mofile {{{
68 # _pofile_or_mofile {{{
37
69
70
38 def _pofile_or_mofile(f, type, **kwargs):
71 def _pofile_or_mofile(f, type, **kwargs):
39 """
72 """
40 Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
73 Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
@@ -50,15 +83,34 b' def _pofile_or_mofile(f, type, **kwargs)'
50 parser = kls(
83 parser = kls(
51 f,
84 f,
52 encoding=enc,
85 encoding=enc,
53 check_for_duplicates=kwargs.get('check_for_duplicates', False)
86 check_for_duplicates=kwargs.get('check_for_duplicates', False),
87 klass=kwargs.get('klass')
54 )
88 )
55 instance = parser.parse()
89 instance = parser.parse()
56 instance.wrapwidth = kwargs.get('wrapwidth', 78)
90 instance.wrapwidth = kwargs.get('wrapwidth', 78)
57 return instance
91 return instance
92 # }}}
93 # _is_file {{{
58
94
95
96 def _is_file(filename_or_contents):
97 """
98 Safely returns the value of os.path.exists(filename_or_contents).
99
100 Arguments:
101
102 ``filename_or_contents``
103 either a filename, or a string holding the contents of some file.
104 In the latter case, this function will always return False.
105 """
106 try:
107 return os.path.exists(filename_or_contents)
108 except (ValueError, UnicodeEncodeError):
109 return False
59 # }}}
110 # }}}
60 # function pofile() {{{
111 # function pofile() {{{
61
112
113
62 def pofile(pofile, **kwargs):
114 def pofile(pofile, **kwargs):
63 """
115 """
64 Convenience function that parses the po or pot file ``pofile`` and returns
116 Convenience function that parses the po or pot file ``pofile`` and returns
@@ -80,12 +132,17 b' def pofile(pofile, **kwargs):'
80 ``check_for_duplicates``
132 ``check_for_duplicates``
81 whether to check for duplicate entries when adding entries to the
133 whether to check for duplicate entries when adding entries to the
82 file (optional, default: ``False``).
134 file (optional, default: ``False``).
135
136 ``klass``
137 class which is used to instantiate the return value (optional,
138 default: ``None``, the return value will be a :class:`~polib.POFile`
139 instance).
83 """
140 """
84 return _pofile_or_mofile(pofile, 'pofile', **kwargs)
141 return _pofile_or_mofile(pofile, 'pofile', **kwargs)
85
86 # }}}
142 # }}}
87 # function mofile() {{{
143 # function mofile() {{{
88
144
145
89 def mofile(mofile, **kwargs):
146 def mofile(mofile, **kwargs):
90 """
147 """
91 Convenience function that parses the mo file ``mofile`` and returns a
148 Convenience function that parses the mo file ``mofile`` and returns a
@@ -108,12 +165,17 b' def mofile(mofile, **kwargs):'
108 ``check_for_duplicates``
165 ``check_for_duplicates``
109 whether to check for duplicate entries when adding entries to the
166 whether to check for duplicate entries when adding entries to the
110 file (optional, default: ``False``).
167 file (optional, default: ``False``).
168
169 ``klass``
170 class which is used to instantiate the return value (optional,
171 default: ``None``, the return value will be a :class:`~polib.POFile`
172 instance).
111 """
173 """
112 return _pofile_or_mofile(mofile, 'mofile', **kwargs)
174 return _pofile_or_mofile(mofile, 'mofile', **kwargs)
113
114 # }}}
175 # }}}
115 # function detect_encoding() {{{
176 # function detect_encoding() {{{
116
177
178
117 def detect_encoding(file, binary_mode=False):
179 def detect_encoding(file, binary_mode=False):
118 """
180 """
119 Try to detect the encoding used by the ``file``. The ``file`` argument can
181 Try to detect the encoding used by the ``file``. The ``file`` argument can
@@ -129,7 +191,9 b' def detect_encoding(file, binary_mode=Fa'
129 ``binary_mode``
191 ``binary_mode``
130 boolean, set this to True if ``file`` is a mo file.
192 boolean, set this to True if ``file`` is a mo file.
131 """
193 """
132 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
194 PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
195 rxt = re.compile(u(PATTERN))
196 rxb = re.compile(b(PATTERN))
133
197
134 def charset_exists(charset):
198 def charset_exists(charset):
135 """Check whether ``charset`` is valid or not."""
199 """Check whether ``charset`` is valid or not."""
@@ -139,31 +203,36 b' def detect_encoding(file, binary_mode=Fa'
139 return False
203 return False
140 return True
204 return True
141
205
142 if not os.path.exists(file):
206 if not _is_file(file):
143 match = rx.search(file)
207 match = rxt.search(file)
144 if match:
208 if match:
145 enc = match.group(1).strip()
209 enc = match.group(1).strip()
146 if charset_exists(enc):
210 if charset_exists(enc):
147 return enc
211 return enc
148 else:
212 else:
149 if binary_mode:
213 # For PY3, always treat as binary
214 if binary_mode or PY3:
150 mode = 'rb'
215 mode = 'rb'
216 rx = rxb
151 else:
217 else:
152 mode = 'r'
218 mode = 'r'
219 rx = rxt
153 f = open(file, mode)
220 f = open(file, mode)
154 for l in f.readlines():
221 for l in f.readlines():
155 match = rx.search(l)
222 match = rx.search(l)
156 if match:
223 if match:
157 f.close()
224 f.close()
158 enc = match.group(1).strip()
225 enc = match.group(1).strip()
226 if not isinstance(enc, text_type):
227 enc = enc.decode('utf-8')
159 if charset_exists(enc):
228 if charset_exists(enc):
160 return enc
229 return enc
161 f.close()
230 f.close()
162 return default_encoding
231 return default_encoding
163
164 # }}}
232 # }}}
165 # function escape() {{{
233 # function escape() {{{
166
234
235
167 def escape(st):
236 def escape(st):
168 """
237 """
169 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
238 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
@@ -174,10 +243,10 b' def escape(st):'
174 .replace('\r', r'\r')\
243 .replace('\r', r'\r')\
175 .replace('\n', r'\n')\
244 .replace('\n', r'\n')\
176 .replace('\"', r'\"')
245 .replace('\"', r'\"')
177
178 # }}}
246 # }}}
179 # function unescape() {{{
247 # function unescape() {{{
180
248
249
181 def unescape(st):
250 def unescape(st):
182 """
251 """
183 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
252 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
@@ -193,12 +262,12 b' def unescape(st):'
193 return '\r'
262 return '\r'
194 if m == '\\':
263 if m == '\\':
195 return '\\'
264 return '\\'
196 return m # handles escaped double quote
265 return m # handles escaped double quote
197 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
266 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
198
199 # }}}
267 # }}}
200 # class _BaseFile {{{
268 # class _BaseFile {{{
201
269
270
202 class _BaseFile(list):
271 class _BaseFile(list):
203 """
272 """
204 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
273 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
@@ -227,7 +296,7 b' class _BaseFile(list):'
227 list.__init__(self)
296 list.__init__(self)
228 # the opened file handle
297 # the opened file handle
229 pofile = kwargs.get('pofile', None)
298 pofile = kwargs.get('pofile', None)
230 if pofile and os.path.exists(pofile):
299 if pofile and _is_file(pofile):
231 self.fpath = pofile
300 self.fpath = pofile
232 else:
301 else:
233 self.fpath = kwargs.get('fpath')
302 self.fpath = kwargs.get('fpath')
@@ -254,38 +323,45 b' class _BaseFile(list):'
254 ret.append(entry.__unicode__(self.wrapwidth))
323 ret.append(entry.__unicode__(self.wrapwidth))
255 for entry in self.obsolete_entries():
324 for entry in self.obsolete_entries():
256 ret.append(entry.__unicode__(self.wrapwidth))
325 ret.append(entry.__unicode__(self.wrapwidth))
257 ret = '\n'.join(ret)
326 ret = u('\n').join(ret)
258
327
259 if type(ret) != types.UnicodeType:
328 assert isinstance(ret, text_type)
260 return unicode(ret, self.encoding)
329 #if type(ret) != text_type:
330 # return unicode(ret, self.encoding)
261 return ret
331 return ret
262
332
263 def __str__(self):
333 if PY3:
264 """
334 def __str__(self):
265 Returns the string representation of the file.
335 return self.__unicode__()
266 """
336 else:
267 return unicode(self).encode(self.encoding)
337 def __str__(self):
338 """
339 Returns the string representation of the file.
340 """
341 return unicode(self).encode(self.encoding)
268
342
269 def __contains__(self, entry):
343 def __contains__(self, entry):
270 """
344 """
271 Overriden ``list`` method to implement the membership test (in and
345 Overridden ``list`` method to implement the membership test (in and
272 not in).
346 not in).
273 The method considers that an entry is in the file if it finds an entry
347 The method considers that an entry is in the file if it finds an entry
274 that has the same msgid (the test is **case sensitive**).
348 that has the same msgid (the test is **case sensitive**) and the same
349 msgctxt (or none for both entries).
275
350
276 Argument:
351 Argument:
277
352
278 ``entry``
353 ``entry``
279 an instance of :class:`~polib._BaseEntry`.
354 an instance of :class:`~polib._BaseEntry`.
280 """
355 """
281 return self.find(entry.msgid, by='msgid') is not None
356 return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
357 is not None
282
358
283 def __eq__(self, other):
359 def __eq__(self, other):
284 return unicode(self) == unicode(other)
360 return str(self) == str(other)
285
361
286 def append(self, entry):
362 def append(self, entry):
287 """
363 """
288 Overriden method to check for duplicates entries, if a user tries to
364 Overridden method to check for duplicates entries, if a user tries to
289 add an entry that is already in the file, the method will raise a
365 add an entry that is already in the file, the method will raise a
290 ``ValueError`` exception.
366 ``ValueError`` exception.
291
367
@@ -300,7 +376,7 b' class _BaseFile(list):'
300
376
301 def insert(self, index, entry):
377 def insert(self, index, entry):
302 """
378 """
303 Overriden method to check for duplicates entries, if a user tries to
379 Overridden method to check for duplicates entries, if a user tries to
304 add an entry that is already in the file, the method will raise a
380 add an entry that is already in the file, the method will raise a
305 ``ValueError`` exception.
381 ``ValueError`` exception.
306
382
@@ -332,7 +408,7 b' class _BaseFile(list):'
332 e.flags.append('fuzzy')
408 e.flags.append('fuzzy')
333 return e
409 return e
334
410
335 def save(self, fpath=None, repr_method='__str__'):
411 def save(self, fpath=None, repr_method='__unicode__'):
336 """
412 """
337 Saves the po file to ``fpath``.
413 Saves the po file to ``fpath``.
338 If it is an existing file and no ``fpath`` is provided, then the
414 If it is an existing file and no ``fpath`` is provided, then the
@@ -354,8 +430,8 b' class _BaseFile(list):'
354 if repr_method == 'to_binary':
430 if repr_method == 'to_binary':
355 fhandle = open(fpath, 'wb')
431 fhandle = open(fpath, 'wb')
356 else:
432 else:
357 fhandle = codecs.open(fpath, 'w', self.encoding)
433 fhandle = io.open(fpath, 'w', encoding=self.encoding)
358 if type(contents) != types.UnicodeType:
434 if not isinstance(contents, text_type):
359 contents = contents.decode(self.encoding)
435 contents = contents.decode(self.encoding)
360 fhandle.write(contents)
436 fhandle.write(contents)
361 fhandle.close()
437 fhandle.close()
@@ -381,7 +457,7 b' class _BaseFile(list):'
381 boolean, whether to also search in entries that are obsolete.
457 boolean, whether to also search in entries that are obsolete.
382
458
383 ``msgctxt``
459 ``msgctxt``
384 string, allows to specify a specific message context for the
460 string, allows specifying a specific message context for the
385 search.
461 search.
386 """
462 """
387 if include_obsolete_entries:
463 if include_obsolete_entries:
@@ -390,7 +466,7 b' class _BaseFile(list):'
390 entries = [e for e in self if not e.obsolete]
466 entries = [e for e in self if not e.obsolete]
391 for e in entries:
467 for e in entries:
392 if getattr(e, by) == st:
468 if getattr(e, by) == st:
393 if msgctxt and e.msgctxt != msgctxt:
469 if msgctxt is not False and e.msgctxt != msgctxt:
394 continue
470 continue
395 return e
471 return e
396 return None
472 return None
@@ -412,7 +488,9 b' class _BaseFile(list):'
412 'Language-Team',
488 'Language-Team',
413 'MIME-Version',
489 'MIME-Version',
414 'Content-Type',
490 'Content-Type',
415 'Content-Transfer-Encoding'
491 'Content-Transfer-Encoding',
492 'Language',
493 'Plural-Forms'
416 ]
494 ]
417 ordered_data = []
495 ordered_data = []
418 for data in data_order:
496 for data in data_order:
@@ -423,9 +501,7 b' class _BaseFile(list):'
423 pass
501 pass
424 # the rest of the metadata will be alphabetically ordered since there
502 # the rest of the metadata will be alphabetically ordered since there
425 # are no specs for this AFAIK
503 # are no specs for this AFAIK
426 keys = metadata.keys()
504 for data in sorted(metadata.keys()):
427 keys.sort()
428 for data in keys:
429 value = metadata[data]
505 value = metadata[data]
430 ordered_data.append((data, value))
506 ordered_data.append((data, value))
431 return ordered_data
507 return ordered_data
@@ -436,18 +512,12 b' class _BaseFile(list):'
436 """
512 """
437 offsets = []
513 offsets = []
438 entries = self.translated_entries()
514 entries = self.translated_entries()
515
439 # the keys are sorted in the .mo file
516 # the keys are sorted in the .mo file
440 def cmp(_self, other):
517 def cmp(_self, other):
441 # msgfmt compares entries with msgctxt if it exists
518 # msgfmt compares entries with msgctxt if it exists
442 if _self.msgctxt:
519 self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
443 self_msgid = _self.msgctxt
520 other_msgid = other.msgctxt and other.msgctxt or other.msgid
444 else:
445 self_msgid = _self.msgid
446
447 if other.msgctxt:
448 other_msgid = other.msgctxt
449 else:
450 other_msgid = other.msgid
451 if self_msgid > other_msgid:
521 if self_msgid > other_msgid:
452 return 1
522 return 1
453 elif self_msgid < other_msgid:
523 elif self_msgid < other_msgid:
@@ -455,25 +525,23 b' class _BaseFile(list):'
455 else:
525 else:
456 return 0
526 return 0
457 # add metadata entry
527 # add metadata entry
458 entries.sort(cmp)
528 entries.sort(key=lambda o: o.msgctxt or o.msgid)
459 mentry = self.metadata_as_entry()
529 mentry = self.metadata_as_entry()
460 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
530 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
461 entries = [mentry] + entries
531 entries = [mentry] + entries
462 entries_len = len(entries)
532 entries_len = len(entries)
463 ids, strs = '', ''
533 ids, strs = b(''), b('')
464 for e in entries:
534 for e in entries:
465 # For each string, we need size and file offset. Each string is
535 # For each string, we need size and file offset. Each string is
466 # NUL terminated; the NUL does not count into the size.
536 # NUL terminated; the NUL does not count into the size.
467 msgid = ''
537 msgid = b('')
468 if e.msgctxt:
538 if e.msgctxt:
469 # Contexts are stored by storing the concatenation of the
539 # Contexts are stored by storing the concatenation of the
470 # context, a <EOT> byte, and the original string
540 # context, a <EOT> byte, and the original string
471 msgid = self._encode(e.msgctxt + '\4')
541 msgid = self._encode(e.msgctxt + '\4')
472 if e.msgid_plural:
542 if e.msgid_plural:
473 indexes = e.msgstr_plural.keys()
474 indexes.sort()
475 msgstr = []
543 msgstr = []
476 for index in indexes:
544 for index in sorted(e.msgstr_plural.keys()):
477 msgstr.append(e.msgstr_plural[index])
545 msgstr.append(e.msgstr_plural[index])
478 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
546 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
479 msgstr = self._encode('\0'.join(msgstr))
547 msgstr = self._encode('\0'.join(msgstr))
@@ -481,11 +549,11 b' class _BaseFile(list):'
481 msgid += self._encode(e.msgid)
549 msgid += self._encode(e.msgid)
482 msgstr = self._encode(e.msgstr)
550 msgstr = self._encode(e.msgstr)
483 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
551 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
484 ids += msgid + '\0'
552 ids += msgid + b('\0')
485 strs += msgstr + '\0'
553 strs += msgstr + b('\0')
486
554
487 # The header is 7 32-bit unsigned integers.
555 # The header is 7 32-bit unsigned integers.
488 keystart = 7*4+16*entries_len
556 keystart = 7 * 4 + 16 * entries_len
489 # and the values start after the keys
557 # and the values start after the keys
490 valuestart = keystart + len(ids)
558 valuestart = keystart + len(ids)
491 koffsets = []
559 koffsets = []
@@ -493,26 +561,30 b' class _BaseFile(list):'
493 # The string table first has the list of keys, then the list of values.
561 # The string table first has the list of keys, then the list of values.
494 # Each entry has first the size of the string, then the file offset.
562 # Each entry has first the size of the string, then the file offset.
495 for o1, l1, o2, l2 in offsets:
563 for o1, l1, o2, l2 in offsets:
496 koffsets += [l1, o1+keystart]
564 koffsets += [l1, o1 + keystart]
497 voffsets += [l2, o2+valuestart]
565 voffsets += [l2, o2 + valuestart]
498 offsets = koffsets + voffsets
566 offsets = koffsets + voffsets
499 # check endianness for magic number
500 if struct.pack('@h', 1) == struct.pack('<h', 1):
501 magic_number = MOFile.LITTLE_ENDIAN
502 else:
503 magic_number = MOFile.BIG_ENDIAN
504
567
505 output = struct.pack(
568 output = struct.pack(
506 "Iiiiiii",
569 "Iiiiiii",
507 magic_number, # Magic number
570 # Magic number
508 0, # Version
571 MOFile.MAGIC,
509 entries_len, # # of entries
572 # Version
510 7*4, # start of key index
573 0,
511 7*4+entries_len*8, # start of value index
574 # number of entries
512 0, keystart # size and offset of hash table
575 entries_len,
513 # Important: we don't use hash tables
576 # start of key index
577 7 * 4,
578 # start of value index
579 7 * 4 + entries_len * 8,
580 # size and offset of hash table, we don't use hash tables
581 0, keystart
582
514 )
583 )
515 output += array.array("i", offsets).tostring()
584 if PY3 and sys.version_info.minor > 1: # python 3.2 or superior
585 output += array.array("i", offsets).tobytes()
586 else:
587 output += array.array("i", offsets).tostring()
516 output += ids
588 output += ids
517 output += strs
589 output += strs
518 return output
590 return output
@@ -522,13 +594,13 b' class _BaseFile(list):'
522 Encodes the given ``mixed`` argument with the file encoding if and
594 Encodes the given ``mixed`` argument with the file encoding if and
523 only if it's a unicode string and returns the encoded string.
595 only if it's a unicode string and returns the encoded string.
524 """
596 """
525 if type(mixed) == types.UnicodeType:
597 if isinstance(mixed, text_type):
526 return mixed.encode(self.encoding)
598 mixed = mixed.encode(self.encoding)
527 return mixed
599 return mixed
528
529 # }}}
600 # }}}
530 # class POFile {{{
601 # class POFile {{{
531
602
603
532 class POFile(_BaseFile):
604 class POFile(_BaseFile):
533 """
605 """
534 Po (or Pot) file reader/writer.
606 Po (or Pot) file reader/writer.
@@ -542,13 +614,15 b' class POFile(_BaseFile):'
542 """
614 """
543 ret, headers = '', self.header.split('\n')
615 ret, headers = '', self.header.split('\n')
544 for header in headers:
616 for header in headers:
545 if header[:1] in [',', ':']:
617 if not len(header):
618 ret += "#\n"
619 elif header[:1] in [',', ':']:
546 ret += '#%s\n' % header
620 ret += '#%s\n' % header
547 else:
621 else:
548 ret += '# %s\n' % header
622 ret += '# %s\n' % header
549
623
550 if type(ret) != types.UnicodeType:
624 if not isinstance(ret, text_type):
551 ret = unicode(ret, self.encoding)
625 ret = ret.decode(self.encoding)
552
626
553 return ret + _BaseFile.__unicode__(self)
627 return ret + _BaseFile.__unicode__(self)
554
628
@@ -572,7 +646,7 b' class POFile(_BaseFile):'
572 if total == 0:
646 if total == 0:
573 return 100
647 return 100
574 translated = len(self.translated_entries())
648 translated = len(self.translated_entries())
575 return int((100.00 / float(total)) * translated)
649 return int(translated * 100 / float(total))
576
650
577 def translated_entries(self):
651 def translated_entries(self):
578 """
652 """
@@ -584,7 +658,7 b' class POFile(_BaseFile):'
584 """
658 """
585 Convenience method that returns the list of untranslated entries.
659 Convenience method that returns the list of untranslated entries.
586 """
660 """
587 return [e for e in self if not e.translated() and not e.obsolete \
661 return [e for e in self if not e.translated() and not e.obsolete
588 and not 'fuzzy' in e.flags]
662 and not 'fuzzy' in e.flags]
589
663
590 def fuzzy_entries(self):
664 def fuzzy_entries(self):
@@ -615,28 +689,32 b' class POFile(_BaseFile):'
615 ``refpot``
689 ``refpot``
616 object POFile, the reference catalog.
690 object POFile, the reference catalog.
617 """
691 """
692 # Store entries in dict/set for faster access
693 self_entries = dict((entry.msgid, entry) for entry in self)
694 refpot_msgids = set(entry.msgid for entry in refpot)
695 # Merge entries that are in the refpot
618 for entry in refpot:
696 for entry in refpot:
619 e = self.find(entry.msgid, include_obsolete_entries=True)
697 e = self_entries.get(entry.msgid)
620 if e is None:
698 if e is None:
621 e = POEntry()
699 e = POEntry()
622 self.append(e)
700 self.append(e)
623 e.merge(entry)
701 e.merge(entry)
624 # ok, now we must "obsolete" entries that are not in the refpot anymore
702 # ok, now we must "obsolete" entries that are not in the refpot anymore
625 for entry in self:
703 for entry in self:
626 if refpot.find(entry.msgid) is None:
704 if entry.msgid not in refpot_msgids:
627 entry.obsolete = True
705 entry.obsolete = True
628
629 # }}}
706 # }}}
630 # class MOFile {{{
707 # class MOFile {{{
631
708
709
632 class MOFile(_BaseFile):
710 class MOFile(_BaseFile):
633 """
711 """
634 Mo file reader/writer.
712 Mo file reader/writer.
635 This class inherits the :class:`~polib._BaseFile` class and, by
713 This class inherits the :class:`~polib._BaseFile` class and, by
636 extension, the python ``list`` type.
714 extension, the python ``list`` type.
637 """
715 """
638 BIG_ENDIAN = 0xde120495
716 MAGIC = 0x950412de
639 LITTLE_ENDIAN = 0x950412de
717 MAGIC_SWAPPED = 0xde120495
640
718
641 def __init__(self, *args, **kwargs):
719 def __init__(self, *args, **kwargs):
642 """
720 """
@@ -698,10 +776,10 b' class MOFile(_BaseFile):'
698 Convenience method to keep the same interface with POFile instances.
776 Convenience method to keep the same interface with POFile instances.
699 """
777 """
700 return []
778 return []
701
702 # }}}
779 # }}}
703 # class _BaseEntry {{{
780 # class _BaseEntry {{{
704
781
782
705 class _BaseEntry(object):
783 class _BaseEntry(object):
706 """
784 """
707 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
785 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
@@ -753,12 +831,14 b' class _BaseEntry(object):'
753 ret = []
831 ret = []
754 # write the msgctxt if any
832 # write the msgctxt if any
755 if self.msgctxt is not None:
833 if self.msgctxt is not None:
756 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
834 ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
835 wrapwidth)
757 # write the msgid
836 # write the msgid
758 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
837 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
759 # write the msgid_plural if any
838 # write the msgid_plural if any
760 if self.msgid_plural:
839 if self.msgid_plural:
761 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
840 ret += self._str_field("msgid_plural", delflag, "",
841 self.msgid_plural, wrapwidth)
762 if self.msgstr_plural:
842 if self.msgstr_plural:
763 # write the msgstr_plural if any
843 # write the msgstr_plural if any
764 msgstrs = self.msgstr_plural
844 msgstrs = self.msgstr_plural
@@ -767,30 +847,34 b' class _BaseEntry(object):'
767 for index in keys:
847 for index in keys:
768 msgstr = msgstrs[index]
848 msgstr = msgstrs[index]
769 plural_index = '[%s]' % index
849 plural_index = '[%s]' % index
770 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
850 ret += self._str_field("msgstr", delflag, plural_index, msgstr,
851 wrapwidth)
771 else:
852 else:
772 # otherwise write the msgstr
853 # otherwise write the msgstr
773 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
854 ret += self._str_field("msgstr", delflag, "", self.msgstr,
855 wrapwidth)
774 ret.append('')
856 ret.append('')
775 ret = '\n'.join(ret)
857 ret = u('\n').join(ret)
776
777 if type(ret) != types.UnicodeType:
778 return unicode(ret, self.encoding)
779 return ret
858 return ret
780
859
781 def __str__(self):
860 if PY3:
782 """
861 def __str__(self):
783 Returns the string representation of the entry.
862 return self.__unicode__()
784 """
863 else:
785 return unicode(self).encode(self.encoding)
864 def __str__(self):
865 """
866 Returns the string representation of the entry.
867 """
868 return unicode(self).encode(self.encoding)
786
869
787 def __eq__(self, other):
870 def __eq__(self, other):
788 return unicode(self) == unicode(other)
871 return str(self) == str(other)
789
872
790 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
873 def _str_field(self, fieldname, delflag, plural_index, field,
874 wrapwidth=78):
791 lines = field.splitlines(True)
875 lines = field.splitlines(True)
792 if len(lines) > 1:
876 if len(lines) > 1:
793 lines = [''] + lines # start with initial empty line
877 lines = [''] + lines # start with initial empty line
794 else:
878 else:
795 escaped_field = escape(field)
879 escaped_field = escape(field)
796 specialchars_count = 0
880 specialchars_count = 0
@@ -804,9 +888,9 b' class _BaseEntry(object):'
804 real_wrapwidth = wrapwidth - flength + specialchars_count
888 real_wrapwidth = wrapwidth - flength + specialchars_count
805 if wrapwidth > 0 and len(field) > real_wrapwidth:
889 if wrapwidth > 0 and len(field) > real_wrapwidth:
806 # Wrap the line but take field name into account
890 # Wrap the line but take field name into account
807 lines = [''] + [unescape(item) for item in textwrap.wrap(
891 lines = [''] + [unescape(item) for item in wrap(
808 escaped_field,
892 escaped_field,
809 wrapwidth - 2, # 2 for quotes ""
893 wrapwidth - 2, # 2 for quotes ""
810 drop_whitespace=False,
894 drop_whitespace=False,
811 break_long_words=False
895 break_long_words=False
812 )]
896 )]
@@ -818,13 +902,13 b' class _BaseEntry(object):'
818
902
819 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
903 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
820 escape(lines.pop(0)))]
904 escape(lines.pop(0)))]
821 for mstr in lines:
905 for line in lines:
822 ret.append('%s"%s"' % (delflag, escape(mstr)))
906 ret.append('%s"%s"' % (delflag, escape(line)))
823 return ret
907 return ret
824
825 # }}}
908 # }}}
826 # class POEntry {{{
909 # class POEntry {{{
827
910
911
828 class POEntry(_BaseEntry):
912 class POEntry(_BaseEntry):
829 """
913 """
830 Represents a po file entry.
914 Represents a po file entry.
@@ -854,6 +938,9 b' class POEntry(_BaseEntry):'
854
938
855 ``previous_msgid_plural``
939 ``previous_msgid_plural``
856 string, the entry previous msgid_plural.
940 string, the entry previous msgid_plural.
941
942 ``linenum``
943 integer, the line number of the entry
857 """
944 """
858 _BaseEntry.__init__(self, *args, **kwargs)
945 _BaseEntry.__init__(self, *args, **kwargs)
859 self.comment = kwargs.get('comment', '')
946 self.comment = kwargs.get('comment', '')
@@ -863,6 +950,7 b' class POEntry(_BaseEntry):'
863 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
950 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
864 self.previous_msgid = kwargs.get('previous_msgid', None)
951 self.previous_msgid = kwargs.get('previous_msgid', None)
865 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
952 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
953 self.linenum = kwargs.get('linenum', None)
866
954
867 def __unicode__(self, wrapwidth=78):
955 def __unicode__(self, wrapwidth=78):
868 """
956 """
@@ -879,7 +967,7 b' class POEntry(_BaseEntry):'
879 if val:
967 if val:
880 for comment in val.split('\n'):
968 for comment in val.split('\n'):
881 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
969 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
882 ret += textwrap.wrap(
970 ret += wrap(
883 comment,
971 comment,
884 wrapwidth,
972 wrapwidth,
885 initial_indent=c[1],
973 initial_indent=c[1],
@@ -903,7 +991,7 b' class POEntry(_BaseEntry):'
903 # what we want for filenames, so the dirty hack is to
991 # what we want for filenames, so the dirty hack is to
904 # temporarily replace hyphens with a char that a file cannot
992 # temporarily replace hyphens with a char that a file cannot
905 # contain, like "*"
993 # contain, like "*"
906 ret += [l.replace('*', '-') for l in textwrap.wrap(
994 ret += [l.replace('*', '-') for l in wrap(
907 filestr.replace('-', '*'),
995 filestr.replace('-', '*'),
908 wrapwidth,
996 wrapwidth,
909 initial_indent='#: ',
997 initial_indent='#: ',
@@ -918,32 +1006,25 b' class POEntry(_BaseEntry):'
918 ret.append('#, %s' % ', '.join(self.flags))
1006 ret.append('#, %s' % ', '.join(self.flags))
919
1007
920 # previous context and previous msgid/msgid_plural
1008 # previous context and previous msgid/msgid_plural
921 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
1009 fields = ['previous_msgctxt', 'previous_msgid',
1010 'previous_msgid_plural']
922 for f in fields:
1011 for f in fields:
923 val = getattr(self, f)
1012 val = getattr(self, f)
924 if val:
1013 if val:
925 ret += self._str_field(f, "#| ", "", val, wrapwidth)
1014 ret += self._str_field(f, "#| ", "", val, wrapwidth)
926
1015
927 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
1016 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
928 ret = '\n'.join(ret)
1017 ret = u('\n').join(ret)
929
1018
930 if type(ret) != types.UnicodeType:
1019 assert isinstance(ret, text_type)
931 return unicode(ret, self.encoding)
1020 #if type(ret) != types.UnicodeType:
1021 # return unicode(ret, self.encoding)
932 return ret
1022 return ret
933
1023
934 def __cmp__(self, other):
1024 def __cmp__(self, other):
935 """
1025 """
936 Called by comparison operations if rich comparison is not defined.
1026 Called by comparison operations if rich comparison is not defined.
937 """
1027 """
938 def compare_occurrences(a, b):
939 """
940 Compare an entry occurrence with another one.
941 """
942 if a[0] != b[0]:
943 return a[0] < b[0]
944 if a[1] != b[1]:
945 return a[1] < b[1]
946 return 0
947
1028
948 # First: Obsolete test
1029 # First: Obsolete test
949 if self.obsolete != other.obsolete:
1030 if self.obsolete != other.obsolete:
@@ -952,12 +1033,8 b' class POEntry(_BaseEntry):'
952 else:
1033 else:
953 return 1
1034 return 1
954 # Work on a copy to protect original
1035 # Work on a copy to protect original
955 occ1 = self.occurrences[:]
1036 occ1 = sorted(self.occurrences[:])
956 occ2 = other.occurrences[:]
1037 occ2 = sorted(other.occurrences[:])
957 # Sorting using compare method
958 occ1.sort(compare_occurrences)
959 occ2.sort(compare_occurrences)
960 # Comparing sorted occurrences
961 pos = 0
1038 pos = 0
962 for entry1 in occ1:
1039 for entry1 in occ1:
963 try:
1040 try:
@@ -975,9 +1052,41 b' class POEntry(_BaseEntry):'
975 return 1
1052 return 1
976 else:
1053 else:
977 return -1
1054 return -1
1055 # Compare msgid_plural if set
1056 if self.msgid_plural:
1057 if not other.msgid_plural:
1058 return 1
1059 for pos in self.msgid_plural:
1060 if pos not in other.msgid_plural:
1061 return 1
1062 if self.msgid_plural[pos] > other.msgid_plural[pos]:
1063 return 1
1064 if self.msgid_plural[pos] < other.msgid_plural[pos]:
1065 return -1
978 # Finally: Compare message ID
1066 # Finally: Compare message ID
979 if self.msgid > other.msgid: return 1
1067 if self.msgid > other.msgid:
980 else: return -1
1068 return 1
1069 elif self.msgid < other.msgid:
1070 return -1
1071 return 0
1072
1073 def __gt__(self, other):
1074 return self.__cmp__(other) > 0
1075
1076 def __lt__(self, other):
1077 return self.__cmp__(other) < 0
1078
1079 def __ge__(self, other):
1080 return self.__cmp__(other) >= 0
1081
1082 def __le__(self, other):
1083 return self.__cmp__(other) <= 0
1084
1085 def __eq__(self, other):
1086 return self.__cmp__(other) == 0
1087
1088 def __ne__(self, other):
1089 return self.__cmp__(other) != 0
981
1090
982 def translated(self):
1091 def translated(self):
983 """
1092 """
@@ -1020,18 +1129,49 b' class POEntry(_BaseEntry):'
1020 except KeyError:
1129 except KeyError:
1021 self.msgstr_plural[pos] = ''
1130 self.msgstr_plural[pos] = ''
1022
1131
1132 def __hash__(self):
1133 return hash((self.msgid, self.msgstr))
1023 # }}}
1134 # }}}
1024 # class MOEntry {{{
1135 # class MOEntry {{{
1025
1136
1137
1026 class MOEntry(_BaseEntry):
1138 class MOEntry(_BaseEntry):
1027 """
1139 """
1028 Represents a mo file entry.
1140 Represents a mo file entry.
1029 """
1141 """
1030 pass
1142 def __init__(self, *args, **kwargs):
1143 """
1144 Constructor, accepts the following keyword arguments,
1145 for consistency with :class:`~polib.POEntry`:
1146
1147 ``comment``
1148 ``tcomment``
1149 ``occurrences``
1150 ``flags``
1151 ``previous_msgctxt``
1152 ``previous_msgid``
1153 ``previous_msgid_plural``
1154
1155 Note: even though these keyword arguments are accepted,
1156 they hold no real meaning in the context of MO files
1157 and are simply ignored.
1158 """
1159 _BaseEntry.__init__(self, *args, **kwargs)
1160 self.comment = ''
1161 self.tcomment = ''
1162 self.occurrences = []
1163 self.flags = []
1164 self.previous_msgctxt = None
1165 self.previous_msgid = None
1166 self.previous_msgid_plural = None
1167
1168 def __hash__(self):
1169 return hash((self.msgid, self.msgstr))
1031
1170
1032 # }}}
1171 # }}}
1033 # class _POFileParser {{{
1172 # class _POFileParser {{{
1034
1173
1174
1035 class _POFileParser(object):
1175 class _POFileParser(object):
1036 """
1176 """
1037 A finite state machine to parse efficiently and correctly po
1177 A finite state machine to parse efficiently and correctly po
@@ -1056,23 +1196,27 b' class _POFileParser(object):'
1056 file (optional, default: ``False``).
1196 file (optional, default: ``False``).
1057 """
1197 """
1058 enc = kwargs.get('encoding', default_encoding)
1198 enc = kwargs.get('encoding', default_encoding)
1059 if os.path.exists(pofile):
1199 if _is_file(pofile):
1060 try:
1200 try:
1061 self.fhandle = codecs.open(pofile, 'rU', enc)
1201 self.fhandle = io.open(pofile, 'rt', encoding=enc)
1062 except LookupError:
1202 except LookupError:
1063 enc = default_encoding
1203 enc = default_encoding
1064 self.fhandle = codecs.open(pofile, 'rU', enc)
1204 self.fhandle = io.open(pofile, 'rt', encoding=enc)
1065 else:
1205 else:
1066 self.fhandle = pofile.splitlines()
1206 self.fhandle = pofile.splitlines()
1067
1207
1068 self.instance = POFile(
1208 klass = kwargs.get('klass')
1209 if klass is None:
1210 klass = POFile
1211 self.instance = klass(
1069 pofile=pofile,
1212 pofile=pofile,
1070 encoding=enc,
1213 encoding=enc,
1071 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1214 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1072 )
1215 )
1073 self.transitions = {}
1216 self.transitions = {}
1074 self.current_entry = POEntry()
1217 self.current_line = 0
1075 self.current_state = 'ST'
1218 self.current_entry = POEntry(linenum=self.current_line)
1219 self.current_state = 'st'
1076 self.current_token = None
1220 self.current_token = None
1077 # two memo flags used in handlers
1221 # two memo flags used in handlers
1078 self.msgstr_index = 0
1222 self.msgstr_index = 0
@@ -1083,7 +1227,7 b' class _POFileParser(object):'
1083 # * HE: Header
1227 # * HE: Header
1084 # * TC: a translation comment
1228 # * TC: a translation comment
1085 # * GC: a generated comment
1229 # * GC: a generated comment
1086 # * OC: a file/line occurence
1230 # * OC: a file/line occurrence
1087 # * FL: a flags line
1231 # * FL: a flags line
1088 # * CT: a message context
1232 # * CT: a message context
1089 # * PC: a previous msgctxt
1233 # * PC: a previous msgctxt
@@ -1094,48 +1238,47 b' class _POFileParser(object):'
1094 # * MS: a msgstr
1238 # * MS: a msgstr
1095 # * MX: a msgstr plural
1239 # * MX: a msgstr plural
1096 # * MC: a msgid or msgstr continuation line
1240 # * MC: a msgid or msgstr continuation line
1097 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1241 all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
1098 'MS', 'MP', 'MX', 'MI']
1242 'ms', 'mp', 'mx', 'mi']
1099
1243
1100 self.add('TC', ['ST', 'HE'], 'HE')
1244 self.add('tc', ['st', 'he'], 'he')
1101 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1245 self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
1102 'MP', 'MX', 'MI'], 'TC')
1246 'mp', 'mx', 'mi'], 'tc')
1103 self.add('GC', all, 'GC')
1247 self.add('gc', all, 'gc')
1104 self.add('OC', all, 'OC')
1248 self.add('oc', all, 'oc')
1105 self.add('FL', all, 'FL')
1249 self.add('fl', all, 'fl')
1106 self.add('PC', all, 'PC')
1250 self.add('pc', all, 'pc')
1107 self.add('PM', all, 'PM')
1251 self.add('pm', all, 'pm')
1108 self.add('PP', all, 'PP')
1252 self.add('pp', all, 'pp')
1109 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1253 self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
1110 'PP', 'MS', 'MX'], 'CT')
1254 'pp', 'ms', 'mx'], 'ct')
1111 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1255 self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
1112 'PM', 'PP', 'MS', 'MX'], 'MI')
1256 'pm', 'pp', 'ms', 'mx'], 'mi')
1113 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1257 self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
1114 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1258 self.add('ms', ['mi', 'mp', 'tc'], 'ms')
1115 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1259 self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
1116 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1260 self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
1117
1261
1118 def parse(self):
1262 def parse(self):
1119 """
1263 """
1120 Run the state machine, parse the file line by line and call process()
1264 Run the state machine, parse the file line by line and call process()
1121 with the current matched symbol.
1265 with the current matched symbol.
1122 """
1266 """
1123 i = 0
1124
1267
1125 keywords = {
1268 keywords = {
1126 'msgctxt': 'CT',
1269 'msgctxt': 'ct',
1127 'msgid': 'MI',
1270 'msgid': 'mi',
1128 'msgstr': 'MS',
1271 'msgstr': 'ms',
1129 'msgid_plural': 'MP',
1272 'msgid_plural': 'mp',
1130 }
1273 }
1131 prev_keywords = {
1274 prev_keywords = {
1132 'msgid_plural': 'PP',
1275 'msgid_plural': 'pp',
1133 'msgid': 'PM',
1276 'msgid': 'pm',
1134 'msgctxt': 'PC',
1277 'msgctxt': 'pc',
1135 }
1278 }
1136
1279 tokens = []
1137 for line in self.fhandle:
1280 for line in self.fhandle:
1138 i += 1
1281 self.current_line += 1
1139 line = line.strip()
1282 line = line.strip()
1140 if line == '':
1283 if line == '':
1141 continue
1284 continue
@@ -1143,6 +1286,9 b' class _POFileParser(object):'
1143 tokens = line.split(None, 2)
1286 tokens = line.split(None, 2)
1144 nb_tokens = len(tokens)
1287 nb_tokens = len(tokens)
1145
1288
1289 if tokens[0] == '#~|':
1290 continue
1291
1146 if tokens[0] == '#~' and nb_tokens > 1:
1292 if tokens[0] == '#~' and nb_tokens > 1:
1147 line = line[3:].strip()
1293 line = line[3:].strip()
1148 tokens = tokens[1:]
1294 tokens = tokens[1:]
@@ -1155,41 +1301,56 b' class _POFileParser(object):'
1155 # msgid, msgid_plural, msgctxt & msgstr.
1301 # msgid, msgid_plural, msgctxt & msgstr.
1156 if tokens[0] in keywords and nb_tokens > 1:
1302 if tokens[0] in keywords and nb_tokens > 1:
1157 line = line[len(tokens[0]):].lstrip()
1303 line = line[len(tokens[0]):].lstrip()
1304 if re.search(r'([^\\]|^)"', line[1:-1]):
1305 raise IOError('Syntax error in po file %s (line %s): '
1306 'unescaped double quote found' %
1307 (self.instance.fpath, self.current_line))
1158 self.current_token = line
1308 self.current_token = line
1159 self.process(keywords[tokens[0]], i)
1309 self.process(keywords[tokens[0]])
1160 continue
1310 continue
1161
1311
1162 self.current_token = line
1312 self.current_token = line
1163
1313
1164 if tokens[0] == '#:' and nb_tokens > 1:
1314 if tokens[0] == '#:':
1315 if nb_tokens <= 1:
1316 continue
1165 # we are on a occurrences line
1317 # we are on a occurrences line
1166 self.process('OC', i)
1318 self.process('oc')
1167
1319
1168 elif line[:1] == '"':
1320 elif line[:1] == '"':
1169 # we are on a continuation line
1321 # we are on a continuation line
1170 self.process('MC', i)
1322 if re.search(r'([^\\]|^)"', line[1:-1]):
1323 raise IOError('Syntax error in po file %s (line %s): '
1324 'unescaped double quote found' %
1325 (self.instance.fpath, self.current_line))
1326 self.process('mc')
1171
1327
1172 elif line[:7] == 'msgstr[':
1328 elif line[:7] == 'msgstr[':
1173 # we are on a msgstr plural
1329 # we are on a msgstr plural
1174 self.process('MX', i)
1330 self.process('mx')
1175
1331
1176 elif tokens[0] == '#,' and nb_tokens > 1:
1332 elif tokens[0] == '#,':
1333 if nb_tokens <= 1:
1334 continue
1177 # we are on a flags line
1335 # we are on a flags line
1178 self.process('FL', i)
1336 self.process('fl')
1179
1337
1180 elif tokens[0] == '#':
1338 elif tokens[0] == '#' or tokens[0].startswith('##'):
1181 if line == '#': line += ' '
1339 if line == '#':
1340 line += ' '
1182 # we are on a translator comment line
1341 # we are on a translator comment line
1183 self.process('TC', i)
1342 self.process('tc')
1184
1343
1185 elif tokens[0] == '#.' and nb_tokens > 1:
1344 elif tokens[0] == '#.':
1345 if nb_tokens <= 1:
1346 continue
1186 # we are on a generated comment line
1347 # we are on a generated comment line
1187 self.process('GC', i)
1348 self.process('gc')
1188
1349
1189 elif tokens[0] == '#|':
1350 elif tokens[0] == '#|':
1190 if nb_tokens < 2:
1351 if nb_tokens <= 1:
1191 self.process('??', i)
1352 raise IOError('Syntax error in po file %s (line %s)' %
1192 continue
1353 (self.instance.fpath, self.current_line))
1193
1354
1194 # Remove the marker and any whitespace right after that.
1355 # Remove the marker and any whitespace right after that.
1195 line = line[2:].lstrip()
1356 line = line[2:].lstrip()
@@ -1197,48 +1358,57 b' class _POFileParser(object):'
1197
1358
1198 if tokens[1].startswith('"'):
1359 if tokens[1].startswith('"'):
1199 # Continuation of previous metadata.
1360 # Continuation of previous metadata.
1200 self.process('MC', i)
1361 self.process('mc')
1201 continue
1362 continue
1202
1363
1203 if nb_tokens == 2:
1364 if nb_tokens == 2:
1204 # Invalid continuation line.
1365 # Invalid continuation line.
1205 self.process('??', i)
1366 raise IOError('Syntax error in po file %s (line %s): '
1367 'invalid continuation line' %
1368 (self.instance.fpath, self.current_line))
1206
1369
1207 # we are on a "previous translation" comment line,
1370 # we are on a "previous translation" comment line,
1208 if tokens[1] not in prev_keywords:
1371 if tokens[1] not in prev_keywords:
1209 # Unknown keyword in previous translation comment.
1372 # Unknown keyword in previous translation comment.
1210 self.process('??', i)
1373 raise IOError('Syntax error in po file %s (line %s): '
1374 'unknown keyword %s' %
1375 (self.instance.fpath, self.current_line,
1376 tokens[1]))
1211
1377
1212 # Remove the keyword and any whitespace
1378 # Remove the keyword and any whitespace
1213 # between it and the starting quote.
1379 # between it and the starting quote.
1214 line = line[len(tokens[1]):].lstrip()
1380 line = line[len(tokens[1]):].lstrip()
1215 self.current_token = line
1381 self.current_token = line
1216 self.process(prev_keywords[tokens[1]], i)
1382 self.process(prev_keywords[tokens[1]])
1217
1383
1218 else:
1384 else:
1219 self.process('??', i)
1385 raise IOError('Syntax error in po file %s (line %s)' %
1386 (self.instance.fpath, self.current_line))
1220
1387
1221 if self.current_entry:
1388 if self.current_entry and len(tokens) > 0 and \
1389 not tokens[0].startswith('#'):
1222 # since entries are added when another entry is found, we must add
1390 # since entries are added when another entry is found, we must add
1223 # the last entry here (only if there are lines)
1391 # the last entry here (only if there are lines). Trailing comments
1392 # are ignored
1224 self.instance.append(self.current_entry)
1393 self.instance.append(self.current_entry)
1394
1225 # before returning the instance, check if there's metadata and if
1395 # before returning the instance, check if there's metadata and if
1226 # so extract it in a dict
1396 # so extract it in a dict
1227 firstentry = self.instance[0]
1397 metadataentry = self.instance.find('')
1228 if firstentry.msgid == '': # metadata found
1398 if metadataentry: # metadata found
1229 # remove the entry
1399 # remove the entry
1230 firstentry = self.instance.pop(0)
1400 self.instance.remove(metadataentry)
1231 self.instance.metadata_is_fuzzy = firstentry.flags
1401 self.instance.metadata_is_fuzzy = metadataentry.flags
1232 key = None
1402 key = None
1233 for msg in firstentry.msgstr.splitlines():
1403 for msg in metadataentry.msgstr.splitlines():
1234 try:
1404 try:
1235 key, val = msg.split(':', 1)
1405 key, val = msg.split(':', 1)
1236 self.instance.metadata[key] = val.strip()
1406 self.instance.metadata[key] = val.strip()
1237 except:
1407 except (ValueError, KeyError):
1238 if key is not None:
1408 if key is not None:
1239 self.instance.metadata[key] += '\n'+ msg.strip()
1409 self.instance.metadata[key] += '\n' + msg.strip()
1240 # close opened file
1410 # close opened file
1241 if isinstance(self.fhandle, file):
1411 if not isinstance(self.fhandle, list): # must be file
1242 self.fhandle.close()
1412 self.fhandle.close()
1243 return self.instance
1413 return self.instance
1244
1414
@@ -1258,10 +1428,10 b' class _POFileParser(object):'
1258 the next state the fsm will have after the action.
1428 the next state the fsm will have after the action.
1259 """
1429 """
1260 for state in states:
1430 for state in states:
1261 action = getattr(self, 'handle_%s' % next_state.lower())
1431 action = getattr(self, 'handle_%s' % next_state)
1262 self.transitions[(symbol, state)] = (action, next_state)
1432 self.transitions[(symbol, state)] = (action, next_state)
1263
1433
1264 def process(self, symbol, linenum):
1434 def process(self, symbol):
1265 """
1435 """
1266 Process the transition corresponding to the current state and the
1436 Process the transition corresponding to the current state and the
1267 symbol provided.
1437 symbol provided.
@@ -1278,8 +1448,9 b' class _POFileParser(object):'
1278 (action, state) = self.transitions[(symbol, self.current_state)]
1448 (action, state) = self.transitions[(symbol, self.current_state)]
1279 if action():
1449 if action():
1280 self.current_state = state
1450 self.current_state = state
1281 except Exception as exc:
1451 except Exception:
1282 raise IOError('Syntax error in po file (line %s)' % linenum)
1452 raise IOError('Syntax error in po file (line %s)' %
1453 self.current_line)
1283
1454
1284 # state handlers
1455 # state handlers
1285
1456
@@ -1292,90 +1463,94 b' class _POFileParser(object):'
1292
1463
1293 def handle_tc(self):
1464 def handle_tc(self):
1294 """Handle a translator comment."""
1465 """Handle a translator comment."""
1295 if self.current_state in ['MC', 'MS', 'MX']:
1466 if self.current_state in ['mc', 'ms', 'mx']:
1296 self.instance.append(self.current_entry)
1467 self.instance.append(self.current_entry)
1297 self.current_entry = POEntry()
1468 self.current_entry = POEntry(linenum=self.current_line)
1298 if self.current_entry.tcomment != '':
1469 if self.current_entry.tcomment != '':
1299 self.current_entry.tcomment += '\n'
1470 self.current_entry.tcomment += '\n'
1300 self.current_entry.tcomment += self.current_token[2:]
1471 tcomment = self.current_token.lstrip('#')
1472 if tcomment.startswith(' '):
1473 tcomment = tcomment[1:]
1474 self.current_entry.tcomment += tcomment
1301 return True
1475 return True
1302
1476
1303 def handle_gc(self):
1477 def handle_gc(self):
1304 """Handle a generated comment."""
1478 """Handle a generated comment."""
1305 if self.current_state in ['MC', 'MS', 'MX']:
1479 if self.current_state in ['mc', 'ms', 'mx']:
1306 self.instance.append(self.current_entry)
1480 self.instance.append(self.current_entry)
1307 self.current_entry = POEntry()
1481 self.current_entry = POEntry(linenum=self.current_line)
1308 if self.current_entry.comment != '':
1482 if self.current_entry.comment != '':
1309 self.current_entry.comment += '\n'
1483 self.current_entry.comment += '\n'
1310 self.current_entry.comment += self.current_token[3:]
1484 self.current_entry.comment += self.current_token[3:]
1311 return True
1485 return True
1312
1486
1313 def handle_oc(self):
1487 def handle_oc(self):
1314 """Handle a file:num occurence."""
1488 """Handle a file:num occurrence."""
1315 if self.current_state in ['MC', 'MS', 'MX']:
1489 if self.current_state in ['mc', 'ms', 'mx']:
1316 self.instance.append(self.current_entry)
1490 self.instance.append(self.current_entry)
1317 self.current_entry = POEntry()
1491 self.current_entry = POEntry(linenum=self.current_line)
1318 occurrences = self.current_token[3:].split()
1492 occurrences = self.current_token[3:].split()
1319 for occurrence in occurrences:
1493 for occurrence in occurrences:
1320 if occurrence != '':
1494 if occurrence != '':
1321 try:
1495 try:
1322 fil, line = occurrence.split(':')
1496 fil, line = occurrence.split(':')
1323 if not line.isdigit():
1497 if not line.isdigit():
1324 fil = fil + line
1498 fil = fil + line
1325 line = ''
1499 line = ''
1326 self.current_entry.occurrences.append((fil, line))
1500 self.current_entry.occurrences.append((fil, line))
1327 except:
1501 except (ValueError, AttributeError):
1328 self.current_entry.occurrences.append((occurrence, ''))
1502 self.current_entry.occurrences.append((occurrence, ''))
1329 return True
1503 return True
1330
1504
1331 def handle_fl(self):
1505 def handle_fl(self):
1332 """Handle a flags line."""
1506 """Handle a flags line."""
1333 if self.current_state in ['MC', 'MS', 'MX']:
1507 if self.current_state in ['mc', 'ms', 'mx']:
1334 self.instance.append(self.current_entry)
1508 self.instance.append(self.current_entry)
1335 self.current_entry = POEntry()
1509 self.current_entry = POEntry(linenum=self.current_line)
1336 self.current_entry.flags += self.current_token[3:].split(', ')
1510 self.current_entry.flags += [c.strip() for c in
1511 self.current_token[3:].split(',')]
1337 return True
1512 return True
1338
1513
1339 def handle_pp(self):
1514 def handle_pp(self):
1340 """Handle a previous msgid_plural line."""
1515 """Handle a previous msgid_plural line."""
1341 if self.current_state in ['MC', 'MS', 'MX']:
1516 if self.current_state in ['mc', 'ms', 'mx']:
1342 self.instance.append(self.current_entry)
1517 self.instance.append(self.current_entry)
1343 self.current_entry = POEntry()
1518 self.current_entry = POEntry(linenum=self.current_line)
1344 self.current_entry.previous_msgid_plural = \
1519 self.current_entry.previous_msgid_plural = \
1345 unescape(self.current_token[1:-1])
1520 unescape(self.current_token[1:-1])
1346 return True
1521 return True
1347
1522
1348 def handle_pm(self):
1523 def handle_pm(self):
1349 """Handle a previous msgid line."""
1524 """Handle a previous msgid line."""
1350 if self.current_state in ['MC', 'MS', 'MX']:
1525 if self.current_state in ['mc', 'ms', 'mx']:
1351 self.instance.append(self.current_entry)
1526 self.instance.append(self.current_entry)
1352 self.current_entry = POEntry()
1527 self.current_entry = POEntry(linenum=self.current_line)
1353 self.current_entry.previous_msgid = \
1528 self.current_entry.previous_msgid = \
1354 unescape(self.current_token[1:-1])
1529 unescape(self.current_token[1:-1])
1355 return True
1530 return True
1356
1531
1357 def handle_pc(self):
1532 def handle_pc(self):
1358 """Handle a previous msgctxt line."""
1533 """Handle a previous msgctxt line."""
1359 if self.current_state in ['MC', 'MS', 'MX']:
1534 if self.current_state in ['mc', 'ms', 'mx']:
1360 self.instance.append(self.current_entry)
1535 self.instance.append(self.current_entry)
1361 self.current_entry = POEntry()
1536 self.current_entry = POEntry(linenum=self.current_line)
1362 self.current_entry.previous_msgctxt = \
1537 self.current_entry.previous_msgctxt = \
1363 unescape(self.current_token[1:-1])
1538 unescape(self.current_token[1:-1])
1364 return True
1539 return True
1365
1540
1366 def handle_ct(self):
1541 def handle_ct(self):
1367 """Handle a msgctxt."""
1542 """Handle a msgctxt."""
1368 if self.current_state in ['MC', 'MS', 'MX']:
1543 if self.current_state in ['mc', 'ms', 'mx']:
1369 self.instance.append(self.current_entry)
1544 self.instance.append(self.current_entry)
1370 self.current_entry = POEntry()
1545 self.current_entry = POEntry(linenum=self.current_line)
1371 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
1546 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
1372 return True
1547 return True
1373
1548
1374 def handle_mi(self):
1549 def handle_mi(self):
1375 """Handle a msgid."""
1550 """Handle a msgid."""
1376 if self.current_state in ['MC', 'MS', 'MX']:
1551 if self.current_state in ['mc', 'ms', 'mx']:
1377 self.instance.append(self.current_entry)
1552 self.instance.append(self.current_entry)
1378 self.current_entry = POEntry()
1553 self.current_entry = POEntry(linenum=self.current_line)
1379 self.current_entry.obsolete = self.entry_obsolete
1554 self.current_entry.obsolete = self.entry_obsolete
1380 self.current_entry.msgid = unescape(self.current_token[1:-1])
1555 self.current_entry.msgid = unescape(self.current_token[1:-1])
1381 return True
1556 return True
@@ -1392,47 +1567,37 b' class _POFileParser(object):'
1392
1567
1393 def handle_mx(self):
1568 def handle_mx(self):
1394 """Handle a msgstr plural."""
1569 """Handle a msgstr plural."""
1395 index, value = self.current_token[7], self.current_token[11:-1]
1570 index = self.current_token[7]
1396 self.current_entry.msgstr_plural[index] = unescape(value)
1571 value = self.current_token[self.current_token.find('"') + 1:-1]
1397 self.msgstr_index = index
1572 self.current_entry.msgstr_plural[int(index)] = unescape(value)
1573 self.msgstr_index = int(index)
1398 return True
1574 return True
1399
1575
1400 def handle_mc(self):
1576 def handle_mc(self):
1401 """Handle a msgid or msgstr continuation line."""
1577 """Handle a msgid or msgstr continuation line."""
1402 token = unescape(self.current_token[1:-1])
1578 token = unescape(self.current_token[1:-1])
1403 if self.current_state == 'CT':
1579 if self.current_state == 'ct':
1404 typ = 'msgctxt'
1405 self.current_entry.msgctxt += token
1580 self.current_entry.msgctxt += token
1406 elif self.current_state == 'MI':
1581 elif self.current_state == 'mi':
1407 typ = 'msgid'
1408 self.current_entry.msgid += token
1582 self.current_entry.msgid += token
1409 elif self.current_state == 'MP':
1583 elif self.current_state == 'mp':
1410 typ = 'msgid_plural'
1411 self.current_entry.msgid_plural += token
1584 self.current_entry.msgid_plural += token
1412 elif self.current_state == 'MS':
1585 elif self.current_state == 'ms':
1413 typ = 'msgstr'
1414 self.current_entry.msgstr += token
1586 self.current_entry.msgstr += token
1415 elif self.current_state == 'MX':
1587 elif self.current_state == 'mx':
1416 typ = 'msgstr[%s]' % self.msgstr_index
1417 self.current_entry.msgstr_plural[self.msgstr_index] += token
1588 self.current_entry.msgstr_plural[self.msgstr_index] += token
1418 elif self.current_state == 'PP':
1589 elif self.current_state == 'pp':
1419 typ = 'previous_msgid_plural'
1420 token = token[3:]
1421 self.current_entry.previous_msgid_plural += token
1590 self.current_entry.previous_msgid_plural += token
1422 elif self.current_state == 'PM':
1591 elif self.current_state == 'pm':
1423 typ = 'previous_msgid'
1424 token = token[3:]
1425 self.current_entry.previous_msgid += token
1592 self.current_entry.previous_msgid += token
1426 elif self.current_state == 'PC':
1593 elif self.current_state == 'pc':
1427 typ = 'previous_msgctxt'
1428 token = token[3:]
1429 self.current_entry.previous_msgctxt += token
1594 self.current_entry.previous_msgctxt += token
1430 # don't change the current state
1595 # don't change the current state
1431 return False
1596 return False
1432
1433 # }}}
1597 # }}}
1434 # class _MOFileParser {{{
1598 # class _MOFileParser {{{
1435
1599
1600
1436 class _MOFileParser(object):
1601 class _MOFileParser(object):
1437 """
1602 """
1438 A class to parse binary mo files.
1603 A class to parse binary mo files.
@@ -1456,12 +1621,24 b' class _MOFileParser(object):'
1456 file (optional, default: ``False``).
1621 file (optional, default: ``False``).
1457 """
1622 """
1458 self.fhandle = open(mofile, 'rb')
1623 self.fhandle = open(mofile, 'rb')
1459 self.instance = MOFile(
1624
1625 klass = kwargs.get('klass')
1626 if klass is None:
1627 klass = MOFile
1628 self.instance = klass(
1460 fpath=mofile,
1629 fpath=mofile,
1461 encoding=kwargs.get('encoding', default_encoding),
1630 encoding=kwargs.get('encoding', default_encoding),
1462 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1631 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1463 )
1632 )
1464
1633
1634 def __del__(self):
1635 """
1636 Make sure the file is closed, this prevents warnings on unclosed file
1637 when running tests with python >= 3.2.
1638 """
1639 if self.fhandle:
1640 self.fhandle.close()
1641
1465 def parse(self):
1642 def parse(self):
1466 """
1643 """
1467 Build the instance with the file handle provided in the
1644 Build the instance with the file handle provided in the
@@ -1469,15 +1646,20 b' class _MOFileParser(object):'
1469 """
1646 """
1470 # parse magic number
1647 # parse magic number
1471 magic_number = self._readbinary('<I', 4)
1648 magic_number = self._readbinary('<I', 4)
1472 if magic_number == MOFile.LITTLE_ENDIAN:
1649 if magic_number == MOFile.MAGIC:
1473 ii = '<II'
1650 ii = '<II'
1474 elif magic_number == MOFile.BIG_ENDIAN:
1651 elif magic_number == MOFile.MAGIC_SWAPPED:
1475 ii = '>II'
1652 ii = '>II'
1476 else:
1653 else:
1477 raise IOError('Invalid mo file, magic number is incorrect !')
1654 raise IOError('Invalid mo file, magic number is incorrect !')
1478 self.instance.magic_number = magic_number
1655 self.instance.magic_number = magic_number
1479 # parse the version number and the number of strings
1656 # parse the version number and the number of strings
1480 self.instance.version, numofstrings = self._readbinary(ii, 8)
1657 version, numofstrings = self._readbinary(ii, 8)
1658 # from MO file format specs: "A program seeing an unexpected major
1659 # revision number should stop reading the MO file entirely"
1660 if version not in (0, 1):
1661 raise IOError('Invalid mo file, unexpected major revision number')
1662 self.instance.version = version
1481 # original strings and translation strings hash table offset
1663 # original strings and translation strings hash table offset
1482 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1664 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1483 # move to msgid hash table and read length and offset of msgids
1665 # move to msgid hash table and read length and offset of msgids
@@ -1491,29 +1673,34 b' class _MOFileParser(object):'
1491 for i in range(numofstrings):
1673 for i in range(numofstrings):
1492 msgstrs_index.append(self._readbinary(ii, 8))
1674 msgstrs_index.append(self._readbinary(ii, 8))
1493 # build entries
1675 # build entries
1676 encoding = self.instance.encoding
1494 for i in range(numofstrings):
1677 for i in range(numofstrings):
1495 self.fhandle.seek(msgids_index[i][1])
1678 self.fhandle.seek(msgids_index[i][1])
1496 msgid = self.fhandle.read(msgids_index[i][0])
1679 msgid = self.fhandle.read(msgids_index[i][0])
1680
1497 self.fhandle.seek(msgstrs_index[i][1])
1681 self.fhandle.seek(msgstrs_index[i][1])
1498 msgstr = self.fhandle.read(msgstrs_index[i][0])
1682 msgstr = self.fhandle.read(msgstrs_index[i][0])
1499 if i == 0: # metadata
1683 if i == 0 and not msgid: # metadata
1500 raw_metadata, metadata = msgstr.split('\n'), {}
1684 raw_metadata, metadata = msgstr.split(b('\n')), {}
1501 for line in raw_metadata:
1685 for line in raw_metadata:
1502 tokens = line.split(':', 1)
1686 tokens = line.split(b(':'), 1)
1503 if tokens[0] != '':
1687 if tokens[0] != b(''):
1504 try:
1688 try:
1505 metadata[tokens[0]] = tokens[1].strip()
1689 k = tokens[0].decode(encoding)
1690 v = tokens[1].decode(encoding)
1691 metadata[k] = v.strip()
1506 except IndexError:
1692 except IndexError:
1507 metadata[tokens[0]] = ''
1693 metadata[k] = u('')
1508 self.instance.metadata = metadata
1694 self.instance.metadata = metadata
1509 continue
1695 continue
1510 # test if we have a plural entry
1696 # test if we have a plural entry
1511 msgid_tokens = msgid.split('\0')
1697 msgid_tokens = msgid.split(b('\0'))
1512 if len(msgid_tokens) > 1:
1698 if len(msgid_tokens) > 1:
1513 entry = self._build_entry(
1699 entry = self._build_entry(
1514 msgid=msgid_tokens[0],
1700 msgid=msgid_tokens[0],
1515 msgid_plural=msgid_tokens[1],
1701 msgid_plural=msgid_tokens[1],
1516 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
1702 msgstr_plural=dict((k, v) for k, v in
1703 enumerate(msgstr.split(b('\0'))))
1517 )
1704 )
1518 else:
1705 else:
1519 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1706 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
@@ -1524,19 +1711,22 b' class _MOFileParser(object):'
1524
1711
1525 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1712 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1526 msgstr_plural=None):
1713 msgstr_plural=None):
1527 msgctxt_msgid = msgid.split('\x04')
1714 msgctxt_msgid = msgid.split(b('\x04'))
1715 encoding = self.instance.encoding
1528 if len(msgctxt_msgid) > 1:
1716 if len(msgctxt_msgid) > 1:
1529 kwargs = {
1717 kwargs = {
1530 'msgctxt': msgctxt_msgid[0],
1718 'msgctxt': msgctxt_msgid[0].decode(encoding),
1531 'msgid' : msgctxt_msgid[1],
1719 'msgid': msgctxt_msgid[1].decode(encoding),
1532 }
1720 }
1533 else:
1721 else:
1534 kwargs = {'msgid': msgid}
1722 kwargs = {'msgid': msgid.decode(encoding)}
1535 if msgstr:
1723 if msgstr:
1536 kwargs['msgstr'] = msgstr
1724 kwargs['msgstr'] = msgstr.decode(encoding)
1537 if msgid_plural:
1725 if msgid_plural:
1538 kwargs['msgid_plural'] = msgid_plural
1726 kwargs['msgid_plural'] = msgid_plural.decode(encoding)
1539 if msgstr_plural:
1727 if msgstr_plural:
1728 for k in msgstr_plural:
1729 msgstr_plural[k] = msgstr_plural[k].decode(encoding)
1540 kwargs['msgstr_plural'] = msgstr_plural
1730 kwargs['msgstr_plural'] = msgstr_plural
1541 return MOEntry(**kwargs)
1731 return MOEntry(**kwargs)
1542
1732
@@ -1550,5 +1740,99 b' class _MOFileParser(object):'
1550 if len(tup) == 1:
1740 if len(tup) == 1:
1551 return tup[0]
1741 return tup[0]
1552 return tup
1742 return tup
1743 # }}}
1744 # class TextWrapper {{{
1745
1746
1747 class TextWrapper(textwrap.TextWrapper):
1748 """
1749 Subclass of textwrap.TextWrapper that backports the
1750 drop_whitespace option.
1751 """
1752 def __init__(self, *args, **kwargs):
1753 drop_whitespace = kwargs.pop('drop_whitespace', True)
1754 textwrap.TextWrapper.__init__(self, *args, **kwargs)
1755 self.drop_whitespace = drop_whitespace
1756
1757 def _wrap_chunks(self, chunks):
1758 """_wrap_chunks(chunks : [string]) -> [string]
1759
1760 Wrap a sequence of text chunks and return a list of lines of
1761 length 'self.width' or less. (If 'break_long_words' is false,
1762 some lines may be longer than this.) Chunks correspond roughly
1763 to words and the whitespace between them: each chunk is
1764 indivisible (modulo 'break_long_words'), but a line break can
1765 come between any two chunks. Chunks should not have internal
1766 whitespace; i.e. a chunk is either all whitespace or a "word".
1767 Whitespace chunks will be removed from the beginning and end of
1768 lines, but apart from that whitespace is preserved.
1769 """
1770 lines = []
1771 if self.width <= 0:
1772 raise ValueError("invalid width %r (must be > 0)" % self.width)
1773
1774 # Arrange in reverse order so items can be efficiently popped
1775 # from a stack of chunks.
1776 chunks.reverse()
1777
1778 while chunks:
1779
1780 # Start the list of chunks that will make up the current line.
1781 # cur_len is just the length of all the chunks in cur_line.
1782 cur_line = []
1783 cur_len = 0
1784
1785 # Figure out which static string will prefix this line.
1786 if lines:
1787 indent = self.subsequent_indent
1788 else:
1789 indent = self.initial_indent
1790
1791 # Maximum width for this line.
1792 width = self.width - len(indent)
1793
1794 # First chunk on line is whitespace -- drop it, unless this
1795 # is the very beginning of the text (i.e. no lines started yet).
1796 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
1797 del chunks[-1]
1798
1799 while chunks:
1800 l = len(chunks[-1])
1801
1802 # Can at least squeeze this chunk onto the current line.
1803 if cur_len + l <= width:
1804 cur_line.append(chunks.pop())
1805 cur_len += l
1806
1807 # Nope, this line is full.
1808 else:
1809 break
1810
1811 # The current line is full, and the next chunk is too big to
1812 # fit on *any* line (not just this one).
1813 if chunks and len(chunks[-1]) > width:
1814 self._handle_long_word(chunks, cur_line, cur_len, width)
1815
1816 # If the last chunk on this line is all whitespace, drop it.
1817 if self.drop_whitespace and cur_line and not cur_line[-1].strip():
1818 del cur_line[-1]
1819
1820 # Convert current line back to a string and store it in list
1821 # of all lines (return value).
1822 if cur_line:
1823 lines.append(indent + ''.join(cur_line))
1824
1825 return lines
1826 # }}}
1827 # function wrap() {{{
1828
1829
1830 def wrap(text, width=70, **kwargs):
1831 """
1832 Wrap a single paragraph of text, returning a list of wrapped lines.
1833 """
1834 if sys.version_info < (2, 6):
1835 return TextWrapper(width=width, **kwargs).wrap(text)
1836 return textwrap.wrap(text, width=width, **kwargs)
1553
1837
1554 # }}}
1838 # }}}
General Comments 0
You need to be logged in to leave comments. Login now