upstream/mercurial-mirror Commit - r40221:19fc5a98

polib: update to latest release 1.0.7 (upstream rev d75ce6dbbc2a)...

Augie Fackler -

r40221:19fc5a98 default

parent child

i18n/check-translation.py

0 0 -8

                      failures, tests = doctest.testmod()
                      sys.exit(failures and 1 or 0)
-                 # replace polib._POFileParser to show linenum of problematic msgstr
-                 class ExtPOFileParser(polib._POFileParser):
-                     def process(self, symbol, linenum):
-                         super(ExtPOFileParser, self).process(symbol, linenum)
-                         if symbol == 'MS': # msgstr
-                             self.current_entry.linenum = linenum
-                 polib._POFileParser = ExtPOFileParser
                  detected = []
                  warning = options.warning
                  for f in args:

i18n/polib.py

0 +551 -267

-             # -*- coding: utf-8 -*-
-             # no-check-code
+             # -* coding: utf-8 -*-
              #
              # License: MIT (see LICENSE file provided)
              # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
              from __future__ import absolute_import
-             __author__    = 'David Jean Louis <izimobil@gmail.com>'
-             __version__   = '0.6.4'
-             __all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
-                              'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
+             __author__ = 'David Jean Louis <izimobil@gmail.com>'
+             __version__ = '1.0.7'
+             __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+                        'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
              import array
              import codecs
              import struct
              import sys
              import textwrap
-             import types
+             try:
+                 import io
+             except ImportError:
+                 # replacement of io.open() for python < 2.6
+                 # we use codecs instead
+                 class io(object):
+                     @staticmethod
+                     def open(fpath, mode='r', encoding=None):
+                         return codecs.open(fpath, mode, encoding)
              # the default encoding to use when encoding cannot be detected
              default_encoding = 'utf-8'
+             # python 2/3 compatibility helpers {{{
+             if sys.version_info[:2] < (3, 0):
+                 PY3 = False
+                 text_type = unicode
+                 def b(s):
+                     return s
+                 def u(s):
+                     return unicode(s, "unicode_escape")
+             else:
+                 PY3 = True
+                 text_type = str
+                 def b(s):
+                     return s.encode("latin-1")
+                 def u(s):
+                     return s
+             # }}}
              # _pofile_or_mofile {{{
              def _pofile_or_mofile(f, type, **kwargs):
                  """
                  Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
                  parser = kls(
                      f,
                      encoding=enc,
-                     check_for_duplicates=kwargs.get('check_for_duplicates', False)
+                     check_for_duplicates=kwargs.get('check_for_duplicates', False),
+                     klass=kwargs.get('klass')
                  )
                  instance = parser.parse()
                  instance.wrapwidth = kwargs.get('wrapwidth', 78)
                  return instance
+             # }}}
+             # _is_file {{{
+             def _is_file(filename_or_contents):
+                 """
+                 Safely returns the value of os.path.exists(filename_or_contents).
+                 Arguments:
+                 ``filename_or_contents``
+                     either a filename, or a string holding the contents of some file.
+                     In the latter case, this function will always return False.
+                 """
+                 try:
+                     return os.path.exists(filename_or_contents)
+                 except (ValueError, UnicodeEncodeError):
+                     return False
              # }}}
              # function pofile() {{{
              def pofile(pofile, **kwargs):
                  """
                  Convenience function that parses the po or pot file ``pofile`` and returns
                  ``check_for_duplicates``
                      whether to check for duplicate entries when adding entries to the
                      file (optional, default: ``False``).
+                 ``klass``
+                     class which is used to instantiate the return value (optional,
+                     default: ``None``, the return value with be a :class:`~polib.POFile`
+                     instance).
                  """
                  return _pofile_or_mofile(pofile, 'pofile', **kwargs)
              # }}}
              # function mofile() {{{
              def mofile(mofile, **kwargs):
                  """
                  Convenience function that parses the mo file ``mofile`` and returns a
                  ``check_for_duplicates``
                      whether to check for duplicate entries when adding entries to the
                      file (optional, default: ``False``).
+                 ``klass``
+                     class which is used to instantiate the return value (optional,
+                     default: ``None``, the return value with be a :class:`~polib.POFile`
+                     instance).
                  """
                  return _pofile_or_mofile(mofile, 'mofile', **kwargs)
              # }}}
              # function detect_encoding() {{{
              def detect_encoding(file, binary_mode=False):
                  """
                  Try to detect the encoding used by the ``file``. The ``file`` argument can
                  ``binary_mode``
                      boolean, set this to True if ``file`` is a mo file.
                  """
-                 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
+                 PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
+                 rxt = re.compile(u(PATTERN))
+                 rxb = re.compile(b(PATTERN))
                  def charset_exists(charset):
                      """Check whether ``charset`` is valid or not."""
                          return False
                      return True
-                 if not os.path.exists(file):
-                     match = rx.search(file)
+                 if not _is_file(file):
+                     match = rxt.search(file)
                      if match:
                          enc = match.group(1).strip()
                          if charset_exists(enc):
                              return enc
                  else:
-                     if binary_mode:
+                     # For PY3, always treat as binary
+                     if binary_mode or PY3:
                          mode = 'rb'
+                         rx = rxb
                      else:
                          mode = 'r'
+                         rx = rxt
                      f = open(file, mode)
                      for l in f.readlines():
                          match = rx.search(l)
                          if match:
                              f.close()
                              enc = match.group(1).strip()
+                             if not isinstance(enc, text_type):
+                                 enc = enc.decode('utf-8')
                              if charset_exists(enc):
                                  return enc
                      f.close()
                  return default_encoding
              # }}}
              # function escape() {{{
              def escape(st):
                  """
                  Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
                           .replace('\r', r'\r')\
                           .replace('\n', r'\n')\
                           .replace('\"', r'\"')
              # }}}
              # function unescape() {{{
              def unescape(st):
                  """
                  Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
                          return '\r'
                      if m == '\\':
                          return '\\'
-                     return m # handles escaped double quote
+                     return m  # handles escaped double quote
                  return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
              # }}}
              # class _BaseFile {{{
              class _BaseFile(list):
                  """
                  Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
                      list.__init__(self)
                      # the opened file handle
                      pofile = kwargs.get('pofile', None)
-                     if pofile and os.path.exists(pofile):
+                     if pofile and _is_file(pofile):
                          self.fpath = pofile
                      else:
                          self.fpath = kwargs.get('fpath')
                          ret.append(entry.__unicode__(self.wrapwidth))
                      for entry in self.obsolete_entries():
                          ret.append(entry.__unicode__(self.wrapwidth))
-                     ret = '\n'.join(ret)
+                     ret = u('\n').join(ret)
-                     if type(ret) != types.UnicodeType:
-                         return unicode(ret, self.encoding)
+                     assert isinstance(ret, text_type)
+                     #if type(ret) != text_type:
+                     #    return unicode(ret, self.encoding)
                      return ret
-                 def __str__(self):
-                     """
-                     Returns the string representation of the file.
-                     """
-                     return unicode(self).encode(self.encoding)
+                 if PY3:
+                     def __str__(self):
+                         return self.__unicode__()
+                 else:
+                     def __str__(self):
+                         """
+                         Returns the string representation of the file.
+                         """
+                         return unicode(self).encode(self.encoding)
                  def __contains__(self, entry):
                      """
-                     Overriden ``list`` method to implement the membership test (in and
+                     Overridden ``list`` method to implement the membership test (in and
                      not in).
                      The method considers that an entry is in the file if it finds an entry
-                     that has the same msgid (the test is **case sensitive**).
+                     that has the same msgid (the test is **case sensitive**) and the same
+                     msgctxt (or none for both entries).
                      Argument:
                      ``entry``
                          an instance of :class:`~polib._BaseEntry`.
                      """
-                     return self.find(entry.msgid, by='msgid') is not None
+                     return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
+                         is not None
                  def __eq__(self, other):
-                     return unicode(self) == unicode(other)
+                     return str(self) == str(other)
                  def append(self, entry):
                      """
-                     Overriden method to check for duplicates entries, if a user tries to
+                     Overridden method to check for duplicates entries, if a user tries to
                      add an entry that is already in the file, the method will raise a
                      ``ValueError`` exception.
                  def insert(self, index, entry):
                      """
-                     Overriden method to check for duplicates entries, if a user tries to
+                     Overridden method to check for duplicates entries, if a user tries to
                      add an entry that is already in the file, the method will raise a
                      ``ValueError`` exception.
                          e.flags.append('fuzzy')
                      return e
-                 def save(self, fpath=None, repr_method='__str__'):
+                 def save(self, fpath=None, repr_method='__unicode__'):
                      """
                      Saves the po file to ``fpath``.
                      If it is an existing file and no ``fpath`` is provided, then the
                      if repr_method == 'to_binary':
                          fhandle = open(fpath, 'wb')
                      else:
-                         fhandle = codecs.open(fpath, 'w', self.encoding)
-                         if type(contents) != types.UnicodeType:
+                         fhandle = io.open(fpath, 'w', encoding=self.encoding)
+                         if not isinstance(contents, text_type):
                              contents = contents.decode(self.encoding)
                      fhandle.write(contents)
                      fhandle.close()
                          boolean, whether to also search in entries that are obsolete.
                      ``msgctxt``
-                         string, allows to specify a specific message context for the
+                         string, allows specifying a specific message context for the
                          search.
                      """
                      if include_obsolete_entries:
                          entries = [e for e in self if not e.obsolete]
                      for e in entries:
                          if getattr(e, by) == st:
-                             if msgctxt and e.msgctxt != msgctxt:
+                             if msgctxt is not False and e.msgctxt != msgctxt:
                                  continue
                              return e
                      return None
                          'Language-Team',
                          'MIME-Version',
                          'Content-Type',
-                         'Content-Transfer-Encoding'
+                         'Content-Transfer-Encoding',
+                         'Language',
+                         'Plural-Forms'
                      ]
                      ordered_data = []
                      for data in data_order:
                              pass
                      # the rest of the metadata will be alphabetically ordered since there
                      # are no specs for this AFAIK
-                     keys = metadata.keys()
-                     keys.sort()
-                     for data in keys:
+                     for data in sorted(metadata.keys()):
                          value = metadata[data]
                          ordered_data.append((data, value))
                      return ordered_data
                      """
                      offsets = []
                      entries = self.translated_entries()
                      # the keys are sorted in the .mo file
                      def cmp(_self, other):
                          # msgfmt compares entries with msgctxt if it exists
-                         if _self.msgctxt:
-                             self_msgid = _self.msgctxt
-                         else:
-                             self_msgid = _self.msgid
-                         if other.msgctxt:
-                             other_msgid = other.msgctxt
-                         else:
-                             other_msgid = other.msgid
+                         self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
+                         other_msgid = other.msgctxt and other.msgctxt or other.msgid
                          if self_msgid > other_msgid:
                              return 1
                          elif self_msgid < other_msgid:
                          else:
                              return 0
                      # add metadata entry
-                     entries.sort(cmp)
+                     entries.sort(key=lambda o: o.msgctxt or o.msgid)
                      mentry = self.metadata_as_entry()
                      #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
                      entries = [mentry] + entries
                      entries_len = len(entries)
-                     ids, strs = '', ''
+                     ids, strs = b(''), b('')
                      for e in entries:
                          # For each string, we need size and file offset.  Each string is
                          # NUL terminated; the NUL does not count into the size.
-                         msgid = ''
+                         msgid = b('')
                          if e.msgctxt:
                              # Contexts are stored by storing the concatenation of the
                              # context, a <EOT> byte, and the original string
                              msgid = self._encode(e.msgctxt + '\4')
                          if e.msgid_plural:
-                             indexes = e.msgstr_plural.keys()
-                             indexes.sort()
                              msgstr = []
-                             for index in indexes:
+                             for index in sorted(e.msgstr_plural.keys()):
                                  msgstr.append(e.msgstr_plural[index])
                              msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
                              msgstr = self._encode('\0'.join(msgstr))
                              msgid += self._encode(e.msgid)
                              msgstr = self._encode(e.msgstr)
                          offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
-                         ids  += msgid  + '\0'
-                         strs += msgstr + '\0'
+                         ids += msgid + b('\0')
+                         strs += msgstr + b('\0')
                      # The header is 7 32-bit unsigned integers.
-                     keystart = 7*4+16*entries_len
+                     keystart = 7 * 4 + 16 * entries_len
                      # and the values start after the keys
                      valuestart = keystart + len(ids)
                      koffsets = []
                      # The string table first has the list of keys, then the list of values.
                      # Each entry has first the size of the string, then the file offset.
                      for o1, l1, o2, l2 in offsets:
-                         koffsets += [l1, o1+keystart]
-                         voffsets += [l2, o2+valuestart]
+                         koffsets += [l1, o1 + keystart]
+                         voffsets += [l2, o2 + valuestart]
                      offsets = koffsets + voffsets
-                     # check endianness for magic number
-                     if struct.pack('@h', 1) == struct.pack('<h', 1):
-                         magic_number = MOFile.LITTLE_ENDIAN
-                     else:
-                         magic_number = MOFile.BIG_ENDIAN
                      output = struct.pack(
                          "Iiiiiii",
-                         magic_number,      # Magic number
-                         0,                 # Version
-                         entries_len,       # # of entries
-                         7*4,               # start of key index
-                         7*4+entries_len*8, # start of value index
-                         0, keystart        # size and offset of hash table
-                                            # Important: we don't use hash tables
+                         # Magic number
+                         MOFile.MAGIC,
+                         # Version
+                         0,
+                         # number of entries
+                         entries_len,
+                         # start of key index
+                         7 * 4,
+                         # start of value index
+                         7 * 4 + entries_len * 8,
+                         # size and offset of hash table, we don't use hash tables
+                         0, keystart
                      )
-                     output += array.array("i", offsets).tostring()
+                     if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
+                         output += array.array("i", offsets).tobytes()
+                     else:
+                         output += array.array("i", offsets).tostring()
                      output += ids
                      output += strs
                      return output
                      Encodes the given ``mixed`` argument with the file encoding if and
                      only if it's an unicode string and returns the encoded string.
                      """
-                     if type(mixed) == types.UnicodeType:
-                         return mixed.encode(self.encoding)
+                     if isinstance(mixed, text_type):
+                         mixed = mixed.encode(self.encoding)
                      return mixed
              # }}}
              # class POFile {{{
              class POFile(_BaseFile):
                  """
                  Po (or Pot) file reader/writer.
                      """
                      ret, headers = '', self.header.split('\n')
                      for header in headers:
-                         if header[:1] in [',', ':']:
+                         if not len(header):
+                             ret += "#\n"
+                         elif header[:1] in [',', ':']:
                              ret += '#%s\n' % header
                          else:
                              ret += '# %s\n' % header
-                     if type(ret) != types.UnicodeType:
-                         ret = unicode(ret, self.encoding)
+                     if not isinstance(ret, text_type):
+                         ret = ret.decode(self.encoding)
                      return ret + _BaseFile.__unicode__(self)
                      if total == 0:
                          return 100
                      translated = len(self.translated_entries())
-                     return int((100.00 / float(total)) * translated)
+                     return int(translated * 100 / float(total))
                  def translated_entries(self):
                      """
                      """
                      Convenience method that returns the list of untranslated entries.
                      """
-                     return [e for e in self if not e.translated() and not e.obsolete \
+                     return [e for e in self if not e.translated() and not e.obsolete
                              and not 'fuzzy' in e.flags]
                  def fuzzy_entries(self):
                      ``refpot``
                          object POFile, the reference catalog.
                      """
+                     # Store entries in dict/set for faster access
+                     self_entries = dict((entry.msgid, entry) for entry in self)
+                     refpot_msgids = set(entry.msgid for entry in refpot)
+                     # Merge entries that are in the refpot
                      for entry in refpot:
-                         e = self.find(entry.msgid, include_obsolete_entries=True)
+                         e = self_entries.get(entry.msgid)
                          if e is None:
                              e = POEntry()
                              self.append(e)
                          e.merge(entry)
                      # ok, now we must "obsolete" entries that are not in the refpot anymore
                      for entry in self:
-                         if refpot.find(entry.msgid) is None:
+                         if entry.msgid not in refpot_msgids:
                              entry.obsolete = True
              # }}}
              # class MOFile {{{
              class MOFile(_BaseFile):
                  """
                  Mo file reader/writer.
                  This class inherits the :class:`~polib._BaseFile` class and, by
                  extension, the python ``list`` type.
                  """
-                 BIG_ENDIAN    = 0xde120495
-                 LITTLE_ENDIAN = 0x950412de
+                 MAGIC = 0x950412de
+                 MAGIC_SWAPPED = 0xde120495
                  def __init__(self, *args, **kwargs):
                      """
                      Convenience method to keep the same interface with POFile instances.
                      """
                      return []
              # }}}
              # class _BaseEntry {{{
              class _BaseEntry(object):
                  """
                  Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
                      ret = []
                      # write the msgctxt if any
                      if self.msgctxt is not None:
-                         ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
+                         ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
+                                                wrapwidth)
                      # write the msgid
                      ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
                      # write the msgid_plural if any
                      if self.msgid_plural:
-                         ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
+                         ret += self._str_field("msgid_plural", delflag, "",
+                                                self.msgid_plural, wrapwidth)
                      if self.msgstr_plural:
                          # write the msgstr_plural if any
                          msgstrs = self.msgstr_plural
                          for index in keys:
                              msgstr = msgstrs[index]
                              plural_index = '[%s]' % index
-                             ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
+                             ret += self._str_field("msgstr", delflag, plural_index, msgstr,
+                                                    wrapwidth)
                      else:
                          # otherwise write the msgstr
-                         ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
+                         ret += self._str_field("msgstr", delflag, "", self.msgstr,
+                                                wrapwidth)
                      ret.append('')
-                     ret = '\n'.join(ret)
-                     if type(ret) != types.UnicodeType:
-                         return unicode(ret, self.encoding)
+                     ret = u('\n').join(ret)
                      return ret
-                 def __str__(self):
-                     """
-                     Returns the string representation of the entry.
-                     """
-                     return unicode(self).encode(self.encoding)
+                 if PY3:
+                     def __str__(self):
+                         return self.__unicode__()
+                 else:
+                     def __str__(self):
+                         """
+                         Returns the string representation of the entry.
+                         """
+                         return unicode(self).encode(self.encoding)
                  def __eq__(self, other):
-                     return unicode(self) == unicode(other)
+                     return str(self) == str(other)
-                 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
+                 def _str_field(self, fieldname, delflag, plural_index, field,
+                                wrapwidth=78):
                      lines = field.splitlines(True)
                      if len(lines) > 1:
-                         lines = [''] + lines # start with initial empty line
+                         lines = [''] + lines  # start with initial empty line
                      else:
                          escaped_field = escape(field)
                          specialchars_count = 0
                          real_wrapwidth = wrapwidth - flength + specialchars_count
                          if wrapwidth > 0 and len(field) > real_wrapwidth:
                              # Wrap the line but take field name into account
-                             lines = [''] + [unescape(item) for item in textwrap.wrap(
+                             lines = [''] + [unescape(item) for item in wrap(
                                  escaped_field,
-                                 wrapwidth - 2, # 2 for quotes ""
+                                 wrapwidth - 2,  # 2 for quotes ""
                                  drop_whitespace=False,
                                  break_long_words=False
                              )]
                      ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                              escape(lines.pop(0)))]
-                     for mstr in lines:
-                         ret.append('%s"%s"' % (delflag, escape(mstr)))
+                     for line in lines:
+                         ret.append('%s"%s"' % (delflag, escape(line)))
                      return ret
              # }}}
              # class POEntry {{{
              class POEntry(_BaseEntry):
                  """
                  Represents a po file entry.
                      ``previous_msgid_plural``
                          string, the entry previous msgid_plural.
+                     ``linenum``
+                         integer, the line number of the entry
                      """
                      _BaseEntry.__init__(self, *args, **kwargs)
                      self.comment = kwargs.get('comment', '')
                      self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
                      self.previous_msgid = kwargs.get('previous_msgid', None)
                      self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
+                     self.linenum = kwargs.get('linenum', None)
                  def __unicode__(self, wrapwidth=78):
                      """
                          if val:
                              for comment in val.split('\n'):
                                  if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
-                                     ret += textwrap.wrap(
+                                     ret += wrap(
                                          comment,
                                          wrapwidth,
                                          initial_indent=c[1],
                              # what we want for filenames, so the dirty hack is to
                              # temporally replace hyphens with a char that a file cannot
                              # contain, like "*"
-                             ret += [l.replace('*', '-') for l in textwrap.wrap(
+                             ret += [l.replace('*', '-') for l in wrap(
                                  filestr.replace('-', '*'),
                                  wrapwidth,
                                  initial_indent='#: ',
                          ret.append('#, %s' % ', '.join(self.flags))
                      # previous context and previous msgid/msgid_plural
-                     fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
+                     fields = ['previous_msgctxt', 'previous_msgid',
+                               'previous_msgid_plural']
                      for f in fields:
                          val = getattr(self, f)
                          if val:
                              ret += self._str_field(f, "#| ", "", val, wrapwidth)
                      ret.append(_BaseEntry.__unicode__(self, wrapwidth))
-                     ret = '\n'.join(ret)
+                     ret = u('\n').join(ret)
-                     if type(ret) != types.UnicodeType:
-                         return unicode(ret, self.encoding)
+                     assert isinstance(ret, text_type)
+                     #if type(ret) != types.UnicodeType:
+                     #    return unicode(ret, self.encoding)
                      return ret
                  def __cmp__(self, other):
                      """
                      Called by comparison operations if rich comparison is not defined.
                      """
-                     def compare_occurrences(a, b):
-                         """
-                         Compare an entry occurrence with another one.
-                         """
-                         if a[0] != b[0]:
-                             return a[0] < b[0]
-                         if a[1] != b[1]:
-                             return a[1] < b[1]
-                         return 0
                      # First: Obsolete test
                      if self.obsolete != other.obsolete:
                          else:
                              return 1
                      # Work on a copy to protect original
-                     occ1 = self.occurrences[:]
-                     occ2 = other.occurrences[:]
-                     # Sorting using compare method
-                     occ1.sort(compare_occurrences)
-                     occ2.sort(compare_occurrences)
-                     # Comparing sorted occurrences
+                     occ1 = sorted(self.occurrences[:])
+                     occ2 = sorted(other.occurrences[:])
                      pos = 0
                      for entry1 in occ1:
                          try:
                                  return 1
                              else:
                                  return -1
+                     # Compare msgid_plural if set
+                     if self.msgid_plural:
+                         if not other.msgid_plural:
+                             return 1
+                         for pos in self.msgid_plural:
+                             if pos not in other.msgid_plural:
+                                 return 1
+                             if self.msgid_plural[pos] > other.msgid_plural[pos]:
+                                 return 1
+                             if self.msgid_plural[pos] < other.msgid_plural[pos]:
+                                 return -1
                      # Finally: Compare message ID
-                     if self.msgid > other.msgid: return 1
-                     else: return -1
+                     if self.msgid > other.msgid:
+                         return 1
+                     elif self.msgid < other.msgid:
+                         return -1
+                     return 0
+                 def __gt__(self, other):
+                     """Return True when ``__cmp__`` ranks this entry after ``other``."""
+                     return self.__cmp__(other) > 0
+                 def __lt__(self, other):
+                     """Return True when ``__cmp__`` ranks this entry before ``other``."""
+                     return self.__cmp__(other) < 0
+                 def __ge__(self, other):
+                     """Return True when ``__cmp__`` does not rank this entry before ``other``."""
+                     return self.__cmp__(other) >= 0
+                 def __le__(self, other):
+                     """Return True when ``__cmp__`` does not rank this entry after ``other``."""
+                     return self.__cmp__(other) <= 0
+                 def __eq__(self, other):
+                     """Return True when ``__cmp__`` reports the two entries as equal."""
+                     # NOTE(review): __cmp__ reads ``other.obsolete``, so comparing with
+                     # an object lacking that attribute (e.g. None) looks like it raises
+                     # AttributeError instead of returning False - confirm.
+                     return self.__cmp__(other) == 0
+                 def __ne__(self, other):
+                     """Return True when ``__cmp__`` reports the two entries as different."""
+                     # NOTE(review): __cmp__ reads ``other.obsolete``, so comparing with
+                     # an object lacking that attribute (e.g. None) looks like it raises
+                     # AttributeError instead of returning True - confirm.
+                     return self.__cmp__(other) != 0
                  def translated(self):
                      """
                              except KeyError:
                                  self.msgstr_plural[pos] = ''
+                 def __hash__(self):
+                     """Hash the entry on its ``(msgid, msgstr)`` pair."""
+                     # NOTE(review): the visible part of __cmp__ never looks at msgstr,
+                     # so two entries that compare equal may hash differently - confirm
+                     # whether that matters for set/dict usage.
+                     return hash((self.msgid, self.msgstr))
              # }}}
              # class MOEntry {{{
              class MOEntry(_BaseEntry):
                  """
                  Represents a mo file entry.
                  """
-                 pass
+                 def __init__(self, *args, **kwargs):
+                     """
+                     Constructor, accepts the following keyword arguments,
+                     for consistency with :class:`~polib.POEntry`:
+                     ``comment``
+                     ``tcomment``
+                     ``occurrences``
+                     ``flags``
+                     ``previous_msgctxt``
+                     ``previous_msgid``
+                     ``previous_msgid_plural``
+                     Note: even though these keyword arguments are accepted,
+                     they hold no real meaning in the context of MO files
+                     and are simply ignored.
+                     """
+                     _BaseEntry.__init__(self, *args, **kwargs)
+                     # Set the PO-only attributes to empty values regardless of the
+                     # keyword arguments received, as the docstring states.
+                     self.comment = ''
+                     self.tcomment = ''
+                     self.occurrences = []
+                     self.flags = []
+                     self.previous_msgctxt = None
+                     self.previous_msgid = None
+                     self.previous_msgid_plural = None
+                 def __hash__(self):
+                     """Hash the entry on its ``(msgid, msgstr)`` pair."""
+                     return hash((self.msgid, self.msgstr))
              # }}}
              # class _POFileParser {{{
              class _POFileParser(object):
                  """
                  A finite state machine to parse efficiently and correctly po
                          file (optional, default: ``False``).
                      """
                      enc = kwargs.get('encoding', default_encoding)
-                     if os.path.exists(pofile):
+                     if _is_file(pofile):
                          try:
-                             self.fhandle = codecs.open(pofile, 'rU', enc)
+                             self.fhandle = io.open(pofile, 'rt', encoding=enc)
                          except LookupError:
                              enc = default_encoding
-                             self.fhandle = codecs.open(pofile, 'rU', enc)
+                             self.fhandle = io.open(pofile, 'rt', encoding=enc)
                      else:
                          self.fhandle = pofile.splitlines()
-                     self.instance = POFile(
+                     klass = kwargs.get('klass')
+                     if klass is None:
+                         klass = POFile
+                     self.instance = klass(
                          pofile=pofile,
                          encoding=enc,
                          check_for_duplicates=kwargs.get('check_for_duplicates', False)
                      )
                      self.transitions = {}
-                     self.current_entry = POEntry()
-                     self.current_state = 'ST'
+                     self.current_line = 0
+                     self.current_entry = POEntry(linenum=self.current_line)
+                     self.current_state = 'st'
                      self.current_token = None
                      # two memo flags used in handlers
                      self.msgstr_index = 0
                      #     * HE: Header
                      #     * TC: a translation comment
                      #     * GC: a generated comment
-                     #     * OC: a file/line occurence
+                     #     * OC: a file/line occurrence
                      #     * FL: a flags line
                      #     * CT: a message context
                      #     * PC: a previous msgctxt
                      #     * MS: a msgstr
                      #     * MX: a msgstr plural
                      #     * MC: a msgid or msgstr continuation line
-                     all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
-                            'MS', 'MP', 'MX', 'MI']
+                     all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
+                            'ms', 'mp', 'mx', 'mi']
-                     self.add('TC', ['ST', 'HE'],                                     'HE')
-                     self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
-                                     'MP', 'MX', 'MI'],                               'TC')
-                     self.add('GC', all,                                              'GC')
-                     self.add('OC', all,                                              'OC')
-                     self.add('FL', all,                                              'FL')
-                     self.add('PC', all,                                              'PC')
-                     self.add('PM', all,                                              'PM')
-                     self.add('PP', all,                                              'PP')
-                     self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
-                                     'PP', 'MS', 'MX'],                               'CT')
-                     self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
-                              'PM', 'PP', 'MS', 'MX'],                                'MI')
-                     self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'],             'MP')
-                     self.add('MS', ['MI', 'MP', 'TC'],                               'MS')
-                     self.add('MX', ['MI', 'MX', 'MP', 'TC'],                         'MX')
-                     self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
+                     self.add('tc', ['st', 'he'],                                     'he')
+                     self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
+                                     'mp', 'mx', 'mi'],                               'tc')
+                     self.add('gc', all,                                              'gc')
+                     self.add('oc', all,                                              'oc')
+                     self.add('fl', all,                                              'fl')
+                     self.add('pc', all,                                              'pc')
+                     self.add('pm', all,                                              'pm')
+                     self.add('pp', all,                                              'pp')
+                     self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
+                                     'pp', 'ms', 'mx'],                               'ct')
+                     self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
+                              'pm', 'pp', 'ms', 'mx'],                                'mi')
+                     self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'],             'mp')
+                     self.add('ms', ['mi', 'mp', 'tc'],                               'ms')
+                     self.add('mx', ['mi', 'mx', 'mp', 'tc'],                         'mx')
+                     self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
                  def parse(self):
                      """
                      Run the state machine, parse the file line by line and call process()
                      with the current matched symbol.
                      """
-                     i = 0
                      keywords = {
-                         'msgctxt': 'CT',
-                         'msgid': 'MI',
-                         'msgstr': 'MS',
-                         'msgid_plural': 'MP',
+                         'msgctxt': 'ct',
+                         'msgid': 'mi',
+                         'msgstr': 'ms',
+                         'msgid_plural': 'mp',
                      }
                      prev_keywords = {
-                         'msgid_plural': 'PP',
-                         'msgid': 'PM',
-                         'msgctxt': 'PC',
+                         'msgid_plural': 'pp',
+                         'msgid': 'pm',
+                         'msgctxt': 'pc',
                      }
+                     tokens = []
                      for line in self.fhandle:
-                         i += 1
+                         self.current_line += 1
                          line = line.strip()
                          if line == '':
                              continue
                          tokens = line.split(None, 2)
                          nb_tokens = len(tokens)
+                         if tokens[0] == '#~|':
+                             continue
                          if tokens[0] == '#~' and nb_tokens > 1:
                              line = line[3:].strip()
                              tokens = tokens[1:]
                          # msgid, msgid_plural, msgctxt & msgstr.
                          if tokens[0] in keywords and nb_tokens > 1:
                              line = line[len(tokens[0]):].lstrip()
+                             if re.search(r'([^\\]|^)"', line[1:-1]):
+                                 raise IOError('Syntax error in po file %s (line %s): '
+                                               'unescaped double quote found' %
+                                               (self.instance.fpath, self.current_line))
                              self.current_token = line
-                             self.process(keywords[tokens[0]], i)
+                             self.process(keywords[tokens[0]])
                              continue
                          self.current_token = line
-                         if tokens[0] == '#:' and nb_tokens > 1:
+                         if tokens[0] == '#:':
+                             if nb_tokens <= 1:
+                                 continue
                              # we are on a occurrences line
-                             self.process('OC', i)
+                             self.process('oc')
                          elif line[:1] == '"':
                              # we are on a continuation line
-                             self.process('MC', i)
+                             if re.search(r'([^\\]|^)"', line[1:-1]):
+                                 raise IOError('Syntax error in po file %s (line %s): '
+                                               'unescaped double quote found' %
+                                               (self.instance.fpath, self.current_line))
+                             self.process('mc')
                          elif line[:7] == 'msgstr[':
                              # we are on a msgstr plural
-                             self.process('MX', i)
+                             self.process('mx')
-                         elif tokens[0] == '#,' and nb_tokens > 1:
+                         elif tokens[0] == '#,':
+                             if nb_tokens <= 1:
+                                 continue
                              # we are on a flags line
-                             self.process('FL', i)
+                             self.process('fl')
-                         elif tokens[0] == '#':
-                             if line == '#': line += ' '
+                         elif tokens[0] == '#' or tokens[0].startswith('##'):
+                             if line == '#':
+                                 line += ' '
                              # we are on a translator comment line
-                             self.process('TC', i)
+                             self.process('tc')
-                         elif tokens[0] == '#.' and nb_tokens > 1:
+                         elif tokens[0] == '#.':
+                             if nb_tokens <= 1:
+                                 continue
                              # we are on a generated comment line
-                             self.process('GC', i)
+                             self.process('gc')
                          elif tokens[0] == '#|':
-                             if nb_tokens < 2:
-                                 self.process('??', i)
-                                 continue
+                             if nb_tokens <= 1:
+                                 raise IOError('Syntax error in po file %s (line %s)' %
+                                               (self.instance.fpath, self.current_line))
                              # Remove the marker and any whitespace right after that.
                              line = line[2:].lstrip()
                              if tokens[1].startswith('"'):
                                  # Continuation of previous metadata.
-                                 self.process('MC', i)
+                                 self.process('mc')
                                  continue
                              if nb_tokens == 2:
                                  # Invalid continuation line.
-                                 self.process('??', i)
+                                 raise IOError('Syntax error in po file %s (line %s): '
+                                               'invalid continuation line' %
+                                               (self.instance.fpath, self.current_line))
                              # we are on a "previous translation" comment line,
                              if tokens[1] not in prev_keywords:
                                  # Unknown keyword in previous translation comment.
-                                 self.process('??', i)
+                                 raise IOError('Syntax error in po file %s (line %s): '
+                                               'unknown keyword %s' %
+                                               (self.instance.fpath, self.current_line,
+                                                tokens[1]))
                              # Remove the keyword and any whitespace
                              # between it and the starting quote.
                              line = line[len(tokens[1]):].lstrip()
                              self.current_token = line
-                             self.process(prev_keywords[tokens[1]], i)
+                             self.process(prev_keywords[tokens[1]])
                          else:
-                             self.process('??', i)
+                             raise IOError('Syntax error in po file %s (line %s)' %
+                                           (self.instance.fpath, self.current_line))
-                     if self.current_entry:
+                     if self.current_entry and len(tokens) > 0 and \
+                        not tokens[0].startswith('#'):
                          # since entries are added when another entry is found, we must add
-                         # the last entry here (only if there are lines)
+                         # the last entry here (only if there are lines). Trailing comments
+                         # are ignored
                          self.instance.append(self.current_entry)
                      # before returning the instance, check if there's metadata and if
                      # so extract it in a dict
-                     firstentry = self.instance[0]
-                     if firstentry.msgid == '': # metadata found
+                     metadataentry = self.instance.find('')
+                     if metadataentry:  # metadata found
                          # remove the entry
-                         firstentry = self.instance.pop(0)
-                         self.instance.metadata_is_fuzzy = firstentry.flags
+                         self.instance.remove(metadataentry)
+                         self.instance.metadata_is_fuzzy = metadataentry.flags
                          key = None
-                         for msg in firstentry.msgstr.splitlines():
+                         for msg in metadataentry.msgstr.splitlines():
                              try:
                                  key, val = msg.split(':', 1)
                                  self.instance.metadata[key] = val.strip()
-                             except:
+                             except (ValueError, KeyError):
                                  if key is not None:
-                                     self.instance.metadata[key] += '\n'+ msg.strip()
+                                     self.instance.metadata[key] += '\n' + msg.strip()
                      # close opened file
-                     if isinstance(self.fhandle, file):
+                     if not isinstance(self.fhandle, list):  # must be file
                          self.fhandle.close()
                      return self.instance
                          the next state the fsm will have after the action.
                      """
                      for state in states:
-                         action = getattr(self, 'handle_%s' % next_state.lower())
+                         action = getattr(self, 'handle_%s' % next_state)
                          self.transitions[(symbol, state)] = (action, next_state)
-                 def process(self, symbol, linenum):
+                 def process(self, symbol):
                      """
                      Process the transition corresponding to the current state and the
                      symbol provided.
                          (action, state) = self.transitions[(symbol, self.current_state)]
                          if action():
                              self.current_state = state
-                     except Exception as exc:
-                         raise IOError('Syntax error in po file (line %s)' % linenum)
+                     except Exception:
+                         raise IOError('Syntax error in po file (line %s)' %
+                                       self.current_line)
                  # state handlers
                  def handle_tc(self):
                      """Handle a translator comment."""
+                     # A comment that follows a msgstr belongs to the next entry:
+                     # store the finished entry and start a new one at this line.
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
+                         self.current_entry = POEntry(linenum=self.current_line)
+                     # Successive comment lines are joined with a newline.
                      if self.current_entry.tcomment != '':
                          self.current_entry.tcomment += '\n'
-                     self.current_entry.tcomment += self.current_token[2:]
+                     # Drop every leading '#' and at most one space after them.
+                     tcomment = self.current_token.lstrip('#')
+                     if tcomment.startswith(' '):
+                         tcomment = tcomment[1:]
+                     self.current_entry.tcomment += tcomment
                      return True
                  def handle_gc(self):
                      """Handle a generated comment."""
+                     # A comment that follows a msgstr belongs to the next entry:
+                     # store the finished entry and start a new one at this line.
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
+                         self.current_entry = POEntry(linenum=self.current_line)
+                     # Successive comment lines are joined with a newline.
                      if self.current_entry.comment != '':
                          self.current_entry.comment += '\n'
+                     # Skip the first three characters: the '#.' marker plus the
+                     # separator that follows it.
                      self.current_entry.comment += self.current_token[3:]
                      return True
                  def handle_oc(self):
-                     """Handle a file:num occurence."""
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     """
+                     Handle a file:num occurrence line.
+
+                     Every whitespace-separated reference after the '#:' marker is
+                     stored on the current entry as a ``(file, line)`` tuple. The line
+                     number is taken from the text after the *last* colon, so file
+                     names that contain colons themselves are kept intact. When that
+                     text is not a number, or there is no colon at all, the whole
+                     reference is stored as the file name with an empty line number.
+
+                     Always returns True so that the state machine switches state.
+                     """
+                     # An occurrence that follows a msgstr belongs to the next entry:
+                     # store the finished entry and start a new one at this line.
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
+                         self.current_entry = POEntry(linenum=self.current_line)
                      occurrences = self.current_token[3:].split()
                      for occurrence in occurrences:
                          if occurrence != '':
                              try:
-                                 fil, line = occurrence.split(':')
+                                 # Split on the last colon only: 'C:/src/a.py:12' must
+                                 # give ('C:/src/a.py', '12'), not fail to unpack.
+                                 fil, line = occurrence.rsplit(':', 1)
                                  if not line.isdigit():
-                                     fil  = fil + line
+                                     # Not a line number: keep the reference verbatim
+                                     # instead of gluing both halves without the colon.
+                                     fil = occurrence
                                      line = ''
                                  self.current_entry.occurrences.append((fil, line))
-                             except:
+                             except (ValueError, AttributeError):
                                  self.current_entry.occurrences.append((occurrence, ''))
                      return True
                  def handle_fl(self):
                      """Handle a flags line."""
+                     # A flags line that follows a msgstr belongs to the next entry:
+                     # store the finished entry and start a new one at this line.
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
-                     self.current_entry.flags += self.current_token[3:].split(', ')
+                         self.current_entry = POEntry(linenum=self.current_line)
+                     # Skip the first three characters ('#,' and its separator), split
+                     # on commas and strip each flag, so the space after a comma is
+                     # optional.
+                     self.current_entry.flags += [c.strip() for c in
+                                                  self.current_token[3:].split(',')]
                      return True
                  def handle_pp(self):
                      """Handle a previous msgid_plural line."""
+                     # A line that follows a msgstr belongs to the next entry:
+                     # store the finished entry and start a new one at this line.
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
+                         self.current_entry = POEntry(linenum=self.current_line)
+                     # [1:-1] removes the first and last character (presumably the
+                     # enclosing double quotes) before unescaping.
                      self.current_entry.previous_msgid_plural = \
                          unescape(self.current_token[1:-1])
                      return True
                  def handle_pm(self):
                      """Handle a previous msgid line."""
+                     # A line that follows a msgstr belongs to the next entry:
+                     # store the finished entry and start a new one at this line.
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
+                         self.current_entry = POEntry(linenum=self.current_line)
+                     # [1:-1] removes the first and last character (presumably the
+                     # enclosing double quotes) before unescaping.
                      self.current_entry.previous_msgid = \
                          unescape(self.current_token[1:-1])
                      return True
                  def handle_pc(self):
                      """Handle a previous msgctxt line."""
+                     # A line that follows a msgstr belongs to the next entry:
+                     # store the finished entry and start a new one at this line.
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
+                         self.current_entry = POEntry(linenum=self.current_line)
+                     # [1:-1] removes the first and last character (presumably the
+                     # enclosing double quotes) before unescaping.
                      self.current_entry.previous_msgctxt = \
                          unescape(self.current_token[1:-1])
                      return True
                  def handle_ct(self):
                      """Handle a msgctxt."""
+                     # A msgctxt that follows a msgstr belongs to the next entry:
+                     # store the finished entry and start a new one at this line.
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
+                         self.current_entry = POEntry(linenum=self.current_line)
+                     # [1:-1] removes the first and last character (presumably the
+                     # enclosing double quotes) before unescaping.
                      self.current_entry.msgctxt = unescape(self.current_token[1:-1])
                      return True
                  def handle_mi(self):
                      """Handle a msgid."""
+                     # A msgid that follows a msgstr starts the next entry:
+                     # store the finished entry and start a new one at this line.
-                     if self.current_state in ['MC', 'MS', 'MX']:
+                     if self.current_state in ['mc', 'ms', 'mx']:
                          self.instance.append(self.current_entry)
-                         self.current_entry = POEntry()
+                         self.current_entry = POEntry(linenum=self.current_line)
+                     # Copy the parser's entry_obsolete flag onto the entry.
                      self.current_entry.obsolete = self.entry_obsolete
+                     # [1:-1] removes the first and last character (presumably the
+                     # enclosing double quotes) before unescaping.
                      self.current_entry.msgid = unescape(self.current_token[1:-1])
                      return True
                  def handle_mx(self):
                      """Handle a msgstr plural."""
-                     index, value = self.current_token[7], self.current_token[11:-1]
-                     self.current_entry.msgstr_plural[index] = unescape(value)
-                     self.msgstr_index = index
+                     # current_token[7] is the single character right after 'msgstr['.
+                     # NOTE(review): an index of 10 or more would be cut down to its
+                     # first digit - confirm such indexes never occur.
+                     index = self.current_token[7]
+                     # The value runs from just after the first double quote up to,
+                     # but excluding, the last character of the line.
+                     value = self.current_token[self.current_token.find('"') + 1:-1]
+                     self.current_entry.msgstr_plural[int(index)] = unescape(value)
+                     # Remembered so that handle_mc can extend this plural form.
+                     self.msgstr_index = int(index)
                      return True
                  def handle_mc(self):
                      """Handle a msgid or msgstr continuation line."""
+                     # [1:-1] removes the first and last character (presumably the
+                     # enclosing double quotes) before unescaping.
                      token = unescape(self.current_token[1:-1])
+                     # The current state tells which field was being read, so the
+                     # continuation text is appended to that field.
-                     if self.current_state == 'CT':
-                         typ = 'msgctxt'
+                     if self.current_state == 'ct':
                          self.current_entry.msgctxt += token
-                     elif self.current_state == 'MI':
-                         typ = 'msgid'
+                     elif self.current_state == 'mi':
                          self.current_entry.msgid += token
-                     elif self.current_state == 'MP':
-                         typ = 'msgid_plural'
+                     elif self.current_state == 'mp':
                          self.current_entry.msgid_plural += token
-                     elif self.current_state == 'MS':
-                         typ = 'msgstr'
+                     elif self.current_state == 'ms':
                          self.current_entry.msgstr += token
-                     elif self.current_state == 'MX':
-                         typ = 'msgstr[%s]' % self.msgstr_index
+                     elif self.current_state == 'mx':
+                         # msgstr_index was recorded by handle_mx for the plural form
+                         # currently being read.
                          self.current_entry.msgstr_plural[self.msgstr_index] += token
-                     elif self.current_state == 'PP':
-                         typ = 'previous_msgid_plural'
-                         token = token[3:]
+                     elif self.current_state == 'pp':
                          self.current_entry.previous_msgid_plural += token
-                     elif self.current_state == 'PM':
-                         typ = 'previous_msgid'
-                         token = token[3:]
+                     elif self.current_state == 'pm':
                          self.current_entry.previous_msgid += token
-                     elif self.current_state == 'PC':
-                         typ = 'previous_msgctxt'
-                         token = token[3:]
+                     elif self.current_state == 'pc':
                          self.current_entry.previous_msgctxt += token
                      # don't change the current state
+                     # (process() only switches state when the handler returns a
+                     # true value)
                      return False
              # }}}
              # class _MOFileParser {{{
              class _MOFileParser(object):
                  """
                  A class to parse binary mo files.
                          file (optional, default: ``False``).
                      """
                      self.fhandle = open(mofile, 'rb')
-                     self.instance = MOFile(
+                     klass = kwargs.get('klass')
+                     if klass is None:
+                         klass = MOFile
+                     self.instance = klass(
                          fpath=mofile,
                          encoding=kwargs.get('encoding', default_encoding),
                          check_for_duplicates=kwargs.get('check_for_duplicates', False)
                      )
+                 def __del__(self):
+                     """
+                     Make sure the file is closed, this prevents warnings on unclosed file
+                     when running tests with python >= 3.2.
+                     """
+                     # __del__ also runs on a partially built object: if open() raised
+                     # in the constructor, ``fhandle`` was never assigned, so look it
+                     # up defensively instead of raising AttributeError here.
+                     fhandle = getattr(self, 'fhandle', None)
+                     if fhandle:
+                         fhandle.close()
                  def parse(self):
                      """
                      Build the instance with the file handle provided in the
                      """
                      # parse magic number
                      magic_number = self._readbinary('<I', 4)
-                     if magic_number == MOFile.LITTLE_ENDIAN:
+                     if magic_number == MOFile.MAGIC:
                          ii = '<II'
-                     elif magic_number == MOFile.BIG_ENDIAN:
+                     elif magic_number == MOFile.MAGIC_SWAPPED:
                          ii = '>II'
                      else:
                          raise IOError('Invalid mo file, magic number is incorrect !')
                      self.instance.magic_number = magic_number
                      # parse the version number and the number of strings
-                     self.instance.version, numofstrings = self._readbinary(ii, 8)
+                     version, numofstrings = self._readbinary(ii, 8)
+                     # from MO file format specs: "A program seeing an unexpected major
+                     # revision number should stop reading the MO file entirely"
+                     if version not in (0, 1):
+                         raise IOError('Invalid mo file, unexpected major revision number')
+                     self.instance.version = version
                      # original strings and translation strings hash table offset
                      msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
                      # move to msgid hash table and read length and offset of msgids
                      for i in range(numofstrings):
                          msgstrs_index.append(self._readbinary(ii, 8))
                      # build entries
+                     encoding = self.instance.encoding
                      for i in range(numofstrings):
                          self.fhandle.seek(msgids_index[i][1])
                          msgid = self.fhandle.read(msgids_index[i][0])
                          self.fhandle.seek(msgstrs_index[i][1])
                          msgstr = self.fhandle.read(msgstrs_index[i][0])
-                         if i == 0: # metadata
-                             raw_metadata, metadata = msgstr.split('\n'), {}
+                         if i == 0 and not msgid:  # metadata
+                             raw_metadata, metadata = msgstr.split(b('\n')), {}
                              for line in raw_metadata:
-                                 tokens = line.split(':', 1)
-                                 if tokens[0] != '':
+                                 tokens = line.split(b(':'), 1)
+                                 if tokens[0] != b(''):
                                      try:
-                                         metadata[tokens[0]] = tokens[1].strip()
+                                         k = tokens[0].decode(encoding)
+                                         v = tokens[1].decode(encoding)
+                                         metadata[k] = v.strip()
                                      except IndexError:
-                                         metadata[tokens[0]] = ''
+                                         metadata[k] = u('')
                              self.instance.metadata = metadata
                              continue
                          # test if we have a plural entry
-                         msgid_tokens = msgid.split('\0')
+                         msgid_tokens = msgid.split(b('\0'))
                          if len(msgid_tokens) > 1:
                              entry = self._build_entry(
                                  msgid=msgid_tokens[0],
                                  msgid_plural=msgid_tokens[1],
-                                 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
+                                 msgstr_plural=dict((k, v) for k, v in
+                                                    enumerate(msgstr.split(b('\0'))))
                              )
                          else:
                              entry = self._build_entry(msgid=msgid, msgstr=msgstr)
                  def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
                                   msgstr_plural=None):
-                     msgctxt_msgid = msgid.split('\x04')
+                     msgctxt_msgid = msgid.split(b('\x04'))
+                     encoding = self.instance.encoding
                      if len(msgctxt_msgid) > 1:
                          kwargs = {
-                             'msgctxt': msgctxt_msgid[0],
-                             'msgid'  : msgctxt_msgid[1],
+                             'msgctxt': msgctxt_msgid[0].decode(encoding),
+                             'msgid': msgctxt_msgid[1].decode(encoding),
                          }
                      else:
-                         kwargs = {'msgid': msgid}
+                         kwargs = {'msgid': msgid.decode(encoding)}
                      if msgstr:
-                         kwargs['msgstr'] = msgstr
+                         kwargs['msgstr'] = msgstr.decode(encoding)
                      if msgid_plural:
-                         kwargs['msgid_plural'] = msgid_plural
+                         kwargs['msgid_plural'] = msgid_plural.decode(encoding)
                      if msgstr_plural:
+                         for k in msgstr_plural:
+                             msgstr_plural[k] = msgstr_plural[k].decode(encoding)
                          kwargs['msgstr_plural'] = msgstr_plural
                      return MOEntry(**kwargs)
                      if len(tup) == 1:
                          return tup[0]
                      return tup
+             # }}}
+             # class TextWrapper {{{
+             class TextWrapper(textwrap.TextWrapper):
+                 """
+                 Subclass of textwrap.TextWrapper that backports the
+                 drop_whitespace option.
+                 """
+                 def __init__(self, *args, **kwargs):
+                     drop_whitespace = kwargs.pop('drop_whitespace', True)
+                     textwrap.TextWrapper.__init__(self, *args, **kwargs)
+                     self.drop_whitespace = drop_whitespace
+                 def _wrap_chunks(self, chunks):
+                     """_wrap_chunks(chunks : [string]) -> [string]
+                     Wrap a sequence of text chunks and return a list of lines of
+                     length 'self.width' or less.  (If 'break_long_words' is false,
+                     some lines may be longer than this.)  Chunks correspond roughly
+                     to words and the whitespace between them: each chunk is
+                     indivisible (modulo 'break_long_words'), but a line break can
+                     come between any two chunks.  Chunks should not have internal
+                     whitespace; ie. a chunk is either all whitespace or a "word".
+                     Whitespace chunks will be removed from the beginning and end of
+                     lines, but apart from that whitespace is preserved.
+                     """
+                     lines = []
+                     if self.width <= 0:
+                         raise ValueError("invalid width %r (must be > 0)" % self.width)
+                     # Arrange in reverse order so items can be efficiently popped
+                     # from a stack of chunks.
+                     chunks.reverse()
+                     while chunks:
+                         # Start the list of chunks that will make up the current line.
+                         # cur_len is just the length of all the chunks in cur_line.
+                         cur_line = []
+                         cur_len = 0
+                         # Figure out which static string will prefix this line.
+                         if lines:
+                             indent = self.subsequent_indent
+                         else:
+                             indent = self.initial_indent
+                         # Maximum width for this line.
+                         width = self.width - len(indent)
+                         # First chunk on line is whitespace -- drop it, unless this
+                         # is the very beginning of the text (ie. no lines started yet).
+                         if self.drop_whitespace and chunks[-1].strip() == '' and lines:
+                             del chunks[-1]
+                         while chunks:
+                             l = len(chunks[-1])
+                             # Can at least squeeze this chunk onto the current line.
+                             if cur_len + l <= width:
+                                 cur_line.append(chunks.pop())
+                                 cur_len += l
+                             # Nope, this line is full.
+                             else:
+                                 break
+                         # The current line is full, and the next chunk is too big to
+                         # fit on *any* line (not just this one).
+                         if chunks and len(chunks[-1]) > width:
+                             self._handle_long_word(chunks, cur_line, cur_len, width)
+                         # If the last chunk on this line is all whitespace, drop it.
+                         if self.drop_whitespace and cur_line and not cur_line[-1].strip():
+                             del cur_line[-1]
+                         # Convert current line back to a string and store it in list
+                         # of all lines (return value).
+                         if cur_line:
+                             lines.append(indent + ''.join(cur_line))
+                     return lines
+             # }}}
+             # function wrap() {{{
+             def wrap(text, width=70, **kwargs):
+                 """
+                 Wrap a single paragraph of text, returning a list of wrapped lines.
+                 """
+                 if sys.version_info < (2, 6):
+                     return TextWrapper(width=width, **kwargs).wrap(text)
+                 return textwrap.wrap(text, width=width, **kwargs)
              # }}}

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages