polib.py
1954 lines
| 60.6 KiB
| text/x-python
|
PythonLexer
/ i18n / polib.py
Matt Mackall
|
r11432 | # no-check-code | ||
Augie Fackler
|
r40221 | # -* coding: utf-8 -*- | ||
Wagner Bruna
|
r11387 | # | ||
# License: MIT (see LICENSE file provided) | ||||
# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: | ||||
""" | ||||
Wagner Bruna
|
r15290 | **polib** allows you to manipulate, create, modify gettext files (pot, po and | ||
mo files). You can load existing files, iterate through its entries, add,
modify entries, comments or metadata, etc. or create new po files from scratch. | ||||
Wagner Bruna
|
r11387 | |||
Wagner Bruna
|
r15290 | **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and | ||
:func:`~polib.mofile` convenience functions. | ||||
Wagner Bruna
|
r11387 | """ | ||
Pulkit Goyal
|
r29485 | |||
Augie Fackler
|
r40221 | __author__ = 'David Jean Louis <izimobil@gmail.com>' | ||
__version__ = '1.0.7' | ||||
Augie Fackler
|
r43346 | __all__ = [ | ||
'pofile', | ||||
'POFile', | ||||
'POEntry', | ||||
'mofile', | ||||
'MOFile', | ||||
'MOEntry', | ||||
'default_encoding', | ||||
'escape', | ||||
'unescape', | ||||
'detect_encoding', | ||||
] | ||||
Wagner Bruna
|
r11387 | |||
Wagner Bruna
|
r15290 | import array | ||
Wagner Bruna
|
r11387 | import codecs | ||
Wagner Bruna
|
r15290 | import os | ||
import re | ||||
Wagner Bruna
|
r11387 | import struct | ||
Wagner Bruna
|
r15290 | import sys | ||
Wagner Bruna
|
r11387 | import textwrap | ||
Augie Fackler
|
r40221 | |||
# Prefer the standard ``io`` module; when it cannot be imported, define a
# tiny stand-in that exposes only the ``open()`` entry point used by this
# module.
try:
    import io
except ImportError:
    # replacement of io.open() for python < 2.6
    # we use codecs instead
    class io:
        # Only ``open`` is provided; it forwards its three arguments
        # positionally to ``codecs.open``.
        @staticmethod
        def open(fpath, mode='r', encoding=None):
            return codecs.open(fpath, mode, encoding)
Wagner Bruna
|
r15290 | |||
Wagner Bruna
|
r11387 | |||
Wagner Bruna
|
r15290 | # the default encoding to use when encoding cannot be detected | ||
Wagner Bruna
|
r11387 | default_encoding = 'utf-8' | ||
Augie Fackler
|
r40221 | # python 2/3 compatibility helpers {{{ | ||
# Defines four module-level names depending on the running interpreter:
#   PY3        -- True on Python 3 and later, False otherwise
#   text_type  -- the text string type (``unicode`` or ``str``)
#   b(s)       -- turn a native string literal into a byte string
#   u(s)       -- turn a native string literal into a text string
if sys.version_info[:2] < (3, 0):
    PY3 = False
    text_type = unicode

    def b(s):
        # native strings are returned unchanged on this branch
        return s

    def u(s):
        # decode the native string using the "unicode_escape" codec
        return unicode(s, "unicode_escape")

else:
    PY3 = True
    text_type = str

    def b(s):
        # encode the text string to bytes using latin-1
        return s.encode("latin-1")

    def u(s):
        # native strings are returned unchanged on this branch
        return s
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r40221 | # }}} | ||
Wagner Bruna
|
r15290 | # _pofile_or_mofile {{{ | ||
Wagner Bruna
|
r11387 | |||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
def _pofile_or_mofile(f, type, **kwargs):
    """
    Shared implementation behind :func:`polib.pofile` and
    :func:`polib.mofile`.

    Arguments:

    ``f``
        string, path to the po/mo file or its content.

    ``type``
        string, ``'pofile'`` selects the po parser; any other value selects
        the mo parser. ``'mofile'`` additionally switches encoding detection
        to binary mode.

    ``**kwargs``
        the optional ``encoding``, ``check_for_duplicates``, ``klass`` and
        ``wrapwidth`` settings documented on the public functions.

    Returns whatever the selected parser's ``parse()`` returns, with its
    ``wrapwidth`` attribute set (``78`` when not supplied).
    """
    is_mofile = type == 'mofile'

    # an explicit encoding wins; otherwise sniff it from the file/contents
    encoding = kwargs.get('encoding')
    if encoding is None:
        encoding = detect_encoding(f, is_mofile)

    # pick the parser class matching the requested file type
    if type == 'pofile':
        parser_class = _POFileParser
    else:
        parser_class = _MOFileParser

    parser = parser_class(
        f,
        encoding=encoding,
        check_for_duplicates=kwargs.get('check_for_duplicates', False),
        klass=kwargs.get('klass'),
    )
    parsed = parser.parse()
    parsed.wrapwidth = kwargs.get('wrapwidth', 78)
    return parsed
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r40221 | # }}} | ||
# _is_file {{{ | ||||
Wagner Bruna
|
r11387 | |||
Augie Fackler
|
r40221 | |||
def _is_file(filename_or_contents): | ||||
""" | ||||
Safely returns the value of os.path.exists(filename_or_contents). | ||||
Arguments: | ||||
``filename_or_contents`` | ||||
either a filename, or a string holding the contents of some file. | ||||
In the latter case, this function will always return False. | ||||
""" | ||||
try: | ||||
return os.path.exists(filename_or_contents) | ||||
except (ValueError, UnicodeEncodeError): | ||||
return False | ||||
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
Wagner Bruna
|
r15290 | # function pofile() {{{ | ||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
def pofile(pofile, **kwargs):
    """
    Parse the po or pot file ``pofile`` and return the resulting
    :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content
        (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was
        passed to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a
        :class:`~polib.POFile` instance).
    """
    parsed = _pofile_or_mofile(pofile, 'pofile', **kwargs)
    return parsed
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r15290 | # }}} | ||
Wagner Bruna
|
r11387 | # function mofile() {{{ | ||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
def mofile(mofile, **kwargs):
    """
    Parse the mo file ``mofile`` and return the resulting
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was
        passed to xgettext to generate the po file that was used to format
        the mo file (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a
        :class:`~polib.MOFile` instance).
    """
    parsed = _pofile_or_mofile(mofile, 'mofile', **kwargs)
    return parsed
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# function detect_encoding() {{{ | ||||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Detection looks for a ``Content-Type: ...; charset=NAME`` declaration and
    accepts ``NAME`` only when ``codecs.lookup`` knows it.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.
        Byte-string contents are accepted as well.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
    rxt = re.compile(u(PATTERN))
    rxb = re.compile(b(PATTERN))

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    if not _is_file(file):
        # ``file`` holds the contents themselves
        try:
            match = rxt.search(file)
            matched_bytes = False
        except TypeError:
            # a text pattern cannot scan byte contents; retry with the
            # bytes pattern instead of failing
            match = rxb.search(file)
            matched_bytes = True
        if match:
            enc = match.group(1).strip()
            if matched_bytes and not isinstance(enc, text_type):
                enc = enc.decode('utf-8')
            if charset_exists(enc):
                return enc
    else:
        # For PY3, always treat as binary
        if binary_mode or PY3:
            mode, rx = 'rb', rxb
        else:
            mode, rx = 'r', rxt
        # the context manager closes the handle on every exit path,
        # including exceptions raised while reading or matching
        with open(file, mode) as f:
            for line in f:
                match = rx.search(line)
                if match:
                    enc = match.group(1).strip()
                    if not isinstance(enc, text_type):
                        enc = enc.decode('utf-8')
                    if charset_exists(enc):
                        return enc
                    # unknown charset name: keep scanning later lines
    return default_encoding
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# function escape() {{{ | ||||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
def escape(st):
    """
    Return ``st`` with backslash, tab, carriage return, newline and
    double-quote characters replaced by their two-character backslash
    escape sequences.
    """
    # The backslash itself must go first, otherwise the backslashes
    # introduced by the later replacements would be doubled.
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    escaped = st
    for raw, quoted in replacements:
        escaped = escaped.replace(raw, quoted)
    return escaped
Wagner Bruna
|
r11387 | # }}} | ||
# function unescape() {{{ | ||||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
def unescape(st):
    """
    Return ``st`` with the two-character escape sequences for backslash,
    tab, newline, carriage return and double quote turned back into the
    characters they stand for.
    """
    # escape letter -> real character; the double quote is absent because
    # it maps to itself
    decoded = {'n': '\n', 't': '\t', 'r': '\r', '\\': '\\'}

    def unescape_repl(m):
        char = m.group(1)
        # anything not in the table (the escaped double quote) is kept
        return decoded.get(char, char)

    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# class _BaseFile {{{ | ||||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
class _BaseFile(list):
    """
    Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
    classes. This class should **not** be instantiated directly.

    Subclasses are expected to provide ``translated_entries()`` and
    ``obsolete_entries()``, which are called by methods of this class.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``pofile``
            string, the path to the po or mo file, or its content as a string.

        ``fpath``
            string, the file path to record when ``pofile`` is not an
            existing path (optional).

        ``wrapwidth``
            integer, the wrap width, only useful when the ``-w`` option was
            passed to xgettext (optional, default: ``78``).

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file, (optional, default: ``False``).
        """
        list.__init__(self)
        # remember the path only when ``pofile`` really is an existing file;
        # otherwise fall back to an explicit ``fpath`` (possibly None)
        pofile = kwargs.get('pofile', None)
        if pofile and _is_file(pofile):
            self.fpath = pofile
        else:
            self.fpath = kwargs.get('fpath')
        # the width at which lines should be wrapped
        self.wrapwidth = kwargs.get('wrapwidth', 78)
        # the file encoding
        self.encoding = kwargs.get('encoding', default_encoding)
        # whether to check for duplicate entries or not
        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __unicode__(self):
        """
        Returns the unicode representation of the file: the metadata entry,
        then the non-obsolete entries, then the obsolete entries, joined
        with newlines.
        """
        ret = []
        entries = [self.metadata_as_entry()] + [
            e for e in self if not e.obsolete
        ]
        for entry in entries:
            ret.append(entry.__unicode__(self.wrapwidth))
        for entry in self.obsolete_entries():
            ret.append(entry.__unicode__(self.wrapwidth))
        ret = u('\n').join(ret)

        assert isinstance(ret, text_type)
        return ret

    if PY3:

        def __str__(self):
            """
            Returns the string representation of the file.
            """
            return self.__unicode__()

    else:

        def __str__(self):
            """
            Returns the string representation of the file.
            """
            return unicode(self).encode(self.encoding)

    def __contains__(self, entry):
        """
        Overridden ``list`` method to implement the membership test (in and
        not in).
        The method considers that an entry is in the file if it finds an entry
        that has the same msgid (the test is **case sensitive**) and the same
        msgctxt (or none for both entries).

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        return (
            self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt)
            is not None
        )

    def __eq__(self, other):
        """
        Two files are equal when their string representations are equal.
        """
        return str(self) == str(other)

    def append(self, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        if self.check_for_duplicates and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).append(entry)

    def insert(self, index, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Arguments:

        ``index``
            index at which the entry should be inserted.

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        if self.check_for_duplicates and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).insert(index, entry)

    def metadata_as_entry(self):
        """
        Returns the file metadata as a :class:`~polib.POEntry` instance with
        an empty msgid; its msgstr holds one ``name: value`` line per
        metadata item, and it carries the ``fuzzy`` flag when
        ``metadata_is_fuzzy`` is set.
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            strs = []
            for name, value in mdata:
                strs.append('%s: %s' % (name, value))
            e.msgstr = '\n'.join(strs) + '\n'
        if self.metadata_is_fuzzy:
            e.flags.append('fuzzy')
        return e

    def save(self, fpath=None, repr_method='__unicode__'):
        """
        Saves the po file to ``fpath``.
        If it is an existing file and no ``fpath`` is provided, then the
        existing file is rewritten with the modified data.

        Keyword arguments:

        ``fpath``
            string, full or relative path to the file.

        ``repr_method``
            string, the method to use for output. ``'to_binary'`` writes the
            result in binary mode; any other method is written as text in
            the file encoding.

        Raises ``IOError`` when neither ``fpath`` nor ``self.fpath`` is set.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        if repr_method == 'to_binary':
            fhandle = open(fpath, 'wb')
        else:
            # decode before opening: opening truncates the target, so a
            # decoding error must surface while the old file is intact
            if not isinstance(contents, text_type):
                contents = contents.decode(self.encoding)
            fhandle = io.open(fpath, 'w', encoding=self.encoding)
        # always release the handle, even when the write fails
        try:
            fhandle.write(contents)
        finally:
            fhandle.close()
        # set the file path if not set
        if self.fpath is None and fpath:
            self.fpath = fpath

    def find(
        self, st, by='msgid', include_obsolete_entries=False, msgctxt=False
    ):
        """
        Find the entry which msgid (or property identified by the ``by``
        argument) matches the string ``st``. Returns the first matching
        entry, or ``None`` when there is none.

        Keyword arguments:

        ``st``
            string, the string to search for.

        ``by``
            string, the property to use for comparison (default: ``msgid``).

        ``include_obsolete_entries``
            boolean, whether to also search in entries that are obsolete.

        ``msgctxt``
            string, allows specifying a specific message context for the
            search. The default ``False`` disables context filtering;
            ``None`` matches only entries without a context.
        """
        if include_obsolete_entries:
            entries = self[:]
        else:
            entries = [e for e in self if not e.obsolete]
        for e in entries:
            if getattr(e, by) == st:
                if msgctxt is not False and e.msgctxt != msgctxt:
                    continue
                return e
        return None

    def ordered_metadata(self):
        """
        Convenience method that returns an ordered version of the metadata
        dictionary. The return value is list of tuples (metadata name,
        metadata_value).
        """
        # copy the dict first
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding',
            'Language',
            'Plural-Forms',
        ]
        ordered_data = []
        # well-known keys first, in the fixed order above
        for data in data_order:
            try:
                value = metadata.pop(data)
                ordered_data.append((data, value))
            except KeyError:
                pass
        # the rest of the metadata will be alphabetically ordered since there
        # are no specs for this AFAIK
        for data in sorted(metadata.keys()):
            value = metadata[data]
            ordered_data.append((data, value))
        return ordered_data

    def to_binary(self):
        """
        Return the binary representation of the file.
        """
        offsets = []
        entries = self.translated_entries()

        # NOTE: this sorts the list returned by translated_entries() in
        # place; MOFile.translated_entries() returns the file itself, so an
        # MOFile is reordered by this call.
        entries.sort(key=lambda o: o.msgctxt or o.msgid)
        # add metadata entry
        mentry = self.metadata_as_entry()
        entries = [mentry] + entries
        entries_len = len(entries)
        ids, strs = b(''), b('')
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            msgid = b('')
            if e.msgctxt:
                # Contexts are stored by storing the concatenation of the
                # context, a <EOT> byte, and the original string
                msgid = self._encode(e.msgctxt + '\4')
            if e.msgid_plural:
                msgstr = []
                for index in sorted(e.msgstr_plural.keys()):
                    msgstr.append(e.msgstr_plural[index])
                msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
                msgstr = self._encode('\0'.join(msgstr))
            else:
                msgid += self._encode(e.msgid)
                msgstr = self._encode(e.msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            ids += msgid + b('\0')
            strs += msgstr + b('\0')

        # The header is 7 32-bit unsigned integers; it is followed by the
        # key and value index tables (8 bytes per entry each), so the keys
        # start after 7 * 4 + 16 * entries_len bytes.
        keystart = 7 * 4 + 16 * entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1 + keystart]
            voffsets += [l2, o2 + valuestart]
        offsets = koffsets + voffsets

        output = struct.pack(
            "Iiiiiii",
            # Magic number
            MOFile.MAGIC,
            # Version
            0,
            # number of entries
            entries_len,
            # start of key index
            7 * 4,
            # start of value index
            7 * 4 + entries_len * 8,
            # size and offset of hash table, we don't use hash tables
            0,
            keystart,
        )
        if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
            output += array.array("i", offsets).tobytes()
        else:
            output += array.array("i", offsets).tostring()
        output += ids
        output += strs
        return output

    def _encode(self, mixed):
        """
        Encodes the given ``mixed`` argument with the file encoding if and
        only if it's an unicode string and returns the encoded string.
        """
        if isinstance(mixed, text_type):
            mixed = mixed.encode(self.encoding)
        return mixed
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# class POFile {{{ | ||||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
class POFile(_BaseFile):
    """
    Po (or Pot) file reader/writer.
    This class inherits the :class:`~polib._BaseFile` class and, by extension,
    the python ``list`` type.
    """

    def __unicode__(self):
        """
        Returns the unicode representation of the po file: the header
        rendered as ``#`` comment lines, followed by the entries.
        """
        comment_lines = []
        for line in self.header.split('\n'):
            if not line:
                comment_lines.append("#\n")
            elif line[:1] in (',', ':'):
                # flag / reference style lines get no space after the hash
                comment_lines.append('#%s\n' % line)
            else:
                comment_lines.append('# %s\n' % line)
        prefix = ''.join(comment_lines)

        if not isinstance(prefix, text_type):
            prefix = prefix.decode(self.encoding)

        return prefix + _BaseFile.__unicode__(self)

    def save_as_mofile(self, fpath):
        """
        Saves the binary representation of the file to given ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the mo file.
        """
        _BaseFile.save(self, fpath, repr_method='to_binary')

    def percent_translated(self):
        """
        Convenience method that returns the percentage of translated
        messages, as an integer. A file without any non-obsolete entry
        counts as 100.
        """
        active = sum(1 for e in self if not e.obsolete)
        if active == 0:
            return 100
        done = len(self.translated_entries())
        return int(done * 100 / float(active))

    def translated_entries(self):
        """
        Convenience method that returns the list of translated entries.
        """
        return [entry for entry in self if entry.translated()]

    def untranslated_entries(self):
        """
        Convenience method that returns the list of untranslated entries,
        i.e. those that are neither translated, obsolete nor fuzzy.
        """
        return [
            entry
            for entry in self
            if not (
                entry.translated()
                or entry.obsolete
                or 'fuzzy' in entry.flags
            )
        ]

    def fuzzy_entries(self):
        """
        Convenience method that returns the list of fuzzy entries.
        """
        return [entry for entry in self if 'fuzzy' in entry.flags]

    def obsolete_entries(self):
        """
        Convenience method that returns the list of obsolete entries.
        """
        return [entry for entry in self if entry.obsolete]

    def merge(self, refpot):
        """
        Convenience method that merges the current pofile with the pot file
        provided, in the manner of the gettext msgmerge utility.

        Every entry of ``refpot`` is merged into the entry of this file that
        has the same msgid; when there is none, a new entry is appended
        first. Entries of this file whose msgid does not appear in
        ``refpot`` are then marked obsolete. Matching is done on msgid only.

        Keyword argument:

        ``refpot``
            object POFile, the reference catalog.
        """
        # Index both catalogs by msgid for faster access
        own_by_msgid = {item.msgid: item for item in self}
        reference_msgids = {item.msgid for item in refpot}

        # Merge entries that are in the refpot
        for ref_entry in refpot:
            target = own_by_msgid.get(ref_entry.msgid)
            if target is None:
                target = POEntry()
                self.append(target)
            target.merge(ref_entry)

        # ok, now we must "obsolete" entries that are not in the refpot anymore
        for own_entry in self:
            if own_entry.msgid not in reference_msgids:
                own_entry.obsolete = True
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# class MOFile {{{ | ||||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
class MOFile(_BaseFile):
    """
    Mo file reader/writer.
    This class inherits the :class:`~polib._BaseFile` class and, by
    extension, the python ``list`` type.
    """

    # magic number written at the start of the binary output, and the same
    # four bytes in reversed order
    MAGIC = 0x950412DE
    MAGIC_SWAPPED = 0xDE120495

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts all keywords arguments accepted by
        :class:`~polib._BaseFile` class.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.version = 0
        self.magic_number = None

    def save_as_pofile(self, fpath):
        """
        Saves the mofile as a pofile to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        # call the base implementation directly so the text representation
        # is written instead of the binary one used by ``save``
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Saves the mofile to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, repr_method='to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances;
        always returns ``100``.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances;
        returns the file itself (not a copy).
        """
        return self

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances;
        always returns an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances;
        always returns an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances;
        always returns an empty list.
        """
        return []
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# class _BaseEntry {{{ | ||||
Augie Fackler
|
r40221 | |||
Gregory Szorc
|
class _BaseEntry:
    """
    Common ancestor of the :class:`~polib.POEntry` and
    :class:`~polib.MOEntry` classes.
    This class is **not** meant to be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor; positional arguments are ignored and the fields are
        read from the following keyword arguments:

        ``msgid``
            string, the entry msgid.

        ``msgstr``
            string, the entry msgstr.

        ``msgid_plural``
            string, the entry msgid_plural.

        ``msgstr_plural``
            list, the entry msgstr_plural lines.

        ``msgctxt``
            string, the entry context (msgctxt).

        ``obsolete``
            bool, whether the entry is "obsolete" or not.

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).
        """
        option = kwargs.get
        self.msgid = option('msgid', '')
        self.msgstr = option('msgstr', '')
        self.msgid_plural = option('msgid_plural', '')
        self.msgstr_plural = option('msgstr_plural', {})
        self.msgctxt = option('msgctxt', None)
        self.obsolete = option('obsolete', False)
        self.encoding = option('encoding', default_encoding)

    def __unicode__(self, wrapwidth=78):
        """
        Build the unicode text of the entry: msgctxt (when set), msgid,
        msgid_plural (when set) and then either every plural msgstr in
        index order or the single msgstr.
        """
        # obsolete entries have every emitted line prefixed with "#~ "
        prefix = '#~ ' if self.obsolete else ''
        out = []
        if self.msgctxt is not None:
            out.extend(
                self._str_field("msgctxt", prefix, "", self.msgctxt, wrapwidth)
            )
        out.extend(self._str_field("msgid", prefix, "", self.msgid, wrapwidth))
        if self.msgid_plural:
            out.extend(
                self._str_field(
                    "msgid_plural", prefix, "", self.msgid_plural, wrapwidth
                )
            )
        if self.msgstr_plural:
            # one msgstr[N] field per plural form, lowest index first
            plurals = self.msgstr_plural
            for idx in sorted(plurals):
                text = plurals[idx]
                suffix = '[%s]' % idx
                out.extend(
                    self._str_field("msgstr", prefix, suffix, text, wrapwidth)
                )
        else:
            out.extend(
                self._str_field("msgstr", prefix, "", self.msgstr, wrapwidth)
            )
        # the trailing empty item makes the joined text end with a newline
        out.append('')
        return u('\n').join(out)

    if PY3:

        def __str__(self):
            return self.__unicode__()

    else:

        def __str__(self):
            """
            Returns the string representation of the entry.
            """
            return unicode(self).encode(self.encoding)

    def __eq__(self, other):
        """Two entries are equal when their string forms are equal."""
        return str(self) == str(other)

    def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
        """
        Render one field as a list of po-file lines.

        The first returned line carries ``delflag``, the field name (with
        any ``previous_`` prefix removed), ``plural_index`` and the first
        quoted chunk; each following line is ``delflag`` plus one quoted
        chunk.  Multi-line values and values too long for ``wrapwidth``
        start with an empty first chunk.
        """
        pieces = field.splitlines(True)
        if len(pieces) > 1:
            # multi-line value: start with an initial empty line
            pieces.insert(0, '')
        else:
            escaped = escape(field)
            special_total = sum(
                field.count(ch) for ch in ('\\', '\n', '\r', '\t', '"')
            )
            # the comparison must account for the field name, one space
            # and the two quotes (eg. msgid "<string>")
            overhead = len(fieldname) + 3
            if plural_index:
                overhead += len(plural_index)
            limit = wrapwidth - overhead + special_total
            if wrapwidth > 0 and len(field) > limit:
                # too long: wrap the escaped text, leaving room for the
                # two quotes
                pieces = ['']
                pieces.extend(
                    unescape(chunk)
                    for chunk in wrap(
                        escaped,
                        wrapwidth - 2,  # 2 for quotes ""
                        drop_whitespace=False,
                        break_long_words=False,
                    )
                )
            else:
                pieces = [field]
        if fieldname.startswith('previous_'):
            # strip the 9-character "previous_" prefix to get the keyword
            fieldname = fieldname[9:]

        head = '%s%s%s "%s"' % (
            delflag,
            fieldname,
            plural_index,
            escape(pieces[0]),
        )
        tail = ['%s"%s"' % (delflag, escape(piece)) for piece in pieces[1:]]
        return [head] + tail
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# class POEntry {{{ | ||||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
class POEntry(_BaseEntry):
    """
    Represents a po file entry.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments (in addition
        to those handled by ``_BaseEntry.__init__``):

        ``comment``
            string, the entry comment.

        ``tcomment``
            string, the entry translator comment.

        ``occurrences``
            list, the entry occurrences.

        ``flags``
            list, the entry flags.

        ``previous_msgctxt``
            string, the entry previous context.

        ``previous_msgid``
            string, the entry previous msgid.

        ``previous_msgid_plural``
            string, the entry previous msgid_plural.

        ``linenum``
            integer, the line number of the entry
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])
        self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
        self.previous_msgid = kwargs.get('previous_msgid', None)
        self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
        self.linenum = kwargs.get('linenum', None)

    def __unicode__(self, wrapwidth=78):
        """
        Returns the unicode representation of the entry.

        Obsolete entries are rendered by ``_BaseEntry.__unicode__`` alone;
        other entries get their comments, occurrences, flags and
        "previous" fields written before the base representation.
        """
        if self.obsolete:
            return _BaseEntry.__unicode__(self, wrapwidth)

        ret = []
        # comments first, if any (with text wrapping as xgettext does)
        comments = [('comment', '#. '), ('tcomment', '# ')]
        for attr, prefix in comments:
            val = getattr(self, attr)
            if val:
                for comment in val.split('\n'):
                    if wrapwidth > 0 and len(comment) + len(prefix) > wrapwidth:
                        ret += wrap(
                            comment,
                            wrapwidth,
                            initial_indent=prefix,
                            subsequent_indent=prefix,
                            break_long_words=False,
                        )
                    else:
                        ret.append('%s%s' % (prefix, comment))

        # occurrences (with text wrapping as xgettext does)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                if lineno:
                    filelist.append('%s:%s' % (fpath, lineno))
                else:
                    filelist.append(fpath)
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
                # textwrap splits words that contain hyphens, which is not
                # what we want for filenames, so hyphens are temporarily
                # replaced with "*" while wrapping.
                # NOTE(review): a path that itself contains "*" comes back
                # with "-" in its place -- confirm whether that matters.
                ret += [
                    l.replace('*', '-')
                    for l in wrap(
                        filestr.replace('-', '*'),
                        wrapwidth,
                        initial_indent='#: ',
                        subsequent_indent='#: ',
                        break_long_words=False,
                    )
                ]
            else:
                ret.append('#: ' + filestr)

        # flags (TODO: wrapping ?)
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))

        # previous context and previous msgid/msgid_plural
        fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
        for f in fields:
            val = getattr(self, f)
            if val:
                ret += self._str_field(f, "#| ", "", val, wrapwidth)

        ret.append(_BaseEntry.__unicode__(self, wrapwidth))
        ret = u('\n').join(ret)

        assert isinstance(ret, text_type)
        return ret

    def __cmp__(self, other):
        """
        Three-way comparison used by all the rich comparison methods.

        Returns a negative number, zero or a positive number.  Entries are
        ordered, in turn, by: obsolete flag (obsolete entries first),
        sorted occurrences (pairwise, a shorter list that is a prefix of
        the other sorts first), ``msgid_plural`` and finally ``msgid``.
        """
        # First: Obsolete test
        if self.obsolete != other.obsolete:
            if self.obsolete:
                return -1
            else:
                return 1

        # sorted() returns new lists, so the originals are left untouched
        occ1 = sorted(self.occurrences)
        occ2 = sorted(other.occurrences)
        for pos, entry1 in enumerate(occ1):
            try:
                entry2 = occ2[pos]
            except IndexError:
                # other ran out of occurrences first
                return 1
            if entry1[0] != entry2[0]:
                if entry1[0] > entry2[0]:
                    return 1
                else:
                    return -1
            if entry1[1] != entry2[1]:
                if entry1[1] > entry2[1]:
                    return 1
                else:
                    return -1
        if len(occ1) < len(occ2):
            # mirror of the IndexError case above, so that
            # a.__cmp__(b) and b.__cmp__(a) always agree
            return -1

        # Compare msgid_plural.  It is a plain string, so it is compared
        # as a whole; an unset value sorts before any set value.
        plural = self.msgid_plural or ''
        other_plural = other.msgid_plural or ''
        if plural > other_plural:
            return 1
        if plural < other_plural:
            return -1

        # Finally: Compare message ID
        if self.msgid > other.msgid:
            return 1
        elif self.msgid < other.msgid:
            return -1
        return 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def translated(self):
        """
        Returns ``True`` if the entry has been translated or ``False``
        otherwise.

        Obsolete and fuzzy entries are never considered translated; a
        plural entry is translated only when none of its plural forms is
        empty.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False

    def merge(self, other):
        """
        Merge the current entry with the given pot entry.

        Identification and reference data (msgid, msgctxt, occurrences,
        comment, flags, msgid_plural, obsolete flag, previous_* fields)
        are taken from ``other``; an existing ``fuzzy`` flag and existing
        translations are kept.
        """
        self.msgid = other.msgid
        self.msgctxt = other.msgctxt
        self.occurrences = other.occurrences
        self.comment = other.comment
        fuzzy = 'fuzzy' in self.flags
        self.flags = other.flags[:]  # clone flags
        if fuzzy:
            self.flags.append('fuzzy')
        self.msgid_plural = other.msgid_plural
        self.obsolete = other.obsolete
        self.previous_msgctxt = other.previous_msgctxt
        self.previous_msgid = other.previous_msgid
        self.previous_msgid_plural = other.previous_msgid_plural
        if other.msgstr_plural:
            for pos in other.msgstr_plural:
                # keep the existing translation at pos if any, otherwise
                # create an empty slot for the new plural form
                if pos not in self.msgstr_plural:
                    self.msgstr_plural[pos] = ''

    def __hash__(self):
        return hash((self.msgid, self.msgstr))
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# class MOEntry {{{ | ||||
Augie Fackler
|
r40221 | |||
Wagner Bruna
|
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments,
        for consistency with :class:`~polib.POEntry`:

        ``comment``
        ``tcomment``
        ``occurrences``
        ``flags``
        ``previous_msgctxt``
        ``previous_msgid``
        ``previous_msgid_plural``

        Note: even though these keyword arguments are accepted,
        they hold no real meaning in the context of MO files
        and are simply ignored: the matching attributes are always
        reset to their empty values.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        # po-only attributes, always blank on a mo entry
        self.comment = self.tcomment = ''
        self.occurrences, self.flags = [], []
        self.previous_msgctxt = None
        self.previous_msgid = None
        self.previous_msgid_plural = None

    def __hash__(self):
        """Hash on the (msgid, msgstr) pair."""
        key = (self.msgid, self.msgstr)
        return hash(key)
Wagner Bruna
|
r11387 | |||
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# class _POFileParser {{{ | ||||
Augie Fackler
|
r40221 | |||
Gregory Szorc
|
r49801 | class _POFileParser: | ||
Wagner Bruna
|
r11387 | """ | ||
A finite state machine to parse efficiently and correctly po | ||||
file format. | ||||
""" | ||||
Wagner Bruna
|
r15290 | def __init__(self, pofile, *args, **kwargs): | ||
Wagner Bruna
|
r11387 | """ | ||
Constructor. | ||||
Wagner Bruna
|
r15290 | Keyword arguments: | ||
``pofile`` | ||||
string, path to the po file or its content | ||||
``encoding`` | ||||
string, the encoding to use, defaults to ``default_encoding`` | ||||
global variable (optional). | ||||
``check_for_duplicates`` | ||||
whether to check for duplicate entries when adding entries to the | ||||
file (optional, default: ``False``). | ||||
Wagner Bruna
|
r11387 | """ | ||
enc = kwargs.get('encoding', default_encoding) | ||||
Augie Fackler
|
r40221 | if _is_file(pofile): | ||
Wagner Bruna
|
r15290 | try: | ||
Augie Fackler
|
r40221 | self.fhandle = io.open(pofile, 'rt', encoding=enc) | ||
Wagner Bruna
|
r15290 | except LookupError: | ||
enc = default_encoding | ||||
Augie Fackler
|
r40221 | self.fhandle = io.open(pofile, 'rt', encoding=enc) | ||
Wagner Bruna
|
r15290 | else: | ||
self.fhandle = pofile.splitlines() | ||||
Augie Fackler
|
r40221 | klass = kwargs.get('klass') | ||
if klass is None: | ||||
klass = POFile | ||||
self.instance = klass( | ||||
Wagner Bruna
|
r15290 | pofile=pofile, | ||
Wagner Bruna
|
r11387 | encoding=enc, | ||
Augie Fackler
|
r43346 | check_for_duplicates=kwargs.get('check_for_duplicates', False), | ||
Wagner Bruna
|
r11387 | ) | ||
self.transitions = {} | ||||
Augie Fackler
|
r40221 | self.current_line = 0 | ||
self.current_entry = POEntry(linenum=self.current_line) | ||||
self.current_state = 'st' | ||||
Wagner Bruna
|
r11387 | self.current_token = None | ||
# two memo flags used in handlers | ||||
self.msgstr_index = 0 | ||||
self.entry_obsolete = 0 | ||||
# Configure the state machine, by adding transitions. | ||||
# Signification of symbols: | ||||
# * ST: Beginning of the file (start) | ||||
# * HE: Header | ||||
# * TC: a translation comment | ||||
# * GC: a generated comment | ||||
Augie Fackler
|
r40221 | # * OC: a file/line occurrence | ||
Wagner Bruna
|
r11387 | # * FL: a flags line | ||
# * CT: a message context | ||||
# * PC: a previous msgctxt | ||||
# * PM: a previous msgid | ||||
# * PP: a previous msgid_plural | ||||
# * MI: a msgid | ||||
# * MP: a msgid plural | ||||
# * MS: a msgstr | ||||
# * MX: a msgstr plural | ||||
# * MC: a msgid or msgstr continuation line | ||||
Augie Fackler
|
r43346 | all = [ | ||
'st', | ||||
'he', | ||||
'gc', | ||||
'oc', | ||||
'fl', | ||||
'ct', | ||||
'pc', | ||||
'pm', | ||||
'pp', | ||||
'tc', | ||||
'ms', | ||||
'mp', | ||||
'mx', | ||||
'mi', | ||||
] | ||||
Wagner Bruna
|
r11387 | |||
Augie Fackler
|
r43346 | self.add('tc', ['st', 'he'], 'he') | ||
self.add( | ||||
'tc', | ||||
['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mp', 'mx', 'mi'], | ||||
'tc', | ||||
) | ||||
self.add('gc', all, 'gc') | ||||
self.add('oc', all, 'oc') | ||||
self.add('fl', all, 'fl') | ||||
self.add('pc', all, 'pc') | ||||
self.add('pm', all, 'pm') | ||||
self.add('pp', all, 'pp') | ||||
self.add( | ||||
'ct', | ||||
['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mx'], | ||||
'ct', | ||||
) | ||||
self.add( | ||||
'mi', | ||||
[ | ||||
'st', | ||||
'he', | ||||
'gc', | ||||
'oc', | ||||
'fl', | ||||
'ct', | ||||
'tc', | ||||
'pc', | ||||
'pm', | ||||
'pp', | ||||
'ms', | ||||
'mx', | ||||
], | ||||
'mi', | ||||
) | ||||
self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp') | ||||
self.add('ms', ['mi', 'mp', 'tc'], 'ms') | ||||
self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx') | ||||
Augie Fackler
|
r40221 | self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc') | ||
Wagner Bruna
|
r11387 | |||
def parse(self): | ||||
""" | ||||
Run the state machine, parse the file line by line and call process() | ||||
with the current matched symbol. | ||||
""" | ||||
Wagner Bruna
|
r15290 | |||
keywords = { | ||||
Augie Fackler
|
r40221 | 'msgctxt': 'ct', | ||
'msgid': 'mi', | ||||
'msgstr': 'ms', | ||||
'msgid_plural': 'mp', | ||||
Wagner Bruna
|
r15290 | } | ||
prev_keywords = { | ||||
Augie Fackler
|
r40221 | 'msgid_plural': 'pp', | ||
'msgid': 'pm', | ||||
'msgctxt': 'pc', | ||||
Wagner Bruna
|
r15290 | } | ||
Augie Fackler
|
r40221 | tokens = [] | ||
Wagner Bruna
|
r11387 | for line in self.fhandle: | ||
Augie Fackler
|
r40221 | self.current_line += 1 | ||
Wagner Bruna
|
r11387 | line = line.strip() | ||
if line == '': | ||||
continue | ||||
Wagner Bruna
|
r15290 | |||
tokens = line.split(None, 2) | ||||
nb_tokens = len(tokens) | ||||
Augie Fackler
|
r40221 | if tokens[0] == '#~|': | ||
continue | ||||
Wagner Bruna
|
r15290 | if tokens[0] == '#~' and nb_tokens > 1: | ||
line = line[3:].strip() | ||||
tokens = tokens[1:] | ||||
nb_tokens -= 1 | ||||
Wagner Bruna
|
r11387 | self.entry_obsolete = 1 | ||
else: | ||||
self.entry_obsolete = 0 | ||||
Wagner Bruna
|
r15290 | |||
# Take care of keywords like | ||||
# msgid, msgid_plural, msgctxt & msgstr. | ||||
if tokens[0] in keywords and nb_tokens > 1: | ||||
Augie Fackler
|
r43346 | line = line[len(tokens[0]) :].lstrip() | ||
Augie Fackler
|
r40221 | if re.search(r'([^\\]|^)"', line[1:-1]): | ||
Augie Fackler
|
r43346 | raise IOError( | ||
'Syntax error in po file %s (line %s): ' | ||||
'unescaped double quote found' | ||||
% (self.instance.fpath, self.current_line) | ||||
) | ||||
Wagner Bruna
|
r15290 | self.current_token = line | ||
Augie Fackler
|
r40221 | self.process(keywords[tokens[0]]) | ||
Wagner Bruna
|
r15290 | continue | ||
Wagner Bruna
|
r11387 | self.current_token = line | ||
Wagner Bruna
|
r15290 | |||
Augie Fackler
|
r40221 | if tokens[0] == '#:': | ||
if nb_tokens <= 1: | ||||
continue | ||||
Wagner Bruna
|
r11387 | # we are on a occurrences line | ||
Augie Fackler
|
r40221 | self.process('oc') | ||
Wagner Bruna
|
r15290 | |||
elif line[:1] == '"': | ||||
# we are on a continuation line | ||||
Augie Fackler
|
r40221 | if re.search(r'([^\\]|^)"', line[1:-1]): | ||
Augie Fackler
|
r43346 | raise IOError( | ||
'Syntax error in po file %s (line %s): ' | ||||
'unescaped double quote found' | ||||
% (self.instance.fpath, self.current_line) | ||||
) | ||||
Augie Fackler
|
r40221 | self.process('mc') | ||
Wagner Bruna
|
r15290 | |||
Wagner Bruna
|
r11387 | elif line[:7] == 'msgstr[': | ||
# we are on a msgstr plural | ||||
Augie Fackler
|
r40221 | self.process('mx') | ||
Wagner Bruna
|
r15290 | |||
Augie Fackler
|
r40221 | elif tokens[0] == '#,': | ||
if nb_tokens <= 1: | ||||
continue | ||||
Wagner Bruna
|
r11387 | # we are on a flags line | ||
Augie Fackler
|
r40221 | self.process('fl') | ||
Wagner Bruna
|
r15290 | |||
Augie Fackler
|
r40221 | elif tokens[0] == '#' or tokens[0].startswith('##'): | ||
if line == '#': | ||||
line += ' ' | ||||
Wagner Bruna
|
r11387 | # we are on a translator comment line | ||
Augie Fackler
|
r40221 | self.process('tc') | ||
Wagner Bruna
|
r15290 | |||
Augie Fackler
|
r40221 | elif tokens[0] == '#.': | ||
if nb_tokens <= 1: | ||||
continue | ||||
Wagner Bruna
|
r11387 | # we are on a generated comment line | ||
Augie Fackler
|
r40221 | self.process('gc') | ||
Wagner Bruna
|
r15290 | |||
elif tokens[0] == '#|': | ||||
Augie Fackler
|
r40221 | if nb_tokens <= 1: | ||
Augie Fackler
|
r43346 | raise IOError( | ||
'Syntax error in po file %s (line %s)' | ||||
% (self.instance.fpath, self.current_line) | ||||
) | ||||
Wagner Bruna
|
r15290 | |||
# Remove the marker and any whitespace right after that. | ||||
line = line[2:].lstrip() | ||||
self.current_token = line | ||||
if tokens[1].startswith('"'): | ||||
# Continuation of previous metadata. | ||||
Augie Fackler
|
r40221 | self.process('mc') | ||
Wagner Bruna
|
r15290 | continue | ||
if nb_tokens == 2: | ||||
# Invalid continuation line. | ||||
Augie Fackler
|
r43346 | raise IOError( | ||
'Syntax error in po file %s (line %s): ' | ||||
'invalid continuation line' | ||||
% (self.instance.fpath, self.current_line) | ||||
) | ||||
Wagner Bruna
|
r15290 | |||
# we are on a "previous translation" comment line, | ||||
if tokens[1] not in prev_keywords: | ||||
# Unknown keyword in previous translation comment. | ||||
Augie Fackler
|
r43346 | raise IOError( | ||
'Syntax error in po file %s (line %s): ' | ||||
'unknown keyword %s' | ||||
% (self.instance.fpath, self.current_line, tokens[1]) | ||||
) | ||||
Wagner Bruna
|
r15290 | |||
# Remove the keyword and any whitespace | ||||
# between it and the starting quote. | ||||
Augie Fackler
|
r43346 | line = line[len(tokens[1]) :].lstrip() | ||
Wagner Bruna
|
r15290 | self.current_token = line | ||
Augie Fackler
|
r40221 | self.process(prev_keywords[tokens[1]]) | ||
Wagner Bruna
|
r15290 | |||
else: | ||||
Augie Fackler
|
r43346 | raise IOError( | ||
'Syntax error in po file %s (line %s)' | ||||
% (self.instance.fpath, self.current_line) | ||||
) | ||||
Wagner Bruna
|
r11387 | |||
Augie Fackler
|
r43346 | if ( | ||
self.current_entry | ||||
and len(tokens) > 0 | ||||
and not tokens[0].startswith('#') | ||||
): | ||||
Wagner Bruna
|
r11387 | # since entries are added when another entry is found, we must add | ||
Augie Fackler
|
r40221 | # the last entry here (only if there are lines). Trailing comments | ||
# are ignored | ||||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | |||
Mads Kiilerich
|
r19023 | # before returning the instance, check if there's metadata and if | ||
Wagner Bruna
|
r11387 | # so extract it in a dict | ||
Augie Fackler
|
r40221 | metadataentry = self.instance.find('') | ||
if metadataentry: # metadata found | ||||
Wagner Bruna
|
r11387 | # remove the entry | ||
Augie Fackler
|
r40221 | self.instance.remove(metadataentry) | ||
self.instance.metadata_is_fuzzy = metadataentry.flags | ||||
Wagner Bruna
|
r11387 | key = None | ||
Augie Fackler
|
r40221 | for msg in metadataentry.msgstr.splitlines(): | ||
Wagner Bruna
|
r11387 | try: | ||
key, val = msg.split(':', 1) | ||||
self.instance.metadata[key] = val.strip() | ||||
Augie Fackler
|
r40221 | except (ValueError, KeyError): | ||
Wagner Bruna
|
r11387 | if key is not None: | ||
Augie Fackler
|
r40221 | self.instance.metadata[key] += '\n' + msg.strip() | ||
Wagner Bruna
|
r11387 | # close opened file | ||
Augie Fackler
|
r40221 | if not isinstance(self.fhandle, list): # must be file | ||
Wagner Bruna
|
r15290 | self.fhandle.close() | ||
Wagner Bruna
|
r11387 | return self.instance | ||
def add(self, symbol, states, next_state): | ||||
""" | ||||
Add a transition to the state machine. | ||||
Wagner Bruna
|
r15290 | |||
Wagner Bruna
|
r11387 | Keywords arguments: | ||
Wagner Bruna
|
r15290 | ``symbol`` | ||
string, the matched token (two chars symbol). | ||||
``states`` | ||||
list, a list of states (two chars symbols). | ||||
``next_state`` | ||||
the next state the fsm will have after the action. | ||||
Wagner Bruna
|
r11387 | """ | ||
for state in states: | ||||
Augie Fackler
|
r40221 | action = getattr(self, 'handle_%s' % next_state) | ||
Wagner Bruna
|
r11387 | self.transitions[(symbol, state)] = (action, next_state) | ||
Augie Fackler
|
r40221 | def process(self, symbol): | ||
Wagner Bruna
|
r11387 | """ | ||
Process the transition corresponding to the current state and the | ||||
symbol provided. | ||||
Keywords arguments: | ||||
Wagner Bruna
|
r15290 | |||
``symbol`` | ||||
string, the matched token (two chars symbol). | ||||
``linenum`` | ||||
integer, the current line number of the parsed file. | ||||
Wagner Bruna
|
r11387 | """ | ||
try: | ||||
(action, state) = self.transitions[(symbol, self.current_state)] | ||||
if action(): | ||||
self.current_state = state | ||||
Augie Fackler
|
r40221 | except Exception: | ||
Augie Fackler
|
r43346 | raise IOError( | ||
'Syntax error in po file (line %s)' % self.current_line | ||||
) | ||||
Wagner Bruna
|
r11387 | |||
# state handlers | ||||
def handle_he(self): | ||||
"""Handle a header comment.""" | ||||
if self.instance.header != '': | ||||
self.instance.header += '\n' | ||||
self.instance.header += self.current_token[2:] | ||||
return 1 | ||||
def handle_tc(self): | ||||
"""Handle a translator comment.""" | ||||
Augie Fackler
|
r40221 | if self.current_state in ['mc', 'ms', 'mx']: | ||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Wagner Bruna
|
r11387 | if self.current_entry.tcomment != '': | ||
self.current_entry.tcomment += '\n' | ||||
Augie Fackler
|
r40221 | tcomment = self.current_token.lstrip('#') | ||
if tcomment.startswith(' '): | ||||
tcomment = tcomment[1:] | ||||
self.current_entry.tcomment += tcomment | ||||
Wagner Bruna
|
r11387 | return True | ||
def handle_gc(self): | ||||
"""Handle a generated comment.""" | ||||
Augie Fackler
|
r40221 | if self.current_state in ['mc', 'ms', 'mx']: | ||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Wagner Bruna
|
r11387 | if self.current_entry.comment != '': | ||
self.current_entry.comment += '\n' | ||||
self.current_entry.comment += self.current_token[3:] | ||||
return True | ||||
def handle_oc(self): | ||||
Augie Fackler
|
r40221 | """Handle a file:num occurrence.""" | ||
if self.current_state in ['mc', 'ms', 'mx']: | ||||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Wagner Bruna
|
r11387 | occurrences = self.current_token[3:].split() | ||
for occurrence in occurrences: | ||||
if occurrence != '': | ||||
try: | ||||
fil, line = occurrence.split(':') | ||||
if not line.isdigit(): | ||||
Augie Fackler
|
r40221 | fil = fil + line | ||
Wagner Bruna
|
r11387 | line = '' | ||
self.current_entry.occurrences.append((fil, line)) | ||||
Augie Fackler
|
r40221 | except (ValueError, AttributeError): | ||
Wagner Bruna
|
r11387 | self.current_entry.occurrences.append((occurrence, '')) | ||
return True | ||||
def handle_fl(self): | ||||
"""Handle a flags line.""" | ||||
Augie Fackler
|
r40221 | if self.current_state in ['mc', 'ms', 'mx']: | ||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Augie Fackler
|
r43346 | self.current_entry.flags += [ | ||
c.strip() for c in self.current_token[3:].split(',') | ||||
] | ||||
Wagner Bruna
|
r11387 | return True | ||
def handle_pp(self): | ||||
"""Handle a previous msgid_plural line.""" | ||||
Augie Fackler
|
r40221 | if self.current_state in ['mc', 'ms', 'mx']: | ||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Augie Fackler
|
r43346 | self.current_entry.previous_msgid_plural = unescape( | ||
self.current_token[1:-1] | ||||
) | ||||
Wagner Bruna
|
r11387 | return True | ||
def handle_pm(self): | ||||
"""Handle a previous msgid line.""" | ||||
Augie Fackler
|
r40221 | if self.current_state in ['mc', 'ms', 'mx']: | ||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Augie Fackler
|
r43346 | self.current_entry.previous_msgid = unescape(self.current_token[1:-1]) | ||
Wagner Bruna
|
r11387 | return True | ||
def handle_pc(self): | ||||
"""Handle a previous msgctxt line.""" | ||||
Augie Fackler
|
r40221 | if self.current_state in ['mc', 'ms', 'mx']: | ||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Augie Fackler
|
r43346 | self.current_entry.previous_msgctxt = unescape(self.current_token[1:-1]) | ||
Wagner Bruna
|
r11387 | return True | ||
def handle_ct(self): | ||||
"""Handle a msgctxt.""" | ||||
Augie Fackler
|
r40221 | if self.current_state in ['mc', 'ms', 'mx']: | ||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Wagner Bruna
|
r15290 | self.current_entry.msgctxt = unescape(self.current_token[1:-1]) | ||
Wagner Bruna
|
r11387 | return True | ||
def handle_mi(self): | ||||
"""Handle a msgid.""" | ||||
Augie Fackler
|
r40221 | if self.current_state in ['mc', 'ms', 'mx']: | ||
Wagner Bruna
|
r11387 | self.instance.append(self.current_entry) | ||
Augie Fackler
|
r40221 | self.current_entry = POEntry(linenum=self.current_line) | ||
Wagner Bruna
|
r11387 | self.current_entry.obsolete = self.entry_obsolete | ||
Wagner Bruna
|
r15290 | self.current_entry.msgid = unescape(self.current_token[1:-1]) | ||
Wagner Bruna
|
r11387 | return True | ||
def handle_mp(self): | ||||
"""Handle a msgid plural.""" | ||||
Wagner Bruna
|
r15290 | self.current_entry.msgid_plural = unescape(self.current_token[1:-1]) | ||
Wagner Bruna
|
r11387 | return True | ||
def handle_ms(self): | ||||
"""Handle a msgstr.""" | ||||
Wagner Bruna
|
r15290 | self.current_entry.msgstr = unescape(self.current_token[1:-1]) | ||
Wagner Bruna
|
r11387 | return True | ||
def handle_mx(self): | ||||
"""Handle a msgstr plural.""" | ||||
Augie Fackler
|
r40221 | index = self.current_token[7] | ||
Augie Fackler
|
r43346 | value = self.current_token[self.current_token.find('"') + 1 : -1] | ||
Augie Fackler
|
r40221 | self.current_entry.msgstr_plural[int(index)] = unescape(value) | ||
self.msgstr_index = int(index) | ||||
Wagner Bruna
|
r11387 | return True | ||
def handle_mc(self):
    """
    Handle a msgid or msgstr continuation line.

    The unescaped token (minus its first and last character) is appended
    to the field of the current entry selected by the current parser
    state. For the 'mx' state the text is appended to the plural form
    whose index is stored in ``self.msgstr_index``. Unknown states leave
    the entry untouched.

    Always returns False.
    """
    fragment = unescape(self.current_token[1:-1])
    state = self.current_state
    entry = self.current_entry
    if state == 'mx':
        # plural translations live in a dict keyed by plural index
        entry.msgstr_plural[self.msgstr_index] += fragment
    else:
        # parser state -> name of the entry attribute being continued
        attribute = {
            'ct': 'msgctxt',
            'mi': 'msgid',
            'mp': 'msgid_plural',
            'ms': 'msgstr',
            'pp': 'previous_msgid_plural',
            'pm': 'previous_msgid',
            'pc': 'previous_msgctxt',
        }.get(state)
        if attribute is not None:
            setattr(entry, attribute, getattr(entry, attribute) + fragment)
    # don't change the current state
    return False
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||
# class _MOFileParser {{{ | ||||
Augie Fackler
|
r40221 | |||
Gregory Szorc
|
class _MOFileParser:
    """
    A class to parse binary mo files.
    """

    def __init__(self, mofile, *args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``mofile``
            string, path to the mo file (it is opened in binary mode).

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class used to build the resulting instance, defaults to
            ``MOFile`` (optional).
        """
        self.fhandle = open(mofile, 'rb')

        klass = kwargs.get('klass')
        if klass is None:
            klass = MOFile
        self.instance = klass(
            fpath=mofile,
            encoding=kwargs.get('encoding', default_encoding),
            check_for_duplicates=kwargs.get('check_for_duplicates', False),
        )

    def __del__(self):
        """
        Make sure the file is closed, this prevents warnings on unclosed file
        when running tests with python >= 3.2.
        """
        # ``fhandle`` is missing when open() failed in the constructor
        fhandle = getattr(self, 'fhandle', None)
        if fhandle:
            fhandle.close()

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated instance. Raises IOError when the magic
        number or the major revision number is not a supported one. The
        file handle is closed whether parsing succeeds or fails.
        """
        try:
            self._parse_entries()
        finally:
            # close opened file
            self.fhandle.close()
        return self.instance

    @staticmethod
    def _check_revision(version):
        """
        Raise IOError if the major revision of ``version`` is unsupported.

        The revision word holds the major revision in its upper 16 bits and
        the minor revision in its lower 16 bits; only the major part decides
        whether the file can be read.
        """
        # from MO file format specs: "A program seeing an unexpected major
        # revision number should stop reading the MO file entirely"
        if version >> 16 not in (0, 1):
            raise IOError('Invalid mo file, unexpected major revision number')

    def _read_index(self, fmt, offset, count):
        """
        Read ``count`` (length, offset) pairs starting at file ``offset``.
        """
        self.fhandle.seek(offset)
        return [self._readbinary(fmt, 8) for _ in range(count)]

    def _parse_metadata(self, raw):
        """
        Turn the raw ``key: value`` header lines into a metadata dict.
        """
        encoding = self.instance.encoding
        metadata = {}
        for line in raw.split(b('\n')):
            tokens = line.split(b(':'), 1)
            if tokens[0] == b(''):
                continue
            key = tokens[0].decode(encoding)
            if len(tokens) > 1:
                metadata[key] = tokens[1].decode(encoding).strip()
            else:
                # a line without a colon yields a key with an empty value
                metadata[key] = u('')
        return metadata

    def _parse_entries(self):
        """
        Read the header, the string tables and every entry of the file.
        """
        # parse magic number
        magic_number = self._readbinary('<I', 4)
        if magic_number == MOFile.MAGIC:
            ii = '<II'
        elif magic_number == MOFile.MAGIC_SWAPPED:
            ii = '>II'
        else:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number
        # parse the version number and the number of strings
        version, numofstrings = self._readbinary(ii, 8)
        self._check_revision(version)
        self.instance.version = version
        # original strings and translation strings hash table offset
        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
        # read length and offset of every msgid, then of every msgstr
        msgids_index = self._read_index(ii, msgids_hash_offset, numofstrings)
        msgstrs_index = self._read_index(
            ii, msgstrs_hash_offset, numofstrings
        )
        # build entries
        pairs = zip(msgids_index, msgstrs_index)
        for i, (msgid_loc, msgstr_loc) in enumerate(pairs):
            self.fhandle.seek(msgid_loc[1])
            msgid = self.fhandle.read(msgid_loc[0])

            self.fhandle.seek(msgstr_loc[1])
            msgstr = self.fhandle.read(msgstr_loc[0])
            if i == 0 and not msgid:  # metadata
                self.instance.metadata = self._parse_metadata(msgstr)
                continue
            # test if we have a plural entry
            msgid_tokens = msgid.split(b('\0'))
            if len(msgid_tokens) > 1:
                entry = self._build_entry(
                    msgid=msgid_tokens[0],
                    msgid_plural=msgid_tokens[1],
                    msgstr_plural=dict(enumerate(msgstr.split(b('\0')))),
                )
            else:
                entry = self._build_entry(msgid=msgid, msgstr=msgstr)
            self.instance.append(entry)

    def _build_entry(
        self, msgid, msgstr=None, msgid_plural=None, msgstr_plural=None
    ):
        """
        Build a MOEntry from raw byte strings read from the mo file.

        ``msgid`` may carry a context, separated from the message id by a
        ``\\x04`` byte. Every value is decoded with the encoding of the
        instance; empty or missing optional values are not passed on to
        the entry.
        """
        encoding = self.instance.encoding
        msgctxt_msgid = msgid.split(b('\x04'))
        if len(msgctxt_msgid) > 1:
            kwargs = {
                'msgctxt': msgctxt_msgid[0].decode(encoding),
                'msgid': msgctxt_msgid[1].decode(encoding),
            }
        else:
            kwargs = {'msgid': msgid.decode(encoding)}
        if msgstr:
            kwargs['msgstr'] = msgstr.decode(encoding)
        if msgid_plural:
            kwargs['msgid_plural'] = msgid_plural.decode(encoding)
        if msgstr_plural:
            # decode into a new dict rather than mutating the argument
            kwargs['msgstr_plural'] = {
                k: v.decode(encoding) for k, v in msgstr_plural.items()
            }
        return MOEntry(**kwargs)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r40221 | # }}} | ||
# class TextWrapper {{{ | ||||
class TextWrapper(textwrap.TextWrapper):
    """
    Subclass of textwrap.TextWrapper that backport the
    drop_whitespace option.
    """

    def __init__(self, *args, **kwargs):
        """
        Accept the arguments of textwrap.TextWrapper plus the
        ``drop_whitespace`` keyword (default: ``True``).

        The keyword is removed before calling the parent constructor, so
        the class also works with a parent that does not know the option.
        """
        explicit = 'drop_whitespace' in kwargs
        drop_whitespace = kwargs.pop('drop_whitespace', True)
        textwrap.TextWrapper.__init__(self, *args, **kwargs)
        # Only override what the parent stored when the caller used the
        # keyword (or the parent has no such attribute); otherwise a value
        # passed positionally would be clobbered by the default.
        if explicit or not hasattr(self, 'drop_whitespace'):
            self.drop_whitespace = drop_whitespace

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:
            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and not cur_line[-1].strip():
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines
Augie Fackler
|
r43346 | |||
Augie Fackler
|
r40221 | # }}} | ||
# function wrap() {{{ | ||||
def wrap(text, width=70, **kwargs):
    """
    Wrap a single paragraph of text, returning a list of wrapped lines.

    Extra keyword arguments are forwarded to the underlying wrapper. The
    module's own TextWrapper is only used on python < 2.6; otherwise the
    call is delegated to ``textwrap.wrap``.
    """
    needs_backport = sys.version_info < (2, 6)
    if not needs_backport:
        return textwrap.wrap(text, width=width, **kwargs)
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)
Wagner Bruna
|
r11387 | |||
Augie Fackler
|
r43346 | |||
Wagner Bruna
|
r11387 | # }}} | ||