##// END OF EJS Templates
i18n: import polib 0.6.4 (rev 84598f2b5365)...
Wagner Bruna -
r15290:e40430fb stable
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (1425 lines changed) Show them Hide them
@@ -1,1680 +1,1639
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 # no-check-code
2 # no-check-code
3 #
3 #
4 # License: MIT (see LICENSE file provided)
4 # License: MIT (see LICENSE file provided)
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6
6
7 """
7 """
8 **polib** allows you to manipulate, create, modify gettext files (pot, po
8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
9 and mo files). You can load existing files, iterate through its entries,
9 mo files). You can load existing files, iterate through its entries, add,
10 add, modify entries, comments or metadata, etc... or create new po files
10 modify entries, comments or metadata, etc. or create new po files from scratch.
11 from scratch.
12
13 **polib** provides a simple and pythonic API, exporting only three
14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
16 new files/entries.
17
18 **Basic example**:
19
11
20 >>> import polib
12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
21 >>> # load an existing po file
13 :func:`~polib.mofile` convenience functions.
22 >>> po = polib.pofile('tests/test_utf8.po')
23 >>> for entry in po:
24 ... # do something with entry...
25 ... pass
26 >>> # add an entry
27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
29 >>> po.append(entry)
30 >>> # to save our modified po file:
31 >>> # po.save()
32 >>> # or you may want to compile the po file
33 >>> # po.save_as_mofile('tests/test_utf8.mo')
34 """
14 """
35
15
# Module metadata (polib 0.6.4).
__author__ = 'David Jean Louis <izimobil@gmail.com>'
__version__ = '0.6.4'
# Public API; each name is listed exactly once.
__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
           'detect_encoding', 'escape', 'unescape']
40
20
21 import array
41 import codecs
22 import codecs
23 import os
24 import re
42 import struct
25 import struct
26 import sys
43 import textwrap
27 import textwrap
44 import types
28 import types
45 import re
29
46
30
# the default encoding to use when encoding cannot be detected
default_encoding = 'utf-8'
48
33
49 # function pofile() {{{
34 # _pofile_or_mofile {{{
50
def _pofile_or_mofile(f, type, **kwargs):
    """
    Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
    honor the DRY concept.

    Arguments:

    ``f``
        string, path to the po/mo file or its content.

    ``type``
        string, ``'pofile'`` selects the po parser, any other value (in
        practice ``'mofile'``) selects the mo parser.

    Keyword arguments read here: ``encoding``, ``check_for_duplicates`` and
    ``wrapwidth``.

    Returns the file instance produced by the parser.
    """
    # get the file encoding: an explicit ``encoding`` wins, otherwise detect it
    enc = kwargs.get('encoding')
    if enc is None:
        enc = detect_encoding(f, type == 'mofile')

    # pick the parser class matching the requested file type
    if type == 'pofile':
        kls = _POFileParser
    else:
        kls = _MOFileParser

    # parse the file
    parser = kls(
        f,
        encoding=enc,
        check_for_duplicates=kwargs.get('check_for_duplicates', False)
    )
    instance = parser.parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
121
56
122 # }}}
57 # }}}
58 # function pofile() {{{
59
def pofile(pofile, **kwargs):
    """
    Convenience function that parses the po or pot file ``pofile`` and returns
    a :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).
    """
    # all the work is shared with mofile() through the common helper
    return _pofile_or_mofile(pofile, 'pofile', **kwargs)
83
84 # }}}
123 # function mofile() {{{
85 # function mofile() {{{
124
86
def mofile(mofile, **kwargs):
    """
    Convenience function that parses the mo file ``mofile`` and returns a
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext to generate the po file that was used to format the mo file
        (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).
    """
    # all the work is shared with pofile() through the common helper
    return _pofile_or_mofile(mofile, 'mofile', **kwargs)
174
111
175 # }}}
112 # }}}
176 # function detect_encoding() {{{
113 # function detect_encoding() {{{
177
114
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    def as_text(chunk):
        """Make ``chunk`` searchable with the text pattern ``rx``."""
        # Only Python 3 byte strings are neither ``str`` nor have an
        # ``encode`` method; latin-1 maps every byte so decoding cannot fail.
        if not isinstance(chunk, str) and not hasattr(chunk, 'encode'):
            return chunk.decode('latin-1')
        return chunk

    if not os.path.exists(file):
        # not an existing path: ``file`` is treated as the content itself
        match = rx.search(as_text(file))
        if match:
            enc = match.group(1).strip()
            if charset_exists(enc):
                return enc
    else:
        if binary_mode:
            mode = 'rb'
        else:
            mode = 'r'
        f = open(file, mode)
        # try/finally closes the handle exactly once, even if reading fails
        try:
            for l in f.readlines():
                match = rx.search(as_text(l))
                if match:
                    enc = match.group(1).strip()
                    # an unknown charset is skipped, later lines may match
                    if charset_exists(enc):
                        return enc
        finally:
            f.close()
    return default_encoding
213
161
214 # }}}
162 # }}}
215 # function escape() {{{
163 # function escape() {{{
216
164
def escape(st):
    """
    Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    # the backslash must be escaped first, otherwise the backslashes added
    # by the following replacements would be doubled
    return st.replace('\\', r'\\')\
             .replace('\t', r'\t')\
             .replace('\r', r'\r')\
             .replace('\n', r'\n')\
             .replace('\"', r'\"')
231
175
232 # }}}
176 # }}}
233 # function unescape() {{{
177 # function unescape() {{{
234
178
def unescape(st):
    """
    Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    def unescape_repl(m):
        """Return the character matching one escape sequence."""
        m = m.group(1)
        if m == 'n':
            return '\n'
        if m == 't':
            return '\t'
        if m == 'r':
            return '\r'
        if m == '\\':
            return '\\'
        return m # handles escaped double quote
    # a single left-to-right pass, so an escaped backslash followed by "n"
    # is not mistaken for an escaped newline
    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
262
196
263 # }}}
197 # }}}
264 # class _BaseFile {{{
198 # class _BaseFile {{{
265
199
266 class _BaseFile(list):
200 class _BaseFile(list):
267 """
201 """
268 Common parent class for POFile and MOFile classes.
202 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
269 This class must **not** be instanciated directly.
203 classes. This class should **not** be instanciated directly.
270 """
204 """
271
205
272 def __init__(self, *args, **kwargs):
206 def __init__(self, *args, **kwargs):
273 """
207 """
274 Constructor.
208 Constructor, accepts the following keyword arguments:
209
210 ``pofile``
211 string, the path to the po or mo file, or its content as a string.
275
212
276 **Keyword arguments**:
213 ``wrapwidth``
277 - *fpath*: string, path to po or mo file
214 integer, the wrap width, only useful when the ``-w`` option was
278 - *wrapwidth*: integer, the wrap width, only useful when -w option
215 passed to xgettext (optional, default: ``78``).
279 was passed to xgettext to generate the po file that was used to
216
280 format the mo file, default to 78 (optional),
217 ``encoding``
281 - *encoding*: string, the encoding to use, defaults to
218 string, the encoding to use, defaults to ``default_encoding``
282 "default_encoding" global variable (optional),
219 global variable (optional).
283 - *check_for_duplicates*: whether to check for duplicate entries
220
284 when adding entries to the file, default: False (optional).
221 ``check_for_duplicates``
222 whether to check for duplicate entries when adding entries to the
223 file, (optional, default: ``False``).
285 """
224 """
286 list.__init__(self)
225 list.__init__(self)
287 # the opened file handle
226 # the opened file handle
227 pofile = kwargs.get('pofile', None)
228 if pofile and os.path.exists(pofile):
229 self.fpath = pofile
230 else:
288 self.fpath = kwargs.get('fpath')
231 self.fpath = kwargs.get('fpath')
289 # the width at which lines should be wrapped
232 # the width at which lines should be wrapped
290 self.wrapwidth = kwargs.get('wrapwidth', 78)
233 self.wrapwidth = kwargs.get('wrapwidth', 78)
291 # the file encoding
234 # the file encoding
292 self.encoding = kwargs.get('encoding', default_encoding)
235 self.encoding = kwargs.get('encoding', default_encoding)
293 # whether to check for duplicate entries or not
236 # whether to check for duplicate entries or not
294 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
237 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
295 # header
238 # header
296 self.header = ''
239 self.header = ''
297 # both po and mo files have metadata
240 # both po and mo files have metadata
298 self.metadata = {}
241 self.metadata = {}
299 self.metadata_is_fuzzy = 0
242 self.metadata_is_fuzzy = 0
300
243
301 def __str__(self):
244 def __unicode__(self):
302 """
245 """
303 String representation of the file.
246 Returns the unicode representation of the file.
304 """
247 """
305 ret = []
248 ret = []
306 entries = [self.metadata_as_entry()] + \
249 entries = [self.metadata_as_entry()] + \
307 [e for e in self if not e.obsolete]
250 [e for e in self if not e.obsolete]
308 for entry in entries:
251 for entry in entries:
309 ret.append(entry.__str__(self.wrapwidth))
252 ret.append(entry.__unicode__(self.wrapwidth))
310 for entry in self.obsolete_entries():
253 for entry in self.obsolete_entries():
311 ret.append(entry.__str__(self.wrapwidth))
254 ret.append(entry.__unicode__(self.wrapwidth))
312 return '\n'.join(ret)
255 ret = '\n'.join(ret)
256
257 if type(ret) != types.UnicodeType:
258 return unicode(ret, self.encoding)
259 return ret
260
261 def __str__(self):
262 """
263 Returns the string representation of the file.
264 """
265 return unicode(self).encode(self.encoding)
313
266
314 def __contains__(self, entry):
267 def __contains__(self, entry):
315 """
268 """
316 Overriden method to implement the membership test (in and not in).
269 Overriden ``list`` method to implement the membership test (in and
317 The method considers that an entry is in the file if it finds an
270 not in).
318 entry that has the same msgid (case sensitive).
271 The method considers that an entry is in the file if it finds an entry
319
272 that has the same msgid (the test is **case sensitive**).
320 **Keyword argument**:
321 - *entry*: an instance of polib._BaseEntry
322
273
323 **Tests**:
274 Argument:
324 >>> po = POFile()
275
325 >>> e1 = POEntry(msgid='foobar', msgstr='spam')
276 ``entry``
326 >>> e2 = POEntry(msgid='barfoo', msgstr='spam')
277 an instance of :class:`~polib._BaseEntry`.
327 >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
328 >>> e4 = POEntry(msgid='spameggs', msgstr='eggs')
329 >>> po.append(e1)
330 >>> po.append(e2)
331 >>> e1 in po
332 True
333 >>> e2 not in po
334 False
335 >>> e3 in po
336 True
337 >>> e4 in po
338 False
339 """
278 """
340 return self.find(entry.msgid, by='msgid') is not None
279 return self.find(entry.msgid, by='msgid') is not None
341
280
281 def __eq__(self, other):
282 return unicode(self) == unicode(other)
283
342 def append(self, entry):
284 def append(self, entry):
343 """
285 """
344 Overriden method to check for duplicates entries, if a user tries to
286 Overriden method to check for duplicates entries, if a user tries to
345 add an entry that already exists, the method will raise a ValueError
287 add an entry that is already in the file, the method will raise a
346 exception.
288 ``ValueError`` exception.
347
348 **Keyword argument**:
349 - *entry*: an instance of polib._BaseEntry
350
289
351 **Tests**:
290 Argument:
352 >>> e1 = POEntry(msgid='foobar', msgstr='spam')
291
353 >>> e2 = POEntry(msgid='foobar', msgstr='eggs')
292 ``entry``
354 >>> po = POFile(check_for_duplicates=True)
293 an instance of :class:`~polib._BaseEntry`.
355 >>> po.append(e1)
356 >>> try:
357 ... po.append(e2)
358 ... except ValueError, e:
359 ... unicode(e)
360 u'Entry "foobar" already exists'
361 """
294 """
362 if self.check_for_duplicates and entry in self:
295 if self.check_for_duplicates and entry in self:
363 raise ValueError('Entry "%s" already exists' % entry.msgid)
296 raise ValueError('Entry "%s" already exists' % entry.msgid)
364 super(_BaseFile, self).append(entry)
297 super(_BaseFile, self).append(entry)
365
298
366 def insert(self, index, entry):
299 def insert(self, index, entry):
367 """
300 """
368 Overriden method to check for duplicates entries, if a user tries to
301 Overriden method to check for duplicates entries, if a user tries to
369 insert an entry that already exists, the method will raise a ValueError
302 add an entry that is already in the file, the method will raise a
370 exception.
303 ``ValueError`` exception.
371
304
372 **Keyword arguments**:
305 Arguments:
373 - *index*: index at which the entry should be inserted
374 - *entry*: an instance of polib._BaseEntry
375
306
376 **Tests**:
307 ``index``
377 >>> import polib
308 index at which the entry should be inserted.
378 >>> polib.check_for_duplicates = True
309
379 >>> e1 = POEntry(msgid='foobar', msgstr='spam')
310 ``entry``
380 >>> e2 = POEntry(msgid='barfoo', msgstr='eggs')
311 an instance of :class:`~polib._BaseEntry`.
381 >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
382 >>> po = POFile(check_for_duplicates=True)
383 >>> po.insert(0, e1)
384 >>> po.insert(1, e2)
385 >>> try:
386 ... po.insert(0, e3)
387 ... except ValueError, e:
388 ... unicode(e)
389 u'Entry "foobar" already exists'
390 """
312 """
391 if self.check_for_duplicates and entry in self:
313 if self.check_for_duplicates and entry in self:
392 raise ValueError('Entry "%s" already exists' % entry.msgid)
314 raise ValueError('Entry "%s" already exists' % entry.msgid)
393 super(_BaseFile, self).insert(index, entry)
315 super(_BaseFile, self).insert(index, entry)
394
316
395 def __repr__(self):
396 """Return the official string representation of the object."""
397 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
398
399 def metadata_as_entry(self):
317 def metadata_as_entry(self):
400 """
318 """
401 Return the metadata as an entry:
319 Returns the file metadata as a :class:`~polib.POFile` instance.
402
403 >>> import polib
404 >>> po = polib.pofile('tests/test_fuzzy_header.po')
405 >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())
406 True
407 """
320 """
408 e = POEntry(msgid='')
321 e = POEntry(msgid='')
409 mdata = self.ordered_metadata()
322 mdata = self.ordered_metadata()
410 if mdata:
323 if mdata:
411 strs = []
324 strs = []
412 e._multiline_str['msgstr'] = ''
413 for name, value in mdata:
325 for name, value in mdata:
414 # Strip whitespace off each line in a multi-line entry
326 # Strip whitespace off each line in a multi-line entry
415 strs.append('%s: %s' % (name, value))
327 strs.append('%s: %s' % (name, value))
416 e.msgstr = '\n'.join(strs) + '\n'
328 e.msgstr = '\n'.join(strs) + '\n'
417 e._multiline_str['msgstr'] = '__POLIB__NL__'.join(
418 [s + '\n' for s in strs])
419 if self.metadata_is_fuzzy:
329 if self.metadata_is_fuzzy:
420 e.flags.append('fuzzy')
330 e.flags.append('fuzzy')
421 return e
331 return e
422
332
423 def save(self, fpath=None, repr_method='__str__'):
333 def save(self, fpath=None, repr_method='__str__'):
424 """
334 """
425 Save the po file to file *fpath* if no file handle exists for
335 Saves the po file to ``fpath``.
426 the object. If there's already an open file and no fpath is
336 If it is an existing file and no ``fpath`` is provided, then the
427 provided, then the existing file is rewritten with the modified
337 existing file is rewritten with the modified data.
428 data.
338
339 Keyword arguments:
429
340
430 **Keyword arguments**:
341 ``fpath``
431 - *fpath*: string, full or relative path to the file.
342 string, full or relative path to the file.
432 - *repr_method*: string, the method to use for output.
343
344 ``repr_method``
345 string, the method to use for output.
433 """
346 """
434 if self.fpath is None and fpath is None:
347 if self.fpath is None and fpath is None:
435 raise IOError('You must provide a file path to save() method')
348 raise IOError('You must provide a file path to save() method')
436 contents = getattr(self, repr_method)()
349 contents = getattr(self, repr_method)()
437 if fpath is None:
350 if fpath is None:
438 fpath = self.fpath
351 fpath = self.fpath
439 if repr_method == 'to_binary':
352 if repr_method == 'to_binary':
440 fhandle = open(fpath, 'wb')
353 fhandle = open(fpath, 'wb')
441 else:
354 else:
442 fhandle = codecs.open(fpath, 'w', self.encoding)
355 fhandle = codecs.open(fpath, 'w', self.encoding)
443 if type(contents) != types.UnicodeType:
356 if type(contents) != types.UnicodeType:
444 contents = contents.decode(self.encoding)
357 contents = contents.decode(self.encoding)
445 fhandle.write(contents)
358 fhandle.write(contents)
446 fhandle.close()
359 fhandle.close()
447
360 # set the file path if not set
448 def find(self, st, by='msgid'):
361 if self.fpath is None and fpath:
449 """
362 self.fpath = fpath
450 Find entry which msgid (or property identified by the *by*
451 attribute) matches the string *st*.
452
363
453 **Keyword arguments**:
364 def find(self, st, by='msgid', include_obsolete_entries=False,
454 - *st*: string, the string to search for
365 msgctxt=False):
455 - *by*: string, the comparison attribute
366 """
367 Find the entry which msgid (or property identified by the ``by``
368 argument) matches the string ``st``.
456
369
457 **Examples**:
370 Keyword arguments:
371
372 ``st``
373 string, the string to search for.
458
374
459 >>> po = pofile('tests/test_utf8.po')
375 ``by``
460 >>> entry = po.find('Thursday')
376 string, the property to use for comparison (default: ``msgid``).
461 >>> entry.msgstr
377
462 u'Jueves'
378 ``include_obsolete_entries``
463 >>> entry = po.find('Some unexistant msgid')
379 boolean, whether to also search in entries that are obsolete.
464 >>> entry is None
380
465 True
381 ``msgctxt``
466 >>> entry = po.find('Jueves', 'msgstr')
382 string, allows to specify a specific message context for the
467 >>> entry.msgid
383 search.
468 u'Thursday'
469 """
384 """
470 for e in self:
385 if include_obsolete_entries:
386 entries = self[:]
387 else:
388 entries = [e for e in self if not e.obsolete]
389 for e in entries:
471 if getattr(e, by) == st:
390 if getattr(e, by) == st:
391 if msgctxt and e.msgctxt != msgctxt:
392 continue
472 return e
393 return e
473 return None
394 return None
474
395
475 def ordered_metadata(self):
396 def ordered_metadata(self):
476 """
397 """
477 Convenience method that return the metadata ordered. The return
398 Convenience method that returns an ordered version of the metadata
478 value is list of tuples (metadata name, metadata_value).
399 dictionnary. The return value is list of tuples (metadata name,
400 metadata_value).
479 """
401 """
480 # copy the dict first
402 # copy the dict first
481 metadata = self.metadata.copy()
403 metadata = self.metadata.copy()
482 data_order = [
404 data_order = [
483 'Project-Id-Version',
405 'Project-Id-Version',
484 'Report-Msgid-Bugs-To',
406 'Report-Msgid-Bugs-To',
485 'POT-Creation-Date',
407 'POT-Creation-Date',
486 'PO-Revision-Date',
408 'PO-Revision-Date',
487 'Last-Translator',
409 'Last-Translator',
488 'Language-Team',
410 'Language-Team',
489 'MIME-Version',
411 'MIME-Version',
490 'Content-Type',
412 'Content-Type',
491 'Content-Transfer-Encoding'
413 'Content-Transfer-Encoding'
492 ]
414 ]
493 ordered_data = []
415 ordered_data = []
494 for data in data_order:
416 for data in data_order:
495 try:
417 try:
496 value = metadata.pop(data)
418 value = metadata.pop(data)
497 ordered_data.append((data, value))
419 ordered_data.append((data, value))
498 except KeyError:
420 except KeyError:
499 pass
421 pass
500 # the rest of the metadata won't be ordered there are no specs for this
422 # the rest of the metadata will be alphabetically ordered since there
423 # are no specs for this AFAIK
501 keys = metadata.keys()
424 keys = metadata.keys()
502 list(keys).sort()
425 keys.sort()
503 for data in keys:
426 for data in keys:
504 value = metadata[data]
427 value = metadata[data]
505 ordered_data.append((data, value))
428 ordered_data.append((data, value))
506 return ordered_data
429 return ordered_data
507
430
508 def to_binary(self):
431 def to_binary(self):
509 """
432 """
510 Return the mofile binary representation.
433 Return the binary representation of the file.
511 """
434 """
512 import array
513 import struct
514 import types
515 offsets = []
435 offsets = []
516 entries = self.translated_entries()
436 entries = self.translated_entries()
517 # the keys are sorted in the .mo file
437 # the keys are sorted in the .mo file
518 def cmp(_self, other):
438 def cmp(_self, other):
519 if _self.msgid > other.msgid:
439 # msgfmt compares entries with msgctxt if it exists
440 self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
441 other_msgid = other.msgctxt and other.msgctxt or other.msgid
442 if self_msgid > other_msgid:
520 return 1
443 return 1
521 elif _self.msgid < other.msgid:
444 elif self_msgid < other_msgid:
522 return -1
445 return -1
523 else:
446 else:
524 return 0
447 return 0
525 # add metadata entry
448 # add metadata entry
526 entries.sort(cmp)
449 entries.sort(cmp)
527 mentry = self.metadata_as_entry()
450 mentry = self.metadata_as_entry()
528 mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
451 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
529 entries = [mentry] + entries
452 entries = [mentry] + entries
530 entries_len = len(entries)
453 entries_len = len(entries)
531 ids, strs = '', ''
454 ids, strs = '', ''
532 for e in entries:
455 for e in entries:
533 # For each string, we need size and file offset. Each string is
456 # For each string, we need size and file offset. Each string is
534 # NUL terminated; the NUL does not count into the size.
457 # NUL terminated; the NUL does not count into the size.
458 msgid = ''
459 if e.msgctxt:
460 # Contexts are stored by storing the concatenation of the
461 # context, a <EOT> byte, and the original string
462 msgid = self._encode(e.msgctxt + '\4')
535 if e.msgid_plural:
463 if e.msgid_plural:
536 indexes = e.msgstr_plural.keys()
464 indexes = e.msgstr_plural.keys()
537 indexes.sort()
465 indexes.sort()
538 msgstr = []
466 msgstr = []
539 for index in indexes:
467 for index in indexes:
540 msgstr.append(e.msgstr_plural[index])
468 msgstr.append(e.msgstr_plural[index])
541 msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
469 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
542 msgstr = self._encode('\0'.join(msgstr))
470 msgstr = self._encode('\0'.join(msgstr))
543 else:
471 else:
544 msgid = self._encode(e.msgid)
472 msgid += self._encode(e.msgid)
545 msgstr = self._encode(e.msgstr)
473 msgstr = self._encode(e.msgstr)
546 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
474 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
547 ids += msgid + '\0'
475 ids += msgid + '\0'
548 strs += msgstr + '\0'
476 strs += msgstr + '\0'
477
549 # The header is 7 32-bit unsigned integers.
478 # The header is 7 32-bit unsigned integers.
550 keystart = 7*4+16*entries_len
479 keystart = 7*4+16*entries_len
551 # and the values start after the keys
480 # and the values start after the keys
552 valuestart = keystart + len(ids)
481 valuestart = keystart + len(ids)
553 koffsets = []
482 koffsets = []
554 voffsets = []
483 voffsets = []
555 # The string table first has the list of keys, then the list of values.
484 # The string table first has the list of keys, then the list of values.
556 # Each entry has first the size of the string, then the file offset.
485 # Each entry has first the size of the string, then the file offset.
557 for o1, l1, o2, l2 in offsets:
486 for o1, l1, o2, l2 in offsets:
558 koffsets += [l1, o1+keystart]
487 koffsets += [l1, o1+keystart]
559 voffsets += [l2, o2+valuestart]
488 voffsets += [l2, o2+valuestart]
560 offsets = koffsets + voffsets
489 offsets = koffsets + voffsets
561 output = struct.pack("IIIIIII",
490 # check endianness for magic number
562 0x950412de, # Magic number
491 if struct.pack('@h', 1) == struct.pack('<h', 1):
492 magic_number = MOFile.LITTLE_ENDIAN
493 else:
494 magic_number = MOFile.BIG_ENDIAN
495
496 output = struct.pack(
497 "Iiiiiii",
498 magic_number, # Magic number
563 0, # Version
499 0, # Version
564 entries_len, # # of entries
500 entries_len, # # of entries
565 7*4, # start of key index
501 7*4, # start of key index
566 7*4+entries_len*8, # start of value index
502 7*4+entries_len*8, # start of value index
567 0, 0) # size and offset of hash table
503 0, keystart # size and offset of hash table
568 output += array.array("I", offsets).tostring()
504 # Important: we don't use hash tables
505 )
506 output += array.array("i", offsets).tostring()
569 output += ids
507 output += ids
570 output += strs
508 output += strs
571 return output
509 return output
572
510
573 def _encode(self, mixed):
511 def _encode(self, mixed):
574 """
512 """
575 Encode the given argument with the file encoding if the type is unicode
513 Encodes the given ``mixed`` argument with the file encoding if and
576 and return the encoded string.
514 only if it's a unicode string and returns the encoded string.
577 """
515 """
578 if type(mixed) == types.UnicodeType:
516 if type(mixed) == types.UnicodeType:
579 return mixed.encode(self.encoding)
517 return mixed.encode(self.encoding)
580 return mixed
518 return mixed
581
519
582 # }}}
520 # }}}
583 # class POFile {{{
521 # class POFile {{{
584
522
585 class POFile(_BaseFile):
523 class POFile(_BaseFile):
586 '''
524 """
587 Po (or Pot) file reader/writer.
525 Po (or Pot) file reader/writer.
588 POFile objects inherit the list objects methods.
526 This class inherits the :class:`~polib._BaseFile` class and, by extension,
589
527 the python ``list`` type.
590 **Example**:
528 """
591
529
592 >>> po = POFile()
530 def __unicode__(self):
593 >>> entry1 = POEntry(
531 """
594 ... msgid="Some english text",
532 Returns the unicode representation of the po file.
595 ... msgstr="Un texte en anglais"
533 """
596 ... )
597 >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
598 >>> entry1.comment = "Some useful comment"
599 >>> entry2 = POEntry(
600 ... msgid="Peace in some languages",
601 ... msgstr="Pace سلام שלום Hasîtî 和平"
602 ... )
603 >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
604 >>> entry2.comment = "Another useful comment"
605 >>> entry3 = POEntry(
606 ... msgid='Some entry with quotes " \\"',
607 ... msgstr='Un message unicode avec des quotes " \\"'
608 ... )
609 >>> entry3.comment = "Test string quoting"
610 >>> po.append(entry1)
611 >>> po.append(entry2)
612 >>> po.append(entry3)
613 >>> po.header = "Some Header"
614 >>> print(po)
615 # Some Header
616 msgid ""
617 msgstr ""
618 <BLANKLINE>
619 #. Some useful comment
620 #: testfile:12 another_file:1
621 msgid "Some english text"
622 msgstr "Un texte en anglais"
623 <BLANKLINE>
624 #. Another useful comment
625 #: testfile:15 another_file:5
626 msgid "Peace in some languages"
627 msgstr "Pace سلام שלום Hasîtî 和平"
628 <BLANKLINE>
629 #. Test string quoting
630 msgid "Some entry with quotes \\" \\""
631 msgstr "Un message unicode avec des quotes \\" \\""
632 <BLANKLINE>
633 '''
634
635 def __str__(self):
636 """Return the string representation of the po file"""
637 ret, headers = '', self.header.split('\n')
534 ret, headers = '', self.header.split('\n')
638 for header in headers:
535 for header in headers:
639 if header[:1] in [',', ':']:
536 if header[:1] in [',', ':']:
640 ret += '#%s\n' % header
537 ret += '#%s\n' % header
641 else:
538 else:
642 ret += '# %s\n' % header
539 ret += '# %s\n' % header
643 return ret + _BaseFile.__str__(self)
540
541 if type(ret) != types.UnicodeType:
542 ret = unicode(ret, self.encoding)
543
544 return ret + _BaseFile.__unicode__(self)
644
545
645 def save_as_mofile(self, fpath):
546 def save_as_mofile(self, fpath):
646 """
547 """
647 Save the binary representation of the file to *fpath*.
548 Saves the binary representation of the file to given ``fpath``.
648
549
649 **Keyword arguments**:
550 Keyword argument:
650 - *fpath*: string, full or relative path to the file.
551
552 ``fpath``
553 string, full or relative path to the mo file.
651 """
554 """
652 _BaseFile.save(self, fpath, 'to_binary')
555 _BaseFile.save(self, fpath, 'to_binary')
653
556
654 def percent_translated(self):
557 def percent_translated(self):
655 """
558 """
656 Convenience method that return the percentage of translated
559 Convenience method that returns the percentage of translated
657 messages.
560 messages.
658
659 **Example**:
660
661 >>> import polib
662 >>> po = polib.pofile('tests/test_pofile_helpers.po')
663 >>> po.percent_translated()
664 50
665 >>> po = POFile()
666 >>> po.percent_translated()
667 100
668 """
561 """
669 total = len([e for e in self if not e.obsolete])
562 total = len([e for e in self if not e.obsolete])
670 if total == 0:
563 if total == 0:
671 return 100
564 return 100
672 translated = len(self.translated_entries())
565 translated = len(self.translated_entries())
673 return int((100.00 / float(total)) * translated)
566 return int((100.00 / float(total)) * translated)
674
567
675 def translated_entries(self):
568 def translated_entries(self):
676 """
569 """
677 Convenience method that return a list of translated entries.
570 Convenience method that returns the list of translated entries.
678
679 **Example**:
680
681 >>> import polib
682 >>> po = polib.pofile('tests/test_pofile_helpers.po')
683 >>> len(po.translated_entries())
684 6
685 """
571 """
686 return [e for e in self if e.translated()]
572 return [e for e in self if e.translated()]
687
573
688 def untranslated_entries(self):
574 def untranslated_entries(self):
689 """
575 """
690 Convenience method that return a list of untranslated entries.
576 Convenience method that returns the list of untranslated entries.
691
692 **Example**:
693
694 >>> import polib
695 >>> po = polib.pofile('tests/test_pofile_helpers.po')
696 >>> len(po.untranslated_entries())
697 4
698 """
577 """
699 return [e for e in self if not e.translated() and not e.obsolete \
578 return [e for e in self if not e.translated() and not e.obsolete \
700 and not 'fuzzy' in e.flags]
579 and not 'fuzzy' in e.flags]
701
580
702 def fuzzy_entries(self):
581 def fuzzy_entries(self):
703 """
582 """
704 Convenience method that return the list of 'fuzzy' entries.
583 Convenience method that returns the list of fuzzy entries.
705
706 **Example**:
707
708 >>> import polib
709 >>> po = polib.pofile('tests/test_pofile_helpers.po')
710 >>> len(po.fuzzy_entries())
711 2
712 """
584 """
713 return [e for e in self if 'fuzzy' in e.flags]
585 return [e for e in self if 'fuzzy' in e.flags]
714
586
715 def obsolete_entries(self):
587 def obsolete_entries(self):
716 """
588 """
717 Convenience method that return the list of obsolete entries.
589 Convenience method that returns the list of obsolete entries.
718
719 **Example**:
720
721 >>> import polib
722 >>> po = polib.pofile('tests/test_pofile_helpers.po')
723 >>> len(po.obsolete_entries())
724 4
725 """
590 """
726 return [e for e in self if e.obsolete]
591 return [e for e in self if e.obsolete]
727
592
728 def merge(self, refpot):
593 def merge(self, refpot):
729 """
594 """
730 XXX this could not work if encodings are different, needs thinking
595 Convenience method that merges the current pofile with the pot file
731 and general refactoring of how polib handles encoding...
732
733 Convenience method that merge the current pofile with the pot file
734 provided. It behaves exactly as the gettext msgmerge utility:
596 provided. It behaves exactly as the gettext msgmerge utility:
735
597
736 - comments of this file will be preserved, but extracted comments
598 * comments of this file will be preserved, but extracted comments and
737 and occurrences will be discarded
599 occurrences will be discarded;
738 - any translations or comments in the file will be discarded,
600 * any translations or comments in the file will be discarded, however,
739 however dot comments and file positions will be preserved
601 dot comments and file positions will be preserved;
740
602 * the fuzzy flags are preserved.
741 **Keyword argument**:
742 - *refpot*: object POFile, the reference catalog.
743
603
744 **Example**:
604 Keyword argument:
745
605
746 >>> import polib
606 ``refpot``
747 >>> refpot = polib.pofile('tests/test_merge.pot')
607 object POFile, the reference catalog.
748 >>> po = polib.pofile('tests/test_merge_before.po')
749 >>> po.merge(refpot)
750 >>> expected_po = polib.pofile('tests/test_merge_after.po')
751 >>> unicode(po) == unicode(expected_po)
752 True
753 """
608 """
754 for entry in refpot:
609 for entry in refpot:
755 e = self.find(entry.msgid)
610 e = self.find(entry.msgid, include_obsolete_entries=True)
756 if e is None:
611 if e is None:
757 e = POEntry()
612 e = POEntry()
758 self.append(e)
613 self.append(e)
759 e.merge(entry)
614 e.merge(entry)
760 # ok, now we must "obsolete" entries that are not in the refpot
615 # ok, now we must "obsolete" entries that are not in the refpot anymore
761 # anymore
762 for entry in self:
616 for entry in self:
763 if refpot.find(entry.msgid) is None:
617 if refpot.find(entry.msgid) is None:
764 entry.obsolete = True
618 entry.obsolete = True
765
619
766 # }}}
620 # }}}
767 # class MOFile {{{
621 # class MOFile {{{
768
622
769 class MOFile(_BaseFile):
623 class MOFile(_BaseFile):
770 '''
624 """
771 Mo file reader/writer.
625 Mo file reader/writer.
772 MOFile objects inherit the list objects methods.
626 This class inherits the :class:`~polib._BaseFile` class and, by
773
627 extension, the python ``list`` type.
774 **Example**:
628 """
775
629 BIG_ENDIAN = 0xde120495
776 >>> mo = MOFile()
630 LITTLE_ENDIAN = 0x950412de
777 >>> entry1 = POEntry(
778 ... msgid="Some english text",
779 ... msgstr="Un texte en anglais"
780 ... )
781 >>> entry2 = POEntry(
782 ... msgid="I need my dirty cheese",
783 ... msgstr="Je veux mon sale fromage"
784 ... )
785 >>> entry3 = MOEntry(
786 ... msgid='Some entry with quotes " \\"',
787 ... msgstr='Un message unicode avec des quotes " \\"'
788 ... )
789 >>> mo.append(entry1)
790 >>> mo.append(entry2)
791 >>> mo.append(entry3)
792 >>> print(mo)
793 msgid ""
794 msgstr ""
795 <BLANKLINE>
796 msgid "Some english text"
797 msgstr "Un texte en anglais"
798 <BLANKLINE>
799 msgid "I need my dirty cheese"
800 msgstr "Je veux mon sale fromage"
801 <BLANKLINE>
802 msgid "Some entry with quotes \\" \\""
803 msgstr "Un message unicode avec des quotes \\" \\""
804 <BLANKLINE>
805 '''
806
631
807 def __init__(self, *args, **kwargs):
632 def __init__(self, *args, **kwargs):
808 """
633 """
809 MOFile constructor. Mo files have two other properties:
634 Constructor, accepts all keyword arguments accepted by
810 - magic_number: the magic_number of the binary file,
635 :class:`~polib._BaseFile` class.
811 - version: the version of the mo spec.
812 """
636 """
813 _BaseFile.__init__(self, *args, **kwargs)
637 _BaseFile.__init__(self, *args, **kwargs)
814 self.magic_number = None
638 self.magic_number = None
815 self.version = 0
639 self.version = 0
816
640
817 def save_as_pofile(self, fpath):
641 def save_as_pofile(self, fpath):
818 """
642 """
819 Save the string representation of the file to *fpath*.
643 Saves the mofile as a pofile to ``fpath``.
820
644
821 **Keyword argument**:
645 Keyword argument:
822 - *fpath*: string, full or relative path to the file.
646
647 ``fpath``
648 string, full or relative path to the file.
823 """
649 """
824 _BaseFile.save(self, fpath)
650 _BaseFile.save(self, fpath)
825
651
826 def save(self, fpath):
652 def save(self, fpath=None):
827 """
653 """
828 Save the binary representation of the file to *fpath*.
654 Saves the mofile to ``fpath``.
829
655
830 **Keyword argument**:
656 Keyword argument:
831 - *fpath*: string, full or relative path to the file.
657
658 ``fpath``
659 string, full or relative path to the file.
832 """
660 """
833 _BaseFile.save(self, fpath, 'to_binary')
661 _BaseFile.save(self, fpath, 'to_binary')
834
662
835 def percent_translated(self):
663 def percent_translated(self):
836 """
664 """
837 Convenience method to keep the same interface with POFile instances.
665 Convenience method to keep the same interface with POFile instances.
838 """
666 """
839 return 100
667 return 100
840
668
841 def translated_entries(self):
669 def translated_entries(self):
842 """
670 """
843 Convenience method to keep the same interface with POFile instances.
671 Convenience method to keep the same interface with POFile instances.
844 """
672 """
845 return self
673 return self
846
674
847 def untranslated_entries(self):
675 def untranslated_entries(self):
848 """
676 """
849 Convenience method to keep the same interface with POFile instances.
677 Convenience method to keep the same interface with POFile instances.
850 """
678 """
851 return []
679 return []
852
680
853 def fuzzy_entries(self):
681 def fuzzy_entries(self):
854 """
682 """
855 Convenience method to keep the same interface with POFile instances.
683 Convenience method to keep the same interface with POFile instances.
856 """
684 """
857 return []
685 return []
858
686
859 def obsolete_entries(self):
687 def obsolete_entries(self):
860 """
688 """
861 Convenience method to keep the same interface with POFile instances.
689 Convenience method to keep the same interface with POFile instances.
862 """
690 """
863 return []
691 return []
864
692
865 # }}}
693 # }}}
866 # class _BaseEntry {{{
694 # class _BaseEntry {{{
867
695
868 class _BaseEntry(object):
696 class _BaseEntry(object):
869 """
697 """
870 Base class for POEntry or MOEntry objects.
698 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
871 This class must *not* be instantiated directly.
699 This class should **not** be instantiated directly.
872 """
700 """
873
701
874 def __init__(self, *args, **kwargs):
702 def __init__(self, *args, **kwargs):
875 """Base Entry constructor."""
703 """
704 Constructor, accepts the following keyword arguments:
705
706 ``msgid``
707 string, the entry msgid.
708
709 ``msgstr``
710 string, the entry msgstr.
711
712 ``msgid_plural``
713 string, the entry msgid_plural.
714
715 ``msgstr_plural``
716 list, the entry msgstr_plural lines.
717
718 ``msgctxt``
719 string, the entry context (msgctxt).
720
721 ``obsolete``
722 bool, whether the entry is "obsolete" or not.
723
724 ``encoding``
725 string, the encoding to use, defaults to ``default_encoding``
726 global variable (optional).
727 """
876 self.msgid = kwargs.get('msgid', '')
728 self.msgid = kwargs.get('msgid', '')
877 self.msgstr = kwargs.get('msgstr', '')
729 self.msgstr = kwargs.get('msgstr', '')
878 self.msgid_plural = kwargs.get('msgid_plural', '')
730 self.msgid_plural = kwargs.get('msgid_plural', '')
879 self.msgstr_plural = kwargs.get('msgstr_plural', {})
731 self.msgstr_plural = kwargs.get('msgstr_plural', {})
732 self.msgctxt = kwargs.get('msgctxt', None)
880 self.obsolete = kwargs.get('obsolete', False)
733 self.obsolete = kwargs.get('obsolete', False)
881 self.encoding = kwargs.get('encoding', default_encoding)
734 self.encoding = kwargs.get('encoding', default_encoding)
882 self.msgctxt = kwargs.get('msgctxt', None)
883 self._multiline_str = {}
884
735
885 def __repr__(self):
736 def __unicode__(self, wrapwidth=78):
886 """Return the official string representation of the object."""
887 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
888
889 def __str__(self, wrapwidth=78):
890 """
737 """
891 Common string representation of the POEntry and MOEntry
738 Returns the unicode representation of the entry.
892 objects.
893 """
739 """
894 if self.obsolete:
740 if self.obsolete:
895 delflag = '#~ '
741 delflag = '#~ '
896 else:
742 else:
897 delflag = ''
743 delflag = ''
898 ret = []
744 ret = []
899 # write the msgctxt if any
745 # write the msgctxt if any
900 if self.msgctxt is not None:
746 if self.msgctxt is not None:
901 ret += self._str_field("msgctxt", delflag, "", self.msgctxt)
747 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
902 # write the msgid
748 # write the msgid
903 ret += self._str_field("msgid", delflag, "", self.msgid)
749 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
904 # write the msgid_plural if any
750 # write the msgid_plural if any
905 if self.msgid_plural:
751 if self.msgid_plural:
906 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
752 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
907 if self.msgstr_plural:
753 if self.msgstr_plural:
908 # write the msgstr_plural if any
754 # write the msgstr_plural if any
909 msgstrs = self.msgstr_plural
755 msgstrs = self.msgstr_plural
910 keys = list(msgstrs)
756 keys = list(msgstrs)
911 keys.sort()
757 keys.sort()
912 for index in keys:
758 for index in keys:
913 msgstr = msgstrs[index]
759 msgstr = msgstrs[index]
914 plural_index = '[%s]' % index
760 plural_index = '[%s]' % index
915 ret += self._str_field("msgstr", delflag, plural_index, msgstr)
761 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
916 else:
762 else:
917 # otherwise write the msgstr
763 # otherwise write the msgstr
918 ret += self._str_field("msgstr", delflag, "", self.msgstr)
764 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
919 ret.append('')
765 ret.append('')
920 return '\n'.join(ret)
766 ret = '\n'.join(ret)
767
768 if type(ret) != types.UnicodeType:
769 return unicode(ret, self.encoding)
770 return ret
921
771
922 def _str_field(self, fieldname, delflag, plural_index, field):
772 def __str__(self):
923 if (fieldname + plural_index) in self._multiline_str:
773 """
924 field = self._multiline_str[fieldname + plural_index]
774 Returns the string representation of the entry.
925 lines = [''] + field.split('__POLIB__NL__')
775 """
926 else:
776 return unicode(self).encode(self.encoding)
777
778 def __eq__(self, other):
779 return unicode(self) == unicode(other)
780
781 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
927 lines = field.splitlines(True)
782 lines = field.splitlines(True)
928 if len(lines) > 1:
783 if len(lines) > 1:
929 lines = ['']+lines # start with initial empty line
784 lines = [''] + lines # start with initial empty line
930 else:
785 else:
931 lines = [field] # needed for the empty string case
786 escaped_field = escape(field)
787 specialchars_count = 0
788 for c in ['\\', '\n', '\r', '\t', '"']:
789 specialchars_count += field.count(c)
790 # comparison must take into account fieldname length + one space
791 # + 2 quotes (eg. msgid "<string>")
792 flength = len(fieldname) + 3
793 if plural_index:
794 flength += len(plural_index)
795 real_wrapwidth = wrapwidth - flength + specialchars_count
796 if wrapwidth > 0 and len(field) > real_wrapwidth:
797 # Wrap the line but take field name into account
798 lines = [''] + [unescape(item) for item in wrap(
799 escaped_field,
800 wrapwidth - 2, # 2 for quotes ""
801 drop_whitespace=False,
802 break_long_words=False
803 )]
804 else:
805 lines = [field]
932 if fieldname.startswith('previous_'):
806 if fieldname.startswith('previous_'):
933 # quick and dirty trick to get the real field name
807 # quick and dirty trick to get the real field name
934 fieldname = fieldname[9:]
808 fieldname = fieldname[9:]
935
809
936 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
810 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
937 escape(lines.pop(0)))]
811 escape(lines.pop(0)))]
938 for mstr in lines:
812 for mstr in lines:
939 ret.append('%s"%s"' % (delflag, escape(mstr)))
813 ret.append('%s"%s"' % (delflag, escape(mstr)))
940 return ret
814 return ret
941
815
942 # }}}
816 # }}}
943 # class POEntry {{{
817 # class POEntry {{{
944
818
945 class POEntry(_BaseEntry):
819 class POEntry(_BaseEntry):
946 """
820 """
947 Represents a po file entry.
821 Represents a po file entry.
948
949 **Examples**:
950
951 >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
952 >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
953 >>> print(entry)
954 #: welcome.py:12 anotherfile.py:34
955 msgid "Welcome"
956 msgstr "Bienvenue"
957 <BLANKLINE>
958 >>> entry = POEntry()
959 >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
960 >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
961 >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
962 >>> entry.flags.append('c-format')
963 >>> entry.previous_msgctxt = '@somecontext'
964 >>> entry.previous_msgid = 'I had eggs but no spam !'
965 >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
966 >>> entry.msgctxt = '@somenewcontext'
967 >>> entry.msgid = 'I have spam but no egg !'
968 >>> entry.msgid_plural = 'I have spam and %d eggs !'
969 >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
970 >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
971 >>> print(entry)
972 #. A plural translation. This is a very very very long line please do not
973 #. wrap, this is just for testing comment wrapping...
974 # A plural translation. This is a very very very long line please do not wrap,
975 # this is just for testing comment wrapping...
976 #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
977 #: src/eggs.c:45
978 #, c-format
979 #| msgctxt "@somecontext"
980 #| msgid "I had eggs but no spam !"
981 #| msgid_plural "I had eggs and %d spam !"
982 msgctxt "@somenewcontext"
983 msgid "I have spam but no egg !"
984 msgid_plural "I have spam and %d eggs !"
985 msgstr[0] "J'ai du jambon mais aucun oeuf !"
986 msgstr[1] "J'ai du jambon et %d oeufs !"
987 <BLANKLINE>
988 """
822 """
989
823
990 def __init__(self, *args, **kwargs):
824 def __init__(self, *args, **kwargs):
991 """POEntry constructor."""
825 """
826 Constructor, accepts the following keyword arguments:
827
828 ``comment``
829 string, the entry comment.
830
831 ``tcomment``
832 string, the entry translator comment.
833
834 ``occurrences``
835 list, the entry occurrences.
836
837 ``flags``
838 list, the entry flags.
839
840 ``previous_msgctxt``
841 string, the entry previous context.
842
843 ``previous_msgid``
844 string, the entry previous msgid.
845
846 ``previous_msgid_plural``
847 string, the entry previous msgid_plural.
848 """
992 _BaseEntry.__init__(self, *args, **kwargs)
849 _BaseEntry.__init__(self, *args, **kwargs)
993 self.comment = kwargs.get('comment', '')
850 self.comment = kwargs.get('comment', '')
994 self.tcomment = kwargs.get('tcomment', '')
851 self.tcomment = kwargs.get('tcomment', '')
995 self.occurrences = kwargs.get('occurrences', [])
852 self.occurrences = kwargs.get('occurrences', [])
996 self.flags = kwargs.get('flags', [])
853 self.flags = kwargs.get('flags', [])
997 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
854 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
998 self.previous_msgid = kwargs.get('previous_msgid', None)
855 self.previous_msgid = kwargs.get('previous_msgid', None)
999 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
856 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
1000
857
1001 def __str__(self, wrapwidth=78):
858 def __unicode__(self, wrapwidth=78):
1002 """
859 """
1003 Return the string representation of the entry.
860 Returns the unicode representation of the entry.
1004 """
861 """
1005 if self.obsolete:
862 if self.obsolete:
1006 return _BaseEntry.__str__(self)
863 return _BaseEntry.__unicode__(self, wrapwidth)
864
1007 ret = []
865 ret = []
1008 # comment first, if any (with text wrapping as xgettext does)
866 # comments first, if any (with text wrapping as xgettext does)
1009 if self.comment != '':
867 comments = [('comment', '#. '), ('tcomment', '# ')]
1010 for comment in self.comment.split('\n'):
868 for c in comments:
1011 if wrapwidth > 0 and len(comment) > wrapwidth-3:
869 val = getattr(self, c[0])
1012 ret += textwrap.wrap(comment, wrapwidth,
870 if val:
1013 initial_indent='#. ',
871 for comment in val.split('\n'):
1014 subsequent_indent='#. ',
872 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
1015 break_long_words=False)
873 ret += wrap(
874 comment,
875 wrapwidth,
876 initial_indent=c[1],
877 subsequent_indent=c[1],
878 break_long_words=False
879 )
1016 else:
880 else:
1017 ret.append('#. %s' % comment)
881 ret.append('%s%s' % (c[1], comment))
1018 # translator comment, if any (with text wrapping as xgettext does)
882
1019 if self.tcomment != '':
1020 for tcomment in self.tcomment.split('\n'):
1021 if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
1022 ret += textwrap.wrap(tcomment, wrapwidth,
1023 initial_indent='# ',
1024 subsequent_indent='# ',
1025 break_long_words=False)
1026 else:
1027 ret.append('# %s' % tcomment)
1028 # occurrences (with text wrapping as xgettext does)
883 # occurrences (with text wrapping as xgettext does)
1029 if self.occurrences:
884 if self.occurrences:
1030 filelist = []
885 filelist = []
1031 for fpath, lineno in self.occurrences:
886 for fpath, lineno in self.occurrences:
1032 if lineno:
887 if lineno:
1033 filelist.append('%s:%s' % (fpath, lineno))
888 filelist.append('%s:%s' % (fpath, lineno))
1034 else:
889 else:
1035 filelist.append(fpath)
890 filelist.append(fpath)
1036 filestr = ' '.join(filelist)
891 filestr = ' '.join(filelist)
1037 if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
892 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
1038 # XXX textwrap split words that contain hyphen, this is not
893 # textwrap split words that contain hyphen, this is not
1039 # what we want for filenames, so the dirty hack is to
894 # what we want for filenames, so the dirty hack is to
1040 # temporarily replace hyphens with a char that a file cannot
895 # temporarily replace hyphens with a char that a file cannot
1041 # contain, like "*"
896 # contain, like "*"
1042 lines = textwrap.wrap(filestr.replace('-', '*'),
897 ret += [l.replace('*', '-') for l in wrap(
898 filestr.replace('-', '*'),
1043 wrapwidth,
899 wrapwidth,
1044 initial_indent='#: ',
900 initial_indent='#: ',
1045 subsequent_indent='#: ',
901 subsequent_indent='#: ',
1046 break_long_words=False)
902 break_long_words=False
1047 # end of the replace hack
903 )]
1048 for line in lines:
1049 ret.append(line.replace('*', '-'))
1050 else:
904 else:
1051 ret.append('#: '+filestr)
905 ret.append('#: ' + filestr)
1052 # flags
906
907 # flags (TODO: wrapping ?)
1053 if self.flags:
908 if self.flags:
1054 flags = []
909 ret.append('#, %s' % ', '.join(self.flags))
1055 for flag in self.flags:
1056 flags.append(flag)
1057 ret.append('#, %s' % ', '.join(flags))
1058
910
1059 # previous context and previous msgid/msgid_plural
911 # previous context and previous msgid/msgid_plural
1060 if self.previous_msgctxt:
912 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
1061 ret += self._str_field("previous_msgctxt", "#| ", "",
913 for f in fields:
1062 self.previous_msgctxt)
914 val = getattr(self, f)
1063 if self.previous_msgid:
915 if val:
1064 ret += self._str_field("previous_msgid", "#| ", "",
916 ret += self._str_field(f, "#| ", "", val, wrapwidth)
1065 self.previous_msgid)
1066 if self.previous_msgid_plural:
1067 ret += self._str_field("previous_msgid_plural", "#| ", "",
1068 self.previous_msgid_plural)
1069
917
1070 ret.append(_BaseEntry.__str__(self))
918 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
1071 return '\n'.join(ret)
919 ret = '\n'.join(ret)
920
921 if type(ret) != types.UnicodeType:
922 return unicode(ret, self.encoding)
923 return ret
1072
924
1073 def __cmp__(self, other):
925 def __cmp__(self, other):
1074 '''
926 """
1075 Called by comparison operations if rich comparison is not defined.
927 Called by comparison operations if rich comparison is not defined.
1076
928 """
1077 **Tests**:
1078 >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
1079 >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
1080 >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
1081 >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
1082 >>> po = POFile()
1083 >>> po.append(a)
1084 >>> po.append(b)
1085 >>> po.append(c1)
1086 >>> po.append(c2)
1087 >>> po.sort()
1088 >>> print(po)
1089 #
1090 msgid ""
1091 msgstr ""
1092 <BLANKLINE>
1093 #: a.py:1 a.py:3
1094 msgid "c2"
1095 msgstr ""
1096 <BLANKLINE>
1097 #: a.py:1 b.py:1
1098 msgid "c1"
1099 msgstr ""
1100 <BLANKLINE>
1101 #: b.py:1 b.py:3
1102 msgid "a"
1103 msgstr ""
1104 <BLANKLINE>
1105 #: b.py:1 b.py:3
1106 msgid "b"
1107 msgstr ""
1108 <BLANKLINE>
1109 '''
1110 def compare_occurrences(a, b):
929 def compare_occurrences(a, b):
1111 """
930 """
1112 Compare an entry occurrence with another one.
931 Compare an entry occurrence with another one.
1113 """
932 """
1114 if a[0] != b[0]:
933 if a[0] != b[0]:
1115 return a[0] < b[0]
934 return a[0] < b[0]
1116 if a[1] != b[1]:
935 if a[1] != b[1]:
1117 return a[1] < b[1]
936 return a[1] < b[1]
1118 return 0
937 return 0
1119
938
1120 # First: Obsolete test
939 # First: Obsolete test
1121 if self.obsolete != other.obsolete:
940 if self.obsolete != other.obsolete:
1122 if self.obsolete:
941 if self.obsolete:
1123 return -1
942 return -1
1124 else:
943 else:
1125 return 1
944 return 1
1126 # Work on a copy to protect original
945 # Work on a copy to protect original
1127 occ1 = self.occurrences[:]
946 occ1 = self.occurrences[:]
1128 occ2 = other.occurrences[:]
947 occ2 = other.occurrences[:]
1129 # Sorting using compare method
948 # Sorting using compare method
1130 occ1.sort(compare_occurrences)
949 occ1.sort(compare_occurrences)
1131 occ2.sort(compare_occurrences)
950 occ2.sort(compare_occurrences)
1132 # Comparing sorted occurrences
951 # Comparing sorted occurrences
1133 pos = 0
952 pos = 0
1134 for entry1 in occ1:
953 for entry1 in occ1:
1135 try:
954 try:
1136 entry2 = occ2[pos]
955 entry2 = occ2[pos]
1137 except IndexError:
956 except IndexError:
1138 return 1
957 return 1
1139 pos = pos + 1
958 pos = pos + 1
1140 if entry1[0] != entry2[0]:
959 if entry1[0] != entry2[0]:
1141 if entry1[0] > entry2[0]:
960 if entry1[0] > entry2[0]:
1142 return 1
961 return 1
1143 else:
962 else:
1144 return -1
963 return -1
1145 if entry1[1] != entry2[1]:
964 if entry1[1] != entry2[1]:
1146 if entry1[1] > entry2[1]:
965 if entry1[1] > entry2[1]:
1147 return 1
966 return 1
1148 else:
967 else:
1149 return -1
968 return -1
1150 # Finally: Compare message ID
969 # Finally: Compare message ID
1151 if self.msgid > other.msgid: return 1
970 if self.msgid > other.msgid: return 1
1152 else: return -1
971 else: return -1
1153
972
1154 def translated(self):
973 def translated(self):
1155 """
974 """
1156 Return True if the entry has been translated or False.
975 Returns ``True`` if the entry has been translated or ``False``
976 otherwise.
1157 """
977 """
1158 if self.obsolete or 'fuzzy' in self.flags:
978 if self.obsolete or 'fuzzy' in self.flags:
1159 return False
979 return False
1160 if self.msgstr != '':
980 if self.msgstr != '':
1161 return True
981 return True
1162 if self.msgstr_plural:
982 if self.msgstr_plural:
1163 for pos in self.msgstr_plural:
983 for pos in self.msgstr_plural:
1164 if self.msgstr_plural[pos] == '':
984 if self.msgstr_plural[pos] == '':
1165 return False
985 return False
1166 return True
986 return True
1167 return False
987 return False
1168
988
1169 def merge(self, other):
989 def merge(self, other):
1170 """
990 """
1171 Merge the current entry with the given pot entry.
991 Merge the current entry with the given pot entry.
1172 """
992 """
1173 self.msgid = other.msgid
993 self.msgid = other.msgid
994 self.msgctxt = other.msgctxt
1174 self.occurrences = other.occurrences
995 self.occurrences = other.occurrences
1175 self.comment = other.comment
996 self.comment = other.comment
1176 self.flags = other.flags
997 fuzzy = 'fuzzy' in self.flags
998 self.flags = other.flags[:] # clone flags
999 if fuzzy:
1000 self.flags.append('fuzzy')
1177 self.msgid_plural = other.msgid_plural
1001 self.msgid_plural = other.msgid_plural
1002 self.obsolete = other.obsolete
1003 self.previous_msgctxt = other.previous_msgctxt
1004 self.previous_msgid = other.previous_msgid
1005 self.previous_msgid_plural = other.previous_msgid_plural
1178 if other.msgstr_plural:
1006 if other.msgstr_plural:
1179 for pos in other.msgstr_plural:
1007 for pos in other.msgstr_plural:
1180 try:
1008 try:
1181 # keep existing translation at pos if any
1009 # keep existing translation at pos if any
1182 self.msgstr_plural[pos]
1010 self.msgstr_plural[pos]
1183 except KeyError:
1011 except KeyError:
1184 self.msgstr_plural[pos] = ''
1012 self.msgstr_plural[pos] = ''
1185
1013
1186 # }}}
1014 # }}}
1187 # class MOEntry {{{
1015 # class MOEntry {{{
1188
1016
1189 class MOEntry(_BaseEntry):
1017 class MOEntry(_BaseEntry):
1190 """
1018 """
1191 Represents a mo file entry.
1019 Represents a mo file entry.
1192
1193 **Examples**:
1194
1195 >>> entry = MOEntry()
1196 >>> entry.msgid = 'translate me !'
1197 >>> entry.msgstr = 'traduisez moi !'
1198 >>> print(entry)
1199 msgid "translate me !"
1200 msgstr "traduisez moi !"
1201 <BLANKLINE>
1202 """
1020 """
1203
1021 pass
1204 def __str__(self, wrapwidth=78):
1205 """
1206 Return the string representation of the entry.
1207 """
1208 return _BaseEntry.__str__(self, wrapwidth)
1209
1022
1210 # }}}
1023 # }}}
1211 # class _POFileParser {{{
1024 # class _POFileParser {{{
1212
1025
1213 class _POFileParser(object):
1026 class _POFileParser(object):
1214 """
1027 """
1215 A finite state machine to parse efficiently and correctly po
1028 A finite state machine to parse efficiently and correctly po
1216 file format.
1029 file format.
1217 """
1030 """
1218
1031
1219 def __init__(self, fpath, *args, **kwargs):
1032 def __init__(self, pofile, *args, **kwargs):
1220 """
1033 """
1221 Constructor.
1034 Constructor.
1222
1035
1223 **Arguments**:
1036 Keyword arguments:
1224 - *fpath*: string, path to the po file
1037
1225 - *encoding*: string, the encoding to use, defaults to
1038 ``pofile``
1226 "default_encoding" global variable (optional),
1039 string, path to the po file or its content
1227 - *check_for_duplicates*: whether to check for duplicate entries
1040
1228 when adding entries to the file, default: False (optional).
1041 ``encoding``
1042 string, the encoding to use, defaults to ``default_encoding``
1043 global variable (optional).
1044
1045 ``check_for_duplicates``
1046 whether to check for duplicate entries when adding entries to the
1047 file (optional, default: ``False``).
1229 """
1048 """
1230 enc = kwargs.get('encoding', default_encoding)
1049 enc = kwargs.get('encoding', default_encoding)
1231 check_dup = kwargs.get('check_for_duplicates', False)
1050 if os.path.exists(pofile):
1232 try:
1051 try:
1233 self.fhandle = codecs.open(fpath, 'rU', enc)
1052 self.fhandle = codecs.open(pofile, 'rU', enc)
1234 except LookupError:
1053 except LookupError:
1235 enc = default_encoding
1054 enc = default_encoding
1236 self.fhandle = codecs.open(fpath, 'rU', enc)
1055 self.fhandle = codecs.open(pofile, 'rU', enc)
1056 else:
1057 self.fhandle = pofile.splitlines()
1058
1237 self.instance = POFile(
1059 self.instance = POFile(
1238 fpath=fpath,
1060 pofile=pofile,
1239 encoding=enc,
1061 encoding=enc,
1240 check_for_duplicates=check_dup
1062 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1241 )
1063 )
1242 self.transitions = {}
1064 self.transitions = {}
1243 self.current_entry = POEntry()
1065 self.current_entry = POEntry()
1244 self.current_state = 'ST'
1066 self.current_state = 'ST'
1245 self.current_token = None
1067 self.current_token = None
1246 # two memo flags used in handlers
1068 # two memo flags used in handlers
1247 self.msgstr_index = 0
1069 self.msgstr_index = 0
1248 self.entry_obsolete = 0
1070 self.entry_obsolete = 0
1249 # Configure the state machine, by adding transitions.
1071 # Configure the state machine, by adding transitions.
1250 # Signification of symbols:
1072 # Signification of symbols:
1251 # * ST: Beginning of the file (start)
1073 # * ST: Beginning of the file (start)
1252 # * HE: Header
1074 # * HE: Header
1253 # * TC: a translation comment
1075 # * TC: a translation comment
1254 # * GC: a generated comment
1076 # * GC: a generated comment
1255 # * OC: a file/line occurence
1077 # * OC: a file/line occurence
1256 # * FL: a flags line
1078 # * FL: a flags line
1257 # * CT: a message context
1079 # * CT: a message context
1258 # * PC: a previous msgctxt
1080 # * PC: a previous msgctxt
1259 # * PM: a previous msgid
1081 # * PM: a previous msgid
1260 # * PP: a previous msgid_plural
1082 # * PP: a previous msgid_plural
1261 # * MI: a msgid
1083 # * MI: a msgid
1262 # * MP: a msgid plural
1084 # * MP: a msgid plural
1263 # * MS: a msgstr
1085 # * MS: a msgstr
1264 # * MX: a msgstr plural
1086 # * MX: a msgstr plural
1265 # * MC: a msgid or msgstr continuation line
1087 # * MC: a msgid or msgstr continuation line
1266 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1088 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1267 'MS', 'MP', 'MX', 'MI']
1089 'MS', 'MP', 'MX', 'MI']
1268
1090
1269 self.add('TC', ['ST', 'HE'], 'HE')
1091 self.add('TC', ['ST', 'HE'], 'HE')
1270 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1092 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1271 'MP', 'MX', 'MI'], 'TC')
1093 'MP', 'MX', 'MI'], 'TC')
1272 self.add('GC', all, 'GC')
1094 self.add('GC', all, 'GC')
1273 self.add('OC', all, 'OC')
1095 self.add('OC', all, 'OC')
1274 self.add('FL', all, 'FL')
1096 self.add('FL', all, 'FL')
1275 self.add('PC', all, 'PC')
1097 self.add('PC', all, 'PC')
1276 self.add('PM', all, 'PM')
1098 self.add('PM', all, 'PM')
1277 self.add('PP', all, 'PP')
1099 self.add('PP', all, 'PP')
1278 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1100 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1279 'PP', 'MS', 'MX'], 'CT')
1101 'PP', 'MS', 'MX'], 'CT')
1280 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1102 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1281 'PM', 'PP', 'MS', 'MX'], 'MI')
1103 'PM', 'PP', 'MS', 'MX'], 'MI')
1282 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1104 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1283 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1105 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1284 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1106 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1285 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1107 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1286
1108
1287 def parse(self):
1109 def parse(self):
1288 """
1110 """
1289 Run the state machine, parse the file line by line and call process()
1111 Run the state machine, parse the file line by line and call process()
1290 with the current matched symbol.
1112 with the current matched symbol.
1291 """
1113 """
1292 i, lastlen = 1, 0
1114 i = 0
1115
1116 keywords = {
1117 'msgctxt': 'CT',
1118 'msgid': 'MI',
1119 'msgstr': 'MS',
1120 'msgid_plural': 'MP',
1121 }
1122 prev_keywords = {
1123 'msgid_plural': 'PP',
1124 'msgid': 'PM',
1125 'msgctxt': 'PC',
1126 }
1127
1293 for line in self.fhandle:
1128 for line in self.fhandle:
1129 i += 1
1294 line = line.strip()
1130 line = line.strip()
1295 if line == '':
1131 if line == '':
1296 i = i+1
1297 continue
1132 continue
1298 if line[:3] == '#~ ':
1133
1299 line = line[3:]
1134 tokens = line.split(None, 2)
1135 nb_tokens = len(tokens)
1136
1137 if tokens[0] == '#~' and nb_tokens > 1:
1138 line = line[3:].strip()
1139 tokens = tokens[1:]
1140 nb_tokens -= 1
1300 self.entry_obsolete = 1
1141 self.entry_obsolete = 1
1301 else:
1142 else:
1302 self.entry_obsolete = 0
1143 self.entry_obsolete = 0
1144
1145 # Take care of keywords like
1146 # msgid, msgid_plural, msgctxt & msgstr.
1147 if tokens[0] in keywords and nb_tokens > 1:
1148 line = line[len(tokens[0]):].lstrip()
1303 self.current_token = line
1149 self.current_token = line
1304 if line[:2] == '#:':
1150 self.process(keywords[tokens[0]], i)
1151 continue
1152
1153 self.current_token = line
1154
1155 if tokens[0] == '#:' and nb_tokens > 1:
1305 # we are on a occurrences line
1156 # we are on a occurrences line
1306 self.process('OC', i)
1157 self.process('OC', i)
1307 elif line[:9] == 'msgctxt "':
1158
1308 # we are on a msgctxt
1159 elif line[:1] == '"':
1309 self.process('CT', i)
1160 # we are on a continuation line
1310 elif line[:7] == 'msgid "':
1311 # we are on a msgid
1312 self.process('MI', i)
1313 elif line[:8] == 'msgstr "':
1314 # we are on a msgstr
1315 self.process('MS', i)
1316 elif line[:1] == '"' or line[:4] == '#| "':
1317 # we are on a continuation line or some metadata
1318 self.process('MC', i)
1161 self.process('MC', i)
1319 elif line[:14] == 'msgid_plural "':
1162
1320 # we are on a msgid plural
1321 self.process('MP', i)
1322 elif line[:7] == 'msgstr[':
1163 elif line[:7] == 'msgstr[':
1323 # we are on a msgstr plural
1164 # we are on a msgstr plural
1324 self.process('MX', i)
1165 self.process('MX', i)
1325 elif line[:3] == '#, ':
1166
1167 elif tokens[0] == '#,' and nb_tokens > 1:
1326 # we are on a flags line
1168 # we are on a flags line
1327 self.process('FL', i)
1169 self.process('FL', i)
1328 elif line[:2] == '# ' or line == '#':
1170
1329 if line == '#': line = line + ' '
1171 elif tokens[0] == '#':
1172 if line == '#': line += ' '
1330 # we are on a translator comment line
1173 # we are on a translator comment line
1331 self.process('TC', i)
1174 self.process('TC', i)
1332 elif line[:2] == '#.':
1175
1176 elif tokens[0] == '#.' and nb_tokens > 1:
1333 # we are on a generated comment line
1177 # we are on a generated comment line
1334 self.process('GC', i)
1178 self.process('GC', i)
1335 elif line[:15] == '#| msgid_plural':
1179
1336 # we are on a previous msgid_plural
1180 elif tokens[0] == '#|':
1337 self.process('PP', i)
1181 if nb_tokens < 2:
1338 elif line[:8] == '#| msgid':
1182 self.process('??', i)
1339 self.process('PM', i)
1183 continue
1340 # we are on a previous msgid
1184
1341 elif line[:10] == '#| msgctxt':
1185 # Remove the marker and any whitespace right after that.
1342 # we are on a previous msgctxt
1186 line = line[2:].lstrip()
1343 self.process('PC', i)
1187 self.current_token = line
1344 i = i+1
1188
1189 if tokens[1].startswith('"'):
1190 # Continuation of previous metadata.
1191 self.process('MC', i)
1192 continue
1193
1194 if nb_tokens == 2:
1195 # Invalid continuation line.
1196 self.process('??', i)
1197
1198 # we are on a "previous translation" comment line,
1199 if tokens[1] not in prev_keywords:
1200 # Unknown keyword in previous translation comment.
1201 self.process('??', i)
1202
1203 # Remove the keyword and any whitespace
1204 # between it and the starting quote.
1205 line = line[len(tokens[1]):].lstrip()
1206 self.current_token = line
1207 self.process(prev_keywords[tokens[1]], i)
1208
1209 else:
1210 self.process('??', i)
1345
1211
1346 if self.current_entry:
1212 if self.current_entry:
1347 # since entries are added when another entry is found, we must add
1213 # since entries are added when another entry is found, we must add
1348 # the last entry here (only if there are lines)
1214 # the last entry here (only if there are lines)
1349 self.instance.append(self.current_entry)
1215 self.instance.append(self.current_entry)
1350 # before returning the instance, check if there's metadata and if
1216 # before returning the instance, check if there's metadata and if
1351 # so extract it in a dict
1217 # so extract it in a dict
1352 firstentry = self.instance[0]
1218 firstentry = self.instance[0]
1353 if firstentry.msgid == '': # metadata found
1219 if firstentry.msgid == '': # metadata found
1354 # remove the entry
1220 # remove the entry
1355 firstentry = self.instance.pop(0)
1221 firstentry = self.instance.pop(0)
1356 self.instance.metadata_is_fuzzy = firstentry.flags
1222 self.instance.metadata_is_fuzzy = firstentry.flags
1357 key = None
1223 key = None
1358 for msg in firstentry.msgstr.splitlines():
1224 for msg in firstentry.msgstr.splitlines():
1359 try:
1225 try:
1360 key, val = msg.split(':', 1)
1226 key, val = msg.split(':', 1)
1361 self.instance.metadata[key] = val.strip()
1227 self.instance.metadata[key] = val.strip()
1362 except:
1228 except:
1363 if key is not None:
1229 if key is not None:
1364 self.instance.metadata[key] += '\n'+ msg.strip()
1230 self.instance.metadata[key] += '\n'+ msg.strip()
1365 # close opened file
1231 # close opened file
1232 if isinstance(self.fhandle, file):
1366 self.fhandle.close()
1233 self.fhandle.close()
1367 return self.instance
1234 return self.instance
1368
1235
1369 def add(self, symbol, states, next_state):
1236 def add(self, symbol, states, next_state):
1370 """
1237 """
1371 Add a transition to the state machine.
1238 Add a transition to the state machine.
1239
1372 Keywords arguments:
1240 Keywords arguments:
1373
1241
1374 symbol -- string, the matched token (two chars symbol)
1242 ``symbol``
1375 states -- list, a list of states (two chars symbols)
1243 string, the matched token (two chars symbol).
1376 next_state -- the next state the fsm will have after the action
1244
1245 ``states``
1246 list, a list of states (two chars symbols).
1247
1248 ``next_state``
1249 the next state the fsm will have after the action.
1377 """
1250 """
1378 for state in states:
1251 for state in states:
1379 action = getattr(self, 'handle_%s' % next_state.lower())
1252 action = getattr(self, 'handle_%s' % next_state.lower())
1380 self.transitions[(symbol, state)] = (action, next_state)
1253 self.transitions[(symbol, state)] = (action, next_state)
1381
1254
1382 def process(self, symbol, linenum):
1255 def process(self, symbol, linenum):
1383 """
1256 """
1384 Process the transition corresponding to the current state and the
1257 Process the transition corresponding to the current state and the
1385 symbol provided.
1258 symbol provided.
1386
1259
1387 Keywords arguments:
1260 Keywords arguments:
1388 symbol -- string, the matched token (two chars symbol)
1261
1389 linenum -- integer, the current line number of the parsed file
1262 ``symbol``
1263 string, the matched token (two chars symbol).
1264
1265 ``linenum``
1266 integer, the current line number of the parsed file.
1390 """
1267 """
1391 try:
1268 try:
1392 (action, state) = self.transitions[(symbol, self.current_state)]
1269 (action, state) = self.transitions[(symbol, self.current_state)]
1393 if action():
1270 if action():
1394 self.current_state = state
1271 self.current_state = state
1395 except Exception, exc:
1272 except Exception, exc:
1396 raise IOError('Syntax error in po file (line %s)' % linenum)
1273 raise IOError('Syntax error in po file (line %s)' % linenum)
1397
1274
1398 # state handlers
1275 # state handlers
1399
1276
1400 def handle_he(self):
1277 def handle_he(self):
1401 """Handle a header comment."""
1278 """Handle a header comment."""
1402 if self.instance.header != '':
1279 if self.instance.header != '':
1403 self.instance.header += '\n'
1280 self.instance.header += '\n'
1404 self.instance.header += self.current_token[2:]
1281 self.instance.header += self.current_token[2:]
1405 return 1
1282 return 1
1406
1283
1407 def handle_tc(self):
1284 def handle_tc(self):
1408 """Handle a translator comment."""
1285 """Handle a translator comment."""
1409 if self.current_state in ['MC', 'MS', 'MX']:
1286 if self.current_state in ['MC', 'MS', 'MX']:
1410 self.instance.append(self.current_entry)
1287 self.instance.append(self.current_entry)
1411 self.current_entry = POEntry()
1288 self.current_entry = POEntry()
1412 if self.current_entry.tcomment != '':
1289 if self.current_entry.tcomment != '':
1413 self.current_entry.tcomment += '\n'
1290 self.current_entry.tcomment += '\n'
1414 self.current_entry.tcomment += self.current_token[2:]
1291 self.current_entry.tcomment += self.current_token[2:]
1415 return True
1292 return True
1416
1293
1417 def handle_gc(self):
1294 def handle_gc(self):
1418 """Handle a generated comment."""
1295 """Handle a generated comment."""
1419 if self.current_state in ['MC', 'MS', 'MX']:
1296 if self.current_state in ['MC', 'MS', 'MX']:
1420 self.instance.append(self.current_entry)
1297 self.instance.append(self.current_entry)
1421 self.current_entry = POEntry()
1298 self.current_entry = POEntry()
1422 if self.current_entry.comment != '':
1299 if self.current_entry.comment != '':
1423 self.current_entry.comment += '\n'
1300 self.current_entry.comment += '\n'
1424 self.current_entry.comment += self.current_token[3:]
1301 self.current_entry.comment += self.current_token[3:]
1425 return True
1302 return True
1426
1303
1427 def handle_oc(self):
1304 def handle_oc(self):
1428 """Handle a file:num occurence."""
1305 """Handle a file:num occurence."""
1429 if self.current_state in ['MC', 'MS', 'MX']:
1306 if self.current_state in ['MC', 'MS', 'MX']:
1430 self.instance.append(self.current_entry)
1307 self.instance.append(self.current_entry)
1431 self.current_entry = POEntry()
1308 self.current_entry = POEntry()
1432 occurrences = self.current_token[3:].split()
1309 occurrences = self.current_token[3:].split()
1433 for occurrence in occurrences:
1310 for occurrence in occurrences:
1434 if occurrence != '':
1311 if occurrence != '':
1435 try:
1312 try:
1436 fil, line = occurrence.split(':')
1313 fil, line = occurrence.split(':')
1437 if not line.isdigit():
1314 if not line.isdigit():
1438 fil = fil + line
1315 fil = fil + line
1439 line = ''
1316 line = ''
1440 self.current_entry.occurrences.append((fil, line))
1317 self.current_entry.occurrences.append((fil, line))
1441 except:
1318 except:
1442 self.current_entry.occurrences.append((occurrence, ''))
1319 self.current_entry.occurrences.append((occurrence, ''))
1443 return True
1320 return True
1444
1321
1445 def handle_fl(self):
1322 def handle_fl(self):
1446 """Handle a flags line."""
1323 """Handle a flags line."""
1447 if self.current_state in ['MC', 'MS', 'MX']:
1324 if self.current_state in ['MC', 'MS', 'MX']:
1448 self.instance.append(self.current_entry)
1325 self.instance.append(self.current_entry)
1449 self.current_entry = POEntry()
1326 self.current_entry = POEntry()
1450 self.current_entry.flags += self.current_token[3:].split(', ')
1327 self.current_entry.flags += self.current_token[3:].split(', ')
1451 return True
1328 return True
1452
1329
1453 def handle_pp(self):
1330 def handle_pp(self):
1454 """Handle a previous msgid_plural line."""
1331 """Handle a previous msgid_plural line."""
1455 if self.current_state in ['MC', 'MS', 'MX']:
1332 if self.current_state in ['MC', 'MS', 'MX']:
1456 self.instance.append(self.current_entry)
1333 self.instance.append(self.current_entry)
1457 self.current_entry = POEntry()
1334 self.current_entry = POEntry()
1458 self.current_entry.previous_msgid_plural = \
1335 self.current_entry.previous_msgid_plural = \
1459 unescape(self.current_token[17:-1])
1336 unescape(self.current_token[1:-1])
1460 return True
1337 return True
1461
1338
1462 def handle_pm(self):
1339 def handle_pm(self):
1463 """Handle a previous msgid line."""
1340 """Handle a previous msgid line."""
1464 if self.current_state in ['MC', 'MS', 'MX']:
1341 if self.current_state in ['MC', 'MS', 'MX']:
1465 self.instance.append(self.current_entry)
1342 self.instance.append(self.current_entry)
1466 self.current_entry = POEntry()
1343 self.current_entry = POEntry()
1467 self.current_entry.previous_msgid = \
1344 self.current_entry.previous_msgid = \
1468 unescape(self.current_token[10:-1])
1345 unescape(self.current_token[1:-1])
1469 return True
1346 return True
1470
1347
1471 def handle_pc(self):
1348 def handle_pc(self):
1472 """Handle a previous msgctxt line."""
1349 """Handle a previous msgctxt line."""
1473 if self.current_state in ['MC', 'MS', 'MX']:
1350 if self.current_state in ['MC', 'MS', 'MX']:
1474 self.instance.append(self.current_entry)
1351 self.instance.append(self.current_entry)
1475 self.current_entry = POEntry()
1352 self.current_entry = POEntry()
1476 self.current_entry.previous_msgctxt = \
1353 self.current_entry.previous_msgctxt = \
1477 unescape(self.current_token[12:-1])
1354 unescape(self.current_token[1:-1])
1478 return True
1355 return True
1479
1356
1480 def handle_ct(self):
1357 def handle_ct(self):
1481 """Handle a msgctxt."""
1358 """Handle a msgctxt."""
1482 if self.current_state in ['MC', 'MS', 'MX']:
1359 if self.current_state in ['MC', 'MS', 'MX']:
1483 self.instance.append(self.current_entry)
1360 self.instance.append(self.current_entry)
1484 self.current_entry = POEntry()
1361 self.current_entry = POEntry()
1485 self.current_entry.msgctxt = unescape(self.current_token[9:-1])
1362 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
1486 return True
1363 return True
1487
1364
1488 def handle_mi(self):
1365 def handle_mi(self):
1489 """Handle a msgid."""
1366 """Handle a msgid."""
1490 if self.current_state in ['MC', 'MS', 'MX']:
1367 if self.current_state in ['MC', 'MS', 'MX']:
1491 self.instance.append(self.current_entry)
1368 self.instance.append(self.current_entry)
1492 self.current_entry = POEntry()
1369 self.current_entry = POEntry()
1493 self.current_entry.obsolete = self.entry_obsolete
1370 self.current_entry.obsolete = self.entry_obsolete
1494 self.current_entry.msgid = unescape(self.current_token[7:-1])
1371 self.current_entry.msgid = unescape(self.current_token[1:-1])
1495 return True
1372 return True
1496
1373
1497 def handle_mp(self):
1374 def handle_mp(self):
1498 """Handle a msgid plural."""
1375 """Handle a msgid plural."""
1499 self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
1376 self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
1500 return True
1377 return True
1501
1378
1502 def handle_ms(self):
1379 def handle_ms(self):
1503 """Handle a msgstr."""
1380 """Handle a msgstr."""
1504 self.current_entry.msgstr = unescape(self.current_token[8:-1])
1381 self.current_entry.msgstr = unescape(self.current_token[1:-1])
1505 return True
1382 return True
1506
1383
1507 def handle_mx(self):
1384 def handle_mx(self):
1508 """Handle a msgstr plural."""
1385 """Handle a msgstr plural."""
1509 index, value = self.current_token[7], self.current_token[11:-1]
1386 index, value = self.current_token[7], self.current_token[11:-1]
1510 self.current_entry.msgstr_plural[index] = unescape(value)
1387 self.current_entry.msgstr_plural[index] = unescape(value)
1511 self.msgstr_index = index
1388 self.msgstr_index = index
1512 return True
1389 return True
1513
1390
1514 def handle_mc(self):
1391 def handle_mc(self):
1515 """Handle a msgid or msgstr continuation line."""
1392 """Handle a msgid or msgstr continuation line."""
1516 token = unescape(self.current_token[1:-1])
1393 token = unescape(self.current_token[1:-1])
1517 if self.current_state == 'CT':
1394 if self.current_state == 'CT':
1518 typ = 'msgctxt'
1395 typ = 'msgctxt'
1519 self.current_entry.msgctxt += token
1396 self.current_entry.msgctxt += token
1520 elif self.current_state == 'MI':
1397 elif self.current_state == 'MI':
1521 typ = 'msgid'
1398 typ = 'msgid'
1522 self.current_entry.msgid += token
1399 self.current_entry.msgid += token
1523 elif self.current_state == 'MP':
1400 elif self.current_state == 'MP':
1524 typ = 'msgid_plural'
1401 typ = 'msgid_plural'
1525 self.current_entry.msgid_plural += token
1402 self.current_entry.msgid_plural += token
1526 elif self.current_state == 'MS':
1403 elif self.current_state == 'MS':
1527 typ = 'msgstr'
1404 typ = 'msgstr'
1528 self.current_entry.msgstr += token
1405 self.current_entry.msgstr += token
1529 elif self.current_state == 'MX':
1406 elif self.current_state == 'MX':
1530 typ = 'msgstr[%s]' % self.msgstr_index
1407 typ = 'msgstr[%s]' % self.msgstr_index
1531 self.current_entry.msgstr_plural[self.msgstr_index] += token
1408 self.current_entry.msgstr_plural[self.msgstr_index] += token
1532 elif self.current_state == 'PP':
1409 elif self.current_state == 'PP':
1533 typ = 'previous_msgid_plural'
1410 typ = 'previous_msgid_plural'
1534 token = token[3:]
1411 token = token[3:]
1535 self.current_entry.previous_msgid_plural += token
1412 self.current_entry.previous_msgid_plural += token
1536 elif self.current_state == 'PM':
1413 elif self.current_state == 'PM':
1537 typ = 'previous_msgid'
1414 typ = 'previous_msgid'
1538 token = token[3:]
1415 token = token[3:]
1539 self.current_entry.previous_msgid += token
1416 self.current_entry.previous_msgid += token
1540 elif self.current_state == 'PC':
1417 elif self.current_state == 'PC':
1541 typ = 'previous_msgctxt'
1418 typ = 'previous_msgctxt'
1542 token = token[3:]
1419 token = token[3:]
1543 self.current_entry.previous_msgctxt += token
1420 self.current_entry.previous_msgctxt += token
1544 if typ not in self.current_entry._multiline_str:
1545 self.current_entry._multiline_str[typ] = token
1546 else:
1547 self.current_entry._multiline_str[typ] += "__POLIB__NL__" + token
1548 # don't change the current state
1421 # don't change the current state
1549 return False
1422 return False
1550
1423
1551 # }}}
1424 # }}}
1552 # class _MOFileParser {{{
1425 # class _MOFileParser {{{
1553
1426
1554 class _MOFileParser(object):
1427 class _MOFileParser(object):
1555 """
1428 """
1556 A class to parse binary mo files.
1429 A class to parse binary mo files.
1557 """
1430 """
1558 BIG_ENDIAN = 0xde120495
1559 LITTLE_ENDIAN = 0x950412de
1560
1431
1561 def __init__(self, fpath, *args, **kwargs):
1432 def __init__(self, mofile, *args, **kwargs):
1562 """
1433 """
1563 Constructor.
1434 Constructor.
1564
1435
1565 **Arguments**:
1436 Keyword arguments:
1566 - *fpath*: string, path to the po file
1437
1567 - *encoding*: string, the encoding to use, defaults to
1438 ``mofile``
1568 "default_encoding" global variable (optional),
1439 string, path to the mo file or its content
1569 - *check_for_duplicates*: whether to check for duplicate entries
1440
1570 when adding entries to the file, default: False (optional).
1441 ``encoding``
1442 string, the encoding to use, defaults to ``default_encoding``
1443 global variable (optional).
1444
1445 ``check_for_duplicates``
1446 whether to check for duplicate entries when adding entries to the
1447 file (optional, default: ``False``).
1571 """
1448 """
1572 enc = kwargs.get('encoding', default_encoding)
1449 self.fhandle = open(mofile, 'rb')
1573 check_dup = kwargs.get('check_for_duplicates', False)
1574 self.fhandle = open(fpath, 'rb')
1575 self.instance = MOFile(
1450 self.instance = MOFile(
1576 fpath=fpath,
1451 fpath=mofile,
1577 encoding=enc,
1452 encoding=kwargs.get('encoding', default_encoding),
1578 check_for_duplicates=check_dup
1453 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1579 )
1454 )
1580
1455
1581 def parse_magicnumber(self):
1582 """
1583 Parse the magic number and raise an exception if not valid.
1584 """
1585
1586 def parse(self):
1456 def parse(self):
1587 """
1457 """
1588 Build the instance with the file handle provided in the
1458 Build the instance with the file handle provided in the
1589 constructor.
1459 constructor.
1590 """
1460 """
1461 # parse magic number
1591 magic_number = self._readbinary('<I', 4)
1462 magic_number = self._readbinary('<I', 4)
1592 if magic_number == self.LITTLE_ENDIAN:
1463 if magic_number == MOFile.LITTLE_ENDIAN:
1593 ii = '<II'
1464 ii = '<II'
1594 elif magic_number == self.BIG_ENDIAN:
1465 elif magic_number == MOFile.BIG_ENDIAN:
1595 ii = '>II'
1466 ii = '>II'
1596 else:
1467 else:
1597 raise IOError('Invalid mo file, magic number is incorrect !')
1468 raise IOError('Invalid mo file, magic number is incorrect !')
1598 self.instance.magic_number = magic_number
1469 self.instance.magic_number = magic_number
1599 # parse the version number and the number of strings
1470 # parse the version number and the number of strings
1600 self.instance.version, numofstrings = self._readbinary(ii, 8)
1471 self.instance.version, numofstrings = self._readbinary(ii, 8)
1601 # original strings and translation strings hash table offset
1472 # original strings and translation strings hash table offset
1602 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1473 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1603 # move to msgid hash table and read length and offset of msgids
1474 # move to msgid hash table and read length and offset of msgids
1604 self.fhandle.seek(msgids_hash_offset)
1475 self.fhandle.seek(msgids_hash_offset)
1605 msgids_index = []
1476 msgids_index = []
1606 for i in range(numofstrings):
1477 for i in range(numofstrings):
1607 msgids_index.append(self._readbinary(ii, 8))
1478 msgids_index.append(self._readbinary(ii, 8))
1608 # move to msgstr hash table and read length and offset of msgstrs
1479 # move to msgstr hash table and read length and offset of msgstrs
1609 self.fhandle.seek(msgstrs_hash_offset)
1480 self.fhandle.seek(msgstrs_hash_offset)
1610 msgstrs_index = []
1481 msgstrs_index = []
1611 for i in range(numofstrings):
1482 for i in range(numofstrings):
1612 msgstrs_index.append(self._readbinary(ii, 8))
1483 msgstrs_index.append(self._readbinary(ii, 8))
1613 # build entries
1484 # build entries
1614 for i in range(numofstrings):
1485 for i in range(numofstrings):
1615 self.fhandle.seek(msgids_index[i][1])
1486 self.fhandle.seek(msgids_index[i][1])
1616 msgid = self.fhandle.read(msgids_index[i][0])
1487 msgid = self.fhandle.read(msgids_index[i][0])
1617 self.fhandle.seek(msgstrs_index[i][1])
1488 self.fhandle.seek(msgstrs_index[i][1])
1618 msgstr = self.fhandle.read(msgstrs_index[i][0])
1489 msgstr = self.fhandle.read(msgstrs_index[i][0])
1619 if i == 0: # metadata
1490 if i == 0: # metadata
1620 raw_metadata, metadata = msgstr.split('\n'), {}
1491 raw_metadata, metadata = msgstr.split('\n'), {}
1621 for line in raw_metadata:
1492 for line in raw_metadata:
1622 tokens = line.split(':', 1)
1493 tokens = line.split(':', 1)
1623 if tokens[0] != '':
1494 if tokens[0] != '':
1624 try:
1495 try:
1625 metadata[tokens[0]] = tokens[1].strip()
1496 metadata[tokens[0]] = tokens[1].strip()
1626 except IndexError:
1497 except IndexError:
1627 metadata[tokens[0]] = ''
1498 metadata[tokens[0]] = ''
1628 self.instance.metadata = metadata
1499 self.instance.metadata = metadata
1629 continue
1500 continue
1630 # test if we have a plural entry
1501 # test if we have a plural entry
1631 msgid_tokens = msgid.split('\0')
1502 msgid_tokens = msgid.split('\0')
1632 if len(msgid_tokens) > 1:
1503 if len(msgid_tokens) > 1:
1633 entry = MOEntry(
1504 entry = self._build_entry(
1634 msgid=msgid_tokens[0],
1505 msgid=msgid_tokens[0],
1635 msgid_plural=msgid_tokens[1],
1506 msgid_plural=msgid_tokens[1],
1636 msgstr_plural=dict((k,v) for k,v in \
1507 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
1637 enumerate(msgstr.split('\0')))
1638 )
1508 )
1639 else:
1509 else:
1640 entry = MOEntry(msgid=msgid, msgstr=msgstr)
1510 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1641 self.instance.append(entry)
1511 self.instance.append(entry)
1642 # close opened file
1512 # close opened file
1643 self.fhandle.close()
1513 self.fhandle.close()
1644 return self.instance
1514 return self.instance
1645
1515
1516 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1517 msgstr_plural=None):
1518 msgctxt_msgid = msgid.split('\x04')
1519 if len(msgctxt_msgid) > 1:
1520 kwargs = {
1521 'msgctxt': msgctxt_msgid[0],
1522 'msgid' : msgctxt_msgid[1],
1523 }
1524 else:
1525 kwargs = {'msgid': msgid}
1526 if msgstr:
1527 kwargs['msgstr'] = msgstr
1528 if msgid_plural:
1529 kwargs['msgid_plural'] = msgid_plural
1530 if msgstr_plural:
1531 kwargs['msgstr_plural'] = msgstr_plural
1532 return MOEntry(**kwargs)
1533
1646 def _readbinary(self, fmt, numbytes):
1534 def _readbinary(self, fmt, numbytes):
1647 """
1535 """
1648 Private method that unpacks n bytes of data using format <fmt>.
1536 Private method that unpacks n bytes of data using format <fmt>.
1649 It returns a tuple, or the single unpacked value if the tuple length is 1.
1537 It returns a tuple, or the single unpacked value if the tuple length is 1.
1650 """
1538 """
1651 bytes = self.fhandle.read(numbytes)
1539 bytes = self.fhandle.read(numbytes)
1652 tup = struct.unpack(fmt, bytes)
1540 tup = struct.unpack(fmt, bytes)
1653 if len(tup) == 1:
1541 if len(tup) == 1:
1654 return tup[0]
1542 return tup[0]
1655 return tup
1543 return tup
1656
1544
1657 # }}}
1545 # }}}
1658 # __main__ {{{
1546 # class TextWrapper {{{
1659
1547
1660 if __name__ == '__main__':
1548 class TextWrapper(textwrap.TextWrapper):
1549 """
1550 Subclass of textwrap.TextWrapper that backports the
1551 drop_whitespace option.
1661 """
1552 """
1662 **Main function**::
1553 def __init__(self, *args, **kwargs):
1663 - to **test** the module just run: *python polib.py [-v]*
1554 drop_whitespace = kwargs.pop('drop_whitespace', True)
1664 - to **profile** the module: *python polib.py -p <some_pofile.po>*
1555 textwrap.TextWrapper.__init__(self, *args, **kwargs)
1556 self.drop_whitespace = drop_whitespace
1557
1558 def _wrap_chunks(self, chunks):
1559 """_wrap_chunks(chunks : [string]) -> [string]
1560
1561 Wrap a sequence of text chunks and return a list of lines of
1562 length 'self.width' or less. (If 'break_long_words' is false,
1563 some lines may be longer than this.) Chunks correspond roughly
1564 to words and the whitespace between them: each chunk is
1565 indivisible (modulo 'break_long_words'), but a line break can
1566 come between any two chunks. Chunks should not have internal
1567 whitespace; ie. a chunk is either all whitespace or a "word".
1568 Whitespace chunks will be removed from the beginning and end of
1569 lines, but apart from that whitespace is preserved.
1665 """
1570 """
1666 import sys
1571 lines = []
1667 if len(sys.argv) > 2 and sys.argv[1] == '-p':
1572 if self.width <= 0:
1668 def test(f):
1573 raise ValueError("invalid width %r (must be > 0)" % self.width)
1669 if f.endswith('po'):
1574
1670 p = pofile(f)
1575 # Arrange in reverse order so items can be efficiently popped
1576 # from a stack of chunks.
1577 chunks.reverse()
1578
1579 while chunks:
1580
1581 # Start the list of chunks that will make up the current line.
1582 # cur_len is just the length of all the chunks in cur_line.
1583 cur_line = []
1584 cur_len = 0
1585
1586 # Figure out which static string will prefix this line.
1587 if lines:
1588 indent = self.subsequent_indent
1671 else:
1589 else:
1672 p = mofile(f)
1590 indent = self.initial_indent
1673 s = unicode(p)
1591
1674 import profile
1592 # Maximum width for this line.
1675 profile.run('test("'+sys.argv[2]+'")')
1593 width = self.width - len(indent)
1594
1595 # First chunk on line is whitespace -- drop it, unless this
1596 # is the very beginning of the text (ie. no lines started yet).
1597 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
1598 del chunks[-1]
1599
1600 while chunks:
1601 l = len(chunks[-1])
1602
1603 # Can at least squeeze this chunk onto the current line.
1604 if cur_len + l <= width:
1605 cur_line.append(chunks.pop())
1606 cur_len += l
1607
1608 # Nope, this line is full.
1676 else:
1609 else:
1677 import doctest
1610 break
1678 doctest.testmod()
1611
1612 # The current line is full, and the next chunk is too big to
1613 # fit on *any* line (not just this one).
1614 if chunks and len(chunks[-1]) > width:
1615 self._handle_long_word(chunks, cur_line, cur_len, width)
1616
1617 # If the last chunk on this line is all whitespace, drop it.
1618 if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
1619 del cur_line[-1]
1620
1621 # Convert current line back to a string and store it in list
1622 # of all lines (return value).
1623 if cur_line:
1624 lines.append(indent + ''.join(cur_line))
1625
1626 return lines
1679
1627
1680 # }}}
1628 # }}}
1629 # function wrap() {{{
1630
1631 def wrap(text, width=70, **kwargs):
1632 """
1633 Wrap a single paragraph of text, returning a list of wrapped lines.
1634 """
1635 if sys.version_info < (2, 6):
1636 return TextWrapper(width=width, **kwargs).wrap(text)
1637 return textwrap.wrap(text, width=width, **kwargs)
1638
1639 #}}}
General Comments 0
You need to be logged in to leave comments. Login now