##// END OF EJS Templates
i18n: import polib 0.6.4 (rev 84598f2b5365)...
Wagner Bruna -
r15290:e40430fb stable
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (1487 lines changed) Show them Hide them
@@ -1,1680 +1,1639 b''
1 1 # -*- coding: utf-8 -*-
2 2 # no-check-code
3 3 #
4 4 # License: MIT (see LICENSE file provided)
5 5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6 6
7 7 """
8 **polib** allows you to manipulate, create, modify gettext files (pot, po
9 and mo files). You can load existing files, iterate through it's entries,
10 add, modify entries, comments or metadata, etc... or create new po files
11 from scratch.
12
13 **polib** provides a simple and pythonic API, exporting only three
14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
16 new files/entries.
17
18 **Basic example**:
8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
9 mo files). You can load existing files, iterate through its entries, add,
10 modify entries, comments or metadata, etc. or create new po files from scratch.
19 11
20 >>> import polib
21 >>> # load an existing po file
22 >>> po = polib.pofile('tests/test_utf8.po')
23 >>> for entry in po:
24 ... # do something with entry...
25 ... pass
26 >>> # add an entry
27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
29 >>> po.append(entry)
30 >>> # to save our modified po file:
31 >>> # po.save()
32 >>> # or you may want to compile the po file
33 >>> # po.save_as_mofile('tests/test_utf8.mo')
12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
13 :func:`~polib.mofile` convenience functions.
34 14 """
35 15
36 __author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
37 __version__ = '0.5.2'
16 __author__ = 'David Jean Louis <izimobil@gmail.com>'
17 __version__ = '0.6.4'
38 18 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
39 19 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
40 20
21 import array
41 22 import codecs
23 import os
24 import re
42 25 import struct
26 import sys
43 27 import textwrap
44 28 import types
45 import re
29
46 30
31 # the default encoding to use when encoding cannot be detected
47 32 default_encoding = 'utf-8'
48 33
49 # function pofile() {{{
50
51 def pofile(fpath, **kwargs):
52 """
53 Convenience function that parse the po/pot file *fpath* and return
54 a POFile instance.
55
56 **Keyword arguments**:
57 - *fpath*: string, full or relative path to the po/pot file to parse
58 - *wrapwidth*: integer, the wrap width, only useful when -w option was
59 passed to xgettext (optional, default to 78)
60 - *autodetect_encoding*: boolean, if set to False the function will
61 not try to detect the po file encoding (optional, default to True)
62 - *encoding*: string, an encoding, only relevant if autodetect_encoding
63 is set to False
64 - *check_for_duplicates*: whether to check for duplicate entries when
65 adding entries to the file, default: False (optional)
66
67 **Example**:
34 # _pofile_or_mofile {{{
68 35
69 >>> import polib
70 >>> po = polib.pofile('tests/test_weird_occurrences.po',
71 ... check_for_duplicates=True)
72 >>> po #doctest: +ELLIPSIS
73 <POFile instance at ...>
74 >>> import os, tempfile
75 >>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural',
76 ... 'msgstr_plural', 'obsolete', 'comment', 'tcomment',
77 ... 'occurrences', 'flags', 'previous_msgctxt',
78 ... 'previous_msgid', 'previous_msgid_plural')
79 >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
80 ... orig_po = polib.pofile('tests/'+fname)
81 ... tmpf = tempfile.NamedTemporaryFile().name
82 ... orig_po.save(tmpf)
83 ... try:
84 ... new_po = polib.pofile(tmpf)
85 ... for old, new in zip(orig_po, new_po):
86 ... for attr in all_attrs:
87 ... if getattr(old, attr) != getattr(new, attr):
88 ... getattr(old, attr)
89 ... getattr(new, attr)
90 ... finally:
91 ... os.unlink(tmpf)
92 >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
93 >>> tmpf = tempfile.NamedTemporaryFile().name
94 >>> po_file.save_as_mofile(tmpf)
95 >>> try:
96 ... mo_file = polib.mofile(tmpf)
97 ... for old, new in zip(po_file, mo_file):
98 ... if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
99 ... 'OLD: ', po_file._encode(old.msgid)
100 ... 'NEW: ', mo_file._encode(new.msgid)
101 ... if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
102 ... 'OLD: ', po_file._encode(old.msgstr)
103 ... 'NEW: ', mo_file._encode(new.msgstr)
104 ... print new.msgstr
105 ... finally:
106 ... os.unlink(tmpf)
36 def _pofile_or_mofile(f, type, **kwargs):
37 """
38 Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
39 honor the DRY concept.
107 40 """
108 if kwargs.get('autodetect_encoding', True):
109 enc = detect_encoding(fpath)
110 else:
111 enc = kwargs.get('encoding', default_encoding)
112 check_for_duplicates = kwargs.get('check_for_duplicates', False)
113 parser = _POFileParser(
114 fpath,
41 # get the file encoding
42 enc = kwargs.get('encoding')
43 if enc is None:
44 enc = detect_encoding(f, type == 'mofile')
45
46 # parse the file
47 kls = type == 'pofile' and _POFileParser or _MOFileParser
48 parser = kls(
49 f,
115 50 encoding=enc,
116 51 check_for_duplicates=kwargs.get('check_for_duplicates', False)
117 52 )
118 53 instance = parser.parse()
119 54 instance.wrapwidth = kwargs.get('wrapwidth', 78)
120 55 return instance
121 56
122 57 # }}}
58 # function pofile() {{{
59
60 def pofile(pofile, **kwargs):
61 """
62 Convenience function that parses the po or pot file ``pofile`` and returns
63 a :class:`~polib.POFile` instance.
64
65 Arguments:
66
67 ``pofile``
68 string, full or relative path to the po/pot file or its content (data).
69
70 ``wrapwidth``
71 integer, the wrap width, only useful when the ``-w`` option was passed
72 to xgettext (optional, default: ``78``).
73
74 ``encoding``
75 string, the encoding to use (e.g. "utf-8") (default: ``None``, the
76 encoding will be auto-detected).
77
78 ``check_for_duplicates``
79 whether to check for duplicate entries when adding entries to the
80 file (optional, default: ``False``).
81 """
82 return _pofile_or_mofile(pofile, 'pofile', **kwargs)
83
84 # }}}
123 85 # function mofile() {{{
124 86
125 def mofile(fpath, **kwargs):
87 def mofile(mofile, **kwargs):
126 88 """
127 Convenience function that parse the mo file *fpath* and return
128 a MOFile instance.
89 Convenience function that parses the mo file ``mofile`` and returns a
90 :class:`~polib.MOFile` instance.
129 91
130 **Keyword arguments**:
131 - *fpath*: string, full or relative path to the mo file to parse
132 - *wrapwidth*: integer, the wrap width, only useful when -w option was
133 passed to xgettext to generate the po file that was used to format
134 the mo file (optional, default to 78)
135 - *autodetect_encoding*: boolean, if set to False the function will
136 not try to detect the po file encoding (optional, default to True)
137 - *encoding*: string, an encoding, only relevant if autodetect_encoding
138 is set to False
139 - *check_for_duplicates*: whether to check for duplicate entries when
140 adding entries to the file, default: False (optional)
92 Arguments:
141 93
142 **Example**:
94 ``mofile``
95 string, full or relative path to the mo file or its content (data).
143 96
144 >>> import polib
145 >>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)
146 >>> mo #doctest: +ELLIPSIS
147 <MOFile instance at ...>
148 >>> import os, tempfile
149 >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
150 ... orig_mo = polib.mofile('tests/'+fname)
151 ... tmpf = tempfile.NamedTemporaryFile().name
152 ... orig_mo.save(tmpf)
153 ... try:
154 ... new_mo = polib.mofile(tmpf)
155 ... for old, new in zip(orig_mo, new_mo):
156 ... if old.msgid != new.msgid:
157 ... old.msgstr
158 ... new.msgstr
159 ... finally:
160 ... os.unlink(tmpf)
97 ``wrapwidth``
98 integer, the wrap width, only useful when the ``-w`` option was passed
99 to xgettext to generate the po file that was used to format the mo file
100 (optional, default: ``78``).
101
102 ``encoding``
103 string, the encoding to use (e.g. "utf-8") (default: ``None``, the
104 encoding will be auto-detected).
105
106 ``check_for_duplicates``
107 whether to check for duplicate entries when adding entries to the
108 file (optional, default: ``False``).
161 109 """
162 if kwargs.get('autodetect_encoding', True):
163 enc = detect_encoding(fpath, True)
164 else:
165 enc = kwargs.get('encoding', default_encoding)
166 parser = _MOFileParser(
167 fpath,
168 encoding=enc,
169 check_for_duplicates=kwargs.get('check_for_duplicates', False)
170 )
171 instance = parser.parse()
172 instance.wrapwidth = kwargs.get('wrapwidth', 78)
173 return instance
110 return _pofile_or_mofile(mofile, 'mofile', **kwargs)
174 111
175 112 # }}}
176 113 # function detect_encoding() {{{
177 114
178 def detect_encoding(fpath, binary_mode=False):
115 def detect_encoding(file, binary_mode=False):
179 116 """
180 Try to detect the encoding used by the file *fpath*. The function will
181 return polib default *encoding* if it's unable to detect it.
117 Try to detect the encoding used by the ``file``. The ``file`` argument can
118 be a PO or MO file path or a string containing the contents of the file.
119 If the encoding cannot be detected, the function will return the value of
120 ``default_encoding``.
182 121
183 **Keyword argument**:
184 - *fpath*: string, full or relative path to the mo file to parse.
122 Arguments:
123
124 ``file``
125 string, full or relative path to the po/mo file or its content.
185 126
186 **Examples**:
127 ``binary_mode``
128 boolean, set this to True if ``file`` is a mo file.
129 """
130 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
187 131
188 >>> print(detect_encoding('tests/test_noencoding.po'))
189 utf-8
190 >>> print(detect_encoding('tests/test_utf8.po'))
191 UTF-8
192 >>> print(detect_encoding('tests/test_utf8.mo', True))
193 UTF-8
194 >>> print(detect_encoding('tests/test_iso-8859-15.po'))
195 ISO_8859-15
196 >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
197 ISO_8859-15
198 """
199 import re
200 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
201 if binary_mode:
202 mode = 'rb'
132 def charset_exists(charset):
133 """Check whether ``charset`` is valid or not."""
134 try:
135 codecs.lookup(charset)
136 except LookupError:
137 return False
138 return True
139
140 if not os.path.exists(file):
141 match = rx.search(file)
142 if match:
143 enc = match.group(1).strip()
144 if charset_exists(enc):
145 return enc
203 146 else:
204 mode = 'r'
205 f = open(fpath, mode)
206 for l in f.readlines():
207 match = rx.search(l)
208 if match:
209 f.close()
210 return match.group(1).strip()
211 f.close()
147 if binary_mode:
148 mode = 'rb'
149 else:
150 mode = 'r'
151 f = open(file, mode)
152 for l in f.readlines():
153 match = rx.search(l)
154 if match:
155 f.close()
156 enc = match.group(1).strip()
157 if charset_exists(enc):
158 return enc
159 f.close()
212 160 return default_encoding
213 161
214 162 # }}}
215 163 # function escape() {{{
216 164
217 165 def escape(st):
218 166 """
219 Escape special chars and return the given string *st*.
220
221 **Examples**:
222
223 >>> escape('\\t and \\n and \\r and " and \\\\')
224 '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
167 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
168 the given string ``st`` and returns it.
225 169 """
226 170 return st.replace('\\', r'\\')\
227 171 .replace('\t', r'\t')\
228 172 .replace('\r', r'\r')\
229 173 .replace('\n', r'\n')\
230 174 .replace('\"', r'\"')
231 175
232 176 # }}}
233 177 # function unescape() {{{
234 178
235 179 def unescape(st):
236 180 """
237 Unescape special chars and return the given string *st*.
238
239 **Examples**:
240
241 >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
242 '\\t and \\n and \\r and " and \\\\'
243 >>> unescape(r'\\n')
244 '\\n'
245 >>> unescape(r'\\\\n')
246 '\\\\n'
247 >>> unescape(r'\\\\n\\n')
248 '\\\\n\\n'
181 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
182 the given string ``st`` and returns it.
249 183 """
250 184 def unescape_repl(m):
251 185 m = m.group(1)
252 186 if m == 'n':
253 187 return '\n'
254 188 if m == 't':
255 189 return '\t'
256 190 if m == 'r':
257 191 return '\r'
258 192 if m == '\\':
259 193 return '\\'
260 194 return m # handles escaped double quote
261 195 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
262 196
263 197 # }}}
264 198 # class _BaseFile {{{
265 199
266 200 class _BaseFile(list):
267 201 """
268 Common parent class for POFile and MOFile classes.
269 This class must **not** be instanciated directly.
202 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
203 classes. This class should **not** be instantiated directly.
270 204 """
271 205
272 206 def __init__(self, *args, **kwargs):
273 207 """
274 Constructor.
208 Constructor, accepts the following keyword arguments:
209
210 ``pofile``
211 string, the path to the po or mo file, or its content as a string.
275 212
276 **Keyword arguments**:
277 - *fpath*: string, path to po or mo file
278 - *wrapwidth*: integer, the wrap width, only useful when -w option
279 was passed to xgettext to generate the po file that was used to
280 format the mo file, default to 78 (optional),
281 - *encoding*: string, the encoding to use, defaults to
282 "default_encoding" global variable (optional),
283 - *check_for_duplicates*: whether to check for duplicate entries
284 when adding entries to the file, default: False (optional).
213 ``wrapwidth``
214 integer, the wrap width, only useful when the ``-w`` option was
215 passed to xgettext (optional, default: ``78``).
216
217 ``encoding``
218 string, the encoding to use, defaults to ``default_encoding``
219 global variable (optional).
220
221 ``check_for_duplicates``
222 whether to check for duplicate entries when adding entries to the
223 file, (optional, default: ``False``).
285 224 """
286 225 list.__init__(self)
287 226 # the opened file handle
288 self.fpath = kwargs.get('fpath')
227 pofile = kwargs.get('pofile', None)
228 if pofile and os.path.exists(pofile):
229 self.fpath = pofile
230 else:
231 self.fpath = kwargs.get('fpath')
289 232 # the width at which lines should be wrapped
290 233 self.wrapwidth = kwargs.get('wrapwidth', 78)
291 234 # the file encoding
292 235 self.encoding = kwargs.get('encoding', default_encoding)
293 236 # whether to check for duplicate entries or not
294 237 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
295 238 # header
296 239 self.header = ''
297 240 # both po and mo files have metadata
298 241 self.metadata = {}
299 242 self.metadata_is_fuzzy = 0
300 243
301 def __str__(self):
244 def __unicode__(self):
302 245 """
303 String representation of the file.
246 Returns the unicode representation of the file.
304 247 """
305 248 ret = []
306 249 entries = [self.metadata_as_entry()] + \
307 250 [e for e in self if not e.obsolete]
308 251 for entry in entries:
309 ret.append(entry.__str__(self.wrapwidth))
252 ret.append(entry.__unicode__(self.wrapwidth))
310 253 for entry in self.obsolete_entries():
311 ret.append(entry.__str__(self.wrapwidth))
312 return '\n'.join(ret)
254 ret.append(entry.__unicode__(self.wrapwidth))
255 ret = '\n'.join(ret)
256
257 if type(ret) != types.UnicodeType:
258 return unicode(ret, self.encoding)
259 return ret
260
261 def __str__(self):
262 """
263 Returns the string representation of the file.
264 """
265 return unicode(self).encode(self.encoding)
313 266
314 267 def __contains__(self, entry):
315 268 """
316 Overriden method to implement the membership test (in and not in).
317 The method considers that an entry is in the file if it finds an
318 entry that has the same msgid (case sensitive).
319
320 **Keyword argument**:
321 - *entry*: an instance of polib._BaseEntry
269 Overridden ``list`` method to implement the membership test (in and
270 not in).
271 The method considers that an entry is in the file if it finds an entry
272 that has the same msgid (the test is **case sensitive**).
322 273
323 **Tests**:
324 >>> po = POFile()
325 >>> e1 = POEntry(msgid='foobar', msgstr='spam')
326 >>> e2 = POEntry(msgid='barfoo', msgstr='spam')
327 >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
328 >>> e4 = POEntry(msgid='spameggs', msgstr='eggs')
329 >>> po.append(e1)
330 >>> po.append(e2)
331 >>> e1 in po
332 True
333 >>> e2 not in po
334 False
335 >>> e3 in po
336 True
337 >>> e4 in po
338 False
274 Argument:
275
276 ``entry``
277 an instance of :class:`~polib._BaseEntry`.
339 278 """
340 279 return self.find(entry.msgid, by='msgid') is not None
280
281 def __eq__(self, other):
282 return unicode(self) == unicode(other)
341 283
342 284 def append(self, entry):
343 285 """
344 286 Overridden method to check for duplicate entries, if a user tries to
345 add an entry that already exists, the method will raise a ValueError
346 exception.
347
348 **Keyword argument**:
349 - *entry*: an instance of polib._BaseEntry
287 add an entry that is already in the file, the method will raise a
288 ``ValueError`` exception.
350 289
351 **Tests**:
352 >>> e1 = POEntry(msgid='foobar', msgstr='spam')
353 >>> e2 = POEntry(msgid='foobar', msgstr='eggs')
354 >>> po = POFile(check_for_duplicates=True)
355 >>> po.append(e1)
356 >>> try:
357 ... po.append(e2)
358 ... except ValueError, e:
359 ... unicode(e)
360 u'Entry "foobar" already exists'
290 Argument:
291
292 ``entry``
293 an instance of :class:`~polib._BaseEntry`.
361 294 """
362 295 if self.check_for_duplicates and entry in self:
363 296 raise ValueError('Entry "%s" already exists' % entry.msgid)
364 297 super(_BaseFile, self).append(entry)
365 298
366 299 def insert(self, index, entry):
367 300 """
368 301 Overridden method to check for duplicate entries, if a user tries to
369 insert an entry that already exists, the method will raise a ValueError
370 exception.
302 add an entry that is already in the file, the method will raise a
303 ``ValueError`` exception.
371 304
372 **Keyword arguments**:
373 - *index*: index at which the entry should be inserted
374 - *entry*: an instance of polib._BaseEntry
305 Arguments:
375 306
376 **Tests**:
377 >>> import polib
378 >>> polib.check_for_duplicates = True
379 >>> e1 = POEntry(msgid='foobar', msgstr='spam')
380 >>> e2 = POEntry(msgid='barfoo', msgstr='eggs')
381 >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
382 >>> po = POFile(check_for_duplicates=True)
383 >>> po.insert(0, e1)
384 >>> po.insert(1, e2)
385 >>> try:
386 ... po.insert(0, e3)
387 ... except ValueError, e:
388 ... unicode(e)
389 u'Entry "foobar" already exists'
307 ``index``
308 index at which the entry should be inserted.
309
310 ``entry``
311 an instance of :class:`~polib._BaseEntry`.
390 312 """
391 313 if self.check_for_duplicates and entry in self:
392 314 raise ValueError('Entry "%s" already exists' % entry.msgid)
393 315 super(_BaseFile, self).insert(index, entry)
394 316
395 def __repr__(self):
396 """Return the official string representation of the object."""
397 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
398
399 317 def metadata_as_entry(self):
400 318 """
401 Return the metadata as an entry:
402
403 >>> import polib
404 >>> po = polib.pofile('tests/test_fuzzy_header.po')
405 >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())
406 True
319 Returns the file metadata as a :class:`~polib.POEntry` instance.
407 320 """
408 321 e = POEntry(msgid='')
409 322 mdata = self.ordered_metadata()
410 323 if mdata:
411 324 strs = []
412 e._multiline_str['msgstr'] = ''
413 325 for name, value in mdata:
414 326 # Strip whitespace off each line in a multi-line entry
415 327 strs.append('%s: %s' % (name, value))
416 328 e.msgstr = '\n'.join(strs) + '\n'
417 e._multiline_str['msgstr'] = '__POLIB__NL__'.join(
418 [s + '\n' for s in strs])
419 329 if self.metadata_is_fuzzy:
420 330 e.flags.append('fuzzy')
421 331 return e
422 332
423 333 def save(self, fpath=None, repr_method='__str__'):
424 334 """
425 Save the po file to file *fpath* if no file handle exists for
426 the object. If there's already an open file and no fpath is
427 provided, then the existing file is rewritten with the modified
428 data.
335 Saves the po file to ``fpath``.
336 If it is an existing file and no ``fpath`` is provided, then the
337 existing file is rewritten with the modified data.
338
339 Keyword arguments:
429 340
430 **Keyword arguments**:
431 - *fpath*: string, full or relative path to the file.
432 - *repr_method*: string, the method to use for output.
341 ``fpath``
342 string, full or relative path to the file.
343
344 ``repr_method``
345 string, the method to use for output.
433 346 """
434 347 if self.fpath is None and fpath is None:
435 348 raise IOError('You must provide a file path to save() method')
436 349 contents = getattr(self, repr_method)()
437 350 if fpath is None:
438 351 fpath = self.fpath
439 352 if repr_method == 'to_binary':
440 353 fhandle = open(fpath, 'wb')
441 354 else:
442 355 fhandle = codecs.open(fpath, 'w', self.encoding)
443 356 if type(contents) != types.UnicodeType:
444 357 contents = contents.decode(self.encoding)
445 358 fhandle.write(contents)
446 359 fhandle.close()
447
448 def find(self, st, by='msgid'):
449 """
450 Find entry which msgid (or property identified by the *by*
451 attribute) matches the string *st*.
360 # set the file path if not set
361 if self.fpath is None and fpath:
362 self.fpath = fpath
452 363
453 **Keyword arguments**:
454 - *st*: string, the string to search for
455 - *by*: string, the comparison attribute
364 def find(self, st, by='msgid', include_obsolete_entries=False,
365 msgctxt=False):
366 """
367 Find the entry whose msgid (or property identified by the ``by``
368 argument) matches the string ``st``.
456 369
457 **Examples**:
370 Keyword arguments:
371
372 ``st``
373 string, the string to search for.
458 374
459 >>> po = pofile('tests/test_utf8.po')
460 >>> entry = po.find('Thursday')
461 >>> entry.msgstr
462 u'Jueves'
463 >>> entry = po.find('Some unexistant msgid')
464 >>> entry is None
465 True
466 >>> entry = po.find('Jueves', 'msgstr')
467 >>> entry.msgid
468 u'Thursday'
375 ``by``
376 string, the property to use for comparison (default: ``msgid``).
377
378 ``include_obsolete_entries``
379 boolean, whether to also search in entries that are obsolete.
380
381 ``msgctxt``
382 string, allows to specify a specific message context for the
383 search.
469 384 """
470 for e in self:
385 if include_obsolete_entries:
386 entries = self[:]
387 else:
388 entries = [e for e in self if not e.obsolete]
389 for e in entries:
471 390 if getattr(e, by) == st:
391 if msgctxt and e.msgctxt != msgctxt:
392 continue
472 393 return e
473 394 return None
474 395
475 396 def ordered_metadata(self):
476 397 """
477 Convenience method that return the metadata ordered. The return
478 value is list of tuples (metadata name, metadata_value).
398 Convenience method that returns an ordered version of the metadata
399 dictionary. The return value is a list of tuples (metadata name,
400 metadata_value).
479 401 """
480 402 # copy the dict first
481 403 metadata = self.metadata.copy()
482 404 data_order = [
483 405 'Project-Id-Version',
484 406 'Report-Msgid-Bugs-To',
485 407 'POT-Creation-Date',
486 408 'PO-Revision-Date',
487 409 'Last-Translator',
488 410 'Language-Team',
489 411 'MIME-Version',
490 412 'Content-Type',
491 413 'Content-Transfer-Encoding'
492 414 ]
493 415 ordered_data = []
494 416 for data in data_order:
495 417 try:
496 418 value = metadata.pop(data)
497 419 ordered_data.append((data, value))
498 420 except KeyError:
499 421 pass
500 # the rest of the metadata won't be ordered there are no specs for this
422 # the rest of the metadata will be alphabetically ordered since there
423 # are no specs for this AFAIK
501 424 keys = metadata.keys()
502 list(keys).sort()
425 keys.sort()
503 426 for data in keys:
504 427 value = metadata[data]
505 428 ordered_data.append((data, value))
506 429 return ordered_data
507 430
508 431 def to_binary(self):
509 432 """
510 Return the mofile binary representation.
433 Return the binary representation of the file.
511 434 """
512 import array
513 import struct
514 import types
515 435 offsets = []
516 436 entries = self.translated_entries()
517 437 # the keys are sorted in the .mo file
518 438 def cmp(_self, other):
519 if _self.msgid > other.msgid:
439 # msgfmt compares entries with msgctxt if it exists
440 self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
441 other_msgid = other.msgctxt and other.msgctxt or other.msgid
442 if self_msgid > other_msgid:
520 443 return 1
521 elif _self.msgid < other.msgid:
444 elif self_msgid < other_msgid:
522 445 return -1
523 446 else:
524 447 return 0
525 448 # add metadata entry
526 449 entries.sort(cmp)
527 450 mentry = self.metadata_as_entry()
528 mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
451 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
529 452 entries = [mentry] + entries
530 453 entries_len = len(entries)
531 454 ids, strs = '', ''
532 455 for e in entries:
533 456 # For each string, we need size and file offset. Each string is
534 457 # NUL terminated; the NUL does not count into the size.
458 msgid = ''
459 if e.msgctxt:
460 # Contexts are stored by storing the concatenation of the
461 # context, a <EOT> byte, and the original string
462 msgid = self._encode(e.msgctxt + '\4')
535 463 if e.msgid_plural:
536 464 indexes = e.msgstr_plural.keys()
537 465 indexes.sort()
538 466 msgstr = []
539 467 for index in indexes:
540 468 msgstr.append(e.msgstr_plural[index])
541 msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
469 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
542 470 msgstr = self._encode('\0'.join(msgstr))
543 471 else:
544 msgid = self._encode(e.msgid)
472 msgid += self._encode(e.msgid)
545 473 msgstr = self._encode(e.msgstr)
546 474 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
547 475 ids += msgid + '\0'
548 476 strs += msgstr + '\0'
477
549 478 # The header is 7 32-bit unsigned integers.
550 479 keystart = 7*4+16*entries_len
551 480 # and the values start after the keys
552 481 valuestart = keystart + len(ids)
553 482 koffsets = []
554 483 voffsets = []
555 484 # The string table first has the list of keys, then the list of values.
556 485 # Each entry has first the size of the string, then the file offset.
557 486 for o1, l1, o2, l2 in offsets:
558 487 koffsets += [l1, o1+keystart]
559 488 voffsets += [l2, o2+valuestart]
560 489 offsets = koffsets + voffsets
561 output = struct.pack("IIIIIII",
562 0x950412de, # Magic number
563 0, # Version
564 entries_len, # # of entries
565 7*4, # start of key index
566 7*4+entries_len*8, # start of value index
567 0, 0) # size and offset of hash table
568 output += array.array("I", offsets).tostring()
490 # check endianness for magic number
491 if struct.pack('@h', 1) == struct.pack('<h', 1):
492 magic_number = MOFile.LITTLE_ENDIAN
493 else:
494 magic_number = MOFile.BIG_ENDIAN
495
496 output = struct.pack(
497 "Iiiiiii",
498 magic_number, # Magic number
499 0, # Version
500 entries_len, # # of entries
501 7*4, # start of key index
502 7*4+entries_len*8, # start of value index
503 0, keystart # size and offset of hash table
504 # Important: we don't use hash tables
505 )
506 output += array.array("i", offsets).tostring()
569 507 output += ids
570 508 output += strs
571 509 return output
572 510
573 511 def _encode(self, mixed):
574 512 """
575 Encode the given argument with the file encoding if the type is unicode
576 and return the encoded string.
513 Encodes the given ``mixed`` argument with the file encoding if and
514 only if it's a unicode string and returns the encoded string.
577 515 """
578 516 if type(mixed) == types.UnicodeType:
579 517 return mixed.encode(self.encoding)
580 518 return mixed
581 519
582 520 # }}}
583 521 # class POFile {{{
584 522
585 523 class POFile(_BaseFile):
586 '''
524 """
587 525 Po (or Pot) file reader/writer.
588 POFile objects inherit the list objects methods.
589
590 **Example**:
526 This class inherits the :class:`~polib._BaseFile` class and, by extension,
527 the python ``list`` type.
528 """
591 529
592 >>> po = POFile()
593 >>> entry1 = POEntry(
594 ... msgid="Some english text",
595 ... msgstr="Un texte en anglais"
596 ... )
597 >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
598 >>> entry1.comment = "Some useful comment"
599 >>> entry2 = POEntry(
600 ... msgid="Peace in some languages",
601 ... msgstr="Pace سلام שלום Hasîtî 和平"
602 ... )
603 >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
604 >>> entry2.comment = "Another useful comment"
605 >>> entry3 = POEntry(
606 ... msgid='Some entry with quotes " \\"',
607 ... msgstr='Un message unicode avec des quotes " \\"'
608 ... )
609 >>> entry3.comment = "Test string quoting"
610 >>> po.append(entry1)
611 >>> po.append(entry2)
612 >>> po.append(entry3)
613 >>> po.header = "Some Header"
614 >>> print(po)
615 # Some Header
616 msgid ""
617 msgstr ""
618 <BLANKLINE>
619 #. Some useful comment
620 #: testfile:12 another_file:1
621 msgid "Some english text"
622 msgstr "Un texte en anglais"
623 <BLANKLINE>
624 #. Another useful comment
625 #: testfile:15 another_file:5
626 msgid "Peace in some languages"
627 msgstr "Pace سلام שלום Hasîtî 和平"
628 <BLANKLINE>
629 #. Test string quoting
630 msgid "Some entry with quotes \\" \\""
631 msgstr "Un message unicode avec des quotes \\" \\""
632 <BLANKLINE>
633 '''
634
635 def __str__(self):
636 """Return the string representation of the po file"""
530 def __unicode__(self):
531 """
532 Returns the unicode representation of the po file.
533 """
637 534 ret, headers = '', self.header.split('\n')
638 535 for header in headers:
639 536 if header[:1] in [',', ':']:
640 537 ret += '#%s\n' % header
641 538 else:
642 539 ret += '# %s\n' % header
643 return ret + _BaseFile.__str__(self)
540
541 if type(ret) != types.UnicodeType:
542 ret = unicode(ret, self.encoding)
543
544 return ret + _BaseFile.__unicode__(self)
644 545
645 546 def save_as_mofile(self, fpath):
646 547 """
647 Save the binary representation of the file to *fpath*.
548 Saves the binary representation of the file to given ``fpath``.
648 549
649 **Keyword arguments**:
650 - *fpath*: string, full or relative path to the file.
550 Keyword argument:
551
552 ``fpath``
553 string, full or relative path to the mo file.
651 554 """
652 555 _BaseFile.save(self, fpath, 'to_binary')
653 556
654 557 def percent_translated(self):
655 558 """
656 Convenience method that return the percentage of translated
559 Convenience method that returns the percentage of translated
657 560 messages.
658
659 **Example**:
660
661 >>> import polib
662 >>> po = polib.pofile('tests/test_pofile_helpers.po')
663 >>> po.percent_translated()
664 50
665 >>> po = POFile()
666 >>> po.percent_translated()
667 100
668 561 """
669 562 total = len([e for e in self if not e.obsolete])
670 563 if total == 0:
671 564 return 100
672 565 translated = len(self.translated_entries())
673 566 return int((100.00 / float(total)) * translated)
674 567
675 568 def translated_entries(self):
676 569 """
677 Convenience method that return a list of translated entries.
678
679 **Example**:
680
681 >>> import polib
682 >>> po = polib.pofile('tests/test_pofile_helpers.po')
683 >>> len(po.translated_entries())
684 6
570 Convenience method that returns the list of translated entries.
685 571 """
686 572 return [e for e in self if e.translated()]
687 573
688 574 def untranslated_entries(self):
689 575 """
690 Convenience method that return a list of untranslated entries.
691
692 **Example**:
693
694 >>> import polib
695 >>> po = polib.pofile('tests/test_pofile_helpers.po')
696 >>> len(po.untranslated_entries())
697 4
576 Convenience method that returns the list of untranslated entries.
698 577 """
699 578 return [e for e in self if not e.translated() and not e.obsolete \
700 579 and not 'fuzzy' in e.flags]
701 580
702 581 def fuzzy_entries(self):
703 582 """
704 Convenience method that return the list of 'fuzzy' entries.
705
706 **Example**:
707
708 >>> import polib
709 >>> po = polib.pofile('tests/test_pofile_helpers.po')
710 >>> len(po.fuzzy_entries())
711 2
583 Convenience method that returns the list of fuzzy entries.
712 584 """
713 585 return [e for e in self if 'fuzzy' in e.flags]
714 586
715 587 def obsolete_entries(self):
716 588 """
717 Convenience method that return the list of obsolete entries.
718
719 **Example**:
720
721 >>> import polib
722 >>> po = polib.pofile('tests/test_pofile_helpers.po')
723 >>> len(po.obsolete_entries())
724 4
589 Convenience method that returns the list of obsolete entries.
725 590 """
726 591 return [e for e in self if e.obsolete]
727 592
728 593 def merge(self, refpot):
729 594 """
730 XXX this could not work if encodings are different, needs thinking
731 and general refactoring of how polib handles encoding...
732
733 Convenience method that merge the current pofile with the pot file
595 Convenience method that merges the current pofile with the pot file
734 596 provided. It behaves exactly as the gettext msgmerge utility:
735 597
736 - comments of this file will be preserved, but extracted comments
737 and occurrences will be discarded
738 - any translations or comments in the file will be discarded,
739 however dot comments and file positions will be preserved
740
741 **Keyword argument**:
742 - *refpot*: object POFile, the reference catalog.
598 * comments of this file will be preserved, but extracted comments and
599 occurrences will be discarded;
600 * any translations or comments in the file will be discarded, however,
601 dot comments and file positions will be preserved;
602 * the fuzzy flags are preserved.
743 603
744 **Example**:
604 Keyword argument:
745 605
746 >>> import polib
747 >>> refpot = polib.pofile('tests/test_merge.pot')
748 >>> po = polib.pofile('tests/test_merge_before.po')
749 >>> po.merge(refpot)
750 >>> expected_po = polib.pofile('tests/test_merge_after.po')
751 >>> unicode(po) == unicode(expected_po)
752 True
606 ``refpot``
607 object POFile, the reference catalog.
753 608 """
754 609 for entry in refpot:
755 e = self.find(entry.msgid)
610 e = self.find(entry.msgid, include_obsolete_entries=True)
756 611 if e is None:
757 612 e = POEntry()
758 613 self.append(e)
759 614 e.merge(entry)
760 # ok, now we must "obsolete" entries that are not in the refpot
761 # anymore
615 # ok, now we must "obsolete" entries that are not in the refpot anymore
762 616 for entry in self:
763 617 if refpot.find(entry.msgid) is None:
764 618 entry.obsolete = True
765 619
766 620 # }}}
767 621 # class MOFile {{{
768 622
769 623 class MOFile(_BaseFile):
770 '''
624 """
771 625 Mo file reader/writer.
772 MOFile objects inherit the list objects methods.
773
774 **Example**:
775
776 >>> mo = MOFile()
777 >>> entry1 = POEntry(
778 ... msgid="Some english text",
779 ... msgstr="Un texte en anglais"
780 ... )
781 >>> entry2 = POEntry(
782 ... msgid="I need my dirty cheese",
783 ... msgstr="Je veux mon sale fromage"
784 ... )
785 >>> entry3 = MOEntry(
786 ... msgid='Some entry with quotes " \\"',
787 ... msgstr='Un message unicode avec des quotes " \\"'
788 ... )
789 >>> mo.append(entry1)
790 >>> mo.append(entry2)
791 >>> mo.append(entry3)
792 >>> print(mo)
793 msgid ""
794 msgstr ""
795 <BLANKLINE>
796 msgid "Some english text"
797 msgstr "Un texte en anglais"
798 <BLANKLINE>
799 msgid "I need my dirty cheese"
800 msgstr "Je veux mon sale fromage"
801 <BLANKLINE>
802 msgid "Some entry with quotes \\" \\""
803 msgstr "Un message unicode avec des quotes \\" \\""
804 <BLANKLINE>
805 '''
626 This class inherits the :class:`~polib._BaseFile` class and, by
627 extension, the python ``list`` type.
628 """
629 BIG_ENDIAN = 0xde120495
630 LITTLE_ENDIAN = 0x950412de
806 631
807 632 def __init__(self, *args, **kwargs):
808 633 """
809 MOFile constructor. Mo files have two other properties:
810 - magic_number: the magic_number of the binary file,
811 - version: the version of the mo spec.
634 Constructor, accepts all keywords arguments accepted by
635 :class:`~polib._BaseFile` class.
812 636 """
813 637 _BaseFile.__init__(self, *args, **kwargs)
814 638 self.magic_number = None
815 639 self.version = 0
816 640
817 641 def save_as_pofile(self, fpath):
818 642 """
819 Save the string representation of the file to *fpath*.
643 Saves the mofile as a pofile to ``fpath``.
820 644
821 **Keyword argument**:
822 - *fpath*: string, full or relative path to the file.
645 Keyword argument:
646
647 ``fpath``
648 string, full or relative path to the file.
823 649 """
824 650 _BaseFile.save(self, fpath)
825 651
826 def save(self, fpath):
652 def save(self, fpath=None):
827 653 """
828 Save the binary representation of the file to *fpath*.
654 Saves the mofile to ``fpath``.
829 655
830 **Keyword argument**:
831 - *fpath*: string, full or relative path to the file.
656 Keyword argument:
657
658 ``fpath``
659 string, full or relative path to the file.
832 660 """
833 661 _BaseFile.save(self, fpath, 'to_binary')
834 662
835 663 def percent_translated(self):
836 664 """
837 665 Convenience method to keep the same interface with POFile instances.
838 666 """
839 667 return 100
840 668
841 669 def translated_entries(self):
842 670 """
843 671 Convenience method to keep the same interface with POFile instances.
844 672 """
845 673 return self
846 674
847 675 def untranslated_entries(self):
848 676 """
849 677 Convenience method to keep the same interface with POFile instances.
850 678 """
851 679 return []
852 680
853 681 def fuzzy_entries(self):
854 682 """
855 683 Convenience method to keep the same interface with POFile instances.
856 684 """
857 685 return []
858 686
859 687 def obsolete_entries(self):
860 688 """
861 689 Convenience method to keep the same interface with POFile instances.
862 690 """
863 691 return []
864 692
865 693 # }}}
866 694 # class _BaseEntry {{{
867 695
868 696 class _BaseEntry(object):
869 697 """
870 Base class for POEntry or MOEntry objects.
871 This class must *not* be instanciated directly.
698 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
699 This class should **not** be instanciated directly.
872 700 """
873 701
874 702 def __init__(self, *args, **kwargs):
875 """Base Entry constructor."""
703 """
704 Constructor, accepts the following keyword arguments:
705
706 ``msgid``
707 string, the entry msgid.
708
709 ``msgstr``
710 string, the entry msgstr.
711
712 ``msgid_plural``
713 string, the entry msgid_plural.
714
715 ``msgstr_plural``
716 list, the entry msgstr_plural lines.
717
718 ``msgctxt``
719 string, the entry context (msgctxt).
720
721 ``obsolete``
722 bool, whether the entry is "obsolete" or not.
723
724 ``encoding``
725 string, the encoding to use, defaults to ``default_encoding``
726 global variable (optional).
727 """
876 728 self.msgid = kwargs.get('msgid', '')
877 729 self.msgstr = kwargs.get('msgstr', '')
878 730 self.msgid_plural = kwargs.get('msgid_plural', '')
879 731 self.msgstr_plural = kwargs.get('msgstr_plural', {})
732 self.msgctxt = kwargs.get('msgctxt', None)
880 733 self.obsolete = kwargs.get('obsolete', False)
881 734 self.encoding = kwargs.get('encoding', default_encoding)
882 self.msgctxt = kwargs.get('msgctxt', None)
883 self._multiline_str = {}
884 735
885 def __repr__(self):
886 """Return the official string representation of the object."""
887 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
888
889 def __str__(self, wrapwidth=78):
736 def __unicode__(self, wrapwidth=78):
890 737 """
891 Common string representation of the POEntry and MOEntry
892 objects.
738 Returns the unicode representation of the entry.
893 739 """
894 740 if self.obsolete:
895 741 delflag = '#~ '
896 742 else:
897 743 delflag = ''
898 744 ret = []
899 745 # write the msgctxt if any
900 746 if self.msgctxt is not None:
901 ret += self._str_field("msgctxt", delflag, "", self.msgctxt)
747 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
902 748 # write the msgid
903 ret += self._str_field("msgid", delflag, "", self.msgid)
749 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
904 750 # write the msgid_plural if any
905 751 if self.msgid_plural:
906 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
752 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
907 753 if self.msgstr_plural:
908 754 # write the msgstr_plural if any
909 755 msgstrs = self.msgstr_plural
910 756 keys = list(msgstrs)
911 757 keys.sort()
912 758 for index in keys:
913 759 msgstr = msgstrs[index]
914 760 plural_index = '[%s]' % index
915 ret += self._str_field("msgstr", delflag, plural_index, msgstr)
761 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
916 762 else:
917 763 # otherwise write the msgstr
918 ret += self._str_field("msgstr", delflag, "", self.msgstr)
764 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
919 765 ret.append('')
920 return '\n'.join(ret)
766 ret = '\n'.join(ret)
767
768 if type(ret) != types.UnicodeType:
769 return unicode(ret, self.encoding)
770 return ret
771
772 def __str__(self):
773 """
774 Returns the string representation of the entry.
775 """
776 return unicode(self).encode(self.encoding)
777
778 def __eq__(self, other):
779 return unicode(self) == unicode(other)
921 780
922 def _str_field(self, fieldname, delflag, plural_index, field):
923 if (fieldname + plural_index) in self._multiline_str:
924 field = self._multiline_str[fieldname + plural_index]
925 lines = [''] + field.split('__POLIB__NL__')
781 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
782 lines = field.splitlines(True)
783 if len(lines) > 1:
784 lines = [''] + lines # start with initial empty line
926 785 else:
927 lines = field.splitlines(True)
928 if len(lines) > 1:
929 lines = ['']+lines # start with initial empty line
786 escaped_field = escape(field)
787 specialchars_count = 0
788 for c in ['\\', '\n', '\r', '\t', '"']:
789 specialchars_count += field.count(c)
790 # comparison must take into account fieldname length + one space
791 # + 2 quotes (eg. msgid "<string>")
792 flength = len(fieldname) + 3
793 if plural_index:
794 flength += len(plural_index)
795 real_wrapwidth = wrapwidth - flength + specialchars_count
796 if wrapwidth > 0 and len(field) > real_wrapwidth:
797 # Wrap the line but take field name into account
798 lines = [''] + [unescape(item) for item in wrap(
799 escaped_field,
800 wrapwidth - 2, # 2 for quotes ""
801 drop_whitespace=False,
802 break_long_words=False
803 )]
930 804 else:
931 lines = [field] # needed for the empty string case
805 lines = [field]
932 806 if fieldname.startswith('previous_'):
933 807 # quick and dirty trick to get the real field name
934 808 fieldname = fieldname[9:]
935 809
936 810 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
937 811 escape(lines.pop(0)))]
938 812 for mstr in lines:
939 813 ret.append('%s"%s"' % (delflag, escape(mstr)))
940 814 return ret
941 815
942 816 # }}}
943 817 # class POEntry {{{
944 818
945 819 class POEntry(_BaseEntry):
946 820 """
947 821 Represents a po file entry.
948
949 **Examples**:
950
951 >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
952 >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
953 >>> print(entry)
954 #: welcome.py:12 anotherfile.py:34
955 msgid "Welcome"
956 msgstr "Bienvenue"
957 <BLANKLINE>
958 >>> entry = POEntry()
959 >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
960 >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
961 >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
962 >>> entry.flags.append('c-format')
963 >>> entry.previous_msgctxt = '@somecontext'
964 >>> entry.previous_msgid = 'I had eggs but no spam !'
965 >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
966 >>> entry.msgctxt = '@somenewcontext'
967 >>> entry.msgid = 'I have spam but no egg !'
968 >>> entry.msgid_plural = 'I have spam and %d eggs !'
969 >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
970 >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
971 >>> print(entry)
972 #. A plural translation. This is a very very very long line please do not
973 #. wrap, this is just for testing comment wrapping...
974 # A plural translation. This is a very very very long line please do not wrap,
975 # this is just for testing comment wrapping...
976 #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
977 #: src/eggs.c:45
978 #, c-format
979 #| msgctxt "@somecontext"
980 #| msgid "I had eggs but no spam !"
981 #| msgid_plural "I had eggs and %d spam !"
982 msgctxt "@somenewcontext"
983 msgid "I have spam but no egg !"
984 msgid_plural "I have spam and %d eggs !"
985 msgstr[0] "J'ai du jambon mais aucun oeuf !"
986 msgstr[1] "J'ai du jambon et %d oeufs !"
987 <BLANKLINE>
988 822 """
989 823
990 824 def __init__(self, *args, **kwargs):
991 """POEntry constructor."""
825 """
826 Constructor, accepts the following keyword arguments:
827
828 ``comment``
829 string, the entry comment.
830
831 ``tcomment``
832 string, the entry translator comment.
833
834 ``occurrences``
835 list, the entry occurrences.
836
837 ``flags``
838 list, the entry flags.
839
840 ``previous_msgctxt``
841 string, the entry previous context.
842
843 ``previous_msgid``
844 string, the entry previous msgid.
845
846 ``previous_msgid_plural``
847 string, the entry previous msgid_plural.
848 """
992 849 _BaseEntry.__init__(self, *args, **kwargs)
993 850 self.comment = kwargs.get('comment', '')
994 851 self.tcomment = kwargs.get('tcomment', '')
995 852 self.occurrences = kwargs.get('occurrences', [])
996 853 self.flags = kwargs.get('flags', [])
997 854 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
998 855 self.previous_msgid = kwargs.get('previous_msgid', None)
999 856 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
1000 857
1001 def __str__(self, wrapwidth=78):
858 def __unicode__(self, wrapwidth=78):
1002 859 """
1003 Return the string representation of the entry.
860 Returns the unicode representation of the entry.
1004 861 """
1005 862 if self.obsolete:
1006 return _BaseEntry.__str__(self)
863 return _BaseEntry.__unicode__(self, wrapwidth)
864
1007 865 ret = []
1008 # comment first, if any (with text wrapping as xgettext does)
1009 if self.comment != '':
1010 for comment in self.comment.split('\n'):
1011 if wrapwidth > 0 and len(comment) > wrapwidth-3:
1012 ret += textwrap.wrap(comment, wrapwidth,
1013 initial_indent='#. ',
1014 subsequent_indent='#. ',
1015 break_long_words=False)
1016 else:
1017 ret.append('#. %s' % comment)
1018 # translator comment, if any (with text wrapping as xgettext does)
1019 if self.tcomment != '':
1020 for tcomment in self.tcomment.split('\n'):
1021 if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
1022 ret += textwrap.wrap(tcomment, wrapwidth,
1023 initial_indent='# ',
1024 subsequent_indent='# ',
1025 break_long_words=False)
1026 else:
1027 ret.append('# %s' % tcomment)
866 # comments first, if any (with text wrapping as xgettext does)
867 comments = [('comment', '#. '), ('tcomment', '# ')]
868 for c in comments:
869 val = getattr(self, c[0])
870 if val:
871 for comment in val.split('\n'):
872 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
873 ret += wrap(
874 comment,
875 wrapwidth,
876 initial_indent=c[1],
877 subsequent_indent=c[1],
878 break_long_words=False
879 )
880 else:
881 ret.append('%s%s' % (c[1], comment))
882
1028 883 # occurrences (with text wrapping as xgettext does)
1029 884 if self.occurrences:
1030 885 filelist = []
1031 886 for fpath, lineno in self.occurrences:
1032 887 if lineno:
1033 888 filelist.append('%s:%s' % (fpath, lineno))
1034 889 else:
1035 890 filelist.append(fpath)
1036 891 filestr = ' '.join(filelist)
1037 if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
1038 # XXX textwrap split words that contain hyphen, this is not
892 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
893 # textwrap split words that contain hyphen, this is not
1039 894 # what we want for filenames, so the dirty hack is to
1040 895 # temporally replace hyphens with a char that a file cannot
1041 896 # contain, like "*"
1042 lines = textwrap.wrap(filestr.replace('-', '*'),
1043 wrapwidth,
1044 initial_indent='#: ',
1045 subsequent_indent='#: ',
1046 break_long_words=False)
1047 # end of the replace hack
1048 for line in lines:
1049 ret.append(line.replace('*', '-'))
897 ret += [l.replace('*', '-') for l in wrap(
898 filestr.replace('-', '*'),
899 wrapwidth,
900 initial_indent='#: ',
901 subsequent_indent='#: ',
902 break_long_words=False
903 )]
1050 904 else:
1051 ret.append('#: '+filestr)
1052 # flags
905 ret.append('#: ' + filestr)
906
907 # flags (TODO: wrapping ?)
1053 908 if self.flags:
1054 flags = []
1055 for flag in self.flags:
1056 flags.append(flag)
1057 ret.append('#, %s' % ', '.join(flags))
909 ret.append('#, %s' % ', '.join(self.flags))
1058 910
1059 911 # previous context and previous msgid/msgid_plural
1060 if self.previous_msgctxt:
1061 ret += self._str_field("previous_msgctxt", "#| ", "",
1062 self.previous_msgctxt)
1063 if self.previous_msgid:
1064 ret += self._str_field("previous_msgid", "#| ", "",
1065 self.previous_msgid)
1066 if self.previous_msgid_plural:
1067 ret += self._str_field("previous_msgid_plural", "#| ", "",
1068 self.previous_msgid_plural)
912 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
913 for f in fields:
914 val = getattr(self, f)
915 if val:
916 ret += self._str_field(f, "#| ", "", val, wrapwidth)
1069 917
1070 ret.append(_BaseEntry.__str__(self))
1071 return '\n'.join(ret)
918 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
919 ret = '\n'.join(ret)
920
921 if type(ret) != types.UnicodeType:
922 return unicode(ret, self.encoding)
923 return ret
1072 924
1073 925 def __cmp__(self, other):
1074 '''
926 """
1075 927 Called by comparison operations if rich comparison is not defined.
1076
1077 **Tests**:
1078 >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
1079 >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
1080 >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
1081 >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
1082 >>> po = POFile()
1083 >>> po.append(a)
1084 >>> po.append(b)
1085 >>> po.append(c1)
1086 >>> po.append(c2)
1087 >>> po.sort()
1088 >>> print(po)
1089 #
1090 msgid ""
1091 msgstr ""
1092 <BLANKLINE>
1093 #: a.py:1 a.py:3
1094 msgid "c2"
1095 msgstr ""
1096 <BLANKLINE>
1097 #: a.py:1 b.py:1
1098 msgid "c1"
1099 msgstr ""
1100 <BLANKLINE>
1101 #: b.py:1 b.py:3
1102 msgid "a"
1103 msgstr ""
1104 <BLANKLINE>
1105 #: b.py:1 b.py:3
1106 msgid "b"
1107 msgstr ""
1108 <BLANKLINE>
1109 '''
928 """
1110 929 def compare_occurrences(a, b):
1111 930 """
1112 931 Compare an entry occurrence with another one.
1113 932 """
1114 933 if a[0] != b[0]:
1115 934 return a[0] < b[0]
1116 935 if a[1] != b[1]:
1117 936 return a[1] < b[1]
1118 937 return 0
1119 938
1120 939 # First: Obsolete test
1121 940 if self.obsolete != other.obsolete:
1122 941 if self.obsolete:
1123 942 return -1
1124 943 else:
1125 944 return 1
1126 945 # Work on a copy to protect original
1127 946 occ1 = self.occurrences[:]
1128 947 occ2 = other.occurrences[:]
1129 948 # Sorting using compare method
1130 949 occ1.sort(compare_occurrences)
1131 950 occ2.sort(compare_occurrences)
1132 951 # Comparing sorted occurrences
1133 952 pos = 0
1134 953 for entry1 in occ1:
1135 954 try:
1136 955 entry2 = occ2[pos]
1137 956 except IndexError:
1138 957 return 1
1139 958 pos = pos + 1
1140 959 if entry1[0] != entry2[0]:
1141 960 if entry1[0] > entry2[0]:
1142 961 return 1
1143 962 else:
1144 963 return -1
1145 964 if entry1[1] != entry2[1]:
1146 965 if entry1[1] > entry2[1]:
1147 966 return 1
1148 967 else:
1149 968 return -1
1150 969 # Finally: Compare message ID
1151 970 if self.msgid > other.msgid: return 1
1152 971 else: return -1
1153 972
1154 973 def translated(self):
1155 974 """
1156 Return True if the entry has been translated or False.
975 Returns ``True`` if the entry has been translated or ``False``
976 otherwise.
1157 977 """
1158 978 if self.obsolete or 'fuzzy' in self.flags:
1159 979 return False
1160 980 if self.msgstr != '':
1161 981 return True
1162 982 if self.msgstr_plural:
1163 983 for pos in self.msgstr_plural:
1164 984 if self.msgstr_plural[pos] == '':
1165 985 return False
1166 986 return True
1167 987 return False
1168 988
1169 989 def merge(self, other):
1170 990 """
1171 991 Merge the current entry with the given pot entry.
1172 992 """
1173 self.msgid = other.msgid
1174 self.occurrences = other.occurrences
1175 self.comment = other.comment
1176 self.flags = other.flags
993 self.msgid = other.msgid
994 self.msgctxt = other.msgctxt
995 self.occurrences = other.occurrences
996 self.comment = other.comment
997 fuzzy = 'fuzzy' in self.flags
998 self.flags = other.flags[:] # clone flags
999 if fuzzy:
1000 self.flags.append('fuzzy')
1177 1001 self.msgid_plural = other.msgid_plural
1002 self.obsolete = other.obsolete
1003 self.previous_msgctxt = other.previous_msgctxt
1004 self.previous_msgid = other.previous_msgid
1005 self.previous_msgid_plural = other.previous_msgid_plural
1178 1006 if other.msgstr_plural:
1179 1007 for pos in other.msgstr_plural:
1180 1008 try:
1181 1009 # keep existing translation at pos if any
1182 1010 self.msgstr_plural[pos]
1183 1011 except KeyError:
1184 1012 self.msgstr_plural[pos] = ''
1185 1013
1186 1014 # }}}
1187 1015 # class MOEntry {{{
1188 1016
1189 1017 class MOEntry(_BaseEntry):
1190 1018 """
1191 1019 Represents a mo file entry.
1192
1193 **Examples**:
1194
1195 >>> entry = MOEntry()
1196 >>> entry.msgid = 'translate me !'
1197 >>> entry.msgstr = 'traduisez moi !'
1198 >>> print(entry)
1199 msgid "translate me !"
1200 msgstr "traduisez moi !"
1201 <BLANKLINE>
1202 1020 """
1203
1204 def __str__(self, wrapwidth=78):
1205 """
1206 Return the string representation of the entry.
1207 """
1208 return _BaseEntry.__str__(self, wrapwidth)
1021 pass
1209 1022
1210 1023 # }}}
1211 1024 # class _POFileParser {{{
1212 1025
1213 1026 class _POFileParser(object):
1214 1027 """
1215 1028 A finite state machine to parse efficiently and correctly po
1216 1029 file format.
1217 1030 """
1218 1031
1219 def __init__(self, fpath, *args, **kwargs):
1032 def __init__(self, pofile, *args, **kwargs):
1220 1033 """
1221 1034 Constructor.
1222 1035
1223 **Arguments**:
1224 - *fpath*: string, path to the po file
1225 - *encoding*: string, the encoding to use, defaults to
1226 "default_encoding" global variable (optional),
1227 - *check_for_duplicates*: whether to check for duplicate entries
1228 when adding entries to the file, default: False (optional).
1036 Keyword arguments:
1037
1038 ``pofile``
1039 string, path to the po file or its content
1040
1041 ``encoding``
1042 string, the encoding to use, defaults to ``default_encoding``
1043 global variable (optional).
1044
1045 ``check_for_duplicates``
1046 whether to check for duplicate entries when adding entries to the
1047 file (optional, default: ``False``).
1229 1048 """
1230 1049 enc = kwargs.get('encoding', default_encoding)
1231 check_dup = kwargs.get('check_for_duplicates', False)
1232 try:
1233 self.fhandle = codecs.open(fpath, 'rU', enc)
1234 except LookupError:
1235 enc = default_encoding
1236 self.fhandle = codecs.open(fpath, 'rU', enc)
1050 if os.path.exists(pofile):
1051 try:
1052 self.fhandle = codecs.open(pofile, 'rU', enc)
1053 except LookupError:
1054 enc = default_encoding
1055 self.fhandle = codecs.open(pofile, 'rU', enc)
1056 else:
1057 self.fhandle = pofile.splitlines()
1058
1237 1059 self.instance = POFile(
1238 fpath=fpath,
1060 pofile=pofile,
1239 1061 encoding=enc,
1240 check_for_duplicates=check_dup
1062 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1241 1063 )
1242 1064 self.transitions = {}
1243 1065 self.current_entry = POEntry()
1244 1066 self.current_state = 'ST'
1245 1067 self.current_token = None
1246 1068 # two memo flags used in handlers
1247 1069 self.msgstr_index = 0
1248 1070 self.entry_obsolete = 0
1249 1071 # Configure the state machine, by adding transitions.
1250 1072 # Signification of symbols:
1251 1073 # * ST: Beginning of the file (start)
1252 1074 # * HE: Header
1253 1075 # * TC: a translation comment
1254 1076 # * GC: a generated comment
1255 1077 # * OC: a file/line occurence
1256 1078 # * FL: a flags line
1257 1079 # * CT: a message context
1258 1080 # * PC: a previous msgctxt
1259 1081 # * PM: a previous msgid
1260 1082 # * PP: a previous msgid_plural
1261 1083 # * MI: a msgid
1262 1084 # * MP: a msgid plural
1263 1085 # * MS: a msgstr
1264 1086 # * MX: a msgstr plural
1265 1087 # * MC: a msgid or msgstr continuation line
1266 1088 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1267 1089 'MS', 'MP', 'MX', 'MI']
1268 1090
1269 1091 self.add('TC', ['ST', 'HE'], 'HE')
1270 1092 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1271 1093 'MP', 'MX', 'MI'], 'TC')
1272 1094 self.add('GC', all, 'GC')
1273 1095 self.add('OC', all, 'OC')
1274 1096 self.add('FL', all, 'FL')
1275 1097 self.add('PC', all, 'PC')
1276 1098 self.add('PM', all, 'PM')
1277 1099 self.add('PP', all, 'PP')
1278 1100 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1279 1101 'PP', 'MS', 'MX'], 'CT')
1280 1102 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1281 1103 'PM', 'PP', 'MS', 'MX'], 'MI')
1282 1104 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1283 1105 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1284 1106 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1285 1107 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1286 1108
1287 1109 def parse(self):
1288 1110 """
1289 1111 Run the state machine, parse the file line by line and call process()
1290 1112 with the current matched symbol.
1291 1113 """
1292 i, lastlen = 1, 0
1114 i = 0
1115
1116 keywords = {
1117 'msgctxt': 'CT',
1118 'msgid': 'MI',
1119 'msgstr': 'MS',
1120 'msgid_plural': 'MP',
1121 }
1122 prev_keywords = {
1123 'msgid_plural': 'PP',
1124 'msgid': 'PM',
1125 'msgctxt': 'PC',
1126 }
1127
1293 1128 for line in self.fhandle:
1129 i += 1
1294 1130 line = line.strip()
1295 1131 if line == '':
1296 i = i+1
1297 1132 continue
1298 if line[:3] == '#~ ':
1299 line = line[3:]
1133
1134 tokens = line.split(None, 2)
1135 nb_tokens = len(tokens)
1136
1137 if tokens[0] == '#~' and nb_tokens > 1:
1138 line = line[3:].strip()
1139 tokens = tokens[1:]
1140 nb_tokens -= 1
1300 1141 self.entry_obsolete = 1
1301 1142 else:
1302 1143 self.entry_obsolete = 0
1144
1145 # Take care of keywords like
1146 # msgid, msgid_plural, msgctxt & msgstr.
1147 if tokens[0] in keywords and nb_tokens > 1:
1148 line = line[len(tokens[0]):].lstrip()
1149 self.current_token = line
1150 self.process(keywords[tokens[0]], i)
1151 continue
1152
1303 1153 self.current_token = line
1304 if line[:2] == '#:':
1154
1155 if tokens[0] == '#:' and nb_tokens > 1:
1305 1156 # we are on a occurrences line
1306 1157 self.process('OC', i)
1307 elif line[:9] == 'msgctxt "':
1308 # we are on a msgctxt
1309 self.process('CT', i)
1310 elif line[:7] == 'msgid "':
1311 # we are on a msgid
1312 self.process('MI', i)
1313 elif line[:8] == 'msgstr "':
1314 # we are on a msgstr
1315 self.process('MS', i)
1316 elif line[:1] == '"' or line[:4] == '#| "':
1317 # we are on a continuation line or some metadata
1158
1159 elif line[:1] == '"':
1160 # we are on a continuation line
1318 1161 self.process('MC', i)
1319 elif line[:14] == 'msgid_plural "':
1320 # we are on a msgid plural
1321 self.process('MP', i)
1162
1322 1163 elif line[:7] == 'msgstr[':
1323 1164 # we are on a msgstr plural
1324 1165 self.process('MX', i)
1325 elif line[:3] == '#, ':
1166
1167 elif tokens[0] == '#,' and nb_tokens > 1:
1326 1168 # we are on a flags line
1327 1169 self.process('FL', i)
1328 elif line[:2] == '# ' or line == '#':
1329 if line == '#': line = line + ' '
1170
1171 elif tokens[0] == '#':
1172 if line == '#': line += ' '
1330 1173 # we are on a translator comment line
1331 1174 self.process('TC', i)
1332 elif line[:2] == '#.':
1175
1176 elif tokens[0] == '#.' and nb_tokens > 1:
1333 1177 # we are on a generated comment line
1334 1178 self.process('GC', i)
1335 elif line[:15] == '#| msgid_plural':
1336 # we are on a previous msgid_plural
1337 self.process('PP', i)
1338 elif line[:8] == '#| msgid':
1339 self.process('PM', i)
1340 # we are on a previous msgid
1341 elif line[:10] == '#| msgctxt':
1342 # we are on a previous msgctxt
1343 self.process('PC', i)
1344 i = i+1
1179
1180 elif tokens[0] == '#|':
1181 if nb_tokens < 2:
1182 self.process('??', i)
1183 continue
1184
1185 # Remove the marker and any whitespace right after that.
1186 line = line[2:].lstrip()
1187 self.current_token = line
1188
1189 if tokens[1].startswith('"'):
1190 # Continuation of previous metadata.
1191 self.process('MC', i)
1192 continue
1193
1194 if nb_tokens == 2:
1195 # Invalid continuation line.
1196 self.process('??', i)
1197
1198 # we are on a "previous translation" comment line,
1199 if tokens[1] not in prev_keywords:
1200 # Unknown keyword in previous translation comment.
1201 self.process('??', i)
1202
1203 # Remove the keyword and any whitespace
1204 # between it and the starting quote.
1205 line = line[len(tokens[1]):].lstrip()
1206 self.current_token = line
1207 self.process(prev_keywords[tokens[1]], i)
1208
1209 else:
1210 self.process('??', i)
1345 1211
1346 1212 if self.current_entry:
1347 1213 # since entries are added when another entry is found, we must add
1348 1214 # the last entry here (only if there are lines)
1349 1215 self.instance.append(self.current_entry)
1350 1216 # before returning the instance, check if there's metadata and if
1351 1217 # so extract it in a dict
1352 1218 firstentry = self.instance[0]
1353 1219 if firstentry.msgid == '': # metadata found
1354 1220 # remove the entry
1355 1221 firstentry = self.instance.pop(0)
1356 1222 self.instance.metadata_is_fuzzy = firstentry.flags
1357 1223 key = None
1358 1224 for msg in firstentry.msgstr.splitlines():
1359 1225 try:
1360 1226 key, val = msg.split(':', 1)
1361 1227 self.instance.metadata[key] = val.strip()
1362 1228 except:
1363 1229 if key is not None:
1364 1230 self.instance.metadata[key] += '\n'+ msg.strip()
1365 1231 # close opened file
1366 self.fhandle.close()
1232 if isinstance(self.fhandle, file):
1233 self.fhandle.close()
1367 1234 return self.instance
1368 1235
def add(self, symbol, states, next_state):
    """
    Add a transition to the state machine.

    Keywords arguments:

    ``symbol``
        string, the matched token (two chars symbol).

    ``states``
        list, a list of states (two chars symbols).

    ``next_state``
        the next state the fsm will have after the action.

    Raises ``AttributeError`` if no ``handle_<next_state>`` method exists.
    """
    # The handler depends only on ``next_state``, so look it up once
    # instead of once per source state.
    action = getattr(self, 'handle_%s' % next_state.lower())
    for state in states:
        self.transitions[(symbol, state)] = (action, next_state)
1381 1254
def process(self, symbol, linenum):
    """
    Process the transition corresponding to the current state and the
    symbol provided.

    Keywords arguments:

    ``symbol``
        string, the matched token (two chars symbol).

    ``linenum``
        integer, the current line number of the parsed file.

    Raises ``IOError`` when no transition is registered for
    ``(symbol, current state)`` or when the handler itself fails.
    """
    try:
        (action, state) = self.transitions[(symbol, self.current_state)]
        # A handler returning a false value asks to stay in the
        # current state.
        if action():
            self.current_state = state
    except Exception:
        # Every failure is reported as a syntax error on the offending line.
        raise IOError('Syntax error in po file (line %s)' % linenum)
1397 1274
1398 1275 # state handlers
1399 1276
def handle_he(self):
    """Handle a header comment."""
    # Header lines are accumulated one per line, newline-separated.
    if self.instance.header != '':
        self.instance.header += '\n'
    # Skip the first two characters of the token (the comment prefix).
    self.instance.header += self.current_token[2:]
    # Return True like every other handler so the state change is applied.
    return True
1406 1283
def handle_tc(self):
    """Handle a translator comment."""
    # A comment seen after a message body starts a new entry: store the
    # finished one first.
    if self.current_state in ['MC', 'MS', 'MX']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    if self.current_entry.tcomment != '':
        self.current_entry.tcomment += '\n'
    # Drop the leading '#' and at most one whitespace character after it,
    # so that '#text' (no space) does not lose its first character.
    text = self.current_token[1:]
    if text[:1].isspace():
        text = text[1:]
    self.current_entry.tcomment += text
    return True
1416 1293
def handle_gc(self):
    """Handle a generated comment."""
    # A comment seen after a message body starts a new entry: store the
    # finished one first.
    if self.current_state in ['MC', 'MS', 'MX']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    if self.current_entry.comment != '':
        self.current_entry.comment += '\n'
    # Drop the two-character prefix and at most one whitespace character
    # after it, so that '#.text' (no space) keeps its first character.
    text = self.current_token[2:]
    if text[:1].isspace():
        text = text[1:]
    self.current_entry.comment += text
    return True
1426 1303
def handle_oc(self):
    """
    Handle a file:num occurrence line.

    Each whitespace-separated reference is stored in the current entry's
    ``occurrences`` as a ``(file, line)`` tuple; ``line`` is ``''`` when
    the reference has no numeric line suffix.
    """
    if self.current_state in ['MC', 'MS', 'MX']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # split() without argument never yields empty strings, and slicing off
    # only the two-character prefix keeps a reference that directly
    # follows it intact.
    for occurrence in self.current_token[2:].split():
        try:
            # Split on the LAST colon only, so file names that contain
            # colons themselves (e.g. Windows drive letters) stay whole.
            fil, line = occurrence.rsplit(':', 1)
        except ValueError:
            # no colon at all: a bare file name
            fil, line = occurrence, ''
        else:
            if not line.isdigit():
                # The suffix is not a line number: keep the reference
                # untouched instead of gluing the parts without the colon.
                fil, line = occurrence, ''
        self.current_entry.occurrences.append((fil, line))
    return True
1444 1321
def handle_fl(self):
    """
    Handle a flags line.

    The comma-separated flags are appended to the current entry's
    ``flags`` list, stripped of surrounding whitespace.
    """
    if self.current_state in ['MC', 'MS', 'MX']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Split on the bare comma and strip each item so that both
    # 'a, b' and 'a,b' are accepted; empty items are ignored.
    flags = [flag.strip() for flag in self.current_token[2:].split(',')]
    self.current_entry.flags += [flag for flag in flags if flag]
    return True
1452 1329
def handle_pp(self):
    """Handle a previous msgid_plural line."""
    # Coming from a message body means the previous entry is complete:
    # store it and start a fresh one.
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Drop the first and last characters of the token before unescaping.
    inner = self.current_token[1:-1]
    self.current_entry.previous_msgid_plural = unescape(inner)
    return True
1461 1338
def handle_pm(self):
    """Handle a previous msgid line."""
    # Coming from a message body means the previous entry is complete:
    # store it and start a fresh one.
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Drop the first and last characters of the token before unescaping.
    inner = self.current_token[1:-1]
    self.current_entry.previous_msgid = unescape(inner)
    return True
1470 1347
def handle_pc(self):
    """Handle a previous msgctxt line."""
    # Coming from a message body means the previous entry is complete:
    # store it and start a fresh one.
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Drop the first and last characters of the token before unescaping.
    inner = self.current_token[1:-1]
    self.current_entry.previous_msgctxt = unescape(inner)
    return True
1479 1356
def handle_ct(self):
    """Handle a msgctxt."""
    # Coming from a message body means the previous entry is complete:
    # store it and start a fresh one.
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Drop the first and last characters of the token (presumably the
    # surrounding quotes) before unescaping.
    inner = self.current_token[1:-1]
    self.current_entry.msgctxt = unescape(inner)
    return True
1487 1364
def handle_mi(self):
    """Handle a msgid."""
    # Coming from a message body means the previous entry is complete:
    # store it and start a fresh one.
    if self.current_state in ('MC', 'MS', 'MX'):
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    # Carry the parser's obsolete marker over to the entry.
    entry.obsolete = self.entry_obsolete
    # Drop the first and last characters of the token (presumably the
    # surrounding quotes) before unescaping.
    entry.msgid = unescape(self.current_token[1:-1])
    return True
1496 1373
def handle_mp(self):
    """Handle a msgid plural."""
    # Drop the first and last characters of the token (presumably the
    # surrounding quotes) before unescaping.
    inner = self.current_token[1:-1]
    self.current_entry.msgid_plural = unescape(inner)
    return True
1501 1378
def handle_ms(self):
    """Handle a msgstr."""
    # Drop the first and last characters of the token (presumably the
    # surrounding quotes) before unescaping.
    inner = self.current_token[1:-1]
    self.current_entry.msgstr = unescape(inner)
    return True
1506 1383
def handle_mx(self):
    """
    Handle a msgstr plural.

    The plural index (kept as a string) becomes the key in the current
    entry's ``msgstr_plural`` and is remembered in ``msgstr_index`` for
    continuation lines.
    """
    token = self.current_token
    # The token is expected to look like: msgstr[N] "text".
    # Locate the closing bracket and the opening quote instead of relying
    # on fixed offsets, so multi-digit indices and any amount of
    # whitespace before the quote are handled.
    closing = token.index(']', 7)
    index = token[7:closing]
    value = token[token.index('"', closing) + 1:-1]
    self.current_entry.msgstr_plural[index] = unescape(value)
    self.msgstr_index = index
    return True
1513 1390
def handle_mc(self):
    """
    Handle a msgid or msgstr continuation line.

    The unescaped text is appended to whichever field the current state
    designates. Always returns False so the state machine stays in the
    state of the field being continued.
    """
    token = unescape(self.current_token[1:-1])
    state = self.current_state
    entry = self.current_entry
    if state == 'CT':
        entry.msgctxt += token
    elif state == 'MI':
        entry.msgid += token
    elif state == 'MP':
        entry.msgid_plural += token
    elif state == 'MS':
        entry.msgstr += token
    elif state == 'MX':
        entry.msgstr_plural[self.msgstr_index] += token
    elif state == 'PP':
        # NOTE(review): three more leading characters are dropped for
        # "previous" fields - presumably the rest of the comment prefix;
        # confirm against the tokenizer.
        entry.previous_msgid_plural += token[3:]
    elif state == 'PM':
        entry.previous_msgid += token[3:]
    elif state == 'PC':
        entry.previous_msgctxt += token[3:]
    # don't change the current state
    return False
1550 1423
1551 1424 # }}}
1552 1425 # class _MOFileParser {{{
1553 1426
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """

    def __init__(self, mofile, *args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``mofile``
            string, path to the mo file (opened here in binary mode).

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).
        """
        self.fhandle = open(mofile, 'rb')
        self.instance = MOFile(
            fpath=mofile,
            encoding=kwargs.get('encoding', default_encoding),
            check_for_duplicates=kwargs.get('check_for_duplicates', False)
        )

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor and return it.

        The file handle is closed whether parsing succeeds or fails.
        Raises ``IOError`` when the magic number is not recognised.
        """
        try:
            self._parse_entries()
        finally:
            # close opened file, even when the content is invalid
            self.fhandle.close()
        return self.instance

    def _parse_entries(self):
        """
        Read the header, the two index tables and every string pair from
        the file handle, filling ``self.instance``.
        """
        # parse magic number; it also tells the byte order of the file
        magic_number = self._readbinary('<I', 4)
        if magic_number == MOFile.LITTLE_ENDIAN:
            ii = '<II'
        elif magic_number == MOFile.BIG_ENDIAN:
            ii = '>II'
        else:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number
        # parse the version number and the number of strings
        self.instance.version, numofstrings = self._readbinary(ii, 8)
        # original strings and translation strings hash table offset
        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
        # move to msgid hash table and read length and offset of msgids
        self.fhandle.seek(msgids_hash_offset)
        msgids_index = [self._readbinary(ii, 8) for _ in range(numofstrings)]
        # move to msgstr hash table and read length and offset of msgstrs
        self.fhandle.seek(msgstrs_hash_offset)
        msgstrs_index = [self._readbinary(ii, 8) for _ in range(numofstrings)]
        # build entries
        for i in range(numofstrings):
            id_length, id_offset = msgids_index[i]
            str_length, str_offset = msgstrs_index[i]
            self.fhandle.seek(id_offset)
            msgid = self.fhandle.read(id_length)
            self.fhandle.seek(str_offset)
            msgstr = self.fhandle.read(str_length)
            if i == 0:  # metadata
                metadata = {}
                for line in msgstr.split('\n'):
                    tokens = line.split(':', 1)
                    if tokens[0] != '':
                        if len(tokens) > 1:
                            metadata[tokens[0]] = tokens[1].strip()
                        else:
                            # a key without ':' gets an empty value
                            metadata[tokens[0]] = ''
                self.instance.metadata = metadata
                continue
            # test if we have a plural entry (NUL-separated msgid)
            msgid_tokens = msgid.split('\0')
            if len(msgid_tokens) > 1:
                entry = self._build_entry(
                    msgid=msgid_tokens[0],
                    msgid_plural=msgid_tokens[1],
                    msgstr_plural=dict(enumerate(msgstr.split('\0')))
                )
            else:
                entry = self._build_entry(msgid=msgid, msgstr=msgstr)
            self.instance.append(entry)

    def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
                     msgstr_plural=None):
        """
        Create a ``MOEntry`` from the raw strings, separating the msgctxt
        from the msgid when they are joined by ``'\\x04'``. Arguments
        with a false value are not passed to ``MOEntry``.
        """
        # Split on the first separator only, so nothing after a second
        # '\x04' is silently dropped.
        msgctxt_msgid = msgid.split('\x04', 1)
        if len(msgctxt_msgid) > 1:
            kwargs = {
                'msgctxt': msgctxt_msgid[0],
                'msgid': msgctxt_msgid[1],
            }
        else:
            kwargs = {'msgid': msgid}
        if msgstr:
            kwargs['msgstr'] = msgstr
        if msgid_plural:
            kwargs['msgid_plural'] = msgid_plural
        if msgstr_plural:
            kwargs['msgstr_plural'] = msgstr_plural
        return MOEntry(**kwargs)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that reads ``numbytes`` bytes from the file handle
        and unpacks them using the struct format ``fmt``.
        It returns the single value when the format yields one item,
        otherwise the whole tuple.
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
1656 1544
1657 1545 # }}}
1658 # __main__ {{{
1546 # class TextWrapper {{{
1659 1547
1660 if __name__ == '__main__':
class TextWrapper(textwrap.TextWrapper):
    """
    Subclass of textwrap.TextWrapper that backports the
    drop_whitespace option.
    """

    def __init__(self, *args, **kwargs):
        # Take the option out of kwargs before delegating to the base
        # constructor, then store it on the instance.
        keep_option = kwargs.pop('drop_whitespace', True)
        textwrap.TextWrapper.__init__(self, *args, **kwargs)
        self.drop_whitespace = keep_option

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Turn a sequence of text chunks into a list of lines no longer
        than 'self.width' (lines may exceed it when 'break_long_words'
        is false). A chunk is either a run of whitespace or a "word" and
        is never split (modulo 'break_long_words'); line breaks may fall
        between any two chunks. When 'drop_whitespace' is set, whitespace
        chunks are removed from the start and end of lines; other
        whitespace is preserved.
        """
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        wrapped = []

        # Reverse once so the next chunk can be popped cheaply from the end.
        chunks.reverse()

        while chunks:
            # The first line and the following ones may use different
            # prefixes.
            if wrapped:
                prefix = self.subsequent_indent
            else:
                prefix = self.initial_indent

            # Space left for text on this line.
            room = self.width - len(prefix)

            # Leading whitespace is dropped, except at the very start of
            # the text (no line produced yet).
            if self.drop_whitespace and wrapped and not chunks[-1].strip():
                chunks.pop()

            # Chunks making up the current line and their total length.
            pieces = []
            used = 0

            # Take chunks for as long as they fit.
            while chunks and used + len(chunks[-1]) <= room:
                piece = chunks.pop()
                pieces.append(piece)
                used += len(piece)

            # The next chunk is too big to fit on *any* line.
            if chunks and len(chunks[-1]) > room:
                self._handle_long_word(chunks, pieces, used, room)

            # Trailing whitespace is dropped as well.
            if self.drop_whitespace and pieces and not pieces[-1].strip():
                pieces.pop()

            # Store the line, unless nothing is left of it.
            if pieces:
                wrapped.append(prefix + ''.join(pieces))

        return wrapped
1679 1627
1680 1628 # }}}
1629 # function wrap() {{{
1630
def wrap(text, width=70, **kwargs):
    """
    Wrap a single paragraph of text and return the wrapped lines as a list.

    Interpreters older than 2.6 go through the local TextWrapper class;
    newer ones use textwrap.wrap directly.
    """
    if sys.version_info >= (2, 6):
        return textwrap.wrap(text, width=width, **kwargs)
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)
1638
1639 #}}}
General Comments 0
You need to be logged in to leave comments. Login now