##// END OF EJS Templates
polib: remove unnecessary comparisons with True...
Martin Geisler -
r13030:8ea51e9e default
parent child Browse files
Show More
@@ -1,1680 +1,1680 b''
1 1 # -*- coding: utf-8 -*-
2 2 # no-check-code
3 3 #
4 4 # License: MIT (see LICENSE file provided)
5 5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6 6
7 7 """
8 8 **polib** allows you to manipulate, create, modify gettext files (pot, po
9 9 and mo files). You can load existing files, iterate through its entries,
10 10 add, modify entries, comments or metadata, etc... or create new po files
11 11 from scratch.
12 12
13 13 **polib** provides a simple and pythonic API, exporting only three
14 14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
15 15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
16 16 new files/entries.
17 17
18 18 **Basic example**:
19 19
20 20 >>> import polib
21 21 >>> # load an existing po file
22 22 >>> po = polib.pofile('tests/test_utf8.po')
23 23 >>> for entry in po:
24 24 ... # do something with entry...
25 25 ... pass
26 26 >>> # add an entry
27 27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
28 28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
29 29 >>> po.append(entry)
30 30 >>> # to save our modified po file:
31 31 >>> # po.save()
32 32 >>> # or you may want to compile the po file
33 33 >>> # po.save_as_mofile('tests/test_utf8.mo')
34 34 """
35 35
__author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
__version__ = '0.5.2'
# Public API of the module; every exported name is listed exactly once
# ('detect_encoding' used to appear twice).
__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
           'detect_encoding', 'escape', 'unescape']
40 40
41 41 import codecs
42 42 import struct
43 43 import textwrap
44 44 import types
45 45 import re
46 46
47 47 default_encoding = 'utf-8'
48 48
49 49 # function pofile() {{{
50 50
def pofile(fpath, **kwargs):
    """
    Convenience function that parses the po/pot file *fpath* and returns
    a POFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False
      - *check_for_duplicates*: whether to check for duplicate entries when
        adding entries to the file, default: False (optional)

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_weird_occurrences.po',
    ...     check_for_duplicates=True)
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural',
    ...              'msgstr_plural', 'obsolete', 'comment', 'tcomment',
    ...              'occurrences', 'flags', 'previous_msgctxt',
    ...              'previous_msgid', 'previous_msgid_plural')
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             for attr in all_attrs:
    ...                 if getattr(old, attr) != getattr(new, attr):
    ...                     getattr(old, attr)
    ...                     getattr(new, attr)
    ...     finally:
    ...         os.unlink(tmpf)
    >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
    >>> tmpf = tempfile.NamedTemporaryFile().name
    >>> po_file.save_as_mofile(tmpf)
    >>> try:
    ...     mo_file = polib.mofile(tmpf)
    ...     for old, new in zip(po_file, mo_file):
    ...         if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
    ...             'OLD: ', po_file._encode(old.msgid)
    ...             'NEW: ', mo_file._encode(new.msgid)
    ...         if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
    ...             'OLD: ', po_file._encode(old.msgstr)
    ...             'NEW: ', mo_file._encode(new.msgstr)
    ...             print new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # Any truthy value enables autodetection (no comparison with True).
    if kwargs.get('autodetect_encoding', True):
        enc = detect_encoding(fpath)
    else:
        enc = kwargs.get('encoding', default_encoding)
    check_for_duplicates = kwargs.get('check_for_duplicates', False)
    parser = _POFileParser(
        fpath,
        encoding=enc,
        check_for_duplicates=check_for_duplicates
    )
    instance = parser.parse()
    # wrapwidth is applied to the parsed instance, not passed to the parser
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
121 121
122 122 # }}}
123 123 # function mofile() {{{
124 124
def mofile(fpath, **kwargs):
    """
    Convenience function that parses the mo file *fpath* and returns
    a MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext to generate the po file that was used to format
        the mo file (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False
      - *check_for_duplicates*: whether to check for duplicate entries when
        adding entries to the file, default: False (optional)

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # Any truthy value enables autodetection (no comparison with True).
    if kwargs.get('autodetect_encoding', True):
        # mo files are binary, so the detection reads them in binary mode
        enc = detect_encoding(fpath, True)
    else:
        enc = kwargs.get('encoding', default_encoding)
    parser = _MOFileParser(
        fpath,
        encoding=enc,
        check_for_duplicates=kwargs.get('check_for_duplicates', False)
    )
    instance = parser.parse()
    # wrapwidth is applied to the parsed instance, not passed to the parser
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
174 174
175 175 # }}}
176 176 # function detect_encoding() {{{
177 177
def detect_encoding(fpath, binary_mode=False):
    """
    Try to detect the encoding used by the file *fpath*. The function will
    return polib default *encoding* if it's unable to detect it.

    The encoding is taken from the first line that contains a
    ``Content-Type: ... charset=<encoding>`` declaration.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po or mo file to parse.
      - *binary_mode*: boolean, open the file in binary mode (used for mo
        files), default: False.

    **Examples**:

    >>> print(detect_encoding('tests/test_noencoding.po'))
    utf-8
    >>> print(detect_encoding('tests/test_utf8.po'))
    UTF-8
    >>> print(detect_encoding('tests/test_utf8.mo', True))
    UTF-8
    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
    ISO_8859-15
    >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
    ISO_8859-15
    """
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    if binary_mode:
        mode = 'rb'
    else:
        mode = 'r'
    f = open(fpath, mode)
    # try/finally guarantees the handle is closed even if reading fails
    try:
        # iterate lazily: stop reading as soon as the charset is found
        for l in f:
            match = rx.search(l)
            if match:
                return match.group(1).strip()
    finally:
        f.close()
    return default_encoding
213 213
214 214 # }}}
215 215 # function escape() {{{
216 216
def escape(st):
    """
    Return *st* with backslash, tab, carriage return, newline and double
    quote characters replaced by their backslash escape sequences.

    **Examples**:

    >>> escape('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # The backslash must be handled first, otherwise the backslashes
    # introduced by the later replacements would be escaped again.
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for raw, escaped in replacements:
        st = st.replace(raw, escaped)
    return st
231 231
232 232 # }}}
233 233 # function unescape() {{{
234 234
def unescape(st):
    """
    Return *st* with the escape sequences for backslash, newline, tab,
    carriage return and double quote turned back into the characters
    they stand for.

    **Examples**:

    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    >>> unescape(r'\\n')
    '\\n'
    >>> unescape(r'\\\\n')
    '\\\\n'
    >>> unescape(r'\\\\n\\n')
    '\\\\n\\n'
    """
    # escape letter -> control character; a backslash or a double quote
    # simply stands for itself
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def replace_sequence(match):
        char = match.group(1)
        return control_chars.get(char, char)

    return re.sub(r'\\(\\|n|t|r|")', replace_sequence, st)
262 262
263 263 # }}}
264 264 # class _BaseFile {{{
265 265
class _BaseFile(list):
    """
    Common parent class for POFile and MOFile classes.
    This class must **not** be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor.

        **Keyword arguments**:
          - *fpath*: string, path to po or mo file
          - *wrapwidth*: integer, the wrap width, only useful when -w option
            was passed to xgettext to generate the po file that was used to
            format the mo file, default to 78 (optional),
          - *encoding*: string, the encoding to use, defaults to
            "default_encoding" global variable (optional),
          - *check_for_duplicates*: whether to check for duplicate entries
            when adding entries to the file, default: False (optional).
        """
        list.__init__(self)
        # path of the po/mo file (None when not given)
        self.fpath = kwargs.get('fpath')
        # the width at which lines should be wrapped
        self.wrapwidth = kwargs.get('wrapwidth', 78)
        # the file encoding
        self.encoding = kwargs.get('encoding', default_encoding)
        # whether to check for duplicate entries or not
        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __str__(self):
        """
        String representation of the file: the metadata entry, then the
        non-obsolete entries, then the obsolete entries.
        """
        ret = []
        entries = [self.metadata_as_entry()] + \
                  [e for e in self if not e.obsolete]
        for entry in entries:
            ret.append(entry.__str__(self.wrapwidth))
        for entry in self.obsolete_entries():
            ret.append(entry.__str__(self.wrapwidth))
        return '\n'.join(ret)

    def __contains__(self, entry):
        """
        Overridden method to implement the membership test (in and not in).
        The method considers that an entry is in the file if it finds an
        entry that has the same msgid (case sensitive).

        **Keyword argument**:
          - *entry*: an instance of polib._BaseEntry

        **Tests**:
        >>> po = POFile()
        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
        >>> e2 = POEntry(msgid='barfoo', msgstr='spam')
        >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
        >>> e4 = POEntry(msgid='spameggs', msgstr='eggs')
        >>> po.append(e1)
        >>> po.append(e2)
        >>> e1 in po
        True
        >>> e2 not in po
        False
        >>> e3 in po
        True
        >>> e4 in po
        False
        """
        return self.find(entry.msgid, by='msgid') is not None

    def append(self, entry):
        """
        Overridden method to check for duplicate entries, if a user tries to
        add an entry that already exists, the method will raise a ValueError
        exception.

        **Keyword argument**:
          - *entry*: an instance of polib._BaseEntry

        **Tests**:
        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
        >>> e2 = POEntry(msgid='foobar', msgstr='eggs')
        >>> po = POFile(check_for_duplicates=True)
        >>> po.append(e1)
        >>> try:
        ...     po.append(e2)
        ... except ValueError, e:
        ...     unicode(e)
        u'Entry "foobar" already exists'
        """
        if self.check_for_duplicates and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).append(entry)

    def insert(self, index, entry):
        """
        Overridden method to check for duplicate entries, if a user tries to
        insert an entry that already exists, the method will raise a
        ValueError exception.

        **Keyword arguments**:
          - *index*: index at which the entry should be inserted
          - *entry*: an instance of polib._BaseEntry

        **Tests**:
        >>> import polib
        >>> polib.check_for_duplicates = True
        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
        >>> e2 = POEntry(msgid='barfoo', msgstr='eggs')
        >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
        >>> po = POFile(check_for_duplicates=True)
        >>> po.insert(0, e1)
        >>> po.insert(1, e2)
        >>> try:
        ...     po.insert(0, e3)
        ... except ValueError, e:
        ...     unicode(e)
        u'Entry "foobar" already exists'
        """
        if self.check_for_duplicates and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).insert(index, entry)

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %x>' % (self.__class__.__name__, id(self))

    def metadata_as_entry(self):
        """
        Return the metadata as an entry:

        >>> import polib
        >>> po = polib.pofile('tests/test_fuzzy_header.po')
        >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())
        True
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            # one "Name: value" line per metadata item
            strs = ['%s: %s' % (name, value) for name, value in mdata]
            e.msgstr = '\n'.join(strs) + '\n'
            # pre-split representation consumed by _BaseEntry._str_field
            e._multiline_str['msgstr'] = '__POLIB__NL__'.join(
                [s + '\n' for s in strs])
        if self.metadata_is_fuzzy:
            e.flags.append('fuzzy')
        return e

    def save(self, fpath=None, repr_method='__str__'):
        """
        Write the file to *fpath*, or to the path given at construction
        time (``self.fpath``) when *fpath* is not provided.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
          - *repr_method*: string, the method to use for output.

        Raises IOError when neither *fpath* nor ``self.fpath`` is set.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        binary = repr_method == 'to_binary'
        # decode before opening, so that a decoding error does not leave a
        # truncated file behind
        if not binary and not isinstance(contents, types.UnicodeType):
            contents = contents.decode(self.encoding)
        if binary:
            fhandle = open(fpath, 'wb')
        else:
            fhandle = codecs.open(fpath, 'w', self.encoding)
        # always release the handle, even when the write fails
        try:
            fhandle.write(contents)
        finally:
            fhandle.close()

    def find(self, st, by='msgid'):
        """
        Find entry which msgid (or property identified by the *by*
        attribute) matches the string *st*. Return the first matching
        entry, or None when there is none.

        **Keyword arguments**:
          - *st*: string, the string to search for
          - *by*: string, the comparison attribute

        **Examples**:

        >>> po = pofile('tests/test_utf8.po')
        >>> entry = po.find('Thursday')
        >>> entry.msgstr
        u'Jueves'
        >>> entry = po.find('Some unexistant msgid')
        >>> entry is None
        True
        >>> entry = po.find('Jueves', 'msgstr')
        >>> entry.msgid
        u'Thursday'
        """
        for e in self:
            if getattr(e, by) == st:
                return e
        return None

    def ordered_metadata(self):
        """
        Convenience method that returns the metadata ordered. The return
        value is a list of tuples (metadata name, metadata_value).

        The well-known headers come first, in a fixed order; any other
        metadata follows, sorted by name.
        """
        # work on a copy so that self.metadata is left untouched
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding'
        ]
        ordered_data = []
        for data in data_order:
            try:
                value = metadata.pop(data)
                ordered_data.append((data, value))
            except KeyError:
                pass
        # There are no specs for the order of the remaining metadata, so
        # sort it by name to get a deterministic output. (The previous
        # "list(keys).sort()" sorted a throwaway copy and had no effect.)
        for data in sorted(metadata):
            ordered_data.append((data, metadata[data]))
        return ordered_data

    def to_binary(self):
        """
        Return the mofile binary representation.
        """
        import array
        offsets = []
        entries = self.translated_entries()
        # the keys are sorted in the .mo file; sort in place, as before
        entries.sort(key=lambda entry: entry.msgid)
        # add metadata entry
        mentry = self.metadata_as_entry()
        mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
        entries = [mentry] + entries
        entries_len = len(entries)
        ids, strs = '', ''
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            if e.msgid_plural:
                msgstr = []
                for index in sorted(e.msgstr_plural):
                    msgstr.append(e.msgstr_plural[index])
                msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
                msgstr = self._encode('\0'.join(msgstr))
            else:
                msgid = self._encode(e.msgid)
                msgstr = self._encode(e.msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            ids += msgid + '\0'
            strs += msgstr + '\0'
        # The header is 7 32-bit unsigned integers.
        keystart = 7*4+16*entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1+keystart]
            voffsets += [l2, o2+valuestart]
        offsets = koffsets + voffsets
        output = struct.pack("IIIIIII",
                             0x950412de,        # Magic number
                             0,                 # Version
                             entries_len,       # # of entries
                             7*4,               # start of key index
                             7*4+entries_len*8, # start of value index
                             0, 0)              # size and offset of hash table
        output += array.array("I", offsets).tostring()
        output += ids
        output += strs
        return output

    def _encode(self, mixed):
        """
        Encode the given argument with the file encoding if the type is unicode
        and return the encoded string. Any other value is returned unchanged.
        """
        if isinstance(mixed, types.UnicodeType):
            return mixed.encode(self.encoding)
        return mixed
581 581
582 582 # }}}
583 583 # class POFile {{{
584 584
class POFile(_BaseFile):
    '''
    Po (or Pot) file reader/writer.
    POFile objects inherit the list objects methods.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="Peace in some languages",
    ...     msgstr="Pace سلام שלום Hasîtî 和平"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print(po)
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "Peace in some languages"
    msgstr "Pace سلام שלום Hasîtî 和平"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __str__(self):
        """Return the string representation of the po file"""
        header_lines = []
        for line in self.header.split('\n'):
            # lines starting with ',' or ':' get no space after the '#'
            if line[:1] in [',', ':']:
                header_lines.append('#%s\n' % line)
            else:
                header_lines.append('# %s\n' % line)
        return ''.join(header_lines) + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Write the binary (mo) representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Return the percentage of translated messages among the
        non-obsolete entries, as an integer (100 for an empty file).

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        >>> po = POFile()
        >>> po.percent_translated()
        100
        """
        active = [entry for entry in self if not entry.obsolete]
        if not active:
            return 100
        done = len(self.translated_entries())
        return int((100.00 / float(len(active))) * done)

    def translated_entries(self):
        """
        Return the list of entries whose translated() method is true.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        6
        """
        return [entry for entry in self if entry.translated()]

    def untranslated_entries(self):
        """
        Return the list of entries that are neither translated, nor
        obsolete, nor flagged 'fuzzy'.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        4
        """
        return [entry for entry in self
                if not (entry.translated() or entry.obsolete
                        or 'fuzzy' in entry.flags)]

    def fuzzy_entries(self):
        """
        Return the list of entries flagged 'fuzzy'.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        return [entry for entry in self if 'fuzzy' in entry.flags]

    def obsolete_entries(self):
        """
        Return the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        return [entry for entry in self if entry.obsolete]

    def merge(self, refpot):
        """
        XXX this could not work if encodings are different, needs thinking
        and general refactoring of how polib handles encoding...

        Convenience method that merges the current pofile with the pot file
        provided. It behaves exactly as the gettext msgmerge utility:

          - comments of this file will be preserved, but extracted comments
            and occurrences will be discarded
          - any translations or comments in the file will be discarded,
            however dot comments and file positions will be preserved

        **Keyword argument**:
          - *refpot*: object POFile, the reference catalog.

        **Example**:

        >>> import polib
        >>> refpot = polib.pofile('tests/test_merge.pot')
        >>> po = polib.pofile('tests/test_merge_before.po')
        >>> po.merge(refpot)
        >>> expected_po = polib.pofile('tests/test_merge_after.po')
        >>> unicode(po) == unicode(expected_po)
        True
        """
        # update entries that already exist, add the ones that do not
        for ref_entry in refpot:
            target = self.find(ref_entry.msgid)
            if target is None:
                target = POEntry()
                self.append(target)
            target.merge(ref_entry)
        # ok, now we must "obsolete" entries that are not in the refpot
        # anymore
        for own_entry in self:
            if refpot.find(own_entry.msgid) is None:
                own_entry.obsolete = True
765 765
766 766 # }}}
767 767 # class MOFile {{{
768 768
class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print(mo)
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __init__(self, *args, **kwargs):
        """
        MOFile constructor.  Mo files have two other properties:
          - magic_number: the magic_number of the binary file,
          - version: the version of the mo spec.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file (optional);
            when omitted, the path given at construction time is used, as
            _BaseFile.save() does. IOError is raised if neither is set.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns 100.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Returns the file object itself.
        """
        return self

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []
864 864
865 865 # }}}
866 866 # class _BaseEntry {{{
867 867
class _BaseEntry(object):
    """
    Base class for POEntry or MOEntry objects.
    This class must *not* be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Base Entry constructor. Every field is read from the keyword
        arguments and falls back to an empty/neutral default.
        """
        get = kwargs.get
        self.msgid = get('msgid', '')
        self.msgstr = get('msgstr', '')
        self.msgid_plural = get('msgid_plural', '')
        self.msgstr_plural = get('msgstr_plural', {})
        self.obsolete = get('obsolete', False)
        self.encoding = get('encoding', default_encoding)
        self.msgctxt = get('msgctxt', None)
        # pre-split field contents, keyed by field name (+ plural index)
        self._multiline_str = {}

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %x>' % (self.__class__.__name__, id(self))

    def __str__(self, wrapwidth=78):
        """
        Common string representation of the POEntry and MOEntry
        objects: msgctxt (if any), msgid, msgid_plural (if any), then
        either the plural msgstr forms or the single msgstr.
        """
        # obsolete entries have every line prefixed with '#~ '
        delflag = ''
        if self.obsolete:
            delflag = '#~ '
        out = []
        if self.msgctxt is not None:
            out.extend(self._str_field("msgctxt", delflag, "", self.msgctxt))
        out.extend(self._str_field("msgid", delflag, "", self.msgid))
        if self.msgid_plural:
            out.extend(self._str_field("msgid_plural", delflag, "",
                                       self.msgid_plural))
        if self.msgstr_plural:
            # one "msgstr[index]" field per plural form, in index order
            plurals = self.msgstr_plural
            for index in sorted(plurals):
                out.extend(self._str_field("msgstr", delflag,
                                           '[%s]' % index, plurals[index]))
        else:
            out.extend(self._str_field("msgstr", delflag, "", self.msgstr))
        out.append('')
        return '\n'.join(out)

    def _str_field(self, fieldname, delflag, plural_index, field):
        """
        Return the list of output lines for one field: a
        '<fieldname><plural_index> "<text>"' line followed by one quoted
        continuation line per remaining chunk, each prefixed by *delflag*.
        """
        key = fieldname + plural_index
        if key in self._multiline_str:
            # a pre-split representation was stored for this field
            field = self._multiline_str[key]
            chunks = [''] + field.split('__POLIB__NL__')
        else:
            chunks = field.splitlines(True)
            if len(chunks) > 1:
                chunks = [''] + chunks  # start with initial empty line
            else:
                chunks = [field]  # needed for the empty string case
        if fieldname.startswith('previous_'):
            # quick and dirty trick to get the real field name
            fieldname = fieldname[9:]

        first = '%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                 escape(chunks[0]))
        rest = ['%s"%s"' % (delflag, escape(chunk)) for chunk in chunks[1:]]
        return [first] + rest
941 941
942 942 # }}}
943 943 # class POEntry {{{
944 944
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print(entry)
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
    >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.flags.append('c-format')
    >>> entry.previous_msgctxt = '@somecontext'
    >>> entry.previous_msgid = 'I had eggs but no spam !'
    >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
    >>> entry.msgctxt = '@somenewcontext'
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print(entry)
    #. A plural translation. This is a very very very long line please do not
    #. wrap, this is just for testing comment wrapping...
    # A plural translation. This is a very very very long line please do not wrap,
    # this is just for testing comment wrapping...
    #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
    #: src/eggs.c:45
    #, c-format
    #| msgctxt "@somecontext"
    #| msgid "I had eggs but no spam !"
    #| msgid_plural "I had eggs and %d spam !"
    msgctxt "@somenewcontext"
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        All arguments are forwarded to ``_BaseEntry.__init__``.  The
        following keyword arguments are additionally read here:

        - *comment*: string, generated comment (default ``''``)
        - *tcomment*: string, translator comment (default ``''``)
        - *occurrences*: list of ``(path, line)`` pairs (default ``[]``)
        - *flags*: list of flag strings (default ``[]``)
        - *previous_msgctxt*, *previous_msgid*, *previous_msgid_plural*:
          previous values of the fields (default ``None``)
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])
        self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
        self.previous_msgid = kwargs.get('previous_msgid', None)
        self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        Obsolete entries are rendered by ``_BaseEntry.__str__`` alone.
        For other entries the generated comment, translator comment,
        occurrences, flags and previous msgctxt/msgid/msgid_plural are
        written first, followed by the ``_BaseEntry.__str__`` output.
        Comment and occurrence lines longer than *wrapwidth* are wrapped;
        a *wrapwidth* of 0 or less disables wrapping.
        """
        if self.obsolete:
            return _BaseEntry.__str__(self)
        ret = []
        # comment first, if any (with text wrapping as xgettext does)
        if self.comment != '':
            for comment in self.comment.split('\n'):
                if wrapwidth > 0 and len(comment) > wrapwidth-3:
                    ret += textwrap.wrap(comment, wrapwidth,
                                         initial_indent='#. ',
                                         subsequent_indent='#. ',
                                         break_long_words=False)
                else:
                    ret.append('#. %s' % comment)
        # translator comment, if any (with text wrapping as xgettext does)
        if self.tcomment != '':
            for tcomment in self.tcomment.split('\n'):
                if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
                    ret += textwrap.wrap(tcomment, wrapwidth,
                                         initial_indent='# ',
                                         subsequent_indent='# ',
                                         break_long_words=False)
                else:
                    ret.append('# %s' % tcomment)
        # occurrences (with text wrapping as xgettext does)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                if lineno:
                    filelist.append('%s:%s' % (fpath, lineno))
                else:
                    filelist.append(fpath)
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # XXX textwrap split words that contain hyphen, this is not
                # what we want for filenames, so the dirty hack is to
                # temporally replace hyphens with a char that a file cannot
                # contain, like "*"
                lines = textwrap.wrap(filestr.replace('-', '*'),
                                      wrapwidth,
                                      initial_indent='#: ',
                                      subsequent_indent='#: ',
                                      break_long_words=False)
                # end of the replace hack
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: '+filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))

        # previous context and previous msgid/msgid_plural
        if self.previous_msgctxt:
            ret += self._str_field("previous_msgctxt", "#| ", "",
                                   self.previous_msgctxt)
        if self.previous_msgid:
            ret += self._str_field("previous_msgid", "#| ", "",
                                   self.previous_msgid)
        if self.previous_msgid_plural:
            ret += self._str_field("previous_msgid_plural", "#| ", "",
                                   self.previous_msgid_plural)

        ret.append(_BaseEntry.__str__(self))
        return '\n'.join(ret)

    def __cmp__(self, other):
        '''
        Called by comparison operations if rich comparison is not defined.

        Entries are ordered by, in this order: the obsolete flag (obsolete
        entries first), their sorted occurrences, and finally their msgid.
        Returns -1, 0 or 1; 0 is returned only when the obsolete flag, the
        sorted occurrences and the msgid are all equal.

        **Tests**:
        >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
        >>> po = POFile()
        >>> po.append(a)
        >>> po.append(b)
        >>> po.append(c1)
        >>> po.append(c2)
        >>> po.sort()
        >>> print(po)
        #
        msgid ""
        msgstr ""
        <BLANKLINE>
        #: a.py:1 a.py:3
        msgid "c2"
        msgstr ""
        <BLANKLINE>
        #: a.py:1 b.py:1
        msgid "c1"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "a"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "b"
        msgstr ""
        <BLANKLINE>
        '''
        # First: Obsolete test
        if self.obsolete != other.obsolete:
            if self.obsolete:
                return -1
            else:
                return 1
        # Second: occurrences.  sorted() works on copies, so the entries'
        # own lists keep their order.  The natural ordering of the
        # (path, line) pairs compares the path first and the line second,
        # and a list that is a strict prefix of the other sorts first, so
        # the comparison is symmetric.
        occ1 = sorted(self.occurrences)
        occ2 = sorted(other.occurrences)
        if occ1 != occ2:
            if occ1 > occ2:
                return 1
            return -1
        # Finally: Compare message ID
        if self.msgid != other.msgid:
            if self.msgid > other.msgid:
                return 1
            return -1
        return 0

    def translated(self):
        """
        Return True if the entry has been translated or False.

        Obsolete and fuzzy entries are never considered translated.
        Otherwise the entry is translated when its msgstr is non-empty,
        or when it has plural translations and none of them is empty.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False

    def merge(self, other):
        """
        Merge the current entry with the given pot entry.

        The msgid, occurrences, comment, flags and msgid_plural of *other*
        replace those of this entry.  The occurrences and flags objects
        are assigned as-is, not copied, so they end up shared with
        *other*.  For every plural index of *other* that this entry does
        not have yet an empty translation is added; existing plural
        translations are kept.
        """
        self.msgid = other.msgid
        self.occurrences = other.occurrences
        self.comment = other.comment
        self.flags = other.flags
        self.msgid_plural = other.msgid_plural
        if other.msgstr_plural:
            for pos in other.msgstr_plural:
                try:
                    # keep existing translation at pos if any
                    self.msgstr_plural[pos]
                except KeyError:
                    self.msgstr_plural[pos] = ''
1185 1185
1186 1186 # }}}
1187 1187 # class MOEntry {{{
1188 1188
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        The formatting is delegated entirely to ``_BaseEntry.__str__``,
        to which *wrapwidth* is forwarded unchanged.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
1209 1209
1210 1210 # }}}
1211 1211 # class _POFileParser {{{
1212 1212
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.
    """

    def __init__(self, fpath, *args, **kwargs):
        """
        Constructor.

        **Arguments**:
          - *fpath*: string, path to the po file
          - *encoding*: string, the encoding to use, defaults to
            "default_encoding" global variable (optional),
          - *check_for_duplicates*: whether to check for duplicate entries
            when adding entries to the file, default: False (optional).

        The file is opened here; when the requested encoding is unknown
        (``LookupError``) the default encoding is used instead.
        """
        enc = kwargs.get('encoding', default_encoding)
        check_dup = kwargs.get('check_for_duplicates', False)
        try:
            self.fhandle = codecs.open(fpath, 'rU', enc)
        except LookupError:
            enc = default_encoding
            self.fhandle = codecs.open(fpath, 'rU', enc)
        self.instance = POFile(
            fpath=fpath,
            encoding=enc,
            check_for_duplicates=check_dup
        )
        self.transitions = {}
        self.current_entry = POEntry()
        self.current_state = 'ST'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Meaning of the symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurrence
        #     * FL: a flags line
        #     * CT: a message context
        #     * PC: a previous msgctxt
        #     * PM: a previous msgid
        #     * PP: a previous msgid_plural
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_states = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP',
                      'TC', 'MS', 'MP', 'MX', 'MI']

        self.add('TC', ['ST', 'HE'], 'HE')
        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
                        'MP', 'MX', 'MI'], 'TC')
        self.add('GC', all_states, 'GC')
        self.add('OC', all_states, 'OC')
        self.add('FL', all_states, 'FL')
        self.add('PC', all_states, 'PC')
        self.add('PM', all_states, 'PM')
        self.add('PP', all_states, 'PP')
        self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
                        'PP', 'MS', 'MX'], 'CT')
        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
                        'PM', 'PP', 'MS', 'MX'], 'MI')
        self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
        self.add('MS', ['MI', 'MP', 'TC'], 'MS')
        self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
        self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the populated POFile instance.  Raises IOError (from
        process()) on a syntax error.  The file handle is closed in every
        case, including when an error is raised.
        """
        try:
            for index, line in enumerate(self.fhandle):
                # line numbers reported in error messages are 1-based
                linenum = index + 1
                line = line.strip()
                if line == '':
                    continue
                if line[:3] == '#~ ':
                    line = line[3:]
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0
                self.current_token = line
                if line[:2] == '#:':
                    # we are on a occurrences line
                    self.process('OC', linenum)
                elif line[:9] == 'msgctxt "':
                    # we are on a msgctxt
                    self.process('CT', linenum)
                elif line[:7] == 'msgid "':
                    # we are on a msgid
                    self.process('MI', linenum)
                elif line[:8] == 'msgstr "':
                    # we are on a msgstr
                    self.process('MS', linenum)
                elif line[:1] == '"' or line[:4] == '#| "':
                    # we are on a continuation line or some metadata
                    self.process('MC', linenum)
                elif line[:14] == 'msgid_plural "':
                    # we are on a msgid plural
                    self.process('MP', linenum)
                elif line[:7] == 'msgstr[':
                    # we are on a msgstr plural
                    self.process('MX', linenum)
                elif line[:3] == '#, ':
                    # we are on a flags line
                    self.process('FL', linenum)
                elif line[:2] == '# ' or line == '#':
                    # we are on a translator comment line (a lone '#' is
                    # an empty comment: the handlers slice the token from
                    # index 2, which yields '' for it)
                    self.process('TC', linenum)
                elif line[:2] == '#.':
                    # we are on a generated comment line
                    self.process('GC', linenum)
                elif line[:15] == '#| msgid_plural':
                    # we are on a previous msgid_plural
                    self.process('PP', linenum)
                elif line[:8] == '#| msgid':
                    # we are on a previous msgid
                    self.process('PM', linenum)
                elif line[:10] == '#| msgctxt':
                    # we are on a previous msgctxt
                    self.process('PC', linenum)

            if self.current_entry:
                # since entries are added when another entry is found, we
                # must add the last entry here (only if there are lines)
                self.instance.append(self.current_entry)
            # before returning the instance, check if there's metadata and
            # if so extract it in a dict
            firstentry = self.instance[0]
            if firstentry.msgid == '': # metadata found
                # remove the entry
                firstentry = self.instance.pop(0)
                self.instance.metadata_is_fuzzy = firstentry.flags
                key = None
                for msg in firstentry.msgstr.splitlines():
                    try:
                        key, val = msg.split(':', 1)
                        self.instance.metadata[key] = val.strip()
                    except ValueError:
                        # no colon on this line: it continues the value of
                        # the previous key, if there is one
                        if key is not None:
                            self.instance.metadata[key] += '\n'+ msg.strip()
        finally:
            # close opened file, even if parsing failed
            self.fhandle.close()
        return self.instance

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.
        Keywords arguments:

        symbol     -- string, the matched token (two chars symbol)
        states     -- list, a list of states (two chars symbols)
        next_state -- the next state the fsm will have after the action

        The action bound to the transition is the ``handle_xx`` method
        named after the lowercased *next_state*.
        """
        action = getattr(self, 'handle_%s' % next_state.lower())
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol, linenum):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:
        symbol  -- string, the matched token (two chars symbol)
        linenum -- integer, the current line number of the parsed file

        The current state only changes when the handler returns a true
        value.  Any exception raised while looking up or running the
        handler (including an unknown transition) is reported as an
        IOError naming *linenum*.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            if action():
                self.current_state = state
        except Exception:
            raise IOError('Syntax error in po file (line %s)' % linenum)

    # state handlers

    def _begin_entry(self):
        """
        Store the current entry and start a fresh one when the previous
        state shows that the current entry already received its msgstr.
        """
        if self.current_state in ['MC', 'MS', 'MX']:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry()

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return True

    def handle_tc(self):
        """Handle a translator comment."""
        self._begin_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        self.current_entry.tcomment += self.current_token[2:]
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._begin_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """
        Handle a file:num occurrence.

        Each whitespace separated token is stored as a ``(path, line)``
        pair.  A token without a numeric line part is stored unchanged as
        the path, with an empty line; a trailing colon is dropped.
        """
        self._begin_entry()
        for occurrence in self.current_token[3:].split():
            try:
                fil, line = occurrence.split(':')
            except ValueError:
                # no colon at all, or more than one: keep the token whole
                self.current_entry.occurrences.append((occurrence, ''))
                continue
            if not line.isdigit():
                if line:
                    # "name:suffix" without a line number: keep the token
                    # whole instead of gluing both halves together, which
                    # would silently drop the colon
                    fil = occurrence
                line = ''
            self.current_entry.occurrences.append((fil, line))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._begin_entry()
        self.current_entry.flags += self.current_token[3:].split(', ')
        return True

    def handle_pp(self):
        """Handle a previous msgid_plural line."""
        self._begin_entry()
        self.current_entry.previous_msgid_plural = \
            unescape(self.current_token[17:-1])
        return True

    def handle_pm(self):
        """Handle a previous msgid line."""
        self._begin_entry()
        self.current_entry.previous_msgid = \
            unescape(self.current_token[10:-1])
        return True

    def handle_pc(self):
        """Handle a previous msgctxt line."""
        self._begin_entry()
        self.current_entry.previous_msgctxt = \
            unescape(self.current_token[12:-1])
        return True

    def handle_ct(self):
        """Handle a msgctxt."""
        self._begin_entry()
        self.current_entry.msgctxt = unescape(self.current_token[9:-1])
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._begin_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[7:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[8:-1])
        return True

    def handle_mx(self):
        """
        Handle a msgstr plural.

        The plural index is the single character found right after
        ``msgstr[``; it is used as-is (a string) as the key in
        ``msgstr_plural`` and remembered for continuation lines.
        """
        index, value = self.current_token[7], self.current_token[11:-1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """
        Handle a msgid or msgstr continuation line.

        The text is appended to the field selected by the current state
        and also recorded in the entry's ``_multiline_str`` dict, with the
        ``__POLIB__NL__`` marker separating successive continuation lines.
        Always returns False so that the current state is left unchanged.
        """
        token = unescape(self.current_token[1:-1])
        if self.current_state == 'CT':
            typ = 'msgctxt'
            self.current_entry.msgctxt += token
        elif self.current_state == 'MI':
            typ = 'msgid'
            self.current_entry.msgid += token
        elif self.current_state == 'MP':
            typ = 'msgid_plural'
            self.current_entry.msgid_plural += token
        elif self.current_state == 'MS':
            typ = 'msgstr'
            self.current_entry.msgstr += token
        elif self.current_state == 'MX':
            typ = 'msgstr[%s]' % self.msgstr_index
            self.current_entry.msgstr_plural[self.msgstr_index] += token
        elif self.current_state == 'PP':
            typ = 'previous_msgid_plural'
            # drop what is left of the '#| "' prefix
            token = token[3:]
            self.current_entry.previous_msgid_plural += token
        elif self.current_state == 'PM':
            typ = 'previous_msgid'
            token = token[3:]
            self.current_entry.previous_msgid += token
        elif self.current_state == 'PC':
            typ = 'previous_msgctxt'
            token = token[3:]
            self.current_entry.previous_msgctxt += token
        if typ not in self.current_entry._multiline_str:
            self.current_entry._multiline_str[typ] = token
        else:
            self.current_entry._multiline_str[typ] += "__POLIB__NL__" + token
        # don't change the current state
        return False
1550 1550
1551 1551 # }}}
1552 1552 # class _MOFileParser {{{
1553 1553
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """
    # values obtained when the first four bytes of the file are read as a
    # little-endian unsigned integer
    BIG_ENDIAN    = 0xde120495
    LITTLE_ENDIAN = 0x950412de

    def __init__(self, fpath, *args, **kwargs):
        """
        Constructor.

        **Arguments**:
          - *fpath*: string, path to the mo file
          - *encoding*: string, the encoding to use, defaults to
            "default_encoding" global variable (optional),
          - *check_for_duplicates*: whether to check for duplicate entries
            when adding entries to the file, default: False (optional).

        The file is opened in binary mode here and closed by parse().
        """
        enc = kwargs.get('encoding', default_encoding)
        check_dup = kwargs.get('check_for_duplicates', False)
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(
            fpath=fpath,
            encoding=enc,
            check_for_duplicates=check_dup
        )

    def parse_magicnumber(self):
        """
        Parse the magic number and raise an exception if not valid.

        NOTE: this method is an empty placeholder; the magic number is
        actually read and validated by parse().
        """

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated MOFile instance.  Raises IOError when the
        magic number is not a known one.  The file handle is closed in
        every case, including when an error is raised.
        """
        try:
            magic_number = self._readbinary('<I', 4)
            if magic_number == self.LITTLE_ENDIAN:
                ii = '<II'
            elif magic_number == self.BIG_ENDIAN:
                ii = '>II'
            else:
                raise IOError('Invalid mo file, magic number is incorrect !')
            self.instance.magic_number = magic_number
            # parse the version number and the number of strings
            self.instance.version, numofstrings = self._readbinary(ii, 8)
            # original strings and translation strings hash table offset
            msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
            # move to msgid hash table and read length and offset of msgids
            self.fhandle.seek(msgids_hash_offset)
            msgids_index = []
            for i in range(numofstrings):
                msgids_index.append(self._readbinary(ii, 8))
            # move to msgstr hash table and read length and offset of
            # msgstrs
            self.fhandle.seek(msgstrs_hash_offset)
            msgstrs_index = []
            for i in range(numofstrings):
                msgstrs_index.append(self._readbinary(ii, 8))
            # build entries
            for i in range(numofstrings):
                self.fhandle.seek(msgids_index[i][1])
                msgid = self.fhandle.read(msgids_index[i][0])
                self.fhandle.seek(msgstrs_index[i][1])
                msgstr = self.fhandle.read(msgstrs_index[i][0])
                # the metadata is the translation of the empty msgid; a
                # first string with a non-empty msgid is a regular entry
                if i == 0 and not msgid: # metadata
                    raw_metadata, metadata = msgstr.split('\n'), {}
                    for line in raw_metadata:
                        tokens = line.split(':', 1)
                        if tokens[0] != '':
                            try:
                                metadata[tokens[0]] = tokens[1].strip()
                            except IndexError:
                                # key without a colon: empty value
                                metadata[tokens[0]] = ''
                    self.instance.metadata = metadata
                    continue
                # test if we have a plural entry
                msgid_tokens = msgid.split('\0')
                if len(msgid_tokens) > 1:
                    # plural forms are stored NUL separated; they are
                    # keyed by their position
                    entry = MOEntry(
                        msgid=msgid_tokens[0],
                        msgid_plural=msgid_tokens[1],
                        msgstr_plural=dict(enumerate(msgstr.split('\0')))
                    )
                else:
                    entry = MOEntry(msgid=msgid, msgstr=msgstr)
                self.instance.append(entry)
        finally:
            # close opened file, even if parsing failed
            self.fhandle.close()
        return self.instance

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
1656 1656
1657 1657 # }}}
1658 1658 # __main__ {{{
1659 1659
if __name__ == '__main__':
    """
    **Main function**::
      - to **test** the module just run: *python polib.py [-v]*
      - to **profile** the module: *python polib.py -p <some_pofile.po>*
    """
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            # load the file with the parser matching its extension, then
            # render it so that both parsing and formatting are profiled
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            unicode(p)
        import profile
        # %r yields a properly quoted and escaped literal, so paths
        # containing quotes or backslashes reach test() unchanged
        profile.run('test(%r)' % (sys.argv[2],))
    else:
        import doctest
        doctest.testmod()
1680 1680 # }}}
General Comments 0
You need to be logged in to leave comments. Login now