##// END OF EJS Templates
i18n: drop a py25 conditional...
Matt Harbison -
r32889:a7310a47 default
parent child Browse files
Show More
@@ -1,1648 +1,1554 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 # no-check-code
2 # no-check-code
3 #
3 #
4 # License: MIT (see LICENSE file provided)
4 # License: MIT (see LICENSE file provided)
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6
6
7 """
7 """
8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
9 mo files). You can load existing files, iterate through its entries, add,
9 mo files). You can load existing files, iterate through its entries, add,
10 modify entries, comments or metadata, etc. or create new po files from scratch.
10 modify entries, comments or metadata, etc. or create new po files from scratch.
11
11
12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
13 :func:`~polib.mofile` convenience functions.
13 :func:`~polib.mofile` convenience functions.
14 """
14 """
15
15
16 from __future__ import absolute_import
16 from __future__ import absolute_import
17
17
18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
19 __version__ = '0.6.4'
19 __version__ = '0.6.4'
20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
21 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
21 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
22
22
23 import array
23 import array
24 import codecs
24 import codecs
25 import os
25 import os
26 import re
26 import re
27 import struct
27 import struct
28 import sys
28 import sys
29 import textwrap
29 import textwrap
30 import types
30 import types
31
31
32
32
33 # the default encoding to use when encoding cannot be detected
33 # the default encoding to use when encoding cannot be detected
34 default_encoding = 'utf-8'
34 default_encoding = 'utf-8'
35
35
36 # _pofile_or_mofile {{{
36 # _pofile_or_mofile {{{
37
37
def _pofile_or_mofile(f, type, **kwargs):
    """
    Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
    honor the DRY concept.

    Arguments:

    ``f``
        string, path to the po/mo file or its content; handed unchanged to
        :func:`detect_encoding` and to the parser.

    ``type``
        string, ``'pofile'`` selects ``_POFileParser``; any other value
        selects ``_MOFileParser``.

    Keyword arguments read from ``kwargs``: ``encoding`` (auto-detected when
    missing or ``None``), ``check_for_duplicates`` (default: ``False``) and
    ``wrapwidth`` (default: ``78``).

    Returns whatever the selected parser's ``parse()`` returns, with its
    ``wrapwidth`` attribute set.
    """
    # get the file encoding
    enc = kwargs.get('encoding')
    if enc is None:
        enc = detect_encoding(f, type == 'mofile')

    # pick the parser class with an explicit branch: the ``x and a or b``
    # idiom silently yields ``b`` whenever ``a`` happens to be falsy
    if type == 'pofile':
        kls = _POFileParser
    else:
        kls = _MOFileParser

    # parse the file
    parser = kls(
        f,
        encoding=enc,
        check_for_duplicates=kwargs.get('check_for_duplicates', False)
    )
    instance = parser.parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
58
58
59 # }}}
59 # }}}
60 # function pofile() {{{
60 # function pofile() {{{
61
61
def pofile(pofile, **kwargs):
    """
    Parse the po or pot file ``pofile`` and hand back the resulting
    :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).
    """
    # all the work is shared with mofile(); only the type tag differs
    catalog = _pofile_or_mofile(pofile, 'pofile', **kwargs)
    return catalog
85
85
86 # }}}
86 # }}}
87 # function mofile() {{{
87 # function mofile() {{{
88
88
def mofile(mofile, **kwargs):
    """
    Parse the mo file ``mofile`` and hand back the resulting
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext to generate the po file that was used to format the mo file
        (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).
    """
    # all the work is shared with pofile(); only the type tag differs
    catalog = _pofile_or_mofile(mofile, 'mofile', **kwargs)
    return catalog
113
113
114 # }}}
114 # }}}
115 # function detect_encoding() {{{
115 # function detect_encoding() {{{
116
116
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    def find_charset(text):
        """Return the known charset declared in ``text``, else ``None``."""
        if not isinstance(text, (str, type(u''))):
            # only reached with Python 3 ``bytes``: latin-1 maps every byte
            # to a character, so this never fails and keeps the ASCII
            # charset name intact for the text regex
            text = text.decode('latin-1')
        match = rx.search(text)
        if match:
            enc = match.group(1).strip()
            if charset_exists(enc):
                return enc
        return None

    if not os.path.exists(file):
        # not a path on disk: treat the argument as the file content
        enc = find_charset(file)
        if enc is not None:
            return enc
    else:
        if binary_mode:
            mode = 'rb'
        else:
            mode = 'r'
        # the ``with`` block closes the handle exactly once, including when
        # reading raises; lines are streamed instead of loaded all at once
        with open(file, mode) as f:
            for line in f:
                enc = find_charset(line)
                if enc is not None:
                    return enc
    return default_encoding
163
163
164 # }}}
164 # }}}
165 # function escape() {{{
165 # function escape() {{{
166
166
def escape(st):
    """
    Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    # the backslash pair comes first so that the backslashes introduced by
    # the later replacements are not escaped a second time
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    escaped = st
    for raw, quoted in replacements:
        escaped = escaped.replace(raw, quoted)
    return escaped
177
177
178 # }}}
178 # }}}
179 # function unescape() {{{
179 # function unescape() {{{
180
180
def unescape(st):
    """
    Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    # escape letter -> real character; the double quote is absent on purpose
    # because it simply maps to itself
    specials = {'n': '\n', 't': '\t', 'r': '\r', '\\': '\\'}

    def expand(match):
        char = match.group(1)
        return specials.get(char, char)

    return re.sub(r'\\(\\|n|t|r|")', expand, st)
198
198
199 # }}}
199 # }}}
200 # class _BaseFile {{{
200 # class _BaseFile {{{
201
201
202 class _BaseFile(list):
202 class _BaseFile(list):
203 """
203 """
204 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
204 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
205 classes. This class should **not** be instanciated directly.
205 classes. This class should **not** be instanciated directly.
206 """
206 """
207
207
208 def __init__(self, *args, **kwargs):
208 def __init__(self, *args, **kwargs):
209 """
209 """
210 Constructor, accepts the following keyword arguments:
210 Constructor, accepts the following keyword arguments:
211
211
212 ``pofile``
212 ``pofile``
213 string, the path to the po or mo file, or its content as a string.
213 string, the path to the po or mo file, or its content as a string.
214
214
215 ``wrapwidth``
215 ``wrapwidth``
216 integer, the wrap width, only useful when the ``-w`` option was
216 integer, the wrap width, only useful when the ``-w`` option was
217 passed to xgettext (optional, default: ``78``).
217 passed to xgettext (optional, default: ``78``).
218
218
219 ``encoding``
219 ``encoding``
220 string, the encoding to use, defaults to ``default_encoding``
220 string, the encoding to use, defaults to ``default_encoding``
221 global variable (optional).
221 global variable (optional).
222
222
223 ``check_for_duplicates``
223 ``check_for_duplicates``
224 whether to check for duplicate entries when adding entries to the
224 whether to check for duplicate entries when adding entries to the
225 file, (optional, default: ``False``).
225 file, (optional, default: ``False``).
226 """
226 """
227 list.__init__(self)
227 list.__init__(self)
228 # the opened file handle
228 # the opened file handle
229 pofile = kwargs.get('pofile', None)
229 pofile = kwargs.get('pofile', None)
230 if pofile and os.path.exists(pofile):
230 if pofile and os.path.exists(pofile):
231 self.fpath = pofile
231 self.fpath = pofile
232 else:
232 else:
233 self.fpath = kwargs.get('fpath')
233 self.fpath = kwargs.get('fpath')
234 # the width at which lines should be wrapped
234 # the width at which lines should be wrapped
235 self.wrapwidth = kwargs.get('wrapwidth', 78)
235 self.wrapwidth = kwargs.get('wrapwidth', 78)
236 # the file encoding
236 # the file encoding
237 self.encoding = kwargs.get('encoding', default_encoding)
237 self.encoding = kwargs.get('encoding', default_encoding)
238 # whether to check for duplicate entries or not
238 # whether to check for duplicate entries or not
239 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
239 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
240 # header
240 # header
241 self.header = ''
241 self.header = ''
242 # both po and mo files have metadata
242 # both po and mo files have metadata
243 self.metadata = {}
243 self.metadata = {}
244 self.metadata_is_fuzzy = 0
244 self.metadata_is_fuzzy = 0
245
245
246 def __unicode__(self):
246 def __unicode__(self):
247 """
247 """
248 Returns the unicode representation of the file.
248 Returns the unicode representation of the file.
249 """
249 """
250 ret = []
250 ret = []
251 entries = [self.metadata_as_entry()] + \
251 entries = [self.metadata_as_entry()] + \
252 [e for e in self if not e.obsolete]
252 [e for e in self if not e.obsolete]
253 for entry in entries:
253 for entry in entries:
254 ret.append(entry.__unicode__(self.wrapwidth))
254 ret.append(entry.__unicode__(self.wrapwidth))
255 for entry in self.obsolete_entries():
255 for entry in self.obsolete_entries():
256 ret.append(entry.__unicode__(self.wrapwidth))
256 ret.append(entry.__unicode__(self.wrapwidth))
257 ret = '\n'.join(ret)
257 ret = '\n'.join(ret)
258
258
259 if type(ret) != types.UnicodeType:
259 if type(ret) != types.UnicodeType:
260 return unicode(ret, self.encoding)
260 return unicode(ret, self.encoding)
261 return ret
261 return ret
262
262
263 def __str__(self):
263 def __str__(self):
264 """
264 """
265 Returns the string representation of the file.
265 Returns the string representation of the file.
266 """
266 """
267 return unicode(self).encode(self.encoding)
267 return unicode(self).encode(self.encoding)
268
268
269 def __contains__(self, entry):
269 def __contains__(self, entry):
270 """
270 """
271 Overriden ``list`` method to implement the membership test (in and
271 Overriden ``list`` method to implement the membership test (in and
272 not in).
272 not in).
273 The method considers that an entry is in the file if it finds an entry
273 The method considers that an entry is in the file if it finds an entry
274 that has the same msgid (the test is **case sensitive**).
274 that has the same msgid (the test is **case sensitive**).
275
275
276 Argument:
276 Argument:
277
277
278 ``entry``
278 ``entry``
279 an instance of :class:`~polib._BaseEntry`.
279 an instance of :class:`~polib._BaseEntry`.
280 """
280 """
281 return self.find(entry.msgid, by='msgid') is not None
281 return self.find(entry.msgid, by='msgid') is not None
282
282
283 def __eq__(self, other):
283 def __eq__(self, other):
284 return unicode(self) == unicode(other)
284 return unicode(self) == unicode(other)
285
285
286 def append(self, entry):
286 def append(self, entry):
287 """
287 """
288 Overriden method to check for duplicates entries, if a user tries to
288 Overriden method to check for duplicates entries, if a user tries to
289 add an entry that is already in the file, the method will raise a
289 add an entry that is already in the file, the method will raise a
290 ``ValueError`` exception.
290 ``ValueError`` exception.
291
291
292 Argument:
292 Argument:
293
293
294 ``entry``
294 ``entry``
295 an instance of :class:`~polib._BaseEntry`.
295 an instance of :class:`~polib._BaseEntry`.
296 """
296 """
297 if self.check_for_duplicates and entry in self:
297 if self.check_for_duplicates and entry in self:
298 raise ValueError('Entry "%s" already exists' % entry.msgid)
298 raise ValueError('Entry "%s" already exists' % entry.msgid)
299 super(_BaseFile, self).append(entry)
299 super(_BaseFile, self).append(entry)
300
300
301 def insert(self, index, entry):
301 def insert(self, index, entry):
302 """
302 """
303 Overriden method to check for duplicates entries, if a user tries to
303 Overriden method to check for duplicates entries, if a user tries to
304 add an entry that is already in the file, the method will raise a
304 add an entry that is already in the file, the method will raise a
305 ``ValueError`` exception.
305 ``ValueError`` exception.
306
306
307 Arguments:
307 Arguments:
308
308
309 ``index``
309 ``index``
310 index at which the entry should be inserted.
310 index at which the entry should be inserted.
311
311
312 ``entry``
312 ``entry``
313 an instance of :class:`~polib._BaseEntry`.
313 an instance of :class:`~polib._BaseEntry`.
314 """
314 """
315 if self.check_for_duplicates and entry in self:
315 if self.check_for_duplicates and entry in self:
316 raise ValueError('Entry "%s" already exists' % entry.msgid)
316 raise ValueError('Entry "%s" already exists' % entry.msgid)
317 super(_BaseFile, self).insert(index, entry)
317 super(_BaseFile, self).insert(index, entry)
318
318
319 def metadata_as_entry(self):
319 def metadata_as_entry(self):
320 """
320 """
321 Returns the file metadata as a :class:`~polib.POFile` instance.
321 Returns the file metadata as a :class:`~polib.POFile` instance.
322 """
322 """
323 e = POEntry(msgid='')
323 e = POEntry(msgid='')
324 mdata = self.ordered_metadata()
324 mdata = self.ordered_metadata()
325 if mdata:
325 if mdata:
326 strs = []
326 strs = []
327 for name, value in mdata:
327 for name, value in mdata:
328 # Strip whitespace off each line in a multi-line entry
328 # Strip whitespace off each line in a multi-line entry
329 strs.append('%s: %s' % (name, value))
329 strs.append('%s: %s' % (name, value))
330 e.msgstr = '\n'.join(strs) + '\n'
330 e.msgstr = '\n'.join(strs) + '\n'
331 if self.metadata_is_fuzzy:
331 if self.metadata_is_fuzzy:
332 e.flags.append('fuzzy')
332 e.flags.append('fuzzy')
333 return e
333 return e
334
334
335 def save(self, fpath=None, repr_method='__str__'):
335 def save(self, fpath=None, repr_method='__str__'):
336 """
336 """
337 Saves the po file to ``fpath``.
337 Saves the po file to ``fpath``.
338 If it is an existing file and no ``fpath`` is provided, then the
338 If it is an existing file and no ``fpath`` is provided, then the
339 existing file is rewritten with the modified data.
339 existing file is rewritten with the modified data.
340
340
341 Keyword arguments:
341 Keyword arguments:
342
342
343 ``fpath``
343 ``fpath``
344 string, full or relative path to the file.
344 string, full or relative path to the file.
345
345
346 ``repr_method``
346 ``repr_method``
347 string, the method to use for output.
347 string, the method to use for output.
348 """
348 """
349 if self.fpath is None and fpath is None:
349 if self.fpath is None and fpath is None:
350 raise IOError('You must provide a file path to save() method')
350 raise IOError('You must provide a file path to save() method')
351 contents = getattr(self, repr_method)()
351 contents = getattr(self, repr_method)()
352 if fpath is None:
352 if fpath is None:
353 fpath = self.fpath
353 fpath = self.fpath
354 if repr_method == 'to_binary':
354 if repr_method == 'to_binary':
355 fhandle = open(fpath, 'wb')
355 fhandle = open(fpath, 'wb')
356 else:
356 else:
357 fhandle = codecs.open(fpath, 'w', self.encoding)
357 fhandle = codecs.open(fpath, 'w', self.encoding)
358 if type(contents) != types.UnicodeType:
358 if type(contents) != types.UnicodeType:
359 contents = contents.decode(self.encoding)
359 contents = contents.decode(self.encoding)
360 fhandle.write(contents)
360 fhandle.write(contents)
361 fhandle.close()
361 fhandle.close()
362 # set the file path if not set
362 # set the file path if not set
363 if self.fpath is None and fpath:
363 if self.fpath is None and fpath:
364 self.fpath = fpath
364 self.fpath = fpath
365
365
366 def find(self, st, by='msgid', include_obsolete_entries=False,
366 def find(self, st, by='msgid', include_obsolete_entries=False,
367 msgctxt=False):
367 msgctxt=False):
368 """
368 """
369 Find the entry which msgid (or property identified by the ``by``
369 Find the entry which msgid (or property identified by the ``by``
370 argument) matches the string ``st``.
370 argument) matches the string ``st``.
371
371
372 Keyword arguments:
372 Keyword arguments:
373
373
374 ``st``
374 ``st``
375 string, the string to search for.
375 string, the string to search for.
376
376
377 ``by``
377 ``by``
378 string, the property to use for comparison (default: ``msgid``).
378 string, the property to use for comparison (default: ``msgid``).
379
379
380 ``include_obsolete_entries``
380 ``include_obsolete_entries``
381 boolean, whether to also search in entries that are obsolete.
381 boolean, whether to also search in entries that are obsolete.
382
382
383 ``msgctxt``
383 ``msgctxt``
384 string, allows to specify a specific message context for the
384 string, allows to specify a specific message context for the
385 search.
385 search.
386 """
386 """
387 if include_obsolete_entries:
387 if include_obsolete_entries:
388 entries = self[:]
388 entries = self[:]
389 else:
389 else:
390 entries = [e for e in self if not e.obsolete]
390 entries = [e for e in self if not e.obsolete]
391 for e in entries:
391 for e in entries:
392 if getattr(e, by) == st:
392 if getattr(e, by) == st:
393 if msgctxt and e.msgctxt != msgctxt:
393 if msgctxt and e.msgctxt != msgctxt:
394 continue
394 continue
395 return e
395 return e
396 return None
396 return None
397
397
398 def ordered_metadata(self):
398 def ordered_metadata(self):
399 """
399 """
400 Convenience method that returns an ordered version of the metadata
400 Convenience method that returns an ordered version of the metadata
401 dictionary. The return value is list of tuples (metadata name,
401 dictionary. The return value is list of tuples (metadata name,
402 metadata_value).
402 metadata_value).
403 """
403 """
404 # copy the dict first
404 # copy the dict first
405 metadata = self.metadata.copy()
405 metadata = self.metadata.copy()
406 data_order = [
406 data_order = [
407 'Project-Id-Version',
407 'Project-Id-Version',
408 'Report-Msgid-Bugs-To',
408 'Report-Msgid-Bugs-To',
409 'POT-Creation-Date',
409 'POT-Creation-Date',
410 'PO-Revision-Date',
410 'PO-Revision-Date',
411 'Last-Translator',
411 'Last-Translator',
412 'Language-Team',
412 'Language-Team',
413 'MIME-Version',
413 'MIME-Version',
414 'Content-Type',
414 'Content-Type',
415 'Content-Transfer-Encoding'
415 'Content-Transfer-Encoding'
416 ]
416 ]
417 ordered_data = []
417 ordered_data = []
418 for data in data_order:
418 for data in data_order:
419 try:
419 try:
420 value = metadata.pop(data)
420 value = metadata.pop(data)
421 ordered_data.append((data, value))
421 ordered_data.append((data, value))
422 except KeyError:
422 except KeyError:
423 pass
423 pass
424 # the rest of the metadata will be alphabetically ordered since there
424 # the rest of the metadata will be alphabetically ordered since there
425 # are no specs for this AFAIK
425 # are no specs for this AFAIK
426 keys = metadata.keys()
426 keys = metadata.keys()
427 keys.sort()
427 keys.sort()
428 for data in keys:
428 for data in keys:
429 value = metadata[data]
429 value = metadata[data]
430 ordered_data.append((data, value))
430 ordered_data.append((data, value))
431 return ordered_data
431 return ordered_data
432
432
433 def to_binary(self):
433 def to_binary(self):
434 """
434 """
435 Return the binary representation of the file.
435 Return the binary representation of the file.
436 """
436 """
437 offsets = []
437 offsets = []
438 entries = self.translated_entries()
438 entries = self.translated_entries()
439 # the keys are sorted in the .mo file
439 # the keys are sorted in the .mo file
440 def cmp(_self, other):
440 def cmp(_self, other):
441 # msgfmt compares entries with msgctxt if it exists
441 # msgfmt compares entries with msgctxt if it exists
442 if _self.msgctxt:
442 if _self.msgctxt:
443 self_msgid = _self.msgctxt
443 self_msgid = _self.msgctxt
444 else:
444 else:
445 self_msgid = _self.msgid
445 self_msgid = _self.msgid
446
446
447 if other.msgctxt:
447 if other.msgctxt:
448 other_msgid = other.msgctxt
448 other_msgid = other.msgctxt
449 else:
449 else:
450 other_msgid = other.msgid
450 other_msgid = other.msgid
451 if self_msgid > other_msgid:
451 if self_msgid > other_msgid:
452 return 1
452 return 1
453 elif self_msgid < other_msgid:
453 elif self_msgid < other_msgid:
454 return -1
454 return -1
455 else:
455 else:
456 return 0
456 return 0
457 # add metadata entry
457 # add metadata entry
458 entries.sort(cmp)
458 entries.sort(cmp)
459 mentry = self.metadata_as_entry()
459 mentry = self.metadata_as_entry()
460 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
460 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
461 entries = [mentry] + entries
461 entries = [mentry] + entries
462 entries_len = len(entries)
462 entries_len = len(entries)
463 ids, strs = '', ''
463 ids, strs = '', ''
464 for e in entries:
464 for e in entries:
465 # For each string, we need size and file offset. Each string is
465 # For each string, we need size and file offset. Each string is
466 # NUL terminated; the NUL does not count into the size.
466 # NUL terminated; the NUL does not count into the size.
467 msgid = ''
467 msgid = ''
468 if e.msgctxt:
468 if e.msgctxt:
469 # Contexts are stored by storing the concatenation of the
469 # Contexts are stored by storing the concatenation of the
470 # context, a <EOT> byte, and the original string
470 # context, a <EOT> byte, and the original string
471 msgid = self._encode(e.msgctxt + '\4')
471 msgid = self._encode(e.msgctxt + '\4')
472 if e.msgid_plural:
472 if e.msgid_plural:
473 indexes = e.msgstr_plural.keys()
473 indexes = e.msgstr_plural.keys()
474 indexes.sort()
474 indexes.sort()
475 msgstr = []
475 msgstr = []
476 for index in indexes:
476 for index in indexes:
477 msgstr.append(e.msgstr_plural[index])
477 msgstr.append(e.msgstr_plural[index])
478 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
478 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
479 msgstr = self._encode('\0'.join(msgstr))
479 msgstr = self._encode('\0'.join(msgstr))
480 else:
480 else:
481 msgid += self._encode(e.msgid)
481 msgid += self._encode(e.msgid)
482 msgstr = self._encode(e.msgstr)
482 msgstr = self._encode(e.msgstr)
483 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
483 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
484 ids += msgid + '\0'
484 ids += msgid + '\0'
485 strs += msgstr + '\0'
485 strs += msgstr + '\0'
486
486
487 # The header is 7 32-bit unsigned integers.
487 # The header is 7 32-bit unsigned integers.
488 keystart = 7*4+16*entries_len
488 keystart = 7*4+16*entries_len
489 # and the values start after the keys
489 # and the values start after the keys
490 valuestart = keystart + len(ids)
490 valuestart = keystart + len(ids)
491 koffsets = []
491 koffsets = []
492 voffsets = []
492 voffsets = []
493 # The string table first has the list of keys, then the list of values.
493 # The string table first has the list of keys, then the list of values.
494 # Each entry has first the size of the string, then the file offset.
494 # Each entry has first the size of the string, then the file offset.
495 for o1, l1, o2, l2 in offsets:
495 for o1, l1, o2, l2 in offsets:
496 koffsets += [l1, o1+keystart]
496 koffsets += [l1, o1+keystart]
497 voffsets += [l2, o2+valuestart]
497 voffsets += [l2, o2+valuestart]
498 offsets = koffsets + voffsets
498 offsets = koffsets + voffsets
499 # check endianness for magic number
499 # check endianness for magic number
500 if struct.pack('@h', 1) == struct.pack('<h', 1):
500 if struct.pack('@h', 1) == struct.pack('<h', 1):
501 magic_number = MOFile.LITTLE_ENDIAN
501 magic_number = MOFile.LITTLE_ENDIAN
502 else:
502 else:
503 magic_number = MOFile.BIG_ENDIAN
503 magic_number = MOFile.BIG_ENDIAN
504
504
505 output = struct.pack(
505 output = struct.pack(
506 "Iiiiiii",
506 "Iiiiiii",
507 magic_number, # Magic number
507 magic_number, # Magic number
508 0, # Version
508 0, # Version
509 entries_len, # # of entries
509 entries_len, # # of entries
510 7*4, # start of key index
510 7*4, # start of key index
511 7*4+entries_len*8, # start of value index
511 7*4+entries_len*8, # start of value index
512 0, keystart # size and offset of hash table
512 0, keystart # size and offset of hash table
513 # Important: we don't use hash tables
513 # Important: we don't use hash tables
514 )
514 )
515 output += array.array("i", offsets).tostring()
515 output += array.array("i", offsets).tostring()
516 output += ids
516 output += ids
517 output += strs
517 output += strs
518 return output
518 return output
519
519
520 def _encode(self, mixed):
520 def _encode(self, mixed):
521 """
521 """
522 Encodes the given ``mixed`` argument with the file encoding if and
522 Encodes the given ``mixed`` argument with the file encoding if and
523 only if it's an unicode string and returns the encoded string.
523 only if it's an unicode string and returns the encoded string.
524 """
524 """
525 if type(mixed) == types.UnicodeType:
525 if type(mixed) == types.UnicodeType:
526 return mixed.encode(self.encoding)
526 return mixed.encode(self.encoding)
527 return mixed
527 return mixed
528
528
529 # }}}
529 # }}}
530 # class POFile {{{
530 # class POFile {{{
531
531
532 class POFile(_BaseFile):
532 class POFile(_BaseFile):
533 """
533 """
534 Po (or Pot) file reader/writer.
534 Po (or Pot) file reader/writer.
535 This class inherits the :class:`~polib._BaseFile` class and, by extension,
535 This class inherits the :class:`~polib._BaseFile` class and, by extension,
536 the python ``list`` type.
536 the python ``list`` type.
537 """
537 """
538
538
def __unicode__(self):
    """
    Returns the unicode representation of the po file.

    Each line of ``self.header`` is written as a ``#`` comment, followed by
    the representation built by ``_BaseFile.__unicode__``.
    """
    comment_lines = []
    for line in self.header.split('\n'):
        # lines starting with ',' or ':' are glued to the '#' without a space
        if line[:1] in [',', ':']:
            comment_lines.append('#%s\n' % line)
        else:
            comment_lines.append('# %s\n' % line)
    ret = ''.join(comment_lines)

    if type(ret) is not types.UnicodeType:
        ret = unicode(ret, self.encoding)

    body = _BaseFile.__unicode__(self)
    return ret + body
554
554
def save_as_mofile(self, fpath):
    """
    Writes the binary (mo) representation of the file to ``fpath``.

    Keyword argument:

    ``fpath``
        string, full or relative path to the mo file.
    """
    # 'to_binary' makes save() build the contents with to_binary() and open
    # the target in binary mode
    binary_method = 'to_binary'
    _BaseFile.save(self, fpath, binary_method)
565
565
566 def percent_translated(self):
566 def percent_translated(self):
567 """
567 """
568 Convenience method that returns the percentage of translated
568 Convenience method that returns the percentage of translated
569 messages.
569 messages.
570 """
570 """
571 total = len([e for e in self if not e.obsolete])
571 total = len([e for e in self if not e.obsolete])
572 if total == 0:
572 if total == 0:
573 return 100
573 return 100
574 translated = len(self.translated_entries())
574 translated = len(self.translated_entries())
575 return int((100.00 / float(total)) * translated)
575 return int((100.00 / float(total)) * translated)
576
576
577 def translated_entries(self):
577 def translated_entries(self):
578 """
578 """
579 Convenience method that returns the list of translated entries.
579 Convenience method that returns the list of translated entries.
580 """
580 """
581 return [e for e in self if e.translated()]
581 return [e for e in self if e.translated()]
582
582
583 def untranslated_entries(self):
583 def untranslated_entries(self):
584 """
584 """
585 Convenience method that returns the list of untranslated entries.
585 Convenience method that returns the list of untranslated entries.
586 """
586 """
587 return [e for e in self if not e.translated() and not e.obsolete \
587 return [e for e in self if not e.translated() and not e.obsolete \
588 and not 'fuzzy' in e.flags]
588 and not 'fuzzy' in e.flags]
589
589
590 def fuzzy_entries(self):
590 def fuzzy_entries(self):
591 """
591 """
592 Convenience method that returns the list of fuzzy entries.
592 Convenience method that returns the list of fuzzy entries.
593 """
593 """
594 return [e for e in self if 'fuzzy' in e.flags]
594 return [e for e in self if 'fuzzy' in e.flags]
595
595
596 def obsolete_entries(self):
596 def obsolete_entries(self):
597 """
597 """
598 Convenience method that returns the list of obsolete entries.
598 Convenience method that returns the list of obsolete entries.
599 """
599 """
600 return [e for e in self if e.obsolete]
600 return [e for e in self if e.obsolete]
601
601
602 def merge(self, refpot):
602 def merge(self, refpot):
603 """
603 """
604 Convenience method that merges the current pofile with the pot file
604 Convenience method that merges the current pofile with the pot file
605 provided. It behaves exactly as the gettext msgmerge utility:
605 provided. It behaves exactly as the gettext msgmerge utility:
606
606
607 * comments of this file will be preserved, but extracted comments and
607 * comments of this file will be preserved, but extracted comments and
608 occurrences will be discarded;
608 occurrences will be discarded;
609 * any translations or comments in the file will be discarded, however,
609 * any translations or comments in the file will be discarded, however,
610 dot comments and file positions will be preserved;
610 dot comments and file positions will be preserved;
611 * the fuzzy flags are preserved.
611 * the fuzzy flags are preserved.
612
612
613 Keyword argument:
613 Keyword argument:
614
614
615 ``refpot``
615 ``refpot``
616 object POFile, the reference catalog.
616 object POFile, the reference catalog.
617 """
617 """
618 for entry in refpot:
618 for entry in refpot:
619 e = self.find(entry.msgid, include_obsolete_entries=True)
619 e = self.find(entry.msgid, include_obsolete_entries=True)
620 if e is None:
620 if e is None:
621 e = POEntry()
621 e = POEntry()
622 self.append(e)
622 self.append(e)
623 e.merge(entry)
623 e.merge(entry)
624 # ok, now we must "obsolete" entries that are not in the refpot anymore
624 # ok, now we must "obsolete" entries that are not in the refpot anymore
625 for entry in self:
625 for entry in self:
626 if refpot.find(entry.msgid) is None:
626 if refpot.find(entry.msgid) is None:
627 entry.obsolete = True
627 entry.obsolete = True
628
628
629 # }}}
629 # }}}
630 # class MOFile {{{
630 # class MOFile {{{
631
631
632 class MOFile(_BaseFile):
632 class MOFile(_BaseFile):
633 """
633 """
634 Mo file reader/writer.
634 Mo file reader/writer.
635 This class inherits the :class:`~polib._BaseFile` class and, by
635 This class inherits the :class:`~polib._BaseFile` class and, by
636 extension, the python ``list`` type.
636 extension, the python ``list`` type.
637 """
637 """
638 BIG_ENDIAN = 0xde120495
638 BIG_ENDIAN = 0xde120495
639 LITTLE_ENDIAN = 0x950412de
639 LITTLE_ENDIAN = 0x950412de
640
640
641 def __init__(self, *args, **kwargs):
641 def __init__(self, *args, **kwargs):
642 """
642 """
643 Constructor, accepts all keywords arguments accepted by
643 Constructor, accepts all keywords arguments accepted by
644 :class:`~polib._BaseFile` class.
644 :class:`~polib._BaseFile` class.
645 """
645 """
646 _BaseFile.__init__(self, *args, **kwargs)
646 _BaseFile.__init__(self, *args, **kwargs)
647 self.magic_number = None
647 self.magic_number = None
648 self.version = 0
648 self.version = 0
649
649
650 def save_as_pofile(self, fpath):
650 def save_as_pofile(self, fpath):
651 """
651 """
652 Saves the mofile as a pofile to ``fpath``.
652 Saves the mofile as a pofile to ``fpath``.
653
653
654 Keyword argument:
654 Keyword argument:
655
655
656 ``fpath``
656 ``fpath``
657 string, full or relative path to the file.
657 string, full or relative path to the file.
658 """
658 """
659 _BaseFile.save(self, fpath)
659 _BaseFile.save(self, fpath)
660
660
661 def save(self, fpath=None):
661 def save(self, fpath=None):
662 """
662 """
663 Saves the mofile to ``fpath``.
663 Saves the mofile to ``fpath``.
664
664
665 Keyword argument:
665 Keyword argument:
666
666
667 ``fpath``
667 ``fpath``
668 string, full or relative path to the file.
668 string, full or relative path to the file.
669 """
669 """
670 _BaseFile.save(self, fpath, 'to_binary')
670 _BaseFile.save(self, fpath, 'to_binary')
671
671
672 def percent_translated(self):
672 def percent_translated(self):
673 """
673 """
674 Convenience method to keep the same interface with POFile instances.
674 Convenience method to keep the same interface with POFile instances.
675 """
675 """
676 return 100
676 return 100
677
677
678 def translated_entries(self):
678 def translated_entries(self):
679 """
679 """
680 Convenience method to keep the same interface with POFile instances.
680 Convenience method to keep the same interface with POFile instances.
681 """
681 """
682 return self
682 return self
683
683
684 def untranslated_entries(self):
684 def untranslated_entries(self):
685 """
685 """
686 Convenience method to keep the same interface with POFile instances.
686 Convenience method to keep the same interface with POFile instances.
687 """
687 """
688 return []
688 return []
689
689
690 def fuzzy_entries(self):
690 def fuzzy_entries(self):
691 """
691 """
692 Convenience method to keep the same interface with POFile instances.
692 Convenience method to keep the same interface with POFile instances.
693 """
693 """
694 return []
694 return []
695
695
696 def obsolete_entries(self):
696 def obsolete_entries(self):
697 """
697 """
698 Convenience method to keep the same interface with POFile instances.
698 Convenience method to keep the same interface with POFile instances.
699 """
699 """
700 return []
700 return []
701
701
702 # }}}
702 # }}}
703 # class _BaseEntry {{{
703 # class _BaseEntry {{{
704
704
705 class _BaseEntry(object):
705 class _BaseEntry(object):
706 """
706 """
707 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
707 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
708 This class should **not** be instanciated directly.
708 This class should **not** be instanciated directly.
709 """
709 """
710
710
711 def __init__(self, *args, **kwargs):
711 def __init__(self, *args, **kwargs):
712 """
712 """
713 Constructor, accepts the following keyword arguments:
713 Constructor, accepts the following keyword arguments:
714
714
715 ``msgid``
715 ``msgid``
716 string, the entry msgid.
716 string, the entry msgid.
717
717
718 ``msgstr``
718 ``msgstr``
719 string, the entry msgstr.
719 string, the entry msgstr.
720
720
721 ``msgid_plural``
721 ``msgid_plural``
722 string, the entry msgid_plural.
722 string, the entry msgid_plural.
723
723
724 ``msgstr_plural``
724 ``msgstr_plural``
725 list, the entry msgstr_plural lines.
725 list, the entry msgstr_plural lines.
726
726
727 ``msgctxt``
727 ``msgctxt``
728 string, the entry context (msgctxt).
728 string, the entry context (msgctxt).
729
729
730 ``obsolete``
730 ``obsolete``
731 bool, whether the entry is "obsolete" or not.
731 bool, whether the entry is "obsolete" or not.
732
732
733 ``encoding``
733 ``encoding``
734 string, the encoding to use, defaults to ``default_encoding``
734 string, the encoding to use, defaults to ``default_encoding``
735 global variable (optional).
735 global variable (optional).
736 """
736 """
737 self.msgid = kwargs.get('msgid', '')
737 self.msgid = kwargs.get('msgid', '')
738 self.msgstr = kwargs.get('msgstr', '')
738 self.msgstr = kwargs.get('msgstr', '')
739 self.msgid_plural = kwargs.get('msgid_plural', '')
739 self.msgid_plural = kwargs.get('msgid_plural', '')
740 self.msgstr_plural = kwargs.get('msgstr_plural', {})
740 self.msgstr_plural = kwargs.get('msgstr_plural', {})
741 self.msgctxt = kwargs.get('msgctxt', None)
741 self.msgctxt = kwargs.get('msgctxt', None)
742 self.obsolete = kwargs.get('obsolete', False)
742 self.obsolete = kwargs.get('obsolete', False)
743 self.encoding = kwargs.get('encoding', default_encoding)
743 self.encoding = kwargs.get('encoding', default_encoding)
744
744
745 def __unicode__(self, wrapwidth=78):
745 def __unicode__(self, wrapwidth=78):
746 """
746 """
747 Returns the unicode representation of the entry.
747 Returns the unicode representation of the entry.
748 """
748 """
749 if self.obsolete:
749 if self.obsolete:
750 delflag = '#~ '
750 delflag = '#~ '
751 else:
751 else:
752 delflag = ''
752 delflag = ''
753 ret = []
753 ret = []
754 # write the msgctxt if any
754 # write the msgctxt if any
755 if self.msgctxt is not None:
755 if self.msgctxt is not None:
756 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
756 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
757 # write the msgid
757 # write the msgid
758 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
758 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
759 # write the msgid_plural if any
759 # write the msgid_plural if any
760 if self.msgid_plural:
760 if self.msgid_plural:
761 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
761 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
762 if self.msgstr_plural:
762 if self.msgstr_plural:
763 # write the msgstr_plural if any
763 # write the msgstr_plural if any
764 msgstrs = self.msgstr_plural
764 msgstrs = self.msgstr_plural
765 keys = list(msgstrs)
765 keys = list(msgstrs)
766 keys.sort()
766 keys.sort()
767 for index in keys:
767 for index in keys:
768 msgstr = msgstrs[index]
768 msgstr = msgstrs[index]
769 plural_index = '[%s]' % index
769 plural_index = '[%s]' % index
770 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
770 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
771 else:
771 else:
772 # otherwise write the msgstr
772 # otherwise write the msgstr
773 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
773 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
774 ret.append('')
774 ret.append('')
775 ret = '\n'.join(ret)
775 ret = '\n'.join(ret)
776
776
777 if type(ret) != types.UnicodeType:
777 if type(ret) != types.UnicodeType:
778 return unicode(ret, self.encoding)
778 return unicode(ret, self.encoding)
779 return ret
779 return ret
780
780
781 def __str__(self):
781 def __str__(self):
782 """
782 """
783 Returns the string representation of the entry.
783 Returns the string representation of the entry.
784 """
784 """
785 return unicode(self).encode(self.encoding)
785 return unicode(self).encode(self.encoding)
786
786
787 def __eq__(self, other):
787 def __eq__(self, other):
788 return unicode(self) == unicode(other)
788 return unicode(self) == unicode(other)
789
789
790 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
790 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
791 lines = field.splitlines(True)
791 lines = field.splitlines(True)
792 if len(lines) > 1:
792 if len(lines) > 1:
793 lines = [''] + lines # start with initial empty line
793 lines = [''] + lines # start with initial empty line
794 else:
794 else:
795 escaped_field = escape(field)
795 escaped_field = escape(field)
796 specialchars_count = 0
796 specialchars_count = 0
797 for c in ['\\', '\n', '\r', '\t', '"']:
797 for c in ['\\', '\n', '\r', '\t', '"']:
798 specialchars_count += field.count(c)
798 specialchars_count += field.count(c)
799 # comparison must take into account fieldname length + one space
799 # comparison must take into account fieldname length + one space
800 # + 2 quotes (eg. msgid "<string>")
800 # + 2 quotes (eg. msgid "<string>")
801 flength = len(fieldname) + 3
801 flength = len(fieldname) + 3
802 if plural_index:
802 if plural_index:
803 flength += len(plural_index)
803 flength += len(plural_index)
804 real_wrapwidth = wrapwidth - flength + specialchars_count
804 real_wrapwidth = wrapwidth - flength + specialchars_count
805 if wrapwidth > 0 and len(field) > real_wrapwidth:
805 if wrapwidth > 0 and len(field) > real_wrapwidth:
806 # Wrap the line but take field name into account
806 # Wrap the line but take field name into account
807 lines = [''] + [unescape(item) for item in wrap(
807 lines = [''] + [unescape(item) for item in textwrap.wrap(
808 escaped_field,
808 escaped_field,
809 wrapwidth - 2, # 2 for quotes ""
809 wrapwidth - 2, # 2 for quotes ""
810 drop_whitespace=False,
810 drop_whitespace=False,
811 break_long_words=False
811 break_long_words=False
812 )]
812 )]
813 else:
813 else:
814 lines = [field]
814 lines = [field]
815 if fieldname.startswith('previous_'):
815 if fieldname.startswith('previous_'):
816 # quick and dirty trick to get the real field name
816 # quick and dirty trick to get the real field name
817 fieldname = fieldname[9:]
817 fieldname = fieldname[9:]
818
818
819 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
819 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
820 escape(lines.pop(0)))]
820 escape(lines.pop(0)))]
821 for mstr in lines:
821 for mstr in lines:
822 ret.append('%s"%s"' % (delflag, escape(mstr)))
822 ret.append('%s"%s"' % (delflag, escape(mstr)))
823 return ret
823 return ret
824
824
825 # }}}
825 # }}}
826 # class POEntry {{{
826 # class POEntry {{{
827
827
828 class POEntry(_BaseEntry):
828 class POEntry(_BaseEntry):
829 """
829 """
830 Represents a po file entry.
830 Represents a po file entry.
831 """
831 """
832
832
833 def __init__(self, *args, **kwargs):
833 def __init__(self, *args, **kwargs):
834 """
834 """
835 Constructor, accepts the following keyword arguments:
835 Constructor, accepts the following keyword arguments:
836
836
837 ``comment``
837 ``comment``
838 string, the entry comment.
838 string, the entry comment.
839
839
840 ``tcomment``
840 ``tcomment``
841 string, the entry translator comment.
841 string, the entry translator comment.
842
842
843 ``occurrences``
843 ``occurrences``
844 list, the entry occurrences.
844 list, the entry occurrences.
845
845
846 ``flags``
846 ``flags``
847 list, the entry flags.
847 list, the entry flags.
848
848
849 ``previous_msgctxt``
849 ``previous_msgctxt``
850 string, the entry previous context.
850 string, the entry previous context.
851
851
852 ``previous_msgid``
852 ``previous_msgid``
853 string, the entry previous msgid.
853 string, the entry previous msgid.
854
854
855 ``previous_msgid_plural``
855 ``previous_msgid_plural``
856 string, the entry previous msgid_plural.
856 string, the entry previous msgid_plural.
857 """
857 """
858 _BaseEntry.__init__(self, *args, **kwargs)
858 _BaseEntry.__init__(self, *args, **kwargs)
859 self.comment = kwargs.get('comment', '')
859 self.comment = kwargs.get('comment', '')
860 self.tcomment = kwargs.get('tcomment', '')
860 self.tcomment = kwargs.get('tcomment', '')
861 self.occurrences = kwargs.get('occurrences', [])
861 self.occurrences = kwargs.get('occurrences', [])
862 self.flags = kwargs.get('flags', [])
862 self.flags = kwargs.get('flags', [])
863 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
863 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
864 self.previous_msgid = kwargs.get('previous_msgid', None)
864 self.previous_msgid = kwargs.get('previous_msgid', None)
865 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
865 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
866
866
867 def __unicode__(self, wrapwidth=78):
867 def __unicode__(self, wrapwidth=78):
868 """
868 """
869 Returns the unicode representation of the entry.
869 Returns the unicode representation of the entry.
870 """
870 """
871 if self.obsolete:
871 if self.obsolete:
872 return _BaseEntry.__unicode__(self, wrapwidth)
872 return _BaseEntry.__unicode__(self, wrapwidth)
873
873
874 ret = []
874 ret = []
875 # comments first, if any (with text wrapping as xgettext does)
875 # comments first, if any (with text wrapping as xgettext does)
876 comments = [('comment', '#. '), ('tcomment', '# ')]
876 comments = [('comment', '#. '), ('tcomment', '# ')]
877 for c in comments:
877 for c in comments:
878 val = getattr(self, c[0])
878 val = getattr(self, c[0])
879 if val:
879 if val:
880 for comment in val.split('\n'):
880 for comment in val.split('\n'):
881 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
881 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
882 ret += wrap(
882 ret += textwrap.wrap(
883 comment,
883 comment,
884 wrapwidth,
884 wrapwidth,
885 initial_indent=c[1],
885 initial_indent=c[1],
886 subsequent_indent=c[1],
886 subsequent_indent=c[1],
887 break_long_words=False
887 break_long_words=False
888 )
888 )
889 else:
889 else:
890 ret.append('%s%s' % (c[1], comment))
890 ret.append('%s%s' % (c[1], comment))
891
891
892 # occurrences (with text wrapping as xgettext does)
892 # occurrences (with text wrapping as xgettext does)
893 if self.occurrences:
893 if self.occurrences:
894 filelist = []
894 filelist = []
895 for fpath, lineno in self.occurrences:
895 for fpath, lineno in self.occurrences:
896 if lineno:
896 if lineno:
897 filelist.append('%s:%s' % (fpath, lineno))
897 filelist.append('%s:%s' % (fpath, lineno))
898 else:
898 else:
899 filelist.append(fpath)
899 filelist.append(fpath)
900 filestr = ' '.join(filelist)
900 filestr = ' '.join(filelist)
901 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
901 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
902 # textwrap split words that contain hyphen, this is not
902 # textwrap split words that contain hyphen, this is not
903 # what we want for filenames, so the dirty hack is to
903 # what we want for filenames, so the dirty hack is to
904 # temporally replace hyphens with a char that a file cannot
904 # temporally replace hyphens with a char that a file cannot
905 # contain, like "*"
905 # contain, like "*"
906 ret += [l.replace('*', '-') for l in wrap(
906 ret += [l.replace('*', '-') for l in textwrap.wrap(
907 filestr.replace('-', '*'),
907 filestr.replace('-', '*'),
908 wrapwidth,
908 wrapwidth,
909 initial_indent='#: ',
909 initial_indent='#: ',
910 subsequent_indent='#: ',
910 subsequent_indent='#: ',
911 break_long_words=False
911 break_long_words=False
912 )]
912 )]
913 else:
913 else:
914 ret.append('#: ' + filestr)
914 ret.append('#: ' + filestr)
915
915
916 # flags (TODO: wrapping ?)
916 # flags (TODO: wrapping ?)
917 if self.flags:
917 if self.flags:
918 ret.append('#, %s' % ', '.join(self.flags))
918 ret.append('#, %s' % ', '.join(self.flags))
919
919
920 # previous context and previous msgid/msgid_plural
920 # previous context and previous msgid/msgid_plural
921 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
921 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
922 for f in fields:
922 for f in fields:
923 val = getattr(self, f)
923 val = getattr(self, f)
924 if val:
924 if val:
925 ret += self._str_field(f, "#| ", "", val, wrapwidth)
925 ret += self._str_field(f, "#| ", "", val, wrapwidth)
926
926
927 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
927 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
928 ret = '\n'.join(ret)
928 ret = '\n'.join(ret)
929
929
930 if type(ret) != types.UnicodeType:
930 if type(ret) != types.UnicodeType:
931 return unicode(ret, self.encoding)
931 return unicode(ret, self.encoding)
932 return ret
932 return ret
933
933
934 def __cmp__(self, other):
934 def __cmp__(self, other):
935 """
935 """
936 Called by comparison operations if rich comparison is not defined.
936 Called by comparison operations if rich comparison is not defined.
937 """
937 """
938 def compare_occurrences(a, b):
938 def compare_occurrences(a, b):
939 """
939 """
940 Compare an entry occurrence with another one.
940 Compare an entry occurrence with another one.
941 """
941 """
942 if a[0] != b[0]:
942 if a[0] != b[0]:
943 return a[0] < b[0]
943 return a[0] < b[0]
944 if a[1] != b[1]:
944 if a[1] != b[1]:
945 return a[1] < b[1]
945 return a[1] < b[1]
946 return 0
946 return 0
947
947
948 # First: Obsolete test
948 # First: Obsolete test
949 if self.obsolete != other.obsolete:
949 if self.obsolete != other.obsolete:
950 if self.obsolete:
950 if self.obsolete:
951 return -1
951 return -1
952 else:
952 else:
953 return 1
953 return 1
954 # Work on a copy to protect original
954 # Work on a copy to protect original
955 occ1 = self.occurrences[:]
955 occ1 = self.occurrences[:]
956 occ2 = other.occurrences[:]
956 occ2 = other.occurrences[:]
957 # Sorting using compare method
957 # Sorting using compare method
958 occ1.sort(compare_occurrences)
958 occ1.sort(compare_occurrences)
959 occ2.sort(compare_occurrences)
959 occ2.sort(compare_occurrences)
960 # Comparing sorted occurrences
960 # Comparing sorted occurrences
961 pos = 0
961 pos = 0
962 for entry1 in occ1:
962 for entry1 in occ1:
963 try:
963 try:
964 entry2 = occ2[pos]
964 entry2 = occ2[pos]
965 except IndexError:
965 except IndexError:
966 return 1
966 return 1
967 pos = pos + 1
967 pos = pos + 1
968 if entry1[0] != entry2[0]:
968 if entry1[0] != entry2[0]:
969 if entry1[0] > entry2[0]:
969 if entry1[0] > entry2[0]:
970 return 1
970 return 1
971 else:
971 else:
972 return -1
972 return -1
973 if entry1[1] != entry2[1]:
973 if entry1[1] != entry2[1]:
974 if entry1[1] > entry2[1]:
974 if entry1[1] > entry2[1]:
975 return 1
975 return 1
976 else:
976 else:
977 return -1
977 return -1
978 # Finally: Compare message ID
978 # Finally: Compare message ID
979 if self.msgid > other.msgid: return 1
979 if self.msgid > other.msgid: return 1
980 else: return -1
980 else: return -1
981
981
982 def translated(self):
982 def translated(self):
983 """
983 """
984 Returns ``True`` if the entry has been translated or ``False``
984 Returns ``True`` if the entry has been translated or ``False``
985 otherwise.
985 otherwise.
986 """
986 """
987 if self.obsolete or 'fuzzy' in self.flags:
987 if self.obsolete or 'fuzzy' in self.flags:
988 return False
988 return False
989 if self.msgstr != '':
989 if self.msgstr != '':
990 return True
990 return True
991 if self.msgstr_plural:
991 if self.msgstr_plural:
992 for pos in self.msgstr_plural:
992 for pos in self.msgstr_plural:
993 if self.msgstr_plural[pos] == '':
993 if self.msgstr_plural[pos] == '':
994 return False
994 return False
995 return True
995 return True
996 return False
996 return False
997
997
998 def merge(self, other):
998 def merge(self, other):
999 """
999 """
1000 Merge the current entry with the given pot entry.
1000 Merge the current entry with the given pot entry.
1001 """
1001 """
1002 self.msgid = other.msgid
1002 self.msgid = other.msgid
1003 self.msgctxt = other.msgctxt
1003 self.msgctxt = other.msgctxt
1004 self.occurrences = other.occurrences
1004 self.occurrences = other.occurrences
1005 self.comment = other.comment
1005 self.comment = other.comment
1006 fuzzy = 'fuzzy' in self.flags
1006 fuzzy = 'fuzzy' in self.flags
1007 self.flags = other.flags[:] # clone flags
1007 self.flags = other.flags[:] # clone flags
1008 if fuzzy:
1008 if fuzzy:
1009 self.flags.append('fuzzy')
1009 self.flags.append('fuzzy')
1010 self.msgid_plural = other.msgid_plural
1010 self.msgid_plural = other.msgid_plural
1011 self.obsolete = other.obsolete
1011 self.obsolete = other.obsolete
1012 self.previous_msgctxt = other.previous_msgctxt
1012 self.previous_msgctxt = other.previous_msgctxt
1013 self.previous_msgid = other.previous_msgid
1013 self.previous_msgid = other.previous_msgid
1014 self.previous_msgid_plural = other.previous_msgid_plural
1014 self.previous_msgid_plural = other.previous_msgid_plural
1015 if other.msgstr_plural:
1015 if other.msgstr_plural:
1016 for pos in other.msgstr_plural:
1016 for pos in other.msgstr_plural:
1017 try:
1017 try:
1018 # keep existing translation at pos if any
1018 # keep existing translation at pos if any
1019 self.msgstr_plural[pos]
1019 self.msgstr_plural[pos]
1020 except KeyError:
1020 except KeyError:
1021 self.msgstr_plural[pos] = ''
1021 self.msgstr_plural[pos] = ''
1022
1022
1023 # }}}
1023 # }}}
1024 # class MOEntry {{{
1024 # class MOEntry {{{
1025
1025
1026 class MOEntry(_BaseEntry):
1026 class MOEntry(_BaseEntry):
1027 """
1027 """
1028 Represents a mo file entry.
1028 Represents a mo file entry.
1029 """
1029 """
1030 pass
1030 pass
1031
1031
1032 # }}}
1032 # }}}
1033 # class _POFileParser {{{
1033 # class _POFileParser {{{
1034
1034
1035 class _POFileParser(object):
1035 class _POFileParser(object):
1036 """
1036 """
1037 A finite state machine to parse efficiently and correctly po
1037 A finite state machine to parse efficiently and correctly po
1038 file format.
1038 file format.
1039 """
1039 """
1040
1040
1041 def __init__(self, pofile, *args, **kwargs):
1041 def __init__(self, pofile, *args, **kwargs):
1042 """
1042 """
1043 Constructor.
1043 Constructor.
1044
1044
1045 Keyword arguments:
1045 Keyword arguments:
1046
1046
1047 ``pofile``
1047 ``pofile``
1048 string, path to the po file or its content
1048 string, path to the po file or its content
1049
1049
1050 ``encoding``
1050 ``encoding``
1051 string, the encoding to use, defaults to ``default_encoding``
1051 string, the encoding to use, defaults to ``default_encoding``
1052 global variable (optional).
1052 global variable (optional).
1053
1053
1054 ``check_for_duplicates``
1054 ``check_for_duplicates``
1055 whether to check for duplicate entries when adding entries to the
1055 whether to check for duplicate entries when adding entries to the
1056 file (optional, default: ``False``).
1056 file (optional, default: ``False``).
1057 """
1057 """
1058 enc = kwargs.get('encoding', default_encoding)
1058 enc = kwargs.get('encoding', default_encoding)
1059 if os.path.exists(pofile):
1059 if os.path.exists(pofile):
1060 try:
1060 try:
1061 self.fhandle = codecs.open(pofile, 'rU', enc)
1061 self.fhandle = codecs.open(pofile, 'rU', enc)
1062 except LookupError:
1062 except LookupError:
1063 enc = default_encoding
1063 enc = default_encoding
1064 self.fhandle = codecs.open(pofile, 'rU', enc)
1064 self.fhandle = codecs.open(pofile, 'rU', enc)
1065 else:
1065 else:
1066 self.fhandle = pofile.splitlines()
1066 self.fhandle = pofile.splitlines()
1067
1067
1068 self.instance = POFile(
1068 self.instance = POFile(
1069 pofile=pofile,
1069 pofile=pofile,
1070 encoding=enc,
1070 encoding=enc,
1071 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1071 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1072 )
1072 )
1073 self.transitions = {}
1073 self.transitions = {}
1074 self.current_entry = POEntry()
1074 self.current_entry = POEntry()
1075 self.current_state = 'ST'
1075 self.current_state = 'ST'
1076 self.current_token = None
1076 self.current_token = None
1077 # two memo flags used in handlers
1077 # two memo flags used in handlers
1078 self.msgstr_index = 0
1078 self.msgstr_index = 0
1079 self.entry_obsolete = 0
1079 self.entry_obsolete = 0
1080 # Configure the state machine, by adding transitions.
1080 # Configure the state machine, by adding transitions.
1081 # Signification of symbols:
1081 # Signification of symbols:
1082 # * ST: Beginning of the file (start)
1082 # * ST: Beginning of the file (start)
1083 # * HE: Header
1083 # * HE: Header
1084 # * TC: a translation comment
1084 # * TC: a translation comment
1085 # * GC: a generated comment
1085 # * GC: a generated comment
1086 # * OC: a file/line occurence
1086 # * OC: a file/line occurence
1087 # * FL: a flags line
1087 # * FL: a flags line
1088 # * CT: a message context
1088 # * CT: a message context
1089 # * PC: a previous msgctxt
1089 # * PC: a previous msgctxt
1090 # * PM: a previous msgid
1090 # * PM: a previous msgid
1091 # * PP: a previous msgid_plural
1091 # * PP: a previous msgid_plural
1092 # * MI: a msgid
1092 # * MI: a msgid
1093 # * MP: a msgid plural
1093 # * MP: a msgid plural
1094 # * MS: a msgstr
1094 # * MS: a msgstr
1095 # * MX: a msgstr plural
1095 # * MX: a msgstr plural
1096 # * MC: a msgid or msgstr continuation line
1096 # * MC: a msgid or msgstr continuation line
1097 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1097 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1098 'MS', 'MP', 'MX', 'MI']
1098 'MS', 'MP', 'MX', 'MI']
1099
1099
1100 self.add('TC', ['ST', 'HE'], 'HE')
1100 self.add('TC', ['ST', 'HE'], 'HE')
1101 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1101 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1102 'MP', 'MX', 'MI'], 'TC')
1102 'MP', 'MX', 'MI'], 'TC')
1103 self.add('GC', all, 'GC')
1103 self.add('GC', all, 'GC')
1104 self.add('OC', all, 'OC')
1104 self.add('OC', all, 'OC')
1105 self.add('FL', all, 'FL')
1105 self.add('FL', all, 'FL')
1106 self.add('PC', all, 'PC')
1106 self.add('PC', all, 'PC')
1107 self.add('PM', all, 'PM')
1107 self.add('PM', all, 'PM')
1108 self.add('PP', all, 'PP')
1108 self.add('PP', all, 'PP')
1109 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1109 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1110 'PP', 'MS', 'MX'], 'CT')
1110 'PP', 'MS', 'MX'], 'CT')
1111 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1111 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1112 'PM', 'PP', 'MS', 'MX'], 'MI')
1112 'PM', 'PP', 'MS', 'MX'], 'MI')
1113 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1113 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1114 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1114 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1115 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1115 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1116 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1116 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1117
1117
def parse(self):
    """
    Run the state machine, parse the file line by line and call process()
    with the current matched symbol.

    Each non-blank line of ``self.fhandle`` is stripped, classified and
    stored (minus its keyword/marker where applicable) in
    ``self.current_token`` before process() is called. When all lines are
    consumed the pending entry is appended to the instance; if the first
    entry has an empty msgid it is removed and its msgstr is split into
    the instance ``metadata`` dict.

    Returns ``self.instance``. process() raises IOError for lines that do
    not match any transition. The file handle is closed in every case.
    """
    keywords = {
        'msgctxt': 'CT',
        'msgid': 'MI',
        'msgstr': 'MS',
        'msgid_plural': 'MP',
    }
    prev_keywords = {
        'msgid_plural': 'PP',
        'msgid': 'PM',
        'msgctxt': 'PC',
    }

    try:
        i = 0
        for line in self.fhandle:
            i += 1
            line = line.strip()
            if line == '':
                continue

            tokens = line.split(None, 2)
            nb_tokens = len(tokens)

            if tokens[0] == '#~' and nb_tokens > 1:
                # Obsolete entry: drop the marker and classify the rest
                # of the line as if it were a regular one.
                line = line[3:].strip()
                tokens = tokens[1:]
                nb_tokens -= 1
                self.entry_obsolete = 1
            else:
                self.entry_obsolete = 0

            # Take care of keywords like
            # msgid, msgid_plural, msgctxt & msgstr.
            if tokens[0] in keywords and nb_tokens > 1:
                line = line[len(tokens[0]):].lstrip()
                self.current_token = line
                self.process(keywords[tokens[0]], i)
                continue

            self.current_token = line

            if tokens[0] == '#:' and nb_tokens > 1:
                # we are on a occurrences line
                self.process('OC', i)

            elif line[:1] == '"':
                # we are on a continuation line
                self.process('MC', i)

            elif line[:7] == 'msgstr[':
                # we are on a msgstr plural
                self.process('MX', i)

            elif tokens[0] == '#,' and nb_tokens > 1:
                # we are on a flags line
                self.process('FL', i)

            elif tokens[0] == '#':
                # we are on a translator comment line; a bare '#' needs
                # no padding because handlers slice current_token, and
                # slicing past the end yields an empty string.
                self.process('TC', i)

            elif tokens[0] == '#.' and nb_tokens > 1:
                # we are on a generated comment line
                self.process('GC', i)

            elif tokens[0] == '#|':
                if nb_tokens < 2:
                    self.process('??', i)
                    continue

                # Remove the marker and any whitespace right after that.
                line = line[2:].lstrip()
                self.current_token = line

                if tokens[1].startswith('"'):
                    # Continuation of previous metadata.
                    self.process('MC', i)
                    continue

                if nb_tokens == 2:
                    # Invalid continuation line.
                    self.process('??', i)

                # we are on a "previous translation" comment line,
                if tokens[1] not in prev_keywords:
                    # Unknown keyword in previous translation comment.
                    self.process('??', i)

                # Remove the keyword and any whitespace
                # between it and the starting quote.
                line = line[len(tokens[1]):].lstrip()
                self.current_token = line
                self.process(prev_keywords[tokens[1]], i)

            else:
                self.process('??', i)

        if self.current_entry:
            # since entries are added when another entry is found, we must
            # add the last entry here (only if there are lines)
            self.instance.append(self.current_entry)

        # before returning the instance, check if there's metadata and if
        # so extract it in a dict
        try:
            firstentry = self.instance[0]
        except IndexError:
            # nothing was parsed at all: there is no metadata to extract
            firstentry = None
        if firstentry is not None and firstentry.msgid == '':
            # metadata found: remove the entry
            firstentry = self.instance.pop(0)
            self.instance.metadata_is_fuzzy = firstentry.flags
            key = None
            for msg in firstentry.msgstr.splitlines():
                try:
                    key, val = msg.split(':', 1)
                except ValueError:
                    # No colon: the line continues the previous header.
                    if key is not None:
                        self.instance.metadata[key] += '\n' + msg.strip()
                else:
                    self.instance.metadata[key] = val.strip()
    finally:
        # close opened file; duck-typed so that any file-like handle is
        # released, while plain sequences of lines are left alone.
        close = getattr(self.fhandle, 'close', None)
        if close is not None:
            close()
    return self.instance
1244
1244
def add(self, symbol, states, next_state):
    """
    Register transitions in the state machine.

    Keyword arguments:

    ``symbol``
        string, the matched token (two chars symbol).

    ``states``
        list, the states (two chars symbols) from which ``symbol`` is
        accepted.

    ``next_state``
        the state the fsm will have after the action; it also selects
        the ``handle_<next_state>`` method used as the action.
    """
    # One (action, next_state) pair is stored per (symbol, state) key.
    self.transitions.update(
        ((symbol, origin),
         (getattr(self, 'handle_%s' % next_state.lower()), next_state))
        for origin in states
    )
1263
1263
def process(self, symbol, linenum):
    """
    Fire the transition registered for the current state and the given
    symbol.

    Keyword arguments:

    ``symbol``
        string, the matched token (two chars symbol).

    ``linenum``
        integer, the current line number of the parsed file.

    The state only changes when the action returns a true value. Any
    failure (unknown transition or error inside the action) is reported
    as an IOError mentioning ``linenum``.
    """
    try:
        handler, target = self.transitions[(symbol, self.current_state)]
        moved = handler()
        if moved:
            self.current_state = target
    except Exception:
        raise IOError('Syntax error in po file (line %s)' % linenum)
1283
1283
1284 # state handlers
1284 # state handlers
1285
1285
def handle_he(self):
    """Append the current header comment line to the instance header."""
    pofile = self.instance
    # Lines are joined with a newline; the first one gets no separator.
    separator = '\n' if pofile.header != '' else ''
    # The first two characters of the token (comment marker) are dropped.
    pofile.header += separator + self.current_token[2:]
    return 1
1292
1292
def handle_tc(self):
    """Append a translator comment line to the current entry."""
    if self.current_state in ('MC', 'MS', 'MX'):
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    # Drop the first two characters of the token (comment marker).
    text = self.current_token[2:]
    if entry.tcomment != '':
        text = '\n' + text
    entry.tcomment += text
    return True
1302
1302
def handle_gc(self):
    """Append a generated comment line to the current entry."""
    if self.current_state in ('MC', 'MS', 'MX'):
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    # Drop the first three characters of the token (comment marker).
    text = self.current_token[3:]
    if entry.comment != '':
        text = '\n' + text
    entry.comment += text
    return True
1312
1312
def handle_oc(self):
    """
    Handle a file:num occurrence line.

    Every whitespace-separated reference of the line is appended to the
    current entry ``occurrences`` as a ``(file, line)`` tuple. The line
    number is whatever follows the *last* colon, provided it is made of
    digits only; otherwise the whole reference is kept as the file name
    and the line is ``''``.
    """
    if self.current_state in ['MC', 'MS', 'MX']:
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Skip the first three characters of the token (the '#:' marker and
    # the whitespace that follows it).
    for occurrence in self.current_token[3:].split():
        # Split on the last colon so that file names that themselves
        # contain colons (e.g. drive letters) keep their full path.
        fil, sep, line = occurrence.rpartition(':')
        if sep and line.isdigit():
            self.current_entry.occurrences.append((fil, line))
        else:
            # No usable line number: keep the reference untouched
            # (including any colon it contains).
            self.current_entry.occurrences.append((occurrence, ''))
    return True
1330
1330
def handle_fl(self):
    """
    Handle a flags line.

    The comma-separated flags found after the marker are appended to the
    current entry ``flags`` list. Whitespace around each flag is
    ignored, so ``fuzzy,c-format`` and ``fuzzy, c-format`` give the same
    result; empty items are skipped.
    """
    if self.current_state in ['MC', 'MS', 'MX']:
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Skip the first three characters of the token (the '#,' marker and
    # the whitespace that follows it).
    candidates = [flag.strip() for flag in self.current_token[3:].split(',')]
    self.current_entry.flags += [flag for flag in candidates if flag]
    return True
1338
1338
def handle_pp(self):
    """Store the previous msgid_plural of the current entry."""
    if self.current_state in ('MC', 'MS', 'MX'):
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Strip the first and last characters (the surrounding quotes).
    quoted = self.current_token
    self.current_entry.previous_msgid_plural = unescape(quoted[1:-1])
    return True
1347
1347
def handle_pm(self):
    """Store the previous msgid of the current entry."""
    if self.current_state in ('MC', 'MS', 'MX'):
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Strip the first and last characters (the surrounding quotes).
    quoted = self.current_token
    self.current_entry.previous_msgid = unescape(quoted[1:-1])
    return True
1356
1356
def handle_pc(self):
    """Store the previous msgctxt of the current entry."""
    if self.current_state in ('MC', 'MS', 'MX'):
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Strip the first and last characters (the surrounding quotes).
    quoted = self.current_token
    self.current_entry.previous_msgctxt = unescape(quoted[1:-1])
    return True
1365
1365
def handle_ct(self):
    """Store the msgctxt of the current entry."""
    if self.current_state in ('MC', 'MS', 'MX'):
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    # Strip the first and last characters (the surrounding quotes).
    quoted = self.current_token
    self.current_entry.msgctxt = unescape(quoted[1:-1])
    return True
1373
1373
def handle_mi(self):
    """Store the msgid (and obsolete marker) of the current entry."""
    if self.current_state in ('MC', 'MS', 'MX'):
        # The previous entry is complete: store it and start a new one.
        self.instance.append(self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    # entry_obsolete is set by parse() for every line it reads.
    entry.obsolete = self.entry_obsolete
    # Strip the first and last characters (the surrounding quotes).
    entry.msgid = unescape(self.current_token[1:-1])
    return True
1382
1382
def handle_mp(self):
    """Store the msgid_plural of the current entry."""
    # Strip the first and last characters (the surrounding quotes).
    unquoted = self.current_token[1:-1]
    self.current_entry.msgid_plural = unescape(unquoted)
    return True
1387
1387
def handle_ms(self):
    """Store the msgstr of the current entry."""
    # Strip the first and last characters (the surrounding quotes).
    unquoted = self.current_token[1:-1]
    self.current_entry.msgstr = unescape(unquoted)
    return True
1392
1392
def handle_mx(self):
    """
    Handle a msgstr plural (``msgstr[N] "..."``).

    The index is the text found between the brackets (kept as a string,
    as before) and the value is everything between the first quote
    after the closing bracket and the last character of the token. The
    positions are searched for rather than assumed, so the amount of
    whitespace before the quote and the width of the index do not
    matter.

    A token without ``]`` or without an opening quote raises ValueError.
    """
    token = self.current_token
    # parse() only dispatches here for tokens starting with 'msgstr[',
    # hence the index starts at offset 7.
    closing = token.index(']', 7)
    index = token[7:closing]
    value = token[token.index('"', closing) + 1:-1]
    self.current_entry.msgstr_plural[index] = unescape(value)
    # Remembered so that continuation lines know which plural to extend.
    self.msgstr_index = index
    return True
1399
1399
def handle_mc(self):
    """
    Handle a continuation line.

    The unquoted, unescaped token is appended to the field selected by
    the current state (msgctxt, msgid, msgid_plural, msgstr, the current
    plural msgstr, or one of the previous_* fields). Tokens received in
    any other state are ignored.

    Always returns False so that process() leaves the state unchanged.
    """
    # Strip the first and last characters (the surrounding quotes).
    token = unescape(self.current_token[1:-1])
    state = self.current_state
    entry = self.current_entry

    if state == 'MX':
        entry.msgstr_plural[self.msgstr_index] += token
        return False

    # NOTE: for the previous_* fields the token is appended whole.
    # parse() already removes the '#|' marker before dispatching the
    # continuation, so there is no prefix left to cut off here.
    fields = {
        'CT': 'msgctxt',
        'MI': 'msgid',
        'MP': 'msgid_plural',
        'MS': 'msgstr',
        'PP': 'previous_msgid_plural',
        'PM': 'previous_msgid',
        'PC': 'previous_msgctxt',
    }
    field = fields.get(state)
    if field is not None:
        setattr(entry, field, getattr(entry, field) + token)
    # don't change the current state
    return False
1432
1432
1433 # }}}
1433 # }}}
1434 # class _MOFileParser {{{
1434 # class _MOFileParser {{{
1435
1435
1436 class _MOFileParser(object):
1436 class _MOFileParser(object):
1437 """
1437 """
1438 A class to parse binary mo files.
1438 A class to parse binary mo files.
1439 """
1439 """
1440
1440
1441 def __init__(self, mofile, *args, **kwargs):
1441 def __init__(self, mofile, *args, **kwargs):
1442 """
1442 """
1443 Constructor.
1443 Constructor.
1444
1444
1445 Keyword arguments:
1445 Keyword arguments:
1446
1446
1447 ``mofile``
1447 ``mofile``
1448 string, path to the mo file or its content
1448 string, path to the mo file or its content
1449
1449
1450 ``encoding``
1450 ``encoding``
1451 string, the encoding to use, defaults to ``default_encoding``
1451 string, the encoding to use, defaults to ``default_encoding``
1452 global variable (optional).
1452 global variable (optional).
1453
1453
1454 ``check_for_duplicates``
1454 ``check_for_duplicates``
1455 whether to check for duplicate entries when adding entries to the
1455 whether to check for duplicate entries when adding entries to the
1456 file (optional, default: ``False``).
1456 file (optional, default: ``False``).
1457 """
1457 """
1458 self.fhandle = open(mofile, 'rb')
1458 self.fhandle = open(mofile, 'rb')
1459 self.instance = MOFile(
1459 self.instance = MOFile(
1460 fpath=mofile,
1460 fpath=mofile,
1461 encoding=kwargs.get('encoding', default_encoding),
1461 encoding=kwargs.get('encoding', default_encoding),
1462 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1462 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1463 )
1463 )
1464
1464
1465 def parse(self):
1465 def parse(self):
1466 """
1466 """
1467 Build the instance with the file handle provided in the
1467 Build the instance with the file handle provided in the
1468 constructor.
1468 constructor.
1469 """
1469 """
1470 # parse magic number
1470 # parse magic number
1471 magic_number = self._readbinary('<I', 4)
1471 magic_number = self._readbinary('<I', 4)
1472 if magic_number == MOFile.LITTLE_ENDIAN:
1472 if magic_number == MOFile.LITTLE_ENDIAN:
1473 ii = '<II'
1473 ii = '<II'
1474 elif magic_number == MOFile.BIG_ENDIAN:
1474 elif magic_number == MOFile.BIG_ENDIAN:
1475 ii = '>II'
1475 ii = '>II'
1476 else:
1476 else:
1477 raise IOError('Invalid mo file, magic number is incorrect !')
1477 raise IOError('Invalid mo file, magic number is incorrect !')
1478 self.instance.magic_number = magic_number
1478 self.instance.magic_number = magic_number
1479 # parse the version number and the number of strings
1479 # parse the version number and the number of strings
1480 self.instance.version, numofstrings = self._readbinary(ii, 8)
1480 self.instance.version, numofstrings = self._readbinary(ii, 8)
1481 # original strings and translation strings hash table offset
1481 # original strings and translation strings hash table offset
1482 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1482 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1483 # move to msgid hash table and read length and offset of msgids
1483 # move to msgid hash table and read length and offset of msgids
1484 self.fhandle.seek(msgids_hash_offset)
1484 self.fhandle.seek(msgids_hash_offset)
1485 msgids_index = []
1485 msgids_index = []
1486 for i in range(numofstrings):
1486 for i in range(numofstrings):
1487 msgids_index.append(self._readbinary(ii, 8))
1487 msgids_index.append(self._readbinary(ii, 8))
1488 # move to msgstr hash table and read length and offset of msgstrs
1488 # move to msgstr hash table and read length and offset of msgstrs
1489 self.fhandle.seek(msgstrs_hash_offset)
1489 self.fhandle.seek(msgstrs_hash_offset)
1490 msgstrs_index = []
1490 msgstrs_index = []
1491 for i in range(numofstrings):
1491 for i in range(numofstrings):
1492 msgstrs_index.append(self._readbinary(ii, 8))
1492 msgstrs_index.append(self._readbinary(ii, 8))
1493 # build entries
1493 # build entries
1494 for i in range(numofstrings):
1494 for i in range(numofstrings):
1495 self.fhandle.seek(msgids_index[i][1])
1495 self.fhandle.seek(msgids_index[i][1])
1496 msgid = self.fhandle.read(msgids_index[i][0])
1496 msgid = self.fhandle.read(msgids_index[i][0])
1497 self.fhandle.seek(msgstrs_index[i][1])
1497 self.fhandle.seek(msgstrs_index[i][1])
1498 msgstr = self.fhandle.read(msgstrs_index[i][0])
1498 msgstr = self.fhandle.read(msgstrs_index[i][0])
1499 if i == 0: # metadata
1499 if i == 0: # metadata
1500 raw_metadata, metadata = msgstr.split('\n'), {}
1500 raw_metadata, metadata = msgstr.split('\n'), {}
1501 for line in raw_metadata:
1501 for line in raw_metadata:
1502 tokens = line.split(':', 1)
1502 tokens = line.split(':', 1)
1503 if tokens[0] != '':
1503 if tokens[0] != '':
1504 try:
1504 try:
1505 metadata[tokens[0]] = tokens[1].strip()
1505 metadata[tokens[0]] = tokens[1].strip()
1506 except IndexError:
1506 except IndexError:
1507 metadata[tokens[0]] = ''
1507 metadata[tokens[0]] = ''
1508 self.instance.metadata = metadata
1508 self.instance.metadata = metadata
1509 continue
1509 continue
1510 # test if we have a plural entry
1510 # test if we have a plural entry
1511 msgid_tokens = msgid.split('\0')
1511 msgid_tokens = msgid.split('\0')
1512 if len(msgid_tokens) > 1:
1512 if len(msgid_tokens) > 1:
1513 entry = self._build_entry(
1513 entry = self._build_entry(
1514 msgid=msgid_tokens[0],
1514 msgid=msgid_tokens[0],
1515 msgid_plural=msgid_tokens[1],
1515 msgid_plural=msgid_tokens[1],
1516 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
1516 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
1517 )
1517 )
1518 else:
1518 else:
1519 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1519 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1520 self.instance.append(entry)
1520 self.instance.append(entry)
1521 # close opened file
1521 # close opened file
1522 self.fhandle.close()
1522 self.fhandle.close()
1523 return self.instance
1523 return self.instance
1524
1524
1525 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1525 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1526 msgstr_plural=None):
1526 msgstr_plural=None):
1527 msgctxt_msgid = msgid.split('\x04')
1527 msgctxt_msgid = msgid.split('\x04')
1528 if len(msgctxt_msgid) > 1:
1528 if len(msgctxt_msgid) > 1:
1529 kwargs = {
1529 kwargs = {
1530 'msgctxt': msgctxt_msgid[0],
1530 'msgctxt': msgctxt_msgid[0],
1531 'msgid' : msgctxt_msgid[1],
1531 'msgid' : msgctxt_msgid[1],
1532 }
1532 }
1533 else:
1533 else:
1534 kwargs = {'msgid': msgid}
1534 kwargs = {'msgid': msgid}
1535 if msgstr:
1535 if msgstr:
1536 kwargs['msgstr'] = msgstr
1536 kwargs['msgstr'] = msgstr
1537 if msgid_plural:
1537 if msgid_plural:
1538 kwargs['msgid_plural'] = msgid_plural
1538 kwargs['msgid_plural'] = msgid_plural
1539 if msgstr_plural:
1539 if msgstr_plural:
1540 kwargs['msgstr_plural'] = msgstr_plural
1540 kwargs['msgstr_plural'] = msgstr_plural
1541 return MOEntry(**kwargs)
1541 return MOEntry(**kwargs)
1542
1542
1543 def _readbinary(self, fmt, numbytes):
1543 def _readbinary(self, fmt, numbytes):
1544 """
1544 """
1545 Private method that unpack n bytes of data using format <fmt>.
1545 Private method that unpack n bytes of data using format <fmt>.
1546 It returns a tuple or a mixed value if the tuple length is 1.
1546 It returns a tuple or a mixed value if the tuple length is 1.
1547 """
1547 """
1548 bytes = self.fhandle.read(numbytes)
1548 bytes = self.fhandle.read(numbytes)
1549 tup = struct.unpack(fmt, bytes)
1549 tup = struct.unpack(fmt, bytes)
1550 if len(tup) == 1:
1550 if len(tup) == 1:
1551 return tup[0]
1551 return tup[0]
1552 return tup
1552 return tup
1553
1553
1554 # }}}
1554 # }}}
1555 # class TextWrapper {{{
1556
class TextWrapper(textwrap.TextWrapper):
    """
    textwrap.TextWrapper subclass providing the drop_whitespace option
    on top of the standard wrapper.
    """
    def __init__(self, *args, **kwargs):
        # The option is removed from kwargs before delegating, then
        # stored on the instance once the base class is initialised.
        keep_option = kwargs.pop('drop_whitespace', True)
        textwrap.TextWrapper.__init__(self, *args, **kwargs)
        self.drop_whitespace = keep_option

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Assemble the given chunks (words and the whitespace between
        them) into lines of at most 'self.width' characters and return
        those lines. A line may exceed the width only when
        'break_long_words' is false. When 'drop_whitespace' is set,
        whitespace chunks are removed at the end of every line and at
        the beginning of every line but the first.
        """
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        wrapped = []

        # Work on the reversed list: the next chunk is always the last
        # item, which is cheap to pop.
        chunks.reverse()

        while chunks:
            # Pick the prefix of this line and the room left after it.
            prefix = self.subsequent_indent if wrapped else self.initial_indent
            room = self.width - len(prefix)

            # Leading whitespace is dropped, except on the first line.
            if wrapped and self.drop_whitespace and chunks[-1].strip() == '':
                chunks.pop()

            # Take chunks as long as they fit on the line.
            pieces = []
            used = 0
            while chunks and used + len(chunks[-1]) <= room:
                piece = chunks.pop()
                pieces.append(piece)
                used += len(piece)

            # The next chunk cannot fit on any line: let the base class
            # decide how to deal with it.
            if chunks and len(chunks[-1]) > room:
                self._handle_long_word(chunks, pieces, used, room)

            # Trailing whitespace is dropped.
            if self.drop_whitespace and pieces and pieces[-1].strip() == '':
                pieces.pop()

            # Only non-empty lines are kept.
            if pieces:
                wrapped.append(prefix + ''.join(pieces))

        return wrapped
1636
1637 # }}}
1638 # function wrap() {{{
1639
def wrap(text, width=70, **kwargs):
    """
    Wrap a single paragraph of text and return the list of wrapped lines.

    The standard ``textwrap.wrap`` is used on Python 2.6 and later; older
    interpreters go through the local TextWrapper class.
    """
    if sys.version_info >= (2, 6):
        return textwrap.wrap(text, width=width, **kwargs)
    return TextWrapper(width=width, **kwargs).wrap(text)
1647
1648 #}}}
General Comments 0
You need to be logged in to leave comments. Login now