##// END OF EJS Templates
polib: remove unnecessary comparisons with True...
Martin Geisler -
r13030:8ea51e9e default
parent child Browse files
Show More
@@ -1,1680 +1,1680 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 # no-check-code
2 # no-check-code
3 #
3 #
4 # License: MIT (see LICENSE file provided)
4 # License: MIT (see LICENSE file provided)
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6
6
7 """
7 """
8 **polib** allows you to manipulate, create, modify gettext files (pot, po
8 **polib** allows you to manipulate, create, modify gettext files (pot, po
9 and mo files). You can load existing files, iterate through it's entries,
9 and mo files). You can load existing files, iterate through it's entries,
10 add, modify entries, comments or metadata, etc... or create new po files
10 add, modify entries, comments or metadata, etc... or create new po files
11 from scratch.
11 from scratch.
12
12
13 **polib** provides a simple and pythonic API, exporting only three
13 **polib** provides a simple and pythonic API, exporting only three
14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
16 new files/entries.
16 new files/entries.
17
17
18 **Basic example**:
18 **Basic example**:
19
19
20 >>> import polib
20 >>> import polib
21 >>> # load an existing po file
21 >>> # load an existing po file
22 >>> po = polib.pofile('tests/test_utf8.po')
22 >>> po = polib.pofile('tests/test_utf8.po')
23 >>> for entry in po:
23 >>> for entry in po:
24 ... # do something with entry...
24 ... # do something with entry...
25 ... pass
25 ... pass
26 >>> # add an entry
26 >>> # add an entry
27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
29 >>> po.append(entry)
29 >>> po.append(entry)
30 >>> # to save our modified po file:
30 >>> # to save our modified po file:
31 >>> # po.save()
31 >>> # po.save()
32 >>> # or you may want to compile the po file
32 >>> # or you may want to compile the po file
33 >>> # po.save_as_mofile('tests/test_utf8.mo')
33 >>> # po.save_as_mofile('tests/test_utf8.mo')
34 """
34 """
35
35
36 __author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
36 __author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
37 __version__ = '0.5.2'
37 __version__ = '0.5.2'
38 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
38 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
39 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
39 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
40
40
41 import codecs
41 import codecs
42 import struct
42 import struct
43 import textwrap
43 import textwrap
44 import types
44 import types
45 import re
45 import re
46
46
47 default_encoding = 'utf-8'
47 default_encoding = 'utf-8'
48
48
49 # function pofile() {{{
49 # function pofile() {{{
50
50
def pofile(fpath, **kwargs):
    """
    Convenience function that parses the po/pot file *fpath* and returns
    a POFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False
      - *check_for_duplicates*: whether to check for duplicate entries when
        adding entries to the file, default: False (optional)

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_weird_occurrences.po',
    ...     check_for_duplicates=True)
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural',
    ...              'msgstr_plural', 'obsolete', 'comment', 'tcomment',
    ...              'occurrences', 'flags', 'previous_msgctxt',
    ...              'previous_msgid', 'previous_msgid_plural')
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             for attr in all_attrs:
    ...                 if getattr(old, attr) != getattr(new, attr):
    ...                     getattr(old, attr)
    ...                     getattr(new, attr)
    ...     finally:
    ...         os.unlink(tmpf)
    >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
    >>> tmpf = tempfile.NamedTemporaryFile().name
    >>> po_file.save_as_mofile(tmpf)
    >>> try:
    ...     mo_file = polib.mofile(tmpf)
    ...     for old, new in zip(po_file, mo_file):
    ...         if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
    ...             'OLD: ', po_file._encode(old.msgid)
    ...             'NEW: ', mo_file._encode(new.msgid)
    ...         if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
    ...             'OLD: ', po_file._encode(old.msgstr)
    ...             'NEW: ', mo_file._encode(new.msgstr)
    ...             print new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # Any truthy value enables detection; an explicit *encoding* is only
    # consulted when detection is switched off.
    if kwargs.get('autodetect_encoding', True):
        enc = detect_encoding(fpath)
    else:
        enc = kwargs.get('encoding', default_encoding)
    check_for_duplicates = kwargs.get('check_for_duplicates', False)
    parser = _POFileParser(
        fpath,
        encoding=enc,
        check_for_duplicates=check_for_duplicates
    )
    instance = parser.parse()
    # the wrap width is applied to the parsed file object, not the parser
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
121
121
122 # }}}
122 # }}}
123 # function mofile() {{{
123 # function mofile() {{{
124
124
def mofile(fpath, **kwargs):
    """
    Convenience function that parses the mo file *fpath* and returns
    a MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext to generate the po file that was used to format
        the mo file (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the mo file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False
      - *check_for_duplicates*: whether to check for duplicate entries when
        adding entries to the file, default: False (optional)

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # Any truthy value enables detection; the file is scanned in binary
    # mode (second argument) because mo files are binary.
    if kwargs.get('autodetect_encoding', True):
        enc = detect_encoding(fpath, True)
    else:
        enc = kwargs.get('encoding', default_encoding)
    parser = _MOFileParser(
        fpath,
        encoding=enc,
        check_for_duplicates=kwargs.get('check_for_duplicates', False)
    )
    instance = parser.parse()
    # the wrap width is applied to the parsed file object, not the parser
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
174
174
175 # }}}
175 # }}}
176 # function detect_encoding() {{{
176 # function detect_encoding() {{{
177
177
def detect_encoding(fpath, binary_mode=False):
    """
    Try to detect the encoding used by the file *fpath*. The function will
    return polib default *encoding* if it's unable to detect it.

    The file is scanned line by line for a ``Content-Type`` header and the
    value of its ``charset=`` parameter is returned, stripped of
    surrounding whitespace.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po or mo file to scan.
      - *binary_mode*: boolean, open the file in binary mode (used for mo
        files), default: False (optional).

    **Examples**:

    >>> print(detect_encoding('tests/test_noencoding.po'))
    utf-8
    >>> print(detect_encoding('tests/test_utf8.po'))
    UTF-8
    >>> print(detect_encoding('tests/test_utf8.mo', True))
    UTF-8
    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
    ISO_8859-15
    >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
    ISO_8859-15
    """
    import re
    pattern = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
    if binary_mode:
        mode = 'rb'
        # Binary-mode lines are byte strings, so they must be searched with
        # a byte pattern (on Python 2 this conversion changes nothing).
        pattern = pattern.encode('ascii')
    else:
        mode = 'r'
    rx = re.compile(pattern)
    f = open(fpath, mode)
    try:
        # iterate lazily: the header normally sits at the top of the file
        for line in f:
            match = rx.search(line)
            if match:
                enc = match.group(1).strip()
                if not isinstance(enc, str):
                    # byte match (Python 3 binary mode): return text
                    enc = enc.decode('ascii')
                return enc
    finally:
        # close the handle on every exit path, including errors
        f.close()
    return default_encoding
213
213
214 # }}}
214 # }}}
215 # function escape() {{{
215 # function escape() {{{
216
216
def escape(st):
    """
    Escape special chars and return the given string *st*.

    Backslash, tab, carriage return, newline and double quote are each
    replaced by their two-character backslash escape sequence.

    **Examples**:

    >>> escape('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # The backslash must be handled first, otherwise the backslashes
    # introduced by the later replacements would be doubled as well.
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for char, escaped in replacements:
        st = st.replace(char, escaped)
    return st
231
231
232 # }}}
232 # }}}
233 # function unescape() {{{
233 # function unescape() {{{
234
234
def unescape(st):
    """
    Unescape special chars and return the given string *st*.

    The two-character sequences backslash + ``n``, ``t``, ``r``, backslash
    or double quote are turned back into the character they stand for. The
    string is processed in a single left-to-right pass, so an escaped
    backslash followed by ``n`` is not mistaken for a newline.

    **Examples**:

    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    >>> unescape(r'\\n')
    '\\n'
    >>> unescape(r'\\\\n')
    '\\\\n'
    >>> unescape(r'\\\\n\\n')
    '\\\\n\\n'
    """
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def unescape_repl(m):
        char = m.group(1)
        # a backslash or a double quote simply stands for itself
        return control_chars.get(char, char)
    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
262
262
263 # }}}
263 # }}}
264 # class _BaseFile {{{
264 # class _BaseFile {{{
265
265
266 class _BaseFile(list):
266 class _BaseFile(list):
267 """
267 """
268 Common parent class for POFile and MOFile classes.
268 Common parent class for POFile and MOFile classes.
269 This class must **not** be instanciated directly.
269 This class must **not** be instanciated directly.
270 """
270 """
271
271
def __init__(self, *args, **kwargs):
    """
    Constructor.

    Positional arguments are accepted but not used; the list always
    starts out empty.

    **Keyword arguments**:
      - *fpath*: string, path to po or mo file
      - *wrapwidth*: integer, the wrap width, only useful when -w option
        was passed to xgettext to generate the po file that was used to
        format the mo file, default to 78 (optional),
      - *encoding*: string, the encoding to use, defaults to
        "default_encoding" global variable (optional),
      - *check_for_duplicates*: whether to check for duplicate entries
        when adding entries to the file, default: False (optional).
    """
    list.__init__(self)
    # path of the po/mo file (None when not given)
    self.fpath = kwargs.get('fpath')
    # the width at which lines should be wrapped
    self.wrapwidth = kwargs.get('wrapwidth', 78)
    # the file encoding
    self.encoding = kwargs.get('encoding', default_encoding)
    # whether to check for duplicate entries or not
    self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
    # header
    self.header = ''
    # both po and mo files have metadata
    self.metadata = {}
    # when true, the metadata entry is written with the 'fuzzy' flag
    self.metadata_is_fuzzy = 0
300
300
def __str__(self):
    """
    String representation of the file.

    The metadata entry comes first, followed by every entry that is not
    obsolete, then the entries returned by ``obsolete_entries()``. Each
    entry is rendered at the file's wrap width and the pieces are joined
    with a newline.
    """
    live_entries = [self.metadata_as_entry()] + \
        [e for e in self if not e.obsolete]
    ret = [entry.__str__(self.wrapwidth) for entry in live_entries]
    for entry in self.obsolete_entries():
        ret.append(entry.__str__(self.wrapwidth))
    return '\n'.join(ret)
313
313
def __contains__(self, entry):
    """
    Overridden method to implement the membership test (in and not in).
    The method considers that an entry is in the file if it finds an
    entry that has the same msgid (case sensitive); other attributes
    such as msgstr are not compared.

    **Keyword argument**:
      - *entry*: an instance of polib._BaseEntry

    **Tests**:
    >>> po = POFile()
    >>> e1 = POEntry(msgid='foobar', msgstr='spam')
    >>> e2 = POEntry(msgid='barfoo', msgstr='spam')
    >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
    >>> e4 = POEntry(msgid='spameggs', msgstr='eggs')
    >>> po.append(e1)
    >>> po.append(e2)
    >>> e1 in po
    True
    >>> e2 not in po
    False
    >>> e3 in po
    True
    >>> e4 in po
    False
    """
    match = self.find(entry.msgid, by='msgid')
    return match is not None
341
341
def append(self, entry):
    """
    Overridden method to check for duplicate entries: if a user tries to
    add an entry that already exists, the method will raise a ValueError
    exception. The check only runs when *check_for_duplicates* is enabled
    on the file.

    **Keyword argument**:
      - *entry*: an instance of polib._BaseEntry

    **Tests**:
    >>> e1 = POEntry(msgid='foobar', msgstr='spam')
    >>> e2 = POEntry(msgid='foobar', msgstr='eggs')
    >>> po = POFile(check_for_duplicates=True)
    >>> po.append(e1)
    >>> try:
    ...     po.append(e2)
    ... except ValueError, e:
    ...     unicode(e)
    u'Entry "foobar" already exists'
    """
    # "entry in self" goes through __contains__, i.e. a msgid comparison
    if self.check_for_duplicates and entry in self:
        raise ValueError('Entry "%s" already exists' % entry.msgid)
    super(_BaseFile, self).append(entry)
365
365
def insert(self, index, entry):
    """
    Overridden method to check for duplicate entries: if a user tries to
    insert an entry that already exists, the method will raise a
    ValueError exception. The check only runs when *check_for_duplicates*
    is enabled on the file.

    **Keyword arguments**:
      - *index*: index at which the entry should be inserted
      - *entry*: an instance of polib._BaseEntry

    **Tests**:
    >>> import polib
    >>> polib.check_for_duplicates = True
    >>> e1 = POEntry(msgid='foobar', msgstr='spam')
    >>> e2 = POEntry(msgid='barfoo', msgstr='eggs')
    >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
    >>> po = POFile(check_for_duplicates=True)
    >>> po.insert(0, e1)
    >>> po.insert(1, e2)
    >>> try:
    ...     po.insert(0, e3)
    ... except ValueError, e:
    ...     unicode(e)
    u'Entry "foobar" already exists'
    """
    # "entry in self" goes through __contains__, i.e. a msgid comparison
    if self.check_for_duplicates and entry in self:
        raise ValueError('Entry "%s" already exists' % entry.msgid)
    super(_BaseFile, self).insert(index, entry)
394
394
def __repr__(self):
    """
    Return the official string representation of the object: the class
    name followed by the object's id in hexadecimal.
    """
    return '<%s instance at %x>' % (self.__class__.__name__, id(self))
398
398
def metadata_as_entry(self):
    """
    Return the metadata as an entry, i.e. a POEntry with an empty msgid
    whose msgstr holds one "name: value" line per metadata item, in the
    order given by ``ordered_metadata()``. The entry is flagged 'fuzzy'
    when *metadata_is_fuzzy* is set:

    >>> import polib
    >>> po = polib.pofile('tests/test_fuzzy_header.po')
    >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())
    True
    """
    e = POEntry(msgid='')
    mdata = self.ordered_metadata()
    if mdata:
        e._multiline_str['msgstr'] = ''
        # one "name: value" line per metadata item, values taken as-is
        strs = ['%s: %s' % (name, value) for name, value in mdata]
        e.msgstr = '\n'.join(strs) + '\n'
        # same lines, each newline-terminated, joined with the
        # '__POLIB__NL__' marker
        e._multiline_str['msgstr'] = '__POLIB__NL__'.join(
            [s + '\n' for s in strs])
    if self.metadata_is_fuzzy:
        e.flags.append('fuzzy')
    return e
422
422
def save(self, fpath=None, repr_method='__str__'):
    """
    Save the file to *fpath*, or to the path stored on the object when
    no *fpath* is provided, in which case the existing file is rewritten
    with the modified data.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the file.
      - *repr_method*: string, name of the method of this object that
        produces the output; 'to_binary' output is written as raw bytes,
        anything else is written as text in the file's encoding.

    Raises IOError when neither *fpath* nor the object's own path is set.
    """
    if self.fpath is None and fpath is None:
        raise IOError('You must provide a file path to save() method')
    contents = getattr(self, repr_method)()
    if fpath is None:
        fpath = self.fpath
    binary = repr_method == 'to_binary'
    # Decode before opening, so that a decoding error does not leave a
    # truncated file behind. type(u'') is the text type on Python 2 and 3.
    if not binary and not isinstance(contents, type(u'')):
        contents = contents.decode(self.encoding)
    if binary:
        fhandle = open(fpath, 'wb')
    else:
        fhandle = codecs.open(fpath, 'w', self.encoding)
    try:
        fhandle.write(contents)
    finally:
        # always release the handle, even when the write fails
        fhandle.close()
447
447
def find(self, st, by='msgid'):
    """
    Find the first entry whose msgid (or the property identified by the
    *by* attribute) equals the string *st*. Return None when no entry
    matches.

    **Keyword arguments**:
      - *st*: string, the string to search for
      - *by*: string, the comparison attribute

    **Examples**:

    >>> po = pofile('tests/test_utf8.po')
    >>> entry = po.find('Thursday')
    >>> entry.msgstr
    u'Jueves'
    >>> entry = po.find('Some unexistant msgid')
    >>> entry is None
    True
    >>> entry = po.find('Jueves', 'msgstr')
    >>> entry.msgid
    u'Thursday'
    """
    # linear scan in file order; the first match wins
    for candidate in self:
        if getattr(candidate, by) == st:
            return candidate
    return None
474
474
def ordered_metadata(self):
    """
    Convenience method that returns the metadata ordered. The return
    value is a list of tuples (metadata name, metadata value).

    The well-known gettext headers come first, in a fixed order; any
    other metadata follows, sorted by name. The metadata dictionary
    itself is left untouched.
    """
    # work on a copy so that popping the known keys does not alter self
    metadata = self.metadata.copy()
    data_order = [
        'Project-Id-Version',
        'Report-Msgid-Bugs-To',
        'POT-Creation-Date',
        'PO-Revision-Date',
        'Last-Translator',
        'Language-Team',
        'MIME-Version',
        'Content-Type',
        'Content-Transfer-Encoding'
    ]
    ordered_data = []
    for data in data_order:
        try:
            value = metadata.pop(data)
            ordered_data.append((data, value))
        except KeyError:
            pass
    # There are no specs for the order of the remaining metadata, so sort
    # it by name to get a stable output. sorted() is required here:
    # calling .sort() on a copy of the keys would leave the order as-is.
    for data in sorted(metadata):
        ordered_data.append((data, metadata[data]))
    return ordered_data
507
507
def to_binary(self):
    """
    Return the mofile binary representation.

    The returned string is the concatenation, in this order, of:

    - a header of 7 unsigned integers (magic number, version, number of
      entries, start of the key index, start of the value index, size
      and offset of the hash table -- both 0, no hash table is written),
    - the key index: one (length, file offset) pair per entry,
    - the value index: one (length, file offset) pair per entry,
    - all the msgids, each one NUL terminated,
    - all the msgstrs, each one NUL terminated.

    The first entry written is the metadata entry, followed by the
    translated entries sorted by msgid.
    """
    import array
    import struct
    # NOTE(review): ``types`` is not referenced anywhere in this method.
    import types
    offsets = []
    entries = self.translated_entries()
    # the keys are sorted in the .mo file
    def cmp(_self, other):
        # classic three-way comparison of two entries on their msgid
        if _self.msgid > other.msgid:
            return 1
        elif _self.msgid < other.msgid:
            return -1
        else:
            return 0
    # NOTE(review): this sorts in place the list returned by
    # translated_entries(); MOFile.translated_entries() returns the file
    # object itself, so for a MOFile this presumably reorders the file's
    # own entries -- confirm that this side effect is intended.
    # NOTE(review): passing the comparison function positionally to
    # list.sort() is Python 2 only.
    entries.sort(cmp)
    # add metadata entry: it goes first, ahead of the sorted entries
    mentry = self.metadata_as_entry()
    # drop every literal backslash-n sequence (two characters) from the
    # metadata msgstr, then strip the leading whitespace
    mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
    entries = [mentry] + entries
    entries_len = len(entries)
    # ids / strs accumulate the NUL terminated keys / values
    ids, strs = '', ''
    for e in entries:
        # For each string, we need size and file offset. Each string is
        # NUL terminated; the NUL does not count into the size.
        if e.msgid_plural:
            # plural entry: key is "msgid NUL msgid_plural", value is the
            # plural forms joined with NUL, in index order
            indexes = e.msgstr_plural.keys()
            indexes.sort()
            msgstr = []
            for index in indexes:
                msgstr.append(e.msgstr_plural[index])
            msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
            msgstr = self._encode('\0'.join(msgstr))
        else:
            msgid = self._encode(e.msgid)
            msgstr = self._encode(e.msgstr)
        # (offset of the key in ids, key length,
        #  offset of the value in strs, value length)
        offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
        ids += msgid + '\0'
        strs += msgstr + '\0'
    # The header is 7 32-bit unsigned integers.
    # The keys start after the header and the two index tables, each of
    # which holds 2 integers per entry (hence 16 bytes per entry in total,
    # assuming 4-byte integers).
    keystart = 7*4+16*entries_len
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        # offsets so far were relative to ids/strs, make them absolute
        koffsets += [l1, o1+keystart]
        voffsets += [l2, o2+valuestart]
    offsets = koffsets + voffsets
    # NOTE(review): the "I" format has no byte order prefix, so the header
    # (and the array below) presumably use the native byte order and the
    # native unsigned int size -- confirm this is acceptable for readers.
    output = struct.pack("IIIIIII",
                         0x950412de,        # Magic number
                         0,                 # Version
                         entries_len,       # # of entries
                         7*4,               # start of key index
                         7*4+entries_len*8, # start of value index
                         0, 0)              # size and offset of hash table
    output += array.array("I", offsets).tostring()
    output += ids
    output += strs
    return output
572
572
def _encode(self, mixed):
    """
    Encode the given argument with the file encoding if it is a unicode
    string and return the encoded string.

    Any other value is returned unchanged.
    """
    # isinstance() rather than an exact type() comparison, so that
    # instances of unicode subclasses get encoded as well
    if isinstance(mixed, types.UnicodeType):
        return mixed.encode(self.encoding)
    return mixed
581
581
582 # }}}
582 # }}}
583 # class POFile {{{
583 # class POFile {{{
584
584
class POFile(_BaseFile):
    '''
    Reader/writer for po (or pot) files.
    POFile objects inherit the methods of list objects.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="Peace in some languages",
    ...     msgstr="Pace سلام שלום Hasîtî 和平"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print(po)
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "Peace in some languages"
    msgstr "Pace سلام שלום Hasîtî 和平"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __str__(self):
        """
        Return the string representation of the po file: the header
        rendered as comment lines, followed by the base file output.
        """
        chunks = []
        for line in self.header.split('\n'):
            # header lines starting with ',' or ':' are glued to the '#',
            # every other line is separated from it by a space
            if line[:1] in [',', ':']:
                chunks.append('#%s\n' % line)
            else:
                chunks.append('# %s\n' % line)
        return ''.join(chunks) + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Write the binary representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that returns the percentage of translated
        messages, as an integer. Obsolete entries are not counted in the
        total; a file without any non-obsolete entry is 100% translated.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        >>> po = POFile()
        >>> po.percent_translated()
        100
        """
        active = [e for e in self if not e.obsolete]
        if not active:
            # nothing to translate at all
            return 100
        done = len(self.translated_entries())
        return int((100.00 / float(len(active))) * done)

    def translated_entries(self):
        """
        Convenience method that returns the list of translated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        6
        """
        found = []
        for e in self:
            if e.translated():
                found.append(e)
        return found

    def untranslated_entries(self):
        """
        Convenience method that returns the list of untranslated entries,
        i.e. the entries that are neither translated, nor obsolete, nor
        flagged as fuzzy.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        4
        """
        found = []
        for e in self:
            if e.translated() or e.obsolete or 'fuzzy' in e.flags:
                continue
            found.append(e)
        return found

    def fuzzy_entries(self):
        """
        Convenience method that returns the list of entries flagged as
        'fuzzy'.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        found = []
        for e in self:
            if 'fuzzy' in e.flags:
                found.append(e)
        return found

    def obsolete_entries(self):
        """
        Convenience method that returns the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        found = []
        for e in self:
            if e.obsolete:
                found.append(e)
        return found

    def merge(self, refpot):
        """
        XXX this might not work if the encodings are different, needs
        thinking and general refactoring of how polib handles encoding...

        Convenience method that merges the current pofile with the pot
        file provided. It is meant to behave as the gettext msgmerge
        utility:

        - comments of this file will be preserved, but extracted comments
          and occurrences will be discarded
        - any translations or comments in the file will be discarded,
          however dot comments and file positions will be preserved

        Every entry of *refpot* is merged into the entry of this file
        having the same msgid (a new entry is appended when there is
        none), then the entries of this file whose msgid is not found in
        *refpot* are marked as obsolete.

        **Keyword argument**:
          - *refpot*: object POFile, the reference catalog.

        **Example**:

        >>> import polib
        >>> refpot = polib.pofile('tests/test_merge.pot')
        >>> po = polib.pofile('tests/test_merge_before.po')
        >>> po.merge(refpot)
        >>> expected_po = polib.pofile('tests/test_merge_after.po')
        >>> unicode(po) == unicode(expected_po)
        True
        """
        for ref_entry in refpot:
            mine = self.find(ref_entry.msgid)
            if mine is None:
                # unknown message: start from a blank entry
                mine = POEntry()
                self.append(mine)
            mine.merge(ref_entry)
        # ok, now we must "obsolete" entries that are not in the refpot
        # anymore
        for mine in self:
            if refpot.find(mine.msgid) is None:
                mine.obsolete = True
765
765
766 # }}}
766 # }}}
767 # class MOFile {{{
767 # class MOFile {{{
768
768
class MOFile(_BaseFile):
    '''
    Reader/writer for mo files.
    MOFile objects inherit the methods of list objects.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print(mo)
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __init__(self, *args, **kwargs):
        """
        MOFile constructor. On top of the base file attributes, mo files
        have two other properties:
            - magic_number: the magic_number of the binary file
              (None until it is set),
            - version: the version of the mo spec (0 by default).
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.version = 0
        self.magic_number = None

    def save_as_pofile(self, fpath):
        """
        Write the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath):
        """
        Write the binary representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Always 100; provided to keep the same interface as POFile
        instances.
        """
        return 100

    def translated_entries(self):
        """
        Return the file object itself (not a copy); provided to keep the
        same interface as POFile instances.
        """
        return self

    def untranslated_entries(self):
        """
        Always an empty list; provided to keep the same interface as
        POFile instances.
        """
        return []

    def fuzzy_entries(self):
        """
        Always an empty list; provided to keep the same interface as
        POFile instances.
        """
        return []

    def obsolete_entries(self):
        """
        Always an empty list; provided to keep the same interface as
        POFile instances.
        """
        return []
864
864
865 # }}}
865 # }}}
866 # class _BaseEntry {{{
866 # class _BaseEntry {{{
867
867
class _BaseEntry(object):
    """
    Common base class of the POEntry and MOEntry objects.
    This class must *not* be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Base Entry constructor. Every field is read from the keyword
        arguments; positional arguments are accepted but not used here.
        """
        get = kwargs.get
        self.msgid = get('msgid', '')
        self.msgstr = get('msgstr', '')
        self.msgid_plural = get('msgid_plural', '')
        self.msgstr_plural = get('msgstr_plural', {})
        self.obsolete = get('obsolete', False)
        self.encoding = get('encoding', default_encoding)
        self.msgctxt = get('msgctxt', None)
        # "fieldname + plural index" -> raw multi-line text, takes
        # precedence over the field value in _str_field()
        self._multiline_str = {}

    def __repr__(self):
        """Return the official string representation of the object."""
        cls_name = self.__class__.__name__
        return '<%s instance at %x>' % (cls_name, id(self))

    def __str__(self, wrapwidth=78):
        """
        Common string representation of the POEntry and MOEntry
        objects: msgctxt (if any), msgid, msgid_plural (if any), then
        either the plural msgstrs or the single msgstr, and a trailing
        empty line. Every line of an obsolete entry is prefixed with
        '#~ '. *wrapwidth* is not used by this implementation.
        """
        prefix = ''
        if self.obsolete:
            prefix = '#~ '
        out = []
        # write the msgctxt if any
        if self.msgctxt is not None:
            out.extend(self._str_field("msgctxt", prefix, "", self.msgctxt))
        # write the msgid
        out.extend(self._str_field("msgid", prefix, "", self.msgid))
        # write the msgid_plural if any
        if self.msgid_plural:
            out.extend(self._str_field("msgid_plural", prefix, "",
                                       self.msgid_plural))
        if self.msgstr_plural:
            # write the msgstr_plural if any, in index order
            plurals = self.msgstr_plural
            for index in sorted(plurals):
                out.extend(self._str_field("msgstr", prefix,
                                           '[%s]' % index, plurals[index]))
        else:
            # otherwise write the msgstr
            out.extend(self._str_field("msgstr", prefix, "", self.msgstr))
        out.append('')
        return '\n'.join(out)

    def _str_field(self, fieldname, delflag, plural_index, field):
        """
        Return the list of output lines for one field.

        The first line is ``<delflag><fieldname><plural_index> "<text>"``,
        every following line is ``<delflag>"<text>"``; the text goes
        through escape() in both cases.
        """
        key = fieldname + plural_index
        if key in self._multiline_str:
            # a raw multi-line version was recorded for this field: use it
            # instead of *field*, one chunk per line marker
            pieces = [''] + self._multiline_str[key].split('__POLIB__NL__')
        else:
            pieces = field.splitlines(True)
            if len(pieces) > 1:
                pieces = [''] + pieces  # start with initial empty line
            else:
                pieces = [field]  # needed for the empty string case
        if fieldname.startswith('previous_'):
            # quick and dirty trick to get the real field name
            fieldname = fieldname[9:]

        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                escape(pieces[0]))]
        for mstr in pieces[1:]:
            ret.append('%s"%s"' % (delflag, escape(mstr)))
        return ret
941
941
942 # }}}
942 # }}}
943 # class POEntry {{{
943 # class POEntry {{{
944
944
945 class POEntry(_BaseEntry):
945 class POEntry(_BaseEntry):
946 """
946 """
947 Represents a po file entry.
947 Represents a po file entry.
948
948
949 **Examples**:
949 **Examples**:
950
950
951 >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
951 >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
952 >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
952 >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
953 >>> print(entry)
953 >>> print(entry)
954 #: welcome.py:12 anotherfile.py:34
954 #: welcome.py:12 anotherfile.py:34
955 msgid "Welcome"
955 msgid "Welcome"
956 msgstr "Bienvenue"
956 msgstr "Bienvenue"
957 <BLANKLINE>
957 <BLANKLINE>
958 >>> entry = POEntry()
958 >>> entry = POEntry()
959 >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
959 >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
960 >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
960 >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
961 >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
961 >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
962 >>> entry.flags.append('c-format')
962 >>> entry.flags.append('c-format')
963 >>> entry.previous_msgctxt = '@somecontext'
963 >>> entry.previous_msgctxt = '@somecontext'
964 >>> entry.previous_msgid = 'I had eggs but no spam !'
964 >>> entry.previous_msgid = 'I had eggs but no spam !'
965 >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
965 >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
966 >>> entry.msgctxt = '@somenewcontext'
966 >>> entry.msgctxt = '@somenewcontext'
967 >>> entry.msgid = 'I have spam but no egg !'
967 >>> entry.msgid = 'I have spam but no egg !'
968 >>> entry.msgid_plural = 'I have spam and %d eggs !'
968 >>> entry.msgid_plural = 'I have spam and %d eggs !'
969 >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
969 >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
970 >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
970 >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
971 >>> print(entry)
971 >>> print(entry)
972 #. A plural translation. This is a very very very long line please do not
972 #. A plural translation. This is a very very very long line please do not
973 #. wrap, this is just for testing comment wrapping...
973 #. wrap, this is just for testing comment wrapping...
974 # A plural translation. This is a very very very long line please do not wrap,
974 # A plural translation. This is a very very very long line please do not wrap,
975 # this is just for testing comment wrapping...
975 # this is just for testing comment wrapping...
976 #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
976 #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
977 #: src/eggs.c:45
977 #: src/eggs.c:45
978 #, c-format
978 #, c-format
979 #| msgctxt "@somecontext"
979 #| msgctxt "@somecontext"
980 #| msgid "I had eggs but no spam !"
980 #| msgid "I had eggs but no spam !"
981 #| msgid_plural "I had eggs and %d spam !"
981 #| msgid_plural "I had eggs and %d spam !"
982 msgctxt "@somenewcontext"
982 msgctxt "@somenewcontext"
983 msgid "I have spam but no egg !"
983 msgid "I have spam but no egg !"
984 msgid_plural "I have spam and %d eggs !"
984 msgid_plural "I have spam and %d eggs !"
985 msgstr[0] "J'ai du jambon mais aucun oeuf !"
985 msgstr[0] "J'ai du jambon mais aucun oeuf !"
986 msgstr[1] "J'ai du jambon et %d oeufs !"
986 msgstr[1] "J'ai du jambon et %d oeufs !"
987 <BLANKLINE>
987 <BLANKLINE>
988 """
988 """
989
989
def __init__(self, *args, **kwargs):
    """
    POEntry constructor. Initializes the base entry, then reads the po
    specific fields from the keyword arguments.
    """
    _BaseEntry.__init__(self, *args, **kwargs)
    # (attribute name, default value); the tuple is rebuilt on every call
    # so each entry gets its own fresh lists
    po_fields = (
        ('comment', ''),
        ('tcomment', ''),
        ('occurrences', []),
        ('flags', []),
        ('previous_msgctxt', None),
        ('previous_msgid', None),
        ('previous_msgid_plural', None),
    )
    for name, default in po_fields:
        setattr(self, name, kwargs.get(name, default))
1000
1000
def __str__(self, wrapwidth=78):
    """
    Return the string representation of the entry.

    **Arguments**:
      - *wrapwidth*: integer, the width used to wrap comment and
        occurrence lines; a value lower than or equal to 0 disables that
        wrapping (optional, default: 78). The same value is handed to
        ``_BaseEntry.__str__`` so that the message fields follow the
        width requested by the caller.
    """
    if self.obsolete:
        # obsolete entries only carry what the base class renders
        return _BaseEntry.__str__(self, wrapwidth)
    ret = []
    # generated comments ("#. ") first, then translator comments ("# "),
    # both with text wrapping as xgettext does
    for text, prefix in ((self.comment, '#. '), (self.tcomment, '# ')):
        if text == '':
            continue
        for line in text.split('\n'):
            if wrapwidth > 0 and len(line) > wrapwidth - len(prefix):
                ret += textwrap.wrap(line, wrapwidth,
                                     initial_indent=prefix,
                                     subsequent_indent=prefix,
                                     break_long_words=False)
            else:
                ret.append(prefix + line)
    # occurrences (with text wrapping as xgettext does)
    if self.occurrences:
        filelist = []
        for fpath, lineno in self.occurrences:
            if lineno:
                filelist.append('%s:%s' % (fpath, lineno))
            else:
                filelist.append(fpath)
        filestr = ' '.join(filelist)
        if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
            # XXX textwrap splits words that contain a hyphen, which is
            # not what we want for file names, so the dirty hack is to
            # temporarily replace hyphens with "*" and to restore them
            # once the text is wrapped.
            # NOTE: a path that really contains "*" comes back with "-".
            lines = textwrap.wrap(filestr.replace('-', '*'),
                                  wrapwidth,
                                  initial_indent='#: ',
                                  subsequent_indent='#: ',
                                  break_long_words=False)
            for line in lines:
                ret.append(line.replace('*', '-'))
        else:
            ret.append('#: ' + filestr)
    # flags
    if self.flags:
        ret.append('#, %s' % ', '.join(self.flags))

    # previous context and previous msgid/msgid_plural
    for name in ('previous_msgctxt', 'previous_msgid',
                 'previous_msgid_plural'):
        value = getattr(self, name)
        if value:
            ret += self._str_field(name, "#| ", "", value)

    ret.append(_BaseEntry.__str__(self, wrapwidth))
    return '\n'.join(ret)
1072
1072
def __cmp__(self, other):
    '''
    Called by comparison operations if rich comparison is not defined.

    Entries are ordered by, in turn: the obsolete flag (an obsolete entry
    compares lower than a non obsolete one), their sorted occurrences
    (file path, then line number, an entry whose occurrences are a
    prefix of the other's compares lower) and finally their msgid.

    Returns -1, 0 or 1 when *self* is respectively lower than, equal to
    or greater than *other*.

    **Tests**:
    >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
    >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
    >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
    >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
    >>> po = POFile()
    >>> po.append(a)
    >>> po.append(b)
    >>> po.append(c1)
    >>> po.append(c2)
    >>> po.sort()
    >>> print(po)
    #
    msgid ""
    msgstr ""
    <BLANKLINE>
    #: a.py:1 a.py:3
    msgid "c2"
    msgstr ""
    <BLANKLINE>
    #: a.py:1 b.py:1
    msgid "c1"
    msgstr ""
    <BLANKLINE>
    #: b.py:1 b.py:3
    msgid "a"
    msgstr ""
    <BLANKLINE>
    #: b.py:1 b.py:3
    msgid "b"
    msgstr ""
    <BLANKLINE>
    '''
    # First: Obsolete test
    if self.obsolete != other.obsolete:
        if self.obsolete:
            return -1
        return 1
    # sorted() works on copies, so the original lists are left untouched;
    # (path, line) tuples sort naturally by path and then by line
    occ1 = sorted(self.occurrences)
    occ2 = sorted(other.occurrences)
    # Comparing sorted occurrences pair by pair
    for entry1, entry2 in zip(occ1, occ2):
        if entry1[0] != entry2[0]:
            if entry1[0] > entry2[0]:
                return 1
            return -1
        if entry1[1] != entry2[1]:
            if entry1[1] > entry2[1]:
                return 1
            return -1
    # All the shared positions are equal: the entry with the most
    # occurrences is the greater one (checked in both directions so that
    # cmp(a, b) and cmp(b, a) always agree)
    if len(occ1) > len(occ2):
        return 1
    if len(occ1) < len(occ2):
        return -1
    # Finally: Compare message ID
    if self.msgid > other.msgid:
        return 1
    if self.msgid < other.msgid:
        return -1
    return 0
1153
1153
def translated(self):
    """
    Tell whether the entry is translated.

    An obsolete or fuzzy entry is never translated. Otherwise the entry
    is translated when its msgstr is not empty or, failing that, when it
    has plural translations and none of them is empty.
    """
    usable = not self.obsolete and 'fuzzy' not in self.flags
    if not usable:
        return False
    if self.msgstr != '':
        return True
    if not self.msgstr_plural:
        return False
    # every plural form must carry a translation
    for text in self.msgstr_plural.values():
        if text == '':
            return False
    return True
1168
1168
def merge(self, other):
    """
    Merge the current entry with the given pot entry.

    The msgid, occurrences, comment, flags and msgid_plural of *other*
    replace those of the current entry. The occurrences and flags are
    copied so that a later change to one entry does not silently alter
    the other one. Plural translations already present are kept, the
    plural positions only known to *other* are added with an empty
    translation.
    """
    self.msgid = other.msgid
    self.occurrences = list(other.occurrences)
    self.comment = other.comment
    self.flags = list(other.flags)
    self.msgid_plural = other.msgid_plural
    if other.msgstr_plural:
        for pos in other.msgstr_plural:
            # keep existing translation at pos if any
            self.msgstr_plural.setdefault(pos, '')
1185
1185
1186 # }}}
1186 # }}}
1187 # class MOEntry {{{
1187 # class MOEntry {{{
1188
1188
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        The rendering is entirely delegated to ``_BaseEntry.__str__``,
        which receives *wrapwidth* (default: 78) unchanged.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
1209
1209
1210 # }}}
1210 # }}}
1211 # class _POFileParser {{{
1211 # class _POFileParser {{{
1212
1212
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.

    Each non empty line of the file is mapped to a two chars symbol (see
    ``_PREFIX_SYMBOLS``); the pair (symbol, current state) selects a
    handler that fills the entry being built and, when the handler
    returns a true value, moves the machine to the next state.
    """

    # Ordered (line prefix, symbol) pairs: the first matching prefix wins,
    # so '#| msgid_plural' must stay before '#| msgid'.
    _PREFIX_SYMBOLS = (
        ('#:', 'OC'),               # occurrences line
        ('msgctxt "', 'CT'),        # msgctxt
        ('msgid "', 'MI'),          # msgid
        ('msgstr "', 'MS'),         # msgstr
        ('"', 'MC'),                # continuation line or some metadata
        ('#| "', 'MC'),             # continuation of a previous_* field
        ('msgid_plural "', 'MP'),   # msgid plural
        ('msgstr[', 'MX'),          # msgstr plural
        ('#, ', 'FL'),              # flags line
        ('# ', 'TC'),               # translator comment line
        ('#.', 'GC'),               # generated comment line
        ('#| msgid_plural', 'PP'),  # previous msgid_plural
        ('#| msgid', 'PM'),         # previous msgid
        ('#| msgctxt', 'PC'),       # previous msgctxt
    )

    # States in which the handlers that open an entry first store the
    # entry read so far.
    _ENTRY_END_STATES = ('MC', 'MS', 'MX')

    # State -> name of the entry attribute extended by a continuation line
    # (the 'MX' state is handled apart, it targets msgstr_plural).
    _CONTINUED_FIELDS = {
        'CT': 'msgctxt',
        'MI': 'msgid',
        'MP': 'msgid_plural',
        'MS': 'msgstr',
        'PP': 'previous_msgid_plural',
        'PM': 'previous_msgid',
        'PC': 'previous_msgctxt',
    }

    def __init__(self, fpath, *args, **kwargs):
        """
        Constructor.

        **Arguments**:
          - *fpath*: string, path to the po file
          - *encoding*: string, the encoding to use, defaults to
            "default_encoding" global variable (optional),
          - *check_for_duplicates*: whether to check for duplicate entries
            when adding entries to the file, default: False (optional).
        """
        enc = kwargs.get('encoding', default_encoding)
        check_dup = kwargs.get('check_for_duplicates', False)
        try:
            self.fhandle = codecs.open(fpath, 'rU', enc)
        except LookupError:
            # unknown encoding name: fall back to the default one
            enc = default_encoding
            self.fhandle = codecs.open(fpath, 'rU', enc)
        self.instance = POFile(
            fpath=fpath,
            encoding=enc,
            check_for_duplicates=check_dup
        )
        self.transitions = {}
        self.current_entry = POEntry()
        self.current_state = 'ST'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Meaning of the symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurrence
        #     * FL: a flags line
        #     * CT: a message context
        #     * PC: a previous msgctxt
        #     * PM: a previous msgid
        #     * PP: a previous msgid_plural
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_states = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP',
                      'TC', 'MS', 'MP', 'MX', 'MI']

        self.add('TC', ['ST', 'HE'], 'HE')
        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
                        'MP', 'MX', 'MI'], 'TC')
        self.add('GC', all_states, 'GC')
        self.add('OC', all_states, 'OC')
        self.add('FL', all_states, 'FL')
        self.add('PC', all_states, 'PC')
        self.add('PM', all_states, 'PM')
        self.add('PP', all_states, 'PP')
        self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
                        'PP', 'MS', 'MX'], 'CT')
        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
                        'PM', 'PP', 'MS', 'MX'], 'MI')
        self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
        self.add('MS', ['MI', 'MP', 'TC'], 'MS')
        self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
        self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the POFile instance filled with the entries found. The
        file handle is closed once the lines have been read, even when a
        syntax error (IOError raised by process()) interrupts the parsing.
        """
        linenum = 0
        try:
            for line in self.fhandle:
                linenum += 1
                line = line.strip()
                if line == '':
                    continue
                if line[:3] == '#~ ':
                    # obsolete line: parse what follows the marker
                    line = line[3:]
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0
                self.current_token = line
                symbol = self._symbol_for(line)
                # lines that match no known prefix are silently skipped
                if symbol is not None:
                    self.process(symbol, linenum)
        finally:
            # close opened file
            self.fhandle.close()

        if self.current_entry:
            # since entries are added when another entry is found, we must add
            # the last entry here (only if there are lines)
            self.instance.append(self.current_entry)
        # before returning the instance, check if there's metadata and if
        # so extract it in a dict
        firstentry = self.instance[0]
        if firstentry.msgid == '': # metadata found
            # remove the entry
            firstentry = self.instance.pop(0)
            self.instance.metadata_is_fuzzy = firstentry.flags
            key = None
            for msg in firstentry.msgstr.splitlines():
                try:
                    key, val = msg.split(':', 1)
                except ValueError:
                    # no colon: the line continues the previous metadata
                    if key is not None:
                        self.instance.metadata[key] += '\n' + msg.strip()
                else:
                    self.instance.metadata[key] = val.strip()
        return self.instance

    def _symbol_for(self, line):
        """
        Return the symbol matching a stripped, non empty line or None when
        the line matches no known token.
        """
        if line == '#':
            # an empty translator comment
            return 'TC'
        for prefix, symbol in self._PREFIX_SYMBOLS:
            if line.startswith(prefix):
                return symbol
        return None

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.
        Keywords arguments:

        symbol     -- string, the matched token (two chars symbol)
        states     -- list, a list of states (two chars symbols)
        next_state -- the next state the fsm will have after the action
        """
        # the handler only depends on the next state: look it up once
        action = getattr(self, 'handle_%s' % next_state.lower())
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol, linenum):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:
        symbol  -- string, the matched token (two chars symbol)
        linenum -- integer, the current line number of the parsed file

        Raises IOError when no transition exists for the symbol in the
        current state or when the handler fails.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            if action():
                self.current_state = state
        except Exception:
            raise IOError('Syntax error in po file (line %s)' % linenum)

    def _store_finished_entry(self):
        """
        Append the current entry to the file and start a new one when the
        current state shows that the entry has been completely read.
        """
        if self.current_state in self._ENTRY_END_STATES:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry()

    # state handlers

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return True

    def handle_tc(self):
        """Handle a translator comment."""
        self._store_finished_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        self.current_entry.tcomment += self.current_token[2:]
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._store_finished_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """
        Handle a file:num occurrence.

        Each whitespace separated reference is stored as a (path, line)
        tuple. The line is the text found after the last colon when it is
        made of digits only; otherwise the whole reference is kept as the
        path and the line is the empty string.
        """
        self._store_finished_entry()
        for occurrence in self.current_token[3:].split():
            fil, line = occurrence, ''
            if ':' in occurrence:
                # split on the last colon so that a path that itself
                # contains colons keeps its line number
                head, tail = occurrence.rsplit(':', 1)
                if tail.isdigit():
                    fil, line = head, tail
            self.current_entry.occurrences.append((fil, line))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._store_finished_entry()
        self.current_entry.flags += self.current_token[3:].split(', ')
        return True

    def handle_pp(self):
        """Handle a previous msgid_plural line."""
        self._store_finished_entry()
        # 17 == len('#| msgid_plural "')
        self.current_entry.previous_msgid_plural = \
            unescape(self.current_token[17:-1])
        return True

    def handle_pm(self):
        """Handle a previous msgid line."""
        self._store_finished_entry()
        # 10 == len('#| msgid "')
        self.current_entry.previous_msgid = \
            unescape(self.current_token[10:-1])
        return True

    def handle_pc(self):
        """Handle a previous msgctxt line."""
        self._store_finished_entry()
        # 12 == len('#| msgctxt "')
        self.current_entry.previous_msgctxt = \
            unescape(self.current_token[12:-1])
        return True

    def handle_ct(self):
        """Handle a msgctxt."""
        self._store_finished_entry()
        self.current_entry.msgctxt = unescape(self.current_token[9:-1])
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._store_finished_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[7:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[8:-1])
        return True

    def handle_mx(self):
        """Handle a msgstr plural."""
        # the token looks like: msgstr[N] "text", N being a single char
        index, value = self.current_token[7], self.current_token[11:-1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """
        Handle a msgid or msgstr continuation line.

        The text is appended to the field selected by the current state
        and recorded in the ``_multiline_str`` dict of the entry. Returns
        False so that the state machine stays in its current state.
        """
        token = unescape(self.current_token[1:-1])
        entry = self.current_entry
        state = self.current_state
        if state in ('PP', 'PM', 'PC'):
            # drop what is left of the '#| "' prefix
            token = token[3:]
        if state == 'MX':
            typ = 'msgstr[%s]' % self.msgstr_index
            entry.msgstr_plural[self.msgstr_index] += token
        else:
            typ = self._CONTINUED_FIELDS[state]
            setattr(entry, typ, getattr(entry, typ) + token)
        if typ not in entry._multiline_str:
            entry._multiline_str[typ] = token
        else:
            entry._multiline_str[typ] += "__POLIB__NL__" + token
        # don't change the current state
        return False
1550
1550
1551 # }}}
1551 # }}}
1552 # class _MOFileParser {{{
1552 # class _MOFileParser {{{
1553
1553
class _MOFileParser(object):
    """
    A class to parse binary mo files.

    The constructor opens the file and creates an empty ``MOFile``
    instance; :meth:`parse` fills that instance from the file contents,
    closes the file and returns the instance.
    """
    # Values the magic number can take once the first four bytes have been
    # unpacked as a little-endian unsigned int (see parse()): LITTLE_ENDIAN
    # selects little-endian decoding for the rest of the file, BIG_ENDIAN
    # selects big-endian decoding.
    BIG_ENDIAN = 0xde120495
    LITTLE_ENDIAN = 0x950412de

    def __init__(self, fpath, *args, **kwargs):
        """
        Constructor.

        **Arguments**:
          - *fpath*: string, path to the mo file
          - *encoding*: string, the encoding to use, defaults to
            "default_encoding" global variable (optional),
          - *check_for_duplicates*: whether to check for duplicate entries
            when adding entries to the file, default: False (optional).
        """
        enc = kwargs.get('encoding', default_encoding)
        check_dup = kwargs.get('check_for_duplicates', False)
        # opened in binary mode; parse() is responsible for closing it
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(
            fpath=fpath,
            encoding=enc,
            check_for_duplicates=check_dup
        )

    def parse_magicnumber(self):
        """
        Placeholder kept for backward compatibility: it does nothing.

        The magic number is actually read and validated by :meth:`parse`.
        """

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        **Returns**: the instance created by the constructor, populated
        with the magic number, version, metadata and entries read from the
        file.

        **Raises**: ``IOError`` if the magic number is not one of
        ``LITTLE_ENDIAN`` / ``BIG_ENDIAN``.

        The file handle is closed when this method exits, whether parsing
        succeeded or failed.
        """
        try:
            # the magic number is always unpacked little-endian; the value
            # obtained tells us the byte order of every following field
            magic_number = self._readbinary('<I', 4)
            if magic_number == self.LITTLE_ENDIAN:
                ii = '<II'
            elif magic_number == self.BIG_ENDIAN:
                ii = '>II'
            else:
                raise IOError('Invalid mo file, magic number is incorrect !')
            self.instance.magic_number = magic_number
            # parse the version number and the number of strings
            self.instance.version, numofstrings = self._readbinary(ii, 8)
            # offsets of the original strings table and of the translation
            # strings table
            msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
            # move to msgid table and read (length, offset) of each msgid
            self.fhandle.seek(msgids_hash_offset)
            msgids_index = [self._readbinary(ii, 8)
                            for _ in range(numofstrings)]
            # move to msgstr table and read (length, offset) of each msgstr
            self.fhandle.seek(msgstrs_hash_offset)
            msgstrs_index = [self._readbinary(ii, 8)
                             for _ in range(numofstrings)]
            # build entries
            for i in range(numofstrings):
                msgid_len, msgid_offset = msgids_index[i]
                self.fhandle.seek(msgid_offset)
                msgid = self.fhandle.read(msgid_len)
                msgstr_len, msgstr_offset = msgstrs_index[i]
                self.fhandle.seek(msgstr_offset)
                msgstr = self.fhandle.read(msgstr_len)
                if i == 0:
                    # the first string pair is treated as the metadata
                    # header, not as a regular entry
                    self.instance.metadata = self._parse_metadata(msgstr)
                    continue
                self.instance.append(self._build_entry(msgid, msgstr))
        finally:
            # close opened file, even if the file turned out to be invalid
            self.fhandle.close()
        return self.instance

    def _parse_metadata(self, raw):
        """
        Private method that turns the raw metadata string (one
        "key: value" pair per line) into a dict.

        Lines whose key part is empty are ignored; a line without any
        colon is stored with an empty string as value.
        """
        metadata = {}
        for line in raw.split('\n'):
            tokens = line.split(':', 1)
            if tokens[0] != '':
                try:
                    metadata[tokens[0]] = tokens[1].strip()
                except IndexError:
                    metadata[tokens[0]] = ''
        return metadata

    def _build_entry(self, msgid, msgstr):
        """
        Private method that builds a ``MOEntry`` from a raw msgid/msgstr
        pair.

        A msgid containing a NUL character is a plural entry: the part
        before the first NUL is the singular msgid, the next part is the
        plural msgid, and the NUL-separated parts of msgstr are the plural
        translations, keyed by their position.
        """
        msgid_tokens = msgid.split('\0')
        if len(msgid_tokens) > 1:
            return MOEntry(
                msgid=msgid_tokens[0],
                msgid_plural=msgid_tokens[1],
                msgstr_plural=dict(enumerate(msgstr.split('\0')))
            )
        return MOEntry(msgid=msgid, msgstr=msgstr)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
1656
1656
1657 # }}}
1657 # }}}
1658 # __main__ {{{
1658 # __main__ {{{
1659
1659
if __name__ == '__main__':
    """
    **Main function**::
      - to **test** the module just run: *python polib.py [-v]*
      - to **profile** the module: *python polib.py -p <some_pofile.po>*
    """
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            # pick the loader from the file name suffix, then convert the
            # result to unicode so that conversion is profiled as well
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            unicode(p)
        import profile
        # Pass the path as a variable instead of splicing it into the
        # profiled statement: a path containing quotes or backslashes would
        # otherwise be mangled (or executed as code).
        profile.runctx('test(path)', globals(), {'path': sys.argv[2]})
    else:
        import doctest
        doctest.testmod()
1679
1679
1680 # }}}
1680 # }}}
General Comments 0
You need to be logged in to leave comments. Login now