##// END OF EJS Templates
polib: update to latest release 1.0.7 (upstream rev d75ce6dbbc2a)...
Augie Fackler -
r40221:19fc5a98 default
parent child Browse files
Show More
@@ -1,246 +1,238
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 #
2 #
3 # check-translation.py - check Mercurial specific translation problems
3 # check-translation.py - check Mercurial specific translation problems
4 from __future__ import absolute_import
4 from __future__ import absolute_import
5
5
6 import re
6 import re
7
7
8 import polib
8 import polib
9
9
# functions registered by @scanner(); check() calls each one with the
# whole pofile before examining individual entries
scanners = []
# (checker function, level) pairs registered through levelchecker()
checkers = []
12
12
def scanner():
    """Return a decorator registering a function as a scanner

    The decorated function is appended to the module level "scanners"
    list and returned unchanged.
    """
    def decorator(func):
        scanners.append(func)
        return func
    return decorator
18
18
def levelchecker(level, msgidpat):
    """Return a decorator registering a function as a checker

    "level" is stored together with the function in the module level
    "checkers" list.

    "msgidpat" is a regular expression searched for in msgid to decide
    whether the checker applies. If it is false (None or empty), the
    checker applies to every msgid. The resulting predicate is attached
    to the function as its "match" attribute.
    """
    def decorator(func):
        if msgidpat:
            match = re.compile(msgidpat).search
        else:
            match = lambda msgid: True
        checkers.append((func, level))
        func.match = match
        return func
    return decorator
29
29
def match(checker, pe):
    """Examine whether POEntry "pe" is target of specified checker or not

    Return True if "checker.match" accepts the msgid of "pe" and the
    translator comment of "pe" has no "no-<checker name>-check" word.
    Otherwise, return None.
    """
    if not checker.match(pe.msgid):
        return
    # examine suppression by translator comment
    nochecker = 'no-%s-check' % checker.__name__
    if nochecker in pe.tcomment.split():
        return
    return True
41
41
42 ####################
42 ####################
43
43
def fatalchecker(msgidpat=None):
    """Return a decorator registering a checker at 'fatal' level

    See levelchecker() for the meaning of "msgidpat".
    """
    return levelchecker('fatal', msgidpat)
46
46
@fatalchecker(r'\$\$')
def promptchoice(pe):
    """Check translation of the string given to "ui.promptchoice()"

    >>> pe = polib.POEntry(
    ...     msgid ='prompt$$missing &sep$$missing &amp$$followed by &none',
    ...     msgstr='prompt missing &sep$$missing amp$$followed by none&')
    >>> match(promptchoice, pe)
    True
    >>> for e in promptchoice(pe): print(e)
    number of choices differs between msgid and msgstr
    msgstr has invalid choice missing '&'
    msgstr has invalid '&' followed by none
    """
    # choices are the '$$' separated parts following the prompt itself
    idchoices = [c.rstrip(' ') for c in pe.msgid.split('$$')[1:]]
    strchoices = [c.rstrip(' ') for c in pe.msgstr.split('$$')[1:]]

    if len(idchoices) != len(strchoices):
        yield "number of choices differs between msgid and msgstr"

    # position of the first '&' in each translated choice (-1 if absent)
    indices = [(c, c.find('&')) for c in strchoices]
    if any(i == -1 for c, i in indices):
        yield "msgstr has invalid choice missing '&'"
    # '&' as the last character has nothing following it (for a choice
    # without '&', "i + 1" is 0, so this matches an empty choice, too)
    if any(len(c) == i + 1 for c, i in indices):
        yield "msgstr has invalid '&' followed by none"
72
72
# POEntry translating the bare '(DEPRECATED)' msgid, set by
# deprecatedsetup(); None until such an entry has been found
deprecatedpe = None
@scanner()
def deprecatedsetup(pofile):
    """Remember how "pofile" translates the '(DEPRECATED)' msgid

    The first entry whose msgid is exactly '(DEPRECATED)' and whose
    msgstr is not empty is stored in the module level "deprecatedpe".
    "deprecatedpe" is left as is, if there is no such entry.
    """
    global deprecatedpe
    pes = [p for p in pofile if p.msgid == '(DEPRECATED)' and p.msgstr]
    if pes:
        deprecatedpe = pes[0]
80
80
@fatalchecker(r'\(DEPRECATED\)')
def deprecated(pe):
    """Check for DEPRECATED

    msgstr should contain either '(DEPRECATED)' itself or the msgstr of
    the entry recorded by deprecatedsetup().

    >>> ped = polib.POEntry(
    ...     msgid = '(DEPRECATED)',
    ...     msgstr= '(DETACERPED)')
    >>> deprecatedsetup([ped])
    >>> pe = polib.POEntry(
    ...     msgid = 'Something (DEPRECATED)',
    ...     msgstr= 'something (DEPRECATED)')
    >>> match(deprecated, pe)
    True
    >>> for e in deprecated(pe): print(e)
    >>> pe = polib.POEntry(
    ...     msgid = 'Something (DEPRECATED)',
    ...     msgstr= 'something (DETACERPED)')
    >>> match(deprecated, pe)
    True
    >>> for e in deprecated(pe): print(e)
    >>> pe = polib.POEntry(
    ...     msgid = 'Something (DEPRECATED)',
    ...     msgstr= 'something')
    >>> match(deprecated, pe)
    True
    >>> for e in deprecated(pe): print(e)
    msgstr inconsistently translated (DEPRECATED)
    >>> pe = polib.POEntry(
    ...     msgid = 'Something (DEPRECATED, foo bar)',
    ...     msgstr= 'something (DETACERPED, foo bar)')
    >>> match(deprecated, pe)
    """
    if not ('(DEPRECATED)' in pe.msgstr or
            (deprecatedpe and
             deprecatedpe.msgstr in pe.msgstr)):
        yield "msgstr inconsistently translated (DEPRECATED)"
116
116
117 ####################
117 ####################
118
118
def warningchecker(msgidpat=None):
    """Return a decorator registering a checker at 'warning' level

    See levelchecker() for the meaning of "msgidpat".
    """
    return levelchecker('warning', msgidpat)
121
121
@warningchecker()
def taildoublecolons(pe):
    """Check equality of tail '::'-ness between msgid and msgstr

    >>> pe = polib.POEntry(
    ...     msgid ='ends with ::',
    ...     msgstr='ends with ::')
    >>> for e in taildoublecolons(pe): print(e)
    >>> pe = polib.POEntry(
    ...     msgid ='ends with ::',
    ...     msgstr='ends without double-colons')
    >>> for e in taildoublecolons(pe): print(e)
    tail '::'-ness differs between msgid and msgstr
    >>> pe = polib.POEntry(
    ...     msgid ='ends without double-colons',
    ...     msgstr='ends with ::')
    >>> for e in taildoublecolons(pe): print(e)
    tail '::'-ness differs between msgid and msgstr
    """
    if pe.msgid.endswith('::') != pe.msgstr.endswith('::'):
        yield "tail '::'-ness differs between msgid and msgstr"
143
143
@warningchecker()
def indentation(pe):
    """Check equality of initial indentation between msgid and msgstr

    This may report unexpected warnings, because it is aware of neither
    the syntax of rst documents nor the context of msgstr.

    >>> pe = polib.POEntry(
    ...     msgid ='    indented text',
    ...     msgstr='  narrowed indentation')
    >>> for e in indentation(pe): print(e)
    initial indentation width differs betweeen msgid and msgstr
    """
    # width of leading whitespace = total length - length without it
    idindent = len(pe.msgid) - len(pe.msgid.lstrip())
    strindent = len(pe.msgstr) - len(pe.msgstr.lstrip())
    if idindent != strindent:
        yield "initial indentation width differs betweeen msgid and msgstr"
161
161
162 ####################
162 ####################
163
163
def check(pofile, fatal=True, warning=False):
    """Run registered checkers on "pofile" and return detected problems

    "fatal" and "warning" choose which levels of checkers are run.

    This returns a list of "(pe, errors)" tuples, one for each entry of
    "pofile.translated_entries()" having at least one problem. "errors"
    is a list of "(level, checker name, error message)" tuples.
    An empty list is returned without scanning "pofile", if no checker
    is chosen.
    """
    targetlevel = { 'fatal': fatal, 'warning': warning }
    targetcheckers = [(checker, level)
                      for checker, level in checkers
                      if targetlevel[level]]
    if not targetcheckers:
        return []

    detected = []
    # scanners see the whole file before any entry is checked
    for checker in scanners:
        checker(pofile)
    for pe in pofile.translated_entries():
        errors = []
        for checker, level in targetcheckers:
            if match(checker, pe):
                errors.extend((level, checker.__name__, error)
                              for error in checker(pe))
        if errors:
            detected.append((pe, errors))
    return detected
184
184
185 ########################################
185 ########################################
186
186
if __name__ == "__main__":
    import sys
    import optparse

    optparser = optparse.OptionParser("""%prog [options] pofile ...

This checks Mercurial specific translation problems in specified
'*.po' files.

Each detected problems are shown in the format below::

    filename:linenum:type(checker): problem detail .....

"type" is "fatal" or "warning". "checker" is the name of the function
detecting corresponded error.

Checking by checker "foo" on the specific msgstr can be suppressed by
the "translator comment" like below. Multiple "no-xxxx-check" should
be separated by whitespaces::

    # no-foo-check
    msgid = "....."
    msgstr = "....."
""")
    optparser.add_option("", "--warning",
                         help="show also warning level problems",
                         action="store_true")
    optparser.add_option("", "--doctest",
                         help="run doctest of this tool, instead of check",
                         action="store_true")
    (options, args) = optparser.parse_args()

    if options.doctest:
        import os
        if 'TERM' in os.environ:
            del os.environ['TERM']
        import doctest
        failures, tests = doctest.testmod()
        sys.exit(1 if failures else 0)

    # NOTE(review): this changeset drops the ExtPOFileParser shim which
    # used to set "linenum" on each entry; "pe.linenum" below is
    # presumably provided by the updated polib itself - confirm
    detected = []
    warning = options.warning
    for f in args:
        detected.extend((f, pe, errors)
                        for pe, errors in check(polib.pofile(f),
                                                warning=warning))
    if detected:
        for f, pe, errors in detected:
            for level, checker, error in errors:
                sys.stderr.write('%s:%d:%s(%s): %s\n'
                                 % (f, pe.linenum, level, checker, error))
        sys.exit(1)
This diff has been collapsed as it changes many lines, (776 lines changed) Show them Hide them
@@ -1,1554 +1,1838
1 # -*- coding: utf-8 -*-
2 # no-check-code
1 # no-check-code
2 # -* coding: utf-8 -*-
3 #
3 #
4 # License: MIT (see LICENSE file provided)
4 # License: MIT (see LICENSE file provided)
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6
6
7 """
7 """
8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
9 mo files). You can load existing files, iterate through it's entries, add,
9 mo files). You can load existing files, iterate through it's entries, add,
10 modify entries, comments or metadata, etc. or create new po files from scratch.
10 modify entries, comments or metadata, etc. or create new po files from scratch.
11
11
12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
13 :func:`~polib.mofile` convenience functions.
13 :func:`~polib.mofile` convenience functions.
14 """
14 """
15
15
16 from __future__ import absolute_import
16 from __future__ import absolute_import
17
17
18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
19 __version__ = '0.6.4'
19 __version__ = '1.0.7'
20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
21 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
21 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
22
22
23 import array
23 import array
24 import codecs
24 import codecs
25 import os
25 import os
26 import re
26 import re
27 import struct
27 import struct
28 import sys
28 import sys
29 import textwrap
29 import textwrap
30 import types
30
try:
    import io
except ImportError:
    # replacement of io.open() for python < 2.6
    # we use codecs instead
    class io(object):
        """Minimal stand-in exposing only ``open()``, backed by codecs."""

        @staticmethod
        def open(fpath, mode='r', encoding=None):
            return codecs.open(fpath, mode, encoding)
31
40
32
41
# the default encoding to use when encoding cannot be detected
default_encoding = 'utf-8'
35
44
45 # python 2/3 compatibility helpers {{{
46
47
if sys.version_info[:2] < (3, 0):
    PY3 = False
    text_type = unicode

    def b(s):
        """Return ``s`` as is (a python 2 ``str`` literal is bytes)."""
        return s

    def u(s):
        """Return ``s`` decoded to unicode with "unicode_escape"."""
        return unicode(s, "unicode_escape")

else:
    PY3 = True
    text_type = str

    def b(s):
        """Return ``s`` encoded to bytes with "latin-1"."""
        return s.encode("latin-1")

    def u(s):
        """Return ``s`` as is (a python 3 ``str`` is already text)."""
        return s
67 # }}}
36 # _pofile_or_mofile {{{
68 # _pofile_or_mofile {{{
37
69
70
def _pofile_or_mofile(f, type, **kwargs):
    """
    Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
    honor the DRY concept.

    Arguments:

    ``f``
        string, path to the file or its content, passed to the parser.

    ``type``
        string, ``'pofile'`` selects the po parser, any other value the mo
        parser.

    ``kwargs``
        the optional ``encoding``, ``check_for_duplicates``, ``klass`` and
        ``wrapwidth`` keyword arguments.
    """
    # get the file encoding
    enc = kwargs.get('encoding')
    if enc is None:
        enc = detect_encoding(f, type == 'mofile')

    # parse the file
    kls = _POFileParser if type == 'pofile' else _MOFileParser
    parser = kls(
        f,
        encoding=enc,
        check_for_duplicates=kwargs.get('check_for_duplicates', False),
        klass=kwargs.get('klass')
    )
    instance = parser.parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
92 # }}}
93 # _is_file {{{
58
94
95
def _is_file(filename_or_contents):
    """
    Safely returns the value of os.path.exists(filename_or_contents).

    Arguments:

    ``filename_or_contents``
        either a filename, or a string holding the contents of some file.
        In the latter case, this function will always return False.
    """
    try:
        return os.path.exists(filename_or_contents)
    except (ValueError, UnicodeEncodeError):
        # file contents may not be usable as a path at all
        return False
59 # }}}
110 # }}}
60 # function pofile() {{{
111 # function pofile() {{{
61
112
113
def pofile(pofile, **kwargs):
    """
    Convenience function that parses the po or pot file ``pofile`` and returns
    a :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.POFile`
        instance).
    """
    return _pofile_or_mofile(pofile, 'pofile', **kwargs)
85
86 # }}}
142 # }}}
87 # function mofile() {{{
143 # function mofile() {{{
88
144
145
def mofile(mofile, **kwargs):
    """
    Convenience function that parses the mo file ``mofile`` and returns a
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext to generate the po file that was used to format the mo file
        (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.MOFile`
        instance).
    """
    return _pofile_or_mofile(mofile, 'mofile', **kwargs)
113
114 # }}}
175 # }}}
115 # function detect_encoding() {{{
176 # function detect_encoding() {{{
116
177
178
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
    # text pattern for in-memory contents, bytes pattern for binary reads
    rxt = re.compile(u(PATTERN))
    rxb = re.compile(b(PATTERN))

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    if not _is_file(file):
        # ``file`` holds the contents itself: only the first match counts
        match = rxt.search(file)
        if match:
            enc = match.group(1).strip()
            if charset_exists(enc):
                return enc
    else:
        # For PY3, always treat as binary
        if binary_mode or PY3:
            mode = 'rb'
            rx = rxb
        else:
            mode = 'r'
            rx = rxt
        f = open(file, mode)
        try:
            # an unknown charset does not stop the scan: later lines are
            # still examined
            for line in f.readlines():
                match = rx.search(line)
                if match:
                    enc = match.group(1).strip()
                    if not isinstance(enc, text_type):
                        enc = enc.decode('utf-8')
                    if charset_exists(enc):
                        return enc
        finally:
            # close the handle even if reading or decoding raises
            f.close()
    return default_encoding
163
164 # }}}
232 # }}}
165 # function escape() {{{
233 # function escape() {{{
166
234
235
def escape(st):
    """
    Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    # backslashes first, so that the ones added below are not doubled
    return st.replace('\\', r'\\')\
             .replace('\t', r'\t')\
             .replace('\r', r'\r')\
             .replace('\n', r'\n')\
             .replace('\"', r'\"')
177
178 # }}}
246 # }}}
179 # function unescape() {{{
247 # function unescape() {{{
180
248
249
def unescape(st):
    """
    Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    def unescape_repl(m):
        # the character following the backslash
        char = m.group(1)
        if char == 'n':
            return '\n'
        if char == 't':
            return '\t'
        if char == 'r':
            return '\r'
        if char == '\\':
            return '\\'
        return char  # handles escaped double quote
    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
198
199 # }}}
267 # }}}
200 # class _BaseFile {{{
268 # class _BaseFile {{{
201
269
270
202 class _BaseFile(list):
271 class _BaseFile(list):
203 """
272 """
204 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
273 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
205 classes. This class should **not** be instanciated directly.
274 classes. This class should **not** be instanciated directly.
206 """
275 """
207
276
208 def __init__(self, *args, **kwargs):
277 def __init__(self, *args, **kwargs):
209 """
278 """
210 Constructor, accepts the following keyword arguments:
279 Constructor, accepts the following keyword arguments:
211
280
212 ``pofile``
281 ``pofile``
213 string, the path to the po or mo file, or its content as a string.
282 string, the path to the po or mo file, or its content as a string.
214
283
215 ``wrapwidth``
284 ``wrapwidth``
216 integer, the wrap width, only useful when the ``-w`` option was
285 integer, the wrap width, only useful when the ``-w`` option was
217 passed to xgettext (optional, default: ``78``).
286 passed to xgettext (optional, default: ``78``).
218
287
219 ``encoding``
288 ``encoding``
220 string, the encoding to use, defaults to ``default_encoding``
289 string, the encoding to use, defaults to ``default_encoding``
221 global variable (optional).
290 global variable (optional).
222
291
223 ``check_for_duplicates``
292 ``check_for_duplicates``
224 whether to check for duplicate entries when adding entries to the
293 whether to check for duplicate entries when adding entries to the
225 file, (optional, default: ``False``).
294 file, (optional, default: ``False``).
226 """
295 """
227 list.__init__(self)
296 list.__init__(self)
228 # the opened file handle
297 # the opened file handle
229 pofile = kwargs.get('pofile', None)
298 pofile = kwargs.get('pofile', None)
230 if pofile and os.path.exists(pofile):
299 if pofile and _is_file(pofile):
231 self.fpath = pofile
300 self.fpath = pofile
232 else:
301 else:
233 self.fpath = kwargs.get('fpath')
302 self.fpath = kwargs.get('fpath')
234 # the width at which lines should be wrapped
303 # the width at which lines should be wrapped
235 self.wrapwidth = kwargs.get('wrapwidth', 78)
304 self.wrapwidth = kwargs.get('wrapwidth', 78)
236 # the file encoding
305 # the file encoding
237 self.encoding = kwargs.get('encoding', default_encoding)
306 self.encoding = kwargs.get('encoding', default_encoding)
238 # whether to check for duplicate entries or not
307 # whether to check for duplicate entries or not
239 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
308 self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
240 # header
309 # header
241 self.header = ''
310 self.header = ''
242 # both po and mo files have metadata
311 # both po and mo files have metadata
243 self.metadata = {}
312 self.metadata = {}
244 self.metadata_is_fuzzy = 0
313 self.metadata_is_fuzzy = 0
245
314
246 def __unicode__(self):
315 def __unicode__(self):
247 """
316 """
248 Returns the unicode representation of the file.
317 Returns the unicode representation of the file.
249 """
318 """
250 ret = []
319 ret = []
251 entries = [self.metadata_as_entry()] + \
320 entries = [self.metadata_as_entry()] + \
252 [e for e in self if not e.obsolete]
321 [e for e in self if not e.obsolete]
253 for entry in entries:
322 for entry in entries:
254 ret.append(entry.__unicode__(self.wrapwidth))
323 ret.append(entry.__unicode__(self.wrapwidth))
255 for entry in self.obsolete_entries():
324 for entry in self.obsolete_entries():
256 ret.append(entry.__unicode__(self.wrapwidth))
325 ret.append(entry.__unicode__(self.wrapwidth))
257 ret = '\n'.join(ret)
326 ret = u('\n').join(ret)
258
327
259 if type(ret) != types.UnicodeType:
328 assert isinstance(ret, text_type)
260 return unicode(ret, self.encoding)
329 #if type(ret) != text_type:
330 # return unicode(ret, self.encoding)
261 return ret
331 return ret
262
332
333 if PY3:
334 def __str__(self):
335 return self.__unicode__()
336 else:
263 def __str__(self):
337 def __str__(self):
264 """
338 """
265 Returns the string representation of the file.
339 Returns the string representation of the file.
266 """
340 """
267 return unicode(self).encode(self.encoding)
341 return unicode(self).encode(self.encoding)
268
342
269 def __contains__(self, entry):
343 def __contains__(self, entry):
270 """
344 """
271 Overriden ``list`` method to implement the membership test (in and
345 Overridden ``list`` method to implement the membership test (in and
272 not in).
346 not in).
273 The method considers that an entry is in the file if it finds an entry
347 The method considers that an entry is in the file if it finds an entry
274 that has the same msgid (the test is **case sensitive**).
348 that has the same msgid (the test is **case sensitive**) and the same
349 msgctxt (or none for both entries).
275
350
276 Argument:
351 Argument:
277
352
278 ``entry``
353 ``entry``
279 an instance of :class:`~polib._BaseEntry`.
354 an instance of :class:`~polib._BaseEntry`.
280 """
355 """
281 return self.find(entry.msgid, by='msgid') is not None
356 return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
357 is not None
282
358
283 def __eq__(self, other):
359 def __eq__(self, other):
284 return unicode(self) == unicode(other)
360 return str(self) == str(other)
285
361
286 def append(self, entry):
362 def append(self, entry):
287 """
363 """
288 Overriden method to check for duplicates entries, if a user tries to
364 Overridden method to check for duplicates entries, if a user tries to
289 add an entry that is already in the file, the method will raise a
365 add an entry that is already in the file, the method will raise a
290 ``ValueError`` exception.
366 ``ValueError`` exception.
291
367
292 Argument:
368 Argument:
293
369
294 ``entry``
370 ``entry``
295 an instance of :class:`~polib._BaseEntry`.
371 an instance of :class:`~polib._BaseEntry`.
296 """
372 """
297 if self.check_for_duplicates and entry in self:
373 if self.check_for_duplicates and entry in self:
298 raise ValueError('Entry "%s" already exists' % entry.msgid)
374 raise ValueError('Entry "%s" already exists' % entry.msgid)
299 super(_BaseFile, self).append(entry)
375 super(_BaseFile, self).append(entry)
300
376
301 def insert(self, index, entry):
377 def insert(self, index, entry):
302 """
378 """
303 Overriden method to check for duplicates entries, if a user tries to
379 Overridden method to check for duplicates entries, if a user tries to
304 add an entry that is already in the file, the method will raise a
380 add an entry that is already in the file, the method will raise a
305 ``ValueError`` exception.
381 ``ValueError`` exception.
306
382
307 Arguments:
383 Arguments:
308
384
309 ``index``
385 ``index``
310 index at which the entry should be inserted.
386 index at which the entry should be inserted.
311
387
312 ``entry``
388 ``entry``
313 an instance of :class:`~polib._BaseEntry`.
389 an instance of :class:`~polib._BaseEntry`.
314 """
390 """
315 if self.check_for_duplicates and entry in self:
391 if self.check_for_duplicates and entry in self:
316 raise ValueError('Entry "%s" already exists' % entry.msgid)
392 raise ValueError('Entry "%s" already exists' % entry.msgid)
317 super(_BaseFile, self).insert(index, entry)
393 super(_BaseFile, self).insert(index, entry)
318
394
319 def metadata_as_entry(self):
395 def metadata_as_entry(self):
320 """
396 """
321 Returns the file metadata as a :class:`~polib.POFile` instance.
397 Returns the file metadata as a :class:`~polib.POFile` instance.
322 """
398 """
323 e = POEntry(msgid='')
399 e = POEntry(msgid='')
324 mdata = self.ordered_metadata()
400 mdata = self.ordered_metadata()
325 if mdata:
401 if mdata:
326 strs = []
402 strs = []
327 for name, value in mdata:
403 for name, value in mdata:
328 # Strip whitespace off each line in a multi-line entry
404 # Strip whitespace off each line in a multi-line entry
329 strs.append('%s: %s' % (name, value))
405 strs.append('%s: %s' % (name, value))
330 e.msgstr = '\n'.join(strs) + '\n'
406 e.msgstr = '\n'.join(strs) + '\n'
331 if self.metadata_is_fuzzy:
407 if self.metadata_is_fuzzy:
332 e.flags.append('fuzzy')
408 e.flags.append('fuzzy')
333 return e
409 return e
334
410
335 def save(self, fpath=None, repr_method='__str__'):
411 def save(self, fpath=None, repr_method='__unicode__'):
336 """
412 """
337 Saves the po file to ``fpath``.
413 Saves the po file to ``fpath``.
338 If it is an existing file and no ``fpath`` is provided, then the
414 If it is an existing file and no ``fpath`` is provided, then the
339 existing file is rewritten with the modified data.
415 existing file is rewritten with the modified data.
340
416
341 Keyword arguments:
417 Keyword arguments:
342
418
343 ``fpath``
419 ``fpath``
344 string, full or relative path to the file.
420 string, full or relative path to the file.
345
421
346 ``repr_method``
422 ``repr_method``
347 string, the method to use for output.
423 string, the method to use for output.
348 """
424 """
349 if self.fpath is None and fpath is None:
425 if self.fpath is None and fpath is None:
350 raise IOError('You must provide a file path to save() method')
426 raise IOError('You must provide a file path to save() method')
351 contents = getattr(self, repr_method)()
427 contents = getattr(self, repr_method)()
352 if fpath is None:
428 if fpath is None:
353 fpath = self.fpath
429 fpath = self.fpath
354 if repr_method == 'to_binary':
430 if repr_method == 'to_binary':
355 fhandle = open(fpath, 'wb')
431 fhandle = open(fpath, 'wb')
356 else:
432 else:
357 fhandle = codecs.open(fpath, 'w', self.encoding)
433 fhandle = io.open(fpath, 'w', encoding=self.encoding)
358 if type(contents) != types.UnicodeType:
434 if not isinstance(contents, text_type):
359 contents = contents.decode(self.encoding)
435 contents = contents.decode(self.encoding)
360 fhandle.write(contents)
436 fhandle.write(contents)
361 fhandle.close()
437 fhandle.close()
362 # set the file path if not set
438 # set the file path if not set
363 if self.fpath is None and fpath:
439 if self.fpath is None and fpath:
364 self.fpath = fpath
440 self.fpath = fpath
365
441
366 def find(self, st, by='msgid', include_obsolete_entries=False,
442 def find(self, st, by='msgid', include_obsolete_entries=False,
367 msgctxt=False):
443 msgctxt=False):
368 """
444 """
369 Find the entry which msgid (or property identified by the ``by``
445 Find the entry which msgid (or property identified by the ``by``
370 argument) matches the string ``st``.
446 argument) matches the string ``st``.
371
447
372 Keyword arguments:
448 Keyword arguments:
373
449
374 ``st``
450 ``st``
375 string, the string to search for.
451 string, the string to search for.
376
452
377 ``by``
453 ``by``
378 string, the property to use for comparison (default: ``msgid``).
454 string, the property to use for comparison (default: ``msgid``).
379
455
380 ``include_obsolete_entries``
456 ``include_obsolete_entries``
381 boolean, whether to also search in entries that are obsolete.
457 boolean, whether to also search in entries that are obsolete.
382
458
383 ``msgctxt``
459 ``msgctxt``
384 string, allows to specify a specific message context for the
460 string, allows specifying a specific message context for the
385 search.
461 search.
386 """
462 """
387 if include_obsolete_entries:
463 if include_obsolete_entries:
388 entries = self[:]
464 entries = self[:]
389 else:
465 else:
390 entries = [e for e in self if not e.obsolete]
466 entries = [e for e in self if not e.obsolete]
391 for e in entries:
467 for e in entries:
392 if getattr(e, by) == st:
468 if getattr(e, by) == st:
393 if msgctxt and e.msgctxt != msgctxt:
469 if msgctxt is not False and e.msgctxt != msgctxt:
394 continue
470 continue
395 return e
471 return e
396 return None
472 return None
397
473
398 def ordered_metadata(self):
474 def ordered_metadata(self):
399 """
475 """
400 Convenience method that returns an ordered version of the metadata
476 Convenience method that returns an ordered version of the metadata
401 dictionary. The return value is list of tuples (metadata name,
477 dictionary. The return value is list of tuples (metadata name,
402 metadata_value).
478 metadata_value).
403 """
479 """
404 # copy the dict first
480 # copy the dict first
405 metadata = self.metadata.copy()
481 metadata = self.metadata.copy()
406 data_order = [
482 data_order = [
407 'Project-Id-Version',
483 'Project-Id-Version',
408 'Report-Msgid-Bugs-To',
484 'Report-Msgid-Bugs-To',
409 'POT-Creation-Date',
485 'POT-Creation-Date',
410 'PO-Revision-Date',
486 'PO-Revision-Date',
411 'Last-Translator',
487 'Last-Translator',
412 'Language-Team',
488 'Language-Team',
413 'MIME-Version',
489 'MIME-Version',
414 'Content-Type',
490 'Content-Type',
415 'Content-Transfer-Encoding'
491 'Content-Transfer-Encoding',
492 'Language',
493 'Plural-Forms'
416 ]
494 ]
417 ordered_data = []
495 ordered_data = []
418 for data in data_order:
496 for data in data_order:
419 try:
497 try:
420 value = metadata.pop(data)
498 value = metadata.pop(data)
421 ordered_data.append((data, value))
499 ordered_data.append((data, value))
422 except KeyError:
500 except KeyError:
423 pass
501 pass
424 # the rest of the metadata will be alphabetically ordered since there
502 # the rest of the metadata will be alphabetically ordered since there
425 # are no specs for this AFAIK
503 # are no specs for this AFAIK
426 keys = metadata.keys()
504 for data in sorted(metadata.keys()):
427 keys.sort()
428 for data in keys:
429 value = metadata[data]
505 value = metadata[data]
430 ordered_data.append((data, value))
506 ordered_data.append((data, value))
431 return ordered_data
507 return ordered_data
432
508
433 def to_binary(self):
509 def to_binary(self):
434 """
510 """
435 Return the binary representation of the file.
511 Return the binary representation of the file.
436 """
512 """
437 offsets = []
513 offsets = []
438 entries = self.translated_entries()
514 entries = self.translated_entries()
515
439 # the keys are sorted in the .mo file
516 # the keys are sorted in the .mo file
440 def cmp(_self, other):
517 def cmp(_self, other):
441 # msgfmt compares entries with msgctxt if it exists
518 # msgfmt compares entries with msgctxt if it exists
442 if _self.msgctxt:
519 self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
443 self_msgid = _self.msgctxt
520 other_msgid = other.msgctxt and other.msgctxt or other.msgid
444 else:
445 self_msgid = _self.msgid
446
447 if other.msgctxt:
448 other_msgid = other.msgctxt
449 else:
450 other_msgid = other.msgid
451 if self_msgid > other_msgid:
521 if self_msgid > other_msgid:
452 return 1
522 return 1
453 elif self_msgid < other_msgid:
523 elif self_msgid < other_msgid:
454 return -1
524 return -1
455 else:
525 else:
456 return 0
526 return 0
457 # add metadata entry
527 # add metadata entry
458 entries.sort(cmp)
528 entries.sort(key=lambda o: o.msgctxt or o.msgid)
459 mentry = self.metadata_as_entry()
529 mentry = self.metadata_as_entry()
460 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
530 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
461 entries = [mentry] + entries
531 entries = [mentry] + entries
462 entries_len = len(entries)
532 entries_len = len(entries)
463 ids, strs = '', ''
533 ids, strs = b(''), b('')
464 for e in entries:
534 for e in entries:
465 # For each string, we need size and file offset. Each string is
535 # For each string, we need size and file offset. Each string is
466 # NUL terminated; the NUL does not count into the size.
536 # NUL terminated; the NUL does not count into the size.
467 msgid = ''
537 msgid = b('')
468 if e.msgctxt:
538 if e.msgctxt:
469 # Contexts are stored by storing the concatenation of the
539 # Contexts are stored by storing the concatenation of the
470 # context, a <EOT> byte, and the original string
540 # context, a <EOT> byte, and the original string
471 msgid = self._encode(e.msgctxt + '\4')
541 msgid = self._encode(e.msgctxt + '\4')
472 if e.msgid_plural:
542 if e.msgid_plural:
473 indexes = e.msgstr_plural.keys()
474 indexes.sort()
475 msgstr = []
543 msgstr = []
476 for index in indexes:
544 for index in sorted(e.msgstr_plural.keys()):
477 msgstr.append(e.msgstr_plural[index])
545 msgstr.append(e.msgstr_plural[index])
478 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
546 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
479 msgstr = self._encode('\0'.join(msgstr))
547 msgstr = self._encode('\0'.join(msgstr))
480 else:
548 else:
481 msgid += self._encode(e.msgid)
549 msgid += self._encode(e.msgid)
482 msgstr = self._encode(e.msgstr)
550 msgstr = self._encode(e.msgstr)
483 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
551 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
484 ids += msgid + '\0'
552 ids += msgid + b('\0')
485 strs += msgstr + '\0'
553 strs += msgstr + b('\0')
486
554
487 # The header is 7 32-bit unsigned integers.
555 # The header is 7 32-bit unsigned integers.
488 keystart = 7*4+16*entries_len
556 keystart = 7 * 4 + 16 * entries_len
489 # and the values start after the keys
557 # and the values start after the keys
490 valuestart = keystart + len(ids)
558 valuestart = keystart + len(ids)
491 koffsets = []
559 koffsets = []
492 voffsets = []
560 voffsets = []
493 # The string table first has the list of keys, then the list of values.
561 # The string table first has the list of keys, then the list of values.
494 # Each entry has first the size of the string, then the file offset.
562 # Each entry has first the size of the string, then the file offset.
495 for o1, l1, o2, l2 in offsets:
563 for o1, l1, o2, l2 in offsets:
496 koffsets += [l1, o1+keystart]
564 koffsets += [l1, o1 + keystart]
497 voffsets += [l2, o2+valuestart]
565 voffsets += [l2, o2 + valuestart]
498 offsets = koffsets + voffsets
566 offsets = koffsets + voffsets
499 # check endianness for magic number
500 if struct.pack('@h', 1) == struct.pack('<h', 1):
501 magic_number = MOFile.LITTLE_ENDIAN
502 else:
503 magic_number = MOFile.BIG_ENDIAN
504
567
505 output = struct.pack(
568 output = struct.pack(
506 "Iiiiiii",
569 "Iiiiiii",
507 magic_number, # Magic number
570 # Magic number
508 0, # Version
571 MOFile.MAGIC,
509 entries_len, # # of entries
572 # Version
510 7*4, # start of key index
573 0,
511 7*4+entries_len*8, # start of value index
574 # number of entries
512 0, keystart # size and offset of hash table
575 entries_len,
513 # Important: we don't use hash tables
576 # start of key index
577 7 * 4,
578 # start of value index
579 7 * 4 + entries_len * 8,
580 # size and offset of hash table, we don't use hash tables
581 0, keystart
582
514 )
583 )
584 if PY3 and sys.version_info.minor > 1: # python 3.2 or superior
585 output += array.array("i", offsets).tobytes()
586 else:
515 output += array.array("i", offsets).tostring()
587 output += array.array("i", offsets).tostring()
516 output += ids
588 output += ids
517 output += strs
589 output += strs
518 return output
590 return output
519
591
520 def _encode(self, mixed):
592 def _encode(self, mixed):
521 """
593 """
522 Encodes the given ``mixed`` argument with the file encoding if and
594 Encodes the given ``mixed`` argument with the file encoding if and
523 only if it's an unicode string and returns the encoded string.
595 only if it's an unicode string and returns the encoded string.
524 """
596 """
525 if type(mixed) == types.UnicodeType:
597 if isinstance(mixed, text_type):
526 return mixed.encode(self.encoding)
598 mixed = mixed.encode(self.encoding)
527 return mixed
599 return mixed
528
529 # }}}
600 # }}}
530 # class POFile {{{
601 # class POFile {{{
531
602
603
532 class POFile(_BaseFile):
604 class POFile(_BaseFile):
533 """
605 """
534 Po (or Pot) file reader/writer.
606 Po (or Pot) file reader/writer.
535 This class inherits the :class:`~polib._BaseFile` class and, by extension,
607 This class inherits the :class:`~polib._BaseFile` class and, by extension,
536 the python ``list`` type.
608 the python ``list`` type.
537 """
609 """
538
610
539 def __unicode__(self):
611 def __unicode__(self):
540 """
612 """
541 Returns the unicode representation of the po file.
613 Returns the unicode representation of the po file.
542 """
614 """
543 ret, headers = '', self.header.split('\n')
615 ret, headers = '', self.header.split('\n')
544 for header in headers:
616 for header in headers:
545 if header[:1] in [',', ':']:
617 if not len(header):
618 ret += "#\n"
619 elif header[:1] in [',', ':']:
546 ret += '#%s\n' % header
620 ret += '#%s\n' % header
547 else:
621 else:
548 ret += '# %s\n' % header
622 ret += '# %s\n' % header
549
623
550 if type(ret) != types.UnicodeType:
624 if not isinstance(ret, text_type):
551 ret = unicode(ret, self.encoding)
625 ret = ret.decode(self.encoding)
552
626
553 return ret + _BaseFile.__unicode__(self)
627 return ret + _BaseFile.__unicode__(self)
554
628
555 def save_as_mofile(self, fpath):
629 def save_as_mofile(self, fpath):
556 """
630 """
557 Saves the binary representation of the file to given ``fpath``.
631 Saves the binary representation of the file to given ``fpath``.
558
632
559 Keyword argument:
633 Keyword argument:
560
634
561 ``fpath``
635 ``fpath``
562 string, full or relative path to the mo file.
636 string, full or relative path to the mo file.
563 """
637 """
564 _BaseFile.save(self, fpath, 'to_binary')
638 _BaseFile.save(self, fpath, 'to_binary')
565
639
566 def percent_translated(self):
640 def percent_translated(self):
567 """
641 """
568 Convenience method that returns the percentage of translated
642 Convenience method that returns the percentage of translated
569 messages.
643 messages.
570 """
644 """
571 total = len([e for e in self if not e.obsolete])
645 total = len([e for e in self if not e.obsolete])
572 if total == 0:
646 if total == 0:
573 return 100
647 return 100
574 translated = len(self.translated_entries())
648 translated = len(self.translated_entries())
575 return int((100.00 / float(total)) * translated)
649 return int(translated * 100 / float(total))
576
650
577 def translated_entries(self):
651 def translated_entries(self):
578 """
652 """
579 Convenience method that returns the list of translated entries.
653 Convenience method that returns the list of translated entries.
580 """
654 """
581 return [e for e in self if e.translated()]
655 return [e for e in self if e.translated()]
582
656
583 def untranslated_entries(self):
657 def untranslated_entries(self):
584 """
658 """
585 Convenience method that returns the list of untranslated entries.
659 Convenience method that returns the list of untranslated entries.
586 """
660 """
587 return [e for e in self if not e.translated() and not e.obsolete \
661 return [e for e in self if not e.translated() and not e.obsolete
588 and not 'fuzzy' in e.flags]
662 and not 'fuzzy' in e.flags]
589
663
590 def fuzzy_entries(self):
664 def fuzzy_entries(self):
591 """
665 """
592 Convenience method that returns the list of fuzzy entries.
666 Convenience method that returns the list of fuzzy entries.
593 """
667 """
594 return [e for e in self if 'fuzzy' in e.flags]
668 return [e for e in self if 'fuzzy' in e.flags]
595
669
596 def obsolete_entries(self):
670 def obsolete_entries(self):
597 """
671 """
598 Convenience method that returns the list of obsolete entries.
672 Convenience method that returns the list of obsolete entries.
599 """
673 """
600 return [e for e in self if e.obsolete]
674 return [e for e in self if e.obsolete]
601
675
602 def merge(self, refpot):
676 def merge(self, refpot):
603 """
677 """
604 Convenience method that merges the current pofile with the pot file
678 Convenience method that merges the current pofile with the pot file
605 provided. It behaves exactly as the gettext msgmerge utility:
679 provided. It behaves exactly as the gettext msgmerge utility:
606
680
607 * comments of this file will be preserved, but extracted comments and
681 * comments of this file will be preserved, but extracted comments and
608 occurrences will be discarded;
682 occurrences will be discarded;
609 * any translations or comments in the file will be discarded, however,
683 * any translations or comments in the file will be discarded, however,
610 dot comments and file positions will be preserved;
684 dot comments and file positions will be preserved;
611 * the fuzzy flags are preserved.
685 * the fuzzy flags are preserved.
612
686
613 Keyword argument:
687 Keyword argument:
614
688
615 ``refpot``
689 ``refpot``
616 object POFile, the reference catalog.
690 object POFile, the reference catalog.
617 """
691 """
692 # Store entries in dict/set for faster access
693 self_entries = dict((entry.msgid, entry) for entry in self)
694 refpot_msgids = set(entry.msgid for entry in refpot)
695 # Merge entries that are in the refpot
618 for entry in refpot:
696 for entry in refpot:
619 e = self.find(entry.msgid, include_obsolete_entries=True)
697 e = self_entries.get(entry.msgid)
620 if e is None:
698 if e is None:
621 e = POEntry()
699 e = POEntry()
622 self.append(e)
700 self.append(e)
623 e.merge(entry)
701 e.merge(entry)
624 # ok, now we must "obsolete" entries that are not in the refpot anymore
702 # ok, now we must "obsolete" entries that are not in the refpot anymore
625 for entry in self:
703 for entry in self:
626 if refpot.find(entry.msgid) is None:
704 if entry.msgid not in refpot_msgids:
627 entry.obsolete = True
705 entry.obsolete = True
628
629 # }}}
706 # }}}
630 # class MOFile {{{
707 # class MOFile {{{
631
708
709
632 class MOFile(_BaseFile):
710 class MOFile(_BaseFile):
633 """
711 """
634 Mo file reader/writer.
712 Mo file reader/writer.
635 This class inherits the :class:`~polib._BaseFile` class and, by
713 This class inherits the :class:`~polib._BaseFile` class and, by
636 extension, the python ``list`` type.
714 extension, the python ``list`` type.
637 """
715 """
638 BIG_ENDIAN = 0xde120495
716 MAGIC = 0x950412de
639 LITTLE_ENDIAN = 0x950412de
717 MAGIC_SWAPPED = 0xde120495
640
718
641 def __init__(self, *args, **kwargs):
719 def __init__(self, *args, **kwargs):
642 """
720 """
643 Constructor, accepts all keywords arguments accepted by
721 Constructor, accepts all keywords arguments accepted by
644 :class:`~polib._BaseFile` class.
722 :class:`~polib._BaseFile` class.
645 """
723 """
646 _BaseFile.__init__(self, *args, **kwargs)
724 _BaseFile.__init__(self, *args, **kwargs)
647 self.magic_number = None
725 self.magic_number = None
648 self.version = 0
726 self.version = 0
649
727
650 def save_as_pofile(self, fpath):
728 def save_as_pofile(self, fpath):
651 """
729 """
652 Saves the mofile as a pofile to ``fpath``.
730 Saves the mofile as a pofile to ``fpath``.
653
731
654 Keyword argument:
732 Keyword argument:
655
733
656 ``fpath``
734 ``fpath``
657 string, full or relative path to the file.
735 string, full or relative path to the file.
658 """
736 """
659 _BaseFile.save(self, fpath)
737 _BaseFile.save(self, fpath)
660
738
661 def save(self, fpath=None):
739 def save(self, fpath=None):
662 """
740 """
663 Saves the mofile to ``fpath``.
741 Saves the mofile to ``fpath``.
664
742
665 Keyword argument:
743 Keyword argument:
666
744
667 ``fpath``
745 ``fpath``
668 string, full or relative path to the file.
746 string, full or relative path to the file.
669 """
747 """
670 _BaseFile.save(self, fpath, 'to_binary')
748 _BaseFile.save(self, fpath, 'to_binary')
671
749
672 def percent_translated(self):
750 def percent_translated(self):
673 """
751 """
674 Convenience method to keep the same interface with POFile instances.
752 Convenience method to keep the same interface with POFile instances.
675 """
753 """
676 return 100
754 return 100
677
755
678 def translated_entries(self):
756 def translated_entries(self):
679 """
757 """
680 Convenience method to keep the same interface with POFile instances.
758 Convenience method to keep the same interface with POFile instances.
681 """
759 """
682 return self
760 return self
683
761
684 def untranslated_entries(self):
762 def untranslated_entries(self):
685 """
763 """
686 Convenience method to keep the same interface with POFile instances.
764 Convenience method to keep the same interface with POFile instances.
687 """
765 """
688 return []
766 return []
689
767
690 def fuzzy_entries(self):
768 def fuzzy_entries(self):
691 """
769 """
692 Convenience method to keep the same interface with POFile instances.
770 Convenience method to keep the same interface with POFile instances.
693 """
771 """
694 return []
772 return []
695
773
696 def obsolete_entries(self):
774 def obsolete_entries(self):
697 """
775 """
698 Convenience method to keep the same interface with POFile instances.
776 Convenience method to keep the same interface with POFile instances.
699 """
777 """
700 return []
778 return []
701
702 # }}}
779 # }}}
703 # class _BaseEntry {{{
780 # class _BaseEntry {{{
704
781
782
705 class _BaseEntry(object):
783 class _BaseEntry(object):
706 """
784 """
707 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
785 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
708 This class should **not** be instanciated directly.
786 This class should **not** be instanciated directly.
709 """
787 """
710
788
711 def __init__(self, *args, **kwargs):
789 def __init__(self, *args, **kwargs):
712 """
790 """
713 Constructor, accepts the following keyword arguments:
791 Constructor, accepts the following keyword arguments:
714
792
715 ``msgid``
793 ``msgid``
716 string, the entry msgid.
794 string, the entry msgid.
717
795
718 ``msgstr``
796 ``msgstr``
719 string, the entry msgstr.
797 string, the entry msgstr.
720
798
721 ``msgid_plural``
799 ``msgid_plural``
722 string, the entry msgid_plural.
800 string, the entry msgid_plural.
723
801
724 ``msgstr_plural``
802 ``msgstr_plural``
725 list, the entry msgstr_plural lines.
803 list, the entry msgstr_plural lines.
726
804
727 ``msgctxt``
805 ``msgctxt``
728 string, the entry context (msgctxt).
806 string, the entry context (msgctxt).
729
807
730 ``obsolete``
808 ``obsolete``
731 bool, whether the entry is "obsolete" or not.
809 bool, whether the entry is "obsolete" or not.
732
810
733 ``encoding``
811 ``encoding``
734 string, the encoding to use, defaults to ``default_encoding``
812 string, the encoding to use, defaults to ``default_encoding``
735 global variable (optional).
813 global variable (optional).
736 """
814 """
737 self.msgid = kwargs.get('msgid', '')
815 self.msgid = kwargs.get('msgid', '')
738 self.msgstr = kwargs.get('msgstr', '')
816 self.msgstr = kwargs.get('msgstr', '')
739 self.msgid_plural = kwargs.get('msgid_plural', '')
817 self.msgid_plural = kwargs.get('msgid_plural', '')
740 self.msgstr_plural = kwargs.get('msgstr_plural', {})
818 self.msgstr_plural = kwargs.get('msgstr_plural', {})
741 self.msgctxt = kwargs.get('msgctxt', None)
819 self.msgctxt = kwargs.get('msgctxt', None)
742 self.obsolete = kwargs.get('obsolete', False)
820 self.obsolete = kwargs.get('obsolete', False)
743 self.encoding = kwargs.get('encoding', default_encoding)
821 self.encoding = kwargs.get('encoding', default_encoding)
744
822
745 def __unicode__(self, wrapwidth=78):
823 def __unicode__(self, wrapwidth=78):
746 """
824 """
747 Returns the unicode representation of the entry.
825 Returns the unicode representation of the entry.
748 """
826 """
749 if self.obsolete:
827 if self.obsolete:
750 delflag = '#~ '
828 delflag = '#~ '
751 else:
829 else:
752 delflag = ''
830 delflag = ''
753 ret = []
831 ret = []
754 # write the msgctxt if any
832 # write the msgctxt if any
755 if self.msgctxt is not None:
833 if self.msgctxt is not None:
756 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
834 ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
835 wrapwidth)
757 # write the msgid
836 # write the msgid
758 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
837 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
759 # write the msgid_plural if any
838 # write the msgid_plural if any
760 if self.msgid_plural:
839 if self.msgid_plural:
761 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
840 ret += self._str_field("msgid_plural", delflag, "",
841 self.msgid_plural, wrapwidth)
762 if self.msgstr_plural:
842 if self.msgstr_plural:
763 # write the msgstr_plural if any
843 # write the msgstr_plural if any
764 msgstrs = self.msgstr_plural
844 msgstrs = self.msgstr_plural
765 keys = list(msgstrs)
845 keys = list(msgstrs)
766 keys.sort()
846 keys.sort()
767 for index in keys:
847 for index in keys:
768 msgstr = msgstrs[index]
848 msgstr = msgstrs[index]
769 plural_index = '[%s]' % index
849 plural_index = '[%s]' % index
770 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
850 ret += self._str_field("msgstr", delflag, plural_index, msgstr,
851 wrapwidth)
771 else:
852 else:
772 # otherwise write the msgstr
853 # otherwise write the msgstr
773 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
854 ret += self._str_field("msgstr", delflag, "", self.msgstr,
855 wrapwidth)
774 ret.append('')
856 ret.append('')
775 ret = '\n'.join(ret)
857 ret = u('\n').join(ret)
776
777 if type(ret) != types.UnicodeType:
778 return unicode(ret, self.encoding)
779 return ret
858 return ret
780
859
860 if PY3:
861 def __str__(self):
862 return self.__unicode__()
863 else:
781 def __str__(self):
864 def __str__(self):
782 """
865 """
783 Returns the string representation of the entry.
866 Returns the string representation of the entry.
784 """
867 """
785 return unicode(self).encode(self.encoding)
868 return unicode(self).encode(self.encoding)
786
869
787 def __eq__(self, other):
870 def __eq__(self, other):
788 return unicode(self) == unicode(other)
871 return str(self) == str(other)
789
872
790 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
873 def _str_field(self, fieldname, delflag, plural_index, field,
874 wrapwidth=78):
791 lines = field.splitlines(True)
875 lines = field.splitlines(True)
792 if len(lines) > 1:
876 if len(lines) > 1:
793 lines = [''] + lines # start with initial empty line
877 lines = [''] + lines # start with initial empty line
794 else:
878 else:
795 escaped_field = escape(field)
879 escaped_field = escape(field)
796 specialchars_count = 0
880 specialchars_count = 0
797 for c in ['\\', '\n', '\r', '\t', '"']:
881 for c in ['\\', '\n', '\r', '\t', '"']:
798 specialchars_count += field.count(c)
882 specialchars_count += field.count(c)
799 # comparison must take into account fieldname length + one space
883 # comparison must take into account fieldname length + one space
800 # + 2 quotes (eg. msgid "<string>")
884 # + 2 quotes (eg. msgid "<string>")
801 flength = len(fieldname) + 3
885 flength = len(fieldname) + 3
802 if plural_index:
886 if plural_index:
803 flength += len(plural_index)
887 flength += len(plural_index)
804 real_wrapwidth = wrapwidth - flength + specialchars_count
888 real_wrapwidth = wrapwidth - flength + specialchars_count
805 if wrapwidth > 0 and len(field) > real_wrapwidth:
889 if wrapwidth > 0 and len(field) > real_wrapwidth:
806 # Wrap the line but take field name into account
890 # Wrap the line but take field name into account
807 lines = [''] + [unescape(item) for item in textwrap.wrap(
891 lines = [''] + [unescape(item) for item in wrap(
808 escaped_field,
892 escaped_field,
809 wrapwidth - 2, # 2 for quotes ""
893 wrapwidth - 2, # 2 for quotes ""
810 drop_whitespace=False,
894 drop_whitespace=False,
811 break_long_words=False
895 break_long_words=False
812 )]
896 )]
813 else:
897 else:
814 lines = [field]
898 lines = [field]
815 if fieldname.startswith('previous_'):
899 if fieldname.startswith('previous_'):
816 # quick and dirty trick to get the real field name
900 # quick and dirty trick to get the real field name
817 fieldname = fieldname[9:]
901 fieldname = fieldname[9:]
818
902
819 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
903 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
820 escape(lines.pop(0)))]
904 escape(lines.pop(0)))]
821 for mstr in lines:
905 for line in lines:
822 ret.append('%s"%s"' % (delflag, escape(mstr)))
906 ret.append('%s"%s"' % (delflag, escape(line)))
823 return ret
907 return ret
824
825 # }}}
908 # }}}
826 # class POEntry {{{
909 # class POEntry {{{
827
910
911
828 class POEntry(_BaseEntry):
912 class POEntry(_BaseEntry):
829 """
913 """
830 Represents a po file entry.
914 Represents a po file entry.
831 """
915 """
832
916
833 def __init__(self, *args, **kwargs):
917 def __init__(self, *args, **kwargs):
834 """
918 """
835 Constructor, accepts the following keyword arguments:
919 Constructor, accepts the following keyword arguments:
836
920
837 ``comment``
921 ``comment``
838 string, the entry comment.
922 string, the entry comment.
839
923
840 ``tcomment``
924 ``tcomment``
841 string, the entry translator comment.
925 string, the entry translator comment.
842
926
843 ``occurrences``
927 ``occurrences``
844 list, the entry occurrences.
928 list, the entry occurrences.
845
929
846 ``flags``
930 ``flags``
847 list, the entry flags.
931 list, the entry flags.
848
932
849 ``previous_msgctxt``
933 ``previous_msgctxt``
850 string, the entry previous context.
934 string, the entry previous context.
851
935
852 ``previous_msgid``
936 ``previous_msgid``
853 string, the entry previous msgid.
937 string, the entry previous msgid.
854
938
855 ``previous_msgid_plural``
939 ``previous_msgid_plural``
856 string, the entry previous msgid_plural.
940 string, the entry previous msgid_plural.
941
942 ``linenum``
943 integer, the line number of the entry
857 """
944 """
858 _BaseEntry.__init__(self, *args, **kwargs)
945 _BaseEntry.__init__(self, *args, **kwargs)
859 self.comment = kwargs.get('comment', '')
946 self.comment = kwargs.get('comment', '')
860 self.tcomment = kwargs.get('tcomment', '')
947 self.tcomment = kwargs.get('tcomment', '')
861 self.occurrences = kwargs.get('occurrences', [])
948 self.occurrences = kwargs.get('occurrences', [])
862 self.flags = kwargs.get('flags', [])
949 self.flags = kwargs.get('flags', [])
863 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
950 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
864 self.previous_msgid = kwargs.get('previous_msgid', None)
951 self.previous_msgid = kwargs.get('previous_msgid', None)
865 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
952 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
953 self.linenum = kwargs.get('linenum', None)
866
954
867 def __unicode__(self, wrapwidth=78):
955 def __unicode__(self, wrapwidth=78):
868 """
956 """
869 Returns the unicode representation of the entry.
957 Returns the unicode representation of the entry.
870 """
958 """
871 if self.obsolete:
959 if self.obsolete:
872 return _BaseEntry.__unicode__(self, wrapwidth)
960 return _BaseEntry.__unicode__(self, wrapwidth)
873
961
874 ret = []
962 ret = []
875 # comments first, if any (with text wrapping as xgettext does)
963 # comments first, if any (with text wrapping as xgettext does)
876 comments = [('comment', '#. '), ('tcomment', '# ')]
964 comments = [('comment', '#. '), ('tcomment', '# ')]
877 for c in comments:
965 for c in comments:
878 val = getattr(self, c[0])
966 val = getattr(self, c[0])
879 if val:
967 if val:
880 for comment in val.split('\n'):
968 for comment in val.split('\n'):
881 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
969 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
882 ret += textwrap.wrap(
970 ret += wrap(
883 comment,
971 comment,
884 wrapwidth,
972 wrapwidth,
885 initial_indent=c[1],
973 initial_indent=c[1],
886 subsequent_indent=c[1],
974 subsequent_indent=c[1],
887 break_long_words=False
975 break_long_words=False
888 )
976 )
889 else:
977 else:
890 ret.append('%s%s' % (c[1], comment))
978 ret.append('%s%s' % (c[1], comment))
891
979
892 # occurrences (with text wrapping as xgettext does)
980 # occurrences (with text wrapping as xgettext does)
893 if self.occurrences:
981 if self.occurrences:
894 filelist = []
982 filelist = []
895 for fpath, lineno in self.occurrences:
983 for fpath, lineno in self.occurrences:
896 if lineno:
984 if lineno:
897 filelist.append('%s:%s' % (fpath, lineno))
985 filelist.append('%s:%s' % (fpath, lineno))
898 else:
986 else:
899 filelist.append(fpath)
987 filelist.append(fpath)
900 filestr = ' '.join(filelist)
988 filestr = ' '.join(filelist)
901 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
989 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
902 # textwrap split words that contain hyphen, this is not
990 # textwrap split words that contain hyphen, this is not
903 # what we want for filenames, so the dirty hack is to
991 # what we want for filenames, so the dirty hack is to
904 # temporally replace hyphens with a char that a file cannot
992 # temporally replace hyphens with a char that a file cannot
905 # contain, like "*"
993 # contain, like "*"
906 ret += [l.replace('*', '-') for l in textwrap.wrap(
994 ret += [l.replace('*', '-') for l in wrap(
907 filestr.replace('-', '*'),
995 filestr.replace('-', '*'),
908 wrapwidth,
996 wrapwidth,
909 initial_indent='#: ',
997 initial_indent='#: ',
910 subsequent_indent='#: ',
998 subsequent_indent='#: ',
911 break_long_words=False
999 break_long_words=False
912 )]
1000 )]
913 else:
1001 else:
914 ret.append('#: ' + filestr)
1002 ret.append('#: ' + filestr)
915
1003
916 # flags (TODO: wrapping ?)
1004 # flags (TODO: wrapping ?)
917 if self.flags:
1005 if self.flags:
918 ret.append('#, %s' % ', '.join(self.flags))
1006 ret.append('#, %s' % ', '.join(self.flags))
919
1007
920 # previous context and previous msgid/msgid_plural
1008 # previous context and previous msgid/msgid_plural
921 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
1009 fields = ['previous_msgctxt', 'previous_msgid',
1010 'previous_msgid_plural']
922 for f in fields:
1011 for f in fields:
923 val = getattr(self, f)
1012 val = getattr(self, f)
924 if val:
1013 if val:
925 ret += self._str_field(f, "#| ", "", val, wrapwidth)
1014 ret += self._str_field(f, "#| ", "", val, wrapwidth)
926
1015
927 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
1016 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
928 ret = '\n'.join(ret)
1017 ret = u('\n').join(ret)
929
1018
930 if type(ret) != types.UnicodeType:
1019 assert isinstance(ret, text_type)
931 return unicode(ret, self.encoding)
1020 #if type(ret) != types.UnicodeType:
1021 # return unicode(ret, self.encoding)
932 return ret
1022 return ret
933
1023
934 def __cmp__(self, other):
1024 def __cmp__(self, other):
935 """
1025 """
936 Called by comparison operations if rich comparison is not defined.
1026 Called by comparison operations if rich comparison is not defined.
937 """
1027 """
938 def compare_occurrences(a, b):
939 """
940 Compare an entry occurrence with another one.
941 """
942 if a[0] != b[0]:
943 return a[0] < b[0]
944 if a[1] != b[1]:
945 return a[1] < b[1]
946 return 0
947
1028
948 # First: Obsolete test
1029 # First: Obsolete test
949 if self.obsolete != other.obsolete:
1030 if self.obsolete != other.obsolete:
950 if self.obsolete:
1031 if self.obsolete:
951 return -1
1032 return -1
952 else:
1033 else:
953 return 1
1034 return 1
954 # Work on a copy to protect original
1035 # Work on a copy to protect original
955 occ1 = self.occurrences[:]
1036 occ1 = sorted(self.occurrences[:])
956 occ2 = other.occurrences[:]
1037 occ2 = sorted(other.occurrences[:])
957 # Sorting using compare method
958 occ1.sort(compare_occurrences)
959 occ2.sort(compare_occurrences)
960 # Comparing sorted occurrences
961 pos = 0
1038 pos = 0
962 for entry1 in occ1:
1039 for entry1 in occ1:
963 try:
1040 try:
964 entry2 = occ2[pos]
1041 entry2 = occ2[pos]
965 except IndexError:
1042 except IndexError:
966 return 1
1043 return 1
967 pos = pos + 1
1044 pos = pos + 1
968 if entry1[0] != entry2[0]:
1045 if entry1[0] != entry2[0]:
969 if entry1[0] > entry2[0]:
1046 if entry1[0] > entry2[0]:
970 return 1
1047 return 1
971 else:
1048 else:
972 return -1
1049 return -1
973 if entry1[1] != entry2[1]:
1050 if entry1[1] != entry2[1]:
974 if entry1[1] > entry2[1]:
1051 if entry1[1] > entry2[1]:
975 return 1
1052 return 1
976 else:
1053 else:
977 return -1
1054 return -1
1055 # Compare msgid_plural if set
1056 if self.msgid_plural:
1057 if not other.msgid_plural:
1058 return 1
1059 for pos in self.msgid_plural:
1060 if pos not in other.msgid_plural:
1061 return 1
1062 if self.msgid_plural[pos] > other.msgid_plural[pos]:
1063 return 1
1064 if self.msgid_plural[pos] < other.msgid_plural[pos]:
1065 return -1
978 # Finally: Compare message ID
1066 # Finally: Compare message ID
979 if self.msgid > other.msgid: return 1
1067 if self.msgid > other.msgid:
980 else: return -1
1068 return 1
1069 elif self.msgid < other.msgid:
1070 return -1
1071 return 0
1072
1073 def __gt__(self, other):
1074 return self.__cmp__(other) > 0
1075
1076 def __lt__(self, other):
1077 return self.__cmp__(other) < 0
1078
1079 def __ge__(self, other):
1080 return self.__cmp__(other) >= 0
1081
1082 def __le__(self, other):
1083 return self.__cmp__(other) <= 0
1084
1085 def __eq__(self, other):
1086 return self.__cmp__(other) == 0
1087
1088 def __ne__(self, other):
1089 return self.__cmp__(other) != 0
981
1090
982 def translated(self):
1091 def translated(self):
983 """
1092 """
984 Returns ``True`` if the entry has been translated or ``False``
1093 Returns ``True`` if the entry has been translated or ``False``
985 otherwise.
1094 otherwise.
986 """
1095 """
987 if self.obsolete or 'fuzzy' in self.flags:
1096 if self.obsolete or 'fuzzy' in self.flags:
988 return False
1097 return False
989 if self.msgstr != '':
1098 if self.msgstr != '':
990 return True
1099 return True
991 if self.msgstr_plural:
1100 if self.msgstr_plural:
992 for pos in self.msgstr_plural:
1101 for pos in self.msgstr_plural:
993 if self.msgstr_plural[pos] == '':
1102 if self.msgstr_plural[pos] == '':
994 return False
1103 return False
995 return True
1104 return True
996 return False
1105 return False
997
1106
998 def merge(self, other):
1107 def merge(self, other):
999 """
1108 """
1000 Merge the current entry with the given pot entry.
1109 Merge the current entry with the given pot entry.
1001 """
1110 """
1002 self.msgid = other.msgid
1111 self.msgid = other.msgid
1003 self.msgctxt = other.msgctxt
1112 self.msgctxt = other.msgctxt
1004 self.occurrences = other.occurrences
1113 self.occurrences = other.occurrences
1005 self.comment = other.comment
1114 self.comment = other.comment
1006 fuzzy = 'fuzzy' in self.flags
1115 fuzzy = 'fuzzy' in self.flags
1007 self.flags = other.flags[:] # clone flags
1116 self.flags = other.flags[:] # clone flags
1008 if fuzzy:
1117 if fuzzy:
1009 self.flags.append('fuzzy')
1118 self.flags.append('fuzzy')
1010 self.msgid_plural = other.msgid_plural
1119 self.msgid_plural = other.msgid_plural
1011 self.obsolete = other.obsolete
1120 self.obsolete = other.obsolete
1012 self.previous_msgctxt = other.previous_msgctxt
1121 self.previous_msgctxt = other.previous_msgctxt
1013 self.previous_msgid = other.previous_msgid
1122 self.previous_msgid = other.previous_msgid
1014 self.previous_msgid_plural = other.previous_msgid_plural
1123 self.previous_msgid_plural = other.previous_msgid_plural
1015 if other.msgstr_plural:
1124 if other.msgstr_plural:
1016 for pos in other.msgstr_plural:
1125 for pos in other.msgstr_plural:
1017 try:
1126 try:
1018 # keep existing translation at pos if any
1127 # keep existing translation at pos if any
1019 self.msgstr_plural[pos]
1128 self.msgstr_plural[pos]
1020 except KeyError:
1129 except KeyError:
1021 self.msgstr_plural[pos] = ''
1130 self.msgstr_plural[pos] = ''
1022
1131
1132 def __hash__(self):
1133 return hash((self.msgid, self.msgstr))
1023 # }}}
1134 # }}}
1024 # class MOEntry {{{
1135 # class MOEntry {{{
1025
1136
1137
1026 class MOEntry(_BaseEntry):
1138 class MOEntry(_BaseEntry):
1027 """
1139 """
1028 Represents a mo file entry.
1140 Represents a mo file entry.
1029 """
1141 """
1030 pass
1142 def __init__(self, *args, **kwargs):
1143 """
1144 Constructor, accepts the following keyword arguments,
1145 for consistency with :class:`~polib.POEntry`:
1146
1147 ``comment``
1148 ``tcomment``
1149 ``occurrences``
1150 ``flags``
1151 ``previous_msgctxt``
1152 ``previous_msgid``
1153 ``previous_msgid_plural``
1154
1155 Note: even though these keyword arguments are accepted,
1156 they hold no real meaning in the context of MO files
1157 and are simply ignored.
1158 """
1159 _BaseEntry.__init__(self, *args, **kwargs)
1160 self.comment = ''
1161 self.tcomment = ''
1162 self.occurrences = []
1163 self.flags = []
1164 self.previous_msgctxt = None
1165 self.previous_msgid = None
1166 self.previous_msgid_plural = None
1167
1168 def __hash__(self):
1169 return hash((self.msgid, self.msgstr))
1031
1170
1032 # }}}
1171 # }}}
1033 # class _POFileParser {{{
1172 # class _POFileParser {{{
1034
1173
1174
1035 class _POFileParser(object):
1175 class _POFileParser(object):
1036 """
1176 """
1037 A finite state machine to parse efficiently and correctly po
1177 A finite state machine to parse efficiently and correctly po
1038 file format.
1178 file format.
1039 """
1179 """
1040
1180
1041 def __init__(self, pofile, *args, **kwargs):
1181 def __init__(self, pofile, *args, **kwargs):
1042 """
1182 """
1043 Constructor.
1183 Constructor.
1044
1184
1045 Keyword arguments:
1185 Keyword arguments:
1046
1186
1047 ``pofile``
1187 ``pofile``
1048 string, path to the po file or its content
1188 string, path to the po file or its content
1049
1189
1050 ``encoding``
1190 ``encoding``
1051 string, the encoding to use, defaults to ``default_encoding``
1191 string, the encoding to use, defaults to ``default_encoding``
1052 global variable (optional).
1192 global variable (optional).
1053
1193
1054 ``check_for_duplicates``
1194 ``check_for_duplicates``
1055 whether to check for duplicate entries when adding entries to the
1195 whether to check for duplicate entries when adding entries to the
1056 file (optional, default: ``False``).
1196 file (optional, default: ``False``).
1057 """
1197 """
1058 enc = kwargs.get('encoding', default_encoding)
1198 enc = kwargs.get('encoding', default_encoding)
1059 if os.path.exists(pofile):
1199 if _is_file(pofile):
1060 try:
1200 try:
1061 self.fhandle = codecs.open(pofile, 'rU', enc)
1201 self.fhandle = io.open(pofile, 'rt', encoding=enc)
1062 except LookupError:
1202 except LookupError:
1063 enc = default_encoding
1203 enc = default_encoding
1064 self.fhandle = codecs.open(pofile, 'rU', enc)
1204 self.fhandle = io.open(pofile, 'rt', encoding=enc)
1065 else:
1205 else:
1066 self.fhandle = pofile.splitlines()
1206 self.fhandle = pofile.splitlines()
1067
1207
1068 self.instance = POFile(
1208 klass = kwargs.get('klass')
1209 if klass is None:
1210 klass = POFile
1211 self.instance = klass(
1069 pofile=pofile,
1212 pofile=pofile,
1070 encoding=enc,
1213 encoding=enc,
1071 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1214 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1072 )
1215 )
1073 self.transitions = {}
1216 self.transitions = {}
1074 self.current_entry = POEntry()
1217 self.current_line = 0
1075 self.current_state = 'ST'
1218 self.current_entry = POEntry(linenum=self.current_line)
1219 self.current_state = 'st'
1076 self.current_token = None
1220 self.current_token = None
1077 # two memo flags used in handlers
1221 # two memo flags used in handlers
1078 self.msgstr_index = 0
1222 self.msgstr_index = 0
1079 self.entry_obsolete = 0
1223 self.entry_obsolete = 0
1080 # Configure the state machine, by adding transitions.
1224 # Configure the state machine, by adding transitions.
1081 # Signification of symbols:
1225 # Signification of symbols:
1082 # * ST: Beginning of the file (start)
1226 # * ST: Beginning of the file (start)
1083 # * HE: Header
1227 # * HE: Header
1084 # * TC: a translation comment
1228 # * TC: a translation comment
1085 # * GC: a generated comment
1229 # * GC: a generated comment
1086 # * OC: a file/line occurence
1230 # * OC: a file/line occurrence
1087 # * FL: a flags line
1231 # * FL: a flags line
1088 # * CT: a message context
1232 # * CT: a message context
1089 # * PC: a previous msgctxt
1233 # * PC: a previous msgctxt
1090 # * PM: a previous msgid
1234 # * PM: a previous msgid
1091 # * PP: a previous msgid_plural
1235 # * PP: a previous msgid_plural
1092 # * MI: a msgid
1236 # * MI: a msgid
1093 # * MP: a msgid plural
1237 # * MP: a msgid plural
1094 # * MS: a msgstr
1238 # * MS: a msgstr
1095 # * MX: a msgstr plural
1239 # * MX: a msgstr plural
1096 # * MC: a msgid or msgstr continuation line
1240 # * MC: a msgid or msgstr continuation line
1097 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
1241 all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
1098 'MS', 'MP', 'MX', 'MI']
1242 'ms', 'mp', 'mx', 'mi']
1099
1243
1100 self.add('TC', ['ST', 'HE'], 'HE')
1244 self.add('tc', ['st', 'he'], 'he')
1101 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
1245 self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
1102 'MP', 'MX', 'MI'], 'TC')
1246 'mp', 'mx', 'mi'], 'tc')
1103 self.add('GC', all, 'GC')
1247 self.add('gc', all, 'gc')
1104 self.add('OC', all, 'OC')
1248 self.add('oc', all, 'oc')
1105 self.add('FL', all, 'FL')
1249 self.add('fl', all, 'fl')
1106 self.add('PC', all, 'PC')
1250 self.add('pc', all, 'pc')
1107 self.add('PM', all, 'PM')
1251 self.add('pm', all, 'pm')
1108 self.add('PP', all, 'PP')
1252 self.add('pp', all, 'pp')
1109 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
1253 self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
1110 'PP', 'MS', 'MX'], 'CT')
1254 'pp', 'ms', 'mx'], 'ct')
1111 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
1255 self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
1112 'PM', 'PP', 'MS', 'MX'], 'MI')
1256 'pm', 'pp', 'ms', 'mx'], 'mi')
1113 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
1257 self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
1114 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
1258 self.add('ms', ['mi', 'mp', 'tc'], 'ms')
1115 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
1259 self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
1116 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
1260 self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
1117
1261
1118 def parse(self):
1262 def parse(self):
1119 """
1263 """
1120 Run the state machine, parse the file line by line and call process()
1264 Run the state machine, parse the file line by line and call process()
1121 with the current matched symbol.
1265 with the current matched symbol.
1122 """
1266 """
1123 i = 0
1124
1267
1125 keywords = {
1268 keywords = {
1126 'msgctxt': 'CT',
1269 'msgctxt': 'ct',
1127 'msgid': 'MI',
1270 'msgid': 'mi',
1128 'msgstr': 'MS',
1271 'msgstr': 'ms',
1129 'msgid_plural': 'MP',
1272 'msgid_plural': 'mp',
1130 }
1273 }
1131 prev_keywords = {
1274 prev_keywords = {
1132 'msgid_plural': 'PP',
1275 'msgid_plural': 'pp',
1133 'msgid': 'PM',
1276 'msgid': 'pm',
1134 'msgctxt': 'PC',
1277 'msgctxt': 'pc',
1135 }
1278 }
1136
1279 tokens = []
1137 for line in self.fhandle:
1280 for line in self.fhandle:
1138 i += 1
1281 self.current_line += 1
1139 line = line.strip()
1282 line = line.strip()
1140 if line == '':
1283 if line == '':
1141 continue
1284 continue
1142
1285
1143 tokens = line.split(None, 2)
1286 tokens = line.split(None, 2)
1144 nb_tokens = len(tokens)
1287 nb_tokens = len(tokens)
1145
1288
1289 if tokens[0] == '#~|':
1290 continue
1291
1146 if tokens[0] == '#~' and nb_tokens > 1:
1292 if tokens[0] == '#~' and nb_tokens > 1:
1147 line = line[3:].strip()
1293 line = line[3:].strip()
1148 tokens = tokens[1:]
1294 tokens = tokens[1:]
1149 nb_tokens -= 1
1295 nb_tokens -= 1
1150 self.entry_obsolete = 1
1296 self.entry_obsolete = 1
1151 else:
1297 else:
1152 self.entry_obsolete = 0
1298 self.entry_obsolete = 0
1153
1299
1154 # Take care of keywords like
1300 # Take care of keywords like
1155 # msgid, msgid_plural, msgctxt & msgstr.
1301 # msgid, msgid_plural, msgctxt & msgstr.
1156 if tokens[0] in keywords and nb_tokens > 1:
1302 if tokens[0] in keywords and nb_tokens > 1:
1157 line = line[len(tokens[0]):].lstrip()
1303 line = line[len(tokens[0]):].lstrip()
1304 if re.search(r'([^\\]|^)"', line[1:-1]):
1305 raise IOError('Syntax error in po file %s (line %s): '
1306 'unescaped double quote found' %
1307 (self.instance.fpath, self.current_line))
1158 self.current_token = line
1308 self.current_token = line
1159 self.process(keywords[tokens[0]], i)
1309 self.process(keywords[tokens[0]])
1160 continue
1310 continue
1161
1311
1162 self.current_token = line
1312 self.current_token = line
1163
1313
1164 if tokens[0] == '#:' and nb_tokens > 1:
1314 if tokens[0] == '#:':
1315 if nb_tokens <= 1:
1316 continue
1165 # we are on a occurrences line
1317 # we are on a occurrences line
1166 self.process('OC', i)
1318 self.process('oc')
1167
1319
1168 elif line[:1] == '"':
1320 elif line[:1] == '"':
1169 # we are on a continuation line
1321 # we are on a continuation line
1170 self.process('MC', i)
1322 if re.search(r'([^\\]|^)"', line[1:-1]):
1323 raise IOError('Syntax error in po file %s (line %s): '
1324 'unescaped double quote found' %
1325 (self.instance.fpath, self.current_line))
1326 self.process('mc')
1171
1327
1172 elif line[:7] == 'msgstr[':
1328 elif line[:7] == 'msgstr[':
1173 # we are on a msgstr plural
1329 # we are on a msgstr plural
1174 self.process('MX', i)
1330 self.process('mx')
1175
1331
1176 elif tokens[0] == '#,' and nb_tokens > 1:
1332 elif tokens[0] == '#,':
1333 if nb_tokens <= 1:
1334 continue
1177 # we are on a flags line
1335 # we are on a flags line
1178 self.process('FL', i)
1336 self.process('fl')
1179
1337
1180 elif tokens[0] == '#':
1338 elif tokens[0] == '#' or tokens[0].startswith('##'):
1181 if line == '#': line += ' '
1339 if line == '#':
1340 line += ' '
1182 # we are on a translator comment line
1341 # we are on a translator comment line
1183 self.process('TC', i)
1342 self.process('tc')
1184
1343
1185 elif tokens[0] == '#.' and nb_tokens > 1:
1344 elif tokens[0] == '#.':
1345 if nb_tokens <= 1:
1346 continue
1186 # we are on a generated comment line
1347 # we are on a generated comment line
1187 self.process('GC', i)
1348 self.process('gc')
1188
1349
1189 elif tokens[0] == '#|':
1350 elif tokens[0] == '#|':
1190 if nb_tokens < 2:
1351 if nb_tokens <= 1:
1191 self.process('??', i)
1352 raise IOError('Syntax error in po file %s (line %s)' %
1192 continue
1353 (self.instance.fpath, self.current_line))
1193
1354
1194 # Remove the marker and any whitespace right after that.
1355 # Remove the marker and any whitespace right after that.
1195 line = line[2:].lstrip()
1356 line = line[2:].lstrip()
1196 self.current_token = line
1357 self.current_token = line
1197
1358
1198 if tokens[1].startswith('"'):
1359 if tokens[1].startswith('"'):
1199 # Continuation of previous metadata.
1360 # Continuation of previous metadata.
1200 self.process('MC', i)
1361 self.process('mc')
1201 continue
1362 continue
1202
1363
1203 if nb_tokens == 2:
1364 if nb_tokens == 2:
1204 # Invalid continuation line.
1365 # Invalid continuation line.
1205 self.process('??', i)
1366 raise IOError('Syntax error in po file %s (line %s): '
1367 'invalid continuation line' %
1368 (self.instance.fpath, self.current_line))
1206
1369
1207 # we are on a "previous translation" comment line,
1370 # we are on a "previous translation" comment line,
1208 if tokens[1] not in prev_keywords:
1371 if tokens[1] not in prev_keywords:
1209 # Unknown keyword in previous translation comment.
1372 # Unknown keyword in previous translation comment.
1210 self.process('??', i)
1373 raise IOError('Syntax error in po file %s (line %s): '
1374 'unknown keyword %s' %
1375 (self.instance.fpath, self.current_line,
1376 tokens[1]))
1211
1377
1212 # Remove the keyword and any whitespace
1378 # Remove the keyword and any whitespace
1213 # between it and the starting quote.
1379 # between it and the starting quote.
1214 line = line[len(tokens[1]):].lstrip()
1380 line = line[len(tokens[1]):].lstrip()
1215 self.current_token = line
1381 self.current_token = line
1216 self.process(prev_keywords[tokens[1]], i)
1382 self.process(prev_keywords[tokens[1]])
1217
1383
1218 else:
1384 else:
1219 self.process('??', i)
1385 raise IOError('Syntax error in po file %s (line %s)' %
1386 (self.instance.fpath, self.current_line))
1220
1387
1221 if self.current_entry:
1388 if self.current_entry and len(tokens) > 0 and \
1389 not tokens[0].startswith('#'):
1222 # since entries are added when another entry is found, we must add
1390 # since entries are added when another entry is found, we must add
1223 # the last entry here (only if there are lines)
1391 # the last entry here (only if there are lines). Trailing comments
1392 # are ignored
1224 self.instance.append(self.current_entry)
1393 self.instance.append(self.current_entry)
1394
1225 # before returning the instance, check if there's metadata and if
1395 # before returning the instance, check if there's metadata and if
1226 # so extract it in a dict
1396 # so extract it in a dict
1227 firstentry = self.instance[0]
1397 metadataentry = self.instance.find('')
1228 if firstentry.msgid == '': # metadata found
1398 if metadataentry: # metadata found
1229 # remove the entry
1399 # remove the entry
1230 firstentry = self.instance.pop(0)
1400 self.instance.remove(metadataentry)
1231 self.instance.metadata_is_fuzzy = firstentry.flags
1401 self.instance.metadata_is_fuzzy = metadataentry.flags
1232 key = None
1402 key = None
1233 for msg in firstentry.msgstr.splitlines():
1403 for msg in metadataentry.msgstr.splitlines():
1234 try:
1404 try:
1235 key, val = msg.split(':', 1)
1405 key, val = msg.split(':', 1)
1236 self.instance.metadata[key] = val.strip()
1406 self.instance.metadata[key] = val.strip()
1237 except:
1407 except (ValueError, KeyError):
1238 if key is not None:
1408 if key is not None:
1239 self.instance.metadata[key] += '\n'+ msg.strip()
1409 self.instance.metadata[key] += '\n' + msg.strip()
1240 # close opened file
1410 # close opened file
1241 if isinstance(self.fhandle, file):
1411 if not isinstance(self.fhandle, list): # must be file
1242 self.fhandle.close()
1412 self.fhandle.close()
1243 return self.instance
1413 return self.instance
1244
1414
1245 def add(self, symbol, states, next_state):
1415 def add(self, symbol, states, next_state):
1246 """
1416 """
1247 Add a transition to the state machine.
1417 Add a transition to the state machine.
1248
1418
1249 Keywords arguments:
1419 Keywords arguments:
1250
1420
1251 ``symbol``
1421 ``symbol``
1252 string, the matched token (two chars symbol).
1422 string, the matched token (two chars symbol).
1253
1423
1254 ``states``
1424 ``states``
1255 list, a list of states (two chars symbols).
1425 list, a list of states (two chars symbols).
1256
1426
1257 ``next_state``
1427 ``next_state``
1258 the next state the fsm will have after the action.
1428 the next state the fsm will have after the action.
1259 """
1429 """
1260 for state in states:
1430 for state in states:
1261 action = getattr(self, 'handle_%s' % next_state.lower())
1431 action = getattr(self, 'handle_%s' % next_state)
1262 self.transitions[(symbol, state)] = (action, next_state)
1432 self.transitions[(symbol, state)] = (action, next_state)
1263
1433
1264 def process(self, symbol, linenum):
1434 def process(self, symbol):
1265 """
1435 """
1266 Process the transition corresponding to the current state and the
1436 Process the transition corresponding to the current state and the
1267 symbol provided.
1437 symbol provided.
1268
1438
1269 Keywords arguments:
1439 Keywords arguments:
1270
1440
1271 ``symbol``
1441 ``symbol``
1272 string, the matched token (two chars symbol).
1442 string, the matched token (two chars symbol).
1273
1443
1274 ``linenum``
1444 ``linenum``
1275 integer, the current line number of the parsed file.
1445 integer, the current line number of the parsed file.
1276 """
1446 """
1277 try:
1447 try:
1278 (action, state) = self.transitions[(symbol, self.current_state)]
1448 (action, state) = self.transitions[(symbol, self.current_state)]
1279 if action():
1449 if action():
1280 self.current_state = state
1450 self.current_state = state
1281 except Exception as exc:
1451 except Exception:
1282 raise IOError('Syntax error in po file (line %s)' % linenum)
1452 raise IOError('Syntax error in po file (line %s)' %
1453 self.current_line)
1283
1454
1284 # state handlers
1455 # state handlers
1285
1456
1286 def handle_he(self):
1457 def handle_he(self):
1287 """Handle a header comment."""
1458 """Handle a header comment."""
1288 if self.instance.header != '':
1459 if self.instance.header != '':
1289 self.instance.header += '\n'
1460 self.instance.header += '\n'
1290 self.instance.header += self.current_token[2:]
1461 self.instance.header += self.current_token[2:]
1291 return 1
1462 return 1
1292
1463
1293 def handle_tc(self):
1464 def handle_tc(self):
1294 """Handle a translator comment."""
1465 """Handle a translator comment."""
1295 if self.current_state in ['MC', 'MS', 'MX']:
1466 if self.current_state in ['mc', 'ms', 'mx']:
1296 self.instance.append(self.current_entry)
1467 self.instance.append(self.current_entry)
1297 self.current_entry = POEntry()
1468 self.current_entry = POEntry(linenum=self.current_line)
1298 if self.current_entry.tcomment != '':
1469 if self.current_entry.tcomment != '':
1299 self.current_entry.tcomment += '\n'
1470 self.current_entry.tcomment += '\n'
1300 self.current_entry.tcomment += self.current_token[2:]
1471 tcomment = self.current_token.lstrip('#')
1472 if tcomment.startswith(' '):
1473 tcomment = tcomment[1:]
1474 self.current_entry.tcomment += tcomment
1301 return True
1475 return True
1302
1476
1303 def handle_gc(self):
1477 def handle_gc(self):
1304 """Handle a generated comment."""
1478 """Handle a generated comment."""
1305 if self.current_state in ['MC', 'MS', 'MX']:
1479 if self.current_state in ['mc', 'ms', 'mx']:
1306 self.instance.append(self.current_entry)
1480 self.instance.append(self.current_entry)
1307 self.current_entry = POEntry()
1481 self.current_entry = POEntry(linenum=self.current_line)
1308 if self.current_entry.comment != '':
1482 if self.current_entry.comment != '':
1309 self.current_entry.comment += '\n'
1483 self.current_entry.comment += '\n'
1310 self.current_entry.comment += self.current_token[3:]
1484 self.current_entry.comment += self.current_token[3:]
1311 return True
1485 return True
1312
1486
1313 def handle_oc(self):
1487 def handle_oc(self):
1314 """Handle a file:num occurence."""
1488 """Handle a file:num occurrence."""
1315 if self.current_state in ['MC', 'MS', 'MX']:
1489 if self.current_state in ['mc', 'ms', 'mx']:
1316 self.instance.append(self.current_entry)
1490 self.instance.append(self.current_entry)
1317 self.current_entry = POEntry()
1491 self.current_entry = POEntry(linenum=self.current_line)
1318 occurrences = self.current_token[3:].split()
1492 occurrences = self.current_token[3:].split()
1319 for occurrence in occurrences:
1493 for occurrence in occurrences:
1320 if occurrence != '':
1494 if occurrence != '':
1321 try:
1495 try:
1322 fil, line = occurrence.split(':')
1496 fil, line = occurrence.split(':')
1323 if not line.isdigit():
1497 if not line.isdigit():
1324 fil = fil + line
1498 fil = fil + line
1325 line = ''
1499 line = ''
1326 self.current_entry.occurrences.append((fil, line))
1500 self.current_entry.occurrences.append((fil, line))
1327 except:
1501 except (ValueError, AttributeError):
1328 self.current_entry.occurrences.append((occurrence, ''))
1502 self.current_entry.occurrences.append((occurrence, ''))
1329 return True
1503 return True
1330
1504
1331 def handle_fl(self):
1505 def handle_fl(self):
1332 """Handle a flags line."""
1506 """Handle a flags line."""
1333 if self.current_state in ['MC', 'MS', 'MX']:
1507 if self.current_state in ['mc', 'ms', 'mx']:
1334 self.instance.append(self.current_entry)
1508 self.instance.append(self.current_entry)
1335 self.current_entry = POEntry()
1509 self.current_entry = POEntry(linenum=self.current_line)
1336 self.current_entry.flags += self.current_token[3:].split(', ')
1510 self.current_entry.flags += [c.strip() for c in
1511 self.current_token[3:].split(',')]
1337 return True
1512 return True
1338
1513
1339 def handle_pp(self):
1514 def handle_pp(self):
1340 """Handle a previous msgid_plural line."""
1515 """Handle a previous msgid_plural line."""
1341 if self.current_state in ['MC', 'MS', 'MX']:
1516 if self.current_state in ['mc', 'ms', 'mx']:
1342 self.instance.append(self.current_entry)
1517 self.instance.append(self.current_entry)
1343 self.current_entry = POEntry()
1518 self.current_entry = POEntry(linenum=self.current_line)
1344 self.current_entry.previous_msgid_plural = \
1519 self.current_entry.previous_msgid_plural = \
1345 unescape(self.current_token[1:-1])
1520 unescape(self.current_token[1:-1])
1346 return True
1521 return True
1347
1522
1348 def handle_pm(self):
1523 def handle_pm(self):
1349 """Handle a previous msgid line."""
1524 """Handle a previous msgid line."""
1350 if self.current_state in ['MC', 'MS', 'MX']:
1525 if self.current_state in ['mc', 'ms', 'mx']:
1351 self.instance.append(self.current_entry)
1526 self.instance.append(self.current_entry)
1352 self.current_entry = POEntry()
1527 self.current_entry = POEntry(linenum=self.current_line)
1353 self.current_entry.previous_msgid = \
1528 self.current_entry.previous_msgid = \
1354 unescape(self.current_token[1:-1])
1529 unescape(self.current_token[1:-1])
1355 return True
1530 return True
1356
1531
1357 def handle_pc(self):
1532 def handle_pc(self):
1358 """Handle a previous msgctxt line."""
1533 """Handle a previous msgctxt line."""
1359 if self.current_state in ['MC', 'MS', 'MX']:
1534 if self.current_state in ['mc', 'ms', 'mx']:
1360 self.instance.append(self.current_entry)
1535 self.instance.append(self.current_entry)
1361 self.current_entry = POEntry()
1536 self.current_entry = POEntry(linenum=self.current_line)
1362 self.current_entry.previous_msgctxt = \
1537 self.current_entry.previous_msgctxt = \
1363 unescape(self.current_token[1:-1])
1538 unescape(self.current_token[1:-1])
1364 return True
1539 return True
1365
1540
1366 def handle_ct(self):
1541 def handle_ct(self):
1367 """Handle a msgctxt."""
1542 """Handle a msgctxt."""
1368 if self.current_state in ['MC', 'MS', 'MX']:
1543 if self.current_state in ['mc', 'ms', 'mx']:
1369 self.instance.append(self.current_entry)
1544 self.instance.append(self.current_entry)
1370 self.current_entry = POEntry()
1545 self.current_entry = POEntry(linenum=self.current_line)
1371 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
1546 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
1372 return True
1547 return True
1373
1548
1374 def handle_mi(self):
1549 def handle_mi(self):
1375 """Handle a msgid."""
1550 """Handle a msgid."""
1376 if self.current_state in ['MC', 'MS', 'MX']:
1551 if self.current_state in ['mc', 'ms', 'mx']:
1377 self.instance.append(self.current_entry)
1552 self.instance.append(self.current_entry)
1378 self.current_entry = POEntry()
1553 self.current_entry = POEntry(linenum=self.current_line)
1379 self.current_entry.obsolete = self.entry_obsolete
1554 self.current_entry.obsolete = self.entry_obsolete
1380 self.current_entry.msgid = unescape(self.current_token[1:-1])
1555 self.current_entry.msgid = unescape(self.current_token[1:-1])
1381 return True
1556 return True
1382
1557
1383 def handle_mp(self):
1558 def handle_mp(self):
1384 """Handle a msgid plural."""
1559 """Handle a msgid plural."""
1385 self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
1560 self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
1386 return True
1561 return True
1387
1562
1388 def handle_ms(self):
1563 def handle_ms(self):
1389 """Handle a msgstr."""
1564 """Handle a msgstr."""
1390 self.current_entry.msgstr = unescape(self.current_token[1:-1])
1565 self.current_entry.msgstr = unescape(self.current_token[1:-1])
1391 return True
1566 return True
1392
1567
1393 def handle_mx(self):
1568 def handle_mx(self):
1394 """Handle a msgstr plural."""
1569 """Handle a msgstr plural."""
1395 index, value = self.current_token[7], self.current_token[11:-1]
1570 index = self.current_token[7]
1396 self.current_entry.msgstr_plural[index] = unescape(value)
1571 value = self.current_token[self.current_token.find('"') + 1:-1]
1397 self.msgstr_index = index
1572 self.current_entry.msgstr_plural[int(index)] = unescape(value)
1573 self.msgstr_index = int(index)
1398 return True
1574 return True
1399
1575
1400 def handle_mc(self):
1576 def handle_mc(self):
1401 """Handle a msgid or msgstr continuation line."""
1577 """Handle a msgid or msgstr continuation line."""
1402 token = unescape(self.current_token[1:-1])
1578 token = unescape(self.current_token[1:-1])
1403 if self.current_state == 'CT':
1579 if self.current_state == 'ct':
1404 typ = 'msgctxt'
1405 self.current_entry.msgctxt += token
1580 self.current_entry.msgctxt += token
1406 elif self.current_state == 'MI':
1581 elif self.current_state == 'mi':
1407 typ = 'msgid'
1408 self.current_entry.msgid += token
1582 self.current_entry.msgid += token
1409 elif self.current_state == 'MP':
1583 elif self.current_state == 'mp':
1410 typ = 'msgid_plural'
1411 self.current_entry.msgid_plural += token
1584 self.current_entry.msgid_plural += token
1412 elif self.current_state == 'MS':
1585 elif self.current_state == 'ms':
1413 typ = 'msgstr'
1414 self.current_entry.msgstr += token
1586 self.current_entry.msgstr += token
1415 elif self.current_state == 'MX':
1587 elif self.current_state == 'mx':
1416 typ = 'msgstr[%s]' % self.msgstr_index
1417 self.current_entry.msgstr_plural[self.msgstr_index] += token
1588 self.current_entry.msgstr_plural[self.msgstr_index] += token
1418 elif self.current_state == 'PP':
1589 elif self.current_state == 'pp':
1419 typ = 'previous_msgid_plural'
1420 token = token[3:]
1421 self.current_entry.previous_msgid_plural += token
1590 self.current_entry.previous_msgid_plural += token
1422 elif self.current_state == 'PM':
1591 elif self.current_state == 'pm':
1423 typ = 'previous_msgid'
1424 token = token[3:]
1425 self.current_entry.previous_msgid += token
1592 self.current_entry.previous_msgid += token
1426 elif self.current_state == 'PC':
1593 elif self.current_state == 'pc':
1427 typ = 'previous_msgctxt'
1428 token = token[3:]
1429 self.current_entry.previous_msgctxt += token
1594 self.current_entry.previous_msgctxt += token
1430 # don't change the current state
1595 # don't change the current state
1431 return False
1596 return False
1432
1433 # }}}
1597 # }}}
1434 # class _MOFileParser {{{
1598 # class _MOFileParser {{{
1435
1599
1600
1436 class _MOFileParser(object):
1601 class _MOFileParser(object):
1437 """
1602 """
1438 A class to parse binary mo files.
1603 A class to parse binary mo files.
1439 """
1604 """
1440
1605
1441 def __init__(self, mofile, *args, **kwargs):
1606 def __init__(self, mofile, *args, **kwargs):
1442 """
1607 """
1443 Constructor.
1608 Constructor.
1444
1609
1445 Keyword arguments:
1610 Keyword arguments:
1446
1611
1447 ``mofile``
1612 ``mofile``
1448 string, path to the mo file or its content
1613 string, path to the mo file or its content
1449
1614
1450 ``encoding``
1615 ``encoding``
1451 string, the encoding to use, defaults to ``default_encoding``
1616 string, the encoding to use, defaults to ``default_encoding``
1452 global variable (optional).
1617 global variable (optional).
1453
1618
1454 ``check_for_duplicates``
1619 ``check_for_duplicates``
1455 whether to check for duplicate entries when adding entries to the
1620 whether to check for duplicate entries when adding entries to the
1456 file (optional, default: ``False``).
1621 file (optional, default: ``False``).
1457 """
1622 """
1458 self.fhandle = open(mofile, 'rb')
1623 self.fhandle = open(mofile, 'rb')
1459 self.instance = MOFile(
1624
1625 klass = kwargs.get('klass')
1626 if klass is None:
1627 klass = MOFile
1628 self.instance = klass(
1460 fpath=mofile,
1629 fpath=mofile,
1461 encoding=kwargs.get('encoding', default_encoding),
1630 encoding=kwargs.get('encoding', default_encoding),
1462 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1631 check_for_duplicates=kwargs.get('check_for_duplicates', False)
1463 )
1632 )
1464
1633
1634 def __del__(self):
1635 """
1636 Make sure the file is closed, this prevents warnings on unclosed file
1637 when running tests with python >= 3.2.
1638 """
1639 if self.fhandle:
1640 self.fhandle.close()
1641
1465 def parse(self):
1642 def parse(self):
1466 """
1643 """
1467 Build the instance with the file handle provided in the
1644 Build the instance with the file handle provided in the
1468 constructor.
1645 constructor.
1469 """
1646 """
1470 # parse magic number
1647 # parse magic number
1471 magic_number = self._readbinary('<I', 4)
1648 magic_number = self._readbinary('<I', 4)
1472 if magic_number == MOFile.LITTLE_ENDIAN:
1649 if magic_number == MOFile.MAGIC:
1473 ii = '<II'
1650 ii = '<II'
1474 elif magic_number == MOFile.BIG_ENDIAN:
1651 elif magic_number == MOFile.MAGIC_SWAPPED:
1475 ii = '>II'
1652 ii = '>II'
1476 else:
1653 else:
1477 raise IOError('Invalid mo file, magic number is incorrect !')
1654 raise IOError('Invalid mo file, magic number is incorrect !')
1478 self.instance.magic_number = magic_number
1655 self.instance.magic_number = magic_number
1479 # parse the version number and the number of strings
1656 # parse the version number and the number of strings
1480 self.instance.version, numofstrings = self._readbinary(ii, 8)
1657 version, numofstrings = self._readbinary(ii, 8)
1658 # from MO file format specs: "A program seeing an unexpected major
1659 # revision number should stop reading the MO file entirely"
1660 if version not in (0, 1):
1661 raise IOError('Invalid mo file, unexpected major revision number')
1662 self.instance.version = version
1481 # original strings and translation strings hash table offset
1663 # original strings and translation strings hash table offset
1482 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1664 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
1483 # move to msgid hash table and read length and offset of msgids
1665 # move to msgid hash table and read length and offset of msgids
1484 self.fhandle.seek(msgids_hash_offset)
1666 self.fhandle.seek(msgids_hash_offset)
1485 msgids_index = []
1667 msgids_index = []
1486 for i in range(numofstrings):
1668 for i in range(numofstrings):
1487 msgids_index.append(self._readbinary(ii, 8))
1669 msgids_index.append(self._readbinary(ii, 8))
1488 # move to msgstr hash table and read length and offset of msgstrs
1670 # move to msgstr hash table and read length and offset of msgstrs
1489 self.fhandle.seek(msgstrs_hash_offset)
1671 self.fhandle.seek(msgstrs_hash_offset)
1490 msgstrs_index = []
1672 msgstrs_index = []
1491 for i in range(numofstrings):
1673 for i in range(numofstrings):
1492 msgstrs_index.append(self._readbinary(ii, 8))
1674 msgstrs_index.append(self._readbinary(ii, 8))
1493 # build entries
1675 # build entries
1676 encoding = self.instance.encoding
1494 for i in range(numofstrings):
1677 for i in range(numofstrings):
1495 self.fhandle.seek(msgids_index[i][1])
1678 self.fhandle.seek(msgids_index[i][1])
1496 msgid = self.fhandle.read(msgids_index[i][0])
1679 msgid = self.fhandle.read(msgids_index[i][0])
1680
1497 self.fhandle.seek(msgstrs_index[i][1])
1681 self.fhandle.seek(msgstrs_index[i][1])
1498 msgstr = self.fhandle.read(msgstrs_index[i][0])
1682 msgstr = self.fhandle.read(msgstrs_index[i][0])
1499 if i == 0: # metadata
1683 if i == 0 and not msgid: # metadata
1500 raw_metadata, metadata = msgstr.split('\n'), {}
1684 raw_metadata, metadata = msgstr.split(b('\n')), {}
1501 for line in raw_metadata:
1685 for line in raw_metadata:
1502 tokens = line.split(':', 1)
1686 tokens = line.split(b(':'), 1)
1503 if tokens[0] != '':
1687 if tokens[0] != b(''):
1504 try:
1688 try:
1505 metadata[tokens[0]] = tokens[1].strip()
1689 k = tokens[0].decode(encoding)
1690 v = tokens[1].decode(encoding)
1691 metadata[k] = v.strip()
1506 except IndexError:
1692 except IndexError:
1507 metadata[tokens[0]] = ''
1693 metadata[k] = u('')
1508 self.instance.metadata = metadata
1694 self.instance.metadata = metadata
1509 continue
1695 continue
1510 # test if we have a plural entry
1696 # test if we have a plural entry
1511 msgid_tokens = msgid.split('\0')
1697 msgid_tokens = msgid.split(b('\0'))
1512 if len(msgid_tokens) > 1:
1698 if len(msgid_tokens) > 1:
1513 entry = self._build_entry(
1699 entry = self._build_entry(
1514 msgid=msgid_tokens[0],
1700 msgid=msgid_tokens[0],
1515 msgid_plural=msgid_tokens[1],
1701 msgid_plural=msgid_tokens[1],
1516 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
1702 msgstr_plural=dict((k, v) for k, v in
1703 enumerate(msgstr.split(b('\0'))))
1517 )
1704 )
1518 else:
1705 else:
1519 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1706 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1520 self.instance.append(entry)
1707 self.instance.append(entry)
1521 # close opened file
1708 # close opened file
1522 self.fhandle.close()
1709 self.fhandle.close()
1523 return self.instance
1710 return self.instance
1524
1711
1525 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1712 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
1526 msgstr_plural=None):
1713 msgstr_plural=None):
1527 msgctxt_msgid = msgid.split('\x04')
1714 msgctxt_msgid = msgid.split(b('\x04'))
1715 encoding = self.instance.encoding
1528 if len(msgctxt_msgid) > 1:
1716 if len(msgctxt_msgid) > 1:
1529 kwargs = {
1717 kwargs = {
1530 'msgctxt': msgctxt_msgid[0],
1718 'msgctxt': msgctxt_msgid[0].decode(encoding),
1531 'msgid' : msgctxt_msgid[1],
1719 'msgid': msgctxt_msgid[1].decode(encoding),
1532 }
1720 }
1533 else:
1721 else:
1534 kwargs = {'msgid': msgid}
1722 kwargs = {'msgid': msgid.decode(encoding)}
1535 if msgstr:
1723 if msgstr:
1536 kwargs['msgstr'] = msgstr
1724 kwargs['msgstr'] = msgstr.decode(encoding)
1537 if msgid_plural:
1725 if msgid_plural:
1538 kwargs['msgid_plural'] = msgid_plural
1726 kwargs['msgid_plural'] = msgid_plural.decode(encoding)
1539 if msgstr_plural:
1727 if msgstr_plural:
1728 for k in msgstr_plural:
1729 msgstr_plural[k] = msgstr_plural[k].decode(encoding)
1540 kwargs['msgstr_plural'] = msgstr_plural
1730 kwargs['msgstr_plural'] = msgstr_plural
1541 return MOEntry(**kwargs)
1731 return MOEntry(**kwargs)
1542
1732
1543 def _readbinary(self, fmt, numbytes):
1733 def _readbinary(self, fmt, numbytes):
1544 """
1734 """
1545 Private method that unpack n bytes of data using format <fmt>.
1735 Private method that unpack n bytes of data using format <fmt>.
1546 It returns a tuple or a mixed value if the tuple length is 1.
1736 It returns a tuple or a mixed value if the tuple length is 1.
1547 """
1737 """
1548 bytes = self.fhandle.read(numbytes)
1738 bytes = self.fhandle.read(numbytes)
1549 tup = struct.unpack(fmt, bytes)
1739 tup = struct.unpack(fmt, bytes)
1550 if len(tup) == 1:
1740 if len(tup) == 1:
1551 return tup[0]
1741 return tup[0]
1552 return tup
1742 return tup
1743 # }}}
1744 # class TextWrapper {{{
1745
1746
1747 class TextWrapper(textwrap.TextWrapper):
1748 """
1749 Subclass of textwrap.TextWrapper that backports the
1750 drop_whitespace option.
1751 """
1752 def __init__(self, *args, **kwargs):
1753 drop_whitespace = kwargs.pop('drop_whitespace', True)
1754 textwrap.TextWrapper.__init__(self, *args, **kwargs)
1755 self.drop_whitespace = drop_whitespace
1756
1757 def _wrap_chunks(self, chunks):
1758 """_wrap_chunks(chunks : [string]) -> [string]
1759
1760 Wrap a sequence of text chunks and return a list of lines of
1761 length 'self.width' or less. (If 'break_long_words' is false,
1762 some lines may be longer than this.) Chunks correspond roughly
1763 to words and the whitespace between them: each chunk is
1764 indivisible (modulo 'break_long_words'), but a line break can
1765 come between any two chunks. Chunks should not have internal
1766 whitespace; ie. a chunk is either all whitespace or a "word".
1767 Whitespace chunks will be removed from the beginning and end of
1768 lines, but apart from that whitespace is preserved.
1769 """
1770 lines = []
1771 if self.width <= 0:
1772 raise ValueError("invalid width %r (must be > 0)" % self.width)
1773
1774 # Arrange in reverse order so items can be efficiently popped
1775 # from a stack of chunks.
1776 chunks.reverse()
1777
1778 while chunks:
1779
1780 # Start the list of chunks that will make up the current line.
1781 # cur_len is just the length of all the chunks in cur_line.
1782 cur_line = []
1783 cur_len = 0
1784
1785 # Figure out which static string will prefix this line.
1786 if lines:
1787 indent = self.subsequent_indent
1788 else:
1789 indent = self.initial_indent
1790
1791 # Maximum width for this line.
1792 width = self.width - len(indent)
1793
1794 # First chunk on line is whitespace -- drop it, unless this
1795 # is the very beginning of the text (ie. no lines started yet).
1796 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
1797 del chunks[-1]
1798
1799 while chunks:
1800 l = len(chunks[-1])
1801
1802 # Can at least squeeze this chunk onto the current line.
1803 if cur_len + l <= width:
1804 cur_line.append(chunks.pop())
1805 cur_len += l
1806
1807 # Nope, this line is full.
1808 else:
1809 break
1810
1811 # The current line is full, and the next chunk is too big to
1812 # fit on *any* line (not just this one).
1813 if chunks and len(chunks[-1]) > width:
1814 self._handle_long_word(chunks, cur_line, cur_len, width)
1815
1816 # If the last chunk on this line is all whitespace, drop it.
1817 if self.drop_whitespace and cur_line and not cur_line[-1].strip():
1818 del cur_line[-1]
1819
1820 # Convert current line back to a string and store it in list
1821 # of all lines (return value).
1822 if cur_line:
1823 lines.append(indent + ''.join(cur_line))
1824
1825 return lines
1826 # }}}
1827 # function wrap() {{{
1828
1829
1830 def wrap(text, width=70, **kwargs):
1831 """
1832 Wrap a single paragraph of text, returning a list of wrapped lines.
1833 """
1834 if sys.version_info < (2, 6):
1835 return TextWrapper(width=width, **kwargs).wrap(text)
1836 return textwrap.wrap(text, width=width, **kwargs)
1553
1837
1554 # }}}
1838 # }}}
General Comments 0
You need to be logged in to leave comments. Login now