upstream/mercurial-mirror Commit - r15290:e40430fb

5

# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:

5

# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:

6

7

"""

7

"""

8

**polib** allows you to manipulate, create, modify gettext files (pot, po

8

**polib** allows you to manipulate, create, modify gettext files (pot, po and

9

~~and~~ mo files). You can load existing files, iterate through it's entries,

9

mo files). You can load existing files, iterate through its entries, add,

10

~~add,~~ modify entries, comments or metadata, etc... or create new po files

10

modify entries, comments or metadata, etc. or create new po files from scratch.

11

from scratch.

12

13

**polib** provides a simple and pythonic API, exporting only three

14

convenience functions (*pofile*, *mofile* and *detect_encoding*), and the

15

four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating

16

new files/entries.

17

18

**Basic example**:

19

11

20

>>> import polib

12

**polib** provides a simple and pythonic API via the :func:`~polib.pofile` and

21

>>> # load an existing po file

13

:func:`~polib.mofile` convenience functions.

22

>>> po = polib.pofile('tests/test_utf8.po')

23

>>> for entry in po:

24

... # do something with entry...

25

... pass

26

>>> # add an entry

27

>>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')

28

>>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]

29

>>> po.append(entry)

30

>>> # to save our modified po file:

31

>>> # po.save()

32

>>> # or you may want to compile the po file

33

>>> # po.save_as_mofile('tests/test_utf8.mo')

34

"""

14

"""

35

15

36

__author__ = 'David J~~EAN LOUIS~~ <izimobil@gmail.com>'

16

__author__ = 'David Jean Louis <izimobil@gmail.com>'

37

__version__ = '0.~~5.2~~'

17

__version__ = '0.6.4'

38

__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',

18

__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',

39

'detect_encoding', 'escape', 'unescape', 'detect_encoding',]

19

'detect_encoding', 'escape', 'unescape', 'detect_encoding',]

40

20

21

import array

41

import codecs

22

import codecs

23

import os

24

import re

42

import struct

25

import struct

26

import sys

43

import textwrap

27

import textwrap

44

import types

28

import types

45

import re

29

46

30

31

# the default encoding to use when encoding cannot be detected

47

default_encoding = 'utf-8'

32

default_encoding = 'utf-8'

48

33

49

# function pofile() {{{

34

# _pofile_or_mofile {{{

50

51

def pofile(fpath, **kwargs):

52

"""

53

Convenience function that parse the po/pot file *fpath* and return

54

a POFile instance.

55

56

**Keyword arguments**:

57

- *fpath*: string, full or relative path to the po/pot file to parse

58

- *wrapwidth*: integer, the wrap width, only useful when -w option was

59

passed to xgettext (optional, default to 78)

60

- *autodetect_encoding*: boolean, if set to False the function will

61

not try to detect the po file encoding (optional, default to True)

62

- *encoding*: string, an encoding, only relevant if autodetect_encoding

63

is set to False

64

- *check_for_duplicates*: whether to check for duplicate entries when

65

adding entries to the file, default: False (optional)

66

67

**Example**:

68

35

69

>>> import polib

36

def _pofile_or_mofile(f, type, **kwargs):

70

>>> po = polib.pofile('tests/test_weird_occurrences.po',

37

"""

71

... check_for_duplicates=True)

38

Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to

72

>>> po #doctest: +ELLIPSIS

39

honor the DRY concept.

73

74

>>> import os, tempfile

75

>>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural',

76

... 'msgstr_plural', 'obsolete', 'comment', 'tcomment',

77

... 'occurrences', 'flags', 'previous_msgctxt',

78

... 'previous_msgid', 'previous_msgid_plural')

79

>>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:

80

... orig_po = polib.pofile('tests/'+fname)

81

... tmpf = tempfile.NamedTemporaryFile().name

82

... orig_po.save(tmpf)

83

... try:

84

... new_po = polib.pofile(tmpf)

85

... for old, new in zip(orig_po, new_po):

86

... for attr in all_attrs:

87

... if getattr(old, attr) != getattr(new, attr):

88

... getattr(old, attr)

89

... getattr(new, attr)

90

... finally:

91

... os.unlink(tmpf)

92

>>> po_file = polib.pofile('tests/test_save_as_mofile.po')

93

>>> tmpf = tempfile.NamedTemporaryFile().name

94

>>> po_file.save_as_mofile(tmpf)

95

>>> try:

96

... mo_file = polib.mofile(tmpf)

97

... for old, new in zip(po_file, mo_file):

98

... if po_file._encode(old.msgid) != mo_file._encode(new.msgid):

99

... 'OLD: ', po_file._encode(old.msgid)

100

... 'NEW: ', mo_file._encode(new.msgid)

101

... if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):

102

... 'OLD: ', po_file._encode(old.msgstr)

103

... 'NEW: ', mo_file._encode(new.msgstr)

104

... print new.msgstr

105

... finally:

106

... os.unlink(tmpf)

107

"""

40

"""

108

if kwargs.get('autodetect_encoding', True):

41

# get the file encoding

109

enc = ~~detect_encoding~~(~~fpath~~)

42

enc = kwargs.get('encoding')

110

else:

43

if enc is None:

111

enc = kwargs.get('encoding', default_encoding)

44

enc = detect_encoding(f, type == 'mofile')

112

check_for_duplicates = kwargs.get('check_for_duplicates', False)

45

113

parser = _POFileParser(

46

# parse the file

114

fpath,

47

kls = type == 'pofile' and _POFileParser or _MOFileParser

48

parser = kls(

49

f,

115

encoding=enc,

50

encoding=enc,

116

check_for_duplicates=kwargs.get('check_for_duplicates', False)

51

check_for_duplicates=kwargs.get('check_for_duplicates', False)

117

)

52

)

120

return instance

55

return instance

121

56

122

# }}}

57

# }}}

58

# function pofile() {{{

59

60

def pofile(pofile, **kwargs):

61

"""

62

Convenience function that parses the po or pot file ``pofile`` and returns

63

a :class:`~polib.POFile` instance.

64

65

Arguments:

66

67

``pofile``

68

string, full or relative path to the po/pot file or its content (data).

69

70

``wrapwidth``

71

integer, the wrap width, only useful when the ``-w`` option was passed

72

to xgettext (optional, default: ``78``).

73

74

``encoding``

75

string, the encoding to use (e.g. "utf-8") (default: ``None``, the

76

encoding will be auto-detected).

77

78

``check_for_duplicates``

79

whether to check for duplicate entries when adding entries to the

80

file (optional, default: ``False``).

81

"""

82

return _pofile_or_mofile(pofile, 'pofile', **kwargs)

83

84

# }}}

123

# function mofile() {{{

85

# function mofile() {{{

124

86

125

def mofile(~~fpath~~, **kwargs):

87

def mofile(mofile, **kwargs):

126

"""

88

"""

127

Convenience function that parse the mo file ~~*fpath*~~ and return

89

Convenience function that parses the mo file ``mofile`` and returns a

128

a MOFile instance.

90

:class:`~polib.MOFile` instance.

129

91

130

**Keyword arguments**:

92

Arguments:

131

- *fpath*: string, full or relative path to the mo file to parse

132

- *wrapwidth*: integer, the wrap width, only useful when -w option was

133

passed to xgettext to generate the po file that was used to format

134

the mo file (optional, default to 78)

135

- *autodetect_encoding*: boolean, if set to False the function will

136

not try to detect the po file encoding (optional, default to True)

137

- *encoding*: string, an encoding, only relevant if autodetect_encoding

138

is set to False

139

- *check_for_duplicates*: whether to check for duplicate entries when

140

adding entries to the file, default: False (optional)

141

93

142

**Example**:

94

``mofile``

95

string, full or relative path to the mo file or its content (data).

143

96

144

>>> import polib

97

``wrapwidth``

145

>>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)

98

integer, the wrap width, only useful when the ``-w`` option was passed

146

>>> mo #doctest: +ELLIPSIS

99

to xgettext to generate the po file that was used to format the mo file

147

100

(optional, default: ``78``).

148

>>> import os, tempfile

101

149

>>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:

102

``encoding``

150

... orig_mo = polib.mofile('tests/'+fname)

103

string, the encoding to use (e.g. "utf-8") (default: ``None``, the

151

... tmpf = tempfile.NamedTemporaryFile().name

104

encoding will be auto-detected).

152

... orig_mo.save(tmpf)

105

153

... try:

106

``check_for_duplicates``

154

... new_mo = polib.mofile(tmpf)

107

whether to check for duplicate entries when adding entries to the

155

... for old, new in zip(orig_mo, new_mo):

108

file (optional, default: ``False``).

156

... if old.msgid != new.msgid:

157

... old.msgstr

158

... new.msgstr

159

... finally:

160

... os.unlink(tmpf)

161

"""

109

"""

162

if kwargs.get('autodetect_encoding', True):

110

return _pofile_or_mofile(mofile, 'mofile', **kwargs)

163

enc = detect_encoding(fpath, True)

164

else:

165

enc = kwargs.get('encoding', default_encoding)

166

parser = _MOFileParser(

167

fpath,

168

encoding=enc,

169

check_for_duplicates=kwargs.get('check_for_duplicates', False)

170

)

171

instance = parser.parse()

172

instance.wrapwidth = kwargs.get('wrapwidth', 78)

173

return instance

174

111

175

# }}}

112

# }}}

176

# function detect_encoding() {{{

113

# function detect_encoding() {{{

177

114

178

def detect_encoding(f~~path~~, binary_mode=False):

115

def detect_encoding(file, binary_mode=False):

179

"""

116

"""

180

Try to detect the encoding used by the ~~file *fpath*. The function will~~

117

Try to detect the encoding used by the ``file``. The ``file`` argument can

181

return polib default *encoding* if it's unable to detect it.

118

be a PO or MO file path or a string containing the contents of the file.

119

If the encoding cannot be detected, the function will return the value of

120

``default_encoding``.

182

121

183

~~**Keyword argument**~~:

122

Arguments:

184

- *fpath*: string, full or relative path to the mo file to parse.

123

124

``file``

125

string, full or relative path to the po/mo file or its content.

185

126

186

**Examples**:

127

``binary_mode``

128

boolean, set this to True if ``file`` is a mo file.

129

"""

130

rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')

187

131

188

>>> print(detect_encoding('tests/test_noencoding.po'))

132

def charset_exists(charset):

189

utf-8

133

"""Check whether ``charset`` is valid or not."""

190

>>> print(detect_encoding('tests/test_utf8.po'))

134

try:

191

UTF-8

135

codecs.lookup(charset)

192

>>> print(detect_encoding('tests/test_utf8.mo', True))

136

except LookupError:

193

UTF-8

137

return False

194

>>> print(detect_encoding('tests/test_iso-8859-15.po'))

138

return True

195

ISO_8859-15

139

196

>>> print(detect_encoding('tests/test_iso-8859-15.mo', True))

140

if not os.path.exists(file):

197

ISO_8859-15

141

match = rx.search(file)

198

"""

142

if match:

199

import re

143

enc = match.group(1).strip()

200

rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')

144

if charset_exists(enc):

201

if binary_mode:

145

return enc

202

mode = 'rb'

203

else:

146

else:

204

mode = 'r'

147

if binary_mode:

205

f = open(fpath, mode)

148

mode = 'rb'

206

for l in f.readlines():

149

else:

207

match = rx.search(l)

150

mode = 'r'

208

if match:

151

f = open(file, mode)

209

f.close()

152

for l in f.readlines():

210

~~return~~ ~~match~~.~~group~~(1).~~strip~~()

153

match = rx.search(l)

211

f.close()

154

if match:

155

f.close()

156

enc = match.group(1).strip()

157

if charset_exists(enc):

158

return enc

159

f.close()

212

return default_encoding

160

return default_encoding

213

161

214

# }}}

162

# }}}

216

164

217

def escape(st):

165

def escape(st):

218

"""

166

"""

219

Escape special chars and return the given string *st*.

167

Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in

220

168

the given string ``st`` and returns it.

221

**Examples**:

222

223

>>> escape('\\t and \\n and \\r and " and \\\\')

224

'\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'

225

"""

169

"""

226

return st.replace('\\', r'\\')\

170

return st.replace('\\', r'\\')\

227

.replace('\t', r'\t')\

171

.replace('\t', r'\t')\

234

178

235

def unescape(st):

179

def unescape(st):

236

"""

180

"""

237

Unescape special chars and return the given string *st*.

181

Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in

238

182

the given string ``st`` and returns it.

239

**Examples**:

240

241

>>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')

242

'\\t and \\n and \\r and " and \\\\'

243

>>> unescape(r'\\n')

244

'\\n'

245

>>> unescape(r'\\\\n')

246

'\\\\n'

247

>>> unescape(r'\\\\n\\n')

248

'\\\\n\\n'

249

"""

183

"""

250

def unescape_repl(m):

184

def unescape_repl(m):

251

m = m.group(1)

185

m = m.group(1)

265

199

266

class _BaseFile(list):

200

class _BaseFile(list):

267

"""

201

"""

268

Common ~~parent~~ class for POFile and MOFile ~~classes.~~

202

Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`

269

This class ~~must~~ **not** be instanciated directly.

203

classes. This class should **not** be instanciated directly.

270

"""

204

"""

271

205

272

def __init__(self, *args, **kwargs):

206

def __init__(self, *args, **kwargs):

273

"""

207

"""

274

Constructor.

208

Constructor, accepts the following keyword arguments:

209

210

``pofile``

211

string, the path to the po or mo file, or its content as a string.

275

212

276

**Keyword arguments**:

213

``wrapwidth``

277

- *fpath*: string, path to po or mo file

214

integer, the wrap width, only useful when the ``-w`` option was

278

- *wrapwidth*: integer, the wrap width, only useful when -w option

215

passed to xgettext (optional, default: ``78``).

279

was passed to xgettext to generate the po file that was used to

216

280

format the mo file, default to 78 (optional),

217

``encoding``

281

- *encoding*: string, the encoding to use, defaults to

218

string, the encoding to use, defaults to ``default_encoding``

282

~~"default_encoding"~~ global variable (optional),

219

global variable (optional).

283

- *check_for_duplicates*: whether to check for duplicate entries

220

284

when adding entries to the file, default: False (optional).

221

``check_for_duplicates``

222

whether to check for duplicate entries when adding entries to the

223

file, (optional, default: ``False``).

285

"""

224

"""

286

list.__init__(self)

225

list.__init__(self)

287

# the opened file handle

226

# the opened file handle

288

~~self~~.~~fpath~~ = kwargs.get('~~fpath~~')

227

pofile = kwargs.get('pofile', None)

228

if pofile and os.path.exists(pofile):

229

self.fpath = pofile

230

else:

231

self.fpath = kwargs.get('fpath')

289

# the width at which lines should be wrapped

232

# the width at which lines should be wrapped

290

self.wrapwidth = kwargs.get('wrapwidth', 78)

233

self.wrapwidth = kwargs.get('wrapwidth', 78)

291

# the file encoding

234

# the file encoding

298

self.metadata = {}

241

self.metadata = {}

299

self.metadata_is_fuzzy = 0

242

self.metadata_is_fuzzy = 0

300

243

301

def __~~str~~__(self):

244

def __unicode__(self):

302

"""

245

"""

303

~~String~~ representation of the file.

246

Returns the unicode representation of the file.

304

"""

247

"""

305

ret = []

248

ret = []

306

entries = [self.metadata_as_entry()] + \

249

entries = [self.metadata_as_entry()] + \

307

[e for e in self if not e.obsolete]

250

[e for e in self if not e.obsolete]

308

for entry in entries:

251

for entry in entries:

309

ret.append(entry.__~~str~~__(self.wrapwidth))

252

ret.append(entry.__unicode__(self.wrapwidth))

310

for entry in self.obsolete_entries():

253

for entry in self.obsolete_entries():

311

ret.append(entry.__~~str~~__(self.wrapwidth))

254

ret.append(entry.__unicode__(self.wrapwidth))

312

ret~~urn~~ '\n'.join(ret)

255

ret = '\n'.join(ret)

256

257

if type(ret) != types.UnicodeType:

258

return unicode(ret, self.encoding)

259

return ret

260

261

def __str__(self):

262

"""

263

Returns the string representation of the file.

264

"""

265

return unicode(self).encode(self.encoding)

313

266

314

def __contains__(self, entry):

267

def __contains__(self, entry):

315

"""

268

"""

316

Overriden method to implement the membership test (in and ~~not in).~~

269

Overridden ``list`` method to implement the membership test (in and

317

The method considers that an entry is in the file if it finds an

270

not in).

318

entry that has the same msgid (case sensitive).

271

The method considers that an entry is in the file if it finds an entry

319

272

that has the same msgid (the test is **case sensitive**).

320

**Keyword argument**:

321

- *entry*: an instance of polib._BaseEntry

322

273

323

~~**Tests**~~:

274

Argument:

324

>>> po = POFile()

275

325

>>> e1 = POEntry(msgid='foobar', msgstr='spam')

276

``entry``

326

>>> e2 = POEntry(msgid='barfoo', msgstr='spam')

277

an instance of :class:`~polib._BaseEntry`.

327

>>> e3 = POEntry(msgid='foobar', msgstr='eggs')

328

>>> e4 = POEntry(msgid='spameggs', msgstr='eggs')

329

>>> po.append(e1)

330

>>> po.append(e2)

331

>>> e1 in po

332

True

333

>>> e2 not in po

334

False

335

>>> e3 in po

336

True

337

>>> e4 in po

338

False

339

"""

278

"""

340

return self.find(entry.msgid, by='msgid') is not None

279

return self.find(entry.msgid, by='msgid') is not None

280

281

def __eq__(self, other):

282

return unicode(self) == unicode(other)

341

283

342

def append(self, entry):

284

def append(self, entry):

343

"""

285

"""

344

Overriden method to check for duplicates entries, if a user tries to

286

Overriden method to check for duplicates entries, if a user tries to

345

add an entry that already ~~exists~~, the method will raise a ~~ValueError~~

287

add an entry that is already in the file, the method will raise a

346

exception.

288

``ValueError`` exception.

347

348

**Keyword argument**:

349

- *entry*: an instance of polib._BaseEntry

350

289

351

~~**Tests**~~:

290

Argument:

352

>>> e1 = POEntry(msgid='foobar', msgstr='spam')

291

353

>>> e2 = POEntry(msgid='foobar', msgstr='eggs')

292

``entry``

354

>>> po = POFile(check_for_duplicates=True)

293

an instance of :class:`~polib._BaseEntry`.

355

>>> po.append(e1)

356

>>> try:

357

... po.append(e2)

358

... except ValueError, e:

359

... unicode(e)

360

u'Entry "foobar" already exists'

361

"""

294

"""

362

if self.check_for_duplicates and entry in self:

295

if self.check_for_duplicates and entry in self:

363

raise ValueError('Entry "%s" already exists' % entry.msgid)

296

raise ValueError('Entry "%s" already exists' % entry.msgid)

366

def insert(self, index, entry):

299

def insert(self, index, entry):

367

"""

300

"""

368

Overriden method to check for duplicates entries, if a user tries to

301

Overriden method to check for duplicates entries, if a user tries to

369

~~insert~~ an entry that already ~~exists~~, the method will raise a ~~ValueError~~

302

add an entry that is already in the file, the method will raise a

370

exception.

303

``ValueError`` exception.

371

304

372

**Keyword arguments**:

305

Arguments:

373

- *index*: index at which the entry should be inserted

374

- *entry*: an instance of polib._BaseEntry

375

306

376

**Tests**:

307

``index``

377

>>> import polib

308

index at which the entry should be inserted.

378

>>> polib.check_for_duplicates = True

309

379

>>> e1 = POEntry(msgid='foobar', msgstr='spam')

310

``entry``

380

>>> e2 = POEntry(msgid='barfoo', msgstr='eggs')

311

an instance of :class:`~polib._BaseEntry`.

381

>>> e3 = POEntry(msgid='foobar', msgstr='eggs')

382

>>> po = POFile(check_for_duplicates=True)

383

>>> po.insert(0, e1)

384

>>> po.insert(1, e2)

385

>>> try:

386

... po.insert(0, e3)

387

... except ValueError, e:

388

... unicode(e)

389

u'Entry "foobar" already exists'

390

"""

312

"""

391

if self.check_for_duplicates and entry in self:

313

if self.check_for_duplicates and entry in self:

392

raise ValueError('Entry "%s" already exists' % entry.msgid)

314

raise ValueError('Entry "%s" already exists' % entry.msgid)

393

super(_BaseFile, self).insert(index, entry)

315

super(_BaseFile, self).insert(index, entry)

394

316

395

def __repr__(self):

396

"""Return the official string representation of the object."""

397

return '<%s instance at %x>' % (self.__class__.__name__, id(self))

398

399

def metadata_as_entry(self):

317

def metadata_as_entry(self):

400

"""

318

"""

401

Return the metadata as a~~n entry:~~

319

Returns the file metadata as a :class:`~polib.POEntry` instance.

402

403

>>> import polib

404

>>> po = polib.pofile('tests/test_fuzzy_header.po')

405

>>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())

406

True

407

"""

320

"""

408

e = POEntry(msgid='')

321

e = POEntry(msgid='')

409

mdata = self.ordered_metadata()

322

mdata = self.ordered_metadata()

410

if mdata:

323

if mdata:

411

strs = []

324

strs = []

412

e._multiline_str['msgstr'] = ''

413

for name, value in mdata:

325

for name, value in mdata:

414

# Strip whitespace off each line in a multi-line entry

326

# Strip whitespace off each line in a multi-line entry

415

strs.append('%s: %s' % (name, value))

327

strs.append('%s: %s' % (name, value))

416

e.msgstr = '\n'.join(strs) + '\n'

328

e.msgstr = '\n'.join(strs) + '\n'

417

e._multiline_str['msgstr'] = '__POLIB__NL__'.join(

418

[s + '\n' for s in strs])

419

if self.metadata_is_fuzzy:

329

if self.metadata_is_fuzzy:

420

e.flags.append('fuzzy')

330

e.flags.append('fuzzy')

421

return e

331

return e

422

332

423

def save(self, fpath=None, repr_method='__str__'):

333

def save(self, fpath=None, repr_method='__str__'):

424

"""

334

"""

425

Save the po file to ~~file *fpath* if no file handle exists for~~

335

Saves the po file to ``fpath``.

426

the object. If there's already an open file and no fpath is

336

If it is an existing file and no ``fpath`` is provided, then the

427

~~provided, then the~~ existing file is rewritten with the modified

337

existing file is rewritten with the modified data.

428

data.

338

339

Keyword arguments:

429

340

430

**Keyword arguments**:

341

``fpath``

431

- *fpath*: string, full or relative path to the file.

342

string, full or relative path to the file.

432

- *repr_method*: string, the method to use for output.

343

344

``repr_method``

345

string, the method to use for output.

433

"""

346

"""

434

if self.fpath is None and fpath is None:

347

if self.fpath is None and fpath is None:

435

raise IOError('You must provide a file path to save() method')

348

raise IOError('You must provide a file path to save() method')

444

contents = contents.decode(self.encoding)

357

contents = contents.decode(self.encoding)

445

fhandle.write(contents)

358

fhandle.write(contents)

446

fhandle.close()

359

fhandle.close()

447

360

# set the file path if not set

448

def find(self, st, by='msgid'):

361

if self.fpath is None and fpath:

449

"""

362

self.fpath = fpath

450

Find entry which msgid (or property identified by the *by*

451

attribute) matches the string *st*.

452

363

453

**Keyword arguments**:

364

def find(self, st, by='msgid', include_obsolete_entries=False,

454

- *st*: string, the string to search for

365

msgctxt=False):

455

- *by*: string, the comparison attribute

366

"""

367

Find the entry which msgid (or property identified by the ``by``

368

argument) matches the string ``st``.

456

369

457

**Examples**:

370

Keyword arguments:

371

372

``st``

373

string, the string to search for.

458

374

459

>>> po = pofile('tests/test_utf8.po')

375

``by``

460

>>> entry = po.find('Thursday')

376

string, the property to use for comparison (default: ``msgid``).

461

>>> entry.msgstr

377

462

u'Jueves'

378

``include_obsolete_entries``

463

>>> entry = po.find('Some unexistant msgid')

379

boolean, whether to also search in entries that are obsolete.

464

>>> entry is None

380

465

True

381

``msgctxt``

466

>>> entry = po.find('Jueves', 'msgstr')

382

string, allows to specify a specific message context for the

467

>>> entry.msgid

383

search.

468

u'Thursday'

469

"""

384

"""

470

for e in self:

385

if include_obsolete_entries:

386

entries = self[:]

387

else:

388

entries = [e for e in self if not e.obsolete]

389

for e in entries:

471

if getattr(e, by) == st:

390

if getattr(e, by) == st:

391

if msgctxt and e.msgctxt != msgctxt:

392

continue

472

return e

393

return e

473

return None

394

return None

474

395

475

def ordered_metadata(self):

396

def ordered_metadata(self):

476

"""

397

"""

477

Convenience method that return ~~the metadata ordered. The return~~

398

Convenience method that returns an ordered version of the metadata

478

value is list of tuples (metadata name, ~~metadata_value).~~

399

dictionary. The return value is a list of tuples (metadata name,

400

metadata_value).

479

"""

401

"""

480

# copy the dict first

402

# copy the dict first

481

metadata = self.metadata.copy()

403

metadata = self.metadata.copy()

497

ordered_data.append((data, value))

419

ordered_data.append((data, value))

498

except KeyError:

420

except KeyError:

499

pass

421

pass

500

# the rest of the metadata w~~on't be ordered there are no specs for this~~

422

# the rest of the metadata will be alphabetically ordered since there

423

# are no specs for this AFAIK

501

keys = metadata.keys()

424

keys = metadata.keys()

502

~~list~~(keys).sort()

425

keys.sort()

503

for data in keys:

426

for data in keys:

504

value = metadata[data]

427

value = metadata[data]

505

ordered_data.append((data, value))

428

ordered_data.append((data, value))

507

430

508

def to_binary(self):

431

def to_binary(self):

509

"""

432

"""

510

Return the ~~mofile~~ binary representation.

433

Return the binary representation of the file.

511

"""

434

"""

512

import array

513

import struct

514

import types

515

offsets = []

435

offsets = []

516

entries = self.translated_entries()

436

entries = self.translated_entries()

517

# the keys are sorted in the .mo file

437

# the keys are sorted in the .mo file

518

def cmp(_self, other):

438

def cmp(_self, other):

519

if _self.msgid > other.msgid:

439

# msgfmt compares entries with msgctxt if it exists

440

self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid

441

other_msgid = other.msgctxt and other.msgctxt or other.msgid

442

if self_msgid > other_msgid:

520

return 1

443

return 1

521

elif ~~_self~~.msgid < other.msgid:

444

elif self_msgid < other_msgid:

522

return -1

445

return -1

523

else:

446

else:

524

return 0

447

return 0

525

# add metadata entry

448

# add metadata entry

526

entries.sort(cmp)

449

entries.sort(cmp)

527

mentry = self.metadata_as_entry()

450

mentry = self.metadata_as_entry()

528

mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()

451

#mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()

529

entries = [mentry] + entries

452

entries = [mentry] + entries

530

entries_len = len(entries)

453

entries_len = len(entries)

531

ids, strs = '', ''

454

ids, strs = '', ''

532

for e in entries:

455

for e in entries:

533

# For each string, we need size and file offset. Each string is

456

# For each string, we need size and file offset. Each string is

534

# NUL terminated; the NUL does not count into the size.

457

# NUL terminated; the NUL does not count into the size.

458

msgid = ''

459

if e.msgctxt:

460

# Contexts are stored by storing the concatenation of the

461

# context, a <EOT> byte, and the original string

462

msgid = self._encode(e.msgctxt + '\4')

535

if e.msgid_plural:

463

if e.msgid_plural:

536

indexes = e.msgstr_plural.keys()

464

indexes = e.msgstr_plural.keys()

537

indexes.sort()

465

indexes.sort()

538

msgstr = []

466

msgstr = []

539

for index in indexes:

467

for index in indexes:

540

msgstr.append(e.msgstr_plural[index])

468

msgstr.append(e.msgstr_plural[index])

541

msgid = self._encode(e.msgid + '\0' + e.msgid_plural)

469

msgid += self._encode(e.msgid + '\0' + e.msgid_plural)

542

msgstr = self._encode('\0'.join(msgstr))

470

msgstr = self._encode('\0'.join(msgstr))

543

else:

471

else:

544

msgid = self._encode(e.msgid)

472

msgid += self._encode(e.msgid)

545

msgstr = self._encode(e.msgstr)

473

msgstr = self._encode(e.msgstr)

546

offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))

474

offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))

547

ids += msgid + '\0'

475

ids += msgid + '\0'

548

strs += msgstr + '\0'

476

strs += msgstr + '\0'

477

549

# The header is 7 32-bit unsigned integers.

478

# The header is 7 32-bit unsigned integers.

550

keystart = 7*4+16*entries_len

479

keystart = 7*4+16*entries_len

551

# and the values start after the keys

480

# and the values start after the keys

558

koffsets += [l1, o1+keystart]

487

koffsets += [l1, o1+keystart]

559

voffsets += [l2, o2+valuestart]

488

voffsets += [l2, o2+valuestart]

560

offsets = koffsets + voffsets

489

offsets = koffsets + voffsets

561

output = struct.pack("IIIIIII",

490

# check endianness for magic number

562

0x950412de, # Magic number

491

if struct.pack('@h', 1) == struct.pack('<h', 1):

563

0, # Version

492

magic_number = MOFile.LITTLE_ENDIAN

564

entries_len, # # of entries

493

else:

565

7*4, # start of key index

494

magic_number = MOFile.BIG_ENDIAN

566

7*4+entries_len*8, # start of value index

495

567

0, 0) # size and offset of hash table

496

output = struct.pack(

568

output += array.array("I", offsets).tostring()

497

"Iiiiiii",

498

magic_number, # Magic number

499

0, # Version

500

entries_len, # # of entries

501

7*4, # start of key index

502

7*4+entries_len*8, # start of value index

503

0, keystart # size and offset of hash table

504

# Important: we don't use hash tables

505

)

506

output += array.array("i", offsets).tostring()

569

output += ids

507

output += ids

570

output += strs

508

output += strs

571

return output

509

return output

572

510

573

def _encode(self, mixed):

511

def _encode(self, mixed):

574

"""

512

"""

575

Encode the given argument with the file encoding if ~~the type is unicode~~

513

Encodes the given ``mixed`` argument with the file encoding if and

576

and return the encoded string.

514

only if it is a unicode string and returns the encoded string.

577

"""

515

"""

578

if type(mixed) == types.UnicodeType:

516

if type(mixed) == types.UnicodeType:

579

return mixed.encode(self.encoding)

517

return mixed.encode(self.encoding)

583

# class POFile {{{

521

# class POFile {{{

584

522

585

class POFile(_BaseFile):

523

class POFile(_BaseFile):

586

'''

524

"""

587

Po (or Pot) file reader/writer.

525

Po (or Pot) file reader/writer.

588

POFile objects inherit the list objects methods.

526

This class inherits the :class:`~polib._BaseFile` class and, by extension,

589

527

the python ``list`` type.

590

**Example**:

528

"""

591

529

592

>>> po = POFile()

530

def __unicode__(self):

593

>>> entry1 = POEntry(

531

"""

594

... msgid="Some english text",

532

Returns the unicode representation of the po file.

595

... msgstr="Un texte en anglais"

533

"""

596

... )

597

>>> entry1.occurrences = [('testfile', 12),('another_file', 1)]

598

>>> entry1.comment = "Some useful comment"

599

>>> entry2 = POEntry(

600

... msgid="Peace in some languages",

601

... msgstr="Pace سلام שלום Hasîtî 和平"

602

... )

603

>>> entry2.occurrences = [('testfile', 15),('another_file', 5)]

604

>>> entry2.comment = "Another useful comment"

605

>>> entry3 = POEntry(

606

... msgid='Some entry with quotes " \\"',

607

... msgstr='Un message unicode avec des quotes " \\"'

608

... )

609

>>> entry3.comment = "Test string quoting"

610

>>> po.append(entry1)

611

>>> po.append(entry2)

612

>>> po.append(entry3)

613

>>> po.header = "Some Header"

614

>>> print(po)

615

# Some Header

616

msgid ""

617

msgstr ""

618

619

#. Some useful comment

620

#: testfile:12 another_file:1

621

msgid "Some english text"

622

msgstr "Un texte en anglais"

623

624

#. Another useful comment

625

#: testfile:15 another_file:5

626

msgid "Peace in some languages"

627

msgstr "Pace سلام שלום Hasîtî 和平"

628

629

#. Test string quoting

630

msgid "Some entry with quotes \\" \\""

631

msgstr "Un message unicode avec des quotes \\" \\""

632

633

'''

634

635

def __str__(self):

636

"""Return the string representation of the po file"""

637

ret, headers = '', self.header.split('\n')

534

ret, headers = '', self.header.split('\n')

638

for header in headers:

535

for header in headers:

639

if header[:1] in [',', ':']:

536

if header[:1] in [',', ':']:

640

ret += '#%s\n' % header

537

ret += '#%s\n' % header

641

else:

538

else:

642

ret += '# %s\n' % header

539

ret += '# %s\n' % header

643

return ret + _BaseFile.__str__(self)

540

541

if type(ret) != types.UnicodeType:

542

ret = unicode(ret, self.encoding)

543

544

return ret + _BaseFile.__unicode__(self)

644

545

645

def save_as_mofile(self, fpath):

546

def save_as_mofile(self, fpath):

646

"""

547

"""

647

Save the binary representation of the file to ~~*fpath*~~.

548

Saves the binary representation of the file to given ``fpath``.

648

549

649

**Keyword arguments**:

550

Keyword argument:

650

- *fpath*: string, full or relative path to the file.

551

552

``fpath``

553

string, full or relative path to the mo file.

651

"""

554

"""

652

_BaseFile.save(self, fpath, 'to_binary')

555

_BaseFile.save(self, fpath, 'to_binary')

653

556

654

def percent_translated(self):

557

def percent_translated(self):

655

"""

558

"""

656

Convenience method that return the percentage of translated

559

Convenience method that returns the percentage of translated

657

messages.

560

messages.

658

659

**Example**:

660

661

>>> import polib

662

>>> po = polib.pofile('tests/test_pofile_helpers.po')

663

>>> po.percent_translated()

664

50

665

>>> po = POFile()

666

>>> po.percent_translated()

667

100

668

"""

561

"""

669

total = len([e for e in self if not e.obsolete])

562

total = len([e for e in self if not e.obsolete])

670

if total == 0:

563

if total == 0:

674

567

675

def translated_entries(self):

568

def translated_entries(self):

676

"""

569

"""

677

Convenience method that return a list of translated entries.

570

Convenience method that returns the list of translated entries.

678

679

**Example**:

680

681

>>> import polib

682

>>> po = polib.pofile('tests/test_pofile_helpers.po')

683

>>> len(po.translated_entries())

684

6

685

"""

571

"""

686

return [e for e in self if e.translated()]

572

return [e for e in self if e.translated()]

687

573

688

def untranslated_entries(self):

574

def untranslated_entries(self):

689

"""

575

"""

690

Convenience method that return a list of untranslated entries.

576

Convenience method that returns the list of untranslated entries.

691

692

**Example**:

693

694

>>> import polib

695

>>> po = polib.pofile('tests/test_pofile_helpers.po')

696

>>> len(po.untranslated_entries())

697

4

698

"""

577

"""

699

return [e for e in self if not e.translated() and not e.obsolete \

578

return [e for e in self if not e.translated() and not e.obsolete \

700

and not 'fuzzy' in e.flags]

579

and not 'fuzzy' in e.flags]

701

580

702

def fuzzy_entries(self):

581

def fuzzy_entries(self):

703

"""

582

"""

704

Convenience method that return the list of 'fuzzy' entries.

583

Convenience method that returns the list of fuzzy entries.

705

706

**Example**:

707

708

>>> import polib

709

>>> po = polib.pofile('tests/test_pofile_helpers.po')

710

>>> len(po.fuzzy_entries())

711

2

712

"""

584

"""

713

return [e for e in self if 'fuzzy' in e.flags]

585

return [e for e in self if 'fuzzy' in e.flags]

714

586

715

def obsolete_entries(self):

587

def obsolete_entries(self):

716

"""

588

"""

717

Convenience method that return the list of obsolete entries.

589

Convenience method that returns the list of obsolete entries.

718

719

**Example**:

720

721

>>> import polib

722

>>> po = polib.pofile('tests/test_pofile_helpers.po')

723

>>> len(po.obsolete_entries())

724

4

725

"""

590

"""

726

return [e for e in self if e.obsolete]

591

return [e for e in self if e.obsolete]

727

592

728

def merge(self, refpot):

593

def merge(self, refpot):

729

"""

594

"""

730

XXX this could not work if encodings are different, needs thinking

595

Convenience method that merges the current pofile with the pot file

731

and general refactoring of how polib handles encoding...

732

733

Convenience method that merge the current pofile with the pot file

734

provided. It behaves exactly as the gettext msgmerge utility:

596

provided. It behaves exactly as the gettext msgmerge utility:

735

597

736

- comments of this file will be preserved, but extracted comments

598

* comments of this file will be preserved, but extracted comments and

737

~~and~~ occurrences will be discarded

599

occurrences will be discarded;

738

- any translations or comments in the file will be discarded,

600

* any translations or comments in the file will be discarded, however,

739

~~however~~ dot comments and file positions will be preserved

601

dot comments and file positions will be preserved;

740

602

* the fuzzy flags are preserved.

741

**Keyword argument**:

742

- *refpot*: object POFile, the reference catalog.

743

603

744

**Example**:

604

Keyword argument:

745

605

746

>>> import polib

606

``refpot``

747

>>> refpot = polib.pofile('tests/test_merge.pot')

607

object POFile, the reference catalog.

748

>>> po = polib.pofile('tests/test_merge_before.po')

749

>>> po.merge(refpot)

750

>>> expected_po = polib.pofile('tests/test_merge_after.po')

751

>>> unicode(po) == unicode(expected_po)

752

True

753

"""

608

"""

754

for entry in refpot:

609

for entry in refpot:

755

e = self.find(entry.msgid)

610

e = self.find(entry.msgid, include_obsolete_entries=True)

756

if e is None:

611

if e is None:

757

e = POEntry()

612

e = POEntry()

758

self.append(e)

613

self.append(e)

759

e.merge(entry)

614

e.merge(entry)

760

# ok, now we must "obsolete" entries that are not in the refpot

615

# ok, now we must "obsolete" entries that are not in the refpot anymore

761

# anymore

762

for entry in self:

616

for entry in self:

763

if refpot.find(entry.msgid) is None:

617

if refpot.find(entry.msgid) is None:

764

entry.obsolete = True

618

entry.obsolete = True

767

# class MOFile {{{

621

# class MOFile {{{

768

622

769

class MOFile(_BaseFile):

623

class MOFile(_BaseFile):

770

'''

624

"""

771

Mo file reader/writer.

625

Mo file reader/writer.

772

MOFile objects inherit the list objects methods.

626

This class inherits the :class:`~polib._BaseFile` class and, by

773

627

extension, the python ``list`` type.

774

**Example**:

628

"""

775

629

BIG_ENDIAN = 0xde120495

776

>>> mo = MOFile()

630

LITTLE_ENDIAN = 0x950412de

777

>>> entry1 = POEntry(

778

... msgid="Some english text",

779

... msgstr="Un texte en anglais"

780

... )

781

>>> entry2 = POEntry(

782

... msgid="I need my dirty cheese",

783

... msgstr="Je veux mon sale fromage"

784

... )

785

>>> entry3 = MOEntry(

786

... msgid='Some entry with quotes " \\"',

787

... msgstr='Un message unicode avec des quotes " \\"'

788

... )

789

>>> mo.append(entry1)

790

>>> mo.append(entry2)

791

>>> mo.append(entry3)

792

>>> print(mo)

793

msgid ""

794

msgstr ""

795

796

msgid "Some english text"

797

msgstr "Un texte en anglais"

798

799

msgid "I need my dirty cheese"

800

msgstr "Je veux mon sale fromage"

801

802

msgid "Some entry with quotes \\" \\""

803

msgstr "Un message unicode avec des quotes \\" \\""

804

805

'''

806

631

807

def __init__(self, *args, **kwargs):

632

def __init__(self, *args, **kwargs):

808

"""

633

"""

809

MOFile constructor. Mo files have two other properties:

634

Constructor, accepts all keywords arguments accepted by

810

- magic_number: the magic_number of the binary file,

635

:class:`~polib._BaseFile` class.

811

- version: the version of the mo spec.

812

"""

636

"""

813

_BaseFile.__init__(self, *args, **kwargs)

637

_BaseFile.__init__(self, *args, **kwargs)

814

self.magic_number = None

638

self.magic_number = None

816

640

817

def save_as_pofile(self, fpath):

641

def save_as_pofile(self, fpath):

818

"""

642

"""

819

Save the string representation of the file to *fpath*.

643

Saves the mofile as a pofile to ``fpath``.

820

644

821

**Keyword argument**:

645

Keyword argument:

822

- *fpath*: string, full or relative path to the file.

646

647

``fpath``

648

string, full or relative path to the file.

823

"""

649

"""

824

_BaseFile.save(self, fpath)

650

_BaseFile.save(self, fpath)

825

651

826

def save(self, fpath):

652

def save(self, fpath=None):

827

"""

653

"""

828

Save ~~the binary repre~~s~~entation of~~ the file to ~~*fpath*~~.

654

Saves the mofile to ``fpath``.

829

655

830

**Keyword argument**:

656

Keyword argument:

831

- *fpath*: string, full or relative path to the file.

657

658

``fpath``

659

string, full or relative path to the file.

832

"""

660

"""

833

_BaseFile.save(self, fpath, 'to_binary')

661

_BaseFile.save(self, fpath, 'to_binary')

834

662

867

695

868

class _BaseEntry(object):

696

class _BaseEntry(object):

869

"""

697

"""

870

Base class for POEntry ~~or MOEntry object~~s.

698

Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.

871

This class ~~must *not~~* be instanciated directly.

699

This class should **not** be instanciated directly.

872

"""

700

"""

873

701

874

def __init__(self, *args, **kwargs):

702

def __init__(self, *args, **kwargs):

875

"""Base Entry constructor."""

703

"""

704

Constructor, accepts the following keyword arguments:

705

706

``msgid``

707

string, the entry msgid.

708

709

``msgstr``

710

string, the entry msgstr.

711

712

``msgid_plural``

713

string, the entry msgid_plural.

714

715

``msgstr_plural``

716

list, the entry msgstr_plural lines.

717

718

``msgctxt``

719

string, the entry context (msgctxt).

720

721

``obsolete``

722

bool, whether the entry is "obsolete" or not.

723

724

``encoding``

725

string, the encoding to use, defaults to ``default_encoding``

726

global variable (optional).

727

"""

876

self.msgid = kwargs.get('msgid', '')

728

self.msgid = kwargs.get('msgid', '')

877

self.msgstr = kwargs.get('msgstr', '')

729

self.msgstr = kwargs.get('msgstr', '')

878

self.msgid_plural = kwargs.get('msgid_plural', '')

730

self.msgid_plural = kwargs.get('msgid_plural', '')

879

self.msgstr_plural = kwargs.get('msgstr_plural', {})

731

self.msgstr_plural = kwargs.get('msgstr_plural', {})

732

self.msgctxt = kwargs.get('msgctxt', None)

880

self.obsolete = kwargs.get('obsolete', False)

733

self.obsolete = kwargs.get('obsolete', False)

881

self.encoding = kwargs.get('encoding', default_encoding)

734

self.encoding = kwargs.get('encoding', default_encoding)

882

self.msgctxt = kwargs.get('msgctxt', None)

883

self._multiline_str = {}

884

735

885

def __~~repr~~__(self):

736

def __unicode__(self, wrapwidth=78):

886

"""Return the official string representation of the object."""

887

return '<%s instance at %x>' % (self.__class__.__name__, id(self))

888

889

def __str__(self, wrapwidth=78):

890

"""

737

"""

891

~~Common string~~ representation of the ~~POEntry and MOEntry~~

738

Returns the unicode representation of the entry.

892

objects.

893

"""

739

"""

894

if self.obsolete:

740

if self.obsolete:

895

delflag = '#~ '

741

delflag = '#~ '

898

ret = []

744

ret = []

899

# write the msgctxt if any

745

# write the msgctxt if any

900

if self.msgctxt is not None:

746

if self.msgctxt is not None:

901

ret += self._str_field("msgctxt", delflag, "", self.msgctxt)

747

ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)

902

# write the msgid

748

# write the msgid

903

ret += self._str_field("msgid", delflag, "", self.msgid)

749

ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)

904

# write the msgid_plural if any

750

# write the msgid_plural if any

905

if self.msgid_plural:

751

if self.msgid_plural:

906

ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)

752

ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)

907

if self.msgstr_plural:

753

if self.msgstr_plural:

908

# write the msgstr_plural if any

754

# write the msgstr_plural if any

909

msgstrs = self.msgstr_plural

755

msgstrs = self.msgstr_plural

912

for index in keys:

758

for index in keys:

913

msgstr = msgstrs[index]

759

msgstr = msgstrs[index]

914

plural_index = '[%s]' % index

760

plural_index = '[%s]' % index

915

ret += self._str_field("msgstr", delflag, plural_index, msgstr)

761

ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)

916

else:

762

else:

917

# otherwise write the msgstr

763

# otherwise write the msgstr

918

ret += self._str_field("msgstr", delflag, "", self.msgstr)

764

ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)

919

ret.append('')

765

ret.append('')

920

ret~~urn~~ '\n'.join(ret)

766

ret = '\n'.join(ret)

767

768

if type(ret) != types.UnicodeType:

769

return unicode(ret, self.encoding)

770

return ret

771

772

def __str__(self):

773

"""

774

Returns the string representation of the entry.

775

"""

776

return unicode(self).encode(self.encoding)

777

778

def __eq__(self, other):

779

return unicode(self) == unicode(other)

921

780

922

def _str_field(self, fieldname, delflag, plural_index, field):

781

def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):

923

if (fieldname + plural_index) in self._multiline_str:

782

lines = field.splitlines(True)

924

field = self._multiline_str[fieldname + plural_index]

783

if len(lines) > 1:

925

lines = [''] + field.split('__POLIB__NL__')

784

lines = [''] + lines # start with initial empty line

926

else:

785

else:

927

lines = field.splitlines(True)

786

escaped_field = escape(field)

928

if len(lines) > 1:

787

specialchars_count = 0

929

lines = ['']+lines # start with initial empty line

788

for c in ['\\', '\n', '\r', '\t', '"']:

789

specialchars_count += field.count(c)

790

# comparison must take into account fieldname length + one space

791

# + 2 quotes (eg. msgid "<string>")

792

flength = len(fieldname) + 3

793

if plural_index:

794

flength += len(plural_index)

795

real_wrapwidth = wrapwidth - flength + specialchars_count

796

if wrapwidth > 0 and len(field) > real_wrapwidth:

797

# Wrap the line but take field name into account

798

lines = [''] + [unescape(item) for item in wrap(

799

escaped_field,

800

wrapwidth - 2, # 2 for quotes ""

801

drop_whitespace=False,

802

break_long_words=False

803

)]

930

else:

804

else:

931

lines = [field] ~~# needed for the empty string case~~

805

lines = [field]

932

if fieldname.startswith('previous_'):

806

if fieldname.startswith('previous_'):

933

# quick and dirty trick to get the real field name

807

# quick and dirty trick to get the real field name

934

fieldname = fieldname[9:]

808

fieldname = fieldname[9:]

945

class POEntry(_BaseEntry):

819

class POEntry(_BaseEntry):

946

"""

820

"""

947

Represents a po file entry.

821

Represents a po file entry.

948

949

**Examples**:

950

951

>>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')

952

>>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]

953

>>> print(entry)

954

#: welcome.py:12 anotherfile.py:34

955

msgid "Welcome"

956

msgstr "Bienvenue"

957

958

>>> entry = POEntry()

959

>>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]

960

>>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'

961

>>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'

962

>>> entry.flags.append('c-format')

963

>>> entry.previous_msgctxt = '@somecontext'

964

>>> entry.previous_msgid = 'I had eggs but no spam !'

965

>>> entry.previous_msgid_plural = 'I had eggs and %d spam !'

966

>>> entry.msgctxt = '@somenewcontext'

967

>>> entry.msgid = 'I have spam but no egg !'

968

>>> entry.msgid_plural = 'I have spam and %d eggs !'

969

>>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"

970

>>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"

971

>>> print(entry)

972

#. A plural translation. This is a very very very long line please do not

973

#. wrap, this is just for testing comment wrapping...

974

# A plural translation. This is a very very very long line please do not wrap,

975

# this is just for testing comment wrapping...

976

#: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32

977

#: src/eggs.c:45

978

#, c-format

979

#| msgctxt "@somecontext"

980

#| msgid "I had eggs but no spam !"

981

#| msgid_plural "I had eggs and %d spam !"

982

msgctxt "@somenewcontext"

983

msgid "I have spam but no egg !"

984

msgid_plural "I have spam and %d eggs !"

985

msgstr[0] "J'ai du jambon mais aucun oeuf !"

986

msgstr[1] "J'ai du jambon et %d oeufs !"

987

988

"""

822

"""

989

823

990

def __init__(self, *args, **kwargs):

824

def __init__(self, *args, **kwargs):

991

"""POEntry constructor."""

825

"""

826

Constructor, accepts the following keyword arguments:

827

828

``comment``

829

string, the entry comment.

830

831

``tcomment``

832

string, the entry translator comment.

833

834

``occurrences``

835

list, the entry occurrences.

836

837

``flags``

838

list, the entry flags.

839

840

``previous_msgctxt``

841

string, the entry previous context.

842

843

``previous_msgid``

844

string, the entry previous msgid.

845

846

``previous_msgid_plural``

847

string, the entry previous msgid_plural.

848

"""

992

_BaseEntry.__init__(self, *args, **kwargs)

849

_BaseEntry.__init__(self, *args, **kwargs)

993

self.comment = kwargs.get('comment', '')

850

self.comment = kwargs.get('comment', '')

994

self.tcomment = kwargs.get('tcomment', '')

851

self.tcomment = kwargs.get('tcomment', '')

998

self.previous_msgid = kwargs.get('previous_msgid', None)

855

self.previous_msgid = kwargs.get('previous_msgid', None)

999

self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)

856

self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)

1000

857

1001

def __~~str~~__(self, wrapwidth=78):

858

def __unicode__(self, wrapwidth=78):

1002

"""

859

"""

1003

Return ~~the string~~ representation of the entry.

860

Returns the unicode representation of the entry.

1004

"""

861

"""

1005

if self.obsolete:

862

if self.obsolete:

1006

return _BaseEntry.__~~str~~__(self)

863

return _BaseEntry.__unicode__(self, wrapwidth)

864

1007

ret = []

865

ret = []

1008

# comment first, if any (with text wrapping as xgettext does)

866

# comments first, if any (with text wrapping as xgettext does)

1009

if self.comment != '':

867

comments = [('comment', '#. '), ('tcomment', '# ')]

1010

for c~~omment~~ in ~~self~~.comment.s~~plit~~('\n'):

868

for c in comments:

1011

if wrapwidth > 0 and len(comment) > wrapwidth-3:

869

val = getattr(self, c[0])

1012

ret += textwrap.wrap(comment, wrapwidth,

870

if val:

1013

initial_indent='#. ',

871

for comment in val.split('\n'):

1014

subsequent_indent='#. ',

872

if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:

1015

break_long_words=False)

873

ret += wrap(

1016

else:

874

comment,

1017

ret.append('#. %s' % comment)

875

wrapwidth,

1018

# translator comment, if any (with text wrapping as xgettext does)

876

initial_indent=c[1],

1019

if self.tcomment != '':

877

subsequent_indent=c[1],

1020

for tcomment in self.tcomment.split('\n'):

878

break_long_words=False

1021

if wrapwidth > 0 and len(tcomment) > wrapwidth-2:

879

)

1022

ret += textwrap.wrap(tcomment, wrapwidth,

880

else:

1023

initial_indent='# ',

881

ret.append('%s%s' % (c[1], comment))

1024

subsequent_indent='# ',

882

1025

break_long_words=False)

1026

else:

1027

ret.append('# %s' % tcomment)

1028

# occurrences (with text wrapping as xgettext does)

883

# occurrences (with text wrapping as xgettext does)

1029

if self.occurrences:

884

if self.occurrences:

1030

filelist = []

885

filelist = []

1034

else:

889

else:

1035

filelist.append(fpath)

890

filelist.append(fpath)

1036

filestr = ' '.join(filelist)

891

filestr = ' '.join(filelist)

1037

if wrapwidth > 0 and len(filestr)+3 > wrapwidth:

892

if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:

1038

# ~~XXX~~ textwrap split words that contain hyphen, this is not

893

# textwrap split words that contain hyphen, this is not

1039

# what we want for filenames, so the dirty hack is to

894

# what we want for filenames, so the dirty hack is to

1040

# temporally replace hyphens with a char that a file cannot

895

# temporally replace hyphens with a char that a file cannot

1041

# contain, like "*"

896

# contain, like "*"

1042

~~lines~~ = ~~textwrap~~.~~wrap~~(~~filestr~~.replace('-', '*'),

897

ret += [l.replace('*', '-') for l in wrap(

1043

wrapwidth,

898

filestr.replace('-', '*'),

1044

initial_indent='#: ',

899

wrapwidth,

1045

~~subsequent~~_indent='#: ',

900

initial_indent='#: ',

1046

break_long_words=False)

901

subsequent_indent='#: ',

1047

~~# end of the replace hack~~

902

break_long_words=False

1048

~~for~~ ~~line~~ in ~~lines~~:

903

)]

1049

ret.append(line.replace('*', '-'))

1050

else:

904

else:

1051

ret.append('#: '+filestr)

905

ret.append('#: ' + filestr)

1052

# flags

906

907

# flags (TODO: wrapping ?)

1053

if self.flags:

908

if self.flags:

1054

flags = []

909

ret.append('#, %s' % ', '.join(self.flags))

1055

for flag in self.flags:

1056

flags.append(flag)

1057

ret.append('#, %s' % ', '.join(flags))

1058

910

1059

# previous context and previous msgid/msgid_plural

911

# previous context and previous msgid/msgid_plural

1060

if self.previous_msgctxt:

912

fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']

1061

ret += self._str_field("previous_msgctxt", "#| ", "",

913

for f in fields:

1062

self.previous_msgctxt)

914

val = getattr(self, f)

1063

if self.previous_msgid:

915

if val:

1064

ret += self._str_field(~~"previous_msgid"~~, "#| ", "",

916

ret += self._str_field(f, "#| ", "", val, wrapwidth)

1065

self.previous_msgid)

1066

if self.previous_msgid_plural:

1067

ret += self._str_field("previous_msgid_plural", "#| ", "",

1068

self.previous_msgid_plural)

1069

917

1070

ret.append(_BaseEntry.__~~str~~__(self))

918

ret.append(_BaseEntry.__unicode__(self, wrapwidth))

1071

ret~~urn~~ '\n'.join(ret)

919

ret = '\n'.join(ret)

920

921

if type(ret) != types.UnicodeType:

922

return unicode(ret, self.encoding)

923

return ret

1072

924

1073

def __cmp__(self, other):

925

def __cmp__(self, other):

1074

~~'''~~

926

"""

1075

Called by comparison operations if rich comparison is not defined.

927

Called by comparison operations if rich comparison is not defined.

1076

928

"""

1077

**Tests**:

1078

>>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])

1079

>>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])

1080

>>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])

1081

>>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])

1082

>>> po = POFile()

1083

>>> po.append(a)

1084

>>> po.append(b)

1085

>>> po.append(c1)

1086

>>> po.append(c2)

1087

>>> po.sort()

1088

>>> print(po)

1089

#

1090

msgid ""

1091

msgstr ""

1092

1093

#: a.py:1 a.py:3

1094

msgid "c2"

1095

msgstr ""

1096

1097

#: a.py:1 b.py:1

1098

msgid "c1"

1099

msgstr ""

1100

1101

#: b.py:1 b.py:3

1102

msgid "a"

1103

msgstr ""

1104

1105

#: b.py:1 b.py:3

1106

msgid "b"

1107

msgstr ""

1108

1109

'''

1110

def compare_occurrences(a, b):

929

def compare_occurrences(a, b):

1111

"""

930

"""

1112

Compare an entry occurrence with another one.

931

Compare an entry occurrence with another one.

1153

972

1154

def translated(self):

973

def translated(self):

1155

"""

974

"""

1156

Return True if the entry has been translated or ~~False.~~

975

Returns ``True`` if the entry has been translated or ``False``

976

otherwise.

1157

"""

977

"""

1158

if self.obsolete or 'fuzzy' in self.flags:

978

if self.obsolete or 'fuzzy' in self.flags:

1159

return False

979

return False

1170

"""

990

"""

1171

Merge the current entry with the given pot entry.

991

Merge the current entry with the given pot entry.

1172

"""

992

"""

1173

self.msgid = other.msgid

993

self.msgid = other.msgid

1174

self.occurrences = other.occurrences

994

self.msgctxt = other.msgctxt

1175

self.~~comment~~ = other.~~comment~~

995

self.occurrences = other.occurrences

1176

self.flags = other.flags

996

self.comment = other.comment

997

fuzzy = 'fuzzy' in self.flags

998

self.flags = other.flags[:] # clone flags

999

if fuzzy:

1000

self.flags.append('fuzzy')

1177

self.msgid_plural = other.msgid_plural

1001

self.msgid_plural = other.msgid_plural

1002

self.obsolete = other.obsolete

1003

self.previous_msgctxt = other.previous_msgctxt

1004

self.previous_msgid = other.previous_msgid

1005

self.previous_msgid_plural = other.previous_msgid_plural

1178

if other.msgstr_plural:

1006

if other.msgstr_plural:

1179

for pos in other.msgstr_plural:

1007

for pos in other.msgstr_plural:

1180

try:

1008

try:

1189

class MOEntry(_BaseEntry):

1017

class MOEntry(_BaseEntry):

1190

"""

1018

"""

1191

Represents a mo file entry.

1019

Represents a mo file entry.

1192

1193

**Examples**:

1194

1195

>>> entry = MOEntry()

1196

>>> entry.msgid = 'translate me !'

1197

>>> entry.msgstr = 'traduisez moi !'

1198

>>> print(entry)

1199

msgid "translate me !"

1200

msgstr "traduisez moi !"

1201

1202

"""

1020

"""

1203

1021

pass

1204

def __str__(self, wrapwidth=78):

1205

"""

1206

Return the string representation of the entry.

1207

"""

1208

return _BaseEntry.__str__(self, wrapwidth)

1209

1022

1210

# }}}

1023

# }}}

1211

# class _POFileParser {{{

1024

# class _POFileParser {{{

1216

file format.

1029

file format.

1217

"""

1030

"""

1218

1031

1219

def __init__(self, ~~fpath~~, *args, **kwargs):

1032

def __init__(self, pofile, *args, **kwargs):

1220

"""

1033

"""

1221

Constructor.

1034

Constructor.

1222

1035

1223

**Arguments**:

1036

Keyword arguments:

1224

- *fpath*: string, path to the po file

1037

1225

- *encoding*: string, the encoding to use, defaults to

1038

``pofile``

1226

"default_encoding" global variable (optional),

1039

string, path to the po file or its content

1227

- *check_for_duplicates*: whether to check for duplicate entries

1040

1228

when adding entries to the file, default: False (optional).

1041

``encoding``

1042

string, the encoding to use, defaults to ``default_encoding``

1043

global variable (optional).

1044

1045

``check_for_duplicates``

1046

whether to check for duplicate entries when adding entries to the

1047

file (optional, default: ``False``).

1229

"""

1048

"""

1230

enc = kwargs.get('encoding', default_encoding)

1049

enc = kwargs.get('encoding', default_encoding)

1231

check_dup = kwargs.get('check_for_duplicates', False)

1050

if os.path.exists(pofile):

1232

try:

1051

try:

1233

self.fhandle = codecs.open(~~fpath~~, 'rU', enc)

1052

self.fhandle = codecs.open(pofile, 'rU', enc)

1234

except LookupError:

1053

except LookupError:

1235

enc = default_encoding

1054

enc = default_encoding

1236

self.fhandle = codecs.open(~~fpath~~, 'rU', enc)

1055

self.fhandle = codecs.open(pofile, 'rU', enc)

1056

else:

1057

self.fhandle = pofile.splitlines()

1058

1237

self.instance = POFile(

1059

self.instance = POFile(

1238

~~fpath~~=~~fpath~~,

1060

pofile=pofile,

1239

encoding=enc,

1061

encoding=enc,

1240

check_for_duplicates=check_dup

1062

check_for_duplicates=kwargs.get('check_for_duplicates', False)

1241

)

1063

)

1242

self.transitions = {}

1064

self.transitions = {}

1243

self.current_entry = POEntry()

1065

self.current_entry = POEntry()

1289

Run the state machine, parse the file line by line and call process()

1111

Run the state machine, parse the file line by line and call process()

1290

with the current matched symbol.

1112

with the current matched symbol.

1291

"""

1113

"""

1292

i, ~~lastlen~~ = 1, 0

1114

i = 0

1115

1116

keywords = {

1117

'msgctxt': 'CT',

1118

'msgid': 'MI',

1119

'msgstr': 'MS',

1120

'msgid_plural': 'MP',

1121

}

1122

prev_keywords = {

1123

'msgid_plural': 'PP',

1124

'msgid': 'PM',

1125

'msgctxt': 'PC',

1126

}

1127

1293

for line in self.fhandle:

1128

for line in self.fhandle:

1129

i += 1

1294

line = line.strip()

1130

line = line.strip()

1295

if line == '':

1131

if line == '':

1296

i = i+1

1297

continue

1132

continue

1298

if line[:3] == '#~ ':

1133

1299

line = line[3:]

1134

tokens = line.split(None, 2)

1135

nb_tokens = len(tokens)

1136

1137

if tokens[0] == '#~' and nb_tokens > 1:

1138

line = line[3:].strip()

1139

tokens = tokens[1:]

1140

nb_tokens -= 1

1300

self.entry_obsolete = 1

1141

self.entry_obsolete = 1

1301

else:

1142

else:

1302

self.entry_obsolete = 0

1143

self.entry_obsolete = 0

1144

1145

# Take care of keywords like

1146

# msgid, msgid_plural, msgctxt & msgstr.

1147

if tokens[0] in keywords and nb_tokens > 1:

1148

line = line[len(tokens[0]):].lstrip()

1149

self.current_token = line

1150

self.process(keywords[tokens[0]], i)

1151

continue

1152

1303

self.current_token = line

1153

self.current_token = line

1304

if line[:2] == '#:':

1154

1155

if tokens[0] == '#:' and nb_tokens > 1:

1305

# we are on a occurrences line

1156

# we are on a occurrences line

1306

self.process('OC', i)

1157

self.process('OC', i)

1307

elif line[:9] == 'msgctxt "':

1158

1308

# we are on a msgctxt

1159

elif line[:1] == '"':

1309

self.process('CT', i)

1160

# we are on a continuation line

1310

elif line[:7] == 'msgid "':

1311

# we are on a msgid

1312

self.process('MI', i)

1313

elif line[:8] == 'msgstr "':

1314

# we are on a msgstr

1315

self.process('MS', i)

1316

elif line[:1] == '"' or line[:4] == '#| "':

1317

# we are on a continuation line or some metadata

1318

self.process('MC', i)

1161

self.process('MC', i)

1319

elif line[:14] == 'msgid_plural "':

1162

1320

# we are on a msgid plural

1321

self.process('MP', i)

1322

elif line[:7] == 'msgstr[':

1163

elif line[:7] == 'msgstr[':

1323

# we are on a msgstr plural

1164

# we are on a msgstr plural

1324

self.process('MX', i)

1165

self.process('MX', i)

1325

elif line[:3] == '#, ':

1166

1167

elif tokens[0] == '#,' and nb_tokens > 1:

1326

# we are on a flags line

1168

# we are on a flags line

1327

self.process('FL', i)

1169

self.process('FL', i)

1328

elif line[:2] == '# ' or line == '#':

1170

1329

if line == '#': line = line + ' '

1171

elif tokens[0] == '#':

1172

if line == '#': line += ' '

1330

# we are on a translator comment line

1173

# we are on a translator comment line

1331

self.process('TC', i)

1174

self.process('TC', i)

1332

elif line[:2] == '#.':

1175

1176

elif tokens[0] == '#.' and nb_tokens > 1:

1333

# we are on a generated comment line

1177

# we are on a generated comment line

1334

self.process('GC', i)

1178

self.process('GC', i)

1335

elif line[:15] == '#| msgid_plural':

1179

1336

# we are on a previous msgid_plural

1180

elif tokens[0] == '#|':

1337

self.process('PP', i)

1181

if nb_tokens < 2:

1338

elif line[:8] == '#| msgid':

1182

self.process('??', i)

1339

self.process('PM', i)

1183

continue

1340

# we are on a previous msgid

1184

1341

elif line[:10] == '#| msgctxt':

1185

# Remove the marker and any whitespace right after that.

1342

# we are on a previous msgctxt

1186

line = line[2:].lstrip()

1343

self.~~process~~(~~'PC'~~, i)

1187

self.current_token = line

1344

i = i+1

1188

1189

if tokens[1].startswith('"'):

1190

# Continuation of previous metadata.

1191

self.process('MC', i)

1192

continue

1193

1194

if nb_tokens == 2:

1195

# Invalid continuation line.

1196

self.process('??', i)

1197

1198

# we are on a "previous translation" comment line,

1199

if tokens[1] not in prev_keywords:

1200

# Unknown keyword in previous translation comment.

1201

self.process('??', i)

1202

1203

# Remove the keyword and any whitespace

1204

# between it and the starting quote.

1205

line = line[len(tokens[1]):].lstrip()

1206

self.current_token = line

1207

self.process(prev_keywords[tokens[1]], i)

1208

1209

else:

1210

self.process('??', i)

1345

1211

1346

if self.current_entry:

1212

if self.current_entry:

1347

# since entries are added when another entry is found, we must add

1213

# since entries are added when another entry is found, we must add

1363

if key is not None:

1229

if key is not None:

1364

self.instance.metadata[key] += '\n'+ msg.strip()

1230

self.instance.metadata[key] += '\n'+ msg.strip()

1365

# close opened file

1231

# close opened file

1366

self.fhandle.~~close~~()

1232

if isinstance(self.fhandle, file):

1233

self.fhandle.close()

1367

return self.instance

1234

return self.instance

1368

1235

1369

def add(self, symbol, states, next_state):

1236

def add(self, symbol, states, next_state):

1370

"""

1237

"""

1371

Add a transition to the state machine.

1238

Add a transition to the state machine.

1239

1372

Keywords arguments:

1240

Keywords arguments:

1373

1241

1374

symbol -- string, the matched token (two chars symbol)

1242

``symbol``

1375

states -- list, a list of states (two chars symbols)

1243

string, the matched token (two chars symbol).

1376

next_state -- the next state the fsm will have after the action

1244

1245

``states``

1246

list, a list of states (two chars symbols).

1247

1248

``next_state``

1249

the next state the fsm will have after the action.

1377

"""

1250

"""

1378

for state in states:

1251

for state in states:

1379

action = getattr(self, 'handle_%s' % next_state.lower())

1252

action = getattr(self, 'handle_%s' % next_state.lower())

1385

symbol provided.

1258

symbol provided.

1386

1259

1387

Keywords arguments:

1260

Keywords arguments:

1388

symbol -- string, the matched token (two chars symbol)

1261

1389

linenum -- integer, the current line number of the parsed file

1262

``symbol``

1263

string, the matched token (two chars symbol).

1264

1265

``linenum``

1266

integer, the current line number of the parsed file.

1390

"""

1267

"""

1391

try:

1268

try:

1392

(action, state) = self.transitions[(symbol, self.current_state)]

1269

(action, state) = self.transitions[(symbol, self.current_state)]

1456

self.instance.append(self.current_entry)

1333

self.instance.append(self.current_entry)

1457

self.current_entry = POEntry()

1334

self.current_entry = POEntry()

1458

self.current_entry.previous_msgid_plural = \

1335

self.current_entry.previous_msgid_plural = \

1459

unescape(self.current_token[17:-1])

1336

unescape(self.current_token[1:-1])

1460

return True

1337

return True

1461

1338

1462

def handle_pm(self):

1339

def handle_pm(self):

1465

self.instance.append(self.current_entry)

1342

self.instance.append(self.current_entry)

1466

self.current_entry = POEntry()

1343

self.current_entry = POEntry()

1467

self.current_entry.previous_msgid = \

1344

self.current_entry.previous_msgid = \

1468

unescape(self.current_token[10:-1])

1345

unescape(self.current_token[1:-1])

1469

return True

1346

return True

1470

1347

1471

def handle_pc(self):

1348

def handle_pc(self):

1474

self.instance.append(self.current_entry)

1351

self.instance.append(self.current_entry)

1475

self.current_entry = POEntry()

1352

self.current_entry = POEntry()

1476

self.current_entry.previous_msgctxt = \

1353

self.current_entry.previous_msgctxt = \

1477

unescape(self.current_token[12:-1])

1354

unescape(self.current_token[1:-1])

1478

return True

1355

return True

1479

1356

1480

def handle_ct(self):

1357

def handle_ct(self):

1482

if self.current_state in ['MC', 'MS', 'MX']:

1359

if self.current_state in ['MC', 'MS', 'MX']:

1483

self.instance.append(self.current_entry)

1360

self.instance.append(self.current_entry)

1484

self.current_entry = POEntry()

1361

self.current_entry = POEntry()

1485

self.current_entry.msgctxt = unescape(self.current_token[9:-1])

1362

self.current_entry.msgctxt = unescape(self.current_token[1:-1])

1486

return True

1363

return True

1487

1364

1488

def handle_mi(self):

1365

def handle_mi(self):

1491

self.instance.append(self.current_entry)

1368

self.instance.append(self.current_entry)

1492

self.current_entry = POEntry()

1369

self.current_entry = POEntry()

1493

self.current_entry.obsolete = self.entry_obsolete

1370

self.current_entry.obsolete = self.entry_obsolete

1494

self.current_entry.msgid = unescape(self.current_token[7:-1])

1371

self.current_entry.msgid = unescape(self.current_token[1:-1])

1495

return True

1372

return True

1496

1373

1497

def handle_mp(self):

1374

def handle_mp(self):

1498

"""Handle a msgid plural."""

1375

"""Handle a msgid plural."""

1499

self.current_entry.msgid_plural = unescape(self.current_token[14:-1])

1376

self.current_entry.msgid_plural = unescape(self.current_token[1:-1])

1500

return True

1377

return True

1501

1378

1502

def handle_ms(self):

1379

def handle_ms(self):

1503

"""Handle a msgstr."""

1380

"""Handle a msgstr."""

1504

self.current_entry.msgstr = unescape(self.current_token[8:-1])

1381

self.current_entry.msgstr = unescape(self.current_token[1:-1])

1505

return True

1382

return True

1506

1383

1507

def handle_mx(self):

1384

def handle_mx(self):

1541

typ = 'previous_msgctxt'

1418

typ = 'previous_msgctxt'

1542

token = token[3:]

1419

token = token[3:]

1543

self.current_entry.previous_msgctxt += token

1420

self.current_entry.previous_msgctxt += token

1544

if typ not in self.current_entry._multiline_str:

1545

self.current_entry._multiline_str[typ] = token

1546

else:

1547

self.current_entry._multiline_str[typ] += "__POLIB__NL__" + token

1548

# don't change the current state

1421

# don't change the current state

1549

return False

1422

return False

1550

1423

1555

"""

1428

"""

1556

A class to parse binary mo files.

1429

A class to parse binary mo files.

1557

"""

1430

"""

1558

BIG_ENDIAN = 0xde120495

1559

LITTLE_ENDIAN = 0x950412de

1560

1431

1561

def __init__(self, ~~fpath~~, *args, **kwargs):

1432

def __init__(self, mofile, *args, **kwargs):

1562

"""

1433

"""

1563

Constructor.

1434

Constructor.

1564

1435

1565

**Arguments**:

1436

Keyword arguments:

1566

- *fpath*: string, path to the po file

1437

1567

- *encoding*: string, the encoding to use, defaults to

1438

``mofile``

1568

"default_encoding" global variable (optional),

1439

string, path to the mo file or its content

1569

- *check_for_duplicates*: whether to check for duplicate entries

1440

1570

when adding entries to the file, default: False (optional).

1441

``encoding``

1442

string, the encoding to use, defaults to ``default_encoding``

1443

global variable (optional).

1444

1445

``check_for_duplicates``

1446

whether to check for duplicate entries when adding entries to the

1447

file (optional, default: ``False``).

1571

"""

1448

"""

1572

enc = kwargs.get('encoding', default_encoding)

1449

self.fhandle = open(mofile, 'rb')

1573

check_dup = kwargs.get('check_for_duplicates', False)

1574

self.fhandle = open(fpath, 'rb')

1575

self.instance = MOFile(

1450

self.instance = MOFile(

1576

fpath=~~fpath~~,

1451

fpath=mofile,

1577

encoding=enc,

1452

encoding=kwargs.get('encoding', default_encoding),

1578

check_for_duplicates=check_dup

1453

check_for_duplicates=kwargs.get('check_for_duplicates', False)

1579

)

1454

)

1580

1455

1581

def parse_magicnumber(self):

1582

"""

1583

Parse the magic number and raise an exception if not valid.

1584

"""

1585

1586

def parse(self):

1456

def parse(self):

1587

"""

1457

"""

1588

Build the instance with the file handle provided in the

1458

Build the instance with the file handle provided in the

1589

constructor.

1459

constructor.

1590

"""

1460

"""

1461

# parse magic number

1591

magic_number = self._readbinary('<I', 4)

1462

magic_number = self._readbinary('<I', 4)

1592

if magic_number == ~~self~~.LITTLE_ENDIAN:

1463

if magic_number == MOFile.LITTLE_ENDIAN:

1593

ii = '<II'

1464

ii = '<II'

1594

elif magic_number == ~~self~~.BIG_ENDIAN:

1465

elif magic_number == MOFile.BIG_ENDIAN:

1595

ii = '>II'

1466

ii = '>II'

1596

else:

1467

else:

1597

raise IOError('Invalid mo file, magic number is incorrect !')

1468

raise IOError('Invalid mo file, magic number is incorrect !')

1630

# test if we have a plural entry

1501

# test if we have a plural entry

1631

msgid_tokens = msgid.split('\0')

1502

msgid_tokens = msgid.split('\0')

1632

if len(msgid_tokens) > 1:

1503

if len(msgid_tokens) > 1:

1633

entry = ~~MOE~~ntry(

1504

entry = self._build_entry(

1634

msgid=msgid_tokens[0],

1505

msgid=msgid_tokens[0],

1635

msgid_plural=msgid_tokens[1],

1506

msgid_plural=msgid_tokens[1],

1636

msgstr_plural=dict((k,v) for k,v in \

1507

msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))

1637

enumerate(msgstr.split('\0')))

1638

)

1508

)

1639

else:

1509

else:

1640

entry = ~~MOE~~ntry(msgid=msgid, msgstr=msgstr)

1510

entry = self._build_entry(msgid=msgid, msgstr=msgstr)

1641

self.instance.append(entry)

1511

self.instance.append(entry)

1642

# close opened file

1512

# close opened file

1643

self.fhandle.close()

1513

self.fhandle.close()

1644

return self.instance

1514

return self.instance

1515

1516

def _build_entry(self, msgid, msgstr=None, msgid_plural=None,

1517

msgstr_plural=None):

1518

msgctxt_msgid = msgid.split('\x04')

1519

if len(msgctxt_msgid) > 1:

1520

kwargs = {

1521

'msgctxt': msgctxt_msgid[0],

1522

'msgid' : msgctxt_msgid[1],

1523

}

1524

else:

1525

kwargs = {'msgid': msgid}

1526

if msgstr:

1527

kwargs['msgstr'] = msgstr

1528

if msgid_plural:

1529

kwargs['msgid_plural'] = msgid_plural

1530

if msgstr_plural:

1531

kwargs['msgstr_plural'] = msgstr_plural

1532

return MOEntry(**kwargs)

1645

1533

1646

def _readbinary(self, fmt, numbytes):

1534

def _readbinary(self, fmt, numbytes):

1647

"""

1535

"""

1655

return tup

1543

return tup

1656

1544

1657

# }}}

1545

# }}}

1658

# __main__ {{{

1546

# class TextWrapper {{{

1659

1547

1660

if __name__ == '__main__':

1548

class TextWrapper(textwrap.TextWrapper):

1661

"""

1549

"""

1662

**Main function**::

1550

Subclass of textwrap.TextWrapper that backport the

1663

- to **test** the module just run: *python polib.py [-v]*

1551

drop_whitespace option.

1664

- to **profile** the module: *python polib.py -p <some_pofile.po>*

1665

"""

1552

"""

1666

import sys

1553

def __init__(self, *args, **kwargs):

1667

if len(sys.argv) > 2 and sys.argv[1] == '-p':

1554

drop_whitespace = kwargs.pop('drop_whitespace', True)

1668

def test(f):

1555

textwrap.TextWrapper.__init__(self, *args, **kwargs)

1669

if f.endswith('po'):

1556

self.drop_whitespace = drop_whitespace

1670

p = pofile(f)

1557

1558

def _wrap_chunks(self, chunks):

1559

"""_wrap_chunks(chunks : [string]) -> [string]

1560

1561

Wrap a sequence of text chunks and return a list of lines of

1562

length 'self.width' or less. (If 'break_long_words' is false,

1563

some lines may be longer than this.) Chunks correspond roughly

1564

to words and the whitespace between them: each chunk is

1565

indivisible (modulo 'break_long_words'), but a line break can

1566

come between any two chunks. Chunks should not have internal

1567

whitespace; ie. a chunk is either all whitespace or a "word".

1568

Whitespace chunks will be removed from the beginning and end of

1569

lines, but apart from that whitespace is preserved.

1570

"""

1571

lines = []

1572

if self.width <= 0:

1573

raise ValueError("invalid width %r (must be > 0)" % self.width)

1574

1575

# Arrange in reverse order so items can be efficiently popped

1576

# from a stack of chucks.

1577

chunks.reverse()

1578

1579

while chunks:

1580

1581

# Start the list of chunks that will make up the current line.

1582

# cur_len is just the length of all the chunks in cur_line.

1583

cur_line = []

1584

cur_len = 0

1585

1586

# Figure out which static string will prefix this line.

1587

if lines:

1588

indent = self.subsequent_indent

1671

else:

1589

else:

1672

p = ~~mofile~~(f)

1590

indent = self.initial_indent

1673

s = unicode(p)

1591

1674

import profile

1592

# Maximum width for this line.

1675

profile.run('test("'+sys.argv[2]+'")')

1593

width = self.width - len(indent)

1676

else:

1594

1677

import doctest

1595

# First chunk on line is whitespace -- drop it, unless this

1678

doctest.testmod()

1596

# is the very beginning of the text (ie. no lines started yet).

1597

if self.drop_whitespace and chunks[-1].strip() == '' and lines:

1598

del chunks[-1]

1599

1600

while chunks:

1601

l = len(chunks[-1])

1602

1603

# Can at least squeeze this chunk onto the current line.

1604

if cur_len + l <= width:

1605

cur_line.append(chunks.pop())

1606

cur_len += l

1607

1608

# Nope, this line is full.

1609

else:

1610

break

1611

1612

# The current line is full, and the next chunk is too big to

1613

# fit on *any* line (not just this one).

1614

if chunks and len(chunks[-1]) > width:

1615

self._handle_long_word(chunks, cur_line, cur_len, width)

1616

1617

# If the last chunk on this line is all whitespace, drop it.

1618

if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':

1619

del cur_line[-1]

1620

1621

# Convert current line back to a string and store it in list

1622

# of all lines (return value).

1623

if cur_line:

1624

lines.append(indent + ''.join(cur_line))

1625

1626

return lines

1679

1627

1680

# }}}

1628

# }}}

1629

# function wrap() {{{

1630

1631

def wrap(text, width=70, **kwargs):

1632

"""

1633

Wrap a single paragraph of text, returning a list of wrapped lines.

1634

"""

1635

if sys.version_info < (2, 6):

1636

return TextWrapper(width=width, **kwargs).wrap(text)

1637

return textwrap.wrap(text, width=width, **kwargs)

1638

1639

#}}}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages