upstream/mercurial-mirror Commit - r37263:0e7550b0

1

# stringutil.py - utility for generic string formatting, parsing, etc.

1

# stringutil.py - utility for generic string formatting, parsing, etc.

2

#

2

#

3

4

5

6

#

6

#

7

# This software may be used and distributed according to the terms of the

7

# This software may be used and distributed according to the terms of the

8

# GNU General Public License version 2 or any later version.

8

# GNU General Public License version 2 or any later version.

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import codecs

12

import codecs

13

import re as remod

13

import re as remod

14

import textwrap

14

import textwrap

15

16

from ..i18n import _

16

from ..i18n import _

17

from ..thirdparty import attr

17

from ..thirdparty import attr

18

19

from .. import (

19

from .. import (

20

encoding,

20

encoding,

21

error,

21

error,

22

pycompat,

22

pycompat,

23

)

23

)

24

25

# Lookup table from every single-byte value to its escaped spelling.  The
# default spelling is the two-digit hex escape; backslash, carriage return
# and line feed are then overridden with their short forms.
_DATA_ESCAPE_MAP = dict(
    (pycompat.bytechr(code), br'\x%02x' % code) for code in range(256))
_DATA_ESCAPE_MAP[b'\\'] = b'\\\\'
_DATA_ESCAPE_MAP[b'\r'] = br'\r'
_DATA_ESCAPE_MAP[b'\n'] = br'\n'

# Bytes that get escaped: 0x00-0x08, 0x0a-0x1f, backslash and 0x7f-0xff.
# Tab (0x09) and the remaining printable ASCII bytes are not matched.
_DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')

32

33

def escapedata(s):
    """Return ``s`` with every byte matched by _DATA_ESCAPE_RE escaped.

    A ``bytearray`` argument is converted to ``bytes`` first.  Each matched
    byte is replaced by its entry in _DATA_ESCAPE_MAP.
    """
    data = bytes(s) if isinstance(s, bytearray) else s

    def _escape(found):
        return _DATA_ESCAPE_MAP[found.group(0)]

    return _DATA_ESCAPE_RE.sub(_escape, data)

38

39

def binary(s):
    """return true if a string is binary data

    A byte string counts as binary when it contains a NUL byte.  Empty and
    ``None`` values are never binary.
    """
    # Explicit bytes literal: the membership test is done against a byte
    # string, which rejects a native str needle on Python 3.
    return bool(s and b'\0' in s)

42

43

def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    raises error.ParseError if a 're:' pattern is not a valid regular
    expression.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith(b're:'):
        pattern = pattern[3:]
        flags = 0
        if not casesensitive:
            flags = remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            # The exception object cannot be %-formatted into a byte string
            # directly, so convert it with forcebytestr() first.
            raise error.ParseError(_(b'invalid regular expression: %s')
                                   % forcebytestr(e))
        return b're', pattern, regex.search
    elif pattern.startswith(b'literal:'):
        pattern = pattern[8:]

    # Default literal matcher: exact equality with the pattern.
    match = pattern.__eq__

    if not casesensitive:
        # Lower-case the pattern once; each candidate is lowered per call.
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return b'literal', pattern, match

101

102

def shortuser(user):
    """Return a short representation of a user name or email address.

    The byte string is trimmed in four steps: drop everything from the
    first '@', drop everything up to and including the first '<', then cut
    at the first space and at the first '.'.
    """
    # Explicit bytes literals throughout: user names are byte strings.
    f = user.find(b'@')
    if f >= 0:
        user = user[:f]
    f = user.find(b'<')
    if f >= 0:
        user = user[f + 1:]
    f = user.find(b' ')
    if f >= 0:
        user = user[:f]
    f = user.find(b'.')
    if f >= 0:
        user = user[:f]
    return user

117

118

def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first '@' is dropped, then everything up to and
    including the first '<'.  Input without either character is returned
    unchanged.
    """
    # Explicit bytes literals: user names are byte strings.
    f = user.find(b'@')
    if f >= 0:
        user = user[:f]
    f = user.find(b'<')
    if f >= 0:
        user = user[f + 1:]
    return user

127

128

def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'.  When '<' is
    missing the slice starts at the beginning; when '>' is missing it runs
    to the end of the string.
    '''
    # Explicit bytes literals: author fields are byte strings.
    r = author.find(b'>')
    if r == -1:
        r = None
    # find() returns -1 when '<' is absent, so "+ 1" makes the start 0.
    return author[author.find(b'<') + 1:r]

134

135

def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    # Explicit bytes literals: author fields are byte strings.
    if b'@' not in author:
        # No email address at all: the whole field is the name.
        return author
    f = author.find(b'<')
    if f != -1:
        # "Name <email>": strip surrounding blanks/quotes and unescape
        # backslash-quoted double quotes.
        return author[:f].strip(b' "').replace(b'\\"', b'"')
    # Bare "local.part@domain": use the local part with dots as spaces.
    f = author.find(b'@')
    return author[:f].replace(b'.', b' ')

161

162

@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file

    Declared with hash=True; parsemailmap() uses instances as dictionary
    keys.
    '''
    # email address of the entry; has no default, so it must be supplied
    email = attr.ib()
    # name attached to the address; None when the entry has no name
    name = attr.ib(default=None)

168

169

def _ismailmaplineinvalid(names, emails):

170

'''Returns True if the parsed names and emails

171

in a mailmap entry are invalid.

172

173

>>> # No names or emails fails

174

>>> names, emails = [], []

175

>>> _ismailmaplineinvalid(names, emails)

176

True

177

>>> # Only one email fails

178

>>> emails = [b'email@email.com']

179

>>> _ismailmaplineinvalid(names, emails)

180

True

181

>>> # One email and one name passes

182

>>> names = [b'Test Name']

183

>>> _ismailmaplineinvalid(names, emails)

184

False

185

>>> # No names but two emails passes

186

>>> names = []

187

>>> emails = [b'proper@email.com', b'commit@email.com']

188

>>> _ismailmaplineinvalid(names, emails)

189

False

190

'''

191

return not emails or not names and len(emails) < 2

192

169

def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict that maps a mailmapping for the commit identity (the
    last email on a line, plus the commit name when the line has two
    names) to a mailmapping for the proper identity (the first email and
    first name).  ``None`` content yields an empty dict.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():

        # Don't bother parsing the line if it is a comment.  Malformed
        # lines are rejected after parsing, by _ismailmaplineinvalid().
        if line.lstrip().startswith(b'#'):
            continue

        # names, emails hold the parsed emails and names for each line
        # namebuilder holds the words in a person's name
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith(b'#'):
                # If we reach a comment in the mailmap file, move on
                break

            elif element.startswith(b'<') and element.endswith(b'>'):
                # We have found an email.
                # Parse it, and finalize any names from earlier
                emails.append(element[1:-1])  # Slice off the "<>"

                if namebuilder:
                    names.append(b' '.join(namebuilder))
                    namebuilder = []

                # Break if we have found a second email, any other
                # data does not fit the spec for .mailmap
                if len(emails) > 1:
                    break

            else:
                # We have found another word in the committer's name
                namebuilder.append(element)

        # Check to see if we have parsed the line into a valid form
        # We require at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # Key: the identity recorded in commits (last email on the line).
        mailmapkey = mailmapping(
            email=emails[-1],
            name=names[-1] if len(names) == 2 else None,
        )

        # Value: the proper identity to report instead.
        mailmap[mailmapkey] = mailmapping(
            email=emails[0],
            name=names[0] if names else None,
        )

    return mailmap

244

274

245

def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    ``mailmap`` is a dict as returned by parsemailmap().  Lookup is tried
    with the commit name and email first, then with the email alone.  Any
    part the matching entry does not supply is taken from ``author``.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # If the author field coming in isn't in the correct format,
    # or the mailmap is empty just return the original author field
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Turn the author field into a mailmapping so it can be used as a key
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Try and use both the commit email and name as the key
        proper = mailmap[commit]

    except KeyError:
        # If the lookup fails, use just the email as the key instead
        # We call this commit2 as not to erase original commit fields
        commit2 = mailmapping(email=commit.email)
        proper = mailmap.get(commit2, mailmapping(None, None))

    # Return the author field with proper values filled in
    return b'%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )

291

321

292

# One or more non-'<' characters, a whitespace character, then an
# angle-bracketed part that contains an '@' and no nested '<' or '>'.
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    found = _correctauthorformat.match(author)
    return found is not None

314

344

315

def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The work is delegated to encoding.trim(), with '...' passed as the
    ellipsis marker.
    """
    # Explicit bytes literal, in line with the b'' literals used
    # elsewhere in this module.
    return encoding.trim(text, maxlength, ellipsis=b'...')

318

348

319

def escapestr(s):
    """Return ``s`` with backslash escapes applied.

    Calls the function underlying s.encode('string_escape') directly, for
    Python 3 compatibility.
    """
    # escape_encode() returns a pair; only its first item is wanted.
    escaped, _consumed = codecs.escape_encode(s)
    return escaped

323

353

324

def unescapestr(s):
    """Return ``s`` with backslash escape sequences decoded."""
    # escape_decode() returns a pair; only its first item is wanted.
    decoded, _consumed = codecs.escape_decode(s)
    return decoded

326

356

327

def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    pycompat.bytestr() is tried first; if it raises UnicodeEncodeError,
    str(obj) is passed through encoding.strtolocal() and converted again.
    """
    try:
        converted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        converted = pycompat.bytestr(encoding.strtolocal(str(obj)))
    return converted

335

365

336

def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')

339

369

340

# delay import of textwrap

370

# delay import of textwrap

341

def _MBTextWrapper(**kwargs):
    """Build the width-aware TextWrapper subclass and return an instance.

    The subclass is defined on the first call.  The module-level name
    _MBTextWrapper is then rebound to the class itself, so later calls
    construct instances directly without redefining it.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr at the first character whose cumulative column
            # width exceeds space_left; returns (head, tail).
            used = 0
            colwidth = encoding.ucolwidth
            for i, char in enumerate(ucstr):
                used += colwidth(char)
                if space_left < used:
                    return (ucstr[:i], ucstr[i:])
            # Everything fits.  The empty tail uses r'' like the other
            # text literals in this class.
            return ucstr, r''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # Put as much of the long chunk as fits on this line and
                # leave the rest on the stack for the next line.
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # Not allowed to break the word: give it a line of its own.
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Replace this factory with the class so the definition runs only once.
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)

443

473

444

def wrap(line, width, initindent='', hangindent=''):

474

def wrap(line, width, initindent='', hangindent=''):

445

maxindent = max(len(hangindent), len(initindent))

475

maxindent = max(len(hangindent), len(initindent))

446

if width <= maxindent:

476

if width <= maxindent:

447

# adjust for weird terminal size

477

# adjust for weird terminal size

448

width = max(78, maxindent + 1)

478

width = max(78, maxindent + 1)

449

line = line.decode(pycompat.sysstr(encoding.encoding),

479

line = line.decode(pycompat.sysstr(encoding.encoding),

450