upstream/mercurial-mirror Commit - r46314:d502caab

1

# stringutil.py - utility for generic string formatting, parsing, etc.

1

# stringutil.py - utility for generic string formatting, parsing, etc.

2

#

2

#

3

4

5

6

#

6

#

7

# This software may be used and distributed according to the terms of the

7

# This software may be used and distributed according to the terms of the

8

# GNU General Public License version 2 or any later version.

8

# GNU General Public License version 2 or any later version.

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import ast

12

import ast

13

import codecs

13

import codecs

14

import re as remod

14

import re as remod

15

import textwrap

15

import textwrap

16

import types

16

import types

17

18

from ..i18n import _

18

from ..i18n import _

19

from ..thirdparty import attr

19

from ..thirdparty import attr

20

21

from .. import (

21

from .. import (

22

encoding,

22

encoding,

23

error,

23

error,

24

pycompat,

24

pycompat,

25

)

25

)

26

27

# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Translation table for unicode.translate(): code point -> escaped text.
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
# Bytes counterpart: special character -> backslash-escaped character.
regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}

32

33

34

def reescape(pat):
    """Drop-in replacement for re.escape.

    ``pat`` may be bytes or a unicode string; the result has the same type
    as the input. Every character that has an entry in ``_regexescapemap``
    is replaced by its backslash-escaped form.
    """
    # NOTE: it is intentional that this works on unicodes and not
    # bytes, as it's only possible to do the escaping with
    # unicode.translate, not bytes.translate. Sigh.
    wantuni = True
    if isinstance(pat, bytes):
        wantuni = False
        # latin1 maps each byte to exactly one code point, so the
        # decode/encode round trip is lossless.
        pat = pat.decode('latin1')
    pat = pat.translate(_regexescapemap)
    if wantuni:
        return pat
    return pat.encode('latin1')

47

48

49

def pprint(o, bprefix=False, indent=0, level=0):
    """Pretty print an object.

    Joins the atoms produced by ``pprintgen()`` into a single bytes value.
    All arguments are forwarded unchanged to ``pprintgen()``.
    """
    return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))

52

53

54

def _pprintcollection(
    o, entries, opener, closer, pair, bprefix, indent, level
):
    """Yield the atoms for the sized collection ``o`` between two brackets.

    ``entries`` is called with ``o`` (after the opening bracket has been
    emitted) and returns the iterable of entries to print. When ``pair`` is
    true each entry is a ``(key, value)`` tuple printed as ``key: value``.
    """
    if not o:
        # An empty collection is emitted as one single atom.
        yield opener + closer
        return

    yield opener

    if indent:
        level += 1
        yield b'\n'
        yield b' ' * (level * indent)

    for i, entry in enumerate(entries(o)):
        if pair:
            key, value = entry
            for chunk in pprintgen(
                key, bprefix=bprefix, indent=indent, level=level
            ):
                yield chunk

            yield b': '

            for chunk in pprintgen(
                value, bprefix=bprefix, indent=indent, level=level
            ):
                yield chunk
        else:
            for chunk in pprintgen(
                entry, bprefix=bprefix, indent=indent, level=level
            ):
                yield chunk

        # Emit a separator after every entry except the last one.
        if i + 1 < len(o):
            if indent:
                yield b',\n'
                yield b' ' * (level * indent)
            else:
                yield b', '

    if indent:
        level -= 1
        yield b'\n'
        yield b' ' * (level * indent)

    yield closer


def pprintgen(o, bprefix=False, indent=0, level=0):
    """Pretty print an object to a generator of atoms.

    ``bprefix`` is a flag influencing whether bytestrings are rendered with
    a ``b''`` prefix.

    ``indent`` controls whether collections and nested data structures
    span multiple lines via the indentation amount in spaces. By default,
    no newlines are emitted.

    ``level`` specifies the initial indent level. Used if ``indent > 0``.

    Lists, dicts, sets, tuples and generators are rendered recursively
    (dict items and set members in sorted order); any other object falls
    back to ``pycompat.byterepr()``.
    """

    if isinstance(o, bytes):
        if bprefix:
            yield b"b'%s'" % escapestr(o)
        else:
            yield b"'%s'" % escapestr(o)
    elif isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        yield b"bytearray['%s']" % escapestr(bytes(o))
    elif isinstance(o, list):
        for chunk in _pprintcollection(
            o, iter, b'[', b']', False, bprefix, indent, level
        ):
            yield chunk
    elif isinstance(o, dict):
        for chunk in _pprintcollection(
            o,
            lambda d: sorted(d.items()),
            b'{',
            b'}',
            True,
            bprefix,
            indent,
            level,
        ):
            yield chunk
    elif isinstance(o, set):
        for chunk in _pprintcollection(
            o, sorted, b'set([', b'])', False, bprefix, indent, level
        ):
            yield chunk
    elif isinstance(o, tuple):
        for chunk in _pprintcollection(
            o, iter, b'(', b')', False, bprefix, indent, level
        ):
            yield chunk
    elif isinstance(o, types.GeneratorType):
        # Special case of empty generator.
        try:
            nextitem = next(o)
        except StopIteration:
            yield b'gen[]'
            return

        yield b'gen['

        if indent:
            level += 1
            yield b'\n'
            yield b' ' * (level * indent)

        last = False

        # A generator has no length, so look one item ahead to learn
        # whether the current item is the last one.
        while not last:
            current = nextitem

            try:
                nextitem = next(o)
            except StopIteration:
                last = True

            for chunk in pprintgen(
                current, bprefix=bprefix, indent=indent, level=level
            ):
                yield chunk

            if not last:
                if indent:
                    yield b',\n'
                    yield b' ' * (level * indent)
                else:
                    yield b', '

        if indent:
            level -= 1
            yield b'\n'
            yield b' ' * (level * indent)

        yield b']'
    else:
        yield pycompat.byterepr(o)

252

253

254

def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is split into one line per ``<`` group (or
    `` field=<`` group), and each line is prefixed according to how many
    ``<`` are still unclosed at the point where the line starts.
    """
    lines = []
    rs = pycompat.byterepr(o)
    p0 = p1 = 0
    while p0 < len(rs):
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      p0    p1        q0    q1
        q0 = -1
        q1 = rs.find(b'<', p1 + 1)
        if q1 < 0:
            q1 = len(rs)
        elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
            # backtrack for ' field=<'
            q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
        if q0 < 0:
            # No ' field=' prefix was found: cut right at the next '<'
            # (or at the end of the string).
            q0 = q1
        else:
            q0 += 1  # skip ' '
        # Nesting depth = '<' seen so far minus '>' seen so far.
        depth = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
        assert depth >= 0
        lines.append((depth, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    # NOTE(review): the rendering this was recovered from collapses runs of
    # whitespace - confirm the width of the indent unit literal below.
    return b'\n'.join(b' ' * depth + s for depth, s in lines)

279

280

281

def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty bytes value.
    """
    if r is None:
        return b''
    elif isinstance(r, tuple):
        # First element is the format, the remaining ones its arguments.
        return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
    elif isinstance(r, bytes):
        return r
    elif callable(r):
        return r()
    else:
        return pprint(r)

303

304

305

def binary(s):
    """return true if a string is binary data

    A value counts as binary when it is non-empty and contains a NUL byte.
    Falsy inputs (``None``, ``b''``) are reported as not binary.
    """
    return bool(s and b'\0' in s)

308

309

310

def _splitpattern(pattern):
    """Split ``pattern`` into a ``(kind, pattern)`` pair.

    A leading ``re:`` or ``literal:`` prefix selects the kind and is removed
    from the returned pattern. A pattern without either prefix is returned
    unchanged with kind ``literal``.
    """
    if pattern.startswith(b're:'):
        return b're', pattern[3:]
    elif pattern.startswith(b'literal:'):
        return b'literal', pattern[8:]
    return b'literal', pattern

316

317

310

def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b're':
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            # NOTE(review): on Python 3, %-formatting bytes with an
            # exception object raises TypeError (see PEP 461); consider
            # converting ``e`` to bytes before formatting.
            raise error.ParseError(_(b'invalid regular expression: %s') % e)
        return kind, pattern, regex.search
    elif kind == b'literal':
        if casesensitive:
            match = pattern.__eq__
        else:
            # Lower-case the pattern once; only the candidate is folded
            # on each call.
            ipat = encoding.lower(pattern)
            match = lambda s: ipat == encoding.lower(s)
        return kind, pattern, match

    # _splitpattern() returned a kind this function does not know about.
    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)

367

375

368

376

369

def shortuser(user):
    """Return a short representation of a user name or email address.

    The value is cut down in four successive steps: drop everything from
    the first ``@``, drop everything up to and including the first ``<``,
    drop everything from the first space, and finally drop everything from
    the first ``.``.
    """
    f = user.find(b'@')
    if f >= 0:
        user = user[:f]
    f = user.find(b'<')
    if f >= 0:
        user = user[f + 1 :]
    f = user.find(b' ')
    if f >= 0:
        user = user[:f]
    f = user.find(b'.')
    if f >= 0:
        user = user[:f]
    return user

384

392

385

393

386

def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first ``@`` onwards is dropped, then everything up
    to and including the first ``<`` of what remains.
    """
    f = user.find(b'@')
    if f >= 0:
        user = user[:f]
    f = user.find(b'<')
    if f >= 0:
        user = user[f + 1 :]
    return user

395

403

396

404

397

def email(author):
    '''get email of author.

    Returns the text between the first ``<`` and the first ``>``. Without a
    ``<`` the slice starts at the beginning of ``author``; without a ``>``
    it runs to the end.
    '''
    r = author.find(b'>')
    if r == -1:
        r = None
    # find() returns -1 when there is no '<', so the +1 yields 0.
    return author[author.find(b'<') + 1 : r]

403

411

404

412

405

def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \\\\"buz\\\\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if b'@' not in author:
        return author
    f = author.find(b'<')
    if f != -1:
        # "Name <email>": trim surrounding spaces/quotes and unescape \".
        return author[:f].strip(b' "').replace(b'\\"', b'"')
    # Bare address: use the local part, with dots turned into spaces.
    f = author.find(b'@')
    return author[:f].replace(b'.', b' ')

431

439

432

440

433

@attr.s(hash=True)
class mailmapping(object):
    '''Represents a username/email key or value in
    a mailmap file'''

    # Email address; the only attribute without a default.
    email = attr.ib()
    # Optional name; None when the entry carries only an email.
    name = attr.ib(default=None)

440

448

441

449

442

def _ismailmaplineinvalid(names, emails):
    '''Returns True if the parsed names and emails
    in a mailmap entry are invalid.

    >>> # No names or emails fails
    >>> names, emails = [], []
    >>> _ismailmaplineinvalid(names, emails)
    True
    >>> # Only one email fails
    >>> emails = [b'email@email.com']
    >>> _ismailmaplineinvalid(names, emails)
    True
    >>> # One email and one name passes
    >>> names = [b'Test Name']
    >>> _ismailmaplineinvalid(names, emails)
    False
    >>> # No names but two emails passes
    >>> names = []
    >>> emails = [b'proper@email.com', b'commit@email.com']
    >>> _ismailmaplineinvalid(names, emails)
    False
    '''
    # Invalid when there is no email at all, or when there is neither a
    # name nor a second email to map to.
    return not emails or (not names and len(emails) < 2)

465

473

466

474

467

def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping the ``mailmapping`` found in commits (last email
    on the line, plus the second name if there is one) to the proper
    ``mailmapping`` (first email and first name on the line). ``None``
    input yields an empty dict.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():

        # Don't bother checking the line if it is a comment or
        # is an improperly formed author field
        if line.lstrip().startswith(b'#'):
            continue

        # names, emails hold the parsed emails and names for each line
        # namebuilder holds the words in a person's name
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith(b'#'):
                # If we reach a comment in the mailmap file, move on
                break

            elif element.startswith(b'<') and element.endswith(b'>'):
                # We have found an email.
                # Parse it, and finalize any names from earlier
                emails.append(element[1:-1])  # Slice off the "<>"

                if namebuilder:
                    names.append(b' '.join(namebuilder))
                    namebuilder = []

                # Break if we have found a second email, any other
                # data does not fit the spec for .mailmap
                if len(emails) > 1:
                    break

            else:
                # We have found another word in the committer's name
                namebuilder.append(element)

        # Check to see if we have parsed the line into a valid form
        # We require at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # Key: the commit-side identity (last email; the name only when
        # the line carries two names).
        mailmapkey = mailmapping(
            email=emails[-1], name=names[-1] if len(names) == 2 else None,
        )

        # Value: the proper identity (first email, first name if any).
        mailmap[mailmapkey] = mailmapping(
            email=emails[0], name=names[0] if names else None,
        )

    return mailmap

546

554

547

555

548

def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # If the author field coming in isn't in the correct format,
    # or the mailmap is empty just return the original author field
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Turn the user name into a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Try and use both the commit email and name as the key
        proper = mailmap[commit]

    except KeyError:
        # If the lookup fails, use just the email as the key instead
        # We call this commit2 as not to erase original commit fields
        commit2 = mailmapping(email=commit.email)
        proper = mailmap.get(commit2, mailmapping(None, None))

    # Return the author field with proper values filled in; any field the
    # mailmap does not provide falls back to the value from the commit.
    return b'%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )

594

602

595

603

596

# "Name <local@domain>": a non-empty name without '<', one whitespace
# character, then an address in angle brackets that contains an '@'.
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')


def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    return _correctauthorformat.match(author) is not None

619

627

620

628

621

def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    The work is delegated to ``encoding.trim``, which is passed ``b'...'``
    as the ellipsis marker.
    """
    return encoding.trim(text, maxlength, ellipsis=b'...')

624

632

625

633

626

def escapestr(s):
    """Return ``s`` with special bytes replaced by backslash escapes.

    ``s`` may be a byte string or a ``memoryview``; a memoryview is first
    copied into ``bytes``. The result is a byte string.
    """
    if isinstance(s, memoryview):
        s = bytes(s)
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    escaped, _consumed = codecs.escape_encode(s)
    return escaped

632

640

633

641

634

def unescapestr(s):
    """Return ``s`` with backslash escapes decoded back into raw bytes.

    This is the inverse of ``codecs.escape_encode``; only the decoded
    value is returned, not the consumed length.
    """
    decoded, _consumed = codecs.escape_decode(s)
    return decoded

636

644

637

645

638

def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    ``pycompat.bytestr(obj)`` is tried first. If that raises
    ``UnicodeEncodeError``, the object is formatted with ``str()`` and
    converted through ``encoding.strtolocal`` instead.
    """
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))

646

654

647

655

648

def uirepr(s):
    """Return a bytes repr of ``s`` with doubled backslashes collapsed.

    Every ``\\\\`` pair in the repr is replaced by a single backslash, so
    the result is meant for display rather than for evaluating back.
    """
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')

651

659

652

660

653

# delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Build the width-aware wrapper class on first use and instantiate it.

    The first call defines the ``tw`` class, rebinds the module-level name
    ``_MBTextWrapper`` to that class, and returns ``tw(**kwargs)``. Later
    calls through the module-level name therefore instantiate the class
    directly.
    """

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """

        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` so the first part fits in ``space_left``
            columns; return ``(head, tail)``."""
            used = 0
            colwidth = encoding.ucolwidth
            for i, char in enumerate(ucstr):
                used += colwidth(char)
                if space_left < used:
                    return (ucstr[:i], ucstr[i:])
            # Everything fits. The remainder must have the same type as
            # the input: a bytes b'' here would later be pushed back on
            # the chunk stack and break ''.join() on Python 3.
            return ucstr, ucstr[:0]

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # Always leave room for at least one column so progress is made.
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # Not allowed to break the word: put it on a line by itself.
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                # Native string message, as in the stdlib implementation,
                # so the exception text is readable on Python 3.
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    chunkwidth = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + chunkwidth <= width:
                        cur_line.append(chunks.pop())
                        cur_len += chunkwidth

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (
                    self.drop_whitespace
                    and cur_line
                    and cur_line[-1].strip() == r''
                ):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)

760

768

761

769

762

def wrap(line, width, initindent=b'', hangindent=b''):
    """Wrap the byte string ``line`` to ``width`` and return bytes.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones. All three byte strings are decoded with
    ``encoding.encoding`` / ``encoding.encodingmode``, wrapped with
    ``_MBTextWrapper``, and the result is encoded back with
    ``encoding.encoding``.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    # Resolve the codec name and error mode once; they are reused for every
    # decode below and for the final encode.
    codec = pycompat.sysstr(encoding.encoding)
    mode = pycompat.sysstr(encoding.encodingmode)

    line = line.decode(codec, mode)
    initindent = initindent.decode(codec, mode)
    hangindent = hangindent.decode(codec, mode)

    wrapper = _MBTextWrapper(
        width=width, initial_indent=initindent, subsequent_indent=hangindent
    )
    return wrapper.fill(line).encode(codec)

783

791

784

792

785

# Lower-case byte-string spellings accepted by parsebool().
_booleans = {
    b'1': True,
    b'yes': True,
    b'true': True,
    b'on': True,
    b'always': True,
    b'0': False,
    b'no': False,
    b'false': False,
    b'off': False,
    b'never': False,
}


def parsebool(s):
    """Parse s into a boolean.

    ``s`` is a byte string and is matched case-insensitively against the
    keys of ``_booleans``.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())

805

813

806

814

807

def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    On Python 3, ``s`` is decoded as latin1 before being passed to
    ``ast.literal_eval``; otherwise it is passed through unchanged.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    if pycompat.ispy3:
        return ast.literal_eval(s.decode('latin1'))
    return ast.literal_eval(s)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # stringutil.py - utility for generic string formatting, parsing, etc.
             #
             #  Copyright 2005 K. Thananchayan <thananck@yahoo.com>
             #  Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             #  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import ast
             import codecs
             import re as remod
             import textwrap
             import types
             from ..i18n import _
             from ..thirdparty import attr
             from .. import (
                 encoding,
                 error,
                 pycompat,
             )
             # regex special chars pulled from https://bugs.python.org/issue29995
             # which was part of Python 3.7.
             _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
             _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
             regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
             def reescape(pat):
                 """Drop-in replacement for re.escape.

                 Accepts either bytes or unicode and returns a value of the same type
                 with every character listed in ``_respecial`` backslash-escaped.
                 """
                 # NOTE: the escaping is intentionally carried out on unicode, because
                 # only unicode.translate accepts a mapping table (bytes.translate
                 # does not). Bytes are round-tripped through latin1.
                 if isinstance(pat, bytes):
                     escaped = pat.decode('latin1').translate(_regexescapemap)
                     return escaped.encode('latin1')
                 return pat.translate(_regexescapemap)
             def pprint(o, bprefix=False, indent=0, level=0):
                 """Pretty print an object and return the result as one byte string.

                 The arguments are forwarded unchanged to ``pprintgen()``; the atoms
                 it yields are concatenated.
                 """
                 atoms = pprintgen(o, bprefix=bprefix, indent=indent, level=level)
                 return b''.join(atoms)
             def _pprintcollection(
                 opener,
                 closer,
                 items,
                 bprefix,
                 indent,
                 level,
                 presort=False,
                 pairs=False,
             ):
                 """Yield the atoms of a non-empty sized collection for pprintgen().

                 ``opener`` and ``closer`` are the byte strings emitted around the
                 elements. ``items`` is the collection to walk; when ``presort`` is
                 true it is passed through ``sorted()`` first (after the opener has
                 been emitted). When ``pairs`` is true every element is a
                 ``(key, value)`` tuple rendered as ``key: value``.

                 ``bprefix``, ``indent`` and ``level`` have the same meaning as in
                 pprintgen().
                 """
                 yield opener
                 if indent:
                     level += 1
                     yield b'\n'
                     yield b' ' * (level * indent)

                 if presort:
                     items = sorted(items)
                 lastidx = len(items) - 1

                 for i, item in enumerate(items):
                     if pairs:
                         key, value = item
                         for chunk in pprintgen(
                             key, bprefix=bprefix, indent=indent, level=level
                         ):
                             yield chunk
                         yield b': '
                         for chunk in pprintgen(
                             value, bprefix=bprefix, indent=indent, level=level
                         ):
                             yield chunk
                     else:
                         for chunk in pprintgen(
                             item, bprefix=bprefix, indent=indent, level=level
                         ):
                             yield chunk

                     # A separator follows every element except the last one.
                     if i < lastidx:
                         if indent:
                             yield b',\n'
                             yield b' ' * (level * indent)
                         else:
                             yield b', '

                 if indent:
                     # The closer goes on its own line at the enclosing indent level.
                     level -= 1
                     yield b'\n'
                     yield b' ' * (level * indent)
                 yield closer


             def pprintgen(o, bprefix=False, indent=0, level=0):
                 """Pretty print an object to a generator of atoms.

                 ``bprefix`` is a flag influencing whether bytestrings are prefixed
                 with ``b``, i.e. rendered as ``b'...'`` instead of ``'...'``.

                 ``indent`` controls whether collections and nested data structures
                 span multiple lines via the indentation amount in spaces. By default,
                 no newlines are emitted.

                 ``level`` specifies the initial indent level. Used if ``indent > 0``.

                 bytes, bytearray, list, dict, set, tuple and generator objects are
                 handled explicitly; dict items and set members are emitted in sorted
                 order. Anything else is rendered with ``pycompat.byterepr()``.
                 """
                 if isinstance(o, bytes):
                     if bprefix:
                         yield b"b'%s'" % escapestr(o)
                     else:
                         yield b"'%s'" % escapestr(o)
                 elif isinstance(o, bytearray):
                     # codecs.escape_encode() can't handle bytearray, so escapestr fails
                     # without coercion.
                     yield b"bytearray['%s']" % escapestr(bytes(o))
                 elif isinstance(o, list):
                     if not o:
                         yield b'[]'
                         return
                     for chunk in _pprintcollection(
                         b'[', b']', o, bprefix, indent, level
                     ):
                         yield chunk
                 elif isinstance(o, dict):
                     if not o:
                         yield b'{}'
                         return
                     for chunk in _pprintcollection(
                         b'{',
                         b'}',
                         o.items(),
                         bprefix,
                         indent,
                         level,
                         presort=True,
                         pairs=True,
                     ):
                         yield chunk
                 elif isinstance(o, set):
                     if not o:
                         yield b'set([])'
                         return
                     for chunk in _pprintcollection(
                         b'set([', b'])', o, bprefix, indent, level, presort=True
                     ):
                         yield chunk
                 elif isinstance(o, tuple):
                     if not o:
                         yield b'()'
                         return
                     for chunk in _pprintcollection(
                         b'(', b')', o, bprefix, indent, level
                     ):
                         yield chunk
                 elif isinstance(o, types.GeneratorType):
                     # A generator has no length, so look one item ahead to learn
                     # whether the item being printed is the last one.
                     # Special case of empty generator.
                     try:
                         nextitem = next(o)
                     except StopIteration:
                         yield b'gen[]'
                         return
                     yield b'gen['
                     if indent:
                         level += 1
                         yield b'\n'
                         yield b' ' * (level * indent)
                     last = False
                     while not last:
                         current = nextitem
                         try:
                             nextitem = next(o)
                         except StopIteration:
                             last = True
                         for chunk in pprintgen(
                             current, bprefix=bprefix, indent=indent, level=level
                         ):
                             yield chunk
                         if not last:
                             if indent:
                                 yield b',\n'
                                 yield b' ' * (level * indent)
                             else:
                                 yield b', '
                     if indent:
                         level -= 1
                         yield b'\n'
                         yield b' ' * (level * indent)
                     yield b']'
                 else:
                     yield pycompat.byterepr(o)
             def prettyrepr(o):
                 """Pretty print a representation of a possibly-nested object

                 The byte repr of ``o`` is split into one line per ``<...`` group and
                 each line is indented by two spaces per level of ``<``/``>`` nesting
                 that precedes it. Returns the lines joined with newlines as bytes.
                 """
                 # (nesting level, text) pairs, one per output line
                 lines = []
                 rs = pycompat.byterepr(o)
                 # p0 is where the current output line starts; p1 is the position of
                 # the '<' that was found most recently (0 before the first search).
                 p0 = p1 = 0
                 while p0 < len(rs):
                     # '... field=<type ... field=<type ...'
                     #      ~~~~~~~~~~~~~~~~
                     #      p0    p1        q0    q1
                     q0 = -1
                     # q1 is the next '<' after p1, or the end of the string
                     q1 = rs.find(b'<', p1 + 1)
                     if q1 < 0:
                         q1 = len(rs)
                     elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
                         # backtrack for ' field=<'
                         q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
                     if q0 < 0:
                         # no ' field=' prefix found: cut right at the '<' (or the end)
                         q0 = q1
                     else:
                         q0 += 1  # skip ' '
                     # nesting depth = '<' seen so far minus '>' seen so far
                     l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
                     assert l >= 0
                     lines.append((l, rs[p0:q0].rstrip()))
                     p0, p1 = q0, q1
                 return b'\n'.join(b'  ' * l + s for l, s in lines)
             def buildrepr(r):
                 """Format an optional printable representation from unexpanded bits

                 ``None`` yields an empty byte string. Other values are expanded
                 according to their type:

                 ========  =================================
                 type(r)   example
                 ========  =================================
                 tuple     ('<not %r>', other)
                 bytes     '<branch closed>'
                 callable  lambda: '<branch %r>' % sorted(b)
                 object    other
                 ========  =================================
                 """
                 if r is None:
                     return b''
                 if isinstance(r, tuple):
                     # first element is the format string, the rest are its arguments
                     fmt, args = r[0], r[1:]
                     return fmt % pycompat.rapply(pycompat.maybebytestr, args)
                 if isinstance(r, bytes):
                     return r
                 if callable(r):
                     return r()
                 return pprint(r)
             def binary(s):
                 """Return True if ``s`` is non-empty and contains a NUL byte."""
                 if not s:
                     return False
                 return b'\0' in s
+            def _splitpattern(pattern):
+                if pattern.startswith(b're:'):
+                    return b're', pattern[3:]
+                elif pattern.startswith(b'literal:'):
+                    return b'literal', pattern[8:]
+                return b'literal', pattern
             def stringmatcher(pattern, casesensitive=True):
                 """
                 accepts a string, possibly starting with 're:' or 'literal:' prefix.
                 returns the matcher name, pattern, and matcher function.
                 missing or unknown prefixes are treated as literal matches.
                 helper for tests:
                 >>> def test(pattern, *tests):
                 ...     kind, pattern, matcher = stringmatcher(pattern)
                 ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
                 >>> def itest(pattern, *tests):
                 ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
                 ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
                 exact matching (no prefix):
                 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
                 ('literal', 'abcdefg', [False, False, True])
                 regex matching ('re:' prefix)
                 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
                 ('re', 'a.+b', [False, False, True])
                 force exact matches ('literal:' prefix)
                 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
                 ('literal', 're:foobar', [False, True])
                 unknown prefixes are ignored and treated as literals
                 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
                 ('literal', 'foo:bar', [False, False, True])
                 case insensitive regex matches
                 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
                 ('re', 'A.+b', [False, False, True])
                 case insensitive literal matches
                 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
                 ('literal', 'ABCDEFG', [False, False, True])
                 """
-                if pattern.startswith(b're:'):
+                kind, pattern = _splitpattern(pattern)
-                    pattern = pattern[3:]
+                if kind == b're':
                     try:
                         flags = 0
                         if not casesensitive:
                             flags = remod.I
                         regex = remod.compile(pattern, flags)
                     except remod.error as e:
                         raise error.ParseError(_(b'invalid regular expression: %s') % e)
-                    return b're', pattern, regex.search
+                    return kind, pattern, regex.search
-                elif pattern.startswith(b'literal:'):
+                elif kind == b'literal':
-                    pattern = pattern[8:]
+                    if casesensitive:
+                        match = pattern.__eq__
+                    else:
+                        ipat = encoding.lower(pattern)
+                        match = lambda s: ipat == encoding.lower(s)
+                    return kind, pattern, match
-                match = pattern.__eq__
+                raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
-                if not casesensitive:
-                    ipat = encoding.lower(pattern)
-                    match = lambda s: ipat == encoding.lower(s)
-                return b'literal', pattern, match
             def shortuser(user):
                 """Return a short representation of a user name or email address.

                 The steps are applied in order: drop everything from the first
                 ``@``, drop everything up to and including the first ``<``, then keep
                 only the text before the first space and before the first dot.
                 """
                 user = user.split(b'@', 1)[0]
                 user = user.split(b'<', 1)[-1]
                 user = user.split(b' ', 1)[0]
                 user = user.split(b'.', 1)[0]
                 return user
             def emailuser(user):
                 """Return the user portion of an email address.

                 Everything from the first ``@`` onwards is dropped, then everything
                 up to and including the first ``<`` of what remains.
                 """
                 localpart = user.split(b'@', 1)[0]
                 return localpart.split(b'<', 1)[-1]
             def email(author):
                 """Return the email part of an author string.

                 This is the text between the first ``<`` and the first ``>``. A
                 missing ``<`` means "start at the beginning" and a missing ``>``
                 means "run to the end".
                 """
                 start = author.find(b'<') + 1
                 end = author.find(b'>')
                 if end == -1:
                     return author[start:]
                 return author[start:end]
             def person(author):
                 """Return the name that precedes the email address in ``author``,
                 interpreting it as per RFC 5322.

                 A string without ``@`` is returned unchanged.

                 >>> person(b'foo@bar')
                 'foo'
                 >>> person(b'Foo Bar <foo@bar>')
                 'Foo Bar'
                 >>> person(b'"Foo Bar" <foo@bar>')
                 'Foo Bar'
                 >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
                 'Foo "buz" Bar'
                 >>> # The following are invalid, but do exist in real-life
                 ...
                 >>> person(b'Foo "buz" Bar <foo@bar>')
                 'Foo "buz" Bar'
                 >>> person(b'"Foo Bar <foo@bar>')
                 'Foo Bar'
                 """
                 if b'@' not in author:
                     return author
                 anglepos = author.find(b'<')
                 if anglepos == -1:
                     # bare address: use the local part, with dots turned into spaces
                     localpart = author[: author.find(b'@')]
                     return localpart.replace(b'.', b' ')
                 # display name: trim surrounding blanks/quotes, unescape inner quotes
                 displayname = author[:anglepos].strip(b' "')
                 return displayname.replace(b'\\"', b'"')
             # hash=True asks attrs to generate __hash__; parsemailmap() uses
             # instances of this class as dictionary keys.
             @attr.s(hash=True)
             class mailmapping(object):
                 '''Represents a username/email key or value in
                 a mailmap file.

                 ``email`` is required; ``name`` is optional and defaults to None.
                 '''
                 # email address, without the surrounding "<>"
                 email = attr.ib()
                 # name associated with the address, or None when there is none
                 name = attr.ib(default=None)
             def _ismailmaplineinvalid(names, emails):
                 '''Returns True if the parsed names and emails
                 in a mailmap entry are invalid.
                 >>> # No names or emails fails
                 >>> names, emails = [], []
                 >>> _ismailmaplineinvalid(names, emails)
                 True
                 >>> # Only one email fails
                 >>> emails = [b'email@email.com']
                 >>> _ismailmaplineinvalid(names, emails)
                 True
                 >>> # One email and one name passes
                 >>> names = [b'Test Name']
                 >>> _ismailmaplineinvalid(names, emails)
                 False
                 >>> # No names but two emails passes
                 >>> names = []
                 >>> emails = [b'proper@email.com', b'commit@email.com']
                 >>> _ismailmaplineinvalid(names, emails)
                 False
                 '''
                 return not emails or not names and len(emails) < 2
             def parsemailmap(mailmapcontent):
                 """Parse data in the .mailmap format into a dictionary.

                 Keys and values are both ``mailmapping`` instances: the key describes
                 the commit identity (last email on the line, plus the second name if
                 the line has two names) and the value the proper identity (first
                 email and, if present, first name). ``None`` input yields an empty
                 dictionary; comment lines and invalid entries are skipped.

                 >>> mmdata = b"\\n".join([
                 ... b'# Comment',
                 ... b'Name <commit1@email.xx>',
                 ... b'<name@email.xx> <commit2@email.xx>',
                 ... b'Name <proper@email.xx> <commit3@email.xx>',
                 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
                 ... ])
                 >>> mm = parsemailmap(mmdata)
                 >>> for key in sorted(mm.keys()):
                 ...     print(key)
                 mailmapping(email='commit1@email.xx', name=None)
                 mailmapping(email='commit2@email.xx', name=None)
                 mailmapping(email='commit3@email.xx', name=None)
                 mailmapping(email='commit4@email.xx', name='Commit')
                 >>> for val in sorted(mm.values()):
                 ...     print(val)
                 mailmapping(email='commit1@email.xx', name='Name')
                 mailmapping(email='name@email.xx', name=None)
                 mailmapping(email='proper@email.xx', name='Name')
                 mailmapping(email='proper@email.xx', name='Name')
                 """
                 result = {}
                 if mailmapcontent is None:
                     return result

                 for rawline in mailmapcontent.splitlines():
                     # Whole-line comments carry no entry
                     if rawline.lstrip().startswith(b'#'):
                         continue

                     names = []
                     emails = []
                     # words of the name currently being collected
                     words = []
                     for token in rawline.split():
                         if token.startswith(b'#'):
                             # trailing comment: the rest of the line is ignored
                             break
                         if not (token.startswith(b'<') and token.endswith(b'>')):
                             # one more word of the name
                             words.append(token)
                             continue
                         # An email: record it without the "<>" and close off the
                         # name collected so far, if any
                         emails.append(token[1:-1])
                         if words:
                             names.append(b' '.join(words))
                             words = []
                         # Nothing after a second email fits the .mailmap spec
                         if len(emails) > 1:
                             break

                     # At least one email is required, together with either a name
                     # or a second email
                     if _ismailmaplineinvalid(names, emails):
                         continue

                     commitname = names[-1] if len(names) == 2 else None
                     propername = names[0] if names else None
                     key = mailmapping(email=emails[-1], name=commitname)
                     result[key] = mailmapping(email=emails[0], name=propername)

                 return result
             def mapname(mailmap, author):
                 """Return ``author`` rewritten according to ``mailmap``.

                 The author is returned unchanged when it is not well formed or the
                 mailmap is empty. Otherwise the lookup is tried with name and email
                 first and with the email alone second; fields the mailmap does not
                 supply keep their original value.

                 >>> mmdata = b"\\n".join([
                 ...     b'# Comment',
                 ...     b'Name <commit1@email.xx>',
                 ...     b'<name@email.xx> <commit2@email.xx>',
                 ...     b'Name <proper@email.xx> <commit3@email.xx>',
                 ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
                 ... ])
                 >>> m = parsemailmap(mmdata)
                 >>> mapname(m, b'Commit <commit1@email.xx>')
                 'Name <commit1@email.xx>'
                 >>> mapname(m, b'Name <commit2@email.xx>')
                 'Name <name@email.xx>'
                 >>> mapname(m, b'Commit <commit3@email.xx>')
                 'Name <proper@email.xx>'
                 >>> mapname(m, b'Commit <commit4@email.xx>')
                 'Name <proper@email.xx>'
                 >>> mapname(m, b'Unknown Name <unknown@email.com>')
                 'Unknown Name <unknown@email.com>'
                 """
                 if not (isauthorwellformed(author) and mailmap):
                     return author

                 # The identity exactly as recorded in the commit
                 recorded = mailmapping(name=person(author), email=email(author))
                 try:
                     # Most specific key first: name and email together
                     mapped = mailmap[recorded]
                 except KeyError:
                     # Fall back to an email-only key; an all-None mapping stands in
                     # when that is missing as well
                     emailonly = mailmapping(email=recorded.email)
                     mapped = mailmap.get(emailonly, mailmapping(None, None))

                 finalname = mapped.name or recorded.name
                 finalemail = mapped.email or recorded.email
                 return b'%s <%s>' % (finalname, finalemail)
             # name (no '<'), one whitespace character, then <local@domain>
             _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
             def isauthorwellformed(author):
                 '''Return True if the author field is well formed
                 (ie "Contributor Name <contrib@email.dom>")

                 >>> isauthorwellformed(b'Good Author <good@author.com>')
                 True
                 >>> isauthorwellformed(b'Author <good@author.com>')
                 True
                 >>> isauthorwellformed(b'Bad Author')
                 False
                 >>> isauthorwellformed(b'Bad Author <author@author.com')
                 False
                 >>> isauthorwellformed(b'Bad Author author@author.com')
                 False
                 >>> isauthorwellformed(b'<author@author.com>')
                 False
                 >>> isauthorwellformed(b'Bad Author <author>')
                 False
                 '''
                 found = _correctauthorformat.match(author)
                 return bool(found)
             def ellipsis(text, maxlength=400):
                 """Trim ``text`` to at most ``maxlength`` (default: 400) display columns.

                 The trimming is delegated to ``encoding.trim()``, which is given
                 ``b'...'`` as the ellipsis marker.
                 """
                 marker = b'...'
                 return encoding.trim(text, maxlength, ellipsis=marker)
             def escapestr(s):
                 """Return ``s`` with special bytes backslash-escaped.

                 ``memoryview`` input is converted to bytes first.
                 """
                 raw = bytes(s) if isinstance(s, memoryview) else s
                 # call underlying function of s.encode('string_escape') directly for
                 # Python 3 compatibility
                 escaped, _consumed = codecs.escape_encode(raw)
                 return escaped
             def unescapestr(s):
                 """Decode backslash escape sequences in ``s`` (inverse of escapestr)."""
                 decoded, _consumed = codecs.escape_decode(s)
                 return decoded
             def forcebytestr(obj):
                 """Portably format an arbitrary object (e.g. exception) into a byte
                 string.

                 ``pycompat.bytestr()`` is tried first; if it raises
                 UnicodeEncodeError, ``str(obj)`` is passed through
                 ``encoding.strtolocal()`` before being converted.
                 """
                 try:
                     converted = pycompat.bytestr(obj)
                 except UnicodeEncodeError:
                     # non-ascii string, may be lossy
                     localized = encoding.strtolocal(str(obj))
                     converted = pycompat.bytestr(localized)
                 return converted
             def uirepr(s):
                 """Return the byte repr of ``s`` with doubled backslashes collapsed."""
                 quoted = pycompat.byterepr(pycompat.bytestr(s))
                 # Avoid double backslash in Windows path repr()
                 return quoted.replace(b'\\\\', b'\\')
             # delay import of textwrap
             def _MBTextWrapper(**kwargs):
                 """Build the column-width aware TextWrapper subclass and instantiate it.

                 The class is defined on the first call only: the module-level name
                 ``_MBTextWrapper`` is then rebound to the class itself, so later
                 calls construct instances directly. ``kwargs`` are passed to the
                 class constructor.
                 """
                 class tw(textwrap.TextWrapper):
                     """
                     Extend TextWrapper for width-awareness.
                     Neither number of 'bytes' in any encoding nor 'characters' is
                     appropriate to calculate terminal columns for specified string.
                     Original TextWrapper implementation uses built-in 'len()' directly,
                     so overriding is needed to use width information of each characters.
                     In addition, characters classified into 'ambiguous' width are
                     treated as wide in East Asian area, but as narrow in other.
                     This requires user decision to determine width of such characters.
                     """
                     def _cutdown(self, ucstr, space_left):
                         """Split ``ucstr`` before the first character whose
                         accumulated column width exceeds ``space_left``.

                         Returns a ``(head, tail)`` pair.
                         """
                         l = 0
                         colwidth = encoding.ucolwidth
                         for i in pycompat.xrange(len(ucstr)):
                             l += colwidth(ucstr[i])
                             if space_left < l:
                                 return (ucstr[:i], ucstr[i:])
                         # NOTE(review): the remainder here is a bytes literal while
                         # ucstr looks like unicode -- confirm whether this
                         # fall-through is reachable or intended.
                         return ucstr, b''
                     # overriding of base class
                     def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
                         """Deal with a chunk that is too wide for any line.

                         With ``break_long_words`` set, as much of the chunk as fits
                         in the remaining space is moved to ``cur_line`` and the
                         rest stays on the chunk stack. Otherwise the whole chunk is
                         placed on the line only if the line is still empty.
                         """
                         # always leave room for at least one column
                         space_left = max(width - cur_len, 1)
                         if self.break_long_words:
                             cut, res = self._cutdown(reversed_chunks[-1], space_left)
                             cur_line.append(cut)
                             reversed_chunks[-1] = res
                         elif not cur_line:
                             cur_line.append(reversed_chunks.pop())
                     # this overriding code is imported from TextWrapper of Python 2.6
                     # to calculate columns of string by 'encoding.ucolwidth()'
                     def _wrap_chunks(self, chunks):
                         """Arrange ``chunks`` into a list of lines.

                         Chunk widths are measured with ``encoding.ucolwidth()``.
                         ``chunks`` is reversed in place and consumed. Raises
                         ValueError if ``self.width`` is not positive.
                         """
                         colwidth = encoding.ucolwidth
                         lines = []
                         if self.width <= 0:
                             raise ValueError(b"invalid width %r (must be > 0)" % self.width)
                         # Arrange in reverse order so items can be efficiently popped
                         # from a stack of chunks.
                         chunks.reverse()
                         while chunks:
                             # Start the list of chunks that will make up the current line.
                             # cur_len is just the length of all the chunks in cur_line.
                             cur_line = []
                             cur_len = 0
                             # Figure out which static string will prefix this line.
                             if lines:
                                 indent = self.subsequent_indent
                             else:
                                 indent = self.initial_indent
                             # Maximum width for this line.
                             # (the indent itself is measured with len(), not colwidth)
                             width = self.width - len(indent)
                             # First chunk on line is whitespace -- drop it, unless this
                             # is the very beginning of the text (i.e. no lines started yet).
                             if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                                 del chunks[-1]
                             while chunks:
                                 l = colwidth(chunks[-1])
                                 # Can at least squeeze this chunk onto the current line.
                                 if cur_len + l <= width:
                                     cur_line.append(chunks.pop())
                                     cur_len += l
                                 # Nope, this line is full.
                                 else:
                                     break
                             # The current line is full, and the next chunk is too big to
                             # fit on *any* line (not just this one).
                             if chunks and colwidth(chunks[-1]) > width:
                                 self._handle_long_word(chunks, cur_line, cur_len, width)
                             # If the last chunk on this line is all whitespace, drop it.
                             if (
                                 self.drop_whitespace
                                 and cur_line
                                 and cur_line[-1].strip() == r''
                             ):
                                 del cur_line[-1]
                             # Convert current line back to a string and store it in list
                             # of all lines (return value).
                             if cur_line:
                                 lines.append(indent + ''.join(cur_line))
                         return lines
                 # Replace this factory with the class so it is only built once.
                 global _MBTextWrapper
                 _MBTextWrapper = tw
                 return tw(**kwargs)
             def wrap(line, width, initindent=b'', hangindent=b''):
                 """Wrap the bytes ``line`` to ``width`` columns and return bytes.

                 ``initindent`` prefixes the first output line and ``hangindent``
                 every following one. All three byte strings are decoded with the
                 configured encoding and error mode, filled by ``_MBTextWrapper``,
                 and the filled text is encoded back with the same encoding.
                 """
                 widest = max(len(hangindent), len(initindent))
                 if width <= widest:
                     # The indents leave no room for text (weird terminal size):
                     # fall back to a usable width.
                     width = max(78, widest + 1)
                 codec = pycompat.sysstr(encoding.encoding)
                 errmode = pycompat.sysstr(encoding.encodingmode)
                 # Decode in the same order as before: text, first indent, hang indent.
                 utext, uinit, uhang = (
                     raw.decode(codec, errmode)
                     for raw in (line, initindent, hangindent)
                 )
                 filler = _MBTextWrapper(
                     width=width, initial_indent=uinit, subsequent_indent=uhang
                 )
                 filled = filler.fill(utext)
                 return filled.encode(codec)
             # Lower-case byte spellings accepted as booleans, truthy words first.
             _booleans = dict.fromkeys((b'1', b'yes', b'true', b'on', b'always'), True)
             _booleans.update(
                 dict.fromkeys((b'0', b'no', b'false', b'off', b'never'), False)
             )
             def parsebool(s):
                 """Return the boolean spelled by ``s``, or None if unrecognized.

                 ``s`` is lower-cased before the lookup, so matching is
                 case-insensitive.
                 """
                 key = s.lower()
                 return _booleans.get(key)
             def evalpythonliteral(s):
                 """Evaluate ``s`` as a Python literal expression and return its value.

                 On Python 3 ``s`` is decoded as latin1 before evaluation; otherwise
                 it is passed to ``ast.literal_eval`` unchanged.
                 """
                 # We could backport our tokenizer hack to rewrite '' to u'' if we want
                 source = s.decode('latin1') if pycompat.ispy3 else s
                 return ast.literal_eval(source)