typing: add basic type hints to stringutil.py
Matt Harbison
r50470:bbbb5213 default
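The core of this change is typing.overload: reescape() keeps a single runtime implementation while advertising bytes-in/bytes-out and str-in/str-out signatures to type checkers. A minimal, self-contained sketch of the same pattern follows; the double() function is illustrative only and is not part of the patch:

    from typing import overload


    @overload
    def double(value: bytes) -> bytes:
        ...


    @overload
    def double(value: str) -> str:
        ...


    def double(value):
        # Single runtime implementation; the @overload stubs above exist only
        # for type checkers (mypy/pytype) and are never executed.
        return value + value


    doubled_bytes = double(b'ab')  # a checker infers bytes here
    doubled_text = double('ab')    # a checker infers str here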
@@ -1,979 +1,998 @@
1 # stringutil.py - utility for generic string formatting, parsing, etc.
1 # stringutil.py - utility for generic string formatting, parsing, etc.
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10
10
11 import ast
11 import ast
12 import codecs
12 import codecs
13 import re as remod
13 import re as remod
14 import textwrap
14 import textwrap
15 import types
15 import types
16
16
17 from typing import (
18 Optional,
19 overload,
20 )
21
17 from ..i18n import _
22 from ..i18n import _
18 from ..thirdparty import attr
23 from ..thirdparty import attr
19
24
20 from .. import (
25 from .. import (
21 encoding,
26 encoding,
22 error,
27 error,
23 pycompat,
28 pycompat,
24 )
29 )
25
30
26 # regex special chars pulled from https://bugs.python.org/issue29995
31 # regex special chars pulled from https://bugs.python.org/issue29995
27 # which was part of Python 3.7.
32 # which was part of Python 3.7.
28 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
33 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
29 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
34 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
30 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
35 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
31
36
32
37
38 @overload
39 def reescape(pat: bytes) -> bytes:
40 ...
41
42
43 @overload
44 def reescape(pat: str) -> str:
45 ...
46
47
33 def reescape(pat):
48 def reescape(pat):
34 """Drop-in replacement for re.escape."""
49 """Drop-in replacement for re.escape."""
35 # NOTE: it is intentional that this works on unicodes and not
50 # NOTE: it is intentional that this works on unicodes and not
36 # bytes, as it's only possible to do the escaping with
51 # bytes, as it's only possible to do the escaping with
37 # unicode.translate, not bytes.translate. Sigh.
52 # unicode.translate, not bytes.translate. Sigh.
38 wantuni = True
53 wantuni = True
39 if isinstance(pat, bytes):
54 if isinstance(pat, bytes):
40 wantuni = False
55 wantuni = False
41 pat = pat.decode('latin1')
56 pat = pat.decode('latin1')
42 pat = pat.translate(_regexescapemap)
57 pat = pat.translate(_regexescapemap)
43 if wantuni:
58 if wantuni:
44 return pat
59 return pat
45 return pat.encode('latin1')
60 return pat.encode('latin1')
46
61
47
62
48 def pprint(o, bprefix=False, indent=0, level=0):
63 def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
49 """Pretty print an object."""
64 """Pretty print an object."""
50 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
65 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
51
66
52
67
53 def pprintgen(o, bprefix=False, indent=0, level=0):
68 def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
54 """Pretty print an object to a generator of atoms.
69 """Pretty print an object to a generator of atoms.
55
70
56 ``bprefix`` is a flag influencing whether bytestrings are preferred with
71 ``bprefix`` is a flag influencing whether bytestrings are preferred with
57 a ``b''`` prefix.
72 a ``b''`` prefix.
58
73
59 ``indent`` controls whether collections and nested data structures
74 ``indent`` controls whether collections and nested data structures
60 span multiple lines via the indentation amount in spaces. By default,
75 span multiple lines via the indentation amount in spaces. By default,
61 no newlines are emitted.
76 no newlines are emitted.
62
77
63 ``level`` specifies the initial indent level. Used if ``indent > 0``.
78 ``level`` specifies the initial indent level. Used if ``indent > 0``.
64 """
79 """
65
80
66 if isinstance(o, bytes):
81 if isinstance(o, bytes):
67 if bprefix:
82 if bprefix:
68 yield b"b'%s'" % escapestr(o)
83 yield b"b'%s'" % escapestr(o)
69 else:
84 else:
70 yield b"'%s'" % escapestr(o)
85 yield b"'%s'" % escapestr(o)
71 elif isinstance(o, bytearray):
86 elif isinstance(o, bytearray):
72 # codecs.escape_encode() can't handle bytearray, so escapestr fails
87 # codecs.escape_encode() can't handle bytearray, so escapestr fails
73 # without coercion.
88 # without coercion.
74 yield b"bytearray['%s']" % escapestr(bytes(o))
89 yield b"bytearray['%s']" % escapestr(bytes(o))
75 elif isinstance(o, list):
90 elif isinstance(o, list):
76 if not o:
91 if not o:
77 yield b'[]'
92 yield b'[]'
78 return
93 return
79
94
80 yield b'['
95 yield b'['
81
96
82 if indent:
97 if indent:
83 level += 1
98 level += 1
84 yield b'\n'
99 yield b'\n'
85 yield b' ' * (level * indent)
100 yield b' ' * (level * indent)
86
101
87 for i, a in enumerate(o):
102 for i, a in enumerate(o):
88 for chunk in pprintgen(
103 for chunk in pprintgen(
89 a, bprefix=bprefix, indent=indent, level=level
104 a, bprefix=bprefix, indent=indent, level=level
90 ):
105 ):
91 yield chunk
106 yield chunk
92
107
93 if i + 1 < len(o):
108 if i + 1 < len(o):
94 if indent:
109 if indent:
95 yield b',\n'
110 yield b',\n'
96 yield b' ' * (level * indent)
111 yield b' ' * (level * indent)
97 else:
112 else:
98 yield b', '
113 yield b', '
99
114
100 if indent:
115 if indent:
101 level -= 1
116 level -= 1
102 yield b'\n'
117 yield b'\n'
103 yield b' ' * (level * indent)
118 yield b' ' * (level * indent)
104
119
105 yield b']'
120 yield b']'
106 elif isinstance(o, dict):
121 elif isinstance(o, dict):
107 if not o:
122 if not o:
108 yield b'{}'
123 yield b'{}'
109 return
124 return
110
125
111 yield b'{'
126 yield b'{'
112
127
113 if indent:
128 if indent:
114 level += 1
129 level += 1
115 yield b'\n'
130 yield b'\n'
116 yield b' ' * (level * indent)
131 yield b' ' * (level * indent)
117
132
118 for i, (k, v) in enumerate(sorted(o.items())):
133 for i, (k, v) in enumerate(sorted(o.items())):
119 for chunk in pprintgen(
134 for chunk in pprintgen(
120 k, bprefix=bprefix, indent=indent, level=level
135 k, bprefix=bprefix, indent=indent, level=level
121 ):
136 ):
122 yield chunk
137 yield chunk
123
138
124 yield b': '
139 yield b': '
125
140
126 for chunk in pprintgen(
141 for chunk in pprintgen(
127 v, bprefix=bprefix, indent=indent, level=level
142 v, bprefix=bprefix, indent=indent, level=level
128 ):
143 ):
129 yield chunk
144 yield chunk
130
145
131 if i + 1 < len(o):
146 if i + 1 < len(o):
132 if indent:
147 if indent:
133 yield b',\n'
148 yield b',\n'
134 yield b' ' * (level * indent)
149 yield b' ' * (level * indent)
135 else:
150 else:
136 yield b', '
151 yield b', '
137
152
138 if indent:
153 if indent:
139 level -= 1
154 level -= 1
140 yield b'\n'
155 yield b'\n'
141 yield b' ' * (level * indent)
156 yield b' ' * (level * indent)
142
157
143 yield b'}'
158 yield b'}'
144 elif isinstance(o, set):
159 elif isinstance(o, set):
145 if not o:
160 if not o:
146 yield b'set([])'
161 yield b'set([])'
147 return
162 return
148
163
149 yield b'set(['
164 yield b'set(['
150
165
151 if indent:
166 if indent:
152 level += 1
167 level += 1
153 yield b'\n'
168 yield b'\n'
154 yield b' ' * (level * indent)
169 yield b' ' * (level * indent)
155
170
156 for i, k in enumerate(sorted(o)):
171 for i, k in enumerate(sorted(o)):
157 for chunk in pprintgen(
172 for chunk in pprintgen(
158 k, bprefix=bprefix, indent=indent, level=level
173 k, bprefix=bprefix, indent=indent, level=level
159 ):
174 ):
160 yield chunk
175 yield chunk
161
176
162 if i + 1 < len(o):
177 if i + 1 < len(o):
163 if indent:
178 if indent:
164 yield b',\n'
179 yield b',\n'
165 yield b' ' * (level * indent)
180 yield b' ' * (level * indent)
166 else:
181 else:
167 yield b', '
182 yield b', '
168
183
169 if indent:
184 if indent:
170 level -= 1
185 level -= 1
171 yield b'\n'
186 yield b'\n'
172 yield b' ' * (level * indent)
187 yield b' ' * (level * indent)
173
188
174 yield b'])'
189 yield b'])'
175 elif isinstance(o, tuple):
190 elif isinstance(o, tuple):
176 if not o:
191 if not o:
177 yield b'()'
192 yield b'()'
178 return
193 return
179
194
180 yield b'('
195 yield b'('
181
196
182 if indent:
197 if indent:
183 level += 1
198 level += 1
184 yield b'\n'
199 yield b'\n'
185 yield b' ' * (level * indent)
200 yield b' ' * (level * indent)
186
201
187 for i, a in enumerate(o):
202 for i, a in enumerate(o):
188 for chunk in pprintgen(
203 for chunk in pprintgen(
189 a, bprefix=bprefix, indent=indent, level=level
204 a, bprefix=bprefix, indent=indent, level=level
190 ):
205 ):
191 yield chunk
206 yield chunk
192
207
193 if i + 1 < len(o):
208 if i + 1 < len(o):
194 if indent:
209 if indent:
195 yield b',\n'
210 yield b',\n'
196 yield b' ' * (level * indent)
211 yield b' ' * (level * indent)
197 else:
212 else:
198 yield b', '
213 yield b', '
199
214
200 if indent:
215 if indent:
201 level -= 1
216 level -= 1
202 yield b'\n'
217 yield b'\n'
203 yield b' ' * (level * indent)
218 yield b' ' * (level * indent)
204
219
205 yield b')'
220 yield b')'
206 elif isinstance(o, types.GeneratorType):
221 elif isinstance(o, types.GeneratorType):
207 # Special case of empty generator.
222 # Special case of empty generator.
208 try:
223 try:
209 nextitem = next(o)
224 nextitem = next(o)
210 except StopIteration:
225 except StopIteration:
211 yield b'gen[]'
226 yield b'gen[]'
212 return
227 return
213
228
214 yield b'gen['
229 yield b'gen['
215
230
216 if indent:
231 if indent:
217 level += 1
232 level += 1
218 yield b'\n'
233 yield b'\n'
219 yield b' ' * (level * indent)
234 yield b' ' * (level * indent)
220
235
221 last = False
236 last = False
222
237
223 while not last:
238 while not last:
224 current = nextitem
239 current = nextitem
225
240
226 try:
241 try:
227 nextitem = next(o)
242 nextitem = next(o)
228 except StopIteration:
243 except StopIteration:
229 last = True
244 last = True
230
245
231 for chunk in pprintgen(
246 for chunk in pprintgen(
232 current, bprefix=bprefix, indent=indent, level=level
247 current, bprefix=bprefix, indent=indent, level=level
233 ):
248 ):
234 yield chunk
249 yield chunk
235
250
236 if not last:
251 if not last:
237 if indent:
252 if indent:
238 yield b',\n'
253 yield b',\n'
239 yield b' ' * (level * indent)
254 yield b' ' * (level * indent)
240 else:
255 else:
241 yield b', '
256 yield b', '
242
257
243 if indent:
258 if indent:
244 level -= 1
259 level -= 1
245 yield b'\n'
260 yield b'\n'
246 yield b' ' * (level * indent)
261 yield b' ' * (level * indent)
247
262
248 yield b']'
263 yield b']'
249 else:
264 else:
250 yield pycompat.byterepr(o)
265 yield pycompat.byterepr(o)
251
266
252
267
253 def prettyrepr(o):
268 def prettyrepr(o) -> bytes:
254 """Pretty print a representation of a possibly-nested object"""
269 """Pretty print a representation of a possibly-nested object"""
255 lines = []
270 lines = []
256 rs = pycompat.byterepr(o)
271 rs = pycompat.byterepr(o)
257 p0 = p1 = 0
272 p0 = p1 = 0
258 while p0 < len(rs):
273 while p0 < len(rs):
259 # '... field=<type ... field=<type ...'
274 # '... field=<type ... field=<type ...'
260 # ~~~~~~~~~~~~~~~~
275 # ~~~~~~~~~~~~~~~~
261 # p0 p1 q0 q1
276 # p0 p1 q0 q1
262 q0 = -1
277 q0 = -1
263 q1 = rs.find(b'<', p1 + 1)
278 q1 = rs.find(b'<', p1 + 1)
264 if q1 < 0:
279 if q1 < 0:
265 q1 = len(rs)
280 q1 = len(rs)
266 # pytype: disable=wrong-arg-count
281 # pytype: disable=wrong-arg-count
267 # TODO: figure out why pytype doesn't recognize the optional start
282 # TODO: figure out why pytype doesn't recognize the optional start
268 # arg
283 # arg
269 elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
284 elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
270 # pytype: enable=wrong-arg-count
285 # pytype: enable=wrong-arg-count
271 # backtrack for ' field=<'
286 # backtrack for ' field=<'
272 q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
287 q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
273 if q0 < 0:
288 if q0 < 0:
274 q0 = q1
289 q0 = q1
275 else:
290 else:
276 q0 += 1 # skip ' '
291 q0 += 1 # skip ' '
277 l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
292 l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
278 assert l >= 0
293 assert l >= 0
279 lines.append((l, rs[p0:q0].rstrip()))
294 lines.append((l, rs[p0:q0].rstrip()))
280 p0, p1 = q0, q1
295 p0, p1 = q0, q1
281 return b'\n'.join(b' ' * l + s for l, s in lines)
296 return b'\n'.join(b' ' * l + s for l, s in lines)
282
297
283
298
284 def buildrepr(r):
299 def buildrepr(r) -> bytes:
285 """Format an optional printable representation from unexpanded bits
300 """Format an optional printable representation from unexpanded bits
286
301
287 ======== =================================
302 ======== =================================
288 type(r) example
303 type(r) example
289 ======== =================================
304 ======== =================================
290 tuple ('<not %r>', other)
305 tuple ('<not %r>', other)
291 bytes '<branch closed>'
306 bytes '<branch closed>'
292 callable lambda: '<branch %r>' % sorted(b)
307 callable lambda: '<branch %r>' % sorted(b)
293 object other
308 object other
294 ======== =================================
309 ======== =================================
295 """
310 """
296 if r is None:
311 if r is None:
297 return b''
312 return b''
298 elif isinstance(r, tuple):
313 elif isinstance(r, tuple):
299 return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
314 return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
300 elif isinstance(r, bytes):
315 elif isinstance(r, bytes):
301 return r
316 return r
302 elif callable(r):
317 elif callable(r):
303 return r()
318 return r()
304 else:
319 else:
305 return pprint(r)
320 return pprint(r)
306
321
307
322
308 def binary(s):
323 def binary(s: bytes) -> bool:
309 """return true if a string is binary data"""
324 """return true if a string is binary data"""
310 return bool(s and b'\0' in s)
325 return bool(s and b'\0' in s)
311
326
312
327
313 def _splitpattern(pattern):
328 def _splitpattern(pattern: bytes):
314 if pattern.startswith(b're:'):
329 if pattern.startswith(b're:'):
315 return b're', pattern[3:]
330 return b're', pattern[3:]
316 elif pattern.startswith(b'literal:'):
331 elif pattern.startswith(b'literal:'):
317 return b'literal', pattern[8:]
332 return b'literal', pattern[8:]
318 return b'literal', pattern
333 return b'literal', pattern
319
334
320
335
321 def stringmatcher(pattern, casesensitive=True):
336 def stringmatcher(pattern: bytes, casesensitive: bool = True):
322 """
337 """
323 accepts a string, possibly starting with 're:' or 'literal:' prefix.
338 accepts a string, possibly starting with 're:' or 'literal:' prefix.
324 returns the matcher name, pattern, and matcher function.
339 returns the matcher name, pattern, and matcher function.
325 missing or unknown prefixes are treated as literal matches.
340 missing or unknown prefixes are treated as literal matches.
326
341
327 helper for tests:
342 helper for tests:
328 >>> def test(pattern, *tests):
343 >>> def test(pattern, *tests):
329 ... kind, pattern, matcher = stringmatcher(pattern)
344 ... kind, pattern, matcher = stringmatcher(pattern)
330 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
345 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
331 >>> def itest(pattern, *tests):
346 >>> def itest(pattern, *tests):
332 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
347 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
333 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
348 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
334
349
335 exact matching (no prefix):
350 exact matching (no prefix):
336 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
351 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
337 ('literal', 'abcdefg', [False, False, True])
352 ('literal', 'abcdefg', [False, False, True])
338
353
339 regex matching ('re:' prefix)
354 regex matching ('re:' prefix)
340 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
355 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
341 ('re', 'a.+b', [False, False, True])
356 ('re', 'a.+b', [False, False, True])
342
357
343 force exact matches ('literal:' prefix)
358 force exact matches ('literal:' prefix)
344 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
359 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
345 ('literal', 're:foobar', [False, True])
360 ('literal', 're:foobar', [False, True])
346
361
347 unknown prefixes are ignored and treated as literals
362 unknown prefixes are ignored and treated as literals
348 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
363 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
349 ('literal', 'foo:bar', [False, False, True])
364 ('literal', 'foo:bar', [False, False, True])
350
365
351 case insensitive regex matches
366 case insensitive regex matches
352 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
367 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
353 ('re', 'A.+b', [False, False, True])
368 ('re', 'A.+b', [False, False, True])
354
369
355 case insensitive literal matches
370 case insensitive literal matches
356 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
371 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
357 ('literal', 'ABCDEFG', [False, False, True])
372 ('literal', 'ABCDEFG', [False, False, True])
358 """
373 """
359 kind, pattern = _splitpattern(pattern)
374 kind, pattern = _splitpattern(pattern)
360 if kind == b're':
375 if kind == b're':
361 try:
376 try:
362 flags = 0
377 flags = 0
363 if not casesensitive:
378 if not casesensitive:
364 flags = remod.I
379 flags = remod.I
365 regex = remod.compile(pattern, flags)
380 regex = remod.compile(pattern, flags)
366 except remod.error as e:
381 except remod.error as e:
367 raise error.ParseError(
382 raise error.ParseError(
368 _(b'invalid regular expression: %s') % forcebytestr(e)
383 _(b'invalid regular expression: %s') % forcebytestr(e)
369 )
384 )
370 return kind, pattern, regex.search
385 return kind, pattern, regex.search
371 elif kind == b'literal':
386 elif kind == b'literal':
372 if casesensitive:
387 if casesensitive:
373 match = pattern.__eq__
388 match = pattern.__eq__
374 else:
389 else:
375 ipat = encoding.lower(pattern)
390 ipat = encoding.lower(pattern)
376 match = lambda s: ipat == encoding.lower(s)
391 match = lambda s: ipat == encoding.lower(s)
377 return kind, pattern, match
392 return kind, pattern, match
378
393
379 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
394 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
380
395
381
396
382 def substringregexp(pattern, flags=0):
397 def substringregexp(pattern: bytes, flags: int = 0):
383 """Build a regexp object from a string pattern possibly starting with
398 """Build a regexp object from a string pattern possibly starting with
384 're:' or 'literal:' prefix.
399 're:' or 'literal:' prefix.
385
400
386 helper for tests:
401 helper for tests:
387 >>> def test(pattern, *tests):
402 >>> def test(pattern, *tests):
388 ... regexp = substringregexp(pattern)
403 ... regexp = substringregexp(pattern)
389 ... return [bool(regexp.search(t)) for t in tests]
404 ... return [bool(regexp.search(t)) for t in tests]
390 >>> def itest(pattern, *tests):
405 >>> def itest(pattern, *tests):
391 ... regexp = substringregexp(pattern, remod.I)
406 ... regexp = substringregexp(pattern, remod.I)
392 ... return [bool(regexp.search(t)) for t in tests]
407 ... return [bool(regexp.search(t)) for t in tests]
393
408
394 substring matching (no prefix):
409 substring matching (no prefix):
395 >>> test(b'bcde', b'abc', b'def', b'abcdefg')
410 >>> test(b'bcde', b'abc', b'def', b'abcdefg')
396 [False, False, True]
411 [False, False, True]
397
412
398 substring pattern should be escaped:
413 substring pattern should be escaped:
399 >>> substringregexp(b'.bc').pattern
414 >>> substringregexp(b'.bc').pattern
400 '\\\\.bc'
415 '\\\\.bc'
401 >>> test(b'.bc', b'abc', b'def', b'abcdefg')
416 >>> test(b'.bc', b'abc', b'def', b'abcdefg')
402 [False, False, False]
417 [False, False, False]
403
418
404 regex matching ('re:' prefix)
419 regex matching ('re:' prefix)
405 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
420 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
406 [False, False, True]
421 [False, False, True]
407
422
408 force substring matches ('literal:' prefix)
423 force substring matches ('literal:' prefix)
409 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
424 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
410 [False, True]
425 [False, True]
411
426
412 case insensitive literal matches
427 case insensitive literal matches
413 >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
428 >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
414 [False, False, True]
429 [False, False, True]
415
430
416 case insensitive regex matches
431 case insensitive regex matches
417 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
432 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
418 [False, False, True]
433 [False, False, True]
419 """
434 """
420 kind, pattern = _splitpattern(pattern)
435 kind, pattern = _splitpattern(pattern)
421 if kind == b're':
436 if kind == b're':
422 try:
437 try:
423 return remod.compile(pattern, flags)
438 return remod.compile(pattern, flags)
424 except remod.error as e:
439 except remod.error as e:
425 raise error.ParseError(
440 raise error.ParseError(
426 _(b'invalid regular expression: %s') % forcebytestr(e)
441 _(b'invalid regular expression: %s') % forcebytestr(e)
427 )
442 )
428 elif kind == b'literal':
443 elif kind == b'literal':
429 return remod.compile(remod.escape(pattern), flags)
444 return remod.compile(remod.escape(pattern), flags)
430
445
431 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
446 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
432
447
433
448
434 def shortuser(user):
449 def shortuser(user: bytes) -> bytes:
435 """Return a short representation of a user name or email address."""
450 """Return a short representation of a user name or email address."""
436 f = user.find(b'@')
451 f = user.find(b'@')
437 if f >= 0:
452 if f >= 0:
438 user = user[:f]
453 user = user[:f]
439 f = user.find(b'<')
454 f = user.find(b'<')
440 if f >= 0:
455 if f >= 0:
441 user = user[f + 1 :]
456 user = user[f + 1 :]
442 f = user.find(b' ')
457 f = user.find(b' ')
443 if f >= 0:
458 if f >= 0:
444 user = user[:f]
459 user = user[:f]
445 f = user.find(b'.')
460 f = user.find(b'.')
446 if f >= 0:
461 if f >= 0:
447 user = user[:f]
462 user = user[:f]
448 return user
463 return user
449
464
450
465
451 def emailuser(user):
466 def emailuser(user: bytes) -> bytes:
452 """Return the user portion of an email address."""
467 """Return the user portion of an email address."""
453 f = user.find(b'@')
468 f = user.find(b'@')
454 if f >= 0:
469 if f >= 0:
455 user = user[:f]
470 user = user[:f]
456 f = user.find(b'<')
471 f = user.find(b'<')
457 if f >= 0:
472 if f >= 0:
458 user = user[f + 1 :]
473 user = user[f + 1 :]
459 return user
474 return user
460
475
461
476
462 def email(author):
477 def email(author: bytes) -> bytes:
463 '''get email of author.'''
478 '''get email of author.'''
464 r = author.find(b'>')
479 r = author.find(b'>')
465 if r == -1:
480 if r == -1:
466 r = None
481 r = None
467 return author[author.find(b'<') + 1 : r]
482 return author[author.find(b'<') + 1 : r]
468
483
469
484
470 def person(author):
485 def person(author: bytes) -> bytes:
471 """Returns the name before an email address,
486 """Returns the name before an email address,
472 interpreting it as per RFC 5322
487 interpreting it as per RFC 5322
473
488
474 >>> person(b'foo@bar')
489 >>> person(b'foo@bar')
475 'foo'
490 'foo'
476 >>> person(b'Foo Bar <foo@bar>')
491 >>> person(b'Foo Bar <foo@bar>')
477 'Foo Bar'
492 'Foo Bar'
478 >>> person(b'"Foo Bar" <foo@bar>')
493 >>> person(b'"Foo Bar" <foo@bar>')
479 'Foo Bar'
494 'Foo Bar'
480 >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
495 >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
481 'Foo "buz" Bar'
496 'Foo "buz" Bar'
482 >>> # The following are invalid, but do exist in real-life
497 >>> # The following are invalid, but do exist in real-life
483 ...
498 ...
484 >>> person(b'Foo "buz" Bar <foo@bar>')
499 >>> person(b'Foo "buz" Bar <foo@bar>')
485 'Foo "buz" Bar'
500 'Foo "buz" Bar'
486 >>> person(b'"Foo Bar <foo@bar>')
501 >>> person(b'"Foo Bar <foo@bar>')
487 'Foo Bar'
502 'Foo Bar'
488 """
503 """
489 if b'@' not in author:
504 if b'@' not in author:
490 return author
505 return author
491 f = author.find(b'<')
506 f = author.find(b'<')
492 if f != -1:
507 if f != -1:
493 return author[:f].strip(b' "').replace(b'\\"', b'"')
508 return author[:f].strip(b' "').replace(b'\\"', b'"')
494 f = author.find(b'@')
509 f = author.find(b'@')
495 return author[:f].replace(b'.', b' ')
510 return author[:f].replace(b'.', b' ')
496
511
497
512
498 @attr.s(hash=True)
513 @attr.s(hash=True)
499 class mailmapping:
514 class mailmapping:
500 """Represents a username/email key or value in
515 """Represents a username/email key or value in
501 a mailmap file"""
516 a mailmap file"""
502
517
503 email = attr.ib()
518 email = attr.ib()
504 name = attr.ib(default=None)
519 name = attr.ib(default=None)
505
520
506
521
507 def _ismailmaplineinvalid(names, emails):
522 def _ismailmaplineinvalid(names, emails):
508 """Returns True if the parsed names and emails
523 """Returns True if the parsed names and emails
509 in a mailmap entry are invalid.
524 in a mailmap entry are invalid.
510
525
511 >>> # No names or emails fails
526 >>> # No names or emails fails
512 >>> names, emails = [], []
527 >>> names, emails = [], []
513 >>> _ismailmaplineinvalid(names, emails)
528 >>> _ismailmaplineinvalid(names, emails)
514 True
529 True
515 >>> # Only one email fails
530 >>> # Only one email fails
516 >>> emails = [b'email@email.com']
531 >>> emails = [b'email@email.com']
517 >>> _ismailmaplineinvalid(names, emails)
532 >>> _ismailmaplineinvalid(names, emails)
518 True
533 True
519 >>> # One email and one name passes
534 >>> # One email and one name passes
520 >>> names = [b'Test Name']
535 >>> names = [b'Test Name']
521 >>> _ismailmaplineinvalid(names, emails)
536 >>> _ismailmaplineinvalid(names, emails)
522 False
537 False
523 >>> # No names but two emails passes
538 >>> # No names but two emails passes
524 >>> names = []
539 >>> names = []
525 >>> emails = [b'proper@email.com', b'commit@email.com']
540 >>> emails = [b'proper@email.com', b'commit@email.com']
526 >>> _ismailmaplineinvalid(names, emails)
541 >>> _ismailmaplineinvalid(names, emails)
527 False
542 False
528 """
543 """
529 return not emails or not names and len(emails) < 2
544 return not emails or not names and len(emails) < 2
530
545
531
546
532 def parsemailmap(mailmapcontent):
547 def parsemailmap(mailmapcontent):
533 """Parses data in the .mailmap format
548 """Parses data in the .mailmap format
534
549
535 >>> mmdata = b"\\n".join([
550 >>> mmdata = b"\\n".join([
536 ... b'# Comment',
551 ... b'# Comment',
537 ... b'Name <commit1@email.xx>',
552 ... b'Name <commit1@email.xx>',
538 ... b'<name@email.xx> <commit2@email.xx>',
553 ... b'<name@email.xx> <commit2@email.xx>',
539 ... b'Name <proper@email.xx> <commit3@email.xx>',
554 ... b'Name <proper@email.xx> <commit3@email.xx>',
540 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
555 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
541 ... ])
556 ... ])
542 >>> mm = parsemailmap(mmdata)
557 >>> mm = parsemailmap(mmdata)
543 >>> for key in sorted(mm.keys()):
558 >>> for key in sorted(mm.keys()):
544 ... print(key)
559 ... print(key)
545 mailmapping(email='commit1@email.xx', name=None)
560 mailmapping(email='commit1@email.xx', name=None)
546 mailmapping(email='commit2@email.xx', name=None)
561 mailmapping(email='commit2@email.xx', name=None)
547 mailmapping(email='commit3@email.xx', name=None)
562 mailmapping(email='commit3@email.xx', name=None)
548 mailmapping(email='commit4@email.xx', name='Commit')
563 mailmapping(email='commit4@email.xx', name='Commit')
549 >>> for val in sorted(mm.values()):
564 >>> for val in sorted(mm.values()):
550 ... print(val)
565 ... print(val)
551 mailmapping(email='commit1@email.xx', name='Name')
566 mailmapping(email='commit1@email.xx', name='Name')
552 mailmapping(email='name@email.xx', name=None)
567 mailmapping(email='name@email.xx', name=None)
553 mailmapping(email='proper@email.xx', name='Name')
568 mailmapping(email='proper@email.xx', name='Name')
554 mailmapping(email='proper@email.xx', name='Name')
569 mailmapping(email='proper@email.xx', name='Name')
555 """
570 """
556 mailmap = {}
571 mailmap = {}
557
572
558 if mailmapcontent is None:
573 if mailmapcontent is None:
559 return mailmap
574 return mailmap
560
575
561 for line in mailmapcontent.splitlines():
576 for line in mailmapcontent.splitlines():
562
577
563 # Don't bother checking the line if it is a comment or
578 # Don't bother checking the line if it is a comment or
564 # is an improperly formed author field
579 # is an improperly formed author field
565 if line.lstrip().startswith(b'#'):
580 if line.lstrip().startswith(b'#'):
566 continue
581 continue
567
582
568 # names, emails hold the parsed emails and names for each line
583 # names, emails hold the parsed emails and names for each line
569 # name_builder holds the words in a persons name
584 # name_builder holds the words in a persons name
570 names, emails = [], []
585 names, emails = [], []
571 namebuilder = []
586 namebuilder = []
572
587
573 for element in line.split():
588 for element in line.split():
574 if element.startswith(b'#'):
589 if element.startswith(b'#'):
575 # If we reach a comment in the mailmap file, move on
590 # If we reach a comment in the mailmap file, move on
576 break
591 break
577
592
578 elif element.startswith(b'<') and element.endswith(b'>'):
593 elif element.startswith(b'<') and element.endswith(b'>'):
579 # We have found an email.
594 # We have found an email.
580 # Parse it, and finalize any names from earlier
595 # Parse it, and finalize any names from earlier
581 emails.append(element[1:-1]) # Slice off the "<>"
596 emails.append(element[1:-1]) # Slice off the "<>"
582
597
583 if namebuilder:
598 if namebuilder:
584 names.append(b' '.join(namebuilder))
599 names.append(b' '.join(namebuilder))
585 namebuilder = []
600 namebuilder = []
586
601
587 # Break if we have found a second email, any other
602 # Break if we have found a second email, any other
588 # data does not fit the spec for .mailmap
603 # data does not fit the spec for .mailmap
589 if len(emails) > 1:
604 if len(emails) > 1:
590 break
605 break
591
606
592 else:
607 else:
593 # We have found another word in the committers name
608 # We have found another word in the committers name
594 namebuilder.append(element)
609 namebuilder.append(element)
595
610
596 # Check to see if we have parsed the line into a valid form
611 # Check to see if we have parsed the line into a valid form
597 # We require at least one email, and either at least one
612 # We require at least one email, and either at least one
598 # name or a second email
613 # name or a second email
599 if _ismailmaplineinvalid(names, emails):
614 if _ismailmaplineinvalid(names, emails):
600 continue
615 continue
601
616
602 mailmapkey = mailmapping(
617 mailmapkey = mailmapping(
603 email=emails[-1],
618 email=emails[-1],
604 name=names[-1] if len(names) == 2 else None,
619 name=names[-1] if len(names) == 2 else None,
605 )
620 )
606
621
607 mailmap[mailmapkey] = mailmapping(
622 mailmap[mailmapkey] = mailmapping(
608 email=emails[0],
623 email=emails[0],
609 name=names[0] if names else None,
624 name=names[0] if names else None,
610 )
625 )
611
626
612 return mailmap
627 return mailmap
613
628
614
629
615 def mapname(mailmap, author):
630 def mapname(mailmap, author: bytes) -> bytes:
616 """Returns the author field according to the mailmap cache, or
631 """Returns the author field according to the mailmap cache, or
617 the original author field.
632 the original author field.
618
633
619 >>> mmdata = b"\\n".join([
634 >>> mmdata = b"\\n".join([
620 ... b'# Comment',
635 ... b'# Comment',
621 ... b'Name <commit1@email.xx>',
636 ... b'Name <commit1@email.xx>',
622 ... b'<name@email.xx> <commit2@email.xx>',
637 ... b'<name@email.xx> <commit2@email.xx>',
623 ... b'Name <proper@email.xx> <commit3@email.xx>',
638 ... b'Name <proper@email.xx> <commit3@email.xx>',
624 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
639 ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
625 ... ])
640 ... ])
626 >>> m = parsemailmap(mmdata)
641 >>> m = parsemailmap(mmdata)
627 >>> mapname(m, b'Commit <commit1@email.xx>')
642 >>> mapname(m, b'Commit <commit1@email.xx>')
628 'Name <commit1@email.xx>'
643 'Name <commit1@email.xx>'
629 >>> mapname(m, b'Name <commit2@email.xx>')
644 >>> mapname(m, b'Name <commit2@email.xx>')
630 'Name <name@email.xx>'
645 'Name <name@email.xx>'
631 >>> mapname(m, b'Commit <commit3@email.xx>')
646 >>> mapname(m, b'Commit <commit3@email.xx>')
632 'Name <proper@email.xx>'
647 'Name <proper@email.xx>'
633 >>> mapname(m, b'Commit <commit4@email.xx>')
648 >>> mapname(m, b'Commit <commit4@email.xx>')
634 'Name <proper@email.xx>'
649 'Name <proper@email.xx>'
635 >>> mapname(m, b'Unknown Name <unknown@email.com>')
650 >>> mapname(m, b'Unknown Name <unknown@email.com>')
636 'Unknown Name <unknown@email.com>'
651 'Unknown Name <unknown@email.com>'
637 """
652 """
638 # If the author field coming in isn't in the correct format,
653 # If the author field coming in isn't in the correct format,
639 # or the mailmap is empty just return the original author field
654 # or the mailmap is empty just return the original author field
640 if not isauthorwellformed(author) or not mailmap:
655 if not isauthorwellformed(author) or not mailmap:
641 return author
656 return author
642
657
643 # Turn the user name into a mailmapping
658 # Turn the user name into a mailmapping
644 commit = mailmapping(name=person(author), email=email(author))
659 commit = mailmapping(name=person(author), email=email(author))
645
660
646 try:
661 try:
647 # Try and use both the commit email and name as the key
662 # Try and use both the commit email and name as the key
648 proper = mailmap[commit]
663 proper = mailmap[commit]
649
664
650 except KeyError:
665 except KeyError:
651 # If the lookup fails, use just the email as the key instead
666 # If the lookup fails, use just the email as the key instead
652 # We call this commit2 as not to erase original commit fields
667 # We call this commit2 as not to erase original commit fields
653 commit2 = mailmapping(email=commit.email)
668 commit2 = mailmapping(email=commit.email)
654 proper = mailmap.get(commit2, mailmapping(None, None))
669 proper = mailmap.get(commit2, mailmapping(None, None))
655
670
656 # Return the author field with proper values filled in
671 # Return the author field with proper values filled in
657 return b'%s <%s>' % (
672 return b'%s <%s>' % (
658 proper.name if proper.name else commit.name,
673 proper.name if proper.name else commit.name,
659 proper.email if proper.email else commit.email,
674 proper.email if proper.email else commit.email,
660 )
675 )
661
676
662
677
663 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
678 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
664
679
665
680
666 def isauthorwellformed(author):
681 def isauthorwellformed(author: bytes) -> bool:
667 """Return True if the author field is well formed
682 """Return True if the author field is well formed
668 (ie "Contributor Name <contrib@email.dom>")
683 (ie "Contributor Name <contrib@email.dom>")
669
684
670 >>> isauthorwellformed(b'Good Author <good@author.com>')
685 >>> isauthorwellformed(b'Good Author <good@author.com>')
671 True
686 True
672 >>> isauthorwellformed(b'Author <good@author.com>')
687 >>> isauthorwellformed(b'Author <good@author.com>')
673 True
688 True
674 >>> isauthorwellformed(b'Bad Author')
689 >>> isauthorwellformed(b'Bad Author')
675 False
690 False
676 >>> isauthorwellformed(b'Bad Author <author@author.com')
691 >>> isauthorwellformed(b'Bad Author <author@author.com')
677 False
692 False
678 >>> isauthorwellformed(b'Bad Author author@author.com')
693 >>> isauthorwellformed(b'Bad Author author@author.com')
679 False
694 False
680 >>> isauthorwellformed(b'<author@author.com>')
695 >>> isauthorwellformed(b'<author@author.com>')
681 False
696 False
682 >>> isauthorwellformed(b'Bad Author <author>')
697 >>> isauthorwellformed(b'Bad Author <author>')
683 False
698 False
684 """
699 """
685 return _correctauthorformat.match(author) is not None
700 return _correctauthorformat.match(author) is not None
686
701
687
702
688 def firstline(text):
703 def firstline(text: bytes) -> bytes:
689 """Return the first line of the input"""
704 """Return the first line of the input"""
690 # Try to avoid running splitlines() on the whole string
705 # Try to avoid running splitlines() on the whole string
691 i = text.find(b'\n')
706 i = text.find(b'\n')
692 if i != -1:
707 if i != -1:
693 text = text[:i]
708 text = text[:i]
694 try:
709 try:
695 return text.splitlines()[0]
710 return text.splitlines()[0]
696 except IndexError:
711 except IndexError:
697 return b''
712 return b''
698
713
699
714
700 def ellipsis(text, maxlength=400):
715 def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
701 """Trim string to at most maxlength (default: 400) columns in display."""
716 """Trim string to at most maxlength (default: 400) columns in display."""
702 return encoding.trim(text, maxlength, ellipsis=b'...')
717 return encoding.trim(text, maxlength, ellipsis=b'...')
703
718
704
719
705 def escapestr(s):
720 def escapestr(s: bytes) -> bytes:
721 # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
706 if isinstance(s, memoryview):
722 if isinstance(s, memoryview):
707 s = bytes(s)
723 s = bytes(s)
708 # call underlying function of s.encode('string_escape') directly for
724 # call underlying function of s.encode('string_escape') directly for
709 # Python 3 compatibility
725 # Python 3 compatibility
710 return codecs.escape_encode(s)[0] # pytype: disable=module-attr
726 return codecs.escape_encode(s)[0] # pytype: disable=module-attr
711
727
712
728
713 def unescapestr(s):
729 def unescapestr(s: bytes) -> bytes:
714 return codecs.escape_decode(s)[0] # pytype: disable=module-attr
730 return codecs.escape_decode(s)[0] # pytype: disable=module-attr
715
731
716
732
717 def forcebytestr(obj):
733 def forcebytestr(obj):
718 """Portably format an arbitrary object (e.g. exception) into a byte
734 """Portably format an arbitrary object (e.g. exception) into a byte
719 string."""
735 string."""
720 try:
736 try:
721 return pycompat.bytestr(obj)
737 return pycompat.bytestr(obj)
722 except UnicodeEncodeError:
738 except UnicodeEncodeError:
723 # non-ascii string, may be lossy
739 # non-ascii string, may be lossy
724 return pycompat.bytestr(encoding.strtolocal(str(obj)))
740 return pycompat.bytestr(encoding.strtolocal(str(obj)))
725
741
726
742
727 def uirepr(s):
743 def uirepr(s: bytes) -> bytes:
728 # Avoid double backslash in Windows path repr()
744 # Avoid double backslash in Windows path repr()
729 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
745 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
730
746
731
747
732 # delay import of textwrap
748 # delay import of textwrap
733 def _MBTextWrapper(**kwargs):
749 def _MBTextWrapper(**kwargs):
734 class tw(textwrap.TextWrapper):
750 class tw(textwrap.TextWrapper):
735 """
751 """
736 Extend TextWrapper for width-awareness.
752 Extend TextWrapper for width-awareness.
737
753
738 Neither number of 'bytes' in any encoding nor 'characters' is
754 Neither number of 'bytes' in any encoding nor 'characters' is
739 appropriate to calculate terminal columns for specified string.
755 appropriate to calculate terminal columns for specified string.
740
756
741 Original TextWrapper implementation uses built-in 'len()' directly,
757 Original TextWrapper implementation uses built-in 'len()' directly,
742 so overriding is needed to use width information of each characters.
758 so overriding is needed to use width information of each characters.
743
759
744 In addition, characters classified into 'ambiguous' width are
760 In addition, characters classified into 'ambiguous' width are
745 treated as wide in East Asian area, but as narrow in other.
761 treated as wide in East Asian area, but as narrow in other.
746
762
747 This requires use decision to determine width of such characters.
763 This requires use decision to determine width of such characters.
748 """
764 """
749
765
750 def _cutdown(self, ucstr, space_left):
766 def _cutdown(self, ucstr, space_left):
751 l = 0
767 l = 0
752 colwidth = encoding.ucolwidth
768 colwidth = encoding.ucolwidth
753 for i in range(len(ucstr)):
769 for i in range(len(ucstr)):
754 l += colwidth(ucstr[i])
770 l += colwidth(ucstr[i])
755 if space_left < l:
771 if space_left < l:
756 return (ucstr[:i], ucstr[i:])
772 return (ucstr[:i], ucstr[i:])
757 return ucstr, b''
773 return ucstr, b''
758
774
759 # overriding of base class
775 # overriding of base class
760 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
776 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
761 space_left = max(width - cur_len, 1)
777 space_left = max(width - cur_len, 1)
762
778
763 if self.break_long_words:
779 if self.break_long_words:
764 cut, res = self._cutdown(reversed_chunks[-1], space_left)
780 cut, res = self._cutdown(reversed_chunks[-1], space_left)
765 cur_line.append(cut)
781 cur_line.append(cut)
766 reversed_chunks[-1] = res
782 reversed_chunks[-1] = res
767 elif not cur_line:
783 elif not cur_line:
768 cur_line.append(reversed_chunks.pop())
784 cur_line.append(reversed_chunks.pop())
769
785
770 # this overriding code is imported from TextWrapper of Python 2.6
786 # this overriding code is imported from TextWrapper of Python 2.6
771 # to calculate columns of string by 'encoding.ucolwidth()'
787 # to calculate columns of string by 'encoding.ucolwidth()'
772 def _wrap_chunks(self, chunks):
788 def _wrap_chunks(self, chunks):
773 colwidth = encoding.ucolwidth
789 colwidth = encoding.ucolwidth
774
790
775 lines = []
791 lines = []
776 if self.width <= 0:
792 if self.width <= 0:
777 raise ValueError(b"invalid width %r (must be > 0)" % self.width)
793 raise ValueError(b"invalid width %r (must be > 0)" % self.width)
778
794
779 # Arrange in reverse order so items can be efficiently popped
795 # Arrange in reverse order so items can be efficiently popped
780 # from a stack of chucks.
796 # from a stack of chucks.
781 chunks.reverse()
797 chunks.reverse()
782
798
783 while chunks:
799 while chunks:
784
800
785 # Start the list of chunks that will make up the current line.
801 # Start the list of chunks that will make up the current line.
786 # cur_len is just the length of all the chunks in cur_line.
802 # cur_len is just the length of all the chunks in cur_line.
787 cur_line = []
803 cur_line = []
788 cur_len = 0
804 cur_len = 0
789
805
790 # Figure out which static string will prefix this line.
806 # Figure out which static string will prefix this line.
791 if lines:
807 if lines:
792 indent = self.subsequent_indent
808 indent = self.subsequent_indent
793 else:
809 else:
794 indent = self.initial_indent
810 indent = self.initial_indent
795
811
796 # Maximum width for this line.
812 # Maximum width for this line.
797 width = self.width - len(indent)
813 width = self.width - len(indent)
798
814
799 # First chunk on line is whitespace -- drop it, unless this
815 # First chunk on line is whitespace -- drop it, unless this
800 # is the very beginning of the text (i.e. no lines started yet).
816 # is the very beginning of the text (i.e. no lines started yet).
801 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
817 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
802 del chunks[-1]
818 del chunks[-1]
803
819
804 while chunks:
820 while chunks:
805 l = colwidth(chunks[-1])
821 l = colwidth(chunks[-1])
806
822
807 # Can at least squeeze this chunk onto the current line.
823 # Can at least squeeze this chunk onto the current line.
808 if cur_len + l <= width:
824 if cur_len + l <= width:
809 cur_line.append(chunks.pop())
825 cur_line.append(chunks.pop())
810 cur_len += l
826 cur_len += l
811
827
812 # Nope, this line is full.
828 # Nope, this line is full.
813 else:
829 else:
814 break
830 break
815
831
816 # The current line is full, and the next chunk is too big to
832 # The current line is full, and the next chunk is too big to
817 # fit on *any* line (not just this one).
833 # fit on *any* line (not just this one).
818 if chunks and colwidth(chunks[-1]) > width:
834 if chunks and colwidth(chunks[-1]) > width:
819 self._handle_long_word(chunks, cur_line, cur_len, width)
835 self._handle_long_word(chunks, cur_line, cur_len, width)
820
836
821 # If the last chunk on this line is all whitespace, drop it.
837 # If the last chunk on this line is all whitespace, drop it.
822 if (
838 if (
823 self.drop_whitespace
839 self.drop_whitespace
824 and cur_line
840 and cur_line
825 and cur_line[-1].strip() == r''
841 and cur_line[-1].strip() == r''
826 ):
842 ):
827 del cur_line[-1]
843 del cur_line[-1]
828
844
829 # Convert current line back to a string and store it in list
845 # Convert current line back to a string and store it in list
830 # of all lines (return value).
846 # of all lines (return value).
831 if cur_line:
847 if cur_line:
832 lines.append(indent + ''.join(cur_line))
848 lines.append(indent + ''.join(cur_line))
833
849
834 return lines
850 return lines
835
851
836 global _MBTextWrapper
852 global _MBTextWrapper
837 _MBTextWrapper = tw
853 _MBTextWrapper = tw
838 return tw(**kwargs)
854 return tw(**kwargs)
839
855
840
856
841 def wrap(line, width, initindent=b'', hangindent=b''):
857 def wrap(
858 line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
859 ) -> bytes:
842 maxindent = max(len(hangindent), len(initindent))
860 maxindent = max(len(hangindent), len(initindent))
843 if width <= maxindent:
861 if width <= maxindent:
844 # adjust for weird terminal size
862 # adjust for weird terminal size
845 width = max(78, maxindent + 1)
863 width = max(78, maxindent + 1)
846 line = line.decode(
864 line = line.decode(
847 pycompat.sysstr(encoding.encoding),
865 pycompat.sysstr(encoding.encoding),
848 pycompat.sysstr(encoding.encodingmode),
866 pycompat.sysstr(encoding.encodingmode),
849 )
867 )
850 initindent = initindent.decode(
868 initindent = initindent.decode(
851 pycompat.sysstr(encoding.encoding),
869 pycompat.sysstr(encoding.encoding),
852 pycompat.sysstr(encoding.encodingmode),
870 pycompat.sysstr(encoding.encodingmode),
853 )
871 )
854 hangindent = hangindent.decode(
872 hangindent = hangindent.decode(
855 pycompat.sysstr(encoding.encoding),
873 pycompat.sysstr(encoding.encoding),
856 pycompat.sysstr(encoding.encodingmode),
874 pycompat.sysstr(encoding.encodingmode),
857 )
875 )
858 wrapper = _MBTextWrapper(
876 wrapper = _MBTextWrapper(
859 width=width, initial_indent=initindent, subsequent_indent=hangindent
877 width=width, initial_indent=initindent, subsequent_indent=hangindent
860 )
878 )
861 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
879 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
862
880
863
881
864 _booleans = {
882 _booleans = {
865 b'1': True,
883 b'1': True,
866 b'yes': True,
884 b'yes': True,
867 b'true': True,
885 b'true': True,
868 b'on': True,
886 b'on': True,
869 b'always': True,
887 b'always': True,
870 b'0': False,
888 b'0': False,
871 b'no': False,
889 b'no': False,
872 b'false': False,
890 b'false': False,
873 b'off': False,
891 b'off': False,
874 b'never': False,
892 b'never': False,
875 }
893 }
876
894
877
895
878 def parsebool(s):
896 def parsebool(s: bytes) -> Optional[bool]:
879 """Parse s into a boolean.
897 """Parse s into a boolean.
880
898
881 If s is not a valid boolean, returns None.
899 If s is not a valid boolean, returns None.
882 """
900 """
883 return _booleans.get(s.lower(), None)
901 return _booleans.get(s.lower(), None)
884
902
885
903
886 def parselist(value):
904 # TODO: make arg mandatory (and fix code below?)
905 def parselist(value: Optional[bytes]):
887 """parse a configuration value as a list of comma/space separated strings
906 """parse a configuration value as a list of comma/space separated strings
888
907
889 >>> parselist(b'this,is "a small" ,test')
908 >>> parselist(b'this,is "a small" ,test')
890 ['this', 'is', 'a small', 'test']
909 ['this', 'is', 'a small', 'test']
891 """
910 """
892
911
893 def _parse_plain(parts, s, offset):
912 def _parse_plain(parts, s, offset):
894 whitespace = False
913 whitespace = False
895 while offset < len(s) and (
914 while offset < len(s) and (
896 s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
915 s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
897 ):
916 ):
898 whitespace = True
917 whitespace = True
899 offset += 1
918 offset += 1
900 if offset >= len(s):
919 if offset >= len(s):
901 return None, parts, offset
920 return None, parts, offset
902 if whitespace:
921 if whitespace:
903 parts.append(b'')
922 parts.append(b'')
904 if s[offset : offset + 1] == b'"' and not parts[-1]:
923 if s[offset : offset + 1] == b'"' and not parts[-1]:
905 return _parse_quote, parts, offset + 1
924 return _parse_quote, parts, offset + 1
906 elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
925 elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
907 parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
926 parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
908 return _parse_plain, parts, offset + 1
927 return _parse_plain, parts, offset + 1
909 parts[-1] += s[offset : offset + 1]
928 parts[-1] += s[offset : offset + 1]
910 return _parse_plain, parts, offset + 1
929 return _parse_plain, parts, offset + 1
911
930
912 def _parse_quote(parts, s, offset):
931 def _parse_quote(parts, s, offset):
913 if offset < len(s) and s[offset : offset + 1] == b'"': # ""
932 if offset < len(s) and s[offset : offset + 1] == b'"': # ""
914 parts.append(b'')
933 parts.append(b'')
915 offset += 1
934 offset += 1
916 while offset < len(s) and (
935 while offset < len(s) and (
917 s[offset : offset + 1].isspace()
936 s[offset : offset + 1].isspace()
918 or s[offset : offset + 1] == b','
937 or s[offset : offset + 1] == b','
919 ):
938 ):
920 offset += 1
939 offset += 1
921 return _parse_plain, parts, offset
940 return _parse_plain, parts, offset
922
941
923 while offset < len(s) and s[offset : offset + 1] != b'"':
942 while offset < len(s) and s[offset : offset + 1] != b'"':
924 if (
943 if (
925 s[offset : offset + 1] == b'\\'
944 s[offset : offset + 1] == b'\\'
926 and offset + 1 < len(s)
945 and offset + 1 < len(s)
927 and s[offset + 1 : offset + 2] == b'"'
946 and s[offset + 1 : offset + 2] == b'"'
928 ):
947 ):
929 offset += 1
948 offset += 1
930 parts[-1] += b'"'
949 parts[-1] += b'"'
931 else:
950 else:
932 parts[-1] += s[offset : offset + 1]
951 parts[-1] += s[offset : offset + 1]
933 offset += 1
952 offset += 1
934
953
935 if offset >= len(s):
954 if offset >= len(s):
936 real_parts = _configlist(parts[-1])
955 real_parts = _configlist(parts[-1])
937 if not real_parts:
956 if not real_parts:
938 parts[-1] = b'"'
957 parts[-1] = b'"'
939 else:
958 else:
940 real_parts[0] = b'"' + real_parts[0]
959 real_parts[0] = b'"' + real_parts[0]
941 parts = parts[:-1]
960 parts = parts[:-1]
942 parts.extend(real_parts)
961 parts.extend(real_parts)
943 return None, parts, offset
962 return None, parts, offset
944
963
945 offset += 1
964 offset += 1
946 while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
965 while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
947 offset += 1
966 offset += 1
948
967
949 if offset < len(s):
968 if offset < len(s):
950 if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
969 if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
951 parts[-1] += b'"'
970 parts[-1] += b'"'
952 offset += 1
971 offset += 1
953 else:
972 else:
954 parts.append(b'')
973 parts.append(b'')
955 else:
974 else:
956 return None, parts, offset
975 return None, parts, offset
957
976
958 return _parse_plain, parts, offset
977 return _parse_plain, parts, offset
959
978
960 def _configlist(s):
979 def _configlist(s):
961 s = s.rstrip(b' ,')
980 s = s.rstrip(b' ,')
962 if not s:
981 if not s:
963 return []
982 return []
964 parser, parts, offset = _parse_plain, [b''], 0
983 parser, parts, offset = _parse_plain, [b''], 0
965 while parser:
984 while parser:
966 parser, parts, offset = parser(parts, s, offset)
985 parser, parts, offset = parser(parts, s, offset)
967 return parts
986 return parts
968
987
969 if value is not None and isinstance(value, bytes):
988 if value is not None and isinstance(value, bytes):
970 result = _configlist(value.lstrip(b' ,\n'))
989 result = _configlist(value.lstrip(b' ,\n'))
971 else:
990 else:
972 result = value
991 result = value
973 return result or []
992 return result or []
974
993
975
994
976 def evalpythonliteral(s):
995 def evalpythonliteral(s: bytes):
977 """Evaluate a string containing a Python literal expression"""
996 """Evaluate a string containing a Python literal expression"""
978 # We could backport our tokenizer hack to rewrite '' to u'' if we want
997 # We could backport our tokenizer hack to rewrite '' to u'' if we want
979 return ast.literal_eval(s.decode('latin1'))
998 return ast.literal_eval(s.decode('latin1'))
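For callers, the practical effect of the new annotations is that type checkers can flag misuse of this bytes-first API, for example forgetting that parsebool() may return None. A hedged usage sketch against the signatures above, assuming the mercurial package is importable; the import path and sample values are illustrative:

    from typing import Optional

    # Import path assumes Mercurial's in-tree layout (mercurial/utils/stringutil.py).
    from mercurial.utils import stringutil

    value: Optional[bool] = stringutil.parsebool(b'on')  # True
    if value is None:
        # parsebool() returns None for unrecognized tokens (e.g. b'maybe'),
        # so the Optional[bool] hint makes unguarded use a checker error.
        raise ValueError('expected a boolean config value')

    assert stringutil.binary(b'ab\0cd')  # contains a NUL byte -> True
    assert stringutil.shortuser(b'Jane Doe <jane@example.com>') == b'jane'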