upstream/mercurial-mirror Commit - r37793:03d7f885

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

util,

18

util,

19

)

19

)

20

from .utils import (

20

from .utils import (

21

stringutil,

21

stringutil,

22

)

22

)

23

24

# Operator table handed to parser.parser() (see _parsewith()).
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}

# words that tokenize() emits as operator tokens instead of symbols
keywords = {'and', 'or', 'not'}

# table of revset functions, keyed by name; _optimize() reads the optional
# ``_weight`` attribute of its values. It is empty here and not filled in
# anywhere in this part of the file.
symbols = {}

# characters that open (and close) a quoted string
_quoteletters = {'"', "'"}
# single-character operators emitted verbatim by tokenize()
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

70

71

def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    Each token is a ``(type, value, position)`` tuple; the stream always
    finishes with an ``('end', None, position)`` token.

    ``program`` must be a bytes object, otherwise ProgrammingError is
    raised.

    ``lookup``, if given, is called with a candidate name and should return
    a true value when that name is an existing symbol. It is used to accept
    old-style ``a:b`` ranges and names containing ``-`` verbatim.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Raises ParseError on an unterminated string or on a character that
    cannot start any token.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % program)
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: keep the content as written
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # foo-bar-baz is ambiguous: it may be one symbol or a
                # subtraction, so ask lookup whether the name exists
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        # s is now the offset of the '-' itself; report
                        # that rather than the end of the whole symbol
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # step back so the shared "pos += 1" below lands on the first
            # character after the symbol
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)

189

192

190

# helpers

193

# helpers

191

194

192

_notset = object()

195

_notset = object()

193

196

194

def getsymbol(x):
    """Return the name held by a ``('symbol', name)`` node

    Raises ParseError if ``x`` is empty or is not a symbol node.
    """
    if x and x[0] == 'symbol':
        return x[1]
    raise error.ParseError(_('not a symbol'))

198

201

199

def getstring(x, err):
    """Return the value of a ``'string'`` or ``'symbol'`` node

    Raises ParseError(err) if ``x`` is empty or is any other kind of node.
    """
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)

203

206

204

def getinteger(x, err, default=_notset):
    """Return the integer value of a string or symbol node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned
    instead. Raises ParseError(err) if ``x`` is not a string/symbol node or
    its value cannot be converted by int().
    """
    if not x and default is not _notset:
        return default
    try:
        return int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)

211

214

212

def getboolean(x, err):
    """Return the boolean spelled by a symbol node

    The symbol name is passed to stringutil.parsebool(); if that returns
    None, ParseError(err) is raised. A non-symbol node makes getsymbol()
    raise its own ParseError.
    """
    value = stringutil.parsebool(getsymbol(x))
    if value is not None:
        return value
    raise error.ParseError(err)

217

220

218

def getlist(x):
    """Return the items of a parsed argument tree as a Python list

    An empty tree gives ``[]``, a ``('list', a, b, ...)`` node gives
    ``[a, b, ...]`` and any other node gives a one-element list.
    """
    if not x:
        return []
    if x[0] == 'list':
        return list(x[1:])
    return [x]

224

227

225

def getrange(x, err):
    """Return the ``(first, last)`` operands of a range node

    A missing side is returned as None: ``rangepre`` has no first operand,
    ``rangepost`` has no last operand and ``rangeall`` has neither.
    Raises ParseError(err) if ``x`` is empty or is not a range node.
    """
    if not x:
        raise error.ParseError(err)
    op = x[0]
    if op == 'range':
        return x[1], x[2]
    elif op == 'rangepre':
        return None, x[1]
    elif op == 'rangepost':
        return x[1], None
    elif op == 'rangeall':
        return None, None
    raise error.ParseError(err)

238

241

239

def getargs(x, min, max, err):
    """Return the argument list of ``x``, checking how many there are

    Raises ParseError(err) if there are fewer than ``min`` arguments or
    more than ``max``; a negative ``max`` disables the upper bound.
    """
    l = getlist(x)
    if len(l) < min or (max >= 0 and len(l) > max):
        raise error.ParseError(err)
    return l

244

247

245

def getargsdict(x, funcname, keys):
    """Build an argument dict for ``funcname`` from the parsed tree ``x``

    ``keys`` is an argument spec that parser.splitargspec() understands.
    The actual work is delegated to parser.buildargsdict(), with
    ``keyvalue`` nodes marking keyword arguments and ``symbol`` nodes as
    keys.
    """
    return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                keyvaluenode='keyvalue', keynode='symbol')

248

251

249

# cache of {spec: raw parsed tree} built internally

252

# cache of {spec: raw parsed tree} built internally

250

_treecache = {}

253

_treecache = {}

251

254

252

def _cachedtree(spec):
    """Return the raw parsed tree of ``spec``, parsing it on first use"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    tree = _treecache.get(spec)
    if tree is None:
        _treecache[spec] = tree = parse(spec)
    return tree

258

261

259

def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    The template is parsed (and cached) once; parser.buildtree() is then
    given the placeholder node ``('symbol', '_')`` together with ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    template = _cachedtree(tmplspec)
    return parser.buildtree(template, ('symbol', '_'), *repls)

267

270

268

def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    The pattern is parsed (and cached) once; the comparison itself is done
    by parser.matchtree() with ``('symbol', '_')`` as the placeholder node.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    pattern = _cachedtree(patspec)
    return parser.matchtree(pattern, tree, ('symbol', '_'),
                            {'keyvalue', 'list'})

279

282

280

def _matchonly(revs, bases):
    """Match ``revs and bases`` against ``ancestors(_) and not ancestors(_)``

    Returns whatever _match() returns for that pattern.
    """
    return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))

282

285

283

def _fixops(x):

286

def _fixops(x):

284

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

287

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

285

handled well by our simple top-down parser"""

288

handled well by our simple top-down parser"""

286

if not isinstance(x, tuple):

289

if not isinstance(x, tuple):

287

return x

290

return x

288

291

289

op = x[0]

292

op = x[0]

290

if op == 'parent':

293

if op == 'parent':

291

# x^:y means (x^) : y, not x ^ (:y)

294

# x^:y means (x^) : y, not x ^ (:y)

292

# x^: means (x^) :, not x ^ (:)

295

# x^: means (x^) :, not x ^ (:)

293

post = ('parentpost', x[1])

296

post = ('parentpost', x[1])

294

if x[2][0] == 'dagrangepre':

297

if x[2][0] == 'dagrangepre':

295

return _fixops(('dagrange', post, x[2][1]))

298

return _fixops(('dagrange', post, x[2][1]))

296

elif x[2][0] == 'dagrangeall':

299

elif x[2][0] == 'dagrangeall':

297

return _fixops(('dagrangepost', post))

300

return _fixops(('dagrangepost', post))

298

elif x[2][0] == 'rangepre':

301

elif x[2][0] == 'rangepre':

299

return _fixops(('range', post, x[2][1]))

302

return _fixops(('range', post, x[2][1]))

300

elif x[2][0] == 'rangeall':

303

elif x[2][0] == 'rangeall':

301

return _fixops(('rangepost', post))

304

return _fixops(('rangepost', post))

302

elif op == 'or':

305

elif op == 'or':

303

# make number of arguments deterministic:

306

# make number of arguments deterministic:

304

# x + y + z -> (or x y z) -> (or (list x y z))

307

# x + y + z -> (or x y z) -> (or (list x y z))

305

return (op, _fixops(('list',) + x[1:]))

308

return (op, _fixops(('list',) + x[1:]))

306

elif op == 'subscript' and x[1][0] == 'relation':

309

elif op == 'subscript' and x[1][0] == 'relation':

307

# x#y[z] ternary

310

# x#y[z] ternary

308

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

311

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

309

312

310

return (op,) + tuple(_fixops(y) for y in x[1:])

313

return (op,) + tuple(_fixops(y) for y in x[1:])

311

314

312

def _analyze(x):

315

def _analyze(x):

313

if x is None:

316

if x is None:

314

return x

317

return x

315

318

316

op = x[0]

319

op = x[0]

317

if op == 'minus':

320

if op == 'minus':

318

return _analyze(_build('_ and not _', *x[1:]))

321

return _analyze(_build('_ and not _', *x[1:]))

319

elif op == 'only':

322

elif op == 'only':

320

return _analyze(_build('only(_, _)', *x[1:]))

323

return _analyze(_build('only(_, _)', *x[1:]))

321

elif op == 'onlypost':

324

elif op == 'onlypost':

322

return _analyze(_build('only(_)', x[1]))

325

return _analyze(_build('only(_)', x[1]))

323

elif op == 'dagrangeall':

326

elif op == 'dagrangeall':

324

raise error.ParseError(_("can't use '::' in this context"))

327

raise error.ParseError(_("can't use '::' in this context"))

325

elif op == 'dagrangepre':

328

elif op == 'dagrangepre':

326

return _analyze(_build('ancestors(_)', x[1]))

329

return _analyze(_build('ancestors(_)', x[1]))

327

elif op == 'dagrangepost':

330

elif op == 'dagrangepost':

328

return _analyze(_build('descendants(_)', x[1]))

331

return _analyze(_build('descendants(_)', x[1]))

329

elif op == 'negate':

332

elif op == 'negate':

330

s = getstring(x[1], _("can't negate that"))

333

s = getstring(x[1], _("can't negate that"))

331

return _analyze(('string', '-' + s))

334

return _analyze(('string', '-' + s))

332

elif op in ('string', 'symbol'):

335

elif op in ('string', 'symbol'):

333

return x

336

return x

334

elif op == 'rangeall':

337

elif op == 'rangeall':

335

return (op, None)

338

return (op, None)

336

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

339

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

337

return (op, _analyze(x[1]))

340

return (op, _analyze(x[1]))

338

elif op == 'group':

341

elif op == 'group':

339

return _analyze(x[1])

342

return _analyze(x[1])

340

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

343

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

341

'subscript'}:

344

'subscript'}:

342

ta = _analyze(x[1])

345

ta = _analyze(x[1])

343

tb = _analyze(x[2])

346

tb = _analyze(x[2])

344

return (op, ta, tb)

347

return (op, ta, tb)

345

elif op == 'relsubscript':

348

elif op == 'relsubscript':

346

ta = _analyze(x[1])

349

ta = _analyze(x[1])

347

tb = _analyze(x[2])

350

tb = _analyze(x[2])

348

tc = _analyze(x[3])

351

tc = _analyze(x[3])

349

return (op, ta, tb, tc)

352

return (op, ta, tb, tc)

350

elif op == 'list':

353

elif op == 'list':

351

return (op,) + tuple(_analyze(y) for y in x[1:])

354

return (op,) + tuple(_analyze(y) for y in x[1:])

352

elif op == 'keyvalue':

355

elif op == 'keyvalue':

353

return (op, x[1], _analyze(x[2]))

356

return (op, x[1], _analyze(x[2]))

354

elif op == 'func':

357

elif op == 'func':

355

f = getsymbol(x[1])

358

f = getsymbol(x[1])

356

if f == 'revset':

359

if f == 'revset':

357

return _analyze(x[2])

360

return _analyze(x[2])

358

return (op, x[1], _analyze(x[2]))

361

return (op, x[1], _analyze(x[2]))

359

raise ValueError('invalid operator %r' % op)

362

raise ValueError('invalid operator %r' % op)

360

363

361

def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    return _analyze(x)

369

372

370

def _optimize(x):

373

def _optimize(x):

371

if x is None:

374

if x is None:

372

return 0, x

375

return 0, x

373

376

374

op = x[0]

377

op = x[0]

375

if op in ('string', 'symbol'):

378

if op in ('string', 'symbol'):

376

return 0.5, x # single revisions are small

379

return 0.5, x # single revisions are small

377

elif op == 'and':

380

elif op == 'and':

378

wa, ta = _optimize(x[1])

381

wa, ta = _optimize(x[1])

379

wb, tb = _optimize(x[2])

382

wb, tb = _optimize(x[2])

380

w = min(wa, wb)

383

w = min(wa, wb)

381

384

382

# (draft/secret/_notpublic() & ::x) have a fast path

385

# (draft/secret/_notpublic() & ::x) have a fast path

383

m = _match('_() & ancestors(_)', ('and', ta, tb))

386

m = _match('_() & ancestors(_)', ('and', ta, tb))

384

if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:

387

if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:

385

return w, _build('_phaseandancestors(_, _)', m[1], m[2])

388

return w, _build('_phaseandancestors(_, _)', m[1], m[2])

386

389

387

# (::x and not ::y)/(not ::y and ::x) have a fast path

390

# (::x and not ::y)/(not ::y and ::x) have a fast path

388

m = _matchonly(ta, tb) or _matchonly(tb, ta)

391

m = _matchonly(ta, tb) or _matchonly(tb, ta)

389

if m:

392

if m:

390

return w, _build('only(_, _)', *m[1:])

393

return w, _build('only(_, _)', *m[1:])

391

394

392

m = _match('not _', tb)

395

m = _match('not _', tb)

393

if m:

396

if m:

394

return wa, ('difference', ta, m[1])

397

return wa, ('difference', ta, m[1])

395

if wa > wb:

398

if wa > wb:

396

op = 'andsmally'

399

op = 'andsmally'

397

return w, (op, ta, tb)

400

return w, (op, ta, tb)

398

elif op == 'or':

401

elif op == 'or':

399

# fast path for machine-generated expression, that is likely to have

402

# fast path for machine-generated expression, that is likely to have

400

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

403

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

401

ws, ts, ss = [], [], []

404

ws, ts, ss = [], [], []

402

def flushss():

405

def flushss():

403

if not ss:

406

if not ss:

404

return

407

return

405

if len(ss) == 1:

408

if len(ss) == 1:

406

w, t = ss[0]

409

w, t = ss[0]

407

else:

410

else:

408

s = '\0'.join(t[1] for w, t in ss)

411

s = '\0'.join(t[1] for w, t in ss)

409

y = _build('_list(_)', ('string', s))

412

y = _build('_list(_)', ('string', s))

410

w, t = _optimize(y)

413

w, t = _optimize(y)

411

ws.append(w)

414

ws.append(w)

412

ts.append(t)

415

ts.append(t)

413

del ss[:]

416

del ss[:]

414

for y in getlist(x[1]):

417

for y in getlist(x[1]):

415

w, t = _optimize(y)

418

w, t = _optimize(y)

416

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

419

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

417

ss.append((w, t))

420

ss.append((w, t))

418

continue

421

continue

419

flushss()

422

flushss()

420

ws.append(w)

423

ws.append(w)

421

ts.append(t)

424

ts.append(t)

422

flushss()

425

flushss()

423

if len(ts) == 1:

426

if len(ts) == 1:

424

return ws[0], ts[0] # 'or' operation is fully optimized out

427

return ws[0], ts[0] # 'or' operation is fully optimized out

425

return max(ws), (op, ('list',) + tuple(ts))

428

return max(ws), (op, ('list',) + tuple(ts))

426

elif op == 'not':

429

elif op == 'not':

427

# Optimize not public() to _notpublic() because we have a fast version

430

# Optimize not public() to _notpublic() because we have a fast version

428

if _match('public()', x[1]):

431

if _match('public()', x[1]):

429

o = _optimize(_build('_notpublic()'))

432

o = _optimize(_build('_notpublic()'))

430

return o[0], o[1]

433

return o[0], o[1]

431

else:

434

else:

432

o = _optimize(x[1])

435

o = _optimize(x[1])

433

return o[0], (op, o[1])

436

return o[0], (op, o[1])

434

elif op == 'rangeall':

437

elif op == 'rangeall':

435

return 1, x

438

return 1, x

436

elif op in ('rangepre', 'rangepost', 'parentpost'):

439

elif op in ('rangepre', 'rangepost', 'parentpost'):

437

o = _optimize(x[1])

440

o = _optimize(x[1])

438

return o[0], (op, o[1])

441

return o[0], (op, o[1])

439

elif op in ('dagrange', 'range'):

442

elif op in ('dagrange', 'range'):

440

wa, ta = _optimize(x[1])

443

wa, ta = _optimize(x[1])

441

wb, tb = _optimize(x[2])

444

wb, tb = _optimize(x[2])

442

return wa + wb, (op, ta, tb)

445

return wa + wb, (op, ta, tb)

443

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

446

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

444

w, t = _optimize(x[1])

447

w, t = _optimize(x[1])

445

return w, (op, t, x[2])

448

return w, (op, t, x[2])

446

elif op == 'relsubscript':

449

elif op == 'relsubscript':

447

w, t = _optimize(x[1])

450

w, t = _optimize(x[1])

448

return w, (op, t, x[2], x[3])

451

return w, (op, t, x[2], x[3])

449

elif op == 'list':

452

elif op == 'list':

450

ws, ts = zip(*(_optimize(y) for y in x[1:]))

453

ws, ts = zip(*(_optimize(y) for y in x[1:]))

451

return sum(ws), (op,) + ts

454

return sum(ws), (op,) + ts

452

elif op == 'keyvalue':

455

elif op == 'keyvalue':

453

w, t = _optimize(x[2])

456

w, t = _optimize(x[2])

454

return w, (op, x[1], t)

457

return w, (op, x[1], t)

455

elif op == 'func':

458

elif op == 'func':

456

f = getsymbol(x[1])

459

f = getsymbol(x[1])

457

wa, ta = _optimize(x[2])

460

wa, ta = _optimize(x[2])

458

w = getattr(symbols.get(f), '_weight', 1)

461

w = getattr(symbols.get(f), '_weight', 1)

459

return w + wa, (op, x[1], ta)

462

return w + wa, (op, x[1], ta)

460

raise ValueError('invalid operator %r' % op)

463

raise ValueError('invalid operator %r' % op)

461

464

462

def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.

    Returns only the rewritten tree; the weight computed by _optimize() is
    discarded.
    """
    _weight, newtree = _optimize(tree)
    return newtree

469

472

470

# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions
_aliassyminitletters = _syminitletters | {'$'}

473

476

474

def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` and ``syminitletters`` are forwarded to tokenize(). The
    returned tree has already been passed through
    parser.simplifyinfixops() and _fixops().

    Raises ParseError if the parser stops before the end of ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    # a spec of the form revset(...) is tokenized without the lookup
    # function, so no part of it is resolved as an existing name
    if lookup and spec.startswith('revset(') and spec.endswith(')'):
        lookup = None
    p = parser.parser(elements)
    tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                 syminitletters=syminitletters))
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

496

499

497

class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        Symbol names may also start with ``$`` here (kept for backward
        compatibility), so callers of this function should examine
        whether ``$`` is used also for unexpected symbols or not.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` is a 'func' node whose
        callee is a 'symbol'; otherwise return None
        """
        if tree[0] != 'func' or tree[1][0] != 'symbol':
            return None
        funcname = tree[1][1]
        return funcname, getlist(tree[2])

515

518

516

def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    The alias list is first turned into a map by ``_aliasrules.buildmap()``
    and the tree is then rewritten by ``_aliasrules.expand()``.

    If ``warn`` is given, it is called with a ``'warning: ...\\n'`` message
    for every alias (in sorted name order) that carries an ``error`` and
    has not been warned about yet; each such alias is then flagged as
    ``warned`` so it is reported only once.

    Returns the expanded tree.
    """
    aliases = _aliasrules.buildmap(aliases)
    tree = _aliasrules.expand(aliases, tree)
    # warn about problematic (but not referred) aliases
    if warn is not None:
        # items() instead of iteritems(): the latter only exists on
        # Python 2 dicts, while items() yields the same pairs on both
        for name, alias in sorted(aliases.items()):
            if alias.error and not alias.warned:
                warn(_('warning: %s\n') % (alias.error))
                alias.warned = True
    return tree

527

530

528

def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    A ``_concat`` node is replaced by a single ``('string', ...)`` node
    holding the joined values of its 'string'/'symbol' operands (nested
    ``_concat`` nodes are flattened left to right). Any other operand type
    raises ``error.ParseError``. Other tuples are rebuilt with every
    element folded; non-tuples and 'string'/'symbol' nodes come back as is.
    """
    if not isinstance(tree, tuple):
        return tree
    kind = tree[0]
    if kind in ('string', 'symbol'):
        return tree
    if kind != '_concat':
        return tuple(foldconcat(sub) for sub in tree)

    # explicit stack instead of recursion; operands are pushed reversed so
    # that they are popped in their original left-to-right order
    stack = [tree]
    parts = []
    while stack:
        item = stack.pop()
        itemkind = item[0]
        if itemkind == '_concat':
            stack.extend(reversed(item[1:]))
        elif itemkind in ('string', 'symbol'):
            parts.append(item[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (item[0])
            raise error.ParseError(msg)
    return ('string', ''.join(parts))

548

551

549

def parse(spec, lookup=None):
    """Parse ``spec`` with ``_parsewith()`` and return the resulting tree

    An ``error.ParseError`` that carries a location (a second item in its
    ``args``) gets a ``hint`` attribute showing the spec and a caret under
    the failing position before it is re-raised.
    """
    try:
        tree = _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        if len(inst.args) > 1:  # has location
            loc = inst.args[1]
            # Remove newlines -- spaces are equivalent whitespace.
            oneline = spec.replace('\n', ' ')
            # We want the caret to point to the place in the spec that
            # failed to parse, but in a hint we get a open paren at the
            # start. Therefore, we print "loc + 1" spaces (instead of "loc")
            # to line up the caret with the location of the error.
            caretline = ' ' * (loc + 1) + '^ ' + _('here')
            inst.hint = oneline + '\n' + caretline
        raise
    return tree

563

566

564

def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted with ``pycompat.bytestr()``, escaped with
    ``stringutil.escapestr()`` and wrapped in single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = stringutil.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped

577

580

578

def _formatargtype(c, arg):

581

def _formatargtype(c, arg):

579

if c == 'd':

582

if c == 'd':

580

return '%d' % int(arg)

583

return '%d' % int(arg)

581

elif c == 's':

584

elif c == 's':

582

return _quote(arg)

585

return _quote(arg)

583

elif c == 'r':

586

elif c == 'r':

587

if not isinstance(arg, bytes):

588

raise TypeError

584

parse(arg) # make sure syntax errors are confined

589

parse(arg) # make sure syntax errors are confined

585

return '(%s)' % arg

590

return '(%s)' % arg

586

elif c == 'n':

591

elif c == 'n':

587

return _quote(node.hex(arg))

592

return _quote(node.hex(arg))

588

elif c == 'b':

593

elif c == 'b':

589

try:

594

try:

590

return _quote(arg.branch())

595

return _quote(arg.branch())

591

except AttributeError:

596

except AttributeError:

592

raise TypeError

597

raise TypeError

593

raise error.ParseError(_('unexpected revspec format character %s') % c)

598

raise error.ParseError(_('unexpected revspec format character %s') % c)

594

599

595

def _formatlistexp(s, t):

600

def _formatlistexp(s, t):

596

l = len(s)

601

l = len(s)

597

if l == 0:

602

if l == 0:

598

return "_list('')"

603

return "_list('')"

599

elif l == 1:

604

elif l == 1:

600

return _formatargtype(t, s[0])

605

return _formatargtype(t, s[0])

601

elif t == 'd':

606

elif t == 'd':

602

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

607

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

603

elif t == 's':

608

elif t == 's':

604

return "_list(%s)" % _quote("\0".join(s))

609

return "_list(%s)" % _quote("\0".join(s))

605

elif t == 'n':

610

elif t == 'n':

606

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

611

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

607

elif t == 'b':

612

elif t == 'b':

608

try:

613

try:

609

return "_list('%s')" % "\0".join(a.branch() for a in s)

614

return "_list('%s')" % "\0".join(a.branch() for a in s)

610

except AttributeError:

615

except AttributeError:

611

raise TypeError

616

raise TypeError

612

617

613

m = l // 2

618

m = l // 2

614

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

619

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

615

620

616

def _formatparamexp(args, t):

621

def _formatparamexp(args, t):

617

return ', '.join(_formatargtype(t, a) for a in args)

622

return ', '.join(_formatargtype(t, a) for a in args)

618

623

619

# List-prefix dispatch table used by formatspec(): maps the prefix character
# of a format sequence to the function called as f(list_of_args, type_char).
_formatlistfuncs = {
    'l': _formatlistexp,
    'p': _formatparamexp,
}

623

628

624

def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = int(arg), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    error.ParseError is raised for a truncated format sequence, for a
    missing, surplus or invalid argument, and for an unknown type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    '10:: and not 20::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or 1"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    expr = pycompat.bytestr(expr)
    pending = iter(args)
    pieces = []
    cursor = 0
    while cursor < len(expr):
        percent = expr.find('%', cursor)
        if percent < 0:
            # no format sequence left: keep the tail verbatim
            pieces.append(expr[cursor:])
            break
        pieces.append(expr[cursor:percent])
        cursor = percent + 1
        try:
            code = expr[cursor]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if code == '%':
            # '%%' is a literal percent sign and consumes no argument
            pieces.append(code)
            cursor += 1
            continue

        try:
            arg = next(pending)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))

        listfunc = _formatlistfuncs.get(code)
        if listfunc:
            # a list of some type: the element type follows the prefix
            cursor += 1
            try:
                code = expr[cursor]
            except IndexError:
                raise error.ParseError(_('incomplete revspec format character'))
        try:
            if listfunc:
                piece = listfunc(list(arg), code)
            else:
                piece = _formatargtype(code, arg)
        except (TypeError, ValueError):
            raise error.ParseError(_('invalid argument for revspec'))
        pieces.append(piece)
        cursor += 1

    # every argument must have been consumed by a format sequence
    nomore = object()
    if next(pending, nomore) is not nomore:
        raise error.ParseError(_('too many revspec arguments specified'))
    return ''.join(pieces)

710

715

711

def prettyformat(tree):
    """Format ``tree`` through ``parser.prettyformat()``, passing 'string'
    and 'symbol' as the node types given as its second argument
    """
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)

713

718

714

def depth(tree):
    """Return the nesting depth of ``tree``

    Anything that is not a tuple has depth 0; a tuple is one level deeper
    than its deepest element.
    """
    if not isinstance(tree, tuple):
        return 0
    return max(depth(child) for child in tree) + 1

719

724

720

def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``

    Non-tuples and 'string'/'symbol' nodes contribute nothing; a 'func'
    node contributes the name held by its first child.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    found = set()
    for child in tree[1:]:
        found.update(funcsused(child))
    if tree[0] == 'func':
        found.add(tree[1][1])
    return found

730

735

731

_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

736

_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

732

737

733

def _ishashlikesymbol(symbol):
    """returns true if the symbol looks like a hash

    The value returned is the result of ``_hashre.match()``, so it is
    meant to be used for its truthiness only.
    """
    hashmatch = _hashre.match(symbol)
    return hashmatch

736

741

737

def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        # a leaf: it is either hash-like itself or contributes nothing
        return [tree[1]] if _ishashlikesymbol(tree[1]) else []
    if len(tree) < 3:
        return []

    # collect from every child, in order
    found = []
    for child in tree[1:]:
        found.extend(gethashlikesymbols(child))
    return found

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             # Operator table handed to parser.parser() by _parsewith(). Each key is a
             # token type produced by tokenize(); the value layout is given below.
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             # words that tokenize() emits as operator tokens instead of symbols
             keywords = {'and', 'or', 'not'}
             # _optimize() reads an optional '_weight' attribute from entries of this
             # table; it is empty here -- presumably filled in by another module
             symbols = {}
             # characters that open (and must close) a quoted string in tokenize()
             _quoteletters = {'"', "'"}
             # single-character operators that tokenize() yields as-is
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens

                 Each token is a ``(type, value, pos)`` tuple, ``pos`` being the
                 offset in ``program`` the token refers to. The stream always ends
                 with an ``('end', None, pos)`` token.

                 ``program`` must be bytes; any other type raises
                 ``error.ProgrammingError``.

                 ``lookup``, if given, is called with a candidate name and should
                 return a true value when that name is a known symbol. It is used to
                 recognize old-style ``a:b`` ranges and names containing ``-``.

                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.

                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.

                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

                 Raises ``error.ParseError`` on an unterminated string and on a
                 character that cannot start any token.

                 Check that @ is a valid unquoted token character (issue3686):

                 >>> list(tokenize(b"@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

                 The '-' of a spaceless expression is reported at its own offset:

                 >>> list(tokenize(b"a-b"))
                 [('symbol', 'a', 0), ('-', None, 1), ('symbol', 'b', 2), ('end', None, 3)]
                 '''
                 if not isinstance(program, bytes):
                     raise error.ProgrammingError('revset statement must be bytes, got %r'
                                                  % program)
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             # raw string: step onto the quote, keep escapes verbatim
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     # s now sits on the '-' itself; pos is the end
                                     # of the whole foo-bar-baz run and would be wrong
                                     yield ('-', None, s)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         # the scan stopped one past the symbol; undo the shared += 1
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             # sentinel meaning "no default supplied" (see getinteger()), so that None
             # remains usable as a real default value
             _notset = object()
             def getsymbol(x):
                 """Return the name held by a ``('symbol', name)`` node

                 Raises error.ParseError for an empty tree or any other node type.
                 """
                 if not x or x[0] != 'symbol':
                     raise error.ParseError(_('not a symbol'))
                 return x[1]
             def getstring(x, err):
                 """Return the value of a 'string' or 'symbol' node

                 Raises error.ParseError(err) for an empty tree or another node type.
                 """
                 if x and x[0] in ('string', 'symbol'):
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 """Return the integer value of a 'string' or 'symbol' node

                 An empty tree yields ``default`` when one was supplied. Raises
                 error.ParseError(err) when the node is of another type or its value
                 is not a valid integer literal.
                 """
                 if default is not _notset and not x:
                     return default
                 try:
                     value = int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
                 return value
             def getboolean(x, err):
                 """Return the boolean spelled by a symbol node

                 The symbol name is handed to stringutil.parsebool(); when that
                 returns None, error.ParseError(err) is raised.
                 """
                 parsed = stringutil.parsebool(getsymbol(x))
                 if parsed is None:
                     raise error.ParseError(err)
                 return parsed
             def getlist(x):
                 """Return the elements of a tree as a Python list

                 An empty tree gives ``[]``, a 'list' node gives its children, and any
                 other node gives a one-element list holding that node.
                 """
                 if not x:
                     return []
                 if x[0] != 'list':
                     return [x]
                 return list(x[1:])
             def getrange(x, err):
                 """Return the ``(first, last)`` operands of a range node

                 A missing side is reported as None. Raises error.ParseError(err) for
                 an empty tree or a node that is not one of the four range forms.
                 """
                 if not x:
                     raise error.ParseError(err)
                 kind = x[0]
                 if kind == 'rangeall':
                     return None, None
                 if kind == 'rangepost':
                     return x[1], None
                 if kind == 'rangepre':
                     return None, x[1]
                 if kind == 'range':
                     return x[1], x[2]
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 """Return the argument list of ``x`` after checking its length

                 Raises error.ParseError(err) when there are fewer than ``min``
                 arguments, or more than ``max`` (a negative ``max`` disables the
                 upper bound).
                 """
                 args = getlist(x)
                 count = len(args)
                 if count < min:
                     raise error.ParseError(err)
                 if max >= 0 and count > max:
                     raise error.ParseError(err)
                 return args
             def getargsdict(x, funcname, keys):
                 """Build an arguments dict for ``funcname`` from the tree ``x``

                 ``keys`` is an argument spec understood by parser.splitargspec();
                 the actual work is delegated to parser.buildargsdict().
                 """
                 args = getlist(x)
                 argspec = parser.splitargspec(keys)
                 return parser.buildargsdict(args, funcname, argspec,
                                             keyvaluenode='keyvalue', keynode='symbol')
             # {spec: raw parsed tree} memo for specs used internally by this module
             _treecache = {}
             def _cachedtree(spec):
                 """Return the raw parsed tree of ``spec``, parsing it at most once"""
                 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
                 cached = _treecache.get(spec)
                 if cached is not None:
                     return cached
                 fresh = parse(spec)
                 _treecache[spec] = fresh
                 return fresh
             def _build(tmplspec, *repls):
                 """Instantiate a template revset statement as a raw parsed tree

                 Each ``_`` symbol in ``tmplspec`` is a placeholder that
                 parser.buildtree() substitutes with the trees given in ``repls``.

                 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 placeholder = ('symbol', '_')
                 return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
             def _match(patspec, tree):
                 """Match ``tree`` against a pattern statement and return the matches

                 ``_`` symbols in ``patspec`` act as placeholders; the comparison is
                 delegated to parser.matchtree().

                 >>> _match(b'f(_)', parse(b'f()'))
                 >>> _match(b'f(_)', parse(b'f(1)'))
                 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
                 >>> _match(b'f(_)', parse(b'f(1, 2)'))
                 """
                 placeholder = ('symbol', '_')
                 pattern = _cachedtree(patspec)
                 return parser.matchtree(pattern, tree, placeholder,
                                         {'keyvalue', 'list'})
             def _matchonly(revs, bases):
                 """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'"""
                 candidate = ('and', revs, bases)
                 return _match('ancestors(_) and not ancestors(_)', candidate)
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser"""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     if x[2][0] == 'dagrangepre':
                         return _fixops(('dagrange', post, x[2][1]))
                     elif x[2][0] == 'dagrangeall':
                         return _fixops(('dagrangepost', post))
                     elif x[2][0] == 'rangepre':
                         return _fixops(('range', post, x[2][1]))
                     elif x[2][0] == 'rangeall':
                         return _fixops(('rangepost', post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 elif op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
                 return (op,) + tuple(_fixops(y) for y in x[1:])
             def _analyze(x):
                 """Recursively rewrite a raw parsed tree into an evaluatable tree

                 Pseudo operators are replaced by the function call or operator they
                 stand for; ``None`` is passed through unchanged. Raises
                 error.ParseError for a 'dagrangeall' node (and whatever getstring()
                 or getsymbol() raise), and ValueError for an unknown operator.
                 """
                 if x is None:
                     return x
                 op = x[0]
                 if op == 'minus':
                     # x - y  ->  x and not y
                     return _analyze(_build('_ and not _', *x[1:]))
                 elif op == 'only':
                     return _analyze(_build('only(_, _)', *x[1:]))
                 elif op == 'onlypost':
                     return _analyze(_build('only(_)', x[1]))
                 elif op == 'dagrangeall':
                     raise error.ParseError(_("can't use '::' in this context"))
                 elif op == 'dagrangepre':
                     return _analyze(_build('ancestors(_)', x[1]))
                 elif op == 'dagrangepost':
                     return _analyze(_build('descendants(_)', x[1]))
                 elif op == 'negate':
                     # fold the sign into the literal: -x becomes the string '-x'
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 elif op in ('string', 'symbol'):
                     return x
                 elif op == 'rangeall':
                     return (op, None)
                 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 elif op == 'group':
                     # parentheses carry no meaning of their own; drop the node
                     return _analyze(x[1])
                 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                             'subscript'}:
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     return (op, ta, tb)
                 elif op == 'relsubscript':
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     tc = _analyze(x[3])
                     return (op, ta, tb, tc)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y) for y in x[1:])
                 elif op == 'keyvalue':
                     # the key node x[1] is kept as-is; only the value is analyzed
                     return (op, x[1], _analyze(x[2]))
                 elif op == 'func':
                     f = getsymbol(x[1])
                     if f == 'revset':
                         # revset(...) is unwrapped to its argument
                         return _analyze(x[2])
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Turn a raw parsed tree into an evaluatable tree

                 The result can be passed to optimize() or getset(). Every pseudo
                 operation should end up mapped to a real operation or function
                 defined in the methods or symbols table respectively.
                 """
                 evaluatable = _analyze(x)
                 return evaluatable
             def _optimize(x):
                 """Return a ``(weight, tree)`` pair for the evaluatable tree ``x``

                 ``tree`` is ``x`` with some sub-expressions replaced by equivalent
                 forms, and ``weight`` is a number computed from the weights of the
                 operands (see each branch). ``None`` yields ``(0, None)``; an
                 unknown operator raises ValueError.
                 """
                 if x is None:
                     return 0, x
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return 0.5, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     w = min(wa, wb)
                     # (draft/secret/_notpublic() & ::x) have a fast path
                     m = _match('_() & ancestors(_)', ('and', ta, tb))
                     if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
                         return w, _build('_phaseandancestors(_, _)', m[1], m[2])
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
                     # x and not y  ->  difference(x, y), weighted like x alone
                     m = _match('not _', tb)
                     if m:
                         return wa, ('difference', ta, m[1])
                     # left operand is the heavier one: emit 'andsmally' instead
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     # ws/ts collect the weights/trees of the result; ss buffers a run
                     # of consecutive string/symbol operands not yet flushed
                     ws, ts, ss = [], [], []
                     def flushss():
                         # move the buffered run into ws/ts, merging two or more
                         # entries into a single _list() call with NUL-joined names
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if _match('public()', x[1]):
                         o = _optimize(_build('_notpublic()'))
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1])
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return 1, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1])
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     # only the first operand is optimized; x[2] is kept verbatim
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2])
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2])
                     # a function without a '_weight' attribute (or one missing from
                     # the symbols table) counts as weight 1
                     w = getattr(symbols.get(f), '_weight', 1)
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Return the optimized form of an evaluatable tree

                 All pseudo operations should be transformed beforehand. The weight
                 computed along the way is discarded.
                 """
                 weighted = _optimize(tree)
                 return weighted[1]
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions: the defaults plus '$'
             # (_aliasrules._parse() tokenizes alias specs with this set)
             _aliassyminitletters = _syminitletters | {'$'}
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Build the parse tree of ``spec`` using the given tokenizer options

                 ``lookup`` is dropped when ``spec`` has the form ``revset(...)``.
                 Raises error.ParseError('invalid token') when parsing stops before
                 the end of ``spec``.

                 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith(b'$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith(b'foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 if lookup:
                     if spec.startswith('revset(') and spec.endswith(')'):
                         lookup = None
                 revsetparser = parser.parser(elements)
                 tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
                 tree, consumed = revsetparser.parse(tokens)
                 if consumed != len(spec):
                     raise error.ParseError(_('invalid token'), consumed)
                 simplified = parser.simplifyinfixops(tree, ('list', 'or'))
                 return _fixops(simplified)
             class _aliasrules(parser.basealiasrules):
                 """Rules for parsing and expanding revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse the alias declaration or definition ``spec``

                     Unlike regular revsets, symbol names may also start with ``$``
                     here (kept for backward compatibility). Callers are expected to
                     check whether ``$`` shows up in symbols where it is not wanted.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     """Return ``(name, args)`` if ``tree`` calls a named function

                     Any other tree gives None.
                     """
                     if tree[0] != 'func':
                         return None
                     callee = tree[1]
                     if callee[0] != 'symbol':
                         return None
                     return callee[1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Return ``tree`` with aliases expanded

                 ``aliases`` is a list of (name, value) tuples. When ``warn`` is
                 given, it is called once for every alias that carries an error and
                 has not been warned about yet.
                 """
                 aliasmap = _aliasrules.buildmap(aliases)
                 expanded = _aliasrules.expand(aliasmap, tree)
                 if warn is None:
                     return expanded
                 # warn about problematic (but not referred) aliases
                 for name, alias in sorted(aliasmap.iteritems()):
                     if not alias.error or alias.warned:
                         continue
                     warn(_('warning: %s\n') % (alias.error))
                     alias.warned = True
                 return expanded
             def foldconcat(tree):
                 """Collapse every `##` concatenation in ``tree`` into one string node

                 Raises error.ParseError when an operand of `##` is neither a string
                 nor a symbol.
                 """
                 if not isinstance(tree, tuple):
                     return tree
                 if tree[0] in ('string', 'symbol'):
                     return tree
                 if tree[0] != '_concat':
                     return tuple(foldconcat(sub) for sub in tree)
                 # depth-first walk; operands are pushed right-to-left so that they
                 # are popped in source order
                 stack = [tree]
                 pieces = []
                 while stack:
                     current = stack.pop()
                     kind = current[0]
                     if kind == '_concat':
                         stack.extend(current[:0:-1])
                         continue
                     if kind not in ('string', 'symbol'):
                         msg = _("\"##\" can't concatenate \"%s\" element") % (current[0])
                         raise error.ParseError(msg)
                     pieces.append(current[1])
                 return ('string', ''.join(pieces))
             def parse(spec, lookup=None):
                 """Parse the revset ``spec`` and return its raw tree

                 A ParseError that carries a location gets a ``hint`` attribute showing
                 ``spec`` with a caret under the offending position before it is
                 re-raised.
                 """
                 try:
                     return _parsewith(spec, lookup=lookup)
                 except error.ParseError as exc:
                     haslocation = len(exc.args) > 1
                     if haslocation:
                         offset = exc.args[1]
                         # Remove newlines -- spaces are equivalent whitespace.
                         flat = spec.replace('\n', ' ')
                         # We want the caret to point to the place in the template that
                         # failed to parse, but in a hint we get a open paren at the
                         # start. Therefore, we print "offset + 1" spaces (instead of
                         # "offset") to line up the caret with the location of the error.
                         caret = ' ' * (offset + 1) + '^ ' + _('here')
                         exc.hint = flat + '\n' + caret
                     raise
             def _quote(s):
                 r"""Return ``s`` escaped and single-quoted for the revset engine.

                 >>> _quote(b'asdf')
                 "'asdf'"
                 >>> _quote(b"asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote(b'asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 escaped = stringutil.escapestr(pycompat.bytestr(s))
                 return "'%s'" % escaped
             def _formatargtype(c, arg):
                 if c == 'd':
                     return '%d' % int(arg)
                 elif c == 's':
                     return _quote(arg)
                 elif c == 'r':
+                    if not isinstance(arg, bytes):
+                        raise TypeError
                     parse(arg) # make sure syntax errors are confined
                     return '(%s)' % arg
                 elif c == 'n':
                     return _quote(node.hex(arg))
                 elif c == 'b':
                     try:
                         return _quote(arg.branch())
                     except AttributeError:
                         raise TypeError
                 raise error.ParseError(_('unexpected revspec format character %s') % c)
             def _formatlistexp(s, t):
                 l = len(s)
                 if l == 0:
                     return "_list('')"
                 elif l == 1:
                     return _formatargtype(t, s[0])
                 elif t == 'd':
                     return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
                 elif t == 's':
                     return "_list(%s)" % _quote("\0".join(s))
                 elif t == 'n':
                     return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                 elif t == 'b':
                     try:
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     except AttributeError:
                         raise TypeError
                 m = l // 2
                 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
             def _formatparamexp(args, t):
                 return ', '.join(_formatargtype(t, a) for a in args)
             # list-style format prefixes understood by formatspec(): the character
             # right after '%' selects one of these functions, and the character
             # following it is the element type passed as the second argument
             _formatlistfuncs = {
                 'l': _formatlistexp,
                 'p': _formatparamexp,
             }
             def formatspec(expr, *args):
                 '''
                 Build a revset expression from a format string, escaping each
                 argument according to its format character. This is a convenience
                 for using revsets internally. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.
                 Supported arguments:
                 %r = revset expression, parenthesized
                 %d = int(arg), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'
                 Prefixing the type with 'l' specifies a parenthesized list of that type,
                 and 'p' specifies a list of function parameters of that type.
                 error.ParseError is raised for a truncated format sequence, for a
                 missing or surplus argument, and when formatting an argument fails
                 with TypeError or ValueError.
                 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec(b'%d:: and not %d::', 10, 20)
                 '10:: and not 20::'
                 >>> formatspec(b'%ld or %ld', [], [1])
                 "_list('') or 1"
                 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: b'default'
                 >>> b.branch = b
                 >>> formatspec(b'branch(%b)', b)
                 "branch('default')"
                 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
                 "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
                 >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
                 "sort((:), 'desc', 'user')"
                 >>> formatspec(b'%ls', [b'a', b"'"])
                 "_list('a\\\\x00\\\\'')"
                 '''
                 spec = pycompat.bytestr(expr)
                 remaining = iter(args)
                 pieces = []
                 cursor = 0
                 while cursor < len(spec):
                     marker = spec.find('%', cursor)
                     if marker < 0:
                         # no further format sequences: copy the tail verbatim
                         pieces.append(spec[cursor:])
                         break
                     # literal text preceding the '%'
                     pieces.append(spec[cursor:marker])
                     cursor = marker + 1
                     try:
                         kind = spec[cursor]
                     except IndexError:
                         raise error.ParseError(_('incomplete revspec format character'))
                     cursor += 1
                     if kind == '%':
                         # '%%' emits a single '%' and consumes no argument
                         pieces.append(kind)
                         continue
                     try:
                         arg = next(remaining)
                     except StopIteration:
                         raise error.ParseError(_('missing argument for revspec'))
                     listformatter = _formatlistfuncs.get(kind)
                     if listformatter:
                         # a list of some type: the element type character follows
                         try:
                             kind = spec[cursor]
                         except IndexError:
                             raise error.ParseError(_('incomplete revspec format character'))
                         cursor += 1
                     try:
                         if listformatter:
                             piece = listformatter(list(arg), kind)
                         else:
                             piece = _formatargtype(kind, arg)
                     except (TypeError, ValueError):
                         raise error.ParseError(_('invalid argument for revspec'))
                     pieces.append(piece)
                 # every argument must have been consumed by a format sequence
                 try:
                     next(remaining)
                 except StopIteration:
                     pass
                 else:
                     raise error.ParseError(_('too many revspec arguments specified'))
                 return ''.join(pieces)
             def prettyformat(tree):
                 """Return parser.prettyformat() output for a revset ``tree``.

                 The 'string' and 'symbol' node types are handed to the generic
                 formatter as its second argument (presumably the node types it
                 should treat as leaves; see parser.prettyformat to confirm).
                 """
                 leafnodes = ('string', 'symbol')
                 return parser.prettyformat(tree, leafnodes)
             def depth(tree):
                 """Return the nesting depth of ``tree``.

                 Anything that is not a tuple has depth 0; a tuple is one level
                 deeper than its deepest element.
                 """
                 if not isinstance(tree, tuple):
                     return 0
                 return 1 + max(depth(child) for child in tree)
             def funcsused(tree):
                 """Collect the names referenced by 'func' nodes in ``tree``.

                 Returns a set holding ``node[1][1]`` for every 'func' node reached.
                 Non-tuples and 'string'/'symbol' nodes contribute nothing and are
                 not descended into.
                 """
                 if isinstance(tree, tuple) and tree[0] not in ('string', 'symbol'):
                     names = set()
                     for child in tree[1:]:
                         names.update(funcsused(child))
                     if tree[0] == 'func':
                         # tree[1] is the node naming the function being called
                         names.add(tree[1][1])
                     return names
                 return set()
             # Between 1 and 40 hexadecimal digits (either case), anchored at the end
             # by '$'. _ishashlikesymbol() applies it with .match().
             _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
             def _ishashlikesymbol(symbol):
                 """Tell whether ``symbol`` looks like a hash.

                 The result is whatever _hashre.match() returns: a truthy match
                 object on success and None otherwise, not a strict bool.
                 """
                 hit = _hashre.match(symbol)
                 return hit
             def gethashlikesymbols(tree):
                 """Collect the hash-looking 'symbol' values found in ``tree``.

                 The values are returned as a list, in the order the nodes are
                 visited; a falsy ``tree`` gives an empty list.
                 >>> gethashlikesymbols(parse(b'3::abe3ff'))
                 ['3', 'abe3ff']
                 >>> gethashlikesymbols(parse(b'precursors(.)'))
                 []
                 >>> gethashlikesymbols(parse(b'precursors(34)'))
                 ['34']
                 >>> gethashlikesymbols(parse(b'abe3ffZ'))
                 []
                 """
                 if not tree:
                     return []
                 if tree[0] == "symbol":
                     return [tree[1]] if _ishashlikesymbol(tree[1]) else []
                 # Only nodes with at least three elements are descended into, so
                 # two-element nodes such as ('string', value) are never searched.
                 # NOTE(review): this also skips any other two-element node; confirm
                 # that hash-like symbols below such nodes are meant to be ignored.
                 if len(tree) < 3:
                     return []
                 found = []
                 for child in tree[1:]:
                     found.extend(gethashlikesymbols(child))
                 return found