upstream/mercurial-mirror Commit - r34046:b862e6fc

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

util,

18

util,

19

)

19

)

20

21

elements = {

21

elements = {

22

# token-type: binding-strength, primary, prefix, infix, suffix

22

# token-type: binding-strength, primary, prefix, infix, suffix

23

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

23

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

24

"[": (21, None, None, ("subscript", 1, "]"), None),

24

"[": (21, None, None, ("subscript", 1, "]"), None),

25

"#": (21, None, None, ("relation", 21), None),

25

"#": (21, None, None, ("relation", 21), None),

26

"##": (20, None, None, ("_concat", 20), None),

26

"##": (20, None, None, ("_concat", 20), None),

27

"~": (18, None, None, ("ancestor", 18), None),

27

"~": (18, None, None, ("ancestor", 18), None),

28

"^": (18, None, None, ("parent", 18), "parentpost"),

28

"^": (18, None, None, ("parent", 18), "parentpost"),

29

"-": (5, None, ("negate", 19), ("minus", 5), None),

29

"-": (5, None, ("negate", 19), ("minus", 5), None),

30

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

30

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

31

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

31

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

32

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

32

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

33

"not": (10, None, ("not", 10), None, None),

33

"not": (10, None, ("not", 10), None, None),

34

"!": (10, None, ("not", 10), None, None),

34

"!": (10, None, ("not", 10), None, None),

35

"and": (5, None, None, ("and", 5), None),

35

"and": (5, None, None, ("and", 5), None),

36

"&": (5, None, None, ("and", 5), None),

36

"&": (5, None, None, ("and", 5), None),

37

"%": (5, None, None, ("only", 5), "onlypost"),

37

"%": (5, None, None, ("only", 5), "onlypost"),

38

"or": (4, None, None, ("or", 4), None),

38

"or": (4, None, None, ("or", 4), None),

39

"|": (4, None, None, ("or", 4), None),

39

"|": (4, None, None, ("or", 4), None),

40

"+": (4, None, None, ("or", 4), None),

40

"+": (4, None, None, ("or", 4), None),

41

"=": (3, None, None, ("keyvalue", 3), None),

41

"=": (3, None, None, ("keyvalue", 3), None),

42

",": (2, None, None, ("list", 2), None),

42

",": (2, None, None, ("list", 2), None),

43

")": (0, None, None, None, None),

43

")": (0, None, None, None, None),

44

"]": (0, None, None, None, None),

44

"]": (0, None, None, None, None),

45

"symbol": (0, "symbol", None, None, None),

45

"symbol": (0, "symbol", None, None, None),

46

"string": (0, "string", None, None, None),

46

"string": (0, "string", None, None, None),

47

"end": (0, None, None, None, None),

47

"end": (0, None, None, None, None),

48

}

48

}

49

50

keywords = {'and', 'or', 'not'}

50

keywords = {'and', 'or', 'not'}

51

52

_quoteletters = {'"', "'"}

52

_quoteletters = {'"', "'"}

53

_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

53

_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

54

55

# default set of valid characters for the initial letter of symbols

55

# default set of valid characters for the initial letter of symbols

56

_syminitletters = set(pycompat.iterbytestr(

56

_syminitletters = set(pycompat.iterbytestr(

57

string.ascii_letters.encode('ascii') +

57

string.ascii_letters.encode('ascii') +

58

string.digits.encode('ascii') +

58

string.digits.encode('ascii') +

59

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

59

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

60

61

# default set of valid characters for non-initial letters of symbols

61

# default set of valid characters for non-initial letters of symbols

62

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

62

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

63

64

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

64

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

65

'''

65

'''

66

Parse a revset statement into a stream of tokens

66

Parse a revset statement into a stream of tokens

67

68

``syminitletters`` is the set of valid characters for the initial

68

``syminitletters`` is the set of valid characters for the initial

69

letter of symbols.

69

letter of symbols.

70

71

By default, character ``c`` is recognized as valid for initial

71

By default, character ``c`` is recognized as valid for initial

72

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

72

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

73

74

``symletters`` is the set of valid characters for non-initial

74

``symletters`` is the set of valid characters for non-initial

75

letters of symbols.

75

letters of symbols.

76

77

By default, character ``c`` is recognized as valid for non-initial

77

By default, character ``c`` is recognized as valid for non-initial

78

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

78

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

79

80

Check that @ is a valid unquoted token character (issue3686):

80

Check that @ is a valid unquoted token character (issue3686):

81

>>> list(tokenize("@::"))

81

>>> list(tokenize("@::"))

82

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

82

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

83

84

'''

84

'''

85

program = pycompat.bytestr(program)

85

program = pycompat.bytestr(program)

86

if syminitletters is None:

86

if syminitletters is None:

87

syminitletters = _syminitletters

87

syminitletters = _syminitletters

88

if symletters is None:

88

if symletters is None:

89

symletters = _symletters

89

symletters = _symletters

90

91

if program and lookup:

91

if program and lookup:

92

# attempt to parse old-style ranges first to deal with

92

# attempt to parse old-style ranges first to deal with

93

# things like old-tag which contain query metacharacters

93

# things like old-tag which contain query metacharacters

94

parts = program.split(':', 1)

94

parts = program.split(':', 1)

95

if all(lookup(sym) for sym in parts if sym):

95

if all(lookup(sym) for sym in parts if sym):

96

if parts[0]:

96

if parts[0]:

97

yield ('symbol', parts[0], 0)

97

yield ('symbol', parts[0], 0)

98

if len(parts) > 1:

98

if len(parts) > 1:

99

s = len(parts[0])

99

s = len(parts[0])

100

yield (':', None, s)

100

yield (':', None, s)

101

if parts[1]:

101

if parts[1]:

102

yield ('symbol', parts[1], s + 1)

102

yield ('symbol', parts[1], s + 1)

103

yield ('end', None, len(program))

103

yield ('end', None, len(program))

104

return

104

return

105

106

pos, l = 0, len(program)

106

pos, l = 0, len(program)

107

while pos < l:

107

while pos < l:

108

c = program[pos]

108

c = program[pos]

109

if c.isspace(): # skip inter-token whitespace

109

if c.isspace(): # skip inter-token whitespace

110

pass

110

pass

111

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

111

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

112

yield ('::', None, pos)

112

yield ('::', None, pos)

113

pos += 1 # skip ahead

113

pos += 1 # skip ahead

114

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

114

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

115

yield ('..', None, pos)

115

yield ('..', None, pos)

116

pos += 1 # skip ahead

116

pos += 1 # skip ahead

117

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

117

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

118

yield ('##', None, pos)

118

yield ('##', None, pos)

119

pos += 1 # skip ahead

119

pos += 1 # skip ahead

120

elif c in _simpleopletters: # handle simple operators

120

elif c in _simpleopletters: # handle simple operators

121

yield (c, None, pos)

121

yield (c, None, pos)

122

elif (c in _quoteletters or c == 'r' and

122

elif (c in _quoteletters or c == 'r' and

123

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

123

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

124

if c == 'r':

124

if c == 'r':

125

pos += 1

125

pos += 1

126

c = program[pos]

126

c = program[pos]

127

decode = lambda x: x

127

decode = lambda x: x

128

else:

128

else:

129

decode = parser.unescapestr

129

decode = parser.unescapestr

130

pos += 1

130

pos += 1

131

s = pos

131

s = pos

132

while pos < l: # find closing quote

132

while pos < l: # find closing quote

133

d = program[pos]

133

d = program[pos]

134

if d == '\\': # skip over escaped characters

134

if d == '\\': # skip over escaped characters

135

pos += 2

135

pos += 2

136

continue

136

continue

137

if d == c:

137

if d == c:

138

yield ('string', decode(program[s:pos]), s)

138

yield ('string', decode(program[s:pos]), s)

139

break

139

break

140

pos += 1

140

pos += 1

141

else:

141

else:

142

raise error.ParseError(_("unterminated string"), s)

142

raise error.ParseError(_("unterminated string"), s)

143

# gather up a symbol/keyword

143

# gather up a symbol/keyword

144

elif c in syminitletters:

144

elif c in syminitletters:

145

s = pos

145

s = pos

146

pos += 1

146

pos += 1

147

while pos < l: # find end of symbol

147

while pos < l: # find end of symbol

148

d = program[pos]

148

d = program[pos]

149

if d not in symletters:

149

if d not in symletters:

150

break

150

break

151

if d == '.' and program[pos - 1] == '.': # special case for ..

151

if d == '.' and program[pos - 1] == '.': # special case for ..

152

pos -= 1

152

pos -= 1

153

break

153

break

154

pos += 1

154

pos += 1

155

sym = program[s:pos]

155

sym = program[s:pos]

156

if sym in keywords: # operator keywords

156

if sym in keywords: # operator keywords

157

yield (sym, None, s)

157

yield (sym, None, s)

158

elif '-' in sym:

158

elif '-' in sym:

159

# some jerk gave us foo-bar-baz, try to check if it's a symbol

159

# some jerk gave us foo-bar-baz, try to check if it's a symbol

160

if lookup and lookup(sym):

160

if lookup and lookup(sym):

161

# looks like a real symbol

161

# looks like a real symbol

162

yield ('symbol', sym, s)

162

yield ('symbol', sym, s)

163

else:

163

else:

164

# looks like an expression

164

# looks like an expression

165

parts = sym.split('-')

165

parts = sym.split('-')

166

for p in parts[:-1]:

166

for p in parts[:-1]:

167

if p: # possible consecutive -

167

if p: # possible consecutive -

168

yield ('symbol', p, s)

168

yield ('symbol', p, s)

169

s += len(p)

169

s += len(p)

170

yield ('-', None, pos)

170

yield ('-', None, pos)

171

s += 1

171

s += 1

172

if parts[-1]: # possible trailing -

172

if parts[-1]: # possible trailing -

173

yield ('symbol', parts[-1], s)

173

yield ('symbol', parts[-1], s)

174

else:

174

else:

175

yield ('symbol', sym, s)

175

yield ('symbol', sym, s)

176

pos -= 1

176

pos -= 1

177

else:

177

else:

178

raise error.ParseError(_("syntax error in revset '%s'") %

178

raise error.ParseError(_("syntax error in revset '%s'") %

179

program, pos)

179

program, pos)

180

pos += 1

180

pos += 1

181

yield ('end', None, pos)

181

yield ('end', None, pos)

182

183

# helpers

183

# helpers

184

185

_notset = object()

185

_notset = object()

186

187

def getsymbol(x):

187

def getsymbol(x):

188

if x and x[0] == 'symbol':

188

if x and x[0] == 'symbol':

189

return x[1]

189

return x[1]

190

raise error.ParseError(_('not a symbol'))

190

raise error.ParseError(_('not a symbol'))

191

192

def getstring(x, err):

192

def getstring(x, err):

193

if x and (x[0] == 'string' or x[0] == 'symbol'):

193

if x and (x[0] == 'string' or x[0] == 'symbol'):

194

return x[1]

194

return x[1]

195

raise error.ParseError(err)

195

raise error.ParseError(err)

196

197

def getinteger(x, err, default=_notset):

197

def getinteger(x, err, default=_notset):

198

if not x and default is not _notset:

198

if not x and default is not _notset:

199

return default

199

return default

200

try:

200

try:

201

return int(getstring(x, err))

201

return int(getstring(x, err))

202

except ValueError:

202

except ValueError:

203

raise error.ParseError(err)

203

raise error.ParseError(err)

204

205

def getboolean(x, err):

205

def getboolean(x, err):

206

value = util.parsebool(getsymbol(x))

206

value = util.parsebool(getsymbol(x))

207

if value is not None:

207

if value is not None:

208

return value

208

return value

209

raise error.ParseError(err)

209

raise error.ParseError(err)

210

211

def getlist(x):

211

def getlist(x):

212

if not x:

212

if not x:

213

return []

213

return []

214

if x[0] == 'list':

214

if x[0] == 'list':

215

return list(x[1:])

215

return list(x[1:])

216

return [x]

216

return [x]

217

218

def getrange(x, err):

218

def getrange(x, err):

219

if not x:

219

if not x:

220

raise error.ParseError(err)

220

raise error.ParseError(err)

221

op = x[0]

221

op = x[0]

222

if op == 'range':

222

if op == 'range':

223

return x[1], x[2]

223

return x[1], x[2]

224

elif op == 'rangepre':

224

elif op == 'rangepre':

225

return None, x[1]

225

return None, x[1]

226

elif op == 'rangepost':

226

elif op == 'rangepost':

227

return x[1], None

227

return x[1], None

228

elif op == 'rangeall':

228

elif op == 'rangeall':

229

return None, None

229

return None, None

230

raise error.ParseError(err)

230

raise error.ParseError(err)

231

232

def getargs(x, min, max, err):

232

def getargs(x, min, max, err):

233

l = getlist(x)

233

l = getlist(x)

234

if len(l) < min or (max >= 0 and len(l) > max):

234

if len(l) < min or (max >= 0 and len(l) > max):

235

raise error.ParseError(err)

235

raise error.ParseError(err)

236

return l

236

return l

237

238

def getargsdict(x, funcname, keys):

238

def getargsdict(x, funcname, keys):

239

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

239

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

240

keyvaluenode='keyvalue', keynode='symbol')

240

keyvaluenode='keyvalue', keynode='symbol')

241

242

# cache of {spec: raw parsed tree} built internally

243

_treecache = {}

244

245

def _cachedtree(spec):

246

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

247

tree = _treecache.get(spec)

248

if tree is None:

249

_treecache[spec] = tree = parse(spec)

250

return tree

251

252

def _build(tmplspec, *repls):

253

"""Create raw parsed tree from a template revset statement

254

255

>>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))

256

('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))

257

"""

258

template = _cachedtree(tmplspec)

259

return parser.buildtree(template, ('symbol', '_'), *repls)

260

242

def _isnamedfunc(x, funcname):

261

def _isnamedfunc(x, funcname):

243

"""Check if given tree matches named function"""

262

"""Check if given tree matches named function"""

244

return x and x[0] == 'func' and getsymbol(x[1]) == funcname

263

return x and x[0] == 'func' and getsymbol(x[1]) == funcname

245

264

246

def _isposargs(x, n):

265

def _isposargs(x, n):

247

"""Check if given tree is n-length list of positional arguments"""

266

"""Check if given tree is n-length list of positional arguments"""

248

l = getlist(x)

267

l = getlist(x)

249

return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)

268

return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)

250

269

251

def _matchnamedfunc(x, funcname):

270

def _matchnamedfunc(x, funcname):

252

"""Return args tree if given tree matches named function; otherwise None

271

"""Return args tree if given tree matches named function; otherwise None

253

272

254

This can't be used for testing a nullary function since its args tree

273

This can't be used for testing a nullary function since its args tree

255

is also None. Use _isnamedfunc() instead.

274

is also None. Use _isnamedfunc() instead.

256

"""

275

"""

257

if not _isnamedfunc(x, funcname):

276

if not _isnamedfunc(x, funcname):

258

return

277

return

259

return x[2]

278

return x[2]

260

279

261

def _matchonly(revs, bases):

280

def _matchonly(revs, bases):

262

"""

281

"""

263

>>> f = lambda *args: _matchonly(*map(parse, args))

282

>>> f = lambda *args: _matchonly(*map(parse, args))

264

>>> f('ancestors(A)', 'not ancestors(B)')

283

>>> f('ancestors(A)', 'not ancestors(B)')

265

('list', ('symbol', 'A'), ('symbol', 'B'))

284

('list', ('symbol', 'A'), ('symbol', 'B'))

266

"""

285

"""

267

ta = _matchnamedfunc(revs, 'ancestors')

286

ta = _matchnamedfunc(revs, 'ancestors')

268

tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')

287

tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')

269

if _isposargs(ta, 1) and _isposargs(tb, 1):

288

if _isposargs(ta, 1) and _isposargs(tb, 1):

270

return ('list', ta, tb)

289

return ('list', ta, tb)

271

290

272

def _fixops(x):

291

def _fixops(x):

273

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

292

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

274

handled well by our simple top-down parser"""

293

handled well by our simple top-down parser"""

275

if not isinstance(x, tuple):

294

if not isinstance(x, tuple):

276

return x

295

return x

277

296

278

op = x[0]

297

op = x[0]

279

if op == 'parent':

298

if op == 'parent':

280

# x^:y means (x^) : y, not x ^ (:y)

299

# x^:y means (x^) : y, not x ^ (:y)

281

# x^: means (x^) :, not x ^ (:)

300

# x^: means (x^) :, not x ^ (:)

282

post = ('parentpost', x[1])

301

post = ('parentpost', x[1])

283

if x[2][0] == 'dagrangepre':

302

if x[2][0] == 'dagrangepre':

284

return _fixops(('dagrange', post, x[2][1]))

303

return _fixops(('dagrange', post, x[2][1]))

285

elif x[2][0] == 'rangepre':

304

elif x[2][0] == 'rangepre':

286

return _fixops(('range', post, x[2][1]))

305

return _fixops(('range', post, x[2][1]))

287

elif x[2][0] == 'rangeall':

306

elif x[2][0] == 'rangeall':

288

return _fixops(('rangepost', post))

307

return _fixops(('rangepost', post))

289

elif op == 'or':

308

elif op == 'or':

290

# make number of arguments deterministic:

309

# make number of arguments deterministic:

291

# x + y + z -> (or x y z) -> (or (list x y z))

310

# x + y + z -> (or x y z) -> (or (list x y z))

292

return (op, _fixops(('list',) + x[1:]))

311

return (op, _fixops(('list',) + x[1:]))

293

elif op == 'subscript' and x[1][0] == 'relation':

312

elif op == 'subscript' and x[1][0] == 'relation':

294

# x#y[z] ternary

313

# x#y[z] ternary

295

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

314

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

296

315

297

return (op,) + tuple(_fixops(y) for y in x[1:])

316

return (op,) + tuple(_fixops(y) for y in x[1:])

298

317

299

def _analyze(x):

318

def _analyze(x):

300

if x is None:

319

if x is None:

301

return x

320

return x

302

321

303

op = x[0]

322

op = x[0]

304

if op == 'minus':

323

if op == 'minus':

305

return _analyze(('and', x[1], (~~'not'~~, x[2])))

324

return _analyze(_build('_ and not _', *x[1:]))

306

elif op == 'only':

325

elif op == 'only':

307

t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))

326

return _analyze(_build('only(_, _)', *x[1:]))

308

return _analyze(t)

309

elif op == 'onlypost':

327

elif op == 'onlypost':

310

return _analyze(('~~func~~', (~~'symbol'~~, ~~'only'~~), x[1]))

328

return _analyze(_build('only(_)', x[1]))

311

elif op == 'dagrangepre':

329

elif op == 'dagrangepre':

312

return _analyze(('~~func~~', (~~'symbol'~~, ~~'ancestors'~~), x[1]))

330

return _analyze(_build('ancestors(_)', x[1]))

313

elif op == 'dagrangepost':

331

elif op == 'dagrangepost':

314

return _analyze(('~~func~~', (~~'symbol'~~, ~~'descendants'~~), x[1]))

332

return _analyze(_build('descendants(_)', x[1]))

315

elif op == 'negate':

333

elif op == 'negate':

316

s = getstring(x[1], _("can't negate that"))

334

s = getstring(x[1], _("can't negate that"))

317

return _analyze(('string', '-' + s))

335

return _analyze(('string', '-' + s))

318

elif op in ('string', 'symbol'):

336

elif op in ('string', 'symbol'):

319

return x

337

return x

320

elif op == 'rangeall':

338

elif op == 'rangeall':

321

return (op, None)

339

return (op, None)

322

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

340

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

323

return (op, _analyze(x[1]))

341

return (op, _analyze(x[1]))

324

elif op == 'group':

342

elif op == 'group':

325

return _analyze(x[1])

343

return _analyze(x[1])

326

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

344

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

327

'subscript'}:

345

'subscript'}:

328

ta = _analyze(x[1])

346

ta = _analyze(x[1])

329

tb = _analyze(x[2])

347

tb = _analyze(x[2])

330

return (op, ta, tb)

348

return (op, ta, tb)

331

elif op == 'relsubscript':

349

elif op == 'relsubscript':

332

ta = _analyze(x[1])

350

ta = _analyze(x[1])

333

tb = _analyze(x[2])

351

tb = _analyze(x[2])

334

tc = _analyze(x[3])

352

tc = _analyze(x[3])

335

return (op, ta, tb, tc)

353

return (op, ta, tb, tc)

336

elif op == 'list':

354

elif op == 'list':

337

return (op,) + tuple(_analyze(y) for y in x[1:])

355

return (op,) + tuple(_analyze(y) for y in x[1:])

338

elif op == 'keyvalue':

356

elif op == 'keyvalue':

339

return (op, x[1], _analyze(x[2]))

357

return (op, x[1], _analyze(x[2]))

340

elif op == 'func':

358

elif op == 'func':

341

return (op, x[1], _analyze(x[2]))

359

return (op, x[1], _analyze(x[2]))

342

raise ValueError('invalid operator %r' % op)

360

raise ValueError('invalid operator %r' % op)

343

361

344

def analyze(x):

362

def analyze(x):

345

"""Transform raw parsed tree to evaluatable tree which can be fed to

363

"""Transform raw parsed tree to evaluatable tree which can be fed to

346

optimize() or getset()

364

optimize() or getset()

347

365

348

All pseudo operations should be mapped to real operations or functions

366

All pseudo operations should be mapped to real operations or functions

349

defined in methods or symbols table respectively.

367

defined in methods or symbols table respectively.

350

"""

368

"""

351

return _analyze(x)

369

return _analyze(x)

352

370

353

def _optimize(x, small):

371

def _optimize(x, small):

354

if x is None:

372

if x is None:

355

return 0, x

373

return 0, x

356

374

357

smallbonus = 1

375

smallbonus = 1

358

if small:

376

if small:

359

smallbonus = .5

377

smallbonus = .5

360

378

361

op = x[0]

379

op = x[0]

362

if op in ('string', 'symbol'):

380

if op in ('string', 'symbol'):

363

return smallbonus, x # single revisions are small

381

return smallbonus, x # single revisions are small

364

elif op == 'and':

382

elif op == 'and':

365

wa, ta = _optimize(x[1], True)

383

wa, ta = _optimize(x[1], True)

366

wb, tb = _optimize(x[2], True)

384

wb, tb = _optimize(x[2], True)

367

w = min(wa, wb)

385

w = min(wa, wb)

368

386

369

# (::x and not ::y)/(not ::y and ::x) have a fast path

387

# (::x and not ::y)/(not ::y and ::x) have a fast path

370

tm = _matchonly(ta, tb) or _matchonly(tb, ta)

388

m = _matchonly(ta, tb) or _matchonly(tb, ta)

371

if tm:

389

if m:

372

return w, ('~~func~~', (~~'symbol'~~, ~~'only'~~), tm)

390

return w, _build('only(_, _)', *m[1:])

373

391

374

if tb is not None and tb[0] == 'not':

392

if tb is not None and tb[0] == 'not':

375

return wa, ('difference', ta, tb[1])

393

return wa, ('difference', ta, tb[1])

376

if wa > wb:

394

if wa > wb:

377

op = 'andsmally'

395

op = 'andsmally'

378

return w, (op, ta, tb)

396

return w, (op, ta, tb)

379

elif op == 'or':

397

elif op == 'or':

380

# fast path for machine-generated expression, that is likely to have

398

# fast path for machine-generated expression, that is likely to have

381

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

399

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

382

ws, ts, ss = [], [], []

400

ws, ts, ss = [], [], []

383

def flushss():

401

def flushss():

384

if not ss:

402

if not ss:

385

return

403

return

386

if len(ss) == 1:

404

if len(ss) == 1:

387

w, t = ss[0]

405

w, t = ss[0]

388

else:

406

else:

389

s = '\0'.join(t[1] for w, t in ss)

407

s = '\0'.join(t[1] for w, t in ss)

390

y = ('~~func~~', (~~'symbol'~~, ~~'_list'~~), ('string', s))

408

y = _build('_list(_)', ('string', s))

391

w, t = _optimize(y, False)

409

w, t = _optimize(y, False)

392

ws.append(w)

410

ws.append(w)

393

ts.append(t)

411

ts.append(t)

394

del ss[:]

412

del ss[:]

395

for y in getlist(x[1]):

413

for y in getlist(x[1]):

396

w, t = _optimize(y, False)

414

w, t = _optimize(y, False)

397

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

415

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

398

ss.append((w, t))

416

ss.append((w, t))

399

continue

417

continue

400

flushss()

418

flushss()

401

ws.append(w)

419

ws.append(w)

402

ts.append(t)

420

ts.append(t)

403

flushss()

421

flushss()

404

if len(ts) == 1:

422

if len(ts) == 1:

405

return ws[0], ts[0] # 'or' operation is fully optimized out

423

return ws[0], ts[0] # 'or' operation is fully optimized out

406

return max(ws), (op, ('list',) + tuple(ts))

424

return max(ws), (op, ('list',) + tuple(ts))

407

elif op == 'not':

425

elif op == 'not':

408

# Optimize not public() to _notpublic() because we have a fast version

426

# Optimize not public() to _notpublic() because we have a fast version

409

if x[1][:3] == ('func', ('symbol', 'public'), None):

427

if x[1][:3] == ('func', ('symbol', 'public'), None):

410

newsym = ('func', ('symbol', '_notpublic'), None)

428

o = _optimize(_build('_notpublic()'), not small)

411

o = _optimize(newsym, not small)

412

return o[0], o[1]

429

return o[0], o[1]

413

else:

430

else:

414

o = _optimize(x[1], not small)

431

o = _optimize(x[1], not small)

415

return o[0], (op, o[1])

432

return o[0], (op, o[1])

416

elif op == 'rangeall':

433

elif op == 'rangeall':

417

return smallbonus, x

434

return smallbonus, x

418

elif op in ('rangepre', 'rangepost', 'parentpost'):

435

elif op in ('rangepre', 'rangepost', 'parentpost'):

419

o = _optimize(x[1], small)

436

o = _optimize(x[1], small)

420

return o[0], (op, o[1])

437

return o[0], (op, o[1])

421

elif op in ('dagrange', 'range'):

438

elif op in ('dagrange', 'range'):

422

wa, ta = _optimize(x[1], small)

439

wa, ta = _optimize(x[1], small)

423

wb, tb = _optimize(x[2], small)

440

wb, tb = _optimize(x[2], small)

424

return wa + wb, (op, ta, tb)

441

return wa + wb, (op, ta, tb)

425

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

442

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

426

w, t = _optimize(x[1], small)

443

w, t = _optimize(x[1], small)

427

return w, (op, t, x[2])

444

return w, (op, t, x[2])

428

elif op == 'relsubscript':

445

elif op == 'relsubscript':

429

w, t = _optimize(x[1], small)

446

w, t = _optimize(x[1], small)

430

return w, (op, t, x[2], x[3])

447

return w, (op, t, x[2], x[3])

431

elif op == 'list':

448

elif op == 'list':

432

ws, ts = zip(*(_optimize(y, small) for y in x[1:]))

449

ws, ts = zip(*(_optimize(y, small) for y in x[1:]))

433

return sum(ws), (op,) + ts

450

return sum(ws), (op,) + ts

434

elif op == 'keyvalue':

451

elif op == 'keyvalue':

435

w, t = _optimize(x[2], small)

452

w, t = _optimize(x[2], small)

436

return w, (op, x[1], t)

453

return w, (op, x[1], t)

437

elif op == 'func':

454

elif op == 'func':

438

f = getsymbol(x[1])

455

f = getsymbol(x[1])

439

wa, ta = _optimize(x[2], small)

456

wa, ta = _optimize(x[2], small)

440

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

457

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

441

'keyword', 'outgoing', 'user', 'destination'):

458

'keyword', 'outgoing', 'user', 'destination'):

442

w = 10 # slow

459

w = 10 # slow

443

elif f in ('modifies', 'adds', 'removes'):

460

elif f in ('modifies', 'adds', 'removes'):

444

w = 30 # slower

461

w = 30 # slower

445

elif f == "contains":

462

elif f == "contains":

446

w = 100 # very slow

463

w = 100 # very slow

447

elif f == "ancestor":

464

elif f == "ancestor":

448

w = 1 * smallbonus

465

w = 1 * smallbonus

449

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

466

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

450

w = 0

467

w = 0

451

elif f == "sort":

468

elif f == "sort":

452

w = 10 # assume most sorts look at changelog

469

w = 10 # assume most sorts look at changelog

453

else:

470

else:

454

w = 1

471

w = 1

455

return w + wa, (op, x[1], ta)

472

return w + wa, (op, x[1], ta)

456

raise ValueError('invalid operator %r' % op)

473

raise ValueError('invalid operator %r' % op)

457

474

458

def optimize(tree):

475

def optimize(tree):

459

"""Optimize evaluatable tree

476

"""Optimize evaluatable tree

460

477

461

All pseudo operations should be transformed beforehand.

478

All pseudo operations should be transformed beforehand.

462

"""

479

"""

463

_weight, newtree = _optimize(tree, small=True)

480

_weight, newtree = _optimize(tree, small=True)

464

return newtree

481

return newtree

465

482

466

# the set of valid characters for the initial letter of symbols in

483

# the set of valid characters for the initial letter of symbols in

467

# alias declarations and definitions

484

# alias declarations and definitions

468

_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))

485

_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))

469

486

470

def _parsewith(spec, lookup=None, syminitletters=None):

487

def _parsewith(spec, lookup=None, syminitletters=None):

471

"""Generate a parse tree of given spec with given tokenizing options

488

"""Generate a parse tree of given spec with given tokenizing options

472

489

473

>>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)

490

>>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)

474

('func', ('symbol', 'foo'), ('symbol', '$1'))

491

('func', ('symbol', 'foo'), ('symbol', '$1'))

475

>>> _parsewith('$1')

492

>>> _parsewith('$1')

476

Traceback (most recent call last):

493

Traceback (most recent call last):

477

...

494

...

478

ParseError: ("syntax error in revset '$1'", 0)

495

ParseError: ("syntax error in revset '$1'", 0)

479

>>> _parsewith('foo bar')

496

>>> _parsewith('foo bar')

480

Traceback (most recent call last):

497

Traceback (most recent call last):

481

...

498

...

482

ParseError: ('invalid token', 4)

499

ParseError: ('invalid token', 4)

483

"""

500

"""

484

p = parser.parser(elements)

501

p = parser.parser(elements)

485

tree, pos = p.parse(tokenize(spec, lookup=lookup,

502

tree, pos = p.parse(tokenize(spec, lookup=lookup,

486

syminitletters=syminitletters))

503

syminitletters=syminitletters))

487

if pos != len(spec):

504

if pos != len(spec):

488

raise error.ParseError(_('invalid token'), pos)

505

raise error.ParseError(_('invalid token'), pos)

489

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

506

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

490

507

491

class _aliasrules(parser.basealiasrules):

508

class _aliasrules(parser.basealiasrules):

492

"""Parsing and expansion rule set of revset aliases"""

509

"""Parsing and expansion rule set of revset aliases"""

493

_section = _('revset alias')

510

_section = _('revset alias')

494

511

495

@staticmethod

512

@staticmethod

496

def _parse(spec):

513

def _parse(spec):

497

"""Parse alias declaration/definition ``spec``

514

"""Parse alias declaration/definition ``spec``

498

515

499

This allows symbol names to use also ``$`` as an initial letter

516

This allows symbol names to use also ``$`` as an initial letter

500

(for backward compatibility), and callers of this function should

517

(for backward compatibility), and callers of this function should

501

examine whether ``$`` is used also for unexpected symbols or not.

518

examine whether ``$`` is used also for unexpected symbols or not.

502

"""

519

"""

503

return _parsewith(spec, syminitletters=_aliassyminitletters)

520

return _parsewith(spec, syminitletters=_aliassyminitletters)

504

521

505

@staticmethod

522

@staticmethod

506

def _trygetfunc(tree):

523

def _trygetfunc(tree):

507

if tree[0] == 'func' and tree[1][0] == 'symbol':

524

if tree[0] == 'func' and tree[1][0] == 'symbol':

508

return tree[1][1], getlist(tree[2])

525

return tree[1][1], getlist(tree[2])

509

526

510

def expandaliases(tree, aliases, warn=None):

527

def expandaliases(tree, aliases, warn=None):

511

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

528

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

512

aliases = _aliasrules.buildmap(aliases)

529

aliases = _aliasrules.buildmap(aliases)

513

tree = _aliasrules.expand(aliases, tree)

530

tree = _aliasrules.expand(aliases, tree)

514

# warn about problematic (but not referred) aliases

531

# warn about problematic (but not referred) aliases

515

if warn is not None:

532

if warn is not None:

516

for name, alias in sorted(aliases.iteritems()):

533

for name, alias in sorted(aliases.iteritems()):

517

if alias.error and not alias.warned:

534

if alias.error and not alias.warned:

518

warn(_('warning: %s\n') % (alias.error))

535

warn(_('warning: %s\n') % (alias.error))

519

alias.warned = True

536

alias.warned = True

520

return tree

537

return tree

521

538

522

def foldconcat(tree):

539

def foldconcat(tree):

523

"""Fold elements to be concatenated by `##`

540

"""Fold elements to be concatenated by `##`

524

"""

541

"""

525

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

542

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

526

return tree

543

return tree

527

if tree[0] == '_concat':

544

if tree[0] == '_concat':

528

pending = [tree]

545

pending = [tree]

529

l = []

546

l = []

530

while pending:

547

while pending:

531

e = pending.pop()

548

e = pending.pop()

532

if e[0] == '_concat':

549

if e[0] == '_concat':

533

pending.extend(reversed(e[1:]))

550

pending.extend(reversed(e[1:]))

534

elif e[0] in ('string', 'symbol'):

551

elif e[0] in ('string', 'symbol'):

535

l.append(e[1])

552

l.append(e[1])

536

else:

553

else:

537

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

554

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

538

raise error.ParseError(msg)

555

raise error.ParseError(msg)

539

return ('string', ''.join(l))

556

return ('string', ''.join(l))

540

else:

557

else:

541

return tuple(foldconcat(t) for t in tree)

558

return tuple(foldconcat(t) for t in tree)

542

559

543

def parse(spec, lookup=None):

560

def parse(spec, lookup=None):

544

return _parsewith(spec, lookup=lookup)

561

return _parsewith(spec, lookup=lookup)

545

562

546

def _quote(s):

563

def _quote(s):

547

r"""Quote a value in order to make it safe for the revset engine.

564

r"""Quote a value in order to make it safe for the revset engine.

548

565

549

>>> _quote('asdf')

566

>>> _quote('asdf')

550

"'asdf'"

567

"'asdf'"

551

>>> _quote("asdf'\"")

568

>>> _quote("asdf'\"")

552

'\'asdf\\\'"\''

569

'\'asdf\\\'"\''

553

>>> _quote('asdf\'')

570

>>> _quote('asdf\'')

554

"'asdf\\''"

571

"'asdf\\''"

555

>>> _quote(1)

572

>>> _quote(1)

556

"'1'"

573

"'1'"

557

"""

574

"""

558

return "'%s'" % util.escapestr(pycompat.bytestr(s))

575

return "'%s'" % util.escapestr(pycompat.bytestr(s))

559

576

560

def formatspec(expr, *args):

577

def formatspec(expr, *args):

561

'''

578

'''

562

This is a convenience function for using revsets internally, and

579

This is a convenience function for using revsets internally, and

563

escapes arguments appropriately. Aliases are intentionally ignored

580

escapes arguments appropriately. Aliases are intentionally ignored

564

so that intended expression behavior isn't accidentally subverted.

581

so that intended expression behavior isn't accidentally subverted.

565

582

566

Supported arguments:

583

Supported arguments:

567

584

568

%r = revset expression, parenthesized

585

%r = revset expression, parenthesized

569

%d = int(arg), no quoting

586

%d = int(arg), no quoting

570

%s = string(arg), escaped and single-quoted

587

%s = string(arg), escaped and single-quoted

571

%b = arg.branch(), escaped and single-quoted

588

%b = arg.branch(), escaped and single-quoted

572

%n = hex(arg), single-quoted

589

%n = hex(arg), single-quoted

573

%% = a literal '%'

590

%% = a literal '%'

574

591

575

Prefixing the type with 'l' specifies a parenthesized list of that type.

592

Prefixing the type with 'l' specifies a parenthesized list of that type.

576

593

577

>>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))

594

>>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))

578

'(10 or 11):: and ((this()) or (that()))'

595

'(10 or 11):: and ((this()) or (that()))'

579

>>> formatspec('%d:: and not %d::', 10, 20)

596

>>> formatspec('%d:: and not %d::', 10, 20)

580

'10:: and not 20::'

597

'10:: and not 20::'

581

>>> formatspec('%ld or %ld', [], [1])

598

>>> formatspec('%ld or %ld', [], [1])

582

"_list('') or 1"

599

"_list('') or 1"

583

>>> formatspec('keyword(%s)', 'foo\\xe9')

600

>>> formatspec('keyword(%s)', 'foo\\xe9')

584

"keyword('foo\\\\xe9')"

601

"keyword('foo\\\\xe9')"

585

>>> b = lambda: 'default'

602

>>> b = lambda: 'default'

586

>>> b.branch = b

603

>>> b.branch = b

587

>>> formatspec('branch(%b)', b)

604

>>> formatspec('branch(%b)', b)

588

"branch('default')"

605

"branch('default')"

589

>>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])

606

>>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])

590

"root(_list('a\\x00b\\x00c\\x00d'))"

607

"root(_list('a\\x00b\\x00c\\x00d'))"

591

'''

608

'''

592

609

593

def argtype(c, arg):

610

def argtype(c, arg):

594

if c == 'd':

611

if c == 'd':

595

return '%d' % int(arg)

612

return '%d' % int(arg)

596

elif c == 's':

613

elif c == 's':

597

return _quote(arg)

614

return _quote(arg)

598

elif c == 'r':

615

elif c == 'r':

599

parse(arg) # make sure syntax errors are confined

616

parse(arg) # make sure syntax errors are confined

600

return '(%s)' % arg

617

return '(%s)' % arg

601

elif c == 'n':

618

elif c == 'n':

602

return _quote(node.hex(arg))

619

return _quote(node.hex(arg))

603

elif c == 'b':

620

elif c == 'b':

604

return _quote(arg.branch())

621

return _quote(arg.branch())

605

622

606

def listexp(s, t):

623

def listexp(s, t):

607

l = len(s)

624

l = len(s)

608

if l == 0:

625

if l == 0:

609

return "_list('')"

626

return "_list('')"

610

elif l == 1:

627

elif l == 1:

611

return argtype(t, s[0])

628

return argtype(t, s[0])

612

elif t == 'd':

629

elif t == 'd':

613

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

630

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

614

elif t == 's':

631

elif t == 's':

615

return "_list('%s')" % "\0".join(s)

632

return "_list('%s')" % "\0".join(s)

616

elif t == 'n':

633

elif t == 'n':

617

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

634

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

618

elif t == 'b':

635

elif t == 'b':

619

return "_list('%s')" % "\0".join(a.branch() for a in s)

636

return "_list('%s')" % "\0".join(a.branch() for a in s)

620

637

621

m = l // 2

638

m = l // 2

622

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

639

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

623

640

624

expr = pycompat.bytestr(expr)

641

expr = pycompat.bytestr(expr)

625

ret = ''

642

ret = ''

626

pos = 0

643

pos = 0

627

arg = 0

644

arg = 0

628

while pos < len(expr):

645

while pos < len(expr):

629

c = expr[pos]

646

c = expr[pos]

630

if c == '%':

647

if c == '%':

631

pos += 1

648

pos += 1

632

d = expr[pos]

649

d = expr[pos]

633

if d == '%':

650

if d == '%':

634

ret += d

651

ret += d

635

elif d in 'dsnbr':

652

elif d in 'dsnbr':

636

ret += argtype(d, args[arg])

653

ret += argtype(d, args[arg])

637

arg += 1

654

arg += 1

638

elif d == 'l':

655

elif d == 'l':

639

# a list of some type

656

# a list of some type

640

pos += 1

657

pos += 1

641

d = expr[pos]

658

d = expr[pos]

642

ret += listexp(list(args[arg]), d)

659

ret += listexp(list(args[arg]), d)

643

arg += 1

660

arg += 1

644

else:

661

else:

645

raise error.Abort(_('unexpected revspec format character %s')

662

raise error.Abort(_('unexpected revspec format character %s')

646

% d)

663

% d)

647

else:

664

else:

648

ret += c

665

ret += c

649

pos += 1

666

pos += 1

650

667

651

return ret

668

return ret

652

669

653

def prettyformat(tree):

670

def prettyformat(tree):

654

return parser.prettyformat(tree, ('string', 'symbol'))

671

return parser.prettyformat(tree, ('string', 'symbol'))

655

672

656

def depth(tree):

673

def depth(tree):

657

if isinstance(tree, tuple):

674

if isinstance(tree, tuple):

658

return max(map(depth, tree)) + 1

675

return max(map(depth, tree)) + 1

659

else:

676

else:

660

return 0

677

return 0

661

678

662

def funcsused(tree):

679

def funcsused(tree):

663

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

680

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

664

return set()

681

return set()

665

else:

682

else:

666

funcs = set()

683

funcs = set()

667

for s in tree[1:]:

684

for s in tree[1:]:

668

funcs |= funcsused(s)

685

funcs |= funcsused(s)

669

if tree[0] == 'func':

686

if tree[0] == 'func':

670

funcs.add(tree[1][1])

687

funcs.add(tree[1][1])

671

return funcs

688

return funcs

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 util,
             )
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             keywords = {'and', 'or', 'not'}
             _quoteletters = {'"', "'"}
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens
                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.
                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.
                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
                 Check that @ is a valid unquoted token character (issue3686):
                 >>> list(tokenize("@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 '''
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     yield ('-', None, pos)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             _notset = object()
             def getsymbol(x):
                 if x and x[0] == 'symbol':
                     return x[1]
                 raise error.ParseError(_('not a symbol'))
             def getstring(x, err):
                 if x and (x[0] == 'string' or x[0] == 'symbol'):
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 if not x and default is not _notset:
                     return default
                 try:
                     return int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
             def getboolean(x, err):
                 value = util.parsebool(getsymbol(x))
                 if value is not None:
                     return value
                 raise error.ParseError(err)
             def getlist(x):
                 if not x:
                     return []
                 if x[0] == 'list':
                     return list(x[1:])
                 return [x]
             def getrange(x, err):
                 if not x:
                     raise error.ParseError(err)
                 op = x[0]
                 if op == 'range':
                     return x[1], x[2]
                 elif op == 'rangepre':
                     return None, x[1]
                 elif op == 'rangepost':
                     return x[1], None
                 elif op == 'rangeall':
                     return None, None
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 l = getlist(x)
                 if len(l) < min or (max >= 0 and len(l) > max):
                     raise error.ParseError(err)
                 return l
             def getargsdict(x, funcname, keys):
                 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                             keyvaluenode='keyvalue', keynode='symbol')
+            # cache of {spec: raw parsed tree} built internally
+            _treecache = {}
+            def _cachedtree(spec):
+                # thread safe because parse() is reentrant and dict.__setitem__() is atomic
+                tree = _treecache.get(spec)
+                if tree is None:
+                    _treecache[spec] = tree = parse(spec)
+                return tree
+            def _build(tmplspec, *repls):
+                """Create raw parsed tree from a template revset statement
+                >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
+                ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
+                """
+                template = _cachedtree(tmplspec)
+                return parser.buildtree(template, ('symbol', '_'), *repls)
             def _isnamedfunc(x, funcname):
                 """Check if given tree matches named function"""
                 return x and x[0] == 'func' and getsymbol(x[1]) == funcname
             def _isposargs(x, n):
                 """Check if given tree is n-length list of positional arguments"""
                 l = getlist(x)
                 return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)
             def _matchnamedfunc(x, funcname):
                 """Return args tree if given tree matches named function; otherwise None
                 This can't be used for testing a nullary function since its args tree
                 is also None. Use _isnamedfunc() instead.
                 """
                 if not _isnamedfunc(x, funcname):
                     return
                 return x[2]
             def _matchonly(revs, bases):
                 """
                 >>> f = lambda *args: _matchonly(*map(parse, args))
                 >>> f('ancestors(A)', 'not ancestors(B)')
                 ('list', ('symbol', 'A'), ('symbol', 'B'))
                 """
                 ta = _matchnamedfunc(revs, 'ancestors')
                 tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')
                 if _isposargs(ta, 1) and _isposargs(tb, 1):
                     return ('list', ta, tb)
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser"""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     if x[2][0] == 'dagrangepre':
                         return _fixops(('dagrange', post, x[2][1]))
                     elif x[2][0] == 'rangepre':
                         return _fixops(('range', post, x[2][1]))
                     elif x[2][0] == 'rangeall':
                         return _fixops(('rangepost', post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 elif op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
                 return (op,) + tuple(_fixops(y) for y in x[1:])
             def _analyze(x):
                 if x is None:
                     return x
                 op = x[0]
                 if op == 'minus':
-                    return _analyze(('and', x[1], ('not', x[2])))
+                    return _analyze(_build('_ and not _', *x[1:]))
                 elif op == 'only':
-                    t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
+                    return _analyze(_build('only(_, _)', *x[1:]))
-                    return _analyze(t)
                 elif op == 'onlypost':
-                    return _analyze(('func', ('symbol', 'only'), x[1]))
+                    return _analyze(_build('only(_)', x[1]))
                 elif op == 'dagrangepre':
-                    return _analyze(('func', ('symbol', 'ancestors'), x[1]))
+                    return _analyze(_build('ancestors(_)', x[1]))
                 elif op == 'dagrangepost':
-                    return _analyze(('func', ('symbol', 'descendants'), x[1]))
+                    return _analyze(_build('descendants(_)', x[1]))
                 elif op == 'negate':
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 elif op in ('string', 'symbol'):
                     return x
                 elif op == 'rangeall':
                     return (op, None)
                 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 elif op == 'group':
                     return _analyze(x[1])
                 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                             'subscript'}:
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     return (op, ta, tb)
                 elif op == 'relsubscript':
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     tc = _analyze(x[3])
                     return (op, ta, tb, tc)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y) for y in x[1:])
                 elif op == 'keyvalue':
                     return (op, x[1], _analyze(x[2]))
                 elif op == 'func':
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Transform raw parsed tree to evaluatable tree which can be fed to
                 optimize() or getset()
                 All pseudo operations should be mapped to real operations or functions
                 defined in methods or symbols table respectively.
                 """
                 return _analyze(x)
             def _optimize(x, small):
                 if x is None:
                     return 0, x
                 smallbonus = 1
                 if small:
                     smallbonus = .5
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return smallbonus, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1], True)
                     wb, tb = _optimize(x[2], True)
                     w = min(wa, wb)
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
-                    tm = _matchonly(ta, tb) or _matchonly(tb, ta)
+                    m = _matchonly(ta, tb) or _matchonly(tb, ta)
-                    if tm:
+                    if m:
-                        return w, ('func', ('symbol', 'only'), tm)
+                        return w, _build('only(_, _)', *m[1:])
                     if tb is not None and tb[0] == 'not':
                         return wa, ('difference', ta, tb[1])
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     ws, ts, ss = [], [], []
                     def flushss():
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
-                            y = ('func', ('symbol', '_list'), ('string', s))
+                            y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y, False)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y, False)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if x[1][:3] == ('func', ('symbol', 'public'), None):
-                        newsym = ('func', ('symbol', '_notpublic'), None)
+                        o = _optimize(_build('_notpublic()'), not small)
-                        o = _optimize(newsym, not small)
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1], not small)
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return smallbonus, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1], small)
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     wa, ta = _optimize(x[1], small)
                     wb, tb = _optimize(x[2], small)
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     w, t = _optimize(x[1], small)
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     w, t = _optimize(x[1], small)
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2], small)
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2], small)
                     if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
                              'keyword', 'outgoing', 'user', 'destination'):
                         w = 10 # slow
                     elif f in ('modifies', 'adds', 'removes'):
                         w = 30 # slower
                     elif f == "contains":
                         w = 100 # very slow
                     elif f == "ancestor":
                         w = 1 * smallbonus
                     elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
                         w = 0
                     elif f == "sort":
                         w = 10 # assume most sorts look at changelog
                     else:
                         w = 1
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Optimize evaluatable tree
                 All pseudo operations should be transformed beforehand.
                 """
                 _weight, newtree = _optimize(tree, small=True)
                 return newtree
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions
             _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Generate a parse tree of given spec with given tokenizing options
                 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith('$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith('foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 p = parser.parser(elements)
                 tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                              syminitletters=syminitletters))
                 if pos != len(spec):
                     raise error.ParseError(_('invalid token'), pos)
                 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
             class _aliasrules(parser.basealiasrules):
                 """Parsing and expansion rule set of revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse alias declaration/definition ``spec``
                     This allows symbol names to use also ``$`` as an initial letter
                     (for backward compatibility), and callers of this function should
                     examine whether ``$`` is used also for unexpected symbols or not.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     if tree[0] == 'func' and tree[1][0] == 'symbol':
                         return tree[1][1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
                 aliases = _aliasrules.buildmap(aliases)
                 tree = _aliasrules.expand(aliases, tree)
                 # warn about problematic (but not referred) aliases
                 if warn is not None:
                     for name, alias in sorted(aliases.iteritems()):
                         if alias.error and not alias.warned:
                             warn(_('warning: %s\n') % (alias.error))
                             alias.warned = True
                 return tree
             def foldconcat(tree):
                 """Fold elements to be concatenated by `##`
                 """
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return tree
                 if tree[0] == '_concat':
                     pending = [tree]
                     l = []
                     while pending:
                         e = pending.pop()
                         if e[0] == '_concat':
                             pending.extend(reversed(e[1:]))
                         elif e[0] in ('string', 'symbol'):
                             l.append(e[1])
                         else:
                             msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
                             raise error.ParseError(msg)
                     return ('string', ''.join(l))
                 else:
                     return tuple(foldconcat(t) for t in tree)
             def parse(spec, lookup=None):
                 return _parsewith(spec, lookup=lookup)
             def _quote(s):
                 r"""Quote a value in order to make it safe for the revset engine.
                 >>> _quote('asdf')
                 "'asdf'"
                 >>> _quote("asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote('asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 return "'%s'" % util.escapestr(pycompat.bytestr(s))
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.
                 Supported arguments:
                 %r = revset expression, parenthesized
                 %d = int(arg), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'
                 Prefixing the type with 'l' specifies a parenthesized list of that type.
                 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec('%d:: and not %d::', 10, 20)
                 '10:: and not 20::'
                 >>> formatspec('%ld or %ld', [], [1])
                 "_list('') or 1"
                 >>> formatspec('keyword(%s)', 'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: 'default'
                 >>> b.branch = b
                 >>> formatspec('branch(%b)', b)
                 "branch('default')"
                 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
                 "root(_list('a\\x00b\\x00c\\x00d'))"
                 '''
                 def argtype(c, arg):
                     if c == 'd':
                         return '%d' % int(arg)
                     elif c == 's':
                         return _quote(arg)
                     elif c == 'r':
                         parse(arg) # make sure syntax errors are confined
                         return '(%s)' % arg
                     elif c == 'n':
                         return _quote(node.hex(arg))
                     elif c == 'b':
                         return _quote(arg.branch())
                 def listexp(s, t):
                     l = len(s)
                     if l == 0:
                         return "_list('')"
                     elif l == 1:
                         return argtype(t, s[0])
                     elif t == 'd':
                         return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
                     elif t == 's':
                         return "_list('%s')" % "\0".join(s)
                     elif t == 'n':
                         return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                     elif t == 'b':
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     m = l // 2
                     return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
                 expr = pycompat.bytestr(expr)
                 ret = ''
                 pos = 0
                 arg = 0
                 while pos < len(expr):
                     c = expr[pos]
                     if c == '%':
                         pos += 1
                         d = expr[pos]
                         if d == '%':
                             ret += d
                         elif d in 'dsnbr':
                             ret += argtype(d, args[arg])
                             arg += 1
                         elif d == 'l':
                             # a list of some type
                             pos += 1
                             d = expr[pos]
                             ret += listexp(list(args[arg]), d)
                             arg += 1
                         else:
                             raise error.Abort(_('unexpected revspec format character %s')
                                               % d)
                     else:
                         ret += c
                     pos += 1
                 return ret
             def prettyformat(tree):
                 return parser.prettyformat(tree, ('string', 'symbol'))
             def depth(tree):
                 if isinstance(tree, tuple):
                     return max(map(depth, tree)) + 1
                 else:
                     return 0
             def funcsused(tree):
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return set()
                 else:
                     funcs = set()
                     for s in tree[1:]:
                         funcs |= funcsused(s)
                     if tree[0] == 'func':
                         funcs.add(tree[1][1])
                     return funcs