upstream/mercurial-mirror Commit - r34048:f23cbca9

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

util,

18

util,

19

)

19

)

20

21

elements = {

21

elements = {

22

# token-type: binding-strength, primary, prefix, infix, suffix

22

# token-type: binding-strength, primary, prefix, infix, suffix

23

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

23

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

24

"[": (21, None, None, ("subscript", 1, "]"), None),

24

"[": (21, None, None, ("subscript", 1, "]"), None),

25

"#": (21, None, None, ("relation", 21), None),

25

"#": (21, None, None, ("relation", 21), None),

26

"##": (20, None, None, ("_concat", 20), None),

26

"##": (20, None, None, ("_concat", 20), None),

27

"~": (18, None, None, ("ancestor", 18), None),

27

"~": (18, None, None, ("ancestor", 18), None),

28

"^": (18, None, None, ("parent", 18), "parentpost"),

28

"^": (18, None, None, ("parent", 18), "parentpost"),

29

"-": (5, None, ("negate", 19), ("minus", 5), None),

29

"-": (5, None, ("negate", 19), ("minus", 5), None),

30

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

30

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

31

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

31

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

32

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

32

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

33

"not": (10, None, ("not", 10), None, None),

33

"not": (10, None, ("not", 10), None, None),

34

"!": (10, None, ("not", 10), None, None),

34

"!": (10, None, ("not", 10), None, None),

35

"and": (5, None, None, ("and", 5), None),

35

"and": (5, None, None, ("and", 5), None),

36

"&": (5, None, None, ("and", 5), None),

36

"&": (5, None, None, ("and", 5), None),

37

"%": (5, None, None, ("only", 5), "onlypost"),

37

"%": (5, None, None, ("only", 5), "onlypost"),

38

"or": (4, None, None, ("or", 4), None),

38

"or": (4, None, None, ("or", 4), None),

39

"|": (4, None, None, ("or", 4), None),

39

"|": (4, None, None, ("or", 4), None),

40

"+": (4, None, None, ("or", 4), None),

40

"+": (4, None, None, ("or", 4), None),

41

"=": (3, None, None, ("keyvalue", 3), None),

41

"=": (3, None, None, ("keyvalue", 3), None),

42

",": (2, None, None, ("list", 2), None),

42

",": (2, None, None, ("list", 2), None),

43

")": (0, None, None, None, None),

43

")": (0, None, None, None, None),

44

"]": (0, None, None, None, None),

44

"]": (0, None, None, None, None),

45

"symbol": (0, "symbol", None, None, None),

45

"symbol": (0, "symbol", None, None, None),

46

"string": (0, "string", None, None, None),

46

"string": (0, "string", None, None, None),

47

"end": (0, None, None, None, None),

47

"end": (0, None, None, None, None),

48

}

48

}

49

50

keywords = {'and', 'or', 'not'}

50

keywords = {'and', 'or', 'not'}

51

52

_quoteletters = {'"', "'"}

52

_quoteletters = {'"', "'"}

53

_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

53

_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

54

55

# default set of valid characters for the initial letter of symbols

55

# default set of valid characters for the initial letter of symbols

56

_syminitletters = set(pycompat.iterbytestr(

56

_syminitletters = set(pycompat.iterbytestr(

57

string.ascii_letters.encode('ascii') +

57

string.ascii_letters.encode('ascii') +

58

string.digits.encode('ascii') +

58

string.digits.encode('ascii') +

59

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

59

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

60

61

# default set of valid characters for non-initial letters of symbols

61

# default set of valid characters for non-initial letters of symbols

62

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

62

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

63

64

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

64

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

65

'''

65

'''

66

Parse a revset statement into a stream of tokens

66

Parse a revset statement into a stream of tokens

67

68

``syminitletters`` is the set of valid characters for the initial

68

``syminitletters`` is the set of valid characters for the initial

69

letter of symbols.

69

letter of symbols.

70

71

By default, character ``c`` is recognized as valid for initial

71

By default, character ``c`` is recognized as valid for initial

72

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

72

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

73

74

``symletters`` is the set of valid characters for non-initial

74

``symletters`` is the set of valid characters for non-initial

75

letters of symbols.

75

letters of symbols.

76

77

By default, character ``c`` is recognized as valid for non-initial

77

By default, character ``c`` is recognized as valid for non-initial

78

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

78

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

79

80

Check that @ is a valid unquoted token character (issue3686):

80

Check that @ is a valid unquoted token character (issue3686):

81

>>> list(tokenize("@::"))

81

>>> list(tokenize("@::"))

82

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

82

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

83

84

'''

84

'''

85

program = pycompat.bytestr(program)

85

program = pycompat.bytestr(program)

86

if syminitletters is None:

86

if syminitletters is None:

87

syminitletters = _syminitletters

87

syminitletters = _syminitletters

88

if symletters is None:

88

if symletters is None:

89

symletters = _symletters

89

symletters = _symletters

90

91

if program and lookup:

91

if program and lookup:

92

# attempt to parse old-style ranges first to deal with

92

# attempt to parse old-style ranges first to deal with

93

# things like old-tag which contain query metacharacters

93

# things like old-tag which contain query metacharacters

94

parts = program.split(':', 1)

94

parts = program.split(':', 1)

95

if all(lookup(sym) for sym in parts if sym):

95

if all(lookup(sym) for sym in parts if sym):

96

if parts[0]:

96

if parts[0]:

97

yield ('symbol', parts[0], 0)

97

yield ('symbol', parts[0], 0)

98

if len(parts) > 1:

98

if len(parts) > 1:

99

s = len(parts[0])

99

s = len(parts[0])

100

yield (':', None, s)

100

yield (':', None, s)

101

if parts[1]:

101

if parts[1]:

102

yield ('symbol', parts[1], s + 1)

102

yield ('symbol', parts[1], s + 1)

103

yield ('end', None, len(program))

103

yield ('end', None, len(program))

104

return

104

return

105

106

pos, l = 0, len(program)

106

pos, l = 0, len(program)

107

while pos < l:

107

while pos < l:

108

c = program[pos]

108

c = program[pos]

109

if c.isspace(): # skip inter-token whitespace

109

if c.isspace(): # skip inter-token whitespace

110

pass

110

pass

111

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

111

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

112

yield ('::', None, pos)

112

yield ('::', None, pos)

113

pos += 1 # skip ahead

113

pos += 1 # skip ahead

114

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

114

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

115

yield ('..', None, pos)

115

yield ('..', None, pos)

116

pos += 1 # skip ahead

116

pos += 1 # skip ahead

117

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

117

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

118

yield ('##', None, pos)

118

yield ('##', None, pos)

119

pos += 1 # skip ahead

119

pos += 1 # skip ahead

120

elif c in _simpleopletters: # handle simple operators

120

elif c in _simpleopletters: # handle simple operators

121

yield (c, None, pos)

121

yield (c, None, pos)

122

elif (c in _quoteletters or c == 'r' and

122

elif (c in _quoteletters or c == 'r' and

123

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

123

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

124

if c == 'r':

124

if c == 'r':

125

pos += 1

125

pos += 1

126

c = program[pos]

126

c = program[pos]

127

decode = lambda x: x

127

decode = lambda x: x

128

else:

128

else:

129

decode = parser.unescapestr

129

decode = parser.unescapestr

130

pos += 1

130

pos += 1

131

s = pos

131

s = pos

132

while pos < l: # find closing quote

132

while pos < l: # find closing quote

133

d = program[pos]

133

d = program[pos]

134

if d == '\\': # skip over escaped characters

134

if d == '\\': # skip over escaped characters

135

pos += 2

135

pos += 2

136

continue

136

continue

137

if d == c:

137

if d == c:

138

yield ('string', decode(program[s:pos]), s)

138

yield ('string', decode(program[s:pos]), s)

139

break

139

break

140

pos += 1

140

pos += 1

141

else:

141

else:

142

raise error.ParseError(_("unterminated string"), s)

142

raise error.ParseError(_("unterminated string"), s)

143

# gather up a symbol/keyword

143

# gather up a symbol/keyword

144

elif c in syminitletters:

144

elif c in syminitletters:

145

s = pos

145

s = pos

146

pos += 1

146

pos += 1

147

while pos < l: # find end of symbol

147

while pos < l: # find end of symbol

148

d = program[pos]

148

d = program[pos]

149

if d not in symletters:

149

if d not in symletters:

150

break

150

break

151

if d == '.' and program[pos - 1] == '.': # special case for ..

151

if d == '.' and program[pos - 1] == '.': # special case for ..

152

pos -= 1

152

pos -= 1

153

break

153

break

154

pos += 1

154

pos += 1

155

sym = program[s:pos]

155

sym = program[s:pos]

156

if sym in keywords: # operator keywords

156

if sym in keywords: # operator keywords

157

yield (sym, None, s)

157

yield (sym, None, s)

158

elif '-' in sym:

158

elif '-' in sym:

159

# some jerk gave us foo-bar-baz, try to check if it's a symbol

159

# some jerk gave us foo-bar-baz, try to check if it's a symbol

160

if lookup and lookup(sym):

160

if lookup and lookup(sym):

161

# looks like a real symbol

161

# looks like a real symbol

162

yield ('symbol', sym, s)

162

yield ('symbol', sym, s)

163

else:

163

else:

164

# looks like an expression

164

# looks like an expression

165

parts = sym.split('-')

165

parts = sym.split('-')

166

for p in parts[:-1]:

166

for p in parts[:-1]:

167

if p: # possible consecutive -

167

if p: # possible consecutive -

168

yield ('symbol', p, s)

168

yield ('symbol', p, s)

169

s += len(p)

169

s += len(p)

170

yield ('-', None, pos)

170

yield ('-', None, pos)

171

s += 1

171

s += 1

172

if parts[-1]: # possible trailing -

172

if parts[-1]: # possible trailing -

173

yield ('symbol', parts[-1], s)

173

yield ('symbol', parts[-1], s)

174

else:

174

else:

175

yield ('symbol', sym, s)

175

yield ('symbol', sym, s)

176

pos -= 1

176

pos -= 1

177

else:

177

else:

178

raise error.ParseError(_("syntax error in revset '%s'") %

178

raise error.ParseError(_("syntax error in revset '%s'") %

179

program, pos)

179

program, pos)

180

pos += 1

180

pos += 1

181

yield ('end', None, pos)

181

yield ('end', None, pos)

182

183

# helpers

183

# helpers

184

185

_notset = object()

185

_notset = object()

186

187

def getsymbol(x):

187

def getsymbol(x):

188

if x and x[0] == 'symbol':

188

if x and x[0] == 'symbol':

189

return x[1]

189

return x[1]

190

raise error.ParseError(_('not a symbol'))

190

raise error.ParseError(_('not a symbol'))

191

192

def getstring(x, err):

192

def getstring(x, err):

193

if x and (x[0] == 'string' or x[0] == 'symbol'):

193

if x and (x[0] == 'string' or x[0] == 'symbol'):

194

return x[1]

194

return x[1]

195

raise error.ParseError(err)

195

raise error.ParseError(err)

196

197

def getinteger(x, err, default=_notset):

197

def getinteger(x, err, default=_notset):

198

if not x and default is not _notset:

198

if not x and default is not _notset:

199

return default

199

return default

200

try:

200

try:

201

return int(getstring(x, err))

201

return int(getstring(x, err))

202

except ValueError:

202

except ValueError:

203

raise error.ParseError(err)

203

raise error.ParseError(err)

204

205

def getboolean(x, err):

205

def getboolean(x, err):

206

value = util.parsebool(getsymbol(x))

206

value = util.parsebool(getsymbol(x))

207

if value is not None:

207

if value is not None:

208

return value

208

return value

209

raise error.ParseError(err)

209

raise error.ParseError(err)

210

211

def getlist(x):

211

def getlist(x):

212

if not x:

212

if not x:

213

return []

213

return []

214

if x[0] == 'list':

214

if x[0] == 'list':

215

return list(x[1:])

215

return list(x[1:])

216

return [x]

216

return [x]

217

218

def getrange(x, err):

218

def getrange(x, err):

219

if not x:

219

if not x:

220

raise error.ParseError(err)

220

raise error.ParseError(err)

221

op = x[0]

221

op = x[0]

222

if op == 'range':

222

if op == 'range':

223

return x[1], x[2]

223

return x[1], x[2]

224

elif op == 'rangepre':

224

elif op == 'rangepre':

225

return None, x[1]

225

return None, x[1]

226

elif op == 'rangepost':

226

elif op == 'rangepost':

227

return x[1], None

227

return x[1], None

228

elif op == 'rangeall':

228

elif op == 'rangeall':

229

return None, None

229

return None, None

230

raise error.ParseError(err)

230

raise error.ParseError(err)

231

232

def getargs(x, min, max, err):

232

def getargs(x, min, max, err):

233

l = getlist(x)

233

l = getlist(x)

234

if len(l) < min or (max >= 0 and len(l) > max):

234

if len(l) < min or (max >= 0 and len(l) > max):

235

raise error.ParseError(err)

235

raise error.ParseError(err)

236

return l

236

return l

237

238

def getargsdict(x, funcname, keys):

238

def getargsdict(x, funcname, keys):

239

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

239

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

240

keyvaluenode='keyvalue', keynode='symbol')

240

keyvaluenode='keyvalue', keynode='symbol')

241

242

# cache of {spec: raw parsed tree} built internally

242

# cache of {spec: raw parsed tree} built internally

243

_treecache = {}

243

_treecache = {}

244

245

def _cachedtree(spec):

245

def _cachedtree(spec):

246

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

246

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

247

tree = _treecache.get(spec)

247

tree = _treecache.get(spec)

248

if tree is None:

248

if tree is None:

249

_treecache[spec] = tree = parse(spec)

249

_treecache[spec] = tree = parse(spec)

250

return tree

250

return tree

251

252

def _build(tmplspec, *repls):

252

def _build(tmplspec, *repls):

253

"""Create raw parsed tree from a template revset statement

253

"""Create raw parsed tree from a template revset statement

254

255

>>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))

255

>>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))

256

('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))

256

('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))

257

"""

257

"""

258

template = _cachedtree(tmplspec)

258

template = _cachedtree(tmplspec)

259

return parser.buildtree(template, ('symbol', '_'), *repls)

259

return parser.buildtree(template, ('symbol', '_'), *repls)

260

261

def _match(patspec, tree):

262

"""Test if a tree matches the given pattern statement; return the matches

263

264

>>> _match('f(_)', parse('f()'))

265

>>> _match('f(_)', parse('f(1)'))

266

[('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]

267

>>> _match('f(_)', parse('f(1, 2)'))

268

"""

269

pattern = _cachedtree(patspec)

270

return parser.matchtree(pattern, tree, ('symbol', '_'),

271

{'keyvalue', 'list'})

272

261

def _isnamedfunc(x, funcname):

273

def _isnamedfunc(x, funcname):

262

"""Check if given tree matches named function"""

274

"""Check if given tree matches named function"""

263

return x and x[0] == 'func' and getsymbol(x[1]) == funcname

275

return x and x[0] == 'func' and getsymbol(x[1]) == funcname

264

276

265

def _isposargs(x, n):

277

def _isposargs(x, n):

266

"""Check if given tree is n-length list of positional arguments"""

278

"""Check if given tree is n-length list of positional arguments"""

267

l = getlist(x)

279

l = getlist(x)

268

return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)

280

return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)

269

281

270

def _matchnamedfunc(x, funcname):

282

def _matchnamedfunc(x, funcname):

271

"""Return args tree if given tree matches named function; otherwise None

283

"""Return args tree if given tree matches named function; otherwise None

272

284

273

This can't be used for testing a nullary function since its args tree

285

This can't be used for testing a nullary function since its args tree

274

is also None. Use _isnamedfunc() instead.

286

is also None. Use _isnamedfunc() instead.

275

"""

287

"""

276

if not _isnamedfunc(x, funcname):

288

if not _isnamedfunc(x, funcname):

277

return

289

return

278

return x[2]

290

return x[2]

279

291

280

def _matchonly(revs, bases):

292

def _matchonly(revs, bases):

281

"""

293

return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))

282

>>> f = lambda *args: _matchonly(*map(parse, args))

283

>>> f('ancestors(A)', 'not ancestors(B)')

284

('list', ('symbol', 'A'), ('symbol', 'B'))

285

"""

286

ta = _matchnamedfunc(revs, 'ancestors')

287

tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')

288

if _isposargs(ta, 1) and _isposargs(tb, 1):

289

return ('list', ta, tb)

290

294

291

def _fixops(x):

295

def _fixops(x):

292

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

296

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

293

handled well by our simple top-down parser"""

297

handled well by our simple top-down parser"""

294

if not isinstance(x, tuple):

298

if not isinstance(x, tuple):

295

return x

299

return x

296

300

297

op = x[0]

301

op = x[0]

298

if op == 'parent':

302

if op == 'parent':

299

# x^:y means (x^) : y, not x ^ (:y)

303

# x^:y means (x^) : y, not x ^ (:y)

300

# x^: means (x^) :, not x ^ (:)

304

# x^: means (x^) :, not x ^ (:)

301

post = ('parentpost', x[1])

305

post = ('parentpost', x[1])

302

if x[2][0] == 'dagrangepre':

306

if x[2][0] == 'dagrangepre':

303

return _fixops(('dagrange', post, x[2][1]))

307

return _fixops(('dagrange', post, x[2][1]))

304

elif x[2][0] == 'rangepre':

308

elif x[2][0] == 'rangepre':

305

return _fixops(('range', post, x[2][1]))

309

return _fixops(('range', post, x[2][1]))

306

elif x[2][0] == 'rangeall':

310

elif x[2][0] == 'rangeall':

307

return _fixops(('rangepost', post))

311

return _fixops(('rangepost', post))

308

elif op == 'or':

312

elif op == 'or':

309

# make number of arguments deterministic:

313

# make number of arguments deterministic:

310

# x + y + z -> (or x y z) -> (or (list x y z))

314

# x + y + z -> (or x y z) -> (or (list x y z))

311

return (op, _fixops(('list',) + x[1:]))

315

return (op, _fixops(('list',) + x[1:]))

312

elif op == 'subscript' and x[1][0] == 'relation':

316

elif op == 'subscript' and x[1][0] == 'relation':

313

# x#y[z] ternary

317

# x#y[z] ternary

314

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

318

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

315

319

316

return (op,) + tuple(_fixops(y) for y in x[1:])

320

return (op,) + tuple(_fixops(y) for y in x[1:])

317

321

318

def _analyze(x):

322

def _analyze(x):

319

if x is None:

323

if x is None:

320

return x

324

return x

321

325

322

op = x[0]

326

op = x[0]

323

if op == 'minus':

327

if op == 'minus':

324

return _analyze(_build('_ and not _', *x[1:]))

328

return _analyze(_build('_ and not _', *x[1:]))

325

elif op == 'only':

329

elif op == 'only':

326

return _analyze(_build('only(_, _)', *x[1:]))

330

return _analyze(_build('only(_, _)', *x[1:]))

327

elif op == 'onlypost':

331

elif op == 'onlypost':

328

return _analyze(_build('only(_)', x[1]))

332

return _analyze(_build('only(_)', x[1]))

329

elif op == 'dagrangepre':

333

elif op == 'dagrangepre':

330

return _analyze(_build('ancestors(_)', x[1]))

334

return _analyze(_build('ancestors(_)', x[1]))

331

elif op == 'dagrangepost':

335

elif op == 'dagrangepost':

332

return _analyze(_build('descendants(_)', x[1]))

336

return _analyze(_build('descendants(_)', x[1]))

333

elif op == 'negate':

337

elif op == 'negate':

334

s = getstring(x[1], _("can't negate that"))

338

s = getstring(x[1], _("can't negate that"))

335

return _analyze(('string', '-' + s))

339

return _analyze(('string', '-' + s))

336

elif op in ('string', 'symbol'):

340

elif op in ('string', 'symbol'):

337

return x

341

return x

338

elif op == 'rangeall':

342

elif op == 'rangeall':

339

return (op, None)

343

return (op, None)

340

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

344

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

341

return (op, _analyze(x[1]))

345

return (op, _analyze(x[1]))

342

elif op == 'group':

346

elif op == 'group':

343

return _analyze(x[1])

347

return _analyze(x[1])

344

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

348

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

345

'subscript'}:

349

'subscript'}:

346

ta = _analyze(x[1])

350

ta = _analyze(x[1])

347

tb = _analyze(x[2])

351

tb = _analyze(x[2])

348

return (op, ta, tb)

352

return (op, ta, tb)

349

elif op == 'relsubscript':

353

elif op == 'relsubscript':

350

ta = _analyze(x[1])

354

ta = _analyze(x[1])

351

tb = _analyze(x[2])

355

tb = _analyze(x[2])

352

tc = _analyze(x[3])

356

tc = _analyze(x[3])

353

return (op, ta, tb, tc)

357

return (op, ta, tb, tc)

354

elif op == 'list':

358

elif op == 'list':

355

return (op,) + tuple(_analyze(y) for y in x[1:])

359

return (op,) + tuple(_analyze(y) for y in x[1:])

356

elif op == 'keyvalue':

360

elif op == 'keyvalue':

357

return (op, x[1], _analyze(x[2]))

361

return (op, x[1], _analyze(x[2]))

358

elif op == 'func':

362

elif op == 'func':

359

return (op, x[1], _analyze(x[2]))

363

return (op, x[1], _analyze(x[2]))

360

raise ValueError('invalid operator %r' % op)

364

raise ValueError('invalid operator %r' % op)

361

365

362

def analyze(x):

366

def analyze(x):

363

"""Transform raw parsed tree to evaluatable tree which can be fed to

367

"""Transform raw parsed tree to evaluatable tree which can be fed to

364

optimize() or getset()

368

optimize() or getset()

365

369

366

All pseudo operations should be mapped to real operations or functions

370

All pseudo operations should be mapped to real operations or functions

367

defined in methods or symbols table respectively.

371

defined in methods or symbols table respectively.

368

"""

372

"""

369

return _analyze(x)

373

return _analyze(x)

370

374

371

def _optimize(x, small):

375

def _optimize(x, small):

372

if x is None:

376

if x is None:

373

return 0, x

377

return 0, x

374

378

375

smallbonus = 1

379

smallbonus = 1

376

if small:

380

if small:

377

smallbonus = .5

381

smallbonus = .5

378

382

379

op = x[0]

383

op = x[0]

380

if op in ('string', 'symbol'):

384

if op in ('string', 'symbol'):

381

return smallbonus, x # single revisions are small

385

return smallbonus, x # single revisions are small

382

elif op == 'and':

386

elif op == 'and':

383

wa, ta = _optimize(x[1], True)

387

wa, ta = _optimize(x[1], True)

384

wb, tb = _optimize(x[2], True)

388

wb, tb = _optimize(x[2], True)

385

w = min(wa, wb)

389

w = min(wa, wb)

386

390

387

# (::x and not ::y)/(not ::y and ::x) have a fast path

391

# (::x and not ::y)/(not ::y and ::x) have a fast path

388

m = _matchonly(ta, tb) or _matchonly(tb, ta)

392

m = _matchonly(ta, tb) or _matchonly(tb, ta)

389

if m:

393

if m:

390

return w, _build('only(_, _)', *m[1:])

394

return w, _build('only(_, _)', *m[1:])

391

395

392

if tb is not None and tb[0] == 'not':

396

m = _match('not _', tb)

393

return wa, ('difference', ta, tb[1])

397

if m:

398

return wa, ('difference', ta, m[1])

394

if wa > wb:

399

if wa > wb:

395

op = 'andsmally'

400

op = 'andsmally'

396

return w, (op, ta, tb)

401

return w, (op, ta, tb)

397

elif op == 'or':

402

elif op == 'or':

398

# fast path for machine-generated expression, that is likely to have

403

# fast path for machine-generated expression, that is likely to have

399

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

404

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

400

ws, ts, ss = [], [], []

405

ws, ts, ss = [], [], []

401

def flushss():

406

def flushss():

402

if not ss:

407

if not ss:

403

return

408

return

404

if len(ss) == 1:

409

if len(ss) == 1:

405

w, t = ss[0]

410

w, t = ss[0]

406

else:

411

else:

407

s = '\0'.join(t[1] for w, t in ss)

412

s = '\0'.join(t[1] for w, t in ss)

408

y = _build('_list(_)', ('string', s))

413

y = _build('_list(_)', ('string', s))

409

w, t = _optimize(y, False)

414

w, t = _optimize(y, False)

410

ws.append(w)

415

ws.append(w)

411

ts.append(t)

416

ts.append(t)

412

del ss[:]

417

del ss[:]

413

for y in getlist(x[1]):

418

for y in getlist(x[1]):

414

w, t = _optimize(y, False)

419

w, t = _optimize(y, False)

415

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

420

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

416

ss.append((w, t))

421

ss.append((w, t))

417

continue

422

continue

418

flushss()

423

flushss()

419

ws.append(w)

424

ws.append(w)

420

ts.append(t)

425

ts.append(t)

421

flushss()

426

flushss()

422

if len(ts) == 1:

427

if len(ts) == 1:

423

return ws[0], ts[0] # 'or' operation is fully optimized out

428

return ws[0], ts[0] # 'or' operation is fully optimized out

424

return max(ws), (op, ('list',) + tuple(ts))

429

return max(ws), (op, ('list',) + tuple(ts))

425

elif op == 'not':

430

elif op == 'not':

426

# Optimize not public() to _notpublic() because we have a fast version

431

# Optimize not public() to _notpublic() because we have a fast version

427

if x[1][:3] == ('func', ('symbol', 'public'), None):

432

if _match('public()', x[1]):

428

o = _optimize(_build('_notpublic()'), not small)

433

o = _optimize(_build('_notpublic()'), not small)

429

return o[0], o[1]

434

return o[0], o[1]

430

else:

435

else:

431

o = _optimize(x[1], not small)

436

o = _optimize(x[1], not small)

432

return o[0], (op, o[1])

437

return o[0], (op, o[1])

433

elif op == 'rangeall':

438

elif op == 'rangeall':

434

return smallbonus, x

439

return smallbonus, x

435

elif op in ('rangepre', 'rangepost', 'parentpost'):

440

elif op in ('rangepre', 'rangepost', 'parentpost'):

436

o = _optimize(x[1], small)

441

o = _optimize(x[1], small)

437

return o[0], (op, o[1])

442

return o[0], (op, o[1])

438

elif op in ('dagrange', 'range'):

443

elif op in ('dagrange', 'range'):

439

wa, ta = _optimize(x[1], small)

444

wa, ta = _optimize(x[1], small)

440

wb, tb = _optimize(x[2], small)

445

wb, tb = _optimize(x[2], small)

441

return wa + wb, (op, ta, tb)

446

return wa + wb, (op, ta, tb)

442

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

447

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

443

w, t = _optimize(x[1], small)

448

w, t = _optimize(x[1], small)

444

return w, (op, t, x[2])

449

return w, (op, t, x[2])

445

elif op == 'relsubscript':

450

elif op == 'relsubscript':

446

w, t = _optimize(x[1], small)

451

w, t = _optimize(x[1], small)

447

return w, (op, t, x[2], x[3])

452

return w, (op, t, x[2], x[3])

448

elif op == 'list':

453

elif op == 'list':

449

ws, ts = zip(*(_optimize(y, small) for y in x[1:]))

454

ws, ts = zip(*(_optimize(y, small) for y in x[1:]))

450

return sum(ws), (op,) + ts

455

return sum(ws), (op,) + ts

451

elif op == 'keyvalue':

456

elif op == 'keyvalue':

452

w, t = _optimize(x[2], small)

457

w, t = _optimize(x[2], small)

453

return w, (op, x[1], t)

458

return w, (op, x[1], t)

454

elif op == 'func':

459

elif op == 'func':

455

f = getsymbol(x[1])

460

f = getsymbol(x[1])

456

wa, ta = _optimize(x[2], small)

461

wa, ta = _optimize(x[2], small)

457

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

462

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

458

'keyword', 'outgoing', 'user', 'destination'):

463

'keyword', 'outgoing', 'user', 'destination'):

459

w = 10 # slow

464

w = 10 # slow

460

elif f in ('modifies', 'adds', 'removes'):

465

elif f in ('modifies', 'adds', 'removes'):

461

w = 30 # slower

466

w = 30 # slower

462

elif f == "contains":

467

elif f == "contains":

463

w = 100 # very slow

468

w = 100 # very slow

464

elif f == "ancestor":

469

elif f == "ancestor":

465

w = 1 * smallbonus

470

w = 1 * smallbonus

466

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

471

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

467

w = 0

472

w = 0

468

elif f == "sort":

473

elif f == "sort":

469

w = 10 # assume most sorts look at changelog

474

w = 10 # assume most sorts look at changelog

470

else:

475

else:

471

w = 1

476

w = 1

472

return w + wa, (op, x[1], ta)

477

return w + wa, (op, x[1], ta)

473

raise ValueError('invalid operator %r' % op)

478

raise ValueError('invalid operator %r' % op)

474

479

475

def optimize(tree):

480

def optimize(tree):

476

"""Optimize evaluatable tree

481

"""Optimize evaluatable tree

477

482

478

All pseudo operations should be transformed beforehand.

483

All pseudo operations should be transformed beforehand.

479

"""

484

"""

480

_weight, newtree = _optimize(tree, small=True)

485

_weight, newtree = _optimize(tree, small=True)

481

return newtree

486

return newtree

482

487

483

# the set of valid characters for the initial letter of symbols in

488

# the set of valid characters for the initial letter of symbols in

484

# alias declarations and definitions

489

# alias declarations and definitions

485

_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))

490

_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))

486

491

487

def _parsewith(spec, lookup=None, syminitletters=None):

492

def _parsewith(spec, lookup=None, syminitletters=None):

488

"""Generate a parse tree of given spec with given tokenizing options

493

"""Generate a parse tree of given spec with given tokenizing options

489

494

490

>>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)

495

>>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)

491

('func', ('symbol', 'foo'), ('symbol', '$1'))

496

('func', ('symbol', 'foo'), ('symbol', '$1'))

492

>>> _parsewith('$1')

497

>>> _parsewith('$1')

493

Traceback (most recent call last):

498

Traceback (most recent call last):

494

...

499

...

495

ParseError: ("syntax error in revset '$1'", 0)

500

ParseError: ("syntax error in revset '$1'", 0)

496

>>> _parsewith('foo bar')

501

>>> _parsewith('foo bar')

497

Traceback (most recent call last):

502

Traceback (most recent call last):

498

...

503

...

499

ParseError: ('invalid token', 4)

504

ParseError: ('invalid token', 4)

500

"""

505

"""

501

p = parser.parser(elements)

506

p = parser.parser(elements)

502

tree, pos = p.parse(tokenize(spec, lookup=lookup,

507

tree, pos = p.parse(tokenize(spec, lookup=lookup,

503

syminitletters=syminitletters))

508

syminitletters=syminitletters))

504

if pos != len(spec):

509

if pos != len(spec):

505

raise error.ParseError(_('invalid token'), pos)

510

raise error.ParseError(_('invalid token'), pos)

506

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

511

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

507

512

508

class _aliasrules(parser.basealiasrules):

513

class _aliasrules(parser.basealiasrules):

509

"""Parsing and expansion rule set of revset aliases"""

514

"""Parsing and expansion rule set of revset aliases"""

510

_section = _('revset alias')

515

_section = _('revset alias')

511

516

512

@staticmethod

517

@staticmethod

513

def _parse(spec):

518

def _parse(spec):

514

"""Parse alias declaration/definition ``spec``

519

"""Parse alias declaration/definition ``spec``

515

520

516

This allows symbol names to use also ``$`` as an initial letter

521

This allows symbol names to use also ``$`` as an initial letter

517

(for backward compatibility), and callers of this function should

522

(for backward compatibility), and callers of this function should

518

examine whether ``$`` is used also for unexpected symbols or not.

523

examine whether ``$`` is used also for unexpected symbols or not.

519

"""

524

"""

520

return _parsewith(spec, syminitletters=_aliassyminitletters)

525

return _parsewith(spec, syminitletters=_aliassyminitletters)

521

526

522

@staticmethod

527

@staticmethod

523

def _trygetfunc(tree):

528

def _trygetfunc(tree):

524

if tree[0] == 'func' and tree[1][0] == 'symbol':

529

if tree[0] == 'func' and tree[1][0] == 'symbol':

525

return tree[1][1], getlist(tree[2])

530

return tree[1][1], getlist(tree[2])

526

531

527

def expandaliases(tree, aliases, warn=None):

532

def expandaliases(tree, aliases, warn=None):

528

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

533

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

529

aliases = _aliasrules.buildmap(aliases)

534

aliases = _aliasrules.buildmap(aliases)

530

tree = _aliasrules.expand(aliases, tree)

535

tree = _aliasrules.expand(aliases, tree)

531

# warn about problematic (but not referred) aliases

536

# warn about problematic (but not referred) aliases

532

if warn is not None:

537

if warn is not None:

533

for name, alias in sorted(aliases.iteritems()):

538

for name, alias in sorted(aliases.iteritems()):

534

if alias.error and not alias.warned:

539

if alias.error and not alias.warned:

535

warn(_('warning: %s\n') % (alias.error))

540

warn(_('warning: %s\n') % (alias.error))

536

alias.warned = True

541

alias.warned = True

537

return tree

542

return tree

538

543

539

def foldconcat(tree):

544

def foldconcat(tree):

540

"""Fold elements to be concatenated by `##`

545

"""Fold elements to be concatenated by `##`

541

"""

546

"""

542

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

547

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

543

return tree

548

return tree

544

if tree[0] == '_concat':

549

if tree[0] == '_concat':

545

pending = [tree]

550

pending = [tree]

546

l = []

551

l = []

547

while pending:

552

while pending:

548

e = pending.pop()

553

e = pending.pop()

549

if e[0] == '_concat':

554

if e[0] == '_concat':

550

pending.extend(reversed(e[1:]))

555

pending.extend(reversed(e[1:]))

551

elif e[0] in ('string', 'symbol'):

556

elif e[0] in ('string', 'symbol'):

552

l.append(e[1])

557

l.append(e[1])

553

else:

558

else:

554

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

559

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

555

raise error.ParseError(msg)

560

raise error.ParseError(msg)

556

return ('string', ''.join(l))

561

return ('string', ''.join(l))

557

else:

562

else:

558

return tuple(foldconcat(t) for t in tree)

563

return tuple(foldconcat(t) for t in tree)

559

564

560

def parse(spec, lookup=None):

565

def parse(spec, lookup=None):

561

return _parsewith(spec, lookup=lookup)

566

return _parsewith(spec, lookup=lookup)

562

567

563

def _quote(s):

568

def _quote(s):

564

r"""Quote a value in order to make it safe for the revset engine.

569

r"""Quote a value in order to make it safe for the revset engine.

565

570

566

>>> _quote('asdf')

571

>>> _quote('asdf')

567

"'asdf'"

572

"'asdf'"

568

>>> _quote("asdf'\"")

573

>>> _quote("asdf'\"")

569

'\'asdf\\\'"\''

574

'\'asdf\\\'"\''

570

>>> _quote('asdf\'')

575

>>> _quote('asdf\'')

571

"'asdf\\''"

576

"'asdf\\''"

572

>>> _quote(1)

577

>>> _quote(1)

573

"'1'"

578

"'1'"

574

"""

579

"""

575

return "'%s'" % util.escapestr(pycompat.bytestr(s))

580

return "'%s'" % util.escapestr(pycompat.bytestr(s))

576

581

577

def formatspec(expr, *args):

582

def formatspec(expr, *args):

578

'''

583

'''

579

This is a convenience function for using revsets internally, and

584

This is a convenience function for using revsets internally, and

580

escapes arguments appropriately. Aliases are intentionally ignored

585

escapes arguments appropriately. Aliases are intentionally ignored

581

so that intended expression behavior isn't accidentally subverted.

586

so that intended expression behavior isn't accidentally subverted.

582

587

583

Supported arguments:

588

Supported arguments:

584

589

585

%r = revset expression, parenthesized

590

%r = revset expression, parenthesized

586

%d = int(arg), no quoting

591

%d = int(arg), no quoting

587

%s = string(arg), escaped and single-quoted

592

%s = string(arg), escaped and single-quoted

588

%b = arg.branch(), escaped and single-quoted

593

%b = arg.branch(), escaped and single-quoted

589

%n = hex(arg), single-quoted

594

%n = hex(arg), single-quoted

590

%% = a literal '%'

595

%% = a literal '%'

591

596

592

Prefixing the type with 'l' specifies a parenthesized list of that type.

597

Prefixing the type with 'l' specifies a parenthesized list of that type.

593

598

594

>>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))

599

>>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))

595

'(10 or 11):: and ((this()) or (that()))'

600

'(10 or 11):: and ((this()) or (that()))'

596

>>> formatspec('%d:: and not %d::', 10, 20)

601

>>> formatspec('%d:: and not %d::', 10, 20)

597

'10:: and not 20::'

602

'10:: and not 20::'

598

>>> formatspec('%ld or %ld', [], [1])

603

>>> formatspec('%ld or %ld', [], [1])

599

"_list('') or 1"

604

"_list('') or 1"

600

>>> formatspec('keyword(%s)', 'foo\\xe9')

605

>>> formatspec('keyword(%s)', 'foo\\xe9')

601

"keyword('foo\\\\xe9')"

606

"keyword('foo\\\\xe9')"

602

>>> b = lambda: 'default'

607

>>> b = lambda: 'default'

603

>>> b.branch = b

608

>>> b.branch = b

604

>>> formatspec('branch(%b)', b)

609

>>> formatspec('branch(%b)', b)

605

"branch('default')"

610

"branch('default')"

606

>>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])

611

>>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])

607

"root(_list('a\\x00b\\x00c\\x00d'))"

612

"root(_list('a\\x00b\\x00c\\x00d'))"

608

'''

613

'''

609

614

610

def argtype(c, arg):

615

def argtype(c, arg):

611

if c == 'd':

616

if c == 'd':

612

return '%d' % int(arg)

617

return '%d' % int(arg)

613

elif c == 's':

618

elif c == 's':

614

return _quote(arg)

619

return _quote(arg)

615

elif c == 'r':

620

elif c == 'r':

616

parse(arg) # make sure syntax errors are confined

621

parse(arg) # make sure syntax errors are confined

617

return '(%s)' % arg

622

return '(%s)' % arg

618

elif c == 'n':

623

elif c == 'n':

619

return _quote(node.hex(arg))

624

return _quote(node.hex(arg))

620

elif c == 'b':

625

elif c == 'b':

621

return _quote(arg.branch())

626

return _quote(arg.branch())

622

627

623

def listexp(s, t):

628

def listexp(s, t):

624

l = len(s)

629

l = len(s)

625

if l == 0:

630

if l == 0:

626

return "_list('')"

631

return "_list('')"

627

elif l == 1:

632

elif l == 1:

628

return argtype(t, s[0])

633

return argtype(t, s[0])

629

elif t == 'd':

634

elif t == 'd':

630

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

635

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

631

elif t == 's':

636

elif t == 's':

632

return "_list('%s')" % "\0".join(s)

637

return "_list('%s')" % "\0".join(s)

633

elif t == 'n':

638

elif t == 'n':

634

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

639

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

635

elif t == 'b':

640

elif t == 'b':

636

return "_list('%s')" % "\0".join(a.branch() for a in s)

641

return "_list('%s')" % "\0".join(a.branch() for a in s)

637

642

638

m = l // 2

643

m = l // 2

639

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

644

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

640

645

641

expr = pycompat.bytestr(expr)

646

expr = pycompat.bytestr(expr)

642

ret = ''

647

ret = ''

643

pos = 0

648

pos = 0

644

arg = 0

649

arg = 0

645

while pos < len(expr):

650

while pos < len(expr):

646

c = expr[pos]

651

c = expr[pos]

647

if c == '%':

652

if c == '%':

648

pos += 1

653

pos += 1

649

d = expr[pos]

654

d = expr[pos]

650

if d == '%':

655

if d == '%':

651

ret += d

656

ret += d

652

elif d in 'dsnbr':

657

elif d in 'dsnbr':

653

ret += argtype(d, args[arg])

658

ret += argtype(d, args[arg])

654

arg += 1

659

arg += 1

655

elif d == 'l':

660

elif d == 'l':

656

# a list of some type

661

# a list of some type

657

pos += 1

662

pos += 1

658

d = expr[pos]

663

d = expr[pos]

659

ret += listexp(list(args[arg]), d)

664

ret += listexp(list(args[arg]), d)

660

arg += 1

665

arg += 1

661

else:

666

else:

662

raise error.Abort(_('unexpected revspec format character %s')

667

raise error.Abort(_('unexpected revspec format character %s')

663

% d)

668

% d)

664

else:

669

else:

665

ret += c

670

ret += c

666

pos += 1

671

pos += 1

667

672

668

return ret

673

return ret

669

674

670

def prettyformat(tree):

675

def prettyformat(tree):

671

return parser.prettyformat(tree, ('string', 'symbol'))

676

return parser.prettyformat(tree, ('string', 'symbol'))

672

677

673

def depth(tree):

678

def depth(tree):

674

if isinstance(tree, tuple):

679

if isinstance(tree, tuple):

675

return max(map(depth, tree)) + 1

680

return max(map(depth, tree)) + 1

676

else:

681

else:

677

return 0

682

return 0

678

683

679

def funcsused(tree):

684

def funcsused(tree):

680

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

685

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

681

return set()

686

return set()

682

else:

687

else:

683

funcs = set()

688

funcs = set()

684

for s in tree[1:]:

689

for s in tree[1:]:

685

funcs |= funcsused(s)

690

funcs |= funcsused(s)

686

if tree[0] == 'func':

691

if tree[0] == 'func':

687

funcs.add(tree[1][1])

692

funcs.add(tree[1][1])

688

return funcs

693

return funcs

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 util,
             )
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             keywords = {'and', 'or', 'not'}
             _quoteletters = {'"', "'"}
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens
                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.
                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.
                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
                 Check that @ is a valid unquoted token character (issue3686):
                 >>> list(tokenize("@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 '''
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     yield ('-', None, pos)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             _notset = object()
             def getsymbol(x):
                 if x and x[0] == 'symbol':
                     return x[1]
                 raise error.ParseError(_('not a symbol'))
             def getstring(x, err):
                 if x and (x[0] == 'string' or x[0] == 'symbol'):
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 if not x and default is not _notset:
                     return default
                 try:
                     return int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
             def getboolean(x, err):
                 value = util.parsebool(getsymbol(x))
                 if value is not None:
                     return value
                 raise error.ParseError(err)
             def getlist(x):
                 if not x:
                     return []
                 if x[0] == 'list':
                     return list(x[1:])
                 return [x]
             def getrange(x, err):
                 if not x:
                     raise error.ParseError(err)
                 op = x[0]
                 if op == 'range':
                     return x[1], x[2]
                 elif op == 'rangepre':
                     return None, x[1]
                 elif op == 'rangepost':
                     return x[1], None
                 elif op == 'rangeall':
                     return None, None
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 l = getlist(x)
                 if len(l) < min or (max >= 0 and len(l) > max):
                     raise error.ParseError(err)
                 return l
             def getargsdict(x, funcname, keys):
                 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                             keyvaluenode='keyvalue', keynode='symbol')
             # cache of {spec: raw parsed tree} built internally
             _treecache = {}
             def _cachedtree(spec):
                 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
                 tree = _treecache.get(spec)
                 if tree is None:
                     _treecache[spec] = tree = parse(spec)
                 return tree
             def _build(tmplspec, *repls):
                 """Create raw parsed tree from a template revset statement
                 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 template = _cachedtree(tmplspec)
                 return parser.buildtree(template, ('symbol', '_'), *repls)
+            def _match(patspec, tree):
+                """Test if a tree matches the given pattern statement; return the matches
+                >>> _match('f(_)', parse('f()'))
+                >>> _match('f(_)', parse('f(1)'))
+                [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
+                >>> _match('f(_)', parse('f(1, 2)'))
+                """
+                pattern = _cachedtree(patspec)
+                return parser.matchtree(pattern, tree, ('symbol', '_'),
+                                        {'keyvalue', 'list'})
             def _isnamedfunc(x, funcname):
                 """Check if given tree matches named function"""
                 return x and x[0] == 'func' and getsymbol(x[1]) == funcname
             def _isposargs(x, n):
                 """Check if given tree is n-length list of positional arguments"""
                 l = getlist(x)
                 return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)
             def _matchnamedfunc(x, funcname):
                 """Return args tree if given tree matches named function; otherwise None
                 This can't be used for testing a nullary function since its args tree
                 is also None. Use _isnamedfunc() instead.
                 """
                 if not _isnamedfunc(x, funcname):
                     return
                 return x[2]
             def _matchonly(revs, bases):
-                """
+                return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
-                >>> f = lambda *args: _matchonly(*map(parse, args))
-                >>> f('ancestors(A)', 'not ancestors(B)')
-                ('list', ('symbol', 'A'), ('symbol', 'B'))
-                """
-                ta = _matchnamedfunc(revs, 'ancestors')
-                tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')
-                if _isposargs(ta, 1) and _isposargs(tb, 1):
-                    return ('list', ta, tb)
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser"""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     if x[2][0] == 'dagrangepre':
                         return _fixops(('dagrange', post, x[2][1]))
                     elif x[2][0] == 'rangepre':
                         return _fixops(('range', post, x[2][1]))
                     elif x[2][0] == 'rangeall':
                         return _fixops(('rangepost', post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 elif op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
                 return (op,) + tuple(_fixops(y) for y in x[1:])
             def _analyze(x):
                 if x is None:
                     return x
                 op = x[0]
                 if op == 'minus':
                     return _analyze(_build('_ and not _', *x[1:]))
                 elif op == 'only':
                     return _analyze(_build('only(_, _)', *x[1:]))
                 elif op == 'onlypost':
                     return _analyze(_build('only(_)', x[1]))
                 elif op == 'dagrangepre':
                     return _analyze(_build('ancestors(_)', x[1]))
                 elif op == 'dagrangepost':
                     return _analyze(_build('descendants(_)', x[1]))
                 elif op == 'negate':
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 elif op in ('string', 'symbol'):
                     return x
                 elif op == 'rangeall':
                     return (op, None)
                 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 elif op == 'group':
                     return _analyze(x[1])
                 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                             'subscript'}:
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     return (op, ta, tb)
                 elif op == 'relsubscript':
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     tc = _analyze(x[3])
                     return (op, ta, tb, tc)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y) for y in x[1:])
                 elif op == 'keyvalue':
                     return (op, x[1], _analyze(x[2]))
                 elif op == 'func':
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Transform raw parsed tree to evaluatable tree which can be fed to
                 optimize() or getset()
                 All pseudo operations should be mapped to real operations or functions
                 defined in methods or symbols table respectively.
                 """
                 return _analyze(x)
             def _optimize(x, small):
                 if x is None:
                     return 0, x
                 smallbonus = 1
                 if small:
                     smallbonus = .5
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return smallbonus, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1], True)
                     wb, tb = _optimize(x[2], True)
                     w = min(wa, wb)
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
-                    if tb is not None and tb[0] == 'not':
+                    m = _match('not _', tb)
-                        return wa, ('difference', ta, tb[1])
+                    if m:
+                        return wa, ('difference', ta, m[1])
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     ws, ts, ss = [], [], []
                     def flushss():
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y, False)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y, False)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
-                    if x[1][:3] == ('func', ('symbol', 'public'), None):
+                    if _match('public()', x[1]):
                         o = _optimize(_build('_notpublic()'), not small)
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1], not small)
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return smallbonus, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1], small)
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     wa, ta = _optimize(x[1], small)
                     wb, tb = _optimize(x[2], small)
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     w, t = _optimize(x[1], small)
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     w, t = _optimize(x[1], small)
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2], small)
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2], small)
                     if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
                              'keyword', 'outgoing', 'user', 'destination'):
                         w = 10 # slow
                     elif f in ('modifies', 'adds', 'removes'):
                         w = 30 # slower
                     elif f == "contains":
                         w = 100 # very slow
                     elif f == "ancestor":
                         w = 1 * smallbonus
                     elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
                         w = 0
                     elif f == "sort":
                         w = 10 # assume most sorts look at changelog
                     else:
                         w = 1
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Optimize evaluatable tree
                 All pseudo operations should be transformed beforehand.
                 """
                 _weight, newtree = _optimize(tree, small=True)
                 return newtree
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions
             _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Generate a parse tree of given spec with given tokenizing options
                 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith('$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith('foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 p = parser.parser(elements)
                 tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                              syminitletters=syminitletters))
                 if pos != len(spec):
                     raise error.ParseError(_('invalid token'), pos)
                 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
             class _aliasrules(parser.basealiasrules):
                 """Parsing and expansion rule set of revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse alias declaration/definition ``spec``
                     This allows symbol names to use also ``$`` as an initial letter
                     (for backward compatibility), and callers of this function should
                     examine whether ``$`` is used also for unexpected symbols or not.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     if tree[0] == 'func' and tree[1][0] == 'symbol':
                         return tree[1][1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
                 aliases = _aliasrules.buildmap(aliases)
                 tree = _aliasrules.expand(aliases, tree)
                 # warn about problematic (but not referred) aliases
                 if warn is not None:
                     for name, alias in sorted(aliases.iteritems()):
                         if alias.error and not alias.warned:
                             warn(_('warning: %s\n') % (alias.error))
                             alias.warned = True
                 return tree
             def foldconcat(tree):
                 """Fold elements to be concatenated by `##`
                 """
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return tree
                 if tree[0] == '_concat':
                     pending = [tree]
                     l = []
                     while pending:
                         e = pending.pop()
                         if e[0] == '_concat':
                             pending.extend(reversed(e[1:]))
                         elif e[0] in ('string', 'symbol'):
                             l.append(e[1])
                         else:
                             msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
                             raise error.ParseError(msg)
                     return ('string', ''.join(l))
                 else:
                     return tuple(foldconcat(t) for t in tree)
             def parse(spec, lookup=None):
                 return _parsewith(spec, lookup=lookup)
             def _quote(s):
                 r"""Quote a value in order to make it safe for the revset engine.
                 >>> _quote('asdf')
                 "'asdf'"
                 >>> _quote("asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote('asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 return "'%s'" % util.escapestr(pycompat.bytestr(s))
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.
                 Supported arguments:
                 %r = revset expression, parenthesized
                 %d = int(arg), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'
                 Prefixing the type with 'l' specifies a parenthesized list of that type.
                 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec('%d:: and not %d::', 10, 20)
                 '10:: and not 20::'
                 >>> formatspec('%ld or %ld', [], [1])
                 "_list('') or 1"
                 >>> formatspec('keyword(%s)', 'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: 'default'
                 >>> b.branch = b
                 >>> formatspec('branch(%b)', b)
                 "branch('default')"
                 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
                 "root(_list('a\\x00b\\x00c\\x00d'))"
                 '''
                 def argtype(c, arg):
                     if c == 'd':
                         return '%d' % int(arg)
                     elif c == 's':
                         return _quote(arg)
                     elif c == 'r':
                         parse(arg) # make sure syntax errors are confined
                         return '(%s)' % arg
                     elif c == 'n':
                         return _quote(node.hex(arg))
                     elif c == 'b':
                         return _quote(arg.branch())
                 def listexp(s, t):
                     l = len(s)
                     if l == 0:
                         return "_list('')"
                     elif l == 1:
                         return argtype(t, s[0])
                     elif t == 'd':
                         return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
                     elif t == 's':
                         return "_list('%s')" % "\0".join(s)
                     elif t == 'n':
                         return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                     elif t == 'b':
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     m = l // 2
                     return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
                 expr = pycompat.bytestr(expr)
                 ret = ''
                 pos = 0
                 arg = 0
                 while pos < len(expr):
                     c = expr[pos]
                     if c == '%':
                         pos += 1
                         d = expr[pos]
                         if d == '%':
                             ret += d
                         elif d in 'dsnbr':
                             ret += argtype(d, args[arg])
                             arg += 1
                         elif d == 'l':
                             # a list of some type
                             pos += 1
                             d = expr[pos]
                             ret += listexp(list(args[arg]), d)
                             arg += 1
                         else:
                             raise error.Abort(_('unexpected revspec format character %s')
                                               % d)
                     else:
                         ret += c
                     pos += 1
                 return ret
             def prettyformat(tree):
                 return parser.prettyformat(tree, ('string', 'symbol'))
             def depth(tree):
                 if isinstance(tree, tuple):
                     return max(map(depth, tree)) + 1
                 else:
                     return 0
             def funcsused(tree):
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return set()
                 else:
                     funcs = set()
                     for s in tree[1:]:
                         funcs |= funcsused(s)
                     if tree[0] == 'func':
                         funcs.add(tree[1][1])
                     return funcs