upstream/mercurial-mirror Commit - r34273:53fb09c7

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

util,

18

util,

19

)

19

)

20

21

elements = {

21

elements = {

22

# token-type: binding-strength, primary, prefix, infix, suffix

22

# token-type: binding-strength, primary, prefix, infix, suffix

23

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

23

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

24

"[": (21, None, None, ("subscript", 1, "]"), None),

24

"[": (21, None, None, ("subscript", 1, "]"), None),

25

"#": (21, None, None, ("relation", 21), None),

25

"#": (21, None, None, ("relation", 21), None),

26

"##": (20, None, None, ("_concat", 20), None),

26

"##": (20, None, None, ("_concat", 20), None),

27

"~": (18, None, None, ("ancestor", 18), None),

27

"~": (18, None, None, ("ancestor", 18), None),

28

"^": (18, None, None, ("parent", 18), "parentpost"),

28

"^": (18, None, None, ("parent", 18), "parentpost"),

29

"-": (5, None, ("negate", 19), ("minus", 5), None),

29

"-": (5, None, ("negate", 19), ("minus", 5), None),

30

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

30

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

31

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

31

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

32

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

32

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

33

"not": (10, None, ("not", 10), None, None),

33

"not": (10, None, ("not", 10), None, None),

34

"!": (10, None, ("not", 10), None, None),

34

"!": (10, None, ("not", 10), None, None),

35

"and": (5, None, None, ("and", 5), None),

35

"and": (5, None, None, ("and", 5), None),

36

"&": (5, None, None, ("and", 5), None),

36

"&": (5, None, None, ("and", 5), None),

37

"%": (5, None, None, ("only", 5), "onlypost"),

37

"%": (5, None, None, ("only", 5), "onlypost"),

38

"or": (4, None, None, ("or", 4), None),

38

"or": (4, None, None, ("or", 4), None),

39

"|": (4, None, None, ("or", 4), None),

39

"|": (4, None, None, ("or", 4), None),

40

"+": (4, None, None, ("or", 4), None),

40

"+": (4, None, None, ("or", 4), None),

41

"=": (3, None, None, ("keyvalue", 3), None),

41

"=": (3, None, None, ("keyvalue", 3), None),

42

",": (2, None, None, ("list", 2), None),

42

",": (2, None, None, ("list", 2), None),

43

")": (0, None, None, None, None),

43

")": (0, None, None, None, None),

44

"]": (0, None, None, None, None),

44

"]": (0, None, None, None, None),

45

"symbol": (0, "symbol", None, None, None),

45

"symbol": (0, "symbol", None, None, None),

46

"string": (0, "string", None, None, None),

46

"string": (0, "string", None, None, None),

47

"end": (0, None, None, None, None),

47

"end": (0, None, None, None, None),

48

}

48

}

49

50

keywords = {'and', 'or', 'not'}

50

keywords = {'and', 'or', 'not'}

51

52

_quoteletters = {'"', "'"}

52

_quoteletters = {'"', "'"}

53

_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

53

_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

54

55

# default set of valid characters for the initial letter of symbols

55

# default set of valid characters for the initial letter of symbols

56

_syminitletters = set(pycompat.iterbytestr(

56

_syminitletters = set(pycompat.iterbytestr(

57

string.ascii_letters.encode('ascii') +

57

string.ascii_letters.encode('ascii') +

58

string.digits.encode('ascii') +

58

string.digits.encode('ascii') +

59

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

59

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

60

61

# default set of valid characters for non-initial letters of symbols

61

# default set of valid characters for non-initial letters of symbols

62

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

62

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

63

64

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

64

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

65

'''

65

'''

66

Parse a revset statement into a stream of tokens

66

Parse a revset statement into a stream of tokens

67

68

``syminitletters`` is the set of valid characters for the initial

68

``syminitletters`` is the set of valid characters for the initial

69

letter of symbols.

69

letter of symbols.

70

71

By default, character ``c`` is recognized as valid for initial

71

By default, character ``c`` is recognized as valid for initial

72

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

72

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

73

74

``symletters`` is the set of valid characters for non-initial

74

``symletters`` is the set of valid characters for non-initial

75

letters of symbols.

75

letters of symbols.

76

77

By default, character ``c`` is recognized as valid for non-initial

77

By default, character ``c`` is recognized as valid for non-initial

78

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

78

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

79

80

Check that @ is a valid unquoted token character (issue3686):

80

Check that @ is a valid unquoted token character (issue3686):

81

>>> list(tokenize(b"@::"))

81

>>> list(tokenize(b"@::"))

82

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

82

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

83

84

'''

84

'''

85

program = pycompat.bytestr(program)

85

program = pycompat.bytestr(program)

86

if syminitletters is None:

86

if syminitletters is None:

87

syminitletters = _syminitletters

87

syminitletters = _syminitletters

88

if symletters is None:

88

if symletters is None:

89

symletters = _symletters

89

symletters = _symletters

90

91

if program and lookup:

91

if program and lookup:

92

# attempt to parse old-style ranges first to deal with

92

# attempt to parse old-style ranges first to deal with

93

# things like old-tag which contain query metacharacters

93

# things like old-tag which contain query metacharacters

94

parts = program.split(':', 1)

94

parts = program.split(':', 1)

95

if all(lookup(sym) for sym in parts if sym):

95

if all(lookup(sym) for sym in parts if sym):

96

if parts[0]:

96

if parts[0]:

97

yield ('symbol', parts[0], 0)

97

yield ('symbol', parts[0], 0)

98

if len(parts) > 1:

98

if len(parts) > 1:

99

s = len(parts[0])

99

s = len(parts[0])

100

yield (':', None, s)

100

yield (':', None, s)

101

if parts[1]:

101

if parts[1]:

102

yield ('symbol', parts[1], s + 1)

102

yield ('symbol', parts[1], s + 1)

103

yield ('end', None, len(program))

103

yield ('end', None, len(program))

104

return

104

return

105

106

pos, l = 0, len(program)

106

pos, l = 0, len(program)

107

while pos < l:

107

while pos < l:

108

c = program[pos]

108

c = program[pos]

109

if c.isspace(): # skip inter-token whitespace

109

if c.isspace(): # skip inter-token whitespace

110

pass

110

pass

111

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

111

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

112

yield ('::', None, pos)

112

yield ('::', None, pos)

113

pos += 1 # skip ahead

113

pos += 1 # skip ahead

114

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

114

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

115

yield ('..', None, pos)

115

yield ('..', None, pos)

116

pos += 1 # skip ahead

116

pos += 1 # skip ahead

117

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

117

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

118

yield ('##', None, pos)

118

yield ('##', None, pos)

119

pos += 1 # skip ahead

119

pos += 1 # skip ahead

120

elif c in _simpleopletters: # handle simple operators

120

elif c in _simpleopletters: # handle simple operators

121

yield (c, None, pos)

121

yield (c, None, pos)

122

elif (c in _quoteletters or c == 'r' and

122

elif (c in _quoteletters or c == 'r' and

123

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

123

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

124

if c == 'r':

124

if c == 'r':

125

pos += 1

125

pos += 1

126

c = program[pos]

126

c = program[pos]

127

decode = lambda x: x

127

decode = lambda x: x

128

else:

128

else:

129

decode = parser.unescapestr

129

decode = parser.unescapestr

130

pos += 1

130

pos += 1

131

s = pos

131

s = pos

132

while pos < l: # find closing quote

132

while pos < l: # find closing quote

133

d = program[pos]

133

d = program[pos]

134

if d == '\\': # skip over escaped characters

134

if d == '\\': # skip over escaped characters

135

pos += 2

135

pos += 2

136

continue

136

continue

137

if d == c:

137

if d == c:

138

yield ('string', decode(program[s:pos]), s)

138

yield ('string', decode(program[s:pos]), s)

139

break

139

break

140

pos += 1

140

pos += 1

141

else:

141

else:

142

raise error.ParseError(_("unterminated string"), s)

142

raise error.ParseError(_("unterminated string"), s)

143

# gather up a symbol/keyword

143

# gather up a symbol/keyword

144

elif c in syminitletters:

144

elif c in syminitletters:

145

s = pos

145

s = pos

146

pos += 1

146

pos += 1

147

while pos < l: # find end of symbol

147

while pos < l: # find end of symbol

148

d = program[pos]

148

d = program[pos]

149

if d not in symletters:

149

if d not in symletters:

150

break

150

break

151

if d == '.' and program[pos - 1] == '.': # special case for ..

151

if d == '.' and program[pos - 1] == '.': # special case for ..

152

pos -= 1

152

pos -= 1

153

break

153

break

154

pos += 1

154

pos += 1

155

sym = program[s:pos]

155

sym = program[s:pos]

156

if sym in keywords: # operator keywords

156

if sym in keywords: # operator keywords

157

yield (sym, None, s)

157

yield (sym, None, s)

158

elif '-' in sym:

158

elif '-' in sym:

159

# some jerk gave us foo-bar-baz, try to check if it's a symbol

159

# some jerk gave us foo-bar-baz, try to check if it's a symbol

160

if lookup and lookup(sym):

160

if lookup and lookup(sym):

161

# looks like a real symbol

161

# looks like a real symbol

162

yield ('symbol', sym, s)

162

yield ('symbol', sym, s)

163

else:

163

else:

164

# looks like an expression

164

# looks like an expression

165

parts = sym.split('-')

165

parts = sym.split('-')

166

for p in parts[:-1]:

166

for p in parts[:-1]:

167

if p: # possible consecutive -

167

if p: # possible consecutive -

168

yield ('symbol', p, s)

168

yield ('symbol', p, s)

169

s += len(p)

169

s += len(p)

170

yield ('-', None, pos)

170

yield ('-', None, pos)

171

s += 1

171

s += 1

172

if parts[-1]: # possible trailing -

172

if parts[-1]: # possible trailing -

173

yield ('symbol', parts[-1], s)

173

yield ('symbol', parts[-1], s)

174

else:

174

else:

175

yield ('symbol', sym, s)

175

yield ('symbol', sym, s)

176

pos -= 1

176

pos -= 1

177

else:

177

else:

178

raise error.ParseError(_("syntax error in revset '%s'") %

178

raise error.ParseError(_("syntax error in revset '%s'") %

179

program, pos)

179

program, pos)

180

pos += 1

180

pos += 1

181

yield ('end', None, pos)

181

yield ('end', None, pos)

182

183

# helpers

183

# helpers

184

185

_notset = object()

185

_notset = object()

186

187

def getsymbol(x):

187

def getsymbol(x):

188

if x and x[0] == 'symbol':

188

if x and x[0] == 'symbol':

189

return x[1]

189

return x[1]

190

raise error.ParseError(_('not a symbol'))

190

raise error.ParseError(_('not a symbol'))

191

192

def getstring(x, err):

192

def getstring(x, err):

193

if x and (x[0] == 'string' or x[0] == 'symbol'):

193

if x and (x[0] == 'string' or x[0] == 'symbol'):

194

return x[1]

194

return x[1]

195

raise error.ParseError(err)

195

raise error.ParseError(err)

196

197

def getinteger(x, err, default=_notset):

197

def getinteger(x, err, default=_notset):

198

if not x and default is not _notset:

198

if not x and default is not _notset:

199

return default

199

return default

200

try:

200

try:

201

return int(getstring(x, err))

201

return int(getstring(x, err))

202

except ValueError:

202

except ValueError:

203

raise error.ParseError(err)

203

raise error.ParseError(err)

204

205

def getboolean(x, err):

205

def getboolean(x, err):

206

value = util.parsebool(getsymbol(x))

206

value = util.parsebool(getsymbol(x))

207

if value is not None:

207

if value is not None:

208

return value

208

return value

209

raise error.ParseError(err)

209

raise error.ParseError(err)

210

211

def getlist(x):

211

def getlist(x):

212

if not x:

212

if not x:

213

return []

213

return []

214

if x[0] == 'list':

214

if x[0] == 'list':

215

return list(x[1:])

215

return list(x[1:])

216

return [x]

216

return [x]

217

218

def getrange(x, err):

218

def getrange(x, err):

219

if not x:

219

if not x:

220

raise error.ParseError(err)

220

raise error.ParseError(err)

221

op = x[0]

221

op = x[0]

222

if op == 'range':

222

if op == 'range':

223

return x[1], x[2]

223

return x[1], x[2]

224

elif op == 'rangepre':

224

elif op == 'rangepre':

225

return None, x[1]

225

return None, x[1]

226

elif op == 'rangepost':

226

elif op == 'rangepost':

227

return x[1], None

227

return x[1], None

228

elif op == 'rangeall':

228

elif op == 'rangeall':

229

return None, None

229

return None, None

230

raise error.ParseError(err)

230

raise error.ParseError(err)

231

232

def getargs(x, min, max, err):

232

def getargs(x, min, max, err):

233

l = getlist(x)

233

l = getlist(x)

234

if len(l) < min or (max >= 0 and len(l) > max):

234

if len(l) < min or (max >= 0 and len(l) > max):

235

raise error.ParseError(err)

235

raise error.ParseError(err)

236

return l

236

return l

237

238

def getargsdict(x, funcname, keys):

238

def getargsdict(x, funcname, keys):

239

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

239

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

240

keyvaluenode='keyvalue', keynode='symbol')

240

keyvaluenode='keyvalue', keynode='symbol')

241

242

# cache of {spec: raw parsed tree} built internally

242

# cache of {spec: raw parsed tree} built internally

243

_treecache = {}

243

_treecache = {}

244

245

def _cachedtree(spec):

245

def _cachedtree(spec):

246

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

246

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

247

tree = _treecache.get(spec)

247

tree = _treecache.get(spec)

248

if tree is None:

248

if tree is None:

249

_treecache[spec] = tree = parse(spec)

249

_treecache[spec] = tree = parse(spec)

250

return tree

250

return tree

251

252

def _build(tmplspec, *repls):

252

def _build(tmplspec, *repls):

253

"""Create raw parsed tree from a template revset statement

253

"""Create raw parsed tree from a template revset statement

254

255

>>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))

255

>>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))

256

('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))

256

('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))

257

"""

257

"""

258

template = _cachedtree(tmplspec)

258

template = _cachedtree(tmplspec)

259

return parser.buildtree(template, ('symbol', '_'), *repls)

259

return parser.buildtree(template, ('symbol', '_'), *repls)

260

261

def _match(patspec, tree):

261

def _match(patspec, tree):

262

"""Test if a tree matches the given pattern statement; return the matches

262

"""Test if a tree matches the given pattern statement; return the matches

263

264

>>> _match(b'f(_)', parse(b'f()'))

264

>>> _match(b'f(_)', parse(b'f()'))

265

>>> _match(b'f(_)', parse(b'f(1)'))

265

>>> _match(b'f(_)', parse(b'f(1)'))

266

[('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]

266

[('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]

267

>>> _match(b'f(_)', parse(b'f(1, 2)'))

267

>>> _match(b'f(_)', parse(b'f(1, 2)'))

268

"""

268

"""

269

pattern = _cachedtree(patspec)

269

pattern = _cachedtree(patspec)

270

return parser.matchtree(pattern, tree, ('symbol', '_'),

270

return parser.matchtree(pattern, tree, ('symbol', '_'),

271

{'keyvalue', 'list'})

271

{'keyvalue', 'list'})

272

273

def _matchonly(revs, bases):

273

def _matchonly(revs, bases):

274

return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))

274

return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))

275

276

def _fixops(x):

276

def _fixops(x):

277

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

277

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

278

handled well by our simple top-down parser"""

278

handled well by our simple top-down parser"""

279

if not isinstance(x, tuple):

279

if not isinstance(x, tuple):

280

return x

280

return x

281

282

op = x[0]

282

op = x[0]

283

if op == 'parent':

283

if op == 'parent':

284

# x^:y means (x^) : y, not x ^ (:y)

284

# x^:y means (x^) : y, not x ^ (:y)

285

# x^: means (x^) :, not x ^ (:)

285

# x^: means (x^) :, not x ^ (:)

286

post = ('parentpost', x[1])

286

post = ('parentpost', x[1])

287

if x[2][0] == 'dagrangepre':

287

if x[2][0] == 'dagrangepre':

288

return _fixops(('dagrange', post, x[2][1]))

288

return _fixops(('dagrange', post, x[2][1]))

289

elif x[2][0] == 'rangepre':

289

elif x[2][0] == 'rangepre':

290

return _fixops(('range', post, x[2][1]))

290

return _fixops(('range', post, x[2][1]))

291

elif x[2][0] == 'rangeall':

291

elif x[2][0] == 'rangeall':

292

return _fixops(('rangepost', post))

292

return _fixops(('rangepost', post))

293

elif op == 'or':

293

elif op == 'or':

294

# make number of arguments deterministic:

294

# make number of arguments deterministic:

295

# x + y + z -> (or x y z) -> (or (list x y z))

295

# x + y + z -> (or x y z) -> (or (list x y z))

296

return (op, _fixops(('list',) + x[1:]))

296

return (op, _fixops(('list',) + x[1:]))

297

elif op == 'subscript' and x[1][0] == 'relation':

297

elif op == 'subscript' and x[1][0] == 'relation':

298

# x#y[z] ternary

298

# x#y[z] ternary

299

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

299

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

300

301

return (op,) + tuple(_fixops(y) for y in x[1:])

301

return (op,) + tuple(_fixops(y) for y in x[1:])

302

303

def _analyze(x):

303

def _analyze(x):

304

if x is None:

304

if x is None:

305

return x

305

return x

306

307

op = x[0]

307

op = x[0]

308

if op == 'minus':

308

if op == 'minus':

309

return _analyze(_build('_ and not _', *x[1:]))

309

return _analyze(_build('_ and not _', *x[1:]))

310

elif op == 'only':

310

elif op == 'only':

311

return _analyze(_build('only(_, _)', *x[1:]))

311

return _analyze(_build('only(_, _)', *x[1:]))

312

elif op == 'onlypost':

312

elif op == 'onlypost':

313

return _analyze(_build('only(_)', x[1]))

313

return _analyze(_build('only(_)', x[1]))

314

elif op == 'dagrangepre':

314

elif op == 'dagrangepre':

315

return _analyze(_build('ancestors(_)', x[1]))

315

return _analyze(_build('ancestors(_)', x[1]))

316

elif op == 'dagrangepost':

316

elif op == 'dagrangepost':

317

return _analyze(_build('descendants(_)', x[1]))

317

return _analyze(_build('descendants(_)', x[1]))

318

elif op == 'negate':

318

elif op == 'negate':

319

s = getstring(x[1], _("can't negate that"))

319

s = getstring(x[1], _("can't negate that"))

320

return _analyze(('string', '-' + s))

320

return _analyze(('string', '-' + s))

321

elif op in ('string', 'symbol'):

321

elif op in ('string', 'symbol'):

322

return x

322

return x

323

elif op == 'rangeall':

323

elif op == 'rangeall':

324

return (op, None)

324

return (op, None)

325

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

325

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

326

return (op, _analyze(x[1]))

326

return (op, _analyze(x[1]))

327

elif op == 'group':

327

elif op == 'group':

328

return _analyze(x[1])

328

return _analyze(x[1])

329

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

329

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

330

'subscript'}:

330

'subscript'}:

331

ta = _analyze(x[1])

331

ta = _analyze(x[1])

332

tb = _analyze(x[2])

332

tb = _analyze(x[2])

333

return (op, ta, tb)

333

return (op, ta, tb)

334

elif op == 'relsubscript':

334

elif op == 'relsubscript':

335

ta = _analyze(x[1])

335

ta = _analyze(x[1])

336

tb = _analyze(x[2])

336

tb = _analyze(x[2])

337

tc = _analyze(x[3])

337

tc = _analyze(x[3])

338

return (op, ta, tb, tc)

338

return (op, ta, tb, tc)

339

elif op == 'list':

339

elif op == 'list':

340

return (op,) + tuple(_analyze(y) for y in x[1:])

340

return (op,) + tuple(_analyze(y) for y in x[1:])

341

elif op == 'keyvalue':

341

elif op == 'keyvalue':

342

return (op, x[1], _analyze(x[2]))

342

return (op, x[1], _analyze(x[2]))

343

elif op == 'func':

343

elif op == 'func':

344

return (op, x[1], _analyze(x[2]))

344

return (op, x[1], _analyze(x[2]))

345

raise ValueError('invalid operator %r' % op)

345

raise ValueError('invalid operator %r' % op)

346

347

def analyze(x):

347

def analyze(x):

348

"""Transform raw parsed tree to evaluatable tree which can be fed to

348

"""Transform raw parsed tree to evaluatable tree which can be fed to

349

optimize() or getset()

349

optimize() or getset()

350

351

All pseudo operations should be mapped to real operations or functions

351

All pseudo operations should be mapped to real operations or functions

352

defined in methods or symbols table respectively.

352

defined in methods or symbols table respectively.

353

"""

353

"""

354

return _analyze(x)

354

return _analyze(x)

355

356

def _optimize(x, ~~small~~):

356

def _optimize(x):

357

if x is None:

357

if x is None:

358

return 0, x

358

return 0, x

359

360

smallbonus = 1

361

if small:

362

smallbonus = .5

363

364

op = x[0]

360

op = x[0]

365

if op in ('string', 'symbol'):

361

if op in ('string', 'symbol'):

366

return ~~smallbonus~~, x # single revisions are small

362

return 0.5, x # single revisions are small

367

elif op == 'and':

363

elif op == 'and':

368

wa, ta = _optimize(x[1], ~~True~~)

364

wa, ta = _optimize(x[1])

369

wb, tb = _optimize(x[2], ~~True~~)

365

wb, tb = _optimize(x[2])

370

w = min(wa, wb)

366

w = min(wa, wb)

371

367

372

# (draft/secret/_notpublic() & ::x) have a fast path

368

# (draft/secret/_notpublic() & ::x) have a fast path

373

m = _match('_() & ancestors(_)', ('and', ta, tb))

369

m = _match('_() & ancestors(_)', ('and', ta, tb))

374

if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:

370

if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:

375

return w, _build('_phaseandancestors(_, _)', m[1], m[2])

371

return w, _build('_phaseandancestors(_, _)', m[1], m[2])

376

372

377

# (::x and not ::y)/(not ::y and ::x) have a fast path

373

# (::x and not ::y)/(not ::y and ::x) have a fast path

378

m = _matchonly(ta, tb) or _matchonly(tb, ta)

374

m = _matchonly(ta, tb) or _matchonly(tb, ta)

379

if m:

375

if m:

380

return w, _build('only(_, _)', *m[1:])

376

return w, _build('only(_, _)', *m[1:])

381

377

382

m = _match('not _', tb)

378

m = _match('not _', tb)

383

if m:

379

if m:

384

return wa, ('difference', ta, m[1])

380

return wa, ('difference', ta, m[1])

385

if wa > wb:

381

if wa > wb:

386

op = 'andsmally'

382

op = 'andsmally'

387

return w, (op, ta, tb)

383

return w, (op, ta, tb)

388

elif op == 'or':

384

elif op == 'or':

389

# fast path for machine-generated expression, that is likely to have

385

# fast path for machine-generated expression, that is likely to have

390

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

386

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

391

ws, ts, ss = [], [], []

387

ws, ts, ss = [], [], []

392

def flushss():

388

def flushss():

393

if not ss:

389

if not ss:

394

return

390

return

395

if len(ss) == 1:

391

if len(ss) == 1:

396

w, t = ss[0]

392

w, t = ss[0]

397

else:

393

else:

398

s = '\0'.join(t[1] for w, t in ss)

394

s = '\0'.join(t[1] for w, t in ss)

399

y = _build('_list(_)', ('string', s))

395

y = _build('_list(_)', ('string', s))

400

w, t = _optimize(y, ~~False~~)

396

w, t = _optimize(y)

401

ws.append(w)

397

ws.append(w)

402

ts.append(t)

398

ts.append(t)

403

del ss[:]

399

del ss[:]

404

for y in getlist(x[1]):

400

for y in getlist(x[1]):

405

w, t = _optimize(y, ~~False~~)

401

w, t = _optimize(y)

406

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

402

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

407

ss.append((w, t))

403

ss.append((w, t))

408

continue

404

continue

409

flushss()

405

flushss()

410

ws.append(w)

406

ws.append(w)

411

ts.append(t)

407

ts.append(t)

412

flushss()

408

flushss()

413

if len(ts) == 1:

409

if len(ts) == 1:

414

return ws[0], ts[0] # 'or' operation is fully optimized out

410

return ws[0], ts[0] # 'or' operation is fully optimized out

415

return max(ws), (op, ('list',) + tuple(ts))

411

return max(ws), (op, ('list',) + tuple(ts))

416

elif op == 'not':

412

elif op == 'not':

417

# Optimize not public() to _notpublic() because we have a fast version

413

# Optimize not public() to _notpublic() because we have a fast version

418

if _match('public()', x[1]):

414

if _match('public()', x[1]):

419

o = _optimize(_build('_notpublic()'), ~~not~~ ~~small~~)

415

o = _optimize(_build('_notpublic()'))

420

return o[0], o[1]

416

return o[0], o[1]

421

else:

417

else:

422

o = _optimize(x[1], ~~not~~ ~~small~~)

418

o = _optimize(x[1])

423

return o[0], (op, o[1])

419

return o[0], (op, o[1])

424

elif op == 'rangeall':

420

elif op == 'rangeall':

425

return ~~smallbonus~~, x

421

return 1, x

426

elif op in ('rangepre', 'rangepost', 'parentpost'):

422

elif op in ('rangepre', 'rangepost', 'parentpost'):

427

o = _optimize(x[1], ~~small~~)

423

o = _optimize(x[1])

428

return o[0], (op, o[1])

424

return o[0], (op, o[1])

429

elif op in ('dagrange', 'range'):

425

elif op in ('dagrange', 'range'):

430

wa, ta = _optimize(x[1], ~~small~~)

426

wa, ta = _optimize(x[1])

431

wb, tb = _optimize(x[2], ~~small~~)

427

wb, tb = _optimize(x[2])

432

return wa + wb, (op, ta, tb)

428

return wa + wb, (op, ta, tb)

433

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

429

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

434

w, t = _optimize(x[1], ~~small~~)

430

w, t = _optimize(x[1])

435

return w, (op, t, x[2])

431

return w, (op, t, x[2])

436

elif op == 'relsubscript':

432

elif op == 'relsubscript':

437

w, t = _optimize(x[1], ~~small~~)

433

w, t = _optimize(x[1])

438

return w, (op, t, x[2], x[3])

434

return w, (op, t, x[2], x[3])

439

elif op == 'list':

435

elif op == 'list':

440

ws, ts = zip(*(_optimize(y, ~~small~~) for y in x[1:]))

436

ws, ts = zip(*(_optimize(y) for y in x[1:]))

441

return sum(ws), (op,) + ts

437

return sum(ws), (op,) + ts

442

elif op == 'keyvalue':

438

elif op == 'keyvalue':

443

w, t = _optimize(x[2], ~~small~~)

439

w, t = _optimize(x[2])

444

return w, (op, x[1], t)

440

return w, (op, x[1], t)

445

elif op == 'func':

441

elif op == 'func':

446

f = getsymbol(x[1])

442

f = getsymbol(x[1])

447

wa, ta = _optimize(x[2], ~~small~~)

443

wa, ta = _optimize(x[2])

448

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

444

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

449

'keyword', 'outgoing', 'user', 'destination'):

445

'keyword', 'outgoing', 'user', 'destination'):

450

w = 10 # slow

446

w = 10 # slow

451

elif f in ('modifies', 'adds', 'removes'):

447

elif f in ('modifies', 'adds', 'removes'):

452

w = 30 # slower

448

w = 30 # slower

453

elif f == "contains":

449

elif f == "contains":

454

w = 100 # very slow

450

w = 100 # very slow

455

elif f == "ancestor":

451

elif f == "ancestor":

456

w = 1 * ~~smallbonus~~

452

w = 0.5

457

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

453

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

458

w = 0

454

w = 0

459

elif f == "sort":

455

elif f == "sort":

460

w = 10 # assume most sorts look at changelog

456

w = 10 # assume most sorts look at changelog

461

else:

457

else:

462

w = 1

458

w = 1

463

return w + wa, (op, x[1], ta)

459

return w + wa, (op, x[1], ta)

464

raise ValueError('invalid operator %r' % op)

460

raise ValueError('invalid operator %r' % op)

465

461

466

def optimize(tree):

462

def optimize(tree):

467

"""Optimize evaluatable tree

463

"""Optimize evaluatable tree

468

464

469

All pseudo operations should be transformed beforehand.

465

All pseudo operations should be transformed beforehand.

470

"""

466

"""

471

_weight, newtree = _optimize(tree, ~~small~~=~~True~~)

467

_weight, newtree = _optimize(tree)

472

return newtree

468

return newtree

473

469

474

# the set of valid characters for the initial letter of symbols in

470

# the set of valid characters for the initial letter of symbols in

475

# alias declarations and definitions

471

# alias declarations and definitions

476

_aliassyminitletters = _syminitletters | {'$'}

472

_aliassyminitletters = _syminitletters | {'$'}

477

473

478

def _parsewith(spec, lookup=None, syminitletters=None):

474

def _parsewith(spec, lookup=None, syminitletters=None):

479

"""Generate a parse tree of given spec with given tokenizing options

475

"""Generate a parse tree of given spec with given tokenizing options

480

476

481

>>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)

477

>>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)

482

('func', ('symbol', 'foo'), ('symbol', '$1'))

478

('func', ('symbol', 'foo'), ('symbol', '$1'))

483

>>> _parsewith(b'$1')

479

>>> _parsewith(b'$1')

484

Traceback (most recent call last):

480

Traceback (most recent call last):

485

...

481

...

486

ParseError: ("syntax error in revset '$1'", 0)

482

ParseError: ("syntax error in revset '$1'", 0)

487

>>> _parsewith(b'foo bar')

483

>>> _parsewith(b'foo bar')

488

Traceback (most recent call last):

484

Traceback (most recent call last):

489

...

485

...

490

ParseError: ('invalid token', 4)

486

ParseError: ('invalid token', 4)

491

"""

487

"""

492

p = parser.parser(elements)

488

p = parser.parser(elements)

493

tree, pos = p.parse(tokenize(spec, lookup=lookup,

489

tree, pos = p.parse(tokenize(spec, lookup=lookup,

494

syminitletters=syminitletters))

490

syminitletters=syminitletters))

495

if pos != len(spec):

491

if pos != len(spec):

496

raise error.ParseError(_('invalid token'), pos)

492

raise error.ParseError(_('invalid token'), pos)

497

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

493

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

498

494

499

class _aliasrules(parser.basealiasrules):

495

class _aliasrules(parser.basealiasrules):

500

"""Parsing and expansion rule set of revset aliases"""

496

"""Parsing and expansion rule set of revset aliases"""

501

_section = _('revset alias')

497

_section = _('revset alias')

502

498

503

@staticmethod

499

@staticmethod

504

def _parse(spec):

500

def _parse(spec):

505

"""Parse alias declaration/definition ``spec``

501

"""Parse alias declaration/definition ``spec``

506

502

507

This allows symbol names to use also ``$`` as an initial letter

503

This allows symbol names to use also ``$`` as an initial letter

508

(for backward compatibility), and callers of this function should

504

(for backward compatibility), and callers of this function should

509

examine whether ``$`` is used also for unexpected symbols or not.

505

examine whether ``$`` is used also for unexpected symbols or not.

510

"""

506

"""

511

return _parsewith(spec, syminitletters=_aliassyminitletters)

507

return _parsewith(spec, syminitletters=_aliassyminitletters)

512

508

513

@staticmethod

509

@staticmethod

514

def _trygetfunc(tree):

510

def _trygetfunc(tree):

515

if tree[0] == 'func' and tree[1][0] == 'symbol':

511

if tree[0] == 'func' and tree[1][0] == 'symbol':

516

return tree[1][1], getlist(tree[2])

512

return tree[1][1], getlist(tree[2])

517

513

518

def expandaliases(tree, aliases, warn=None):

514

def expandaliases(tree, aliases, warn=None):

519

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

515

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

520

aliases = _aliasrules.buildmap(aliases)

516

aliases = _aliasrules.buildmap(aliases)

521

tree = _aliasrules.expand(aliases, tree)

517

tree = _aliasrules.expand(aliases, tree)

522

# warn about problematic (but not referred) aliases

518

# warn about problematic (but not referred) aliases

523

if warn is not None:

519

if warn is not None:

524

for name, alias in sorted(aliases.iteritems()):

520

for name, alias in sorted(aliases.iteritems()):

525

if alias.error and not alias.warned:

521

if alias.error and not alias.warned:

526

warn(_('warning: %s\n') % (alias.error))

522

warn(_('warning: %s\n') % (alias.error))

527

alias.warned = True

523

alias.warned = True

528

return tree

524

return tree

529

525

530

def foldconcat(tree):

526

def foldconcat(tree):

531

"""Fold elements to be concatenated by `##`

527

"""Fold elements to be concatenated by `##`

532

"""

528

"""

533

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

529

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

534

return tree

530

return tree

535

if tree[0] == '_concat':

531

if tree[0] == '_concat':

536

pending = [tree]

532

pending = [tree]

537

l = []

533

l = []

538

while pending:

534

while pending:

539

e = pending.pop()

535

e = pending.pop()

540

if e[0] == '_concat':

536

if e[0] == '_concat':

541

pending.extend(reversed(e[1:]))

537

pending.extend(reversed(e[1:]))

542

elif e[0] in ('string', 'symbol'):

538

elif e[0] in ('string', 'symbol'):

543

l.append(e[1])

539

l.append(e[1])

544

else:

540

else:

545

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

541

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

546

raise error.ParseError(msg)

542

raise error.ParseError(msg)

547

return ('string', ''.join(l))

543

return ('string', ''.join(l))

548

else:

544

else:

549

return tuple(foldconcat(t) for t in tree)

545

return tuple(foldconcat(t) for t in tree)

550

546

551

def parse(spec, lookup=None):

547

def parse(spec, lookup=None):

552

return _parsewith(spec, lookup=lookup)

548

return _parsewith(spec, lookup=lookup)

553

549

554

def _quote(s):

550

def _quote(s):

555

r"""Quote a value in order to make it safe for the revset engine.

551

r"""Quote a value in order to make it safe for the revset engine.

556

552

557

>>> _quote(b'asdf')

553

>>> _quote(b'asdf')

558

"'asdf'"

554

"'asdf'"

559

>>> _quote(b"asdf'\"")

555

>>> _quote(b"asdf'\"")

560

'\'asdf\\\'"\''

556

'\'asdf\\\'"\''

561

>>> _quote(b'asdf\'')

557

>>> _quote(b'asdf\'')

562

"'asdf\\''"

558

"'asdf\\''"

563

>>> _quote(1)

559

>>> _quote(1)

564

"'1'"

560

"'1'"

565

"""

561

"""

566

return "'%s'" % util.escapestr(pycompat.bytestr(s))

562

return "'%s'" % util.escapestr(pycompat.bytestr(s))

567

563

568

def formatspec(expr, *args):

564

def formatspec(expr, *args):

569

'''

565

'''

570

This is a convenience function for using revsets internally, and

566

This is a convenience function for using revsets internally, and

571

escapes arguments appropriately. Aliases are intentionally ignored

567

escapes arguments appropriately. Aliases are intentionally ignored

572

so that intended expression behavior isn't accidentally subverted.

568

so that intended expression behavior isn't accidentally subverted.

573

569

574

Supported arguments:

570

Supported arguments:

575

571

576

%r = revset expression, parenthesized

572

%r = revset expression, parenthesized

577

%d = int(arg), no quoting

573

%d = int(arg), no quoting

578

%s = string(arg), escaped and single-quoted

574

%s = string(arg), escaped and single-quoted

579

%b = arg.branch(), escaped and single-quoted

575

%b = arg.branch(), escaped and single-quoted

580

%n = hex(arg), single-quoted

576

%n = hex(arg), single-quoted

581

%% = a literal '%'

577

%% = a literal '%'

582

578

583

Prefixing the type with 'l' specifies a parenthesized list of that type.

579

Prefixing the type with 'l' specifies a parenthesized list of that type.

584

580

585

>>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))

581

>>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))

586

'(10 or 11):: and ((this()) or (that()))'

582

'(10 or 11):: and ((this()) or (that()))'

587

>>> formatspec(b'%d:: and not %d::', 10, 20)

583

>>> formatspec(b'%d:: and not %d::', 10, 20)

588

'10:: and not 20::'

584

'10:: and not 20::'

589

>>> formatspec(b'%ld or %ld', [], [1])

585

>>> formatspec(b'%ld or %ld', [], [1])

590

"_list('') or 1"

586

"_list('') or 1"

591

>>> formatspec(b'keyword(%s)', b'foo\\xe9')

587

>>> formatspec(b'keyword(%s)', b'foo\\xe9')

592

"keyword('foo\\\\xe9')"

588

"keyword('foo\\\\xe9')"

593

>>> b = lambda: b'default'

589

>>> b = lambda: b'default'

594

>>> b.branch = b

590

>>> b.branch = b

595

>>> formatspec(b'branch(%b)', b)

591

>>> formatspec(b'branch(%b)', b)

596

"branch('default')"

592

"branch('default')"

597

>>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])

593

>>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])

598

"root(_list('a\\x00b\\x00c\\x00d'))"

594

"root(_list('a\\x00b\\x00c\\x00d'))"

599

'''

595

'''

600

596

601

def argtype(c, arg):

597

def argtype(c, arg):

602

if c == 'd':

598

if c == 'd':

603

return '%d' % int(arg)

599

return '%d' % int(arg)

604

elif c == 's':

600

elif c == 's':

605

return _quote(arg)

601

return _quote(arg)

606

elif c == 'r':

602

elif c == 'r':

607

parse(arg) # make sure syntax errors are confined

603

parse(arg) # make sure syntax errors are confined

608

return '(%s)' % arg

604

return '(%s)' % arg

609

elif c == 'n':

605

elif c == 'n':

610

return _quote(node.hex(arg))

606

return _quote(node.hex(arg))

611

elif c == 'b':

607

elif c == 'b':

612

return _quote(arg.branch())

608

return _quote(arg.branch())

613

609

614

def listexp(s, t):

610

def listexp(s, t):

615

l = len(s)

611

l = len(s)

616

if l == 0:

612

if l == 0:

617

return "_list('')"

613

return "_list('')"

618

elif l == 1:

614

elif l == 1:

619

return argtype(t, s[0])

615

return argtype(t, s[0])

620

elif t == 'd':

616

elif t == 'd':

621

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

617

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

622

elif t == 's':

618

elif t == 's':

623

return "_list('%s')" % "\0".join(s)

619

return "_list('%s')" % "\0".join(s)

624

elif t == 'n':

620

elif t == 'n':

625

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

621

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

626

elif t == 'b':

622

elif t == 'b':

627

return "_list('%s')" % "\0".join(a.branch() for a in s)

623

return "_list('%s')" % "\0".join(a.branch() for a in s)

628

624

629

m = l // 2

625

m = l // 2

630

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

626

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

631

627

632

expr = pycompat.bytestr(expr)

628

expr = pycompat.bytestr(expr)

633

ret = ''

629

ret = ''

634

pos = 0

630

pos = 0

635

arg = 0

631

arg = 0

636

while pos < len(expr):

632

while pos < len(expr):

637

c = expr[pos]

633

c = expr[pos]

638

if c == '%':

634

if c == '%':

639

pos += 1

635

pos += 1

640

d = expr[pos]

636

d = expr[pos]

641

if d == '%':

637

if d == '%':

642

ret += d

638

ret += d

643

elif d in 'dsnbr':

639

elif d in 'dsnbr':

644

ret += argtype(d, args[arg])

640

ret += argtype(d, args[arg])

645

arg += 1

641

arg += 1

646

elif d == 'l':

642

elif d == 'l':

647

# a list of some type

643

# a list of some type

648

pos += 1

644

pos += 1

649

d = expr[pos]

645

d = expr[pos]

650

ret += listexp(list(args[arg]), d)

646

ret += listexp(list(args[arg]), d)

651

arg += 1

647

arg += 1

652

else:

648

else:

653

raise error.Abort(_('unexpected revspec format character %s')

649

raise error.Abort(_('unexpected revspec format character %s')

654

% d)

650

% d)

655

else:

651

else:

656

ret += c

652

ret += c

657

pos += 1

653

pos += 1

658

654

659

return ret

655

return ret

660

656

661

def prettyformat(tree):

657

def prettyformat(tree):

662

return parser.prettyformat(tree, ('string', 'symbol'))

658

return parser.prettyformat(tree, ('string', 'symbol'))

663

659

664

def depth(tree):

660

def depth(tree):

665

if isinstance(tree, tuple):

661

if isinstance(tree, tuple):

666

return max(map(depth, tree)) + 1

662

return max(map(depth, tree)) + 1

667

else:

663

else:

668

return 0

664

return 0

669

665

670

def funcsused(tree):

666

def funcsused(tree):

671

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

667

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

672

return set()

668

return set()

673

else:

669

else:

674

funcs = set()

670

funcs = set()

675

for s in tree[1:]:

671

for s in tree[1:]:

676

funcs |= funcsused(s)

672

funcs |= funcsused(s)

677

if tree[0] == 'func':

673

if tree[0] == 'func':

678

funcs.add(tree[1][1])

674

funcs.add(tree[1][1])

679

return funcs

675

return funcs

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 util,
             )
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             keywords = {'and', 'or', 'not'}
             _quoteletters = {'"', "'"}
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens
                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.
                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.
                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
                 Check that @ is a valid unquoted token character (issue3686):
                 >>> list(tokenize(b"@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 '''
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     yield ('-', None, pos)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             _notset = object()
             def getsymbol(x):
                 if x and x[0] == 'symbol':
                     return x[1]
                 raise error.ParseError(_('not a symbol'))
             def getstring(x, err):
                 if x and (x[0] == 'string' or x[0] == 'symbol'):
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 if not x and default is not _notset:
                     return default
                 try:
                     return int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
             def getboolean(x, err):
                 value = util.parsebool(getsymbol(x))
                 if value is not None:
                     return value
                 raise error.ParseError(err)
             def getlist(x):
                 if not x:
                     return []
                 if x[0] == 'list':
                     return list(x[1:])
                 return [x]
             def getrange(x, err):
                 if not x:
                     raise error.ParseError(err)
                 op = x[0]
                 if op == 'range':
                     return x[1], x[2]
                 elif op == 'rangepre':
                     return None, x[1]
                 elif op == 'rangepost':
                     return x[1], None
                 elif op == 'rangeall':
                     return None, None
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 l = getlist(x)
                 if len(l) < min or (max >= 0 and len(l) > max):
                     raise error.ParseError(err)
                 return l
             def getargsdict(x, funcname, keys):
                 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                             keyvaluenode='keyvalue', keynode='symbol')
             # cache of {spec: raw parsed tree} built internally
             _treecache = {}
             def _cachedtree(spec):
                 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
                 tree = _treecache.get(spec)
                 if tree is None:
                     _treecache[spec] = tree = parse(spec)
                 return tree
             def _build(tmplspec, *repls):
                 """Create raw parsed tree from a template revset statement
                 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 template = _cachedtree(tmplspec)
                 return parser.buildtree(template, ('symbol', '_'), *repls)
             def _match(patspec, tree):
                 """Test if a tree matches the given pattern statement; return the matches
                 >>> _match(b'f(_)', parse(b'f()'))
                 >>> _match(b'f(_)', parse(b'f(1)'))
                 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
                 >>> _match(b'f(_)', parse(b'f(1, 2)'))
                 """
                 pattern = _cachedtree(patspec)
                 return parser.matchtree(pattern, tree, ('symbol', '_'),
                                         {'keyvalue', 'list'})
             def _matchonly(revs, bases):
                 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser"""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     if x[2][0] == 'dagrangepre':
                         return _fixops(('dagrange', post, x[2][1]))
                     elif x[2][0] == 'rangepre':
                         return _fixops(('range', post, x[2][1]))
                     elif x[2][0] == 'rangeall':
                         return _fixops(('rangepost', post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 elif op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
                 return (op,) + tuple(_fixops(y) for y in x[1:])
             def _analyze(x):
                 if x is None:
                     return x
                 op = x[0]
                 if op == 'minus':
                     return _analyze(_build('_ and not _', *x[1:]))
                 elif op == 'only':
                     return _analyze(_build('only(_, _)', *x[1:]))
                 elif op == 'onlypost':
                     return _analyze(_build('only(_)', x[1]))
                 elif op == 'dagrangepre':
                     return _analyze(_build('ancestors(_)', x[1]))
                 elif op == 'dagrangepost':
                     return _analyze(_build('descendants(_)', x[1]))
                 elif op == 'negate':
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 elif op in ('string', 'symbol'):
                     return x
                 elif op == 'rangeall':
                     return (op, None)
                 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 elif op == 'group':
                     return _analyze(x[1])
                 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                             'subscript'}:
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     return (op, ta, tb)
                 elif op == 'relsubscript':
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     tc = _analyze(x[3])
                     return (op, ta, tb, tc)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y) for y in x[1:])
                 elif op == 'keyvalue':
                     return (op, x[1], _analyze(x[2]))
                 elif op == 'func':
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Transform raw parsed tree to evaluatable tree which can be fed to
                 optimize() or getset()
                 All pseudo operations should be mapped to real operations or functions
                 defined in methods or symbols table respectively.
                 """
                 return _analyze(x)
-            def _optimize(x, small):
+            def _optimize(x):
                 if x is None:
                     return 0, x
-                smallbonus = 1
-                if small:
-                    smallbonus = .5
                 op = x[0]
                 if op in ('string', 'symbol'):
-                    return smallbonus, x # single revisions are small
+                    return 0.5, x # single revisions are small
                 elif op == 'and':
-                    wa, ta = _optimize(x[1], True)
+                    wa, ta = _optimize(x[1])
-                    wb, tb = _optimize(x[2], True)
+                    wb, tb = _optimize(x[2])
                     w = min(wa, wb)
                     # (draft/secret/_notpublic() & ::x) have a fast path
                     m = _match('_() & ancestors(_)', ('and', ta, tb))
                     if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
                         return w, _build('_phaseandancestors(_, _)', m[1], m[2])
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
                     m = _match('not _', tb)
                     if m:
                         return wa, ('difference', ta, m[1])
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     ws, ts, ss = [], [], []
                     def flushss():
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
-                            w, t = _optimize(y, False)
+                            w, t = _optimize(y)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
-                        w, t = _optimize(y, False)
+                        w, t = _optimize(y)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if _match('public()', x[1]):
-                        o = _optimize(_build('_notpublic()'), not small)
+                        o = _optimize(_build('_notpublic()'))
                         return o[0], o[1]
                     else:
-                        o = _optimize(x[1], not small)
+                        o = _optimize(x[1])
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
-                    return smallbonus, x
+                    return 1, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
-                    o = _optimize(x[1], small)
+                    o = _optimize(x[1])
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
-                    wa, ta = _optimize(x[1], small)
+                    wa, ta = _optimize(x[1])
-                    wb, tb = _optimize(x[2], small)
+                    wb, tb = _optimize(x[2])
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
-                    w, t = _optimize(x[1], small)
+                    w, t = _optimize(x[1])
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
-                    w, t = _optimize(x[1], small)
+                    w, t = _optimize(x[1])
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
-                    ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
+                    ws, ts = zip(*(_optimize(y) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
-                    w, t = _optimize(x[2], small)
+                    w, t = _optimize(x[2])
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
-                    wa, ta = _optimize(x[2], small)
+                    wa, ta = _optimize(x[2])
                     if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
                              'keyword', 'outgoing', 'user', 'destination'):
                         w = 10 # slow
                     elif f in ('modifies', 'adds', 'removes'):
                         w = 30 # slower
                     elif f == "contains":
                         w = 100 # very slow
                     elif f == "ancestor":
-                        w = 1 * smallbonus
+                        w = 0.5
                     elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
                         w = 0
                     elif f == "sort":
                         w = 10 # assume most sorts look at changelog
                     else:
                         w = 1
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Optimize evaluatable tree
                 All pseudo operations should be transformed beforehand.
                 """
-                _weight, newtree = _optimize(tree, small=True)
+                _weight, newtree = _optimize(tree)
                 return newtree
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions
             _aliassyminitletters = _syminitletters | {'$'}
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Generate a parse tree of given spec with given tokenizing options
                 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith(b'$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith(b'foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 p = parser.parser(elements)
                 tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                              syminitletters=syminitletters))
                 if pos != len(spec):
                     raise error.ParseError(_('invalid token'), pos)
                 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
             class _aliasrules(parser.basealiasrules):
                 """Parsing and expansion rule set of revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse alias declaration/definition ``spec``
                     This allows symbol names to use also ``$`` as an initial letter
                     (for backward compatibility), and callers of this function should
                     examine whether ``$`` is used also for unexpected symbols or not.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     if tree[0] == 'func' and tree[1][0] == 'symbol':
                         return tree[1][1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
                 aliases = _aliasrules.buildmap(aliases)
                 tree = _aliasrules.expand(aliases, tree)
                 # warn about problematic (but not referred) aliases
                 if warn is not None:
                     for name, alias in sorted(aliases.iteritems()):
                         if alias.error and not alias.warned:
                             warn(_('warning: %s\n') % (alias.error))
                             alias.warned = True
                 return tree
             def foldconcat(tree):
                 """Fold elements to be concatenated by `##`
                 """
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return tree
                 if tree[0] == '_concat':
                     pending = [tree]
                     l = []
                     while pending:
                         e = pending.pop()
                         if e[0] == '_concat':
                             pending.extend(reversed(e[1:]))
                         elif e[0] in ('string', 'symbol'):
                             l.append(e[1])
                         else:
                             msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
                             raise error.ParseError(msg)
                     return ('string', ''.join(l))
                 else:
                     return tuple(foldconcat(t) for t in tree)
             def parse(spec, lookup=None):
                 return _parsewith(spec, lookup=lookup)
             def _quote(s):
                 r"""Quote a value in order to make it safe for the revset engine.
                 >>> _quote(b'asdf')
                 "'asdf'"
                 >>> _quote(b"asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote(b'asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 return "'%s'" % util.escapestr(pycompat.bytestr(s))
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.
                 Supported arguments:
                 %r = revset expression, parenthesized
                 %d = int(arg), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'
                 Prefixing the type with 'l' specifies a parenthesized list of that type.
                 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec(b'%d:: and not %d::', 10, 20)
                 '10:: and not 20::'
                 >>> formatspec(b'%ld or %ld', [], [1])
                 "_list('') or 1"
                 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: b'default'
                 >>> b.branch = b
                 >>> formatspec(b'branch(%b)', b)
                 "branch('default')"
                 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
                 "root(_list('a\\x00b\\x00c\\x00d'))"
                 '''
                 def argtype(c, arg):
                     if c == 'd':
                         return '%d' % int(arg)
                     elif c == 's':
                         return _quote(arg)
                     elif c == 'r':
                         parse(arg) # make sure syntax errors are confined
                         return '(%s)' % arg
                     elif c == 'n':
                         return _quote(node.hex(arg))
                     elif c == 'b':
                         return _quote(arg.branch())
                 def listexp(s, t):
                     l = len(s)
                     if l == 0:
                         return "_list('')"
                     elif l == 1:
                         return argtype(t, s[0])
                     elif t == 'd':
                         return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
                     elif t == 's':
                         return "_list('%s')" % "\0".join(s)
                     elif t == 'n':
                         return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                     elif t == 'b':
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     m = l // 2
                     return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
                 expr = pycompat.bytestr(expr)
                 ret = ''
                 pos = 0
                 arg = 0
                 while pos < len(expr):
                     c = expr[pos]
                     if c == '%':
                         pos += 1
                         d = expr[pos]
                         if d == '%':
                             ret += d
                         elif d in 'dsnbr':
                             ret += argtype(d, args[arg])
                             arg += 1
                         elif d == 'l':
                             # a list of some type
                             pos += 1
                             d = expr[pos]
                             ret += listexp(list(args[arg]), d)
                             arg += 1
                         else:
                             raise error.Abort(_('unexpected revspec format character %s')
                                               % d)
                     else:
                         ret += c
                     pos += 1
                 return ret
             def prettyformat(tree):
                 return parser.prettyformat(tree, ('string', 'symbol'))
             def depth(tree):
                 if isinstance(tree, tuple):
                     return max(map(depth, tree)) + 1
                 else:
                     return 0
             def funcsused(tree):
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return set()
                 else:
                     funcs = set()
                     for s in tree[1:]:
                         funcs |= funcsused(s)
                     if tree[0] == 'func':
                         funcs.add(tree[1][1])
                     return funcs