upstream/mercurial-mirror Commit - r34071:e18119b1

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

util,

18

util,

19

)

19

)

20

21

elements = {

21

elements = {

22

# token-type: binding-strength, primary, prefix, infix, suffix

22

# token-type: binding-strength, primary, prefix, infix, suffix

23

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

23

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

24

"[": (21, None, None, ("subscript", 1, "]"), None),

24

"[": (21, None, None, ("subscript", 1, "]"), None),

25

"#": (21, None, None, ("relation", 21), None),

25

"#": (21, None, None, ("relation", 21), None),

26

"##": (20, None, None, ("_concat", 20), None),

26

"##": (20, None, None, ("_concat", 20), None),

27

"~": (18, None, None, ("ancestor", 18), None),

27

"~": (18, None, None, ("ancestor", 18), None),

28

"^": (18, None, None, ("parent", 18), "parentpost"),

28

"^": (18, None, None, ("parent", 18), "parentpost"),

29

"-": (5, None, ("negate", 19), ("minus", 5), None),

29

"-": (5, None, ("negate", 19), ("minus", 5), None),

30

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

30

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

31

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

31

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

32

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

32

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

33

"not": (10, None, ("not", 10), None, None),

33

"not": (10, None, ("not", 10), None, None),

34

"!": (10, None, ("not", 10), None, None),

34

"!": (10, None, ("not", 10), None, None),

35

"and": (5, None, None, ("and", 5), None),

35

"and": (5, None, None, ("and", 5), None),

36

"&": (5, None, None, ("and", 5), None),

36

"&": (5, None, None, ("and", 5), None),

37

"%": (5, None, None, ("only", 5), "onlypost"),

37

"%": (5, None, None, ("only", 5), "onlypost"),

38

"or": (4, None, None, ("or", 4), None),

38

"or": (4, None, None, ("or", 4), None),

39

"|": (4, None, None, ("or", 4), None),

39

"|": (4, None, None, ("or", 4), None),

40

"+": (4, None, None, ("or", 4), None),

40

"+": (4, None, None, ("or", 4), None),

41

"=": (3, None, None, ("keyvalue", 3), None),

41

"=": (3, None, None, ("keyvalue", 3), None),

42

",": (2, None, None, ("list", 2), None),

42

",": (2, None, None, ("list", 2), None),

43

")": (0, None, None, None, None),

43

")": (0, None, None, None, None),

44

"]": (0, None, None, None, None),

44

"]": (0, None, None, None, None),

45

"symbol": (0, "symbol", None, None, None),

45

"symbol": (0, "symbol", None, None, None),

46

"string": (0, "string", None, None, None),

46

"string": (0, "string", None, None, None),

47

"end": (0, None, None, None, None),

47

"end": (0, None, None, None, None),

48

}

48

}

49

50

keywords = {'and', 'or', 'not'}

50

keywords = {'and', 'or', 'not'}

51

52

_quoteletters = {'"', "'"}

52

_quoteletters = {'"', "'"}

53

_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

53

_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

54

55

# default set of valid characters for the initial letter of symbols

55

# default set of valid characters for the initial letter of symbols

56

_syminitletters = set(pycompat.iterbytestr(

56

_syminitletters = set(pycompat.iterbytestr(

57

string.ascii_letters.encode('ascii') +

57

string.ascii_letters.encode('ascii') +

58

string.digits.encode('ascii') +

58

string.digits.encode('ascii') +

59

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

59

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

60

61

# default set of valid characters for non-initial letters of symbols

61

# default set of valid characters for non-initial letters of symbols

62

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

62

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

63

64

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

64

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

65

'''

65

'''

66

Parse a revset statement into a stream of tokens

66

Parse a revset statement into a stream of tokens

67

68

``syminitletters`` is the set of valid characters for the initial

68

``syminitletters`` is the set of valid characters for the initial

69

letter of symbols.

69

letter of symbols.

70

71

By default, character ``c`` is recognized as valid for initial

71

By default, character ``c`` is recognized as valid for initial

72

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

72

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

73

74

``symletters`` is the set of valid characters for non-initial

74

``symletters`` is the set of valid characters for non-initial

75

letters of symbols.

75

letters of symbols.

76

77

By default, character ``c`` is recognized as valid for non-initial

77

By default, character ``c`` is recognized as valid for non-initial

78

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

78

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

79

80

Check that @ is a valid unquoted token character (issue3686):

80

Check that @ is a valid unquoted token character (issue3686):

81

>>> list(tokenize("@::"))

81

>>> list(tokenize("@::"))

82

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

82

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

83

84

'''

84

'''

85

program = pycompat.bytestr(program)

85

program = pycompat.bytestr(program)

86

if syminitletters is None:

86

if syminitletters is None:

87

syminitletters = _syminitletters

87

syminitletters = _syminitletters

88

if symletters is None:

88

if symletters is None:

89

symletters = _symletters

89

symletters = _symletters

90

91

if program and lookup:

91

if program and lookup:

92

# attempt to parse old-style ranges first to deal with

92

# attempt to parse old-style ranges first to deal with

93

# things like old-tag which contain query metacharacters

93

# things like old-tag which contain query metacharacters

94

parts = program.split(':', 1)

94

parts = program.split(':', 1)

95

if all(lookup(sym) for sym in parts if sym):

95

if all(lookup(sym) for sym in parts if sym):

96

if parts[0]:

96

if parts[0]:

97

yield ('symbol', parts[0], 0)

97

yield ('symbol', parts[0], 0)

98

if len(parts) > 1:

98

if len(parts) > 1:

99

s = len(parts[0])

99

s = len(parts[0])

100

yield (':', None, s)

100

yield (':', None, s)

101

if parts[1]:

101

if parts[1]:

102

yield ('symbol', parts[1], s + 1)

102

yield ('symbol', parts[1], s + 1)

103

yield ('end', None, len(program))

103

yield ('end', None, len(program))

104

return

104

return

105

106

pos, l = 0, len(program)

106

pos, l = 0, len(program)

107

while pos < l:

107

while pos < l:

108

c = program[pos]

108

c = program[pos]

109

if c.isspace(): # skip inter-token whitespace

109

if c.isspace(): # skip inter-token whitespace

110

pass

110

pass

111

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

111

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

112

yield ('::', None, pos)

112

yield ('::', None, pos)

113

pos += 1 # skip ahead

113

pos += 1 # skip ahead

114

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

114

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

115

yield ('..', None, pos)

115

yield ('..', None, pos)

116

pos += 1 # skip ahead

116

pos += 1 # skip ahead

117

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

117

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

118

yield ('##', None, pos)

118

yield ('##', None, pos)

119

pos += 1 # skip ahead

119

pos += 1 # skip ahead

120

elif c in _simpleopletters: # handle simple operators

120

elif c in _simpleopletters: # handle simple operators

121

yield (c, None, pos)

121

yield (c, None, pos)

122

elif (c in _quoteletters or c == 'r' and

122

elif (c in _quoteletters or c == 'r' and

123

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

123

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

124

if c == 'r':

124

if c == 'r':

125

pos += 1

125

pos += 1

126

c = program[pos]

126

c = program[pos]

127

decode = lambda x: x

127

decode = lambda x: x

128

else:

128

else:

129

decode = parser.unescapestr

129

decode = parser.unescapestr

130

pos += 1

130

pos += 1

131

s = pos

131

s = pos

132

while pos < l: # find closing quote

132

while pos < l: # find closing quote

133

d = program[pos]

133

d = program[pos]

134

if d == '\\': # skip over escaped characters

134

if d == '\\': # skip over escaped characters

135

pos += 2

135

pos += 2

136

continue

136

continue

137

if d == c:

137

if d == c:

138

yield ('string', decode(program[s:pos]), s)

138

yield ('string', decode(program[s:pos]), s)

139

break

139

break

140

pos += 1

140

pos += 1

141

else:

141

else:

142

raise error.ParseError(_("unterminated string"), s)

142

raise error.ParseError(_("unterminated string"), s)

143

# gather up a symbol/keyword

143

# gather up a symbol/keyword

144

elif c in syminitletters:

144

elif c in syminitletters:

145

s = pos

145

s = pos

146

pos += 1

146

pos += 1

147

while pos < l: # find end of symbol

147

while pos < l: # find end of symbol

148

d = program[pos]

148

d = program[pos]

149

if d not in symletters:

149

if d not in symletters:

150

break

150

break

151

if d == '.' and program[pos - 1] == '.': # special case for ..

151

if d == '.' and program[pos - 1] == '.': # special case for ..

152

pos -= 1

152

pos -= 1

153

break

153

break

154

pos += 1

154

pos += 1

155

sym = program[s:pos]

155

sym = program[s:pos]

156

if sym in keywords: # operator keywords

156

if sym in keywords: # operator keywords

157

yield (sym, None, s)

157

yield (sym, None, s)

158

elif '-' in sym:

158

elif '-' in sym:

159

# some jerk gave us foo-bar-baz, try to check if it's a symbol

159

# some jerk gave us foo-bar-baz, try to check if it's a symbol

160

if lookup and lookup(sym):

160

if lookup and lookup(sym):

161

# looks like a real symbol

161

# looks like a real symbol

162

yield ('symbol', sym, s)

162

yield ('symbol', sym, s)

163

else:

163

else:

164

# looks like an expression

164

# looks like an expression

165

parts = sym.split('-')

165

parts = sym.split('-')

166

for p in parts[:-1]:

166

for p in parts[:-1]:

167

if p: # possible consecutive -

167

if p: # possible consecutive -

168

yield ('symbol', p, s)

168

yield ('symbol', p, s)

169

s += len(p)

169

s += len(p)

170

yield ('-', None, pos)

170

yield ('-', None, pos)

171

s += 1

171

s += 1

172

if parts[-1]: # possible trailing -

172

if parts[-1]: # possible trailing -

173

yield ('symbol', parts[-1], s)

173

yield ('symbol', parts[-1], s)

174

else:

174

else:

175

yield ('symbol', sym, s)

175

yield ('symbol', sym, s)

176

pos -= 1

176

pos -= 1

177

else:

177

else:

178

raise error.ParseError(_("syntax error in revset '%s'") %

178

raise error.ParseError(_("syntax error in revset '%s'") %

179

program, pos)

179

program, pos)

180

pos += 1

180

pos += 1

181

yield ('end', None, pos)

181

yield ('end', None, pos)

182

183

# helpers

183

# helpers

184

185

_notset = object()

185

_notset = object()

186

187

def getsymbol(x):

187

def getsymbol(x):

188

if x and x[0] == 'symbol':

188

if x and x[0] == 'symbol':

189

return x[1]

189

return x[1]

190

raise error.ParseError(_('not a symbol'))

190

raise error.ParseError(_('not a symbol'))

191

192

def getstring(x, err):

192

def getstring(x, err):

193

if x and (x[0] == 'string' or x[0] == 'symbol'):

193

if x and (x[0] == 'string' or x[0] == 'symbol'):

194

return x[1]

194

return x[1]

195

raise error.ParseError(err)

195

raise error.ParseError(err)

196

197

def getinteger(x, err, default=_notset):

197

def getinteger(x, err, default=_notset):

198

if not x and default is not _notset:

198

if not x and default is not _notset:

199

return default

199

return default

200

try:

200

try:

201

return int(getstring(x, err))

201

return int(getstring(x, err))

202

except ValueError:

202

except ValueError:

203

raise error.ParseError(err)

203

raise error.ParseError(err)

204

205

def getboolean(x, err):

205

def getboolean(x, err):

206

value = util.parsebool(getsymbol(x))

206

value = util.parsebool(getsymbol(x))

207

if value is not None:

207

if value is not None:

208

return value

208

return value

209

raise error.ParseError(err)

209

raise error.ParseError(err)

210

211

def getlist(x):

211

def getlist(x):

212

if not x:

212

if not x:

213

return []

213

return []

214

if x[0] == 'list':

214

if x[0] == 'list':

215

return list(x[1:])

215

return list(x[1:])

216

return [x]

216

return [x]

217

218

def getrange(x, err):

218

def getrange(x, err):

219

if not x:

219

if not x:

220

raise error.ParseError(err)

220

raise error.ParseError(err)

221

op = x[0]

221

op = x[0]

222

if op == 'range':

222

if op == 'range':

223

return x[1], x[2]

223

return x[1], x[2]

224

elif op == 'rangepre':

224

elif op == 'rangepre':

225

return None, x[1]

225

return None, x[1]

226

elif op == 'rangepost':

226

elif op == 'rangepost':

227

return x[1], None

227

return x[1], None

228

elif op == 'rangeall':

228

elif op == 'rangeall':

229

return None, None

229

return None, None

230

raise error.ParseError(err)

230

raise error.ParseError(err)

231

232

def getargs(x, min, max, err):

232

def getargs(x, min, max, err):

233

l = getlist(x)

233

l = getlist(x)

234

if len(l) < min or (max >= 0 and len(l) > max):

234

if len(l) < min or (max >= 0 and len(l) > max):

235

raise error.ParseError(err)

235

raise error.ParseError(err)

236

return l

236

return l

237

238

def getargsdict(x, funcname, keys):

238

def getargsdict(x, funcname, keys):

239

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

239

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

240

keyvaluenode='keyvalue', keynode='symbol')

240

keyvaluenode='keyvalue', keynode='symbol')

241

242

# cache of {spec: raw parsed tree} built internally

242

# cache of {spec: raw parsed tree} built internally

243

_treecache = {}

243

_treecache = {}

244

245

def _cachedtree(spec):

245

def _cachedtree(spec):

246

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

246

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

247

tree = _treecache.get(spec)

247

tree = _treecache.get(spec)

248

if tree is None:

248

if tree is None:

249

_treecache[spec] = tree = parse(spec)

249

_treecache[spec] = tree = parse(spec)

250

return tree

250

return tree

251

252

def _build(tmplspec, *repls):

252

def _build(tmplspec, *repls):

253

"""Create raw parsed tree from a template revset statement

253

"""Create raw parsed tree from a template revset statement

254

255

>>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))

255

>>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))

256

('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))

256

('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))

257

"""

257

"""

258

template = _cachedtree(tmplspec)

258

template = _cachedtree(tmplspec)

259

return parser.buildtree(template, ('symbol', '_'), *repls)

259

return parser.buildtree(template, ('symbol', '_'), *repls)

260

261

def _match(patspec, tree):

261

def _match(patspec, tree):

262

"""Test if a tree matches the given pattern statement; return the matches

262

"""Test if a tree matches the given pattern statement; return the matches

263

264

>>> _match('f(_)', parse('f()'))

264

>>> _match('f(_)', parse('f()'))

265

>>> _match('f(_)', parse('f(1)'))

265

>>> _match('f(_)', parse('f(1)'))

266

[('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]

266

[('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]

267

>>> _match('f(_)', parse('f(1, 2)'))

267

>>> _match('f(_)', parse('f(1, 2)'))

268

"""

268

"""

269

pattern = _cachedtree(patspec)

269

pattern = _cachedtree(patspec)

270

return parser.matchtree(pattern, tree, ('symbol', '_'),

270

return parser.matchtree(pattern, tree, ('symbol', '_'),

271

{'keyvalue', 'list'})

271

{'keyvalue', 'list'})

272

273

def _matchonly(revs, bases):

273

def _matchonly(revs, bases):

274

return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))

274

return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))

275

276

def _fixops(x):

276

def _fixops(x):

277

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

277

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

278

handled well by our simple top-down parser"""

278

handled well by our simple top-down parser"""

279

if not isinstance(x, tuple):

279

if not isinstance(x, tuple):

280

return x

280

return x

281

282

op = x[0]

282

op = x[0]

283

if op == 'parent':

283

if op == 'parent':

284

# x^:y means (x^) : y, not x ^ (:y)

284

# x^:y means (x^) : y, not x ^ (:y)

285

# x^: means (x^) :, not x ^ (:)

285

# x^: means (x^) :, not x ^ (:)

286

post = ('parentpost', x[1])

286

post = ('parentpost', x[1])

287

if x[2][0] == 'dagrangepre':

287

if x[2][0] == 'dagrangepre':

288

return _fixops(('dagrange', post, x[2][1]))

288

return _fixops(('dagrange', post, x[2][1]))

289

elif x[2][0] == 'rangepre':

289

elif x[2][0] == 'rangepre':

290

return _fixops(('range', post, x[2][1]))

290

return _fixops(('range', post, x[2][1]))

291

elif x[2][0] == 'rangeall':

291

elif x[2][0] == 'rangeall':

292

return _fixops(('rangepost', post))

292

return _fixops(('rangepost', post))

293

elif op == 'or':

293

elif op == 'or':

294

# make number of arguments deterministic:

294

# make number of arguments deterministic:

295

# x + y + z -> (or x y z) -> (or (list x y z))

295

# x + y + z -> (or x y z) -> (or (list x y z))

296

return (op, _fixops(('list',) + x[1:]))

296

return (op, _fixops(('list',) + x[1:]))

297

elif op == 'subscript' and x[1][0] == 'relation':

297

elif op == 'subscript' and x[1][0] == 'relation':

298

# x#y[z] ternary

298

# x#y[z] ternary

299

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

299

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

300

301

return (op,) + tuple(_fixops(y) for y in x[1:])

301

return (op,) + tuple(_fixops(y) for y in x[1:])

302

303

def _analyze(x):

303

def _analyze(x):

304

if x is None:

304

if x is None:

305

return x

305

return x

306

307

op = x[0]

307

op = x[0]

308

if op == 'minus':

308

if op == 'minus':

309

return _analyze(_build('_ and not _', *x[1:]))

309

return _analyze(_build('_ and not _', *x[1:]))

310

elif op == 'only':

310

elif op == 'only':

311

return _analyze(_build('only(_, _)', *x[1:]))

311

return _analyze(_build('only(_, _)', *x[1:]))

312

elif op == 'onlypost':

312

elif op == 'onlypost':

313

return _analyze(_build('only(_)', x[1]))

313

return _analyze(_build('only(_)', x[1]))

314

elif op == 'dagrangepre':

314

elif op == 'dagrangepre':

315

return _analyze(_build('ancestors(_)', x[1]))

315

return _analyze(_build('ancestors(_)', x[1]))

316

elif op == 'dagrangepost':

316

elif op == 'dagrangepost':

317

return _analyze(_build('descendants(_)', x[1]))

317

return _analyze(_build('descendants(_)', x[1]))

318

elif op == 'negate':

318

elif op == 'negate':

319

s = getstring(x[1], _("can't negate that"))

319

s = getstring(x[1], _("can't negate that"))

320

return _analyze(('string', '-' + s))

320

return _analyze(('string', '-' + s))

321

elif op in ('string', 'symbol'):

321

elif op in ('string', 'symbol'):

322

return x

322

return x

323

elif op == 'rangeall':

323

elif op == 'rangeall':

324

return (op, None)

324

return (op, None)

325

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

325

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

326

return (op, _analyze(x[1]))

326

return (op, _analyze(x[1]))

327

elif op == 'group':

327

elif op == 'group':

328

return _analyze(x[1])

328

return _analyze(x[1])

329

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

329

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

330

'subscript'}:

330

'subscript'}:

331

ta = _analyze(x[1])

331

ta = _analyze(x[1])

332

tb = _analyze(x[2])

332

tb = _analyze(x[2])

333

return (op, ta, tb)

333

return (op, ta, tb)

334

elif op == 'relsubscript':

334

elif op == 'relsubscript':

335

ta = _analyze(x[1])

335

ta = _analyze(x[1])

336

tb = _analyze(x[2])

336

tb = _analyze(x[2])

337

tc = _analyze(x[3])

337

tc = _analyze(x[3])

338

return (op, ta, tb, tc)

338

return (op, ta, tb, tc)

339

elif op == 'list':

339

elif op == 'list':

340

return (op,) + tuple(_analyze(y) for y in x[1:])

340

return (op,) + tuple(_analyze(y) for y in x[1:])

341

elif op == 'keyvalue':

341

elif op == 'keyvalue':

342

return (op, x[1], _analyze(x[2]))

342

return (op, x[1], _analyze(x[2]))

343

elif op == 'func':

343

elif op == 'func':

344

return (op, x[1], _analyze(x[2]))

344

return (op, x[1], _analyze(x[2]))

345

raise ValueError('invalid operator %r' % op)

345

raise ValueError('invalid operator %r' % op)

346

347

def analyze(x):

347

def analyze(x):

348

"""Transform raw parsed tree to evaluatable tree which can be fed to

348

"""Transform raw parsed tree to evaluatable tree which can be fed to

349

optimize() or getset()

349

optimize() or getset()

350

351

All pseudo operations should be mapped to real operations or functions

351

All pseudo operations should be mapped to real operations or functions

352

defined in methods or symbols table respectively.

352

defined in methods or symbols table respectively.

353

"""

353

"""

354

return _analyze(x)

354

return _analyze(x)

355

356

def _optimize(x, small):

356

def _optimize(x, small):

357

if x is None:

357

if x is None:

358

return 0, x

358

return 0, x

359

360

smallbonus = 1

360

smallbonus = 1

361

if small:

361

if small:

362

smallbonus = .5

362

smallbonus = .5

363

364

op = x[0]

364

op = x[0]

365

if op in ('string', 'symbol'):

365

if op in ('string', 'symbol'):

366

return smallbonus, x # single revisions are small

366

return smallbonus, x # single revisions are small

367

elif op == 'and':

367

elif op == 'and':

368

wa, ta = _optimize(x[1], True)

368

wa, ta = _optimize(x[1], True)

369

wb, tb = _optimize(x[2], True)

369

wb, tb = _optimize(x[2], True)

370

w = min(wa, wb)

370

w = min(wa, wb)

371

372

# (draft/secret/_notpublic() & ::x) have a fast path

372

# (draft/secret/_notpublic() & ::x) have a fast path

373

m = _match('_() & ancestors(_)', ('and', ta, tb))

373

m = _match('_() & ancestors(_)', ('and', ta, tb))

374

if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:

374

if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:

375

return w, _build('_phaseandancestors(_, _)', m[1], m[2])

375

return w, _build('_phaseandancestors(_, _)', m[1], m[2])

376

377

# (::x and not ::y)/(not ::y and ::x) have a fast path

377

# (::x and not ::y)/(not ::y and ::x) have a fast path

378

m = _matchonly(ta, tb) or _matchonly(tb, ta)

378

m = _matchonly(ta, tb) or _matchonly(tb, ta)

379

if m:

379

if m:

380

return w, _build('only(_, _)', *m[1:])

380

return w, _build('only(_, _)', *m[1:])

381

382

m = _match('not _', tb)

382

m = _match('not _', tb)

383

if m:

383

if m:

384

return wa, ('difference', ta, m[1])

384

return wa, ('difference', ta, m[1])

385

if wa > wb:

385

if wa > wb:

386

op = 'andsmally'

386

op = 'andsmally'

387

return w, (op, ta, tb)

387

return w, (op, ta, tb)

388

elif op == 'or':

388

elif op == 'or':

389

# fast path for machine-generated expression, that is likely to have

389

# fast path for machine-generated expression, that is likely to have

390

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

390

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

391

ws, ts, ss = [], [], []

391

ws, ts, ss = [], [], []

392

def flushss():

392

def flushss():

393

if not ss:

393

if not ss:

394

return

394

return

395

if len(ss) == 1:

395

if len(ss) == 1:

396

w, t = ss[0]

396

w, t = ss[0]

397

else:

397

else:

398

s = '\0'.join(t[1] for w, t in ss)

398

s = '\0'.join(t[1] for w, t in ss)

399

y = _build('_list(_)', ('string', s))

399

y = _build('_list(_)', ('string', s))

400

w, t = _optimize(y, False)

400

w, t = _optimize(y, False)

401

ws.append(w)

401

ws.append(w)

402

ts.append(t)

402

ts.append(t)

403

del ss[:]

403

del ss[:]

404

for y in getlist(x[1]):

404

for y in getlist(x[1]):

405

w, t = _optimize(y, False)

405

w, t = _optimize(y, False)

406

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

406

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

407

ss.append((w, t))

407

ss.append((w, t))

408

continue

408

continue

409

flushss()

409

flushss()

410

ws.append(w)

410

ws.append(w)

411

ts.append(t)

411

ts.append(t)

412

flushss()

412

flushss()

413

if len(ts) == 1:

413

if len(ts) == 1:

414

return ws[0], ts[0] # 'or' operation is fully optimized out

414

return ws[0], ts[0] # 'or' operation is fully optimized out

415

return max(ws), (op, ('list',) + tuple(ts))

415

return max(ws), (op, ('list',) + tuple(ts))

416

elif op == 'not':

416

elif op == 'not':

417

# Optimize not public() to _notpublic() because we have a fast version

417

# Optimize not public() to _notpublic() because we have a fast version

418

if _match('public()', x[1]):

418

if _match('public()', x[1]):

419

o = _optimize(_build('_notpublic()'), not small)

419

o = _optimize(_build('_notpublic()'), not small)

420

return o[0], o[1]

420

return o[0], o[1]

421

else:

421

else:

422

o = _optimize(x[1], not small)

422

o = _optimize(x[1], not small)

423

return o[0], (op, o[1])

423

return o[0], (op, o[1])

424

elif op == 'rangeall':

424

elif op == 'rangeall':

425

return smallbonus, x

425

return smallbonus, x

426

elif op in ('rangepre', 'rangepost', 'parentpost'):

426

elif op in ('rangepre', 'rangepost', 'parentpost'):

427

o = _optimize(x[1], small)

427

o = _optimize(x[1], small)

428

return o[0], (op, o[1])

428

return o[0], (op, o[1])

429

elif op in ('dagrange', 'range'):

429

elif op in ('dagrange', 'range'):

430

wa, ta = _optimize(x[1], small)

430

wa, ta = _optimize(x[1], small)

431

wb, tb = _optimize(x[2], small)

431

wb, tb = _optimize(x[2], small)

432

return wa + wb, (op, ta, tb)

432

return wa + wb, (op, ta, tb)

433

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

433

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

434

w, t = _optimize(x[1], small)

434

w, t = _optimize(x[1], small)

435

return w, (op, t, x[2])

435

return w, (op, t, x[2])

436

elif op == 'relsubscript':

436

elif op == 'relsubscript':

437

w, t = _optimize(x[1], small)

437

w, t = _optimize(x[1], small)

438

return w, (op, t, x[2], x[3])

438

return w, (op, t, x[2], x[3])

439

elif op == 'list':

439

elif op == 'list':

440

ws, ts = zip(*(_optimize(y, small) for y in x[1:]))

440

ws, ts = zip(*(_optimize(y, small) for y in x[1:]))

441

return sum(ws), (op,) + ts

441

return sum(ws), (op,) + ts

442

elif op == 'keyvalue':

442

elif op == 'keyvalue':

443

w, t = _optimize(x[2], small)

443

w, t = _optimize(x[2], small)

444

return w, (op, x[1], t)

444

return w, (op, x[1], t)

445

elif op == 'func':

445

elif op == 'func':

446

f = getsymbol(x[1])

446

f = getsymbol(x[1])

447

wa, ta = _optimize(x[2], small)

447

wa, ta = _optimize(x[2], small)

448

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

448

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

449

'keyword', 'outgoing', 'user', 'destination'):

449

'keyword', 'outgoing', 'user', 'destination'):

450

w = 10 # slow

450

w = 10 # slow

451

elif f in ('modifies', 'adds', 'removes'):

451

elif f in ('modifies', 'adds', 'removes'):

452

w = 30 # slower

452

w = 30 # slower

453

elif f == "contains":

453

elif f == "contains":

454

w = 100 # very slow

454

w = 100 # very slow

455

elif f == "ancestor":

455

elif f == "ancestor":

456

w = 1 * smallbonus

456

w = 1 * smallbonus

457

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

457

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

458

w = 0

458

w = 0

459

elif f == "sort":

459

elif f == "sort":

460

w = 10 # assume most sorts look at changelog

460

w = 10 # assume most sorts look at changelog

461

else:

461

else:

462

w = 1

462

w = 1

463

return w + wa, (op, x[1], ta)

463

return w + wa, (op, x[1], ta)

464

raise ValueError('invalid operator %r' % op)

464

raise ValueError('invalid operator %r' % op)

465

466

def optimize(tree):

466

def optimize(tree):

467

"""Optimize evaluatable tree

467

"""Optimize evaluatable tree

468

469

All pseudo operations should be transformed beforehand.

469

All pseudo operations should be transformed beforehand.

470

"""

470

"""

471

_weight, newtree = _optimize(tree, small=True)

471

_weight, newtree = _optimize(tree, small=True)

472

return newtree

472

return newtree

473

474

# the set of valid characters for the initial letter of symbols in

474

# the set of valid characters for the initial letter of symbols in

475

# alias declarations and definitions

475

# alias declarations and definitions

476

_aliassyminitletters = _syminitletters | ~~set~~(~~pycompat~~.~~sysstr~~('$'))

476

_aliassyminitletters = _syminitletters | {'$'}

477

478

def _parsewith(spec, lookup=None, syminitletters=None):

478

def _parsewith(spec, lookup=None, syminitletters=None):

479

"""Generate a parse tree of given spec with given tokenizing options

479

"""Generate a parse tree of given spec with given tokenizing options

480

481

>>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)

481

>>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)

482

('func', ('symbol', 'foo'), ('symbol', '$1'))

482

('func', ('symbol', 'foo'), ('symbol', '$1'))

483

>>> _parsewith('$1')

483

>>> _parsewith('$1')

484

Traceback (most recent call last):

484

Traceback (most recent call last):

485

...

485

...

486

ParseError: ("syntax error in revset '$1'", 0)

486

ParseError: ("syntax error in revset '$1'", 0)

487

>>> _parsewith('foo bar')

487

>>> _parsewith('foo bar')

488

Traceback (most recent call last):

488

Traceback (most recent call last):

489

...

489

...

490

ParseError: ('invalid token', 4)

490

ParseError: ('invalid token', 4)

491

"""

491

"""

492

p = parser.parser(elements)

492

p = parser.parser(elements)

493

tree, pos = p.parse(tokenize(spec, lookup=lookup,

493

tree, pos = p.parse(tokenize(spec, lookup=lookup,

494

syminitletters=syminitletters))

494

syminitletters=syminitletters))

495

if pos != len(spec):

495

if pos != len(spec):

496

raise error.ParseError(_('invalid token'), pos)

496

raise error.ParseError(_('invalid token'), pos)

497

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

497

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

498

499

class _aliasrules(parser.basealiasrules):

499

class _aliasrules(parser.basealiasrules):

500

"""Parsing and expansion rule set of revset aliases"""

500

"""Parsing and expansion rule set of revset aliases"""

501

_section = _('revset alias')

501

_section = _('revset alias')

502

503

@staticmethod

503

@staticmethod

504

def _parse(spec):

504

def _parse(spec):

505

"""Parse alias declaration/definition ``spec``

505

"""Parse alias declaration/definition ``spec``

506

507

This allows symbol names to use also ``$`` as an initial letter

507

This allows symbol names to use also ``$`` as an initial letter

508

(for backward compatibility), and callers of this function should

508

(for backward compatibility), and callers of this function should

509

examine whether ``$`` is used also for unexpected symbols or not.

509

examine whether ``$`` is used also for unexpected symbols or not.

510

"""

510

"""

511

return _parsewith(spec, syminitletters=_aliassyminitletters)

511

return _parsewith(spec, syminitletters=_aliassyminitletters)

512

513

@staticmethod

513

@staticmethod

514

def _trygetfunc(tree):

514

def _trygetfunc(tree):

515

if tree[0] == 'func' and tree[1][0] == 'symbol':

515

if tree[0] == 'func' and tree[1][0] == 'symbol':

516

return tree[1][1], getlist(tree[2])

516

return tree[1][1], getlist(tree[2])

517

518

def expandaliases(tree, aliases, warn=None):

518

def expandaliases(tree, aliases, warn=None):

519

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

519

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

520

aliases = _aliasrules.buildmap(aliases)

520

aliases = _aliasrules.buildmap(aliases)

521

tree = _aliasrules.expand(aliases, tree)

521

tree = _aliasrules.expand(aliases, tree)

522

# warn about problematic (but not referred) aliases

522

# warn about problematic (but not referred) aliases

523

if warn is not None:

523

if warn is not None:

524

for name, alias in sorted(aliases.iteritems()):

524

for name, alias in sorted(aliases.iteritems()):

525

if alias.error and not alias.warned:

525

if alias.error and not alias.warned:

526

warn(_('warning: %s\n') % (alias.error))

526

warn(_('warning: %s\n') % (alias.error))

527

alias.warned = True

527

alias.warned = True

528

return tree

528

return tree

529

530

def foldconcat(tree):

530

def foldconcat(tree):

531

"""Fold elements to be concatenated by `##`

531

"""Fold elements to be concatenated by `##`

532

"""

532

"""

533

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

533

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

534

return tree

534

return tree

535

if tree[0] == '_concat':

535

if tree[0] == '_concat':

536

pending = [tree]

536

pending = [tree]

537

l = []

537

l = []

538

while pending:

538

while pending:

539

e = pending.pop()

539

e = pending.pop()

540

if e[0] == '_concat':

540

if e[0] == '_concat':

541

pending.extend(reversed(e[1:]))

541

pending.extend(reversed(e[1:]))

542

elif e[0] in ('string', 'symbol'):

542

elif e[0] in ('string', 'symbol'):

543

l.append(e[1])

543

l.append(e[1])

544

else:

544

else:

545

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

545

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

546

raise error.ParseError(msg)

546

raise error.ParseError(msg)

547

return ('string', ''.join(l))

547

return ('string', ''.join(l))

548

else:

548

else:

549

return tuple(foldconcat(t) for t in tree)

549

return tuple(foldconcat(t) for t in tree)

550

551

def parse(spec, lookup=None):

551

def parse(spec, lookup=None):

552

return _parsewith(spec, lookup=lookup)

552

return _parsewith(spec, lookup=lookup)

553

554

def _quote(s):

554

def _quote(s):

555

r"""Quote a value in order to make it safe for the revset engine.

555

r"""Quote a value in order to make it safe for the revset engine.

556

557

>>> _quote('asdf')

557

>>> _quote('asdf')

558

"'asdf'"

558

"'asdf'"

559

>>> _quote("asdf'\"")

559

>>> _quote("asdf'\"")

560

'\'asdf\\\'"\''

560

'\'asdf\\\'"\''

561

>>> _quote('asdf\'')

561

>>> _quote('asdf\'')

562

"'asdf\\''"

562

"'asdf\\''"

563

>>> _quote(1)

563

>>> _quote(1)

564

"'1'"

564

"'1'"

565

"""

565

"""

566

return "'%s'" % util.escapestr(pycompat.bytestr(s))

566

return "'%s'" % util.escapestr(pycompat.bytestr(s))

567

568

def formatspec(expr, *args):

568

def formatspec(expr, *args):

569

'''

569

'''

570

This is a convenience function for using revsets internally, and

570

This is a convenience function for using revsets internally, and

571

escapes arguments appropriately. Aliases are intentionally ignored

571

escapes arguments appropriately. Aliases are intentionally ignored

572

so that intended expression behavior isn't accidentally subverted.

572

so that intended expression behavior isn't accidentally subverted.

573

574

Supported arguments:

574

Supported arguments:

575

576

%r = revset expression, parenthesized

576

%r = revset expression, parenthesized

577

%d = int(arg), no quoting

577

%d = int(arg), no quoting

578

%s = string(arg), escaped and single-quoted

578

%s = string(arg), escaped and single-quoted

579

%b = arg.branch(), escaped and single-quoted

579

%b = arg.branch(), escaped and single-quoted

580

%n = hex(arg), single-quoted

580

%n = hex(arg), single-quoted

581

%% = a literal '%'

581

%% = a literal '%'

582

583

Prefixing the type with 'l' specifies a parenthesized list of that type.

583

Prefixing the type with 'l' specifies a parenthesized list of that type.

584

585

>>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))

585

>>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))

586

'(10 or 11):: and ((this()) or (that()))'

586

'(10 or 11):: and ((this()) or (that()))'

587

>>> formatspec('%d:: and not %d::', 10, 20)

587

>>> formatspec('%d:: and not %d::', 10, 20)

588

'10:: and not 20::'

588

'10:: and not 20::'

589

>>> formatspec('%ld or %ld', [], [1])

589

>>> formatspec('%ld or %ld', [], [1])

590

"_list('') or 1"

590

"_list('') or 1"

591

>>> formatspec('keyword(%s)', 'foo\\xe9')

591

>>> formatspec('keyword(%s)', 'foo\\xe9')

592

"keyword('foo\\\\xe9')"

592

"keyword('foo\\\\xe9')"

593

>>> b = lambda: 'default'

593

>>> b = lambda: 'default'

594

>>> b.branch = b

594

>>> b.branch = b

595

>>> formatspec('branch(%b)', b)

595

>>> formatspec('branch(%b)', b)

596

"branch('default')"

596

"branch('default')"

597

>>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])

597

>>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])

598

"root(_list('a\\x00b\\x00c\\x00d'))"

598

"root(_list('a\\x00b\\x00c\\x00d'))"

599

'''

599

'''

600

601

def argtype(c, arg):

601

def argtype(c, arg):

602

if c == 'd':

602

if c == 'd':

603

return '%d' % int(arg)

603

return '%d' % int(arg)

604

elif c == 's':

604

elif c == 's':

605

return _quote(arg)

605

return _quote(arg)

606

elif c == 'r':

606

elif c == 'r':

607

parse(arg) # make sure syntax errors are confined

607

parse(arg) # make sure syntax errors are confined

608

return '(%s)' % arg

608

return '(%s)' % arg

609

elif c == 'n':

609

elif c == 'n':

610

return _quote(node.hex(arg))

610

return _quote(node.hex(arg))

611

elif c == 'b':

611

elif c == 'b':

612

return _quote(arg.branch())

612

return _quote(arg.branch())

613

614

def listexp(s, t):

614

def listexp(s, t):

615

l = len(s)

615

l = len(s)

616

if l == 0:

616

if l == 0:

617

return "_list('')"

617

return "_list('')"

618

elif l == 1:

618

elif l == 1:

619

return argtype(t, s[0])

619

return argtype(t, s[0])

620

elif t == 'd':

620

elif t == 'd':

621

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

621

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

622

elif t == 's':

622

elif t == 's':

623

return "_list('%s')" % "\0".join(s)

623

return "_list('%s')" % "\0".join(s)

624

elif t == 'n':

624

elif t == 'n':

625

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

625

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

626

elif t == 'b':

626

elif t == 'b':

627

return "_list('%s')" % "\0".join(a.branch() for a in s)

627

return "_list('%s')" % "\0".join(a.branch() for a in s)

628

629

m = l // 2

629

m = l // 2

630

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

630

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

631

632

expr = pycompat.bytestr(expr)

632

expr = pycompat.bytestr(expr)

633

ret = ''

633

ret = ''

634

pos = 0

634

pos = 0

635

arg = 0

635

arg = 0

636

while pos < len(expr):

636

while pos < len(expr):

637

c = expr[pos]

637

c = expr[pos]

638

if c == '%':

638

if c == '%':

639

pos += 1

639

pos += 1

640

d = expr[pos]

640

d = expr[pos]

641

if d == '%':

641

if d == '%':

642

ret += d

642

ret += d

643

elif d in 'dsnbr':

643

elif d in 'dsnbr':

644

ret += argtype(d, args[arg])

644

ret += argtype(d, args[arg])

645

arg += 1

645

arg += 1

646

elif d == 'l':

646

elif d == 'l':

647

# a list of some type

647

# a list of some type

648

pos += 1

648

pos += 1

649

d = expr[pos]

649

d = expr[pos]

650

ret += listexp(list(args[arg]), d)

650

ret += listexp(list(args[arg]), d)

651

arg += 1

651

arg += 1

652

else:

652

else:

653

raise error.Abort(_('unexpected revspec format character %s')

653

raise error.Abort(_('unexpected revspec format character %s')

654

% d)

654

% d)

655

else:

655

else:

656

ret += c

656

ret += c

657

pos += 1

657

pos += 1

658

659

return ret

659

return ret

660

661

def prettyformat(tree):

661

def prettyformat(tree):

662

return parser.prettyformat(tree, ('string', 'symbol'))

662

return parser.prettyformat(tree, ('string', 'symbol'))

663

664

def depth(tree):

664

def depth(tree):

665

if isinstance(tree, tuple):

665

if isinstance(tree, tuple):

666

return max(map(depth, tree)) + 1

666

return max(map(depth, tree)) + 1

667

else:

667

else:

668

return 0

668

return 0

669

670

def funcsused(tree):

670

def funcsused(tree):

671

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

671

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

672

return set()

672

return set()

673

else:

673

else:

674

funcs = set()

674

funcs = set()

675

for s in tree[1:]:

675

for s in tree[1:]:

676

funcs |= funcsused(s)

676

funcs |= funcsused(s)

677

if tree[0] == 'func':

677

if tree[0] == 'func':

678

funcs.add(tree[1][1])

678

funcs.add(tree[1][1])

679

return funcs

679

return funcs

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 util,
             )
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             keywords = {'and', 'or', 'not'}
             _quoteletters = {'"', "'"}
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens
                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.
                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.
                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
                 Check that @ is a valid unquoted token character (issue3686):
                 >>> list(tokenize("@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 '''
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     yield ('-', None, pos)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             _notset = object()
             def getsymbol(x):
                 if x and x[0] == 'symbol':
                     return x[1]
                 raise error.ParseError(_('not a symbol'))
             def getstring(x, err):
                 if x and (x[0] == 'string' or x[0] == 'symbol'):
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 if not x and default is not _notset:
                     return default
                 try:
                     return int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
             def getboolean(x, err):
                 value = util.parsebool(getsymbol(x))
                 if value is not None:
                     return value
                 raise error.ParseError(err)
             def getlist(x):
                 if not x:
                     return []
                 if x[0] == 'list':
                     return list(x[1:])
                 return [x]
             def getrange(x, err):
                 if not x:
                     raise error.ParseError(err)
                 op = x[0]
                 if op == 'range':
                     return x[1], x[2]
                 elif op == 'rangepre':
                     return None, x[1]
                 elif op == 'rangepost':
                     return x[1], None
                 elif op == 'rangeall':
                     return None, None
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 l = getlist(x)
                 if len(l) < min or (max >= 0 and len(l) > max):
                     raise error.ParseError(err)
                 return l
             def getargsdict(x, funcname, keys):
                 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                             keyvaluenode='keyvalue', keynode='symbol')
             # cache of {spec: raw parsed tree} built internally
             _treecache = {}
             def _cachedtree(spec):
                 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
                 tree = _treecache.get(spec)
                 if tree is None:
                     _treecache[spec] = tree = parse(spec)
                 return tree
             def _build(tmplspec, *repls):
                 """Create raw parsed tree from a template revset statement
                 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 template = _cachedtree(tmplspec)
                 return parser.buildtree(template, ('symbol', '_'), *repls)
             def _match(patspec, tree):
                 """Test if a tree matches the given pattern statement; return the matches
                 >>> _match('f(_)', parse('f()'))
                 >>> _match('f(_)', parse('f(1)'))
                 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
                 >>> _match('f(_)', parse('f(1, 2)'))
                 """
                 pattern = _cachedtree(patspec)
                 return parser.matchtree(pattern, tree, ('symbol', '_'),
                                         {'keyvalue', 'list'})
             def _matchonly(revs, bases):
                 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser"""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     if x[2][0] == 'dagrangepre':
                         return _fixops(('dagrange', post, x[2][1]))
                     elif x[2][0] == 'rangepre':
                         return _fixops(('range', post, x[2][1]))
                     elif x[2][0] == 'rangeall':
                         return _fixops(('rangepost', post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 elif op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
                 return (op,) + tuple(_fixops(y) for y in x[1:])
             def _analyze(x):
                 if x is None:
                     return x
                 op = x[0]
                 if op == 'minus':
                     return _analyze(_build('_ and not _', *x[1:]))
                 elif op == 'only':
                     return _analyze(_build('only(_, _)', *x[1:]))
                 elif op == 'onlypost':
                     return _analyze(_build('only(_)', x[1]))
                 elif op == 'dagrangepre':
                     return _analyze(_build('ancestors(_)', x[1]))
                 elif op == 'dagrangepost':
                     return _analyze(_build('descendants(_)', x[1]))
                 elif op == 'negate':
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 elif op in ('string', 'symbol'):
                     return x
                 elif op == 'rangeall':
                     return (op, None)
                 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 elif op == 'group':
                     return _analyze(x[1])
                 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                             'subscript'}:
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     return (op, ta, tb)
                 elif op == 'relsubscript':
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     tc = _analyze(x[3])
                     return (op, ta, tb, tc)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y) for y in x[1:])
                 elif op == 'keyvalue':
                     return (op, x[1], _analyze(x[2]))
                 elif op == 'func':
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Transform raw parsed tree to evaluatable tree which can be fed to
                 optimize() or getset()
                 All pseudo operations should be mapped to real operations or functions
                 defined in methods or symbols table respectively.
                 """
                 return _analyze(x)
             def _optimize(x, small):
                 if x is None:
                     return 0, x
                 smallbonus = 1
                 if small:
                     smallbonus = .5
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return smallbonus, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1], True)
                     wb, tb = _optimize(x[2], True)
                     w = min(wa, wb)
                     # (draft/secret/_notpublic() & ::x) have a fast path
                     m = _match('_() & ancestors(_)', ('and', ta, tb))
                     if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
                         return w, _build('_phaseandancestors(_, _)', m[1], m[2])
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
                     m = _match('not _', tb)
                     if m:
                         return wa, ('difference', ta, m[1])
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     ws, ts, ss = [], [], []
                     def flushss():
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y, False)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y, False)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if _match('public()', x[1]):
                         o = _optimize(_build('_notpublic()'), not small)
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1], not small)
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return smallbonus, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1], small)
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     wa, ta = _optimize(x[1], small)
                     wb, tb = _optimize(x[2], small)
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     w, t = _optimize(x[1], small)
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     w, t = _optimize(x[1], small)
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2], small)
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2], small)
                     if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
                              'keyword', 'outgoing', 'user', 'destination'):
                         w = 10 # slow
                     elif f in ('modifies', 'adds', 'removes'):
                         w = 30 # slower
                     elif f == "contains":
                         w = 100 # very slow
                     elif f == "ancestor":
                         w = 1 * smallbonus
                     elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
                         w = 0
                     elif f == "sort":
                         w = 10 # assume most sorts look at changelog
                     else:
                         w = 1
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Optimize evaluatable tree
                 All pseudo operations should be transformed beforehand.
                 """
                 _weight, newtree = _optimize(tree, small=True)
                 return newtree
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions
-            _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
+            _aliassyminitletters = _syminitletters | {'$'}
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Generate a parse tree of given spec with given tokenizing options
                 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith('$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith('foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 p = parser.parser(elements)
                 tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                              syminitletters=syminitletters))
                 if pos != len(spec):
                     raise error.ParseError(_('invalid token'), pos)
                 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
             class _aliasrules(parser.basealiasrules):
                 """Parsing and expansion rule set of revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse alias declaration/definition ``spec``
                     This allows symbol names to use also ``$`` as an initial letter
                     (for backward compatibility), and callers of this function should
                     examine whether ``$`` is used also for unexpected symbols or not.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     if tree[0] == 'func' and tree[1][0] == 'symbol':
                         return tree[1][1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
                 aliases = _aliasrules.buildmap(aliases)
                 tree = _aliasrules.expand(aliases, tree)
                 # warn about problematic (but not referred) aliases
                 if warn is not None:
                     for name, alias in sorted(aliases.iteritems()):
                         if alias.error and not alias.warned:
                             warn(_('warning: %s\n') % (alias.error))
                             alias.warned = True
                 return tree
             def foldconcat(tree):
                 """Fold elements to be concatenated by `##`
                 """
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return tree
                 if tree[0] == '_concat':
                     pending = [tree]
                     l = []
                     while pending:
                         e = pending.pop()
                         if e[0] == '_concat':
                             pending.extend(reversed(e[1:]))
                         elif e[0] in ('string', 'symbol'):
                             l.append(e[1])
                         else:
                             msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
                             raise error.ParseError(msg)
                     return ('string', ''.join(l))
                 else:
                     return tuple(foldconcat(t) for t in tree)
             def parse(spec, lookup=None):
                 return _parsewith(spec, lookup=lookup)
             def _quote(s):
                 r"""Quote a value in order to make it safe for the revset engine.
                 >>> _quote('asdf')
                 "'asdf'"
                 >>> _quote("asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote('asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 return "'%s'" % util.escapestr(pycompat.bytestr(s))
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.
                 Supported arguments:
                 %r = revset expression, parenthesized
                 %d = int(arg), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'
                 Prefixing the type with 'l' specifies a parenthesized list of that type.
                 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec('%d:: and not %d::', 10, 20)
                 '10:: and not 20::'
                 >>> formatspec('%ld or %ld', [], [1])
                 "_list('') or 1"
                 >>> formatspec('keyword(%s)', 'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: 'default'
                 >>> b.branch = b
                 >>> formatspec('branch(%b)', b)
                 "branch('default')"
                 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
                 "root(_list('a\\x00b\\x00c\\x00d'))"
                 '''
                 def argtype(c, arg):
                     if c == 'd':
                         return '%d' % int(arg)
                     elif c == 's':
                         return _quote(arg)
                     elif c == 'r':
                         parse(arg) # make sure syntax errors are confined
                         return '(%s)' % arg
                     elif c == 'n':
                         return _quote(node.hex(arg))
                     elif c == 'b':
                         return _quote(arg.branch())
                 def listexp(s, t):
                     l = len(s)
                     if l == 0:
                         return "_list('')"
                     elif l == 1:
                         return argtype(t, s[0])
                     elif t == 'd':
                         return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
                     elif t == 's':
                         return "_list('%s')" % "\0".join(s)
                     elif t == 'n':
                         return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                     elif t == 'b':
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     m = l // 2
                     return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
                 expr = pycompat.bytestr(expr)
                 ret = ''
                 pos = 0
                 arg = 0
                 while pos < len(expr):
                     c = expr[pos]
                     if c == '%':
                         pos += 1
                         d = expr[pos]
                         if d == '%':
                             ret += d
                         elif d in 'dsnbr':
                             ret += argtype(d, args[arg])
                             arg += 1
                         elif d == 'l':
                             # a list of some type
                             pos += 1
                             d = expr[pos]
                             ret += listexp(list(args[arg]), d)
                             arg += 1
                         else:
                             raise error.Abort(_('unexpected revspec format character %s')
                                               % d)
                     else:
                         ret += c
                     pos += 1
                 return ret
             def prettyformat(tree):
                 return parser.prettyformat(tree, ('string', 'symbol'))
             def depth(tree):
                 if isinstance(tree, tuple):
                     return max(map(depth, tree)) + 1
                 else:
                     return 0
             def funcsused(tree):
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return set()
                 else:
                     funcs = set()
                     for s in tree[1:]:
                         funcs |= funcsused(s)
                     if tree[0] == 'func':
                         funcs.add(tree[1][1])
                     return funcs