upstream/mercurial-mirror Commit - r31384:fac5cd3b

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

)

18

)

19

20

elements = {

20

elements = {

21

# token-type: binding-strength, primary, prefix, infix, suffix

21

# token-type: binding-strength, primary, prefix, infix, suffix

22

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

22

"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),

23

"##": (20, None, None, ("_concat", 20), None),

23

"##": (20, None, None, ("_concat", 20), None),

24

"~": (18, None, None, ("ancestor", 18), None),

24

"~": (18, None, None, ("ancestor", 18), None),

25

"^": (18, None, None, ("parent", 18), "parentpost"),

25

"^": (18, None, None, ("parent", 18), "parentpost"),

26

"-": (5, None, ("negate", 19), ("minus", 5), None),

26

"-": (5, None, ("negate", 19), ("minus", 5), None),

27

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

27

"::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

28

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

28

"..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),

29

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

29

":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

30

"not": (10, None, ("not", 10), None, None),

30

"not": (10, None, ("not", 10), None, None),

31

"!": (10, None, ("not", 10), None, None),

31

"!": (10, None, ("not", 10), None, None),

32

"and": (5, None, None, ("and", 5), None),

32

"and": (5, None, None, ("and", 5), None),

33

"&": (5, None, None, ("and", 5), None),

33

"&": (5, None, None, ("and", 5), None),

34

"%": (5, None, None, ("only", 5), "onlypost"),

34

"%": (5, None, None, ("only", 5), "onlypost"),

35

"or": (4, None, None, ("or", 4), None),

35

"or": (4, None, None, ("or", 4), None),

36

"|": (4, None, None, ("or", 4), None),

36

"|": (4, None, None, ("or", 4), None),

37

"+": (4, None, None, ("or", 4), None),

37

"+": (4, None, None, ("or", 4), None),

38

"=": (3, None, None, ("keyvalue", 3), None),

38

"=": (3, None, None, ("keyvalue", 3), None),

39

",": (2, None, None, ("list", 2), None),

39

",": (2, None, None, ("list", 2), None),

40

")": (0, None, None, None, None),

40

")": (0, None, None, None, None),

41

"symbol": (0, "symbol", None, None, None),

41

"symbol": (0, "symbol", None, None, None),

42

"string": (0, "string", None, None, None),

42

"string": (0, "string", None, None, None),

43

"end": (0, None, None, None, None),

43

"end": (0, None, None, None, None),

44

}

44

}

45

46

keywords = set(['and', 'or', 'not'])

46

keywords = set(['and', 'or', 'not'])

47

48

_quoteletters = set(['"', "'"])

49

_simpleopletters = set(pycompat.iterbytestr("():=,-|&+!~^%"))

50

48

# default set of valid characters for the initial letter of symbols

51

# default set of valid characters for the initial letter of symbols

49

_syminitletters = set(pycompat.iterbytestr(

52

_syminitletters = set(pycompat.iterbytestr(

50

string.ascii_letters.encode('ascii') +

53

string.ascii_letters.encode('ascii') +

51

string.digits.encode('ascii') +

54

string.digits.encode('ascii') +

52

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

55

'._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

53

56

54

# default set of valid characters for non-initial letters of symbols

57

# default set of valid characters for non-initial letters of symbols

55

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

58

_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

56

59

57

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

60

def tokenize(program, lookup=None, syminitletters=None, symletters=None):

58

'''

61

'''

59

Parse a revset statement into a stream of tokens

62

Parse a revset statement into a stream of tokens

60

63

61

``syminitletters`` is the set of valid characters for the initial

64

``syminitletters`` is the set of valid characters for the initial

62

letter of symbols.

65

letter of symbols.

63

66

64

By default, character ``c`` is recognized as valid for initial

67

By default, character ``c`` is recognized as valid for initial

65

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

68

letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

66

69

67

``symletters`` is the set of valid characters for non-initial

70

``symletters`` is the set of valid characters for non-initial

68

letters of symbols.

71

letters of symbols.

69

72

70

By default, character ``c`` is recognized as valid for non-initial

73

By default, character ``c`` is recognized as valid for non-initial

71

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

74

letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

72

75

73

Check that @ is a valid unquoted token character (issue3686):

76

Check that @ is a valid unquoted token character (issue3686):

74

>>> list(tokenize("@::"))

77

>>> list(tokenize("@::"))

75

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

78

[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

76

79

77

'''

80

'''

78

if syminitletters is None:

81

if syminitletters is None:

79

syminitletters = _syminitletters

82

syminitletters = _syminitletters

80

if symletters is None:

83

if symletters is None:

81

symletters = _symletters

84

symletters = _symletters

82

85

83

if program and lookup:

86

if program and lookup:

84

# attempt to parse old-style ranges first to deal with

87

# attempt to parse old-style ranges first to deal with

85

# things like old-tag which contain query metacharacters

88

# things like old-tag which contain query metacharacters

86

parts = program.split(':', 1)

89

parts = program.split(':', 1)

87

if all(lookup(sym) for sym in parts if sym):

90

if all(lookup(sym) for sym in parts if sym):

88

if parts[0]:

91

if parts[0]:

89

yield ('symbol', parts[0], 0)

92

yield ('symbol', parts[0], 0)

90

if len(parts) > 1:

93

if len(parts) > 1:

91

s = len(parts[0])

94

s = len(parts[0])

92

yield (':', None, s)

95

yield (':', None, s)

93

if parts[1]:

96

if parts[1]:

94

yield ('symbol', parts[1], s + 1)

97

yield ('symbol', parts[1], s + 1)

95

yield ('end', None, len(program))

98

yield ('end', None, len(program))

96

return

99

return

97

100

98

pos, l = 0, len(program)

101

pos, l = 0, len(program)

99

while pos < l:

102

while pos < l:

100

c = program[pos:pos + 1]

103

c = program[pos:pos + 1]

101

if c.isspace(): # skip inter-token whitespace

104

if c.isspace(): # skip inter-token whitespace

102

pass

105

pass

103

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

106

elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully

104

yield ('::', None, pos)

107

yield ('::', None, pos)

105

pos += 1 # skip ahead

108

pos += 1 # skip ahead

106

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

109

elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully

107

yield ('..', None, pos)

110

yield ('..', None, pos)

108

pos += 1 # skip ahead

111

pos += 1 # skip ahead

109

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

112

elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully

110

yield ('##', None, pos)

113

yield ('##', None, pos)

111

pos += 1 # skip ahead

114

pos += 1 # skip ahead

112

elif c in ~~"():=,-|&+!~^%"~~: # handle simple operators

115

elif c in _simpleopletters: # handle simple operators

113

yield (c, None, pos)

116

yield (c, None, pos)

114

elif (c in '"\'' or c == 'r' and

117

elif (c in _quoteletters or c == 'r' and

115

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

118

program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings

116

if c == 'r':

119

if c == 'r':

117

pos += 1

120

pos += 1

118

c = program[pos:pos + 1]

121

c = program[pos:pos + 1]

119

decode = lambda x: x

122

decode = lambda x: x

120

else:

123

else:

121

decode = parser.unescapestr

124

decode = parser.unescapestr

122

pos += 1

125

pos += 1

123

s = pos

126

s = pos

124

while pos < l: # find closing quote

127

while pos < l: # find closing quote

125

d = program[pos:pos + 1]

128

d = program[pos:pos + 1]

126

if d == '\\': # skip over escaped characters

129

if d == '\\': # skip over escaped characters

127

pos += 2

130

pos += 2

128

continue

131

continue

129

if d == c:

132

if d == c:

130

yield ('string', decode(program[s:pos]), s)

133

yield ('string', decode(program[s:pos]), s)

131

break

134

break

132

pos += 1

135

pos += 1

133

else:

136

else:

134

raise error.ParseError(_("unterminated string"), s)

137

raise error.ParseError(_("unterminated string"), s)

135

# gather up a symbol/keyword

138

# gather up a symbol/keyword

136

elif c in syminitletters:

139

elif c in syminitletters:

137

s = pos

140

s = pos

138

pos += 1

141

pos += 1

139

while pos < l: # find end of symbol

142

while pos < l: # find end of symbol

140

d = program[pos:pos + 1]

143

d = program[pos:pos + 1]

141

if d not in symletters:

144

if d not in symletters:

142

break

145

break

143

if (d == '.'

146

if (d == '.'

144

and program[pos - 1:pos] == '.'): # special case for ..

147

and program[pos - 1:pos] == '.'): # special case for ..

145

pos -= 1

148

pos -= 1

146

break

149

break

147

pos += 1

150

pos += 1

148

sym = program[s:pos]

151

sym = program[s:pos]

149

if sym in keywords: # operator keywords

152

if sym in keywords: # operator keywords

150

yield (sym, None, s)

153

yield (sym, None, s)

151

elif '-' in sym:

154

elif '-' in sym:

152

# some jerk gave us foo-bar-baz, try to check if it's a symbol

155

# some jerk gave us foo-bar-baz, try to check if it's a symbol

153

if lookup and lookup(sym):

156

if lookup and lookup(sym):

154

# looks like a real symbol

157

# looks like a real symbol

155

yield ('symbol', sym, s)

158

yield ('symbol', sym, s)

156

else:

159

else:

157

# looks like an expression

160

# looks like an expression

158

parts = sym.split('-')

161

parts = sym.split('-')

159

for p in parts[:-1]:

162

for p in parts[:-1]:

160

if p: # possible consecutive -

163

if p: # possible consecutive -

161

yield ('symbol', p, s)

164

yield ('symbol', p, s)

162

s += len(p)

165

s += len(p)

163

yield ('-', None, pos)

166

yield ('-', None, pos)

164

s += 1

167

s += 1

165

if parts[-1]: # possible trailing -

168

if parts[-1]: # possible trailing -

166

yield ('symbol', parts[-1], s)

169

yield ('symbol', parts[-1], s)

167

else:

170

else:

168

yield ('symbol', sym, s)

171

yield ('symbol', sym, s)

169

pos -= 1

172

pos -= 1

170

else:

173

else:

171

raise error.ParseError(_("syntax error in revset '%s'") %

174

raise error.ParseError(_("syntax error in revset '%s'") %

172

program, pos)

175

program, pos)

173

pos += 1

176

pos += 1

174

yield ('end', None, pos)

177

yield ('end', None, pos)

175

178

176

# helpers

179

# helpers

177

180

178

_notset = object()

181

_notset = object()

179

182

180

def getsymbol(x):

183

def getsymbol(x):

181

if x and x[0] == 'symbol':

184

if x and x[0] == 'symbol':

182

return x[1]

185

return x[1]

183

raise error.ParseError(_('not a symbol'))

186

raise error.ParseError(_('not a symbol'))

184

187

185

def getstring(x, err):

188

def getstring(x, err):

186

if x and (x[0] == 'string' or x[0] == 'symbol'):

189

if x and (x[0] == 'string' or x[0] == 'symbol'):

187

return x[1]

190

return x[1]

188

raise error.ParseError(err)

191

raise error.ParseError(err)

189

192

190

def getinteger(x, err, default=_notset):

193

def getinteger(x, err, default=_notset):

191

if not x and default is not _notset:

194

if not x and default is not _notset:

192

return default

195

return default

193

try:

196

try:

194

return int(getstring(x, err))

197

return int(getstring(x, err))

195

except ValueError:

198

except ValueError:

196

raise error.ParseError(err)

199

raise error.ParseError(err)

197

200

198

def getlist(x):

201

def getlist(x):

199

if not x:

202

if not x:

200

return []

203

return []

201

if x[0] == 'list':

204

if x[0] == 'list':

202

return list(x[1:])

205

return list(x[1:])

203

return [x]

206

return [x]

204

207

205

def getrange(x, err):

208

def getrange(x, err):

206

if not x:

209

if not x:

207

raise error.ParseError(err)

210

raise error.ParseError(err)

208

op = x[0]

211

op = x[0]

209

if op == 'range':

212

if op == 'range':

210

return x[1], x[2]

213

return x[1], x[2]

211

elif op == 'rangepre':

214

elif op == 'rangepre':

212

return None, x[1]

215

return None, x[1]

213

elif op == 'rangepost':

216

elif op == 'rangepost':

214

return x[1], None

217

return x[1], None

215

elif op == 'rangeall':

218

elif op == 'rangeall':

216

return None, None

219

return None, None

217

raise error.ParseError(err)

220

raise error.ParseError(err)

218

221

219

def getargs(x, min, max, err):

222

def getargs(x, min, max, err):

220

l = getlist(x)

223

l = getlist(x)

221

if len(l) < min or (max >= 0 and len(l) > max):

224

if len(l) < min or (max >= 0 and len(l) > max):

222

raise error.ParseError(err)

225

raise error.ParseError(err)

223

return l

226

return l

224

227

225

def getargsdict(x, funcname, keys):

228

def getargsdict(x, funcname, keys):

226

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

229

return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),

227

keyvaluenode='keyvalue', keynode='symbol')

230

keyvaluenode='keyvalue', keynode='symbol')

228

231

229

# Constants for ordering requirement, used in _analyze():

232

# Constants for ordering requirement, used in _analyze():

230

#

233

#

231

# If 'define', any nested functions and operations can change the ordering of

234

# If 'define', any nested functions and operations can change the ordering of

232

# the entries in the set. If 'follow', any nested functions and operations

235

# the entries in the set. If 'follow', any nested functions and operations

233

# should take the ordering specified by the first operand to the '&' operator.

236

# should take the ordering specified by the first operand to the '&' operator.

234

#

237

#

235

# For instance,

238

# For instance,

236

#

239

#

237

# X & (Y | Z)

240

# X & (Y | Z)

238

# ^ ^^^^^^^

241

# ^ ^^^^^^^

239

# | follow

242

# | follow

240

# define

243

# define

241

#

244

#

242

# will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order

245

# will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order

243

# of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.

246

# of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.

244

#

247

#

245

# 'any' means the order doesn't matter. For instance,

248

# 'any' means the order doesn't matter. For instance,

246

#

249

#

247

# X & !Y

250

# X & !Y

248

# ^

251

# ^

249

# any

252

# any

250

#

253

#

251

# 'y()' can either enforce its ordering requirement or take the ordering

254

# 'y()' can either enforce its ordering requirement or take the ordering

252

# specified by 'x()' because 'not()' doesn't care the order.

255

# specified by 'x()' because 'not()' doesn't care the order.

253

#

256

#

254

# Transition of ordering requirement:

257

# Transition of ordering requirement:

255

#

258

#

256

# 1. starts with 'define'

259

# 1. starts with 'define'

257

# 2. shifts to 'follow' by 'x & y'

260

# 2. shifts to 'follow' by 'x & y'

258

# 3. changes back to 'define' on function call 'f(x)' or function-like

261

# 3. changes back to 'define' on function call 'f(x)' or function-like

259

# operation 'x (f) y' because 'f' may have its own ordering requirement

262

# operation 'x (f) y' because 'f' may have its own ordering requirement

260

# for 'x' and 'y' (e.g. 'first(x)')

263

# for 'x' and 'y' (e.g. 'first(x)')

261

#

264

#

262

anyorder = 'any' # don't care the order

265

anyorder = 'any' # don't care the order

263

defineorder = 'define' # should define the order

266

defineorder = 'define' # should define the order

264

followorder = 'follow' # must follow the current order

267

followorder = 'follow' # must follow the current order

265

268

266

# transition table for 'x & y', from the current expression 'x' to 'y'

269

# transition table for 'x & y', from the current expression 'x' to 'y'

267

_tofolloworder = {

270

_tofolloworder = {

268

anyorder: anyorder,

271

anyorder: anyorder,

269

defineorder: followorder,

272

defineorder: followorder,

270

followorder: followorder,

273

followorder: followorder,

271

}

274

}

272

275

273

def _matchonly(revs, bases):

276

def _matchonly(revs, bases):

274

"""

277

"""

275

>>> f = lambda *args: _matchonly(*map(parse, args))

278

>>> f = lambda *args: _matchonly(*map(parse, args))

276

>>> f('ancestors(A)', 'not ancestors(B)')

279

>>> f('ancestors(A)', 'not ancestors(B)')

277

('list', ('symbol', 'A'), ('symbol', 'B'))

280

('list', ('symbol', 'A'), ('symbol', 'B'))

278

"""

281

"""

279

if (revs is not None

282

if (revs is not None

280

and revs[0] == 'func'

283

and revs[0] == 'func'

281

and getsymbol(revs[1]) == 'ancestors'

284

and getsymbol(revs[1]) == 'ancestors'

282

and bases is not None

285

and bases is not None

283

and bases[0] == 'not'

286

and bases[0] == 'not'

284

and bases[1][0] == 'func'

287

and bases[1][0] == 'func'

285

and getsymbol(bases[1][1]) == 'ancestors'):

288

and getsymbol(bases[1][1]) == 'ancestors'):

286

return ('list', revs[2], bases[1][2])

289

return ('list', revs[2], bases[1][2])

287

290

288

def _fixops(x):

291

def _fixops(x):

289

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

292

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

290

handled well by our simple top-down parser"""

293

handled well by our simple top-down parser"""

291

if not isinstance(x, tuple):

294

if not isinstance(x, tuple):

292

return x

295

return x

293

296

294

op = x[0]

297

op = x[0]

295

if op == 'parent':

298

if op == 'parent':

296

# x^:y means (x^) : y, not x ^ (:y)

299

# x^:y means (x^) : y, not x ^ (:y)

297

# x^: means (x^) :, not x ^ (:)

300

# x^: means (x^) :, not x ^ (:)

298

post = ('parentpost', x[1])

301

post = ('parentpost', x[1])

299

if x[2][0] == 'dagrangepre':

302

if x[2][0] == 'dagrangepre':

300

return _fixops(('dagrange', post, x[2][1]))

303

return _fixops(('dagrange', post, x[2][1]))

301

elif x[2][0] == 'rangepre':

304

elif x[2][0] == 'rangepre':

302

return _fixops(('range', post, x[2][1]))

305

return _fixops(('range', post, x[2][1]))

303

elif x[2][0] == 'rangeall':

306

elif x[2][0] == 'rangeall':

304

return _fixops(('rangepost', post))

307

return _fixops(('rangepost', post))

305

elif op == 'or':

308

elif op == 'or':

306

# make number of arguments deterministic:

309

# make number of arguments deterministic:

307

# x + y + z -> (or x y z) -> (or (list x y z))

310

# x + y + z -> (or x y z) -> (or (list x y z))

308

return (op, _fixops(('list',) + x[1:]))

311

return (op, _fixops(('list',) + x[1:]))

309

312

310

return (op,) + tuple(_fixops(y) for y in x[1:])

313

return (op,) + tuple(_fixops(y) for y in x[1:])

311

314

312

def _analyze(x, order):

315

def _analyze(x, order):

313

if x is None:

316

if x is None:

314

return x

317

return x

315

318

316

op = x[0]

319

op = x[0]

317

if op == 'minus':

320

if op == 'minus':

318

return _analyze(('and', x[1], ('not', x[2])), order)

321

return _analyze(('and', x[1], ('not', x[2])), order)

319

elif op == 'only':

322

elif op == 'only':

320

t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))

323

t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))

321

return _analyze(t, order)

324

return _analyze(t, order)

322

elif op == 'onlypost':

325

elif op == 'onlypost':

323

return _analyze(('func', ('symbol', 'only'), x[1]), order)

326

return _analyze(('func', ('symbol', 'only'), x[1]), order)

324

elif op == 'dagrangepre':

327

elif op == 'dagrangepre':

325

return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)

328

return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)

326

elif op == 'dagrangepost':

329

elif op == 'dagrangepost':

327

return _analyze(('func', ('symbol', 'descendants'), x[1]), order)

330

return _analyze(('func', ('symbol', 'descendants'), x[1]), order)

328

elif op == 'negate':

331

elif op == 'negate':

329

s = getstring(x[1], _("can't negate that"))

332

s = getstring(x[1], _("can't negate that"))

330

return _analyze(('string', '-' + s), order)

333

return _analyze(('string', '-' + s), order)

331

elif op in ('string', 'symbol'):

334

elif op in ('string', 'symbol'):

332

return x

335

return x

333

elif op == 'and':

336

elif op == 'and':

334

ta = _analyze(x[1], order)

337

ta = _analyze(x[1], order)

335

tb = _analyze(x[2], _tofolloworder[order])

338

tb = _analyze(x[2], _tofolloworder[order])

336

return (op, ta, tb, order)

339

return (op, ta, tb, order)

337

elif op == 'or':

340

elif op == 'or':

338

return (op, _analyze(x[1], order), order)

341

return (op, _analyze(x[1], order), order)

339

elif op == 'not':

342

elif op == 'not':

340

return (op, _analyze(x[1], anyorder), order)

343

return (op, _analyze(x[1], anyorder), order)

341

elif op == 'rangeall':

344

elif op == 'rangeall':

342

return (op, None, order)

345

return (op, None, order)

343

elif op in ('rangepre', 'rangepost', 'parentpost'):

346

elif op in ('rangepre', 'rangepost', 'parentpost'):

344

return (op, _analyze(x[1], defineorder), order)

347

return (op, _analyze(x[1], defineorder), order)

345

elif op == 'group':

348

elif op == 'group':

346

return _analyze(x[1], order)

349

return _analyze(x[1], order)

347

elif op in ('dagrange', 'range', 'parent', 'ancestor'):

350

elif op in ('dagrange', 'range', 'parent', 'ancestor'):

348

ta = _analyze(x[1], defineorder)

351

ta = _analyze(x[1], defineorder)

349

tb = _analyze(x[2], defineorder)

352

tb = _analyze(x[2], defineorder)

350

return (op, ta, tb, order)

353

return (op, ta, tb, order)

351

elif op == 'list':

354

elif op == 'list':

352

return (op,) + tuple(_analyze(y, order) for y in x[1:])

355

return (op,) + tuple(_analyze(y, order) for y in x[1:])

353

elif op == 'keyvalue':

356

elif op == 'keyvalue':

354

return (op, x[1], _analyze(x[2], order))

357

return (op, x[1], _analyze(x[2], order))

355

elif op == 'func':

358

elif op == 'func':

356

f = getsymbol(x[1])

359

f = getsymbol(x[1])

357

d = defineorder

360

d = defineorder

358

if f == 'present':

361

if f == 'present':

359

# 'present(set)' is known to return the argument set with no

362

# 'present(set)' is known to return the argument set with no

360

# modification, so forward the current order to its argument

363

# modification, so forward the current order to its argument

361

d = order

364

d = order

362

return (op, x[1], _analyze(x[2], d), order)

365

return (op, x[1], _analyze(x[2], d), order)

363

raise ValueError('invalid operator %r' % op)

366

raise ValueError('invalid operator %r' % op)

364

367

365

def analyze(x, order=defineorder):

368

def analyze(x, order=defineorder):

366

"""Transform raw parsed tree to evaluatable tree which can be fed to

369

"""Transform raw parsed tree to evaluatable tree which can be fed to

367

optimize() or getset()

370

optimize() or getset()

368

371

369

All pseudo operations should be mapped to real operations or functions

372

All pseudo operations should be mapped to real operations or functions

370

defined in methods or symbols table respectively.

373

defined in methods or symbols table respectively.

371

374

372

'order' specifies how the current expression 'x' is ordered (see the

375

'order' specifies how the current expression 'x' is ordered (see the

373

constants defined above.)

376

constants defined above.)

374

"""

377

"""

375

return _analyze(x, order)

378

return _analyze(x, order)

376

379

377

def _optimize(x, small):

380

def _optimize(x, small):

378

if x is None:

381

if x is None:

379

return 0, x

382

return 0, x

380

383

381

smallbonus = 1

384

smallbonus = 1

382

if small:

385

if small:

383

smallbonus = .5

386

smallbonus = .5

384

387

385

op = x[0]

388

op = x[0]

386

if op in ('string', 'symbol'):

389

if op in ('string', 'symbol'):

387

return smallbonus, x # single revisions are small

390

return smallbonus, x # single revisions are small

388

elif op == 'and':

391

elif op == 'and':

389

wa, ta = _optimize(x[1], True)

392

wa, ta = _optimize(x[1], True)

390

wb, tb = _optimize(x[2], True)

393

wb, tb = _optimize(x[2], True)

391

order = x[3]

394

order = x[3]

392

w = min(wa, wb)

395

w = min(wa, wb)

393

396

394

# (::x and not ::y)/(not ::y and ::x) have a fast path

397

# (::x and not ::y)/(not ::y and ::x) have a fast path

395

tm = _matchonly(ta, tb) or _matchonly(tb, ta)

398

tm = _matchonly(ta, tb) or _matchonly(tb, ta)

396

if tm:

399

if tm:

397

return w, ('func', ('symbol', 'only'), tm, order)

400

return w, ('func', ('symbol', 'only'), tm, order)

398

401

399

if tb is not None and tb[0] == 'not':

402

if tb is not None and tb[0] == 'not':

400

return wa, ('difference', ta, tb[1], order)

403

return wa, ('difference', ta, tb[1], order)

401

404

402

if wa > wb:

405

if wa > wb:

403

return w, (op, tb, ta, order)

406

return w, (op, tb, ta, order)

404

return w, (op, ta, tb, order)

407

return w, (op, ta, tb, order)

405

elif op == 'or':

408

elif op == 'or':

406

# fast path for machine-generated expression, that is likely to have

409

# fast path for machine-generated expression, that is likely to have

407

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

410

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

408

order = x[2]

411

order = x[2]

409

ws, ts, ss = [], [], []

412

ws, ts, ss = [], [], []

410

def flushss():

413

def flushss():

411

if not ss:

414

if not ss:

412

return

415

return

413

if len(ss) == 1:

416

if len(ss) == 1:

414

w, t = ss[0]

417

w, t = ss[0]

415

else:

418

else:

416

s = '\0'.join(t[1] for w, t in ss)

419

s = '\0'.join(t[1] for w, t in ss)

417

y = ('func', ('symbol', '_list'), ('string', s), order)

420

y = ('func', ('symbol', '_list'), ('string', s), order)

418

w, t = _optimize(y, False)

421

w, t = _optimize(y, False)

419

ws.append(w)

422

ws.append(w)

420

ts.append(t)

423

ts.append(t)

421

del ss[:]

424

del ss[:]

422

for y in getlist(x[1]):

425

for y in getlist(x[1]):

423

w, t = _optimize(y, False)

426

w, t = _optimize(y, False)

424

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

427

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

425

ss.append((w, t))

428

ss.append((w, t))

426

continue

429

continue

427

flushss()

430

flushss()

428

ws.append(w)

431

ws.append(w)

429

ts.append(t)

432

ts.append(t)

430

flushss()

433

flushss()

431

if len(ts) == 1:

434

if len(ts) == 1:

432

return ws[0], ts[0] # 'or' operation is fully optimized out

435

return ws[0], ts[0] # 'or' operation is fully optimized out

433

# we can't reorder trees by weight because it would change the order.

436

# we can't reorder trees by weight because it would change the order.

434

# ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")

437

# ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")

435

# ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))

438

# ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))

436

return max(ws), (op, ('list',) + tuple(ts), order)

439

return max(ws), (op, ('list',) + tuple(ts), order)

437

elif op == 'not':

440

elif op == 'not':

438

# Optimize not public() to _notpublic() because we have a fast version

441

# Optimize not public() to _notpublic() because we have a fast version

439

if x[1][:3] == ('func', ('symbol', 'public'), None):

442

if x[1][:3] == ('func', ('symbol', 'public'), None):

440

order = x[1][3]

443

order = x[1][3]

441

newsym = ('func', ('symbol', '_notpublic'), None, order)

444

newsym = ('func', ('symbol', '_notpublic'), None, order)

442

o = _optimize(newsym, not small)

445

o = _optimize(newsym, not small)

443

return o[0], o[1]

446

return o[0], o[1]

444

else:

447

else:

445

o = _optimize(x[1], not small)

448

o = _optimize(x[1], not small)

446

order = x[2]

449

order = x[2]

447

return o[0], (op, o[1], order)

450

return o[0], (op, o[1], order)

448

elif op == 'rangeall':

451

elif op == 'rangeall':

449

return smallbonus, x

452

return smallbonus, x

450

elif op in ('rangepre', 'rangepost', 'parentpost'):

453

elif op in ('rangepre', 'rangepost', 'parentpost'):

451

o = _optimize(x[1], small)

454

o = _optimize(x[1], small)

452

order = x[2]

455

order = x[2]

453

return o[0], (op, o[1], order)

456

return o[0], (op, o[1], order)

454

elif op in ('dagrange', 'range', 'parent', 'ancestor'):

457

elif op in ('dagrange', 'range', 'parent', 'ancestor'):

455

wa, ta = _optimize(x[1], small)

458

wa, ta = _optimize(x[1], small)

456

wb, tb = _optimize(x[2], small)

459

wb, tb = _optimize(x[2], small)

457

order = x[3]

460

order = x[3]

458

return wa + wb, (op, ta, tb, order)

461

return wa + wb, (op, ta, tb, order)

459

elif op == 'list':

462

elif op == 'list':

460

ws, ts = zip(*(_optimize(y, small) for y in x[1:]))

463

ws, ts = zip(*(_optimize(y, small) for y in x[1:]))

461

return sum(ws), (op,) + ts

464

return sum(ws), (op,) + ts

462

elif op == 'keyvalue':

465

elif op == 'keyvalue':

463

w, t = _optimize(x[2], small)

466

w, t = _optimize(x[2], small)

464

return w, (op, x[1], t)

467

return w, (op, x[1], t)

465

elif op == 'func':

468

elif op == 'func':

466

f = getsymbol(x[1])

469

f = getsymbol(x[1])

467

wa, ta = _optimize(x[2], small)

470

wa, ta = _optimize(x[2], small)

468

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

471

if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',

469

'keyword', 'outgoing', 'user', 'destination'):

472

'keyword', 'outgoing', 'user', 'destination'):

470

w = 10 # slow

473

w = 10 # slow

471

elif f in ('modifies', 'adds', 'removes'):

474

elif f in ('modifies', 'adds', 'removes'):

472

w = 30 # slower

475

w = 30 # slower

473

elif f == "contains":

476

elif f == "contains":

474

w = 100 # very slow

477

w = 100 # very slow

475

elif f == "ancestor":

478

elif f == "ancestor":

476

w = 1 * smallbonus

479

w = 1 * smallbonus

477

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

480

elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):

478

w = 0

481

w = 0

479

elif f == "sort":

482

elif f == "sort":

480

w = 10 # assume most sorts look at changelog

483

w = 10 # assume most sorts look at changelog

481

else:

484

else:

482

w = 1

485

w = 1

483

order = x[3]

486

order = x[3]

484

return w + wa, (op, x[1], ta, order)

487

return w + wa, (op, x[1], ta, order)

485

raise ValueError('invalid operator %r' % op)

488

raise ValueError('invalid operator %r' % op)

486

489

487

def optimize(tree):

490

def optimize(tree):

488

"""Optimize evaluatable tree

491

"""Optimize evaluatable tree

489

492

490

All pseudo operations should be transformed beforehand.

493

All pseudo operations should be transformed beforehand.

491

"""

494

"""

492

_weight, newtree = _optimize(tree, small=True)

495

_weight, newtree = _optimize(tree, small=True)

493

return newtree

496

return newtree

494

497

495

# the set of valid characters for the initial letter of symbols in

498

# the set of valid characters for the initial letter of symbols in

496

# alias declarations and definitions

499

# alias declarations and definitions

497

_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))

500

_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))

498

501

499

def _parsewith(spec, lookup=None, syminitletters=None):

502

def _parsewith(spec, lookup=None, syminitletters=None):

500

"""Generate a parse tree of given spec with given tokenizing options

503

"""Generate a parse tree of given spec with given tokenizing options

501

504

502

>>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)

505

>>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)

503

('func', ('symbol', 'foo'), ('symbol', '$1'))

506

('func', ('symbol', 'foo'), ('symbol', '$1'))

504

>>> _parsewith('$1')

507

>>> _parsewith('$1')

505

Traceback (most recent call last):

508

Traceback (most recent call last):

506

...

509

...

507

ParseError: ("syntax error in revset '$1'", 0)

510

ParseError: ("syntax error in revset '$1'", 0)

508

>>> _parsewith('foo bar')

511

>>> _parsewith('foo bar')

509

Traceback (most recent call last):

512

Traceback (most recent call last):

510

...

513

...

511

ParseError: ('invalid token', 4)

514

ParseError: ('invalid token', 4)

512

"""

515

"""

513

p = parser.parser(elements)

516

p = parser.parser(elements)

514

tree, pos = p.parse(tokenize(spec, lookup=lookup,

517

tree, pos = p.parse(tokenize(spec, lookup=lookup,

515

syminitletters=syminitletters))

518

syminitletters=syminitletters))

516

if pos != len(spec):

519

if pos != len(spec):

517

raise error.ParseError(_('invalid token'), pos)

520

raise error.ParseError(_('invalid token'), pos)

518

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

521

return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

519

522

520

class _aliasrules(parser.basealiasrules):

523

class _aliasrules(parser.basealiasrules):

521

"""Parsing and expansion rule set of revset aliases"""

524

"""Parsing and expansion rule set of revset aliases"""

522

_section = _('revset alias')

525

_section = _('revset alias')

523

526

524

@staticmethod

527

@staticmethod

525

def _parse(spec):

528

def _parse(spec):

526

"""Parse alias declaration/definition ``spec``

529

"""Parse alias declaration/definition ``spec``

527

530

528

This allows symbol names to use also ``$`` as an initial letter

531

This allows symbol names to use also ``$`` as an initial letter

529

(for backward compatibility), and callers of this function should

532

(for backward compatibility), and callers of this function should

530

examine whether ``$`` is used also for unexpected symbols or not.

533

examine whether ``$`` is used also for unexpected symbols or not.

531

"""

534

"""

532

return _parsewith(spec, syminitletters=_aliassyminitletters)

535

return _parsewith(spec, syminitletters=_aliassyminitletters)

533

536

534

@staticmethod

537

@staticmethod

535

def _trygetfunc(tree):

538

def _trygetfunc(tree):

536

if tree[0] == 'func' and tree[1][0] == 'symbol':

539

if tree[0] == 'func' and tree[1][0] == 'symbol':

537

return tree[1][1], getlist(tree[2])

540

return tree[1][1], getlist(tree[2])

538

541

539

def expandaliases(ui, tree):

542

def expandaliases(ui, tree):

540

aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))

543

aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))

541

tree = _aliasrules.expand(aliases, tree)

544

tree = _aliasrules.expand(aliases, tree)

542

# warn about problematic (but not referred) aliases

545

# warn about problematic (but not referred) aliases

543

for name, alias in sorted(aliases.iteritems()):

546

for name, alias in sorted(aliases.iteritems()):

544

if alias.error and not alias.warned:

547

if alias.error and not alias.warned:

545

ui.warn(_('warning: %s\n') % (alias.error))

548

ui.warn(_('warning: %s\n') % (alias.error))

546

alias.warned = True

549

alias.warned = True

547

return tree

550

return tree

548

551

549

def foldconcat(tree):

552

def foldconcat(tree):

550

"""Fold elements to be concatenated by `##`

553

"""Fold elements to be concatenated by `##`

551

"""

554

"""

552

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

555

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

553

return tree

556

return tree

554

if tree[0] == '_concat':

557

if tree[0] == '_concat':

555

pending = [tree]

558

pending = [tree]

556

l = []

559

l = []

557

while pending:

560

while pending:

558

e = pending.pop()

561

e = pending.pop()

559

if e[0] == '_concat':

562

if e[0] == '_concat':

560

pending.extend(reversed(e[1:]))

563

pending.extend(reversed(e[1:]))

561

elif e[0] in ('string', 'symbol'):

564

elif e[0] in ('string', 'symbol'):

562

l.append(e[1])

565

l.append(e[1])

563

else:

566

else:

564

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

567

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

565

raise error.ParseError(msg)

568

raise error.ParseError(msg)

566

return ('string', ''.join(l))

569

return ('string', ''.join(l))

567

else:

570

else:

568

return tuple(foldconcat(t) for t in tree)

571

return tuple(foldconcat(t) for t in tree)

569

572

570

def parse(spec, lookup=None):

573

def parse(spec, lookup=None):

571

return _parsewith(spec, lookup=lookup)

574

return _parsewith(spec, lookup=lookup)

572

575

573

def formatspec(expr, *args):

576

def formatspec(expr, *args):

574

'''

577

'''

575

This is a convenience function for using revsets internally, and

578

This is a convenience function for using revsets internally, and

576

escapes arguments appropriately. Aliases are intentionally ignored

579

escapes arguments appropriately. Aliases are intentionally ignored

577

so that intended expression behavior isn't accidentally subverted.

580

so that intended expression behavior isn't accidentally subverted.

578

581

579

Supported arguments:

582

Supported arguments:

580

583

581

%r = revset expression, parenthesized

584

%r = revset expression, parenthesized

582

%d = int(arg), no quoting

585

%d = int(arg), no quoting

583

%s = string(arg), escaped and single-quoted

586

%s = string(arg), escaped and single-quoted

584

%b = arg.branch(), escaped and single-quoted

587

%b = arg.branch(), escaped and single-quoted

585

%n = hex(arg), single-quoted

588

%n = hex(arg), single-quoted

586

%% = a literal '%'

589

%% = a literal '%'

587

590

588

Prefixing the type with 'l' specifies a parenthesized list of that type.

591

Prefixing the type with 'l' specifies a parenthesized list of that type.

589

592

590

>>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))

593

>>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))

591

'(10 or 11):: and ((this()) or (that()))'

594

'(10 or 11):: and ((this()) or (that()))'

592

>>> formatspec('%d:: and not %d::', 10, 20)

595

>>> formatspec('%d:: and not %d::', 10, 20)

593

'10:: and not 20::'

596

'10:: and not 20::'

594

>>> formatspec('%ld or %ld', [], [1])

597

>>> formatspec('%ld or %ld', [], [1])

595

"_list('') or 1"

598

"_list('') or 1"

596

>>> formatspec('keyword(%s)', 'foo\\xe9')

599

>>> formatspec('keyword(%s)', 'foo\\xe9')

597

"keyword('foo\\\\xe9')"

600

"keyword('foo\\\\xe9')"

598

>>> b = lambda: 'default'

601

>>> b = lambda: 'default'

599

>>> b.branch = b

602

>>> b.branch = b

600

>>> formatspec('branch(%b)', b)

603

>>> formatspec('branch(%b)', b)

601

"branch('default')"

604

"branch('default')"

602

>>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])

605

>>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])

603

"root(_list('a\\x00b\\x00c\\x00d'))"

606

"root(_list('a\\x00b\\x00c\\x00d'))"

604

'''

607

'''

605

608

606

def quote(s):

609

def quote(s):

607

return repr(str(s))

610

return repr(str(s))

608

611

609

def argtype(c, arg):

612

def argtype(c, arg):

610

if c == 'd':

613

if c == 'd':

611

return str(int(arg))

614

return str(int(arg))

612

elif c == 's':

615

elif c == 's':

613

return quote(arg)

616

return quote(arg)

614

elif c == 'r':

617

elif c == 'r':

615

parse(arg) # make sure syntax errors are confined

618

parse(arg) # make sure syntax errors are confined

616

return '(%s)' % arg

619

return '(%s)' % arg

617

elif c == 'n':

620

elif c == 'n':

618

return quote(node.hex(arg))

621

return quote(node.hex(arg))

619

elif c == 'b':

622

elif c == 'b':

620

return quote(arg.branch())

623

return quote(arg.branch())

621

624

622

def listexp(s, t):

625

def listexp(s, t):

623

l = len(s)

626

l = len(s)

624

if l == 0:

627

if l == 0:

625

return "_list('')"

628

return "_list('')"

626

elif l == 1:

629

elif l == 1:

627

return argtype(t, s[0])

630

return argtype(t, s[0])

628

elif t == 'd':

631

elif t == 'd':

629

return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)

632

return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)

630

elif t == 's':

633

elif t == 's':

631

return "_list('%s')" % "\0".join(s)

634

return "_list('%s')" % "\0".join(s)

632

elif t == 'n':

635

elif t == 'n':

633

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

636

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

634

elif t == 'b':

637

elif t == 'b':

635

return "_list('%s')" % "\0".join(a.branch() for a in s)

638

return "_list('%s')" % "\0".join(a.branch() for a in s)

636

639

637

m = l // 2

640

m = l // 2

638

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

641

return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

639

642

640

ret = ''

643

ret = ''

641

pos = 0

644

pos = 0

642

arg = 0

645

arg = 0

643

while pos < len(expr):

646

while pos < len(expr):

644

c = expr[pos]

647

c = expr[pos]

645

if c == '%':

648

if c == '%':

646

pos += 1

649

pos += 1

647

d = expr[pos]

650

d = expr[pos]

648

if d == '%':

651

if d == '%':

649

ret += d

652

ret += d

650

elif d in 'dsnbr':

653

elif d in 'dsnbr':

651

ret += argtype(d, args[arg])

654

ret += argtype(d, args[arg])

652

arg += 1

655

arg += 1

653

elif d == 'l':

656

elif d == 'l':

654

# a list of some type

657

# a list of some type

655

pos += 1

658

pos += 1

656

d = expr[pos]

659

d = expr[pos]

657

ret += listexp(list(args[arg]), d)

660

ret += listexp(list(args[arg]), d)

658

arg += 1

661

arg += 1

659

else:

662

else:

660

raise error.Abort(_('unexpected revspec format character %s')

663

raise error.Abort(_('unexpected revspec format character %s')

661

% d)

664

% d)

662

else:

665

else:

663

ret += c

666

ret += c

664

pos += 1

667

pos += 1

665

668

666

return ret

669

return ret

667

670

668

def prettyformat(tree):

671

def prettyformat(tree):

669

return parser.prettyformat(tree, ('string', 'symbol'))

672

return parser.prettyformat(tree, ('string', 'symbol'))

670

673

671

def depth(tree):

674

def depth(tree):

672

if isinstance(tree, tuple):

675

if isinstance(tree, tuple):

673

return max(map(depth, tree)) + 1

676

return max(map(depth, tree)) + 1

674

else:

677

else:

675

return 0

678

return 0

676

679

677

def funcsused(tree):

680

def funcsused(tree):

678

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

681

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

679

return set()

682

return set()

680

else:

683

else:

681

funcs = set()

684

funcs = set()

682

for s in tree[1:]:

685

for s in tree[1:]:

683

funcs |= funcsused(s)

686

funcs |= funcsused(s)

684

if tree[0] == 'func':

687

if tree[0] == 'func':

685

funcs.add(tree[1][1])

688

funcs.add(tree[1][1])

686

return funcs

689

return funcs

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
             )
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             keywords = set(['and', 'or', 'not'])
+            _quoteletters = set(['"', "'"])
+            _simpleopletters = set(pycompat.iterbytestr("():=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens
                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.
                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.
                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
                 Check that @ is a valid unquoted token character (issue3686):
                 >>> list(tokenize("@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 '''
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos:pos + 1]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
-                    elif c in "():=,-|&+!~^%": # handle simple operators
+                    elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
-                    elif (c in '"\'' or c == 'r' and
+                    elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             pos += 1
                             c = program[pos:pos + 1]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos:pos + 1]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos:pos + 1]
                             if d not in symletters:
                                 break
                             if (d == '.'
                                 and program[pos - 1:pos] == '.'): # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     yield ('-', None, pos)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             _notset = object()
             def getsymbol(x):
                 if x and x[0] == 'symbol':
                     return x[1]
                 raise error.ParseError(_('not a symbol'))
             def getstring(x, err):
                 if x and (x[0] == 'string' or x[0] == 'symbol'):
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 if not x and default is not _notset:
                     return default
                 try:
                     return int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
             def getlist(x):
                 if not x:
                     return []
                 if x[0] == 'list':
                     return list(x[1:])
                 return [x]
             def getrange(x, err):
                 if not x:
                     raise error.ParseError(err)
                 op = x[0]
                 if op == 'range':
                     return x[1], x[2]
                 elif op == 'rangepre':
                     return None, x[1]
                 elif op == 'rangepost':
                     return x[1], None
                 elif op == 'rangeall':
                     return None, None
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 l = getlist(x)
                 if len(l) < min or (max >= 0 and len(l) > max):
                     raise error.ParseError(err)
                 return l
             def getargsdict(x, funcname, keys):
                 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                             keyvaluenode='keyvalue', keynode='symbol')
             # Constants for ordering requirement, used in _analyze():
             #
             # If 'define', any nested functions and operations can change the ordering of
             # the entries in the set. If 'follow', any nested functions and operations
             # should take the ordering specified by the first operand to the '&' operator.
             #
             # For instance,
             #
             #   X & (Y | Z)
             #   ^   ^^^^^^^
             #   |   follow
             #   define
             #
             # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
             # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
             #
             # 'any' means the order doesn't matter. For instance,
             #
             #   X & !Y
             #        ^
             #        any
             #
             # 'y()' can either enforce its ordering requirement or take the ordering
             # specified by 'x()' because 'not()' doesn't care the order.
             #
             # Transition of ordering requirement:
             #
             # 1. starts with 'define'
             # 2. shifts to 'follow' by 'x & y'
             # 3. changes back to 'define' on function call 'f(x)' or function-like
             #    operation 'x (f) y' because 'f' may have its own ordering requirement
             #    for 'x' and 'y' (e.g. 'first(x)')
             #
             anyorder = 'any'        # don't care the order
             defineorder = 'define'  # should define the order
             followorder = 'follow'  # must follow the current order
             # transition table for 'x & y', from the current expression 'x' to 'y'
             _tofolloworder = {
                 anyorder: anyorder,
                 defineorder: followorder,
                 followorder: followorder,
             }
             def _matchonly(revs, bases):
                 """
                 >>> f = lambda *args: _matchonly(*map(parse, args))
                 >>> f('ancestors(A)', 'not ancestors(B)')
                 ('list', ('symbol', 'A'), ('symbol', 'B'))
                 """
                 if (revs is not None
                     and revs[0] == 'func'
                     and getsymbol(revs[1]) == 'ancestors'
                     and bases is not None
                     and bases[0] == 'not'
                     and bases[1][0] == 'func'
                     and getsymbol(bases[1][1]) == 'ancestors'):
                     return ('list', revs[2], bases[1][2])
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser"""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     if x[2][0] == 'dagrangepre':
                         return _fixops(('dagrange', post, x[2][1]))
                     elif x[2][0] == 'rangepre':
                         return _fixops(('range', post, x[2][1]))
                     elif x[2][0] == 'rangeall':
                         return _fixops(('rangepost', post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 return (op,) + tuple(_fixops(y) for y in x[1:])
             def _analyze(x, order):
                 if x is None:
                     return x
                 op = x[0]
                 if op == 'minus':
                     return _analyze(('and', x[1], ('not', x[2])), order)
                 elif op == 'only':
                     t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
                     return _analyze(t, order)
                 elif op == 'onlypost':
                     return _analyze(('func', ('symbol', 'only'), x[1]), order)
                 elif op == 'dagrangepre':
                     return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
                 elif op == 'dagrangepost':
                     return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
                 elif op == 'negate':
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s), order)
                 elif op in ('string', 'symbol'):
                     return x
                 elif op == 'and':
                     ta = _analyze(x[1], order)
                     tb = _analyze(x[2], _tofolloworder[order])
                     return (op, ta, tb, order)
                 elif op == 'or':
                     return (op, _analyze(x[1], order), order)
                 elif op == 'not':
                     return (op, _analyze(x[1], anyorder), order)
                 elif op == 'rangeall':
                     return (op, None, order)
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     return (op, _analyze(x[1], defineorder), order)
                 elif op == 'group':
                     return _analyze(x[1], order)
                 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
                     ta = _analyze(x[1], defineorder)
                     tb = _analyze(x[2], defineorder)
                     return (op, ta, tb, order)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y, order) for y in x[1:])
                 elif op == 'keyvalue':
                     return (op, x[1], _analyze(x[2], order))
                 elif op == 'func':
                     f = getsymbol(x[1])
                     d = defineorder
                     if f == 'present':
                         # 'present(set)' is known to return the argument set with no
                         # modification, so forward the current order to its argument
                         d = order
                     return (op, x[1], _analyze(x[2], d), order)
                 raise ValueError('invalid operator %r' % op)
             def analyze(x, order=defineorder):
                 """Transform raw parsed tree to evaluatable tree which can be fed to
                 optimize() or getset()
                 All pseudo operations should be mapped to real operations or functions
                 defined in methods or symbols table respectively.
                 'order' specifies how the current expression 'x' is ordered (see the
                 constants defined above.)
                 """
                 return _analyze(x, order)
             def _optimize(x, small):
                 if x is None:
                     return 0, x
                 smallbonus = 1
                 if small:
                     smallbonus = .5
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return smallbonus, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1], True)
                     wb, tb = _optimize(x[2], True)
                     order = x[3]
                     w = min(wa, wb)
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     tm = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if tm:
                         return w, ('func', ('symbol', 'only'), tm, order)
                     if tb is not None and tb[0] == 'not':
                         return wa, ('difference', ta, tb[1], order)
                     if wa > wb:
                         return w, (op, tb, ta, order)
                     return w, (op, ta, tb, order)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     order = x[2]
                     ws, ts, ss = [], [], []
                     def flushss():
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = ('func', ('symbol', '_list'), ('string', s), order)
                             w, t = _optimize(y, False)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y, False)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     # we can't reorder trees by weight because it would change the order.
                     # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
                     #   ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
                     return max(ws), (op, ('list',) + tuple(ts), order)
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if x[1][:3] == ('func', ('symbol', 'public'), None):
                         order = x[1][3]
                         newsym = ('func', ('symbol', '_notpublic'), None, order)
                         o = _optimize(newsym, not small)
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1], not small)
                         order = x[2]
                         return o[0], (op, o[1], order)
                 elif op == 'rangeall':
                     return smallbonus, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1], small)
                     order = x[2]
                     return o[0], (op, o[1], order)
                 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
                     wa, ta = _optimize(x[1], small)
                     wb, tb = _optimize(x[2], small)
                     order = x[3]
                     return wa + wb, (op, ta, tb, order)
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2], small)
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2], small)
                     if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
                              'keyword', 'outgoing', 'user', 'destination'):
                         w = 10 # slow
                     elif f in ('modifies', 'adds', 'removes'):
                         w = 30 # slower
                     elif f == "contains":
                         w = 100 # very slow
                     elif f == "ancestor":
                         w = 1 * smallbonus
                     elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
                         w = 0
                     elif f == "sort":
                         w = 10 # assume most sorts look at changelog
                     else:
                         w = 1
                     order = x[3]
                     return w + wa, (op, x[1], ta, order)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Optimize evaluatable tree
                 All pseudo operations should be transformed beforehand.
                 """
                 _weight, newtree = _optimize(tree, small=True)
                 return newtree
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions
             _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Generate a parse tree of given spec with given tokenizing options
                 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith('$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith('foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 p = parser.parser(elements)
                 tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                              syminitletters=syminitletters))
                 if pos != len(spec):
                     raise error.ParseError(_('invalid token'), pos)
                 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
             class _aliasrules(parser.basealiasrules):
                 """Parsing and expansion rule set of revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse alias declaration/definition ``spec``
                     This allows symbol names to use also ``$`` as an initial letter
                     (for backward compatibility), and callers of this function should
                     examine whether ``$`` is used also for unexpected symbols or not.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     if tree[0] == 'func' and tree[1][0] == 'symbol':
                         return tree[1][1], getlist(tree[2])
             def expandaliases(ui, tree):
                 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
                 tree = _aliasrules.expand(aliases, tree)
                 # warn about problematic (but not referred) aliases
                 for name, alias in sorted(aliases.iteritems()):
                     if alias.error and not alias.warned:
                         ui.warn(_('warning: %s\n') % (alias.error))
                         alias.warned = True
                 return tree
             def foldconcat(tree):
                 """Fold elements to be concatenated by `##`
                 """
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return tree
                 if tree[0] == '_concat':
                     pending = [tree]
                     l = []
                     while pending:
                         e = pending.pop()
                         if e[0] == '_concat':
                             pending.extend(reversed(e[1:]))
                         elif e[0] in ('string', 'symbol'):
                             l.append(e[1])
                         else:
                             msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
                             raise error.ParseError(msg)
                     return ('string', ''.join(l))
                 else:
                     return tuple(foldconcat(t) for t in tree)
             def parse(spec, lookup=None):
                 return _parsewith(spec, lookup=lookup)
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.
                 Supported arguments:
                 %r = revset expression, parenthesized
                 %d = int(arg), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'
                 Prefixing the type with 'l' specifies a parenthesized list of that type.
                 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec('%d:: and not %d::', 10, 20)
                 '10:: and not 20::'
                 >>> formatspec('%ld or %ld', [], [1])
                 "_list('') or 1"
                 >>> formatspec('keyword(%s)', 'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: 'default'
                 >>> b.branch = b
                 >>> formatspec('branch(%b)', b)
                 "branch('default')"
                 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
                 "root(_list('a\\x00b\\x00c\\x00d'))"
                 '''
                 def quote(s):
                     return repr(str(s))
                 def argtype(c, arg):
                     if c == 'd':
                         return str(int(arg))
                     elif c == 's':
                         return quote(arg)
                     elif c == 'r':
                         parse(arg) # make sure syntax errors are confined
                         return '(%s)' % arg
                     elif c == 'n':
                         return quote(node.hex(arg))
                     elif c == 'b':
                         return quote(arg.branch())
                 def listexp(s, t):
                     l = len(s)
                     if l == 0:
                         return "_list('')"
                     elif l == 1:
                         return argtype(t, s[0])
                     elif t == 'd':
                         return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
                     elif t == 's':
                         return "_list('%s')" % "\0".join(s)
                     elif t == 'n':
                         return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                     elif t == 'b':
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     m = l // 2
                     return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
                 ret = ''
                 pos = 0
                 arg = 0
                 while pos < len(expr):
                     c = expr[pos]
                     if c == '%':
                         pos += 1
                         d = expr[pos]
                         if d == '%':
                             ret += d
                         elif d in 'dsnbr':
                             ret += argtype(d, args[arg])
                             arg += 1
                         elif d == 'l':
                             # a list of some type
                             pos += 1
                             d = expr[pos]
                             ret += listexp(list(args[arg]), d)
                             arg += 1
                         else:
                             raise error.Abort(_('unexpected revspec format character %s')
                                               % d)
                     else:
                         ret += c
                     pos += 1
                 return ret
             def prettyformat(tree):
                 return parser.prettyformat(tree, ('string', 'symbol'))
             def depth(tree):
                 if isinstance(tree, tuple):
                     return max(map(depth, tree)) + 1
                 else:
                     return 0
             def funcsused(tree):
                 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
                     return set()
                 else:
                     funcs = set()
                     for s in tree[1:]:
                         funcs |= funcsused(s)
                     if tree[0] == 'func':
                         funcs.add(tree[1][1])
                     return funcs