upstream/mercurial-mirror Commit - r42001:ddb17451

1

# revsetlang.py - parser, tokenizer and utility for revision set language
#
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

smartset,

18

smartset,

19

util,

19

util,

20

)

20

)

21

from .utils import (

21

from .utils import (

22

stringutil,

22

stringutil,

23

)

23

)

24

25

# Operator table handed to parser.parser() (see _parsewith()).  Each key is a
# token type produced by tokenize(); each value is a 5-tuple laid out as
# described by the comment on the first line of the table.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}

# symbols that tokenize() emits as operator tokens rather than as 'symbol'
keywords = {'and', 'or', 'not'}

# table consulted by _optimize() to look up the '_weight' attribute of a
# function by name; it is empty here and is not populated in this part of
# the file
symbols = {}

# characters that open (and close) a quoted string
_quoteletters = {'"', "'"}
# characters that tokenize() emits as a single-character operator token
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = set(pycompat.iterbytestr(
    pycompat.sysbytes(string.ascii_letters) +
    pycompat.sysbytes(string.digits) +
    '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

71

72

def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    This is a generator yielding ``(type, value, pos)`` tuples, where
    ``value`` is None for operator tokens, and ending with an ``'end'``
    token.

    ``program`` must be bytes; ProgrammingError is raised otherwise.

    ``lookup``, if given, is called with a candidate name and its result is
    tested for truth to decide whether the name is a real symbol. It is used
    for old-style ``a:b`` ranges and for names containing ``-``.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    ParseError is raised on an unterminated string or on a character that
    cannot start any token.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        # wrap in a 1-tuple so that a tuple argument is reported with %r
        # instead of being unpacked as the format arguments
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % (program,))
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step over the prefix and keep the body as is
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end without seeing the closing quote
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # a name such as foo-bar-baz is ambiguous: check whether it
                # is known as a symbol before treating '-' as an operator
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # compensate for the unconditional increment at the loop bottom
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)

193

194

# helpers

194

# helpers

195

196

_notset = object()

196

_notset = object()

197

198

def getsymbol(x):
    """Return the name stored in a ``('symbol', name, ...)`` node

    ParseError is raised if ``x`` is empty or its first item is not
    ``'symbol'``.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]

202

203

def getstring(x, err):
    """Return the value of a ``'string'`` or ``'symbol'`` node

    ``err`` is the message of the ParseError raised when ``x`` is empty or
    is a node of any other type.
    """
    if x:
        kind = x[0]
        if kind == 'string' or kind == 'symbol':
            return x[1]
    raise error.ParseError(err)

207

208

def getinteger(x, err, default=_notset):
    """Return the integer value of a ``'string'`` or ``'symbol'`` node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned
    instead. ParseError carrying ``err`` is raised when ``x`` is not a
    string/symbol node or its value cannot be converted by ``int()``.
    """
    hasdefault = default is not _notset
    if hasdefault and not x:
        return default
    try:
        text = getstring(x, err)
        return int(text)
    except ValueError:
        raise error.ParseError(err)

215

216

def getboolean(x, err):
    """Return the boolean spelled by a ``'symbol'`` node

    The symbol name is handed to ``stringutil.parsebool()``; ParseError
    carrying ``err`` is raised if that returns None.
    """
    parsed = stringutil.parsebool(getsymbol(x))
    if parsed is None:
        raise error.ParseError(err)
    return parsed

221

222

def getlist(x):
    """Return the items of a parsed tree as a Python list

    An empty tree gives ``[]``, a ``'list'`` node gives its children, and
    any other node gives a one-element list holding the node itself.
    """
    if x and x[0] == 'list':
        return list(x[1:])
    return [x] if x else []

228

229

def getrange(x, err):
    """Return the ``(first, last)`` sub-trees of a range node

    A side that is omitted in the expression is returned as None:
    ``'rangepre'`` has no first, ``'rangepost'`` has no last, and
    ``'rangeall'`` has neither. ParseError carrying ``err`` is raised if
    ``x`` is empty or is not one of the range node types.
    """
    if x:
        kind = x[0]
        if kind == 'range':
            return x[1], x[2]
        if kind == 'rangepre':
            return None, x[1]
        if kind == 'rangepost':
            return x[1], None
        if kind == 'rangeall':
            return None, None
    raise error.ParseError(err)

242

243

def getintrange(x, err1, err2, deffirst=_notset, deflast=_notset):
    """Get [first, last] integer range (both inclusive) from a parsed tree

    A single string/symbol node ``n`` is treated as the range ``[n, n]``.

    If any of the sides omitted, and if no default provided, ParseError will
    be raised.

    ``err1`` is used when ``x`` is neither an integer nor a range, ``err2``
    when one side of the range is not an integer.
    """
    issingle = bool(x) and (x[0] == 'string' or x[0] == 'symbol')
    if not issingle:
        lo, hi = getrange(x, err1)
        first = getinteger(lo, err2, deffirst)
        last = getinteger(hi, err2, deflast)
        return first, last
    value = getinteger(x, err1)
    return value, value

254

255

def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    At least ``min`` arguments are required. The upper bound ``max`` is
    only enforced when it is not negative. ParseError carrying ``err`` is
    raised if the count is out of bounds.
    """
    args = getlist(x)
    count = len(args)
    if count >= min and (max < 0 or count <= max):
        return args
    raise error.ParseError(err)

260

261

def getargsdict(x, funcname, keys):
    """Build the arguments dict of ``x`` via ``parser.buildargsdict()``

    ``keys`` is split by ``parser.splitargspec()``; ``'keyvalue'`` and
    ``'symbol'`` are passed as the key-value and key node types.
    """
    args = getlist(x)
    argspec = parser.splitargspec(keys)
    return parser.buildargsdict(args, funcname, argspec,
                                keyvaluenode='keyvalue', keynode='symbol')

264

265

# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the tree parsed from ``spec``, parsing it at most once

    The result of ``parse(spec)`` is remembered in ``_treecache`` and
    returned on later calls with the same ``spec``.
    """
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    cached = _treecache.get(spec)
    if cached is not None:
        return cached
    cached = parse(spec)
    _treecache[spec] = cached
    return cached

274

275

def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    The template is parsed (and cached) by ``_cachedtree()``, then passed
    to ``parser.buildtree()`` together with the ``_`` symbol node and
    ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    placeholder = ('symbol', '_')
    return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)

283

284

def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    The pattern is parsed (and cached) by ``_cachedtree()`` and compared by
    ``parser.matchtree()``, with the ``_`` symbol node as placeholder.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    placeholder = ('symbol', '_')
    return parser.matchtree(_cachedtree(patspec), tree, placeholder,
                            {'keyvalue', 'list'})

295

296

def _matchonly(revs, bases):
    """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'

    Returns whatever ``_match()`` returns for that pattern.
    """
    candidate = ('and', revs, bases)
    return _match('ancestors(_) and not ancestors(_)', candidate)

298

299

def _fixops(x):
    """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
    handled well by our simple top-down parser

    Non-tuple values are returned untouched. Every other node is rebuilt
    with its children fixed recursively, after three special cases:
    ``parent`` followed by a prefix/nullary range, ``or``, and ``subscript``
    of a ``relation``.
    """
    if not isinstance(x, tuple):
        return x

    op = x[0]

    if op == 'or':
        # make number of arguments deterministic:
        # x + y + z -> (or x y z) -> (or (list x y z))
        return (op, _fixops(('list',) + x[1:]))

    if op == 'subscript' and x[1][0] == 'relation':
        # x#y[z] ternary
        rel = x[1]
        return _fixops(('relsubscript', rel[1], rel[2], x[2]))

    if op == 'parent':
        # x^:y means (x^) : y, not x ^ (:y)
        # x^: means (x^) :, not x ^ (:)
        post = ('parentpost', x[1])
        rhs = x[2]
        rhsop = rhs[0]
        if rhsop == 'dagrangepre':
            return _fixops(('dagrange', post, rhs[1]))
        if rhsop == 'dagrangeall':
            return _fixops(('dagrangepost', post))
        if rhsop == 'rangepre':
            return _fixops(('range', post, rhs[1]))
        if rhsop == 'rangeall':
            return _fixops(('rangepost', post))
        # any other right-hand side is a plain binary 'parent'

    return (op,) + tuple(_fixops(child) for child in x[1:])

327

328

def _analyze(x):
    """Recursively rewrite the pseudo operations of a raw parsed tree

    ``minus``, ``only``, ``onlypost``, ``dagrangepre`` and ``dagrangepost``
    are rebuilt from a template by ``_build()`` and analyzed again,
    ``negate`` becomes a ``'string'`` node prefixed with ``-``, and
    ``group`` is replaced by its content. All other known nodes are rebuilt
    with their operands analyzed.

    ParseError is raised for ``dagrangeall`` or for a ``negate`` operand
    that is not a string/symbol; ValueError is raised for an unknown
    operator.
    """
    if x is None:
        return x

    op = x[0]

    # pseudo operations rewritten in terms of real operations/functions
    if op == 'minus':
        return _analyze(_build('_ and not _', *x[1:]))
    if op == 'only':
        return _analyze(_build('only(_, _)', *x[1:]))
    if op == 'onlypost':
        return _analyze(_build('only(_)', x[1]))
    if op == 'dagrangeall':
        raise error.ParseError(_("can't use '::' in this context"))
    if op == 'dagrangepre':
        return _analyze(_build('ancestors(_)', x[1]))
    if op == 'dagrangepost':
        return _analyze(_build('descendants(_)', x[1]))
    if op == 'negate':
        operand = getstring(x[1], _("can't negate that"))
        return _analyze(('string', '-' + operand))

    # leaves
    if op in ('string', 'symbol', 'smartset'):
        return x
    if op == 'rangeall':
        return (op, None)

    # unary nodes
    if op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
        return (op, _analyze(x[1]))
    if op == 'group':
        return _analyze(x[1])

    # binary and ternary nodes
    if op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
              'subscript'}:
        left = _analyze(x[1])
        right = _analyze(x[2])
        return (op, left, right)
    if op == 'relsubscript':
        first = _analyze(x[1])
        second = _analyze(x[2])
        third = _analyze(x[3])
        return (op, first, second, third)

    if op == 'list':
        return (op,) + tuple(_analyze(item) for item in x[1:])
    if op == 'keyvalue' or op == 'func':
        # the key / function name node is kept as is
        return (op, x[1], _analyze(x[2]))
    raise ValueError('invalid operator %r' % op)

373

374

def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.

    This is the public entry point; the work is done by ``_analyze()``.
    """
    analyzed = _analyze(x)
    return analyzed

382

383

def _optimize(x):
    """Optimize an evaluatable tree; return a ``(weight, newtree)`` pair

    ``weight`` is a number estimated per node type (e.g. 0.5 for a single
    string/symbol, 1 for ``rangeall``) and is used to decide the evaluation
    order of ``and`` operands. ``newtree`` is ``x`` with known patterns
    replaced by faster equivalents.

    ValueError is raised for an unknown operator.
    """
    if x is None:
        return 0, x

    op = x[0]
    if op in ('string', 'symbol', 'smartset'):
        return 0.5, x # single revisions are small
    elif op == 'and':
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        w = min(wa, wb)

        # (draft/secret/_notpublic() & ::x) have a fast path
        m = _match('_() & ancestors(_)', ('and', ta, tb))
        if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
            return w, _build('_phaseandancestors(_, _)', m[1], m[2])

        # (::x and not ::y)/(not ::y and ::x) have a fast path
        m = _matchonly(ta, tb) or _matchonly(tb, ta)
        if m:
            return w, _build('only(_, _)', *m[1:])

        # 'x and not y' is evaluated as a set difference
        m = _match('not _', tb)
        if m:
            return wa, ('difference', ta, m[1])
        # tell the evaluator when the right-hand side is the lighter one
        if wa > wb:
            op = 'andsmally'
        return w, (op, ta, tb)
    elif op == 'or':
        # fast path for machine-generated expression, that is likely to have
        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
        ws, ts, ss = [], [], []
        def flushss():
            # move the pending run of string/symbol operands from ss to
            # ws/ts, folding two or more of them into a single _list() call
            if not ss:
                return
            if len(ss) == 1:
                w, t = ss[0]
            else:
                s = '\0'.join(t[1] for w, t in ss)
                y = _build('_list(_)', ('string', s))
                w, t = _optimize(y)
            ws.append(w)
            ts.append(t)
            del ss[:]
        for y in getlist(x[1]):
            w, t = _optimize(y)
            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                ss.append((w, t))
                continue
            flushss()
            ws.append(w)
            ts.append(t)
        flushss()
        if len(ts) == 1:
            return ws[0], ts[0] # 'or' operation is fully optimized out
        return max(ws), (op, ('list',) + tuple(ts))
    elif op == 'not':
        # Optimize not public() to _notpublic() because we have a fast version
        if _match('public()', x[1]):
            o = _optimize(_build('_notpublic()'))
            return o[0], o[1]
        else:
            o = _optimize(x[1])
            return o[0], (op, o[1])
    elif op == 'rangeall':
        return 1, x
    elif op in ('rangepre', 'rangepost', 'parentpost'):
        o = _optimize(x[1])
        return o[0], (op, o[1])
    elif op in ('dagrange', 'range'):
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        return wa + wb, (op, ta, tb)
    elif op in ('parent', 'ancestor', 'relation', 'subscript'):
        # only the first operand is optimized; x[2] is kept as is
        w, t = _optimize(x[1])
        return w, (op, t, x[2])
    elif op == 'relsubscript':
        w, t = _optimize(x[1])
        return w, (op, t, x[2], x[3])
    elif op == 'list':
        if len(x) == 1:
            # no children: zip(*()) would yield nothing to unpack below
            return 0, (op,)
        ws, ts = zip(*(_optimize(y) for y in x[1:]))
        return sum(ws), (op,) + ts
    elif op == 'keyvalue':
        w, t = _optimize(x[2])
        return w, (op, x[1], t)
    elif op == 'func':
        f = getsymbol(x[1])
        wa, ta = _optimize(x[2])
        # functions without a '_weight' attribute (or unknown ones) weigh 1
        w = getattr(symbols.get(f), '_weight', 1)
        m = _match('commonancestors(_)', ta)

        # Optimize heads(commonancestors(_)) because we have a fast version
        if f == 'heads' and m:
            return w + wa, _build('_commonancestorheads(_)', m[1])

        return w + wa, (op, x[1], ta)
    raise ValueError('invalid operator %r' % op)

480

481

def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.

    Only the new tree computed by ``_optimize()`` is returned; the weight
    is dropped.
    """
    return _optimize(tree)[1]

488

489

# the set of valid characters for the initial letter of symbols in

489

# the set of valid characters for the initial letter of symbols in

490

# alias declarations and definitions

490

# alias declarations and definitions

491

_aliassyminitletters = _syminitletters | {'$'}

491

_aliassyminitletters = _syminitletters | {'$'}

492

493

def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` and ``syminitletters`` are forwarded to ``tokenize()``. A
    ParseError is raised unless the parser consumed the whole ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    # a spec spelled as "revset(...)" is tokenized without the lookup function
    if lookup and spec.startswith('revset(') and spec.endswith(')'):
        lookup = None
    revsetparser = parser.parser(elements)
    tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
    tree, consumed = revsetparser.parse(tokens)
    if consumed != len(spec):
        # trailing input that the parser did not consume
        raise error.ParseError(_('invalid token'), consumed)
    flattened = parser.simplifyinfixops(tree, ('list', 'or'))
    return _fixops(flattened)

515

516

class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        Symbol names may additionally start with ``$`` here (kept for
        backward compatibility). Callers of this function should examine
        whether ``$`` is used also for unexpected symbols or not.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return (name, args) if ``tree`` is a call of a symbol, else None"""
        if tree[0] != 'func' or tree[1][0] != 'symbol':
            return None
        funcname = tree[1][1]
        return funcname, getlist(tree[2])

534

535

def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    If ``warn`` is given, it is called once for every alias that carries an
    error and has not been warned about yet; such aliases are then marked
    as warned.
    """
    aliasmap = _aliasrules.buildmap(aliases)
    expanded = _aliasrules.expand(aliasmap, tree)
    if warn is None:
        return expanded
    # warn about problematic (but not referred) aliases
    for unusedname, alias in sorted(aliasmap.iteritems()):
        if not alias.error or alias.warned:
            continue
        warn(_('warning: %s\n') % (alias.error))
        alias.warned = True
    return expanded

546

547

def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    Every ``_concat`` node is replaced by a single ``string`` node holding
    the joined values of its ``string``/``symbol`` operands (nested
    ``_concat`` nodes are flattened left to right). Any other operand type
    raises a ParseError. Non-tuples and string/symbol/smartset nodes are
    returned unchanged.
    """
    if not isinstance(tree, tuple):
        return tree
    if tree[0] in ('string', 'symbol', 'smartset'):
        return tree
    if tree[0] != '_concat':
        return tuple(foldconcat(t) for t in tree)

    # depth-first walk; operands are pushed reversed so that they are
    # popped in their original left-to-right order
    stack = [tree]
    parts = []
    while stack:
        elem = stack.pop()
        kind = elem[0]
        if kind == '_concat':
            stack.extend(reversed(elem[1:]))
        elif kind in ('string', 'symbol'):
            parts.append(elem[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
    return ('string', ''.join(parts))

568

569

def parse(spec, lookup=None):
    """Parse ``spec`` into a tree, decorating located parse errors

    A ParseError that carries a location gets a ``hint`` showing the spec
    with a caret under the failing position, and is then re-raised.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        haslocation = len(inst.args) > 1
        if haslocation:
            loc = inst.args[1]
            # Remove newlines -- spaces are equivalent whitespace.
            flatspec = spec.replace('\n', ' ')
            # The caret must point at the place in the revset spec that
            # failed to parse, but in a hint we get an open paren at the
            # start. Therefore "loc + 1" spaces (instead of "loc") are
            # needed to line up the caret with the location of the error.
            caret = ' ' * (loc + 1) + '^ ' + _('here')
            inst.hint = flatspec + '\n' + caret
        raise

583

584

def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted with ``pycompat.bytestr``, escaped with
    ``stringutil.escapestr`` and wrapped in single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = stringutil.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped

597

598

def _formatargtype(c, arg):

598

def _formatargtype(c, arg):

599

if c == 'd':

599

if c == 'd':

600

return '_rev(%d)' % int(arg)

600

return '_rev(%d)' % int(arg)

601

elif c == 's':

601

elif c == 's':

602

return _quote(arg)

602

return _quote(arg)

603

elif c == 'r':

603

elif c == 'r':

604

if not isinstance(arg, bytes):

604

if not isinstance(arg, bytes):

605

raise TypeError

605

raise TypeError

606

parse(arg) # make sure syntax errors are confined

606

parse(arg) # make sure syntax errors are confined

607

return '(%s)' % arg

607

return '(%s)' % arg

608

elif c == 'n':

608

elif c == 'n':

609

return _quote(node.hex(arg))

609

return _quote(node.hex(arg))

610

elif c == 'b':

610

elif c == 'b':

611

try:

611

try:

612

return _quote(arg.branch())

612

return _quote(arg.branch())

613

except AttributeError:

613

except AttributeError:

614

raise TypeError

614

raise TypeError

615

raise error.ParseError(_('unexpected revspec format character %s') % c)

615

raise error.ParseError(_('unexpected revspec format character %s') % c)

616

617

def _formatlistexp(s, t):

617

def _formatlistexp(s, t):

618

l = len(s)

618

l = len(s)

619

if l == 0:

619

if l == 0:

620

return "_list('')"

620

return "_list('')"

621

elif l == 1:

621

elif l == 1:

622

return _formatargtype(t, s[0])

622

return _formatargtype(t, s[0])

623

elif t == 'd':

623

elif t == 'd':

624

return _formatintlist(s)

624

return _formatintlist(s)

625

elif t == 's':

625

elif t == 's':

626

return "_list(%s)" % _quote("\0".join(s))

626

return "_list(%s)" % _quote("\0".join(s))

627

elif t == 'n':

627

elif t == 'n':

628

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

628

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

629

elif t == 'b':

629

elif t == 'b':

630

try:

630

try:

631

return "_list('%s')" % "\0".join(a.branch() for a in s)

631

return "_list('%s')" % "\0".join(a.branch() for a in s)

632

except AttributeError:

632

except AttributeError:

633

raise TypeError

633

raise TypeError

634

635

m = l // 2

635

m = l // 2

636

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

636

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

637

638

def _formatintlist(data):

638

def _formatintlist(data):

639

try:

639

try:

640

l = len(data)

640

l = len(data)

641

if l == 0:

641

if l == 0:

642

return "_list('')"

642

return "_list('')"

643

elif l == 1:

643

elif l == 1:

644

return _formatargtype('d', data[0])

644

return _formatargtype('d', data[0])

645

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)

645

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)

646

except (TypeError, ValueError):

646

except (TypeError, ValueError):

647

raise error.ParseError(_('invalid argument for revspec'))

647

raise error.ParseError(_('invalid argument for revspec'))

648

649

def _formatparamexp(args, t):

649

def _formatparamexp(args, t):

650

return ', '.join(_formatargtype(t, a) for a in args)

650

return ', '.join(_formatargtype(t, a) for a in args)

651

652

# formatter for each list prefix character of a revspec format
_formatlistfuncs = {
    'l': _formatlistexp,
    'p': _formatparamexp,
}

656

657

def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = rev(int(arg)), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    '_rev(10):: and not _rev(20)::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or _rev(1)"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    pieces = []
    for kind, value in _parseargs(expr, args):
        if kind is None:
            # already a piece of the final spec
            pieces.append(value)
        elif kind == 'baseset':
            # revisions left unformatted by _parseargs(); serialize them,
            # sorting first when they come as a plain set
            if isinstance(value, set):
                value = sorted(value)
            pieces.append(_formatintlist(list(value)))
        else:
            raise error.ProgrammingError("unknown revspec item type: %r"
                                         % kind)
    return b''.join(pieces)

706

707

def spectree(expr, *args):
    """similar to formatspec but return a parsed and optimized tree

    'baseset' items are not serialized: each one is wrapped in a
    ``smartset.baseset``, written as a ``$`` placeholder in the expression,
    and substituted into the parsed tree by ``parser.buildtree()``.
    """
    pieces = []
    inputs = []
    for kind, value in _parseargs(expr, args):
        if kind is None:
            pieces.append(value)
        elif kind == 'baseset':
            inputs.append(('smartset', smartset.baseset(value)))
            pieces.append("$")
        else:
            raise error.ProgrammingError("unknown revspec item type: %r"
                                         % kind)
    spec = b''.join(pieces)
    # '$' is only a valid symbol with the alias set of initial letters
    tree = _parsewith(spec, syminitletters=_aliassyminitletters)
    tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
    return optimize(analyze(foldconcat(tree)))

728

729

def _parseargs(expr, args):
    """parse the expression and replace all inexpensive args

    return a list of tuple [(arg-type, arg-value)]

    Arg-type can be:
    * None:      a string ready to be concatenated into a final spec
    * 'baseset': an iterable of revisions

    A ParseError is raised for an incomplete format sequence, for an
    argument that cannot be formatted, and when the number of arguments
    does not match the number of format sequences.
    """
    expr = pycompat.bytestr(expr)
    argiter = iter(args)
    items = []
    pos = 0
    while pos < len(expr):
        percent = expr.find('%', pos)
        if percent < 0:
            # no format sequence left, the rest is literal text
            items.append((None, expr[pos:]))
            break
        items.append((None, expr[pos:percent]))
        pos = percent + 1
        try:
            fmtchar = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if fmtchar == '%':
            # '%%' is a literal '%' and consumes no argument
            items.append((None, fmtchar))
            pos += 1
            continue

        try:
            arg = next(argiter)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        listfunc = _formatlistfuncs.get(fmtchar)
        if not listfunc:
            # a single entry, not expensive, replace
            try:
                items.append((None, _formatargtype(fmtchar, arg)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
            pos += 1
            continue

        # a list of some type, might be expensive, do not replace
        islist = (fmtchar == 'l')
        pos += 1
        try:
            fmtchar = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        pos += 1
        if islist and fmtchar == 'd' and arg:
            # we don't create a baseset yet, because it comes with an
            # extra cost. If we are going to serialize it we better
            # skip it.
            items.append(('baseset', arg))
            continue
        try:
            items.append((None, listfunc(list(arg), fmtchar)))
        except (TypeError, ValueError):
            raise error.ParseError(_('invalid argument for revspec'))

    # any argument left over was not referenced by the expression
    for unusedarg in argiter:
        raise error.ParseError(_('too many revspec arguments specified'))
    return items

796

797

def prettyformat(tree):
    """Format ``tree`` with parser.prettyformat()

    ('string', 'symbol') is passed as that function's second argument.
    """
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)

799

800

def depth(tree):
    """Return the nesting depth of ``tree``

    Anything that is not a tuple has depth 0. A tuple is one level deeper
    than its deepest item; an empty tuple therefore has depth 1.
    """
    if not isinstance(tree, tuple):
        return 0
    if not tree:
        # max() raises ValueError on an empty sequence
        return 1
    return max(map(depth, tree)) + 1

805

806

def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``

    Non-tuples and string/symbol nodes contribute nothing. For a 'func'
    node the name is taken from ``tree[1][1]``.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    names = set()
    for subtree in tree[1:]:
        names.update(funcsused(subtree))
    if tree[0] == 'func':
        names.add(tree[1][1])
    return names

816

817

# 1 to 40 hexadecimal digits, anchored at the end of the symbol
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

def _ishashlikesymbol(symbol):
    """returns true if the symbol looks like a hash

    The value returned is the result of ``_hashre.match(symbol)``.
    """
    return _hashre.match(symbol)

822

823

def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    Only nodes with at least three items are descended into; shorter
    non-symbol nodes yield an empty list.

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        if _ishashlikesymbol(tree[1]):
            return [tree[1]]
        return []
    if len(tree) < 3:
        return []
    found = []
    for subtree in tree[1:]:
        found.extend(gethashlikesymbols(subtree))
    return found

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 smartset,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             # Operator table handed to parser.parser() by _parsewith(). Keys are the
             # token types produced by tokenize(); each value describes how that token
             # may be used, in the column order given by the comment below.
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             # symbol spellings that tokenize() emits as operator tokens rather than
             # as ('symbol', ...) tokens
             keywords = {'and', 'or', 'not'}
             # table looked up by function name in _optimize(), which reads an optional
             # ``_weight`` attribute from the entry; nothing in this part of the file
             # fills it in
             symbols = {}
             # characters that open a quoted string in tokenize()
             _quoteletters = {'"', "'"}
             # single-character operators that tokenize() emits unchanged as the
             # token type
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols:
             # ASCII letters and digits, '._@', plus every byte value in 128..255.
             # The constants from the ``string`` module are passed through
             # pycompat.sysbytes() before being concatenated with the '._@' literal.
             _syminitletters = set(pycompat.iterbytestr(
                 pycompat.sysbytes(string.ascii_letters) +
                 pycompat.sysbytes(string.digits) +
                 '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens

                 Tokens are yielded as ``(type, value, pos)`` tuples, and the last
                 token yielded is always ``('end', None, pos)``.

                 ``program`` must be a bytes object, otherwise ProgrammingError is
                 raised.

                 ``lookup``, if given, is called with a candidate name and its result
                 is used as a truth value: a name it accepts is emitted as a single
                 symbol, both for old-style ``a:b`` ranges and for names containing
                 ``-``.

                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.

                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.

                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

                 ParseError is raised for an unterminated quoted string and for a
                 character that cannot start any token.

                 Check that @ is a valid unquoted token character (issue3686):

                 >>> list(tokenize(b"@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 '''
                 if not isinstance(program, bytes):
                     raise error.ProgrammingError('revset statement must be bytes, got %r'
                                                  % program)
                 program = pycompat.bytestr(program)
                 # fall back to the module-level default character sets
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     # every non-empty side of the (at most one) ':' must be known
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 # general scanner: every branch leaves pos on the last character it
                 # consumed, and the "pos += 1" at the bottom of the loop moves past it
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             # r'...' / r"...": step onto the quote and keep the
                             # content verbatim
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             # the loop ran off the end without meeting the closing quote
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     yield ('-', None, s)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         # pos is one past the symbol here; step back so that the
                         # shared increment below lands on the next character
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             # unique sentinel meaning "no default supplied"; getinteger() compares
             # against it by identity, so None remains a usable default value
             _notset = object()
             def getsymbol(x):
                 """Return the name stored in a ``('symbol', name)`` node.

                 ParseError is raised when ``x`` is empty or is not a symbol node.
                 """
                 if not x or x[0] != 'symbol':
                     raise error.ParseError(_('not a symbol'))
                 return x[1]
             def getstring(x, err):
                 """Return the text of a ``'string'`` or ``'symbol'`` node.

                 ParseError carrying ``err`` is raised for an empty ``x`` or for any
                 other kind of node.
                 """
                 if x:
                     kind = x[0]
                     if kind == 'string' or kind == 'symbol':
                         return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 """Return the integer spelled by a string/symbol node.

                 When ``x`` is empty and a ``default`` was supplied, the default is
                 returned instead. A value that int() rejects raises ParseError
                 carrying ``err``.
                 """
                 if not x and default is not _notset:
                     return default
                 try:
                     value = int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
                 return value
             def getboolean(x, err):
                 """Return the boolean spelled by a symbol node.

                 The symbol text is handed to stringutil.parsebool(); a ``None``
                 result raises ParseError carrying ``err``.
                 """
                 parsed = stringutil.parsebool(getsymbol(x))
                 if parsed is None:
                     raise error.ParseError(err)
                 return parsed
             def getlist(x):
                 """Return the members of a parsed tree as a Python list.

                 An empty tree gives ``[]``, a ``'list'`` node gives its children, and
                 any other node gives a one-element list holding that node.
                 """
                 if not x:
                     return []
                 return list(x[1:]) if x[0] == 'list' else [x]
             def getrange(x, err):
                 """Return the ``(first, last)`` operands of a range node.

                 A side that the range form omits is returned as ``None``. An empty
                 ``x`` or a node that is not one of the four range forms raises
                 ParseError carrying ``err``.
                 """
                 if not x:
                     raise error.ParseError(err)
                 op = x[0]
                 if op == 'range':
                     return x[1], x[2]
                 if op == 'rangepre':
                     return None, x[1]
                 if op == 'rangepost':
                     return x[1], None
                 if op == 'rangeall':
                     return None, None
                 raise error.ParseError(err)
             def getintrange(x, err1, err2, deffirst=_notset, deflast=_notset):
                 """Get [first, last] integer range (both inclusive) from a parsed tree

                 A lone string/symbol node is read as a single integer ``n`` and gives
                 ``(n, n)``. Otherwise ``x`` must be a range node; an omitted side
                 takes ``deffirst`` / ``deflast``.

                 If any of the sides omitted, and if no default provided, ParseError will
                 be raised.
                 """
                 if x and x[0] in ('string', 'symbol'):
                     single = getinteger(x, err1)
                     return single, single
                 first, last = getrange(x, err1)
                 return getinteger(first, err2, deffirst), getinteger(last, err2, deflast)
             def getargs(x, min, max, err):
                 """Return the argument list of ``x`` after checking its length.

                 ParseError carrying ``err`` is raised when there are fewer than
                 ``min`` arguments, or more than ``max`` when ``max`` is not negative.
                 """
                 args = getlist(x)
                 count = len(args)
                 if count < min:
                     raise error.ParseError(err)
                 # a negative max means "no upper bound"
                 if max >= 0 and count > max:
                     raise error.ParseError(err)
                 return args
             def getargsdict(x, funcname, keys):
                 """Build an argument dict for ``funcname`` from the parsed tree ``x``.

                 ``keys`` is split with parser.splitargspec() and the work is done by
                 parser.buildargsdict(), using 'keyvalue' and 'symbol' as the node
                 types for keyword arguments and their keys.
                 """
                 args = getlist(x)
                 argspec = parser.splitargspec(keys)
                 return parser.buildargsdict(args, funcname, argspec,
                                             keyvaluenode='keyvalue', keynode='symbol')
             # cache of {spec: raw parsed tree} built internally; entries are added
             # on first use by _cachedtree()
             _treecache = {}
             def _cachedtree(spec):
                 """Return the raw parsed tree of ``spec``, parsing it at most once."""
                 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
                 cached = _treecache.get(spec)
                 if cached is not None:
                     return cached
                 cached = parse(spec)
                 _treecache[spec] = cached
                 return cached
             def _build(tmplspec, *repls):
                 """Create raw parsed tree from a template revset statement

                 The ``_`` symbols of the parsed template are the placeholders
                 handed to parser.buildtree() together with ``repls``.

                 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 placeholder = ('symbol', '_')
                 return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
             def _match(patspec, tree):
                 """Test if a tree matches the given pattern statement; return the matches

                 The ``_`` symbols of the parsed pattern are the placeholders handed
                 to parser.matchtree().

                 >>> _match(b'f(_)', parse(b'f()'))
                 >>> _match(b'f(_)', parse(b'f(1)'))
                 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
                 >>> _match(b'f(_)', parse(b'f(1, 2)'))
                 """
                 placeholder = ('symbol', '_')
                 excluded = {'keyvalue', 'list'}
                 return parser.matchtree(_cachedtree(patspec), tree, placeholder,
                                         excluded)
             def _matchonly(revs, bases):
                 """Match ``revs and bases`` against ``ancestors(_) and not ancestors(_)``.

                 Returns whatever _match() returns for that pattern.
                 """
                 combined = ('and', revs, bases)
                 return _match('ancestors(_) and not ancestors(_)', combined)
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser"""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     if x[2][0] == 'dagrangepre':
                         return _fixops(('dagrange', post, x[2][1]))
                     elif x[2][0] == 'dagrangeall':
                         return _fixops(('dagrangepost', post))
                     elif x[2][0] == 'rangepre':
                         return _fixops(('range', post, x[2][1]))
                     elif x[2][0] == 'rangeall':
                         return _fixops(('rangepost', post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 elif op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
                 return (op,) + tuple(_fixops(y) for y in x[1:])
             def _analyze(x):
                 if x is None:
                     return x
                 op = x[0]
                 if op == 'minus':
                     return _analyze(_build('_ and not _', *x[1:]))
                 elif op == 'only':
                     return _analyze(_build('only(_, _)', *x[1:]))
                 elif op == 'onlypost':
                     return _analyze(_build('only(_)', x[1]))
                 elif op == 'dagrangeall':
                     raise error.ParseError(_("can't use '::' in this context"))
                 elif op == 'dagrangepre':
                     return _analyze(_build('ancestors(_)', x[1]))
                 elif op == 'dagrangepost':
                     return _analyze(_build('descendants(_)', x[1]))
                 elif op == 'negate':
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 elif op in ('string', 'symbol', 'smartset'):
                     return x
                 elif op == 'rangeall':
                     return (op, None)
                 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 elif op == 'group':
                     return _analyze(x[1])
                 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                             'subscript'}:
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     return (op, ta, tb)
                 elif op == 'relsubscript':
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     tc = _analyze(x[3])
                     return (op, ta, tb, tc)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y) for y in x[1:])
                 elif op == 'keyvalue':
                     return (op, x[1], _analyze(x[2]))
                 elif op == 'func':
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Transform raw parsed tree to evaluatable tree which can be fed to
                 optimize() or getset()

                 All pseudo operations should be mapped to real operations or functions
                 defined in methods or symbols table respectively.

                 This is the public entry point; the work is done by _analyze().
                 """
                 evaluatable = _analyze(x)
                 return evaluatable
             def _optimize(x):
                 """Return a ``(weight, tree)`` pair for the evaluatable tree ``x``.

                 ``tree`` is ``x`` rewritten to use the fast-path forms handled below.
                 ``weight`` is a number computed per node; it is used by the 'and'
                 branch to choose between 'and' and 'andsmally' (presumably a relative
                 cost estimate -- confirm against the evaluator).

                 ``None`` is returned as ``(0, None)``; an unknown operator raises
                 ValueError.
                 """
                 if x is None:
                     return 0, x
                 op = x[0]
                 if op in ('string', 'symbol', 'smartset'):
                     return 0.5, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     w = min(wa, wb)
                     # (draft/secret/_notpublic() & ::x) have a fast path
                     m = _match('_() & ancestors(_)', ('and', ta, tb))
                     if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
                         return w, _build('_phaseandancestors(_, _)', m[1], m[2])
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
                     # "a and not b" becomes an explicit difference node
                     m = _match('not _', tb)
                     if m:
                         return wa, ('difference', ta, m[1])
                     # the right operand is the lighter one: record that in the op
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     # ws/ts collect the weights/trees of the resulting operands, ss
                     # holds the current run of consecutive string/symbol operands
                     ws, ts, ss = [], [], []
                     def flushss():
                         # move the pending run in ss to ws/ts, folding two or more
                         # entries into one _list() call with NUL-separated names
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         # a non-trivial operand ends the current run
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if _match('public()', x[1]):
                         o = _optimize(_build('_notpublic()'))
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1])
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return 1, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1])
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     # only the first operand is optimized; x[2] is kept as is
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     # only the first operand is optimized; x[2] and x[3] are kept as is
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2])
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2])
                     # weight of the function itself: the ``_weight`` attribute of its
                     # entry in ``symbols``, or 1 when there is none
                     w = getattr(symbols.get(f), '_weight', 1)
                     m = _match('commonancestors(_)', ta)
                     # Optimize heads(commonancestors(_)) because we have a fast version
                     if f == 'heads' and m:
                         return w + wa, _build('_commonancestorheads(_)', m[1])
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Optimize evaluatable tree

                 All pseudo operations should be transformed beforehand.

                 Only the rewritten tree is returned; the weight that _optimize()
                 computes alongside it is dropped.
                 """
                 _unusedweight, optimized = _optimize(tree)
                 return optimized
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions: the default set plus '$'; it is
             # passed to _parsewith() by _aliasrules._parse()
             _aliassyminitletters = _syminitletters | {'$'}
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Generate a parse tree of given spec with given tokenizing options

                 ``lookup`` is not used for a spec written as ``revset(...)``. The
                 whole spec must be consumed by the parser, otherwise ParseError
                 ('invalid token') is raised with the position where parsing stopped.

                 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith(b'$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith(b'foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 if lookup:
                     if spec.startswith('revset(') and spec.endswith(')'):
                         lookup = None
                 revparser = parser.parser(elements)
                 tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
                 tree, pos = revparser.parse(tokens)
                 if pos == len(spec):
                     simplified = parser.simplifyinfixops(tree, ('list', 'or'))
                     return _fixops(simplified)
                 raise error.ParseError(_('invalid token'), pos)
             class _aliasrules(parser.basealiasrules):
                 """Parsing and expansion rule set of revset aliases"""

                 _section = _('revset alias')

                 @staticmethod
                 def _parse(spec):
                     """Parse alias declaration/definition ``spec``

                     This allows symbol names to use also ``$`` as an initial letter
                     (for backward compatibility), and callers of this function should
                     examine whether ``$`` is used also for unexpected symbols or not.
                     """
                     initletters = _aliassyminitletters
                     return _parsewith(spec, syminitletters=initletters)

                 @staticmethod
                 def _trygetfunc(tree):
                     """Return ``(name, args)`` when ``tree`` is a call of a plain
                     symbol, and ``None`` otherwise"""
                     if tree[0] != 'func':
                         return None
                     namenode = tree[1]
                     if namenode[0] != 'symbol':
                         return None
                     return namenode[1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Expand aliases in a tree, aliases is a list of (name, value) tuples

                 When ``warn`` is given, it is called once for every alias that
                 carries an error and has not been warned about yet, in sorted order;
                 each such alias is then marked as warned.
                 """
                 aliasmap = _aliasrules.buildmap(aliases)
                 expanded = _aliasrules.expand(aliasmap, tree)
                 if warn is None:
                     return expanded
                 # warn about problematic (but not referred) aliases
                 for aliasname, alias in sorted(aliasmap.iteritems()):
                     if not alias.error or alias.warned:
                         continue
                     warn(_('warning: %s\n') % (alias.error))
                     alias.warned = True
                 return expanded
             def foldconcat(tree):
                 """Fold elements to be concatenated by `##`

                 Every ``_concat`` node is replaced by one ``'string'`` node holding
                 the joined text of its string/symbol operands, nested ``_concat``
                 nodes included. ParseError is raised when an operand is of any other
                 kind. Non-tuple values and string/symbol/smartset nodes are returned
                 unchanged.
                 """
                 if not isinstance(tree, tuple):
                     return tree
                 if tree[0] in ('string', 'symbol', 'smartset'):
                     return tree
                 if tree[0] != '_concat':
                     return tuple(foldconcat(sub) for sub in tree)

                 # depth-first, left-to-right walk using an explicit stack
                 stack = [tree]
                 texts = []
                 while stack:
                     node_ = stack.pop()
                     kind = node_[0]
                     if kind == '_concat':
                         # push operands reversed so the leftmost is popped first
                         stack.extend(reversed(node_[1:]))
                     elif kind in ('string', 'symbol'):
                         texts.append(node_[1])
                     else:
                         msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
                         raise error.ParseError(msg)
                 return ('string', ''.join(texts))
             def parse(spec, lookup=None):
                 """Parse ``spec`` into a raw tree using the default tokenizing options.

                 A ParseError that carries a location is re-raised with its ``hint``
                 set to the spec followed by a caret line pointing at that location.
                 """
                 try:
                     return _parsewith(spec, lookup=lookup)
                 except error.ParseError as inst:
                     haslocation = len(inst.args) > 1
                     if haslocation:
                         loc = inst.args[1]
                         # Remove newlines -- spaces are equivalent whitespace.
                         flattened = spec.replace('\n', ' ')
                         # We want the caret to point to the place in the template that
                         # failed to parse, but in a hint we get a open paren at the
                         # start. Therefore, we print "loc + 1" spaces (instead of "loc")
                         # to line up the caret with the location of the error.
                         caretline = ' ' * (loc + 1) + '^ ' + _('here')
                         inst.hint = flattened + '\n' + caretline
                     raise
             def _quote(s):
                 r"""Quote a value in order to make it safe for the revset engine.

                 The value is converted with pycompat.bytestr(), escaped with
                 stringutil.escapestr() and wrapped in single quotes.

                 >>> _quote(b'asdf')
                 "'asdf'"
                 >>> _quote(b"asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote(b'asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 escaped = stringutil.escapestr(pycompat.bytestr(s))
                 return "'%s'" % escaped
             def _formatargtype(c, arg):
                 if c == 'd':
                     return '_rev(%d)' % int(arg)
                 elif c == 's':
                     return _quote(arg)
                 elif c == 'r':
                     if not isinstance(arg, bytes):
                         raise TypeError
                     parse(arg) # make sure syntax errors are confined
                     return '(%s)' % arg
                 elif c == 'n':
                     return _quote(node.hex(arg))
                 elif c == 'b':
                     try:
                         return _quote(arg.branch())
                     except AttributeError:
                         raise TypeError
                 raise error.ParseError(_('unexpected revspec format character %s') % c)
             def _formatlistexp(s, t):
                 l = len(s)
                 if l == 0:
                     return "_list('')"
                 elif l == 1:
                     return _formatargtype(t, s[0])
                 elif t == 'd':
                     return _formatintlist(s)
                 elif t == 's':
                     return "_list(%s)" % _quote("\0".join(s))
                 elif t == 'n':
                     return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                 elif t == 'b':
                     try:
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     except AttributeError:
                         raise TypeError
                 m = l // 2
                 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
             def _formatintlist(data):
                 try:
                     l = len(data)
                     if l == 0:
                         return "_list('')"
                     elif l == 1:
                         return _formatargtype('d', data[0])
                     return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
                 except (TypeError, ValueError):
                     raise error.ParseError(_('invalid argument for revspec'))
             def _formatparamexp(args, t):
                 return ', '.join(_formatargtype(t, a) for a in args)
             # renderers for sequence arguments, keyed by the one-letter prefix
             # described in the formatspec() docstring: 'l' for a parenthesized list,
             # 'p' for a list of function parameters
             _formatlistfuncs = {
                 'l': _formatlistexp,
                 'p': _formatparamexp,
             }
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.

                 Supported arguments:

                 %r = revset expression, parenthesized
                 %d = rev(int(arg)), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'

                 Prefixing the type with 'l' specifies a parenthesized list of that type,
                 and 'p' specifies a list of function parameters of that type.

                 The expression is split by _parseargs() into ``(type, value)`` items:
                 items of type None are emitted as they are, 'baseset' items are
                 rendered as an integer list (a set is sorted first), and any other
                 type raises ProgrammingError.

                 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec(b'%d:: and not %d::', 10, 20)
                 '_rev(10):: and not _rev(20)::'
                 >>> formatspec(b'%ld or %ld', [], [1])
                 "_list('') or _rev(1)"
                 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: b'default'
                 >>> b.branch = b
                 >>> formatspec(b'branch(%b)', b)
                 "branch('default')"
                 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
                 "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
                 >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
                 "sort((:), 'desc', 'user')"
                 >>> formatspec(b'%ls', [b'a', b"'"])
                 "_list('a\\\\x00\\\\'')"
                 '''
                 pieces = []
                 for kind, value in _parseargs(expr, args):
                     if kind is None:
                         pieces.append(value)
                         continue
                     if kind != 'baseset':
                         raise error.ProgrammingError("unknown revspec item type: %r" % kind)
                     # a set has no order of its own: sort it before rendering
                     revs = sorted(value) if isinstance(value, set) else value
                     pieces.append(_formatintlist(list(revs)))
                 return b''.join(pieces)
             def spectree(expr, *args):
                 """similar to formatspec but return a parsed and optimized tree

                 'baseset' items coming out of _parseargs() are not serialized: each
                 one is wrapped in a ('smartset', baseset) node and represented by a
                 ``$`` in the expression text. After parsing, those nodes are passed to
                 parser.buildtree() along with the ('symbol', '$') placeholder.

                 Raises error.ProgrammingError on an unknown item type.
                 """
                 pieces = []
                 placeholders = []
                 for kind, value in _parseargs(expr, args):
                     if kind is None:
                         pieces.append(value)
                     elif kind == 'baseset':
                         placeholders.append(('smartset', smartset.baseset(value)))
                         pieces.append("$")
                     else:
                         msg = "unknown revspec item type: %r" % kind
                         raise error.ProgrammingError(msg)
                 spec = b''.join(pieces)
                 parsed = _parsewith(spec, syminitletters=_aliassyminitletters)
                 parsed = parser.buildtree(parsed, ('symbol', '$'), *placeholders)
                 return optimize(analyze(foldconcat(parsed)))
             def _parseargs(expr, args):
                 """parse the expression and replace all inexpensive args

                 return a list of tuple [(arg-type, arg-value)]

                 Arg-type can be:

                 * None:      a string ready to be concatenated into a final spec
                 * 'baseset': an iterable of revisions

                 Raises error.ParseError when a format character is incomplete, when
                 there are too few or too many arguments, or when an argument cannot
                 be formatted (TypeError/ValueError from the formatter).
                 """
                 expr = pycompat.bytestr(expr)
                 remaining = iter(args)
                 items = []
                 cursor = 0
                 end = len(expr)
                 while cursor < end:
                     marker = expr.find('%', cursor)
                     if marker < 0:
                         # no more format characters: keep the tail verbatim
                         items.append((None, expr[cursor:]))
                         break
                     items.append((None, expr[cursor:marker]))
                     cursor = marker + 1
                     try:
                         code = expr[cursor]
                     except IndexError:
                         raise error.ParseError(_('incomplete revspec format character'))
                     if code == '%':
                         # '%%' is a literal percent sign and consumes no argument
                         items.append((None, code))
                         cursor += 1
                         continue

                     # the argument is consumed before the format is looked at any
                     # further, so a missing argument is reported first
                     try:
                         value = next(remaining)
                     except StopIteration:
                         raise error.ParseError(_('missing argument for revspec'))

                     listformatter = _formatlistfuncs.get(code)
                     if not listformatter:
                         # a single entry, not expensive, replace
                         try:
                             items.append((None, _formatargtype(code, value)))
                         except (TypeError, ValueError):
                             raise error.ParseError(_('invalid argument for revspec'))
                         cursor += 1
                         continue

                     # a list of some type, might be expensive, do not replace
                     cursor += 1
                     try:
                         itemtype = expr[cursor]
                     except IndexError:
                         raise error.ParseError(_('incomplete revspec format character'))
                     if code == 'l' and itemtype == 'd' and value:
                         # we don't create a baseset yet, because it comes with an
                         # extra cost. If we are going to serialize it we better
                         # skip it.
                         items.append(('baseset', value))
                         cursor += 1
                         continue
                     try:
                         items.append((None, listformatter(list(value), itemtype)))
                     except (TypeError, ValueError):
                         raise error.ParseError(_('invalid argument for revspec'))
                     cursor += 1

                 # every argument must have been consumed by a format character
                 try:
                     next(remaining)
                 except StopIteration:
                     pass
                 else:
                     raise error.ParseError(_('too many revspec arguments specified'))
                 return items
             def prettyformat(tree):
                 """Return parser.prettyformat() of ``tree`` for this language

                 'string' and 'symbol' are passed as the second argument of
                 parser.prettyformat().
                 """
                 leafnodes = ('string', 'symbol')
                 return parser.prettyformat(tree, leafnodes)
             def depth(tree):
                 """Return the nesting depth of ``tree``

                 Anything that is not a tuple has depth 0; a tuple is one level deeper
                 than its deepest element.
                 """
                 if not isinstance(tree, tuple):
                     return 0
                 return 1 + max(depth(child) for child in tree)
             def funcsused(tree):
                 """Return the set of function names called anywhere in ``tree``

                 Non-tuples and 'string'/'symbol' nodes contribute nothing. For a
                 'func' node the name is taken from tree[1][1].
                 """
                 if not isinstance(tree, tuple):
                     return set()
                 op = tree[0]
                 if op in ('string', 'symbol'):
                     return set()
                 found = set().union(*[funcsused(sub) for sub in tree[1:]])
                 if op == 'func':
                     found.add(tree[1][1])
                 return found
             # 1 to 40 hexadecimal digits followed by the end of the string; applied
             # with match() in _ishashlikesymbol()
             _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
             def _ishashlikesymbol(symbol):
                 """returns true if the symbol looks like a hash

                 The value returned is the result of _hashre.match() itself, not a
                 bool; callers should only rely on its truthiness.
                 """
                 matched = _hashre.match(symbol)
                 return matched
             def gethashlikesymbols(tree):
                 """returns the list of symbols of the tree that look like hashes

                 Every operator node is searched, including those with a single
                 operand such as ``::x`` or ``x^``. 'string' nodes, and anything that
                 is not a tuple, never contribute a symbol.

                 >>> gethashlikesymbols(parse(b'3::abe3ff'))
                 ['3', 'abe3ff']
                 >>> gethashlikesymbols(parse(b'precursors(.)'))
                 []
                 >>> gethashlikesymbols(parse(b'precursors(34)'))
                 ['34']
                 >>> gethashlikesymbols(parse(b'abe3ffZ'))
                 []
                 >>> gethashlikesymbols(parse(b'::abe3ff'))
                 ['abe3ff']
                 """
                 if not isinstance(tree, tuple) or not tree:
                     return []

                 op = tree[0]
                 if op == "symbol":
                     if _ishashlikesymbol(tree[1]):
                         return [tree[1]]
                     return []
                 if op == "string":
                     # the payload of a quoted string is text, not a subtree
                     return []

                 # descend into every operand; a length test on the node would skip
                 # single-operand nodes and miss the symbols below them
                 results = []
                 for subtree in tree[1:]:
                     results += gethashlikesymbols(subtree)
                 return results