upstream/mercurial-mirror Commit - r41257:73203cdf

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

smartset,

18

util,

19

util,

19

)

20

)

20

from .utils import (

21

from .utils import (

21

stringutil,

22

stringutil,

22

)

23

)

23

24

# Operator table passed to parser.parser() by _parsewith(). Keys are the
# token types yielded by tokenize(); each value describes how the token may
# be used, in the column order given by the comment on the first row.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    # '::' and '..' are two spellings of the same DAG range operator
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    # 'not' and '!' map to the same 'not' node
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    # 'and' and '&' map to the same 'and' node
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    # 'or', '|' and '+' map to the same 'or' node
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    # closing delimiters and terminals carry no operator roles
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}

54

55

# symbols that tokenize() emits as operator tokens instead of 'symbol' tokens
keywords = {'and', 'or', 'not'}

# name -> function table; _optimize() reads an optional '_weight' attribute
# from its values. It is empty here and presumably filled in by another
# module -- confirm against the callers.
symbols = {}

# characters that open (and close) a quoted string
_quoteletters = {'"', "'"}
# characters that tokenize() emits as single-character operator tokens
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

70

71

def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    This is a generator. Each token is a ``(type, value, pos)`` tuple where
    ``pos`` is an offset into ``program``; the stream always ends with an
    ``('end', None, pos)`` token.

    ``program`` must be a bytes object, otherwise ProgrammingError is
    raised. ParseError is raised for an unterminated quoted string and for
    a character that cannot start any token.

    ``lookup``, if given, is called with a candidate name and should return
    a true value when that name is to be taken as a single symbol. It is
    consulted for old-style ``a:b`` ranges and for names containing ``-``.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        # NOTE(review): '%r' % program misformats (or raises TypeError) when
        # program is a tuple; '% (program,)' would be safer -- confirm
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % program)
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        # every non-empty side of the ':' must be known to lookup()
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    # general tokenizer: 'pos' is advanced once at the bottom of the loop, so
    # each branch leaves it on the last character it consumed
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step onto the quote and keep the body verbatim
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end of the input without seeing the closing quote
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    # give the first '.' back so '..' is tokenized as an
                    # operator on the next iteration
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # the name contains '-' (e.g. foo-bar-baz); check whether it
                # is a real symbol before treating '-' as an operator
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # compensate for the unconditional 'pos += 1' below
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)

192

193

# helpers

194

# helpers

194

195

_notset = object()

196

_notset = object()

196

197

def getsymbol(x):
    """Return the name stored in a ``('symbol', name)`` node

    ParseError is raised if ``x`` is empty or is not a symbol node.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]

201

202

def getstring(x, err):
    """Return the text stored in a ``'string'`` or ``'symbol'`` node

    ParseError carrying ``err`` is raised for an empty ``x`` or for any
    other kind of node.
    """
    if not x:
        raise error.ParseError(err)
    kind = x[0]
    if kind == 'string' or kind == 'symbol':
        return x[1]
    raise error.ParseError(err)

206

207

def getinteger(x, err, default=_notset):
    """Return the integer value of a string or symbol node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned.
    ParseError carrying ``err`` is raised when ``x`` is not a string/symbol
    node or when its text is not a valid integer.
    """
    havedefault = default is not _notset
    if havedefault and not x:
        return default
    try:
        text = getstring(x, err)
        return int(text)
    except ValueError:
        raise error.ParseError(err)

214

215

def getboolean(x, err):
    """Return the boolean named by a symbol node

    The symbol text is interpreted by ``stringutil.parsebool()``; ParseError
    carrying ``err`` is raised when that returns None.
    """
    parsed = stringutil.parsebool(getsymbol(x))
    if parsed is None:
        raise error.ParseError(err)
    return parsed

220

221

def getlist(x):
    """Return the argument nodes of ``x`` as a Python list

    An empty ``x`` gives ``[]``, a ``'list'`` node gives its children, and
    any other node is wrapped in a one-element list.
    """
    if x and x[0] == 'list':
        return list(x[1:])
    return [x] if x else []

227

228

def getrange(x, err):
    """Return the ``(first, last)`` operands of a range node

    A missing side is reported as None: ``'rangepre'`` has no first operand,
    ``'rangepost'`` has no last operand and ``'rangeall'`` has neither.
    ParseError carrying ``err`` is raised for an empty ``x`` or for a node
    that is not one of the four range operators.
    """
    if x:
        kind = x[0]
        if kind == 'range':
            return x[1], x[2]
        if kind == 'rangepre':
            return None, x[1]
        if kind == 'rangepost':
            return x[1], None
        if kind == 'rangeall':
            return None, None
    raise error.ParseError(err)

241

242

def getargs(x, min, max, err):
    """Return the arguments of ``x`` as a list after checking their count

    ParseError carrying ``err`` is raised when there are fewer than ``min``
    arguments, or more than ``max`` arguments. A negative ``max`` disables
    the upper bound.
    """
    args = getlist(x)
    count = len(args)
    # 'max' is only examined once the lower bound is known to hold
    if count < min or 0 <= max < count:
        raise error.ParseError(err)
    return args

247

248

def getargsdict(x, funcname, keys):
    """Build an argument dict for ``funcname`` from the argument tree ``x``

    ``keys`` is split by ``parser.splitargspec()`` and the result is handed,
    together with the argument list, to ``parser.buildargsdict()``.
    """
    args = getlist(x)
    argspec = parser.splitargspec(keys)
    return parser.buildargsdict(args, funcname, argspec,
                                keyvaluenode='keyvalue', keynode='symbol')

251

252

# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the raw parsed tree of ``spec``, parsing it on first use"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    cached = _treecache.get(spec)
    if cached is not None:
        return cached
    cached = parse(spec)
    _treecache[spec] = cached
    return cached

261

262

def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    The template is parsed (and cached), then handed to
    ``parser.buildtree()`` with ``('symbol', '_')`` as the placeholder node
    and ``repls`` as the replacement trees.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    placeholder = ('symbol', '_')
    return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)

270

271

def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    The pattern is parsed (and cached), then handed to
    ``parser.matchtree()`` with ``('symbol', '_')`` as the placeholder node
    and ``{'keyvalue', 'list'}`` as its last argument.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    placeholder = ('symbol', '_')
    return parser.matchtree(_cachedtree(patspec), tree, placeholder,
                            {'keyvalue', 'list'})

282

283

def _matchonly(revs, bases):
    """Match ``revs and bases`` against ``ancestors(_) and not ancestors(_)``

    Returns whatever ``_match()`` returns for the combined ``'and'`` tree.
    """
    candidate = ('and', revs, bases)
    return _match('ancestors(_) and not ancestors(_)', candidate)

285

286

def _fixops(x):

287

def _fixops(x):

287

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

288

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

288

handled well by our simple top-down parser"""

289

handled well by our simple top-down parser"""

289

if not isinstance(x, tuple):

290

if not isinstance(x, tuple):

290

return x

291

return x

291

292

op = x[0]

293

op = x[0]

293

if op == 'parent':

294

if op == 'parent':

294

# x^:y means (x^) : y, not x ^ (:y)

295

# x^:y means (x^) : y, not x ^ (:y)

295

# x^: means (x^) :, not x ^ (:)

296

# x^: means (x^) :, not x ^ (:)

296

post = ('parentpost', x[1])

297

post = ('parentpost', x[1])

297

if x[2][0] == 'dagrangepre':

298

if x[2][0] == 'dagrangepre':

298

return _fixops(('dagrange', post, x[2][1]))

299

return _fixops(('dagrange', post, x[2][1]))

299

elif x[2][0] == 'dagrangeall':

300

elif x[2][0] == 'dagrangeall':

300

return _fixops(('dagrangepost', post))

301

return _fixops(('dagrangepost', post))

301

elif x[2][0] == 'rangepre':

302

elif x[2][0] == 'rangepre':

302

return _fixops(('range', post, x[2][1]))

303

return _fixops(('range', post, x[2][1]))

303

elif x[2][0] == 'rangeall':

304

elif x[2][0] == 'rangeall':

304

return _fixops(('rangepost', post))

305

return _fixops(('rangepost', post))

305

elif op == 'or':

306

elif op == 'or':

306

# make number of arguments deterministic:

307

# make number of arguments deterministic:

307

# x + y + z -> (or x y z) -> (or (list x y z))

308

# x + y + z -> (or x y z) -> (or (list x y z))

308

return (op, _fixops(('list',) + x[1:]))

309

return (op, _fixops(('list',) + x[1:]))

309

elif op == 'subscript' and x[1][0] == 'relation':

310

elif op == 'subscript' and x[1][0] == 'relation':

310

# x#y[z] ternary

311

# x#y[z] ternary

311

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

312

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

312

313

return (op,) + tuple(_fixops(y) for y in x[1:])

314

return (op,) + tuple(_fixops(y) for y in x[1:])

314

315

def _analyze(x):

316

def _analyze(x):

316

if x is None:

317

if x is None:

317

return x

318

return x

318

319

op = x[0]

320

op = x[0]

320

if op == 'minus':

321

if op == 'minus':

321

return _analyze(_build('_ and not _', *x[1:]))

322

return _analyze(_build('_ and not _', *x[1:]))

322

elif op == 'only':

323

elif op == 'only':

323

return _analyze(_build('only(_, _)', *x[1:]))

324

return _analyze(_build('only(_, _)', *x[1:]))

324

elif op == 'onlypost':

325

elif op == 'onlypost':

325

return _analyze(_build('only(_)', x[1]))

326

return _analyze(_build('only(_)', x[1]))

326

elif op == 'dagrangeall':

327

elif op == 'dagrangeall':

327

raise error.ParseError(_("can't use '::' in this context"))

328

raise error.ParseError(_("can't use '::' in this context"))

328

elif op == 'dagrangepre':

329

elif op == 'dagrangepre':

329

return _analyze(_build('ancestors(_)', x[1]))

330

return _analyze(_build('ancestors(_)', x[1]))

330

elif op == 'dagrangepost':

331

elif op == 'dagrangepost':

331

return _analyze(_build('descendants(_)', x[1]))

332

return _analyze(_build('descendants(_)', x[1]))

332

elif op == 'negate':

333

elif op == 'negate':

333

s = getstring(x[1], _("can't negate that"))

334

s = getstring(x[1], _("can't negate that"))

334

return _analyze(('string', '-' + s))

335

return _analyze(('string', '-' + s))

335

elif op in ('string', 'symbol'):

336

elif op in ('string', 'symbol'):

336

return x

337

return x

337

elif op == 'rangeall':

338

elif op == 'rangeall':

338

return (op, None)

339

return (op, None)

339

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

340

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

340

return (op, _analyze(x[1]))

341

return (op, _analyze(x[1]))

341

elif op == 'group':

342

elif op == 'group':

342

return _analyze(x[1])

343

return _analyze(x[1])

343

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

344

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

344

'subscript'}:

345

'subscript'}:

345

ta = _analyze(x[1])

346

ta = _analyze(x[1])

346

tb = _analyze(x[2])

347

tb = _analyze(x[2])

347

return (op, ta, tb)

348

return (op, ta, tb)

348

elif op == 'relsubscript':

349

elif op == 'relsubscript':

349

ta = _analyze(x[1])

350

ta = _analyze(x[1])

350

tb = _analyze(x[2])

351

tb = _analyze(x[2])

351

tc = _analyze(x[3])

352

tc = _analyze(x[3])

352

return (op, ta, tb, tc)

353

return (op, ta, tb, tc)

353

elif op == 'list':

354

elif op == 'list':

354

return (op,) + tuple(_analyze(y) for y in x[1:])

355

return (op,) + tuple(_analyze(y) for y in x[1:])

355

elif op == 'keyvalue':

356

elif op == 'keyvalue':

356

return (op, x[1], _analyze(x[2]))

357

return (op, x[1], _analyze(x[2]))

357

elif op == 'func':

358

elif op == 'func':

358

return (op, x[1], _analyze(x[2]))

359

return (op, x[1], _analyze(x[2]))

359

raise ValueError('invalid operator %r' % op)

360

raise ValueError('invalid operator %r' % op)

360

361

def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    evaluatable = _analyze(x)
    return evaluatable

369

370

def _optimize(x):
    """Return ``(weight, tree)`` for the evaluatable tree ``x``

    ``tree`` is ``x`` with its operands optimized and some patterns replaced
    by faster equivalents. ``weight`` is a number computed alongside: 0 for
    None, 0.5 for a single string/symbol, 1 for ``rangeall``, and for
    functions the ``_weight`` attribute of the entry in ``symbols``
    (default 1) plus the weight of the arguments.

    ValueError is raised for an operator this function does not know.
    """
    if x is None:
        return 0, x

    op = x[0]
    if op in ('string', 'symbol'):
        return 0.5, x # single revisions are small
    elif op == 'and':
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        w = min(wa, wb)

        # (draft/secret/_notpublic() & ::x) have a fast path
        m = _match('_() & ancestors(_)', ('and', ta, tb))
        if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
            return w, _build('_phaseandancestors(_, _)', m[1], m[2])

        # (::x and not ::y)/(not ::y and ::x) have a fast path
        m = _matchonly(ta, tb) or _matchonly(tb, ta)
        if m:
            return w, _build('only(_, _)', *m[1:])

        # 'a and not b' becomes a 'difference' node, weighted by 'a' alone
        m = _match('not _', tb)
        if m:
            return wa, ('difference', ta, m[1])
        # record that the right-hand operand has the smaller weight
        if wa > wb:
            op = 'andsmally'
        return w, (op, ta, tb)
    elif op == 'or':
        # fast path for machine-generated expression, that is likely to have
        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
        # ws/ts: weights and trees of the operands emitted so far
        # ss: pending run of consecutive string/symbol operands
        ws, ts, ss = [], [], []
        def flushss():
            # move the pending run into ws/ts, collapsing two or more
            # entries into a single _list() call with NUL-joined names
            if not ss:
                return
            if len(ss) == 1:
                w, t = ss[0]
            else:
                s = '\0'.join(t[1] for w, t in ss)
                y = _build('_list(_)', ('string', s))
                w, t = _optimize(y)
            ws.append(w)
            ts.append(t)
            del ss[:]
        for y in getlist(x[1]):
            w, t = _optimize(y)
            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                ss.append((w, t))
                continue
            # a non-trivial operand ends the current run
            flushss()
            ws.append(w)
            ts.append(t)
        flushss()
        if len(ts) == 1:
            return ws[0], ts[0] # 'or' operation is fully optimized out
        return max(ws), (op, ('list',) + tuple(ts))
    elif op == 'not':
        # Optimize not public() to _notpublic() because we have a fast version
        if _match('public()', x[1]):
            o = _optimize(_build('_notpublic()'))
            return o[0], o[1]
        else:
            o = _optimize(x[1])
            return o[0], (op, o[1])
    elif op == 'rangeall':
        return 1, x
    elif op in ('rangepre', 'rangepost', 'parentpost'):
        o = _optimize(x[1])
        return o[0], (op, o[1])
    elif op in ('dagrange', 'range'):
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        return wa + wb, (op, ta, tb)
    elif op in ('parent', 'ancestor', 'relation', 'subscript'):
        # only the first operand is optimized; x[2] is kept as is
        w, t = _optimize(x[1])
        return w, (op, t, x[2])
    elif op == 'relsubscript':
        # only the first operand is optimized; x[2] and x[3] are kept as is
        w, t = _optimize(x[1])
        return w, (op, t, x[2], x[3])
    elif op == 'list':
        # NOTE(review): the unpacking fails if the list has no items --
        # presumably the parser never produces an empty 'list'; confirm
        ws, ts = zip(*(_optimize(y) for y in x[1:]))
        return sum(ws), (op,) + ts
    elif op == 'keyvalue':
        w, t = _optimize(x[2])
        return w, (op, x[1], t)
    elif op == 'func':
        f = getsymbol(x[1])
        wa, ta = _optimize(x[2])
        # functions missing from 'symbols' or lacking '_weight' weigh 1
        w = getattr(symbols.get(f), '_weight', 1)
        m = _match('commonancestors(_)', ta)

        # Optimize heads(commonancestors(_)) because we have a fast version
        if f == 'heads' and m:
            return w + wa, _build('_commonancestorheads(_)', m[1])

        return w + wa, (op, x[1], ta)
    raise ValueError('invalid operator %r' % op)

467

468

def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.

    Only the rewritten tree is returned; the weight that ``_optimize()``
    computes along with it is dropped.
    """
    _unusedweight, optimized = _optimize(tree)
    return optimized

475

476

# the set of valid characters for the initial letter of symbols in

477

# the set of valid characters for the initial letter of symbols in

477

# alias declarations and definitions

478

# alias declarations and definitions

478

_aliassyminitletters = _syminitletters | {'$'}

479

_aliassyminitletters = _syminitletters | {'$'}

479

480

def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` is ignored when ``spec`` has the form ``revset(...)``.
    ParseError is raised if the parser stops before the end of ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    explicitrevset = spec.startswith('revset(') and spec.endswith(')')
    if lookup and explicitrevset:
        lookup = None
    tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
    tree, pos = parser.parser(elements).parse(tokens)
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    simplified = parser.simplifyinfixops(tree, ('list', 'or'))
    return _fixops(simplified)

502

503

class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        Symbol names may also start with ``$`` here (kept for backward
        compatibility). Callers are expected to check whether ``$`` ended
        up being used for unexpected symbols.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` is a call of a plain symbol

        Any other tree yields None.
        """
        if tree[0] != 'func' or tree[1][0] != 'symbol':
            return None
        funcname = tree[1][1]
        return funcname, getlist(tree[2])

521

522

def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    When ``warn`` is given, it is called once for every alias that carries
    an error and has not been warned about yet; such aliases are then
    flagged as warned.
    """
    aliasmap = _aliasrules.buildmap(aliases)
    expanded = _aliasrules.expand(aliasmap, tree)
    if warn is None:
        return expanded
    # warn about problematic (but not referred) aliases
    for name, alias in sorted(aliasmap.iteritems()):
        if not alias.error or alias.warned:
            continue
        warn(_('warning: %s\n') % (alias.error))
        alias.warned = True
    return expanded

533

534

def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    Every ``_concat`` node whose leaves are all ``string``/``symbol``
    nodes is replaced by a single ``string`` node holding the joined
    values. Other nodes are rebuilt with their children folded. A
    ``_concat`` over any other kind of node raises ``error.ParseError``.
    """
    leaves = ('string', 'symbol')
    if not isinstance(tree, tuple) or tree[0] in leaves:
        return tree
    if tree[0] != '_concat':
        return tuple(foldconcat(child) for child in tree)

    # Flatten nested _concat nodes left to right with an explicit stack;
    # children are pushed reversed so the leftmost one is popped first.
    stack = [tree]
    parts = []
    while stack:
        item = stack.pop()
        kind = item[0]
        if kind == '_concat':
            stack.extend(reversed(item[1:]))
            continue
        if kind not in leaves:
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
        parts.append(item[1])
    return ('string', ''.join(parts))

554

555

def parse(spec, lookup=None):
    """Parse revset ``spec`` into a tree

    A ``error.ParseError`` that carries a location (a second element in
    its ``args``) gets a ``hint`` attached showing the spec with a caret
    under the failing position, and is then re-raised.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        haslocation = len(inst.args) > 1
        if haslocation:
            loc = inst.args[1]
            # Newlines are replaced by spaces (equivalent whitespace) so
            # the spec stays on a single line above the caret.
            oneline = spec.replace('\n', ' ')
            # Pad with "loc + 1" spaces rather than "loc": per the original
            # note, the hint is shown with an open paren at the start, so
            # one extra column is needed to line the caret up with the
            # position in the revset that failed to parse.
            padding = ' ' * (loc + 1)
            inst.hint = oneline + '\n' + padding + '^ ' + _('here')
        raise

569

570

def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted with ``pycompat.bytestr``, escaped with
    ``stringutil.escapestr`` and wrapped in single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    raw = pycompat.bytestr(s)
    escaped = stringutil.escapestr(raw)
    return "'%s'" % escaped

583

584

def _formatargtype(c, arg):

585

def _formatargtype(c, arg):

585

if c == 'd':

586

if c == 'd':

586

return 'rev(%d)' % int(arg)

587

return 'rev(%d)' % int(arg)

587

elif c == 's':

588

elif c == 's':

588

return _quote(arg)

589

return _quote(arg)

589

elif c == 'r':

590

elif c == 'r':

590

if not isinstance(arg, bytes):

591

if not isinstance(arg, bytes):

591

raise TypeError

592

raise TypeError

592

parse(arg) # make sure syntax errors are confined

593

parse(arg) # make sure syntax errors are confined

593

return '(%s)' % arg

594

return '(%s)' % arg

594

elif c == 'n':

595

elif c == 'n':

595

return _quote(node.hex(arg))

596

return _quote(node.hex(arg))

596

elif c == 'b':

597

elif c == 'b':

597

try:

598

try:

598

return _quote(arg.branch())

599

return _quote(arg.branch())

599

except AttributeError:

600

except AttributeError:

600

raise TypeError

601

raise TypeError

601

raise error.ParseError(_('unexpected revspec format character %s') % c)

602

raise error.ParseError(_('unexpected revspec format character %s') % c)

602

603

def _formatlistexp(s, t):

604

def _formatlistexp(s, t):

604

l = len(s)

605

l = len(s)

605

if l == 0:

606

if l == 0:

606

return "_list('')"

607

return "_list('')"

607

elif l == 1:

608

elif l == 1:

608

return _formatargtype(t, s[0])

609

return _formatargtype(t, s[0])

609

elif t == 'd':

610

elif t == 'd':

610

return _formatintlist(s)

611

return _formatintlist(s)

611

elif t == 's':

612

elif t == 's':

612

return "_list(%s)" % _quote("\0".join(s))

613

return "_list(%s)" % _quote("\0".join(s))

613

elif t == 'n':

614

elif t == 'n':

614

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

615

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

615

elif t == 'b':

616

elif t == 'b':

616

try:

617

try:

617

return "_list('%s')" % "\0".join(a.branch() for a in s)

618

return "_list('%s')" % "\0".join(a.branch() for a in s)

618

except AttributeError:

619

except AttributeError:

619

raise TypeError

620

raise TypeError

620

621

m = l // 2

622

m = l // 2

622

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

623

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

623

624

def _formatintlist(data):

625

def _formatintlist(data):

625

try:

626

try:

626

l = len(data)

627

l = len(data)

627

if l == 0:

628

if l == 0:

628

return "_list('')"

629

return "_list('')"

629

elif l == 1:

630

elif l == 1:

630

return _formatargtype('d', data[0])

631

return _formatargtype('d', data[0])

631

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)

632

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)

632

except (TypeError, ValueError):

633

except (TypeError, ValueError):

633

raise error.ParseError(_('invalid argument for revspec'))

634

raise error.ParseError(_('invalid argument for revspec'))

634

635

def _formatparamexp(args, t):

636

def _formatparamexp(args, t):

636

return ', '.join(_formatargtype(t, a) for a in args)

637

return ', '.join(_formatargtype(t, a) for a in args)

637

638

# Formatters for the multi-value prefixes of a revspec format character,
# called as f(values, type).
_formatlistfuncs = {
    'l': _formatlistexp, # parenthesized list of the given type
    'p': _formatparamexp, # list of function parameters of the given type
}

642

643

def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = rev(int(arg)), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    'rev(10):: and not rev(20)::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or rev(1)"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    pieces = []
    for kind, value in _parseargs(expr, args):
        if kind is None:
            # already a formatted fragment of the final spec
            pieces.append(value)
            continue
        if kind != 'baseset':
            raise error.ProgrammingError(
                "unknown revspec item type: %r" % kind)
        # revisions left unformatted by _parseargs(): serialize them here,
        # sorting plain sets first
        if isinstance(value, set):
            value = sorted(value)
        pieces.append(_formatintlist(list(value)))
    return b''.join(pieces)

688

693

689

def _parseargs(expr, args):
    """parse the expression and replace all inexpensive args

    return a list of tuple [(arg-type, arg-value)]

    Arg-type can be:
    * None:      a string ready to be concatenated into a final spec
    * 'baseset': an iterable of revisions

    Raises ``error.ParseError`` on a truncated format character, on too
    few or too many arguments, and on arguments that cannot be formatted.
    """
    expr = pycompat.bytestr(expr)
    remaining = iter(args)
    items = []
    pos = 0
    end = len(expr)
    while pos < end:
        pct = expr.find('%', pos)
        if pct < 0:
            # no more format characters: the rest is literal text
            items.append((None, expr[pos:]))
            break
        items.append((None, expr[pos:pct]))
        pos = pct + 1
        try:
            code = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if code == '%':
            # '%%' is a literal '%'
            items.append((None, code))
            pos += 1
            continue

        try:
            arg = next(remaining)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        listformatter = _formatlistfuncs.get(code)
        if not listformatter:
            # a single entry, not expensive, replace
            try:
                items.append((None, _formatargtype(code, arg)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
            pos += 1
            continue

        # a list of some type, might be expensive, do not replace
        pos += 1
        islist = (code == 'l')
        try:
            code = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if islist and code == 'd' and arg:
            # Special case: a non-empty list of ints may be passed along
            # unformatted to speed things up.
            #
            # Deliberately conservative: only these container types take
            # the fast path; any other iterable (e.g. a generator) falls
            # through to the generic list formatting below.
            safeinputtype = (list, tuple, set, smartset.abstractsmartset)
            if isinstance(arg, safeinputtype):
                # No baseset is created yet because it comes with an extra
                # cost that is better skipped if the value ends up being
                # serialized anyway.
                items.append(('baseset', arg))
                pos += 1
                continue
        try:
            items.append((None, listformatter(list(arg), code)))
        except (TypeError, ValueError):
            raise error.ParseError(_('invalid argument for revspec'))
        pos += 1

    # every argument must have been consumed by a format character
    try:
        next(remaining)
    except StopIteration:
        return items
    raise error.ParseError(_('too many revspec arguments specified'))

747

768

748

def prettyformat(tree):
    """Format ``tree`` with ``parser.prettyformat``

    ``string`` and ``symbol`` are passed as the leaf node types.
    """
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)

750

771

751

def depth(tree):
    """Return the nesting depth of ``tree``

    Anything that is not a tuple has depth 0; a tuple is one level deeper
    than its deepest element.
    """
    if not isinstance(tree, tuple):
        return 0
    return 1 + max(depth(sub) for sub in tree)

756

777

757

def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``"""
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    names = set()
    for child in tree[1:]:
        names.update(funcsused(child))
    if tree[0] == 'func':
        # tree[1] is the node naming the function, tree[1][1] its name
        names.add(tree[1][1])
    return names

767

788

768

# 1 to 40 hexadecimal digits (either case) up to the end of the string
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

769

790

770

def _ishashlikesymbol(symbol):
    """returns true if the symbol looks like a hash

    The value returned is the result of ``_hashre.match()``.
    """
    matched = _hashre.match(symbol)
    return matched

773

794

774

def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        name = tree[1]
        return [name] if _ishashlikesymbol(name) else []

    # only nodes with at least two children are descended into
    if len(tree) < 3:
        return []
    found = []
    for child in tree[1:]:
        found.extend(gethashlikesymbols(child))
    return found

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
+                smartset,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             # Operator table handed to parser.parser() by _parsewith(). Keys are token
             # types as produced by tokenize(); values follow the layout noted below.
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             # words that tokenize() emits as operator tokens rather than as 'symbol'
             keywords = {'and', 'or', 'not'}
             # name -> object table; _optimize() reads an optional ``_weight`` attribute
             # from its values. Empty here (presumably filled in elsewhere -- TODO confirm)
             symbols = {}
             # characters that delimit a quoted string in tokenize()
             _quoteletters = {'"', "'"}
             # characters that tokenize() emits as one-character operator tokens
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens
                 This is a generator. Each token is a ``(type, value, position)``
                 tuple and the stream always ends with an ``('end', None, position)``
                 token.
                 ``program`` must be bytes, otherwise ProgrammingError is raised.
                 ParseError is raised for an unterminated string and for a character
                 that cannot start any token.
                 ``lookup``, if given, is called with a candidate name and is expected
                 to return a true value when that name exists. It is used to recognize
                 old-style ``a:b`` ranges and names containing ``-``.
                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.
                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.
                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
                 Check that @ is a valid unquoted token character (issue3686):
                 >>> list(tokenize(b"@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 '''
                 if not isinstance(program, bytes):
                     raise error.ProgrammingError('revset statement must be bytes, got %r'
                                                  % program)
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     # every non-empty side of the (optional) ':' must be a known name
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 # general case: scan character by character; every branch leaves
                 # ``pos`` on the last character it consumed because the loop ends
                 # with an unconditional ``pos += 1``
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             # r'...' / r"...": the content is taken verbatim
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             # input ended before the closing quote was seen
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 # ``s`` is advanced so each token carries its own offset
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     yield ('-', None, s)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         # step back onto the last character of the symbol; the
                         # ``pos += 1`` below moves past it
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             # unique sentinel used as the default of getinteger()'s ``default``
             # parameter, so that "no default supplied" can be told apart from any
             # real value a caller passes (including None)
             _notset = object()
             def getsymbol(x):
                 """Return the name held by the 'symbol' node ``x``
                 Raises ParseError if ``x`` is empty or is not a 'symbol' node.
                 """
                 if not x or x[0] != 'symbol':
                     raise error.ParseError(_('not a symbol'))
                 return x[1]
             def getstring(x, err):
                 """Return the text held by the 'string' or 'symbol' node ``x``
                 Raises ParseError(err) for an empty ``x`` or any other node type.
                 """
                 if x and x[0] in ('string', 'symbol'):
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 """Return the 'string'/'symbol' node ``x`` converted to an int
                 If ``x`` is empty and a ``default`` was supplied, that default is
                 returned instead. Raises ParseError(err) when ``x`` has no text or the
                 text is not a valid integer.
                 """
                 if default is not _notset and not x:
                     return default
                 try:
                     text = getstring(x, err)
                     return int(text)
                 except ValueError:
                     raise error.ParseError(err)
             def getboolean(x, err):
                 """Return the boolean denoted by the 'symbol' node ``x``
                 The symbol text is interpreted by ``stringutil.parsebool``; when that
                 yields None, ParseError(err) is raised.
                 """
                 parsed = stringutil.parsebool(getsymbol(x))
                 if parsed is None:
                     raise error.ParseError(err)
                 return parsed
             def getlist(x):
                 """Return the argument nodes of ``x`` as a Python list
                 An empty ``x`` gives ``[]``, a 'list' node gives its children, and any
                 other node gives a one-element list containing that node.
                 """
                 if not x:
                     return []
                 return list(x[1:]) if x[0] == 'list' else [x]
             def getrange(x, err):
                 """Return the ``(first, last)`` operands of the range node ``x``
                 A missing side is reported as None. Raises ParseError(err) when ``x``
                 is empty or is not one of the four range node types.
                 """
                 if not x:
                     raise error.ParseError(err)
                 kind = x[0]
                 if kind == 'range':
                     return x[1], x[2]
                 if kind == 'rangepre':
                     return None, x[1]
                 if kind == 'rangepost':
                     return x[1], None
                 if kind == 'rangeall':
                     return None, None
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 """Return the arguments of ``x`` as a list, checking how many there are
                 Raises ParseError(err) when there are fewer than ``min`` arguments, or
                 more than ``max`` when ``max`` is not negative.
                 """
                 args = getlist(x)
                 count = len(args)
                 if count < min or 0 <= max < count:
                     raise error.ParseError(err)
                 return args
             def getargsdict(x, funcname, keys):
                 """Build the arguments dict of ``funcname`` from the argument tree ``x``
                 Delegates to ``parser.buildargsdict`` with ``keys`` split by
                 ``parser.splitargspec``.
                 """
                 args = getlist(x)
                 argspec = parser.splitargspec(keys)
                 return parser.buildargsdict(args, funcname, argspec,
                                             keyvaluenode='keyvalue', keynode='symbol')
             # memo of raw parsed trees keyed by spec, used by _cachedtree() only
             _treecache = {}
             def _cachedtree(spec):
                 """Return the raw parsed tree of ``spec``, parsing it on first use"""
                 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
                 cached = _treecache.get(spec)
                 if cached is not None:
                     return cached
                 parsed = parse(spec)
                 _treecache[spec] = parsed
                 return parsed
             def _build(tmplspec, *repls):
                 """Instantiate the template revset ``tmplspec`` as a raw parsed tree
                 The ``_`` symbols of the template are placeholders that get replaced
                 by ``repls``, as in the example below.
                 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 placeholder = ('symbol', '_')
                 return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
             def _match(patspec, tree):
                 """Match ``tree`` against the pattern revset ``patspec``
                 ``_`` symbols in the pattern are placeholders. The result is that of
                 ``parser.matchtree``: a list of matches, or None as shown below.
                 >>> _match(b'f(_)', parse(b'f()'))
                 >>> _match(b'f(_)', parse(b'f(1)'))
                 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
                 >>> _match(b'f(_)', parse(b'f(1, 2)'))
                 """
                 return parser.matchtree(_cachedtree(patspec), tree, ('symbol', '_'),
                                         {'keyvalue', 'list'})
             def _matchonly(revs, bases):
                 """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'"""
                 candidate = ('and', revs, bases)
                 return _match('ancestors(_) and not ancestors(_)', candidate)
             def _fixops(x):
                 """Resolve syntax that the simple top-down parser leaves ambiguous
                 Takes a raw parsed tree and returns the rewritten tree; anything that
                 is not a tuple is returned unchanged."""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 if op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     rel = x[1]
                     return _fixops(('relsubscript', rel[1], rel[2], x[2]))
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     rhs = x[2]
                     kind = rhs[0]
                     for prefixop, infixop in (('dagrangepre', 'dagrange'),
                                               ('rangepre', 'range')):
                         if kind == prefixop:
                             return _fixops((infixop, post, rhs[1]))
                     for allop, postop in (('dagrangeall', 'dagrangepost'),
                                           ('rangeall', 'rangepost')):
                         if kind == allop:
                             return _fixops((postop, post))
                 return (op,) + tuple(_fixops(child) for child in x[1:])
             def _analyze(x):
                 """Recursive worker of analyze()
                 Returns the rewritten tree; None is passed through unchanged. Raises
                 ParseError for a bare '::' and for a negation of something that is not
                 a string/symbol, and ValueError for an unknown operator.
                 """
                 if x is None:
                     return x
                 op = x[0]
                 # pseudo operations: rebuild them from a template revset and analyze
                 # the result again
                 if op == 'minus':
                     return _analyze(_build('_ and not _', *x[1:]))
                 elif op == 'only':
                     return _analyze(_build('only(_, _)', *x[1:]))
                 elif op == 'onlypost':
                     return _analyze(_build('only(_)', x[1]))
                 elif op == 'dagrangeall':
                     raise error.ParseError(_("can't use '::' in this context"))
                 elif op == 'dagrangepre':
                     return _analyze(_build('ancestors(_)', x[1]))
                 elif op == 'dagrangepost':
                     return _analyze(_build('descendants(_)', x[1]))
                 elif op == 'negate':
                     # -x becomes the string '-x'
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 # leaves are returned as they are
                 elif op in ('string', 'symbol'):
                     return x
                 elif op == 'rangeall':
                     return (op, None)
                 # remaining operators are kept; only their operands are analyzed
                 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 elif op == 'group':
                     # the group node itself is dropped
                     return _analyze(x[1])
                 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                             'subscript'}:
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     return (op, ta, tb)
                 elif op == 'relsubscript':
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     tc = _analyze(x[3])
                     return (op, ta, tb, tc)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y) for y in x[1:])
                 # for these two, x[1] (key / function name) is left untouched
                 elif op == 'keyvalue':
                     return (op, x[1], _analyze(x[2]))
                 elif op == 'func':
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Turn the raw parsed tree ``x`` into an evaluatable tree
                 The result can be fed to optimize() or getset().
                 All pseudo operations should be mapped to real operations or functions
                 defined in methods or symbols table respectively.
                 """
                 analyzed = _analyze(x)
                 return analyzed
             def _optimize(x):
                 """Recursive worker of optimize()
                 Returns a ``(weight, tree)`` pair for the evaluatable tree ``x``, where
                 ``tree`` is the possibly rewritten tree. Raises ValueError for an
                 unknown operator.
                 """
                 if x is None:
                     return 0, x
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return 0.5, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     w = min(wa, wb)
                     # (draft/secret/_notpublic() & ::x) have a fast path
                     m = _match('_() & ancestors(_)', ('and', ta, tb))
                     if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
                         return w, _build('_phaseandancestors(_, _)', m[1], m[2])
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
                     # x and not y -> (difference x y)
                     m = _match('not _', tb)
                     if m:
                         return wa, ('difference', ta, m[1])
                     # the right operand has the lower weight: use 'andsmally'
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     # ws/ts: weights and trees of the result; ss: pending run of
                     # consecutive string/symbol operands not yet flushed
                     ws, ts, ss = [], [], []
                     def flushss():
                         # move the pending run into ws/ts, folding two or more
                         # entries into a single _list() call
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if _match('public()', x[1]):
                         o = _optimize(_build('_notpublic()'))
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1])
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return 1, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1])
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     return wa + wb, (op, ta, tb)
                 # only the first operand is optimized for the operators below
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2])
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2])
                     # weight of the function itself: its ``_weight`` attribute in the
                     # symbols table, or 1 when there is none
                     w = getattr(symbols.get(f), '_weight', 1)
                     m = _match('commonancestors(_)', ta)
                     # Optimize heads(commonancestors(_)) because we have a fast version
                     if f == 'heads' and m:
                         return w + wa, _build('_commonancestorheads(_)', m[1])
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Return the optimized form of the evaluatable ``tree``
                 All pseudo operations should be transformed beforehand.
                 """
                 result = _optimize(tree)
                 _unusedweight, optimized = result
                 return optimized
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions: the default set plus '$'
             # (handed to _parsewith() by _aliasrules._parse())
             _aliassyminitletters = _syminitletters | {'$'}
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Parse ``spec`` into a tree using the given tokenizing options
                 ``lookup`` is ignored when ``spec`` has the form ``revset(...)``.
                 Raises ParseError('invalid token') if the parser stops before the end
                 of ``spec``.
                 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith(b'$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith(b'foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 if lookup and spec.startswith('revset(') and spec.endswith(')'):
                     lookup = None
                 revsetparser = parser.parser(elements)
                 tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
                 tree, pos = revsetparser.parse(tokens)
                 if pos != len(spec):
                     raise error.ParseError(_('invalid token'), pos)
                 simplified = parser.simplifyinfixops(tree, ('list', 'or'))
                 return _fixops(simplified)
             class _aliasrules(parser.basealiasrules):
                 """Rule set for parsing and expanding revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse the alias declaration or definition ``spec``
                     Symbol names may also start with ``$`` here (for backward
                     compatibility); callers of this function should examine whether
                     ``$`` is used also for unexpected symbols or not.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     """Return ``(name, args)`` if ``tree`` is a call of a named
                     function, otherwise None"""
                     if tree[0] != 'func' or tree[1][0] != 'symbol':
                         return None
                     return tree[1][1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Return ``tree`` with aliases expanded
                 ``aliases`` is a list of (name, value) tuples. If ``warn`` is given,
                 it is called once for each alias that has an error and was not warned
                 about yet.
                 """
                 aliasmap = _aliasrules.buildmap(aliases)
                 expanded = _aliasrules.expand(aliasmap, tree)
                 if warn is None:
                     return expanded
                 # warn about problematic (but not referred) aliases
                 for name, alias in sorted(aliasmap.iteritems()):
                     if not alias.error or alias.warned:
                         continue
                     warn(_('warning: %s\n') % (alias.error))
                     alias.warned = True
                 return expanded
             def foldconcat(tree):
                 """Replace every `##` concatenation in ``tree`` by a single string node
                 Raises ParseError if something other than a string or symbol is
                 concatenated.
                 """
                 if not isinstance(tree, tuple):
                     return tree
                 head = tree[0]
                 if head in ('string', 'symbol'):
                     return tree
                 if head != '_concat':
                     return tuple(foldconcat(sub) for sub in tree)
                 # flatten nested '_concat' nodes from left to right with an explicit
                 # stack
                 stack = [tree]
                 pieces = []
                 while stack:
                     node_ = stack.pop()
                     kind = node_[0]
                     if kind == '_concat':
                         stack.extend(reversed(node_[1:]))
                         continue
                     if kind not in ('string', 'symbol'):
                         msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
                         raise error.ParseError(msg)
                     pieces.append(node_[1])
                 return ('string', ''.join(pieces))
             def parse(spec, lookup=None):
                 """Parse the revset ``spec`` and return its raw parsed tree
                 A ParseError that carries a location is re-raised with a ``hint``
                 showing ``spec`` and a caret under the failing position.
                 """
                 try:
                     return _parsewith(spec, lookup=lookup)
                 except error.ParseError as inst:
                     haslocation = len(inst.args) > 1
                     if haslocation:
                         loc = inst.args[1]
                         # Remove newlines -- spaces are equivalent whitespace.
                         flat = spec.replace('\n', ' ')
                         # We want the caret to point to the place in the template that
                         # failed to parse, but in a hint we get a open paren at the
                         # start. Therefore, we print "loc + 1" spaces (instead of "loc")
                         # to line up the caret with the location of the error.
                         caret = ' ' * (loc + 1) + '^ ' + _('here')
                         inst.hint = flat + '\n' + caret
                     raise
             def _quote(s):
                 r"""Return ``s`` as a single-quoted, escaped revset string literal
                 >>> _quote(b'asdf')
                 "'asdf'"
                 >>> _quote(b"asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote(b'asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 escaped = stringutil.escapestr(pycompat.bytestr(s))
                 return "'%s'" % escaped
             def _formatargtype(c, arg):
                 """Format the single value ``arg`` according to the type letter ``c``
                 Raises TypeError when ``arg`` does not suit 'r' or 'b', and ParseError
                 for an unknown type letter.
                 """
                 if c == 'd':
                     return 'rev(%d)' % int(arg)
                 if c == 's':
                     return _quote(arg)
                 if c == 'r':
                     if not isinstance(arg, bytes):
                         raise TypeError
                     parse(arg) # make sure syntax errors are confined
                     return '(%s)' % arg
                 if c == 'n':
                     return _quote(node.hex(arg))
                 if c == 'b':
                     try:
                         return _quote(arg.branch())
                     except AttributeError:
                         raise TypeError
                 raise error.ParseError(_('unexpected revspec format character %s') % c)
             def _formatlistexp(s, t):
                 """Format the sequence ``s`` of type-``t`` values as one revset expression
                 An empty sequence gives ``_list('')`` and a single element is formatted
                 by _formatargtype(). Longer sequences of type 'd', 's', 'n' or 'b' are
                 packed into one ``_intlist``/``_list``/``_hexlist`` call; any other
                 type is split in halves that are joined with ``or``.
                 Raises TypeError if a 'b' element has no ``branch()`` method.
                 """
                 l = len(s)
                 if l == 0:
                     return "_list('')"
                 elif l == 1:
                     return _formatargtype(t, s[0])
                 elif t == 'd':
                     return _formatintlist(s)
                 elif t == 's':
                     return "_list(%s)" % _quote("\0".join(s))
                 elif t == 'n':
                     return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                 elif t == 'b':
                     try:
                         # quote and escape the joined names like the 's' case above
                         # and like the single-element path, so that a branch name
                         # containing a quote or a backslash cannot break the literal
                         return "_list(%s)" % _quote("\0".join(a.branch() for a in s))
                     except AttributeError:
                         raise TypeError
                 # no packed form for this type: split and recurse
                 m = l // 2
                 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
             def _formatintlist(data):
                 """Format the sequence of revision numbers ``data`` as a revset
                 Raises ParseError if ``data`` or one of its items is unsuitable.
                 """
                 try:
                     count = len(data)
                     if count == 0:
                         return "_list('')"
                     if count == 1:
                         return _formatargtype('d', data[0])
                     joined = "\0".join('%d' % int(rev) for rev in data)
                     return "_intlist('%s')" % joined
                 except (TypeError, ValueError):
                     raise error.ParseError(_('invalid argument for revspec'))
             def _formatparamexp(args, t):
                 """Format ``args`` as a comma separated list of type-``t`` values"""
                 formatted = [_formatargtype(t, a) for a in args]
                 return ', '.join(formatted)
             # formatter for each list prefix letter that formatspec() documents:
             # 'l' for a parenthesized list, 'p' for a list of function parameters
             # (presumably looked up by _parseargs() -- TODO confirm, not visible here)
             _formatlistfuncs = {
                 'l': _formatlistexp,
                 'p': _formatparamexp,
             }
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.
                 Supported arguments:
                 %r = revset expression, parenthesized
                 %d = rev(int(arg)), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'
                 Prefixing the type with 'l' specifies a parenthesized list of that type,
                 and 'p' specifies a list of function parameters of that type.
                 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec(b'%d:: and not %d::', 10, 20)
                 'rev(10):: and not rev(20)::'
                 >>> formatspec(b'%ld or %ld', [], [1])
                 "_list('') or rev(1)"
                 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: b'default'
                 >>> b.branch = b
                 >>> formatspec(b'branch(%b)', b)
                 "branch('default')"
                 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
                 "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
                 >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
                 "sort((:), 'desc', 'user')"
                 >>> formatspec(b'%ls', [b'a', b"'"])
                 "_list('a\\\\x00\\\\'')"
                 '''
                 parsed = _parseargs(expr, args)
                 ret = []
                 # each item is (type, value): None marks text that is already
                 # formatted, 'baseset' an iterable of revisions still to be formatted
                 for t, arg in parsed:
                     if t is None:
                         ret.append(arg)
                     elif t == 'baseset':
                         # a set is sorted first so that the output is deterministic
                         if isinstance(arg, set):
                             arg = sorted(arg)
                         ret.append(_formatintlist(list(arg)))
                     else:
                         raise error.ProgrammingError("unknown revspec item type: %r" % t)
                 return b''.join(ret)
             def _parseargs(expr, args):
                 """parse the expression and replace all inexpensive args
                 return a list of tuple [(arg-type, arg-value)]
                 Arg-type can be:
-                * None: a string ready to be concatenated into a final spec
+                * None:      a string ready to be concatenated into a final spec
+                * 'baseset': an iterable of revisions
                 """
                 expr = pycompat.bytestr(expr)
                 argiter = iter(args)
                 ret = []
                 pos = 0
                 while pos < len(expr):
                     # copy the literal text that precedes the next '%'
                     q = expr.find('%', pos)
                     if q < 0:
                         ret.append((None, expr[pos:]))
                         break
                     ret.append((None, expr[pos:q]))
                     pos = q + 1
                     try:
                         d = expr[pos]
                     except IndexError:
                         raise error.ParseError(_('incomplete revspec format character'))
                     if d == '%':
                         # '%%' is an escaped literal percent sign
                         ret.append((None, d))
                         pos += 1
                         continue
                     # every other format character consumes one argument
                     try:
                         arg = next(argiter)
                     except StopIteration:
                         raise error.ParseError(_('missing argument for revspec'))
                     f = _formatlistfuncs.get(d)
                     if f:
                         # a list of some type, might be expensive, do not replace
                         pos += 1
+                        islist = (d == 'l')
                         # the character after the list prefix is the item type
                         try:
                             d = expr[pos]
                         except IndexError:
                             raise error.ParseError(_('incomplete revspec format character'))
+                        if islist and d == 'd' and arg:
+                            # special case, we might be able to speed up the list of
+                            # int case
+                            #
+                            # We have been very conservative here for the first version.
+                            # Other types (eg: generator) are probably fine, but we did not
+                            # want to take any risk.
+                            safeinputtype = (list, tuple, set, smartset.abstractsmartset)
+                            if isinstance(arg, safeinputtype):
+                                # we don't create a baseset yet, because it comes with
+                                # an extra cost. If we are going to serialize it we
+                                # better skip it.
+                                ret.append(('baseset', arg))
+                                pos += 1
+                                continue
                         try:
                             ret.append((None, f(list(arg), d)))
                         except (TypeError, ValueError):
                             raise error.ParseError(_('invalid argument for revspec'))
                     else:
                         # a single entry, not expensive, replace
                         try:
                             ret.append((None, _formatargtype(d, arg)))
                         except (TypeError, ValueError):
                             raise error.ParseError(_('invalid argument for revspec'))
                     pos += 1
                 # an argument still left in the iterator means the caller passed
                 # more arguments than the expression has format characters
                 try:
                     next(argiter)
                     raise error.ParseError(_('too many revspec arguments specified'))
                 except StopIteration:
                     pass
                 return ret
             def prettyformat(tree):
                 """Return tree formatted by parser.prettyformat().

                 'string' and 'symbol' are passed as the second argument
                 (presumably the node types rendered as leaves -- confirm against
                 parser.prettyformat).
                 """
                 leafnodes = ('string', 'symbol')
                 return parser.prettyformat(tree, leafnodes)
             def depth(tree):
                 """Return the nesting depth of tree.

                 Anything that is not a tuple has depth 0; a tuple is one level
                 deeper than its deepest element.
                 """
                 if not isinstance(tree, tuple):
                     return 0
                 return 1 + max(depth(child) for child in tree)
             def funcsused(tree):
                 """Return the set of function names called anywhere in tree.

                 Non-tuples and 'string'/'symbol' nodes contribute nothing. For a
                 'func' node the name is read from tree[1][1].
                 """
                 isleaf = not isinstance(tree, tuple) or tree[0] in ('string', 'symbol')
                 if isleaf:
                     return set()
                 found = set()
                 # visit the children first, then record this node's own name
                 for child in tree[1:]:
                     found.update(funcsused(child))
                 if tree[0] == 'func':
                     found.add(tree[1][1])
                 return found
             # One to forty hexadecimal digits followed by the '$' anchor.
             # _ishashlikesymbol() applies it with .match(), so (assuming util.re
             # behaves like the stdlib re module) the whole symbol has to be made
             # of hex digits.
             _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
             def _ishashlikesymbol(symbol):
                 """Tell whether symbol looks like a hash.

                 Returns the result of _hashre.match(): truthy when the symbol
                 matches the pattern, None otherwise.
                 """
                 matched = _hashre.match(symbol)
                 return matched
             def gethashlikesymbols(tree):
                 """Collect the symbols of tree that look like hashes, in tree order.

                 A 'symbol' node contributes its name when _ishashlikesymbol()
                 accepts it. Any other node is searched recursively only when it
                 has at least three elements; shorter nodes contribute nothing.

                 >>> gethashlikesymbols(parse(b'3::abe3ff'))
                 ['3', 'abe3ff']
                 >>> gethashlikesymbols(parse(b'precursors(.)'))
                 []
                 >>> gethashlikesymbols(parse(b'precursors(34)'))
                 ['34']
                 >>> gethashlikesymbols(parse(b'abe3ffZ'))
                 []
                 """
                 if not tree:
                     return []
                 if tree[0] == "symbol":
                     return [tree[1]] if _ishashlikesymbol(tree[1]) else []
                 if len(tree) < 3:
                     return []
                 found = []
                 for child in tree[1:]:
                     found.extend(gethashlikesymbols(child))
                 return found