upstream/mercurial-mirror Commit - r41255:e5b227f4

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

util,

18

util,

19

)

19

)

20

from .utils import (

20

from .utils import (

21

stringutil,

21

stringutil,

22

)

22

)

23

24

# Operator table handed to parser.parser() by _parsewith().
#
# Each key is a token type as produced by tokenize(); each value is a 5-tuple
# laid out as described by the comment on the first row of the table.
# Several spellings share an identical row and therefore parse to the same
# node type: "::" and "..", "not" and "!", "and" and "&", and
# "or", "|" and "+".
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    # closing brackets and the end marker: binding strength 0, no actions
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    # leaf tokens: only a primary action is defined
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}

54

55

# operator keywords: tokenize() emits a symbol spelled like one of these as
# an operator token of that type instead of a 'symbol' token
keywords = {'and', 'or', 'not'}

# table consulted by _optimize() (through symbols.get(name)) to read the
# optional '_weight' attribute of a function; it is empty here and is
# presumably filled in by another module -- TODO confirm
symbols = {}

# characters that open (and must also close) a quoted string in tokenize()
_quoteletters = {'"', "'"}
# single-character operators; tokenize() yields each as its own token type
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols:
# ASCII letters and digits, '.', '_', '@' and every byte value 128-255
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols:
# everything accepted as an initial letter, plus '-' and '/'
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

70

71

def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    This is a generator yielding ``(type, value, pos)`` tuples, where
    ``pos`` is an offset into ``program``. The stream always finishes with
    an ``('end', None, pos)`` token.

    ``program`` must be bytes; anything else raises
    ``error.ProgrammingError``.

    ``lookup``, if given, is called with a candidate name and should return
    a true value when that name is a known symbol. It is used to accept
    old-style ``a:b`` ranges and names containing ``-`` as plain symbols.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    ``error.ParseError`` is raised for an unterminated quoted string and
    for a character that cannot start any token.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        # wrap the operand in a 1-tuple: a bare tuple argument would be
        # unpacked by '%' and either be mis-reported or raise TypeError
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % (program,))
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step over the prefix and keep the body verbatim
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end of the program without seeing the quote
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    # give the first '.' back so '..' is read as an operator
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # foo-bar-baz is ambiguous: ask lookup whether it is a
                # known symbol before treating '-' as an operator
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # pos is one past the symbol; undo the shared increment below
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)

192

193

# helpers

193

# helpers

194

195

_notset = object()

195

_notset = object()

196

197

def getsymbol(x):
    """Return the name held by a ``('symbol', name)`` node

    ``error.ParseError`` is raised when ``x`` is empty or is any other
    kind of node.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]

201

202

def getstring(x, err):
    """Return the value of a ``'string'`` or ``'symbol'`` node

    ``error.ParseError(err)`` is raised when ``x`` is empty or is any
    other kind of node.
    """
    if not x:
        raise error.ParseError(err)
    kind = x[0]
    if kind == 'string' or kind == 'symbol':
        return x[1]
    raise error.ParseError(err)

206

207

def getinteger(x, err, default=_notset):
    """Return the integer spelled by a string or symbol node

    When ``x`` is empty and a ``default`` was supplied, that default is
    returned as is. Otherwise ``error.ParseError(err)`` is raised if ``x``
    is not a string/symbol node or its value is not a valid integer.
    """
    hasdefault = default is not _notset
    if not x and hasdefault:
        return default
    try:
        text = getstring(x, err)
        return int(text)
    except ValueError:
        raise error.ParseError(err)

214

215

def getboolean(x, err):
    """Return the boolean spelled by a symbol node

    The symbol name is handed to ``stringutil.parsebool()``; when that
    returns ``None`` the name is not a recognized boolean and
    ``error.ParseError(err)`` is raised.
    """
    name = getsymbol(x)
    parsed = stringutil.parsebool(name)
    if parsed is None:
        raise error.ParseError(err)
    return parsed

220

221

def getlist(x):
    """Return the items of an argument tree as a Python list

    An empty tree gives ``[]``, a ``'list'`` node gives its children, and
    any other node gives a one-element list holding that node.
    """
    if x:
        if x[0] != 'list':
            return [x]
        return list(x[1:])
    return []

227

228

def getrange(x, err):
    """Split a range node into a ``(first, last)`` pair

    ``'range'`` gives both ends, ``'rangepre'`` and ``'rangepost'`` give
    ``None`` for the missing end, and ``'rangeall'`` gives ``(None, None)``.
    ``error.ParseError(err)`` is raised for an empty tree or any other node.
    """
    if x:
        kind = x[0]
        if kind == 'range':
            return x[1], x[2]
        if kind == 'rangepre':
            return None, x[1]
        if kind == 'rangepost':
            return x[1], None
        if kind == 'rangeall':
            return None, None
    raise error.ParseError(err)

241

242

def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    ``error.ParseError(err)`` is raised when there are fewer than ``min``
    arguments, or more than ``max`` of them. A negative ``max`` disables
    the upper bound.
    """
    args = getlist(x)
    count = len(args)
    if count < min:
        raise error.ParseError(err)
    if max >= 0 and count > max:
        raise error.ParseError(err)
    return args

247

248

def getargsdict(x, funcname, keys):
    """Build an arguments dict for ``funcname`` from the tree ``x``

    The flattened argument list and the argument spec split from ``keys``
    are passed on to ``parser.buildargsdict()``, with ``'keyvalue'`` and
    ``'symbol'`` named as the key-value and key node types.
    """
    args = getlist(x)
    argspec = parser.splitargspec(keys)
    return parser.buildargsdict(args, funcname, argspec,
                                keyvaluenode='keyvalue', keynode='symbol')

251

252

# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the raw parsed tree of ``spec``, parsing it at most once"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    cached = _treecache.get(spec)
    if cached is not None:
        return cached
    parsed = parse(spec)
    _treecache[spec] = parsed
    return parsed

261

262

def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    The template is parsed (and cached) once; ``parser.buildtree()`` then
    fills its ``_`` placeholder symbols from ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    placeholder = ('symbol', '_')
    return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)

270

271

def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    The pattern is parsed (and cached) once and compared with ``tree`` by
    ``parser.matchtree()``, using ``_`` as the placeholder symbol.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    placeholder = ('symbol', '_')
    # NOTE(review): presumably the node types a placeholder may not stand
    # for -- confirm against parser.matchtree()
    excluded = {'keyvalue', 'list'}
    return parser.matchtree(_cachedtree(patspec), tree, placeholder, excluded)

282

283

def _matchonly(revs, bases):
    """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'

    Returns whatever ``_match()`` returns for that pattern.
    """
    candidate = ('and', revs, bases)
    return _match('ancestors(_) and not ancestors(_)', candidate)

285

286

def _fixops(x):
    """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
    handled well by our simple top-down parser

    Anything that is not a tuple is returned untouched; every other node
    has the rewrites applied recursively to its operands.
    """
    if not isinstance(x, tuple):
        return x

    op = x[0]
    operands = x[1:]

    if op == 'or':
        # make number of arguments deterministic:
        # x + y + z -> (or x y z) -> (or (list x y z))
        return (op, _fixops(('list',) + operands))

    if op == 'subscript' and x[1][0] == 'relation':
        # x#y[z] ternary
        relation = x[1]
        return _fixops(('relsubscript', relation[1], relation[2], x[2]))

    if op == 'parent':
        # x^:y means (x^) : y, not x ^ (:y)
        # x^: means (x^) :, not x ^ (:)
        post = ('parentpost', x[1])
        rhs = x[2]
        rhsop = rhs[0]
        if rhsop == 'dagrangepre':
            return _fixops(('dagrange', post, rhs[1]))
        if rhsop == 'dagrangeall':
            return _fixops(('dagrangepost', post))
        if rhsop == 'rangepre':
            return _fixops(('range', post, rhs[1]))
        if rhsop == 'rangeall':
            return _fixops(('rangepost', post))
        # any other right-hand side is handled generically below

    return (op,) + tuple(_fixops(operand) for operand in operands)

314

315

def _analyze(x):
    """Recursively turn a raw parsed tree into an evaluatable tree

    Pseudo operators ('minus', 'only', 'onlypost', 'dagrangepre' and
    'dagrangepost') are rewritten through _build() templates, 'negate' is
    folded into a string node and 'group' nodes are dropped. The remaining
    operators are kept and their operands analyzed in turn.

    Raises error.ParseError for a bare '::' and for an operand that cannot
    be negated, and ValueError for an unknown operator.
    """
    if x is None:
        return x

    op = x[0]

    # leaves and nodes that need no recursion
    if op in ('string', 'symbol'):
        return x
    if op == 'rangeall':
        return (op, None)
    if op == 'dagrangeall':
        raise error.ParseError(_("can't use '::' in this context"))

    # pseudo operators, re-expressed with real operators and functions
    if op == 'minus':
        return _analyze(_build('_ and not _', *x[1:]))
    if op == 'only':
        return _analyze(_build('only(_, _)', *x[1:]))
    if op == 'onlypost':
        return _analyze(_build('only(_)', x[1]))
    if op == 'dagrangepre':
        return _analyze(_build('ancestors(_)', x[1]))
    if op == 'dagrangepost':
        return _analyze(_build('descendants(_)', x[1]))
    if op == 'negate':
        s = getstring(x[1], _("can't negate that"))
        return _analyze(('string', '-' + s))
    if op == 'group':
        return _analyze(x[1])

    # real operators, grouped by how many operands get analyzed
    if op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
        return (op, _analyze(x[1]))
    if op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
              'subscript'}:
        return (op, _analyze(x[1]), _analyze(x[2]))
    if op == 'relsubscript':
        return (op, _analyze(x[1]), _analyze(x[2]), _analyze(x[3]))
    if op == 'list':
        return (op,) + tuple(_analyze(y) for y in x[1:])
    if op == 'keyvalue' or op == 'func':
        # the key / function name node is kept verbatim
        return (op, x[1], _analyze(x[2]))

    raise ValueError('invalid operator %r' % op)

360

361

def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    evaluatable = _analyze(x)
    return evaluatable

369

370

def _optimize(x):
    """Recursively optimize an evaluatable tree; return (weight, newtree)

    ``weight`` is a number attached to each subtree and combined on the way
    up (min for 'and', max for 'or', sum for ranges, lists and function
    calls). Judging from the "single revisions are small" note it is a
    rough cost/size estimate -- TODO confirm. ``None`` maps to ``(0, None)``.

    Raises ValueError for an operator this function does not know.
    """
    if x is None:
        return 0, x

    op = x[0]
    if op in ('string', 'symbol'):
        return 0.5, x # single revisions are small
    elif op == 'and':
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        w = min(wa, wb)

        # (draft/secret/_notpublic() & ::x) have a fast path
        m = _match('_() & ancestors(_)', ('and', ta, tb))
        if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
            return w, _build('_phaseandancestors(_, _)', m[1], m[2])

        # (::x and not ::y)/(not ::y and ::x) have a fast path
        m = _matchonly(ta, tb) or _matchonly(tb, ta)
        if m:
            return w, _build('only(_, _)', *m[1:])

        # 'x and not y' becomes an explicit difference, weighted like x
        m = _match('not _', tb)
        if m:
            return wa, ('difference', ta, m[1])
        # operand order is kept; 'andsmally' records that the right-hand
        # side has the smaller weight
        if wa > wb:
            op = 'andsmally'
        return w, (op, ta, tb)
    elif op == 'or':
        # fast path for machine-generated expression, that is likely to have
        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
        # ws/ts collect the weights and trees of the result; ss buffers the
        # current run of adjacent string/symbol operands
        ws, ts, ss = [], [], []
        def flushss():
            # move the buffered run into ws/ts: a single item is kept as
            # is, several are merged into one _list() call whose argument
            # is their values joined with NUL bytes
            if not ss:
                return
            if len(ss) == 1:
                w, t = ss[0]
            else:
                s = '\0'.join(t[1] for w, t in ss)
                y = _build('_list(_)', ('string', s))
                w, t = _optimize(y)
            ws.append(w)
            ts.append(t)
            del ss[:]
        for y in getlist(x[1]):
            w, t = _optimize(y)
            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                ss.append((w, t))
                continue
            # a non-trivial operand ends the current run
            flushss()
            ws.append(w)
            ts.append(t)
        flushss()
        if len(ts) == 1:
            return ws[0], ts[0] # 'or' operation is fully optimized out
        return max(ws), (op, ('list',) + tuple(ts))
    elif op == 'not':
        # Optimize not public() to _notpublic() because we have a fast version
        if _match('public()', x[1]):
            o = _optimize(_build('_notpublic()'))
            return o[0], o[1]
        else:
            o = _optimize(x[1])
            return o[0], (op, o[1])
    elif op == 'rangeall':
        return 1, x
    elif op in ('rangepre', 'rangepost', 'parentpost'):
        o = _optimize(x[1])
        return o[0], (op, o[1])
    elif op in ('dagrange', 'range'):
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        return wa + wb, (op, ta, tb)
    elif op in ('parent', 'ancestor', 'relation', 'subscript'):
        # only the first operand is optimized; x[2] is passed through as is
        w, t = _optimize(x[1])
        return w, (op, t, x[2])
    elif op == 'relsubscript':
        # as above: x[2] and x[3] are passed through as is
        w, t = _optimize(x[1])
        return w, (op, t, x[2], x[3])
    elif op == 'list':
        ws, ts = zip(*(_optimize(y) for y in x[1:]))
        return sum(ws), (op,) + ts
    elif op == 'keyvalue':
        w, t = _optimize(x[2])
        return w, (op, x[1], t)
    elif op == 'func':
        f = getsymbol(x[1])
        wa, ta = _optimize(x[2])
        # weight 1 unless the entry in the symbols table has a '_weight'
        w = getattr(symbols.get(f), '_weight', 1)
        m = _match('commonancestors(_)', ta)

        # Optimize heads(commonancestors(_)) because we have a fast version
        if f == 'heads' and m:
            return w + wa, _build('_commonancestorheads(_)', m[1])

        return w + wa, (op, x[1], ta)
    raise ValueError('invalid operator %r' % op)

467

468

def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.

    Only the rewritten tree is returned; the weight computed alongside it
    by _optimize() is discarded.
    """
    unusedweight, optimized = _optimize(tree)
    return optimized

475

476

# the set of valid characters for the initial letter of symbols in

476

# the set of valid characters for the initial letter of symbols in

477

# alias declarations and definitions

477

# alias declarations and definitions

478

_aliassyminitletters = _syminitletters | {'$'}

478

_aliassyminitletters = _syminitletters | {'$'}

479

480

def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` is dropped for a spec of the form ``revset(...)``. The whole
    spec must be consumed by the parser, otherwise error.ParseError is
    raised at the position where parsing stopped.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    if lookup:
        if spec.startswith('revset(') and spec.endswith(')'):
            lookup = None
    revsetparser = parser.parser(elements)
    tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
    tree, pos = revsetparser.parse(tokens)
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    # flatten chained ',' and 'or' operators before fixing up ambiguities
    simplified = parser.simplifyinfixops(tree, ('list', 'or'))
    return _fixops(simplified)

502

503

class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse the alias declaration or definition ``spec``

        For backward compatibility, symbol names may also start with
        ``$`` here.  Callers should check whether ``$`` ended up being
        used for unexpected symbols.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return (name, arguments) if ``tree`` is a function call on a
        symbol, None otherwise"""
        if tree[0] != 'func' or tree[1][0] != 'symbol':
            return None
        name = tree[1][1]
        return name, getlist(tree[2])

521

522

def expandaliases(tree, aliases, warn=None):
    """Expand aliases in ``tree``; ``aliases`` is a list of (name, value)
    tuples

    When ``warn`` is given, it is called once for each alias that carries
    an error and has not been warned about yet.
    """
    aliasmap = _aliasrules.buildmap(aliases)
    expanded = _aliasrules.expand(aliasmap, tree)
    if warn is None:
        return expanded
    # report problematic aliases that have not been flagged as warned yet
    for _name, entry in sorted(aliasmap.iteritems()):
        if not entry.error or entry.warned:
            continue
        warn(_('warning: %s\n') % (entry.error))
        entry.warned = True
    return expanded

533

534

def foldconcat(tree):
    """Fold the elements joined by `##` into a single string node

    Every ``_concat`` subtree is replaced by one ``('string', ...)`` node
    made of the values of its 'string' and 'symbol' operands, taken left
    to right.  A ``ParseError`` is raised for any other kind of operand.
    """
    if not isinstance(tree, tuple):
        return tree
    op = tree[0]
    if op in ('string', 'symbol'):
        return tree
    if op != '_concat':
        return tuple(foldconcat(sub) for sub in tree)

    # flatten nested '_concat' nodes with an explicit stack; operands are
    # pushed reversed so that they are popped in left-to-right order
    stack = [tree]
    pieces = []
    while stack:
        elem = stack.pop()
        kind = elem[0]
        if kind == '_concat':
            stack.extend(reversed(elem[1:]))
        elif kind in ('string', 'symbol'):
            pieces.append(elem[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
    return ('string', ''.join(pieces))

554

555

def parse(spec, lookup=None):
    """Parse ``spec`` with ``_parsewith()``, adding a hint to parse errors

    When the ``ParseError`` carries a location, its ``hint`` is set to the
    spec followed by a caret line marking that location, and the error is
    re-raised.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        if len(inst.args) <= 1:
            # no location available, nothing to point at
            raise
        loc = inst.args[1]
        # Newlines are replaced by spaces (equivalent whitespace) so the
        # spec stays on one line above the caret.
        flat = spec.replace('\n', ' ')
        # "loc + 1" spaces (instead of "loc") are printed because the
        # hint is displayed with an open paren at its start; this lines
        # the caret up with the location of the error.
        caret = ' ' * (loc + 1) + '^ ' + _('here')
        inst.hint = flat + '\n' + caret
        raise

569

570

def _quote(s):
    r"""Escape a value and wrap it in single quotes for the revset engine.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    asbytes = pycompat.bytestr(s)
    escaped = stringutil.escapestr(asbytes)
    return "'%s'" % escaped

583

584

def _formatargtype(c, arg):

584

def _formatargtype(c, arg):

585

if c == 'd':

585

if c == 'd':

586

return 'rev(%d)' % int(arg)

586

return 'rev(%d)' % int(arg)

587

elif c == 's':

587

elif c == 's':

588

return _quote(arg)

588

return _quote(arg)

589

elif c == 'r':

589

elif c == 'r':

590

if not isinstance(arg, bytes):

590

if not isinstance(arg, bytes):

591

raise TypeError

591

raise TypeError

592

parse(arg) # make sure syntax errors are confined

592

parse(arg) # make sure syntax errors are confined

593

return '(%s)' % arg

593

return '(%s)' % arg

594

elif c == 'n':

594

elif c == 'n':

595

return _quote(node.hex(arg))

595

return _quote(node.hex(arg))

596

elif c == 'b':

596

elif c == 'b':

597

try:

597

try:

598

return _quote(arg.branch())

598

return _quote(arg.branch())

599

except AttributeError:

599

except AttributeError:

600

raise TypeError

600

raise TypeError

601

raise error.ParseError(_('unexpected revspec format character %s') % c)

601

raise error.ParseError(_('unexpected revspec format character %s') % c)

602

603

def _formatlistexp(s, t):

603

def _formatlistexp(s, t):

604

l = len(s)

604

l = len(s)

605

if l == 0:

605

if l == 0:

606

return "_list('')"

606

return "_list('')"

607

elif l == 1:

607

elif l == 1:

608

return _formatargtype(t, s[0])

608

return _formatargtype(t, s[0])

609

elif t == 'd':

609

elif t == 'd':

610

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

610

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

611

elif t == 's':

611

elif t == 's':

612

return "_list(%s)" % _quote("\0".join(s))

612

return "_list(%s)" % _quote("\0".join(s))

613

elif t == 'n':

613

elif t == 'n':

614

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

614

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

615

elif t == 'b':

615

elif t == 'b':

616

try:

616

try:

617

return "_list('%s')" % "\0".join(a.branch() for a in s)

617

return "_list('%s')" % "\0".join(a.branch() for a in s)

618

except AttributeError:

618

except AttributeError:

619

raise TypeError

619

raise TypeError

620

621

m = l // 2

621

m = l // 2

622

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

622

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

623

624

def _formatparamexp(args, t):

624

def _formatparamexp(args, t):

625

return ', '.join(_formatargtype(t, a) for a in args)

625

return ', '.join(_formatargtype(t, a) for a in args)

626

627

# formatters for the list prefixes of a format specifier, keyed by the
# prefix character
_formatlistfuncs = {
    'l': _formatlistexp,
    'p': _formatparamexp,
}

631

632

def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = rev(int(arg)), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    'rev(10):: and not rev(20)::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or rev(1)"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    pieces = []
    for kind, value in _parseargs(expr, args):
        # only ready-to-concatenate items (type None) are understood here
        if kind is not None:
            raise error.ProgrammingError("unknown revspec item type: %r"
                                         % kind)
        pieces.append(value)
    return b''.join(pieces)

677

678

def _parseargs(expr, args):
    """Split ``expr`` at its '%' markers and format the matching arguments

    Returns a list of (arg-type, arg-value) tuples.

    Arg-type can be:
    * None: a string ready to be concatenated into a final spec

    A ``ParseError`` is raised for a truncated format specifier, for an
    argument that cannot be formatted, and when the number of arguments
    does not match the number of specifiers.
    """
    expr = pycompat.bytestr(expr)
    remaining = iter(args)
    items = []
    pos = 0
    while pos < len(expr):
        marker = expr.find('%', pos)
        if marker < 0:
            # no further specifier: the rest of the expression is literal
            items.append((None, expr[pos:]))
            break
        items.append((None, expr[pos:marker]))
        pos = marker + 1
        try:
            fmt = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if fmt == '%':
            # '%%' stands for a literal '%' and consumes no argument
            items.append((None, fmt))
            pos += 1
            continue

        try:
            value = next(remaining)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        listfunc = _formatlistfuncs.get(fmt)
        if not listfunc:
            # a single entry, formatted right away
            try:
                items.append((None, _formatargtype(fmt, value)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        else:
            # a list prefix: the element type is the next character
            pos += 1
            try:
                fmt = expr[pos]
            except IndexError:
                raise error.ParseError(_('incomplete revspec format character'))
            try:
                items.append((None, listfunc(list(value), fmt)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        pos += 1

    # every argument must have been consumed by a specifier
    unused = object()
    if next(remaining, unused) is not unused:
        raise error.ParseError(_('too many revspec arguments specified'))
    return items

718

736

719

def prettyformat(tree):
    """Format ``tree`` with ``parser.prettyformat()``, passing
    ('string', 'symbol') as its second argument"""
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)

721

739

722

def depth(tree):
    """Return the nesting depth of ``tree``

    Anything that is not a tuple has depth 0; a tuple is one level deeper
    than its deepest element.
    """
    if not isinstance(tree, tuple):
        return 0
    return 1 + max(depth(sub) for sub in tree)

727

745

728

def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``"""
    if not isinstance(tree, tuple):
        return set()
    op = tree[0]
    if op in ('string', 'symbol'):
        return set()
    found = set()
    for sub in tree[1:]:
        found.update(funcsused(sub))
    if op == 'func':
        # tree[1] is the node naming the function, its value the name
        found.add(tree[1][1])
    return found

738

756

739

# pattern for 1 to 40 hexadecimal digits running up to the end of the string
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

740

758

741

def _ishashlikesymbol(symbol):
    """Tell whether ``symbol`` looks like a hash

    The result of matching ``symbol`` against ``_hashre`` is returned
    as-is, to be used as a truth value.
    """
    matched = _hashre.match(symbol)
    return matched

744

762

745

def gethashlikesymbols(tree):
    """Return the list of the symbols of ``tree`` that look like hashes

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        name = tree[1]
        return [name] if _ishashlikesymbol(name) else []
    if len(tree) < 3:
        return []
    # collect from every operand, left to right
    found = []
    for sub in tree[1:]:
        found.extend(gethashlikesymbols(sub))
    return found

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             # Operator table handed to parser.parser() by _parsewith(). The node names
             # that appear here ("group", "func", "dagrange", ...) are the operators that
             # _fixops(), _analyze() and _optimize() dispatch on.
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             # words that tokenize() emits as operator tokens rather than as symbols
             keywords = {'and', 'or', 'not'}
             # function table consulted by _optimize() for the '_weight' attribute of a
             # function; empty here, presumably filled in by another module (verify)
             symbols = {}
             # characters that open, and must also close, a quoted string
             _quoteletters = {'"', "'"}
             # one-character operators that tokenize() yields unchanged
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens
                 Each token is yielded as a ``(type, value, position)`` tuple and the
                 stream always finishes with an ``'end'`` token.
                 ``program`` must be bytes; error.ProgrammingError is raised otherwise.
                 error.ParseError is raised for an unterminated string and for a
                 character that cannot start any token.
                 ``lookup``, when given, is called with a candidate name and a true
                 result makes that name a single symbol. It is consulted for old-style
                 ``a:b`` ranges and for names that contain ``-``.
                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.
                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.
                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
                 Check that @ is a valid unquoted token character (issue3686):
                 >>> list(tokenize(b"@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 '''
                 if not isinstance(program, bytes):
                     raise error.ProgrammingError('revset statement must be bytes, got %r'
                                                  % program)
                 # NOTE(review): the loop below compares program[pos] with one-character
                 # strings, so bytestr presumably makes indexing return those -- confirm
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     # every non-empty side of the first ':' must be known to lookup
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 # general scanner: each branch leaves pos on the last character it
                 # consumed, and the shared "pos += 1" at the bottom moves past it
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             # raw string: step onto the quote and keep the text verbatim
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             # the loop ran off the end without meeting the closing quote
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     # s tracks the offset of the piece being reported
                                     s += len(p)
                                     yield ('-', None, s)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         # pos already points past the symbol; step back so that the
                         # shared increment below lands on the character that ended it
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             # sentinel used as the default of getinteger()'s ``default`` parameter, so
             # that "no default given" can be told apart from any real value
             _notset = object()
             def getsymbol(x):
                 """Return the name stored in a ``('symbol', name)`` node.
                 Any other node, including an empty one, raises error.ParseError.
                 """
                 if not x or x[0] != 'symbol':
                     raise error.ParseError(_('not a symbol'))
                 return x[1]
             def getstring(x, err):
                 """Return the text of a 'string' or 'symbol' node.
                 ``err`` is the message of the error.ParseError raised for any other
                 node, including an empty one.
                 """
                 kind = x[0] if x else None
                 if kind in ('string', 'symbol'):
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 """Return the integer written in a 'string' or 'symbol' node.
                 When ``x`` is empty and a ``default`` was supplied, that default is
                 returned instead. ``err`` is the message of the error.ParseError
                 raised when the node text is not an integer.
                 """
                 hasdefault = default is not _notset
                 if hasdefault and not x:
                     return default
                 try:
                     value = int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
                 return value
             def getboolean(x, err):
                 """Interpret the symbol node ``x`` as a boolean.
                 The symbol text is handed to stringutil.parsebool(); when that returns
                 None, error.ParseError(err) is raised.
                 """
                 parsed = stringutil.parsebool(getsymbol(x))
                 if parsed is None:
                     raise error.ParseError(err)
                 return parsed
             def getlist(x):
                 """Return the members of ``x`` as a fresh list.
                 An empty tree gives an empty list, a 'list' node gives its children,
                 and any other node is wrapped in a one-element list.
                 """
                 if not x:
                     return []
                 if x[0] != 'list':
                     return [x]
                 return list(x[1:])
             def getrange(x, err):
                 """Split a range node into its ``(first, last)`` operands.
                 A side that the node does not carry is reported as None: 'rangepre'
                 has no first operand, 'rangepost' no last one and 'rangeall' neither.
                 ``err`` is the message of the error.ParseError raised when ``x`` is
                 empty or is not one of those range nodes.
                 """
                 if x:
                     kind = x[0]
                     if kind == 'rangeall':
                         return None, None
                     if kind == 'rangepost':
                         return x[1], None
                     if kind == 'rangepre':
                         return None, x[1]
                     if kind == 'range':
                         return x[1], x[2]
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 """Return the arguments held by ``x`` after checking how many there are.
                 error.ParseError(err) is raised when there are fewer than ``min``
                 arguments or more than ``max`` of them; a negative ``max`` switches
                 the upper bound off.
                 """
                 args = getlist(x)
                 count = len(args)
                 if count < min or 0 <= max < count:
                     raise error.ParseError(err)
                 return args
             def getargsdict(x, funcname, keys):
                 """Build the argument dict of function ``funcname`` from the tree ``x``.
                 ``keys`` is split with parser.splitargspec() and the result is passed,
                 with the argument list of ``x``, to parser.buildargsdict().
                 """
                 args = getlist(x)
                 argspec = parser.splitargspec(keys)
                 return parser.buildargsdict(args, funcname, argspec,
                                             keyvaluenode='keyvalue', keynode='symbol')
             # memo of raw parsed trees for internally built statements, keyed by spec
             _treecache = {}
             def _cachedtree(spec):
                 """Return the raw parsed tree of ``spec``, parsing it on first use."""
                 # no lock is taken: parse() is reentrant and dict.__setitem__() is atomic
                 cached = _treecache.get(spec)
                 if cached is not None:
                     return cached
                 parsed = parse(spec)
                 _treecache[spec] = parsed
                 return parsed
             def _build(tmplspec, *repls):
                 """Build a raw parsed tree by filling in a template revset statement
                 The (cached) tree of ``tmplspec`` is passed to parser.buildtree() with
                 the ``_`` symbol as placeholder and ``repls`` as replacements.
                 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 placeholder = ('symbol', '_')
                 return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
             def _match(patspec, tree):
                 """Match a tree against a pattern statement and return the matches
                 The (cached) tree of ``patspec`` is compared by parser.matchtree(),
                 with the ``_`` symbol acting as placeholder.
                 >>> _match(b'f(_)', parse(b'f()'))
                 >>> _match(b'f(_)', parse(b'f(1)'))
                 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
                 >>> _match(b'f(_)', parse(b'f(1, 2)'))
                 """
                 placeholder = ('symbol', '_')
                 nodenames = {'keyvalue', 'list'}
                 return parser.matchtree(_cachedtree(patspec), tree, placeholder,
                                         nodenames)
             def _matchonly(revs, bases):
                 """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'"""
                 conjunction = ('and', revs, bases)
                 return _match('ancestors(_) and not ancestors(_)', conjunction)
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser
                 Non-tuple values are returned untouched; every other node is rebuilt
                 with its children rewritten recursively.
                 """
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 if op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     relation = x[1]
                     return _fixops(('relsubscript', relation[1], relation[2], x[2]))
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     rhs = x[2]
                     binary = {'dagrangepre': 'dagrange', 'rangepre': 'range'}
                     unary = {'dagrangeall': 'dagrangepost', 'rangeall': 'rangepost'}
                     if rhs[0] in binary:
                         return _fixops((binary[rhs[0]], post, rhs[1]))
                     if rhs[0] in unary:
                         return _fixops((unary[rhs[0]], post))
                 children = tuple(map(_fixops, x[1:]))
                 return (op,) + children
             def _analyze(x):
                 """Recursively turn a raw parsed tree into an evaluatable one
                 Pseudo operations are re-expressed through _build() templates, 'group'
                 nodes are unwrapped and the remaining nodes are rebuilt with analyzed
                 children. ValueError is raised for an operator that is not handled.
                 """
                 if x is None:
                     return x
                 op = x[0]
                 # leaves are already in their final form
                 if op in ('string', 'symbol'):
                     return x
                 # pseudo operations spelled out as real operations or functions
                 if op == 'minus':
                     return _analyze(_build('_ and not _', *x[1:]))
                 if op == 'only':
                     return _analyze(_build('only(_, _)', *x[1:]))
                 if op == 'onlypost':
                     return _analyze(_build('only(_)', x[1]))
                 if op == 'dagrangepre':
                     return _analyze(_build('ancestors(_)', x[1]))
                 if op == 'dagrangepost':
                     return _analyze(_build('descendants(_)', x[1]))
                 if op == 'dagrangeall':
                     raise error.ParseError(_("can't use '::' in this context"))
                 if op == 'negate':
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 if op == 'group':
                     return _analyze(x[1])
                 # structural nodes: keep the operator, analyze the operands
                 if op == 'rangeall':
                     return (op, None)
                 if op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 if op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                           'subscript'}:
                     return (op, _analyze(x[1]), _analyze(x[2]))
                 if op == 'relsubscript':
                     return (op, _analyze(x[1]), _analyze(x[2]), _analyze(x[3]))
                 if op == 'list':
                     return (op,) + tuple(map(_analyze, x[1:]))
                 if op in ('keyvalue', 'func'):
                     # the first operand (key or function name) is kept verbatim
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Turn a raw parsed tree into an evaluatable tree
                 The result can be fed to optimize() or getset(). All pseudo operations
                 should be mapped to real operations or functions defined in methods or
                 symbols table respectively.
                 """
                 evaluatable = _analyze(x)
                 return evaluatable
             def _optimize(x):
                 """Return ``(weight, tree)`` for the evaluatable tree ``x``
                 ``tree`` is ``x`` with the rewrites below applied. ``weight`` is a
                 number combined from the weights of the sub-trees; within this
                 function it decides between 'and' and 'andsmally'.
                 ValueError is raised for an operator that is not handled.
                 """
                 if x is None:
                     # absent operand
                     return 0, x
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return 0.5, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     w = min(wa, wb)
                     # (draft/secret/_notpublic() & ::x) have a fast path
                     m = _match('_() & ancestors(_)', ('and', ta, tb))
                     if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
                         return w, _build('_phaseandancestors(_, _)', m[1], m[2])
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
                     # "x and not y" becomes a 'difference' node weighted as x alone
                     m = _match('not _', tb)
                     if m:
                         return wa, ('difference', ta, m[1])
                     # the left operand is the heavier one: emit 'andsmally' instead
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     # ws/ts: weights and trees of the final operands;
                     # ss: pending run of consecutive string/symbol operands
                     ws, ts, ss = [], [], []
                     def flushss():
                         # move the pending run into ws/ts, folding two or more
                         # entries into a single _list() call
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y)
                         ws.append(w)
                         ts.append(t)
                         # clear in place: the loop below shares this very list
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         # a non-trivial operand ends the current run
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if _match('public()', x[1]):
                         o = _optimize(_build('_notpublic()'))
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1])
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return 1, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1])
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     # only the first operand is optimized; x[2] is passed through
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     # as above: x[2] and x[3] are passed through unchanged
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2])
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2])
                     # weight of the function itself: its '_weight' attribute in the
                     # symbols table, or 1 when there is none
                     w = getattr(symbols.get(f), '_weight', 1)
                     m = _match('commonancestors(_)', ta)
                     # Optimize heads(commonancestors(_)) because we have a fast version
                     if f == 'heads' and m:
                         return w + wa, _build('_commonancestorheads(_)', m[1])
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Return the optimized form of an evaluatable tree
                 All pseudo operations should be transformed beforehand. The weight
                 that _optimize() computes alongside the tree is dropped.
                 """
                 return _optimize(tree)[1]
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions: the default set plus '$'
             # (passed to _parsewith() by _aliasrules._parse())
             _aliassyminitletters = _syminitletters | {'$'}
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Parse ``spec`` into a raw tree using the given tokenizing options
                 error.ParseError is raised when the parser stops before the end of
                 ``spec``.
                 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith(b'$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith(b'foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 if lookup and spec.startswith('revset(') and spec.endswith(')'):
                     # an explicit revset(...) wrapper is tokenized without lookup
                     lookup = None
                 revsetparser = parser.parser(elements)
                 tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
                 tree, pos = revsetparser.parse(tokens)
                 if pos == len(spec):
                     simplified = parser.simplifyinfixops(tree, ('list', 'or'))
                     return _fixops(simplified)
                 raise error.ParseError(_('invalid token'), pos)
             class _aliasrules(parser.basealiasrules):
                 """Rule set used to parse and expand revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse the alias declaration/definition ``spec``
                     Symbol names may also start with ``$`` here (for backward
                     compatibility); callers of this function should examine whether
                     ``$`` is used also for unexpected symbols or not.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     """Return ``(name, args)`` if ``tree`` is a call of a named function
                     None is returned for every other tree.
                     """
                     if tree[0] != 'func' or tree[1][0] != 'symbol':
                         return None
                     return tree[1][1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Expand aliases in a tree, aliases is a list of (name, value) tuples
                 When ``warn`` is given, it is called once for each alias that carries
                 an error and has not been warned about yet; such aliases are then
                 marked as warned.
                 """
                 aliasmap = _aliasrules.buildmap(aliases)
                 expanded = _aliasrules.expand(aliasmap, tree)
                 if warn is None:
                     return expanded
                 # warn about problematic (but not referred) aliases
                 for unused, alias in sorted(aliasmap.iteritems()):
                     if not alias.error or alias.warned:
                         continue
                     warn(_('warning: %s\n') % (alias.error))
                     alias.warned = True
                 return expanded
             def foldconcat(tree):
                 """Fold elements to be concatenated by `##`
                 Every '_concat' node is replaced by one 'string' node holding the
                 joined text of its string/symbol operands, taken left to right.
                 error.ParseError is raised when an operand is of any other kind.
                 """
                 if not isinstance(tree, tuple):
                     return tree
                 op = tree[0]
                 if op in ('string', 'symbol'):
                     return tree
                 if op != '_concat':
                     return tuple(map(foldconcat, tree))
                 pieces = []
                 stack = [tree]
                 while stack:
                     cur = stack.pop()
                     kind = cur[0]
                     if kind == '_concat':
                         # push operands right to left so the leftmost is popped first
                         stack.extend(cur[:0:-1])
                         continue
                     if kind not in ('string', 'symbol'):
                         msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
                         raise error.ParseError(msg)
                     pieces.append(cur[1])
                 return ('string', ''.join(pieces))
             def parse(spec, lookup=None):
                 """Parse ``spec`` into a raw tree, decorating parse errors with a hint
                 When the error.ParseError raised by _parsewith() carries a location,
                 its ``hint`` is set to the statement followed by a caret line marking
                 that location; the error is then re-raised.
                 """
                 try:
                     return _parsewith(spec, lookup=lookup)
                 except error.ParseError as inst:
                     haslocation = len(inst.args) > 1
                     if haslocation:
                         loc = inst.args[1]
                         # Remove newlines -- spaces are equivalent whitespace.
                         flat = spec.replace('\n', ' ')
                         # We want the caret to point to the place in the template that
                         # failed to parse, but in a hint we get a open paren at the
                         # start. Therefore, we print "loc + 1" spaces (instead of "loc")
                         # to line up the caret with the location of the error.
                         padding = ' ' * (loc + 1)
                         inst.hint = flat + '\n' + padding + '^ ' + _('here')
                     raise
             def _quote(s):
                 r"""Turn a value into an escaped, single-quoted revset string literal.
                 >>> _quote(b'asdf')
                 "'asdf'"
                 >>> _quote(b"asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote(b'asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 escaped = stringutil.escapestr(pycompat.bytestr(s))
                 return "'%s'" % escaped
             def _formatargtype(c, arg):
                 """Render one argument according to the format character ``c``
                 TypeError is raised when ``arg`` does not suit the requested type
                 ('r' needs bytes, 'b' needs an object with a branch() method) and
                 error.ParseError when ``c`` is not a known format character.
                 """
                 if c == 'd':
                     return 'rev(%d)' % int(arg)
                 if c == 's':
                     return _quote(arg)
                 if c == 'n':
                     return _quote(node.hex(arg))
                 if c == 'b':
                     try:
                         return _quote(arg.branch())
                     except AttributeError:
                         raise TypeError
                 if c == 'r':
                     if not isinstance(arg, bytes):
                         raise TypeError
                     parse(arg) # make sure syntax errors are confined
                     return '(%s)' % arg
                 raise error.ParseError(_('unexpected revspec format character %s') % c)
             def _formatlistexp(s, t):
                 """Render the sequence ``s`` of values of type ``t`` as one expression
                 An empty sequence gives ``_list('')`` and a single value is rendered
                 by _formatargtype(). Longer sequences of type 'd', 's', 'n' or 'b'
                 become one NUL-separated list call; for any other type the sequence
                 is halved recursively and the halves are combined with ``or``.
                 """
                 count = len(s)
                 if count == 0:
                     return "_list('')"
                 if count == 1:
                     return _formatargtype(t, s[0])
                 if t == 'd':
                     numbers = ['%d' % int(a) for a in s]
                     return "_intlist('%s')" % "\0".join(numbers)
                 if t == 's':
                     return "_list(%s)" % _quote("\0".join(s))
                 if t == 'n':
                     hexes = [node.hex(a) for a in s]
                     return "_hexlist('%s')" % "\0".join(hexes)
                 if t == 'b':
                     try:
                         branches = [a.branch() for a in s]
                         return "_list('%s')" % "\0".join(branches)
                     except AttributeError:
                         raise TypeError
                 half = count // 2
                 left = _formatlistexp(s[:half], t)
                 right = _formatlistexp(s[half:], t)
                 return '(%s or %s)' % (left, right)
             def _formatparamexp(args, t):
                 """Render ``args`` as comma-separated function parameters of type ``t``"""
                 rendered = [_formatargtype(t, a) for a in args]
                 return ', '.join(rendered)
             # format-character prefix -> function rendering a whole sequence of
             # arguments: 'l' gives a parenthesized list, 'p' gives function parameters
             _formatlistfuncs = {
                 'l': _formatlistexp,
                 'p': _formatparamexp,
             }
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.

                 Supported arguments:

                 %r = revset expression, parenthesized
                 %d = rev(int(arg)), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'

                 Prefixing the type with 'l' specifies a parenthesized list of that type,
                 and 'p' specifies a list of function parameters of that type.

                 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec(b'%d:: and not %d::', 10, 20)
                 'rev(10):: and not rev(20)::'
                 >>> formatspec(b'%ld or %ld', [], [1])
                 "_list('') or rev(1)"
                 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: b'default'
                 >>> b.branch = b
                 >>> formatspec(b'branch(%b)', b)
                 "branch('default')"
                 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
                 "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
                 >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
                 "sort((:), 'desc', 'user')"
                 >>> formatspec(b'%ls', [b'a', b"'"])
                 "_list('a\\\\x00\\\\'')"
                 '''
                 parsed = _parseargs(expr, args)
                 ret = []
                 for t, arg in parsed:
                     if t is None:
                         # already a formatted fragment, use it verbatim
                         ret.append(arg)
                     else:
                         # _parseargs only emits None-typed items; anything else means
                         # the two functions have drifted apart
                         raise error.ProgrammingError("unknown revspec item type: %r" % t)
                 return b''.join(ret)

             def _parseargs(expr, args):
                 """parse the expression and replace all inexpensive args

                 return a list of tuple [(arg-type, arg-value)]

                 Arg-type can be:
                 * None: a string ready to be concatenated into a final spec

                 Raises error.ParseError when a '%' is not followed by a format
                 character, when fewer or more arguments are supplied than the
                 expression consumes, or when formatting an argument raises
                 TypeError or ValueError.
                 """
                 expr = pycompat.bytestr(expr)
                 argiter = iter(args)
                 ret = []
                 pos = 0
                 while pos < len(expr):
                     q = expr.find('%', pos)
                     if q < 0:
                         # no more format items: the remainder is literal text
                         ret.append((None, expr[pos:]))
                         break
                     # literal text preceding the '%'
                     ret.append((None, expr[pos:q]))
                     pos = q + 1
                     try:
                         d = expr[pos]
                     except IndexError:
                         raise error.ParseError(_('incomplete revspec format character'))
                     if d == '%':
                         # '%%' is an escaped literal '%' and consumes no argument
                         ret.append((None, d))
                         pos += 1
                         continue
                     try:
                         arg = next(argiter)
                     except StopIteration:
                         raise error.ParseError(_('missing argument for revspec'))
                     f = _formatlistfuncs.get(d)
                     if f:
                         # a list of some type, might be expensive, do not replace
                         # NOTE(review): despite the remark above, the list is still
                         # formatted right here and emitted as a ready (None, ...) item.
                         pos += 1
                         try:
                             # the character after the list prefix is the element type
                             d = expr[pos]
                         except IndexError:
                             raise error.ParseError(_('incomplete revspec format character'))
                         try:
                             ret.append((None, f(list(arg), d)))
                         except (TypeError, ValueError):
                             raise error.ParseError(_('invalid argument for revspec'))
                     else:
                         # a single entry, not expensive, replace
                         try:
                             ret.append((None, _formatargtype(d, arg)))
                         except (TypeError, ValueError):
                             raise error.ParseError(_('invalid argument for revspec'))
                     pos += 1
                 # every argument must have been consumed by a format item
                 try:
                     next(argiter)
                     raise error.ParseError(_('too many revspec arguments specified'))
                 except StopIteration:
                     pass
                 return ret
             def prettyformat(tree):
                 """Return ``parser.prettyformat`` output for a revset parse tree.

                 'string' and 'symbol' are passed as the second argument
                 (presumably the leaf node types -- confirm against
                 parser.prettyformat).
                 """
                 leafnodes = ('string', 'symbol')
                 return parser.prettyformat(tree, leafnodes)
             def depth(tree):
                 """Return the nesting depth of ``tree``.

                 Anything that is not a tuple has depth 0; a tuple is one level
                 deeper than its deepest element.
                 """
                 if not isinstance(tree, tuple):
                     return 0
                 return 1 + max(depth(child) for child in tree)
             def funcsused(tree):
                 """Return the set of function names called anywhere in ``tree``.

                 Non-tuple values and 'string'/'symbol' nodes contribute nothing.
                 For a 'func' node the name is read from ``tree[1][1]``.
                 """
                 if not isinstance(tree, tuple):
                     return set()
                 kind = tree[0]
                 if kind in ('string', 'symbol'):
                     return set()
                 found = set()
                 # visit the children first, then record this node's own name
                 for child in tree[1:]:
                     found.update(funcsused(child))
                 if kind == 'func':
                     found.add(tree[1][1])
                 return found
             # Pattern for 1 to 40 hexadecimal digits followed by the end of the string;
             # it is applied with match(), so the digits must also start the string.
             _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
             def _ishashlikesymbol(symbol):
                 """Tell whether ``symbol`` looks like a hash.

                 Returns the result of ``_hashre.match(symbol)``, i.e. a truthy match
                 object when the symbol matches (presumably None otherwise -- this is
                 not a literal boolean).
                 """
                 matched = _hashre.match(symbol)
                 return matched
             def gethashlikesymbols(tree):
                 """returns the list of symbols of the tree that look like hashes

                 >>> gethashlikesymbols(parse(b'3::abe3ff'))
                 ['3', 'abe3ff']
                 >>> gethashlikesymbols(parse(b'precursors(.)'))
                 []
                 >>> gethashlikesymbols(parse(b'precursors(34)'))
                 ['34']
                 >>> gethashlikesymbols(parse(b'abe3ffZ'))
                 []
                 """
                 if not tree:
                     return []
                 head = tree[0]
                 if head == "symbol":
                     name = tree[1]
                     return [name] if _ishashlikesymbol(name) else []
                 # NOTE(review): nodes with fewer than three elements (e.g. a
                 # one-operand node) are not traversed -- confirm this is intended.
                 if len(tree) < 3:
                     return []
                 found = []
                 for child in tree[1:]:
                     found.extend(gethashlikesymbols(child))
                 return found