upstream/mercurial-mirror Commit - r39360:b902b177

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

util,

18

util,

19

)

19

)

20

from .utils import (

20

from .utils import (

21

stringutil,

21

stringutil,

22

)

22

)

23

# Operator table for the revset grammar; it is passed to parser.parser() when
# a spec is parsed. Each entry maps a token type to a 5-tuple as described by
# the column comment below. An infix/prefix rule is (node-name, binding) and,
# for bracketing operators, additionally the closing token.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}

54

# symbols that tokenize() emits as operator tokens rather than as 'symbol'
keywords = {'and', 'or', 'not'}

# table consulted by _optimize() (symbols.get(name)) to read a function's
# '_weight' attribute; empty here -- presumably filled in by another module,
# TODO confirm
symbols = {}

# characters that open (and close) a quoted string
_quoteletters = {'"', "'"}
# characters that each form a one-character operator token on their own
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))

70

def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    ``program`` must be a bytes object; anything else raises
    ``error.ProgrammingError``.

    ``lookup``, if given, is called with a candidate symbol and should
    return a true value when that symbol is known. It is used to accept
    old-style ``a:b`` ranges and dashed names (``foo-bar``) as plain symbols
    before falling back to operator parsing.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Each yielded token is a ``(type, value, position)`` tuple; the stream
    always finishes with an ``('end', None, position)`` token.
    ``error.ParseError`` is raised for an unterminated string or for a
    character that cannot start any token.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % program)
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step onto the quote and keep escapes verbatim
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # the input looks like foo-bar-baz; check whether the whole
                # thing is a known symbol before splitting it up
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        # report the '-' at its own offset (s); using the end
                        # of the whole dashed symbol (pos) would make parse
                        # errors point at the wrong column
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # step back one: the shared 'pos += 1' below re-advances
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)

192

193

# helpers

193

# helpers

194

195

_notset = object()

195

_notset = object()

196

def getsymbol(x):
    """Return the name held by a ``('symbol', name)`` node

    Raises ``error.ParseError`` if ``x`` is empty or is not a symbol node.
    """
    if x and x[0] == 'symbol':
        return x[1]
    raise error.ParseError(_('not a symbol'))

201

def getstring(x, err):
    """Return the text of a ``'string'`` or ``'symbol'`` node

    Raises ``error.ParseError(err)`` if ``x`` is empty or is any other kind
    of node.
    """
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)

206

def getinteger(x, err, default=_notset):
    """Return the integer value of a string/symbol node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned
    as is. Otherwise the node text is converted with ``int()``;
    ``error.ParseError(err)`` is raised when ``x`` is not a string/symbol
    node or its text is not a valid integer.
    """
    if not x and default is not _notset:
        return default
    try:
        return int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)

214

def getboolean(x, err):
    """Return the boolean value spelled by a symbol node

    The symbol name is interpreted by ``stringutil.parsebool()``; when that
    returns None, ``error.ParseError(err)`` is raised. A non-symbol ``x``
    makes ``getsymbol()`` raise its own ParseError first.
    """
    value = stringutil.parsebool(getsymbol(x))
    if value is not None:
        return value
    raise error.ParseError(err)

220

def getlist(x):
    """Return the argument nodes of ``x`` as a Python list

    An empty ``x`` gives ``[]``, a ``('list', a, b, ...)`` node gives
    ``[a, b, ...]``, and any other node is wrapped as a one-element list.
    """
    if not x:
        return []
    if x[0] == 'list':
        return list(x[1:])
    return [x]

227

def getrange(x, err):
    """Return the ``(first, last)`` operand nodes of a range expression

    A missing side is returned as None: ``rangepre`` has no first operand,
    ``rangepost`` has no last operand, and ``rangeall`` has neither. Raises
    ``error.ParseError(err)`` if ``x`` is empty or is not a range node.
    """
    if not x:
        raise error.ParseError(err)
    op = x[0]
    if op == 'range':
        return x[1], x[2]
    elif op == 'rangepre':
        return None, x[1]
    elif op == 'rangepost':
        return x[1], None
    elif op == 'rangeall':
        return None, None
    raise error.ParseError(err)

241

def getargs(x, min, max, err):
    """Return the arguments of ``x`` as a list after checking their count

    Raises ``error.ParseError(err)`` when there are fewer than ``min``
    arguments, or more than ``max`` of them. A negative ``max`` disables the
    upper bound.
    """
    l = getlist(x)
    if len(l) < min or (max >= 0 and len(l) > max):
        raise error.ParseError(err)
    return l

247

def getargsdict(x, funcname, keys):
    """Build the argument dict of ``funcname`` from the argument nodes of ``x``

    ``keys`` is an argument spec, split by ``parser.splitargspec()``; the
    actual mapping is delegated to ``parser.buildargsdict()`` using
    'keyvalue' nodes for ``key=value`` pairs and 'symbol' nodes for keys.
    """
    return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                keyvaluenode='keyvalue', keynode='symbol')

251

252

# cache of {spec: raw parsed tree} built internally

252

# cache of {spec: raw parsed tree} built internally

253

_treecache = {}

253

_treecache = {}

254

255

def _cachedtree(spec):

255

def _cachedtree(spec):

256

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

256

# thread safe because parse() is reentrant and dict.__setitem__() is atomic

257

tree = _treecache.get(spec)

257

tree = _treecache.get(spec)

258

if tree is None:

258

if tree is None:

259

_treecache[spec] = tree = parse(spec)

259

_treecache[spec] = tree = parse(spec)

260

return tree

260

return tree

261

def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    Each ``_`` symbol in the (cached) parse tree of ``tmplspec`` is a
    placeholder filled from ``repls``; the substitution itself is done by
    ``parser.buildtree()``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    template = _cachedtree(tmplspec)
    return parser.buildtree(template, ('symbol', '_'), *repls)

270

def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    ``_`` symbols in the (cached) parse tree of ``patspec`` act as
    placeholders. Matching is delegated to ``parser.matchtree()``, which is
    also given the node types 'keyvalue' and 'list'; per the examples below,
    a placeholder does not match an empty or multi-element argument list.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    pattern = _cachedtree(patspec)
    return parser.matchtree(pattern, tree, ('symbol', '_'),
                            {'keyvalue', 'list'})

282

def _matchonly(revs, bases):
    """Match ``revs and bases`` against ``ancestors(_) and not ancestors(_)``

    Returns whatever ``_match()`` returns for the combined 'and' node.
    """
    return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))

285

286

def _fixops(x):

286

def _fixops(x):

287

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

287

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

288

handled well by our simple top-down parser"""

288

handled well by our simple top-down parser"""

289

if not isinstance(x, tuple):

289

if not isinstance(x, tuple):

290

return x

290

return x

291

292

op = x[0]

292

op = x[0]

293

if op == 'parent':

293

if op == 'parent':

294

# x^:y means (x^) : y, not x ^ (:y)

294

# x^:y means (x^) : y, not x ^ (:y)

295

# x^: means (x^) :, not x ^ (:)

295

# x^: means (x^) :, not x ^ (:)

296

post = ('parentpost', x[1])

296

post = ('parentpost', x[1])

297

if x[2][0] == 'dagrangepre':

297

if x[2][0] == 'dagrangepre':

298

return _fixops(('dagrange', post, x[2][1]))

298

return _fixops(('dagrange', post, x[2][1]))

299

elif x[2][0] == 'dagrangeall':

299

elif x[2][0] == 'dagrangeall':

300

return _fixops(('dagrangepost', post))

300

return _fixops(('dagrangepost', post))

301

elif x[2][0] == 'rangepre':

301

elif x[2][0] == 'rangepre':

302

return _fixops(('range', post, x[2][1]))

302

return _fixops(('range', post, x[2][1]))

303

elif x[2][0] == 'rangeall':

303

elif x[2][0] == 'rangeall':

304

return _fixops(('rangepost', post))

304

return _fixops(('rangepost', post))

305

elif op == 'or':

305

elif op == 'or':

306

# make number of arguments deterministic:

306

# make number of arguments deterministic:

307

# x + y + z -> (or x y z) -> (or (list x y z))

307

# x + y + z -> (or x y z) -> (or (list x y z))

308

return (op, _fixops(('list',) + x[1:]))

308

return (op, _fixops(('list',) + x[1:]))

309

elif op == 'subscript' and x[1][0] == 'relation':

309

elif op == 'subscript' and x[1][0] == 'relation':

310

# x#y[z] ternary

310

# x#y[z] ternary

311

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

311

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

312

313

return (op,) + tuple(_fixops(y) for y in x[1:])

313

return (op,) + tuple(_fixops(y) for y in x[1:])

314

315

def _analyze(x):

315

def _analyze(x):

316

if x is None:

316

if x is None:

317

return x

317

return x

318

319

op = x[0]

319

op = x[0]

320

if op == 'minus':

320

if op == 'minus':

321

return _analyze(_build('_ and not _', *x[1:]))

321

return _analyze(_build('_ and not _', *x[1:]))

322

elif op == 'only':

322

elif op == 'only':

323

return _analyze(_build('only(_, _)', *x[1:]))

323

return _analyze(_build('only(_, _)', *x[1:]))

324

elif op == 'onlypost':

324

elif op == 'onlypost':

325

return _analyze(_build('only(_)', x[1]))

325

return _analyze(_build('only(_)', x[1]))

326

elif op == 'dagrangeall':

326

elif op == 'dagrangeall':

327

raise error.ParseError(_("can't use '::' in this context"))

327

raise error.ParseError(_("can't use '::' in this context"))

328

elif op == 'dagrangepre':

328

elif op == 'dagrangepre':

329

return _analyze(_build('ancestors(_)', x[1]))

329

return _analyze(_build('ancestors(_)', x[1]))

330

elif op == 'dagrangepost':

330

elif op == 'dagrangepost':

331

return _analyze(_build('descendants(_)', x[1]))

331

return _analyze(_build('descendants(_)', x[1]))

332

elif op == 'negate':

332

elif op == 'negate':

333

s = getstring(x[1], _("can't negate that"))

333

s = getstring(x[1], _("can't negate that"))

334

return _analyze(('string', '-' + s))

334

return _analyze(('string', '-' + s))

335

elif op in ('string', 'symbol'):

335

elif op in ('string', 'symbol'):

336

return x

336

return x

337

elif op == 'rangeall':

337

elif op == 'rangeall':

338

return (op, None)

338

return (op, None)

339

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

339

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

340

return (op, _analyze(x[1]))

340

return (op, _analyze(x[1]))

341

elif op == 'group':

341

elif op == 'group':

342

return _analyze(x[1])

342

return _analyze(x[1])

343

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

343

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

344

'subscript'}:

344

'subscript'}:

345

ta = _analyze(x[1])

345

ta = _analyze(x[1])

346

tb = _analyze(x[2])

346

tb = _analyze(x[2])

347

return (op, ta, tb)

347

return (op, ta, tb)

348

elif op == 'relsubscript':

348

elif op == 'relsubscript':

349

ta = _analyze(x[1])

349

ta = _analyze(x[1])

350

tb = _analyze(x[2])

350

tb = _analyze(x[2])

351

tc = _analyze(x[3])

351

tc = _analyze(x[3])

352

return (op, ta, tb, tc)

352

return (op, ta, tb, tc)

353

elif op == 'list':

353

elif op == 'list':

354

return (op,) + tuple(_analyze(y) for y in x[1:])

354

return (op,) + tuple(_analyze(y) for y in x[1:])

355

elif op == 'keyvalue':

355

elif op == 'keyvalue':

356

return (op, x[1], _analyze(x[2]))

356

return (op, x[1], _analyze(x[2]))

357

elif op == 'func':

357

elif op == 'func':

358

f = getsymbol(x[1])

358

f = getsymbol(x[1])

359

if f == 'revset':

359

if f == 'revset':

360

return _analyze(x[2])

360

return _analyze(x[2])

361

return (op, x[1], _analyze(x[2]))

361

return (op, x[1], _analyze(x[2]))

362

raise ValueError('invalid operator %r' % op)

362

raise ValueError('invalid operator %r' % op)

363

def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.

    This is the public entry point; the work is done by ``_analyze()``.
    """
    return _analyze(x)

372

373

def _optimize(x):

373

def _optimize(x):

374

if x is None:

374

if x is None:

375

return 0, x

375

return 0, x

376

377

op = x[0]

377

op = x[0]

378

if op in ('string', 'symbol'):

378

if op in ('string', 'symbol'):

379

return 0.5, x # single revisions are small

379

return 0.5, x # single revisions are small

380

elif op == 'and':

380

elif op == 'and':

381

wa, ta = _optimize(x[1])

381

wa, ta = _optimize(x[1])

382

wb, tb = _optimize(x[2])

382

wb, tb = _optimize(x[2])

383

w = min(wa, wb)

383

w = min(wa, wb)

384

385

# (draft/secret/_notpublic() & ::x) have a fast path

385

# (draft/secret/_notpublic() & ::x) have a fast path

386

m = _match('_() & ancestors(_)', ('and', ta, tb))

386

m = _match('_() & ancestors(_)', ('and', ta, tb))

387

if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:

387

if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:

388

return w, _build('_phaseandancestors(_, _)', m[1], m[2])

388

return w, _build('_phaseandancestors(_, _)', m[1], m[2])

389

390

# (::x and not ::y)/(not ::y and ::x) have a fast path

390

# (::x and not ::y)/(not ::y and ::x) have a fast path

391

m = _matchonly(ta, tb) or _matchonly(tb, ta)

391

m = _matchonly(ta, tb) or _matchonly(tb, ta)

392

if m:

392

if m:

393

return w, _build('only(_, _)', *m[1:])

393

return w, _build('only(_, _)', *m[1:])

394

395

m = _match('not _', tb)

395

m = _match('not _', tb)

396

if m:

396

if m:

397

return wa, ('difference', ta, m[1])

397

return wa, ('difference', ta, m[1])

398

if wa > wb:

398

if wa > wb:

399

op = 'andsmally'

399

op = 'andsmally'

400

return w, (op, ta, tb)

400

return w, (op, ta, tb)

401

elif op == 'or':

401

elif op == 'or':

402

# fast path for machine-generated expression, that is likely to have

402

# fast path for machine-generated expression, that is likely to have

403

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

403

# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'

404

ws, ts, ss = [], [], []

404

ws, ts, ss = [], [], []

405

def flushss():

405

def flushss():

406

if not ss:

406

if not ss:

407

return

407

return

408

if len(ss) == 1:

408

if len(ss) == 1:

409

w, t = ss[0]

409

w, t = ss[0]

410

else:

410

else:

411

s = '\0'.join(t[1] for w, t in ss)

411

s = '\0'.join(t[1] for w, t in ss)

412

y = _build('_list(_)', ('string', s))

412

y = _build('_list(_)', ('string', s))

413

w, t = _optimize(y)

413

w, t = _optimize(y)

414

ws.append(w)

414

ws.append(w)

415

ts.append(t)

415

ts.append(t)

416

del ss[:]

416

del ss[:]

417

for y in getlist(x[1]):

417

for y in getlist(x[1]):

418

w, t = _optimize(y)

418

w, t = _optimize(y)

419

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

419

if t is not None and (t[0] == 'string' or t[0] == 'symbol'):

420

ss.append((w, t))

420

ss.append((w, t))

421

continue

421

continue

422

flushss()

422

flushss()

423

ws.append(w)

423

ws.append(w)

424

ts.append(t)

424

ts.append(t)

425

flushss()

425

flushss()

426

if len(ts) == 1:

426

if len(ts) == 1:

427

return ws[0], ts[0] # 'or' operation is fully optimized out

427

return ws[0], ts[0] # 'or' operation is fully optimized out

428

return max(ws), (op, ('list',) + tuple(ts))

428

return max(ws), (op, ('list',) + tuple(ts))

429

elif op == 'not':

429

elif op == 'not':

430

# Optimize not public() to _notpublic() because we have a fast version

430

# Optimize not public() to _notpublic() because we have a fast version

431

if _match('public()', x[1]):

431

if _match('public()', x[1]):

432

o = _optimize(_build('_notpublic()'))

432

o = _optimize(_build('_notpublic()'))

433

return o[0], o[1]

433

return o[0], o[1]

434

else:

434

else:

435

o = _optimize(x[1])

435

o = _optimize(x[1])

436

return o[0], (op, o[1])

436

return o[0], (op, o[1])

437

elif op == 'rangeall':

437

elif op == 'rangeall':

438

return 1, x

438

return 1, x

439

elif op in ('rangepre', 'rangepost', 'parentpost'):

439

elif op in ('rangepre', 'rangepost', 'parentpost'):

440

o = _optimize(x[1])

440

o = _optimize(x[1])

441

return o[0], (op, o[1])

441

return o[0], (op, o[1])

442

elif op in ('dagrange', 'range'):

442

elif op in ('dagrange', 'range'):

443

wa, ta = _optimize(x[1])

443

wa, ta = _optimize(x[1])

444

wb, tb = _optimize(x[2])

444

wb, tb = _optimize(x[2])

445

return wa + wb, (op, ta, tb)

445

return wa + wb, (op, ta, tb)

446

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

446

elif op in ('parent', 'ancestor', 'relation', 'subscript'):

447

w, t = _optimize(x[1])

447

w, t = _optimize(x[1])

448

return w, (op, t, x[2])

448

return w, (op, t, x[2])

449

elif op == 'relsubscript':

449

elif op == 'relsubscript':

450

w, t = _optimize(x[1])

450

w, t = _optimize(x[1])

451

return w, (op, t, x[2], x[3])

451

return w, (op, t, x[2], x[3])

452

elif op == 'list':

452

elif op == 'list':

453

ws, ts = zip(*(_optimize(y) for y in x[1:]))

453

ws, ts = zip(*(_optimize(y) for y in x[1:]))

454

return sum(ws), (op,) + ts

454

return sum(ws), (op,) + ts

455

elif op == 'keyvalue':

455

elif op == 'keyvalue':

456

w, t = _optimize(x[2])

456

w, t = _optimize(x[2])

457

return w, (op, x[1], t)

457

return w, (op, x[1], t)

458

elif op == 'func':

458

elif op == 'func':

459

f = getsymbol(x[1])

459

f = getsymbol(x[1])

460

wa, ta = _optimize(x[2])

460

wa, ta = _optimize(x[2])

461

w = getattr(symbols.get(f), '_weight', 1)

461

w = getattr(symbols.get(f), '_weight', 1)

462

m = _match('commonancestors(_)', ta)

462

m = _match('commonancestors(_)', ta)

463

464

# Optimize heads(commonancestors(_)) because we have a fast version

464

# Optimize heads(commonancestors(_)) because we have a fast version

465

if f == 'heads' and m:

465

if f == 'heads' and m:

466

return w + wa, _build('_commonancestorheads(_)', m[1])

466

return w + wa, _build('_commonancestorheads(_)', m[1])

467

468

return w + wa, (op, x[1], ta)

468

return w + wa, (op, x[1], ta)

469

raise ValueError('invalid operator %r' % op)

469

raise ValueError('invalid operator %r' % op)

470

def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.

    Returns only the rewritten tree; the weight computed by ``_optimize()``
    is discarded.
    """
    _weight, newtree = _optimize(tree)
    return newtree

478

# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions
# (the default initial letters plus '$')
_aliassyminitletters = _syminitletters | {'$'}

482

def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` and ``syminitletters`` are forwarded to ``tokenize()``.
    ``error.ParseError`` is raised when the parser stops before the end of
    ``spec``. The resulting tree has its 'list' and 'or' infix chains
    simplified and is then passed through ``_fixops()``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    # a spec wrapped in revset(...) is tokenized without symbol lookup
    if lookup and spec.startswith('revset(') and spec.endswith(')'):
        lookup = None
    p = parser.parser(elements)
    tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                 syminitletters=syminitletters))
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))

505

506

class _aliasrules(parser.basealiasrules):
    """Rules for parsing and expanding revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse the alias declaration or definition ``spec``

        ``$`` is additionally accepted as the initial letter of a symbol
        (for backward compatibility). Callers are expected to check whether
        ``$`` ended up being used for unexpected symbols.
        """
        tree = _parsewith(spec, syminitletters=_aliassyminitletters)
        return tree

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, arguments)`` if ``tree`` is a call of a plain
        symbol, or None otherwise"""
        isfunc = tree[0] == 'func' and tree[1][0] == 'symbol'
        if not isfunc:
            return None
        return tree[1][1], getlist(tree[2])

524

525

def expandaliases(tree, aliases, warn=None):
    """Expand aliases in ``tree``; ``aliases`` is a list of (name, value)
    tuples

    When ``warn`` is given, it is called once for every alias that carries
    an error and has not been warned about yet.
    """
    aliasmap = _aliasrules.buildmap(aliases)
    expanded = _aliasrules.expand(aliasmap, tree)
    if warn is None:
        return expanded
    # report problematic aliases, including those the tree never referred to
    for _name, entry in sorted(aliasmap.iteritems()):
        if not entry.error or entry.warned:
            continue
        warn(_('warning: %s\n') % (entry.error))
        entry.warned = True
    return expanded

536

537

def foldconcat(tree):
    """Collapse every `##` concatenation in ``tree`` into one string node

    Non-tuple values and 'string'/'symbol' nodes are returned unchanged.
    A ParseError is raised when `##` is applied to anything other than a
    string or a symbol.
    """
    if not isinstance(tree, tuple):
        return tree
    op = tree[0]
    if op in ('string', 'symbol'):
        return tree
    if op != '_concat':
        return tuple(foldconcat(t) for t in tree)

    # Walk the (possibly nested) concatenation left to right with an
    # explicit stack, collecting the text of each leaf.
    stack = [tree]
    parts = []
    while stack:
        item = stack.pop()
        kind = item[0]
        if kind == '_concat':
            stack.extend(reversed(item[1:]))
            continue
        if kind not in ('string', 'symbol'):
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
        parts.append(item[1])
    return ('string', ''.join(parts))

557

558

def parse(spec, lookup=None):
    """Parse ``spec`` into a tree, decorating parse errors with a hint

    If the ParseError carries a location, its ``hint`` is set to the spec
    followed by a caret line marking that location. The error is always
    re-raised.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        haslocation = len(inst.args) > 1
        if haslocation:
            loc = inst.args[1]
            # Newlines are replaced by spaces (equivalent whitespace) so
            # the spec stays on one line above the caret.
            flatspec = spec.replace('\n', ' ')
            # The caret should point at the place in the revset that failed
            # to parse. A hint is displayed with an open paren in front of
            # it, hence "loc + 1" spaces instead of "loc".
            caretline = ' ' * (loc + 1) + '^ ' + _('here')
            inst.hint = flatspec + '\n' + caretline
        raise

572

573

def _quote(s):
    r"""Turn a value into a single-quoted, escaped revset string literal.

    The value is converted with ``pycompat.bytestr`` and escaped with
    ``stringutil.escapestr`` before being wrapped in single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = stringutil.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped

586

587

def _formatargtype(c, arg):

587

def _formatargtype(c, arg):

588

if c == 'd':

588

if c == 'd':

589

return '%d' % int(arg)

589

return '%d' % int(arg)

590

elif c == 's':

590

elif c == 's':

591

return _quote(arg)

591

return _quote(arg)

592

elif c == 'r':

592

elif c == 'r':

593

if not isinstance(arg, bytes):

593

if not isinstance(arg, bytes):

594

raise TypeError

594

raise TypeError

595

parse(arg) # make sure syntax errors are confined

595

parse(arg) # make sure syntax errors are confined

596

return '(%s)' % arg

596

return '(%s)' % arg

597

elif c == 'n':

597

elif c == 'n':

598

return _quote(node.hex(arg))

598

return _quote(node.hex(arg))

599

elif c == 'b':

599

elif c == 'b':

600

try:

600

try:

601

return _quote(arg.branch())

601

return _quote(arg.branch())

602

except AttributeError:

602

except AttributeError:

603

raise TypeError

603

raise TypeError

604

raise error.ParseError(_('unexpected revspec format character %s') % c)

604

raise error.ParseError(_('unexpected revspec format character %s') % c)

605

606

def _formatlistexp(s, t):

606

def _formatlistexp(s, t):

607

l = len(s)

607

l = len(s)

608

if l == 0:

608

if l == 0:

609

return "_list('')"

609

return "_list('')"

610

elif l == 1:

610

elif l == 1:

611

return _formatargtype(t, s[0])

611

return _formatargtype(t, s[0])

612

elif t == 'd':

612

elif t == 'd':

613

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

613

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

614

elif t == 's':

614

elif t == 's':

615

return "_list(%s)" % _quote("\0".join(s))

615

return "_list(%s)" % _quote("\0".join(s))

616

elif t == 'n':

616

elif t == 'n':

617

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

617

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

618

elif t == 'b':

618

elif t == 'b':

619

try:

619

try:

620

return "_list('%s')" % "\0".join(a.branch() for a in s)

620

return "_list('%s')" % "\0".join(a.branch() for a in s)

621

except AttributeError:

621

except AttributeError:

622

raise TypeError

622

raise TypeError

623

624

m = l // 2

624

m = l // 2

625

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

625

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

626

627

def _formatparamexp(args, t):
    """Format each element of ``args`` as type ``t`` and join the results
    with ', ' so they can be used as function parameters"""
    rendered = [_formatargtype(t, a) for a in args]
    return ', '.join(rendered)

629

630

# list-prefix character of a format spec -> function formatting the list
_formatlistfuncs = {'l': _formatlistexp, 'p': _formatparamexp}

634

635

def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = int(arg), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    '10:: and not 20::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or 1"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    expr = pycompat.bytestr(expr)

    def _typechar(at):
        # character at offset ``at``, or a ParseError if the spec ends there
        try:
            return expr[at]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))

    remaining = iter(args)
    pieces = []
    pos = 0
    while pos < len(expr):
        q = expr.find('%', pos)
        if q < 0:
            # no more format characters: keep the tail verbatim
            pieces.append(expr[pos:])
            break
        pieces.append(expr[pos:q])
        pos = q + 1
        d = _typechar(pos)
        if d == '%':
            pieces.append(d)
            pos += 1
            continue

        try:
            arg = next(remaining)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        listfunc = _formatlistfuncs.get(d)
        if listfunc:
            # a list of some type: the element type character follows
            pos += 1
            d = _typechar(pos)
        try:
            if listfunc:
                piece = listfunc(list(arg), d)
            else:
                piece = _formatargtype(d, arg)
        except (TypeError, ValueError):
            raise error.ParseError(_('invalid argument for revspec'))
        pieces.append(piece)
        pos += 1

    # every argument must have been consumed by a format character
    exhausted = object()
    if next(remaining, exhausted) is not exhausted:
        raise error.ParseError(_('too many revspec arguments specified'))
    return ''.join(pieces)

721

722

def prettyformat(tree):
    """Pretty-format ``tree`` via ``parser.prettyformat``, passing 'string'
    and 'symbol' as its second argument"""
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)

724

725

def depth(tree):
    """Return the nesting depth of ``tree``

    Anything that is not a tuple has depth 0; a tuple is one level deeper
    than its deepest element.
    """
    if not isinstance(tree, tuple):
        return 0
    return 1 + max(depth(element) for element in tree)

730

731

def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``

    Non-tuple values and 'string'/'symbol' nodes contribute nothing.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    used = set()
    # gather the names used by the sub-trees first
    used.update(*[funcsused(child) for child in tree[1:]])
    if tree[0] == 'func':
        used.add(tree[1][1])
    return used

741

742

# 1 to 40 hexadecimal digits, anchored at the end of the symbol
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

def _ishashlikesymbol(symbol):
    """returns a true value (the regex match) if the symbol looks like a
    hash, None otherwise"""
    matched = _hashre.match(symbol)
    return matched

747

748

def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    Only nodes with at least three elements are descended into; shorter
    non-symbol nodes yield nothing.

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        return [tree[1]] if _ishashlikesymbol(tree[1]) else []
    if len(tree) < 3:
        return []

    found = []
    for subtree in tree[1:]:
        found.extend(gethashlikesymbols(subtree))
    return found

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 util,
             )
             from .utils import (
                 stringutil,
             )
             # Operator table handed to parser.parser() by _parsewith().
             # NOTE(review): prefix/infix entries look like (node-name,
             # binding-strength[, closing token]) and primary/suffix entries like a
             # bare node name; the authoritative interpretation lives in the parser
             # module, which is not visible here -- confirm there.
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             # words that tokenize() emits as operator tokens instead of symbols
             keywords = {'and', 'or', 'not'}
             # table consulted by _optimize() (via symbols.get(name)) for a '_weight'
             # attribute; it starts empty here
             # NOTE(review): presumably filled in by another module -- confirm
             symbols = {}
             # characters that open (and must close) a quoted string in tokenize()
             _quoteletters = {'"', "'"}
             # single-character operators that tokenize() yields as-is
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens

                 Yields ``(type, value, position)`` tuples and always finishes with an
                 ``end`` token. ``value`` is None for operators and keywords.

                 ``lookup`` is an optional callable used to recognise existing names:
                 a statement of the form ``a``, ``a:``, ``:b`` or ``a:b`` whose
                 non-empty parts are all accepted by ``lookup`` is tokenized as plain
                 symbols around a single ``:``, and a word containing ``-`` that
                 ``lookup`` accepts is kept as one symbol instead of being split.

                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.

                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.

                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

                 Raises error.ProgrammingError if ``program`` is not bytes, and
                 error.ParseError on an unterminated string or an unexpected character.

                 Check that @ is a valid unquoted token character (issue3686):

                 >>> list(tokenize(b"@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

                 '''
                 if not isinstance(program, bytes):
                     raise error.ProgrammingError('revset statement must be bytes, got %r'
                                                  % program)
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters

                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return

                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             # raw string: skip the prefix and keep the body verbatim
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     # report the '-' at its own offset, not at the
                                     # end of the whole word (pos)
                                     yield ('-', None, s)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         # the symbol scan stopped one past the symbol; step back so
                         # the shared "pos += 1" below lands on the next character
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             # unique sentinel used as getinteger()'s default, so that an explicitly
             # passed default (even None) can be told apart from "no default given"
             _notset = object()
             def getsymbol(x):
                 """Return the name stored in a ``('symbol', name)`` node

                 Raises error.ParseError when ``x`` is empty or not a symbol node.
                 """
                 if not x or x[0] != 'symbol':
                     raise error.ParseError(_('not a symbol'))
                 return x[1]
             def getstring(x, err):
                 """Return the text of a 'string' or 'symbol' node

                 Raises error.ParseError(err) for anything else, including empty ``x``.
                 """
                 if not x or x[0] not in ('string', 'symbol'):
                     raise error.ParseError(err)
                 return x[1]
             def getinteger(x, err, default=_notset):
                 """Return the integer value of a string/symbol node

                 When ``x`` is empty and a ``default`` was supplied, the default is
                 returned instead. Raises error.ParseError(err) if the node is of the
                 wrong type or its text is not a valid integer.
                 """
                 hasdefault = default is not _notset
                 if hasdefault and not x:
                     return default
                 try:
                     value = int(getstring(x, err))
                 except ValueError:
                     raise error.ParseError(err)
                 return value
             def getboolean(x, err):
                 """Return the boolean named by symbol node ``x``

                 The symbol text is interpreted by stringutil.parsebool(); when that
                 yields None, error.ParseError(err) is raised.
                 """
                 parsed = stringutil.parsebool(getsymbol(x))
                 if parsed is None:
                     raise error.ParseError(err)
                 return parsed
             def getlist(x):
                 """Return the operands of ``x`` as a Python list

                 An empty ``x`` gives ``[]``, a 'list' node gives its elements, and any
                 other node is wrapped in a one-element list.
                 """
                 if not x:
                     return []
                 return list(x[1:]) if x[0] == 'list' else [x]
             def getrange(x, err):
                 """Return the ``(first, last)`` operands of a range node

                 A missing side is reported as None. Raises error.ParseError(err) when
                 ``x`` is empty or is not one of the four range node types.
                 """
                 if not x:
                     raise error.ParseError(err)
                 kind = x[0]
                 if kind == 'rangeall':
                     return None, None
                 if kind == 'rangepost':
                     return x[1], None
                 if kind == 'rangepre':
                     return None, x[1]
                 if kind == 'range':
                     return x[1], x[2]
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 """Return the argument list of ``x`` after checking its length

                 At least ``min`` arguments are required; ``max`` is only enforced when
                 it is not negative. Raises error.ParseError(err) on a violation.
                 """
                 args = getlist(x)
                 count = len(args)
                 if count < min:
                     raise error.ParseError(err)
                 if max >= 0 and count > max:
                     raise error.ParseError(err)
                 return args
             def getargsdict(x, funcname, keys):
                 """Build the argument dict of ``x`` via parser.buildargsdict()

                 ``keys`` is split with parser.splitargspec(); 'keyvalue' and 'symbol'
                 are passed as the key-value and key node types.
                 """
                 args = getlist(x)
                 argspec = parser.splitargspec(keys)
                 return parser.buildargsdict(args, funcname, argspec,
                                             keyvaluenode='keyvalue', keynode='symbol')
             # {spec: raw parsed tree} memo filled lazily by _cachedtree()
             _treecache = {}
             def _cachedtree(spec):
                 """Return the raw parsed tree of ``spec``, parsing it at most once"""
                 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
                 cached = _treecache.get(spec)
                 if cached is not None:
                     return cached
                 tree = parse(spec)
                 _treecache[spec] = tree
                 return tree
             def _build(tmplspec, *repls):
                 """Instantiate a template revset statement as a raw parsed tree

                 The template is parsed (and cached) and handed to parser.buildtree()
                 together with the ``_`` placeholder symbol and ``repls``.

                 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 placeholder = ('symbol', '_')
                 return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
             def _match(patspec, tree):
                 """Match ``tree`` against a pattern statement and return the matches

                 The pattern is parsed (and cached) and handed to parser.matchtree()
                 with the ``_`` placeholder symbol and the node types 'keyvalue' and
                 'list'.

                 >>> _match(b'f(_)', parse(b'f()'))
                 >>> _match(b'f(_)', parse(b'f(1)'))
                 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
                 >>> _match(b'f(_)', parse(b'f(1, 2)'))
                 """
                 placeholder = ('symbol', '_')
                 compiled = _cachedtree(patspec)
                 return parser.matchtree(compiled, tree, placeholder,
                                         {'keyvalue', 'list'})
             def _matchonly(revs, bases):
                 """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'"""
                 candidate = ('and', revs, bases)
                 return _match('ancestors(_) and not ancestors(_)', candidate)
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser"""
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     post = ('parentpost', x[1])
                     if x[2][0] == 'dagrangepre':
                         return _fixops(('dagrange', post, x[2][1]))
                     elif x[2][0] == 'dagrangeall':
                         return _fixops(('dagrangepost', post))
                     elif x[2][0] == 'rangepre':
                         return _fixops(('range', post, x[2][1]))
                     elif x[2][0] == 'rangeall':
                         return _fixops(('rangepost', post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     return (op, _fixops(('list',) + x[1:]))
                 elif op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
                 return (op,) + tuple(_fixops(y) for y in x[1:])
             def _analyze(x):
                 """Recursively rewrite a raw parsed tree into an evaluatable tree

                 Pseudo operators (``minus``, ``only``, ``onlypost``, ``dagrangepre``,
                 ``dagrangepost``, ``negate``, ``group``) are replaced by equivalent
                 real operators or function calls; other nodes are rebuilt with their
                 operands analyzed. ``None`` is returned unchanged.

                 Raises error.ParseError for a bare ``::`` (``dagrangeall``) or a
                 ``negate`` whose operand is not a string/symbol, and ValueError for
                 an operator that is not handled below.
                 """
                 if x is None:
                     return x
                 op = x[0]
                 if op == 'minus':
                     # x - y  ->  x and not y
                     return _analyze(_build('_ and not _', *x[1:]))
                 elif op == 'only':
                     # x % y  ->  only(x, y)
                     return _analyze(_build('only(_, _)', *x[1:]))
                 elif op == 'onlypost':
                     # x%  ->  only(x)
                     return _analyze(_build('only(_)', x[1]))
                 elif op == 'dagrangeall':
                     raise error.ParseError(_("can't use '::' in this context"))
                 elif op == 'dagrangepre':
                     # ::x  ->  ancestors(x)
                     return _analyze(_build('ancestors(_)', x[1]))
                 elif op == 'dagrangepost':
                     # x::  ->  descendants(x)
                     return _analyze(_build('descendants(_)', x[1]))
                 elif op == 'negate':
                     # -x becomes the string '-x'
                     s = getstring(x[1], _("can't negate that"))
                     return _analyze(('string', '-' + s))
                 elif op in ('string', 'symbol'):
                     return x
                 elif op == 'rangeall':
                     return (op, None)
                 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     # unary nodes: analyze the single operand
                     return (op, _analyze(x[1]))
                 elif op == 'group':
                     # parentheses carry no meaning of their own: drop the node
                     return _analyze(x[1])
                 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                             'subscript'}:
                     # binary nodes: analyze both operands
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     return (op, ta, tb)
                 elif op == 'relsubscript':
                     ta = _analyze(x[1])
                     tb = _analyze(x[2])
                     tc = _analyze(x[3])
                     return (op, ta, tb, tc)
                 elif op == 'list':
                     return (op,) + tuple(_analyze(y) for y in x[1:])
                 elif op == 'keyvalue':
                     # the key (x[1]) is kept as-is; only the value is analyzed
                     return (op, x[1], _analyze(x[2]))
                 elif op == 'func':
                     f = getsymbol(x[1])
                     if f == 'revset':
                         # revset(expr) is replaced by its analyzed argument
                         return _analyze(x[2])
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Turn a raw parsed tree into an evaluatable tree

                 The result can be fed to optimize() or getset(). All pseudo
                 operations should be mapped to real operations or functions defined
                 in methods or symbols table respectively.
                 """
                 evaluatable = _analyze(x)
                 return evaluatable
             def _optimize(x):
                 """Return ``(weight, tree)`` for an evaluatable tree ``x``

                 ``tree`` is ``x`` with several patterns rewritten to dedicated nodes
                 or internal functions (see the branches below). ``None`` yields
                 ``(0, None)``.
                 NOTE(review): ``weight`` looks like a rough relative cost/size
                 estimate used to order the operands of 'and' -- confirm with the
                 evaluator.

                 Raises ValueError for an operator that is not handled below.
                 """
                 if x is None:
                     return 0, x
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return 0.5, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     w = min(wa, wb)
                     # (draft/secret/_notpublic() & ::x) have a fast path
                     m = _match('_() & ancestors(_)', ('and', ta, tb))
                     if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
                         return w, _build('_phaseandancestors(_, _)', m[1], m[2])
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
                     # x and not y  ->  difference(x, y), weighted like x alone
                     m = _match('not _', tb)
                     if m:
                         return wa, ('difference', ta, m[1])
                     # the left operand is the heavier one: use the 'andsmally' node
                     # NOTE(review): presumably evaluated starting from the smaller
                     # right-hand side -- confirm against its implementation
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     # ws/ts collect the weights/trees of the resulting operands; ss
                     # holds the current run of consecutive string/symbol operands
                     ws, ts, ss = [], [], []
                     def flushss():
                         # emit the pending run in ss as one operand and reset it
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             # several trivial operands: join their names with NUL
                             # into a single _list() call
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if _match('public()', x[1]):
                         o = _optimize(_build('_notpublic()'))
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1])
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return 1, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     o = _optimize(x[1])
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     # only the first operand is optimized; x[2] is kept as-is
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     # only the first operand is optimized; x[2] and x[3] are kept
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     ws, ts = zip(*(_optimize(y) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     w, t = _optimize(x[2])
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2])
                     # weight of the function itself: the '_weight' attribute of its
                     # entry in the symbols table, or 1 when there is none
                     w = getattr(symbols.get(f), '_weight', 1)
                     m = _match('commonancestors(_)', ta)
                     # Optimize heads(commonancestors(_)) because we have a fast version
                     if f == 'heads' and m:
                         return w + wa, _build('_commonancestorheads(_)', m[1])
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Return the optimized form of an evaluatable tree

                 All pseudo operations should be transformed beforehand. The weight
                 computed alongside the tree is discarded.
                 """
                 return _optimize(tree)[1]
             # the set of valid characters for the initial letter of symbols in
             # alias declarations and definitions
             _aliassyminitletters = _syminitletters | {'$'}
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Parse ``spec`` into a tree using the given tokenizing options

                 ``lookup`` is dropped when the whole spec is wrapped in ``revset(...)``.
                 Raises error.ParseError('invalid token') when the parser stops before
                 the end of ``spec``.

                 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith(b'$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith(b'foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 if lookup:
                     if spec.startswith('revset(') and spec.endswith(')'):
                         lookup = None
                 p = parser.parser(elements)
                 tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
                 tree, pos = p.parse(tokens)
                 if pos != len(spec):
                     raise error.ParseError(_('invalid token'), pos)
                 simplified = parser.simplifyinfixops(tree, ('list', 'or'))
                 return _fixops(simplified)
             class _aliasrules(parser.basealiasrules):
                 """Rules for parsing and expanding revset aliases"""
                 _section = _('revset alias')
                 @staticmethod
                 def _parse(spec):
                     """Parse an alias declaration or definition ``spec``

                     Symbols may also start with ``$`` here (for backward
                     compatibility); callers should examine whether ``$`` is used
                     also for unexpected symbols or not.
                     """
                     return _parsewith(spec, syminitletters=_aliassyminitletters)
                 @staticmethod
                 def _trygetfunc(tree):
                     """Return ``(name, args)`` if ``tree`` calls a function by symbol

                     Returns None for any other tree.
                     """
                     if tree[0] != 'func':
                         return None
                     callee = tree[1]
                     if callee[0] != 'symbol':
                         return None
                     return callee[1], getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Return ``tree`` with aliases expanded

                 ``aliases`` is a list of (name, value) tuples. When ``warn`` is given
                 it is called once for each alias that carries an error and has not
                 been warned about yet.
                 """
                 aliasmap = _aliasrules.buildmap(aliases)
                 expanded = _aliasrules.expand(aliasmap, tree)
                 if warn is None:
                     return expanded
                 # warn about problematic (but not referred) aliases
                 for name, alias in sorted(aliasmap.iteritems()):
                     if not alias.error or alias.warned:
                         continue
                     warn(_('warning: %s\n') % (alias.error))
                     alias.warned = True
                 return expanded
             def foldconcat(tree):
                 """Collapse every `##` concatenation in ``tree`` into one string node

                 Raises error.ParseError if something other than a string or symbol
                 is being concatenated.
                 """
                 if not isinstance(tree, tuple):
                     return tree
                 head = tree[0]
                 if head in ('string', 'symbol'):
                     return tree
                 if head != '_concat':
                     return tuple(foldconcat(t) for t in tree)
                 # depth-first, left-to-right walk over nested _concat nodes
                 stack = [tree]
                 pieces = []
                 while stack:
                     item = stack.pop()
                     kind = item[0]
                     if kind == '_concat':
                         stack.extend(reversed(item[1:]))
                         continue
                     if kind not in ('string', 'symbol'):
                         msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
                         raise error.ParseError(msg)
                     pieces.append(item[1])
                 return ('string', ''.join(pieces))
             def parse(spec, lookup=None):
                 """Parse ``spec`` into a raw tree

                 A ParseError that carries a location is re-raised with a ``hint``
                 showing the spec and a caret under the offending position.
                 """
                 try:
                     return _parsewith(spec, lookup=lookup)
                 except error.ParseError as inst:
                     if len(inst.args) <= 1:  # no location
                         raise
                     loc = inst.args[1]
                     # Remove newlines -- spaces are equivalent whitespace.
                     flat = spec.replace('\n', ' ')
                     # We want the caret to point to the place in the template that
                     # failed to parse, but in a hint we get a open paren at the
                     # start. Therefore, we print "loc + 1" spaces (instead of "loc")
                     # to line up the caret with the location of the error.
                     caret = ' ' * (loc + 1) + '^ ' + _('here')
                     inst.hint = flat + '\n' + caret
                     raise
             def _quote(s):
                 r"""Return ``s`` escaped and wrapped in single quotes for the revset engine

                 >>> _quote(b'asdf')
                 "'asdf'"
                 >>> _quote(b"asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote(b'asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 escaped = stringutil.escapestr(pycompat.bytestr(s))
                 return "'%s'" % escaped
             def _formatargtype(c, arg):
                 if c == 'd':
                     return '%d' % int(arg)
                 elif c == 's':
                     return _quote(arg)
                 elif c == 'r':
                     if not isinstance(arg, bytes):
                         raise TypeError
                     parse(arg) # make sure syntax errors are confined
                     return '(%s)' % arg
                 elif c == 'n':
                     return _quote(node.hex(arg))
                 elif c == 'b':
                     try:
                         return _quote(arg.branch())
                     except AttributeError:
                         raise TypeError
                 raise error.ParseError(_('unexpected revspec format character %s') % c)
             def _formatlistexp(s, t):
                 l = len(s)
                 if l == 0:
                     return "_list('')"
                 elif l == 1:
                     return _formatargtype(t, s[0])
                 elif t == 'd':
                     return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
                 elif t == 's':
                     return "_list(%s)" % _quote("\0".join(s))
                 elif t == 'n':
                     return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                 elif t == 'b':
                     try:
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     except AttributeError:
                         raise TypeError
                 m = l // 2
                 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
             def _formatparamexp(args, t):
                 return ', '.join(_formatargtype(t, a) for a in args)
             _formatlistfuncs = {
                 'l': _formatlistexp,
                 'p': _formatparamexp,
             }
             def formatspec(expr, *args):
                 '''Expand the directives of a revset template with escaped arguments

                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.

                 Supported arguments:

                 %r = revset expression, parenthesized
                 %d = int(arg), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'

                 Prefixing the type with 'l' specifies a parenthesized list of that type,
                 and 'p' specifies a list of function parameters of that type.

                 error.ParseError is raised when the template ends in the middle of a
                 directive, when there are fewer or more arguments than directives,
                 or when formatting an argument fails with TypeError or ValueError.

                 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec(b'%d:: and not %d::', 10, 20)
                 '10:: and not 20::'
                 >>> formatspec(b'%ld or %ld', [], [1])
                 "_list('') or 1"
                 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: b'default'
                 >>> b.branch = b
                 >>> formatspec(b'branch(%b)', b)
                 "branch('default')"
                 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
                 "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
                 >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
                 "sort((:), 'desc', 'user')"
                 >>> formatspec(b'%ls', [b'a', b"'"])
                 "_list('a\\\\x00\\\\'')"
                 '''
                 spec = pycompat.bytestr(expr)
                 speclen = len(spec)
                 pending = iter(args)
                 exhausted = object()  # sentinel: no argument left to consume
                 chunks = []
                 cur = 0
                 while cur < speclen:
                     mark = spec.find('%', cur)
                     if mark < 0:
                         # no further directive: the tail is literal text
                         chunks.append(spec[cur:])
                         break
                     chunks.append(spec[cur:mark])
                     cur = mark + 1
                     if cur >= speclen:
                         raise error.ParseError(_('incomplete revspec format character'))
                     kind = spec[cur]
                     cur += 1
                     if kind == '%':
                         chunks.append(kind)
                         continue
                     # the argument is consumed before the directive is inspected any
                     # further, so a missing argument is reported first
                     value = next(pending, exhausted)
                     if value is exhausted:
                         raise error.ParseError(_('missing argument for revspec'))
                     listfunc = _formatlistfuncs.get(kind)
                     if listfunc:
                         # a list of some type: the next character is the item type
                         if cur >= speclen:
                             raise error.ParseError(_('incomplete revspec format character'))
                         kind = spec[cur]
                         cur += 1
                     try:
                         if listfunc:
                             rendered = listfunc(list(value), kind)
                         else:
                             rendered = _formatargtype(kind, value)
                     except (TypeError, ValueError):
                         raise error.ParseError(_('invalid argument for revspec'))
                     chunks.append(rendered)
                 if next(pending, exhausted) is not exhausted:
                     raise error.ParseError(_('too many revspec arguments specified'))
                 return ''.join(chunks)
             def prettyformat(tree):
                 """Return the parser.prettyformat() rendering of a revset tree

                 ('string', 'symbol') is passed as the second argument; presumably
                 these are the node types printed as leaves -- confirm against
                 parser.prettyformat.
                 """
                 leafnodes = ('string', 'symbol')
                 return parser.prettyformat(tree, leafnodes)
             def depth(tree):
                 """Return the nesting depth of tree

                 Anything that is not a tuple has depth 0; a tuple is one level deeper
                 than its deepest element.
                 """
                 if not isinstance(tree, tuple):
                     return 0
                 return 1 + max(depth(child) for child in tree)
             def funcsused(tree):
                 """Return the set of function names called anywhere in tree

                 Non-tuples and 'string'/'symbol' nodes contribute nothing. For a
                 'func' node the name is taken from tree[1][1].
                 """
                 if not isinstance(tree, tuple):
                     return set()
                 op = tree[0]
                 if op in ('string', 'symbol'):
                     return set()
                 # collect from the operands first, then record this node's own name
                 found = set().union(*[funcsused(child) for child in tree[1:]])
                 if op == 'func':
                     found.add(tree[1][1])
                 return found
             # Matches text made up of 1 to 40 hexadecimal digits running up to the
             # end ('$'); _ishashlikesymbol() applies it with .match(), so the run of
             # digits must also begin at the start of the symbol.
             _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
             def _ishashlikesymbol(symbol):
                 """Tell whether symbol looks like a hash

                 The value returned is whatever _hashre.match(symbol) yields, which
                 callers use for its truthiness.
                 """
                 found = _hashre.match(symbol)
                 return found
             def gethashlikesymbols(tree):
                 """returns the list of symbols of the tree that look like hashes

                 Operator nodes of any arity are searched, so symbols below unary
                 operators (prefix, postfix, grouping) are reported as well. 'string'
                 nodes are literals and never reported. Anything that is not a
                 non-empty tuple yields an empty list.

                 >>> gethashlikesymbols(parse(b'3::abe3ff'))
                 ['3', 'abe3ff']
                 >>> gethashlikesymbols(parse(b'precursors(.)'))
                 []
                 >>> gethashlikesymbols(parse(b'precursors(34)'))
                 ['34']
                 >>> gethashlikesymbols(parse(b'abe3ffZ'))
                 []
                 >>> gethashlikesymbols(parse(b'::abe3ff'))
                 ['abe3ff']
                 """
                 if not isinstance(tree, tuple) or not tree:
                     return []
                 op = tree[0]
                 if op == "symbol":
                     if _ishashlikesymbol(tree[1]):
                         return [tree[1]]
                     return []
                 if op == "string":
                     # quoted text is a literal value, not a revision symbol
                     return []
                 # Any other node is an operator. Its operands are searched whatever
                 # their number: checking for three or more elements would skip unary
                 # nodes, which are 2-tuples such as ('not', x) or ('parentpost', x).
                 results = []
                 for subtree in tree[1:]:
                     results += gethashlikesymbols(subtree)
                 return results