upstream/mercurial-mirror Commit - r35840:4269971b

1

# revsetlang.py - parser, tokenizer and utility for revision set language

1

# revsetlang.py - parser, tokenizer and utility for revision set language

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import string

10

import string

11

12

from .i18n import _

12

from .i18n import _

13

from . import (

13

from . import (

14

error,

14

error,

15

node,

15

node,

16

parser,

16

parser,

17

pycompat,

17

pycompat,

18

util,

18

util,

19

)

19

)

20

21

# Operator table handed to parser.parser() by _parsewith().
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix

    # grouping, function call, subscript and relation
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),

    # ancestor / parent navigation
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),

    # "-" is a weak infix operator but a strongly binding prefix
    "-": (5, None, ("negate", 19), ("minus", 5), None),

    # DAG ranges ("::" and ".." are declared identically)
    "::": (
        17,
        "dagrangeall",
        ("dagrangepre", 17),
        ("dagrange", 17),
        "dagrangepost",
    ),
    "..": (
        17,
        "dagrangeall",
        ("dagrangepre", 17),
        ("dagrange", 17),
        "dagrangepost",
    ),

    # revision number ranges
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

    # boolean operators, keyword and punctuation spellings
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),

    # argument lists
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),

    # closers, leaves and end of input
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}

51

52

# symbols that tokenize() reports as operator tokens instead of 'symbol'
keywords = {'not', 'and', 'or'}

# function table consulted by _optimize() for an optional ``_weight``
# attribute; empty here -- presumably filled in by the module defining the
# revset functions (verify against callers)
symbols = {}

# characters that open (and close) a quoted string
_quoteletters = {"'", '"'}

# single-character operators that are emitted as-is by tokenize()
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')).union(pycompat.bytechr(i) for i in xrange(128, 256))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters.union(pycompat.iterbytestr('-/'))

67

68

def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    This is a generator yielding ``(type, value, position)`` tuples, where
    ``position`` is the offset of the token in ``program``. The stream
    always ends with an ``('end', None, position)`` token.

    ``lookup`` is an optional callable taking a candidate name and
    returning a true value if the name is known. It is used to recognize
    old-style ``a:b`` ranges and names containing ``-`` before they are
    split into operators.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Raises ParseError on an unterminated string or on a character that
    cannot start any token.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    Check that ``-`` is reported at its own position when a dash-separated
    name is split into an expression:
    >>> list(tokenize(b"a-b"))
    [('symbol', 'a', 0), ('-', None, 1), ('symbol', 'b', 2), ('end', None, 3)]

    '''
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: the quote follows the 'r' and no unescaping
                # is applied to the contents
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end of the program without a closing quote
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # some jerk gave us foo-bar-baz, try to check if it's a symbol
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        # 's' now points at the dash itself; 'pos' is the
                        # end of the whole scanned symbol and would give
                        # the wrong position
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # step back onto the last consumed character; the shared
            # 'pos += 1' below moves past it
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)

186

187

# helpers

187

# helpers

188

189

_notset = object()

189

_notset = object()

190

191

def getsymbol(x):
    """Return the name stored in a ``('symbol', name)`` node

    Raises ParseError if ``x`` is empty or is not a symbol node.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]

195

196

def getstring(x, err):
    """Return the text of a ``'string'`` or ``'symbol'`` node

    Raises ParseError carrying ``err`` if ``x`` is empty or is any other
    kind of node.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]

200

201

def getinteger(x, err, default=_notset):
    """Return the integer value of a string or symbol node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned
    instead. Raises ParseError carrying ``err`` if the node text is not a
    valid integer (getstring() reports unsuitable nodes the same way).
    """
    hasdefault = default is not _notset
    if hasdefault and not x:
        return default
    try:
        value = int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)
    return value

208

209

def getboolean(x, err):
    """Return the boolean value spelled by a symbol node

    The symbol name is handed to util.parsebool(); if that returns None,
    ParseError carrying ``err`` is raised.
    """
    parsed = util.parsebool(getsymbol(x))
    if parsed is None:
        raise error.ParseError(err)
    return parsed

214

215

def getlist(x):
    """Return the elements of an argument tree as a Python list

    An empty tree gives ``[]``, a ``'list'`` node gives its children, and
    any other node gives a one-element list holding that node.
    """
    if not x:
        return []
    return list(x[1:]) if x[0] == 'list' else [x]

221

222

def getrange(x, err):
    """Return the ``(first, last)`` operands of a range node

    A missing endpoint is reported as None. Raises ParseError carrying
    ``err`` if ``x`` is empty or is not one of the four range node kinds.
    """
    if x:
        kind = x[0]
        if kind == 'range':
            return x[1], x[2]
        if kind == 'rangepre':
            return None, x[1]
        if kind == 'rangepost':
            return x[1], None
        if kind == 'rangeall':
            return None, None
    raise error.ParseError(err)

235

236

def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    Raises ParseError carrying ``err`` if there are fewer than ``min``
    arguments, or more than ``max`` arguments when ``max`` is not negative.
    """
    args = getlist(x)
    count = len(args)
    if count < min or (max >= 0 and count > max):
        raise error.ParseError(err)
    return args

241

242

def getargsdict(x, funcname, keys):
    """Build the argument mapping of function ``funcname`` from tree ``x``

    ``keys`` is the argument spec text understood by parser.splitargspec();
    the result is whatever parser.buildargsdict() returns.
    """
    args = getlist(x)
    argspec = parser.splitargspec(keys)
    return parser.buildargsdict(args, funcname, argspec,
                                keyvaluenode='keyvalue', keynode='symbol')

245

246

# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the parsed tree of ``spec``, parsing it only on first use"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    cached = _treecache.get(spec)
    if cached is not None:
        return cached
    tree = parse(spec)
    _treecache[spec] = tree
    return tree

255

256

def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    Each ``_`` symbol of the (cached) template tree is a placeholder that
    parser.buildtree() fills with the next item of ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    placeholder = ('symbol', '_')
    return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)

264

265

def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    The pattern is parsed once and cached; ``_`` symbols in it act as
    placeholders for parser.matchtree().

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    placeholder = ('symbol', '_')
    excluded = {'keyvalue', 'list'}
    return parser.matchtree(_cachedtree(patspec), tree, placeholder, excluded)

276

277

def _matchonly(revs, bases):
    """Match ``revs and bases`` against ``ancestors(_) and not ancestors(_)``

    Returns whatever _match() returns for the combined tree.
    """
    candidate = ('and', revs, bases)
    return _match('ancestors(_) and not ancestors(_)', candidate)

279

280

def _fixops(x):

280

def _fixops(x):

281

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

281

"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be

282

handled well by our simple top-down parser"""

282

handled well by our simple top-down parser"""

283

if not isinstance(x, tuple):

283

if not isinstance(x, tuple):

284

return x

284

return x

285

286

op = x[0]

286

op = x[0]

287

if op == 'parent':

287

if op == 'parent':

288

# x^:y means (x^) : y, not x ^ (:y)

288

# x^:y means (x^) : y, not x ^ (:y)

289

# x^: means (x^) :, not x ^ (:)

289

# x^: means (x^) :, not x ^ (:)

290

post = ('parentpost', x[1])

290

post = ('parentpost', x[1])

291

if x[2][0] == 'dagrangepre':

291

if x[2][0] == 'dagrangepre':

292

return _fixops(('dagrange', post, x[2][1]))

292

return _fixops(('dagrange', post, x[2][1]))

293

elif x[2][0] == 'dagrangeall':

293

elif x[2][0] == 'dagrangeall':

294

return _fixops(('dagrangepost', post))

294

return _fixops(('dagrangepost', post))

295

elif x[2][0] == 'rangepre':

295

elif x[2][0] == 'rangepre':

296

return _fixops(('range', post, x[2][1]))

296

return _fixops(('range', post, x[2][1]))

297

elif x[2][0] == 'rangeall':

297

elif x[2][0] == 'rangeall':

298

return _fixops(('rangepost', post))

298

return _fixops(('rangepost', post))

299

elif op == 'or':

299

elif op == 'or':

300

# make number of arguments deterministic:

300

# make number of arguments deterministic:

301

# x + y + z -> (or x y z) -> (or (list x y z))

301

# x + y + z -> (or x y z) -> (or (list x y z))

302

return (op, _fixops(('list',) + x[1:]))

302

return (op, _fixops(('list',) + x[1:]))

303

elif op == 'subscript' and x[1][0] == 'relation':

303

elif op == 'subscript' and x[1][0] == 'relation':

304

# x#y[z] ternary

304

# x#y[z] ternary

305

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

305

return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

306

307

return (op,) + tuple(_fixops(y) for y in x[1:])

307

return (op,) + tuple(_fixops(y) for y in x[1:])

308

309

def _analyze(x):

309

def _analyze(x):

310

if x is None:

310

if x is None:

311

return x

311

return x

312

313

op = x[0]

313

op = x[0]

314

if op == 'minus':

314

if op == 'minus':

315

return _analyze(_build('_ and not _', *x[1:]))

315

return _analyze(_build('_ and not _', *x[1:]))

316

elif op == 'only':

316

elif op == 'only':

317

return _analyze(_build('only(_, _)', *x[1:]))

317

return _analyze(_build('only(_, _)', *x[1:]))

318

elif op == 'onlypost':

318

elif op == 'onlypost':

319

return _analyze(_build('only(_)', x[1]))

319

return _analyze(_build('only(_)', x[1]))

320

elif op == 'dagrangeall':

320

elif op == 'dagrangeall':

321

raise error.ParseError(_("can't use '::' in this context"))

321

raise error.ParseError(_("can't use '::' in this context"))

322

elif op == 'dagrangepre':

322

elif op == 'dagrangepre':

323

return _analyze(_build('ancestors(_)', x[1]))

323

return _analyze(_build('ancestors(_)', x[1]))

324

elif op == 'dagrangepost':

324

elif op == 'dagrangepost':

325

return _analyze(_build('descendants(_)', x[1]))

325

return _analyze(_build('descendants(_)', x[1]))

326

elif op == 'negate':

326

elif op == 'negate':

327

s = getstring(x[1], _("can't negate that"))

327

s = getstring(x[1], _("can't negate that"))

328

return _analyze(('string', '-' + s))

328

return _analyze(('string', '-' + s))

329

elif op in ('string', 'symbol'):

329

elif op in ('string', 'symbol'):

330

return x

330

return x

331

elif op == 'rangeall':

331

elif op == 'rangeall':

332

return (op, None)

332

return (op, None)

333

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

333

elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:

334

return (op, _analyze(x[1]))

334

return (op, _analyze(x[1]))

335

elif op == 'group':

335

elif op == 'group':

336

return _analyze(x[1])

336

return _analyze(x[1])

337

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

337

elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',

338

'subscript'}:

338

'subscript'}:

339

ta = _analyze(x[1])

339

ta = _analyze(x[1])

340

tb = _analyze(x[2])

340

tb = _analyze(x[2])

341

return (op, ta, tb)

341

return (op, ta, tb)

342

elif op == 'relsubscript':

342

elif op == 'relsubscript':

343

ta = _analyze(x[1])

343

ta = _analyze(x[1])

344

tb = _analyze(x[2])

344

tb = _analyze(x[2])

345

tc = _analyze(x[3])

345

tc = _analyze(x[3])

346

return (op, ta, tb, tc)

346

return (op, ta, tb, tc)

347

elif op == 'list':

347

elif op == 'list':

348

return (op,) + tuple(_analyze(y) for y in x[1:])

348

return (op,) + tuple(_analyze(y) for y in x[1:])

349

elif op == 'keyvalue':

349

elif op == 'keyvalue':

350

return (op, x[1], _analyze(x[2]))

350

return (op, x[1], _analyze(x[2]))

351

elif op == 'func':

351

elif op == 'func':

352

return (op, x[1], _analyze(x[2]))

352

return (op, x[1], _analyze(x[2]))

353

raise ValueError('invalid operator %r' % op)

353

raise ValueError('invalid operator %r' % op)

354

355

def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    evaluatable = _analyze(x)
    return evaluatable

363

364

def _optimize(x):
    """Return a ``(weight, tree)`` pair for the evaluatable tree ``x``

    ``tree`` is ``x`` with fast-path rewrites applied and ``weight`` is the
    number computed alongside it (used below to order the operands of
    'and'). Raises ValueError for an operator that is not known here.
    """
    if x is None:
        return 0, x

    op = x[0]
    if op in ('string', 'symbol'):
        return 0.5, x # single revisions are small

    if op == 'and':
        wleft, tleft = _optimize(x[1])
        wright, tright = _optimize(x[2])
        lighter = min(wleft, wright)

        # (draft/secret/_notpublic() & ::x) have a fast path
        found = _match('_() & ancestors(_)', ('and', tleft, tright))
        if found and getsymbol(found[1]) in {'draft', 'secret', '_notpublic'}:
            return lighter, _build('_phaseandancestors(_, _)',
                                   found[1], found[2])

        # (::x and not ::y)/(not ::y and ::x) have a fast path
        found = _matchonly(tleft, tright) or _matchonly(tright, tleft)
        if found:
            return lighter, _build('only(_, _)', *found[1:])

        # 'x and not y' is evaluated as a difference
        found = _match('not _', tright)
        if found:
            return wleft, ('difference', tleft, found[1])
        if wleft > wright:
            op = 'andsmally'
        return lighter, (op, tleft, tright)

    if op == 'or':
        # fast path for machine-generated expression, that is likely to have
        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
        weights, trees, pending = [], [], []

        def flushpending():
            # move the run of consecutive string/symbol operands collected
            # in 'pending' into the output, merged into one _list() call
            # when there is more than one of them
            if not pending:
                return
            if len(pending) == 1:
                pw, pt = pending[0]
            else:
                joined = '\0'.join(pt[1] for pw, pt in pending)
                pw, pt = _optimize(_build('_list(_)', ('string', joined)))
            weights.append(pw)
            trees.append(pt)
            del pending[:]

        for operand in getlist(x[1]):
            w, t = _optimize(operand)
            if t is not None and t[0] in ('string', 'symbol'):
                pending.append((w, t))
                continue
            flushpending()
            weights.append(w)
            trees.append(t)
        flushpending()
        if len(trees) == 1:
            # 'or' operation is fully optimized out
            return weights[0], trees[0]
        return max(weights), (op, ('list',) + tuple(trees))

    if op == 'not':
        # Optimize not public() to _notpublic() because we have a fast version
        if _match('public()', x[1]):
            w, t = _optimize(_build('_notpublic()'))
            return w, t
        w, t = _optimize(x[1])
        return w, (op, t)

    if op == 'rangeall':
        return 1, x

    if op in ('rangepre', 'rangepost', 'parentpost'):
        w, t = _optimize(x[1])
        return w, (op, t)

    if op in ('dagrange', 'range'):
        wleft, tleft = _optimize(x[1])
        wright, tright = _optimize(x[2])
        return wleft + wright, (op, tleft, tright)

    if op in ('parent', 'ancestor', 'relation', 'subscript'):
        # only the first operand is optimized; the second is kept as is
        w, t = _optimize(x[1])
        return w, (op, t, x[2])

    if op == 'relsubscript':
        w, t = _optimize(x[1])
        return w, (op, t, x[2], x[3])

    if op == 'list':
        optimized = [_optimize(y) for y in x[1:]]
        weights, trees = zip(*optimized)
        return sum(weights), (op,) + trees

    if op == 'keyvalue':
        w, t = _optimize(x[2])
        return w, (op, x[1], t)

    if op == 'func':
        name = getsymbol(x[1])
        wargs, targs = _optimize(x[2])
        # functions without a ``_weight`` attribute (or not registered in
        # ``symbols`` at all) weigh 1
        wfunc = getattr(symbols.get(name), '_weight', 1)
        return wfunc + wargs, (op, x[1], targs)

    raise ValueError('invalid operator %r' % op)

455

456

def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.
    """
    # _optimize() returns (weight, tree); only the tree is of interest here
    result = _optimize(tree)
    return result[1]

463

464

# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions: the default set plus ``$``
_aliassyminitletters = _syminitletters.union({'$'})

467

468

def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` and ``syminitletters`` are passed on to tokenize(). Raises
    ParseError if the parser stops before the end of ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    p = parser.parser(elements)
    tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
    tree, pos = p.parse(tokens)
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    # flatten chains of 'list'/'or' first, then resolve ambiguous operators
    flattened = parser.simplifyinfixops(tree, ('list', 'or'))
    return _fixops(flattened)

488

489

class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        This allows symbol names to use also ``$`` as an initial letter
        (for backward compatibility), and callers of this function should
        examine whether ``$`` is used also for unexpected symbols or not.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` is a call of a plain symbol

        Returns None for any other tree.
        """
        if tree[0] != 'func':
            return None
        callee = tree[1]
        if callee[0] != 'symbol':
            return None
        return callee[1], getlist(tree[2])

507

508

def expandaliases(tree, aliases, warn=None):

508

def expandaliases(tree, aliases, warn=None):

509

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

509

"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""

510

aliases = _aliasrules.buildmap(aliases)

510

aliases = _aliasrules.buildmap(aliases)

511

tree = _aliasrules.expand(aliases, tree)

511

tree = _aliasrules.expand(aliases, tree)

512

# warn about problematic (but not referred) aliases

512

# warn about problematic (but not referred) aliases

513

if warn is not None:

513

if warn is not None:

514

for name, alias in sorted(aliases.iteritems()):

514

for name, alias in sorted(aliases.iteritems()):

515

if alias.error and not alias.warned:

515

if alias.error and not alias.warned:

516

warn(_('warning: %s\n') % (alias.error))

516

warn(_('warning: %s\n') % (alias.error))

517

alias.warned = True

517

alias.warned = True

518

return tree

518

return tree

519

520

def foldconcat(tree):

520

def foldconcat(tree):

521

"""Fold elements to be concatenated by `##`

521

"""Fold elements to be concatenated by `##`

522

"""

522

"""

523

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

523

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

524

return tree

524

return tree

525

if tree[0] == '_concat':

525

if tree[0] == '_concat':

526

pending = [tree]

526

pending = [tree]

527

l = []

527

l = []

528

while pending:

528

while pending:

529

e = pending.pop()

529

e = pending.pop()

530

if e[0] == '_concat':

530

if e[0] == '_concat':

531

pending.extend(reversed(e[1:]))

531

pending.extend(reversed(e[1:]))

532

elif e[0] in ('string', 'symbol'):

532

elif e[0] in ('string', 'symbol'):

533

l.append(e[1])

533

l.append(e[1])

534

else:

534

else:

535

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

535

msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])

536

raise error.ParseError(msg)

536

raise error.ParseError(msg)

537

return ('string', ''.join(l))

537

return ('string', ''.join(l))

538

else:

538

else:

539

return tuple(foldconcat(t) for t in tree)

539

return tuple(foldconcat(t) for t in tree)

540

541

def parse(spec, lookup=None):

541

def parse(spec, lookup=None):

542

return _parsewith(spec, lookup=lookup)

542

return _parsewith(spec, lookup=lookup)

543

544

def _quote(s):

544

def _quote(s):

545

r"""Quote a value in order to make it safe for the revset engine.

545

r"""Quote a value in order to make it safe for the revset engine.

546

547

>>> _quote(b'asdf')

547

>>> _quote(b'asdf')

548

"'asdf'"

548

"'asdf'"

549

>>> _quote(b"asdf'\"")

549

>>> _quote(b"asdf'\"")

550

'\'asdf\\\'"\''

550

'\'asdf\\\'"\''

551

>>> _quote(b'asdf\'')

551

>>> _quote(b'asdf\'')

552

"'asdf\\''"

552

"'asdf\\''"

553

>>> _quote(1)

553

>>> _quote(1)

554

"'1'"

554

"'1'"

555

"""

555

"""

556

return "'%s'" % util.escapestr(pycompat.bytestr(s))

556

return "'%s'" % util.escapestr(pycompat.bytestr(s))

557

558

def _formatargtype(c, arg):

558

def _formatargtype(c, arg):

559

if c == 'd':

559

if c == 'd':

560

return '%d' % int(arg)

560

return '%d' % int(arg)

561

elif c == 's':

561

elif c == 's':

562

return _quote(arg)

562

return _quote(arg)

563

elif c == 'r':

563

elif c == 'r':

564

parse(arg) # make sure syntax errors are confined

564

parse(arg) # make sure syntax errors are confined

565

return '(%s)' % arg

565

return '(%s)' % arg

566

elif c == 'n':

566

elif c == 'n':

567

return _quote(node.hex(arg))

567

return _quote(node.hex(arg))

568

elif c == 'b':

568

elif c == 'b':

569

try:

569

try:

570

return _quote(arg.branch())

570

return _quote(arg.branch())

571

except AttributeError:

571

except AttributeError:

572

raise TypeError

572

raise TypeError

573

raise error.ParseError(_('unexpected revspec format character %s') % c)

573

raise error.ParseError(_('unexpected revspec format character %s') % c)

574

575

def _formatlistexp(s, t):

575

def _formatlistexp(s, t):

576

l = len(s)

576

l = len(s)

577

if l == 0:

577

if l == 0:

578

return "_list('')"

578

return "_list('')"

579

elif l == 1:

579

elif l == 1:

580

return _formatargtype(t, s[0])

580

return _formatargtype(t, s[0])

581

elif t == 'd':

581

elif t == 'd':

582

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

582

return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)

583

elif t == 's':

583

elif t == 's':

584

return "_list(%s)" % _quote("\0".join(s))

584

return "_list(%s)" % _quote("\0".join(s))

585

elif t == 'n':

585

elif t == 'n':

586

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

586

return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)

587

elif t == 'b':

587

elif t == 'b':

588

try:

588

try:

589

return "_list('%s')" % "\0".join(a.branch() for a in s)

589

return "_list('%s')" % "\0".join(a.branch() for a in s)

590

except AttributeError:

590

except AttributeError:

591

raise TypeError

591

raise TypeError

592

593

m = l // 2

593

m = l // 2

594

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

594

return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))

595

596

def _formatparamexp(args, t):

596

def _formatparamexp(args, t):

597

return ', '.join(_formatargtype(t, a) for a in args)

597

return ', '.join(_formatargtype(t, a) for a in args)

598

599

_formatlistfuncs = {

599

_formatlistfuncs = {

600

'l': _formatlistexp,

600

'l': _formatlistexp,

601

'p': _formatparamexp,

601

'p': _formatparamexp,

602

}

602

}

603

604

def formatspec(expr, *args):

604

def formatspec(expr, *args):

605

'''

605

'''

606

This is a convenience function for using revsets internally, and

606

This is a convenience function for using revsets internally, and

607

escapes arguments appropriately. Aliases are intentionally ignored

607

escapes arguments appropriately. Aliases are intentionally ignored

608

so that intended expression behavior isn't accidentally subverted.

608

so that intended expression behavior isn't accidentally subverted.

609

610

Supported arguments:

610

Supported arguments:

611

612

%r = revset expression, parenthesized

612

%r = revset expression, parenthesized

613

%d = int(arg), no quoting

613

%d = int(arg), no quoting

614

%s = string(arg), escaped and single-quoted

614

%s = string(arg), escaped and single-quoted

615

%b = arg.branch(), escaped and single-quoted

615

%b = arg.branch(), escaped and single-quoted

616

%n = hex(arg), single-quoted

616

%n = hex(arg), single-quoted

617

%% = a literal '%'

617

%% = a literal '%'

618

619

Prefixing the type with 'l' specifies a parenthesized list of that type,

619

Prefixing the type with 'l' specifies a parenthesized list of that type,

620

and 'p' specifies a list of function parameters of that type.

620

and 'p' specifies a list of function parameters of that type.

621

622

>>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))

622

>>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))

623

'(10 or 11):: and ((this()) or (that()))'

623

'(10 or 11):: and ((this()) or (that()))'

624

>>> formatspec(b'%d:: and not %d::', 10, 20)

624

>>> formatspec(b'%d:: and not %d::', 10, 20)

625

'10:: and not 20::'

625

'10:: and not 20::'

626

>>> formatspec(b'%ld or %ld', [], [1])

626

>>> formatspec(b'%ld or %ld', [], [1])

627

"_list('') or 1"

627

"_list('') or 1"

628

>>> formatspec(b'keyword(%s)', b'foo\\xe9')

628

>>> formatspec(b'keyword(%s)', b'foo\\xe9')

629

"keyword('foo\\\\xe9')"

629

"keyword('foo\\\\xe9')"

630

>>> b = lambda: b'default'

630

>>> b = lambda: b'default'

631

>>> b.branch = b

631

>>> b.branch = b

632

>>> formatspec(b'branch(%b)', b)

632

>>> formatspec(b'branch(%b)', b)

633

"branch('default')"

633

"branch('default')"

634

>>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])

634

>>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])

635

"root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"

635

"root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"

636

>>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])

636

>>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])

637

"sort((:), 'desc', 'user')"

637

"sort((:), 'desc', 'user')"

638

>>> formatspec('%ls', ['a', "'"])

638

>>> formatspec(b'%ls', [b'a', b"'"])

639

"_list('a\\\\x00\\\\'')"

639

"_list('a\\\\x00\\\\'')"

640

'''

640

'''

641

expr = pycompat.bytestr(expr)

641

expr = pycompat.bytestr(expr)

642

argiter = iter(args)

642

argiter = iter(args)

643

ret = []

643

ret = []

644

pos = 0

644

pos = 0

645

while pos < len(expr):

645

while pos < len(expr):

646

q = expr.find('%', pos)

646

q = expr.find('%', pos)

647

if q < 0:

647

if q < 0:

648

ret.append(expr[pos:])

648

ret.append(expr[pos:])

649

break

649

break

650

ret.append(expr[pos:q])

650

ret.append(expr[pos:q])

651

pos = q + 1

651

pos = q + 1

652

try:

652

try:

653

d = expr[pos]

653

d = expr[pos]

654

except IndexError:

654

except IndexError:

655

raise error.ParseError(_('incomplete revspec format character'))

655

raise error.ParseError(_('incomplete revspec format character'))

656

if d == '%':

656

if d == '%':

657

ret.append(d)

657

ret.append(d)

658

pos += 1

658

pos += 1

659

continue

659

continue

660

661

try:

661

try:

662

arg = next(argiter)

662

arg = next(argiter)

663

except StopIteration:

663

except StopIteration:

664

raise error.ParseError(_('missing argument for revspec'))

664

raise error.ParseError(_('missing argument for revspec'))

665

f = _formatlistfuncs.get(d)

665

f = _formatlistfuncs.get(d)

666

if f:

666

if f:

667

# a list of some type

667

# a list of some type

668

pos += 1

668

pos += 1

669

try:

669

try:

670

d = expr[pos]

670

d = expr[pos]

671

except IndexError:

671

except IndexError:

672

raise error.ParseError(_('incomplete revspec format character'))

672

raise error.ParseError(_('incomplete revspec format character'))

673

try:

673

try:

674

ret.append(f(list(arg), d))

674

ret.append(f(list(arg), d))

675

except (TypeError, ValueError):

675

except (TypeError, ValueError):

676

raise error.ParseError(_('invalid argument for revspec'))

676

raise error.ParseError(_('invalid argument for revspec'))

677

else:

677

else:

678

try:

678

try:

679

ret.append(_formatargtype(d, arg))

679

ret.append(_formatargtype(d, arg))

680

except (TypeError, ValueError):

680

except (TypeError, ValueError):

681

raise error.ParseError(_('invalid argument for revspec'))

681

raise error.ParseError(_('invalid argument for revspec'))

682

pos += 1

682

pos += 1

683

684

try:

684

try:

685

next(argiter)

685

next(argiter)

686

raise error.ParseError(_('too many revspec arguments specified'))

686

raise error.ParseError(_('too many revspec arguments specified'))

687

except StopIteration:

687

except StopIteration:

688

pass

688

pass

689

return ''.join(ret)

689

return ''.join(ret)

690

691

def prettyformat(tree):

691

def prettyformat(tree):

692

return parser.prettyformat(tree, ('string', 'symbol'))

692

return parser.prettyformat(tree, ('string', 'symbol'))

693

694

def depth(tree):

694

def depth(tree):

695

if isinstance(tree, tuple):

695

if isinstance(tree, tuple):

696

return max(map(depth, tree)) + 1

696

return max(map(depth, tree)) + 1

697

else:

697

else:

698

return 0

698

return 0

699

700

def funcsused(tree):

700

def funcsused(tree):

701

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

701

if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):

702

return set()

702

return set()

703

else:

703

else:

704

funcs = set()

704

funcs = set()

705

for s in tree[1:]:

705

for s in tree[1:]:

706

funcs |= funcsused(s)

706

funcs |= funcsused(s)

707

if tree[0] == 'func':

707

if tree[0] == 'func':

708

funcs.add(tree[1][1])

708

funcs.add(tree[1][1])

709

return funcs

709

return funcs

710

711

_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

711

_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

712

713

def _ishashlikesymbol(symbol):

713

def _ishashlikesymbol(symbol):

714

"""returns true if the symbol looks like a hash"""

714

"""returns true if the symbol looks like a hash"""

715

return _hashre.match(symbol)

715

return _hashre.match(symbol)

716

717

def gethashlikesymbols(tree):

717

def gethashlikesymbols(tree):

718

"""returns the list of symbols of the tree that look like hashes

718

"""returns the list of symbols of the tree that look like hashes

719

720

>>> gethashlikesymbols(('dagrange', ('symbol', '3'), ('symbol', 'abe3ff')))

720

>>> gethashlikesymbols(('dagrange', ('symbol', '3'), ('symbol', 'abe3ff')))

721

['3', 'abe3ff']

721

['3', 'abe3ff']

722

>>> gethashlikesymbols(('func', ('symbol', 'precursors'), ('symbol', '.')))

722

>>> gethashlikesymbols(('func', ('symbol', 'precursors'), ('symbol', '.')))

723

[]

723

[]

724

>>> gethashlikesymbols(('func', ('symbol', 'precursors'), ('symbol', '34')))

724

>>> gethashlikesymbols(('func', ('symbol', 'precursors'), ('symbol', '34')))

725

['34']

725

['34']

726

>>> gethashlikesymbols(('symbol', 'abe3ffZ'))

726

>>> gethashlikesymbols(('symbol', 'abe3ffZ'))

727

[]

727

[]

728

"""

728

"""

729

if not tree:

729

if not tree:

730

return []

730

return []

731

732

if tree[0] == "symbol":

732

if tree[0] == "symbol":

733

if _ishashlikesymbol(tree[1]):

733

if _ishashlikesymbol(tree[1]):

734

return [tree[1]]

734

return [tree[1]]

735

elif len(tree) >= 3:

735

elif len(tree) >= 3:

736

results = []

736

results = []

737

for subtree in tree[1:]:

737

for subtree in tree[1:]:

738

results += gethashlikesymbols(subtree)

738

results += gethashlikesymbols(subtree)

739

return results

739

return results

740

return []

740

return []

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revsetlang.py - parser, tokenizer and utility for revision set language
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import string
             from .i18n import _
             from . import (
                 error,
                 node,
                 parser,
                 pycompat,
                 util,
             )
             # Operator table for the revset grammar; _parsewith() hands it to
             # parser.parser(). Each value is a 5-tuple laid out as described by the
             # comment on the first line inside the dict; None marks a role the token
             # does not play.
             elements = {
                 # token-type: binding-strength, primary, prefix, infix, suffix
                 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
                 "[": (21, None, None, ("subscript", 1, "]"), None),
                 "#": (21, None, None, ("relation", 21), None),
                 "##": (20, None, None, ("_concat", 20), None),
                 "~": (18, None, None, ("ancestor", 18), None),
                 "^": (18, None, None, ("parent", 18), "parentpost"),
                 "-": (5, None, ("negate", 19), ("minus", 5), None),
                 "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
                        "dagrangepost"),
                 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
                 "not": (10, None, ("not", 10), None, None),
                 "!": (10, None, ("not", 10), None, None),
                 "and": (5, None, None, ("and", 5), None),
                 "&": (5, None, None, ("and", 5), None),
                 "%": (5, None, None, ("only", 5), "onlypost"),
                 "or": (4, None, None, ("or", 4), None),
                 "|": (4, None, None, ("or", 4), None),
                 "+": (4, None, None, ("or", 4), None),
                 "=": (3, None, None, ("keyvalue", 3), None),
                 ",": (2, None, None, ("list", 2), None),
                 ")": (0, None, None, None, None),
                 "]": (0, None, None, None, None),
                 "symbol": (0, "symbol", None, None, None),
                 "string": (0, "string", None, None, None),
                 "end": (0, None, None, None, None),
             }
             # word-like operators: tokenize() yields these as operator tokens rather
             # than as symbols
             keywords = {'and', 'or', 'not'}
             # NOTE(review): empty at definition time; presumably populated by another
             # module -- confirm against callers
             symbols = {}
             # characters that open (and must close) a quoted string in tokenize()
             _quoteletters = {'"', "'"}
             # one-character operators that tokenize() yields verbatim as token types
             _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
             # default set of valid characters for the initial letter of symbols
             # (ASCII letters and digits, '.', '_', '@', and every byte 128..255)
             _syminitletters = set(pycompat.iterbytestr(
                 string.ascii_letters.encode('ascii') +
                 string.digits.encode('ascii') +
                 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
             # default set of valid characters for non-initial letters of symbols
             # (everything allowed initially, plus '-' and '/')
             _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
             def tokenize(program, lookup=None, syminitletters=None, symletters=None):
                 '''
                 Parse a revset statement into a stream of tokens
                 Each token is yielded as a ``(type, value, position)`` tuple where
                 ``position`` is an offset into ``program``; a successfully tokenized
                 stream always finishes with an ``'end'`` token.
                 ``lookup``, if given, is called with a candidate name and should
                 return a true value when that name is a known symbol. It is used to
                 recognize old-style ``a:b`` ranges and names containing ``-``.
                 ``syminitletters`` is the set of valid characters for the initial
                 letter of symbols.
                 By default, character ``c`` is recognized as valid for initial
                 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
                 ``symletters`` is the set of valid characters for non-initial
                 letters of symbols.
                 By default, character ``c`` is recognized as valid for non-initial
                 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
                 Raises ``error.ParseError`` on an unterminated string or on a
                 character that cannot start any token.
                 Check that @ is a valid unquoted token character (issue3686):
                 >>> list(tokenize(b"@::"))
                 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
                 Check that '-' in a spaceless expression reports its own position:
                 >>> list(tokenize(b"a-b"))
                 [('symbol', 'a', 0), ('-', None, 1), ('symbol', 'b', 2), ('end', None, 3)]
                 '''
                 program = pycompat.bytestr(program)
                 if syminitletters is None:
                     syminitletters = _syminitletters
                 if symletters is None:
                     symletters = _symletters
                 if program and lookup:
                     # attempt to parse old-style ranges first to deal with
                     # things like old-tag which contain query metacharacters
                     parts = program.split(':', 1)
                     if all(lookup(sym) for sym in parts if sym):
                         if parts[0]:
                             yield ('symbol', parts[0], 0)
                         if len(parts) > 1:
                             s = len(parts[0])
                             yield (':', None, s)
                             if parts[1]:
                                 yield ('symbol', parts[1], s + 1)
                         yield ('end', None, len(program))
                         return
                 pos, l = 0, len(program)
                 while pos < l:
                     c = program[pos]
                     if c.isspace(): # skip inter-token whitespace
                         pass
                     elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
                         yield ('::', None, pos)
                         pos += 1 # skip ahead
                     elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
                         yield ('..', None, pos)
                         pos += 1 # skip ahead
                     elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
                         yield ('##', None, pos)
                         pos += 1 # skip ahead
                     elif c in _simpleopletters: # handle simple operators
                         yield (c, None, pos)
                     elif (c in _quoteletters or c == 'r' and
                           program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
                         if c == 'r':
                             # raw string: step onto the quote, keep escapes as-is
                             pos += 1
                             c = program[pos]
                             decode = lambda x: x
                         else:
                             decode = parser.unescapestr
                         pos += 1
                         s = pos
                         while pos < l: # find closing quote
                             d = program[pos]
                             if d == '\\': # skip over escaped characters
                                 pos += 2
                                 continue
                             if d == c:
                                 yield ('string', decode(program[s:pos]), s)
                                 break
                             pos += 1
                         else:
                             raise error.ParseError(_("unterminated string"), s)
                     # gather up a symbol/keyword
                     elif c in syminitletters:
                         s = pos
                         pos += 1
                         while pos < l: # find end of symbol
                             d = program[pos]
                             if d not in symletters:
                                 break
                             if d == '.' and program[pos - 1] == '.': # special case for ..
                                 pos -= 1
                                 break
                             pos += 1
                         sym = program[s:pos]
                         if sym in keywords: # operator keywords
                             yield (sym, None, s)
                         elif '-' in sym:
                             # some jerk gave us foo-bar-baz, try to check if it's a symbol
                             if lookup and lookup(sym):
                                 # looks like a real symbol
                                 yield ('symbol', sym, s)
                             else:
                                 # looks like an expression
                                 parts = sym.split('-')
                                 for p in parts[:-1]:
                                     if p: # possible consecutive -
                                         yield ('symbol', p, s)
                                     s += len(p)
                                     # s now sits on the dash itself; report that
                                     # offset (not the end of the whole symbol) so
                                     # error positions point at the right character
                                     yield ('-', None, s)
                                     s += 1
                                 if parts[-1]: # possible trailing -
                                     yield ('symbol', parts[-1], s)
                         else:
                             yield ('symbol', sym, s)
                         # step back so the shared increment below lands just
                         # past the symbol
                         pos -= 1
                     else:
                         raise error.ParseError(_("syntax error in revset '%s'") %
                                                program, pos)
                     pos += 1
                 yield ('end', None, pos)
             # helpers
             # unique sentinel used as the default of getinteger()'s ``default``
             # parameter, so "no default supplied" can be told apart from any real
             # value a caller might pass
             _notset = object()
             def getsymbol(x):
                 """Return the name held by a ``('symbol', name)`` node
                 Raises ``error.ParseError`` when ``x`` is empty or is not a symbol
                 node.
                 """
                 if not x or x[0] != 'symbol':
                     raise error.ParseError(_('not a symbol'))
                 return x[1]
             def getstring(x, err):
                 """Return the text held by a string or symbol node
                 Raises ``error.ParseError(err)`` when ``x`` is empty or is any other
                 kind of node.
                 """
                 if not x:
                     raise error.ParseError(err)
                 kind = x[0]
                 if kind == 'string' or kind == 'symbol':
                     return x[1]
                 raise error.ParseError(err)
             def getinteger(x, err, default=_notset):
                 """Return the integer value of a string or symbol node
                 An empty ``x`` yields ``default`` when one was supplied. Otherwise the
                 node text is extracted with getstring() and converted with int();
                 ``error.ParseError(err)`` is raised if either step fails.
                 """
                 missing = not x
                 if missing and default is not _notset:
                     return default
                 try:
                     text = getstring(x, err)
                     return int(text)
                 except ValueError:
                     raise error.ParseError(err)
             def getboolean(x, err):
                 """Return the boolean that util.parsebool() derives from a symbol node
                 Raises ``error.ParseError(err)`` when util.parsebool() returns None
                 for the symbol's name.
                 """
                 parsed = util.parsebool(getsymbol(x))
                 if parsed is None:
                     raise error.ParseError(err)
                 return parsed
             def getlist(x):
                 """Return the items of a node as a Python list
                 A ``('list', ...)`` node gives its operands, an empty ``x`` gives
                 ``[]``, and any other node gives a one-element list holding it.
                 """
                 if x and x[0] == 'list':
                     return list(x[1:])
                 return [x] if x else []
             def getrange(x, err):
                 """Return the ``(first, last)`` operands of a range node
                 ``None`` stands for an open end: 'rangepre' has no first operand,
                 'rangepost' has no last operand and 'rangeall' has neither. Raises
                 ``error.ParseError(err)`` when ``x`` is empty or is not a range node.
                 """
                 if x:
                     kind = x[0]
                     if kind == 'rangeall':
                         return None, None
                     if kind == 'rangepost':
                         return x[1], None
                     if kind == 'rangepre':
                         return None, x[1]
                     if kind == 'range':
                         return x[1], x[2]
                 raise error.ParseError(err)
             def getargs(x, min, max, err):
                 """Return the argument list of ``x`` after checking its length
                 Raises ``error.ParseError(err)`` if there are fewer than ``min``
                 arguments, or more than ``max`` when ``max`` is not negative.
                 """
                 args = getlist(x)
                 count = len(args)
                 if count < min:
                     raise error.ParseError(err)
                 # a negative max means "no upper bound"
                 if max >= 0 and count > max:
                     raise error.ParseError(err)
                 return args
             def getargsdict(x, funcname, keys):
                 """Build the argument dict of ``x`` via parser.buildargsdict()
                 ``keys`` is split with parser.splitargspec(); 'keyvalue' and 'symbol'
                 are passed as the key-value and key node types.
                 """
                 args = getlist(x)
                 argspec = parser.splitargspec(keys)
                 return parser.buildargsdict(args, funcname, argspec,
                                             keyvaluenode='keyvalue',
                                             keynode='symbol')
             # memo of {spec: raw parsed tree} for specs built internally
             _treecache = {}
             def _cachedtree(spec):
                 """Return the raw parsed tree of ``spec``, parsing only on a cache miss"""
                 # no lock is taken: per the original author this is thread safe
                 # because parse() is reentrant and dict.__setitem__() is atomic
                 cached = _treecache.get(spec)
                 if cached is not None:
                     return cached
                 parsed = parse(spec)
                 _treecache[spec] = parsed
                 return parsed
             def _build(tmplspec, *repls):
                 """Build a raw parsed tree by filling a template revset statement
                 The template is parsed through _cachedtree(); its ``_`` placeholder
                 symbols and ``repls`` are handed to parser.buildtree().
                 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
                 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
                 """
                 placeholder = ('symbol', '_')
                 return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
             def _match(patspec, tree):
                 """Match ``tree`` against a pattern statement; return what matched
                 The pattern is parsed through _cachedtree() and the comparison is
                 delegated to parser.matchtree() with ``_`` as the placeholder symbol.
                 >>> _match(b'f(_)', parse(b'f()'))
                 >>> _match(b'f(_)', parse(b'f(1)'))
                 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
                 >>> _match(b'f(_)', parse(b'f(1, 2)'))
                 """
                 placeholder = ('symbol', '_')
                 return parser.matchtree(_cachedtree(patspec), tree, placeholder,
                                         {'keyvalue', 'list'})
             def _matchonly(revs, bases):
                 """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'"""
                 candidate = ('and', revs, bases)
                 return _match('ancestors(_) and not ancestors(_)', candidate)
             def _fixops(x):
                 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
                 handled well by our simple top-down parser
                 Non-tuple values are returned untouched; every other node is rebuilt
                 with its operands fixed recursively.
                 """
                 if not isinstance(x, tuple):
                     return x
                 op = x[0]
                 if op == 'parent':
                     # x^:y means (x^) : y, not x ^ (:y)
                     # x^:  means (x^) :,   not x ^ (:)
                     rhs = x[2]
                     rkind = rhs[0]
                     post = ('parentpost', x[1])
                     # a prefix range on the right becomes an infix range on x^
                     for prefixop, infixop in (('dagrangepre', 'dagrange'),
                                               ('rangepre', 'range')):
                         if rkind == prefixop:
                             return _fixops((infixop, post, rhs[1]))
                     # a bare range on the right becomes a postfix range on x^
                     for allop, postop in (('dagrangeall', 'dagrangepost'),
                                           ('rangeall', 'rangepost')):
                         if rkind == allop:
                             return _fixops((postop, post))
                 elif op == 'or':
                     # make number of arguments deterministic:
                     # x + y + z -> (or x y z) -> (or (list x y z))
                     operands = ('list',) + x[1:]
                     return (op, _fixops(operands))
                 elif op == 'subscript' and x[1][0] == 'relation':
                     # x#y[z] ternary
                     rel = x[1]
                     return _fixops(('relsubscript', rel[1], rel[2], x[2]))
                 fixed = [_fixops(y) for y in x[1:]]
                 return (op,) + tuple(fixed)
             def _analyze(x):
                 """Recursively turn raw parsed node ``x`` into its evaluatable form
                 Pseudo operators are re-expressed through template revsets built with
                 _build(), groups are unwrapped, and the operands of every remaining
                 operator are analyzed in turn. ``None`` is passed through unchanged.
                 Raises ``error.ParseError`` for a bare '::' or a negation of a
                 non-string, and ``ValueError`` for an unknown operator.
                 """
                 if x is None:
                     return x
                 op = x[0]
                 # leaves are already in their final shape
                 if op in ('string', 'symbol'):
                     return x
                 if op == 'group':
                     return _analyze(x[1])
                 # pseudo operators: rewrite through a template, then analyze that
                 if op == 'minus':
                     return _analyze(_build('_ and not _', *x[1:]))
                 if op == 'only':
                     return _analyze(_build('only(_, _)', *x[1:]))
                 if op == 'onlypost':
                     return _analyze(_build('only(_)', x[1]))
                 if op == 'dagrangepre':
                     return _analyze(_build('ancestors(_)', x[1]))
                 if op == 'dagrangepost':
                     return _analyze(_build('descendants(_)', x[1]))
                 if op == 'dagrangeall':
                     raise error.ParseError(_("can't use '::' in this context"))
                 if op == 'negate':
                     negated = '-' + getstring(x[1], _("can't negate that"))
                     return _analyze(('string', negated))
                 if op == 'rangeall':
                     return (op, None)
                 # real operators: keep the operator, analyze the operands
                 if op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
                     return (op, _analyze(x[1]))
                 if op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                           'subscript'}:
                     return (op, _analyze(x[1]), _analyze(x[2]))
                 if op == 'relsubscript':
                     return (op, _analyze(x[1]), _analyze(x[2]), _analyze(x[3]))
                 if op == 'list':
                     return (op,) + tuple([_analyze(y) for y in x[1:]])
                 # the first operand (key or function name) is kept as-is
                 if op == 'keyvalue' or op == 'func':
                     return (op, x[1], _analyze(x[2]))
                 raise ValueError('invalid operator %r' % op)
             def analyze(x):
                 """Convert a raw parsed tree into an evaluatable tree

                 The result can be handed to optimize() or getset().  Every pseudo
                 operation is expected to end up as a real operation or function,
                 i.e. one defined in the methods or symbols table respectively.
                 """
                 evaluatable = _analyze(x)
                 return evaluatable
             def _optimize(x):
                 """Return a ``(weight, tree)`` pair for the evaluatable tree ``x``

                 ``tree`` is ``x`` with its sub-trees optimized recursively and some
                 patterns rewritten to dedicated operations or functions.  ``weight``
                 is a number computed per node type below; it is only compared or
                 combined with other weights inside this function.

                 Raises ValueError for an operator that none of the branches handles.
                 """
                 if x is None:
                     # a missing sub-tree is passed through with weight 0
                     return 0, x
                 op = x[0]
                 if op in ('string', 'symbol'):
                     return 0.5, x # single revisions are small
                 elif op == 'and':
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     # the default weight of an intersection is that of its
                     # lighter operand
                     w = min(wa, wb)
                     # (draft/secret/_notpublic() & ::x) have a fast path
                     m = _match('_() & ancestors(_)', ('and', ta, tb))
                     if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
                         return w, _build('_phaseandancestors(_, _)', m[1], m[2])
                     # (::x and not ::y)/(not ::y and ::x) have a fast path
                     m = _matchonly(ta, tb) or _matchonly(tb, ta)
                     if m:
                         return w, _build('only(_, _)', *m[1:])
                     # 'a and not b' becomes an explicit difference that keeps the
                     # weight of the left operand
                     m = _match('not _', tb)
                     if m:
                         return wa, ('difference', ta, m[1])
                     # the right operand is the lighter one: emit 'andsmally'
                     # instead of 'and'
                     # NOTE(review): presumably lets the evaluator favour the
                     # smaller right-hand set -- confirm against the methods table
                     if wa > wb:
                         op = 'andsmally'
                     return w, (op, ta, tb)
                 elif op == 'or':
                     # fast path for machine-generated expression, that is likely to have
                     # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
                     # ws/ts collect the weights/trees of the final operands, ss
                     # buffers the current run of consecutive string/symbol operands
                     ws, ts, ss = [], [], []
                     def flushss():
                         # move the buffered run from ss into ws/ts: a single entry
                         # is kept as is, several entries are joined with NUL into
                         # one _list() call which is then optimized itself
                         if not ss:
                             return
                         if len(ss) == 1:
                             w, t = ss[0]
                         else:
                             s = '\0'.join(t[1] for w, t in ss)
                             y = _build('_list(_)', ('string', s))
                             w, t = _optimize(y)
                         ws.append(w)
                         ts.append(t)
                         del ss[:]
                     for y in getlist(x[1]):
                         w, t = _optimize(y)
                         if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                             ss.append((w, t))
                             continue
                         # a non-trivial operand ends the current run
                         flushss()
                         ws.append(w)
                         ts.append(t)
                     flushss()
                     if len(ts) == 1:
                         return ws[0], ts[0] # 'or' operation is fully optimized out
                     # a union weighs as much as its heaviest operand
                     return max(ws), (op, ('list',) + tuple(ts))
                 elif op == 'not':
                     # Optimize not public() to _notpublic() because we have a fast version
                     if _match('public()', x[1]):
                         o = _optimize(_build('_notpublic()'))
                         return o[0], o[1]
                     else:
                         o = _optimize(x[1])
                         return o[0], (op, o[1])
                 elif op == 'rangeall':
                     return 1, x
                 elif op in ('rangepre', 'rangepost', 'parentpost'):
                     # unary operators keep the weight of their operand
                     o = _optimize(x[1])
                     return o[0], (op, o[1])
                 elif op in ('dagrange', 'range'):
                     # ranges add up the weights of both ends
                     wa, ta = _optimize(x[1])
                     wb, tb = _optimize(x[2])
                     return wa + wb, (op, ta, tb)
                 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
                     # only the first operand is optimized; x[2] is passed through
                     # untouched
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2])
                 elif op == 'relsubscript':
                     # as above: x[2] and x[3] are passed through untouched
                     w, t = _optimize(x[1])
                     return w, (op, t, x[2], x[3])
                 elif op == 'list':
                     # optimize every element; the list weighs the sum of them
                     ws, ts = zip(*(_optimize(y) for y in x[1:]))
                     return sum(ws), (op,) + ts
                 elif op == 'keyvalue':
                     # the key x[1] is kept as is, only the value is optimized
                     w, t = _optimize(x[2])
                     return w, (op, x[1], t)
                 elif op == 'func':
                     f = getsymbol(x[1])
                     wa, ta = _optimize(x[2])
                     # a function weighs its '_weight' attribute, or 1 when the
                     # object returned by symbols.get(f) does not have one
                     w = getattr(symbols.get(f), '_weight', 1)
                     return w + wa, (op, x[1], ta)
                 raise ValueError('invalid operator %r' % op)
             def optimize(tree):
                 """Return the optimized version of an evaluatable tree

                 Pseudo operations must have been transformed away before calling
                 this; the weight computed alongside the tree is dropped.
                 """
                 return _optimize(tree)[1]
             # Characters accepted as the initial letter of a symbol in alias
             # declarations and definitions: everything in _syminitletters plus '$'.
             _aliassyminitletters = _syminitletters | {'$'}
             def _parsewith(spec, lookup=None, syminitletters=None):
                 """Build the parse tree of ``spec`` using the given tokenizing options

                 ``lookup`` and ``syminitletters`` are forwarded to tokenize() as is.
                 A ParseError ('invalid token') is raised when the parser stops before
                 the end of ``spec``.

                 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
                 ('func', ('symbol', 'foo'), ('symbol', '$1'))
                 >>> _parsewith(b'$1')
                 Traceback (most recent call last):
                   ...
                 ParseError: ("syntax error in revset '$1'", 0)
                 >>> _parsewith(b'foo bar')
                 Traceback (most recent call last):
                   ...
                 ParseError: ('invalid token', 4)
                 """
                 revparser = parser.parser(elements)
                 tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
                 tree, endpos = revparser.parse(tokens)
                 if endpos == len(spec):
                     # whole input consumed: flatten 'list'/'or' chains and fix up ops
                     return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
                 raise error.ParseError(_('invalid token'), endpos)
             class _aliasrules(parser.basealiasrules):
                 """Rule set used to parse and expand revset aliases"""
                 _section = _('revset alias')

                 @staticmethod
                 def _parse(spec):
                     """Return the parse tree of the alias declaration/definition ``spec``

                     For backward compatibility, ``$`` is also accepted here as the
                     initial letter of a symbol.  It is up to the callers to check
                     whether ``$`` shows up in symbols where it is not expected.
                     """
                     tree = _parsewith(spec, syminitletters=_aliassyminitletters)
                     return tree

                 @staticmethod
                 def _trygetfunc(tree):
                     """Return ``(name, args)`` if ``tree`` is a function call on a symbol

                     None is returned for any other tree.
                     """
                     if tree[0] != 'func' or tree[1][0] != 'symbol':
                         return None
                     funcname = tree[1][1]
                     return funcname, getlist(tree[2])
             def expandaliases(tree, aliases, warn=None):
                 """Return ``tree`` with aliases expanded

                 ``aliases`` is a list of (name, value) tuples.  When ``warn`` is
                 given, it is called once, in name order, for each alias that carries
                 an error and has not been warned about yet.
                 """
                 aliasmap = _aliasrules.buildmap(aliases)
                 expanded = _aliasrules.expand(aliasmap, tree)
                 if warn is None:
                     return expanded
                 # report aliases that carry an error and were not warned about yet
                 for item in sorted(aliasmap.iteritems()):
                     alias = item[1]
                     if not alias.error or alias.warned:
                         continue
                     warn(_('warning: %s\n') % (alias.error))
                     alias.warned = True
                 return expanded
             def foldconcat(tree):
                 """Collapse each `##` concatenation in ``tree`` into one string node

                 Non-tuples and 'string'/'symbol' nodes are returned unchanged.  A
                 ParseError is raised if `##` is applied to any other kind of element.
                 """
                 if not isinstance(tree, tuple):
                     return tree
                 kind = tree[0]
                 if kind in ('string', 'symbol'):
                     return tree
                 if kind != '_concat':
                     return tuple(foldconcat(sub) for sub in tree)
                 # walk nested '_concat' nodes left to right with an explicit stack
                 stack = [tree]
                 pieces = []
                 while stack:
                     item = stack.pop()
                     itemkind = item[0]
                     if itemkind in ('string', 'symbol'):
                         pieces.append(item[1])
                     elif itemkind == '_concat':
                         # reversed so that the leftmost operand is popped first
                         stack.extend(reversed(item[1:]))
                     else:
                         msg = _("\"##\" can't concatenate \"%s\" element") % (itemkind)
                         raise error.ParseError(msg)
                 return ('string', ''.join(pieces))
             def parse(spec, lookup=None):
                 """Parse ``spec`` through _parsewith(), forwarding ``lookup`` only"""
                 tree = _parsewith(spec, lookup=lookup)
                 return tree
             def _quote(s):
                 r"""Return ``s`` escaped and single-quoted, safe for the revset engine.

                 >>> _quote(b'asdf')
                 "'asdf'"
                 >>> _quote(b"asdf'\"")
                 '\'asdf\\\'"\''
                 >>> _quote(b'asdf\'')
                 "'asdf\\''"
                 >>> _quote(1)
                 "'1'"
                 """
                 escaped = util.escapestr(pycompat.bytestr(s))
                 return "'%s'" % escaped
             def _formatargtype(c, arg):
                 if c == 'd':
                     return '%d' % int(arg)
                 elif c == 's':
                     return _quote(arg)
                 elif c == 'r':
                     parse(arg) # make sure syntax errors are confined
                     return '(%s)' % arg
                 elif c == 'n':
                     return _quote(node.hex(arg))
                 elif c == 'b':
                     try:
                         return _quote(arg.branch())
                     except AttributeError:
                         raise TypeError
                 raise error.ParseError(_('unexpected revspec format character %s') % c)
             def _formatlistexp(s, t):
                 l = len(s)
                 if l == 0:
                     return "_list('')"
                 elif l == 1:
                     return _formatargtype(t, s[0])
                 elif t == 'd':
                     return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
                 elif t == 's':
                     return "_list(%s)" % _quote("\0".join(s))
                 elif t == 'n':
                     return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
                 elif t == 'b':
                     try:
                         return "_list('%s')" % "\0".join(a.branch() for a in s)
                     except AttributeError:
                         raise TypeError
                 m = l // 2
                 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
             def _formatparamexp(args, t):
                 """Render ``args`` as comma-separated parameters, each of type ``t``"""
                 rendered = [_formatargtype(t, a) for a in args]
                 return ', '.join(rendered)
             # Format-character prefixes that take a list argument, mapped to the
             # function rendering that list.  formatspec() calls the function with
             # the list of values and the type character that follows the prefix.
             _formatlistfuncs = {
                 'l': _formatlistexp,
                 'p': _formatparamexp,
             }
             def formatspec(expr, *args):
                 '''
                 This is a convenience function for using revsets internally, and
                 escapes arguments appropriately. Aliases are intentionally ignored
                 so that intended expression behavior isn't accidentally subverted.

                 Supported arguments:

                 %r = revset expression, parenthesized
                 %d = int(arg), no quoting
                 %s = string(arg), escaped and single-quoted
                 %b = arg.branch(), escaped and single-quoted
                 %n = hex(arg), single-quoted
                 %% = a literal '%'

                 Prefixing the type with 'l' specifies a parenthesized list of that type,
                 and 'p' specifies a list of function parameters of that type.

                 Raises error.ParseError if ``expr`` ends in the middle of a format
                 specifier, if the number of arguments does not match the number of
                 specifiers, if a format character is unknown, or if an argument
                 cannot be rendered for its specifier.

                 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
                 '(10 or 11):: and ((this()) or (that()))'
                 >>> formatspec(b'%d:: and not %d::', 10, 20)
                 '10:: and not 20::'
                 >>> formatspec(b'%ld or %ld', [], [1])
                 "_list('') or 1"
                 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
                 "keyword('foo\\\\xe9')"
                 >>> b = lambda: b'default'
                 >>> b.branch = b
                 >>> formatspec(b'branch(%b)', b)
                 "branch('default')"
                 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
                 "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
                 >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
                 "sort((:), 'desc', 'user')"
                 >>> formatspec(b'%ls', [b'a', b"'"])
                 "_list('a\\\\x00\\\\'')"
                 '''
                 expr = pycompat.bytestr(expr)
                 argiter = iter(args)
                 ret = []
                 pos = 0
                 while pos < len(expr):
                     # copy the literal text up to the next '%' verbatim
                     q = expr.find('%', pos)
                     if q < 0:
                         ret.append(expr[pos:])
                         break
                     ret.append(expr[pos:q])
                     pos = q + 1
                     try:
                         d = expr[pos]
                     except IndexError:
                         raise error.ParseError(_('incomplete revspec format character'))
                     if d == '%':
                         # '%%' is a literal percent sign and consumes no argument
                         ret.append(d)
                         pos += 1
                         continue
                     try:
                         arg = next(argiter)
                     except StopIteration:
                         raise error.ParseError(_('missing argument for revspec'))
                     f = _formatlistfuncs.get(d)
                     if f:
                         # a list of some type
                         pos += 1
                         try:
                             d = expr[pos]
                         except IndexError:
                             raise error.ParseError(_('incomplete revspec format character'))
                         try:
                             ret.append(f(list(arg), d))
                         except (TypeError, ValueError):
                             raise error.ParseError(_('invalid argument for revspec'))
                     else:
                         try:
                             ret.append(_formatargtype(d, arg))
                         except (TypeError, ValueError):
                             raise error.ParseError(_('invalid argument for revspec'))
                     pos += 1
                 # every argument must have been consumed by a format specifier
                 try:
                     next(argiter)
                     raise error.ParseError(_('too many revspec arguments specified'))
                 except StopIteration:
                     pass
                 return ''.join(ret)
             def prettyformat(tree):
                 """Pretty-format ``tree`` by delegating to parser.prettyformat()"""
                 leafnodes = ('string', 'symbol')
                 return parser.prettyformat(tree, leafnodes)
             def depth(tree):
                 """Return the nesting depth of ``tree``; anything but a tuple counts 0"""
                 if not isinstance(tree, tuple):
                     return 0
                 return 1 + max(depth(child) for child in tree)
             def funcsused(tree):
                 """Return the set of names of all 'func' nodes found in ``tree``"""
                 if not isinstance(tree, tuple):
                     return set()
                 kind = tree[0]
                 if kind in ('string', 'symbol'):
                     return set()
                 names = set()
                 for child in tree[1:]:
                     names.update(funcsused(child))
                 if kind == 'func':
                     # tree[1] is the node naming the function, tree[1][1] its name
                     names.add(tree[1][1])
                 return names
             # 1 to 40 hexadecimal digits followed by the end of the string; used with
             # match() below, so the digits must also start the string
             _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
             def _ishashlikesymbol(symbol):
                 """Tell whether ``symbol`` looks like a hash

                 The raw result of _hashre.match() is returned rather than a bool.
                 """
                 hashmatch = _hashre.match(symbol)
                 return hashmatch
             def gethashlikesymbols(tree):
                 """Collect, in tree order, the symbols of ``tree`` that look like hashes

                 >>> gethashlikesymbols(('dagrange', ('symbol', '3'), ('symbol', 'abe3ff')))
                 ['3', 'abe3ff']
                 >>> gethashlikesymbols(('func', ('symbol', 'precursors'), ('symbol', '.')))
                 []
                 >>> gethashlikesymbols(('func', ('symbol', 'precursors'), ('symbol', '34')))
                 ['34']
                 >>> gethashlikesymbols(('symbol', 'abe3ffZ'))
                 []
                 """
                 if not tree:
                     return []
                 if tree[0] == "symbol":
                     name = tree[1]
                     return [name] if _ishashlikesymbol(name) else []
                 if len(tree) < 3:
                     # nodes with fewer than three items are not descended into
                     return []
                 found = []
                 for child in tree[1:]:
                     found.extend(gethashlikesymbols(child))
                 return found