##// END OF EJS Templates
revsetlang: build optimized tree by helper function...
Yuya Nishihara -
r34046:b862e6fc default
parent child Browse files
Show More
@@ -1,671 +1,688 b''
1 1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import string
11 11
12 12 from .i18n import _
13 13 from . import (
14 14 error,
15 15 node,
16 16 parser,
17 17 pycompat,
18 18 util,
19 19 )
20 20
# Operator table handed to parser.parser() by _parsewith(). Each token type
# maps to its binding strength followed by the action used when the token
# appears in primary, prefix, infix and suffix position; None marks a
# position that has no action for that token.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
    "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}
49 49
# words that tokenize() emits as operator tokens instead of symbols
keywords = {'and', 'or', 'not'}

# characters that open a quoted string; the same character closes it
_quoteletters = {'"', "'"}
# single-character operators, each emitted by tokenize() as its own token
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
# (ASCII letters and digits, '._@', and every byte value from 128 to 255)
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
# (the initial-letter set plus '-' and '/')
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
63 63
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    Each token is yielded as a ``(type, value, pos)`` tuple, where ``pos``
    is the offset in ``program`` at which the token starts (for strings,
    the offset just after the opening quote). The stream always ends with
    an ``('end', None, pos)`` token.

    ``lookup``, if given, is called with a candidate name and should
    return a true value when that name is known. It is used to recognize
    old-style ``a:b`` ranges and names containing ``-`` before the text
    is split into operators.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Raises ParseError on an unterminated string and on a character that
    cannot start any token.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize("@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    Check that '-' in a spaceless expression is reported at its own offset:
    >>> list(tokenize("a-b"))
    [('symbol', 'a', 0), ('-', None, 1), ('symbol', 'b', 2), ('end', None, 3)]

    '''
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: keep backslashes as they are
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # some jerk gave us foo-bar-baz, try to check if it's a symbol
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        # s now points at the '-' itself; pos is the end of
                        # the whole run and would misreport the position
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # step back so that the shared increment below lands on the
            # first character after the symbol
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)
182 182
183 183 # helpers
184 184
# sentinel that lets getinteger() tell an omitted ``default`` apart from
# any value a caller may pass explicitly
_notset = object()
186 186
def getsymbol(x):
    """Return the name carried by a ``('symbol', name)`` node

    Raises ParseError if ``x`` is empty or is not a symbol node.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]
191 191
def getstring(x, err):
    """Return the text carried by a string or symbol node

    Raises ParseError with message ``err`` if ``x`` is empty or is any
    other kind of node.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]
196 196
def getinteger(x, err, default=_notset):
    """Return the integer value of a string or symbol node

    If ``x`` is empty and ``default`` was given, ``default`` is returned
    instead. Raises ParseError with message ``err`` if ``x`` is not a
    string/symbol node or its text is not a valid integer.
    """
    if default is not _notset and not x:
        return default
    try:
        value = int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)
    else:
        return value
204 204
def getboolean(x, err):
    """Return the boolean value spelled by a symbol node

    The symbol name is interpreted by util.parsebool(). Raises ParseError
    with message ``err`` if that yields None.
    """
    parsed = util.parsebool(getsymbol(x))
    if parsed is None:
        raise error.ParseError(err)
    return parsed
210 210
def getlist(x):
    """Return the elements of an argument tree as a Python list

    An empty tree gives ``[]``, a ``('list', ...)`` node gives its
    children, and any other node gives a one-element list.
    """
    if not x:
        return []
    if x[0] != 'list':
        return [x]
    return list(x[1:])
217 217
def getrange(x, err):
    """Return the ``(first, last)`` operands of a range node

    A missing endpoint is reported as None. Raises ParseError with
    message ``err`` if ``x`` is empty or is not one of the range nodes.
    """
    if x:
        kind = x[0]
        if kind == 'range':
            return x[1], x[2]
        if kind == 'rangepre':
            return None, x[1]
        if kind == 'rangepost':
            return x[1], None
        if kind == 'rangeall':
            return None, None
    raise error.ParseError(err)
231 231
def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    Raises ParseError with message ``err`` if there are fewer than ``min``
    arguments, or more than ``max`` when ``max`` is not negative.
    """
    args = getlist(x)
    count = len(args)
    if count < min:
        raise error.ParseError(err)
    if max >= 0 and count > max:
        raise error.ParseError(err)
    return args
237 237
def getargsdict(x, funcname, keys):
    """Build the arguments dict of function ``funcname`` from tree ``x``

    ``keys`` is the argument spec, split by parser.splitargspec(); the
    result is whatever parser.buildargsdict() returns for it.
    """
    trees = getlist(x)
    argspec = parser.splitargspec(keys)
    return parser.buildargsdict(trees, funcname, argspec,
                                keyvaluenode='keyvalue', keynode='symbol')
241 241
# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the raw parsed tree of ``spec``, reusing a cached tree if any"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    cached = _treecache.get(spec)
    if cached is not None:
        return cached
    tree = parse(spec)
    _treecache[spec] = tree
    return tree
251
def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    Each ``_`` symbol in the parsed template is a placeholder handed to
    parser.buildtree() together with the replacement trees ``repls``.

    >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    placeholder = ('symbol', '_')
    return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
260
242 261 def _isnamedfunc(x, funcname):
243 262 """Check if given tree matches named function"""
244 263 return x and x[0] == 'func' and getsymbol(x[1]) == funcname
245 264
def _isposargs(x, n):
    """Check if given tree is n-length list of positional arguments"""
    args = getlist(x)
    if len(args) != n:
        return False
    return all(y and y[0] != 'keyvalue' for y in args)
250 269
def _matchnamedfunc(x, funcname):
    """Return args tree if given tree matches named function; otherwise None

    This can't be used for testing a nullary function since its args tree
    is also None. Use _isnamedfunc() instead.
    """
    if _isnamedfunc(x, funcname):
        return x[2]
    return None
260 279
def _matchonly(revs, bases):
    """Return ``('list', x, y)`` if the trees are ancestors(x), not ancestors(y)

    Both calls must take exactly one positional argument; otherwise None
    is returned.

    >>> f = lambda *args: _matchonly(*map(parse, args))
    >>> f('ancestors(A)', 'not ancestors(B)')
    ('list', ('symbol', 'A'), ('symbol', 'B'))
    """
    revsargs = _matchnamedfunc(revs, 'ancestors')
    if bases and bases[0] == 'not':
        basesargs = _matchnamedfunc(bases[1], 'ancestors')
    else:
        basesargs = None
    if _isposargs(revsargs, 1) and _isposargs(basesargs, 1):
        return ('list', revsargs, basesargs)
    return None
271 290
272 291 def _fixops(x):
273 292 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
274 293 handled well by our simple top-down parser"""
275 294 if not isinstance(x, tuple):
276 295 return x
277 296
278 297 op = x[0]
279 298 if op == 'parent':
280 299 # x^:y means (x^) : y, not x ^ (:y)
281 300 # x^: means (x^) :, not x ^ (:)
282 301 post = ('parentpost', x[1])
283 302 if x[2][0] == 'dagrangepre':
284 303 return _fixops(('dagrange', post, x[2][1]))
285 304 elif x[2][0] == 'rangepre':
286 305 return _fixops(('range', post, x[2][1]))
287 306 elif x[2][0] == 'rangeall':
288 307 return _fixops(('rangepost', post))
289 308 elif op == 'or':
290 309 # make number of arguments deterministic:
291 310 # x + y + z -> (or x y z) -> (or (list x y z))
292 311 return (op, _fixops(('list',) + x[1:]))
293 312 elif op == 'subscript' and x[1][0] == 'relation':
294 313 # x#y[z] ternary
295 314 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
296 315
297 316 return (op,) + tuple(_fixops(y) for y in x[1:])
298 317
299 318 def _analyze(x):
300 319 if x is None:
301 320 return x
302 321
303 322 op = x[0]
304 323 if op == 'minus':
305 return _analyze(('and', x[1], ('not', x[2])))
324 return _analyze(_build('_ and not _', *x[1:]))
306 325 elif op == 'only':
307 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
308 return _analyze(t)
326 return _analyze(_build('only(_, _)', *x[1:]))
309 327 elif op == 'onlypost':
310 return _analyze(('func', ('symbol', 'only'), x[1]))
328 return _analyze(_build('only(_)', x[1]))
311 329 elif op == 'dagrangepre':
312 return _analyze(('func', ('symbol', 'ancestors'), x[1]))
330 return _analyze(_build('ancestors(_)', x[1]))
313 331 elif op == 'dagrangepost':
314 return _analyze(('func', ('symbol', 'descendants'), x[1]))
332 return _analyze(_build('descendants(_)', x[1]))
315 333 elif op == 'negate':
316 334 s = getstring(x[1], _("can't negate that"))
317 335 return _analyze(('string', '-' + s))
318 336 elif op in ('string', 'symbol'):
319 337 return x
320 338 elif op == 'rangeall':
321 339 return (op, None)
322 340 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
323 341 return (op, _analyze(x[1]))
324 342 elif op == 'group':
325 343 return _analyze(x[1])
326 344 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
327 345 'subscript'}:
328 346 ta = _analyze(x[1])
329 347 tb = _analyze(x[2])
330 348 return (op, ta, tb)
331 349 elif op == 'relsubscript':
332 350 ta = _analyze(x[1])
333 351 tb = _analyze(x[2])
334 352 tc = _analyze(x[3])
335 353 return (op, ta, tb, tc)
336 354 elif op == 'list':
337 355 return (op,) + tuple(_analyze(y) for y in x[1:])
338 356 elif op == 'keyvalue':
339 357 return (op, x[1], _analyze(x[2]))
340 358 elif op == 'func':
341 359 return (op, x[1], _analyze(x[2]))
342 360 raise ValueError('invalid operator %r' % op)
343 361
def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    evaluatable = _analyze(x)
    return evaluatable
352 370
353 371 def _optimize(x, small):
354 372 if x is None:
355 373 return 0, x
356 374
357 375 smallbonus = 1
358 376 if small:
359 377 smallbonus = .5
360 378
361 379 op = x[0]
362 380 if op in ('string', 'symbol'):
363 381 return smallbonus, x # single revisions are small
364 382 elif op == 'and':
365 383 wa, ta = _optimize(x[1], True)
366 384 wb, tb = _optimize(x[2], True)
367 385 w = min(wa, wb)
368 386
369 387 # (::x and not ::y)/(not ::y and ::x) have a fast path
370 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
371 if tm:
372 return w, ('func', ('symbol', 'only'), tm)
388 m = _matchonly(ta, tb) or _matchonly(tb, ta)
389 if m:
390 return w, _build('only(_, _)', *m[1:])
373 391
374 392 if tb is not None and tb[0] == 'not':
375 393 return wa, ('difference', ta, tb[1])
376 394 if wa > wb:
377 395 op = 'andsmally'
378 396 return w, (op, ta, tb)
379 397 elif op == 'or':
380 398 # fast path for machine-generated expression, that is likely to have
381 399 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
382 400 ws, ts, ss = [], [], []
383 401 def flushss():
384 402 if not ss:
385 403 return
386 404 if len(ss) == 1:
387 405 w, t = ss[0]
388 406 else:
389 407 s = '\0'.join(t[1] for w, t in ss)
390 y = ('func', ('symbol', '_list'), ('string', s))
408 y = _build('_list(_)', ('string', s))
391 409 w, t = _optimize(y, False)
392 410 ws.append(w)
393 411 ts.append(t)
394 412 del ss[:]
395 413 for y in getlist(x[1]):
396 414 w, t = _optimize(y, False)
397 415 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
398 416 ss.append((w, t))
399 417 continue
400 418 flushss()
401 419 ws.append(w)
402 420 ts.append(t)
403 421 flushss()
404 422 if len(ts) == 1:
405 423 return ws[0], ts[0] # 'or' operation is fully optimized out
406 424 return max(ws), (op, ('list',) + tuple(ts))
407 425 elif op == 'not':
408 426 # Optimize not public() to _notpublic() because we have a fast version
409 427 if x[1][:3] == ('func', ('symbol', 'public'), None):
410 newsym = ('func', ('symbol', '_notpublic'), None)
411 o = _optimize(newsym, not small)
428 o = _optimize(_build('_notpublic()'), not small)
412 429 return o[0], o[1]
413 430 else:
414 431 o = _optimize(x[1], not small)
415 432 return o[0], (op, o[1])
416 433 elif op == 'rangeall':
417 434 return smallbonus, x
418 435 elif op in ('rangepre', 'rangepost', 'parentpost'):
419 436 o = _optimize(x[1], small)
420 437 return o[0], (op, o[1])
421 438 elif op in ('dagrange', 'range'):
422 439 wa, ta = _optimize(x[1], small)
423 440 wb, tb = _optimize(x[2], small)
424 441 return wa + wb, (op, ta, tb)
425 442 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
426 443 w, t = _optimize(x[1], small)
427 444 return w, (op, t, x[2])
428 445 elif op == 'relsubscript':
429 446 w, t = _optimize(x[1], small)
430 447 return w, (op, t, x[2], x[3])
431 448 elif op == 'list':
432 449 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
433 450 return sum(ws), (op,) + ts
434 451 elif op == 'keyvalue':
435 452 w, t = _optimize(x[2], small)
436 453 return w, (op, x[1], t)
437 454 elif op == 'func':
438 455 f = getsymbol(x[1])
439 456 wa, ta = _optimize(x[2], small)
440 457 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
441 458 'keyword', 'outgoing', 'user', 'destination'):
442 459 w = 10 # slow
443 460 elif f in ('modifies', 'adds', 'removes'):
444 461 w = 30 # slower
445 462 elif f == "contains":
446 463 w = 100 # very slow
447 464 elif f == "ancestor":
448 465 w = 1 * smallbonus
449 466 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
450 467 w = 0
451 468 elif f == "sort":
452 469 w = 10 # assume most sorts look at changelog
453 470 else:
454 471 w = 1
455 472 return w + wa, (op, x[1], ta)
456 473 raise ValueError('invalid operator %r' % op)
457 474
def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.
    """
    # _optimize() returns (weight, tree); only the tree is of interest here
    return _optimize(tree, small=True)[1]
465 482
# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions: the default initial-letter set
# extended with '$' (used by _aliasrules._parse)
_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
469 486
def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    Raises ParseError if the tokenizer rejects ``spec`` or if parsing
    stops before the end of it.

    >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith('$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith('foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
    tree, pos = parser.parser(elements).parse(tokens)
    if pos != len(spec):
        # the parser stopped early: something is left over
        raise error.ParseError(_('invalid token'), pos)
    simplified = parser.simplifyinfixops(tree, ('list', 'or'))
    return _fixops(simplified)
490 507
class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        This allows symbol names to use also ``$`` as an initial letter
        (for backward compatibility), and callers of this function should
        examine whether ``$`` is used also for unexpected symbols or not.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` is a call of a function named
        by a symbol; otherwise None"""
        if tree[0] != 'func':
            return None
        namenode = tree[1]
        if namenode[0] != 'symbol':
            return None
        return namenode[1], getlist(tree[2])
509 526
def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    If ``warn`` is given, it is called once for each alias that carries
    an error and has not been warned about yet.
    """
    aliasmap = _aliasrules.buildmap(aliases)
    expanded = _aliasrules.expand(aliasmap, tree)
    if warn is None:
        return expanded
    # warn about problematic (but not referred) aliases
    for name, alias in sorted(aliasmap.iteritems()):
        if not alias.error or alias.warned:
            continue
        warn(_('warning: %s\n') % (alias.error))
        alias.warned = True
    return expanded
521 538
def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    Every ``_concat`` node is replaced by one string node made of its
    string/symbol operands joined left to right. Raises ParseError if an
    operand is any other kind of node.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return tree
    if tree[0] != '_concat':
        return tuple(foldconcat(t) for t in tree)

    # depth-first walk; operands are pushed reversed so that they are
    # popped in their original left-to-right order
    stack = [tree]
    pieces = []
    while stack:
        node = stack.pop()
        kind = node[0]
        if kind == '_concat':
            stack.extend(reversed(node[1:]))
        elif kind in ('string', 'symbol'):
            pieces.append(node[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
    return ('string', ''.join(pieces))
542 559
def parse(spec, lookup=None):
    """Parse revset ``spec`` with the default tokenizing options

    ``lookup`` is passed on to the tokenizer.
    """
    tree = _parsewith(spec, lookup=lookup)
    return tree
545 562
def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted to a byte string, escaped by util.escapestr()
    and wrapped in single quotes.

    >>> _quote('asdf')
    "'asdf'"
    >>> _quote("asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote('asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = util.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped
559 576
def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = int(arg), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type.

    Returns the formatted expression. Raises Abort if ``expr`` ends in the
    middle of a format specifier or uses an unknown type character.

    >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec('%d:: and not %d::', 10, 20)
    '10:: and not 20::'
    >>> formatspec('%ld or %ld', [], [1])
    "_list('') or 1"
    >>> formatspec('keyword(%s)', 'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: 'default'
    >>> b.branch = b
    >>> formatspec('branch(%b)', b)
    "branch('default')"
    >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    '''

    def argtype(c, arg):
        # format a single argument of type ``c``
        if c == 'd':
            return '%d' % int(arg)
        elif c == 's':
            return _quote(arg)
        elif c == 'r':
            parse(arg) # make sure syntax errors are confined
            return '(%s)' % arg
        elif c == 'n':
            return _quote(node.hex(arg))
        elif c == 'b':
            return _quote(arg.branch())
        # falling through would hand None to the caller, which would then
        # be formatted into the expression as the text "None"
        raise error.Abort(_('unexpected revspec format character %s') % c)

    def listexp(s, t):
        # format the list ``s`` whose elements are of type ``t``
        l = len(s)
        if l == 0:
            return "_list('')"
        elif l == 1:
            return argtype(t, s[0])
        elif t == 'd':
            return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
        elif t == 's':
            # quote the joined text as a whole so that quotes and
            # backslashes inside elements cannot break out of the string
            return "_list(%s)" % _quote("\0".join(s))
        elif t == 'n':
            return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
        elif t == 'b':
            # same escaping as the single-element '%b' case
            return "_list(%s)" % _quote("\0".join(a.branch() for a in s))

        # no compact list form for this type: split into an 'or' tree
        m = l // 2
        return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

    expr = pycompat.bytestr(expr)
    end = len(expr)
    ret = ''
    pos = 0
    arg = 0
    while pos < end:
        c = expr[pos]
        if c == '%':
            pos += 1
            if pos >= end:
                raise error.Abort(_('incomplete revspec format character'))
            d = expr[pos]
            if d == '%':
                ret += d
            elif d in 'dsnbr':
                ret += argtype(d, args[arg])
                arg += 1
            elif d == 'l':
                # a list of some type
                pos += 1
                if pos >= end:
                    raise error.Abort(
                        _('incomplete revspec format character'))
                d = expr[pos]
                ret += listexp(list(args[arg]), d)
                arg += 1
            else:
                raise error.Abort(_('unexpected revspec format character %s')
                                  % d)
        else:
            ret += c
        pos += 1

    return ret
652 669
def prettyformat(tree):
    """Format ``tree`` with parser.prettyformat(), naming string and symbol
    as its leaf node types"""
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)
655 672
def depth(tree):
    """Return the nesting depth of ``tree``; anything but a tuple counts 0"""
    if not isinstance(tree, tuple):
        return 0
    return 1 + max(depth(sub) for sub in tree)
661 678
def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``"""
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    found = set()
    for sub in tree[1:]:
        found.update(funcsused(sub))
    if tree[0] == 'func':
        # tree[1] is the node naming the function; tree[1][1] is its text
        found.add(tree[1][1])
    return found
General Comments 0
You need to be logged in to leave comments. Login now