##// END OF EJS Templates
revset: detect integer list on parsing...
Boris Feld -
r41257:73203cdf default
parent child Browse files
Show More
@@ -1,797 +1,818 b''
1 1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import string
11 11
12 12 from .i18n import _
13 13 from . import (
14 14 error,
15 15 node,
16 16 parser,
17 17 pycompat,
18 smartset,
18 19 util,
19 20 )
20 21 from .utils import (
21 22 stringutil,
22 23 )
23 24
# Operator table handed to parser.parser() (see _parsewith()).
#
# Each entry maps a token type to a 5-tuple:
#   (binding-strength, primary, prefix, infix, suffix)
# where a slot is None when the token cannot be used in that position.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix

    # grouping, function call, subscript, relation and concatenation
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),

    # graph navigation
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),

    # ranges
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

    # boolean operators
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),

    # argument syntax
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),

    # closers, leaves and end of input
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}
54 55
# symbols that tokenize() emits as operator tokens instead of 'symbol'
keywords = {'and', 'or', 'not'}

# table looked up by function name in _optimize() to read an optional
# '_weight' attribute; it is empty here and presumably populated by another
# module -- TODO confirm
symbols = {}

# characters that open (and close) a quoted string
_quoteletters = {'"', "'"}
# single-character operators that tokenize() emits as-is
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
70 71
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Split a revset statement into a stream of tokens

    Each token is yielded as a ``(type, value, position)`` tuple and the
    stream always finishes with an ``'end'`` token.

    ``lookup``, when given, is called with a candidate name and should
    return a true value if that name is known; it is used to recognize
    old-style ``a:b`` ranges and names containing ``-``.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % program)
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # Old-style ranges are tried first so that names such as "old-tag",
        # which contain query metacharacters, are still understood.
        pieces = program.split(':', 1)
        if all(lookup(name) for name in pieces if name):
            first = pieces[0]
            if first:
                yield ('symbol', first, 0)
            if len(pieces) > 1:
                colonpos = len(first)
                yield (':', None, colonpos)
                if pieces[1]:
                    yield ('symbol', pieces[1], colonpos + 1)
            yield ('end', None, len(program))
            return

    pos, end = 0, len(program)
    while pos < end:
        ch = program[pos]
        # two-character look-ahead, used for '::', '..', '##' and r'...'
        pair = program[pos:pos + 2]
        if ch.isspace():
            # inter-token whitespace carries no meaning
            pass
        elif pair in ('::', '..', '##'):
            yield (pair, None, pos)
            pos += 1  # consume the second character as well
        elif ch in _simpleopletters:
            yield (ch, None, pos)
        elif ch in _quoteletters or pair in ("r'", 'r"'):
            # quoted string, optionally raw (r'...')
            if ch == 'r':
                pos += 1
                ch = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            start = pos
            while pos < end:  # scan for the matching closing quote
                cur = program[pos]
                if cur == '\\':
                    # an escaped character can never close the string
                    pos += 2
                    continue
                if cur == ch:
                    yield ('string', decode(program[start:pos]), start)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), start)
        elif ch in syminitletters:
            # symbol or keyword
            start = pos
            pos += 1
            while pos < end:  # scan for the end of the symbol
                cur = program[pos]
                if cur not in symletters:
                    break
                if cur == '.' and program[pos - 1] == '.':
                    # "a..b": give the first dot back so '..' is an operator
                    pos -= 1
                    break
                pos += 1
            sym = program[start:pos]
            if sym in keywords:
                yield (sym, None, start)
            elif '-' in sym:
                # "foo-bar-baz" is ambiguous: ask lookup whether it is a name
                if lookup and lookup(sym):
                    yield ('symbol', sym, start)
                else:
                    # otherwise treat every '-' as an operator
                    pieces = sym.split('-')
                    for piece in pieces[:-1]:
                        if piece:  # empty on consecutive '-'
                            yield ('symbol', piece, start)
                            start += len(piece)
                        yield ('-', None, start)
                        start += 1
                    if pieces[-1]:  # empty on a trailing '-'
                        yield ('symbol', pieces[-1], start)
            else:
                yield ('symbol', sym, start)
            # compensate for the unconditional increment below
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)
192 193
193 194 # helpers
194 195
# unique sentinel used by getinteger() to tell "no default supplied" apart
# from an explicit default of None
_notset = object()
196 197
def getsymbol(x):
    """Return the name stored in a ``('symbol', name)`` node

    Raises ParseError if ``x`` is empty or is not a symbol node.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]
201 202
def getstring(x, err):
    """Return the text of a ``'string'`` or ``'symbol'`` node

    Raises ParseError carrying ``err`` for any other input.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    raise error.ParseError(err)
206 207
def getinteger(x, err, default=_notset):
    """Return the integer value of a string/symbol node

    When ``x`` is empty and ``default`` was supplied, ``default`` is
    returned instead.  Raises ParseError carrying ``err`` if the node is
    not a string/symbol or its text is not a valid integer.
    """
    if default is not _notset and not x:
        return default
    try:
        value = int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)
    return value
214 215
def getboolean(x, err):
    """Return the boolean spelled by a symbol node

    The symbol text is interpreted by stringutil.parsebool(); ParseError
    carrying ``err`` is raised when that yields None.
    """
    parsed = stringutil.parsebool(getsymbol(x))
    if parsed is None:
        raise error.ParseError(err)
    return parsed
220 221
def getlist(x):
    """Return the elements of a parsed argument as a Python list

    An empty tree gives ``[]``, a ``'list'`` node gives its children, and
    any other node gives a one-element list holding that node.
    """
    if not x:
        return []
    if x[0] != 'list':
        return [x]
    return list(x[1:])
227 228
def getrange(x, err):
    """Return the ``(first, last)`` operands of a range node

    A missing end point is reported as None.  Raises ParseError carrying
    ``err`` if ``x`` is empty or is not one of the range node types.
    """
    if x:
        kind = x[0]
        if kind == 'range':
            return x[1], x[2]
        if kind == 'rangepre':
            return None, x[1]
        if kind == 'rangepost':
            return x[1], None
        if kind == 'rangeall':
            return None, None
    raise error.ParseError(err)
241 242
def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    At least ``min`` arguments are required; a negative ``max`` means
    there is no upper bound.  Raises ParseError carrying ``err`` when the
    count is out of range.
    """
    args = getlist(x)
    count = len(args)
    if count < min or (max >= 0 and count > max):
        raise error.ParseError(err)
    return args
247 248
def getargsdict(x, funcname, keys):
    """Build an argument dict for ``funcname`` from the parsed tree ``x``

    ``keys`` is split by parser.splitargspec() and the result is built by
    parser.buildargsdict() using this language's node names.
    """
    args = getlist(x)
    argspec = parser.splitargspec(keys)
    return parser.buildargsdict(args, funcname, argspec,
                                keyvaluenode='keyvalue', keynode='symbol')
251 252
# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the parsed tree of ``spec``, parsing it only on a cache miss"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    cached = _treecache.get(spec)
    if cached is not None:
        return cached
    cached = parse(spec)
    _treecache[spec] = cached
    return cached
261 262
def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    Each ``_`` symbol in the template is a placeholder that
    parser.buildtree() fills from ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    placeholder = ('symbol', '_')
    return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
270 271
def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    Each ``_`` symbol in the pattern acts as a placeholder.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    placeholder = ('symbol', '_')
    return parser.matchtree(_cachedtree(patspec), tree, placeholder,
                            {'keyvalue', 'list'})
282 283
def _matchonly(revs, bases):
    """Match ``revs and bases`` against ``ancestors(_) and not ancestors(_)``"""
    candidate = ('and', revs, bases)
    return _match('ancestors(_) and not ancestors(_)', candidate)
285 286
def _fixops(x):
    """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
    handled well by our simple top-down parser

    Non-tuple values are returned unchanged; every other node is rebuilt
    with its children rewritten recursively.
    """
    if not isinstance(x, tuple):
        return x

    op = x[0]
    if op == 'parent':
        # x^:y means (x^) : y, not x ^ (:y)
        # x^: means (x^) :, not x ^ (:)
        post = ('parentpost', x[1])
        rhs = x[2]
        kind = rhs[0]
        if kind == 'dagrangepre':
            return _fixops(('dagrange', post, rhs[1]))
        if kind == 'dagrangeall':
            return _fixops(('dagrangepost', post))
        if kind == 'rangepre':
            return _fixops(('range', post, rhs[1]))
        if kind == 'rangeall':
            return _fixops(('rangepost', post))
    elif op == 'or':
        # make number of arguments deterministic:
        # x + y + z -> (or x y z) -> (or (list x y z))
        operands = ('list',) + x[1:]
        return (op, _fixops(operands))
    elif op == 'subscript' and x[1][0] == 'relation':
        # x#y[z] is a single ternary operation
        relation = x[1]
        return _fixops(('relsubscript', relation[1], relation[2], x[2]))

    children = tuple(_fixops(child) for child in x[1:])
    return (op,) + children
314 315
def _analyze(x):
    """Recursively rewrite pseudo operations of ``x`` into real ones

    Sugar such as ``x - y``, ``x % y``, ``::x``, ``x::`` and ``-x`` is
    replaced by the equivalent tree, groups are unwrapped and every other
    node is rebuilt with analyzed operands.  Raises ParseError for a bare
    ``::`` and for a negation of something that is not a string/symbol, and
    ValueError for an unknown operator.
    """
    if x is None:
        return x

    op = x[0]
    if op in ('string', 'symbol'):
        return x
    if op == 'group':
        return _analyze(x[1])

    # pseudo operators rewritten through a template taking all operands
    if op in ('minus', 'only'):
        template = {'minus': '_ and not _', 'only': 'only(_, _)'}[op]
        return _analyze(_build(template, *x[1:]))

    # pseudo operators rewritten through a template taking one operand
    unarytemplates = {
        'onlypost': 'only(_)',
        'dagrangepre': 'ancestors(_)',
        'dagrangepost': 'descendants(_)',
    }
    if op in unarytemplates:
        return _analyze(_build(unarytemplates[op], x[1]))

    if op == 'dagrangeall':
        raise error.ParseError(_("can't use '::' in this context"))
    if op == 'negate':
        # "-foo" is just the string "-foo"
        s = getstring(x[1], _("can't negate that"))
        return _analyze(('string', '-' + s))
    if op == 'rangeall':
        return (op, None)
    if op in ('or', 'not', 'rangepre', 'rangepost', 'parentpost'):
        return (op, _analyze(x[1]))
    if op in ('and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
              'subscript'):
        left = _analyze(x[1])
        right = _analyze(x[2])
        return (op, left, right)
    if op == 'relsubscript':
        first = _analyze(x[1])
        second = _analyze(x[2])
        third = _analyze(x[3])
        return (op, first, second, third)
    if op == 'list':
        return (op,) + tuple(_analyze(item) for item in x[1:])
    if op in ('keyvalue', 'func'):
        # the key / function name node is kept as-is
        return (op, x[1], _analyze(x[2]))
    raise ValueError('invalid operator %r' % op)
360 361
def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    analyzed = _analyze(x)
    return analyzed
369 370
def _optimize(x):
    """Return ``(weight, tree)`` for the analyzed tree ``x``

    ``tree`` is ``x`` with known-fast rewrites applied; ``weight`` is the
    number computed alongside it that the 'and' branch compares to choose
    between 'and' and 'andsmally'.  Raises ValueError for an unknown
    operator.
    """
    if x is None:
        return 0, x

    op = x[0]
    if op in ('string', 'symbol'):
        return 0.5, x  # single revisions are small

    if op == 'and':
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        w = min(wa, wb)

        # (draft/secret/_notpublic() & ::x) have a fast path
        found = _match('_() & ancestors(_)', ('and', ta, tb))
        if found and getsymbol(found[1]) in {'draft', 'secret', '_notpublic'}:
            return w, _build('_phaseandancestors(_, _)', found[1], found[2])

        # (::x and not ::y)/(not ::y and ::x) have a fast path
        found = _matchonly(ta, tb) or _matchonly(tb, ta)
        if found:
            return w, _build('only(_, _)', *found[1:])

        found = _match('not _', tb)
        if found:
            return wa, ('difference', ta, found[1])
        if wa > wb:
            op = 'andsmally'
        return w, (op, ta, tb)

    if op == 'or':
        # fast path for machine-generated expression, that is likely to have
        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
        weights, trees, trivial = [], [], []

        def flushtrivial():
            # fold the pending run of string/symbol operands into one entry
            if not trivial:
                return
            if len(trivial) == 1:
                fw, ft = trivial[0]
            else:
                joined = '\0'.join(t[1] for w, t in trivial)
                listcall = _build('_list(_)', ('string', joined))
                fw, ft = _optimize(listcall)
            weights.append(fw)
            trees.append(ft)
            del trivial[:]

        for operand in getlist(x[1]):
            w, t = _optimize(operand)
            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                trivial.append((w, t))
                continue
            flushtrivial()
            weights.append(w)
            trees.append(t)
        flushtrivial()
        if len(trees) == 1:
            return weights[0], trees[0]  # 'or' operation is fully optimized out
        return max(weights), (op, ('list',) + tuple(trees))

    if op == 'not':
        # Optimize not public() to _notpublic() because we have a fast version
        if _match('public()', x[1]):
            w, t = _optimize(_build('_notpublic()'))
            return w, t
        w, t = _optimize(x[1])
        return w, (op, t)

    if op == 'rangeall':
        return 1, x

    if op in ('rangepre', 'rangepost', 'parentpost'):
        w, t = _optimize(x[1])
        return w, (op, t)

    if op in ('dagrange', 'range'):
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        return wa + wb, (op, ta, tb)

    if op in ('parent', 'ancestor', 'relation', 'subscript'):
        # only the first operand is optimized
        w, t = _optimize(x[1])
        return w, (op, t, x[2])

    if op == 'relsubscript':
        w, t = _optimize(x[1])
        return w, (op, t, x[2], x[3])

    if op == 'list':
        weights, trees = zip(*(_optimize(item) for item in x[1:]))
        return sum(weights), (op,) + trees

    if op == 'keyvalue':
        w, t = _optimize(x[2])
        return w, (op, x[1], t)

    if op == 'func':
        funcname = getsymbol(x[1])
        wa, ta = _optimize(x[2])
        w = getattr(symbols.get(funcname), '_weight', 1)
        found = _match('commonancestors(_)', ta)

        # Optimize heads(commonancestors(_)) because we have a fast version
        if funcname == 'heads' and found:
            return w + wa, _build('_commonancestorheads(_)', found[1])

        return w + wa, (op, x[1], ta)

    raise ValueError('invalid operator %r' % op)
467 468
def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.
    """
    # only the rewritten tree is of interest to callers, not its weight
    return _optimize(tree)[1]
475 476
# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions ('$' is accepted in addition to the
# default initial letters)
_aliassyminitletters = _syminitletters | {'$'}
479 480
def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    Raises ParseError('invalid token') if the parser stops before the end
    of ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    # a spec wrapped in "revset(...)" is tokenized without name lookup
    if lookup and spec.startswith('revset(') and spec.endswith(')'):
        lookup = None
    tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
    tree, pos = parser.parser(elements).parse(tokens)
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    simplified = parser.simplifyinfixops(tree, ('list', 'or'))
    return _fixops(simplified)
502 503
class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        This allows symbol names to use also ``$`` as an initial letter
        (for backward compatibility), and callers of this function should
        examine whether ``$`` is used also for unexpected symbols or not.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` is a call of a named
        function, None otherwise"""
        if tree[0] != 'func' or tree[1][0] != 'symbol':
            return None
        funcname = tree[1][1]
        return funcname, getlist(tree[2])
521 522
def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    If ``warn`` is given, it is called once for every alias that carries
    an error and has not been warned about yet.
    """
    aliasmap = _aliasrules.buildmap(aliases)
    expanded = _aliasrules.expand(aliasmap, tree)
    if warn is None:
        return expanded
    # warn about problematic (but not referred) aliases
    for name, alias in sorted(aliasmap.iteritems()):
        if not alias.error or alias.warned:
            continue
        warn(_('warning: %s\n') % (alias.error))
        alias.warned = True
    return expanded
533 534
def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    Every ``_concat`` node is replaced by one ``'string'`` node holding the
    joined text of its string/symbol operands, in source order.  Raises
    ParseError if an operand is neither a string nor a symbol.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return tree
    if tree[0] != '_concat':
        return tuple(foldconcat(child) for child in tree)

    # depth-first walk with an explicit stack; operands are pushed in
    # reverse so that they pop in left-to-right order
    stack = [tree]
    texts = []
    while stack:
        node = stack.pop()
        kind = node[0]
        if kind == '_concat':
            stack.extend(reversed(node[1:]))
        elif kind in ('string', 'symbol'):
            texts.append(node[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
    return ('string', ''.join(texts))
554 555
def parse(spec, lookup=None):
    """Parse ``spec`` into a raw tree

    A ParseError that carries a location is re-raised with a ``hint``
    showing the spec and a caret under the offending position.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        if len(inst.args) > 1:  # has location
            loc = inst.args[1]
            # Remove newlines -- spaces are equivalent whitespace.
            flatspec = spec.replace('\n', ' ')
            # We want the caret to point to the place in the template that
            # failed to parse, but in a hint we get a open paren at the
            # start. Therefore, we print "loc + 1" spaces (instead of "loc")
            # to line up the caret with the location of the error.
            padding = ' ' * (loc + 1)
            inst.hint = flatspec + '\n' + padding + '^ ' + _('here')
        raise
569 570
def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted with pycompat.bytestr(), escaped with
    stringutil.escapestr() and wrapped in single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = stringutil.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped
583 584
def _formatargtype(c, arg):
    """Format one argument according to the format character ``c``

    See formatspec() for the meaning of each character.  Raises TypeError
    when ``arg`` has the wrong type for ``%r`` or ``%b``, and ParseError
    for an unknown format character.
    """
    if c == 'd':
        return 'rev(%d)' % int(arg)
    if c == 's':
        return _quote(arg)
    if c == 'r':
        if not isinstance(arg, bytes):
            raise TypeError
        parse(arg)  # make sure syntax errors are confined
        return '(%s)' % arg
    if c == 'n':
        return _quote(node.hex(arg))
    if c == 'b':
        try:
            branch = arg.branch()
        except AttributeError:
            raise TypeError
        return _quote(branch)
    raise error.ParseError(_('unexpected revspec format character %s') % c)
602 603
def _formatlistexp(s, t):
    """Format the list ``s`` of arguments of type ``t`` as one expression

    An empty list becomes ``_list('')`` and a single element is formatted
    like a scalar argument.  Longer lists of ``d``, ``s``, ``n`` and ``b``
    items are packed into a single ``_intlist``/``_list``/``_hexlist``
    call whose items are separated by NUL; any other type is split in
    halves and joined with ``or``.

    Raises TypeError if a ``b`` item has no ``branch()`` method.
    """
    l = len(s)
    if l == 0:
        return "_list('')"
    elif l == 1:
        return _formatargtype(t, s[0])
    elif t == 'd':
        return _formatintlist(s)
    elif t == 's':
        return "_list(%s)" % _quote("\0".join(s))
    elif t == 'n':
        return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
    elif t == 'b':
        try:
            branches = "\0".join(a.branch() for a in s)
        except AttributeError:
            raise TypeError
        # Branch names are arbitrary text, so they must be escaped exactly
        # like %ls items (and like a single %b argument); embedding them
        # raw between quotes breaks on names containing "'" or "\".
        return "_list(%s)" % _quote(branches)

    # no packed form for this type: build a balanced tree of 'or'
    m = l // 2
    return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
623 624
def _formatintlist(data):
    """Format a sized collection of revision numbers as a revset expression

    Returns ``_list('')`` for no item, ``rev(N)`` for one item and an
    ``_intlist('...')`` call with NUL-separated numbers otherwise.  Raises
    ParseError if ``data`` has no length or an item is not an integer.
    """
    try:
        count = len(data)
        if count == 0:
            return "_list('')"
        if count == 1:
            return _formatargtype('d', data[0])
        numbers = ['%d' % int(rev) for rev in data]
        return "_intlist('%s')" % "\0".join(numbers)
    except (TypeError, ValueError):
        raise error.ParseError(_('invalid argument for revspec'))
634 635
def _formatparamexp(args, t):
    """Format ``args`` of type ``t`` as comma-separated function parameters"""
    formatted = [_formatargtype(t, a) for a in args]
    return ', '.join(formatted)
637 638
# formatters for the list prefixes of formatspec(): '%l<type>' renders a
# parenthesized list and '%p<type>' renders function parameters; each
# formatter is called as f(items, type)
_formatlistfuncs = {
    'l': _formatlistexp,
    'p': _formatparamexp,
}
642 643
def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = rev(int(arg)), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    'rev(10):: and not rev(20)::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or rev(1)"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    pieces = []
    for kind, value in _parseargs(expr, args):
        if kind is None:
            # already a formatted fragment
            pieces.append(value)
            continue
        if kind != 'baseset':
            raise error.ProgrammingError("unknown revspec item type: %r"
                                         % kind)
        # a list of revisions left unformatted by _parseargs(); a set has
        # no stable order, so it is sorted before being serialized
        if isinstance(value, set):
            revs = sorted(value)
        else:
            revs = list(value)
        pieces.append(_formatintlist(revs))
    return b''.join(pieces)
688 693
def _parseargs(expr, args):
    """parse the expression and replace all inexpensive args

    return a list of tuple [(arg-type, arg-value)]

    Arg-type can be:
    * None:      a string ready to be concatenated into a final spec
    * 'baseset': an iterable of revisions

    Raises ParseError if a format character is incomplete or invalid, or
    if the number of arguments does not match the expression.
    """
    expr = pycompat.bytestr(expr)
    remaining = iter(args)
    items = []
    pos = 0
    while pos < len(expr):
        percent = expr.find('%', pos)
        if percent < 0:
            # no more format characters: keep the tail verbatim
            items.append((None, expr[pos:]))
            break
        items.append((None, expr[pos:percent]))
        pos = percent + 1
        try:
            code = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if code == '%':
            # "%%" is a literal percent sign and consumes no argument
            items.append((None, code))
            pos += 1
            continue

        try:
            arg = next(remaining)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        listfunc = _formatlistfuncs.get(code)
        if listfunc:
            # a list of some type, might be expensive, do not replace
            pos += 1
            islist = (code == 'l')
            try:
                code = expr[pos]
            except IndexError:
                raise error.ParseError(
                    _('incomplete revspec format character'))
            if islist and code == 'd' and arg:
                # special case, we might be able to speed up the list of
                # int case
                #
                # We have been very conservative here for the first version.
                # Other types (eg: generator) are probably fine, but we did
                # not want to take any risk.
                safeinputtype = (list, tuple, set, smartset.abstractsmartset)
                if isinstance(arg, safeinputtype):
                    # we don't create a baseset yet, because it comes with
                    # an extra cost. If we are going to serialize it we
                    # better skip it.
                    items.append(('baseset', arg))
                    pos += 1
                    continue
            try:
                items.append((None, listfunc(list(arg), code)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        else:
            # a single entry, not expensive, replace
            try:
                items.append((None, _formatargtype(code, arg)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        pos += 1

    # every argument must have been consumed by a format character
    for unused in remaining:
        raise error.ParseError(_('too many revspec arguments specified'))
    return items
747 768
def prettyformat(tree):
    """Format ``tree`` with parser.prettyformat(), treating string and
    symbol nodes as leaves"""
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)
750 771
def depth(tree):
    """Return the nesting depth of ``tree``

    Anything that is not a tuple has depth 0; a tuple is one level deeper
    than its deepest element.
    """
    if not isinstance(tree, tuple):
        return 0
    return 1 + max(depth(item) for item in tree)
756 777
def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``"""
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    found = set()
    for child in tree[1:]:
        found.update(funcsused(child))
    if tree[0] == 'func':
        # tree[1] is the ('symbol', name) node naming the function
        found.add(tree[1][1])
    return found
767 788
# matches a whole string made of 1 to 40 hexadecimal digits
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
769 790
def _ishashlikesymbol(symbol):
    """returns true if the symbol looks like a hash

    The value returned is the result of matching ``symbol`` against
    ``_hashre``, so it is truthy for 1 to 40 hexadecimal digits and None
    otherwise.
    """
    return _hashre.match(symbol)
773 794
def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    The whole tree is searched, including the operand of single-operand
    nodes such as ``not x``, ``(x)``, ``x^`` or ``:x`` and the operand
    list of ``or``.  The text of ``'string'`` nodes is never reported, and
    the operand of ``negate`` is skipped because ``-x`` denotes the string
    ``'-x'`` rather than the symbol ``x``.

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    >>> gethashlikesymbols(parse(b'not abe3ff'))
    ['abe3ff']
    >>> gethashlikesymbols(parse(b'abe3ff or 34'))
    ['abe3ff', '34']
    """
    if not tree:
        return []

    kind = tree[0]
    if kind == "symbol":
        if _ishashlikesymbol(tree[1]):
            return [tree[1]]
        return []
    if kind in ("string", "negate"):
        # literal text, not a reference to a revision
        return []

    # Recurse into every operand. Checking len(tree) >= 3 here would skip
    # all single-operand nodes, including the ('or', ('list', ...)) shape
    # produced by _fixops().
    results = []
    for subtree in tree[1:]:
        if isinstance(subtree, tuple):
            results += gethashlikesymbols(subtree)
    return results
General Comments 0
You need to be logged in to leave comments. Login now