##// END OF EJS Templates
revsetlang: use sysbytes() instead of blind encode()...
Gregory Szorc -
r42001:ddb17451 default
parent child Browse files
Show More
@@ -1,846 +1,846
1 1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import string
11 11
12 12 from .i18n import _
13 13 from . import (
14 14 error,
15 15 node,
16 16 parser,
17 17 pycompat,
18 18 smartset,
19 19 util,
20 20 )
21 21 from .utils import (
22 22 stringutil,
23 23 )
24 24
# Operator table handed to parser.parser(). Each token type maps to
#   (binding-strength, primary, prefix, infix, suffix)
elements = {
    # grouping, subscript, relation and concatenation
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    # ancestry navigation
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    # ranges
    "::": (
        17,
        "dagrangeall",
        ("dagrangepre", 17),
        ("dagrange", 17),
        "dagrangepost",
    ),
    "..": (
        17,
        "dagrangeall",
        ("dagrangepre", 17),
        ("dagrange", 17),
        "dagrangepost",
    ),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    # boolean operators
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    # argument syntax
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    # closers and terminals
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}
55 55
# symbols that the tokenizer emits as operator tokens rather than 'symbol'
keywords = {'and', 'or', 'not'}

# table of revset functions by name; starts empty here and is read by
# _optimize() to look up each function's '_weight'
symbols = {}

_quoteletters = {'"', "'"}
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
#
# string.ascii_letters and string.digits are native strings, so they go
# through pycompat.sysbytes() before being joined with the literal.
_syminitletters = set(pycompat.iterbytestr(
    pycompat.sysbytes(string.ascii_letters) +
    pycompat.sysbytes(string.digits) +
    '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
71 71
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    Each token is yielded as a ``(type, value, position)`` tuple and the
    stream always finishes with an ``'end'`` token.

    ``lookup``, if given, is called with a candidate name and should return
    a true value when that name is a known symbol.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % program)
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        sides = program.split(':', 1)
        if all(lookup(name) for name in sides if name):
            first = sides[0]
            if first:
                yield ('symbol', first, 0)
            if len(sides) > 1:
                colon = len(first)
                yield (':', None, colon)
                if sides[1]:
                    yield ('symbol', sides[1], colon + 1)
            yield ('end', None, len(program))
            return

    pos, end = 0, len(program)
    while pos < end:
        ch = program[pos]
        pair = program[pos:pos + 2] # look ahead carefully
        if ch.isspace():
            # skip inter-token whitespace
            pass
        elif pair in ('::', '..', '##'):
            # two-character operators win over their one-character prefix
            yield (pair, None, pos)
            pos += 1 # skip ahead
        elif ch in _simpleopletters:
            # handle simple operators
            yield (ch, None, pos)
        elif ch in _quoteletters or pair in ("r'", 'r"'):
            # handle quoted strings
            if ch == 'r':
                # raw string: step onto the quote, keep the body untouched
                pos += 1
                ch = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            start = pos
            while pos < end: # find closing quote
                nxt = program[pos]
                if nxt == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if nxt == ch:
                    yield ('string', decode(program[start:pos]), start)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), start)
        elif ch in syminitletters:
            # gather up a symbol/keyword
            start = pos
            pos += 1
            while pos < end: # find end of symbol
                nxt = program[pos]
                if nxt not in symletters:
                    break
                if nxt == '.' and program[pos - 1] == '.':
                    # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[start:pos]
            if sym in keywords:
                # operator keywords
                yield (sym, None, start)
            elif '-' not in sym:
                yield ('symbol', sym, start)
            elif lookup and lookup(sym):
                # some jerk gave us foo-bar-baz, and it looks like a real
                # symbol
                yield ('symbol', sym, start)
            else:
                # looks like an expression
                pieces = sym.split('-')
                for piece in pieces[:-1]:
                    if piece: # possible consecutive -
                        yield ('symbol', piece, start)
                    start += len(piece)
                    yield ('-', None, start)
                    start += 1
                if pieces[-1]: # possible trailing -
                    yield ('symbol', pieces[-1], start)
            # the shared increment below moves past the symbol's last letter
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)
193 193
# helpers

# sentinel for "no default supplied", so that None can itself be a default
_notset = object()

def getsymbol(x):
    """Return the name carried by a ``('symbol', name)`` node

    Raises ParseError when ``x`` is empty or is not a symbol node.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]
202 202
def getstring(x, err):
    """Return the value of a string or symbol node

    Raises ParseError(err) when ``x`` is empty or is any other kind of node.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]
207 207
def getinteger(x, err, default=_notset):
    """Return the integer value of a string or symbol node

    An empty ``x`` yields ``default`` when one was supplied. Anything that
    is not a string/symbol node, or whose value int() rejects, raises
    ParseError(err).
    """
    if default is not _notset and not x:
        return default
    try:
        value = int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)
    return value
215 215
def getboolean(x, err):
    """Return the boolean spelled by a symbol node

    Raises ParseError(err) when stringutil.parsebool() does not recognise
    the symbol (it returns None in that case).
    """
    parsed = stringutil.parsebool(getsymbol(x))
    if parsed is None:
        raise error.ParseError(err)
    return parsed
221 221
def getlist(x):
    """Return the arguments held by ``x`` as a Python list

    An empty tree gives ``[]``, a ``'list'`` node gives its children, and
    any other node is treated as a single argument.
    """
    if not x:
        return []
    return list(x[1:]) if x[0] == 'list' else [x]
228 228
def getrange(x, err):
    """Return the ``(first, last)`` subtrees of a range node

    A side that is omitted from the expression is returned as None.
    Raises ParseError(err) when ``x`` is empty or is not one of the four
    range node types.
    """
    if x:
        kind = x[0]
        if kind == 'rangeall':
            return None, None
        if kind == 'rangepost':
            return x[1], None
        if kind == 'rangepre':
            return None, x[1]
        if kind == 'range':
            return x[1], x[2]
    raise error.ParseError(err)
242 242
def getintrange(x, err1, err2, deffirst=_notset, deflast=_notset):
    """Get [first, last] integer range (both inclusive) from a parsed tree

    A lone string/symbol ``n`` is read as the range ``n:n``.

    If any of the sides omitted, and if no default provided, ParseError will
    be raised.
    """
    if x and x[0] in ('string', 'symbol'):
        single = getinteger(x, err1)
        return single, single
    first, last = getrange(x, err1)
    return getinteger(first, err2, deffirst), getinteger(last, err2, deflast)
254 254
def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    Raises ParseError(err) when there are fewer than ``min`` arguments or
    more than ``max``; a negative ``max`` disables the upper bound.
    """
    args = getlist(x)
    count = len(args)
    if count < min or 0 <= max < count:
        raise error.ParseError(err)
    return args
260 260
def getargsdict(x, funcname, keys):
    """Build the arguments of ``funcname`` from tree ``x`` per spec ``keys``

    Thin wrapper over parser.buildargsdict() using this module's node names.
    """
    argspec = parser.splitargspec(keys)
    return parser.buildargsdict(getlist(x), funcname, argspec,
                                keyvaluenode='keyvalue', keynode='symbol')
264 264
# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the raw parsed tree of ``spec``, parsing it only on a miss"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    tree = _treecache.get(spec)
    if tree is not None:
        return tree
    tree = _treecache[spec] = parse(spec)
    return tree
274 274
def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    Each ``_`` placeholder in ``tmplspec`` is filled from ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    placeholder = ('symbol', '_')
    return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
283 283
def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    ``_`` in ``patspec`` is the placeholder symbol.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    placeholder = ('symbol', '_')
    incompletenodes = {'keyvalue', 'list'}
    return parser.matchtree(_cachedtree(patspec), tree, placeholder,
                            incompletenodes)
295 295
def _matchonly(revs, bases):
    """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'"""
    candidate = ('and', revs, bases)
    return _match('ancestors(_) and not ancestors(_)', candidate)
298 298
299 299 def _fixops(x):
300 300 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
301 301 handled well by our simple top-down parser"""
302 302 if not isinstance(x, tuple):
303 303 return x
304 304
305 305 op = x[0]
306 306 if op == 'parent':
307 307 # x^:y means (x^) : y, not x ^ (:y)
308 308 # x^: means (x^) :, not x ^ (:)
309 309 post = ('parentpost', x[1])
310 310 if x[2][0] == 'dagrangepre':
311 311 return _fixops(('dagrange', post, x[2][1]))
312 312 elif x[2][0] == 'dagrangeall':
313 313 return _fixops(('dagrangepost', post))
314 314 elif x[2][0] == 'rangepre':
315 315 return _fixops(('range', post, x[2][1]))
316 316 elif x[2][0] == 'rangeall':
317 317 return _fixops(('rangepost', post))
318 318 elif op == 'or':
319 319 # make number of arguments deterministic:
320 320 # x + y + z -> (or x y z) -> (or (list x y z))
321 321 return (op, _fixops(('list',) + x[1:]))
322 322 elif op == 'subscript' and x[1][0] == 'relation':
323 323 # x#y[z] ternary
324 324 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
325 325
326 326 return (op,) + tuple(_fixops(y) for y in x[1:])
327 327
328 328 def _analyze(x):
329 329 if x is None:
330 330 return x
331 331
332 332 op = x[0]
333 333 if op == 'minus':
334 334 return _analyze(_build('_ and not _', *x[1:]))
335 335 elif op == 'only':
336 336 return _analyze(_build('only(_, _)', *x[1:]))
337 337 elif op == 'onlypost':
338 338 return _analyze(_build('only(_)', x[1]))
339 339 elif op == 'dagrangeall':
340 340 raise error.ParseError(_("can't use '::' in this context"))
341 341 elif op == 'dagrangepre':
342 342 return _analyze(_build('ancestors(_)', x[1]))
343 343 elif op == 'dagrangepost':
344 344 return _analyze(_build('descendants(_)', x[1]))
345 345 elif op == 'negate':
346 346 s = getstring(x[1], _("can't negate that"))
347 347 return _analyze(('string', '-' + s))
348 348 elif op in ('string', 'symbol', 'smartset'):
349 349 return x
350 350 elif op == 'rangeall':
351 351 return (op, None)
352 352 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
353 353 return (op, _analyze(x[1]))
354 354 elif op == 'group':
355 355 return _analyze(x[1])
356 356 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
357 357 'subscript'}:
358 358 ta = _analyze(x[1])
359 359 tb = _analyze(x[2])
360 360 return (op, ta, tb)
361 361 elif op == 'relsubscript':
362 362 ta = _analyze(x[1])
363 363 tb = _analyze(x[2])
364 364 tc = _analyze(x[3])
365 365 return (op, ta, tb, tc)
366 366 elif op == 'list':
367 367 return (op,) + tuple(_analyze(y) for y in x[1:])
368 368 elif op == 'keyvalue':
369 369 return (op, x[1], _analyze(x[2]))
370 370 elif op == 'func':
371 371 return (op, x[1], _analyze(x[2]))
372 372 raise ValueError('invalid operator %r' % op)
373 373
def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    evaluatable = _analyze(x)
    return evaluatable
382 382
383 383 def _optimize(x):
384 384 if x is None:
385 385 return 0, x
386 386
387 387 op = x[0]
388 388 if op in ('string', 'symbol', 'smartset'):
389 389 return 0.5, x # single revisions are small
390 390 elif op == 'and':
391 391 wa, ta = _optimize(x[1])
392 392 wb, tb = _optimize(x[2])
393 393 w = min(wa, wb)
394 394
395 395 # (draft/secret/_notpublic() & ::x) have a fast path
396 396 m = _match('_() & ancestors(_)', ('and', ta, tb))
397 397 if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
398 398 return w, _build('_phaseandancestors(_, _)', m[1], m[2])
399 399
400 400 # (::x and not ::y)/(not ::y and ::x) have a fast path
401 401 m = _matchonly(ta, tb) or _matchonly(tb, ta)
402 402 if m:
403 403 return w, _build('only(_, _)', *m[1:])
404 404
405 405 m = _match('not _', tb)
406 406 if m:
407 407 return wa, ('difference', ta, m[1])
408 408 if wa > wb:
409 409 op = 'andsmally'
410 410 return w, (op, ta, tb)
411 411 elif op == 'or':
412 412 # fast path for machine-generated expression, that is likely to have
413 413 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
414 414 ws, ts, ss = [], [], []
415 415 def flushss():
416 416 if not ss:
417 417 return
418 418 if len(ss) == 1:
419 419 w, t = ss[0]
420 420 else:
421 421 s = '\0'.join(t[1] for w, t in ss)
422 422 y = _build('_list(_)', ('string', s))
423 423 w, t = _optimize(y)
424 424 ws.append(w)
425 425 ts.append(t)
426 426 del ss[:]
427 427 for y in getlist(x[1]):
428 428 w, t = _optimize(y)
429 429 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
430 430 ss.append((w, t))
431 431 continue
432 432 flushss()
433 433 ws.append(w)
434 434 ts.append(t)
435 435 flushss()
436 436 if len(ts) == 1:
437 437 return ws[0], ts[0] # 'or' operation is fully optimized out
438 438 return max(ws), (op, ('list',) + tuple(ts))
439 439 elif op == 'not':
440 440 # Optimize not public() to _notpublic() because we have a fast version
441 441 if _match('public()', x[1]):
442 442 o = _optimize(_build('_notpublic()'))
443 443 return o[0], o[1]
444 444 else:
445 445 o = _optimize(x[1])
446 446 return o[0], (op, o[1])
447 447 elif op == 'rangeall':
448 448 return 1, x
449 449 elif op in ('rangepre', 'rangepost', 'parentpost'):
450 450 o = _optimize(x[1])
451 451 return o[0], (op, o[1])
452 452 elif op in ('dagrange', 'range'):
453 453 wa, ta = _optimize(x[1])
454 454 wb, tb = _optimize(x[2])
455 455 return wa + wb, (op, ta, tb)
456 456 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
457 457 w, t = _optimize(x[1])
458 458 return w, (op, t, x[2])
459 459 elif op == 'relsubscript':
460 460 w, t = _optimize(x[1])
461 461 return w, (op, t, x[2], x[3])
462 462 elif op == 'list':
463 463 ws, ts = zip(*(_optimize(y) for y in x[1:]))
464 464 return sum(ws), (op,) + ts
465 465 elif op == 'keyvalue':
466 466 w, t = _optimize(x[2])
467 467 return w, (op, x[1], t)
468 468 elif op == 'func':
469 469 f = getsymbol(x[1])
470 470 wa, ta = _optimize(x[2])
471 471 w = getattr(symbols.get(f), '_weight', 1)
472 472 m = _match('commonancestors(_)', ta)
473 473
474 474 # Optimize heads(commonancestors(_)) because we have a fast version
475 475 if f == 'heads' and m:
476 476 return w + wa, _build('_commonancestorheads(_)', m[1])
477 477
478 478 return w + wa, (op, x[1], ta)
479 479 raise ValueError('invalid operator %r' % op)
480 480
def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.
    """
    # _optimize() returns (weight, tree); callers only want the tree
    return _optimize(tree)[1]
488 488
# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions
_aliassyminitletters = _syminitletters | {'$'}

def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    Raises ParseError when the tokens do not cover the whole of ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    if lookup and spec.startswith('revset(') and spec.endswith(')'):
        # a spec wrapped in revset(...) is tokenized without name lookup
        lookup = None
    tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
    tree, pos = parser.parser(elements).parse(tokens)
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    flattened = parser.simplifyinfixops(tree, ('list', 'or'))
    return _fixops(flattened)
515 515
class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        This allows symbol names to use also ``$`` as an initial letter
        (for backward compatibility), and callers of this function should
        examine whether ``$`` is used also for unexpected symbols or not.
        """
        initletters = _aliassyminitletters
        return _parsewith(spec, syminitletters=initletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` calls a function by symbol

        Any other tree gives None.
        """
        if tree[0] != 'func' or tree[1][0] != 'symbol':
            return None
        return tree[1][1], getlist(tree[2])
534 534
def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    If ``warn`` is given, it is called once for each alias that carries an
    error and has not been warned about yet.
    """
    aliasmap = _aliasrules.buildmap(aliases)
    tree = _aliasrules.expand(aliasmap, tree)
    if warn is None:
        return tree
    # warn about problematic (but not referred) aliases
    for name, alias in sorted(aliasmap.iteritems()):
        if not alias.error or alias.warned:
            continue
        warn(_('warning: %s\n') % (alias.error))
        alias.warned = True
    return tree
546 546
def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    Every ``_concat`` subtree becomes one ``('string', ...)`` node; raises
    ParseError if it contains anything but strings and symbols.
    """
    if not isinstance(tree, tuple):
        return tree
    if tree[0] in ('string', 'symbol', 'smartset'):
        return tree
    if tree[0] != '_concat':
        return tuple(foldconcat(child) for child in tree)

    # walk nested _concat nodes left to right using an explicit stack
    stack = [tree]
    pieces = []
    while stack:
        node_ = stack.pop()
        kind = node_[0]
        if kind == '_concat':
            stack.extend(reversed(node_[1:]))
        elif kind in ('string', 'symbol'):
            pieces.append(node_[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
    return ('string', ''.join(pieces))
568 568
def parse(spec, lookup=None):
    """Parse ``spec`` into a raw tree

    A ParseError that carries a location is re-raised with a ``hint`` that
    shows the spec and a caret under the failing position.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        haslocation = len(inst.args) > 1
        if haslocation:
            loc = inst.args[1]
            # Remove newlines -- spaces are equivalent whitespace.
            oneline = spec.replace('\n', ' ')
            # We want the caret to point to the place in the template that
            # failed to parse, but in a hint we get a open paren at the
            # start. Therefore, we print "loc + 1" spaces (instead of "loc")
            # to line up the caret with the location of the error.
            pointer = ' ' * (loc + 1) + '^ ' + _('here')
            inst.hint = oneline + '\n' + pointer
        raise
583 583
def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted with pycompat.bytestr(), escaped, and wrapped in
    single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = stringutil.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped
597 597
598 598 def _formatargtype(c, arg):
599 599 if c == 'd':
600 600 return '_rev(%d)' % int(arg)
601 601 elif c == 's':
602 602 return _quote(arg)
603 603 elif c == 'r':
604 604 if not isinstance(arg, bytes):
605 605 raise TypeError
606 606 parse(arg) # make sure syntax errors are confined
607 607 return '(%s)' % arg
608 608 elif c == 'n':
609 609 return _quote(node.hex(arg))
610 610 elif c == 'b':
611 611 try:
612 612 return _quote(arg.branch())
613 613 except AttributeError:
614 614 raise TypeError
615 615 raise error.ParseError(_('unexpected revspec format character %s') % c)
616 616
617 617 def _formatlistexp(s, t):
618 618 l = len(s)
619 619 if l == 0:
620 620 return "_list('')"
621 621 elif l == 1:
622 622 return _formatargtype(t, s[0])
623 623 elif t == 'd':
624 624 return _formatintlist(s)
625 625 elif t == 's':
626 626 return "_list(%s)" % _quote("\0".join(s))
627 627 elif t == 'n':
628 628 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
629 629 elif t == 'b':
630 630 try:
631 631 return "_list('%s')" % "\0".join(a.branch() for a in s)
632 632 except AttributeError:
633 633 raise TypeError
634 634
635 635 m = l // 2
636 636 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
637 637
638 638 def _formatintlist(data):
639 639 try:
640 640 l = len(data)
641 641 if l == 0:
642 642 return "_list('')"
643 643 elif l == 1:
644 644 return _formatargtype('d', data[0])
645 645 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
646 646 except (TypeError, ValueError):
647 647 raise error.ParseError(_('invalid argument for revspec'))
648 648
def _formatparamexp(args, t):
    """Render ``args`` as comma-separated function parameters of type ``t``"""
    rendered = [_formatargtype(t, a) for a in args]
    return ', '.join(rendered)

# list-style format prefixes and the function that renders each of them
_formatlistfuncs = {
    'l': _formatlistexp,
    'p': _formatparamexp,
}
656 656
def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = rev(int(arg)), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    '_rev(10):: and not _rev(20)::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or _rev(1)"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    pieces = []
    for kind, value in _parseargs(expr, args):
        if kind is None:
            # already a ready-to-use fragment of the final spec
            pieces.append(value)
            continue
        if kind != 'baseset':
            raise error.ProgrammingError("unknown revspec item type: %r"
                                         % kind)
        if isinstance(value, set):
            value = sorted(value)
        pieces.append(_formatintlist(list(value)))
    return b''.join(pieces)
706 706
def spectree(expr, *args):
    """similar to formatspec but return a parsed and optimized tree"""
    pieces = []
    smartsets = []
    for kind, value in _parseargs(expr, args):
        if kind is None:
            pieces.append(value)
        elif kind == 'baseset':
            # leave a '$' placeholder in the text; the smartset node is
            # substituted for it once the text has been parsed
            smartsets.append(('smartset', smartset.baseset(value)))
            pieces.append("$")
        else:
            raise error.ProgrammingError("unknown revspec item type: %r"
                                         % kind)
    expr = b''.join(pieces)
    tree = _parsewith(expr, syminitletters=_aliassyminitletters)
    tree = parser.buildtree(tree, ('symbol', '$'), *smartsets)
    return optimize(analyze(foldconcat(tree)))
728 728
def _parseargs(expr, args):
    """parse the expression and replace all inexpensive args

    return a list of tuple [(arg-type, arg-value)]

    Arg-type can be:
    * None:      a string ready to be concatenated into a final spec
    * 'baseset': an iterable of revisions

    Raises ParseError for a truncated format sequence, for too few or too
    many arguments, and for an argument its format character cannot render.
    """
    expr = pycompat.bytestr(expr)
    remaining = iter(args)
    items = []
    pos = 0
    while pos < len(expr):
        percent = expr.find('%', pos)
        if percent < 0:
            # no more format sequences: the rest is literal text
            items.append((None, expr[pos:]))
            break
        items.append((None, expr[pos:percent]))
        pos = percent + 1
        try:
            fmt = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if fmt == '%':
            items.append((None, fmt))
            pos += 1
            continue

        try:
            arg = next(remaining)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        listfunc = _formatlistfuncs.get(fmt)
        if not listfunc:
            # a single entry, not expensive, replace
            try:
                items.append((None, _formatargtype(fmt, arg)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        else:
            # a list of some type, might be expensive, do not replace
            pos += 1
            islist = (fmt == 'l')
            try:
                fmt = expr[pos]
            except IndexError:
                raise error.ParseError(_('incomplete revspec format character'))
            if islist and fmt == 'd' and arg:
                # we don't create a baseset yet, because it come with an
                # extra cost. If we are going to serialize it we better
                # skip it.
                items.append(('baseset', arg))
                pos += 1
                continue
            try:
                items.append((None, listfunc(list(arg), fmt)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        pos += 1

    # every argument must have been consumed by a format sequence
    try:
        next(remaining)
    except StopIteration:
        return items
    raise error.ParseError(_('too many revspec arguments specified'))
796 796
def prettyformat(tree):
    """Return ``tree`` formatted by parser.prettyformat()

    'string' and 'symbol' are passed as the leaf node types.
    """
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)
799 799
def depth(tree):
    """Return the nesting depth of ``tree``; a non-tuple counts as 0"""
    if not isinstance(tree, tuple):
        return 0
    return 1 + max(depth(item) for item in tree)
805 805
def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``"""
    names = set()
    if isinstance(tree, tuple) and tree[0] not in ('string', 'symbol'):
        for child in tree[1:]:
            names.update(funcsused(child))
        if tree[0] == 'func':
            names.add(tree[1][1])
    return names
816 816
# 1 to 40 hexadecimal digits and nothing after them
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

def _ishashlikesymbol(symbol):
    """returns a match object (true) if the symbol looks like a hash

    The result is None when it does not.
    """
    found = _hashre.match(symbol)
    return found
822 822
def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        name = tree[1]
        return [name] if _ishashlikesymbol(name) else []

    # only nodes with at least two children are descended into
    if len(tree) < 3:
        return []
    found = []
    for subtree in tree[1:]:
        found.extend(gethashlikesymbols(subtree))
    return found
General Comments 0
You need to be logged in to leave comments. Login now