##// END OF EJS Templates
revset: add support for prefix and suffix versions of : and ::
Matt Mackall -
r11278:7df88cdf default
parent child Browse files
Show More
@@ -1,79 +1,88 b''
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.txt and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is an iterator that returns type, value pairs
14 14 # elements is a mapping of types to binding strength, prefix and infix actions
15 15 # an action is a tree node name, a tree label, and an optional match
16 16 # __call__(program) parses program into a labelled tree
17 17
class parser(object):
    """Simple top-down operator precedence parser.

    tokenizer is called with the program and must return an iterator
    (with a next() method) yielding (type, value) pairs.  elements maps
    each token type to a tuple of
    (binding strength, prefix action, infix action[, suffix action]).
    An action is (node name[, binding strength[, closing token type]]).
    methods, if given, maps node names to callables used by eval().
    """
    def __init__(self, tokenizer, elements, methods=None):
        self._tokenizer = tokenizer
        self._elements = elements
        self._methods = methods
    def _advance(self):
        'return the current token and move on to the next one'
        t = self.current
        try:
            self.current = self._iter.next()
        except StopIteration:
            # keep the final token as current once input is exhausted
            pass
        return t
    def _match(self, m):
        'make sure the current token has type m, then consume it'
        if self.current[0] != m:
            raise SyntaxError(self.current)
        self._advance()
    def _parse(self, bind=0):
        """parse one expression whose operators bind tighter than bind

        Raises SyntaxError when a token is used where it has no prefix
        or infix rule.
        """
        token, value = self._advance()
        # handle prefix rules on current token
        prefix = self._elements[token][1]
        if not prefix:
            raise SyntaxError("not a prefix: %s" % token)
        if len(prefix) == 1:
            # terminal: the node just carries the token value
            expr = (prefix[0], value)
        else:
            if len(prefix) > 2 and prefix[2] == self.current[0]:
                # closing token follows immediately: empty operand
                self._match(prefix[2])
                expr = (prefix[0], None)
            else:
                expr = (prefix[0], self._parse(prefix[1]))
                if len(prefix) > 2:
                    self._match(prefix[2])
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value = self._advance()
            e = self._elements[token]
            # check for suffix - next token isn't a valid prefix
            if len(e) == 4 and not self._elements[self.current[0]][1]:
                suffix = e[3]
                expr = (suffix[0], expr)
                continue
            # handle infix rules; an element may have no infix slot at
            # all (2-tuple) or an explicit None
            infix = None
            if len(e) > 2:
                infix = e[2]
            if not infix:
                raise SyntaxError("not an infix: %s" % token)
            if len(infix) == 3 and infix[2] == self.current[0]:
                # closing token follows immediately: empty right operand
                self._match(infix[2])
                expr = (infix[0], expr, None)
            else:
                if not infix[0]:
                    raise SyntaxError("not an infix: %s" % token)
                expr = (infix[0], expr, self._parse(infix[1]))
                if len(infix) == 3:
                    self._match(infix[2])
        return expr
    def parse(self, message):
        'generate a parse tree from a message'
        self._iter = self._tokenizer(message)
        self.current = self._iter.next()
        return self._parse()
    def eval(self, tree):
        'recursively evaluate a parse tree using node methods'
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
    def __call__(self, message):
        'parse a message into a parse tree and evaluate if methods given'
        t = self.parse(message)
        if self._methods:
            return self.eval(t)
        return t
@@ -1,530 +1,546 b''
1 1 # revset.py - revision set queries for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import parser, util, hg
10 10 import match as _match
11 11
# token type -> (binding strength, prefix action, infix action
#                [, suffix action])
elements = {
    # grouping and function call
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (19, ("negate", 19), ("minus", 19)),
    # DAG ranges: "::" and ".." are two spellings of the same operator
    "::": (
        17,
        ("dagrangepre", 17),
        ("dagrange", 17),
        ("dagrangepost", 17),
    ),
    "..": (
        17,
        ("dagrangepre", 17),
        ("dagrange", 17),
        ("dagrangepost", 17),
    ),
    # revision number ranges
    ":": (
        15,
        ("rangepre", 15),
        ("range", 15),
        ("rangepost", 15),
    ),
    # boolean operators
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    # argument lists
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    # terminals
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# symbols that the tokenizer reports as operators
keywords = set(('and', 'or', 'not'))
32 35
def tokenize(program):
    """Yield (type, value) tokens for a revset program.

    Operators and keywords are yielded as (operator, None), quoted
    strings as ('string', text) and everything else as
    ('symbol', text).  A final ('end', None) token is always yielded.

    Raises SyntaxError on an unterminated string or an unexpected
    character.
    """
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None)
            pos += 1 # skip ahead
        elif c in "():,-|&+!": # handle simple operators
            yield (c, None)
        elif c in '"\'': # handle quoted strings
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    # Python 2 byte-string codec: expands backslash escapes
                    yield ('string', program[s:pos].decode('string-escape'))
                    break
                pos += 1
            else:
                # ran off the end without seeing the closing quote
                raise SyntaxError("unterminated string")
        elif c.isalnum() or c in '.': # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in "._"):
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    # give the first dot back so '..' is seen as an operator
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None)
            else:
                yield ('symbol', sym)
            # compensate for the unconditional increment below
            pos -= 1
        else:
            raise SyntaxError("syntax error at %d" % pos)
        pos += 1
    yield ('end', None)
79 85
80 86 # helpers
81 87
def getstring(x, err):
    """Return the text of a 'string' or 'symbol' node.

    err is raised for any other node, including a missing (empty) one.
    A plain message string is wrapped in SyntaxError first, because
    strings themselves cannot be raised.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    if isinstance(err, str):
        err = SyntaxError(err)
    raise err
86 92
def getlist(x):
    """Flatten a left-nested chain of 'list' nodes into a Python list.

    An empty node gives []; any other non-list node gives [node].
    """
    items = []
    # peel elements off the right-hand side of each nested 'list' node
    while x and x[0] == 'list':
        items.append(x[2])
        x = x[1]
    if x:
        items.append(x)
    items.reverse()
    return items
93 99
def getpair(x, err):
    """Return the two elements of argument list x.

    err is raised when x does not hold exactly two elements.  A plain
    message string is wrapped in SyntaxError first, because strings
    themselves cannot be raised.
    """
    l = getlist(x)
    if len(l) != 2:
        if isinstance(err, str):
            err = SyntaxError(err)
        raise err
    return l
99 105
def getset(repo, subset, x):
    """Evaluate parse tree node x against subset.

    Dispatches on the node name x[0] through the methods table, passing
    the remaining node fields as arguments.  Raises SyntaxError when the
    node is missing.
    """
    if not x:
        raise SyntaxError("missing argument")
    return methods[x[0]](repo, subset, *x[1:])
104 110
105 111 # operator methods
106 112
def negate(repo, subset, x):
    """Evaluate a prefix '-' by looking up '-' plus the operand's text.

    The operand must be a string or symbol node; anything else raises
    SyntaxError.
    """
    text = getstring(x, SyntaxError("can't negate that"))
    return getset(repo, subset, ('string', '-' + text))
110 116
def stringset(repo, subset, x):
    'look x up in repo; return its revision if it is part of subset'
    rev = repo[x].rev()
    if rev not in subset:
        return []
    return [rev]
116 122
def symbolset(repo, subset, x):
    """Evaluate a bare symbol as a revision identifier.

    Names of revset functions are rejected with SyntaxError; they are
    only valid when called.
    """
    if x in symbols:
        raise SyntaxError("can't use %s here" % x)
    return stringset(repo, subset, x)
121 127
def rangeset(repo, subset, x, y):
    """Evaluate 'x:y': revisions from the first of x to the last of y.

    The range runs downwards when the start is not below the end.  Only
    revisions present in subset are returned, in range order.  An
    endpoint that matches nothing inside subset is looked up again
    against the whole repository; if it still matches nothing the
    result is empty.
    """
    m = getset(repo, subset, x)
    if not m:
        m = getset(repo, range(len(repo)), x)
    n = getset(repo, subset, y)
    if not n:
        n = getset(repo, range(len(repo)), y)
    if not m or not n:
        return []
    m, n = m[0], n[-1]
    if m < n:
        r = range(m, n + 1)
    else:
        r = range(m, n - 1, -1)
    # honour the subset contract shared by the other set operators
    s = set(subset)
    return [rev for rev in r if rev in s]
128 134
def rangepreset(repo, subset, x):
    """Evaluate ':x': revisions from 0 up to the last revision of x.

    Only revisions present in subset are returned.  An endpoint that
    matches nothing inside subset is looked up again against the whole
    repository; if it still matches nothing the result is empty.
    """
    n = getset(repo, subset, x)
    if not n:
        n = getset(repo, range(len(repo)), x)
    if not n:
        return []
    s = set(subset)
    return [rev for rev in range(0, n[-1] + 1) if rev in s]
137
def rangepostset(repo, subset, x):
    """Evaluate 'x:': revisions from the first of x up to len(repo) - 1.

    Only revisions present in subset are returned.  An endpoint that
    matches nothing inside subset is looked up again against the whole
    repository; if it still matches nothing the result is empty.
    """
    m = getset(repo, subset, x)
    if not m:
        m = getset(repo, range(len(repo)), x)
    if not m:
        return []
    s = set(subset)
    return [rev for rev in range(m[0], len(repo)) if rev in s]
140
def dagrangeset(repo, subset, x, y):
    'evaluate x::y (or x..y) as descendants(x) and ancestors(y)'
    below = ('func', ('symbol', 'descendants'), x)
    above = ('func', ('symbol', 'ancestors'), y)
    return andset(repo, subset, below, above)
133 145
def andset(repo, subset, x, y):
    'intersect x and y, evaluating the lighter operand first'
    first, second = x, y
    if weight(x, True) > weight(y, True):
        first, second = y, x
    # the second operand only ever sees what the first one let through
    narrowed = getset(repo, subset, first)
    return getset(repo, narrowed, second)
138 150
def orset(repo, subset, x, y):
    'union of x and y, returned in subset order'
    if weight(y, False) < weight(x, False):
        x, y = y, x
    found = set(getset(repo, subset, x))
    # the second operand only needs to look at what is still missing
    remaining = [r for r in subset if r not in found]
    found.update(getset(repo, remaining, y))
    return [r for r in subset if r in found]
145 157
def notset(repo, subset, x):
    'members of subset that are not in x'
    excluded = set(getset(repo, subset, x))
    kept = []
    for r in subset:
        if r not in excluded:
            kept.append(r)
    return kept
149 161
def minusset(repo, subset, x, y):
    'members of x that are not in y'
    xheavier = weight(x, True) > weight(y, True)
    if xheavier:
        # strip y from subset first so the costly x sees fewer revisions
        remaining = notset(repo, subset, y)
        return getset(repo, remaining, x)
    selected = getset(repo, subset, x)
    return notset(repo, selected, y)
154 166
def listset(repo, subset, a, b):
    """Reject a comma-separated list used where a set is expected.

    Always raises SyntaxError.
    """
    raise SyntaxError("can't use a list in this context")
157 169
def func(repo, subset, a, b):
    """Call the revset function named by symbol node a with argument b.

    Raises SyntaxError when a is not a symbol naming a known function.
    """
    if a[0] == 'symbol' and a[1] in symbols:
        return symbols[a[1]](repo, subset, b)
    raise SyntaxError("that's not a function: %s" % a[1])
162 174
163 175 # functions
164 176
def p1(repo, subset, x):
    'members of subset that are the first parent of a revision in x'
    cl = repo.changelog
    ps = set([cl.parentrevs(r)[0] for r in getset(repo, subset, x)])
    return [r for r in subset if r in ps]
171 183
def p2(repo, subset, x):
    'members of subset that are the second parent of a revision in x'
    cl = repo.changelog
    ps = set([cl.parentrevs(r)[1] for r in getset(repo, subset, x)])
    return [r for r in subset if r in ps]
178 190
def parents(repo, subset, x):
    'members of subset that are a parent of a revision in x'
    parentrevs = repo.changelog.parentrevs
    ps = set()
    for r in getset(repo, subset, x):
        ps.update(parentrevs(r))
    return [r for r in subset if r in ps]
185 197
def maxrev(repo, subset, x):
    'highest revision in x, as a list of zero or one elements'
    s = getset(repo, subset, x)
    if not s:
        return []
    m = max(s)
    if m not in subset:
        return []
    return [m]
193 205
def limit(repo, subset, x):
    """Evaluate limit(set, n): the first n members of set.

    Raises SyntaxError unless exactly two arguments are given and the
    second one is a string or symbol that parses as an integer.
    """
    l = getpair(x, SyntaxError("limit wants two args"))
    try:
        lim = int(getstring(l[1], SyntaxError("limit wants a number")))
    except ValueError:
        raise SyntaxError("limit wants a number")
    return getset(repo, subset, l[0])[:lim]
201 213
def children(repo, subset, x):
    'members of subset that have a parent in x'
    cl = repo.changelog
    s = set(getset(repo, subset, x))
    cs = set()
    # walk every revision in the repository looking for parents in s
    for r in xrange(0, len(repo)):
        if [p for p in cl.parentrevs(r) if p in s]:
            cs.add(r)
    return [r for r in subset if r in cs]
211 223
def branch(repo, subset, x):
    'members of subset in x or sharing a branch name with a member of x'
    # x is evaluated against the whole repository, not just subset
    s = getset(repo, range(len(repo)), x)
    b = set([repo[r].branch() for r in s])
    s = set(s)
    return [r for r in subset if r in s or repo[r].branch() in b]
219 231
def ancestor(repo, subset, x):
    """Evaluate ancestor(a, b) for two single revisions.

    Returns a one-element list holding the revision that
    repo[a].ancestor(repo[b]) reports.  Raises SyntaxError unless
    exactly two arguments are given and each evaluates to exactly one
    revision.
    """
    l = getpair(x, SyntaxError("ancestor wants two args"))
    a = getset(repo, subset, l[0])
    b = getset(repo, subset, l[1])
    # an empty set is just as unusable as an ambiguous one
    if len(a) != 1 or len(b) != 1:
        raise SyntaxError("arguments to ancestor must be single revisions")
    return [repo[a[0]].ancestor(repo[b[0]]).rev()]
227 239
def ancestors(repo, subset, x):
    'members of subset that are in x or an ancestor of a member of x'
    # x is evaluated against the whole repository, not just subset
    args = getset(repo, range(len(repo)), x)
    s = set(args)
    s.update(repo.changelog.ancestors(*args))
    return [r for r in subset if r in s]
232 244
def descendants(repo, subset, x):
    'members of subset that are in x or a descendant of a member of x'
    # x is evaluated against the whole repository, not just subset
    args = getset(repo, range(len(repo)), x)
    s = set(args)
    s.update(repo.changelog.descendants(*args))
    return [r for r in subset if r in s]
237 249
def follow(repo, subset, x):
    """Members of subset that are repo['.'] or one of its ancestors.

    Raises SyntaxError when called with arguments.
    """
    if x:
        raise SyntaxError("follow takes no args")
    p = repo['.'].rev()
    s = set(repo.changelog.ancestors(p)) | set([p])
    return [r for r in subset if r in s]
244 256
def date(repo, subset, x):
    """Members of subset whose date matches the given date spec.

    The spec is handed to util.matchdate and the resulting matcher is
    applied to the first field of each changeset's date().  Raises
    SyntaxError when the argument is not a string or symbol.
    """
    ds = getstring(x, SyntaxError('date wants a string'))
    dm = util.matchdate(ds)
    return [r for r in subset if dm(repo[r].date()[0])]
249 261
def keyword(repo, subset, x):
    """Members of subset mentioning a keyword, ignoring case.

    The keyword is searched for in the changed file names, the user and
    the description of each changeset.  Raises SyntaxError when the
    argument is not a string or symbol.
    """
    kw = getstring(x, SyntaxError("keyword wants a string")).lower()
    l = []
    for r in subset:
        c = repo[r]
        t = " ".join(c.files() + [c.user(), c.description()])
        if kw in t.lower():
            l.append(r)
    return l
259 271
def grep(repo, subset, x):
    """Members of subset with a field matching a regular expression.

    The pattern is searched for in each changed file name, the user and
    the description.  Each matching revision is reported once.  Raises
    SyntaxError when the argument is not a string or symbol.
    """
    gr = re.compile(getstring(x, SyntaxError("grep wants a string")))
    l = []
    for r in subset:
        c = repo[r]
        for e in c.files() + [c.user(), c.description()]:
            if gr.search(e):
                l.append(r)
                # one hit is enough; do not add the revision again
                break
    return l
270 282
def author(repo, subset, x):
    """Members of subset whose user contains a string, ignoring case.

    Raises SyntaxError when the argument is not a string or symbol.
    """
    n = getstring(x, SyntaxError("author wants a string")).lower()
    return [r for r in subset if n in repo[r].user().lower()]
274 286
def hasfile(repo, subset, x):
    """Members of subset that touch a file matching a pattern.

    Each matching revision is reported once.  Raises SyntaxError when
    the argument is not a string or symbol.
    """
    pat = getstring(x, SyntaxError("file wants a pattern"))
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    for r in subset:
        for f in repo[r].files():
            if m(f):
                s.append(r)
                # one hit is enough; do not add the revision again
                break
    return s
285 297
def contains(repo, subset, x):
    """Members of subset whose manifest holds a file matching a pattern.

    Each matching revision is reported once.  Raises SyntaxError when
    the argument is not a string or symbol.
    """
    pat = getstring(x, SyntaxError("contains wants a pattern"))
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    if m.files() == [pat]:
        # the matcher reports pat itself as its only file: a plain
        # membership test on the changeset is enough
        for r in subset:
            if pat in repo[r]:
                s.append(r)
    else:
        for r in subset:
            for f in repo[r].manifest():
                if m(f):
                    s.append(r)
                    # one hit is enough; do not add the revision again
                    break
    return s
303 315
def checkstatus(repo, subset, pat, field):
    """Members of subset whose status entry 'field' has a file matching pat.

    field indexes the result of repo.status() between a changeset's
    first parent and the changeset itself; the callers in this file
    pass 0 (modifies), 1 (adds) and 2 (removes).  Each matching
    revision is reported once.
    """
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    # when the matcher reports pat itself as its only file, plain
    # membership tests replace running the matcher over every name
    fast = (m.files() == [pat])
    for r in subset:
        c = repo[r]
        # cheap pre-filter on the changed files before computing status
        if fast:
            if pat not in c.files():
                continue
        else:
            for f in c.files():
                if m(f):
                    break
            else:
                continue
        files = repo.status(c.p1().node(), c.node())[field]
        if fast:
            if pat in files:
                s.append(r)
        else:
            for f in files:
                if m(f):
                    s.append(r)
                    # one hit is enough; do not add the revision again
                    break
    return s
330 342
def modifies(repo, subset, x):
    """Members of subset whose status field 0 has a file matching a pattern.

    Raises SyntaxError when the argument is not a string or symbol.
    """
    pat = getstring(x, SyntaxError("modifies wants a pattern"))
    return checkstatus(repo, subset, pat, 0)
334 346
def adds(repo, subset, x):
    """Members of subset whose status field 1 has a file matching a pattern.

    Raises SyntaxError when the argument is not a string or symbol.
    """
    pat = getstring(x, SyntaxError("adds wants a pattern"))
    return checkstatus(repo, subset, pat, 1)
338 350
def removes(repo, subset, x):
    """Members of subset whose status field 2 has a file matching a pattern.

    Raises SyntaxError when the argument is not a string or symbol.
    """
    pat = getstring(x, SyntaxError("removes wants a pattern"))
    return checkstatus(repo, subset, pat, 2)
342 354
def merge(repo, subset, x):
    """Members of subset whose second parent revision is not -1.

    Raises SyntaxError when called with arguments.
    """
    if x:
        raise SyntaxError("merge takes no args")
    cl = repo.changelog
    return [r for r in subset if cl.parentrevs(r)[1] != -1]
348 360
def closed(repo, subset, x):
    """Members of subset whose extra data has a true 'close' entry.

    The argument x is ignored.
    """
    # NOTE(review): assumes the changeset's extra() takes no arguments
    # and returns a dict-like object - confirm against context.py
    return [r for r in subset if repo[r].extra().get('close')]
351 363
def head(repo, subset, x):
    'members of subset listed in any entry of repo.branchmap()'
    hs = set()
    for b, ls in repo.branchmap().iteritems():
        for h in ls:
            hs.add(repo[h].rev())
    return [r for r in subset if r in hs]
357 369
def reverse(repo, subset, x):
    """Return the members of x in reverse order.

    A copy is reversed: getset() may hand back the caller's own subset
    list, which must not be reordered in place.
    """
    l = list(getset(repo, subset, x))
    l.reverse()
    return l
362 374
def sort(repo, subset, x):
    """Evaluate sort(set[, keys]): the members of set ordered by keys.

    keys is a whitespace-separated list drawn from rev, branch, desc,
    user, author and date, each optionally prefixed with '-' for
    descending order; the default is "rev".  Ties are broken by
    revision number.

    Raises SyntaxError when no set is given, when the key spec is not a
    string or symbol, or when it names an unknown key.
    """
    l = getlist(x)
    if not l:
        raise SyntaxError("sort wants a set")
    keys = "rev"
    if len(l) == 2:
        keys = getstring(l[1], SyntaxError("sort spec must be a string"))

    s = l[0]
    keys = keys.split()
    l = []
    def invert(s):
        # flip every byte so that ascending order on the result is
        # descending order on the original string
        return "".join([chr(255 - ord(c)) for c in s])
    for r in getset(repo, subset, s):
        c = repo[r]
        e = []
        for k in keys:
            if k == 'rev':
                e.append(r)
            elif k == '-rev':
                e.append(-r)
            elif k == 'branch':
                e.append(c.branch())
            elif k == '-branch':
                e.append(invert(c.branch()))
            elif k == 'desc':
                e.append(c.description())
            elif k == '-desc':
                e.append(invert(c.description()))
            elif k in ('user', 'author'):
                e.append(c.user())
            elif k in ('-user', '-author'):
                e.append(invert(c.user()))
            elif k == 'date':
                e.append(c.date()[0])
            elif k == '-date':
                e.append(-c.date()[0])
            else:
                raise SyntaxError("unknown sort key %r" % k)
        # the revision goes last: tie-breaker and the value returned
        e.append(r)
        l.append(e)
    l.sort()
    return [e[-1] for e in l]
404 416
def getall(repo, subset, x):
    'return subset unchanged; repo and x are not consulted'
    return subset
407 419
def heads(repo, subset, x):
    'members of x that are not a parent of another member of x'
    members = getset(repo, subset, x)
    ps = set(parents(repo, subset, x))
    result = []
    for r in members:
        if r not in ps:
            result.append(r)
    return result
412 424
def roots(repo, subset, x):
    'members of x that are not a child of another member of x'
    members = getset(repo, subset, x)
    cs = set(children(repo, subset, x))
    result = []
    for r in members:
        if r not in cs:
            result.append(r)
    return result
417 429
def outgoing(repo, subset, x):
    """Members of subset that repo.findoutgoing() reports for a remote.

    With one argument, it names the destination path; otherwise the
    'default-push' path is used, falling back to 'default'.  Raises
    SyntaxError when that argument is not a string or symbol.
    """
    l = getlist(x)
    if len(l) == 1:
        dest = getstring(l[0], SyntaxError("outgoing wants a repo path"))
    else:
        dest = ''
    dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
    dest, branches = hg.parseurl(dest)
    other = hg.repository(hg.remoteui(repo, {}), dest)
    # buffer ui output while the discovery runs, then drop it
    repo.ui.pushbuffer()
    o = repo.findoutgoing(other)
    repo.ui.popbuffer()
    cl = repo.changelog
    o = set([cl.rev(r) for r in cl.nodesbetween(o, None)[0]])
    return [r for r in subset if r in o]
434 446
# revset function name -> implementation, called as f(repo, subset, x)
symbols = {
    # graph relationships
    "ancestor": ancestor,
    "ancestors": ancestors,
    "children": children,
    "descendants": descendants,
    "follow": follow,
    "heads": heads,
    "p1": p1,
    "p2": p2,
    "parents": parents,
    "roots": roots,
    # changeset metadata
    "author": author,
    "branch": branch,
    "closed": closed,
    "date": date,
    "grep": grep,
    "head": head,
    "keyword": keyword,
    "merge": merge,
    "user": author,
    # file contents and changes
    "adds": adds,
    "contains": contains,
    "file": hasfile,
    "modifies": modifies,
    "removes": removes,
    # set manipulation
    "all": getall,
    "limit": limit,
    "max": maxrev,
    "reverse": reverse,
    "sort": sort,
    # remote comparison
    "outgoing": outgoing,
}
467 479
# parse tree node name -> evaluator, called as f(repo, subset, *fields)
methods = {
    # terminals
    "string": stringset,
    "symbol": symbolset,
    # prefix operators
    "negate": negate,
    "not": notset,
    "rangepre": rangepreset,
    "dagrangepre": ancestors,
    # suffix operators
    "rangepost": rangepostset,
    "dagrangepost": descendants,
    # infix operators
    "minus": minusset,
    "range": rangeset,
    "dagrange": dagrangeset,
    "and": andset,
    "or": orset,
    "list": listset,
    # calls and grouping
    "func": func,
    "group": lambda r, s, x: getset(r, s, x),
}
482 498
def weight(x, small):
    """Estimate the relative cost of evaluating parse tree node x.

    The and/or/minus operators compare these estimates to decide which
    operand to evaluate first.  small indicates that the node is
    expected to see a small input set, which halves the estimate for
    single revisions.  Nodes without a specific rule weigh 1.
    """
    smallbonus = 1
    if small:
        smallbonus = .5

    if not x:
        # missing operand; evaluation reports the error
        return 1
    op = x[0]
    if op in ('string', 'symbol', 'negate'):
        return smallbonus # single revisions are small
    elif op in ('and', 'dagrange'):
        return min(weight(x[1], True), weight(x[2], True))
    elif op in ('or', 'minus'):
        return max(weight(x[1], False), weight(x[2], False))
    elif op == 'not':
        return weight(x[1], not small)
    elif op == 'group':
        return weight(x[1], small)
    elif op == 'range':
        return weight(x[1], small) + weight(x[2], small)
    elif op == 'func':
        # a func node is ('func', name node, argument node)
        f = getstring(x[1], SyntaxError("not a symbol"))
        args = x[2]
        if f in ('grep', 'date', 'user', 'author', 'keyword', 'branch',
                 'file'):
            return 10 # slow
        elif f in ('modifies', 'adds', 'removes'):
            return 30 # slower
        elif f == 'contains':
            return 100 # very slow
        elif f == 'ancestor':
            if args and args[0] == 'list':
                return (weight(args[1], small) +
                        weight(args[2], small)) * smallbonus
            return 1
        elif f in ('reverse', 'limit', 'sort'):
            # cost is driven by the set argument, which comes first
            base = args
            if args and args[0] == 'list':
                base = args[1]
            w = weight(base, small)
            if f == 'sort':
                return max(w, 10)
            return w
    return 1
523 539
# parse(spec) turns a revset expression into a parse tree
parse = parser.parser(tokenize, elements).parse

def match(spec):
    """Compile revset expression spec into a matcher function.

    The returned function takes (repo, subset) and evaluates the parsed
    expression against them.
    """
    tree = parse(spec)
    def evaluate(repo, subset):
        return getset(repo, subset, tree)
    return evaluate
General Comments 0
You need to be logged in to leave comments. Login now