##// END OF EJS Templates
revset: improve filter argument handling
Matt Mackall -
r11339:744d5b73 default
parent child Browse files
Show More
@@ -1,553 +1,552
1 1 # revset.py - revision set queries for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import parser, util, error, discovery
10 10 import match as _match
11 11
# Token table handed to parser.parser() below.  Each entry appears to be
# (binding strength, prefix action, infix action[, suffix action]) --
# NOTE(review): confirm the tuple layout against parser.py.
# Operators that are spelled two ways share a single entry.
_dagrange = (17, ("dagrangepre", 17), ("dagrange", 17), ("dagrangepost", 17))
_not = (10, ("not", 10))
_and = (5, None, ("and", 5))
_or = (4, None, ("or", 4))
_terminal = (0, None, None)

elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (19, ("negate", 19), ("minus", 19)),
    "::": _dagrange,
    "..": _dagrange,
    ":": (15, ("rangepre", 15), ("range", 15), ("rangepost", 15)),
    "not": _not,
    "!": _not,
    "and": _and,
    "&": _and,
    "or": _or,
    "|": _or,
    "+": _or,
    ",": (2, None, ("list", 2)),
    ")": _terminal,
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": _terminal,
}

# symbols that tokenize() reports as operators rather than as 'symbol'
keywords = set(['and', 'or', 'not'])
35 35
def tokenize(program):
    """Generate (type, value, position) tokens for a revset expression.

    Operators are yielded with their own text as the type and None as the
    value; quoted strings as 'string' with escapes decoded; everything else
    as 'symbol', except the words in ``keywords`` which are yielded as
    operators.  A final ('end', None, len(program)) token is always
    produced.  Raises error.ParseError on an unterminated string or on a
    character that starts no token.
    """
    pos, end = 0, len(program)
    while pos < end:
        c = program[pos]
        if c.isspace():
            # whitespace only separates tokens
            pass
        elif program.startswith('::', pos):
            # two-character operator: step over its second character
            yield ('::', None, pos)
            pos += 1
        elif program.startswith('..', pos):
            yield ('..', None, pos)
            pos += 1
        elif c in "():,-|&+!":
            # single-character operators
            yield (c, None, pos)
        elif c in '"\'':
            # quoted string: scan to the matching quote character
            pos += 1
            start = pos
            while pos < end:
                d = program[pos]
                if d == '\\':
                    # a backslash protects the character after it
                    pos += 2
                    continue
                if d == c:
                    text = program[start:pos].decode('string-escape')
                    yield ('string', text, start)
                    break
                pos += 1
            else:
                # ran off the end without seeing the closing quote
                raise error.ParseError("unterminated string", start)
        elif c.isalnum() or c == '.':
            # symbol or keyword: alphanumerics plus '.' and '_'
            start = pos
            pos += 1
            while pos < end:
                d = program[pos]
                if not (d.isalnum() or d in "._"):
                    break
                if d == '.' and program[pos - 1] == '.':
                    # two dots in a row are the '..' operator, not part of
                    # the symbol: give the first dot back
                    pos -= 1
                    break
                pos += 1
            sym = program[start:pos]
            if sym in keywords:
                yield (sym, None, start)
            else:
                yield ('symbol', sym, start)
            # compensate for the unconditional increment below
            pos -= 1
        else:
            raise error.ParseError("syntax error", pos)
        pos += 1
    yield ('end', None, pos)
85 85
86 86 # helpers
87 87
def getstring(x, err):
    """Return the text carried by a 'string' or 'symbol' parse node.

    Raises error.ParseError(err) for any other node, including a missing
    argument (x is None or empty), which previously escaped as a TypeError
    from indexing None.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    raise error.ParseError(err)
92 92
def getlist(x):
    """Flatten a left-nested ('list', rest, item) tree into a Python list.

    A false value gives [], and any non-list node gives a one-element list.
    """
    items = []
    # peel items off the right-hand side, walking down the left spine
    while x and x[0] == 'list':
        items.append(x[2])
        x = x[1]
    if x:
        items.append(x)
    # items were collected last-to-first
    items.reverse()
    return items
99 99
def getargs(x, min, max, err):
    """Return the argument list of x, checking how many there are.

    x is flattened with getlist(); error.ParseError(err) is raised unless
    the number of arguments lies between min and max inclusive.
    """
    l = getlist(x)
    if not (min <= len(l) <= max):
        raise error.ParseError(err)
    return l
105 105
def getset(repo, subset, x):
    """Evaluate parse node x against subset by dispatching on its type.

    The node's first element selects the handler in ``methods``; the
    remaining elements are passed to it as positional arguments.  Raises
    error.ParseError when x is empty or None.
    """
    if not x:
        raise error.ParseError("missing argument")
    op, args = x[0], x[1:]
    return methods[op](repo, subset, *args)
110 110
111 111 # operator methods
112 112
def negate(repo, subset, x):
    """Evaluate unary minus by looking up '-' + text as a string."""
    text = getstring(x, "can't negate that")
    return getset(repo, subset, ('string', '-' + text))
116 116
def stringset(repo, subset, x):
    """Resolve identifier x through repo[x] and return it if selectable.

    The result is [rev] when rev is in subset, or when rev is -1 and
    subset is as long as the repository; otherwise [].
    """
    rev = repo[x].rev()
    if rev in subset or (rev == -1 and len(subset) == len(repo)):
        return [rev]
    return []
124 124
def symbolset(repo, subset, x):
    """Resolve a bare symbol as a revision identifier.

    Names registered in ``symbols`` are functions and may not be used as
    plain identifiers; they raise error.ParseError.
    """
    if x not in symbols:
        return stringset(repo, subset, x)
    raise error.ParseError("can't use %s here" % x)
129 129
def rangeset(repo, subset, x, y):
    """Return the revisions from the first of x to the last of y, inclusive.

    The range counts upwards when the start is below the end and downwards
    otherwise.  If either endpoint evaluates to no revisions the range is
    empty (previously this raised IndexError).
    """
    first = getset(repo, subset, x)
    last = getset(repo, subset, y)
    if not first or not last:
        return []
    m, n = first[0], last[-1]
    if m < n:
        return range(m, n + 1)
    return range(m, n - 1, -1)
136 136
def andset(repo, subset, x, y):
    """Intersection: evaluate y within the revisions selected by x."""
    narrowed = getset(repo, subset, x)
    return getset(repo, narrowed, y)
139 139
def orset(repo, subset, x, y):
    """Union of x and y, returned in subset order."""
    found = set(getset(repo, subset, x))
    # y only needs to look at what x did not already select
    remaining = [r for r in subset if r not in found]
    found.update(getset(repo, remaining, y))
    return [r for r in subset if r in found]
144 144
def notset(repo, subset, x):
    """Return the members of subset that x does not select."""
    excluded = set(getset(repo, subset, x))
    return [rev for rev in subset if rev not in excluded]
148 148
def listset(repo, subset, a, b):
    """Reject a comma list that is evaluated directly as a set.

    Always raises error.ParseError.
    """
    raise error.ParseError("can't use a list in this context")
151 151
def func(repo, subset, a, b):
    """Call the revset function named by symbol node a with argument tree b.

    Raises error.ParseError when a is not a symbol or names no entry in
    ``symbols``.
    """
    if a[0] != 'symbol' or a[1] not in symbols:
        raise error.ParseError("not a function: %s" % a[1])
    return symbols[a[1]](repo, subset, b)
156 156
157 157 # functions
158 158
def p1(repo, subset, x):
    """Members of subset that are the first parent of a revision in x."""
    parentrevs = repo.changelog.parentrevs
    firsts = set([parentrevs(r)[0] for r in getset(repo, subset, x)])
    return [r for r in subset if r in firsts]
165 165
def p2(repo, subset, x):
    """Members of subset that are the second parent of a revision in x."""
    parentrevs = repo.changelog.parentrevs
    seconds = set([parentrevs(r)[1] for r in getset(repo, subset, x)])
    return [r for r in subset if r in seconds]
172 172
def parents(repo, subset, x):
    """Members of subset that are a parent of a revision in x."""
    parentrevs = repo.changelog.parentrevs
    allparents = set()
    for rev in getset(repo, subset, x):
        allparents.update(parentrevs(rev))
    return [r for r in subset if r in allparents]
179 179
def maxrev(repo, subset, x):
    """Return [highest revision in x] if it is in subset, else []."""
    revs = getset(repo, subset, x)
    if not revs:
        return []
    top = max(revs)
    if top not in subset:
        return []
    return [top]
187 187
def limit(repo, subset, x):
    """limit(set, n): the first n members of set.

    Raises error.ParseError unless there are exactly two arguments and the
    second is a string or symbol that int() accepts.
    """
    # getargs guarantees exactly two items, so unpacking is safe
    expr, count = getargs(x, 2, 2, "limit wants two args")
    try:
        lim = int(getstring(count, "limit wants a number"))
    except ValueError:
        raise error.ParseError("limit expects a number")
    return getset(repo, subset, expr)[:lim]
195 195
def children(repo, subset, x):
    """Members of subset that have a parent among the revisions in x.

    Scans the parents of every revision in the repository.
    """
    parentrevs = repo.changelog.parentrevs
    wanted = set(getset(repo, subset, x))
    kids = set([r for r in xrange(0, len(repo))
                for p in parentrevs(r) if p in wanted])
    return [r for r in subset if r in kids]
205 205
def branch(repo, subset, x):
    """Members of subset that are in x or share a branch with a member of x.

    x is evaluated against the whole repository, not just subset.
    """
    revs = getset(repo, range(len(repo)), x)
    names = set([repo[r].branch() for r in revs])
    revs = set(revs)
    return [r for r in subset if r in revs or repo[r].branch() in names]
213 213
def ancestor(repo, subset, x):
    """ancestor(a, b): the ancestor of a and b reported by changectx.ancestor.

    Raises error.ParseError unless exactly two arguments are given and each
    one evaluates to exactly one revision.  An argument that evaluated to
    no revisions used to slip past the check and fail with IndexError.
    """
    l = getargs(x, 2, 2, "ancestor wants two args")
    a = getset(repo, subset, l[0])
    b = getset(repo, subset, l[1])
    if len(a) != 1 or len(b) != 1:
        raise error.ParseError("ancestor args must be single revisions")
    return [repo[a[0]].ancestor(repo[b[0]]).rev()]
221 221
def ancestors(repo, subset, x):
    """Members of subset that are in x or are an ancestor of a member of x.

    x is evaluated against the whole repository, not just subset.
    """
    starts = getset(repo, range(len(repo)), x)
    reach = set(repo.changelog.ancestors(*starts))
    reach.update(starts)
    return [r for r in subset if r in reach]
226 226
def descendants(repo, subset, x):
    """Members of subset that are in x or descend from a member of x.

    x is evaluated against the whole repository, not just subset.
    """
    starts = getset(repo, range(len(repo)), x)
    reach = set(repo.changelog.descendants(*starts))
    reach.update(starts)
    return [r for r in subset if r in reach]
231 231
def follow(repo, subset, x):
    """Members of subset that are repo['.'] or one of its ancestors.

    Takes no arguments; raises error.ParseError if any are given.
    """
    getargs(x, 0, 0, "follow takes no arguments")
    current = repo['.'].rev()
    reach = set(repo.changelog.ancestors(current))
    reach.add(current)
    return [r for r in subset if r in reach]
238 237
def date(repo, subset, x):
    """Members of subset whose date satisfies the date specification x.

    The specification is compiled by util.matchdate and applied to the
    first element of each changeset's date().
    """
    matches = util.matchdate(getstring(x, 'date wants a string'))
    return [r for r in subset if matches(repo[r].date()[0])]
243 242
def keyword(repo, subset, x):
    """Members of subset mentioning x in files, user or description.

    The match is a case-insensitive substring search.
    """
    kw = getstring(x, "keyword wants a string").lower()

    def mentions(rev):
        ctx = repo[rev]
        text = " ".join(ctx.files() + [ctx.user(), ctx.description()])
        return kw in text.lower()

    return [r for r in subset if mentions(r)]
253 252
def grep(repo, subset, x):
    """Members of subset where regex x matches a file, user or description.

    Each revision is reported at most once.  The earlier code used
    ``continue`` where ``break`` was meant, so it kept scanning and
    appended the revision again for every further matching field.
    """
    gr = re.compile(getstring(x, "grep wants a string"))
    l = []
    for r in subset:
        c = repo[r]
        for e in c.files() + [c.user(), c.description()]:
            if gr.search(e):
                l.append(r)
                break  # one hit is enough for this revision
    return l
264 263
def author(repo, subset, x):
    """Members of subset whose user contains x, ignoring case."""
    needle = getstring(x, "author wants a string").lower()
    return [rev for rev in subset if needle in repo[rev].user().lower()]
268 267
def hasfile(repo, subset, x):
    """Members of subset whose files() list has a file matching pattern x.

    Each revision is reported at most once.  The earlier code used
    ``continue`` where ``break`` was meant and appended the revision once
    per matching file.
    """
    pat = getstring(x, "file wants a pattern")
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    for r in subset:
        for f in repo[r].files():
            if m(f):
                s.append(r)
                break  # one matching file is enough
    return s
279 278
def contains(repo, subset, x):
    """Members of subset whose manifest has a file matching pattern x.

    When the matcher's files() is exactly [pat], a direct membership test
    on the changeset is used instead of walking the manifest.  Each
    revision is reported at most once: the manifest walk used to append the
    revision once per matching file because it said ``continue`` instead
    of ``break``.  The error message now names this function rather than
    file().
    """
    pat = getstring(x, "contains wants a pattern")
    m = _match.match(repo.root, repo.getcwd(), [pat])
    if m.files() == [pat]:
        # fast path: direct membership test on the changeset
        return [r for r in subset if pat in repo[r]]
    s = []
    for r in subset:
        for f in repo[r].manifest():
            if m(f):
                s.append(r)
                break  # one matching file is enough
    return s
296 295
def checkstatus(repo, subset, pat, field):
    """Members of subset where a file matching pat has a given status.

    field indexes the result of repo.status() between a revision's first
    parent and the revision itself; the callers in this file pass 0, 1 and
    2 for modifies(), adds() and removes().  Each revision is reported at
    most once: the final loop used to append the revision once per matching
    file because it said ``continue`` instead of ``break``.
    """
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    # when the matcher's files() is exactly [pat], plain membership tests
    # replace calling the matcher on every file
    fast = (m.files() == [pat])
    for r in subset:
        c = repo[r]
        # cheap pre-filter: skip revisions that do not touch the pattern at
        # all before paying for a status computation
        if fast:
            if pat not in c.files():
                continue
        else:
            for f in c.files():
                if m(f):
                    break
            else:
                continue
        files = repo.status(c.p1().node(), c.node())[field]
        if fast:
            if pat in files:
                s.append(r)
        else:
            for f in files:
                if m(f):
                    s.append(r)
                    break  # one matching file is enough
    return s
323 322
def modifies(repo, subset, x):
    """Members of subset with a file matching x in status field 0."""
    return checkstatus(repo, subset,
                       getstring(x, "modifies wants a pattern"), 0)
327 326
def adds(repo, subset, x):
    """Members of subset with a file matching x in status field 1."""
    return checkstatus(repo, subset,
                       getstring(x, "adds wants a pattern"), 1)
331 330
def removes(repo, subset, x):
    """Members of subset with a file matching x in status field 2."""
    return checkstatus(repo, subset,
                       getstring(x, "removes wants a pattern"), 2)
335 334
def merge(repo, subset, x):
    """Members of subset whose second parent is not -1.

    Takes no arguments; raises error.ParseError if any are given.
    """
    getargs(x, 0, 0, "merge takes no arguments")
    parentrevs = repo.changelog.parentrevs
    return [rev for rev in subset if parentrevs(rev)[1] != -1]
341 339
def closed(repo, subset, x):
    """Members of subset whose extra data has a true 'close' entry.

    Takes no arguments; raises error.ParseError if any are given.
    """
    getargs(x, 0, 0, "closed takes no arguments")
    # NOTE(review): changectx.extra() is understood to take no arguments
    # and return a dict, so extra('close') would raise TypeError; look the
    # key up in the returned mapping instead.  Confirm against context.py.
    return [r for r in subset if repo[r].extra().get('close')]
344 343
def head(repo, subset, x):
    """Members of subset that are listed as a head in repo.branchmap().

    Takes no arguments; raises error.ParseError if any are given.
    """
    getargs(x, 0, 0, "head takes no arguments")
    branchheads = set([repo[h].rev()
                       for b, ls in repo.branchmap().iteritems()
                       for h in ls])
    return [r for r in subset if r in branchheads]
350 350
def reverse(repo, subset, x):
    """Return the revisions selected by x in reverse order.

    A new list is returned.  getset() may hand back the caller's own
    subset object (getall() returns subset itself), so reversing in place
    would silently reorder the caller's list.
    """
    l = list(getset(repo, subset, x))
    l.reverse()
    return l
355 355
def sort(repo, subset, x):
    """sort(set[, keys]): order set by a whitespace-separated list of keys.

    Recognised keys are rev, branch, desc, user, author and date, each
    optionally prefixed with '-' for descending order; the default is
    "rev".  Raises error.ParseError for a wrong number of arguments, a
    non-string key spec, or an unknown key.

    Keys are compared with exact equality.  The user/author tests used to
    be substring checks against 'user author', so fragments such as 'a' or
    '-' were silently accepted as keys.
    """
    l = getargs(x, 1, 2, "sort wants one or two arguments")
    keys = "rev"
    if len(l) == 2:
        keys = getstring(l[1], "sort spec must be a string")

    s = l[0]
    keys = keys.split()
    l = []
    def invert(s):
        # complement every byte so ascending comparison of the result is
        # descending comparison of the original string
        return "".join(chr(255 - ord(c)) for c in s)
    for r in getset(repo, subset, s):
        c = repo[r]
        e = []
        for k in keys:
            if k == 'rev':
                e.append(r)
            elif k == '-rev':
                e.append(-r)
            elif k == 'branch':
                e.append(c.branch())
            elif k == '-branch':
                e.append(invert(c.branch()))
            elif k == 'desc':
                e.append(c.description())
            elif k == '-desc':
                e.append(invert(c.description()))
            elif k in ('user', 'author'):
                e.append(c.user())
            elif k in ('-user', '-author'):
                e.append(invert(c.user()))
            elif k == 'date':
                e.append(c.date()[0])
            elif k == '-date':
                e.append(-c.date()[0])
            else:
                raise error.ParseError("unknown sort key %r" % k)
        # the revision rides along as the last element of the sort record
        e.append(r)
        l.append(e)
    l.sort()
    return [e[-1] for e in l]
397 397
def getall(repo, subset, x):
    """all(): return subset unchanged (the same object, not a copy).

    Takes no arguments; raises error.ParseError if any are given.
    """
    getargs(x, 0, 0, "all takes no arguments")
    return subset
400 401
def heads(repo, subset, x):
    """Members of x that are not a parent of any member of x."""
    members = getset(repo, subset, x)
    hasparentrole = set(parents(repo, subset, x))
    return [rev for rev in members if rev not in hasparentrole]
405 406
def roots(repo, subset, x):
    """Members of x that are not a child of any member of x."""
    members = getset(repo, subset, x)
    haschildrole = set(children(repo, subset, x))
    return [rev for rev in members if rev not in haschildrole]
410 411
def outgoing(repo, subset, x):
    """outgoing([path]): members of subset that the destination lacks.

    With no argument the destination is the expansion of 'default-push',
    with 'default' passed to expandpath as its second argument.  Raises
    error.ParseError when more than one argument is given or the argument
    is not a string or symbol.

    Fixes: the destination used to be taken from l[1:], which is always
    empty for a list of at most one element, so an explicit path was
    ignored.  A leftover debugging print was removed, and the ui buffer is
    now popped even if discovery fails.
    """
    import hg # avoid start-up nasties
    l = getargs(x, 0, 1, "outgoing wants a repo path")
    dest = ''
    if l:
        dest = getstring(l[0], "outgoing wants a repo path")
    dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
    dest, branches = hg.parseurl(dest)
    other = hg.repository(hg.remoteui(repo, {}), dest)
    # buffer ui output while discovery runs; the buffered text is discarded
    repo.ui.pushbuffer()
    try:
        o = discovery.findoutgoing(repo, other)
    finally:
        repo.ui.popbuffer()
    cl = repo.changelog
    o = set([cl.rev(r) for r in repo.changelog.nodesbetween(o, None)[0]])
    return [r for r in subset if r in o]
428 426
def tagged(repo, subset, x):
    """Members of subset that carry a tag other than 'tip'.

    Takes no arguments; raises error.ParseError if any are given.
    """
    getargs(x, 0, 0, "tagged takes no arguments")
    torev = repo.changelog.rev
    tagrevs = set([torev(node) for name, node in repo.tagslist()
                   if name != 'tip'])
    return [r for r in subset if r in tagrevs]
433 432
# revset function name -> implementation, looked up by func() and
# symbolset(); entries are grouped by what the function inspects
symbols = {
    # graph relationships
    "ancestor": ancestor,
    "ancestors": ancestors,
    "children": children,
    "descendants": descendants,
    "follow": follow,
    "heads": heads,
    "merge": merge,
    "p1": p1,
    "p2": p2,
    "parents": parents,
    "roots": roots,
    # changeset metadata
    "author": author,
    "branch": branch,
    "closed": closed,
    "date": date,
    "grep": grep,
    "head": head,
    "keyword": keyword,
    "tagged": tagged,
    "user": author,
    # file contents and status
    "adds": adds,
    "contains": contains,
    "file": hasfile,
    "modifies": modifies,
    "removes": removes,
    # set manipulation
    "all": getall,
    "limit": limit,
    "max": maxrev,
    "reverse": reverse,
    "sort": sort,
    # comparison with another repository
    "outgoing": outgoing,
}
467 466
# parse-node type -> evaluator, dispatched on by getset().  The other
# operators the parser can emit (minus, group, the dagrange and
# rangepre/rangepost forms) are rewritten by optimize() before evaluation.
methods = {
    # leaves
    "string": stringset,
    "symbol": symbolset,
    # unary operators
    "negate": negate,
    "not": notset,
    # binary operators
    "and": andset,
    "or": orset,
    "range": rangeset,
    "list": listset,
    # function application
    "func": func,
}
479 478
def optimize(x, small):
    """Rewrite parse tree x into primitive operators and estimate its cost.

    Returns a (weight, tree) pair.  Sugar operators (minus, group, the
    dagrange and rangepre/rangepost forms) are expanded into 'and', 'not',
    'range' and 'func' nodes, and the operands of 'and' are ordered so the
    cheaper one comes first.  ``small`` marks a context in which single
    revisions count as half weight.

    Fixes relative to the previous version:
    - reverse() and limit() now get weight 0 as intended; the test was
      ``f == "reverse limit"``, which no function name can satisfy;
    - node types and function names are compared against tuples of names
      instead of by substring search in a space-separated string;
    - the unreachable 'dagrange' alternative in the 'and' branch and the
      no-effect weight swap in the 'or' branch are gone.
    """
    if x is None:
        return 0, x

    smallbonus = 1
    if small:
        smallbonus = .5

    op = x[0]
    if op == 'minus':
        return optimize(('and', x[1], ('not', x[2])), small)
    elif op == 'dagrange':
        return optimize(('and', ('func', ('symbol', 'descendants'), x[1]),
                         ('func', ('symbol', 'ancestors'), x[2])), small)
    elif op == 'dagrangepre':
        return optimize(('func', ('symbol', 'ancestors'), x[1]), small)
    elif op == 'dagrangepost':
        return optimize(('func', ('symbol', 'descendants'), x[1]), small)
    elif op == 'rangepre':
        return optimize(('range', ('string', '0'), x[1]), small)
    elif op == 'rangepost':
        return optimize(('range', x[1], ('string', 'tip')), small)
    elif op in ('string', 'symbol', 'negate'):
        return smallbonus, x # single revisions are small
    elif op == 'and':
        wa, ta = optimize(x[1], True)
        wb, tb = optimize(x[2], True)
        w = min(wa, wb)
        # put the cheaper operand first
        if wa > wb:
            return w, (op, tb, ta)
        return w, (op, ta, tb)
    elif op == 'or':
        wa, ta = optimize(x[1], False)
        wb, tb = optimize(x[2], False)
        return max(wa, wb), (op, ta, tb)
    elif op == 'not':
        o = optimize(x[1], not small)
        return o[0], (op, o[1])
    elif op == 'group':
        return optimize(x[1], small)
    elif op in ('range', 'list'):
        wa, ta = optimize(x[1], small)
        wb, tb = optimize(x[2], small)
        return wa + wb, (op, ta, tb)
    elif op == 'func':
        f = getstring(x[1], "not a symbol")
        wa, ta = optimize(x[2], small)
        if f in ("grep", "date", "user", "author", "keyword", "branch",
                 "file"):
            w = 10 # slow
        elif f in ("modifies", "adds", "removes", "outgoing"):
            w = 30 # slower
        elif f == "contains":
            w = 100 # very slow
        elif f == "ancestor":
            w = 1 * smallbonus
        elif f in ("reverse", "limit"):
            w = 0
        elif f == "sort":
            w = 10 # assume most sorts look at changelog
        else:
            w = 1
        return w + wa, (op, x[1], ta)
    return 1, x
545 544
# turn a revset expression string into a parse tree
parse = parser.parser(tokenize, elements).parse

def match(spec):
    """Compile revset expression spec into a function of (repo, subset).

    The expression is parsed and optimized once, here; the returned
    function evaluates the resulting tree each time it is called.
    """
    # optimize() returns (weight, tree); only the tree is needed
    tree = optimize(parse(spec), True)[1]
    def mfunc(repo, subset):
        return getset(repo, subset, tree)
    return mfunc
General Comments 0
You need to be logged in to leave comments. Login now