##// END OF EJS Templates
revsets: reduce cost of outgoing in the optimizer
Matt Mackall -
r12351:b913232d default
parent child Browse files
Show More
@@ -1,591 +1,591 b''
1 1 # revset.py - revision set queries for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import parser, util, error, discovery
10 10 import match as matchmod
11 11 from i18n import _
12 12
# Token table handed to parser.parser() together with tokenize().
# Each value starts with a number followed by up to three action tuples;
# presumably (binding strength, prefix, infix, suffix) -- confirm against
# parser.py, which is not part of this module.
elements = {
    # grouping and function call
    '(': (20, ('group', 1, ')'), ('func', 1, ')')),
    ')': (0, None, None),
    # negation / set difference
    '-': (19, ('negate', 19), ('minus', 19)),
    # DAG ranges ('..' is an alias of '::')
    '::': (17, ('dagrangepre', 17), ('dagrange', 17),
           ('dagrangepost', 17)),
    '..': (17, ('dagrangepre', 17), ('dagrange', 17),
           ('dagrangepost', 17)),
    # numeric ranges
    ':': (15, ('rangepre', 15), ('range', 15), ('rangepost', 15)),
    # boolean operators and their symbolic aliases
    'not': (10, ('not', 10)),
    '!': (10, ('not', 10)),
    'and': (5, None, ('and', 5)),
    '&': (5, None, ('and', 5)),
    'or': (4, None, ('or', 4)),
    '|': (4, None, ('or', 4)),
    '+': (4, None, ('or', 4)),
    # argument separator
    ',': (2, None, ('list', 2)),
    # terminals
    'symbol': (0, ('symbol',), None),
    'string': (0, ('string',), None),
    'end': (0, None, None),
}

# bare words that tokenize() reports as operators instead of symbols
keywords = set(('and', 'or', 'not'))
36 36
def tokenize(program):
    """Split a revset expression into ``(type, value, position)`` tokens.

    Operators and the words listed in ``keywords`` are yielded with their
    own text as the type and ``None`` as the value.  Quoted strings are
    yielded as ``'string'`` tokens and bare words as ``'symbol'`` tokens.
    A closing ``('end', None, pos)`` token is always produced.

    Raises error.ParseError for an unterminated string or for a character
    that cannot start any token.
    """
    def issymchar(ch):
        # alphanumerics, '.', '_' and characters with ordinal above 127
        return ch.isalnum() or ch in "._" or ord(ch) > 127

    pos = 0
    end = len(program)
    while pos < end:
        ch = program[pos]
        pair = program[pos:pos + 2]
        if ch.isspace():
            # inter-token whitespace is skipped
            pos += 1
        elif pair == '::' or pair == '..':
            # two-character operators win over ':' and over symbols
            yield (pair, None, pos)
            pos += 2
        elif ch in "():,-|&+!":
            # single-character operators
            yield (ch, None, pos)
            pos += 1
        elif ch in '"\'':
            # quoted string: scan for the matching, unescaped quote
            start = pos + 1
            pos = start
            while pos < end and program[pos] != ch:
                if program[pos] == '\\':
                    pos += 2 # skip the escaped character as well
                else:
                    pos += 1
            if pos >= end:
                raise error.ParseError(_("unterminated string"), start)
            yield ('string', program[start:pos].decode('string-escape'),
                   start)
            pos += 1 # step over the closing quote
        elif issymchar(ch):
            # gather up a symbol or keyword
            start = pos
            pos += 1
            while pos < end and issymchar(program[pos]):
                if program[pos] == '.' and program[pos - 1] == '.':
                    # "a..b": give both dots back to the operator branch
                    pos -= 1
                    break
                pos += 1
            word = program[start:pos]
            if word in keywords:
                yield (word, None, start)
            else:
                yield ('symbol', word, start)
        else:
            raise error.ParseError(_("syntax error"), pos)
    yield ('end', None, pos)
86 86
87 87 # helpers
88 88
def getstring(x, err):
    """Return the text of a 'string' or 'symbol' parse node.

    Raises error.ParseError(err) when x is empty or is any other kind
    of node.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]
93 93
def getlist(x):
    """Flatten a left-nested ``('list', head, last)`` tree into a list.

    An empty tree yields ``[]``; a node that is not a 'list' node yields
    a one-element list holding that node.
    """
    items = []
    # peel the right-hand element off each nested 'list' node
    while x and x[0] == 'list':
        items.append(x[2])
        x = x[1]
    if x:
        items.append(x)
    # elements were collected right-to-left
    items.reverse()
    return items
100 100
def getargs(x, min, max, err):
    """Return the argument list of x, enforcing min <= count <= max.

    Raises error.ParseError(err) when the number of arguments is outside
    that range.
    """
    args = getlist(x)
    if not (min <= len(args) <= max):
        raise error.ParseError(err)
    return args
106 106
def getset(repo, subset, x):
    """Evaluate parse tree x against subset.

    Dispatches on the node type x[0] through the ``methods`` table and
    passes the remaining node fields as arguments.  Raises
    error.ParseError when x is empty.
    """
    if x:
        handler = methods[x[0]]
        return handler(repo, subset, *x[1:])
    raise error.ParseError(_("missing argument"))
111 111
112 112 # operator methods
113 113
def stringset(repo, subset, x):
    """Resolve identifier x to a single revision, if it is in subset.

    Revision -1 (presumably the null revision) is returned only when
    subset is as long as the repository.
    """
    rev = repo[x].rev()
    wholerepo = rev == -1 and len(subset) == len(repo)
    if wholerepo or rev in subset:
        return [rev]
    return []
121 121
def symbolset(repo, subset, x):
    """Resolve a bare symbol like stringset() does.

    A symbol that names a predicate in ``symbols`` is rejected with
    error.ParseError, since it was used without a call.
    """
    if x not in symbols:
        return stringset(repo, subset, x)
    raise error.ParseError(_("can't use %s here") % x)
126 126
def rangeset(repo, subset, x, y):
    """Return the members of subset lying between x and y, inclusive.

    The range runs from the first revision of x to the last revision of
    y, descending when the start is not below the end.  An endpoint that
    selects nothing within subset is looked up again in the whole
    repository.
    """
    def endpoint(tree):
        revs = getset(repo, subset, tree)
        if not revs:
            # fall back to every revision in the repository
            revs = getset(repo, range(len(repo)), tree)
        return revs

    first = endpoint(x)
    last = endpoint(y)
    if not first or not last:
        return []

    start, stop = first[0], last[-1]
    if start < stop:
        step = 1
    else:
        step = -1
    wanted = set(subset)
    return [r for r in range(start, stop + step, step) if r in wanted]
146 146
def andset(repo, subset, x, y):
    """Evaluate y within the result of evaluating x within subset."""
    narrowed = getset(repo, subset, x)
    return getset(repo, narrowed, y)
149 149
def orset(repo, subset, x, y):
    """Return the members of subset matched by x or y, in subset order.

    y is only evaluated against the revisions x did not already match.
    """
    left = set(getset(repo, subset, x))
    remaining = [r for r in subset if r not in left]
    matched = left.union(getset(repo, remaining, y))
    return [r for r in subset if r in matched]
154 154
def notset(repo, subset, x):
    """Return the members of subset that x does not match."""
    excluded = set(getset(repo, subset, x))
    kept = []
    for r in subset:
        if r not in excluded:
            kept.append(r)
    return kept
158 158
def listset(repo, subset, a, b):
    """Reject a 'list' node evaluated as a set; always raises ParseError."""
    msg = _("can't use a list in this context")
    raise error.ParseError(msg)
161 161
def func(repo, subset, a, b):
    """Call the predicate named by node a with argument tree b.

    Raises error.ParseError when a is not a symbol registered in
    ``symbols``.
    """
    kind, name = a[0], a[1]
    if kind != 'symbol' or name not in symbols:
        raise error.ParseError(_("not a function: %s") % name)
    return symbols[name](repo, subset, b)
166 166
167 167 # functions
168 168
def p1(repo, subset, x):
    """Members of subset that are the first parent of a revision in x."""
    parentrevs = repo.changelog.parentrevs
    firstparents = set([parentrevs(r)[0] for r in getset(repo, subset, x)])
    return [r for r in subset if r in firstparents]
175 175
def p2(repo, subset, x):
    """Members of subset that are the second parent of a revision in x."""
    parentrevs = repo.changelog.parentrevs
    secondparents = set([parentrevs(r)[1] for r in getset(repo, subset, x)])
    return [r for r in subset if r in secondparents]
182 182
def parents(repo, subset, x):
    """Members of subset that are a parent of some revision in x."""
    parentrevs = repo.changelog.parentrevs
    found = set()
    for rev in getset(repo, subset, x):
        for p in parentrevs(rev):
            found.add(p)
    return [r for r in subset if r in found]
189 189
def maxrev(repo, subset, x):
    """Return the highest revision of x, or [] if x is empty or the
    highest revision is not in subset."""
    candidates = getset(repo, subset, x)
    if not candidates:
        return []
    top = max(candidates)
    if top not in subset:
        return []
    return [top]
197 197
def minrev(repo, subset, x):
    """Return the lowest revision of x, or [] if x is empty or the
    lowest revision is not in subset."""
    candidates = getset(repo, subset, x)
    if not candidates:
        return []
    bottom = min(candidates)
    if bottom not in subset:
        return []
    return [bottom]
205 205
def limit(repo, subset, x):
    """Evaluate the first argument and keep its first N results.

    N is the second argument; it must be a string or symbol that int()
    accepts, otherwise error.ParseError is raised.
    """
    expr, count = getargs(x, 2, 2, _("limit wants two arguments"))
    text = getstring(count, _("limit wants a number"))
    try:
        lim = int(text)
    except ValueError:
        raise error.ParseError(_("limit expects a number"))
    revs = getset(repo, subset, expr)
    return revs[:lim]
213 213
def children(repo, subset, x):
    """Members of subset having at least one parent in x.

    Every revision in the repository is scanned to find the children.
    """
    parentrevs = repo.changelog.parentrevs
    wanted = set(getset(repo, subset, x))
    kids = set()
    for rev in xrange(len(repo)):
        if wanted.intersection(parentrevs(rev)):
            kids.add(rev)
    return [r for r in subset if r in kids]
223 223
def branch(repo, subset, x):
    """Members of subset that are in x or share a branch name with x.

    x is evaluated against the whole repository, not just subset.
    """
    revs = getset(repo, range(len(repo)), x)
    names = set([repo[r].branch() for r in revs])
    revs = set(revs)
    result = []
    for r in subset:
        if r in revs or repo[r].branch() in names:
            result.append(r)
    return result
231 231
def ancestor(repo, subset, x):
    """Return the ancestor() of two single revisions, if it is in subset.

    Both arguments are evaluated against the whole repository and must
    each select exactly one revision, otherwise error.ParseError is
    raised.
    """
    first, second = getargs(x, 2, 2, _("ancestor wants two arguments"))
    allrevs = range(len(repo))
    left = getset(repo, allrevs, first)
    right = getset(repo, allrevs, second)
    if not (len(left) == 1 and len(right) == 1):
        raise error.ParseError(_("ancestor arguments must be single revisions"))

    common = repo[left[0]].ancestor(repo[right[0]]).rev()
    if common in subset:
        return [common]
    return []
242 242
def ancestors(repo, subset, x):
    """Members of subset that are in x or are changelog ancestors of x.

    x is evaluated against the whole repository.
    """
    heads = getset(repo, range(len(repo)), x)
    if not heads:
        return []
    closure = set(repo.changelog.ancestors(*heads))
    closure.update(heads)
    return [r for r in subset if r in closure]
249 249
def descendants(repo, subset, x):
    """Members of subset that are in x or are changelog descendants of x.

    x is evaluated against the whole repository.
    """
    bases = getset(repo, range(len(repo)), x)
    if not bases:
        return []
    closure = set(repo.changelog.descendants(*bases))
    closure.update(bases)
    return [r for r in subset if r in closure]
256 256
def follow(repo, subset, x):
    """Members of subset that are repo['.'] or one of its ancestors."""
    getargs(x, 0, 0, _("follow takes no arguments"))
    current = repo['.'].rev()
    lineage = set(repo.changelog.ancestors(current))
    lineage.add(current)
    return [r for r in subset if r in lineage]
262 262
def date(repo, subset, x):
    """Members of subset whose date satisfies the given date spec.

    The spec is compiled with util.matchdate() and applied to the first
    field of each changeset's date().
    """
    spec = getstring(x, _("date wants a string"))
    indate = util.matchdate(spec)
    result = []
    for r in subset:
        if indate(repo[r].date()[0]):
            result.append(r)
    return result
267 267
def keyword(repo, subset, x):
    """Members of subset mentioning the keyword, ignoring case.

    The keyword is searched in the space-joined file names, user and
    description of each changeset.
    """
    kw = getstring(x, _("keyword wants a string")).lower()

    def mentions(rev):
        ctx = repo[rev]
        fields = ctx.files() + [ctx.user(), ctx.description()]
        return kw in " ".join(fields).lower()

    return [r for r in subset if mentions(r)]
277 277
def grep(repo, subset, x):
    """Members of subset with a field matching the regular expression.

    The pattern is searched in every file name, the user and the
    description of each changeset.  Each matching revision is reported
    once, however many of its fields match.

    Raises error.ParseError when the argument is not a string or is not
    a valid regular expression.
    """
    try:
        gr = re.compile(getstring(x, _("grep wants a string")))
    except re.error as inst:
        raise error.ParseError(_('invalid match pattern: %s') % inst)
    l = []
    for r in subset:
        c = repo[r]
        for field in c.files() + [c.user(), c.description()]:
            if gr.search(field):
                l.append(r)
                # stop at the first hit so r is not added again
                break
    return l
291 291
def author(repo, subset, x):
    """Members of subset whose user contains the string, ignoring case."""
    needle = getstring(x, _("author wants a string")).lower()
    result = []
    for r in subset:
        if needle in repo[r].user().lower():
            result.append(r)
    return result
295 295
def hasfile(repo, subset, x):
    """Members of subset touching a file that matches the pattern.

    The pattern is matched against the names returned by files() for
    each changeset.  Each matching revision is reported once, however
    many of its files match.
    """
    pat = getstring(x, _("file wants a pattern"))
    m = matchmod.match(repo.root, repo.getcwd(), [pat])
    s = []
    for r in subset:
        for f in repo[r].files():
            if m(f):
                s.append(r)
                # stop at the first hit so r is not added again
                break
    return s
306 306
def contains(repo, subset, x):
    """Members of subset whose manifest has a file matching the pattern.

    When the matcher reduces to exactly the given name, a direct
    membership test on the changeset is used; otherwise every manifest
    entry is run through the matcher.  Each matching revision is
    reported once.
    """
    pat = getstring(x, _("contains wants a pattern"))
    m = matchmod.match(repo.root, repo.getcwd(), [pat])
    s = []
    if m.files() == [pat]:
        # fast path: the pattern is a plain file name
        for r in subset:
            if pat in repo[r]:
                s.append(r)
    else:
        for r in subset:
            for f in repo[r].manifest():
                if m(f):
                    s.append(r)
                    # stop at the first hit so r is not added again
                    break
    return s
323 323
def checkstatus(repo, subset, pat, field):
    """Members of subset whose status against p1 lists a matching file.

    ``field`` is the index into the result of repo.status(p1, rev) to
    inspect; the callers in this module pass 0, 1 and 2 for modifies,
    adds and removes.  Changesets whose files() do not match the pattern
    are skipped before status is computed.  Each matching revision is
    reported once.
    """
    m = matchmod.match(repo.root, repo.getcwd(), [pat])
    s = []
    # fast path: the pattern is a plain file name
    fast = (m.files() == [pat])
    for r in subset:
        c = repo[r]
        # cheap pre-filter on the changeset's file list
        if fast:
            if pat not in c.files():
                continue
        else:
            for f in c.files():
                if m(f):
                    break
            else:
                continue
        files = repo.status(c.p1().node(), c.node())[field]
        if fast:
            if pat in files:
                s.append(r)
        else:
            for f in files:
                if m(f):
                    s.append(r)
                    # stop at the first hit so r is not added again
                    break
    return s
350 350
def modifies(repo, subset, x):
    """Members of subset matching the pattern in status field 0."""
    pattern = getstring(x, _("modifies wants a pattern"))
    return checkstatus(repo, subset, pattern, field=0)
354 354
def adds(repo, subset, x):
    """Members of subset matching the pattern in status field 1."""
    pattern = getstring(x, _("adds wants a pattern"))
    return checkstatus(repo, subset, pattern, field=1)
358 358
def removes(repo, subset, x):
    """Members of subset matching the pattern in status field 2."""
    pattern = getstring(x, _("removes wants a pattern"))
    return checkstatus(repo, subset, pattern, field=2)
362 362
def merge(repo, subset, x):
    """Members of subset whose second parent is not -1."""
    getargs(x, 0, 0, _("merge takes no arguments"))
    parentrevs = repo.changelog.parentrevs
    result = []
    for r in subset:
        if parentrevs(r)[1] != -1:
            result.append(r)
    return result
367 367
def closed(repo, subset, x):
    """Members of subset with a true 'close' entry in their extra()."""
    getargs(x, 0, 0, _("closed takes no arguments"))
    result = []
    for r in subset:
        if repo[r].extra().get('close'):
            result.append(r)
    return result
371 371
def head(repo, subset, x):
    """Members of subset listed as a head in repo.branchmap()."""
    getargs(x, 0, 0, _("head takes no arguments"))
    headrevs = set()
    for name, nodes in repo.branchmap().iteritems():
        for node in nodes:
            headrevs.add(repo[node].rev())
    return [r for r in subset if r in headrevs]
378 378
def reverse(repo, subset, x):
    """Return the revisions selected by x in reverse order.

    A new list is returned.  The evaluated set may be the caller's own
    ``subset`` object (getall() returns it unchanged), so reversing it
    in place would reorder a list the caller is still using.
    """
    l = list(getset(repo, subset, x))
    l.reverse()
    return l
383 383
def present(repo, subset, x):
    """Evaluate x, returning [] instead of failing on a lookup error.

    Only error.RepoLookupError is suppressed.
    """
    try:
        revs = getset(repo, subset, x)
    except error.RepoLookupError:
        revs = []
    return revs
389 389
def sort(repo, subset, x):
    """Sort the revisions selected by the first argument.

    The optional second argument is a whitespace-separated list of keys,
    defaulting to "rev".  Recognised keys are rev, branch, desc, user,
    author (same as user) and date, each optionally prefixed with "-" to
    invert it.  The revision number is always used as the final
    tie-breaker.

    Raises error.ParseError for a key that is not in that list.  Keys
    are compared exactly, so fragments such as "use" or "-" are rejected.
    The keys are only checked while building an entry, so an empty set
    never triggers the error.
    """
    args = getargs(x, 1, 2, _("sort wants one or two arguments"))
    keys = "rev"
    if len(args) == 2:
        keys = getstring(args[1], _("sort spec must be a string"))
    keys = keys.split()

    def invert(s):
        # complement every character so larger characters sort first
        return "".join(chr(255 - ord(c)) for c in s)

    decorated = []
    for r in getset(repo, subset, args[0]):
        c = repo[r]
        e = []
        for k in keys:
            if k == 'rev':
                e.append(r)
            elif k == '-rev':
                e.append(-r)
            elif k == 'branch':
                e.append(c.branch())
            elif k == '-branch':
                e.append(invert(c.branch()))
            elif k == 'desc':
                e.append(c.description())
            elif k == '-desc':
                e.append(invert(c.description()))
            elif k in ('user', 'author'):
                e.append(c.user())
            elif k in ('-user', '-author'):
                e.append(invert(c.user()))
            elif k == 'date':
                e.append(c.date()[0])
            elif k == '-date':
                e.append(-c.date()[0])
            else:
                raise error.ParseError(_("unknown sort key %r") % k)
        # the revision is both the tie-breaker and the payload
        e.append(r)
        decorated.append(e)
    decorated.sort()
    return [e[-1] for e in decorated]
431 431
def getall(repo, subset, x):
    """Return subset itself unchanged; accepts no arguments."""
    errmsg = _("all takes no arguments")
    getargs(x, 0, 0, errmsg)
    return subset
435 435
def heads(repo, subset, x):
    """Revisions of x that parents() does not report for x."""
    members = getset(repo, subset, x)
    inner = set(parents(repo, subset, x))
    result = []
    for r in members:
        if r not in inner:
            result.append(r)
    return result
440 440
def roots(repo, subset, x):
    """Revisions of x that children() does not report for x."""
    members = getset(repo, subset, x)
    inner = set(children(repo, subset, x))
    result = []
    for r in members:
        if r not in inner:
            result.append(r)
    return result
445 445
def outgoing(repo, subset, x):
    """Members of subset reported as outgoing for a destination.

    The optional argument is the destination path.  Without it, the
    'default-push' path is expanded, with 'default' as the fallback.
    Output produced during discovery is captured in a ui buffer and
    discarded.  The buffer is popped even when discovery fails, so later
    output is not swallowed.
    """
    import hg # avoid start-up nasties
    l = getargs(x, 0, 1, _("outgoing wants a repository path"))
    dest = ''
    if l:
        dest = getstring(l[0], _("outgoing wants a repository path"))
    dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
    dest, branches = hg.parseurl(dest)
    other = hg.repository(hg.remoteui(repo, {}), dest)
    repo.ui.pushbuffer()
    try:
        o = discovery.findoutgoing(repo, other)
    finally:
        repo.ui.popbuffer()
    cl = repo.changelog
    # expand the discovery result into the full set of revision numbers
    o = set([cl.rev(r) for r in cl.nodesbetween(o, None)[0]])
    return [r for r in subset if r in o]
459 459
def tagged(repo, subset, x):
    """Members of subset carrying a tag other than 'tip'."""
    getargs(x, 0, 0, _("tagged takes no arguments"))
    torev = repo.changelog.rev
    taggedrevs = set()
    for tag, node in repo.tagslist():
        if tag != 'tip':
            taggedrevs.add(torev(node))
    return [r for r in subset if r in taggedrevs]
465 465
# Predicate name -> implementation, used by func() and symbolset().
# Every implementation is called as f(repo, subset, argtree).
symbols = {
    # graph relationships
    'ancestor': ancestor,
    'ancestors': ancestors,
    'children': children,
    'descendants': descendants,
    'follow': follow,
    'head': head,
    'heads': heads,
    'merge': merge,
    'p1': p1,
    'p2': p2,
    'parents': parents,
    'roots': roots,
    # changeset metadata ('user' is an alias of 'author')
    'author': author,
    'user': author,
    'branch': branch,
    'closed': closed,
    'date': date,
    'grep': grep,
    'keyword': keyword,
    'tagged': tagged,
    # file-based predicates
    'adds': adds,
    'contains': contains,
    'file': hasfile,
    'modifies': modifies,
    'removes': removes,
    # set manipulation
    'all': getall,
    'limit': limit,
    'max': maxrev,
    'min': minrev,
    'present': present,
    'reverse': reverse,
    'sort': sort,
    # other repositories
    'outgoing': outgoing,
}
501 501
# Parse-node type -> evaluator, used by getset() to dispatch on x[0].
methods = {
    # leaves
    'string': stringset,
    'symbol': symbolset,
    # operators
    'range': rangeset,
    'and': andset,
    'or': orset,
    'not': notset,
    # calls and their argument lists
    'func': func,
    'list': listset,
}
512 512
def optimize(x, small):
    """Rewrite parse tree x into a cheaper equivalent.

    Returns a ``(weight, tree)`` pair.  Syntactic sugar (minus, the DAG
    range and open-ended range forms, negate, group) is rewritten into
    'and', 'not', 'func', 'range' and 'string' nodes.  The operands of
    'and' are swapped when needed so the one with the lower weight is
    evaluated first.

    ``small`` halves the weight assigned to single revisions and to
    ancestor(); match() passes True for the top-level tree.
    """
    if x is None:
        return 0, x

    smallbonus = 1
    if small:
        smallbonus = .5

    op = x[0]
    if op == 'minus':
        return optimize(('and', x[1], ('not', x[2])), small)
    elif op == 'dagrange':
        return optimize(('and', ('func', ('symbol', 'descendants'), x[1]),
                         ('func', ('symbol', 'ancestors'), x[2])), small)
    elif op == 'dagrangepre':
        return optimize(('func', ('symbol', 'ancestors'), x[1]), small)
    elif op == 'dagrangepost':
        return optimize(('func', ('symbol', 'descendants'), x[1]), small)
    elif op == 'rangepre':
        return optimize(('range', ('string', '0'), x[1]), small)
    elif op == 'rangepost':
        return optimize(('range', x[1], ('string', 'tip')), small)
    elif op == 'negate':
        return optimize(('string',
                         '-' + getstring(x[1], _("can't negate that"))), small)
    elif op in ('string', 'symbol'):
        return smallbonus, x # single revisions are small
    elif op == 'and':
        wa, ta = optimize(x[1], True)
        wb, tb = optimize(x[2], True)
        w = min(wa, wb)
        if wa > wb:
            # evaluate the cheaper operand first
            return w, (op, tb, ta)
        return w, (op, ta, tb)
    elif op == 'or':
        wa, ta = optimize(x[1], False)
        wb, tb = optimize(x[2], False)
        return max(wa, wb), (op, ta, tb)
    elif op == 'not':
        o = optimize(x[1], not small)
        return o[0], (op, o[1])
    elif op == 'group':
        return optimize(x[1], small)
    elif op in ('range', 'list'):
        wa, ta = optimize(x[1], small)
        wb, tb = optimize(x[2], small)
        return wa + wb, (op, ta, tb)
    elif op == 'func':
        f = getstring(x[1], _("not a symbol"))
        wa, ta = optimize(x[2], small)
        # exact name matches; substring tests would also catch fragments
        if f in ("grep", "date", "user", "author", "keyword", "branch",
                 "file", "outgoing"):
            w = 10 # slow
        elif f in ("modifies", "adds", "removes"):
            w = 30 # slower
        elif f == "contains":
            w = 100 # very slow
        elif f == "ancestor":
            w = 1 * smallbonus
        elif f in ("reverse", "limit"):
            w = 0
        elif f == "sort":
            w = 10 # assume most sorts look at changelog
        else:
            w = 1
        return w + wa, (op, x[1], ta)
    return 1, x
581 581
# parse(spec) builds a parse tree from a revset string, using tokenize()
# and the elements table defined at the top of this module
parse = parser.parser(tokenize, elements).parse
583 583
def match(spec):
    """Compile revset string spec into a function.

    The returned function takes (repo, subset) and returns the result of
    evaluating the parsed, optimized expression against subset.  Raises
    error.ParseError when spec is empty.
    """
    if not spec:
        raise error.ParseError(_("empty query"))
    # the weight computed by the optimizer is not needed here
    tree = optimize(parse(spec), True)[1]

    def mfunc(repo, subset):
        return getset(repo, subset, tree)

    return mfunc
General Comments 0
You need to be logged in to leave comments. Login now