##// END OF EJS Templates
revset: optimize the parse tree directly...
Matt Mackall -
r11279:62ccf4cd default
parent child Browse files
Show More
@@ -1,546 +1,548
# revset.py - revision set queries for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 9 import parser, util, hg
10 10 import match as _match
11 11
# Token table handed to parser.parser() together with tokenize().
# Each key is a token type produced by tokenize(); each value starts with a
# number followed by up to three action tuples (or None).
# NOTE(review): presumably (binding strength, prefix action, infix action,
# suffix action) as interpreted by parser.py -- confirm against that module.
elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (19, ("negate", 19), ("minus", 19)),
    "::": (17, ("dagrangepre", 17), ("dagrange", 17),
           ("dagrangepost", 17)),
    "..": (17, ("dagrangepre", 17), ("dagrange", 17),
           ("dagrangepost", 17)),
    ":": (15, ("rangepre", 15), ("range", 15), ("rangepost", 15)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}
33 33
# words that tokenize() reports as operators instead of plain symbols
keywords = set(['and', 'or', 'not'])

def tokenize(program):
    """Split the revset expression `program` into (type, value) tokens.

    Yields, in order of appearance:
      - ('::', None) and ('..', None) for the two-character operators
      - (c, None) for each single-character operator in "():,-|&+!"
      - ('string', text) for a quoted string, with escapes decoded
      - (word, None) for the words in `keywords`
      - ('symbol', text) for any other run of alphanumerics, '.' and '_'
    and finally ('end', None).

    Raises ValueError for an unterminated quoted string or for a character
    that cannot start a token.  Because this is a generator, the error
    surfaces while the tokens are being consumed.
    """
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None)
            pos += 1 # skip ahead
        elif c in "():,-|&+!": # handle simple operators
            yield (c, None)
        elif c in '"\'': # handle quoted strings
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', program[s:pos].decode('string-escape'))
                    break
                pos += 1
            else:
                # ran off the end without seeing the closing quote
                raise ValueError("unterminated string")
        elif c.isalnum() or c in '.': # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in "._"):
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    # back up so the '..' operator is tokenized on its own
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None)
            else:
                yield ('symbol', sym)
            pos -= 1 # compensate for the shared increment below
        else:
            raise ValueError("syntax error at %d" % pos)
        pos += 1
    yield ('end', None)
85 85
# helpers
87 87
def getstring(x, err):
    """Return the text carried by a 'string' or 'symbol' parse node.

    x is a parse-tree tuple whose first item is the node type, or None.
    Raises ValueError(err) when x is missing or is any other node type.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    raise ValueError(err)
92 92
def getlist(x):
    """Flatten a left-nested ('list', rest, item) tree into a Python list.

    A falsy x gives []; a node that is not a 'list' gives [x].  Items come
    back in source order.
    """
    items = []
    # peel items off the right-hand side, walking down the left spine
    while x and x[0] == 'list':
        items.append(x[2])
        x = x[1]
    if x:
        items.append(x)
    items.reverse()
    return items
99 99
def getpair(x, err):
    """Return the two items of argument list x, as flattened by getlist().

    Raises ValueError(err) unless exactly two items are present.
    """
    l = getlist(x)
    if len(l) != 2:
        raise ValueError(err)
    return l
105 105
def getset(repo, subset, x):
    """Evaluate parse node x against subset and return the result.

    Dispatches on the node type x[0] through the `methods` table, passing the
    remaining items of x as positional arguments.
    Raises ValueError when x is missing.
    """
    if not x:
        raise ValueError("missing argument")
    return methods[x[0]](repo, subset, *x[1:])
110 110
# operator methods
112 112
def negate(repo, subset, x):
    """Evaluate a prefix '-' by looking up '-' + text of x as a string node.

    getstring() rejects an operand that is not a string or symbol.
    """
    return getset(repo, subset,
                  ('string', '-' + getstring(x, "can't negate that")))
116 116
def stringset(repo, subset, x):
    """Resolve identifier x through repo[x].rev().

    Returns a one-element list with that revision when it is in subset,
    otherwise an empty list.
    """
    rev = repo[x].rev()
    if rev in subset:
        return [rev]
    return []
122 122
def symbolset(repo, subset, x):
    """Evaluate a bare symbol as a revision identifier.

    Raises ValueError when x is the name of a revset function in `symbols`,
    since a function name cannot be used without a call.
    """
    if x in symbols:
        raise ValueError("can't use %s here" % x)
    return stringset(repo, subset, x)
127 127
def rangeset(repo, subset, x, y):
    """Evaluate x:y and return the spanned revisions that are in subset.

    The span runs from the first revision of x to the last revision of y,
    ascending when the start is lower than the end and descending otherwise.
    """
    # Resolve the endpoints against the whole repository: an endpoint that
    # lies outside subset must not turn into an empty list that then gets
    # indexed.
    allrevs = range(len(repo))
    first = getset(repo, allrevs, x)
    last = getset(repo, allrevs, y)
    if not first or not last:
        return []
    m, n = first[0], last[-1]
    if m < n:
        span = range(m, n + 1)
    else:
        span = range(m, n - 1, -1)
    # like every other method, only hand back members of subset
    wanted = set(subset)
    return [r for r in span if r in wanted]
134 134
def rangepreset(repo, subset, x):
    """Evaluate the prefix form ':x': revisions 0 through the last of x."""
    return range(0, getset(repo, subset, x)[-1] + 1)
137
def rangepostset(repo, subset, x):
    """Evaluate the postfix form 'x:': the first of x up to len(repo) - 1."""
    return range(getset(repo, subset, x)[0], len(repo))
140
def dagrangeset(repo, subset, x, y):
    """Evaluate x::y as descendants(x) and ancestors(y)."""
    return andset(repo, subset,
                  ('func', ('symbol', 'descendants'), x),
                  ('func', ('symbol', 'ancestors'), y))
145
def andset(repo, subset, x, y):
    """Evaluate 'x and y': y is evaluated within the result of x.

    Operand ordering is left to optimize(), which swaps the operands of an
    'and' node so that the lighter one comes first.
    """
    return getset(repo, getset(repo, subset, x), y)
150 137
def orset(repo, subset, x, y):
    """Evaluate 'x or y' and return the union in subset order."""
    s = set(getset(repo, subset, x))
    # y only needs to look at revisions x did not already select
    s |= set(getset(repo, [r for r in subset if r not in s], y))
    return [r for r in subset if r in s]
157 142
def notset(repo, subset, x):
    """Evaluate 'not x': the members of subset that x does not select."""
    s = set(getset(repo, subset, x))
    return [r for r in subset if r not in s]
161 146
def minusset(repo, subset, x, y):
    """Evaluate 'x - y': revisions selected by x but not by y.

    When x is estimated to be heavier than y, y is removed from subset first
    so that x runs over fewer revisions.
    """
    if weight(x, True) > weight(y, True):
        return getset(repo, notset(repo, subset, y), x)
    return notset(repo, getset(repo, subset, x), y)
166
def listset(repo, subset, a, b):
    """Reject a 'list' node used where a revision set is expected.

    Always raises ValueError.
    """
    raise ValueError("can't use a list in this context")
169 149
def func(repo, subset, a, b):
    """Evaluate a function call node: a names the function, b is its args.

    The function is looked up in `symbols` and called as f(repo, subset, b).
    Raises ValueError when a is not a symbol naming a known function.
    """
    if a[0] == 'symbol' and a[1] in symbols:
        return symbols[a[1]](repo, subset, b)
    raise ValueError("that's not a function: %s" % a[1])
174 154
# functions
176 156
def p1(repo, subset, x):
    """Members of subset that are the first parentrevs() entry of a rev in x."""
    cl = repo.changelog
    ps = set([cl.parentrevs(r)[0] for r in getset(repo, subset, x)])
    return [r for r in subset if r in ps]
183 163
def p2(repo, subset, x):
    """Members of subset that are the second parentrevs() entry of a rev in x."""
    cl = repo.changelog
    ps = set([cl.parentrevs(r)[1] for r in getset(repo, subset, x)])
    return [r for r in subset if r in ps]
190 170
def parents(repo, subset, x):
    """Members of subset that are a parent of some revision in x."""
    cl = repo.changelog
    ps = set()
    for r in getset(repo, subset, x):
        ps.update(cl.parentrevs(r))
    return [r for r in subset if r in ps]
197 177
def maxrev(repo, subset, x):
    """Return [highest revision of x] if it is in subset, else []."""
    s = getset(repo, subset, x)
    if not s:
        return []
    m = max(s)
    if m in subset:
        return [m]
    return []
205 185
def limit(repo, subset, x):
    """Evaluate limit(set, n): the first n revisions of set.

    Raises ValueError when there are not exactly two arguments or when the
    second one is not an integer.
    """
    l = getpair(x, "limit wants two args")
    count = getstring(l[1], "limit wants a number")
    # keep the try narrow so only the int() conversion is translated
    try:
        lim = int(count)
    except ValueError:
        raise ValueError("wants a number")
    return getset(repo, subset, l[0])[:lim]
213 193
def children(repo, subset, x):
    """Members of subset that have a parent among the revisions of x."""
    cl = repo.changelog
    s = set(getset(repo, subset, x))
    cs = set()
    # every revision is scanned; only parent links are available here
    for r in xrange(0, len(repo)):
        for p in cl.parentrevs(r):
            if p in s:
                cs.add(r)
    return [r for r in subset if r in cs]
223 203
def branch(repo, subset, x):
    """Members of subset that are in x or share a branch name with a rev in x.

    x is evaluated against the whole repository, not just subset.
    """
    s = getset(repo, range(len(repo)), x)
    b = set([repo[r].branch() for r in s])
    s = set(s)
    return [r for r in subset if r in s or repo[r].branch() in b]
231 211
def ancestor(repo, subset, x):
    """Evaluate ancestor(a, b) via repo[a].ancestor(repo[b]).

    Returns a one-element list holding that changeset's revision number.
    Raises ValueError unless there are two arguments and each evaluates to
    exactly one revision.
    """
    l = getpair(x, "ancestor wants two args")
    a = getset(repo, subset, l[0])
    b = getset(repo, subset, l[1])
    # an empty result is as unusable as several revisions
    if len(a) != 1 or len(b) != 1:
        raise ValueError("arguments to ancestor must be single revisions")
    return [repo[a[0]].ancestor(repo[b[0]]).rev()]
239 219
def ancestors(repo, subset, x):
    """Members of subset that are in x or are changelog ancestors of x.

    x is evaluated against the whole repository, not just subset.
    """
    args = getset(repo, range(len(repo)), x)
    s = set(repo.changelog.ancestors(*args)) | set(args)
    return [r for r in subset if r in s]
244 224
def descendants(repo, subset, x):
    """Members of subset that are in x or are changelog descendants of x.

    x is evaluated against the whole repository, not just subset.
    """
    args = getset(repo, range(len(repo)), x)
    s = set(repo.changelog.descendants(*args)) | set(args)
    return [r for r in subset if r in s]
249 229
def follow(repo, subset, x):
    """Members of subset that are repo['.'] or one of its ancestors.

    Raises ValueError when any argument is given.
    """
    if x:
        raise ValueError("follow takes no args")
    p = repo['.'].rev()
    s = set(repo.changelog.ancestors(p)) | set([p])
    return [r for r in subset if r in s]
256 236
def date(repo, subset, x):
    """Members of subset accepted by the util.matchdate() matcher for x.

    The matcher is called with the first item of each changeset's date().
    """
    ds = getstring(x, 'date wants a string')
    dm = util.matchdate(ds)
    return [r for r in subset if dm(repo[r].date()[0])]
261 241
def keyword(repo, subset, x):
    """Members of subset mentioning x in files, user or description.

    The comparison is a case-insensitive substring test over those fields
    joined with spaces.
    """
    kw = getstring(x, "keyword wants a string").lower()
    l = []
    for r in subset:
        c = repo[r]
        t = " ".join(c.files() + [c.user(), c.description()])
        if kw in t.lower():
            l.append(r)
    return l
271 251
def grep(repo, subset, x):
    """Members of subset where regex x matches a file name, user or description.

    Each revision appears at most once in the result.
    """
    gr = re.compile(getstring(x, "grep wants a string"))
    l = []
    for r in subset:
        c = repo[r]
        for e in c.files() + [c.user(), c.description()]:
            if gr.search(e):
                l.append(r)
                break # one hit is enough; do not list r twice
    return l
282 262
def author(repo, subset, x):
    """Members of subset whose user() contains x, ignoring case."""
    n = getstring(x, "author wants a string").lower()
    return [r for r in subset if n in repo[r].user().lower()]
286 266
def hasfile(repo, subset, x):
    """Members of subset whose files() include a name matching pattern x.

    Each revision appears at most once in the result.
    """
    pat = getstring(x, "file wants a pattern")
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    for r in subset:
        for f in repo[r].files():
            if m(f):
                s.append(r)
                break # one matching file is enough; do not list r twice
    return s
297 277
def contains(repo, subset, x):
    """Members of subset whose changeset contains a file matching pattern x.

    Each revision appears at most once in the result.
    """
    pat = getstring(x, "contains wants a pattern")
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    if m.files() == [pat]:
        # the matcher reports pat itself as its only file, so a direct
        # membership test on the changeset is enough
        for r in subset:
            if pat in repo[r]:
                s.append(r)
    else:
        for r in subset:
            for f in repo[r].manifest():
                if m(f):
                    s.append(r)
                    break # one matching file is enough
    return s
315 295
def checkstatus(repo, subset, pat, field):
    """Members of subset whose status list number `field` matches pat.

    For each revision, repo.status() is taken between its first parent and
    itself, and entry `field` of the result is searched.  Revisions whose
    files() do not match pat are skipped before status is computed.
    Each revision appears at most once in the result.
    """
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    # when the matcher reports pat as its only file, plain membership tests
    # replace calling the matcher on every name
    fast = (m.files() == [pat])
    for r in subset:
        c = repo[r]
        if fast:
            if pat not in c.files():
                continue
        else:
            for f in c.files():
                if m(f):
                    break
            else:
                continue # nothing this revision touched matches
        files = repo.status(c.p1().node(), c.node())[field]
        if fast:
            if pat in files:
                s.append(r)
        else:
            for f in files:
                if m(f):
                    s.append(r)
                    break # do not list r once per matching file
    return s
342 322
def modifies(repo, subset, x):
    """Members of subset where pattern x matches status field 0.

    NOTE(review): field 0 is presumably the 'modified' list of
    repo.status() -- confirm against localrepo.
    """
    pat = getstring(x, "modifies wants a pattern")
    return checkstatus(repo, subset, pat, 0)
346 326
def adds(repo, subset, x):
    """Members of subset where pattern x matches status field 1.

    NOTE(review): field 1 is presumably the 'added' list of
    repo.status() -- confirm against localrepo.
    """
    pat = getstring(x, "adds wants a pattern")
    return checkstatus(repo, subset, pat, 1)
350 330
def removes(repo, subset, x):
    """Members of subset where pattern x matches status field 2.

    NOTE(review): field 2 is presumably the 'removed' list of
    repo.status() -- confirm against localrepo.
    """
    pat = getstring(x, "removes wants a pattern")
    return checkstatus(repo, subset, pat, 2)
354 334
def merge(repo, subset, x):
    """Members of subset whose second parentrevs() entry is not -1.

    Raises ValueError when any argument is given.
    """
    if x:
        raise ValueError("merge takes no args")
    cl = repo.changelog
    return [r for r in subset if cl.parentrevs(r)[1] != -1]
360 340
def closed(repo, subset, x):
    """Members of subset for which extra('close') is truthy.

    The argument x is not used.
    NOTE(review): extra() is called with the key as its argument; confirm
    that this matches the changectx API.
    """
    return [r for r in subset if repo[r].extra('close')]
363 343
def head(repo, subset, x):
    """Members of subset listed as a head of any branch in repo.branchmap().

    The argument x is not used.
    """
    hs = set()
    # only the per-branch head lists matter, not the branch names
    for ls in repo.branchmap().values():
        hs.update([repo[h].rev() for h in ls])
    return [r for r in subset if r in hs]
369 349
def reverse(repo, subset, x):
    """Return the revisions of x in reverse order."""
    l = getset(repo, subset, x)
    # Build a new list: getset() may hand back the caller's own subset
    # (getall() returns it unchanged), which must not be reordered in place.
    return list(reversed(l))
374 354
def sort(repo, subset, x):
    """Evaluate sort(set[, keys]) and return set ordered by the given keys.

    keys is a whitespace-separated string, defaulting to "rev".  Recognised
    keys are rev, branch, desc, user, author and date; a leading '-'
    reverses that key.  Ties fall back to the revision number.
    Raises ValueError for an unknown key.
    """
    l = getlist(x)
    keys = "rev"
    if len(l) == 2:
        keys = getstring(l[1], "sort spec must be a string")

    s = l[0]
    keys = keys.split()
    l = []
    def invert(s):
        # map each character to its complement so that ascending order on
        # the result is descending order on the original text
        return "".join(chr(255 - ord(c)) for c in s)
    for r in getset(repo, subset, s):
        c = repo[r]
        e = []
        for k in keys:
            if k == 'rev':
                e.append(r)
            elif k == '-rev':
                e.append(-r)
            elif k == 'branch':
                e.append(c.branch())
            elif k == '-branch':
                e.append(invert(c.branch()))
            elif k == 'desc':
                e.append(c.description())
            elif k == '-desc':
                e.append(invert(c.description()))
            elif k in ('user', 'author'):
                e.append(c.user())
            elif k in ('-user', '-author'):
                e.append(invert(c.user()))
            elif k == 'date':
                e.append(c.date()[0])
            elif k == '-date':
                e.append(-c.date()[0])
            else:
                raise ValueError("unknown sort key %r" % k)
        e.append(r) # tie-breaker, and the value read back after sorting
        l.append(e)
    l.sort()
    return [e[-1] for e in l]
416 396
def getall(repo, subset, x):
    """Return subset unchanged (the same list object); x is not used."""
    return subset
419 399
def heads(repo, subset, x):
    """Revisions of x that are not a parent of another revision of x."""
    s = getset(repo, subset, x)
    ps = set(parents(repo, subset, x))
    return [r for r in s if r not in ps]
424 404
def roots(repo, subset, x):
    """Revisions of x that are not a child of another revision of x."""
    s = getset(repo, subset, x)
    cs = set(children(repo, subset, x))
    return [r for r in s if r not in cs]
429 409
def outgoing(repo, subset, x):
    """Members of subset reported by repo.findoutgoing() for a destination.

    With one argument, it is used as the destination path; otherwise the
    'default-push' path is expanded, with 'default' as the fallback.
    """
    l = getlist(x)
    if len(l) == 1:
        dest = getstring(l[0], "outgoing wants a repo path")
    else:
        dest = ''
    dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
    dest, branches = hg.parseurl(dest)
    other = hg.repository(hg.remoteui(repo, {}), dest)
    repo.ui.pushbuffer()
    try:
        o = repo.findoutgoing(other)
    finally:
        # always pair the pushbuffer() above, even if discovery fails
        repo.ui.popbuffer()
    cl = repo.changelog
    o = set([cl.rev(r) for r in cl.nodesbetween(o, None)[0]])
    return [r for r in subset if r in o]
446 426
# revset function name -> implementation, called as f(repo, subset, args)
# by func(); symbolset() also uses it to reject bare function names
symbols = {
    "ancestor": ancestor,
    "ancestors": ancestors,
    "descendants": descendants,
    "follow": follow,
    "merge": merge,
    "reverse": reverse,
    "sort": sort,
    "branch": branch,
    "keyword": keyword,
    "author": author,
    "user": author,
    "date": date,
    "grep": grep,
    "p1": p1,
    "p2": p2,
    "parents": parents,
    "children": children,
    "max": maxrev,
    "limit": limit,
    "file": hasfile,
    "contains": contains,
    "heads": heads,
    "roots": roots,
    "all": getall,
    "closed": closed,
    "head": head,
    "modifies": modifies,
    "adds": adds,
    "removes": removes,
    "outgoing": outgoing,
}
479 459
# parse node type -> evaluator, used by getset() as
# methods[node[0]](repo, subset, *node[1:]).
# The minus, prefix/postfix range and group entries are kept so that a tree
# containing those node types can still be evaluated directly.
methods = {
    "negate": negate,
    "minus": minusset,
    "range": rangeset,
    "rangepre": rangepreset,
    "rangepost": rangepostset,
    "dagrange": dagrangeset,
    "dagrangepre": ancestors,
    "dagrangepost": descendants,
    "string": stringset,
    "symbol": symbolset,
    "and": andset,
    "or": orset,
    "not": notset,
    "list": listset,
    "func": func,
    "group": lambda r, s, x: getset(r, s, x),
}
498 471
def optimize(x, small):
    """Rewrite parse tree x into a cheaper equivalent and estimate its cost.

    Returns a (weight, tree) pair.  `small` says whether the node is
    evaluated where a small result is expected; single revisions then count
    half.

    Rewrites performed:
      - 'minus' becomes 'and' with a 'not' on the right operand
      - 'dagrange', 'dagrangepre' and 'dagrangepost' become calls to
        descendants() / ancestors()
      - 'rangepre' and 'rangepost' become a full 'range' starting at '0' or
        ending at 'tip'
      - 'group' nodes are replaced by their content
      - the operands of 'and' are swapped so the lighter one comes first
    A missing tree (None) has weight 0.
    """
    if x is None:
        return 0, x

    smallbonus = 1
    if small:
        smallbonus = .5

    op = x[0]
    # the parser names infix '-' "minus"; the bare '-' spelling is accepted
    # too for trees built by hand
    if op in ('minus', '-'):
        return optimize(('and', x[1], ('not', x[2])), small)
    elif op == 'dagrange':
        return optimize(('and', ('func', ('symbol', 'descendants'), x[1]),
                         ('func', ('symbol', 'ancestors'), x[2])), small)
    elif op == 'dagrangepre':
        return optimize(('func', ('symbol', 'ancestors'), x[1]), small)
    elif op == 'dagrangepost':
        return optimize(('func', ('symbol', 'descendants'), x[1]), small)
    elif op == 'rangepre':
        return optimize(('range', ('string', '0'), x[1]), small)
    elif op == 'rangepost':
        return optimize(('range', x[1], ('string', 'tip')), small)
    elif op in ('string', 'symbol', 'negate'):
        return smallbonus, x # single revisions are small
    elif op == 'and':
        wa, ta = optimize(x[1], True)
        wb, tb = optimize(x[2], True)
        w = min(wa, wb)
        if wa > wb:
            return w, (op, tb, ta)
        return w, (op, ta, tb)
    elif op == 'or':
        wa, ta = optimize(x[1], False)
        wb, tb = optimize(x[2], False)
        return max(wa, wb), (op, ta, tb)
    elif op == 'not':
        o = optimize(x[1], not small)
        return o[0], (op, o[1])
    elif op == 'group':
        return optimize(x[1], small)
    # exact membership: a substring test here would also catch 'range'
    # inside longer operator names and drop its second operand
    elif op in ('range', 'list'):
        wa, ta = optimize(x[1], small)
        wb, tb = optimize(x[2], small)
        return wa + wb, (op, ta, tb)
    elif op == 'func':
        f = getstring(x[1], "not a symbol")
        wa, ta = optimize(x[2], small)
        if f in ('grep', 'date', 'user', 'author', 'keyword', 'branch',
                 'file'):
            w = 10 # slow
        elif f in ('modifies', 'adds', 'removes', 'outgoing'):
            w = 30 # slower
        elif f == 'contains':
            w = 100 # very slow
        elif f == 'ancestor':
            w = 1 * smallbonus
        elif f in ('reverse', 'limit'):
            w = 0
        elif f == 'sort':
            w = 10 # assume most sorts look at changelog
        else:
            w = 1
        return w + wa, (op, x[1], ta)
    return 1, x

def weight(x, small):
    """Return only the cost estimate that optimize() computes for x.

    Kept for callers that want a weight without the rewritten tree.
    """
    return optimize(x, small)[0]
539 540
# parse(spec) turns a revset expression into a parse tree using the
# tokenizer and token table of this module
parse = parser.parser(tokenize, elements).parse

def match(spec):
    """Compile revset expression spec into a function.

    The expression is parsed and optimized once; the returned
    mfunc(repo, subset) evaluates the optimized tree against subset.
    """
    tree = parse(spec)
    # only the rewritten tree is needed; the weight estimate is discarded
    _weight, tree = optimize(tree, True)
    def mfunc(repo, subset):
        return getset(repo, subset, tree)
    return mfunc
General Comments 0
You need to be logged in to leave comments. Login now