##// END OF EJS Templates
revset: raise ParseError exceptions
Matt Mackall -
r11289:4215ce51 default
parent child Browse files
Show More
@@ -1,88 +1,91 b''
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.txt and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is a function returning an iterator of (type, value, position) triples
14 14 # elements is a mapping of types to binding strength, prefix and infix actions
15 15 # an action is a tree node name, a tree label, and an optional match
16 16 # __call__(program) parses program into a labelled tree
17 17
18 import error
19
class parser(object):
    """Simple top-down operator precedence parser.

    tokenizer is called with the program and must return an iterator
    yielding (type, value, pos) tokens.  elements maps each token type
    to a tuple (binding strength, prefix action, infix action[, suffix
    action]); an action is a tuple (node name[, binding strength[,
    closing token type]]) or None.  methods, if given, maps node names
    to callables used by eval().
    """
    def __init__(self, tokenizer, elements, methods=None):
        self._tokenizer = tokenizer
        self._elements = elements
        self._methods = methods
    def _advance(self):
        'advance the tokenizer'
        t = self.current
        try:
            self.current = self._iter.next()
        except StopIteration:
            # tokenizer exhausted: keep the last token as current
            pass
        return t
    def _match(self, m):
        'make sure the tokenizer matches an end condition'
        if self.current[0] != m:
            # report the offending token type and its own position
            raise error.ParseError("unexpected token: %s" % self.current[0],
                                   self.current[2])
        self._advance()
    def _parse(self, bind=0):
        'parse one expression whose operators bind tighter than bind'
        token, value, pos = self._advance()
        # handle prefix rules on current token
        prefix = self._elements[token][1]
        if not prefix:
            raise error.ParseError("not a prefix: %s" % token, pos)
        if len(prefix) == 1:
            expr = (prefix[0], value)
        else:
            if len(prefix) > 2 and prefix[2] == self.current[0]:
                # closing token follows immediately: empty operand
                self._match(prefix[2])
                expr = (prefix[0], None)
            else:
                expr = (prefix[0], self._parse(prefix[1]))
                if len(prefix) > 2:
                    self._match(prefix[2])
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value, pos = self._advance()
            e = self._elements[token]
            # check for suffix - next token isn't a valid prefix
            if len(e) == 4 and not self._elements[self.current[0]][1]:
                suffix = e[3]
                expr = (suffix[0], expr)
            else:
                # handle infix rules; an element may omit the infix
                # slot entirely or set it to None
                infix = None
                if len(e) > 2:
                    infix = e[2]
                if not infix or not infix[0]:
                    raise error.ParseError("not an infix: %s" % token, pos)
                if len(infix) == 3 and infix[2] == self.current[0]:
                    # closing token follows immediately: empty operand
                    self._match(infix[2])
                    expr = (infix[0], expr, None)
                else:
                    expr = (infix[0], expr, self._parse(infix[1]))
                    if len(infix) == 3:
                        self._match(infix[2])
        return expr
    def parse(self, message):
        'generate a parse tree from a message'
        self._iter = self._tokenizer(message)
        self.current = self._iter.next()
        return self._parse()
    def eval(self, tree):
        'recursively evaluate a parse tree using node methods'
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
    def __call__(self, message):
        'parse a message into a parse tree and evaluate if methods given'
        t = self.parse(message)
        if self._methods:
            return self.eval(t)
        return t
@@ -1,553 +1,553 b''
1 1 # revset.py - revision set queries for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import re
9 import parser, util, hg
9 import parser, util, hg, error
10 10 import match as _match
11 11
# Parser element table: token type -> (binding strength, prefix action,
# infix action[, suffix action]).  Synonymous operators share one entry.
_dagrange = (17, ("dagrangepre", 17), ("dagrange", 17), ("dagrangepost", 17))
_not = (10, ("not", 10))
_and = (5, None, ("and", 5))
_or = (4, None, ("or", 4))
_terminator = (0, None, None)

elements = {
    # grouping and function call
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    ")": _terminator,
    # arithmetic-looking operators
    "-": (19, ("negate", 19), ("minus", 19)),
    # ranges
    "::": _dagrange,
    "..": _dagrange,
    ":": (15, ("rangepre", 15), ("range", 15), ("rangepost", 15)),
    # boolean operators
    "not": _not,
    "!": _not,
    "and": _and,
    "&": _and,
    "or": _or,
    "|": _or,
    "+": _or,
    # argument lists
    ",": (2, None, ("list", 2)),
    # atoms and end of input
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": _terminator,
}

# symbols that the tokenizer reports as operators rather than names
keywords = set("and or not".split())
35 35
def tokenize(program):
    """Yield (type, value, pos) tokens for a revset program.

    Operators and keywords are yielded with their own text as type and
    None as value; quoted strings as ('string', unescaped text, pos) and
    everything else as ('symbol', text, pos).  A final ('end', None, pos)
    token is always yielded.  Raises error.ParseError on an unterminated
    string or an unexpected character.
    """
    size = len(program)
    pos = 0
    while pos < size:
        ch = program[pos]
        pair = program[pos:pos + 2]
        if ch.isspace():
            # skip inter-token whitespace
            pos += 1
        elif pair == '::' or pair == '..':
            # two-character operators take priority over ':' and '.'
            yield (pair, None, pos)
            pos += 2
        elif ch in "():,-|&+!":
            # simple one-character operators
            yield (ch, None, pos)
            pos += 1
        elif ch in '"\'':
            # quoted string: scan for the matching quote
            start = pos + 1
            end = start
            while end < size and program[end] != ch:
                if program[end] == '\\':
                    # skip over the escaped character as well
                    end += 2
                else:
                    end += 1
            if end >= size:
                raise error.ParseError("unterminated string", start)
            yield ('string', program[start:end].decode('string-escape'),
                   start)
            pos = end + 1
        elif ch.isalnum() or ch in '.':
            # gather up a symbol/keyword
            start = pos
            end = pos + 1
            while end < size:
                d = program[end]
                if not (d.isalnum() or d in "._"):
                    break
                if d == '.' and program[end - 1] == '.':
                    # special case for ..: give the first dot back
                    end -= 1
                    break
                end += 1
            sym = program[start:end]
            if sym in keywords:
                yield (sym, None, start)
            else:
                yield ('symbol', sym, start)
            pos = end
        else:
            raise error.ParseError("syntax error", pos)
    yield ('end', None, pos)
85 85
86 86 # helpers
87 87
def getstring(x, err):
    """Return the text of a 'string' or 'symbol' node.

    Raises error.ParseError(err) for any other node type.
    """
    kind = x[0]
    if kind != 'string' and kind != 'symbol':
        raise error.ParseError(err)
    return x[1]
92 92
def getlist(x):
    """Flatten a left-nested ('list', rest, item) tree into a list.

    A false x gives [], and any non-list node gives [x].
    """
    items = []
    # walk down the left spine collecting the right-hand items
    while x:
        if x[0] != 'list':
            items.append(x)
            break
        items.append(x[2])
        x = x[1]
    # items were collected last-to-first
    items.reverse()
    return items
99 99
def getpair(x, err):
    """Return the two arguments held in x as a list.

    Raises error.ParseError(err) unless getlist(x) has exactly two items.
    """
    pair = getlist(x)
    if len(pair) == 2:
        return pair
    raise error.ParseError(err)
105 105
def getset(repo, subset, x):
    """Evaluate parse tree x against subset via the methods table.

    Raises error.ParseError if x is empty or None.
    """
    if x:
        handler = methods[x[0]]
        return handler(repo, subset, *x[1:])
    raise error.ParseError("missing argument")
110 110
111 111 # operator methods
112 112
def negate(repo, subset, x):
    """Evaluate '-name' as the string '-' + name (x must be a name)."""
    name = getstring(x, "can't negate that")
    return getset(repo, subset, ('string', '-' + name))
116 116
def stringset(repo, subset, x):
    """Resolve x through repo[x] and return its revision if in subset.

    Revision -1 is also returned when subset has as many entries as
    the repo.
    """
    rev = repo[x].rev()
    wholerepo = rev == -1 and len(subset) == len(repo)
    if wholerepo or rev in subset:
        return [rev]
    return []
124 124
def symbolset(repo, subset, x):
    """Evaluate a bare symbol as a revision name.

    Raises error.ParseError if x is the name of a revset function.
    """
    if x not in symbols:
        return stringset(repo, subset, x)
    raise error.ParseError("can't use %s here" % x)
129 129
def rangeset(repo, subset, x, y):
    """Return the revisions from the first of x to the last of y.

    The range runs upwards or downwards depending on the order of the
    endpoints.  An endpoint that selects nothing inside subset is looked
    up again in the whole repo; if it still selects nothing the result
    is [].  Only revisions present in subset are returned, like every
    other set handler.
    """
    first = getset(repo, subset, x)
    if not first:
        # endpoint lies outside subset: fall back to the whole repo
        first = getset(repo, range(len(repo)), x)
    last = getset(repo, subset, y)
    if not last:
        last = getset(repo, range(len(repo)), y)
    if not first or not last:
        return []
    m, n = first[0], last[-1]
    if m < n:
        revs = range(m, n + 1)
    else:
        revs = range(m, n - 1, -1)
    allowed = set(subset)
    return [r for r in revs if r in allowed]
136 136
def andset(repo, subset, x, y):
    """Evaluate x within subset, then y within the result of x."""
    narrowed = getset(repo, subset, x)
    return getset(repo, narrowed, y)
139 139
def orset(repo, subset, x, y):
    """Return members of subset selected by x or y, in subset order."""
    found = set(getset(repo, subset, x))
    # only evaluate y on what x did not already select
    remaining = [r for r in subset if r not in found]
    found.update(getset(repo, remaining, y))
    return [r for r in subset if r in found]
144 144
def notset(repo, subset, x):
    """Return members of subset not selected by x, in subset order."""
    excluded = set(getset(repo, subset, x))
    kept = []
    for r in subset:
        if r not in excluded:
            kept.append(r)
    return kept
148 148
def listset(repo, subset, a, b):
    """Reject a 'list' node evaluated as a set: always raises ParseError."""
    msg = "can't use a list in this context"
    raise error.ParseError(msg)
151 151
def func(repo, subset, a, b):
    """Call the revset function named by symbol node a with argument b.

    Raises error.ParseError if a is not a symbol listed in symbols.
    """
    if a[0] != 'symbol' or a[1] not in symbols:
        raise error.ParseError("not a function: %s" % a[1])
    return symbols[a[1]](repo, subset, b)
156 156
157 157 # functions
158 158
def p1(repo, subset, x):
    """Return members of subset that are first parents of revisions in x."""
    parentrevs = repo.changelog.parentrevs
    firsts = set([parentrevs(r)[0] for r in getset(repo, subset, x)])
    return [r for r in subset if r in firsts]
165 165
def p2(repo, subset, x):
    """Return members of subset that are second parents of revisions in x."""
    parentrevs = repo.changelog.parentrevs
    seconds = set([parentrevs(r)[1] for r in getset(repo, subset, x)])
    return [r for r in subset if r in seconds]
172 172
def parents(repo, subset, x):
    """Return members of subset that are a parent of any revision in x."""
    parentrevs = repo.changelog.parentrevs
    found = set()
    for rev in getset(repo, subset, x):
        for p in parentrevs(rev):
            found.add(p)
    return [r for r in subset if r in found]
179 179
def maxrev(repo, subset, x):
    """Return [highest revision of x] if it is in subset, else []."""
    revs = getset(repo, subset, x)
    if not revs:
        return []
    top = max(revs)
    if top not in subset:
        return []
    return [top]
187 187
def limit(repo, subset, x):
    """Return the first n revisions of a set; x holds (set, n).

    Raises error.ParseError if there are not exactly two arguments or
    n is not an integer.
    """
    args = getpair(x, "limit wants two args")
    try:
        count = int(getstring(args[1], "limit wants a number"))
    except ValueError:
        raise error.ParseError("limit expects a number")
    revs = getset(repo, subset, args[0])
    return revs[:count]
195 195
def children(repo, subset, x):
    """Return members of subset having a parent among the revisions in x."""
    parentrevs = repo.changelog.parentrevs
    wanted = set(getset(repo, subset, x))
    kids = set()
    # scan every revision in the repo for a parent in the wanted set
    for rev in xrange(0, len(repo)):
        for p in parentrevs(rev):
            if p in wanted:
                kids.add(rev)
    return [r for r in subset if r in kids]
205 205
def branch(repo, subset, x):
    """Return members of subset in x or on the branch of a revision in x.

    x is evaluated against the whole repo, not just subset.
    """
    revs = getset(repo, range(len(repo)), x)
    names = set([repo[r].branch() for r in revs])
    revs = set(revs)
    picked = []
    for r in subset:
        if r in revs or repo[r].branch() in names:
            picked.append(r)
    return picked
213 213
def ancestor(repo, subset, x):
    """Return the ancestor of two single revisions as a one-item list.

    The result comes from repo[a].ancestor(repo[b]).  Raises
    error.ParseError unless x holds exactly two arguments and each of
    them evaluates to exactly one revision; an empty set is rejected
    too rather than failing later on indexing.
    """
    l = getpair(x, "ancestor wants two args")
    a = getset(repo, subset, l[0])
    b = getset(repo, subset, l[1])
    if len(a) != 1 or len(b) != 1:
        raise error.ParseError("ancestor args must be single revisions")
    return [repo[a[0]].ancestor(repo[b[0]]).rev()]
221 221
def ancestors(repo, subset, x):
    """Return members of subset in x or among changelog.ancestors of x.

    x is evaluated against the whole repo, not just subset.
    """
    revs = getset(repo, range(len(repo)), x)
    reachable = set(repo.changelog.ancestors(*revs))
    reachable.update(revs)
    return [r for r in subset if r in reachable]
226 226
def descendants(repo, subset, x):
    """Return members of subset in x or among changelog.descendants of x.

    x is evaluated against the whole repo, not just subset.
    """
    revs = getset(repo, range(len(repo)), x)
    reachable = set(repo.changelog.descendants(*revs))
    reachable.update(revs)
    return [r for r in subset if r in reachable]
231 231
def follow(repo, subset, x):
    """Return members of subset that are repo['.'] or one of its ancestors.

    Takes no arguments; raises error.ParseError if any are given.
    """
    if x:
        raise error.ParseError("follow takes no args")
    current = repo['.'].rev()
    lineage = set(repo.changelog.ancestors(current))
    lineage.add(current)
    return [r for r in subset if r in lineage]
238 238
def date(repo, subset, x):
    """Return members of subset whose date matches the date spec in x.

    The spec is turned into a predicate by util.matchdate and applied
    to the first field of each changeset's date.
    """
    spec = getstring(x, 'date wants a string')
    matches = util.matchdate(spec)
    hits = []
    for r in subset:
        if matches(repo[r].date()[0]):
            hits.append(r)
    return hits
243 243
def keyword(repo, subset, x):
    """Return members of subset mentioning keyword x, case-insensitively.

    The keyword is searched for in the changeset's file names, user and
    description joined by spaces.
    """
    needle = getstring(x, "keyword wants a string").lower()
    def haystack(r):
        ctx = repo[r]
        text = " ".join(ctx.files() + [ctx.user(), ctx.description()])
        return text.lower()
    return [r for r in subset if needle in haystack(r)]
253 253
def grep(repo, subset, x):
    """Return members of subset where regex x matches any field.

    The pattern is searched for (re.search) in each file name, the user
    and the description of every changeset.  Each matching revision is
    returned once, in subset order.
    """
    gr = re.compile(getstring(x, "grep wants a string"))
    l = []
    for r in subset:
        c = repo[r]
        for e in c.files() + [c.user(), c.description()]:
            if gr.search(e):
                l.append(r)
                # one hit is enough; stop so r is not appended again
                break
    return l
264 264
def author(repo, subset, x):
    """Return members of subset whose user contains x, case-insensitively."""
    needle = getstring(x, "author wants a string").lower()
    hits = []
    for r in subset:
        if needle in repo[r].user().lower():
            hits.append(r)
    return hits
268 268
def hasfile(repo, subset, x):
    """Return members of subset touching a file that matches pattern x.

    The pattern is matched against the names returned by files() for
    each changeset.  Each matching revision is returned once, in subset
    order.
    """
    pat = getstring(x, "file wants a pattern")
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    for r in subset:
        for f in repo[r].files():
            if m(f):
                s.append(r)
                # one hit is enough; stop so r is not appended again
                break
    return s
279 279
def contains(repo, subset, x):
    """Return members of subset whose manifest has a file matching x.

    Each matching revision is returned once, in subset order.
    """
    pat = getstring(x, "contains wants a pattern")
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    if m.files() == [pat]:
        # the matcher reports pat as its only file, so a direct
        # membership test on the changeset is enough
        for r in subset:
            if pat in repo[r]:
                s.append(r)
    else:
        for r in subset:
            for f in repo[r].manifest():
                if m(f):
                    s.append(r)
                    # one hit is enough; stop so r is not appended again
                    break
    return s
297 297
def checkstatus(repo, subset, pat, field):
    """Return members of subset whose status list `field` matches pat.

    field indexes the result of repo.status() between a changeset's
    first parent and the changeset itself; the callers in this file
    pass 0 for modifies(), 1 for adds() and 2 for removes().  Each
    matching revision is returned once, in subset order.
    """
    m = _match.match(repo.root, repo.getcwd(), [pat])
    s = []
    # fast path: the matcher reports pat as its only file
    fast = (m.files() == [pat])
    for r in subset:
        c = repo[r]
        # cheap pre-filter on files() before computing the status
        if fast:
            if pat not in c.files():
                continue
        else:
            for f in c.files():
                if m(f):
                    break
            else:
                continue
        files = repo.status(c.p1().node(), c.node())[field]
        if fast:
            if pat in files:
                s.append(r)
        else:
            for f in files:
                if m(f):
                    s.append(r)
                    # one hit is enough; stop so r is not appended again
                    break
    return s
324 324
def modifies(repo, subset, x):
    """Return members of subset selected by checkstatus() on field 0."""
    return checkstatus(repo, subset,
                       getstring(x, "modifies wants a pattern"), 0)
328 328
def adds(repo, subset, x):
    """Return members of subset selected by checkstatus() on field 1."""
    return checkstatus(repo, subset,
                       getstring(x, "adds wants a pattern"), 1)
332 332
def removes(repo, subset, x):
    """Return members of subset selected by checkstatus() on field 2."""
    return checkstatus(repo, subset,
                       getstring(x, "removes wants a pattern"), 2)
336 336
def merge(repo, subset, x):
    """Return members of subset whose second parent is not -1.

    Takes no arguments; raises error.ParseError if any are given.
    """
    if x:
        raise error.ParseError("merge takes no args")
    parentrevs = repo.changelog.parentrevs
    merges = []
    for r in subset:
        if parentrevs(r)[1] != -1:
            merges.append(r)
    return merges
342 342
def closed(repo, subset, x):
    """Return members of subset whose extra('close') value is true."""
    hits = []
    for r in subset:
        if repo[r].extra('close'):
            hits.append(r)
    return hits
345 345
def head(repo, subset, x):
    """Return members of subset listed as a head in repo.branchmap()."""
    headrevs = set()
    for name, nodes in repo.branchmap().iteritems():
        for node in nodes:
            headrevs.add(repo[node].rev())
    return [r for r in subset if r in headrevs]
351 351
def reverse(repo, subset, x):
    """Return the revisions selected by x in reverse order.

    The result is always a new list: getset() can hand back the caller's
    own subset list (getall() returns it unchanged), and reversing that
    in place would reorder the caller's data.
    """
    l = list(getset(repo, subset, x))
    l.reverse()
    return l
356 356
def sort(repo, subset, x):
    """Return the revisions of a set sorted by the given keys.

    x holds the set and an optional string of space-separated keys
    (default "rev").  Known keys are rev, branch, desc, user, author and
    date, each optionally prefixed with '-' for descending order.
    Raises error.ParseError when no set is given, or for an unknown key
    if the set is non-empty (keys are only checked while sorting).
    """
    l = getlist(x)
    if not l:
        raise error.ParseError("sort wants one or two arguments")
    keys = "rev"
    if len(l) == 2:
        keys = getstring(l[1], "sort spec must be a string")

    s = l[0]
    keys = keys.split()
    l = []
    def invert(s):
        # map each character to its complement so that plain string
        # comparison sorts in descending order
        return "".join(chr(255 - ord(c)) for c in s)
    for r in getset(repo, subset, s):
        c = repo[r]
        e = []
        for k in keys:
            if k == 'rev':
                e.append(r)
            elif k == '-rev':
                e.append(-r)
            elif k == 'branch':
                e.append(c.branch())
            elif k == '-branch':
                e.append(invert(c.branch()))
            elif k == 'desc':
                e.append(c.description())
            elif k == '-desc':
                e.append(invert(c.description()))
            # exact key names only: a substring test on 'user author'
            # would also accept keys like 'r' or 'auth'
            elif k in ('user', 'author'):
                e.append(c.user())
            elif k in ('-user', '-author'):
                e.append(invert(c.user()))
            elif k == 'date':
                e.append(c.date()[0])
            elif k == '-date':
                e.append(-c.date()[0])
            else:
                raise error.ParseError("unknown sort key %r" % k)
        # the revision goes last: it breaks ties and is what we return
        e.append(r)
        l.append(e)
    l.sort()
    return [e[-1] for e in l]
398 398
def getall(repo, subset, x):
    """Return subset unchanged (the same object, not a copy)."""
    everything = subset
    return everything
401 401
def heads(repo, subset, x):
    """Return revisions of x that are not a parent of any revision in x."""
    revs = getset(repo, subset, x)
    inner = set(parents(repo, subset, x))
    tips = []
    for r in revs:
        if r not in inner:
            tips.append(r)
    return tips
406 406
def roots(repo, subset, x):
    """Return revisions of x that are not a child of any revision in x."""
    revs = getset(repo, subset, x)
    inner = set(children(repo, subset, x))
    bases = []
    for r in revs:
        if r not in inner:
            bases.append(r)
    return bases
411 411
def outgoing(repo, subset, x):
    """Return members of subset that are missing from another repository.

    x may hold a single repo path; otherwise 'default-push' (falling
    back to 'default') is expanded through repo.ui.expandpath().
    """
    l = getlist(x)
    if len(l) == 1:
        dest = getstring(l[0], "outgoing wants a repo path")
    else:
        dest = ''
    dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
    dest = hg.parseurl(dest)[0]
    other = hg.repository(hg.remoteui(repo, {}), dest)
    # buffer ui output while looking for outgoing changes, and always
    # pop the buffer again, even if the lookup fails
    repo.ui.pushbuffer()
    try:
        o = repo.findoutgoing(other)
    finally:
        repo.ui.popbuffer()
    cl = repo.changelog
    o = set([cl.rev(r) for r in cl.nodesbetween(o, None)[0]])
    return [r for r in subset if r in o]
428 428
def tagged(repo, subset, x):
    """Return members of subset carrying a tag other than 'tip'."""
    rev = repo.changelog.rev
    tagrevs = set()
    for name, node in repo.tagslist():
        if name != 'tip':
            tagrevs.add(rev(node))
    return [r for r in subset if r in tagrevs]
433 433
# Revset function table: maps the name used in an expression to its
# implementation, called as f(repo, subset, x) where x is the unparsed
# argument tree.  func() dispatches through this table, and symbolset()
# refuses to treat any name listed here as a revision.
symbols = {
    "adds": adds,
    "all": getall,
    "ancestor": ancestor,
    "ancestors": ancestors,
    "author": author,
    "branch": branch,
    "children": children,
    "closed": closed,
    "contains": contains,
    "date": date,
    "descendants": descendants,
    "file": hasfile,
    "follow": follow,
    "grep": grep,
    "head": head,
    "heads": heads,
    "keyword": keyword,
    "limit": limit,
    "max": maxrev,
    "merge": merge,
    "modifies": modifies,
    "outgoing": outgoing,
    "p1": p1,
    "p2": p2,
    "parents": parents,
    "removes": removes,
    "reverse": reverse,
    "roots": roots,
    "sort": sort,
    "tagged": tagged,
    # "user" is an alias for "author"
    "user": author,
}
467 467
# Parse-tree dispatch table used by getset(): maps a node name to a
# handler called as handler(repo, subset, *node_args).  Node types such
# as 'minus', 'group', 'dagrange', 'dagrangepre', 'dagrangepost',
# 'rangepre' and 'rangepost' have no entry because optimize() rewrites
# them into the node types below before match() evaluates the tree.
methods = {
    "negate": negate,
    "range": rangeset,
    "string": stringset,
    "symbol": symbolset,
    "and": andset,
    "or": orset,
    "not": notset,
    "list": listset,
    "func": func,
}
479 479
def optimize(x, small):
    """Rewrite parse tree x into evaluable form and estimate its cost.

    Returns (weight, tree).  Syntactic sugar ('minus', 'dagrange*',
    'rangepre', 'rangepost', 'group') is rewritten into 'and', 'not',
    'func' and 'range' nodes, and the operands of 'and' are ordered so
    that the lighter one comes first.  small is true when the result is
    expected to be evaluated against a small set; it halves the weight
    of single-revision nodes.
    """
    if x is None:
        return 0, x

    smallbonus = 1
    if small:
        smallbonus = .5

    op = x[0]
    if op == 'minus':
        return optimize(('and', x[1], ('not', x[2])), small)
    elif op == 'dagrange':
        return optimize(('and', ('func', ('symbol', 'descendants'), x[1]),
                         ('func', ('symbol', 'ancestors'), x[2])), small)
    elif op == 'dagrangepre':
        return optimize(('func', ('symbol', 'ancestors'), x[1]), small)
    elif op == 'dagrangepost':
        return optimize(('func', ('symbol', 'descendants'), x[1]), small)
    elif op == 'rangepre':
        return optimize(('range', ('string', '0'), x[1]), small)
    elif op == 'rangepost':
        return optimize(('range', x[1], ('string', 'tip')), small)
    elif op in ('string', 'symbol', 'negate'):
        return smallbonus, x # single revisions are small
    elif op == 'and':
        wa, ta = optimize(x[1], True)
        wb, tb = optimize(x[2], True)
        w = min(wa, wb)
        if wa > wb:
            # evaluate the lighter operand first
            return w, (op, tb, ta)
        return w, (op, ta, tb)
    elif op == 'or':
        wa, ta = optimize(x[1], False)
        wb, tb = optimize(x[2], False)
        return max(wa, wb), (op, ta, tb)
    elif op == 'not':
        o = optimize(x[1], not small)
        return o[0], (op, o[1])
    elif op == 'group':
        return optimize(x[1], small)
    elif op in ('range', 'list'):
        wa, ta = optimize(x[1], small)
        wb, tb = optimize(x[2], small)
        return wa + wb, (op, ta, tb)
    elif op == 'func':
        f = getstring(x[1], "not a symbol")
        wa, ta = optimize(x[2], small)
        # exact function names only; substring tests on a space-joined
        # string would misclassify names and never match two at once
        if f in ("grep", "date", "user", "author", "keyword", "branch",
                 "file"):
            w = 10 # slow
        elif f in ("modifies", "adds", "removes", "outgoing"):
            w = 30 # slower
        elif f == "contains":
            w = 100 # very slow
        elif f == "ancestor":
            w = 1 * smallbonus
        elif f in ("reverse", "limit"):
            w = 0
        elif f == "sort":
            w = 10 # assume most sorts look at changelog
        else:
            w = 1
        return w + wa, (op, x[1], ta)
    return 1, x
545 545
# parse(spec) tokenizes a revset expression with tokenize() and returns
# its parse tree; no methods table is handed to the parser, so the tree
# is not evaluated here
parse = parser.parser(tokenize, elements).parse
547 547
def match(spec):
    """Compile revset expression spec into a function.

    The returned function takes (repo, subset) and returns the result of
    evaluating the optimized parse tree against subset.
    """
    # only the rewritten tree is needed; the weight is discarded
    tree = optimize(parse(spec), True)[1]
    def mfunc(repo, subset):
        return getset(repo, subset, tree)
    return mfunc
General Comments 0
You need to be logged in to leave comments. Login now