##// END OF EJS Templates
revsetlang: build optimized tree by helper function...
Yuya Nishihara -
r34046:b862e6fc default
parent child Browse files
Show More
@@ -1,671 +1,688 b''
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 util,
18 util,
19 )
19 )
20
20
21 elements = {
21 elements = {
22 # token-type: binding-strength, primary, prefix, infix, suffix
22 # token-type: binding-strength, primary, prefix, infix, suffix
23 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
23 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
24 "[": (21, None, None, ("subscript", 1, "]"), None),
24 "[": (21, None, None, ("subscript", 1, "]"), None),
25 "#": (21, None, None, ("relation", 21), None),
25 "#": (21, None, None, ("relation", 21), None),
26 "##": (20, None, None, ("_concat", 20), None),
26 "##": (20, None, None, ("_concat", 20), None),
27 "~": (18, None, None, ("ancestor", 18), None),
27 "~": (18, None, None, ("ancestor", 18), None),
28 "^": (18, None, None, ("parent", 18), "parentpost"),
28 "^": (18, None, None, ("parent", 18), "parentpost"),
29 "-": (5, None, ("negate", 19), ("minus", 5), None),
29 "-": (5, None, ("negate", 19), ("minus", 5), None),
30 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
30 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
31 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
31 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
32 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
32 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
33 "not": (10, None, ("not", 10), None, None),
33 "not": (10, None, ("not", 10), None, None),
34 "!": (10, None, ("not", 10), None, None),
34 "!": (10, None, ("not", 10), None, None),
35 "and": (5, None, None, ("and", 5), None),
35 "and": (5, None, None, ("and", 5), None),
36 "&": (5, None, None, ("and", 5), None),
36 "&": (5, None, None, ("and", 5), None),
37 "%": (5, None, None, ("only", 5), "onlypost"),
37 "%": (5, None, None, ("only", 5), "onlypost"),
38 "or": (4, None, None, ("or", 4), None),
38 "or": (4, None, None, ("or", 4), None),
39 "|": (4, None, None, ("or", 4), None),
39 "|": (4, None, None, ("or", 4), None),
40 "+": (4, None, None, ("or", 4), None),
40 "+": (4, None, None, ("or", 4), None),
41 "=": (3, None, None, ("keyvalue", 3), None),
41 "=": (3, None, None, ("keyvalue", 3), None),
42 ",": (2, None, None, ("list", 2), None),
42 ",": (2, None, None, ("list", 2), None),
43 ")": (0, None, None, None, None),
43 ")": (0, None, None, None, None),
44 "]": (0, None, None, None, None),
44 "]": (0, None, None, None, None),
45 "symbol": (0, "symbol", None, None, None),
45 "symbol": (0, "symbol", None, None, None),
46 "string": (0, "string", None, None, None),
46 "string": (0, "string", None, None, None),
47 "end": (0, None, None, None, None),
47 "end": (0, None, None, None, None),
48 }
48 }
49
49
50 keywords = {'and', 'or', 'not'}
50 keywords = {'and', 'or', 'not'}
51
51
52 _quoteletters = {'"', "'"}
52 _quoteletters = {'"', "'"}
53 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
53 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
54
54
55 # default set of valid characters for the initial letter of symbols
55 # default set of valid characters for the initial letter of symbols
56 _syminitletters = set(pycompat.iterbytestr(
56 _syminitletters = set(pycompat.iterbytestr(
57 string.ascii_letters.encode('ascii') +
57 string.ascii_letters.encode('ascii') +
58 string.digits.encode('ascii') +
58 string.digits.encode('ascii') +
59 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
59 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
60
60
61 # default set of valid characters for non-initial letters of symbols
61 # default set of valid characters for non-initial letters of symbols
62 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
62 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
63
63
64 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
64 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
65 '''
65 '''
66 Parse a revset statement into a stream of tokens
66 Parse a revset statement into a stream of tokens
67
67
68 ``syminitletters`` is the set of valid characters for the initial
68 ``syminitletters`` is the set of valid characters for the initial
69 letter of symbols.
69 letter of symbols.
70
70
71 By default, character ``c`` is recognized as valid for initial
71 By default, character ``c`` is recognized as valid for initial
72 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
72 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
73
73
74 ``symletters`` is the set of valid characters for non-initial
74 ``symletters`` is the set of valid characters for non-initial
75 letters of symbols.
75 letters of symbols.
76
76
77 By default, character ``c`` is recognized as valid for non-initial
77 By default, character ``c`` is recognized as valid for non-initial
78 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
78 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
79
79
80 Check that @ is a valid unquoted token character (issue3686):
80 Check that @ is a valid unquoted token character (issue3686):
81 >>> list(tokenize("@::"))
81 >>> list(tokenize("@::"))
82 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
82 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
83
83
84 '''
84 '''
85 program = pycompat.bytestr(program)
85 program = pycompat.bytestr(program)
86 if syminitletters is None:
86 if syminitletters is None:
87 syminitletters = _syminitletters
87 syminitletters = _syminitletters
88 if symletters is None:
88 if symletters is None:
89 symletters = _symletters
89 symletters = _symletters
90
90
91 if program and lookup:
91 if program and lookup:
92 # attempt to parse old-style ranges first to deal with
92 # attempt to parse old-style ranges first to deal with
93 # things like old-tag which contain query metacharacters
93 # things like old-tag which contain query metacharacters
94 parts = program.split(':', 1)
94 parts = program.split(':', 1)
95 if all(lookup(sym) for sym in parts if sym):
95 if all(lookup(sym) for sym in parts if sym):
96 if parts[0]:
96 if parts[0]:
97 yield ('symbol', parts[0], 0)
97 yield ('symbol', parts[0], 0)
98 if len(parts) > 1:
98 if len(parts) > 1:
99 s = len(parts[0])
99 s = len(parts[0])
100 yield (':', None, s)
100 yield (':', None, s)
101 if parts[1]:
101 if parts[1]:
102 yield ('symbol', parts[1], s + 1)
102 yield ('symbol', parts[1], s + 1)
103 yield ('end', None, len(program))
103 yield ('end', None, len(program))
104 return
104 return
105
105
106 pos, l = 0, len(program)
106 pos, l = 0, len(program)
107 while pos < l:
107 while pos < l:
108 c = program[pos]
108 c = program[pos]
109 if c.isspace(): # skip inter-token whitespace
109 if c.isspace(): # skip inter-token whitespace
110 pass
110 pass
111 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
111 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
112 yield ('::', None, pos)
112 yield ('::', None, pos)
113 pos += 1 # skip ahead
113 pos += 1 # skip ahead
114 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
114 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
115 yield ('..', None, pos)
115 yield ('..', None, pos)
116 pos += 1 # skip ahead
116 pos += 1 # skip ahead
117 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
117 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
118 yield ('##', None, pos)
118 yield ('##', None, pos)
119 pos += 1 # skip ahead
119 pos += 1 # skip ahead
120 elif c in _simpleopletters: # handle simple operators
120 elif c in _simpleopletters: # handle simple operators
121 yield (c, None, pos)
121 yield (c, None, pos)
122 elif (c in _quoteletters or c == 'r' and
122 elif (c in _quoteletters or c == 'r' and
123 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
123 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
124 if c == 'r':
124 if c == 'r':
125 pos += 1
125 pos += 1
126 c = program[pos]
126 c = program[pos]
127 decode = lambda x: x
127 decode = lambda x: x
128 else:
128 else:
129 decode = parser.unescapestr
129 decode = parser.unescapestr
130 pos += 1
130 pos += 1
131 s = pos
131 s = pos
132 while pos < l: # find closing quote
132 while pos < l: # find closing quote
133 d = program[pos]
133 d = program[pos]
134 if d == '\\': # skip over escaped characters
134 if d == '\\': # skip over escaped characters
135 pos += 2
135 pos += 2
136 continue
136 continue
137 if d == c:
137 if d == c:
138 yield ('string', decode(program[s:pos]), s)
138 yield ('string', decode(program[s:pos]), s)
139 break
139 break
140 pos += 1
140 pos += 1
141 else:
141 else:
142 raise error.ParseError(_("unterminated string"), s)
142 raise error.ParseError(_("unterminated string"), s)
143 # gather up a symbol/keyword
143 # gather up a symbol/keyword
144 elif c in syminitletters:
144 elif c in syminitletters:
145 s = pos
145 s = pos
146 pos += 1
146 pos += 1
147 while pos < l: # find end of symbol
147 while pos < l: # find end of symbol
148 d = program[pos]
148 d = program[pos]
149 if d not in symletters:
149 if d not in symletters:
150 break
150 break
151 if d == '.' and program[pos - 1] == '.': # special case for ..
151 if d == '.' and program[pos - 1] == '.': # special case for ..
152 pos -= 1
152 pos -= 1
153 break
153 break
154 pos += 1
154 pos += 1
155 sym = program[s:pos]
155 sym = program[s:pos]
156 if sym in keywords: # operator keywords
156 if sym in keywords: # operator keywords
157 yield (sym, None, s)
157 yield (sym, None, s)
158 elif '-' in sym:
158 elif '-' in sym:
159 # some jerk gave us foo-bar-baz, try to check if it's a symbol
159 # some jerk gave us foo-bar-baz, try to check if it's a symbol
160 if lookup and lookup(sym):
160 if lookup and lookup(sym):
161 # looks like a real symbol
161 # looks like a real symbol
162 yield ('symbol', sym, s)
162 yield ('symbol', sym, s)
163 else:
163 else:
164 # looks like an expression
164 # looks like an expression
165 parts = sym.split('-')
165 parts = sym.split('-')
166 for p in parts[:-1]:
166 for p in parts[:-1]:
167 if p: # possible consecutive -
167 if p: # possible consecutive -
168 yield ('symbol', p, s)
168 yield ('symbol', p, s)
169 s += len(p)
169 s += len(p)
170 yield ('-', None, pos)
170 yield ('-', None, pos)
171 s += 1
171 s += 1
172 if parts[-1]: # possible trailing -
172 if parts[-1]: # possible trailing -
173 yield ('symbol', parts[-1], s)
173 yield ('symbol', parts[-1], s)
174 else:
174 else:
175 yield ('symbol', sym, s)
175 yield ('symbol', sym, s)
176 pos -= 1
176 pos -= 1
177 else:
177 else:
178 raise error.ParseError(_("syntax error in revset '%s'") %
178 raise error.ParseError(_("syntax error in revset '%s'") %
179 program, pos)
179 program, pos)
180 pos += 1
180 pos += 1
181 yield ('end', None, pos)
181 yield ('end', None, pos)
182
182
183 # helpers
183 # helpers
184
184
185 _notset = object()
185 _notset = object()
186
186
187 def getsymbol(x):
187 def getsymbol(x):
188 if x and x[0] == 'symbol':
188 if x and x[0] == 'symbol':
189 return x[1]
189 return x[1]
190 raise error.ParseError(_('not a symbol'))
190 raise error.ParseError(_('not a symbol'))
191
191
192 def getstring(x, err):
192 def getstring(x, err):
193 if x and (x[0] == 'string' or x[0] == 'symbol'):
193 if x and (x[0] == 'string' or x[0] == 'symbol'):
194 return x[1]
194 return x[1]
195 raise error.ParseError(err)
195 raise error.ParseError(err)
196
196
197 def getinteger(x, err, default=_notset):
197 def getinteger(x, err, default=_notset):
198 if not x and default is not _notset:
198 if not x and default is not _notset:
199 return default
199 return default
200 try:
200 try:
201 return int(getstring(x, err))
201 return int(getstring(x, err))
202 except ValueError:
202 except ValueError:
203 raise error.ParseError(err)
203 raise error.ParseError(err)
204
204
205 def getboolean(x, err):
205 def getboolean(x, err):
206 value = util.parsebool(getsymbol(x))
206 value = util.parsebool(getsymbol(x))
207 if value is not None:
207 if value is not None:
208 return value
208 return value
209 raise error.ParseError(err)
209 raise error.ParseError(err)
210
210
211 def getlist(x):
211 def getlist(x):
212 if not x:
212 if not x:
213 return []
213 return []
214 if x[0] == 'list':
214 if x[0] == 'list':
215 return list(x[1:])
215 return list(x[1:])
216 return [x]
216 return [x]
217
217
218 def getrange(x, err):
218 def getrange(x, err):
219 if not x:
219 if not x:
220 raise error.ParseError(err)
220 raise error.ParseError(err)
221 op = x[0]
221 op = x[0]
222 if op == 'range':
222 if op == 'range':
223 return x[1], x[2]
223 return x[1], x[2]
224 elif op == 'rangepre':
224 elif op == 'rangepre':
225 return None, x[1]
225 return None, x[1]
226 elif op == 'rangepost':
226 elif op == 'rangepost':
227 return x[1], None
227 return x[1], None
228 elif op == 'rangeall':
228 elif op == 'rangeall':
229 return None, None
229 return None, None
230 raise error.ParseError(err)
230 raise error.ParseError(err)
231
231
232 def getargs(x, min, max, err):
232 def getargs(x, min, max, err):
233 l = getlist(x)
233 l = getlist(x)
234 if len(l) < min or (max >= 0 and len(l) > max):
234 if len(l) < min or (max >= 0 and len(l) > max):
235 raise error.ParseError(err)
235 raise error.ParseError(err)
236 return l
236 return l
237
237
238 def getargsdict(x, funcname, keys):
238 def getargsdict(x, funcname, keys):
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
240 keyvaluenode='keyvalue', keynode='symbol')
240 keyvaluenode='keyvalue', keynode='symbol')
241
241
242 # cache of {spec: raw parsed tree} built internally
243 _treecache = {}
244
245 def _cachedtree(spec):
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
247 tree = _treecache.get(spec)
248 if tree is None:
249 _treecache[spec] = tree = parse(spec)
250 return tree
251
252 def _build(tmplspec, *repls):
253 """Create raw parsed tree from a template revset statement
254
255 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
257 """
258 template = _cachedtree(tmplspec)
259 return parser.buildtree(template, ('symbol', '_'), *repls)
260
242 def _isnamedfunc(x, funcname):
261 def _isnamedfunc(x, funcname):
243 """Check if given tree matches named function"""
262 """Check if given tree matches named function"""
244 return x and x[0] == 'func' and getsymbol(x[1]) == funcname
263 return x and x[0] == 'func' and getsymbol(x[1]) == funcname
245
264
246 def _isposargs(x, n):
265 def _isposargs(x, n):
247 """Check if given tree is n-length list of positional arguments"""
266 """Check if given tree is n-length list of positional arguments"""
248 l = getlist(x)
267 l = getlist(x)
249 return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)
268 return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)
250
269
251 def _matchnamedfunc(x, funcname):
270 def _matchnamedfunc(x, funcname):
252 """Return args tree if given tree matches named function; otherwise None
271 """Return args tree if given tree matches named function; otherwise None
253
272
254 This can't be used for testing a nullary function since its args tree
273 This can't be used for testing a nullary function since its args tree
255 is also None. Use _isnamedfunc() instead.
274 is also None. Use _isnamedfunc() instead.
256 """
275 """
257 if not _isnamedfunc(x, funcname):
276 if not _isnamedfunc(x, funcname):
258 return
277 return
259 return x[2]
278 return x[2]
260
279
261 def _matchonly(revs, bases):
280 def _matchonly(revs, bases):
262 """
281 """
263 >>> f = lambda *args: _matchonly(*map(parse, args))
282 >>> f = lambda *args: _matchonly(*map(parse, args))
264 >>> f('ancestors(A)', 'not ancestors(B)')
283 >>> f('ancestors(A)', 'not ancestors(B)')
265 ('list', ('symbol', 'A'), ('symbol', 'B'))
284 ('list', ('symbol', 'A'), ('symbol', 'B'))
266 """
285 """
267 ta = _matchnamedfunc(revs, 'ancestors')
286 ta = _matchnamedfunc(revs, 'ancestors')
268 tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')
287 tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')
269 if _isposargs(ta, 1) and _isposargs(tb, 1):
288 if _isposargs(ta, 1) and _isposargs(tb, 1):
270 return ('list', ta, tb)
289 return ('list', ta, tb)
271
290
272 def _fixops(x):
291 def _fixops(x):
273 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
292 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
274 handled well by our simple top-down parser"""
293 handled well by our simple top-down parser"""
275 if not isinstance(x, tuple):
294 if not isinstance(x, tuple):
276 return x
295 return x
277
296
278 op = x[0]
297 op = x[0]
279 if op == 'parent':
298 if op == 'parent':
280 # x^:y means (x^) : y, not x ^ (:y)
299 # x^:y means (x^) : y, not x ^ (:y)
281 # x^: means (x^) :, not x ^ (:)
300 # x^: means (x^) :, not x ^ (:)
282 post = ('parentpost', x[1])
301 post = ('parentpost', x[1])
283 if x[2][0] == 'dagrangepre':
302 if x[2][0] == 'dagrangepre':
284 return _fixops(('dagrange', post, x[2][1]))
303 return _fixops(('dagrange', post, x[2][1]))
285 elif x[2][0] == 'rangepre':
304 elif x[2][0] == 'rangepre':
286 return _fixops(('range', post, x[2][1]))
305 return _fixops(('range', post, x[2][1]))
287 elif x[2][0] == 'rangeall':
306 elif x[2][0] == 'rangeall':
288 return _fixops(('rangepost', post))
307 return _fixops(('rangepost', post))
289 elif op == 'or':
308 elif op == 'or':
290 # make number of arguments deterministic:
309 # make number of arguments deterministic:
291 # x + y + z -> (or x y z) -> (or (list x y z))
310 # x + y + z -> (or x y z) -> (or (list x y z))
292 return (op, _fixops(('list',) + x[1:]))
311 return (op, _fixops(('list',) + x[1:]))
293 elif op == 'subscript' and x[1][0] == 'relation':
312 elif op == 'subscript' and x[1][0] == 'relation':
294 # x#y[z] ternary
313 # x#y[z] ternary
295 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
314 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
296
315
297 return (op,) + tuple(_fixops(y) for y in x[1:])
316 return (op,) + tuple(_fixops(y) for y in x[1:])
298
317
299 def _analyze(x):
318 def _analyze(x):
300 if x is None:
319 if x is None:
301 return x
320 return x
302
321
303 op = x[0]
322 op = x[0]
304 if op == 'minus':
323 if op == 'minus':
305 return _analyze(('and', x[1], ('not', x[2])))
324 return _analyze(_build('_ and not _', *x[1:]))
306 elif op == 'only':
325 elif op == 'only':
307 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
326 return _analyze(_build('only(_, _)', *x[1:]))
308 return _analyze(t)
309 elif op == 'onlypost':
327 elif op == 'onlypost':
310 return _analyze(('func', ('symbol', 'only'), x[1]))
328 return _analyze(_build('only(_)', x[1]))
311 elif op == 'dagrangepre':
329 elif op == 'dagrangepre':
312 return _analyze(('func', ('symbol', 'ancestors'), x[1]))
330 return _analyze(_build('ancestors(_)', x[1]))
313 elif op == 'dagrangepost':
331 elif op == 'dagrangepost':
314 return _analyze(('func', ('symbol', 'descendants'), x[1]))
332 return _analyze(_build('descendants(_)', x[1]))
315 elif op == 'negate':
333 elif op == 'negate':
316 s = getstring(x[1], _("can't negate that"))
334 s = getstring(x[1], _("can't negate that"))
317 return _analyze(('string', '-' + s))
335 return _analyze(('string', '-' + s))
318 elif op in ('string', 'symbol'):
336 elif op in ('string', 'symbol'):
319 return x
337 return x
320 elif op == 'rangeall':
338 elif op == 'rangeall':
321 return (op, None)
339 return (op, None)
322 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
340 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
323 return (op, _analyze(x[1]))
341 return (op, _analyze(x[1]))
324 elif op == 'group':
342 elif op == 'group':
325 return _analyze(x[1])
343 return _analyze(x[1])
326 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
344 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
327 'subscript'}:
345 'subscript'}:
328 ta = _analyze(x[1])
346 ta = _analyze(x[1])
329 tb = _analyze(x[2])
347 tb = _analyze(x[2])
330 return (op, ta, tb)
348 return (op, ta, tb)
331 elif op == 'relsubscript':
349 elif op == 'relsubscript':
332 ta = _analyze(x[1])
350 ta = _analyze(x[1])
333 tb = _analyze(x[2])
351 tb = _analyze(x[2])
334 tc = _analyze(x[3])
352 tc = _analyze(x[3])
335 return (op, ta, tb, tc)
353 return (op, ta, tb, tc)
336 elif op == 'list':
354 elif op == 'list':
337 return (op,) + tuple(_analyze(y) for y in x[1:])
355 return (op,) + tuple(_analyze(y) for y in x[1:])
338 elif op == 'keyvalue':
356 elif op == 'keyvalue':
339 return (op, x[1], _analyze(x[2]))
357 return (op, x[1], _analyze(x[2]))
340 elif op == 'func':
358 elif op == 'func':
341 return (op, x[1], _analyze(x[2]))
359 return (op, x[1], _analyze(x[2]))
342 raise ValueError('invalid operator %r' % op)
360 raise ValueError('invalid operator %r' % op)
343
361
344 def analyze(x):
362 def analyze(x):
345 """Transform raw parsed tree to evaluatable tree which can be fed to
363 """Transform raw parsed tree to evaluatable tree which can be fed to
346 optimize() or getset()
364 optimize() or getset()
347
365
348 All pseudo operations should be mapped to real operations or functions
366 All pseudo operations should be mapped to real operations or functions
349 defined in methods or symbols table respectively.
367 defined in methods or symbols table respectively.
350 """
368 """
351 return _analyze(x)
369 return _analyze(x)
352
370
353 def _optimize(x, small):
371 def _optimize(x, small):
354 if x is None:
372 if x is None:
355 return 0, x
373 return 0, x
356
374
357 smallbonus = 1
375 smallbonus = 1
358 if small:
376 if small:
359 smallbonus = .5
377 smallbonus = .5
360
378
361 op = x[0]
379 op = x[0]
362 if op in ('string', 'symbol'):
380 if op in ('string', 'symbol'):
363 return smallbonus, x # single revisions are small
381 return smallbonus, x # single revisions are small
364 elif op == 'and':
382 elif op == 'and':
365 wa, ta = _optimize(x[1], True)
383 wa, ta = _optimize(x[1], True)
366 wb, tb = _optimize(x[2], True)
384 wb, tb = _optimize(x[2], True)
367 w = min(wa, wb)
385 w = min(wa, wb)
368
386
369 # (::x and not ::y)/(not ::y and ::x) have a fast path
387 # (::x and not ::y)/(not ::y and ::x) have a fast path
370 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
388 m = _matchonly(ta, tb) or _matchonly(tb, ta)
371 if tm:
389 if m:
372 return w, ('func', ('symbol', 'only'), tm)
390 return w, _build('only(_, _)', *m[1:])
373
391
374 if tb is not None and tb[0] == 'not':
392 if tb is not None and tb[0] == 'not':
375 return wa, ('difference', ta, tb[1])
393 return wa, ('difference', ta, tb[1])
376 if wa > wb:
394 if wa > wb:
377 op = 'andsmally'
395 op = 'andsmally'
378 return w, (op, ta, tb)
396 return w, (op, ta, tb)
379 elif op == 'or':
397 elif op == 'or':
380 # fast path for machine-generated expression, that is likely to have
398 # fast path for machine-generated expression, that is likely to have
381 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
399 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
382 ws, ts, ss = [], [], []
400 ws, ts, ss = [], [], []
383 def flushss():
401 def flushss():
384 if not ss:
402 if not ss:
385 return
403 return
386 if len(ss) == 1:
404 if len(ss) == 1:
387 w, t = ss[0]
405 w, t = ss[0]
388 else:
406 else:
389 s = '\0'.join(t[1] for w, t in ss)
407 s = '\0'.join(t[1] for w, t in ss)
390 y = ('func', ('symbol', '_list'), ('string', s))
408 y = _build('_list(_)', ('string', s))
391 w, t = _optimize(y, False)
409 w, t = _optimize(y, False)
392 ws.append(w)
410 ws.append(w)
393 ts.append(t)
411 ts.append(t)
394 del ss[:]
412 del ss[:]
395 for y in getlist(x[1]):
413 for y in getlist(x[1]):
396 w, t = _optimize(y, False)
414 w, t = _optimize(y, False)
397 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
415 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
398 ss.append((w, t))
416 ss.append((w, t))
399 continue
417 continue
400 flushss()
418 flushss()
401 ws.append(w)
419 ws.append(w)
402 ts.append(t)
420 ts.append(t)
403 flushss()
421 flushss()
404 if len(ts) == 1:
422 if len(ts) == 1:
405 return ws[0], ts[0] # 'or' operation is fully optimized out
423 return ws[0], ts[0] # 'or' operation is fully optimized out
406 return max(ws), (op, ('list',) + tuple(ts))
424 return max(ws), (op, ('list',) + tuple(ts))
407 elif op == 'not':
425 elif op == 'not':
408 # Optimize not public() to _notpublic() because we have a fast version
426 # Optimize not public() to _notpublic() because we have a fast version
409 if x[1][:3] == ('func', ('symbol', 'public'), None):
427 if x[1][:3] == ('func', ('symbol', 'public'), None):
410 newsym = ('func', ('symbol', '_notpublic'), None)
428 o = _optimize(_build('_notpublic()'), not small)
411 o = _optimize(newsym, not small)
412 return o[0], o[1]
429 return o[0], o[1]
413 else:
430 else:
414 o = _optimize(x[1], not small)
431 o = _optimize(x[1], not small)
415 return o[0], (op, o[1])
432 return o[0], (op, o[1])
416 elif op == 'rangeall':
433 elif op == 'rangeall':
417 return smallbonus, x
434 return smallbonus, x
418 elif op in ('rangepre', 'rangepost', 'parentpost'):
435 elif op in ('rangepre', 'rangepost', 'parentpost'):
419 o = _optimize(x[1], small)
436 o = _optimize(x[1], small)
420 return o[0], (op, o[1])
437 return o[0], (op, o[1])
421 elif op in ('dagrange', 'range'):
438 elif op in ('dagrange', 'range'):
422 wa, ta = _optimize(x[1], small)
439 wa, ta = _optimize(x[1], small)
423 wb, tb = _optimize(x[2], small)
440 wb, tb = _optimize(x[2], small)
424 return wa + wb, (op, ta, tb)
441 return wa + wb, (op, ta, tb)
425 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
442 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
426 w, t = _optimize(x[1], small)
443 w, t = _optimize(x[1], small)
427 return w, (op, t, x[2])
444 return w, (op, t, x[2])
428 elif op == 'relsubscript':
445 elif op == 'relsubscript':
429 w, t = _optimize(x[1], small)
446 w, t = _optimize(x[1], small)
430 return w, (op, t, x[2], x[3])
447 return w, (op, t, x[2], x[3])
431 elif op == 'list':
448 elif op == 'list':
432 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
449 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
433 return sum(ws), (op,) + ts
450 return sum(ws), (op,) + ts
434 elif op == 'keyvalue':
451 elif op == 'keyvalue':
435 w, t = _optimize(x[2], small)
452 w, t = _optimize(x[2], small)
436 return w, (op, x[1], t)
453 return w, (op, x[1], t)
437 elif op == 'func':
454 elif op == 'func':
438 f = getsymbol(x[1])
455 f = getsymbol(x[1])
439 wa, ta = _optimize(x[2], small)
456 wa, ta = _optimize(x[2], small)
440 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
457 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
441 'keyword', 'outgoing', 'user', 'destination'):
458 'keyword', 'outgoing', 'user', 'destination'):
442 w = 10 # slow
459 w = 10 # slow
443 elif f in ('modifies', 'adds', 'removes'):
460 elif f in ('modifies', 'adds', 'removes'):
444 w = 30 # slower
461 w = 30 # slower
445 elif f == "contains":
462 elif f == "contains":
446 w = 100 # very slow
463 w = 100 # very slow
447 elif f == "ancestor":
464 elif f == "ancestor":
448 w = 1 * smallbonus
465 w = 1 * smallbonus
449 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
466 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
450 w = 0
467 w = 0
451 elif f == "sort":
468 elif f == "sort":
452 w = 10 # assume most sorts look at changelog
469 w = 10 # assume most sorts look at changelog
453 else:
470 else:
454 w = 1
471 w = 1
455 return w + wa, (op, x[1], ta)
472 return w + wa, (op, x[1], ta)
456 raise ValueError('invalid operator %r' % op)
473 raise ValueError('invalid operator %r' % op)
457
474
458 def optimize(tree):
475 def optimize(tree):
459 """Optimize evaluatable tree
476 """Optimize evaluatable tree
460
477
461 All pseudo operations should be transformed beforehand.
478 All pseudo operations should be transformed beforehand.
462 """
479 """
463 _weight, newtree = _optimize(tree, small=True)
480 _weight, newtree = _optimize(tree, small=True)
464 return newtree
481 return newtree
465
482
466 # the set of valid characters for the initial letter of symbols in
483 # the set of valid characters for the initial letter of symbols in
467 # alias declarations and definitions
484 # alias declarations and definitions
468 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
485 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
469
486
470 def _parsewith(spec, lookup=None, syminitletters=None):
487 def _parsewith(spec, lookup=None, syminitletters=None):
471 """Generate a parse tree of given spec with given tokenizing options
488 """Generate a parse tree of given spec with given tokenizing options
472
489
473 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
490 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
474 ('func', ('symbol', 'foo'), ('symbol', '$1'))
491 ('func', ('symbol', 'foo'), ('symbol', '$1'))
475 >>> _parsewith('$1')
492 >>> _parsewith('$1')
476 Traceback (most recent call last):
493 Traceback (most recent call last):
477 ...
494 ...
478 ParseError: ("syntax error in revset '$1'", 0)
495 ParseError: ("syntax error in revset '$1'", 0)
479 >>> _parsewith('foo bar')
496 >>> _parsewith('foo bar')
480 Traceback (most recent call last):
497 Traceback (most recent call last):
481 ...
498 ...
482 ParseError: ('invalid token', 4)
499 ParseError: ('invalid token', 4)
483 """
500 """
484 p = parser.parser(elements)
501 p = parser.parser(elements)
485 tree, pos = p.parse(tokenize(spec, lookup=lookup,
502 tree, pos = p.parse(tokenize(spec, lookup=lookup,
486 syminitletters=syminitletters))
503 syminitletters=syminitletters))
487 if pos != len(spec):
504 if pos != len(spec):
488 raise error.ParseError(_('invalid token'), pos)
505 raise error.ParseError(_('invalid token'), pos)
489 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
506 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
490
507
491 class _aliasrules(parser.basealiasrules):
508 class _aliasrules(parser.basealiasrules):
492 """Parsing and expansion rule set of revset aliases"""
509 """Parsing and expansion rule set of revset aliases"""
493 _section = _('revset alias')
510 _section = _('revset alias')
494
511
495 @staticmethod
512 @staticmethod
496 def _parse(spec):
513 def _parse(spec):
497 """Parse alias declaration/definition ``spec``
514 """Parse alias declaration/definition ``spec``
498
515
499 This allows symbol names to use also ``$`` as an initial letter
516 This allows symbol names to use also ``$`` as an initial letter
500 (for backward compatibility), and callers of this function should
517 (for backward compatibility), and callers of this function should
501 examine whether ``$`` is used also for unexpected symbols or not.
518 examine whether ``$`` is used also for unexpected symbols or not.
502 """
519 """
503 return _parsewith(spec, syminitletters=_aliassyminitletters)
520 return _parsewith(spec, syminitletters=_aliassyminitletters)
504
521
505 @staticmethod
522 @staticmethod
506 def _trygetfunc(tree):
523 def _trygetfunc(tree):
507 if tree[0] == 'func' and tree[1][0] == 'symbol':
524 if tree[0] == 'func' and tree[1][0] == 'symbol':
508 return tree[1][1], getlist(tree[2])
525 return tree[1][1], getlist(tree[2])
509
526
510 def expandaliases(tree, aliases, warn=None):
527 def expandaliases(tree, aliases, warn=None):
511 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
528 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
512 aliases = _aliasrules.buildmap(aliases)
529 aliases = _aliasrules.buildmap(aliases)
513 tree = _aliasrules.expand(aliases, tree)
530 tree = _aliasrules.expand(aliases, tree)
514 # warn about problematic (but not referred) aliases
531 # warn about problematic (but not referred) aliases
515 if warn is not None:
532 if warn is not None:
516 for name, alias in sorted(aliases.iteritems()):
533 for name, alias in sorted(aliases.iteritems()):
517 if alias.error and not alias.warned:
534 if alias.error and not alias.warned:
518 warn(_('warning: %s\n') % (alias.error))
535 warn(_('warning: %s\n') % (alias.error))
519 alias.warned = True
536 alias.warned = True
520 return tree
537 return tree
521
538
522 def foldconcat(tree):
539 def foldconcat(tree):
523 """Fold elements to be concatenated by `##`
540 """Fold elements to be concatenated by `##`
524 """
541 """
525 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
542 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
526 return tree
543 return tree
527 if tree[0] == '_concat':
544 if tree[0] == '_concat':
528 pending = [tree]
545 pending = [tree]
529 l = []
546 l = []
530 while pending:
547 while pending:
531 e = pending.pop()
548 e = pending.pop()
532 if e[0] == '_concat':
549 if e[0] == '_concat':
533 pending.extend(reversed(e[1:]))
550 pending.extend(reversed(e[1:]))
534 elif e[0] in ('string', 'symbol'):
551 elif e[0] in ('string', 'symbol'):
535 l.append(e[1])
552 l.append(e[1])
536 else:
553 else:
537 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
554 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
538 raise error.ParseError(msg)
555 raise error.ParseError(msg)
539 return ('string', ''.join(l))
556 return ('string', ''.join(l))
540 else:
557 else:
541 return tuple(foldconcat(t) for t in tree)
558 return tuple(foldconcat(t) for t in tree)
542
559
543 def parse(spec, lookup=None):
560 def parse(spec, lookup=None):
544 return _parsewith(spec, lookup=lookup)
561 return _parsewith(spec, lookup=lookup)
545
562
546 def _quote(s):
563 def _quote(s):
547 r"""Quote a value in order to make it safe for the revset engine.
564 r"""Quote a value in order to make it safe for the revset engine.
548
565
549 >>> _quote('asdf')
566 >>> _quote('asdf')
550 "'asdf'"
567 "'asdf'"
551 >>> _quote("asdf'\"")
568 >>> _quote("asdf'\"")
552 '\'asdf\\\'"\''
569 '\'asdf\\\'"\''
553 >>> _quote('asdf\'')
570 >>> _quote('asdf\'')
554 "'asdf\\''"
571 "'asdf\\''"
555 >>> _quote(1)
572 >>> _quote(1)
556 "'1'"
573 "'1'"
557 """
574 """
558 return "'%s'" % util.escapestr(pycompat.bytestr(s))
575 return "'%s'" % util.escapestr(pycompat.bytestr(s))
559
576
560 def formatspec(expr, *args):
577 def formatspec(expr, *args):
561 '''
578 '''
562 This is a convenience function for using revsets internally, and
579 This is a convenience function for using revsets internally, and
563 escapes arguments appropriately. Aliases are intentionally ignored
580 escapes arguments appropriately. Aliases are intentionally ignored
564 so that intended expression behavior isn't accidentally subverted.
581 so that intended expression behavior isn't accidentally subverted.
565
582
566 Supported arguments:
583 Supported arguments:
567
584
568 %r = revset expression, parenthesized
585 %r = revset expression, parenthesized
569 %d = int(arg), no quoting
586 %d = int(arg), no quoting
570 %s = string(arg), escaped and single-quoted
587 %s = string(arg), escaped and single-quoted
571 %b = arg.branch(), escaped and single-quoted
588 %b = arg.branch(), escaped and single-quoted
572 %n = hex(arg), single-quoted
589 %n = hex(arg), single-quoted
573 %% = a literal '%'
590 %% = a literal '%'
574
591
575 Prefixing the type with 'l' specifies a parenthesized list of that type.
592 Prefixing the type with 'l' specifies a parenthesized list of that type.
576
593
577 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
594 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
578 '(10 or 11):: and ((this()) or (that()))'
595 '(10 or 11):: and ((this()) or (that()))'
579 >>> formatspec('%d:: and not %d::', 10, 20)
596 >>> formatspec('%d:: and not %d::', 10, 20)
580 '10:: and not 20::'
597 '10:: and not 20::'
581 >>> formatspec('%ld or %ld', [], [1])
598 >>> formatspec('%ld or %ld', [], [1])
582 "_list('') or 1"
599 "_list('') or 1"
583 >>> formatspec('keyword(%s)', 'foo\\xe9')
600 >>> formatspec('keyword(%s)', 'foo\\xe9')
584 "keyword('foo\\\\xe9')"
601 "keyword('foo\\\\xe9')"
585 >>> b = lambda: 'default'
602 >>> b = lambda: 'default'
586 >>> b.branch = b
603 >>> b.branch = b
587 >>> formatspec('branch(%b)', b)
604 >>> formatspec('branch(%b)', b)
588 "branch('default')"
605 "branch('default')"
589 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
606 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
590 "root(_list('a\\x00b\\x00c\\x00d'))"
607 "root(_list('a\\x00b\\x00c\\x00d'))"
591 '''
608 '''
592
609
593 def argtype(c, arg):
610 def argtype(c, arg):
594 if c == 'd':
611 if c == 'd':
595 return '%d' % int(arg)
612 return '%d' % int(arg)
596 elif c == 's':
613 elif c == 's':
597 return _quote(arg)
614 return _quote(arg)
598 elif c == 'r':
615 elif c == 'r':
599 parse(arg) # make sure syntax errors are confined
616 parse(arg) # make sure syntax errors are confined
600 return '(%s)' % arg
617 return '(%s)' % arg
601 elif c == 'n':
618 elif c == 'n':
602 return _quote(node.hex(arg))
619 return _quote(node.hex(arg))
603 elif c == 'b':
620 elif c == 'b':
604 return _quote(arg.branch())
621 return _quote(arg.branch())
605
622
606 def listexp(s, t):
623 def listexp(s, t):
607 l = len(s)
624 l = len(s)
608 if l == 0:
625 if l == 0:
609 return "_list('')"
626 return "_list('')"
610 elif l == 1:
627 elif l == 1:
611 return argtype(t, s[0])
628 return argtype(t, s[0])
612 elif t == 'd':
629 elif t == 'd':
613 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
630 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
614 elif t == 's':
631 elif t == 's':
615 return "_list('%s')" % "\0".join(s)
632 return "_list('%s')" % "\0".join(s)
616 elif t == 'n':
633 elif t == 'n':
617 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
634 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
618 elif t == 'b':
635 elif t == 'b':
619 return "_list('%s')" % "\0".join(a.branch() for a in s)
636 return "_list('%s')" % "\0".join(a.branch() for a in s)
620
637
621 m = l // 2
638 m = l // 2
622 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
639 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
623
640
624 expr = pycompat.bytestr(expr)
641 expr = pycompat.bytestr(expr)
625 ret = ''
642 ret = ''
626 pos = 0
643 pos = 0
627 arg = 0
644 arg = 0
628 while pos < len(expr):
645 while pos < len(expr):
629 c = expr[pos]
646 c = expr[pos]
630 if c == '%':
647 if c == '%':
631 pos += 1
648 pos += 1
632 d = expr[pos]
649 d = expr[pos]
633 if d == '%':
650 if d == '%':
634 ret += d
651 ret += d
635 elif d in 'dsnbr':
652 elif d in 'dsnbr':
636 ret += argtype(d, args[arg])
653 ret += argtype(d, args[arg])
637 arg += 1
654 arg += 1
638 elif d == 'l':
655 elif d == 'l':
639 # a list of some type
656 # a list of some type
640 pos += 1
657 pos += 1
641 d = expr[pos]
658 d = expr[pos]
642 ret += listexp(list(args[arg]), d)
659 ret += listexp(list(args[arg]), d)
643 arg += 1
660 arg += 1
644 else:
661 else:
645 raise error.Abort(_('unexpected revspec format character %s')
662 raise error.Abort(_('unexpected revspec format character %s')
646 % d)
663 % d)
647 else:
664 else:
648 ret += c
665 ret += c
649 pos += 1
666 pos += 1
650
667
651 return ret
668 return ret
652
669
653 def prettyformat(tree):
670 def prettyformat(tree):
654 return parser.prettyformat(tree, ('string', 'symbol'))
671 return parser.prettyformat(tree, ('string', 'symbol'))
655
672
656 def depth(tree):
673 def depth(tree):
657 if isinstance(tree, tuple):
674 if isinstance(tree, tuple):
658 return max(map(depth, tree)) + 1
675 return max(map(depth, tree)) + 1
659 else:
676 else:
660 return 0
677 return 0
661
678
662 def funcsused(tree):
679 def funcsused(tree):
663 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
680 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
664 return set()
681 return set()
665 else:
682 else:
666 funcs = set()
683 funcs = set()
667 for s in tree[1:]:
684 for s in tree[1:]:
668 funcs |= funcsused(s)
685 funcs |= funcsused(s)
669 if tree[0] == 'func':
686 if tree[0] == 'func':
670 funcs.add(tree[1][1])
687 funcs.add(tree[1][1])
671 return funcs
688 return funcs
General Comments 0
You need to be logged in to leave comments. Login now