##// END OF EJS Templates
revsetlang: match tree by helper function on optimize...
Yuya Nishihara -
r34048:f23cbca9 default
parent child Browse files
Show More
@@ -1,688 +1,693 b''
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 util,
18 util,
19 )
19 )
20
20
21 elements = {
21 elements = {
22 # token-type: binding-strength, primary, prefix, infix, suffix
22 # token-type: binding-strength, primary, prefix, infix, suffix
23 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
23 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
24 "[": (21, None, None, ("subscript", 1, "]"), None),
24 "[": (21, None, None, ("subscript", 1, "]"), None),
25 "#": (21, None, None, ("relation", 21), None),
25 "#": (21, None, None, ("relation", 21), None),
26 "##": (20, None, None, ("_concat", 20), None),
26 "##": (20, None, None, ("_concat", 20), None),
27 "~": (18, None, None, ("ancestor", 18), None),
27 "~": (18, None, None, ("ancestor", 18), None),
28 "^": (18, None, None, ("parent", 18), "parentpost"),
28 "^": (18, None, None, ("parent", 18), "parentpost"),
29 "-": (5, None, ("negate", 19), ("minus", 5), None),
29 "-": (5, None, ("negate", 19), ("minus", 5), None),
30 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
30 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
31 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
31 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
32 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
32 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
33 "not": (10, None, ("not", 10), None, None),
33 "not": (10, None, ("not", 10), None, None),
34 "!": (10, None, ("not", 10), None, None),
34 "!": (10, None, ("not", 10), None, None),
35 "and": (5, None, None, ("and", 5), None),
35 "and": (5, None, None, ("and", 5), None),
36 "&": (5, None, None, ("and", 5), None),
36 "&": (5, None, None, ("and", 5), None),
37 "%": (5, None, None, ("only", 5), "onlypost"),
37 "%": (5, None, None, ("only", 5), "onlypost"),
38 "or": (4, None, None, ("or", 4), None),
38 "or": (4, None, None, ("or", 4), None),
39 "|": (4, None, None, ("or", 4), None),
39 "|": (4, None, None, ("or", 4), None),
40 "+": (4, None, None, ("or", 4), None),
40 "+": (4, None, None, ("or", 4), None),
41 "=": (3, None, None, ("keyvalue", 3), None),
41 "=": (3, None, None, ("keyvalue", 3), None),
42 ",": (2, None, None, ("list", 2), None),
42 ",": (2, None, None, ("list", 2), None),
43 ")": (0, None, None, None, None),
43 ")": (0, None, None, None, None),
44 "]": (0, None, None, None, None),
44 "]": (0, None, None, None, None),
45 "symbol": (0, "symbol", None, None, None),
45 "symbol": (0, "symbol", None, None, None),
46 "string": (0, "string", None, None, None),
46 "string": (0, "string", None, None, None),
47 "end": (0, None, None, None, None),
47 "end": (0, None, None, None, None),
48 }
48 }
49
49
50 keywords = {'and', 'or', 'not'}
50 keywords = {'and', 'or', 'not'}
51
51
52 _quoteletters = {'"', "'"}
52 _quoteletters = {'"', "'"}
53 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
53 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
54
54
55 # default set of valid characters for the initial letter of symbols
55 # default set of valid characters for the initial letter of symbols
56 _syminitletters = set(pycompat.iterbytestr(
56 _syminitletters = set(pycompat.iterbytestr(
57 string.ascii_letters.encode('ascii') +
57 string.ascii_letters.encode('ascii') +
58 string.digits.encode('ascii') +
58 string.digits.encode('ascii') +
59 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
59 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
60
60
61 # default set of valid characters for non-initial letters of symbols
61 # default set of valid characters for non-initial letters of symbols
62 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
62 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
63
63
64 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
64 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
65 '''
65 '''
66 Parse a revset statement into a stream of tokens
66 Parse a revset statement into a stream of tokens
67
67
68 ``syminitletters`` is the set of valid characters for the initial
68 ``syminitletters`` is the set of valid characters for the initial
69 letter of symbols.
69 letter of symbols.
70
70
71 By default, character ``c`` is recognized as valid for initial
71 By default, character ``c`` is recognized as valid for initial
72 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
72 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
73
73
74 ``symletters`` is the set of valid characters for non-initial
74 ``symletters`` is the set of valid characters for non-initial
75 letters of symbols.
75 letters of symbols.
76
76
77 By default, character ``c`` is recognized as valid for non-initial
77 By default, character ``c`` is recognized as valid for non-initial
78 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
78 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
79
79
80 Check that @ is a valid unquoted token character (issue3686):
80 Check that @ is a valid unquoted token character (issue3686):
81 >>> list(tokenize("@::"))
81 >>> list(tokenize("@::"))
82 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
82 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
83
83
84 '''
84 '''
85 program = pycompat.bytestr(program)
85 program = pycompat.bytestr(program)
86 if syminitletters is None:
86 if syminitletters is None:
87 syminitletters = _syminitletters
87 syminitletters = _syminitletters
88 if symletters is None:
88 if symletters is None:
89 symletters = _symletters
89 symletters = _symletters
90
90
91 if program and lookup:
91 if program and lookup:
92 # attempt to parse old-style ranges first to deal with
92 # attempt to parse old-style ranges first to deal with
93 # things like old-tag which contain query metacharacters
93 # things like old-tag which contain query metacharacters
94 parts = program.split(':', 1)
94 parts = program.split(':', 1)
95 if all(lookup(sym) for sym in parts if sym):
95 if all(lookup(sym) for sym in parts if sym):
96 if parts[0]:
96 if parts[0]:
97 yield ('symbol', parts[0], 0)
97 yield ('symbol', parts[0], 0)
98 if len(parts) > 1:
98 if len(parts) > 1:
99 s = len(parts[0])
99 s = len(parts[0])
100 yield (':', None, s)
100 yield (':', None, s)
101 if parts[1]:
101 if parts[1]:
102 yield ('symbol', parts[1], s + 1)
102 yield ('symbol', parts[1], s + 1)
103 yield ('end', None, len(program))
103 yield ('end', None, len(program))
104 return
104 return
105
105
106 pos, l = 0, len(program)
106 pos, l = 0, len(program)
107 while pos < l:
107 while pos < l:
108 c = program[pos]
108 c = program[pos]
109 if c.isspace(): # skip inter-token whitespace
109 if c.isspace(): # skip inter-token whitespace
110 pass
110 pass
111 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
111 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
112 yield ('::', None, pos)
112 yield ('::', None, pos)
113 pos += 1 # skip ahead
113 pos += 1 # skip ahead
114 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
114 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
115 yield ('..', None, pos)
115 yield ('..', None, pos)
116 pos += 1 # skip ahead
116 pos += 1 # skip ahead
117 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
117 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
118 yield ('##', None, pos)
118 yield ('##', None, pos)
119 pos += 1 # skip ahead
119 pos += 1 # skip ahead
120 elif c in _simpleopletters: # handle simple operators
120 elif c in _simpleopletters: # handle simple operators
121 yield (c, None, pos)
121 yield (c, None, pos)
122 elif (c in _quoteletters or c == 'r' and
122 elif (c in _quoteletters or c == 'r' and
123 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
123 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
124 if c == 'r':
124 if c == 'r':
125 pos += 1
125 pos += 1
126 c = program[pos]
126 c = program[pos]
127 decode = lambda x: x
127 decode = lambda x: x
128 else:
128 else:
129 decode = parser.unescapestr
129 decode = parser.unescapestr
130 pos += 1
130 pos += 1
131 s = pos
131 s = pos
132 while pos < l: # find closing quote
132 while pos < l: # find closing quote
133 d = program[pos]
133 d = program[pos]
134 if d == '\\': # skip over escaped characters
134 if d == '\\': # skip over escaped characters
135 pos += 2
135 pos += 2
136 continue
136 continue
137 if d == c:
137 if d == c:
138 yield ('string', decode(program[s:pos]), s)
138 yield ('string', decode(program[s:pos]), s)
139 break
139 break
140 pos += 1
140 pos += 1
141 else:
141 else:
142 raise error.ParseError(_("unterminated string"), s)
142 raise error.ParseError(_("unterminated string"), s)
143 # gather up a symbol/keyword
143 # gather up a symbol/keyword
144 elif c in syminitletters:
144 elif c in syminitletters:
145 s = pos
145 s = pos
146 pos += 1
146 pos += 1
147 while pos < l: # find end of symbol
147 while pos < l: # find end of symbol
148 d = program[pos]
148 d = program[pos]
149 if d not in symletters:
149 if d not in symletters:
150 break
150 break
151 if d == '.' and program[pos - 1] == '.': # special case for ..
151 if d == '.' and program[pos - 1] == '.': # special case for ..
152 pos -= 1
152 pos -= 1
153 break
153 break
154 pos += 1
154 pos += 1
155 sym = program[s:pos]
155 sym = program[s:pos]
156 if sym in keywords: # operator keywords
156 if sym in keywords: # operator keywords
157 yield (sym, None, s)
157 yield (sym, None, s)
158 elif '-' in sym:
158 elif '-' in sym:
159 # some jerk gave us foo-bar-baz, try to check if it's a symbol
159 # some jerk gave us foo-bar-baz, try to check if it's a symbol
160 if lookup and lookup(sym):
160 if lookup and lookup(sym):
161 # looks like a real symbol
161 # looks like a real symbol
162 yield ('symbol', sym, s)
162 yield ('symbol', sym, s)
163 else:
163 else:
164 # looks like an expression
164 # looks like an expression
165 parts = sym.split('-')
165 parts = sym.split('-')
166 for p in parts[:-1]:
166 for p in parts[:-1]:
167 if p: # possible consecutive -
167 if p: # possible consecutive -
168 yield ('symbol', p, s)
168 yield ('symbol', p, s)
169 s += len(p)
169 s += len(p)
170 yield ('-', None, pos)
170 yield ('-', None, pos)
171 s += 1
171 s += 1
172 if parts[-1]: # possible trailing -
172 if parts[-1]: # possible trailing -
173 yield ('symbol', parts[-1], s)
173 yield ('symbol', parts[-1], s)
174 else:
174 else:
175 yield ('symbol', sym, s)
175 yield ('symbol', sym, s)
176 pos -= 1
176 pos -= 1
177 else:
177 else:
178 raise error.ParseError(_("syntax error in revset '%s'") %
178 raise error.ParseError(_("syntax error in revset '%s'") %
179 program, pos)
179 program, pos)
180 pos += 1
180 pos += 1
181 yield ('end', None, pos)
181 yield ('end', None, pos)
182
182
183 # helpers
183 # helpers
184
184
185 _notset = object()
185 _notset = object()
186
186
187 def getsymbol(x):
187 def getsymbol(x):
188 if x and x[0] == 'symbol':
188 if x and x[0] == 'symbol':
189 return x[1]
189 return x[1]
190 raise error.ParseError(_('not a symbol'))
190 raise error.ParseError(_('not a symbol'))
191
191
192 def getstring(x, err):
192 def getstring(x, err):
193 if x and (x[0] == 'string' or x[0] == 'symbol'):
193 if x and (x[0] == 'string' or x[0] == 'symbol'):
194 return x[1]
194 return x[1]
195 raise error.ParseError(err)
195 raise error.ParseError(err)
196
196
197 def getinteger(x, err, default=_notset):
197 def getinteger(x, err, default=_notset):
198 if not x and default is not _notset:
198 if not x and default is not _notset:
199 return default
199 return default
200 try:
200 try:
201 return int(getstring(x, err))
201 return int(getstring(x, err))
202 except ValueError:
202 except ValueError:
203 raise error.ParseError(err)
203 raise error.ParseError(err)
204
204
205 def getboolean(x, err):
205 def getboolean(x, err):
206 value = util.parsebool(getsymbol(x))
206 value = util.parsebool(getsymbol(x))
207 if value is not None:
207 if value is not None:
208 return value
208 return value
209 raise error.ParseError(err)
209 raise error.ParseError(err)
210
210
211 def getlist(x):
211 def getlist(x):
212 if not x:
212 if not x:
213 return []
213 return []
214 if x[0] == 'list':
214 if x[0] == 'list':
215 return list(x[1:])
215 return list(x[1:])
216 return [x]
216 return [x]
217
217
218 def getrange(x, err):
218 def getrange(x, err):
219 if not x:
219 if not x:
220 raise error.ParseError(err)
220 raise error.ParseError(err)
221 op = x[0]
221 op = x[0]
222 if op == 'range':
222 if op == 'range':
223 return x[1], x[2]
223 return x[1], x[2]
224 elif op == 'rangepre':
224 elif op == 'rangepre':
225 return None, x[1]
225 return None, x[1]
226 elif op == 'rangepost':
226 elif op == 'rangepost':
227 return x[1], None
227 return x[1], None
228 elif op == 'rangeall':
228 elif op == 'rangeall':
229 return None, None
229 return None, None
230 raise error.ParseError(err)
230 raise error.ParseError(err)
231
231
232 def getargs(x, min, max, err):
232 def getargs(x, min, max, err):
233 l = getlist(x)
233 l = getlist(x)
234 if len(l) < min or (max >= 0 and len(l) > max):
234 if len(l) < min or (max >= 0 and len(l) > max):
235 raise error.ParseError(err)
235 raise error.ParseError(err)
236 return l
236 return l
237
237
238 def getargsdict(x, funcname, keys):
238 def getargsdict(x, funcname, keys):
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
240 keyvaluenode='keyvalue', keynode='symbol')
240 keyvaluenode='keyvalue', keynode='symbol')
241
241
242 # cache of {spec: raw parsed tree} built internally
242 # cache of {spec: raw parsed tree} built internally
243 _treecache = {}
243 _treecache = {}
244
244
245 def _cachedtree(spec):
245 def _cachedtree(spec):
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
247 tree = _treecache.get(spec)
247 tree = _treecache.get(spec)
248 if tree is None:
248 if tree is None:
249 _treecache[spec] = tree = parse(spec)
249 _treecache[spec] = tree = parse(spec)
250 return tree
250 return tree
251
251
252 def _build(tmplspec, *repls):
252 def _build(tmplspec, *repls):
253 """Create raw parsed tree from a template revset statement
253 """Create raw parsed tree from a template revset statement
254
254
255 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
255 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
257 """
257 """
258 template = _cachedtree(tmplspec)
258 template = _cachedtree(tmplspec)
259 return parser.buildtree(template, ('symbol', '_'), *repls)
259 return parser.buildtree(template, ('symbol', '_'), *repls)
260
260
261 def _match(patspec, tree):
262 """Test if a tree matches the given pattern statement; return the matches
263
264 >>> _match('f(_)', parse('f()'))
265 >>> _match('f(_)', parse('f(1)'))
266 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
267 >>> _match('f(_)', parse('f(1, 2)'))
268 """
269 pattern = _cachedtree(patspec)
270 return parser.matchtree(pattern, tree, ('symbol', '_'),
271 {'keyvalue', 'list'})
272
261 def _isnamedfunc(x, funcname):
273 def _isnamedfunc(x, funcname):
262 """Check if given tree matches named function"""
274 """Check if given tree matches named function"""
263 return x and x[0] == 'func' and getsymbol(x[1]) == funcname
275 return x and x[0] == 'func' and getsymbol(x[1]) == funcname
264
276
265 def _isposargs(x, n):
277 def _isposargs(x, n):
266 """Check if given tree is n-length list of positional arguments"""
278 """Check if given tree is n-length list of positional arguments"""
267 l = getlist(x)
279 l = getlist(x)
268 return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)
280 return len(l) == n and all(y and y[0] != 'keyvalue' for y in l)
269
281
270 def _matchnamedfunc(x, funcname):
282 def _matchnamedfunc(x, funcname):
271 """Return args tree if given tree matches named function; otherwise None
283 """Return args tree if given tree matches named function; otherwise None
272
284
273 This can't be used for testing a nullary function since its args tree
285 This can't be used for testing a nullary function since its args tree
274 is also None. Use _isnamedfunc() instead.
286 is also None. Use _isnamedfunc() instead.
275 """
287 """
276 if not _isnamedfunc(x, funcname):
288 if not _isnamedfunc(x, funcname):
277 return
289 return
278 return x[2]
290 return x[2]
279
291
280 def _matchonly(revs, bases):
292 def _matchonly(revs, bases):
281 """
293 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
282 >>> f = lambda *args: _matchonly(*map(parse, args))
283 >>> f('ancestors(A)', 'not ancestors(B)')
284 ('list', ('symbol', 'A'), ('symbol', 'B'))
285 """
286 ta = _matchnamedfunc(revs, 'ancestors')
287 tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')
288 if _isposargs(ta, 1) and _isposargs(tb, 1):
289 return ('list', ta, tb)
290
294
291 def _fixops(x):
295 def _fixops(x):
292 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
296 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
293 handled well by our simple top-down parser"""
297 handled well by our simple top-down parser"""
294 if not isinstance(x, tuple):
298 if not isinstance(x, tuple):
295 return x
299 return x
296
300
297 op = x[0]
301 op = x[0]
298 if op == 'parent':
302 if op == 'parent':
299 # x^:y means (x^) : y, not x ^ (:y)
303 # x^:y means (x^) : y, not x ^ (:y)
300 # x^: means (x^) :, not x ^ (:)
304 # x^: means (x^) :, not x ^ (:)
301 post = ('parentpost', x[1])
305 post = ('parentpost', x[1])
302 if x[2][0] == 'dagrangepre':
306 if x[2][0] == 'dagrangepre':
303 return _fixops(('dagrange', post, x[2][1]))
307 return _fixops(('dagrange', post, x[2][1]))
304 elif x[2][0] == 'rangepre':
308 elif x[2][0] == 'rangepre':
305 return _fixops(('range', post, x[2][1]))
309 return _fixops(('range', post, x[2][1]))
306 elif x[2][0] == 'rangeall':
310 elif x[2][0] == 'rangeall':
307 return _fixops(('rangepost', post))
311 return _fixops(('rangepost', post))
308 elif op == 'or':
312 elif op == 'or':
309 # make number of arguments deterministic:
313 # make number of arguments deterministic:
310 # x + y + z -> (or x y z) -> (or (list x y z))
314 # x + y + z -> (or x y z) -> (or (list x y z))
311 return (op, _fixops(('list',) + x[1:]))
315 return (op, _fixops(('list',) + x[1:]))
312 elif op == 'subscript' and x[1][0] == 'relation':
316 elif op == 'subscript' and x[1][0] == 'relation':
313 # x#y[z] ternary
317 # x#y[z] ternary
314 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
318 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
315
319
316 return (op,) + tuple(_fixops(y) for y in x[1:])
320 return (op,) + tuple(_fixops(y) for y in x[1:])
317
321
318 def _analyze(x):
322 def _analyze(x):
319 if x is None:
323 if x is None:
320 return x
324 return x
321
325
322 op = x[0]
326 op = x[0]
323 if op == 'minus':
327 if op == 'minus':
324 return _analyze(_build('_ and not _', *x[1:]))
328 return _analyze(_build('_ and not _', *x[1:]))
325 elif op == 'only':
329 elif op == 'only':
326 return _analyze(_build('only(_, _)', *x[1:]))
330 return _analyze(_build('only(_, _)', *x[1:]))
327 elif op == 'onlypost':
331 elif op == 'onlypost':
328 return _analyze(_build('only(_)', x[1]))
332 return _analyze(_build('only(_)', x[1]))
329 elif op == 'dagrangepre':
333 elif op == 'dagrangepre':
330 return _analyze(_build('ancestors(_)', x[1]))
334 return _analyze(_build('ancestors(_)', x[1]))
331 elif op == 'dagrangepost':
335 elif op == 'dagrangepost':
332 return _analyze(_build('descendants(_)', x[1]))
336 return _analyze(_build('descendants(_)', x[1]))
333 elif op == 'negate':
337 elif op == 'negate':
334 s = getstring(x[1], _("can't negate that"))
338 s = getstring(x[1], _("can't negate that"))
335 return _analyze(('string', '-' + s))
339 return _analyze(('string', '-' + s))
336 elif op in ('string', 'symbol'):
340 elif op in ('string', 'symbol'):
337 return x
341 return x
338 elif op == 'rangeall':
342 elif op == 'rangeall':
339 return (op, None)
343 return (op, None)
340 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
344 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
341 return (op, _analyze(x[1]))
345 return (op, _analyze(x[1]))
342 elif op == 'group':
346 elif op == 'group':
343 return _analyze(x[1])
347 return _analyze(x[1])
344 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
348 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
345 'subscript'}:
349 'subscript'}:
346 ta = _analyze(x[1])
350 ta = _analyze(x[1])
347 tb = _analyze(x[2])
351 tb = _analyze(x[2])
348 return (op, ta, tb)
352 return (op, ta, tb)
349 elif op == 'relsubscript':
353 elif op == 'relsubscript':
350 ta = _analyze(x[1])
354 ta = _analyze(x[1])
351 tb = _analyze(x[2])
355 tb = _analyze(x[2])
352 tc = _analyze(x[3])
356 tc = _analyze(x[3])
353 return (op, ta, tb, tc)
357 return (op, ta, tb, tc)
354 elif op == 'list':
358 elif op == 'list':
355 return (op,) + tuple(_analyze(y) for y in x[1:])
359 return (op,) + tuple(_analyze(y) for y in x[1:])
356 elif op == 'keyvalue':
360 elif op == 'keyvalue':
357 return (op, x[1], _analyze(x[2]))
361 return (op, x[1], _analyze(x[2]))
358 elif op == 'func':
362 elif op == 'func':
359 return (op, x[1], _analyze(x[2]))
363 return (op, x[1], _analyze(x[2]))
360 raise ValueError('invalid operator %r' % op)
364 raise ValueError('invalid operator %r' % op)
361
365
362 def analyze(x):
366 def analyze(x):
363 """Transform raw parsed tree to evaluatable tree which can be fed to
367 """Transform raw parsed tree to evaluatable tree which can be fed to
364 optimize() or getset()
368 optimize() or getset()
365
369
366 All pseudo operations should be mapped to real operations or functions
370 All pseudo operations should be mapped to real operations or functions
367 defined in methods or symbols table respectively.
371 defined in methods or symbols table respectively.
368 """
372 """
369 return _analyze(x)
373 return _analyze(x)
370
374
371 def _optimize(x, small):
375 def _optimize(x, small):
372 if x is None:
376 if x is None:
373 return 0, x
377 return 0, x
374
378
375 smallbonus = 1
379 smallbonus = 1
376 if small:
380 if small:
377 smallbonus = .5
381 smallbonus = .5
378
382
379 op = x[0]
383 op = x[0]
380 if op in ('string', 'symbol'):
384 if op in ('string', 'symbol'):
381 return smallbonus, x # single revisions are small
385 return smallbonus, x # single revisions are small
382 elif op == 'and':
386 elif op == 'and':
383 wa, ta = _optimize(x[1], True)
387 wa, ta = _optimize(x[1], True)
384 wb, tb = _optimize(x[2], True)
388 wb, tb = _optimize(x[2], True)
385 w = min(wa, wb)
389 w = min(wa, wb)
386
390
387 # (::x and not ::y)/(not ::y and ::x) have a fast path
391 # (::x and not ::y)/(not ::y and ::x) have a fast path
388 m = _matchonly(ta, tb) or _matchonly(tb, ta)
392 m = _matchonly(ta, tb) or _matchonly(tb, ta)
389 if m:
393 if m:
390 return w, _build('only(_, _)', *m[1:])
394 return w, _build('only(_, _)', *m[1:])
391
395
392 if tb is not None and tb[0] == 'not':
396 m = _match('not _', tb)
393 return wa, ('difference', ta, tb[1])
397 if m:
398 return wa, ('difference', ta, m[1])
394 if wa > wb:
399 if wa > wb:
395 op = 'andsmally'
400 op = 'andsmally'
396 return w, (op, ta, tb)
401 return w, (op, ta, tb)
397 elif op == 'or':
402 elif op == 'or':
398 # fast path for machine-generated expression, that is likely to have
403 # fast path for machine-generated expression, that is likely to have
399 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
404 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
400 ws, ts, ss = [], [], []
405 ws, ts, ss = [], [], []
401 def flushss():
406 def flushss():
402 if not ss:
407 if not ss:
403 return
408 return
404 if len(ss) == 1:
409 if len(ss) == 1:
405 w, t = ss[0]
410 w, t = ss[0]
406 else:
411 else:
407 s = '\0'.join(t[1] for w, t in ss)
412 s = '\0'.join(t[1] for w, t in ss)
408 y = _build('_list(_)', ('string', s))
413 y = _build('_list(_)', ('string', s))
409 w, t = _optimize(y, False)
414 w, t = _optimize(y, False)
410 ws.append(w)
415 ws.append(w)
411 ts.append(t)
416 ts.append(t)
412 del ss[:]
417 del ss[:]
413 for y in getlist(x[1]):
418 for y in getlist(x[1]):
414 w, t = _optimize(y, False)
419 w, t = _optimize(y, False)
415 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
420 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
416 ss.append((w, t))
421 ss.append((w, t))
417 continue
422 continue
418 flushss()
423 flushss()
419 ws.append(w)
424 ws.append(w)
420 ts.append(t)
425 ts.append(t)
421 flushss()
426 flushss()
422 if len(ts) == 1:
427 if len(ts) == 1:
423 return ws[0], ts[0] # 'or' operation is fully optimized out
428 return ws[0], ts[0] # 'or' operation is fully optimized out
424 return max(ws), (op, ('list',) + tuple(ts))
429 return max(ws), (op, ('list',) + tuple(ts))
425 elif op == 'not':
430 elif op == 'not':
426 # Optimize not public() to _notpublic() because we have a fast version
431 # Optimize not public() to _notpublic() because we have a fast version
427 if x[1][:3] == ('func', ('symbol', 'public'), None):
432 if _match('public()', x[1]):
428 o = _optimize(_build('_notpublic()'), not small)
433 o = _optimize(_build('_notpublic()'), not small)
429 return o[0], o[1]
434 return o[0], o[1]
430 else:
435 else:
431 o = _optimize(x[1], not small)
436 o = _optimize(x[1], not small)
432 return o[0], (op, o[1])
437 return o[0], (op, o[1])
433 elif op == 'rangeall':
438 elif op == 'rangeall':
434 return smallbonus, x
439 return smallbonus, x
435 elif op in ('rangepre', 'rangepost', 'parentpost'):
440 elif op in ('rangepre', 'rangepost', 'parentpost'):
436 o = _optimize(x[1], small)
441 o = _optimize(x[1], small)
437 return o[0], (op, o[1])
442 return o[0], (op, o[1])
438 elif op in ('dagrange', 'range'):
443 elif op in ('dagrange', 'range'):
439 wa, ta = _optimize(x[1], small)
444 wa, ta = _optimize(x[1], small)
440 wb, tb = _optimize(x[2], small)
445 wb, tb = _optimize(x[2], small)
441 return wa + wb, (op, ta, tb)
446 return wa + wb, (op, ta, tb)
442 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
447 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
443 w, t = _optimize(x[1], small)
448 w, t = _optimize(x[1], small)
444 return w, (op, t, x[2])
449 return w, (op, t, x[2])
445 elif op == 'relsubscript':
450 elif op == 'relsubscript':
446 w, t = _optimize(x[1], small)
451 w, t = _optimize(x[1], small)
447 return w, (op, t, x[2], x[3])
452 return w, (op, t, x[2], x[3])
448 elif op == 'list':
453 elif op == 'list':
449 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
454 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
450 return sum(ws), (op,) + ts
455 return sum(ws), (op,) + ts
451 elif op == 'keyvalue':
456 elif op == 'keyvalue':
452 w, t = _optimize(x[2], small)
457 w, t = _optimize(x[2], small)
453 return w, (op, x[1], t)
458 return w, (op, x[1], t)
454 elif op == 'func':
459 elif op == 'func':
455 f = getsymbol(x[1])
460 f = getsymbol(x[1])
456 wa, ta = _optimize(x[2], small)
461 wa, ta = _optimize(x[2], small)
457 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
462 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
458 'keyword', 'outgoing', 'user', 'destination'):
463 'keyword', 'outgoing', 'user', 'destination'):
459 w = 10 # slow
464 w = 10 # slow
460 elif f in ('modifies', 'adds', 'removes'):
465 elif f in ('modifies', 'adds', 'removes'):
461 w = 30 # slower
466 w = 30 # slower
462 elif f == "contains":
467 elif f == "contains":
463 w = 100 # very slow
468 w = 100 # very slow
464 elif f == "ancestor":
469 elif f == "ancestor":
465 w = 1 * smallbonus
470 w = 1 * smallbonus
466 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
471 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
467 w = 0
472 w = 0
468 elif f == "sort":
473 elif f == "sort":
469 w = 10 # assume most sorts look at changelog
474 w = 10 # assume most sorts look at changelog
470 else:
475 else:
471 w = 1
476 w = 1
472 return w + wa, (op, x[1], ta)
477 return w + wa, (op, x[1], ta)
473 raise ValueError('invalid operator %r' % op)
478 raise ValueError('invalid operator %r' % op)
474
479
475 def optimize(tree):
480 def optimize(tree):
476 """Optimize evaluatable tree
481 """Optimize evaluatable tree
477
482
478 All pseudo operations should be transformed beforehand.
483 All pseudo operations should be transformed beforehand.
479 """
484 """
480 _weight, newtree = _optimize(tree, small=True)
485 _weight, newtree = _optimize(tree, small=True)
481 return newtree
486 return newtree
482
487
483 # the set of valid characters for the initial letter of symbols in
488 # the set of valid characters for the initial letter of symbols in
484 # alias declarations and definitions
489 # alias declarations and definitions
485 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
490 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
486
491
487 def _parsewith(spec, lookup=None, syminitletters=None):
492 def _parsewith(spec, lookup=None, syminitletters=None):
488 """Generate a parse tree of given spec with given tokenizing options
493 """Generate a parse tree of given spec with given tokenizing options
489
494
490 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
495 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
491 ('func', ('symbol', 'foo'), ('symbol', '$1'))
496 ('func', ('symbol', 'foo'), ('symbol', '$1'))
492 >>> _parsewith('$1')
497 >>> _parsewith('$1')
493 Traceback (most recent call last):
498 Traceback (most recent call last):
494 ...
499 ...
495 ParseError: ("syntax error in revset '$1'", 0)
500 ParseError: ("syntax error in revset '$1'", 0)
496 >>> _parsewith('foo bar')
501 >>> _parsewith('foo bar')
497 Traceback (most recent call last):
502 Traceback (most recent call last):
498 ...
503 ...
499 ParseError: ('invalid token', 4)
504 ParseError: ('invalid token', 4)
500 """
505 """
501 p = parser.parser(elements)
506 p = parser.parser(elements)
502 tree, pos = p.parse(tokenize(spec, lookup=lookup,
507 tree, pos = p.parse(tokenize(spec, lookup=lookup,
503 syminitletters=syminitletters))
508 syminitletters=syminitletters))
504 if pos != len(spec):
509 if pos != len(spec):
505 raise error.ParseError(_('invalid token'), pos)
510 raise error.ParseError(_('invalid token'), pos)
506 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
511 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
507
512
508 class _aliasrules(parser.basealiasrules):
513 class _aliasrules(parser.basealiasrules):
509 """Parsing and expansion rule set of revset aliases"""
514 """Parsing and expansion rule set of revset aliases"""
510 _section = _('revset alias')
515 _section = _('revset alias')
511
516
512 @staticmethod
517 @staticmethod
513 def _parse(spec):
518 def _parse(spec):
514 """Parse alias declaration/definition ``spec``
519 """Parse alias declaration/definition ``spec``
515
520
516 This allows symbol names to use also ``$`` as an initial letter
521 This allows symbol names to use also ``$`` as an initial letter
517 (for backward compatibility), and callers of this function should
522 (for backward compatibility), and callers of this function should
518 examine whether ``$`` is used also for unexpected symbols or not.
523 examine whether ``$`` is used also for unexpected symbols or not.
519 """
524 """
520 return _parsewith(spec, syminitletters=_aliassyminitletters)
525 return _parsewith(spec, syminitletters=_aliassyminitletters)
521
526
522 @staticmethod
527 @staticmethod
523 def _trygetfunc(tree):
528 def _trygetfunc(tree):
524 if tree[0] == 'func' and tree[1][0] == 'symbol':
529 if tree[0] == 'func' and tree[1][0] == 'symbol':
525 return tree[1][1], getlist(tree[2])
530 return tree[1][1], getlist(tree[2])
526
531
527 def expandaliases(tree, aliases, warn=None):
532 def expandaliases(tree, aliases, warn=None):
528 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
533 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
529 aliases = _aliasrules.buildmap(aliases)
534 aliases = _aliasrules.buildmap(aliases)
530 tree = _aliasrules.expand(aliases, tree)
535 tree = _aliasrules.expand(aliases, tree)
531 # warn about problematic (but not referred) aliases
536 # warn about problematic (but not referred) aliases
532 if warn is not None:
537 if warn is not None:
533 for name, alias in sorted(aliases.iteritems()):
538 for name, alias in sorted(aliases.iteritems()):
534 if alias.error and not alias.warned:
539 if alias.error and not alias.warned:
535 warn(_('warning: %s\n') % (alias.error))
540 warn(_('warning: %s\n') % (alias.error))
536 alias.warned = True
541 alias.warned = True
537 return tree
542 return tree
538
543
539 def foldconcat(tree):
544 def foldconcat(tree):
540 """Fold elements to be concatenated by `##`
545 """Fold elements to be concatenated by `##`
541 """
546 """
542 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
547 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
543 return tree
548 return tree
544 if tree[0] == '_concat':
549 if tree[0] == '_concat':
545 pending = [tree]
550 pending = [tree]
546 l = []
551 l = []
547 while pending:
552 while pending:
548 e = pending.pop()
553 e = pending.pop()
549 if e[0] == '_concat':
554 if e[0] == '_concat':
550 pending.extend(reversed(e[1:]))
555 pending.extend(reversed(e[1:]))
551 elif e[0] in ('string', 'symbol'):
556 elif e[0] in ('string', 'symbol'):
552 l.append(e[1])
557 l.append(e[1])
553 else:
558 else:
554 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
559 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
555 raise error.ParseError(msg)
560 raise error.ParseError(msg)
556 return ('string', ''.join(l))
561 return ('string', ''.join(l))
557 else:
562 else:
558 return tuple(foldconcat(t) for t in tree)
563 return tuple(foldconcat(t) for t in tree)
559
564
560 def parse(spec, lookup=None):
565 def parse(spec, lookup=None):
561 return _parsewith(spec, lookup=lookup)
566 return _parsewith(spec, lookup=lookup)
562
567
563 def _quote(s):
568 def _quote(s):
564 r"""Quote a value in order to make it safe for the revset engine.
569 r"""Quote a value in order to make it safe for the revset engine.
565
570
566 >>> _quote('asdf')
571 >>> _quote('asdf')
567 "'asdf'"
572 "'asdf'"
568 >>> _quote("asdf'\"")
573 >>> _quote("asdf'\"")
569 '\'asdf\\\'"\''
574 '\'asdf\\\'"\''
570 >>> _quote('asdf\'')
575 >>> _quote('asdf\'')
571 "'asdf\\''"
576 "'asdf\\''"
572 >>> _quote(1)
577 >>> _quote(1)
573 "'1'"
578 "'1'"
574 """
579 """
575 return "'%s'" % util.escapestr(pycompat.bytestr(s))
580 return "'%s'" % util.escapestr(pycompat.bytestr(s))
576
581
577 def formatspec(expr, *args):
582 def formatspec(expr, *args):
578 '''
583 '''
579 This is a convenience function for using revsets internally, and
584 This is a convenience function for using revsets internally, and
580 escapes arguments appropriately. Aliases are intentionally ignored
585 escapes arguments appropriately. Aliases are intentionally ignored
581 so that intended expression behavior isn't accidentally subverted.
586 so that intended expression behavior isn't accidentally subverted.
582
587
583 Supported arguments:
588 Supported arguments:
584
589
585 %r = revset expression, parenthesized
590 %r = revset expression, parenthesized
586 %d = int(arg), no quoting
591 %d = int(arg), no quoting
587 %s = string(arg), escaped and single-quoted
592 %s = string(arg), escaped and single-quoted
588 %b = arg.branch(), escaped and single-quoted
593 %b = arg.branch(), escaped and single-quoted
589 %n = hex(arg), single-quoted
594 %n = hex(arg), single-quoted
590 %% = a literal '%'
595 %% = a literal '%'
591
596
592 Prefixing the type with 'l' specifies a parenthesized list of that type.
597 Prefixing the type with 'l' specifies a parenthesized list of that type.
593
598
594 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
599 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
595 '(10 or 11):: and ((this()) or (that()))'
600 '(10 or 11):: and ((this()) or (that()))'
596 >>> formatspec('%d:: and not %d::', 10, 20)
601 >>> formatspec('%d:: and not %d::', 10, 20)
597 '10:: and not 20::'
602 '10:: and not 20::'
598 >>> formatspec('%ld or %ld', [], [1])
603 >>> formatspec('%ld or %ld', [], [1])
599 "_list('') or 1"
604 "_list('') or 1"
600 >>> formatspec('keyword(%s)', 'foo\\xe9')
605 >>> formatspec('keyword(%s)', 'foo\\xe9')
601 "keyword('foo\\\\xe9')"
606 "keyword('foo\\\\xe9')"
602 >>> b = lambda: 'default'
607 >>> b = lambda: 'default'
603 >>> b.branch = b
608 >>> b.branch = b
604 >>> formatspec('branch(%b)', b)
609 >>> formatspec('branch(%b)', b)
605 "branch('default')"
610 "branch('default')"
606 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
611 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
607 "root(_list('a\\x00b\\x00c\\x00d'))"
612 "root(_list('a\\x00b\\x00c\\x00d'))"
608 '''
613 '''
609
614
610 def argtype(c, arg):
615 def argtype(c, arg):
611 if c == 'd':
616 if c == 'd':
612 return '%d' % int(arg)
617 return '%d' % int(arg)
613 elif c == 's':
618 elif c == 's':
614 return _quote(arg)
619 return _quote(arg)
615 elif c == 'r':
620 elif c == 'r':
616 parse(arg) # make sure syntax errors are confined
621 parse(arg) # make sure syntax errors are confined
617 return '(%s)' % arg
622 return '(%s)' % arg
618 elif c == 'n':
623 elif c == 'n':
619 return _quote(node.hex(arg))
624 return _quote(node.hex(arg))
620 elif c == 'b':
625 elif c == 'b':
621 return _quote(arg.branch())
626 return _quote(arg.branch())
622
627
623 def listexp(s, t):
628 def listexp(s, t):
624 l = len(s)
629 l = len(s)
625 if l == 0:
630 if l == 0:
626 return "_list('')"
631 return "_list('')"
627 elif l == 1:
632 elif l == 1:
628 return argtype(t, s[0])
633 return argtype(t, s[0])
629 elif t == 'd':
634 elif t == 'd':
630 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
635 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
631 elif t == 's':
636 elif t == 's':
632 return "_list('%s')" % "\0".join(s)
637 return "_list('%s')" % "\0".join(s)
633 elif t == 'n':
638 elif t == 'n':
634 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
639 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
635 elif t == 'b':
640 elif t == 'b':
636 return "_list('%s')" % "\0".join(a.branch() for a in s)
641 return "_list('%s')" % "\0".join(a.branch() for a in s)
637
642
638 m = l // 2
643 m = l // 2
639 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
644 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
640
645
641 expr = pycompat.bytestr(expr)
646 expr = pycompat.bytestr(expr)
642 ret = ''
647 ret = ''
643 pos = 0
648 pos = 0
644 arg = 0
649 arg = 0
645 while pos < len(expr):
650 while pos < len(expr):
646 c = expr[pos]
651 c = expr[pos]
647 if c == '%':
652 if c == '%':
648 pos += 1
653 pos += 1
649 d = expr[pos]
654 d = expr[pos]
650 if d == '%':
655 if d == '%':
651 ret += d
656 ret += d
652 elif d in 'dsnbr':
657 elif d in 'dsnbr':
653 ret += argtype(d, args[arg])
658 ret += argtype(d, args[arg])
654 arg += 1
659 arg += 1
655 elif d == 'l':
660 elif d == 'l':
656 # a list of some type
661 # a list of some type
657 pos += 1
662 pos += 1
658 d = expr[pos]
663 d = expr[pos]
659 ret += listexp(list(args[arg]), d)
664 ret += listexp(list(args[arg]), d)
660 arg += 1
665 arg += 1
661 else:
666 else:
662 raise error.Abort(_('unexpected revspec format character %s')
667 raise error.Abort(_('unexpected revspec format character %s')
663 % d)
668 % d)
664 else:
669 else:
665 ret += c
670 ret += c
666 pos += 1
671 pos += 1
667
672
668 return ret
673 return ret
669
674
670 def prettyformat(tree):
675 def prettyformat(tree):
671 return parser.prettyformat(tree, ('string', 'symbol'))
676 return parser.prettyformat(tree, ('string', 'symbol'))
672
677
673 def depth(tree):
678 def depth(tree):
674 if isinstance(tree, tuple):
679 if isinstance(tree, tuple):
675 return max(map(depth, tree)) + 1
680 return max(map(depth, tree)) + 1
676 else:
681 else:
677 return 0
682 return 0
678
683
679 def funcsused(tree):
684 def funcsused(tree):
680 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
685 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
681 return set()
686 return set()
682 else:
687 else:
683 funcs = set()
688 funcs = set()
684 for s in tree[1:]:
689 for s in tree[1:]:
685 funcs |= funcsused(s)
690 funcs |= funcsused(s)
686 if tree[0] == 'func':
691 if tree[0] == 'func':
687 funcs.add(tree[1][1])
692 funcs.add(tree[1][1])
688 return funcs
693 return funcs
General Comments 0
You need to be logged in to leave comments. Login now