##// END OF EJS Templates
revsetlang: move quoting function to not be a closure...
Augie Fackler -
r31604:7eac6fcf default
parent child Browse files
Show More
@@ -1,690 +1,690 b''
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 )
18 )
19
19
20 elements = {
20 elements = {
21 # token-type: binding-strength, primary, prefix, infix, suffix
21 # token-type: binding-strength, primary, prefix, infix, suffix
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
23 "##": (20, None, None, ("_concat", 20), None),
23 "##": (20, None, None, ("_concat", 20), None),
24 "~": (18, None, None, ("ancestor", 18), None),
24 "~": (18, None, None, ("ancestor", 18), None),
25 "^": (18, None, None, ("parent", 18), "parentpost"),
25 "^": (18, None, None, ("parent", 18), "parentpost"),
26 "-": (5, None, ("negate", 19), ("minus", 5), None),
26 "-": (5, None, ("negate", 19), ("minus", 5), None),
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
30 "not": (10, None, ("not", 10), None, None),
30 "not": (10, None, ("not", 10), None, None),
31 "!": (10, None, ("not", 10), None, None),
31 "!": (10, None, ("not", 10), None, None),
32 "and": (5, None, None, ("and", 5), None),
32 "and": (5, None, None, ("and", 5), None),
33 "&": (5, None, None, ("and", 5), None),
33 "&": (5, None, None, ("and", 5), None),
34 "%": (5, None, None, ("only", 5), "onlypost"),
34 "%": (5, None, None, ("only", 5), "onlypost"),
35 "or": (4, None, None, ("or", 4), None),
35 "or": (4, None, None, ("or", 4), None),
36 "|": (4, None, None, ("or", 4), None),
36 "|": (4, None, None, ("or", 4), None),
37 "+": (4, None, None, ("or", 4), None),
37 "+": (4, None, None, ("or", 4), None),
38 "=": (3, None, None, ("keyvalue", 3), None),
38 "=": (3, None, None, ("keyvalue", 3), None),
39 ",": (2, None, None, ("list", 2), None),
39 ",": (2, None, None, ("list", 2), None),
40 ")": (0, None, None, None, None),
40 ")": (0, None, None, None, None),
41 "symbol": (0, "symbol", None, None, None),
41 "symbol": (0, "symbol", None, None, None),
42 "string": (0, "string", None, None, None),
42 "string": (0, "string", None, None, None),
43 "end": (0, None, None, None, None),
43 "end": (0, None, None, None, None),
44 }
44 }
45
45
46 keywords = set(['and', 'or', 'not'])
46 keywords = set(['and', 'or', 'not'])
47
47
48 _quoteletters = set(['"', "'"])
48 _quoteletters = set(['"', "'"])
49 _simpleopletters = set(pycompat.iterbytestr("():=,-|&+!~^%"))
49 _simpleopletters = set(pycompat.iterbytestr("():=,-|&+!~^%"))
50
50
51 # default set of valid characters for the initial letter of symbols
51 # default set of valid characters for the initial letter of symbols
52 _syminitletters = set(pycompat.iterbytestr(
52 _syminitletters = set(pycompat.iterbytestr(
53 string.ascii_letters.encode('ascii') +
53 string.ascii_letters.encode('ascii') +
54 string.digits.encode('ascii') +
54 string.digits.encode('ascii') +
55 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
55 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
56
56
57 # default set of valid characters for non-initial letters of symbols
57 # default set of valid characters for non-initial letters of symbols
58 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
58 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
59
59
60 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
60 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
61 '''
61 '''
62 Parse a revset statement into a stream of tokens
62 Parse a revset statement into a stream of tokens
63
63
64 ``syminitletters`` is the set of valid characters for the initial
64 ``syminitletters`` is the set of valid characters for the initial
65 letter of symbols.
65 letter of symbols.
66
66
67 By default, character ``c`` is recognized as valid for initial
67 By default, character ``c`` is recognized as valid for initial
68 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
68 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
69
69
70 ``symletters`` is the set of valid characters for non-initial
70 ``symletters`` is the set of valid characters for non-initial
71 letters of symbols.
71 letters of symbols.
72
72
73 By default, character ``c`` is recognized as valid for non-initial
73 By default, character ``c`` is recognized as valid for non-initial
74 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
74 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
75
75
76 Check that @ is a valid unquoted token character (issue3686):
76 Check that @ is a valid unquoted token character (issue3686):
77 >>> list(tokenize("@::"))
77 >>> list(tokenize("@::"))
78 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
78 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
79
79
80 '''
80 '''
81 program = pycompat.bytestr(program)
81 program = pycompat.bytestr(program)
82 if syminitletters is None:
82 if syminitletters is None:
83 syminitletters = _syminitletters
83 syminitletters = _syminitletters
84 if symletters is None:
84 if symletters is None:
85 symletters = _symletters
85 symletters = _symletters
86
86
87 if program and lookup:
87 if program and lookup:
88 # attempt to parse old-style ranges first to deal with
88 # attempt to parse old-style ranges first to deal with
89 # things like old-tag which contain query metacharacters
89 # things like old-tag which contain query metacharacters
90 parts = program.split(':', 1)
90 parts = program.split(':', 1)
91 if all(lookup(sym) for sym in parts if sym):
91 if all(lookup(sym) for sym in parts if sym):
92 if parts[0]:
92 if parts[0]:
93 yield ('symbol', parts[0], 0)
93 yield ('symbol', parts[0], 0)
94 if len(parts) > 1:
94 if len(parts) > 1:
95 s = len(parts[0])
95 s = len(parts[0])
96 yield (':', None, s)
96 yield (':', None, s)
97 if parts[1]:
97 if parts[1]:
98 yield ('symbol', parts[1], s + 1)
98 yield ('symbol', parts[1], s + 1)
99 yield ('end', None, len(program))
99 yield ('end', None, len(program))
100 return
100 return
101
101
102 pos, l = 0, len(program)
102 pos, l = 0, len(program)
103 while pos < l:
103 while pos < l:
104 c = program[pos]
104 c = program[pos]
105 if c.isspace(): # skip inter-token whitespace
105 if c.isspace(): # skip inter-token whitespace
106 pass
106 pass
107 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
107 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
108 yield ('::', None, pos)
108 yield ('::', None, pos)
109 pos += 1 # skip ahead
109 pos += 1 # skip ahead
110 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
110 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
111 yield ('..', None, pos)
111 yield ('..', None, pos)
112 pos += 1 # skip ahead
112 pos += 1 # skip ahead
113 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
113 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
114 yield ('##', None, pos)
114 yield ('##', None, pos)
115 pos += 1 # skip ahead
115 pos += 1 # skip ahead
116 elif c in _simpleopletters: # handle simple operators
116 elif c in _simpleopletters: # handle simple operators
117 yield (c, None, pos)
117 yield (c, None, pos)
118 elif (c in _quoteletters or c == 'r' and
118 elif (c in _quoteletters or c == 'r' and
119 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
119 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
120 if c == 'r':
120 if c == 'r':
121 pos += 1
121 pos += 1
122 c = program[pos]
122 c = program[pos]
123 decode = lambda x: x
123 decode = lambda x: x
124 else:
124 else:
125 decode = parser.unescapestr
125 decode = parser.unescapestr
126 pos += 1
126 pos += 1
127 s = pos
127 s = pos
128 while pos < l: # find closing quote
128 while pos < l: # find closing quote
129 d = program[pos]
129 d = program[pos]
130 if d == '\\': # skip over escaped characters
130 if d == '\\': # skip over escaped characters
131 pos += 2
131 pos += 2
132 continue
132 continue
133 if d == c:
133 if d == c:
134 yield ('string', decode(program[s:pos]), s)
134 yield ('string', decode(program[s:pos]), s)
135 break
135 break
136 pos += 1
136 pos += 1
137 else:
137 else:
138 raise error.ParseError(_("unterminated string"), s)
138 raise error.ParseError(_("unterminated string"), s)
139 # gather up a symbol/keyword
139 # gather up a symbol/keyword
140 elif c in syminitletters:
140 elif c in syminitletters:
141 s = pos
141 s = pos
142 pos += 1
142 pos += 1
143 while pos < l: # find end of symbol
143 while pos < l: # find end of symbol
144 d = program[pos]
144 d = program[pos]
145 if d not in symletters:
145 if d not in symletters:
146 break
146 break
147 if d == '.' and program[pos - 1] == '.': # special case for ..
147 if d == '.' and program[pos - 1] == '.': # special case for ..
148 pos -= 1
148 pos -= 1
149 break
149 break
150 pos += 1
150 pos += 1
151 sym = program[s:pos]
151 sym = program[s:pos]
152 if sym in keywords: # operator keywords
152 if sym in keywords: # operator keywords
153 yield (sym, None, s)
153 yield (sym, None, s)
154 elif '-' in sym:
154 elif '-' in sym:
155 # some jerk gave us foo-bar-baz, try to check if it's a symbol
155 # some jerk gave us foo-bar-baz, try to check if it's a symbol
156 if lookup and lookup(sym):
156 if lookup and lookup(sym):
157 # looks like a real symbol
157 # looks like a real symbol
158 yield ('symbol', sym, s)
158 yield ('symbol', sym, s)
159 else:
159 else:
160 # looks like an expression
160 # looks like an expression
161 parts = sym.split('-')
161 parts = sym.split('-')
162 for p in parts[:-1]:
162 for p in parts[:-1]:
163 if p: # possible consecutive -
163 if p: # possible consecutive -
164 yield ('symbol', p, s)
164 yield ('symbol', p, s)
165 s += len(p)
165 s += len(p)
166 yield ('-', None, pos)
166 yield ('-', None, pos)
167 s += 1
167 s += 1
168 if parts[-1]: # possible trailing -
168 if parts[-1]: # possible trailing -
169 yield ('symbol', parts[-1], s)
169 yield ('symbol', parts[-1], s)
170 else:
170 else:
171 yield ('symbol', sym, s)
171 yield ('symbol', sym, s)
172 pos -= 1
172 pos -= 1
173 else:
173 else:
174 raise error.ParseError(_("syntax error in revset '%s'") %
174 raise error.ParseError(_("syntax error in revset '%s'") %
175 program, pos)
175 program, pos)
176 pos += 1
176 pos += 1
177 yield ('end', None, pos)
177 yield ('end', None, pos)
178
178
179 # helpers
179 # helpers
180
180
181 _notset = object()
181 _notset = object()
182
182
183 def getsymbol(x):
183 def getsymbol(x):
184 if x and x[0] == 'symbol':
184 if x and x[0] == 'symbol':
185 return x[1]
185 return x[1]
186 raise error.ParseError(_('not a symbol'))
186 raise error.ParseError(_('not a symbol'))
187
187
188 def getstring(x, err):
188 def getstring(x, err):
189 if x and (x[0] == 'string' or x[0] == 'symbol'):
189 if x and (x[0] == 'string' or x[0] == 'symbol'):
190 return x[1]
190 return x[1]
191 raise error.ParseError(err)
191 raise error.ParseError(err)
192
192
193 def getinteger(x, err, default=_notset):
193 def getinteger(x, err, default=_notset):
194 if not x and default is not _notset:
194 if not x and default is not _notset:
195 return default
195 return default
196 try:
196 try:
197 return int(getstring(x, err))
197 return int(getstring(x, err))
198 except ValueError:
198 except ValueError:
199 raise error.ParseError(err)
199 raise error.ParseError(err)
200
200
201 def getlist(x):
201 def getlist(x):
202 if not x:
202 if not x:
203 return []
203 return []
204 if x[0] == 'list':
204 if x[0] == 'list':
205 return list(x[1:])
205 return list(x[1:])
206 return [x]
206 return [x]
207
207
208 def getrange(x, err):
208 def getrange(x, err):
209 if not x:
209 if not x:
210 raise error.ParseError(err)
210 raise error.ParseError(err)
211 op = x[0]
211 op = x[0]
212 if op == 'range':
212 if op == 'range':
213 return x[1], x[2]
213 return x[1], x[2]
214 elif op == 'rangepre':
214 elif op == 'rangepre':
215 return None, x[1]
215 return None, x[1]
216 elif op == 'rangepost':
216 elif op == 'rangepost':
217 return x[1], None
217 return x[1], None
218 elif op == 'rangeall':
218 elif op == 'rangeall':
219 return None, None
219 return None, None
220 raise error.ParseError(err)
220 raise error.ParseError(err)
221
221
222 def getargs(x, min, max, err):
222 def getargs(x, min, max, err):
223 l = getlist(x)
223 l = getlist(x)
224 if len(l) < min or (max >= 0 and len(l) > max):
224 if len(l) < min or (max >= 0 and len(l) > max):
225 raise error.ParseError(err)
225 raise error.ParseError(err)
226 return l
226 return l
227
227
228 def getargsdict(x, funcname, keys):
228 def getargsdict(x, funcname, keys):
229 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
229 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
230 keyvaluenode='keyvalue', keynode='symbol')
230 keyvaluenode='keyvalue', keynode='symbol')
231
231
232 # Constants for ordering requirement, used in _analyze():
232 # Constants for ordering requirement, used in _analyze():
233 #
233 #
234 # If 'define', any nested functions and operations can change the ordering of
234 # If 'define', any nested functions and operations can change the ordering of
235 # the entries in the set. If 'follow', any nested functions and operations
235 # the entries in the set. If 'follow', any nested functions and operations
236 # should take the ordering specified by the first operand to the '&' operator.
236 # should take the ordering specified by the first operand to the '&' operator.
237 #
237 #
238 # For instance,
238 # For instance,
239 #
239 #
240 # X & (Y | Z)
240 # X & (Y | Z)
241 # ^ ^^^^^^^
241 # ^ ^^^^^^^
242 # | follow
242 # | follow
243 # define
243 # define
244 #
244 #
245 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
245 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
246 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
246 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
247 #
247 #
248 # 'any' means the order doesn't matter. For instance,
248 # 'any' means the order doesn't matter. For instance,
249 #
249 #
250 # X & !Y
250 # X & !Y
251 # ^
251 # ^
252 # any
252 # any
253 #
253 #
254 # 'y()' can either enforce its ordering requirement or take the ordering
254 # 'y()' can either enforce its ordering requirement or take the ordering
255 # specified by 'x()' because 'not()' doesn't care the order.
255 # specified by 'x()' because 'not()' doesn't care the order.
256 #
256 #
257 # Transition of ordering requirement:
257 # Transition of ordering requirement:
258 #
258 #
259 # 1. starts with 'define'
259 # 1. starts with 'define'
260 # 2. shifts to 'follow' by 'x & y'
260 # 2. shifts to 'follow' by 'x & y'
261 # 3. changes back to 'define' on function call 'f(x)' or function-like
261 # 3. changes back to 'define' on function call 'f(x)' or function-like
262 # operation 'x (f) y' because 'f' may have its own ordering requirement
262 # operation 'x (f) y' because 'f' may have its own ordering requirement
263 # for 'x' and 'y' (e.g. 'first(x)')
263 # for 'x' and 'y' (e.g. 'first(x)')
264 #
264 #
265 anyorder = 'any' # don't care the order
265 anyorder = 'any' # don't care the order
266 defineorder = 'define' # should define the order
266 defineorder = 'define' # should define the order
267 followorder = 'follow' # must follow the current order
267 followorder = 'follow' # must follow the current order
268
268
269 # transition table for 'x & y', from the current expression 'x' to 'y'
269 # transition table for 'x & y', from the current expression 'x' to 'y'
270 _tofolloworder = {
270 _tofolloworder = {
271 anyorder: anyorder,
271 anyorder: anyorder,
272 defineorder: followorder,
272 defineorder: followorder,
273 followorder: followorder,
273 followorder: followorder,
274 }
274 }
275
275
276 def _matchonly(revs, bases):
276 def _matchonly(revs, bases):
277 """
277 """
278 >>> f = lambda *args: _matchonly(*map(parse, args))
278 >>> f = lambda *args: _matchonly(*map(parse, args))
279 >>> f('ancestors(A)', 'not ancestors(B)')
279 >>> f('ancestors(A)', 'not ancestors(B)')
280 ('list', ('symbol', 'A'), ('symbol', 'B'))
280 ('list', ('symbol', 'A'), ('symbol', 'B'))
281 """
281 """
282 if (revs is not None
282 if (revs is not None
283 and revs[0] == 'func'
283 and revs[0] == 'func'
284 and getsymbol(revs[1]) == 'ancestors'
284 and getsymbol(revs[1]) == 'ancestors'
285 and bases is not None
285 and bases is not None
286 and bases[0] == 'not'
286 and bases[0] == 'not'
287 and bases[1][0] == 'func'
287 and bases[1][0] == 'func'
288 and getsymbol(bases[1][1]) == 'ancestors'):
288 and getsymbol(bases[1][1]) == 'ancestors'):
289 return ('list', revs[2], bases[1][2])
289 return ('list', revs[2], bases[1][2])
290
290
291 def _fixops(x):
291 def _fixops(x):
292 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
292 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
293 handled well by our simple top-down parser"""
293 handled well by our simple top-down parser"""
294 if not isinstance(x, tuple):
294 if not isinstance(x, tuple):
295 return x
295 return x
296
296
297 op = x[0]
297 op = x[0]
298 if op == 'parent':
298 if op == 'parent':
299 # x^:y means (x^) : y, not x ^ (:y)
299 # x^:y means (x^) : y, not x ^ (:y)
300 # x^: means (x^) :, not x ^ (:)
300 # x^: means (x^) :, not x ^ (:)
301 post = ('parentpost', x[1])
301 post = ('parentpost', x[1])
302 if x[2][0] == 'dagrangepre':
302 if x[2][0] == 'dagrangepre':
303 return _fixops(('dagrange', post, x[2][1]))
303 return _fixops(('dagrange', post, x[2][1]))
304 elif x[2][0] == 'rangepre':
304 elif x[2][0] == 'rangepre':
305 return _fixops(('range', post, x[2][1]))
305 return _fixops(('range', post, x[2][1]))
306 elif x[2][0] == 'rangeall':
306 elif x[2][0] == 'rangeall':
307 return _fixops(('rangepost', post))
307 return _fixops(('rangepost', post))
308 elif op == 'or':
308 elif op == 'or':
309 # make number of arguments deterministic:
309 # make number of arguments deterministic:
310 # x + y + z -> (or x y z) -> (or (list x y z))
310 # x + y + z -> (or x y z) -> (or (list x y z))
311 return (op, _fixops(('list',) + x[1:]))
311 return (op, _fixops(('list',) + x[1:]))
312
312
313 return (op,) + tuple(_fixops(y) for y in x[1:])
313 return (op,) + tuple(_fixops(y) for y in x[1:])
314
314
315 def _analyze(x, order):
315 def _analyze(x, order):
316 if x is None:
316 if x is None:
317 return x
317 return x
318
318
319 op = x[0]
319 op = x[0]
320 if op == 'minus':
320 if op == 'minus':
321 return _analyze(('and', x[1], ('not', x[2])), order)
321 return _analyze(('and', x[1], ('not', x[2])), order)
322 elif op == 'only':
322 elif op == 'only':
323 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
323 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
324 return _analyze(t, order)
324 return _analyze(t, order)
325 elif op == 'onlypost':
325 elif op == 'onlypost':
326 return _analyze(('func', ('symbol', 'only'), x[1]), order)
326 return _analyze(('func', ('symbol', 'only'), x[1]), order)
327 elif op == 'dagrangepre':
327 elif op == 'dagrangepre':
328 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
328 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
329 elif op == 'dagrangepost':
329 elif op == 'dagrangepost':
330 return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
330 return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
331 elif op == 'negate':
331 elif op == 'negate':
332 s = getstring(x[1], _("can't negate that"))
332 s = getstring(x[1], _("can't negate that"))
333 return _analyze(('string', '-' + s), order)
333 return _analyze(('string', '-' + s), order)
334 elif op in ('string', 'symbol'):
334 elif op in ('string', 'symbol'):
335 return x
335 return x
336 elif op == 'and':
336 elif op == 'and':
337 ta = _analyze(x[1], order)
337 ta = _analyze(x[1], order)
338 tb = _analyze(x[2], _tofolloworder[order])
338 tb = _analyze(x[2], _tofolloworder[order])
339 return (op, ta, tb, order)
339 return (op, ta, tb, order)
340 elif op == 'or':
340 elif op == 'or':
341 return (op, _analyze(x[1], order), order)
341 return (op, _analyze(x[1], order), order)
342 elif op == 'not':
342 elif op == 'not':
343 return (op, _analyze(x[1], anyorder), order)
343 return (op, _analyze(x[1], anyorder), order)
344 elif op == 'rangeall':
344 elif op == 'rangeall':
345 return (op, None, order)
345 return (op, None, order)
346 elif op in ('rangepre', 'rangepost', 'parentpost'):
346 elif op in ('rangepre', 'rangepost', 'parentpost'):
347 return (op, _analyze(x[1], defineorder), order)
347 return (op, _analyze(x[1], defineorder), order)
348 elif op == 'group':
348 elif op == 'group':
349 return _analyze(x[1], order)
349 return _analyze(x[1], order)
350 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
350 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
351 ta = _analyze(x[1], defineorder)
351 ta = _analyze(x[1], defineorder)
352 tb = _analyze(x[2], defineorder)
352 tb = _analyze(x[2], defineorder)
353 return (op, ta, tb, order)
353 return (op, ta, tb, order)
354 elif op == 'list':
354 elif op == 'list':
355 return (op,) + tuple(_analyze(y, order) for y in x[1:])
355 return (op,) + tuple(_analyze(y, order) for y in x[1:])
356 elif op == 'keyvalue':
356 elif op == 'keyvalue':
357 return (op, x[1], _analyze(x[2], order))
357 return (op, x[1], _analyze(x[2], order))
358 elif op == 'func':
358 elif op == 'func':
359 f = getsymbol(x[1])
359 f = getsymbol(x[1])
360 d = defineorder
360 d = defineorder
361 if f == 'present':
361 if f == 'present':
362 # 'present(set)' is known to return the argument set with no
362 # 'present(set)' is known to return the argument set with no
363 # modification, so forward the current order to its argument
363 # modification, so forward the current order to its argument
364 d = order
364 d = order
365 return (op, x[1], _analyze(x[2], d), order)
365 return (op, x[1], _analyze(x[2], d), order)
366 raise ValueError('invalid operator %r' % op)
366 raise ValueError('invalid operator %r' % op)
367
367
368 def analyze(x, order=defineorder):
368 def analyze(x, order=defineorder):
369 """Transform raw parsed tree to evaluatable tree which can be fed to
369 """Transform raw parsed tree to evaluatable tree which can be fed to
370 optimize() or getset()
370 optimize() or getset()
371
371
372 All pseudo operations should be mapped to real operations or functions
372 All pseudo operations should be mapped to real operations or functions
373 defined in methods or symbols table respectively.
373 defined in methods or symbols table respectively.
374
374
375 'order' specifies how the current expression 'x' is ordered (see the
375 'order' specifies how the current expression 'x' is ordered (see the
376 constants defined above.)
376 constants defined above.)
377 """
377 """
378 return _analyze(x, order)
378 return _analyze(x, order)
379
379
380 def _optimize(x, small):
380 def _optimize(x, small):
381 if x is None:
381 if x is None:
382 return 0, x
382 return 0, x
383
383
384 smallbonus = 1
384 smallbonus = 1
385 if small:
385 if small:
386 smallbonus = .5
386 smallbonus = .5
387
387
388 op = x[0]
388 op = x[0]
389 if op in ('string', 'symbol'):
389 if op in ('string', 'symbol'):
390 return smallbonus, x # single revisions are small
390 return smallbonus, x # single revisions are small
391 elif op == 'and':
391 elif op == 'and':
392 wa, ta = _optimize(x[1], True)
392 wa, ta = _optimize(x[1], True)
393 wb, tb = _optimize(x[2], True)
393 wb, tb = _optimize(x[2], True)
394 order = x[3]
394 order = x[3]
395 w = min(wa, wb)
395 w = min(wa, wb)
396
396
397 # (::x and not ::y)/(not ::y and ::x) have a fast path
397 # (::x and not ::y)/(not ::y and ::x) have a fast path
398 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
398 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
399 if tm:
399 if tm:
400 return w, ('func', ('symbol', 'only'), tm, order)
400 return w, ('func', ('symbol', 'only'), tm, order)
401
401
402 if tb is not None and tb[0] == 'not':
402 if tb is not None and tb[0] == 'not':
403 return wa, ('difference', ta, tb[1], order)
403 return wa, ('difference', ta, tb[1], order)
404
404
405 if wa > wb:
405 if wa > wb:
406 return w, (op, tb, ta, order)
406 return w, (op, tb, ta, order)
407 return w, (op, ta, tb, order)
407 return w, (op, ta, tb, order)
408 elif op == 'or':
408 elif op == 'or':
409 # fast path for machine-generated expression, that is likely to have
409 # fast path for machine-generated expression, that is likely to have
410 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
410 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
411 order = x[2]
411 order = x[2]
412 ws, ts, ss = [], [], []
412 ws, ts, ss = [], [], []
413 def flushss():
413 def flushss():
414 if not ss:
414 if not ss:
415 return
415 return
416 if len(ss) == 1:
416 if len(ss) == 1:
417 w, t = ss[0]
417 w, t = ss[0]
418 else:
418 else:
419 s = '\0'.join(t[1] for w, t in ss)
419 s = '\0'.join(t[1] for w, t in ss)
420 y = ('func', ('symbol', '_list'), ('string', s), order)
420 y = ('func', ('symbol', '_list'), ('string', s), order)
421 w, t = _optimize(y, False)
421 w, t = _optimize(y, False)
422 ws.append(w)
422 ws.append(w)
423 ts.append(t)
423 ts.append(t)
424 del ss[:]
424 del ss[:]
425 for y in getlist(x[1]):
425 for y in getlist(x[1]):
426 w, t = _optimize(y, False)
426 w, t = _optimize(y, False)
427 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
427 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
428 ss.append((w, t))
428 ss.append((w, t))
429 continue
429 continue
430 flushss()
430 flushss()
431 ws.append(w)
431 ws.append(w)
432 ts.append(t)
432 ts.append(t)
433 flushss()
433 flushss()
434 if len(ts) == 1:
434 if len(ts) == 1:
435 return ws[0], ts[0] # 'or' operation is fully optimized out
435 return ws[0], ts[0] # 'or' operation is fully optimized out
436 # we can't reorder trees by weight because it would change the order.
436 # we can't reorder trees by weight because it would change the order.
437 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
437 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
438 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
438 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
439 return max(ws), (op, ('list',) + tuple(ts), order)
439 return max(ws), (op, ('list',) + tuple(ts), order)
440 elif op == 'not':
440 elif op == 'not':
441 # Optimize not public() to _notpublic() because we have a fast version
441 # Optimize not public() to _notpublic() because we have a fast version
442 if x[1][:3] == ('func', ('symbol', 'public'), None):
442 if x[1][:3] == ('func', ('symbol', 'public'), None):
443 order = x[1][3]
443 order = x[1][3]
444 newsym = ('func', ('symbol', '_notpublic'), None, order)
444 newsym = ('func', ('symbol', '_notpublic'), None, order)
445 o = _optimize(newsym, not small)
445 o = _optimize(newsym, not small)
446 return o[0], o[1]
446 return o[0], o[1]
447 else:
447 else:
448 o = _optimize(x[1], not small)
448 o = _optimize(x[1], not small)
449 order = x[2]
449 order = x[2]
450 return o[0], (op, o[1], order)
450 return o[0], (op, o[1], order)
451 elif op == 'rangeall':
451 elif op == 'rangeall':
452 return smallbonus, x
452 return smallbonus, x
453 elif op in ('rangepre', 'rangepost', 'parentpost'):
453 elif op in ('rangepre', 'rangepost', 'parentpost'):
454 o = _optimize(x[1], small)
454 o = _optimize(x[1], small)
455 order = x[2]
455 order = x[2]
456 return o[0], (op, o[1], order)
456 return o[0], (op, o[1], order)
457 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
457 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
458 wa, ta = _optimize(x[1], small)
458 wa, ta = _optimize(x[1], small)
459 wb, tb = _optimize(x[2], small)
459 wb, tb = _optimize(x[2], small)
460 order = x[3]
460 order = x[3]
461 return wa + wb, (op, ta, tb, order)
461 return wa + wb, (op, ta, tb, order)
462 elif op == 'list':
462 elif op == 'list':
463 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
463 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
464 return sum(ws), (op,) + ts
464 return sum(ws), (op,) + ts
465 elif op == 'keyvalue':
465 elif op == 'keyvalue':
466 w, t = _optimize(x[2], small)
466 w, t = _optimize(x[2], small)
467 return w, (op, x[1], t)
467 return w, (op, x[1], t)
468 elif op == 'func':
468 elif op == 'func':
469 f = getsymbol(x[1])
469 f = getsymbol(x[1])
470 wa, ta = _optimize(x[2], small)
470 wa, ta = _optimize(x[2], small)
471 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
471 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
472 'keyword', 'outgoing', 'user', 'destination'):
472 'keyword', 'outgoing', 'user', 'destination'):
473 w = 10 # slow
473 w = 10 # slow
474 elif f in ('modifies', 'adds', 'removes'):
474 elif f in ('modifies', 'adds', 'removes'):
475 w = 30 # slower
475 w = 30 # slower
476 elif f == "contains":
476 elif f == "contains":
477 w = 100 # very slow
477 w = 100 # very slow
478 elif f == "ancestor":
478 elif f == "ancestor":
479 w = 1 * smallbonus
479 w = 1 * smallbonus
480 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
480 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
481 w = 0
481 w = 0
482 elif f == "sort":
482 elif f == "sort":
483 w = 10 # assume most sorts look at changelog
483 w = 10 # assume most sorts look at changelog
484 else:
484 else:
485 w = 1
485 w = 1
486 order = x[3]
486 order = x[3]
487 return w + wa, (op, x[1], ta, order)
487 return w + wa, (op, x[1], ta, order)
488 raise ValueError('invalid operator %r' % op)
488 raise ValueError('invalid operator %r' % op)
489
489
490 def optimize(tree):
490 def optimize(tree):
491 """Optimize evaluatable tree
491 """Optimize evaluatable tree
492
492
493 All pseudo operations should be transformed beforehand.
493 All pseudo operations should be transformed beforehand.
494 """
494 """
495 _weight, newtree = _optimize(tree, small=True)
495 _weight, newtree = _optimize(tree, small=True)
496 return newtree
496 return newtree
497
497
498 # the set of valid characters for the initial letter of symbols in
498 # the set of valid characters for the initial letter of symbols in
499 # alias declarations and definitions
499 # alias declarations and definitions
500 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
500 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
501
501
502 def _parsewith(spec, lookup=None, syminitletters=None):
502 def _parsewith(spec, lookup=None, syminitletters=None):
503 """Generate a parse tree of given spec with given tokenizing options
503 """Generate a parse tree of given spec with given tokenizing options
504
504
505 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
505 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
506 ('func', ('symbol', 'foo'), ('symbol', '$1'))
506 ('func', ('symbol', 'foo'), ('symbol', '$1'))
507 >>> _parsewith('$1')
507 >>> _parsewith('$1')
508 Traceback (most recent call last):
508 Traceback (most recent call last):
509 ...
509 ...
510 ParseError: ("syntax error in revset '$1'", 0)
510 ParseError: ("syntax error in revset '$1'", 0)
511 >>> _parsewith('foo bar')
511 >>> _parsewith('foo bar')
512 Traceback (most recent call last):
512 Traceback (most recent call last):
513 ...
513 ...
514 ParseError: ('invalid token', 4)
514 ParseError: ('invalid token', 4)
515 """
515 """
516 p = parser.parser(elements)
516 p = parser.parser(elements)
517 tree, pos = p.parse(tokenize(spec, lookup=lookup,
517 tree, pos = p.parse(tokenize(spec, lookup=lookup,
518 syminitletters=syminitletters))
518 syminitletters=syminitletters))
519 if pos != len(spec):
519 if pos != len(spec):
520 raise error.ParseError(_('invalid token'), pos)
520 raise error.ParseError(_('invalid token'), pos)
521 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
521 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
522
522
523 class _aliasrules(parser.basealiasrules):
523 class _aliasrules(parser.basealiasrules):
524 """Parsing and expansion rule set of revset aliases"""
524 """Parsing and expansion rule set of revset aliases"""
525 _section = _('revset alias')
525 _section = _('revset alias')
526
526
527 @staticmethod
527 @staticmethod
528 def _parse(spec):
528 def _parse(spec):
529 """Parse alias declaration/definition ``spec``
529 """Parse alias declaration/definition ``spec``
530
530
531 This allows symbol names to use also ``$`` as an initial letter
531 This allows symbol names to use also ``$`` as an initial letter
532 (for backward compatibility), and callers of this function should
532 (for backward compatibility), and callers of this function should
533 examine whether ``$`` is used also for unexpected symbols or not.
533 examine whether ``$`` is used also for unexpected symbols or not.
534 """
534 """
535 return _parsewith(spec, syminitletters=_aliassyminitletters)
535 return _parsewith(spec, syminitletters=_aliassyminitletters)
536
536
537 @staticmethod
537 @staticmethod
538 def _trygetfunc(tree):
538 def _trygetfunc(tree):
539 if tree[0] == 'func' and tree[1][0] == 'symbol':
539 if tree[0] == 'func' and tree[1][0] == 'symbol':
540 return tree[1][1], getlist(tree[2])
540 return tree[1][1], getlist(tree[2])
541
541
542 def expandaliases(ui, tree):
542 def expandaliases(ui, tree):
543 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
543 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
544 tree = _aliasrules.expand(aliases, tree)
544 tree = _aliasrules.expand(aliases, tree)
545 # warn about problematic (but not referred) aliases
545 # warn about problematic (but not referred) aliases
546 for name, alias in sorted(aliases.iteritems()):
546 for name, alias in sorted(aliases.iteritems()):
547 if alias.error and not alias.warned:
547 if alias.error and not alias.warned:
548 ui.warn(_('warning: %s\n') % (alias.error))
548 ui.warn(_('warning: %s\n') % (alias.error))
549 alias.warned = True
549 alias.warned = True
550 return tree
550 return tree
551
551
552 def foldconcat(tree):
552 def foldconcat(tree):
553 """Fold elements to be concatenated by `##`
553 """Fold elements to be concatenated by `##`
554 """
554 """
555 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
555 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
556 return tree
556 return tree
557 if tree[0] == '_concat':
557 if tree[0] == '_concat':
558 pending = [tree]
558 pending = [tree]
559 l = []
559 l = []
560 while pending:
560 while pending:
561 e = pending.pop()
561 e = pending.pop()
562 if e[0] == '_concat':
562 if e[0] == '_concat':
563 pending.extend(reversed(e[1:]))
563 pending.extend(reversed(e[1:]))
564 elif e[0] in ('string', 'symbol'):
564 elif e[0] in ('string', 'symbol'):
565 l.append(e[1])
565 l.append(e[1])
566 else:
566 else:
567 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
567 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
568 raise error.ParseError(msg)
568 raise error.ParseError(msg)
569 return ('string', ''.join(l))
569 return ('string', ''.join(l))
570 else:
570 else:
571 return tuple(foldconcat(t) for t in tree)
571 return tuple(foldconcat(t) for t in tree)
572
572
573 def parse(spec, lookup=None):
573 def parse(spec, lookup=None):
574 return _parsewith(spec, lookup=lookup)
574 return _parsewith(spec, lookup=lookup)
575
575
576 def _quote(s):
577 return repr(str(s))
578
576 def formatspec(expr, *args):
579 def formatspec(expr, *args):
577 '''
580 '''
578 This is a convenience function for using revsets internally, and
581 This is a convenience function for using revsets internally, and
579 escapes arguments appropriately. Aliases are intentionally ignored
582 escapes arguments appropriately. Aliases are intentionally ignored
580 so that intended expression behavior isn't accidentally subverted.
583 so that intended expression behavior isn't accidentally subverted.
581
584
582 Supported arguments:
585 Supported arguments:
583
586
584 %r = revset expression, parenthesized
587 %r = revset expression, parenthesized
585 %d = int(arg), no quoting
588 %d = int(arg), no quoting
586 %s = string(arg), escaped and single-quoted
589 %s = string(arg), escaped and single-quoted
587 %b = arg.branch(), escaped and single-quoted
590 %b = arg.branch(), escaped and single-quoted
588 %n = hex(arg), single-quoted
591 %n = hex(arg), single-quoted
589 %% = a literal '%'
592 %% = a literal '%'
590
593
591 Prefixing the type with 'l' specifies a parenthesized list of that type.
594 Prefixing the type with 'l' specifies a parenthesized list of that type.
592
595
593 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
596 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
594 '(10 or 11):: and ((this()) or (that()))'
597 '(10 or 11):: and ((this()) or (that()))'
595 >>> formatspec('%d:: and not %d::', 10, 20)
598 >>> formatspec('%d:: and not %d::', 10, 20)
596 '10:: and not 20::'
599 '10:: and not 20::'
597 >>> formatspec('%ld or %ld', [], [1])
600 >>> formatspec('%ld or %ld', [], [1])
598 "_list('') or 1"
601 "_list('') or 1"
599 >>> formatspec('keyword(%s)', 'foo\\xe9')
602 >>> formatspec('keyword(%s)', 'foo\\xe9')
600 "keyword('foo\\\\xe9')"
603 "keyword('foo\\\\xe9')"
601 >>> b = lambda: 'default'
604 >>> b = lambda: 'default'
602 >>> b.branch = b
605 >>> b.branch = b
603 >>> formatspec('branch(%b)', b)
606 >>> formatspec('branch(%b)', b)
604 "branch('default')"
607 "branch('default')"
605 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
608 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
606 "root(_list('a\\x00b\\x00c\\x00d'))"
609 "root(_list('a\\x00b\\x00c\\x00d'))"
607 '''
610 '''
608
611
609 def quote(s):
610 return repr(str(s))
611
612 def argtype(c, arg):
612 def argtype(c, arg):
613 if c == 'd':
613 if c == 'd':
614 return '%d' % int(arg)
614 return '%d' % int(arg)
615 elif c == 's':
615 elif c == 's':
616 return quote(arg)
616 return _quote(arg)
617 elif c == 'r':
617 elif c == 'r':
618 parse(arg) # make sure syntax errors are confined
618 parse(arg) # make sure syntax errors are confined
619 return '(%s)' % arg
619 return '(%s)' % arg
620 elif c == 'n':
620 elif c == 'n':
621 return quote(node.hex(arg))
621 return _quote(node.hex(arg))
622 elif c == 'b':
622 elif c == 'b':
623 return quote(arg.branch())
623 return _quote(arg.branch())
624
624
625 def listexp(s, t):
625 def listexp(s, t):
626 l = len(s)
626 l = len(s)
627 if l == 0:
627 if l == 0:
628 return "_list('')"
628 return "_list('')"
629 elif l == 1:
629 elif l == 1:
630 return argtype(t, s[0])
630 return argtype(t, s[0])
631 elif t == 'd':
631 elif t == 'd':
632 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
632 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
633 elif t == 's':
633 elif t == 's':
634 return "_list('%s')" % "\0".join(s)
634 return "_list('%s')" % "\0".join(s)
635 elif t == 'n':
635 elif t == 'n':
636 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
636 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
637 elif t == 'b':
637 elif t == 'b':
638 return "_list('%s')" % "\0".join(a.branch() for a in s)
638 return "_list('%s')" % "\0".join(a.branch() for a in s)
639
639
640 m = l // 2
640 m = l // 2
641 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
641 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
642
642
643 expr = pycompat.bytestr(expr)
643 expr = pycompat.bytestr(expr)
644 ret = ''
644 ret = ''
645 pos = 0
645 pos = 0
646 arg = 0
646 arg = 0
647 while pos < len(expr):
647 while pos < len(expr):
648 c = expr[pos]
648 c = expr[pos]
649 if c == '%':
649 if c == '%':
650 pos += 1
650 pos += 1
651 d = expr[pos]
651 d = expr[pos]
652 if d == '%':
652 if d == '%':
653 ret += d
653 ret += d
654 elif d in 'dsnbr':
654 elif d in 'dsnbr':
655 ret += argtype(d, args[arg])
655 ret += argtype(d, args[arg])
656 arg += 1
656 arg += 1
657 elif d == 'l':
657 elif d == 'l':
658 # a list of some type
658 # a list of some type
659 pos += 1
659 pos += 1
660 d = expr[pos]
660 d = expr[pos]
661 ret += listexp(list(args[arg]), d)
661 ret += listexp(list(args[arg]), d)
662 arg += 1
662 arg += 1
663 else:
663 else:
664 raise error.Abort(_('unexpected revspec format character %s')
664 raise error.Abort(_('unexpected revspec format character %s')
665 % d)
665 % d)
666 else:
666 else:
667 ret += c
667 ret += c
668 pos += 1
668 pos += 1
669
669
670 return ret
670 return ret
671
671
672 def prettyformat(tree):
672 def prettyformat(tree):
673 return parser.prettyformat(tree, ('string', 'symbol'))
673 return parser.prettyformat(tree, ('string', 'symbol'))
674
674
675 def depth(tree):
675 def depth(tree):
676 if isinstance(tree, tuple):
676 if isinstance(tree, tuple):
677 return max(map(depth, tree)) + 1
677 return max(map(depth, tree)) + 1
678 else:
678 else:
679 return 0
679 return 0
680
680
681 def funcsused(tree):
681 def funcsused(tree):
682 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
682 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
683 return set()
683 return set()
684 else:
684 else:
685 funcs = set()
685 funcs = set()
686 for s in tree[1:]:
686 for s in tree[1:]:
687 funcs |= funcsused(s)
687 funcs |= funcsused(s)
688 if tree[0] == 'func':
688 if tree[0] == 'func':
689 funcs.add(tree[1][1])
689 funcs.add(tree[1][1])
690 return funcs
690 return funcs
General Comments 0
You need to be logged in to leave comments. Login now