##// END OF EJS Templates
revsetlang: slice out single bytes instead of indexing...
Augie Fackler -
r31355:77270ec0 default
parent child Browse files
Show More
@@ -1,684 +1,685
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 )
18 )
19
19
20 elements = {
20 elements = {
21 # token-type: binding-strength, primary, prefix, infix, suffix
21 # token-type: binding-strength, primary, prefix, infix, suffix
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
23 "##": (20, None, None, ("_concat", 20), None),
23 "##": (20, None, None, ("_concat", 20), None),
24 "~": (18, None, None, ("ancestor", 18), None),
24 "~": (18, None, None, ("ancestor", 18), None),
25 "^": (18, None, None, ("parent", 18), "parentpost"),
25 "^": (18, None, None, ("parent", 18), "parentpost"),
26 "-": (5, None, ("negate", 19), ("minus", 5), None),
26 "-": (5, None, ("negate", 19), ("minus", 5), None),
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
30 "not": (10, None, ("not", 10), None, None),
30 "not": (10, None, ("not", 10), None, None),
31 "!": (10, None, ("not", 10), None, None),
31 "!": (10, None, ("not", 10), None, None),
32 "and": (5, None, None, ("and", 5), None),
32 "and": (5, None, None, ("and", 5), None),
33 "&": (5, None, None, ("and", 5), None),
33 "&": (5, None, None, ("and", 5), None),
34 "%": (5, None, None, ("only", 5), "onlypost"),
34 "%": (5, None, None, ("only", 5), "onlypost"),
35 "or": (4, None, None, ("or", 4), None),
35 "or": (4, None, None, ("or", 4), None),
36 "|": (4, None, None, ("or", 4), None),
36 "|": (4, None, None, ("or", 4), None),
37 "+": (4, None, None, ("or", 4), None),
37 "+": (4, None, None, ("or", 4), None),
38 "=": (3, None, None, ("keyvalue", 3), None),
38 "=": (3, None, None, ("keyvalue", 3), None),
39 ",": (2, None, None, ("list", 2), None),
39 ",": (2, None, None, ("list", 2), None),
40 ")": (0, None, None, None, None),
40 ")": (0, None, None, None, None),
41 "symbol": (0, "symbol", None, None, None),
41 "symbol": (0, "symbol", None, None, None),
42 "string": (0, "string", None, None, None),
42 "string": (0, "string", None, None, None),
43 "end": (0, None, None, None, None),
43 "end": (0, None, None, None, None),
44 }
44 }
45
45
46 keywords = set(['and', 'or', 'not'])
46 keywords = set(['and', 'or', 'not'])
47
47
48 # default set of valid characters for the initial letter of symbols
48 # default set of valid characters for the initial letter of symbols
49 _syminitletters = set(
49 _syminitletters = set(
50 string.ascii_letters +
50 string.ascii_letters +
51 string.digits + pycompat.sysstr('._@')) | set(map(chr, xrange(128, 256)))
51 string.digits + pycompat.sysstr('._@')) | set(map(chr, xrange(128, 256)))
52
52
53 # default set of valid characters for non-initial letters of symbols
53 # default set of valid characters for non-initial letters of symbols
54 _symletters = _syminitletters | set(pycompat.sysstr('-/'))
54 _symletters = _syminitletters | set(pycompat.sysstr('-/'))
55
55
56 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
56 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
57 '''
57 '''
58 Parse a revset statement into a stream of tokens
58 Parse a revset statement into a stream of tokens
59
59
60 ``syminitletters`` is the set of valid characters for the initial
60 ``syminitletters`` is the set of valid characters for the initial
61 letter of symbols.
61 letter of symbols.
62
62
63 By default, character ``c`` is recognized as valid for initial
63 By default, character ``c`` is recognized as valid for initial
64 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
64 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
65
65
66 ``symletters`` is the set of valid characters for non-initial
66 ``symletters`` is the set of valid characters for non-initial
67 letters of symbols.
67 letters of symbols.
68
68
69 By default, character ``c`` is recognized as valid for non-initial
69 By default, character ``c`` is recognized as valid for non-initial
70 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
70 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
71
71
72 Check that @ is a valid unquoted token character (issue3686):
72 Check that @ is a valid unquoted token character (issue3686):
73 >>> list(tokenize("@::"))
73 >>> list(tokenize("@::"))
74 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
74 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
75
75
76 '''
76 '''
77 if syminitletters is None:
77 if syminitletters is None:
78 syminitletters = _syminitletters
78 syminitletters = _syminitletters
79 if symletters is None:
79 if symletters is None:
80 symletters = _symletters
80 symletters = _symletters
81
81
82 if program and lookup:
82 if program and lookup:
83 # attempt to parse old-style ranges first to deal with
83 # attempt to parse old-style ranges first to deal with
84 # things like old-tag which contain query metacharacters
84 # things like old-tag which contain query metacharacters
85 parts = program.split(':', 1)
85 parts = program.split(':', 1)
86 if all(lookup(sym) for sym in parts if sym):
86 if all(lookup(sym) for sym in parts if sym):
87 if parts[0]:
87 if parts[0]:
88 yield ('symbol', parts[0], 0)
88 yield ('symbol', parts[0], 0)
89 if len(parts) > 1:
89 if len(parts) > 1:
90 s = len(parts[0])
90 s = len(parts[0])
91 yield (':', None, s)
91 yield (':', None, s)
92 if parts[1]:
92 if parts[1]:
93 yield ('symbol', parts[1], s + 1)
93 yield ('symbol', parts[1], s + 1)
94 yield ('end', None, len(program))
94 yield ('end', None, len(program))
95 return
95 return
96
96
97 pos, l = 0, len(program)
97 pos, l = 0, len(program)
98 while pos < l:
98 while pos < l:
99 c = program[pos]
99 c = program[pos:pos + 1]
100 if c.isspace(): # skip inter-token whitespace
100 if c.isspace(): # skip inter-token whitespace
101 pass
101 pass
102 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
102 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
103 yield ('::', None, pos)
103 yield ('::', None, pos)
104 pos += 1 # skip ahead
104 pos += 1 # skip ahead
105 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
105 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
106 yield ('..', None, pos)
106 yield ('..', None, pos)
107 pos += 1 # skip ahead
107 pos += 1 # skip ahead
108 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
108 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
109 yield ('##', None, pos)
109 yield ('##', None, pos)
110 pos += 1 # skip ahead
110 pos += 1 # skip ahead
111 elif c in "():=,-|&+!~^%": # handle simple operators
111 elif c in "():=,-|&+!~^%": # handle simple operators
112 yield (c, None, pos)
112 yield (c, None, pos)
113 elif (c in '"\'' or c == 'r' and
113 elif (c in '"\'' or c == 'r' and
114 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
114 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
115 if c == 'r':
115 if c == 'r':
116 pos += 1
116 pos += 1
117 c = program[pos]
117 c = program[pos:pos + 1]
118 decode = lambda x: x
118 decode = lambda x: x
119 else:
119 else:
120 decode = parser.unescapestr
120 decode = parser.unescapestr
121 pos += 1
121 pos += 1
122 s = pos
122 s = pos
123 while pos < l: # find closing quote
123 while pos < l: # find closing quote
124 d = program[pos]
124 d = program[pos:pos + 1]
125 if d == '\\': # skip over escaped characters
125 if d == '\\': # skip over escaped characters
126 pos += 2
126 pos += 2
127 continue
127 continue
128 if d == c:
128 if d == c:
129 yield ('string', decode(program[s:pos]), s)
129 yield ('string', decode(program[s:pos]), s)
130 break
130 break
131 pos += 1
131 pos += 1
132 else:
132 else:
133 raise error.ParseError(_("unterminated string"), s)
133 raise error.ParseError(_("unterminated string"), s)
134 # gather up a symbol/keyword
134 # gather up a symbol/keyword
135 elif c in syminitletters:
135 elif c in syminitletters:
136 s = pos
136 s = pos
137 pos += 1
137 pos += 1
138 while pos < l: # find end of symbol
138 while pos < l: # find end of symbol
139 d = program[pos]
139 d = program[pos:pos + 1]
140 if d not in symletters:
140 if d not in symletters:
141 break
141 break
142 if d == '.' and program[pos - 1] == '.': # special case for ..
142 if (d == '.'
143 and program[pos - 1:pos] == '.'): # special case for ..
143 pos -= 1
144 pos -= 1
144 break
145 break
145 pos += 1
146 pos += 1
146 sym = program[s:pos]
147 sym = program[s:pos]
147 if sym in keywords: # operator keywords
148 if sym in keywords: # operator keywords
148 yield (sym, None, s)
149 yield (sym, None, s)
149 elif '-' in sym:
150 elif '-' in sym:
150 # some jerk gave us foo-bar-baz, try to check if it's a symbol
151 # some jerk gave us foo-bar-baz, try to check if it's a symbol
151 if lookup and lookup(sym):
152 if lookup and lookup(sym):
152 # looks like a real symbol
153 # looks like a real symbol
153 yield ('symbol', sym, s)
154 yield ('symbol', sym, s)
154 else:
155 else:
155 # looks like an expression
156 # looks like an expression
156 parts = sym.split('-')
157 parts = sym.split('-')
157 for p in parts[:-1]:
158 for p in parts[:-1]:
158 if p: # possible consecutive -
159 if p: # possible consecutive -
159 yield ('symbol', p, s)
160 yield ('symbol', p, s)
160 s += len(p)
161 s += len(p)
161 yield ('-', None, pos)
162 yield ('-', None, pos)
162 s += 1
163 s += 1
163 if parts[-1]: # possible trailing -
164 if parts[-1]: # possible trailing -
164 yield ('symbol', parts[-1], s)
165 yield ('symbol', parts[-1], s)
165 else:
166 else:
166 yield ('symbol', sym, s)
167 yield ('symbol', sym, s)
167 pos -= 1
168 pos -= 1
168 else:
169 else:
169 raise error.ParseError(_("syntax error in revset '%s'") %
170 raise error.ParseError(_("syntax error in revset '%s'") %
170 program, pos)
171 program, pos)
171 pos += 1
172 pos += 1
172 yield ('end', None, pos)
173 yield ('end', None, pos)
173
174
174 # helpers
175 # helpers
175
176
176 _notset = object()
177 _notset = object()
177
178
178 def getsymbol(x):
179 def getsymbol(x):
179 if x and x[0] == 'symbol':
180 if x and x[0] == 'symbol':
180 return x[1]
181 return x[1]
181 raise error.ParseError(_('not a symbol'))
182 raise error.ParseError(_('not a symbol'))
182
183
183 def getstring(x, err):
184 def getstring(x, err):
184 if x and (x[0] == 'string' or x[0] == 'symbol'):
185 if x and (x[0] == 'string' or x[0] == 'symbol'):
185 return x[1]
186 return x[1]
186 raise error.ParseError(err)
187 raise error.ParseError(err)
187
188
188 def getinteger(x, err, default=_notset):
189 def getinteger(x, err, default=_notset):
189 if not x and default is not _notset:
190 if not x and default is not _notset:
190 return default
191 return default
191 try:
192 try:
192 return int(getstring(x, err))
193 return int(getstring(x, err))
193 except ValueError:
194 except ValueError:
194 raise error.ParseError(err)
195 raise error.ParseError(err)
195
196
196 def getlist(x):
197 def getlist(x):
197 if not x:
198 if not x:
198 return []
199 return []
199 if x[0] == 'list':
200 if x[0] == 'list':
200 return list(x[1:])
201 return list(x[1:])
201 return [x]
202 return [x]
202
203
203 def getrange(x, err):
204 def getrange(x, err):
204 if not x:
205 if not x:
205 raise error.ParseError(err)
206 raise error.ParseError(err)
206 op = x[0]
207 op = x[0]
207 if op == 'range':
208 if op == 'range':
208 return x[1], x[2]
209 return x[1], x[2]
209 elif op == 'rangepre':
210 elif op == 'rangepre':
210 return None, x[1]
211 return None, x[1]
211 elif op == 'rangepost':
212 elif op == 'rangepost':
212 return x[1], None
213 return x[1], None
213 elif op == 'rangeall':
214 elif op == 'rangeall':
214 return None, None
215 return None, None
215 raise error.ParseError(err)
216 raise error.ParseError(err)
216
217
217 def getargs(x, min, max, err):
218 def getargs(x, min, max, err):
218 l = getlist(x)
219 l = getlist(x)
219 if len(l) < min or (max >= 0 and len(l) > max):
220 if len(l) < min or (max >= 0 and len(l) > max):
220 raise error.ParseError(err)
221 raise error.ParseError(err)
221 return l
222 return l
222
223
223 def getargsdict(x, funcname, keys):
224 def getargsdict(x, funcname, keys):
224 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
225 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
225 keyvaluenode='keyvalue', keynode='symbol')
226 keyvaluenode='keyvalue', keynode='symbol')
226
227
227 # Constants for ordering requirement, used in _analyze():
228 # Constants for ordering requirement, used in _analyze():
228 #
229 #
229 # If 'define', any nested functions and operations can change the ordering of
230 # If 'define', any nested functions and operations can change the ordering of
230 # the entries in the set. If 'follow', any nested functions and operations
231 # the entries in the set. If 'follow', any nested functions and operations
231 # should take the ordering specified by the first operand to the '&' operator.
232 # should take the ordering specified by the first operand to the '&' operator.
232 #
233 #
233 # For instance,
234 # For instance,
234 #
235 #
235 # X & (Y | Z)
236 # X & (Y | Z)
236 # ^ ^^^^^^^
237 # ^ ^^^^^^^
237 # | follow
238 # | follow
238 # define
239 # define
239 #
240 #
240 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
241 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
241 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
242 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
242 #
243 #
243 # 'any' means the order doesn't matter. For instance,
244 # 'any' means the order doesn't matter. For instance,
244 #
245 #
245 # X & !Y
246 # X & !Y
246 # ^
247 # ^
247 # any
248 # any
248 #
249 #
249 # 'y()' can either enforce its ordering requirement or take the ordering
250 # 'y()' can either enforce its ordering requirement or take the ordering
250 # specified by 'x()' because 'not()' doesn't care the order.
251 # specified by 'x()' because 'not()' doesn't care the order.
251 #
252 #
252 # Transition of ordering requirement:
253 # Transition of ordering requirement:
253 #
254 #
254 # 1. starts with 'define'
255 # 1. starts with 'define'
255 # 2. shifts to 'follow' by 'x & y'
256 # 2. shifts to 'follow' by 'x & y'
256 # 3. changes back to 'define' on function call 'f(x)' or function-like
257 # 3. changes back to 'define' on function call 'f(x)' or function-like
257 # operation 'x (f) y' because 'f' may have its own ordering requirement
258 # operation 'x (f) y' because 'f' may have its own ordering requirement
258 # for 'x' and 'y' (e.g. 'first(x)')
259 # for 'x' and 'y' (e.g. 'first(x)')
259 #
260 #
260 anyorder = 'any' # don't care the order
261 anyorder = 'any' # don't care the order
261 defineorder = 'define' # should define the order
262 defineorder = 'define' # should define the order
262 followorder = 'follow' # must follow the current order
263 followorder = 'follow' # must follow the current order
263
264
264 # transition table for 'x & y', from the current expression 'x' to 'y'
265 # transition table for 'x & y', from the current expression 'x' to 'y'
265 _tofolloworder = {
266 _tofolloworder = {
266 anyorder: anyorder,
267 anyorder: anyorder,
267 defineorder: followorder,
268 defineorder: followorder,
268 followorder: followorder,
269 followorder: followorder,
269 }
270 }
270
271
271 def _matchonly(revs, bases):
272 def _matchonly(revs, bases):
272 """
273 """
273 >>> f = lambda *args: _matchonly(*map(parse, args))
274 >>> f = lambda *args: _matchonly(*map(parse, args))
274 >>> f('ancestors(A)', 'not ancestors(B)')
275 >>> f('ancestors(A)', 'not ancestors(B)')
275 ('list', ('symbol', 'A'), ('symbol', 'B'))
276 ('list', ('symbol', 'A'), ('symbol', 'B'))
276 """
277 """
277 if (revs is not None
278 if (revs is not None
278 and revs[0] == 'func'
279 and revs[0] == 'func'
279 and getsymbol(revs[1]) == 'ancestors'
280 and getsymbol(revs[1]) == 'ancestors'
280 and bases is not None
281 and bases is not None
281 and bases[0] == 'not'
282 and bases[0] == 'not'
282 and bases[1][0] == 'func'
283 and bases[1][0] == 'func'
283 and getsymbol(bases[1][1]) == 'ancestors'):
284 and getsymbol(bases[1][1]) == 'ancestors'):
284 return ('list', revs[2], bases[1][2])
285 return ('list', revs[2], bases[1][2])
285
286
286 def _fixops(x):
287 def _fixops(x):
287 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
288 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
288 handled well by our simple top-down parser"""
289 handled well by our simple top-down parser"""
289 if not isinstance(x, tuple):
290 if not isinstance(x, tuple):
290 return x
291 return x
291
292
292 op = x[0]
293 op = x[0]
293 if op == 'parent':
294 if op == 'parent':
294 # x^:y means (x^) : y, not x ^ (:y)
295 # x^:y means (x^) : y, not x ^ (:y)
295 # x^: means (x^) :, not x ^ (:)
296 # x^: means (x^) :, not x ^ (:)
296 post = ('parentpost', x[1])
297 post = ('parentpost', x[1])
297 if x[2][0] == 'dagrangepre':
298 if x[2][0] == 'dagrangepre':
298 return _fixops(('dagrange', post, x[2][1]))
299 return _fixops(('dagrange', post, x[2][1]))
299 elif x[2][0] == 'rangepre':
300 elif x[2][0] == 'rangepre':
300 return _fixops(('range', post, x[2][1]))
301 return _fixops(('range', post, x[2][1]))
301 elif x[2][0] == 'rangeall':
302 elif x[2][0] == 'rangeall':
302 return _fixops(('rangepost', post))
303 return _fixops(('rangepost', post))
303 elif op == 'or':
304 elif op == 'or':
304 # make number of arguments deterministic:
305 # make number of arguments deterministic:
305 # x + y + z -> (or x y z) -> (or (list x y z))
306 # x + y + z -> (or x y z) -> (or (list x y z))
306 return (op, _fixops(('list',) + x[1:]))
307 return (op, _fixops(('list',) + x[1:]))
307
308
308 return (op,) + tuple(_fixops(y) for y in x[1:])
309 return (op,) + tuple(_fixops(y) for y in x[1:])
309
310
310 def _analyze(x, order):
311 def _analyze(x, order):
311 if x is None:
312 if x is None:
312 return x
313 return x
313
314
314 op = x[0]
315 op = x[0]
315 if op == 'minus':
316 if op == 'minus':
316 return _analyze(('and', x[1], ('not', x[2])), order)
317 return _analyze(('and', x[1], ('not', x[2])), order)
317 elif op == 'only':
318 elif op == 'only':
318 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
319 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
319 return _analyze(t, order)
320 return _analyze(t, order)
320 elif op == 'onlypost':
321 elif op == 'onlypost':
321 return _analyze(('func', ('symbol', 'only'), x[1]), order)
322 return _analyze(('func', ('symbol', 'only'), x[1]), order)
322 elif op == 'dagrangepre':
323 elif op == 'dagrangepre':
323 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
324 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
324 elif op == 'dagrangepost':
325 elif op == 'dagrangepost':
325 return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
326 return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
326 elif op == 'negate':
327 elif op == 'negate':
327 s = getstring(x[1], _("can't negate that"))
328 s = getstring(x[1], _("can't negate that"))
328 return _analyze(('string', '-' + s), order)
329 return _analyze(('string', '-' + s), order)
329 elif op in ('string', 'symbol'):
330 elif op in ('string', 'symbol'):
330 return x
331 return x
331 elif op == 'and':
332 elif op == 'and':
332 ta = _analyze(x[1], order)
333 ta = _analyze(x[1], order)
333 tb = _analyze(x[2], _tofolloworder[order])
334 tb = _analyze(x[2], _tofolloworder[order])
334 return (op, ta, tb, order)
335 return (op, ta, tb, order)
335 elif op == 'or':
336 elif op == 'or':
336 return (op, _analyze(x[1], order), order)
337 return (op, _analyze(x[1], order), order)
337 elif op == 'not':
338 elif op == 'not':
338 return (op, _analyze(x[1], anyorder), order)
339 return (op, _analyze(x[1], anyorder), order)
339 elif op == 'rangeall':
340 elif op == 'rangeall':
340 return (op, None, order)
341 return (op, None, order)
341 elif op in ('rangepre', 'rangepost', 'parentpost'):
342 elif op in ('rangepre', 'rangepost', 'parentpost'):
342 return (op, _analyze(x[1], defineorder), order)
343 return (op, _analyze(x[1], defineorder), order)
343 elif op == 'group':
344 elif op == 'group':
344 return _analyze(x[1], order)
345 return _analyze(x[1], order)
345 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
346 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
346 ta = _analyze(x[1], defineorder)
347 ta = _analyze(x[1], defineorder)
347 tb = _analyze(x[2], defineorder)
348 tb = _analyze(x[2], defineorder)
348 return (op, ta, tb, order)
349 return (op, ta, tb, order)
349 elif op == 'list':
350 elif op == 'list':
350 return (op,) + tuple(_analyze(y, order) for y in x[1:])
351 return (op,) + tuple(_analyze(y, order) for y in x[1:])
351 elif op == 'keyvalue':
352 elif op == 'keyvalue':
352 return (op, x[1], _analyze(x[2], order))
353 return (op, x[1], _analyze(x[2], order))
353 elif op == 'func':
354 elif op == 'func':
354 f = getsymbol(x[1])
355 f = getsymbol(x[1])
355 d = defineorder
356 d = defineorder
356 if f == 'present':
357 if f == 'present':
357 # 'present(set)' is known to return the argument set with no
358 # 'present(set)' is known to return the argument set with no
358 # modification, so forward the current order to its argument
359 # modification, so forward the current order to its argument
359 d = order
360 d = order
360 return (op, x[1], _analyze(x[2], d), order)
361 return (op, x[1], _analyze(x[2], d), order)
361 raise ValueError('invalid operator %r' % op)
362 raise ValueError('invalid operator %r' % op)
362
363
363 def analyze(x, order=defineorder):
364 def analyze(x, order=defineorder):
364 """Transform raw parsed tree to evaluatable tree which can be fed to
365 """Transform raw parsed tree to evaluatable tree which can be fed to
365 optimize() or getset()
366 optimize() or getset()
366
367
367 All pseudo operations should be mapped to real operations or functions
368 All pseudo operations should be mapped to real operations or functions
368 defined in methods or symbols table respectively.
369 defined in methods or symbols table respectively.
369
370
370 'order' specifies how the current expression 'x' is ordered (see the
371 'order' specifies how the current expression 'x' is ordered (see the
371 constants defined above.)
372 constants defined above.)
372 """
373 """
373 return _analyze(x, order)
374 return _analyze(x, order)
374
375
375 def _optimize(x, small):
376 def _optimize(x, small):
376 if x is None:
377 if x is None:
377 return 0, x
378 return 0, x
378
379
379 smallbonus = 1
380 smallbonus = 1
380 if small:
381 if small:
381 smallbonus = .5
382 smallbonus = .5
382
383
383 op = x[0]
384 op = x[0]
384 if op in ('string', 'symbol'):
385 if op in ('string', 'symbol'):
385 return smallbonus, x # single revisions are small
386 return smallbonus, x # single revisions are small
386 elif op == 'and':
387 elif op == 'and':
387 wa, ta = _optimize(x[1], True)
388 wa, ta = _optimize(x[1], True)
388 wb, tb = _optimize(x[2], True)
389 wb, tb = _optimize(x[2], True)
389 order = x[3]
390 order = x[3]
390 w = min(wa, wb)
391 w = min(wa, wb)
391
392
392 # (::x and not ::y)/(not ::y and ::x) have a fast path
393 # (::x and not ::y)/(not ::y and ::x) have a fast path
393 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
394 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
394 if tm:
395 if tm:
395 return w, ('func', ('symbol', 'only'), tm, order)
396 return w, ('func', ('symbol', 'only'), tm, order)
396
397
397 if tb is not None and tb[0] == 'not':
398 if tb is not None and tb[0] == 'not':
398 return wa, ('difference', ta, tb[1], order)
399 return wa, ('difference', ta, tb[1], order)
399
400
400 if wa > wb:
401 if wa > wb:
401 return w, (op, tb, ta, order)
402 return w, (op, tb, ta, order)
402 return w, (op, ta, tb, order)
403 return w, (op, ta, tb, order)
403 elif op == 'or':
404 elif op == 'or':
404 # fast path for machine-generated expression, that is likely to have
405 # fast path for machine-generated expression, that is likely to have
405 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
406 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
406 order = x[2]
407 order = x[2]
407 ws, ts, ss = [], [], []
408 ws, ts, ss = [], [], []
408 def flushss():
409 def flushss():
409 if not ss:
410 if not ss:
410 return
411 return
411 if len(ss) == 1:
412 if len(ss) == 1:
412 w, t = ss[0]
413 w, t = ss[0]
413 else:
414 else:
414 s = '\0'.join(t[1] for w, t in ss)
415 s = '\0'.join(t[1] for w, t in ss)
415 y = ('func', ('symbol', '_list'), ('string', s), order)
416 y = ('func', ('symbol', '_list'), ('string', s), order)
416 w, t = _optimize(y, False)
417 w, t = _optimize(y, False)
417 ws.append(w)
418 ws.append(w)
418 ts.append(t)
419 ts.append(t)
419 del ss[:]
420 del ss[:]
420 for y in getlist(x[1]):
421 for y in getlist(x[1]):
421 w, t = _optimize(y, False)
422 w, t = _optimize(y, False)
422 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
423 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
423 ss.append((w, t))
424 ss.append((w, t))
424 continue
425 continue
425 flushss()
426 flushss()
426 ws.append(w)
427 ws.append(w)
427 ts.append(t)
428 ts.append(t)
428 flushss()
429 flushss()
429 if len(ts) == 1:
430 if len(ts) == 1:
430 return ws[0], ts[0] # 'or' operation is fully optimized out
431 return ws[0], ts[0] # 'or' operation is fully optimized out
431 # we can't reorder trees by weight because it would change the order.
432 # we can't reorder trees by weight because it would change the order.
432 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
433 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
433 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
434 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
434 return max(ws), (op, ('list',) + tuple(ts), order)
435 return max(ws), (op, ('list',) + tuple(ts), order)
435 elif op == 'not':
436 elif op == 'not':
436 # Optimize not public() to _notpublic() because we have a fast version
437 # Optimize not public() to _notpublic() because we have a fast version
437 if x[1][:3] == ('func', ('symbol', 'public'), None):
438 if x[1][:3] == ('func', ('symbol', 'public'), None):
438 order = x[1][3]
439 order = x[1][3]
439 newsym = ('func', ('symbol', '_notpublic'), None, order)
440 newsym = ('func', ('symbol', '_notpublic'), None, order)
440 o = _optimize(newsym, not small)
441 o = _optimize(newsym, not small)
441 return o[0], o[1]
442 return o[0], o[1]
442 else:
443 else:
443 o = _optimize(x[1], not small)
444 o = _optimize(x[1], not small)
444 order = x[2]
445 order = x[2]
445 return o[0], (op, o[1], order)
446 return o[0], (op, o[1], order)
446 elif op == 'rangeall':
447 elif op == 'rangeall':
447 return smallbonus, x
448 return smallbonus, x
448 elif op in ('rangepre', 'rangepost', 'parentpost'):
449 elif op in ('rangepre', 'rangepost', 'parentpost'):
449 o = _optimize(x[1], small)
450 o = _optimize(x[1], small)
450 order = x[2]
451 order = x[2]
451 return o[0], (op, o[1], order)
452 return o[0], (op, o[1], order)
452 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
453 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
453 wa, ta = _optimize(x[1], small)
454 wa, ta = _optimize(x[1], small)
454 wb, tb = _optimize(x[2], small)
455 wb, tb = _optimize(x[2], small)
455 order = x[3]
456 order = x[3]
456 return wa + wb, (op, ta, tb, order)
457 return wa + wb, (op, ta, tb, order)
457 elif op == 'list':
458 elif op == 'list':
458 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
459 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
459 return sum(ws), (op,) + ts
460 return sum(ws), (op,) + ts
460 elif op == 'keyvalue':
461 elif op == 'keyvalue':
461 w, t = _optimize(x[2], small)
462 w, t = _optimize(x[2], small)
462 return w, (op, x[1], t)
463 return w, (op, x[1], t)
463 elif op == 'func':
464 elif op == 'func':
464 f = getsymbol(x[1])
465 f = getsymbol(x[1])
465 wa, ta = _optimize(x[2], small)
466 wa, ta = _optimize(x[2], small)
466 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
467 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
467 'keyword', 'outgoing', 'user', 'destination'):
468 'keyword', 'outgoing', 'user', 'destination'):
468 w = 10 # slow
469 w = 10 # slow
469 elif f in ('modifies', 'adds', 'removes'):
470 elif f in ('modifies', 'adds', 'removes'):
470 w = 30 # slower
471 w = 30 # slower
471 elif f == "contains":
472 elif f == "contains":
472 w = 100 # very slow
473 w = 100 # very slow
473 elif f == "ancestor":
474 elif f == "ancestor":
474 w = 1 * smallbonus
475 w = 1 * smallbonus
475 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
476 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
476 w = 0
477 w = 0
477 elif f == "sort":
478 elif f == "sort":
478 w = 10 # assume most sorts look at changelog
479 w = 10 # assume most sorts look at changelog
479 else:
480 else:
480 w = 1
481 w = 1
481 order = x[3]
482 order = x[3]
482 return w + wa, (op, x[1], ta, order)
483 return w + wa, (op, x[1], ta, order)
483 raise ValueError('invalid operator %r' % op)
484 raise ValueError('invalid operator %r' % op)
484
485
485 def optimize(tree):
486 def optimize(tree):
486 """Optimize evaluatable tree
487 """Optimize evaluatable tree
487
488
488 All pseudo operations should be transformed beforehand.
489 All pseudo operations should be transformed beforehand.
489 """
490 """
490 _weight, newtree = _optimize(tree, small=True)
491 _weight, newtree = _optimize(tree, small=True)
491 return newtree
492 return newtree
492
493
493 # the set of valid characters for the initial letter of symbols in
494 # the set of valid characters for the initial letter of symbols in
494 # alias declarations and definitions
495 # alias declarations and definitions
495 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
496 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
496
497
497 def _parsewith(spec, lookup=None, syminitletters=None):
498 def _parsewith(spec, lookup=None, syminitletters=None):
498 """Generate a parse tree of given spec with given tokenizing options
499 """Generate a parse tree of given spec with given tokenizing options
499
500
500 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
501 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
501 ('func', ('symbol', 'foo'), ('symbol', '$1'))
502 ('func', ('symbol', 'foo'), ('symbol', '$1'))
502 >>> _parsewith('$1')
503 >>> _parsewith('$1')
503 Traceback (most recent call last):
504 Traceback (most recent call last):
504 ...
505 ...
505 ParseError: ("syntax error in revset '$1'", 0)
506 ParseError: ("syntax error in revset '$1'", 0)
506 >>> _parsewith('foo bar')
507 >>> _parsewith('foo bar')
507 Traceback (most recent call last):
508 Traceback (most recent call last):
508 ...
509 ...
509 ParseError: ('invalid token', 4)
510 ParseError: ('invalid token', 4)
510 """
511 """
511 p = parser.parser(elements)
512 p = parser.parser(elements)
512 tree, pos = p.parse(tokenize(spec, lookup=lookup,
513 tree, pos = p.parse(tokenize(spec, lookup=lookup,
513 syminitletters=syminitletters))
514 syminitletters=syminitletters))
514 if pos != len(spec):
515 if pos != len(spec):
515 raise error.ParseError(_('invalid token'), pos)
516 raise error.ParseError(_('invalid token'), pos)
516 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
517 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
517
518
518 class _aliasrules(parser.basealiasrules):
519 class _aliasrules(parser.basealiasrules):
519 """Parsing and expansion rule set of revset aliases"""
520 """Parsing and expansion rule set of revset aliases"""
520 _section = _('revset alias')
521 _section = _('revset alias')
521
522
522 @staticmethod
523 @staticmethod
523 def _parse(spec):
524 def _parse(spec):
524 """Parse alias declaration/definition ``spec``
525 """Parse alias declaration/definition ``spec``
525
526
526 This allows symbol names to use also ``$`` as an initial letter
527 This allows symbol names to use also ``$`` as an initial letter
527 (for backward compatibility), and callers of this function should
528 (for backward compatibility), and callers of this function should
528 examine whether ``$`` is used also for unexpected symbols or not.
529 examine whether ``$`` is used also for unexpected symbols or not.
529 """
530 """
530 return _parsewith(spec, syminitletters=_aliassyminitletters)
531 return _parsewith(spec, syminitletters=_aliassyminitletters)
531
532
532 @staticmethod
533 @staticmethod
533 def _trygetfunc(tree):
534 def _trygetfunc(tree):
534 if tree[0] == 'func' and tree[1][0] == 'symbol':
535 if tree[0] == 'func' and tree[1][0] == 'symbol':
535 return tree[1][1], getlist(tree[2])
536 return tree[1][1], getlist(tree[2])
536
537
537 def expandaliases(ui, tree):
538 def expandaliases(ui, tree):
538 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
539 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
539 tree = _aliasrules.expand(aliases, tree)
540 tree = _aliasrules.expand(aliases, tree)
540 # warn about problematic (but not referred) aliases
541 # warn about problematic (but not referred) aliases
541 for name, alias in sorted(aliases.iteritems()):
542 for name, alias in sorted(aliases.iteritems()):
542 if alias.error and not alias.warned:
543 if alias.error and not alias.warned:
543 ui.warn(_('warning: %s\n') % (alias.error))
544 ui.warn(_('warning: %s\n') % (alias.error))
544 alias.warned = True
545 alias.warned = True
545 return tree
546 return tree
546
547
547 def foldconcat(tree):
548 def foldconcat(tree):
548 """Fold elements to be concatenated by `##`
549 """Fold elements to be concatenated by `##`
549 """
550 """
550 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
551 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
551 return tree
552 return tree
552 if tree[0] == '_concat':
553 if tree[0] == '_concat':
553 pending = [tree]
554 pending = [tree]
554 l = []
555 l = []
555 while pending:
556 while pending:
556 e = pending.pop()
557 e = pending.pop()
557 if e[0] == '_concat':
558 if e[0] == '_concat':
558 pending.extend(reversed(e[1:]))
559 pending.extend(reversed(e[1:]))
559 elif e[0] in ('string', 'symbol'):
560 elif e[0] in ('string', 'symbol'):
560 l.append(e[1])
561 l.append(e[1])
561 else:
562 else:
562 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
563 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
563 raise error.ParseError(msg)
564 raise error.ParseError(msg)
564 return ('string', ''.join(l))
565 return ('string', ''.join(l))
565 else:
566 else:
566 return tuple(foldconcat(t) for t in tree)
567 return tuple(foldconcat(t) for t in tree)
567
568
568 def parse(spec, lookup=None):
569 def parse(spec, lookup=None):
569 return _parsewith(spec, lookup=lookup)
570 return _parsewith(spec, lookup=lookup)
570
571
571 def formatspec(expr, *args):
572 def formatspec(expr, *args):
572 '''
573 '''
573 This is a convenience function for using revsets internally, and
574 This is a convenience function for using revsets internally, and
574 escapes arguments appropriately. Aliases are intentionally ignored
575 escapes arguments appropriately. Aliases are intentionally ignored
575 so that intended expression behavior isn't accidentally subverted.
576 so that intended expression behavior isn't accidentally subverted.
576
577
577 Supported arguments:
578 Supported arguments:
578
579
579 %r = revset expression, parenthesized
580 %r = revset expression, parenthesized
580 %d = int(arg), no quoting
581 %d = int(arg), no quoting
581 %s = string(arg), escaped and single-quoted
582 %s = string(arg), escaped and single-quoted
582 %b = arg.branch(), escaped and single-quoted
583 %b = arg.branch(), escaped and single-quoted
583 %n = hex(arg), single-quoted
584 %n = hex(arg), single-quoted
584 %% = a literal '%'
585 %% = a literal '%'
585
586
586 Prefixing the type with 'l' specifies a parenthesized list of that type.
587 Prefixing the type with 'l' specifies a parenthesized list of that type.
587
588
588 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
589 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
589 '(10 or 11):: and ((this()) or (that()))'
590 '(10 or 11):: and ((this()) or (that()))'
590 >>> formatspec('%d:: and not %d::', 10, 20)
591 >>> formatspec('%d:: and not %d::', 10, 20)
591 '10:: and not 20::'
592 '10:: and not 20::'
592 >>> formatspec('%ld or %ld', [], [1])
593 >>> formatspec('%ld or %ld', [], [1])
593 "_list('') or 1"
594 "_list('') or 1"
594 >>> formatspec('keyword(%s)', 'foo\\xe9')
595 >>> formatspec('keyword(%s)', 'foo\\xe9')
595 "keyword('foo\\\\xe9')"
596 "keyword('foo\\\\xe9')"
596 >>> b = lambda: 'default'
597 >>> b = lambda: 'default'
597 >>> b.branch = b
598 >>> b.branch = b
598 >>> formatspec('branch(%b)', b)
599 >>> formatspec('branch(%b)', b)
599 "branch('default')"
600 "branch('default')"
600 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
601 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
601 "root(_list('a\\x00b\\x00c\\x00d'))"
602 "root(_list('a\\x00b\\x00c\\x00d'))"
602 '''
603 '''
603
604
604 def quote(s):
605 def quote(s):
605 return repr(str(s))
606 return repr(str(s))
606
607
607 def argtype(c, arg):
608 def argtype(c, arg):
608 if c == 'd':
609 if c == 'd':
609 return str(int(arg))
610 return str(int(arg))
610 elif c == 's':
611 elif c == 's':
611 return quote(arg)
612 return quote(arg)
612 elif c == 'r':
613 elif c == 'r':
613 parse(arg) # make sure syntax errors are confined
614 parse(arg) # make sure syntax errors are confined
614 return '(%s)' % arg
615 return '(%s)' % arg
615 elif c == 'n':
616 elif c == 'n':
616 return quote(node.hex(arg))
617 return quote(node.hex(arg))
617 elif c == 'b':
618 elif c == 'b':
618 return quote(arg.branch())
619 return quote(arg.branch())
619
620
620 def listexp(s, t):
621 def listexp(s, t):
621 l = len(s)
622 l = len(s)
622 if l == 0:
623 if l == 0:
623 return "_list('')"
624 return "_list('')"
624 elif l == 1:
625 elif l == 1:
625 return argtype(t, s[0])
626 return argtype(t, s[0])
626 elif t == 'd':
627 elif t == 'd':
627 return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
628 return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
628 elif t == 's':
629 elif t == 's':
629 return "_list('%s')" % "\0".join(s)
630 return "_list('%s')" % "\0".join(s)
630 elif t == 'n':
631 elif t == 'n':
631 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
632 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
632 elif t == 'b':
633 elif t == 'b':
633 return "_list('%s')" % "\0".join(a.branch() for a in s)
634 return "_list('%s')" % "\0".join(a.branch() for a in s)
634
635
635 m = l // 2
636 m = l // 2
636 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
637 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
637
638
638 ret = ''
639 ret = ''
639 pos = 0
640 pos = 0
640 arg = 0
641 arg = 0
641 while pos < len(expr):
642 while pos < len(expr):
642 c = expr[pos]
643 c = expr[pos]
643 if c == '%':
644 if c == '%':
644 pos += 1
645 pos += 1
645 d = expr[pos]
646 d = expr[pos]
646 if d == '%':
647 if d == '%':
647 ret += d
648 ret += d
648 elif d in 'dsnbr':
649 elif d in 'dsnbr':
649 ret += argtype(d, args[arg])
650 ret += argtype(d, args[arg])
650 arg += 1
651 arg += 1
651 elif d == 'l':
652 elif d == 'l':
652 # a list of some type
653 # a list of some type
653 pos += 1
654 pos += 1
654 d = expr[pos]
655 d = expr[pos]
655 ret += listexp(list(args[arg]), d)
656 ret += listexp(list(args[arg]), d)
656 arg += 1
657 arg += 1
657 else:
658 else:
658 raise error.Abort(_('unexpected revspec format character %s')
659 raise error.Abort(_('unexpected revspec format character %s')
659 % d)
660 % d)
660 else:
661 else:
661 ret += c
662 ret += c
662 pos += 1
663 pos += 1
663
664
664 return ret
665 return ret
665
666
666 def prettyformat(tree):
667 def prettyformat(tree):
667 return parser.prettyformat(tree, ('string', 'symbol'))
668 return parser.prettyformat(tree, ('string', 'symbol'))
668
669
669 def depth(tree):
670 def depth(tree):
670 if isinstance(tree, tuple):
671 if isinstance(tree, tuple):
671 return max(map(depth, tree)) + 1
672 return max(map(depth, tree)) + 1
672 else:
673 else:
673 return 0
674 return 0
674
675
675 def funcsused(tree):
676 def funcsused(tree):
676 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
677 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
677 return set()
678 return set()
678 else:
679 else:
679 funcs = set()
680 funcs = set()
680 for s in tree[1:]:
681 for s in tree[1:]:
681 funcs |= funcsused(s)
682 funcs |= funcsused(s)
682 if tree[0] == 'func':
683 if tree[0] == 'func':
683 funcs.add(tree[1][1])
684 funcs.add(tree[1][1])
684 return funcs
685 return funcs
General Comments 0
You need to be logged in to leave comments. Login now