##// END OF EJS Templates
py3: make set of revset operators and quotes in bytes
Yuya Nishihara -
r31384:fac5cd3b default
parent child Browse files
Show More
@@ -1,686 +1,689
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 )
18 )
19
19
20 elements = {
20 elements = {
21 # token-type: binding-strength, primary, prefix, infix, suffix
21 # token-type: binding-strength, primary, prefix, infix, suffix
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
23 "##": (20, None, None, ("_concat", 20), None),
23 "##": (20, None, None, ("_concat", 20), None),
24 "~": (18, None, None, ("ancestor", 18), None),
24 "~": (18, None, None, ("ancestor", 18), None),
25 "^": (18, None, None, ("parent", 18), "parentpost"),
25 "^": (18, None, None, ("parent", 18), "parentpost"),
26 "-": (5, None, ("negate", 19), ("minus", 5), None),
26 "-": (5, None, ("negate", 19), ("minus", 5), None),
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
30 "not": (10, None, ("not", 10), None, None),
30 "not": (10, None, ("not", 10), None, None),
31 "!": (10, None, ("not", 10), None, None),
31 "!": (10, None, ("not", 10), None, None),
32 "and": (5, None, None, ("and", 5), None),
32 "and": (5, None, None, ("and", 5), None),
33 "&": (5, None, None, ("and", 5), None),
33 "&": (5, None, None, ("and", 5), None),
34 "%": (5, None, None, ("only", 5), "onlypost"),
34 "%": (5, None, None, ("only", 5), "onlypost"),
35 "or": (4, None, None, ("or", 4), None),
35 "or": (4, None, None, ("or", 4), None),
36 "|": (4, None, None, ("or", 4), None),
36 "|": (4, None, None, ("or", 4), None),
37 "+": (4, None, None, ("or", 4), None),
37 "+": (4, None, None, ("or", 4), None),
38 "=": (3, None, None, ("keyvalue", 3), None),
38 "=": (3, None, None, ("keyvalue", 3), None),
39 ",": (2, None, None, ("list", 2), None),
39 ",": (2, None, None, ("list", 2), None),
40 ")": (0, None, None, None, None),
40 ")": (0, None, None, None, None),
41 "symbol": (0, "symbol", None, None, None),
41 "symbol": (0, "symbol", None, None, None),
42 "string": (0, "string", None, None, None),
42 "string": (0, "string", None, None, None),
43 "end": (0, None, None, None, None),
43 "end": (0, None, None, None, None),
44 }
44 }
45
45
46 keywords = set(['and', 'or', 'not'])
46 keywords = set(['and', 'or', 'not'])
47
47
48 _quoteletters = set(['"', "'"])
49 _simpleopletters = set(pycompat.iterbytestr("():=,-|&+!~^%"))
50
48 # default set of valid characters for the initial letter of symbols
51 # default set of valid characters for the initial letter of symbols
49 _syminitletters = set(pycompat.iterbytestr(
52 _syminitletters = set(pycompat.iterbytestr(
50 string.ascii_letters.encode('ascii') +
53 string.ascii_letters.encode('ascii') +
51 string.digits.encode('ascii') +
54 string.digits.encode('ascii') +
52 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
55 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
53
56
54 # default set of valid characters for non-initial letters of symbols
57 # default set of valid characters for non-initial letters of symbols
55 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
58 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
56
59
57 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
60 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
58 '''
61 '''
59 Parse a revset statement into a stream of tokens
62 Parse a revset statement into a stream of tokens
60
63
61 ``syminitletters`` is the set of valid characters for the initial
64 ``syminitletters`` is the set of valid characters for the initial
62 letter of symbols.
65 letter of symbols.
63
66
64 By default, character ``c`` is recognized as valid for initial
67 By default, character ``c`` is recognized as valid for initial
65 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
68 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
66
69
67 ``symletters`` is the set of valid characters for non-initial
70 ``symletters`` is the set of valid characters for non-initial
68 letters of symbols.
71 letters of symbols.
69
72
70 By default, character ``c`` is recognized as valid for non-initial
73 By default, character ``c`` is recognized as valid for non-initial
71 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
74 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
72
75
73 Check that @ is a valid unquoted token character (issue3686):
76 Check that @ is a valid unquoted token character (issue3686):
74 >>> list(tokenize("@::"))
77 >>> list(tokenize("@::"))
75 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
78 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
76
79
77 '''
80 '''
78 if syminitletters is None:
81 if syminitletters is None:
79 syminitletters = _syminitletters
82 syminitletters = _syminitletters
80 if symletters is None:
83 if symletters is None:
81 symletters = _symletters
84 symletters = _symletters
82
85
83 if program and lookup:
86 if program and lookup:
84 # attempt to parse old-style ranges first to deal with
87 # attempt to parse old-style ranges first to deal with
85 # things like old-tag which contain query metacharacters
88 # things like old-tag which contain query metacharacters
86 parts = program.split(':', 1)
89 parts = program.split(':', 1)
87 if all(lookup(sym) for sym in parts if sym):
90 if all(lookup(sym) for sym in parts if sym):
88 if parts[0]:
91 if parts[0]:
89 yield ('symbol', parts[0], 0)
92 yield ('symbol', parts[0], 0)
90 if len(parts) > 1:
93 if len(parts) > 1:
91 s = len(parts[0])
94 s = len(parts[0])
92 yield (':', None, s)
95 yield (':', None, s)
93 if parts[1]:
96 if parts[1]:
94 yield ('symbol', parts[1], s + 1)
97 yield ('symbol', parts[1], s + 1)
95 yield ('end', None, len(program))
98 yield ('end', None, len(program))
96 return
99 return
97
100
98 pos, l = 0, len(program)
101 pos, l = 0, len(program)
99 while pos < l:
102 while pos < l:
100 c = program[pos:pos + 1]
103 c = program[pos:pos + 1]
101 if c.isspace(): # skip inter-token whitespace
104 if c.isspace(): # skip inter-token whitespace
102 pass
105 pass
103 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
106 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
104 yield ('::', None, pos)
107 yield ('::', None, pos)
105 pos += 1 # skip ahead
108 pos += 1 # skip ahead
106 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
109 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
107 yield ('..', None, pos)
110 yield ('..', None, pos)
108 pos += 1 # skip ahead
111 pos += 1 # skip ahead
109 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
112 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
110 yield ('##', None, pos)
113 yield ('##', None, pos)
111 pos += 1 # skip ahead
114 pos += 1 # skip ahead
112 elif c in "():=,-|&+!~^%": # handle simple operators
115 elif c in _simpleopletters: # handle simple operators
113 yield (c, None, pos)
116 yield (c, None, pos)
114 elif (c in '"\'' or c == 'r' and
117 elif (c in _quoteletters or c == 'r' and
115 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
118 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
116 if c == 'r':
119 if c == 'r':
117 pos += 1
120 pos += 1
118 c = program[pos:pos + 1]
121 c = program[pos:pos + 1]
119 decode = lambda x: x
122 decode = lambda x: x
120 else:
123 else:
121 decode = parser.unescapestr
124 decode = parser.unescapestr
122 pos += 1
125 pos += 1
123 s = pos
126 s = pos
124 while pos < l: # find closing quote
127 while pos < l: # find closing quote
125 d = program[pos:pos + 1]
128 d = program[pos:pos + 1]
126 if d == '\\': # skip over escaped characters
129 if d == '\\': # skip over escaped characters
127 pos += 2
130 pos += 2
128 continue
131 continue
129 if d == c:
132 if d == c:
130 yield ('string', decode(program[s:pos]), s)
133 yield ('string', decode(program[s:pos]), s)
131 break
134 break
132 pos += 1
135 pos += 1
133 else:
136 else:
134 raise error.ParseError(_("unterminated string"), s)
137 raise error.ParseError(_("unterminated string"), s)
135 # gather up a symbol/keyword
138 # gather up a symbol/keyword
136 elif c in syminitletters:
139 elif c in syminitletters:
137 s = pos
140 s = pos
138 pos += 1
141 pos += 1
139 while pos < l: # find end of symbol
142 while pos < l: # find end of symbol
140 d = program[pos:pos + 1]
143 d = program[pos:pos + 1]
141 if d not in symletters:
144 if d not in symletters:
142 break
145 break
143 if (d == '.'
146 if (d == '.'
144 and program[pos - 1:pos] == '.'): # special case for ..
147 and program[pos - 1:pos] == '.'): # special case for ..
145 pos -= 1
148 pos -= 1
146 break
149 break
147 pos += 1
150 pos += 1
148 sym = program[s:pos]
151 sym = program[s:pos]
149 if sym in keywords: # operator keywords
152 if sym in keywords: # operator keywords
150 yield (sym, None, s)
153 yield (sym, None, s)
151 elif '-' in sym:
154 elif '-' in sym:
152 # some jerk gave us foo-bar-baz, try to check if it's a symbol
155 # some jerk gave us foo-bar-baz, try to check if it's a symbol
153 if lookup and lookup(sym):
156 if lookup and lookup(sym):
154 # looks like a real symbol
157 # looks like a real symbol
155 yield ('symbol', sym, s)
158 yield ('symbol', sym, s)
156 else:
159 else:
157 # looks like an expression
160 # looks like an expression
158 parts = sym.split('-')
161 parts = sym.split('-')
159 for p in parts[:-1]:
162 for p in parts[:-1]:
160 if p: # possible consecutive -
163 if p: # possible consecutive -
161 yield ('symbol', p, s)
164 yield ('symbol', p, s)
162 s += len(p)
165 s += len(p)
163 yield ('-', None, pos)
166 yield ('-', None, pos)
164 s += 1
167 s += 1
165 if parts[-1]: # possible trailing -
168 if parts[-1]: # possible trailing -
166 yield ('symbol', parts[-1], s)
169 yield ('symbol', parts[-1], s)
167 else:
170 else:
168 yield ('symbol', sym, s)
171 yield ('symbol', sym, s)
169 pos -= 1
172 pos -= 1
170 else:
173 else:
171 raise error.ParseError(_("syntax error in revset '%s'") %
174 raise error.ParseError(_("syntax error in revset '%s'") %
172 program, pos)
175 program, pos)
173 pos += 1
176 pos += 1
174 yield ('end', None, pos)
177 yield ('end', None, pos)
175
178
176 # helpers
179 # helpers
177
180
178 _notset = object()
181 _notset = object()
179
182
180 def getsymbol(x):
183 def getsymbol(x):
181 if x and x[0] == 'symbol':
184 if x and x[0] == 'symbol':
182 return x[1]
185 return x[1]
183 raise error.ParseError(_('not a symbol'))
186 raise error.ParseError(_('not a symbol'))
184
187
185 def getstring(x, err):
188 def getstring(x, err):
186 if x and (x[0] == 'string' or x[0] == 'symbol'):
189 if x and (x[0] == 'string' or x[0] == 'symbol'):
187 return x[1]
190 return x[1]
188 raise error.ParseError(err)
191 raise error.ParseError(err)
189
192
190 def getinteger(x, err, default=_notset):
193 def getinteger(x, err, default=_notset):
191 if not x and default is not _notset:
194 if not x and default is not _notset:
192 return default
195 return default
193 try:
196 try:
194 return int(getstring(x, err))
197 return int(getstring(x, err))
195 except ValueError:
198 except ValueError:
196 raise error.ParseError(err)
199 raise error.ParseError(err)
197
200
198 def getlist(x):
201 def getlist(x):
199 if not x:
202 if not x:
200 return []
203 return []
201 if x[0] == 'list':
204 if x[0] == 'list':
202 return list(x[1:])
205 return list(x[1:])
203 return [x]
206 return [x]
204
207
205 def getrange(x, err):
208 def getrange(x, err):
206 if not x:
209 if not x:
207 raise error.ParseError(err)
210 raise error.ParseError(err)
208 op = x[0]
211 op = x[0]
209 if op == 'range':
212 if op == 'range':
210 return x[1], x[2]
213 return x[1], x[2]
211 elif op == 'rangepre':
214 elif op == 'rangepre':
212 return None, x[1]
215 return None, x[1]
213 elif op == 'rangepost':
216 elif op == 'rangepost':
214 return x[1], None
217 return x[1], None
215 elif op == 'rangeall':
218 elif op == 'rangeall':
216 return None, None
219 return None, None
217 raise error.ParseError(err)
220 raise error.ParseError(err)
218
221
219 def getargs(x, min, max, err):
222 def getargs(x, min, max, err):
220 l = getlist(x)
223 l = getlist(x)
221 if len(l) < min or (max >= 0 and len(l) > max):
224 if len(l) < min or (max >= 0 and len(l) > max):
222 raise error.ParseError(err)
225 raise error.ParseError(err)
223 return l
226 return l
224
227
225 def getargsdict(x, funcname, keys):
228 def getargsdict(x, funcname, keys):
226 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
229 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
227 keyvaluenode='keyvalue', keynode='symbol')
230 keyvaluenode='keyvalue', keynode='symbol')
228
231
229 # Constants for ordering requirement, used in _analyze():
232 # Constants for ordering requirement, used in _analyze():
230 #
233 #
231 # If 'define', any nested functions and operations can change the ordering of
234 # If 'define', any nested functions and operations can change the ordering of
232 # the entries in the set. If 'follow', any nested functions and operations
235 # the entries in the set. If 'follow', any nested functions and operations
233 # should take the ordering specified by the first operand to the '&' operator.
236 # should take the ordering specified by the first operand to the '&' operator.
234 #
237 #
235 # For instance,
238 # For instance,
236 #
239 #
237 # X & (Y | Z)
240 # X & (Y | Z)
238 # ^ ^^^^^^^
241 # ^ ^^^^^^^
239 # | follow
242 # | follow
240 # define
243 # define
241 #
244 #
242 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
245 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
243 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
246 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
244 #
247 #
245 # 'any' means the order doesn't matter. For instance,
248 # 'any' means the order doesn't matter. For instance,
246 #
249 #
247 # X & !Y
250 # X & !Y
248 # ^
251 # ^
249 # any
252 # any
250 #
253 #
251 # 'y()' can either enforce its ordering requirement or take the ordering
254 # 'y()' can either enforce its ordering requirement or take the ordering
252 # specified by 'x()' because 'not()' doesn't care the order.
255 # specified by 'x()' because 'not()' doesn't care the order.
253 #
256 #
254 # Transition of ordering requirement:
257 # Transition of ordering requirement:
255 #
258 #
256 # 1. starts with 'define'
259 # 1. starts with 'define'
257 # 2. shifts to 'follow' by 'x & y'
260 # 2. shifts to 'follow' by 'x & y'
258 # 3. changes back to 'define' on function call 'f(x)' or function-like
261 # 3. changes back to 'define' on function call 'f(x)' or function-like
259 # operation 'x (f) y' because 'f' may have its own ordering requirement
262 # operation 'x (f) y' because 'f' may have its own ordering requirement
260 # for 'x' and 'y' (e.g. 'first(x)')
263 # for 'x' and 'y' (e.g. 'first(x)')
261 #
264 #
262 anyorder = 'any' # don't care the order
265 anyorder = 'any' # don't care the order
263 defineorder = 'define' # should define the order
266 defineorder = 'define' # should define the order
264 followorder = 'follow' # must follow the current order
267 followorder = 'follow' # must follow the current order
265
268
266 # transition table for 'x & y', from the current expression 'x' to 'y'
269 # transition table for 'x & y', from the current expression 'x' to 'y'
267 _tofolloworder = {
270 _tofolloworder = {
268 anyorder: anyorder,
271 anyorder: anyorder,
269 defineorder: followorder,
272 defineorder: followorder,
270 followorder: followorder,
273 followorder: followorder,
271 }
274 }
272
275
273 def _matchonly(revs, bases):
276 def _matchonly(revs, bases):
274 """
277 """
275 >>> f = lambda *args: _matchonly(*map(parse, args))
278 >>> f = lambda *args: _matchonly(*map(parse, args))
276 >>> f('ancestors(A)', 'not ancestors(B)')
279 >>> f('ancestors(A)', 'not ancestors(B)')
277 ('list', ('symbol', 'A'), ('symbol', 'B'))
280 ('list', ('symbol', 'A'), ('symbol', 'B'))
278 """
281 """
279 if (revs is not None
282 if (revs is not None
280 and revs[0] == 'func'
283 and revs[0] == 'func'
281 and getsymbol(revs[1]) == 'ancestors'
284 and getsymbol(revs[1]) == 'ancestors'
282 and bases is not None
285 and bases is not None
283 and bases[0] == 'not'
286 and bases[0] == 'not'
284 and bases[1][0] == 'func'
287 and bases[1][0] == 'func'
285 and getsymbol(bases[1][1]) == 'ancestors'):
288 and getsymbol(bases[1][1]) == 'ancestors'):
286 return ('list', revs[2], bases[1][2])
289 return ('list', revs[2], bases[1][2])
287
290
288 def _fixops(x):
291 def _fixops(x):
289 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
292 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
290 handled well by our simple top-down parser"""
293 handled well by our simple top-down parser"""
291 if not isinstance(x, tuple):
294 if not isinstance(x, tuple):
292 return x
295 return x
293
296
294 op = x[0]
297 op = x[0]
295 if op == 'parent':
298 if op == 'parent':
296 # x^:y means (x^) : y, not x ^ (:y)
299 # x^:y means (x^) : y, not x ^ (:y)
297 # x^: means (x^) :, not x ^ (:)
300 # x^: means (x^) :, not x ^ (:)
298 post = ('parentpost', x[1])
301 post = ('parentpost', x[1])
299 if x[2][0] == 'dagrangepre':
302 if x[2][0] == 'dagrangepre':
300 return _fixops(('dagrange', post, x[2][1]))
303 return _fixops(('dagrange', post, x[2][1]))
301 elif x[2][0] == 'rangepre':
304 elif x[2][0] == 'rangepre':
302 return _fixops(('range', post, x[2][1]))
305 return _fixops(('range', post, x[2][1]))
303 elif x[2][0] == 'rangeall':
306 elif x[2][0] == 'rangeall':
304 return _fixops(('rangepost', post))
307 return _fixops(('rangepost', post))
305 elif op == 'or':
308 elif op == 'or':
306 # make number of arguments deterministic:
309 # make number of arguments deterministic:
307 # x + y + z -> (or x y z) -> (or (list x y z))
310 # x + y + z -> (or x y z) -> (or (list x y z))
308 return (op, _fixops(('list',) + x[1:]))
311 return (op, _fixops(('list',) + x[1:]))
309
312
310 return (op,) + tuple(_fixops(y) for y in x[1:])
313 return (op,) + tuple(_fixops(y) for y in x[1:])
311
314
312 def _analyze(x, order):
315 def _analyze(x, order):
313 if x is None:
316 if x is None:
314 return x
317 return x
315
318
316 op = x[0]
319 op = x[0]
317 if op == 'minus':
320 if op == 'minus':
318 return _analyze(('and', x[1], ('not', x[2])), order)
321 return _analyze(('and', x[1], ('not', x[2])), order)
319 elif op == 'only':
322 elif op == 'only':
320 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
323 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
321 return _analyze(t, order)
324 return _analyze(t, order)
322 elif op == 'onlypost':
325 elif op == 'onlypost':
323 return _analyze(('func', ('symbol', 'only'), x[1]), order)
326 return _analyze(('func', ('symbol', 'only'), x[1]), order)
324 elif op == 'dagrangepre':
327 elif op == 'dagrangepre':
325 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
328 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
326 elif op == 'dagrangepost':
329 elif op == 'dagrangepost':
327 return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
330 return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
328 elif op == 'negate':
331 elif op == 'negate':
329 s = getstring(x[1], _("can't negate that"))
332 s = getstring(x[1], _("can't negate that"))
330 return _analyze(('string', '-' + s), order)
333 return _analyze(('string', '-' + s), order)
331 elif op in ('string', 'symbol'):
334 elif op in ('string', 'symbol'):
332 return x
335 return x
333 elif op == 'and':
336 elif op == 'and':
334 ta = _analyze(x[1], order)
337 ta = _analyze(x[1], order)
335 tb = _analyze(x[2], _tofolloworder[order])
338 tb = _analyze(x[2], _tofolloworder[order])
336 return (op, ta, tb, order)
339 return (op, ta, tb, order)
337 elif op == 'or':
340 elif op == 'or':
338 return (op, _analyze(x[1], order), order)
341 return (op, _analyze(x[1], order), order)
339 elif op == 'not':
342 elif op == 'not':
340 return (op, _analyze(x[1], anyorder), order)
343 return (op, _analyze(x[1], anyorder), order)
341 elif op == 'rangeall':
344 elif op == 'rangeall':
342 return (op, None, order)
345 return (op, None, order)
343 elif op in ('rangepre', 'rangepost', 'parentpost'):
346 elif op in ('rangepre', 'rangepost', 'parentpost'):
344 return (op, _analyze(x[1], defineorder), order)
347 return (op, _analyze(x[1], defineorder), order)
345 elif op == 'group':
348 elif op == 'group':
346 return _analyze(x[1], order)
349 return _analyze(x[1], order)
347 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
350 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
348 ta = _analyze(x[1], defineorder)
351 ta = _analyze(x[1], defineorder)
349 tb = _analyze(x[2], defineorder)
352 tb = _analyze(x[2], defineorder)
350 return (op, ta, tb, order)
353 return (op, ta, tb, order)
351 elif op == 'list':
354 elif op == 'list':
352 return (op,) + tuple(_analyze(y, order) for y in x[1:])
355 return (op,) + tuple(_analyze(y, order) for y in x[1:])
353 elif op == 'keyvalue':
356 elif op == 'keyvalue':
354 return (op, x[1], _analyze(x[2], order))
357 return (op, x[1], _analyze(x[2], order))
355 elif op == 'func':
358 elif op == 'func':
356 f = getsymbol(x[1])
359 f = getsymbol(x[1])
357 d = defineorder
360 d = defineorder
358 if f == 'present':
361 if f == 'present':
359 # 'present(set)' is known to return the argument set with no
362 # 'present(set)' is known to return the argument set with no
360 # modification, so forward the current order to its argument
363 # modification, so forward the current order to its argument
361 d = order
364 d = order
362 return (op, x[1], _analyze(x[2], d), order)
365 return (op, x[1], _analyze(x[2], d), order)
363 raise ValueError('invalid operator %r' % op)
366 raise ValueError('invalid operator %r' % op)
364
367
365 def analyze(x, order=defineorder):
368 def analyze(x, order=defineorder):
366 """Transform raw parsed tree to evaluatable tree which can be fed to
369 """Transform raw parsed tree to evaluatable tree which can be fed to
367 optimize() or getset()
370 optimize() or getset()
368
371
369 All pseudo operations should be mapped to real operations or functions
372 All pseudo operations should be mapped to real operations or functions
370 defined in methods or symbols table respectively.
373 defined in methods or symbols table respectively.
371
374
372 'order' specifies how the current expression 'x' is ordered (see the
375 'order' specifies how the current expression 'x' is ordered (see the
373 constants defined above.)
376 constants defined above.)
374 """
377 """
375 return _analyze(x, order)
378 return _analyze(x, order)
376
379
377 def _optimize(x, small):
380 def _optimize(x, small):
378 if x is None:
381 if x is None:
379 return 0, x
382 return 0, x
380
383
381 smallbonus = 1
384 smallbonus = 1
382 if small:
385 if small:
383 smallbonus = .5
386 smallbonus = .5
384
387
385 op = x[0]
388 op = x[0]
386 if op in ('string', 'symbol'):
389 if op in ('string', 'symbol'):
387 return smallbonus, x # single revisions are small
390 return smallbonus, x # single revisions are small
388 elif op == 'and':
391 elif op == 'and':
389 wa, ta = _optimize(x[1], True)
392 wa, ta = _optimize(x[1], True)
390 wb, tb = _optimize(x[2], True)
393 wb, tb = _optimize(x[2], True)
391 order = x[3]
394 order = x[3]
392 w = min(wa, wb)
395 w = min(wa, wb)
393
396
394 # (::x and not ::y)/(not ::y and ::x) have a fast path
397 # (::x and not ::y)/(not ::y and ::x) have a fast path
395 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
398 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
396 if tm:
399 if tm:
397 return w, ('func', ('symbol', 'only'), tm, order)
400 return w, ('func', ('symbol', 'only'), tm, order)
398
401
399 if tb is not None and tb[0] == 'not':
402 if tb is not None and tb[0] == 'not':
400 return wa, ('difference', ta, tb[1], order)
403 return wa, ('difference', ta, tb[1], order)
401
404
402 if wa > wb:
405 if wa > wb:
403 return w, (op, tb, ta, order)
406 return w, (op, tb, ta, order)
404 return w, (op, ta, tb, order)
407 return w, (op, ta, tb, order)
405 elif op == 'or':
408 elif op == 'or':
406 # fast path for machine-generated expression, that is likely to have
409 # fast path for machine-generated expression, that is likely to have
407 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
410 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
408 order = x[2]
411 order = x[2]
409 ws, ts, ss = [], [], []
412 ws, ts, ss = [], [], []
410 def flushss():
413 def flushss():
411 if not ss:
414 if not ss:
412 return
415 return
413 if len(ss) == 1:
416 if len(ss) == 1:
414 w, t = ss[0]
417 w, t = ss[0]
415 else:
418 else:
416 s = '\0'.join(t[1] for w, t in ss)
419 s = '\0'.join(t[1] for w, t in ss)
417 y = ('func', ('symbol', '_list'), ('string', s), order)
420 y = ('func', ('symbol', '_list'), ('string', s), order)
418 w, t = _optimize(y, False)
421 w, t = _optimize(y, False)
419 ws.append(w)
422 ws.append(w)
420 ts.append(t)
423 ts.append(t)
421 del ss[:]
424 del ss[:]
422 for y in getlist(x[1]):
425 for y in getlist(x[1]):
423 w, t = _optimize(y, False)
426 w, t = _optimize(y, False)
424 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
427 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
425 ss.append((w, t))
428 ss.append((w, t))
426 continue
429 continue
427 flushss()
430 flushss()
428 ws.append(w)
431 ws.append(w)
429 ts.append(t)
432 ts.append(t)
430 flushss()
433 flushss()
431 if len(ts) == 1:
434 if len(ts) == 1:
432 return ws[0], ts[0] # 'or' operation is fully optimized out
435 return ws[0], ts[0] # 'or' operation is fully optimized out
433 # we can't reorder trees by weight because it would change the order.
436 # we can't reorder trees by weight because it would change the order.
434 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
437 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
435 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
438 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
436 return max(ws), (op, ('list',) + tuple(ts), order)
439 return max(ws), (op, ('list',) + tuple(ts), order)
437 elif op == 'not':
440 elif op == 'not':
438 # Optimize not public() to _notpublic() because we have a fast version
441 # Optimize not public() to _notpublic() because we have a fast version
439 if x[1][:3] == ('func', ('symbol', 'public'), None):
442 if x[1][:3] == ('func', ('symbol', 'public'), None):
440 order = x[1][3]
443 order = x[1][3]
441 newsym = ('func', ('symbol', '_notpublic'), None, order)
444 newsym = ('func', ('symbol', '_notpublic'), None, order)
442 o = _optimize(newsym, not small)
445 o = _optimize(newsym, not small)
443 return o[0], o[1]
446 return o[0], o[1]
444 else:
447 else:
445 o = _optimize(x[1], not small)
448 o = _optimize(x[1], not small)
446 order = x[2]
449 order = x[2]
447 return o[0], (op, o[1], order)
450 return o[0], (op, o[1], order)
448 elif op == 'rangeall':
451 elif op == 'rangeall':
449 return smallbonus, x
452 return smallbonus, x
450 elif op in ('rangepre', 'rangepost', 'parentpost'):
453 elif op in ('rangepre', 'rangepost', 'parentpost'):
451 o = _optimize(x[1], small)
454 o = _optimize(x[1], small)
452 order = x[2]
455 order = x[2]
453 return o[0], (op, o[1], order)
456 return o[0], (op, o[1], order)
454 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
457 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
455 wa, ta = _optimize(x[1], small)
458 wa, ta = _optimize(x[1], small)
456 wb, tb = _optimize(x[2], small)
459 wb, tb = _optimize(x[2], small)
457 order = x[3]
460 order = x[3]
458 return wa + wb, (op, ta, tb, order)
461 return wa + wb, (op, ta, tb, order)
459 elif op == 'list':
462 elif op == 'list':
460 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
463 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
461 return sum(ws), (op,) + ts
464 return sum(ws), (op,) + ts
462 elif op == 'keyvalue':
465 elif op == 'keyvalue':
463 w, t = _optimize(x[2], small)
466 w, t = _optimize(x[2], small)
464 return w, (op, x[1], t)
467 return w, (op, x[1], t)
465 elif op == 'func':
468 elif op == 'func':
466 f = getsymbol(x[1])
469 f = getsymbol(x[1])
467 wa, ta = _optimize(x[2], small)
470 wa, ta = _optimize(x[2], small)
468 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
471 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
469 'keyword', 'outgoing', 'user', 'destination'):
472 'keyword', 'outgoing', 'user', 'destination'):
470 w = 10 # slow
473 w = 10 # slow
471 elif f in ('modifies', 'adds', 'removes'):
474 elif f in ('modifies', 'adds', 'removes'):
472 w = 30 # slower
475 w = 30 # slower
473 elif f == "contains":
476 elif f == "contains":
474 w = 100 # very slow
477 w = 100 # very slow
475 elif f == "ancestor":
478 elif f == "ancestor":
476 w = 1 * smallbonus
479 w = 1 * smallbonus
477 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
480 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
478 w = 0
481 w = 0
479 elif f == "sort":
482 elif f == "sort":
480 w = 10 # assume most sorts look at changelog
483 w = 10 # assume most sorts look at changelog
481 else:
484 else:
482 w = 1
485 w = 1
483 order = x[3]
486 order = x[3]
484 return w + wa, (op, x[1], ta, order)
487 return w + wa, (op, x[1], ta, order)
485 raise ValueError('invalid operator %r' % op)
488 raise ValueError('invalid operator %r' % op)
486
489
487 def optimize(tree):
490 def optimize(tree):
488 """Optimize evaluatable tree
491 """Optimize evaluatable tree
489
492
490 All pseudo operations should be transformed beforehand.
493 All pseudo operations should be transformed beforehand.
491 """
494 """
492 _weight, newtree = _optimize(tree, small=True)
495 _weight, newtree = _optimize(tree, small=True)
493 return newtree
496 return newtree
494
497
495 # the set of valid characters for the initial letter of symbols in
498 # the set of valid characters for the initial letter of symbols in
496 # alias declarations and definitions
499 # alias declarations and definitions
497 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
500 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
498
501
499 def _parsewith(spec, lookup=None, syminitletters=None):
502 def _parsewith(spec, lookup=None, syminitletters=None):
500 """Generate a parse tree of given spec with given tokenizing options
503 """Generate a parse tree of given spec with given tokenizing options
501
504
502 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
505 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
503 ('func', ('symbol', 'foo'), ('symbol', '$1'))
506 ('func', ('symbol', 'foo'), ('symbol', '$1'))
504 >>> _parsewith('$1')
507 >>> _parsewith('$1')
505 Traceback (most recent call last):
508 Traceback (most recent call last):
506 ...
509 ...
507 ParseError: ("syntax error in revset '$1'", 0)
510 ParseError: ("syntax error in revset '$1'", 0)
508 >>> _parsewith('foo bar')
511 >>> _parsewith('foo bar')
509 Traceback (most recent call last):
512 Traceback (most recent call last):
510 ...
513 ...
511 ParseError: ('invalid token', 4)
514 ParseError: ('invalid token', 4)
512 """
515 """
513 p = parser.parser(elements)
516 p = parser.parser(elements)
514 tree, pos = p.parse(tokenize(spec, lookup=lookup,
517 tree, pos = p.parse(tokenize(spec, lookup=lookup,
515 syminitletters=syminitletters))
518 syminitletters=syminitletters))
516 if pos != len(spec):
519 if pos != len(spec):
517 raise error.ParseError(_('invalid token'), pos)
520 raise error.ParseError(_('invalid token'), pos)
518 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
521 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
519
522
520 class _aliasrules(parser.basealiasrules):
523 class _aliasrules(parser.basealiasrules):
521 """Parsing and expansion rule set of revset aliases"""
524 """Parsing and expansion rule set of revset aliases"""
522 _section = _('revset alias')
525 _section = _('revset alias')
523
526
524 @staticmethod
527 @staticmethod
525 def _parse(spec):
528 def _parse(spec):
526 """Parse alias declaration/definition ``spec``
529 """Parse alias declaration/definition ``spec``
527
530
528 This allows symbol names to use also ``$`` as an initial letter
531 This allows symbol names to use also ``$`` as an initial letter
529 (for backward compatibility), and callers of this function should
532 (for backward compatibility), and callers of this function should
530 examine whether ``$`` is used also for unexpected symbols or not.
533 examine whether ``$`` is used also for unexpected symbols or not.
531 """
534 """
532 return _parsewith(spec, syminitletters=_aliassyminitletters)
535 return _parsewith(spec, syminitletters=_aliassyminitletters)
533
536
534 @staticmethod
537 @staticmethod
535 def _trygetfunc(tree):
538 def _trygetfunc(tree):
536 if tree[0] == 'func' and tree[1][0] == 'symbol':
539 if tree[0] == 'func' and tree[1][0] == 'symbol':
537 return tree[1][1], getlist(tree[2])
540 return tree[1][1], getlist(tree[2])
538
541
539 def expandaliases(ui, tree):
542 def expandaliases(ui, tree):
540 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
543 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
541 tree = _aliasrules.expand(aliases, tree)
544 tree = _aliasrules.expand(aliases, tree)
542 # warn about problematic (but not referred) aliases
545 # warn about problematic (but not referred) aliases
543 for name, alias in sorted(aliases.iteritems()):
546 for name, alias in sorted(aliases.iteritems()):
544 if alias.error and not alias.warned:
547 if alias.error and not alias.warned:
545 ui.warn(_('warning: %s\n') % (alias.error))
548 ui.warn(_('warning: %s\n') % (alias.error))
546 alias.warned = True
549 alias.warned = True
547 return tree
550 return tree
548
551
549 def foldconcat(tree):
552 def foldconcat(tree):
550 """Fold elements to be concatenated by `##`
553 """Fold elements to be concatenated by `##`
551 """
554 """
552 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
555 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
553 return tree
556 return tree
554 if tree[0] == '_concat':
557 if tree[0] == '_concat':
555 pending = [tree]
558 pending = [tree]
556 l = []
559 l = []
557 while pending:
560 while pending:
558 e = pending.pop()
561 e = pending.pop()
559 if e[0] == '_concat':
562 if e[0] == '_concat':
560 pending.extend(reversed(e[1:]))
563 pending.extend(reversed(e[1:]))
561 elif e[0] in ('string', 'symbol'):
564 elif e[0] in ('string', 'symbol'):
562 l.append(e[1])
565 l.append(e[1])
563 else:
566 else:
564 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
567 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
565 raise error.ParseError(msg)
568 raise error.ParseError(msg)
566 return ('string', ''.join(l))
569 return ('string', ''.join(l))
567 else:
570 else:
568 return tuple(foldconcat(t) for t in tree)
571 return tuple(foldconcat(t) for t in tree)
569
572
570 def parse(spec, lookup=None):
573 def parse(spec, lookup=None):
571 return _parsewith(spec, lookup=lookup)
574 return _parsewith(spec, lookup=lookup)
572
575
573 def formatspec(expr, *args):
576 def formatspec(expr, *args):
574 '''
577 '''
575 This is a convenience function for using revsets internally, and
578 This is a convenience function for using revsets internally, and
576 escapes arguments appropriately. Aliases are intentionally ignored
579 escapes arguments appropriately. Aliases are intentionally ignored
577 so that intended expression behavior isn't accidentally subverted.
580 so that intended expression behavior isn't accidentally subverted.
578
581
579 Supported arguments:
582 Supported arguments:
580
583
581 %r = revset expression, parenthesized
584 %r = revset expression, parenthesized
582 %d = int(arg), no quoting
585 %d = int(arg), no quoting
583 %s = string(arg), escaped and single-quoted
586 %s = string(arg), escaped and single-quoted
584 %b = arg.branch(), escaped and single-quoted
587 %b = arg.branch(), escaped and single-quoted
585 %n = hex(arg), single-quoted
588 %n = hex(arg), single-quoted
586 %% = a literal '%'
589 %% = a literal '%'
587
590
588 Prefixing the type with 'l' specifies a parenthesized list of that type.
591 Prefixing the type with 'l' specifies a parenthesized list of that type.
589
592
590 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
593 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
591 '(10 or 11):: and ((this()) or (that()))'
594 '(10 or 11):: and ((this()) or (that()))'
592 >>> formatspec('%d:: and not %d::', 10, 20)
595 >>> formatspec('%d:: and not %d::', 10, 20)
593 '10:: and not 20::'
596 '10:: and not 20::'
594 >>> formatspec('%ld or %ld', [], [1])
597 >>> formatspec('%ld or %ld', [], [1])
595 "_list('') or 1"
598 "_list('') or 1"
596 >>> formatspec('keyword(%s)', 'foo\\xe9')
599 >>> formatspec('keyword(%s)', 'foo\\xe9')
597 "keyword('foo\\\\xe9')"
600 "keyword('foo\\\\xe9')"
598 >>> b = lambda: 'default'
601 >>> b = lambda: 'default'
599 >>> b.branch = b
602 >>> b.branch = b
600 >>> formatspec('branch(%b)', b)
603 >>> formatspec('branch(%b)', b)
601 "branch('default')"
604 "branch('default')"
602 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
605 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
603 "root(_list('a\\x00b\\x00c\\x00d'))"
606 "root(_list('a\\x00b\\x00c\\x00d'))"
604 '''
607 '''
605
608
606 def quote(s):
609 def quote(s):
607 return repr(str(s))
610 return repr(str(s))
608
611
609 def argtype(c, arg):
612 def argtype(c, arg):
610 if c == 'd':
613 if c == 'd':
611 return str(int(arg))
614 return str(int(arg))
612 elif c == 's':
615 elif c == 's':
613 return quote(arg)
616 return quote(arg)
614 elif c == 'r':
617 elif c == 'r':
615 parse(arg) # make sure syntax errors are confined
618 parse(arg) # make sure syntax errors are confined
616 return '(%s)' % arg
619 return '(%s)' % arg
617 elif c == 'n':
620 elif c == 'n':
618 return quote(node.hex(arg))
621 return quote(node.hex(arg))
619 elif c == 'b':
622 elif c == 'b':
620 return quote(arg.branch())
623 return quote(arg.branch())
621
624
622 def listexp(s, t):
625 def listexp(s, t):
623 l = len(s)
626 l = len(s)
624 if l == 0:
627 if l == 0:
625 return "_list('')"
628 return "_list('')"
626 elif l == 1:
629 elif l == 1:
627 return argtype(t, s[0])
630 return argtype(t, s[0])
628 elif t == 'd':
631 elif t == 'd':
629 return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
632 return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
630 elif t == 's':
633 elif t == 's':
631 return "_list('%s')" % "\0".join(s)
634 return "_list('%s')" % "\0".join(s)
632 elif t == 'n':
635 elif t == 'n':
633 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
636 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
634 elif t == 'b':
637 elif t == 'b':
635 return "_list('%s')" % "\0".join(a.branch() for a in s)
638 return "_list('%s')" % "\0".join(a.branch() for a in s)
636
639
637 m = l // 2
640 m = l // 2
638 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
641 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
639
642
640 ret = ''
643 ret = ''
641 pos = 0
644 pos = 0
642 arg = 0
645 arg = 0
643 while pos < len(expr):
646 while pos < len(expr):
644 c = expr[pos]
647 c = expr[pos]
645 if c == '%':
648 if c == '%':
646 pos += 1
649 pos += 1
647 d = expr[pos]
650 d = expr[pos]
648 if d == '%':
651 if d == '%':
649 ret += d
652 ret += d
650 elif d in 'dsnbr':
653 elif d in 'dsnbr':
651 ret += argtype(d, args[arg])
654 ret += argtype(d, args[arg])
652 arg += 1
655 arg += 1
653 elif d == 'l':
656 elif d == 'l':
654 # a list of some type
657 # a list of some type
655 pos += 1
658 pos += 1
656 d = expr[pos]
659 d = expr[pos]
657 ret += listexp(list(args[arg]), d)
660 ret += listexp(list(args[arg]), d)
658 arg += 1
661 arg += 1
659 else:
662 else:
660 raise error.Abort(_('unexpected revspec format character %s')
663 raise error.Abort(_('unexpected revspec format character %s')
661 % d)
664 % d)
662 else:
665 else:
663 ret += c
666 ret += c
664 pos += 1
667 pos += 1
665
668
666 return ret
669 return ret
667
670
668 def prettyformat(tree):
671 def prettyformat(tree):
669 return parser.prettyformat(tree, ('string', 'symbol'))
672 return parser.prettyformat(tree, ('string', 'symbol'))
670
673
671 def depth(tree):
674 def depth(tree):
672 if isinstance(tree, tuple):
675 if isinstance(tree, tuple):
673 return max(map(depth, tree)) + 1
676 return max(map(depth, tree)) + 1
674 else:
677 else:
675 return 0
678 return 0
676
679
677 def funcsused(tree):
680 def funcsused(tree):
678 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
681 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
679 return set()
682 return set()
680 else:
683 else:
681 funcs = set()
684 funcs = set()
682 for s in tree[1:]:
685 for s in tree[1:]:
683 funcs |= funcsused(s)
686 funcs |= funcsused(s)
684 if tree[0] == 'func':
687 if tree[0] == 'func':
685 funcs.add(tree[1][1])
688 funcs.add(tree[1][1])
686 return funcs
689 return funcs
General Comments 0
You need to be logged in to leave comments. Login now