##// END OF EJS Templates
py3: fix slicing of bytes in revset.formatspec()
Yuya Nishihara -
r31385:1c48a827 default
parent child Browse files
Show More
@@ -1,689 +1,689 b''
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 )
18 )
19
19
20 elements = {
20 elements = {
21 # token-type: binding-strength, primary, prefix, infix, suffix
21 # token-type: binding-strength, primary, prefix, infix, suffix
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
23 "##": (20, None, None, ("_concat", 20), None),
23 "##": (20, None, None, ("_concat", 20), None),
24 "~": (18, None, None, ("ancestor", 18), None),
24 "~": (18, None, None, ("ancestor", 18), None),
25 "^": (18, None, None, ("parent", 18), "parentpost"),
25 "^": (18, None, None, ("parent", 18), "parentpost"),
26 "-": (5, None, ("negate", 19), ("minus", 5), None),
26 "-": (5, None, ("negate", 19), ("minus", 5), None),
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
30 "not": (10, None, ("not", 10), None, None),
30 "not": (10, None, ("not", 10), None, None),
31 "!": (10, None, ("not", 10), None, None),
31 "!": (10, None, ("not", 10), None, None),
32 "and": (5, None, None, ("and", 5), None),
32 "and": (5, None, None, ("and", 5), None),
33 "&": (5, None, None, ("and", 5), None),
33 "&": (5, None, None, ("and", 5), None),
34 "%": (5, None, None, ("only", 5), "onlypost"),
34 "%": (5, None, None, ("only", 5), "onlypost"),
35 "or": (4, None, None, ("or", 4), None),
35 "or": (4, None, None, ("or", 4), None),
36 "|": (4, None, None, ("or", 4), None),
36 "|": (4, None, None, ("or", 4), None),
37 "+": (4, None, None, ("or", 4), None),
37 "+": (4, None, None, ("or", 4), None),
38 "=": (3, None, None, ("keyvalue", 3), None),
38 "=": (3, None, None, ("keyvalue", 3), None),
39 ",": (2, None, None, ("list", 2), None),
39 ",": (2, None, None, ("list", 2), None),
40 ")": (0, None, None, None, None),
40 ")": (0, None, None, None, None),
41 "symbol": (0, "symbol", None, None, None),
41 "symbol": (0, "symbol", None, None, None),
42 "string": (0, "string", None, None, None),
42 "string": (0, "string", None, None, None),
43 "end": (0, None, None, None, None),
43 "end": (0, None, None, None, None),
44 }
44 }
45
45
46 keywords = set(['and', 'or', 'not'])
46 keywords = set(['and', 'or', 'not'])
47
47
48 _quoteletters = set(['"', "'"])
48 _quoteletters = set(['"', "'"])
49 _simpleopletters = set(pycompat.iterbytestr("():=,-|&+!~^%"))
49 _simpleopletters = set(pycompat.iterbytestr("():=,-|&+!~^%"))
50
50
51 # default set of valid characters for the initial letter of symbols
51 # default set of valid characters for the initial letter of symbols
52 _syminitletters = set(pycompat.iterbytestr(
52 _syminitletters = set(pycompat.iterbytestr(
53 string.ascii_letters.encode('ascii') +
53 string.ascii_letters.encode('ascii') +
54 string.digits.encode('ascii') +
54 string.digits.encode('ascii') +
55 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
55 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
56
56
57 # default set of valid characters for non-initial letters of symbols
57 # default set of valid characters for non-initial letters of symbols
58 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
58 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
59
59
60 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
60 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
61 '''
61 '''
62 Parse a revset statement into a stream of tokens
62 Parse a revset statement into a stream of tokens
63
63
64 ``syminitletters`` is the set of valid characters for the initial
64 ``syminitletters`` is the set of valid characters for the initial
65 letter of symbols.
65 letter of symbols.
66
66
67 By default, character ``c`` is recognized as valid for initial
67 By default, character ``c`` is recognized as valid for initial
68 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
68 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
69
69
70 ``symletters`` is the set of valid characters for non-initial
70 ``symletters`` is the set of valid characters for non-initial
71 letters of symbols.
71 letters of symbols.
72
72
73 By default, character ``c`` is recognized as valid for non-initial
73 By default, character ``c`` is recognized as valid for non-initial
74 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
74 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
75
75
76 Check that @ is a valid unquoted token character (issue3686):
76 Check that @ is a valid unquoted token character (issue3686):
77 >>> list(tokenize("@::"))
77 >>> list(tokenize("@::"))
78 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
78 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
79
79
80 '''
80 '''
81 if syminitletters is None:
81 if syminitletters is None:
82 syminitletters = _syminitletters
82 syminitletters = _syminitletters
83 if symletters is None:
83 if symletters is None:
84 symletters = _symletters
84 symletters = _symletters
85
85
86 if program and lookup:
86 if program and lookup:
87 # attempt to parse old-style ranges first to deal with
87 # attempt to parse old-style ranges first to deal with
88 # things like old-tag which contain query metacharacters
88 # things like old-tag which contain query metacharacters
89 parts = program.split(':', 1)
89 parts = program.split(':', 1)
90 if all(lookup(sym) for sym in parts if sym):
90 if all(lookup(sym) for sym in parts if sym):
91 if parts[0]:
91 if parts[0]:
92 yield ('symbol', parts[0], 0)
92 yield ('symbol', parts[0], 0)
93 if len(parts) > 1:
93 if len(parts) > 1:
94 s = len(parts[0])
94 s = len(parts[0])
95 yield (':', None, s)
95 yield (':', None, s)
96 if parts[1]:
96 if parts[1]:
97 yield ('symbol', parts[1], s + 1)
97 yield ('symbol', parts[1], s + 1)
98 yield ('end', None, len(program))
98 yield ('end', None, len(program))
99 return
99 return
100
100
101 pos, l = 0, len(program)
101 pos, l = 0, len(program)
102 while pos < l:
102 while pos < l:
103 c = program[pos:pos + 1]
103 c = program[pos:pos + 1]
104 if c.isspace(): # skip inter-token whitespace
104 if c.isspace(): # skip inter-token whitespace
105 pass
105 pass
106 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
106 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
107 yield ('::', None, pos)
107 yield ('::', None, pos)
108 pos += 1 # skip ahead
108 pos += 1 # skip ahead
109 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
109 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
110 yield ('..', None, pos)
110 yield ('..', None, pos)
111 pos += 1 # skip ahead
111 pos += 1 # skip ahead
112 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
112 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
113 yield ('##', None, pos)
113 yield ('##', None, pos)
114 pos += 1 # skip ahead
114 pos += 1 # skip ahead
115 elif c in _simpleopletters: # handle simple operators
115 elif c in _simpleopletters: # handle simple operators
116 yield (c, None, pos)
116 yield (c, None, pos)
117 elif (c in _quoteletters or c == 'r' and
117 elif (c in _quoteletters or c == 'r' and
118 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
118 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
119 if c == 'r':
119 if c == 'r':
120 pos += 1
120 pos += 1
121 c = program[pos:pos + 1]
121 c = program[pos:pos + 1]
122 decode = lambda x: x
122 decode = lambda x: x
123 else:
123 else:
124 decode = parser.unescapestr
124 decode = parser.unescapestr
125 pos += 1
125 pos += 1
126 s = pos
126 s = pos
127 while pos < l: # find closing quote
127 while pos < l: # find closing quote
128 d = program[pos:pos + 1]
128 d = program[pos:pos + 1]
129 if d == '\\': # skip over escaped characters
129 if d == '\\': # skip over escaped characters
130 pos += 2
130 pos += 2
131 continue
131 continue
132 if d == c:
132 if d == c:
133 yield ('string', decode(program[s:pos]), s)
133 yield ('string', decode(program[s:pos]), s)
134 break
134 break
135 pos += 1
135 pos += 1
136 else:
136 else:
137 raise error.ParseError(_("unterminated string"), s)
137 raise error.ParseError(_("unterminated string"), s)
138 # gather up a symbol/keyword
138 # gather up a symbol/keyword
139 elif c in syminitletters:
139 elif c in syminitletters:
140 s = pos
140 s = pos
141 pos += 1
141 pos += 1
142 while pos < l: # find end of symbol
142 while pos < l: # find end of symbol
143 d = program[pos:pos + 1]
143 d = program[pos:pos + 1]
144 if d not in symletters:
144 if d not in symletters:
145 break
145 break
146 if (d == '.'
146 if (d == '.'
147 and program[pos - 1:pos] == '.'): # special case for ..
147 and program[pos - 1:pos] == '.'): # special case for ..
148 pos -= 1
148 pos -= 1
149 break
149 break
150 pos += 1
150 pos += 1
151 sym = program[s:pos]
151 sym = program[s:pos]
152 if sym in keywords: # operator keywords
152 if sym in keywords: # operator keywords
153 yield (sym, None, s)
153 yield (sym, None, s)
154 elif '-' in sym:
154 elif '-' in sym:
155 # some jerk gave us foo-bar-baz, try to check if it's a symbol
155 # some jerk gave us foo-bar-baz, try to check if it's a symbol
156 if lookup and lookup(sym):
156 if lookup and lookup(sym):
157 # looks like a real symbol
157 # looks like a real symbol
158 yield ('symbol', sym, s)
158 yield ('symbol', sym, s)
159 else:
159 else:
160 # looks like an expression
160 # looks like an expression
161 parts = sym.split('-')
161 parts = sym.split('-')
162 for p in parts[:-1]:
162 for p in parts[:-1]:
163 if p: # possible consecutive -
163 if p: # possible consecutive -
164 yield ('symbol', p, s)
164 yield ('symbol', p, s)
165 s += len(p)
165 s += len(p)
166 yield ('-', None, pos)
166 yield ('-', None, pos)
167 s += 1
167 s += 1
168 if parts[-1]: # possible trailing -
168 if parts[-1]: # possible trailing -
169 yield ('symbol', parts[-1], s)
169 yield ('symbol', parts[-1], s)
170 else:
170 else:
171 yield ('symbol', sym, s)
171 yield ('symbol', sym, s)
172 pos -= 1
172 pos -= 1
173 else:
173 else:
174 raise error.ParseError(_("syntax error in revset '%s'") %
174 raise error.ParseError(_("syntax error in revset '%s'") %
175 program, pos)
175 program, pos)
176 pos += 1
176 pos += 1
177 yield ('end', None, pos)
177 yield ('end', None, pos)
178
178
179 # helpers
179 # helpers
180
180
181 _notset = object()
181 _notset = object()
182
182
183 def getsymbol(x):
183 def getsymbol(x):
184 if x and x[0] == 'symbol':
184 if x and x[0] == 'symbol':
185 return x[1]
185 return x[1]
186 raise error.ParseError(_('not a symbol'))
186 raise error.ParseError(_('not a symbol'))
187
187
188 def getstring(x, err):
188 def getstring(x, err):
189 if x and (x[0] == 'string' or x[0] == 'symbol'):
189 if x and (x[0] == 'string' or x[0] == 'symbol'):
190 return x[1]
190 return x[1]
191 raise error.ParseError(err)
191 raise error.ParseError(err)
192
192
193 def getinteger(x, err, default=_notset):
193 def getinteger(x, err, default=_notset):
194 if not x and default is not _notset:
194 if not x and default is not _notset:
195 return default
195 return default
196 try:
196 try:
197 return int(getstring(x, err))
197 return int(getstring(x, err))
198 except ValueError:
198 except ValueError:
199 raise error.ParseError(err)
199 raise error.ParseError(err)
200
200
201 def getlist(x):
201 def getlist(x):
202 if not x:
202 if not x:
203 return []
203 return []
204 if x[0] == 'list':
204 if x[0] == 'list':
205 return list(x[1:])
205 return list(x[1:])
206 return [x]
206 return [x]
207
207
208 def getrange(x, err):
208 def getrange(x, err):
209 if not x:
209 if not x:
210 raise error.ParseError(err)
210 raise error.ParseError(err)
211 op = x[0]
211 op = x[0]
212 if op == 'range':
212 if op == 'range':
213 return x[1], x[2]
213 return x[1], x[2]
214 elif op == 'rangepre':
214 elif op == 'rangepre':
215 return None, x[1]
215 return None, x[1]
216 elif op == 'rangepost':
216 elif op == 'rangepost':
217 return x[1], None
217 return x[1], None
218 elif op == 'rangeall':
218 elif op == 'rangeall':
219 return None, None
219 return None, None
220 raise error.ParseError(err)
220 raise error.ParseError(err)
221
221
222 def getargs(x, min, max, err):
222 def getargs(x, min, max, err):
223 l = getlist(x)
223 l = getlist(x)
224 if len(l) < min or (max >= 0 and len(l) > max):
224 if len(l) < min or (max >= 0 and len(l) > max):
225 raise error.ParseError(err)
225 raise error.ParseError(err)
226 return l
226 return l
227
227
228 def getargsdict(x, funcname, keys):
228 def getargsdict(x, funcname, keys):
229 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
229 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
230 keyvaluenode='keyvalue', keynode='symbol')
230 keyvaluenode='keyvalue', keynode='symbol')
231
231
232 # Constants for ordering requirement, used in _analyze():
232 # Constants for ordering requirement, used in _analyze():
233 #
233 #
234 # If 'define', any nested functions and operations can change the ordering of
234 # If 'define', any nested functions and operations can change the ordering of
235 # the entries in the set. If 'follow', any nested functions and operations
235 # the entries in the set. If 'follow', any nested functions and operations
236 # should take the ordering specified by the first operand to the '&' operator.
236 # should take the ordering specified by the first operand to the '&' operator.
237 #
237 #
238 # For instance,
238 # For instance,
239 #
239 #
240 # X & (Y | Z)
240 # X & (Y | Z)
241 # ^ ^^^^^^^
241 # ^ ^^^^^^^
242 # | follow
242 # | follow
243 # define
243 # define
244 #
244 #
245 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
245 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
246 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
246 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
247 #
247 #
248 # 'any' means the order doesn't matter. For instance,
248 # 'any' means the order doesn't matter. For instance,
249 #
249 #
250 # X & !Y
250 # X & !Y
251 # ^
251 # ^
252 # any
252 # any
253 #
253 #
254 # 'y()' can either enforce its ordering requirement or take the ordering
254 # 'y()' can either enforce its ordering requirement or take the ordering
255 # specified by 'x()' because 'not()' doesn't care the order.
255 # specified by 'x()' because 'not()' doesn't care the order.
256 #
256 #
257 # Transition of ordering requirement:
257 # Transition of ordering requirement:
258 #
258 #
259 # 1. starts with 'define'
259 # 1. starts with 'define'
260 # 2. shifts to 'follow' by 'x & y'
260 # 2. shifts to 'follow' by 'x & y'
261 # 3. changes back to 'define' on function call 'f(x)' or function-like
261 # 3. changes back to 'define' on function call 'f(x)' or function-like
262 # operation 'x (f) y' because 'f' may have its own ordering requirement
262 # operation 'x (f) y' because 'f' may have its own ordering requirement
263 # for 'x' and 'y' (e.g. 'first(x)')
263 # for 'x' and 'y' (e.g. 'first(x)')
264 #
264 #
265 anyorder = 'any' # don't care the order
265 anyorder = 'any' # don't care the order
266 defineorder = 'define' # should define the order
266 defineorder = 'define' # should define the order
267 followorder = 'follow' # must follow the current order
267 followorder = 'follow' # must follow the current order
268
268
269 # transition table for 'x & y', from the current expression 'x' to 'y'
269 # transition table for 'x & y', from the current expression 'x' to 'y'
270 _tofolloworder = {
270 _tofolloworder = {
271 anyorder: anyorder,
271 anyorder: anyorder,
272 defineorder: followorder,
272 defineorder: followorder,
273 followorder: followorder,
273 followorder: followorder,
274 }
274 }
275
275
276 def _matchonly(revs, bases):
276 def _matchonly(revs, bases):
277 """
277 """
278 >>> f = lambda *args: _matchonly(*map(parse, args))
278 >>> f = lambda *args: _matchonly(*map(parse, args))
279 >>> f('ancestors(A)', 'not ancestors(B)')
279 >>> f('ancestors(A)', 'not ancestors(B)')
280 ('list', ('symbol', 'A'), ('symbol', 'B'))
280 ('list', ('symbol', 'A'), ('symbol', 'B'))
281 """
281 """
282 if (revs is not None
282 if (revs is not None
283 and revs[0] == 'func'
283 and revs[0] == 'func'
284 and getsymbol(revs[1]) == 'ancestors'
284 and getsymbol(revs[1]) == 'ancestors'
285 and bases is not None
285 and bases is not None
286 and bases[0] == 'not'
286 and bases[0] == 'not'
287 and bases[1][0] == 'func'
287 and bases[1][0] == 'func'
288 and getsymbol(bases[1][1]) == 'ancestors'):
288 and getsymbol(bases[1][1]) == 'ancestors'):
289 return ('list', revs[2], bases[1][2])
289 return ('list', revs[2], bases[1][2])
290
290
291 def _fixops(x):
291 def _fixops(x):
292 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
292 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
293 handled well by our simple top-down parser"""
293 handled well by our simple top-down parser"""
294 if not isinstance(x, tuple):
294 if not isinstance(x, tuple):
295 return x
295 return x
296
296
297 op = x[0]
297 op = x[0]
298 if op == 'parent':
298 if op == 'parent':
299 # x^:y means (x^) : y, not x ^ (:y)
299 # x^:y means (x^) : y, not x ^ (:y)
300 # x^: means (x^) :, not x ^ (:)
300 # x^: means (x^) :, not x ^ (:)
301 post = ('parentpost', x[1])
301 post = ('parentpost', x[1])
302 if x[2][0] == 'dagrangepre':
302 if x[2][0] == 'dagrangepre':
303 return _fixops(('dagrange', post, x[2][1]))
303 return _fixops(('dagrange', post, x[2][1]))
304 elif x[2][0] == 'rangepre':
304 elif x[2][0] == 'rangepre':
305 return _fixops(('range', post, x[2][1]))
305 return _fixops(('range', post, x[2][1]))
306 elif x[2][0] == 'rangeall':
306 elif x[2][0] == 'rangeall':
307 return _fixops(('rangepost', post))
307 return _fixops(('rangepost', post))
308 elif op == 'or':
308 elif op == 'or':
309 # make number of arguments deterministic:
309 # make number of arguments deterministic:
310 # x + y + z -> (or x y z) -> (or (list x y z))
310 # x + y + z -> (or x y z) -> (or (list x y z))
311 return (op, _fixops(('list',) + x[1:]))
311 return (op, _fixops(('list',) + x[1:]))
312
312
313 return (op,) + tuple(_fixops(y) for y in x[1:])
313 return (op,) + tuple(_fixops(y) for y in x[1:])
314
314
315 def _analyze(x, order):
315 def _analyze(x, order):
316 if x is None:
316 if x is None:
317 return x
317 return x
318
318
319 op = x[0]
319 op = x[0]
320 if op == 'minus':
320 if op == 'minus':
321 return _analyze(('and', x[1], ('not', x[2])), order)
321 return _analyze(('and', x[1], ('not', x[2])), order)
322 elif op == 'only':
322 elif op == 'only':
323 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
323 t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
324 return _analyze(t, order)
324 return _analyze(t, order)
325 elif op == 'onlypost':
325 elif op == 'onlypost':
326 return _analyze(('func', ('symbol', 'only'), x[1]), order)
326 return _analyze(('func', ('symbol', 'only'), x[1]), order)
327 elif op == 'dagrangepre':
327 elif op == 'dagrangepre':
328 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
328 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
329 elif op == 'dagrangepost':
329 elif op == 'dagrangepost':
330 return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
330 return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
331 elif op == 'negate':
331 elif op == 'negate':
332 s = getstring(x[1], _("can't negate that"))
332 s = getstring(x[1], _("can't negate that"))
333 return _analyze(('string', '-' + s), order)
333 return _analyze(('string', '-' + s), order)
334 elif op in ('string', 'symbol'):
334 elif op in ('string', 'symbol'):
335 return x
335 return x
336 elif op == 'and':
336 elif op == 'and':
337 ta = _analyze(x[1], order)
337 ta = _analyze(x[1], order)
338 tb = _analyze(x[2], _tofolloworder[order])
338 tb = _analyze(x[2], _tofolloworder[order])
339 return (op, ta, tb, order)
339 return (op, ta, tb, order)
340 elif op == 'or':
340 elif op == 'or':
341 return (op, _analyze(x[1], order), order)
341 return (op, _analyze(x[1], order), order)
342 elif op == 'not':
342 elif op == 'not':
343 return (op, _analyze(x[1], anyorder), order)
343 return (op, _analyze(x[1], anyorder), order)
344 elif op == 'rangeall':
344 elif op == 'rangeall':
345 return (op, None, order)
345 return (op, None, order)
346 elif op in ('rangepre', 'rangepost', 'parentpost'):
346 elif op in ('rangepre', 'rangepost', 'parentpost'):
347 return (op, _analyze(x[1], defineorder), order)
347 return (op, _analyze(x[1], defineorder), order)
348 elif op == 'group':
348 elif op == 'group':
349 return _analyze(x[1], order)
349 return _analyze(x[1], order)
350 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
350 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
351 ta = _analyze(x[1], defineorder)
351 ta = _analyze(x[1], defineorder)
352 tb = _analyze(x[2], defineorder)
352 tb = _analyze(x[2], defineorder)
353 return (op, ta, tb, order)
353 return (op, ta, tb, order)
354 elif op == 'list':
354 elif op == 'list':
355 return (op,) + tuple(_analyze(y, order) for y in x[1:])
355 return (op,) + tuple(_analyze(y, order) for y in x[1:])
356 elif op == 'keyvalue':
356 elif op == 'keyvalue':
357 return (op, x[1], _analyze(x[2], order))
357 return (op, x[1], _analyze(x[2], order))
358 elif op == 'func':
358 elif op == 'func':
359 f = getsymbol(x[1])
359 f = getsymbol(x[1])
360 d = defineorder
360 d = defineorder
361 if f == 'present':
361 if f == 'present':
362 # 'present(set)' is known to return the argument set with no
362 # 'present(set)' is known to return the argument set with no
363 # modification, so forward the current order to its argument
363 # modification, so forward the current order to its argument
364 d = order
364 d = order
365 return (op, x[1], _analyze(x[2], d), order)
365 return (op, x[1], _analyze(x[2], d), order)
366 raise ValueError('invalid operator %r' % op)
366 raise ValueError('invalid operator %r' % op)
367
367
368 def analyze(x, order=defineorder):
368 def analyze(x, order=defineorder):
369 """Transform raw parsed tree to evaluatable tree which can be fed to
369 """Transform raw parsed tree to evaluatable tree which can be fed to
370 optimize() or getset()
370 optimize() or getset()
371
371
372 All pseudo operations should be mapped to real operations or functions
372 All pseudo operations should be mapped to real operations or functions
373 defined in methods or symbols table respectively.
373 defined in methods or symbols table respectively.
374
374
375 'order' specifies how the current expression 'x' is ordered (see the
375 'order' specifies how the current expression 'x' is ordered (see the
376 constants defined above.)
376 constants defined above.)
377 """
377 """
378 return _analyze(x, order)
378 return _analyze(x, order)
379
379
380 def _optimize(x, small):
380 def _optimize(x, small):
381 if x is None:
381 if x is None:
382 return 0, x
382 return 0, x
383
383
384 smallbonus = 1
384 smallbonus = 1
385 if small:
385 if small:
386 smallbonus = .5
386 smallbonus = .5
387
387
388 op = x[0]
388 op = x[0]
389 if op in ('string', 'symbol'):
389 if op in ('string', 'symbol'):
390 return smallbonus, x # single revisions are small
390 return smallbonus, x # single revisions are small
391 elif op == 'and':
391 elif op == 'and':
392 wa, ta = _optimize(x[1], True)
392 wa, ta = _optimize(x[1], True)
393 wb, tb = _optimize(x[2], True)
393 wb, tb = _optimize(x[2], True)
394 order = x[3]
394 order = x[3]
395 w = min(wa, wb)
395 w = min(wa, wb)
396
396
397 # (::x and not ::y)/(not ::y and ::x) have a fast path
397 # (::x and not ::y)/(not ::y and ::x) have a fast path
398 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
398 tm = _matchonly(ta, tb) or _matchonly(tb, ta)
399 if tm:
399 if tm:
400 return w, ('func', ('symbol', 'only'), tm, order)
400 return w, ('func', ('symbol', 'only'), tm, order)
401
401
402 if tb is not None and tb[0] == 'not':
402 if tb is not None and tb[0] == 'not':
403 return wa, ('difference', ta, tb[1], order)
403 return wa, ('difference', ta, tb[1], order)
404
404
405 if wa > wb:
405 if wa > wb:
406 return w, (op, tb, ta, order)
406 return w, (op, tb, ta, order)
407 return w, (op, ta, tb, order)
407 return w, (op, ta, tb, order)
408 elif op == 'or':
408 elif op == 'or':
409 # fast path for machine-generated expression, that is likely to have
409 # fast path for machine-generated expression, that is likely to have
410 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
410 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
411 order = x[2]
411 order = x[2]
412 ws, ts, ss = [], [], []
412 ws, ts, ss = [], [], []
413 def flushss():
413 def flushss():
414 if not ss:
414 if not ss:
415 return
415 return
416 if len(ss) == 1:
416 if len(ss) == 1:
417 w, t = ss[0]
417 w, t = ss[0]
418 else:
418 else:
419 s = '\0'.join(t[1] for w, t in ss)
419 s = '\0'.join(t[1] for w, t in ss)
420 y = ('func', ('symbol', '_list'), ('string', s), order)
420 y = ('func', ('symbol', '_list'), ('string', s), order)
421 w, t = _optimize(y, False)
421 w, t = _optimize(y, False)
422 ws.append(w)
422 ws.append(w)
423 ts.append(t)
423 ts.append(t)
424 del ss[:]
424 del ss[:]
425 for y in getlist(x[1]):
425 for y in getlist(x[1]):
426 w, t = _optimize(y, False)
426 w, t = _optimize(y, False)
427 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
427 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
428 ss.append((w, t))
428 ss.append((w, t))
429 continue
429 continue
430 flushss()
430 flushss()
431 ws.append(w)
431 ws.append(w)
432 ts.append(t)
432 ts.append(t)
433 flushss()
433 flushss()
434 if len(ts) == 1:
434 if len(ts) == 1:
435 return ws[0], ts[0] # 'or' operation is fully optimized out
435 return ws[0], ts[0] # 'or' operation is fully optimized out
436 # we can't reorder trees by weight because it would change the order.
436 # we can't reorder trees by weight because it would change the order.
437 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
437 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
438 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
438 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
439 return max(ws), (op, ('list',) + tuple(ts), order)
439 return max(ws), (op, ('list',) + tuple(ts), order)
440 elif op == 'not':
440 elif op == 'not':
441 # Optimize not public() to _notpublic() because we have a fast version
441 # Optimize not public() to _notpublic() because we have a fast version
442 if x[1][:3] == ('func', ('symbol', 'public'), None):
442 if x[1][:3] == ('func', ('symbol', 'public'), None):
443 order = x[1][3]
443 order = x[1][3]
444 newsym = ('func', ('symbol', '_notpublic'), None, order)
444 newsym = ('func', ('symbol', '_notpublic'), None, order)
445 o = _optimize(newsym, not small)
445 o = _optimize(newsym, not small)
446 return o[0], o[1]
446 return o[0], o[1]
447 else:
447 else:
448 o = _optimize(x[1], not small)
448 o = _optimize(x[1], not small)
449 order = x[2]
449 order = x[2]
450 return o[0], (op, o[1], order)
450 return o[0], (op, o[1], order)
451 elif op == 'rangeall':
451 elif op == 'rangeall':
452 return smallbonus, x
452 return smallbonus, x
453 elif op in ('rangepre', 'rangepost', 'parentpost'):
453 elif op in ('rangepre', 'rangepost', 'parentpost'):
454 o = _optimize(x[1], small)
454 o = _optimize(x[1], small)
455 order = x[2]
455 order = x[2]
456 return o[0], (op, o[1], order)
456 return o[0], (op, o[1], order)
457 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
457 elif op in ('dagrange', 'range', 'parent', 'ancestor'):
458 wa, ta = _optimize(x[1], small)
458 wa, ta = _optimize(x[1], small)
459 wb, tb = _optimize(x[2], small)
459 wb, tb = _optimize(x[2], small)
460 order = x[3]
460 order = x[3]
461 return wa + wb, (op, ta, tb, order)
461 return wa + wb, (op, ta, tb, order)
462 elif op == 'list':
462 elif op == 'list':
463 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
463 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
464 return sum(ws), (op,) + ts
464 return sum(ws), (op,) + ts
465 elif op == 'keyvalue':
465 elif op == 'keyvalue':
466 w, t = _optimize(x[2], small)
466 w, t = _optimize(x[2], small)
467 return w, (op, x[1], t)
467 return w, (op, x[1], t)
468 elif op == 'func':
468 elif op == 'func':
469 f = getsymbol(x[1])
469 f = getsymbol(x[1])
470 wa, ta = _optimize(x[2], small)
470 wa, ta = _optimize(x[2], small)
471 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
471 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
472 'keyword', 'outgoing', 'user', 'destination'):
472 'keyword', 'outgoing', 'user', 'destination'):
473 w = 10 # slow
473 w = 10 # slow
474 elif f in ('modifies', 'adds', 'removes'):
474 elif f in ('modifies', 'adds', 'removes'):
475 w = 30 # slower
475 w = 30 # slower
476 elif f == "contains":
476 elif f == "contains":
477 w = 100 # very slow
477 w = 100 # very slow
478 elif f == "ancestor":
478 elif f == "ancestor":
479 w = 1 * smallbonus
479 w = 1 * smallbonus
480 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
480 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
481 w = 0
481 w = 0
482 elif f == "sort":
482 elif f == "sort":
483 w = 10 # assume most sorts look at changelog
483 w = 10 # assume most sorts look at changelog
484 else:
484 else:
485 w = 1
485 w = 1
486 order = x[3]
486 order = x[3]
487 return w + wa, (op, x[1], ta, order)
487 return w + wa, (op, x[1], ta, order)
488 raise ValueError('invalid operator %r' % op)
488 raise ValueError('invalid operator %r' % op)
489
489
490 def optimize(tree):
490 def optimize(tree):
491 """Optimize evaluatable tree
491 """Optimize evaluatable tree
492
492
493 All pseudo operations should be transformed beforehand.
493 All pseudo operations should be transformed beforehand.
494 """
494 """
495 _weight, newtree = _optimize(tree, small=True)
495 _weight, newtree = _optimize(tree, small=True)
496 return newtree
496 return newtree
497
497
498 # the set of valid characters for the initial letter of symbols in
498 # the set of valid characters for the initial letter of symbols in
499 # alias declarations and definitions
499 # alias declarations and definitions
500 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
500 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
501
501
502 def _parsewith(spec, lookup=None, syminitletters=None):
502 def _parsewith(spec, lookup=None, syminitletters=None):
503 """Generate a parse tree of given spec with given tokenizing options
503 """Generate a parse tree of given spec with given tokenizing options
504
504
505 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
505 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
506 ('func', ('symbol', 'foo'), ('symbol', '$1'))
506 ('func', ('symbol', 'foo'), ('symbol', '$1'))
507 >>> _parsewith('$1')
507 >>> _parsewith('$1')
508 Traceback (most recent call last):
508 Traceback (most recent call last):
509 ...
509 ...
510 ParseError: ("syntax error in revset '$1'", 0)
510 ParseError: ("syntax error in revset '$1'", 0)
511 >>> _parsewith('foo bar')
511 >>> _parsewith('foo bar')
512 Traceback (most recent call last):
512 Traceback (most recent call last):
513 ...
513 ...
514 ParseError: ('invalid token', 4)
514 ParseError: ('invalid token', 4)
515 """
515 """
516 p = parser.parser(elements)
516 p = parser.parser(elements)
517 tree, pos = p.parse(tokenize(spec, lookup=lookup,
517 tree, pos = p.parse(tokenize(spec, lookup=lookup,
518 syminitletters=syminitletters))
518 syminitletters=syminitletters))
519 if pos != len(spec):
519 if pos != len(spec):
520 raise error.ParseError(_('invalid token'), pos)
520 raise error.ParseError(_('invalid token'), pos)
521 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
521 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
522
522
523 class _aliasrules(parser.basealiasrules):
523 class _aliasrules(parser.basealiasrules):
524 """Parsing and expansion rule set of revset aliases"""
524 """Parsing and expansion rule set of revset aliases"""
525 _section = _('revset alias')
525 _section = _('revset alias')
526
526
527 @staticmethod
527 @staticmethod
528 def _parse(spec):
528 def _parse(spec):
529 """Parse alias declaration/definition ``spec``
529 """Parse alias declaration/definition ``spec``
530
530
531 This allows symbol names to use also ``$`` as an initial letter
531 This allows symbol names to use also ``$`` as an initial letter
532 (for backward compatibility), and callers of this function should
532 (for backward compatibility), and callers of this function should
533 examine whether ``$`` is used also for unexpected symbols or not.
533 examine whether ``$`` is used also for unexpected symbols or not.
534 """
534 """
535 return _parsewith(spec, syminitletters=_aliassyminitletters)
535 return _parsewith(spec, syminitletters=_aliassyminitletters)
536
536
537 @staticmethod
537 @staticmethod
538 def _trygetfunc(tree):
538 def _trygetfunc(tree):
539 if tree[0] == 'func' and tree[1][0] == 'symbol':
539 if tree[0] == 'func' and tree[1][0] == 'symbol':
540 return tree[1][1], getlist(tree[2])
540 return tree[1][1], getlist(tree[2])
541
541
542 def expandaliases(ui, tree):
542 def expandaliases(ui, tree):
543 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
543 aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
544 tree = _aliasrules.expand(aliases, tree)
544 tree = _aliasrules.expand(aliases, tree)
545 # warn about problematic (but not referred) aliases
545 # warn about problematic (but not referred) aliases
546 for name, alias in sorted(aliases.iteritems()):
546 for name, alias in sorted(aliases.iteritems()):
547 if alias.error and not alias.warned:
547 if alias.error and not alias.warned:
548 ui.warn(_('warning: %s\n') % (alias.error))
548 ui.warn(_('warning: %s\n') % (alias.error))
549 alias.warned = True
549 alias.warned = True
550 return tree
550 return tree
551
551
552 def foldconcat(tree):
552 def foldconcat(tree):
553 """Fold elements to be concatenated by `##`
553 """Fold elements to be concatenated by `##`
554 """
554 """
555 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
555 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
556 return tree
556 return tree
557 if tree[0] == '_concat':
557 if tree[0] == '_concat':
558 pending = [tree]
558 pending = [tree]
559 l = []
559 l = []
560 while pending:
560 while pending:
561 e = pending.pop()
561 e = pending.pop()
562 if e[0] == '_concat':
562 if e[0] == '_concat':
563 pending.extend(reversed(e[1:]))
563 pending.extend(reversed(e[1:]))
564 elif e[0] in ('string', 'symbol'):
564 elif e[0] in ('string', 'symbol'):
565 l.append(e[1])
565 l.append(e[1])
566 else:
566 else:
567 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
567 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
568 raise error.ParseError(msg)
568 raise error.ParseError(msg)
569 return ('string', ''.join(l))
569 return ('string', ''.join(l))
570 else:
570 else:
571 return tuple(foldconcat(t) for t in tree)
571 return tuple(foldconcat(t) for t in tree)
572
572
573 def parse(spec, lookup=None):
573 def parse(spec, lookup=None):
574 return _parsewith(spec, lookup=lookup)
574 return _parsewith(spec, lookup=lookup)
575
575
576 def formatspec(expr, *args):
576 def formatspec(expr, *args):
577 '''
577 '''
578 This is a convenience function for using revsets internally, and
578 This is a convenience function for using revsets internally, and
579 escapes arguments appropriately. Aliases are intentionally ignored
579 escapes arguments appropriately. Aliases are intentionally ignored
580 so that intended expression behavior isn't accidentally subverted.
580 so that intended expression behavior isn't accidentally subverted.
581
581
582 Supported arguments:
582 Supported arguments:
583
583
584 %r = revset expression, parenthesized
584 %r = revset expression, parenthesized
585 %d = int(arg), no quoting
585 %d = int(arg), no quoting
586 %s = string(arg), escaped and single-quoted
586 %s = string(arg), escaped and single-quoted
587 %b = arg.branch(), escaped and single-quoted
587 %b = arg.branch(), escaped and single-quoted
588 %n = hex(arg), single-quoted
588 %n = hex(arg), single-quoted
589 %% = a literal '%'
589 %% = a literal '%'
590
590
591 Prefixing the type with 'l' specifies a parenthesized list of that type.
591 Prefixing the type with 'l' specifies a parenthesized list of that type.
592
592
593 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
593 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
594 '(10 or 11):: and ((this()) or (that()))'
594 '(10 or 11):: and ((this()) or (that()))'
595 >>> formatspec('%d:: and not %d::', 10, 20)
595 >>> formatspec('%d:: and not %d::', 10, 20)
596 '10:: and not 20::'
596 '10:: and not 20::'
597 >>> formatspec('%ld or %ld', [], [1])
597 >>> formatspec('%ld or %ld', [], [1])
598 "_list('') or 1"
598 "_list('') or 1"
599 >>> formatspec('keyword(%s)', 'foo\\xe9')
599 >>> formatspec('keyword(%s)', 'foo\\xe9')
600 "keyword('foo\\\\xe9')"
600 "keyword('foo\\\\xe9')"
601 >>> b = lambda: 'default'
601 >>> b = lambda: 'default'
602 >>> b.branch = b
602 >>> b.branch = b
603 >>> formatspec('branch(%b)', b)
603 >>> formatspec('branch(%b)', b)
604 "branch('default')"
604 "branch('default')"
605 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
605 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
606 "root(_list('a\\x00b\\x00c\\x00d'))"
606 "root(_list('a\\x00b\\x00c\\x00d'))"
607 '''
607 '''
608
608
609 def quote(s):
609 def quote(s):
610 return repr(str(s))
610 return repr(str(s))
611
611
612 def argtype(c, arg):
612 def argtype(c, arg):
613 if c == 'd':
613 if c == 'd':
614 return str(int(arg))
614 return str(int(arg))
615 elif c == 's':
615 elif c == 's':
616 return quote(arg)
616 return quote(arg)
617 elif c == 'r':
617 elif c == 'r':
618 parse(arg) # make sure syntax errors are confined
618 parse(arg) # make sure syntax errors are confined
619 return '(%s)' % arg
619 return '(%s)' % arg
620 elif c == 'n':
620 elif c == 'n':
621 return quote(node.hex(arg))
621 return quote(node.hex(arg))
622 elif c == 'b':
622 elif c == 'b':
623 return quote(arg.branch())
623 return quote(arg.branch())
624
624
625 def listexp(s, t):
625 def listexp(s, t):
626 l = len(s)
626 l = len(s)
627 if l == 0:
627 if l == 0:
628 return "_list('')"
628 return "_list('')"
629 elif l == 1:
629 elif l == 1:
630 return argtype(t, s[0])
630 return argtype(t, s[0])
631 elif t == 'd':
631 elif t == 'd':
632 return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
632 return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
633 elif t == 's':
633 elif t == 's':
634 return "_list('%s')" % "\0".join(s)
634 return "_list('%s')" % "\0".join(s)
635 elif t == 'n':
635 elif t == 'n':
636 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
636 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
637 elif t == 'b':
637 elif t == 'b':
638 return "_list('%s')" % "\0".join(a.branch() for a in s)
638 return "_list('%s')" % "\0".join(a.branch() for a in s)
639
639
640 m = l // 2
640 m = l // 2
641 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
641 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
642
642
643 ret = ''
643 ret = ''
644 pos = 0
644 pos = 0
645 arg = 0
645 arg = 0
646 while pos < len(expr):
646 while pos < len(expr):
647 c = expr[pos]
647 c = expr[pos:pos + 1]
648 if c == '%':
648 if c == '%':
649 pos += 1
649 pos += 1
650 d = expr[pos]
650 d = expr[pos:pos + 1]
651 if d == '%':
651 if d == '%':
652 ret += d
652 ret += d
653 elif d in 'dsnbr':
653 elif d in 'dsnbr':
654 ret += argtype(d, args[arg])
654 ret += argtype(d, args[arg])
655 arg += 1
655 arg += 1
656 elif d == 'l':
656 elif d == 'l':
657 # a list of some type
657 # a list of some type
658 pos += 1
658 pos += 1
659 d = expr[pos]
659 d = expr[pos:pos + 1]
660 ret += listexp(list(args[arg]), d)
660 ret += listexp(list(args[arg]), d)
661 arg += 1
661 arg += 1
662 else:
662 else:
663 raise error.Abort(_('unexpected revspec format character %s')
663 raise error.Abort(_('unexpected revspec format character %s')
664 % d)
664 % d)
665 else:
665 else:
666 ret += c
666 ret += c
667 pos += 1
667 pos += 1
668
668
669 return ret
669 return ret
670
670
671 def prettyformat(tree):
671 def prettyformat(tree):
672 return parser.prettyformat(tree, ('string', 'symbol'))
672 return parser.prettyformat(tree, ('string', 'symbol'))
673
673
674 def depth(tree):
674 def depth(tree):
675 if isinstance(tree, tuple):
675 if isinstance(tree, tuple):
676 return max(map(depth, tree)) + 1
676 return max(map(depth, tree)) + 1
677 else:
677 else:
678 return 0
678 return 0
679
679
680 def funcsused(tree):
680 def funcsused(tree):
681 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
681 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
682 return set()
682 return set()
683 else:
683 else:
684 funcs = set()
684 funcs = set()
685 for s in tree[1:]:
685 for s in tree[1:]:
686 funcs |= funcsused(s)
686 funcs |= funcsused(s)
687 if tree[0] == 'func':
687 if tree[0] == 'func':
688 funcs.add(tree[1][1])
688 funcs.add(tree[1][1])
689 return funcs
689 return funcs
General Comments 0
You need to be logged in to leave comments. Login now