##// END OF EJS Templates
revset: remove "small" argument from "_optimize"...
Jun Wu -
r34273:53fb09c7 default
parent child Browse files
Show More
@@ -1,679 +1,675 b''
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 util,
18 util,
19 )
19 )
20
20
21 elements = {
21 elements = {
22 # token-type: binding-strength, primary, prefix, infix, suffix
22 # token-type: binding-strength, primary, prefix, infix, suffix
23 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
23 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
24 "[": (21, None, None, ("subscript", 1, "]"), None),
24 "[": (21, None, None, ("subscript", 1, "]"), None),
25 "#": (21, None, None, ("relation", 21), None),
25 "#": (21, None, None, ("relation", 21), None),
26 "##": (20, None, None, ("_concat", 20), None),
26 "##": (20, None, None, ("_concat", 20), None),
27 "~": (18, None, None, ("ancestor", 18), None),
27 "~": (18, None, None, ("ancestor", 18), None),
28 "^": (18, None, None, ("parent", 18), "parentpost"),
28 "^": (18, None, None, ("parent", 18), "parentpost"),
29 "-": (5, None, ("negate", 19), ("minus", 5), None),
29 "-": (5, None, ("negate", 19), ("minus", 5), None),
30 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
30 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
31 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
31 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
32 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
32 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
33 "not": (10, None, ("not", 10), None, None),
33 "not": (10, None, ("not", 10), None, None),
34 "!": (10, None, ("not", 10), None, None),
34 "!": (10, None, ("not", 10), None, None),
35 "and": (5, None, None, ("and", 5), None),
35 "and": (5, None, None, ("and", 5), None),
36 "&": (5, None, None, ("and", 5), None),
36 "&": (5, None, None, ("and", 5), None),
37 "%": (5, None, None, ("only", 5), "onlypost"),
37 "%": (5, None, None, ("only", 5), "onlypost"),
38 "or": (4, None, None, ("or", 4), None),
38 "or": (4, None, None, ("or", 4), None),
39 "|": (4, None, None, ("or", 4), None),
39 "|": (4, None, None, ("or", 4), None),
40 "+": (4, None, None, ("or", 4), None),
40 "+": (4, None, None, ("or", 4), None),
41 "=": (3, None, None, ("keyvalue", 3), None),
41 "=": (3, None, None, ("keyvalue", 3), None),
42 ",": (2, None, None, ("list", 2), None),
42 ",": (2, None, None, ("list", 2), None),
43 ")": (0, None, None, None, None),
43 ")": (0, None, None, None, None),
44 "]": (0, None, None, None, None),
44 "]": (0, None, None, None, None),
45 "symbol": (0, "symbol", None, None, None),
45 "symbol": (0, "symbol", None, None, None),
46 "string": (0, "string", None, None, None),
46 "string": (0, "string", None, None, None),
47 "end": (0, None, None, None, None),
47 "end": (0, None, None, None, None),
48 }
48 }
49
49
50 keywords = {'and', 'or', 'not'}
50 keywords = {'and', 'or', 'not'}
51
51
52 _quoteletters = {'"', "'"}
52 _quoteletters = {'"', "'"}
53 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
53 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
54
54
55 # default set of valid characters for the initial letter of symbols
55 # default set of valid characters for the initial letter of symbols
56 _syminitletters = set(pycompat.iterbytestr(
56 _syminitletters = set(pycompat.iterbytestr(
57 string.ascii_letters.encode('ascii') +
57 string.ascii_letters.encode('ascii') +
58 string.digits.encode('ascii') +
58 string.digits.encode('ascii') +
59 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
59 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
60
60
61 # default set of valid characters for non-initial letters of symbols
61 # default set of valid characters for non-initial letters of symbols
62 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
62 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
63
63
64 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
64 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
65 '''
65 '''
66 Parse a revset statement into a stream of tokens
66 Parse a revset statement into a stream of tokens
67
67
68 ``syminitletters`` is the set of valid characters for the initial
68 ``syminitletters`` is the set of valid characters for the initial
69 letter of symbols.
69 letter of symbols.
70
70
71 By default, character ``c`` is recognized as valid for initial
71 By default, character ``c`` is recognized as valid for initial
72 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
72 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
73
73
74 ``symletters`` is the set of valid characters for non-initial
74 ``symletters`` is the set of valid characters for non-initial
75 letters of symbols.
75 letters of symbols.
76
76
77 By default, character ``c`` is recognized as valid for non-initial
77 By default, character ``c`` is recognized as valid for non-initial
78 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
78 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
79
79
80 Check that @ is a valid unquoted token character (issue3686):
80 Check that @ is a valid unquoted token character (issue3686):
81 >>> list(tokenize(b"@::"))
81 >>> list(tokenize(b"@::"))
82 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
82 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
83
83
84 '''
84 '''
85 program = pycompat.bytestr(program)
85 program = pycompat.bytestr(program)
86 if syminitletters is None:
86 if syminitletters is None:
87 syminitletters = _syminitletters
87 syminitletters = _syminitletters
88 if symletters is None:
88 if symletters is None:
89 symletters = _symletters
89 symletters = _symletters
90
90
91 if program and lookup:
91 if program and lookup:
92 # attempt to parse old-style ranges first to deal with
92 # attempt to parse old-style ranges first to deal with
93 # things like old-tag which contain query metacharacters
93 # things like old-tag which contain query metacharacters
94 parts = program.split(':', 1)
94 parts = program.split(':', 1)
95 if all(lookup(sym) for sym in parts if sym):
95 if all(lookup(sym) for sym in parts if sym):
96 if parts[0]:
96 if parts[0]:
97 yield ('symbol', parts[0], 0)
97 yield ('symbol', parts[0], 0)
98 if len(parts) > 1:
98 if len(parts) > 1:
99 s = len(parts[0])
99 s = len(parts[0])
100 yield (':', None, s)
100 yield (':', None, s)
101 if parts[1]:
101 if parts[1]:
102 yield ('symbol', parts[1], s + 1)
102 yield ('symbol', parts[1], s + 1)
103 yield ('end', None, len(program))
103 yield ('end', None, len(program))
104 return
104 return
105
105
106 pos, l = 0, len(program)
106 pos, l = 0, len(program)
107 while pos < l:
107 while pos < l:
108 c = program[pos]
108 c = program[pos]
109 if c.isspace(): # skip inter-token whitespace
109 if c.isspace(): # skip inter-token whitespace
110 pass
110 pass
111 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
111 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
112 yield ('::', None, pos)
112 yield ('::', None, pos)
113 pos += 1 # skip ahead
113 pos += 1 # skip ahead
114 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
114 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
115 yield ('..', None, pos)
115 yield ('..', None, pos)
116 pos += 1 # skip ahead
116 pos += 1 # skip ahead
117 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
117 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
118 yield ('##', None, pos)
118 yield ('##', None, pos)
119 pos += 1 # skip ahead
119 pos += 1 # skip ahead
120 elif c in _simpleopletters: # handle simple operators
120 elif c in _simpleopletters: # handle simple operators
121 yield (c, None, pos)
121 yield (c, None, pos)
122 elif (c in _quoteletters or c == 'r' and
122 elif (c in _quoteletters or c == 'r' and
123 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
123 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
124 if c == 'r':
124 if c == 'r':
125 pos += 1
125 pos += 1
126 c = program[pos]
126 c = program[pos]
127 decode = lambda x: x
127 decode = lambda x: x
128 else:
128 else:
129 decode = parser.unescapestr
129 decode = parser.unescapestr
130 pos += 1
130 pos += 1
131 s = pos
131 s = pos
132 while pos < l: # find closing quote
132 while pos < l: # find closing quote
133 d = program[pos]
133 d = program[pos]
134 if d == '\\': # skip over escaped characters
134 if d == '\\': # skip over escaped characters
135 pos += 2
135 pos += 2
136 continue
136 continue
137 if d == c:
137 if d == c:
138 yield ('string', decode(program[s:pos]), s)
138 yield ('string', decode(program[s:pos]), s)
139 break
139 break
140 pos += 1
140 pos += 1
141 else:
141 else:
142 raise error.ParseError(_("unterminated string"), s)
142 raise error.ParseError(_("unterminated string"), s)
143 # gather up a symbol/keyword
143 # gather up a symbol/keyword
144 elif c in syminitletters:
144 elif c in syminitletters:
145 s = pos
145 s = pos
146 pos += 1
146 pos += 1
147 while pos < l: # find end of symbol
147 while pos < l: # find end of symbol
148 d = program[pos]
148 d = program[pos]
149 if d not in symletters:
149 if d not in symletters:
150 break
150 break
151 if d == '.' and program[pos - 1] == '.': # special case for ..
151 if d == '.' and program[pos - 1] == '.': # special case for ..
152 pos -= 1
152 pos -= 1
153 break
153 break
154 pos += 1
154 pos += 1
155 sym = program[s:pos]
155 sym = program[s:pos]
156 if sym in keywords: # operator keywords
156 if sym in keywords: # operator keywords
157 yield (sym, None, s)
157 yield (sym, None, s)
158 elif '-' in sym:
158 elif '-' in sym:
159 # some jerk gave us foo-bar-baz, try to check if it's a symbol
159 # some jerk gave us foo-bar-baz, try to check if it's a symbol
160 if lookup and lookup(sym):
160 if lookup and lookup(sym):
161 # looks like a real symbol
161 # looks like a real symbol
162 yield ('symbol', sym, s)
162 yield ('symbol', sym, s)
163 else:
163 else:
164 # looks like an expression
164 # looks like an expression
165 parts = sym.split('-')
165 parts = sym.split('-')
166 for p in parts[:-1]:
166 for p in parts[:-1]:
167 if p: # possible consecutive -
167 if p: # possible consecutive -
168 yield ('symbol', p, s)
168 yield ('symbol', p, s)
169 s += len(p)
169 s += len(p)
170 yield ('-', None, pos)
170 yield ('-', None, pos)
171 s += 1
171 s += 1
172 if parts[-1]: # possible trailing -
172 if parts[-1]: # possible trailing -
173 yield ('symbol', parts[-1], s)
173 yield ('symbol', parts[-1], s)
174 else:
174 else:
175 yield ('symbol', sym, s)
175 yield ('symbol', sym, s)
176 pos -= 1
176 pos -= 1
177 else:
177 else:
178 raise error.ParseError(_("syntax error in revset '%s'") %
178 raise error.ParseError(_("syntax error in revset '%s'") %
179 program, pos)
179 program, pos)
180 pos += 1
180 pos += 1
181 yield ('end', None, pos)
181 yield ('end', None, pos)
182
182
183 # helpers
183 # helpers
184
184
185 _notset = object()
185 _notset = object()
186
186
187 def getsymbol(x):
187 def getsymbol(x):
188 if x and x[0] == 'symbol':
188 if x and x[0] == 'symbol':
189 return x[1]
189 return x[1]
190 raise error.ParseError(_('not a symbol'))
190 raise error.ParseError(_('not a symbol'))
191
191
192 def getstring(x, err):
192 def getstring(x, err):
193 if x and (x[0] == 'string' or x[0] == 'symbol'):
193 if x and (x[0] == 'string' or x[0] == 'symbol'):
194 return x[1]
194 return x[1]
195 raise error.ParseError(err)
195 raise error.ParseError(err)
196
196
197 def getinteger(x, err, default=_notset):
197 def getinteger(x, err, default=_notset):
198 if not x and default is not _notset:
198 if not x and default is not _notset:
199 return default
199 return default
200 try:
200 try:
201 return int(getstring(x, err))
201 return int(getstring(x, err))
202 except ValueError:
202 except ValueError:
203 raise error.ParseError(err)
203 raise error.ParseError(err)
204
204
205 def getboolean(x, err):
205 def getboolean(x, err):
206 value = util.parsebool(getsymbol(x))
206 value = util.parsebool(getsymbol(x))
207 if value is not None:
207 if value is not None:
208 return value
208 return value
209 raise error.ParseError(err)
209 raise error.ParseError(err)
210
210
211 def getlist(x):
211 def getlist(x):
212 if not x:
212 if not x:
213 return []
213 return []
214 if x[0] == 'list':
214 if x[0] == 'list':
215 return list(x[1:])
215 return list(x[1:])
216 return [x]
216 return [x]
217
217
218 def getrange(x, err):
218 def getrange(x, err):
219 if not x:
219 if not x:
220 raise error.ParseError(err)
220 raise error.ParseError(err)
221 op = x[0]
221 op = x[0]
222 if op == 'range':
222 if op == 'range':
223 return x[1], x[2]
223 return x[1], x[2]
224 elif op == 'rangepre':
224 elif op == 'rangepre':
225 return None, x[1]
225 return None, x[1]
226 elif op == 'rangepost':
226 elif op == 'rangepost':
227 return x[1], None
227 return x[1], None
228 elif op == 'rangeall':
228 elif op == 'rangeall':
229 return None, None
229 return None, None
230 raise error.ParseError(err)
230 raise error.ParseError(err)
231
231
232 def getargs(x, min, max, err):
232 def getargs(x, min, max, err):
233 l = getlist(x)
233 l = getlist(x)
234 if len(l) < min or (max >= 0 and len(l) > max):
234 if len(l) < min or (max >= 0 and len(l) > max):
235 raise error.ParseError(err)
235 raise error.ParseError(err)
236 return l
236 return l
237
237
238 def getargsdict(x, funcname, keys):
238 def getargsdict(x, funcname, keys):
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
240 keyvaluenode='keyvalue', keynode='symbol')
240 keyvaluenode='keyvalue', keynode='symbol')
241
241
242 # cache of {spec: raw parsed tree} built internally
242 # cache of {spec: raw parsed tree} built internally
243 _treecache = {}
243 _treecache = {}
244
244
245 def _cachedtree(spec):
245 def _cachedtree(spec):
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
247 tree = _treecache.get(spec)
247 tree = _treecache.get(spec)
248 if tree is None:
248 if tree is None:
249 _treecache[spec] = tree = parse(spec)
249 _treecache[spec] = tree = parse(spec)
250 return tree
250 return tree
251
251
252 def _build(tmplspec, *repls):
252 def _build(tmplspec, *repls):
253 """Create raw parsed tree from a template revset statement
253 """Create raw parsed tree from a template revset statement
254
254
255 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
255 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
257 """
257 """
258 template = _cachedtree(tmplspec)
258 template = _cachedtree(tmplspec)
259 return parser.buildtree(template, ('symbol', '_'), *repls)
259 return parser.buildtree(template, ('symbol', '_'), *repls)
260
260
261 def _match(patspec, tree):
261 def _match(patspec, tree):
262 """Test if a tree matches the given pattern statement; return the matches
262 """Test if a tree matches the given pattern statement; return the matches
263
263
264 >>> _match(b'f(_)', parse(b'f()'))
264 >>> _match(b'f(_)', parse(b'f()'))
265 >>> _match(b'f(_)', parse(b'f(1)'))
265 >>> _match(b'f(_)', parse(b'f(1)'))
266 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
266 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
267 >>> _match(b'f(_)', parse(b'f(1, 2)'))
267 >>> _match(b'f(_)', parse(b'f(1, 2)'))
268 """
268 """
269 pattern = _cachedtree(patspec)
269 pattern = _cachedtree(patspec)
270 return parser.matchtree(pattern, tree, ('symbol', '_'),
270 return parser.matchtree(pattern, tree, ('symbol', '_'),
271 {'keyvalue', 'list'})
271 {'keyvalue', 'list'})
272
272
273 def _matchonly(revs, bases):
273 def _matchonly(revs, bases):
274 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
274 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
275
275
276 def _fixops(x):
276 def _fixops(x):
277 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
277 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
278 handled well by our simple top-down parser"""
278 handled well by our simple top-down parser"""
279 if not isinstance(x, tuple):
279 if not isinstance(x, tuple):
280 return x
280 return x
281
281
282 op = x[0]
282 op = x[0]
283 if op == 'parent':
283 if op == 'parent':
284 # x^:y means (x^) : y, not x ^ (:y)
284 # x^:y means (x^) : y, not x ^ (:y)
285 # x^: means (x^) :, not x ^ (:)
285 # x^: means (x^) :, not x ^ (:)
286 post = ('parentpost', x[1])
286 post = ('parentpost', x[1])
287 if x[2][0] == 'dagrangepre':
287 if x[2][0] == 'dagrangepre':
288 return _fixops(('dagrange', post, x[2][1]))
288 return _fixops(('dagrange', post, x[2][1]))
289 elif x[2][0] == 'rangepre':
289 elif x[2][0] == 'rangepre':
290 return _fixops(('range', post, x[2][1]))
290 return _fixops(('range', post, x[2][1]))
291 elif x[2][0] == 'rangeall':
291 elif x[2][0] == 'rangeall':
292 return _fixops(('rangepost', post))
292 return _fixops(('rangepost', post))
293 elif op == 'or':
293 elif op == 'or':
294 # make number of arguments deterministic:
294 # make number of arguments deterministic:
295 # x + y + z -> (or x y z) -> (or (list x y z))
295 # x + y + z -> (or x y z) -> (or (list x y z))
296 return (op, _fixops(('list',) + x[1:]))
296 return (op, _fixops(('list',) + x[1:]))
297 elif op == 'subscript' and x[1][0] == 'relation':
297 elif op == 'subscript' and x[1][0] == 'relation':
298 # x#y[z] ternary
298 # x#y[z] ternary
299 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
299 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
300
300
301 return (op,) + tuple(_fixops(y) for y in x[1:])
301 return (op,) + tuple(_fixops(y) for y in x[1:])
302
302
303 def _analyze(x):
303 def _analyze(x):
304 if x is None:
304 if x is None:
305 return x
305 return x
306
306
307 op = x[0]
307 op = x[0]
308 if op == 'minus':
308 if op == 'minus':
309 return _analyze(_build('_ and not _', *x[1:]))
309 return _analyze(_build('_ and not _', *x[1:]))
310 elif op == 'only':
310 elif op == 'only':
311 return _analyze(_build('only(_, _)', *x[1:]))
311 return _analyze(_build('only(_, _)', *x[1:]))
312 elif op == 'onlypost':
312 elif op == 'onlypost':
313 return _analyze(_build('only(_)', x[1]))
313 return _analyze(_build('only(_)', x[1]))
314 elif op == 'dagrangepre':
314 elif op == 'dagrangepre':
315 return _analyze(_build('ancestors(_)', x[1]))
315 return _analyze(_build('ancestors(_)', x[1]))
316 elif op == 'dagrangepost':
316 elif op == 'dagrangepost':
317 return _analyze(_build('descendants(_)', x[1]))
317 return _analyze(_build('descendants(_)', x[1]))
318 elif op == 'negate':
318 elif op == 'negate':
319 s = getstring(x[1], _("can't negate that"))
319 s = getstring(x[1], _("can't negate that"))
320 return _analyze(('string', '-' + s))
320 return _analyze(('string', '-' + s))
321 elif op in ('string', 'symbol'):
321 elif op in ('string', 'symbol'):
322 return x
322 return x
323 elif op == 'rangeall':
323 elif op == 'rangeall':
324 return (op, None)
324 return (op, None)
325 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
325 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
326 return (op, _analyze(x[1]))
326 return (op, _analyze(x[1]))
327 elif op == 'group':
327 elif op == 'group':
328 return _analyze(x[1])
328 return _analyze(x[1])
329 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
329 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
330 'subscript'}:
330 'subscript'}:
331 ta = _analyze(x[1])
331 ta = _analyze(x[1])
332 tb = _analyze(x[2])
332 tb = _analyze(x[2])
333 return (op, ta, tb)
333 return (op, ta, tb)
334 elif op == 'relsubscript':
334 elif op == 'relsubscript':
335 ta = _analyze(x[1])
335 ta = _analyze(x[1])
336 tb = _analyze(x[2])
336 tb = _analyze(x[2])
337 tc = _analyze(x[3])
337 tc = _analyze(x[3])
338 return (op, ta, tb, tc)
338 return (op, ta, tb, tc)
339 elif op == 'list':
339 elif op == 'list':
340 return (op,) + tuple(_analyze(y) for y in x[1:])
340 return (op,) + tuple(_analyze(y) for y in x[1:])
341 elif op == 'keyvalue':
341 elif op == 'keyvalue':
342 return (op, x[1], _analyze(x[2]))
342 return (op, x[1], _analyze(x[2]))
343 elif op == 'func':
343 elif op == 'func':
344 return (op, x[1], _analyze(x[2]))
344 return (op, x[1], _analyze(x[2]))
345 raise ValueError('invalid operator %r' % op)
345 raise ValueError('invalid operator %r' % op)
346
346
347 def analyze(x):
347 def analyze(x):
348 """Transform raw parsed tree to evaluatable tree which can be fed to
348 """Transform raw parsed tree to evaluatable tree which can be fed to
349 optimize() or getset()
349 optimize() or getset()
350
350
351 All pseudo operations should be mapped to real operations or functions
351 All pseudo operations should be mapped to real operations or functions
352 defined in methods or symbols table respectively.
352 defined in methods or symbols table respectively.
353 """
353 """
354 return _analyze(x)
354 return _analyze(x)
355
355
356 def _optimize(x, small):
356 def _optimize(x):
357 if x is None:
357 if x is None:
358 return 0, x
358 return 0, x
359
359
360 smallbonus = 1
361 if small:
362 smallbonus = .5
363
364 op = x[0]
360 op = x[0]
365 if op in ('string', 'symbol'):
361 if op in ('string', 'symbol'):
366 return smallbonus, x # single revisions are small
362 return 0.5, x # single revisions are small
367 elif op == 'and':
363 elif op == 'and':
368 wa, ta = _optimize(x[1], True)
364 wa, ta = _optimize(x[1])
369 wb, tb = _optimize(x[2], True)
365 wb, tb = _optimize(x[2])
370 w = min(wa, wb)
366 w = min(wa, wb)
371
367
372 # (draft/secret/_notpublic() & ::x) have a fast path
368 # (draft/secret/_notpublic() & ::x) have a fast path
373 m = _match('_() & ancestors(_)', ('and', ta, tb))
369 m = _match('_() & ancestors(_)', ('and', ta, tb))
374 if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
370 if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
375 return w, _build('_phaseandancestors(_, _)', m[1], m[2])
371 return w, _build('_phaseandancestors(_, _)', m[1], m[2])
376
372
377 # (::x and not ::y)/(not ::y and ::x) have a fast path
373 # (::x and not ::y)/(not ::y and ::x) have a fast path
378 m = _matchonly(ta, tb) or _matchonly(tb, ta)
374 m = _matchonly(ta, tb) or _matchonly(tb, ta)
379 if m:
375 if m:
380 return w, _build('only(_, _)', *m[1:])
376 return w, _build('only(_, _)', *m[1:])
381
377
382 m = _match('not _', tb)
378 m = _match('not _', tb)
383 if m:
379 if m:
384 return wa, ('difference', ta, m[1])
380 return wa, ('difference', ta, m[1])
385 if wa > wb:
381 if wa > wb:
386 op = 'andsmally'
382 op = 'andsmally'
387 return w, (op, ta, tb)
383 return w, (op, ta, tb)
388 elif op == 'or':
384 elif op == 'or':
389 # fast path for machine-generated expression, that is likely to have
385 # fast path for machine-generated expression, that is likely to have
390 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
386 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
391 ws, ts, ss = [], [], []
387 ws, ts, ss = [], [], []
392 def flushss():
388 def flushss():
393 if not ss:
389 if not ss:
394 return
390 return
395 if len(ss) == 1:
391 if len(ss) == 1:
396 w, t = ss[0]
392 w, t = ss[0]
397 else:
393 else:
398 s = '\0'.join(t[1] for w, t in ss)
394 s = '\0'.join(t[1] for w, t in ss)
399 y = _build('_list(_)', ('string', s))
395 y = _build('_list(_)', ('string', s))
400 w, t = _optimize(y, False)
396 w, t = _optimize(y)
401 ws.append(w)
397 ws.append(w)
402 ts.append(t)
398 ts.append(t)
403 del ss[:]
399 del ss[:]
404 for y in getlist(x[1]):
400 for y in getlist(x[1]):
405 w, t = _optimize(y, False)
401 w, t = _optimize(y)
406 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
402 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
407 ss.append((w, t))
403 ss.append((w, t))
408 continue
404 continue
409 flushss()
405 flushss()
410 ws.append(w)
406 ws.append(w)
411 ts.append(t)
407 ts.append(t)
412 flushss()
408 flushss()
413 if len(ts) == 1:
409 if len(ts) == 1:
414 return ws[0], ts[0] # 'or' operation is fully optimized out
410 return ws[0], ts[0] # 'or' operation is fully optimized out
415 return max(ws), (op, ('list',) + tuple(ts))
411 return max(ws), (op, ('list',) + tuple(ts))
416 elif op == 'not':
412 elif op == 'not':
417 # Optimize not public() to _notpublic() because we have a fast version
413 # Optimize not public() to _notpublic() because we have a fast version
418 if _match('public()', x[1]):
414 if _match('public()', x[1]):
419 o = _optimize(_build('_notpublic()'), not small)
415 o = _optimize(_build('_notpublic()'))
420 return o[0], o[1]
416 return o[0], o[1]
421 else:
417 else:
422 o = _optimize(x[1], not small)
418 o = _optimize(x[1])
423 return o[0], (op, o[1])
419 return o[0], (op, o[1])
424 elif op == 'rangeall':
420 elif op == 'rangeall':
425 return smallbonus, x
421 return 1, x
426 elif op in ('rangepre', 'rangepost', 'parentpost'):
422 elif op in ('rangepre', 'rangepost', 'parentpost'):
427 o = _optimize(x[1], small)
423 o = _optimize(x[1])
428 return o[0], (op, o[1])
424 return o[0], (op, o[1])
429 elif op in ('dagrange', 'range'):
425 elif op in ('dagrange', 'range'):
430 wa, ta = _optimize(x[1], small)
426 wa, ta = _optimize(x[1])
431 wb, tb = _optimize(x[2], small)
427 wb, tb = _optimize(x[2])
432 return wa + wb, (op, ta, tb)
428 return wa + wb, (op, ta, tb)
433 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
429 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
434 w, t = _optimize(x[1], small)
430 w, t = _optimize(x[1])
435 return w, (op, t, x[2])
431 return w, (op, t, x[2])
436 elif op == 'relsubscript':
432 elif op == 'relsubscript':
437 w, t = _optimize(x[1], small)
433 w, t = _optimize(x[1])
438 return w, (op, t, x[2], x[3])
434 return w, (op, t, x[2], x[3])
439 elif op == 'list':
435 elif op == 'list':
440 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
436 ws, ts = zip(*(_optimize(y) for y in x[1:]))
441 return sum(ws), (op,) + ts
437 return sum(ws), (op,) + ts
442 elif op == 'keyvalue':
438 elif op == 'keyvalue':
443 w, t = _optimize(x[2], small)
439 w, t = _optimize(x[2])
444 return w, (op, x[1], t)
440 return w, (op, x[1], t)
445 elif op == 'func':
441 elif op == 'func':
446 f = getsymbol(x[1])
442 f = getsymbol(x[1])
447 wa, ta = _optimize(x[2], small)
443 wa, ta = _optimize(x[2])
448 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
444 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
449 'keyword', 'outgoing', 'user', 'destination'):
445 'keyword', 'outgoing', 'user', 'destination'):
450 w = 10 # slow
446 w = 10 # slow
451 elif f in ('modifies', 'adds', 'removes'):
447 elif f in ('modifies', 'adds', 'removes'):
452 w = 30 # slower
448 w = 30 # slower
453 elif f == "contains":
449 elif f == "contains":
454 w = 100 # very slow
450 w = 100 # very slow
455 elif f == "ancestor":
451 elif f == "ancestor":
456 w = 1 * smallbonus
452 w = 0.5
457 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
453 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
458 w = 0
454 w = 0
459 elif f == "sort":
455 elif f == "sort":
460 w = 10 # assume most sorts look at changelog
456 w = 10 # assume most sorts look at changelog
461 else:
457 else:
462 w = 1
458 w = 1
463 return w + wa, (op, x[1], ta)
459 return w + wa, (op, x[1], ta)
464 raise ValueError('invalid operator %r' % op)
460 raise ValueError('invalid operator %r' % op)
465
461
466 def optimize(tree):
462 def optimize(tree):
467 """Optimize evaluatable tree
463 """Optimize evaluatable tree
468
464
469 All pseudo operations should be transformed beforehand.
465 All pseudo operations should be transformed beforehand.
470 """
466 """
471 _weight, newtree = _optimize(tree, small=True)
467 _weight, newtree = _optimize(tree)
472 return newtree
468 return newtree
473
469
474 # the set of valid characters for the initial letter of symbols in
470 # the set of valid characters for the initial letter of symbols in
475 # alias declarations and definitions
471 # alias declarations and definitions
476 _aliassyminitletters = _syminitletters | {'$'}
472 _aliassyminitletters = _syminitletters | {'$'}
477
473
478 def _parsewith(spec, lookup=None, syminitletters=None):
474 def _parsewith(spec, lookup=None, syminitletters=None):
479 """Generate a parse tree of given spec with given tokenizing options
475 """Generate a parse tree of given spec with given tokenizing options
480
476
481 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
477 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
482 ('func', ('symbol', 'foo'), ('symbol', '$1'))
478 ('func', ('symbol', 'foo'), ('symbol', '$1'))
483 >>> _parsewith(b'$1')
479 >>> _parsewith(b'$1')
484 Traceback (most recent call last):
480 Traceback (most recent call last):
485 ...
481 ...
486 ParseError: ("syntax error in revset '$1'", 0)
482 ParseError: ("syntax error in revset '$1'", 0)
487 >>> _parsewith(b'foo bar')
483 >>> _parsewith(b'foo bar')
488 Traceback (most recent call last):
484 Traceback (most recent call last):
489 ...
485 ...
490 ParseError: ('invalid token', 4)
486 ParseError: ('invalid token', 4)
491 """
487 """
492 p = parser.parser(elements)
488 p = parser.parser(elements)
493 tree, pos = p.parse(tokenize(spec, lookup=lookup,
489 tree, pos = p.parse(tokenize(spec, lookup=lookup,
494 syminitletters=syminitletters))
490 syminitletters=syminitletters))
495 if pos != len(spec):
491 if pos != len(spec):
496 raise error.ParseError(_('invalid token'), pos)
492 raise error.ParseError(_('invalid token'), pos)
497 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
493 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
498
494
499 class _aliasrules(parser.basealiasrules):
495 class _aliasrules(parser.basealiasrules):
500 """Parsing and expansion rule set of revset aliases"""
496 """Parsing and expansion rule set of revset aliases"""
501 _section = _('revset alias')
497 _section = _('revset alias')
502
498
503 @staticmethod
499 @staticmethod
504 def _parse(spec):
500 def _parse(spec):
505 """Parse alias declaration/definition ``spec``
501 """Parse alias declaration/definition ``spec``
506
502
507 This allows symbol names to use also ``$`` as an initial letter
503 This allows symbol names to use also ``$`` as an initial letter
508 (for backward compatibility), and callers of this function should
504 (for backward compatibility), and callers of this function should
509 examine whether ``$`` is used also for unexpected symbols or not.
505 examine whether ``$`` is used also for unexpected symbols or not.
510 """
506 """
511 return _parsewith(spec, syminitletters=_aliassyminitletters)
507 return _parsewith(spec, syminitletters=_aliassyminitletters)
512
508
513 @staticmethod
509 @staticmethod
514 def _trygetfunc(tree):
510 def _trygetfunc(tree):
515 if tree[0] == 'func' and tree[1][0] == 'symbol':
511 if tree[0] == 'func' and tree[1][0] == 'symbol':
516 return tree[1][1], getlist(tree[2])
512 return tree[1][1], getlist(tree[2])
517
513
518 def expandaliases(tree, aliases, warn=None):
514 def expandaliases(tree, aliases, warn=None):
519 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
515 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
520 aliases = _aliasrules.buildmap(aliases)
516 aliases = _aliasrules.buildmap(aliases)
521 tree = _aliasrules.expand(aliases, tree)
517 tree = _aliasrules.expand(aliases, tree)
522 # warn about problematic (but not referred) aliases
518 # warn about problematic (but not referred) aliases
523 if warn is not None:
519 if warn is not None:
524 for name, alias in sorted(aliases.iteritems()):
520 for name, alias in sorted(aliases.iteritems()):
525 if alias.error and not alias.warned:
521 if alias.error and not alias.warned:
526 warn(_('warning: %s\n') % (alias.error))
522 warn(_('warning: %s\n') % (alias.error))
527 alias.warned = True
523 alias.warned = True
528 return tree
524 return tree
529
525
530 def foldconcat(tree):
526 def foldconcat(tree):
531 """Fold elements to be concatenated by `##`
527 """Fold elements to be concatenated by `##`
532 """
528 """
533 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
529 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
534 return tree
530 return tree
535 if tree[0] == '_concat':
531 if tree[0] == '_concat':
536 pending = [tree]
532 pending = [tree]
537 l = []
533 l = []
538 while pending:
534 while pending:
539 e = pending.pop()
535 e = pending.pop()
540 if e[0] == '_concat':
536 if e[0] == '_concat':
541 pending.extend(reversed(e[1:]))
537 pending.extend(reversed(e[1:]))
542 elif e[0] in ('string', 'symbol'):
538 elif e[0] in ('string', 'symbol'):
543 l.append(e[1])
539 l.append(e[1])
544 else:
540 else:
545 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
541 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
546 raise error.ParseError(msg)
542 raise error.ParseError(msg)
547 return ('string', ''.join(l))
543 return ('string', ''.join(l))
548 else:
544 else:
549 return tuple(foldconcat(t) for t in tree)
545 return tuple(foldconcat(t) for t in tree)
550
546
551 def parse(spec, lookup=None):
547 def parse(spec, lookup=None):
552 return _parsewith(spec, lookup=lookup)
548 return _parsewith(spec, lookup=lookup)
553
549
554 def _quote(s):
550 def _quote(s):
555 r"""Quote a value in order to make it safe for the revset engine.
551 r"""Quote a value in order to make it safe for the revset engine.
556
552
557 >>> _quote(b'asdf')
553 >>> _quote(b'asdf')
558 "'asdf'"
554 "'asdf'"
559 >>> _quote(b"asdf'\"")
555 >>> _quote(b"asdf'\"")
560 '\'asdf\\\'"\''
556 '\'asdf\\\'"\''
561 >>> _quote(b'asdf\'')
557 >>> _quote(b'asdf\'')
562 "'asdf\\''"
558 "'asdf\\''"
563 >>> _quote(1)
559 >>> _quote(1)
564 "'1'"
560 "'1'"
565 """
561 """
566 return "'%s'" % util.escapestr(pycompat.bytestr(s))
562 return "'%s'" % util.escapestr(pycompat.bytestr(s))
567
563
568 def formatspec(expr, *args):
564 def formatspec(expr, *args):
569 '''
565 '''
570 This is a convenience function for using revsets internally, and
566 This is a convenience function for using revsets internally, and
571 escapes arguments appropriately. Aliases are intentionally ignored
567 escapes arguments appropriately. Aliases are intentionally ignored
572 so that intended expression behavior isn't accidentally subverted.
568 so that intended expression behavior isn't accidentally subverted.
573
569
574 Supported arguments:
570 Supported arguments:
575
571
576 %r = revset expression, parenthesized
572 %r = revset expression, parenthesized
577 %d = int(arg), no quoting
573 %d = int(arg), no quoting
578 %s = string(arg), escaped and single-quoted
574 %s = string(arg), escaped and single-quoted
579 %b = arg.branch(), escaped and single-quoted
575 %b = arg.branch(), escaped and single-quoted
580 %n = hex(arg), single-quoted
576 %n = hex(arg), single-quoted
581 %% = a literal '%'
577 %% = a literal '%'
582
578
583 Prefixing the type with 'l' specifies a parenthesized list of that type.
579 Prefixing the type with 'l' specifies a parenthesized list of that type.
584
580
585 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
581 >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
586 '(10 or 11):: and ((this()) or (that()))'
582 '(10 or 11):: and ((this()) or (that()))'
587 >>> formatspec(b'%d:: and not %d::', 10, 20)
583 >>> formatspec(b'%d:: and not %d::', 10, 20)
588 '10:: and not 20::'
584 '10:: and not 20::'
589 >>> formatspec(b'%ld or %ld', [], [1])
585 >>> formatspec(b'%ld or %ld', [], [1])
590 "_list('') or 1"
586 "_list('') or 1"
591 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
587 >>> formatspec(b'keyword(%s)', b'foo\\xe9')
592 "keyword('foo\\\\xe9')"
588 "keyword('foo\\\\xe9')"
593 >>> b = lambda: b'default'
589 >>> b = lambda: b'default'
594 >>> b.branch = b
590 >>> b.branch = b
595 >>> formatspec(b'branch(%b)', b)
591 >>> formatspec(b'branch(%b)', b)
596 "branch('default')"
592 "branch('default')"
597 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
593 >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
598 "root(_list('a\\x00b\\x00c\\x00d'))"
594 "root(_list('a\\x00b\\x00c\\x00d'))"
599 '''
595 '''
600
596
601 def argtype(c, arg):
597 def argtype(c, arg):
602 if c == 'd':
598 if c == 'd':
603 return '%d' % int(arg)
599 return '%d' % int(arg)
604 elif c == 's':
600 elif c == 's':
605 return _quote(arg)
601 return _quote(arg)
606 elif c == 'r':
602 elif c == 'r':
607 parse(arg) # make sure syntax errors are confined
603 parse(arg) # make sure syntax errors are confined
608 return '(%s)' % arg
604 return '(%s)' % arg
609 elif c == 'n':
605 elif c == 'n':
610 return _quote(node.hex(arg))
606 return _quote(node.hex(arg))
611 elif c == 'b':
607 elif c == 'b':
612 return _quote(arg.branch())
608 return _quote(arg.branch())
613
609
614 def listexp(s, t):
610 def listexp(s, t):
615 l = len(s)
611 l = len(s)
616 if l == 0:
612 if l == 0:
617 return "_list('')"
613 return "_list('')"
618 elif l == 1:
614 elif l == 1:
619 return argtype(t, s[0])
615 return argtype(t, s[0])
620 elif t == 'd':
616 elif t == 'd':
621 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
617 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
622 elif t == 's':
618 elif t == 's':
623 return "_list('%s')" % "\0".join(s)
619 return "_list('%s')" % "\0".join(s)
624 elif t == 'n':
620 elif t == 'n':
625 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
621 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
626 elif t == 'b':
622 elif t == 'b':
627 return "_list('%s')" % "\0".join(a.branch() for a in s)
623 return "_list('%s')" % "\0".join(a.branch() for a in s)
628
624
629 m = l // 2
625 m = l // 2
630 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
626 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
631
627
632 expr = pycompat.bytestr(expr)
628 expr = pycompat.bytestr(expr)
633 ret = ''
629 ret = ''
634 pos = 0
630 pos = 0
635 arg = 0
631 arg = 0
636 while pos < len(expr):
632 while pos < len(expr):
637 c = expr[pos]
633 c = expr[pos]
638 if c == '%':
634 if c == '%':
639 pos += 1
635 pos += 1
640 d = expr[pos]
636 d = expr[pos]
641 if d == '%':
637 if d == '%':
642 ret += d
638 ret += d
643 elif d in 'dsnbr':
639 elif d in 'dsnbr':
644 ret += argtype(d, args[arg])
640 ret += argtype(d, args[arg])
645 arg += 1
641 arg += 1
646 elif d == 'l':
642 elif d == 'l':
647 # a list of some type
643 # a list of some type
648 pos += 1
644 pos += 1
649 d = expr[pos]
645 d = expr[pos]
650 ret += listexp(list(args[arg]), d)
646 ret += listexp(list(args[arg]), d)
651 arg += 1
647 arg += 1
652 else:
648 else:
653 raise error.Abort(_('unexpected revspec format character %s')
649 raise error.Abort(_('unexpected revspec format character %s')
654 % d)
650 % d)
655 else:
651 else:
656 ret += c
652 ret += c
657 pos += 1
653 pos += 1
658
654
659 return ret
655 return ret
660
656
661 def prettyformat(tree):
657 def prettyformat(tree):
662 return parser.prettyformat(tree, ('string', 'symbol'))
658 return parser.prettyformat(tree, ('string', 'symbol'))
663
659
664 def depth(tree):
660 def depth(tree):
665 if isinstance(tree, tuple):
661 if isinstance(tree, tuple):
666 return max(map(depth, tree)) + 1
662 return max(map(depth, tree)) + 1
667 else:
663 else:
668 return 0
664 return 0
669
665
670 def funcsused(tree):
666 def funcsused(tree):
671 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
667 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
672 return set()
668 return set()
673 else:
669 else:
674 funcs = set()
670 funcs = set()
675 for s in tree[1:]:
671 for s in tree[1:]:
676 funcs |= funcsused(s)
672 funcs |= funcsused(s)
677 if tree[0] == 'func':
673 if tree[0] == 'func':
678 funcs.add(tree[1][1])
674 funcs.add(tree[1][1])
679 return funcs
675 return funcs
General Comments 0
You need to be logged in to leave comments. Login now