##// END OF EJS Templates
revset: detect integer list on parsing...
Boris Feld -
r41257:73203cdf default
parent child Browse files
Show More
@@ -1,797 +1,818 b''
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 smartset,
18 util,
19 util,
19 )
20 )
20 from .utils import (
21 from .utils import (
21 stringutil,
22 stringutil,
22 )
23 )
23
24
# Operator table handed to parser.parser() (see _parsewith()).
#
# Each entry maps a token type to a 5-tuple:
#   (binding-strength, primary, prefix, infix, suffix)
# A None slot means the token cannot be used in that position.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}
54
55
# symbols that tokenize() emits as operator tokens instead of 'symbol'
keywords = {'and', 'or', 'not'}

# table consulted by _optimize() for a per-function '_weight' attribute;
# it is empty here and not populated anywhere in this part of the file
symbols = {}

# characters that open (and close) a quoted string
_quoteletters = {'"', "'"}
# single-character operators that tokenize() emits as-is
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
70
71
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    Each token is yielded as a ``(type, value, position)`` tuple and the
    stream is always terminated by an ``('end', None, pos)`` token.

    ``lookup``, if given, is called with a candidate name and should return
    a true value when that name is a known symbol. It is used to recognize
    old-style ``a:b`` ranges and names containing ``-``.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Raises ``error.ProgrammingError`` if ``program`` is not bytes, and
    ``error.ParseError`` on an unterminated string or an unexpected
    character.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        # wrap in a 1-tuple so that a tuple argument is reported verbatim
        # instead of being unpacked by the % operator
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % (program,))
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: keep the content as written
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # we were given foo-bar-baz, try to check if it's a symbol
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # the common "pos += 1" below re-advances past the last letter
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)
192
193
# helpers

# sentinel meaning "no default supplied" (None is a valid default value)
_notset = object()
196
197
def getsymbol(x):
    """Return the name stored in a ``('symbol', name)`` node

    Raises ``error.ParseError`` if ``x`` is empty or is not a symbol node.
    """
    if x and x[0] == 'symbol':
        return x[1]
    raise error.ParseError(_('not a symbol'))
201
202
def getstring(x, err):
    """Return the value of a ``'string'`` or ``'symbol'`` node

    Raises ``error.ParseError(err)`` if ``x`` is empty or is any other kind
    of node.
    """
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)
206
207
def getinteger(x, err, default=_notset):
    """Return the integer value of a ``'string'`` or ``'symbol'`` node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned
    instead. Raises ``error.ParseError(err)`` if ``x`` is not a string or
    symbol node, or if its value cannot be converted by ``int()``.
    """
    if not x and default is not _notset:
        return default
    try:
        return int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)
214
215
def getboolean(x, err):
    """Return the boolean value spelled by a symbol node

    The symbol name is interpreted by ``stringutil.parsebool()``; if that
    returns None, ``error.ParseError(err)`` is raised. A node that is not
    a symbol is rejected by ``getsymbol()`` with its own error message.
    """
    value = stringutil.parsebool(getsymbol(x))
    if value is not None:
        return value
    raise error.ParseError(err)
220
221
def getlist(x):
    """Return the elements of ``x`` as a Python list

    An empty tree gives ``[]``, a ``('list', a, b, ...)`` node gives
    ``[a, b, ...]``, and any other node gives a one-element list.
    """
    if not x:
        return []
    if x[0] == 'list':
        return list(x[1:])
    return [x]
227
228
def getrange(x, err):
    """Return the ``(first, last)`` operands of a range node

    An omitted bound is returned as None, so ``rangepre`` gives
    ``(None, last)``, ``rangepost`` gives ``(first, None)`` and ``rangeall``
    gives ``(None, None)``. Raises ``error.ParseError(err)`` if ``x`` is
    empty or is not one of the range operators.
    """
    if not x:
        raise error.ParseError(err)
    op = x[0]
    if op == 'range':
        return x[1], x[2]
    elif op == 'rangepre':
        return None, x[1]
    elif op == 'rangepost':
        return x[1], None
    elif op == 'rangeall':
        return None, None
    raise error.ParseError(err)
241
242
def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    Raises ``error.ParseError(err)`` if there are fewer than ``min``
    arguments, or more than ``max`` arguments. A negative ``max`` disables
    the upper bound.
    """
    l = getlist(x)
    if len(l) < min or (max >= 0 and len(l) > max):
        raise error.ParseError(err)
    return l
247
248
def getargsdict(x, funcname, keys):
    """Build a dict of arguments from ``x`` according to the spec ``keys``

    The work is delegated to ``parser.buildargsdict()``, with ``keys`` split
    by ``parser.splitargspec()`` and 'keyvalue'/'symbol' as the node types
    for ``key=value`` arguments.
    """
    return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                keyvaluenode='keyvalue', keynode='symbol')
251
252
# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the raw parsed tree of ``spec``, parsing it on first use"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    tree = _treecache.get(spec)
    if tree is None:
        _treecache[spec] = tree = parse(spec)
    return tree
261
262
def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    The ``_`` symbol acts as the placeholder in ``tmplspec``; ``repls`` are
    the trees passed to ``parser.buildtree()`` to substitute for it.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    template = _cachedtree(tmplspec)
    return parser.buildtree(template, ('symbol', '_'), *repls)
270
271
def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    The ``_`` symbol acts as the placeholder in ``patspec``. The matching
    itself is done by ``parser.matchtree()``.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    pattern = _cachedtree(patspec)
    return parser.matchtree(pattern, tree, ('symbol', '_'),
                            {'keyvalue', 'list'})
282
283
def _matchonly(revs, bases):
    """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'"""
    return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
285
286
def _fixops(x):
    """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
    handled well by our simple top-down parser"""
    if not isinstance(x, tuple):
        return x

    op = x[0]
    if op == 'parent':
        # x^:y means (x^) : y, not x ^ (:y)
        # x^: means (x^) :, not x ^ (:)
        post = ('parentpost', x[1])
        if x[2][0] == 'dagrangepre':
            return _fixops(('dagrange', post, x[2][1]))
        elif x[2][0] == 'dagrangeall':
            return _fixops(('dagrangepost', post))
        elif x[2][0] == 'rangepre':
            return _fixops(('range', post, x[2][1]))
        elif x[2][0] == 'rangeall':
            return _fixops(('rangepost', post))
        # any other right operand falls through to the generic rewrite below
    elif op == 'or':
        # make number of arguments deterministic:
        # x + y + z -> (or x y z) -> (or (list x y z))
        return (op, _fixops(('list',) + x[1:]))
    elif op == 'subscript' and x[1][0] == 'relation':
        # x#y[z] ternary
        return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

    return (op,) + tuple(_fixops(y) for y in x[1:])
314
315
def _analyze(x):
    """Recursively rewrite pseudo operations of a raw parsed tree

    ``minus``, ``only``, ``onlypost``, ``dagrangepre`` and ``dagrangepost``
    are rebuilt from templates via ``_build()``; ``negate`` is folded into a
    string with a leading ``-``; ``group`` nodes are unwrapped. The other
    known operators are kept, with their operands analyzed.

    Raises ``error.ParseError`` for a bare ``::`` or a ``negate`` of
    something that is not a string or symbol, and ``ValueError`` for an
    unknown operator.
    """
    if x is None:
        return x

    op = x[0]
    if op == 'minus':
        return _analyze(_build('_ and not _', *x[1:]))
    elif op == 'only':
        return _analyze(_build('only(_, _)', *x[1:]))
    elif op == 'onlypost':
        return _analyze(_build('only(_)', x[1]))
    elif op == 'dagrangeall':
        raise error.ParseError(_("can't use '::' in this context"))
    elif op == 'dagrangepre':
        return _analyze(_build('ancestors(_)', x[1]))
    elif op == 'dagrangepost':
        return _analyze(_build('descendants(_)', x[1]))
    elif op == 'negate':
        s = getstring(x[1], _("can't negate that"))
        return _analyze(('string', '-' + s))
    elif op in ('string', 'symbol'):
        return x
    elif op == 'rangeall':
        return (op, None)
    elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
        return (op, _analyze(x[1]))
    elif op == 'group':
        return _analyze(x[1])
    elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                'subscript'}:
        ta = _analyze(x[1])
        tb = _analyze(x[2])
        return (op, ta, tb)
    elif op == 'relsubscript':
        ta = _analyze(x[1])
        tb = _analyze(x[2])
        tc = _analyze(x[3])
        return (op, ta, tb, tc)
    elif op == 'list':
        return (op,) + tuple(_analyze(y) for y in x[1:])
    elif op == 'keyvalue':
        # x[1] is the key and is kept as-is
        return (op, x[1], _analyze(x[2]))
    elif op == 'func':
        # x[1] is the function name and is kept as-is
        return (op, x[1], _analyze(x[2]))
    raise ValueError('invalid operator %r' % op)
360
361
def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    return _analyze(x)
369
370
def _optimize(x):
    """Return a ``(weight, tree)`` pair for the evaluatable tree ``x``

    ``tree`` is ``x`` with some sub-expressions rewritten to forms that
    have a fast path, and ``weight`` is a number computed alongside it (it
    is used below to decide whether 'and' becomes 'andsmally').

    Raises ``ValueError`` for an unknown operator.
    """
    if x is None:
        return 0, x

    op = x[0]
    if op in ('string', 'symbol'):
        return 0.5, x # single revisions are small
    elif op == 'and':
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        w = min(wa, wb)

        # (draft/secret/_notpublic() & ::x) have a fast path
        m = _match('_() & ancestors(_)', ('and', ta, tb))
        if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
            return w, _build('_phaseandancestors(_, _)', m[1], m[2])

        # (::x and not ::y)/(not ::y and ::x) have a fast path
        m = _matchonly(ta, tb) or _matchonly(tb, ta)
        if m:
            return w, _build('only(_, _)', *m[1:])

        m = _match('not _', tb)
        if m:
            return wa, ('difference', ta, m[1])
        if wa > wb:
            op = 'andsmally'
        return w, (op, ta, tb)
    elif op == 'or':
        # fast path for machine-generated expression, that is likely to have
        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
        # ws/ts: weights and trees of the rewritten operands
        # ss: pending run of consecutive string/symbol operands
        ws, ts, ss = [], [], []
        def flushss():
            if not ss:
                return
            if len(ss) == 1:
                w, t = ss[0]
            else:
                s = '\0'.join(t[1] for w, t in ss)
                y = _build('_list(_)', ('string', s))
                w, t = _optimize(y)
            ws.append(w)
            ts.append(t)
            del ss[:]
        for y in getlist(x[1]):
            w, t = _optimize(y)
            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                ss.append((w, t))
                continue
            flushss()
            ws.append(w)
            ts.append(t)
        flushss()
        if len(ts) == 1:
            return ws[0], ts[0] # 'or' operation is fully optimized out
        return max(ws), (op, ('list',) + tuple(ts))
    elif op == 'not':
        # Optimize not public() to _notpublic() because we have a fast version
        if _match('public()', x[1]):
            o = _optimize(_build('_notpublic()'))
            return o[0], o[1]
        else:
            o = _optimize(x[1])
            return o[0], (op, o[1])
    elif op == 'rangeall':
        return 1, x
    elif op in ('rangepre', 'rangepost', 'parentpost'):
        o = _optimize(x[1])
        return o[0], (op, o[1])
    elif op in ('dagrange', 'range'):
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        return wa + wb, (op, ta, tb)
    elif op in ('parent', 'ancestor', 'relation', 'subscript'):
        # only the left operand is optimized; x[2] is kept as-is
        w, t = _optimize(x[1])
        return w, (op, t, x[2])
    elif op == 'relsubscript':
        w, t = _optimize(x[1])
        return w, (op, t, x[2], x[3])
    elif op == 'list':
        ws, ts = zip(*(_optimize(y) for y in x[1:]))
        return sum(ws), (op,) + ts
    elif op == 'keyvalue':
        w, t = _optimize(x[2])
        return w, (op, x[1], t)
    elif op == 'func':
        f = getsymbol(x[1])
        wa, ta = _optimize(x[2])
        # functions without a '_weight' attribute default to weight 1
        w = getattr(symbols.get(f), '_weight', 1)
        m = _match('commonancestors(_)', ta)

        # Optimize heads(commonancestors(_)) because we have a fast version
        if f == 'heads' and m:
            return w + wa, _build('_commonancestorheads(_)', m[1])

        return w + wa, (op, x[1], ta)
    raise ValueError('invalid operator %r' % op)
467
468
def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.
    """
    # the weight computed by _optimize() is only needed during recursion
    _weight, newtree = _optimize(tree)
    return newtree
475
476
# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions
_aliassyminitletters = _syminitletters | {'$'}
479
480
def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    Raises ``error.ParseError`` if the parser stops before the end of
    ``spec``, in addition to the errors raised by ``tokenize()``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    # a spec wrapped in revset(...) is tokenized without name lookup
    if lookup and spec.startswith('revset(') and spec.endswith(')'):
        lookup = None
    p = parser.parser(elements)
    tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                 syminitletters=syminitletters))
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
502
503
class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        This allows symbol names to use also ``$`` as an initial letter
        (for backward compatibility), and callers of this function should
        examine whether ``$`` is used also for unexpected symbols or not.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` is a call of a named function

        Implicitly returns None for any other tree.
        """
        if tree[0] == 'func' and tree[1][0] == 'symbol':
            return tree[1][1], getlist(tree[2])
521
522
def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    If ``warn`` is given, it is called once with a warning message for every
    alias that carries an error and has not been reported yet; such aliases
    are then flagged as warned.
    """
    aliases = _aliasrules.buildmap(aliases)
    tree = _aliasrules.expand(aliases, tree)
    # warn about problematic (but not referred) aliases
    if warn is not None:
        # items() instead of iteritems(): available on both Python 2 and 3,
        # and sorted() materializes the pairs anyway. The alias name is only
        # used as the sort key.
        for _name, alias in sorted(aliases.items()):
            if alias.error and not alias.warned:
                warn(_('warning: %s\n') % (alias.error))
                alias.warned = True
    return tree
533
534
def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    Every ``_concat`` node is replaced by a single ``('string', value)`` node
    whose value is the left-to-right concatenation of its string/symbol
    operands (nested ``_concat`` nodes are flattened). Other nodes are
    rebuilt with their children folded recursively; non-tuples and
    string/symbol leaves are returned as is.

    Raises error.ParseError if a ``_concat`` operand is neither a string, a
    symbol, nor another ``_concat``.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return tree
    if tree[0] != '_concat':
        return tuple(foldconcat(t) for t in tree)

    # explicit stack instead of recursion; operands are pushed reversed so
    # that they are popped in their original left-to-right order
    pending = [tree]
    parts = []
    while pending:
        e = pending.pop()
        if e[0] == '_concat':
            pending.extend(reversed(e[1:]))
        elif e[0] in ('string', 'symbol'):
            parts.append(e[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
            raise error.ParseError(msg)
    return ('string', ''.join(parts))
554
555
def parse(spec, lookup=None):
    """Parse ``spec`` into a tree, decorating parse errors with a hint

    When the raised error.ParseError carries a location (second element of
    its ``args``), its ``hint`` attribute is set to the spec followed by a
    line with a caret under the failing position. The error is then
    re-raised unchanged otherwise.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        if len(inst.args) > 1:  # has location
            loc = inst.args[1]
            # Remove newlines -- spaces are equivalent whitespace.
            spec = spec.replace('\n', ' ')
            # We want the caret to point to the place in the template that
            # failed to parse, but in a hint we get a open paren at the
            # start. Therefore, we print "loc + 1" spaces (instead of "loc")
            # to line up the caret with the location of the error.
            inst.hint = spec + '\n' + ' ' * (loc + 1) + '^ ' + _('here')
        raise
569
570
def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted with ``pycompat.bytestr``, escaped with
    ``stringutil.escapestr`` and wrapped in single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    return "'%s'" % stringutil.escapestr(pycompat.bytestr(s))
583
584
584 def _formatargtype(c, arg):
585 def _formatargtype(c, arg):
585 if c == 'd':
586 if c == 'd':
586 return 'rev(%d)' % int(arg)
587 return 'rev(%d)' % int(arg)
587 elif c == 's':
588 elif c == 's':
588 return _quote(arg)
589 return _quote(arg)
589 elif c == 'r':
590 elif c == 'r':
590 if not isinstance(arg, bytes):
591 if not isinstance(arg, bytes):
591 raise TypeError
592 raise TypeError
592 parse(arg) # make sure syntax errors are confined
593 parse(arg) # make sure syntax errors are confined
593 return '(%s)' % arg
594 return '(%s)' % arg
594 elif c == 'n':
595 elif c == 'n':
595 return _quote(node.hex(arg))
596 return _quote(node.hex(arg))
596 elif c == 'b':
597 elif c == 'b':
597 try:
598 try:
598 return _quote(arg.branch())
599 return _quote(arg.branch())
599 except AttributeError:
600 except AttributeError:
600 raise TypeError
601 raise TypeError
601 raise error.ParseError(_('unexpected revspec format character %s') % c)
602 raise error.ParseError(_('unexpected revspec format character %s') % c)
602
603
603 def _formatlistexp(s, t):
604 def _formatlistexp(s, t):
604 l = len(s)
605 l = len(s)
605 if l == 0:
606 if l == 0:
606 return "_list('')"
607 return "_list('')"
607 elif l == 1:
608 elif l == 1:
608 return _formatargtype(t, s[0])
609 return _formatargtype(t, s[0])
609 elif t == 'd':
610 elif t == 'd':
610 return _formatintlist(s)
611 return _formatintlist(s)
611 elif t == 's':
612 elif t == 's':
612 return "_list(%s)" % _quote("\0".join(s))
613 return "_list(%s)" % _quote("\0".join(s))
613 elif t == 'n':
614 elif t == 'n':
614 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
615 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
615 elif t == 'b':
616 elif t == 'b':
616 try:
617 try:
617 return "_list('%s')" % "\0".join(a.branch() for a in s)
618 return "_list('%s')" % "\0".join(a.branch() for a in s)
618 except AttributeError:
619 except AttributeError:
619 raise TypeError
620 raise TypeError
620
621
621 m = l // 2
622 m = l // 2
622 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
623 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
623
624
624 def _formatintlist(data):
625 def _formatintlist(data):
625 try:
626 try:
626 l = len(data)
627 l = len(data)
627 if l == 0:
628 if l == 0:
628 return "_list('')"
629 return "_list('')"
629 elif l == 1:
630 elif l == 1:
630 return _formatargtype('d', data[0])
631 return _formatargtype('d', data[0])
631 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
632 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
632 except (TypeError, ValueError):
633 except (TypeError, ValueError):
633 raise error.ParseError(_('invalid argument for revspec'))
634 raise error.ParseError(_('invalid argument for revspec'))
634
635
635 def _formatparamexp(args, t):
636 def _formatparamexp(args, t):
636 return ', '.join(_formatargtype(t, a) for a in args)
637 return ', '.join(_formatargtype(t, a) for a in args)
637
638
# formatters for the list prefixes accepted in a format spec, keyed by the
# prefix character; each one is called as f(list-of-args, type-character)
_formatlistfuncs = {
    'l': _formatlistexp,
    'p': _formatparamexp,
}
642
643
def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = rev(int(arg)), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    'rev(10):: and not rev(20)::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or rev(1)"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    parsed = _parseargs(expr, args)
    ret = []
    for t, arg in parsed:
        if t is None:
            # already a piece of the final spec
            ret.append(arg)
        elif t == 'baseset':
            # integer list left unformatted by _parseargs(); serialize it now
            if isinstance(arg, set):
                # a set has no defined iteration order, so sort it first
                arg = sorted(arg)
            ret.append(_formatintlist(list(arg)))
        else:
            raise error.ProgrammingError("unknown revspec item type: %r" % t)
    return b''.join(ret)
688
693
def _parseargs(expr, args):
    """parse the expression and replace all inexpensive args

    return a list of tuple [(arg-type, arg-value)]

    Arg-type can be:
    * None:      a string ready to be concatenated into a final spec
    * 'baseset': an iterable of revisions

    Raises error.ParseError when a format character is incomplete, when the
    number of arguments does not match the number of format characters, or
    when an argument cannot be formatted as requested.
    """
    expr = pycompat.bytestr(expr)
    argiter = iter(args)
    ret = []
    pos = 0
    while pos < len(expr):
        q = expr.find('%', pos)
        if q < 0:
            # no more format characters: keep the remaining text verbatim
            ret.append((None, expr[pos:]))
            break
        ret.append((None, expr[pos:q]))
        pos = q + 1
        try:
            d = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if d == '%':
            # '%%' is a literal '%' and consumes no argument
            ret.append((None, d))
            pos += 1
            continue

        try:
            arg = next(argiter)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        f = _formatlistfuncs.get(d)
        if f:
            # a list of some type, might be expensive, do not replace
            pos += 1
            islist = (d == 'l')
            try:
                d = expr[pos]
            except IndexError:
                raise error.ParseError(_('incomplete revspec format character'))
            if islist and d == 'd' and arg:
                # special case, we might be able to speed up the list of int
                # case
                #
                # We have been very conservative here for the first version.
                # Other types (eg: generator) are probably fine, but we did
                # not want to take any risk.
                safeinputtype = (list, tuple, set, smartset.abstractsmartset)
                if isinstance(arg, safeinputtype):
                    # we don't create a baseset yet, because it comes with an
                    # extra cost. If we are going to serialize it we better
                    # skip it.
                    ret.append(('baseset', arg))
                    pos += 1
                    continue
            try:
                ret.append((None, f(list(arg), d)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        else:
            # a single entry, not expensive, replace
            try:
                ret.append((None, _formatargtype(d, arg)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        pos += 1

    # every argument must have been consumed by a format character
    try:
        next(argiter)
        raise error.ParseError(_('too many revspec arguments specified'))
    except StopIteration:
        pass
    return ret
747
768
def prettyformat(tree):
    """Pretty-print ``tree``, treating string and symbol nodes as leaves"""
    return parser.prettyformat(tree, ('string', 'symbol'))
750
771
def depth(tree):
    """Return the nesting depth of ``tree``

    Anything that is not a tuple has depth 0; a tuple is one level deeper
    than its deepest element.
    """
    if not isinstance(tree, tuple):
        return 0
    if not tree:
        # max() raises ValueError on an empty sequence; an empty tuple is
        # still one level of nesting
        return 1
    return max(map(depth, tree)) + 1
756
777
def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``"""
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()

    funcs = set()
    for s in tree[1:]:
        funcs |= funcsused(s)
    if tree[0] == 'func':
        # tree[1] is the node naming the function, tree[1][1] its name
        funcs.add(tree[1][1])
    return funcs
767
788
# 1 to 40 hexadecimal digits, anchored at the end of the input
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
769
790
def _ishashlikesymbol(symbol):
    """returns true if the symbol looks like a hash

    The value returned is the result of ``_hashre.match()``: a match object
    (truthy) or None.
    """
    return _hashre.match(symbol)
773
794
def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        if _ishashlikesymbol(tree[1]):
            return [tree[1]]
    elif len(tree) >= 3:
        # NOTE(review): only nodes with at least two operands are descended
        # into; two-element nodes other than symbols yield nothing
        results = []
        for subtree in tree[1:]:
            results += gethashlikesymbols(subtree)
        return results
    return []
General Comments 0
You need to be logged in to leave comments. Login now