##// END OF EJS Templates
revsetlang: use sysbytes() instead of blind encode()...
Gregory Szorc -
r42001:ddb17451 default
parent child Browse files
Show More
@@ -1,846 +1,846
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 smartset,
18 smartset,
19 util,
19 util,
20 )
20 )
21 from .utils import (
21 from .utils import (
22 stringutil,
22 stringutil,
23 )
23 )
24
24
# Operator table handed to parser.parser() by _parsewith().
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}
55
55
# symbols that tokenize() reports as operators rather than as 'symbol' tokens
keywords = {'and', 'or', 'not'}

# table consulted by _optimize() to look up a function's '_weight' attribute
symbols = {}

_quoteletters = {'"', "'"}
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
# (string.ascii_letters/string.digits are converted with pycompat.sysbytes()
# instead of a blind .encode('ascii'))
_syminitletters = set(pycompat.iterbytestr(
    pycompat.sysbytes(string.ascii_letters) +
    pycompat.sysbytes(string.digits) +
    '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
71
71
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    Each token is yielded as a ``(type, value, position)`` tuple and the
    stream is always terminated by an ``('end', None, pos)`` token.

    ``program`` must be a bytes object; anything else raises
    ``error.ProgrammingError`` (when the generator is first advanced).

    ``lookup``, if given, is called with a candidate name and should return
    a true value when that name is a known symbol.  It is used to accept
    old-style ``a:b`` ranges and names containing ``-`` verbatim.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Raises ``error.ParseError`` on an unterminated string or on a character
    that cannot start any token.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    if not isinstance(program, bytes):
        # wrap in a 1-tuple so that a tuple-valued ``program`` is formatted
        # with %r instead of being unpacked as the format arguments
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % (program,))
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step onto the quote and keep the body verbatim
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end without seeing the closing quote
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # the candidate contains '-' (e.g. foo-bar-baz); ask lookup
                # whether it is a real symbol before splitting it
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # compensate for the unconditional ``pos += 1`` below
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)
193
193
# helpers

# sentinel meaning "no default supplied" (distinct from None, which is a
# legitimate default value for the getters below)
_notset = object()
197
197
def getsymbol(x):
    """Return the name held by a ``('symbol', name)`` tree node

    Raises ``error.ParseError`` if ``x`` is empty or not a symbol node.
    """
    if x and x[0] == 'symbol':
        return x[1]
    raise error.ParseError(_('not a symbol'))
202
202
def getstring(x, err):
    """Return the value of a ``'string'`` or ``'symbol'`` tree node

    Raises ``error.ParseError(err)`` if ``x`` is empty or of another type.
    """
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)
207
207
def getinteger(x, err, default=_notset):
    """Return the integer value of a string/symbol tree node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned.
    Otherwise ``error.ParseError(err)`` is raised when ``x`` is not a
    string/symbol node or its value cannot be converted by ``int()``.
    """
    if not x and default is not _notset:
        return default
    try:
        return int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)
215
215
def getboolean(x, err):
    """Return the boolean value denoted by a symbol tree node

    The symbol name is passed to ``stringutil.parsebool()``; if that returns
    None, ``error.ParseError(err)`` is raised.  A non-symbol ``x`` raises the
    ParseError produced by ``getsymbol()``.
    """
    value = stringutil.parsebool(getsymbol(x))
    if value is not None:
        return value
    raise error.ParseError(err)
221
221
def getlist(x):
    """Return the argument nodes of ``x`` as a Python list

    An empty ``x`` gives ``[]``, a ``('list', a, b, ...)`` node gives
    ``[a, b, ...]``, and any other node gives the single-element ``[x]``.
    """
    if not x:
        return []
    if x[0] == 'list':
        return list(x[1:])
    return [x]
228
228
def getrange(x, err):
    """Return the ``(first, last)`` sub-trees of a range tree node

    A side that is omitted in the expression (``rangepre``, ``rangepost``,
    ``rangeall``) is reported as None.  Raises ``error.ParseError(err)`` if
    ``x`` is empty or not one of the range node types.
    """
    if not x:
        raise error.ParseError(err)
    op = x[0]
    if op == 'range':
        return x[1], x[2]
    elif op == 'rangepre':
        return None, x[1]
    elif op == 'rangepost':
        return x[1], None
    elif op == 'rangeall':
        return None, None
    raise error.ParseError(err)
242
242
def getintrange(x, err1, err2, deffirst=_notset, deflast=_notset):
    """Get [first, last] integer range (both inclusive) from a parsed tree

    A plain string/symbol node ``n`` is treated as the range ``[n, n]``.

    ``err1`` is used when ``x`` is neither an integer nor a range; ``err2``
    is used when one side of the range is not an integer.

    If any of the sides omitted, and if no default provided, ParseError will
    be raised.
    """
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        n = getinteger(x, err1)
        return n, n
    a, b = getrange(x, err1)
    return getinteger(a, err2, deffirst), getinteger(b, err2, deflast)
254
254
def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    Raises ``error.ParseError(err)`` if there are fewer than ``min``
    arguments, or more than ``max`` arguments.  A negative ``max`` disables
    the upper bound.
    """
    l = getlist(x)
    if len(l) < min or (max >= 0 and len(l) > max):
        raise error.ParseError(err)
    return l
260
260
def getargsdict(x, funcname, keys):
    """Build an arguments dict for ``funcname`` from the parsed tree ``x``

    ``keys`` is an argument spec understood by ``parser.splitargspec()``;
    the actual work is delegated to ``parser.buildargsdict()`` with the
    revset node names for key-value pairs and keys.
    """
    return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                keyvaluenode='keyvalue', keynode='symbol')
264
264
# cache of {spec: raw parsed tree} built internally
_treecache = {}
267
267
def _cachedtree(spec):
    """Return the raw parsed tree of ``spec``, parsing it at most once

    Results are memoized in the module-level ``_treecache`` dict.
    """
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    tree = _treecache.get(spec)
    if tree is None:
        _treecache[spec] = tree = parse(spec)
    return tree
274
274
def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    ``tmplspec`` is parsed (and cached) once; ``('symbol', '_')`` is the
    placeholder node passed to ``parser.buildtree()`` together with
    ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    template = _cachedtree(tmplspec)
    return parser.buildtree(template, ('symbol', '_'), *repls)
283
283
def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    ``patspec`` is parsed (and cached) once; ``('symbol', '_')`` is the
    placeholder node and ``'keyvalue'``/``'list'`` are the node types handed
    to ``parser.matchtree()`` as its final argument.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    pattern = _cachedtree(patspec)
    return parser.matchtree(pattern, tree, ('symbol', '_'),
                            {'keyvalue', 'list'})
295
295
def _matchonly(revs, bases):
    """Match ``revs and bases`` against ``ancestors(_) and not ancestors(_)``

    Returns whatever ``_match()`` returns for that pattern.
    """
    return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
298
298
def _fixops(x):
    """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
    handled well by our simple top-down parser

    Non-tuple values are returned unchanged; every other node is rebuilt
    with its children rewritten recursively.
    """
    if not isinstance(x, tuple):
        return x

    op = x[0]
    if op == 'parent':
        # x^:y means (x^) : y, not x ^ (:y)
        # x^: means (x^) :, not x ^ (:)
        post = ('parentpost', x[1])
        if x[2][0] == 'dagrangepre':
            return _fixops(('dagrange', post, x[2][1]))
        elif x[2][0] == 'dagrangeall':
            return _fixops(('dagrangepost', post))
        elif x[2][0] == 'rangepre':
            return _fixops(('range', post, x[2][1]))
        elif x[2][0] == 'rangeall':
            return _fixops(('rangepost', post))
        # any other right-hand side falls through to the generic rewrite
    elif op == 'or':
        # make number of arguments deterministic:
        # x + y + z -> (or x y z) -> (or (list x y z))
        return (op, _fixops(('list',) + x[1:]))
    elif op == 'subscript' and x[1][0] == 'relation':
        # x#y[z] ternary
        return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

    return (op,) + tuple(_fixops(y) for y in x[1:])
327
327
def _analyze(x):
    """Recursively map pseudo operations in raw tree ``x`` to real ones

    ``minus``, ``only``, ``onlypost``, ``dagrangepre`` and ``dagrangepost``
    are rewritten through ``_build()`` templates, ``negate`` is folded into
    a string node, ``group`` is unwrapped, and the remaining node types are
    rebuilt with their operands analyzed.

    Raises ``error.ParseError`` for a bare ``::`` (``dagrangeall``) or a
    ``negate`` whose operand is not a string/symbol, and ``ValueError`` for
    an unknown operator.
    """
    if x is None:
        return x

    op = x[0]
    if op == 'minus':
        return _analyze(_build('_ and not _', *x[1:]))
    elif op == 'only':
        return _analyze(_build('only(_, _)', *x[1:]))
    elif op == 'onlypost':
        return _analyze(_build('only(_)', x[1]))
    elif op == 'dagrangeall':
        raise error.ParseError(_("can't use '::' in this context"))
    elif op == 'dagrangepre':
        return _analyze(_build('ancestors(_)', x[1]))
    elif op == 'dagrangepost':
        return _analyze(_build('descendants(_)', x[1]))
    elif op == 'negate':
        s = getstring(x[1], _("can't negate that"))
        return _analyze(('string', '-' + s))
    elif op in ('string', 'symbol', 'smartset'):
        return x
    elif op == 'rangeall':
        return (op, None)
    elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
        return (op, _analyze(x[1]))
    elif op == 'group':
        return _analyze(x[1])
    elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                'subscript'}:
        ta = _analyze(x[1])
        tb = _analyze(x[2])
        return (op, ta, tb)
    elif op == 'relsubscript':
        ta = _analyze(x[1])
        tb = _analyze(x[2])
        tc = _analyze(x[3])
        return (op, ta, tb, tc)
    elif op == 'list':
        return (op,) + tuple(_analyze(y) for y in x[1:])
    elif op == 'keyvalue':
        # the key (x[1]) is kept as is; only the value is analyzed
        return (op, x[1], _analyze(x[2]))
    elif op == 'func':
        # the function name node (x[1]) is kept as is
        return (op, x[1], _analyze(x[2]))
    raise ValueError('invalid operator %r' % op)
373
373
def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    return _analyze(x)
382
382
def _optimize(x):
    """Return ``(weight, newtree)`` for the evaluatable tree ``x``

    ``weight`` is a relative cost estimate used to order operands, and
    ``newtree`` is ``x`` with known-slow patterns replaced by faster
    equivalents.  ``None`` yields ``(0, None)``.

    Raises ``ValueError`` for an unknown operator.
    """
    if x is None:
        return 0, x

    op = x[0]
    if op in ('string', 'symbol', 'smartset'):
        return 0.5, x # single revisions are small
    elif op == 'and':
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        w = min(wa, wb)

        # (draft/secret/_notpublic() & ::x) have a fast path
        m = _match('_() & ancestors(_)', ('and', ta, tb))
        if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
            return w, _build('_phaseandancestors(_, _)', m[1], m[2])

        # (::x and not ::y)/(not ::y and ::x) have a fast path
        m = _matchonly(ta, tb) or _matchonly(tb, ta)
        if m:
            return w, _build('only(_, _)', *m[1:])

        m = _match('not _', tb)
        if m:
            return wa, ('difference', ta, m[1])
        if wa > wb:
            # left operand is the heavier one
            op = 'andsmally'
        return w, (op, ta, tb)
    elif op == 'or':
        # fast path for machine-generated expression, that is likely to have
        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
        ws, ts, ss = [], [], []
        def flushss():
            # move the pending run of string/symbol operands in ``ss`` to
            # ``ws``/``ts``, folding two or more of them into one _list()
            if not ss:
                return
            if len(ss) == 1:
                w, t = ss[0]
            else:
                s = '\0'.join(t[1] for w, t in ss)
                y = _build('_list(_)', ('string', s))
                w, t = _optimize(y)
            ws.append(w)
            ts.append(t)
            del ss[:]
        for y in getlist(x[1]):
            w, t = _optimize(y)
            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                ss.append((w, t))
                continue
            flushss()
            ws.append(w)
            ts.append(t)
        flushss()
        if len(ts) == 1:
            return ws[0], ts[0] # 'or' operation is fully optimized out
        return max(ws), (op, ('list',) + tuple(ts))
    elif op == 'not':
        # Optimize not public() to _notpublic() because we have a fast version
        if _match('public()', x[1]):
            return _optimize(_build('_notpublic()'))
        else:
            o = _optimize(x[1])
            return o[0], (op, o[1])
    elif op == 'rangeall':
        return 1, x
    elif op in ('rangepre', 'rangepost', 'parentpost'):
        o = _optimize(x[1])
        return o[0], (op, o[1])
    elif op in ('dagrange', 'range'):
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        return wa + wb, (op, ta, tb)
    elif op in ('parent', 'ancestor', 'relation', 'subscript'):
        # only the left operand is optimized; x[2] is passed through as is
        w, t = _optimize(x[1])
        return w, (op, t, x[2])
    elif op == 'relsubscript':
        w, t = _optimize(x[1])
        return w, (op, t, x[2], x[3])
    elif op == 'list':
        ws, ts = zip(*(_optimize(y) for y in x[1:]))
        return sum(ws), (op,) + ts
    elif op == 'keyvalue':
        w, t = _optimize(x[2])
        return w, (op, x[1], t)
    elif op == 'func':
        f = getsymbol(x[1])
        wa, ta = _optimize(x[2])
        # functions without a '_weight' attribute (or unknown ones) weigh 1
        w = getattr(symbols.get(f), '_weight', 1)
        m = _match('commonancestors(_)', ta)

        # Optimize heads(commonancestors(_)) because we have a fast version
        if f == 'heads' and m:
            return w + wa, _build('_commonancestorheads(_)', m[1])

        return w + wa, (op, x[1], ta)
    raise ValueError('invalid operator %r' % op)
480
480
def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.

    Returns only the rewritten tree; the weight computed by ``_optimize()``
    is discarded.
    """
    _weight, newtree = _optimize(tree)
    return newtree
488
488
# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions
_aliassyminitletters = _syminitletters | {'$'}
492
492
def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` and ``syminitletters`` are forwarded to ``tokenize()``;
    ``lookup`` is ignored when ``spec`` has the form ``revset(...)``.

    Raises ``error.ParseError`` if the parser stops before the end of
    ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    if lookup and spec.startswith('revset(') and spec.endswith(')'):
        lookup = None
    p = parser.parser(elements)
    tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                 syminitletters=syminitletters))
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
515
515
class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        Symbol names may additionally start with ``$`` here (kept for
        backward compatibility). Callers are expected to check on their
        own whether ``$`` shows up in symbols where it is not wanted.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return (name, args) if ``tree`` is a call of a plain symbol

        Any other kind of tree yields None.
        """
        if tree[0] != 'func' or tree[1][0] != 'symbol':
            return None
        funcname = tree[1][1]
        return funcname, getlist(tree[2])
534
534
def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    If ``warn`` is given, it is called once for every alias that carries
    an error and has not been reported yet; such aliases are then flagged
    as warned.
    """
    aliasmap = _aliasrules.buildmap(aliases)
    expanded = _aliasrules.expand(aliasmap, tree)
    if warn is None:
        return expanded
    # warn about problematic (but not referred) aliases
    for name, alias in sorted(aliasmap.iteritems()):
        if not alias.error or alias.warned:
            continue
        warn(_('warning: %s\n') % (alias.error))
        alias.warned = True
    return expanded
546
546
def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    Every ``_concat`` node is replaced by a single ``string`` node holding
    the joined text of its (possibly nested) ``string``/``symbol``
    operands, in left-to-right order. Any other operand kind raises
    ParseError. Non-tuples and ``string``/``symbol``/``smartset`` nodes
    are returned untouched; other nodes are rebuilt with folded children.
    """
    if not isinstance(tree, tuple):
        return tree
    kind = tree[0]
    if kind in ('string', 'symbol', 'smartset'):
        return tree
    if kind != '_concat':
        return tuple(foldconcat(sub) for sub in tree)

    pieces = []
    stack = [tree]
    while stack:
        item = stack.pop()
        if item[0] == '_concat':
            # push operands reversed so that they are popped left to right
            stack.extend(reversed(item[1:]))
        elif item[0] in ('string', 'symbol'):
            pieces.append(item[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (item[0])
            raise error.ParseError(msg)
    return ('string', ''.join(pieces))
568
568
def parse(spec, lookup=None):
    """Parse ``spec`` into a tree, decorating parse errors with a hint

    When the ParseError carries a location, its ``hint`` attribute is set
    to the spec followed by a caret line marking that location. The
    error is re-raised in every case.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        if len(inst.args) > 1:  # has location
            loc = inst.args[1]
            # Newlines become spaces (equivalent whitespace) so that the
            # spec stays on one line above the caret.
            flatspec = spec.replace('\n', ' ')
            # "loc + 1" spaces rather than "loc": where the hint gets
            # displayed it is preceded by an open paren, so one extra
            # column is needed to line the caret up with the error.
            caretline = ' ' * (loc + 1) + '^ ' + _('here')
            inst.hint = flatspec + '\n' + caretline
        raise
583
583
def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted with ``pycompat.bytestr()``, escaped with
    ``stringutil.escapestr()`` and wrapped in single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = stringutil.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped
597
597
def _formatargtype(c, arg):
    """Render a single argument ``arg`` for format character ``c``

    Handles 'd', 's', 'r', 'n' and 'b'; any other character raises
    ParseError. TypeError is raised for an 'r' argument that is not
    bytes and for a 'b' argument without a ``branch()`` method.
    """
    if c == 'd':
        return '_rev(%d)' % int(arg)
    if c == 's':
        return _quote(arg)
    if c == 'r':
        if not isinstance(arg, bytes):
            raise TypeError
        parse(arg) # make sure syntax errors are confined
        return '(%s)' % arg
    if c == 'n':
        return _quote(node.hex(arg))
    if c == 'b':
        try:
            return _quote(arg.branch())
        except AttributeError:
            raise TypeError
    raise error.ParseError(_('unexpected revspec format character %s') % c)
616
616
617 def _formatlistexp(s, t):
617 def _formatlistexp(s, t):
618 l = len(s)
618 l = len(s)
619 if l == 0:
619 if l == 0:
620 return "_list('')"
620 return "_list('')"
621 elif l == 1:
621 elif l == 1:
622 return _formatargtype(t, s[0])
622 return _formatargtype(t, s[0])
623 elif t == 'd':
623 elif t == 'd':
624 return _formatintlist(s)
624 return _formatintlist(s)
625 elif t == 's':
625 elif t == 's':
626 return "_list(%s)" % _quote("\0".join(s))
626 return "_list(%s)" % _quote("\0".join(s))
627 elif t == 'n':
627 elif t == 'n':
628 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
628 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
629 elif t == 'b':
629 elif t == 'b':
630 try:
630 try:
631 return "_list('%s')" % "\0".join(a.branch() for a in s)
631 return "_list('%s')" % "\0".join(a.branch() for a in s)
632 except AttributeError:
632 except AttributeError:
633 raise TypeError
633 raise TypeError
634
634
635 m = l // 2
635 m = l // 2
636 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
636 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
637
637
638 def _formatintlist(data):
638 def _formatintlist(data):
639 try:
639 try:
640 l = len(data)
640 l = len(data)
641 if l == 0:
641 if l == 0:
642 return "_list('')"
642 return "_list('')"
643 elif l == 1:
643 elif l == 1:
644 return _formatargtype('d', data[0])
644 return _formatargtype('d', data[0])
645 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
645 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
646 except (TypeError, ValueError):
646 except (TypeError, ValueError):
647 raise error.ParseError(_('invalid argument for revspec'))
647 raise error.ParseError(_('invalid argument for revspec'))
648
648
649 def _formatparamexp(args, t):
649 def _formatparamexp(args, t):
650 return ', '.join(_formatargtype(t, a) for a in args)
650 return ', '.join(_formatargtype(t, a) for a in args)
651
651
# formatters for the list-type prefixes of a format spec, keyed by the
# prefix character; each one is called as f(list-of-items, item-type)
_formatlistfuncs = {
    'l': _formatlistexp,
    'p': _formatparamexp,
}
656
656
def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = _rev(int(arg)), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    '_rev(10):: and not _rev(20)::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or _rev(1)"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    chunks = []
    for itemtype, value in _parseargs(expr, args):
        if itemtype is None:
            # already rendered text
            chunks.append(value)
            continue
        if itemtype != 'baseset':
            raise error.ProgrammingError("unknown revspec item type: %r"
                                         % itemtype)
        # revisions left unrendered by _parseargs(); a set is sorted
        # before being serialized
        if isinstance(value, set):
            value = sorted(value)
        chunks.append(_formatintlist(list(value)))
    return b''.join(chunks)
706
706
def spectree(expr, *args):
    """similar to formatspec but return a parsed and optimized tree

    Revision lists reported as 'baseset' items by ``_parseargs()`` are
    not serialized: each one is wrapped in a ``('smartset', baseset)``
    node and stands in the expression text as a ``$`` symbol, which is
    handed to ``parser.buildtree()`` together with those nodes.
    """
    chunks = []
    smartsets = []
    for itemtype, value in _parseargs(expr, args):
        if itemtype is None:
            chunks.append(value)
        elif itemtype == 'baseset':
            smartsets.append(('smartset', smartset.baseset(value)))
            chunks.append("$")
        else:
            raise error.ProgrammingError("unknown revspec item type: %r"
                                         % itemtype)
    # '$' has to be accepted as a symbol, hence the alias tokenizing rules
    tree = _parsewith(b''.join(chunks), syminitletters=_aliassyminitletters)
    tree = parser.buildtree(tree, ('symbol', '$'), *smartsets)
    return optimize(analyze(foldconcat(tree)))
728
728
def _parseargs(expr, args):
    """parse the expression and replace all inexpensive args

    ``expr`` is scanned for ``%`` format items, each of which (except
    ``%%``) consumes the next value of ``args``.

    return a list of tuple [(arg-type, arg-value)]

    Arg-type can be:
    * None:      a string ready to be concatenated into a final spec
    * 'baseset': an iterable of revisions

    ParseError is raised for a truncated format item, for a missing or
    surplus argument, and for an argument that cannot be formatted.
    """
    expr = pycompat.bytestr(expr)
    argiter = iter(args)
    ret = []
    pos = 0
    while pos < len(expr):
        q = expr.find('%', pos)
        if q < 0:
            # no more format items: the rest is literal text
            ret.append((None, expr[pos:]))
            break
        # literal text in front of the format item
        ret.append((None, expr[pos:q]))
        pos = q + 1
        try:
            d = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if d == '%':
            # '%%' is a literal '%' and consumes no argument
            ret.append((None, d))
            pos += 1
            continue

        try:
            arg = next(argiter)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        f = _formatlistfuncs.get(d)
        if f:
            # a list of some type, might be expensive, do not replace
            pos += 1
            islist = (d == 'l')
            try:
                # the item type follows the list prefix
                d = expr[pos]
            except IndexError:
                raise error.ParseError(_('incomplete revspec format character'))
            if islist and d == 'd' and arg:
                # we don't create a baseset yet, because it comes with an
                # extra cost. If we are going to serialize it we better
                # skip it.
                ret.append(('baseset', arg))
                pos += 1
                continue
            try:
                ret.append((None, f(list(arg), d)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        else:
            # a single entry, not expensive, replace
            try:
                ret.append((None, _formatargtype(d, arg)))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        # step over the type character that was just handled
        pos += 1

    # every argument must have been consumed by a format item
    try:
        next(argiter)
        raise error.ParseError(_('too many revspec arguments specified'))
    except StopIteration:
        pass
    return ret
796
796
def prettyformat(tree):
    """Format ``tree`` with ``parser.prettyformat()``

    'string' and 'symbol' are passed as the leaf node types.
    """
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)
799
799
def depth(tree):
    """Return the nesting depth of ``tree``

    Anything that is not a tuple has depth 0; a tuple is one level deeper
    than its deepest element.
    """
    if not isinstance(tree, tuple):
        return 0
    return max(depth(sub) for sub in tree) + 1
805
805
def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``

    Non-tuples and 'string'/'symbol' nodes contribute nothing. For a
    'func' node the recorded name is ``tree[1][1]``.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    found = set()
    for child in tree[1:]:
        found.update(funcsused(child))
    if tree[0] == 'func':
        found.add(tree[1][1])
    return found
816
816
# 1 to 40 hexadecimal digits up to the end of the string
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
818
818
def _ishashlikesymbol(symbol):
    """returns true if the symbol looks like a hash

    The value returned is whatever ``_hashre.match()`` gives for
    ``symbol``.
    """
    matched = _hashre.match(symbol)
    return matched
822
822
def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    The whole tree is walked, whatever the number of operands of each
    node, so symbols below unary operators are reported as well. Only
    tuple children are descended into; payloads of leaf nodes (such as
    the text of a 'string' node) are never inspected. An empty tree
    gives an empty list.

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    >>> gethashlikesymbols(parse(b'not abe3ff'))
    ['abe3ff']
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        if _ishashlikesymbol(tree[1]):
            return [tree[1]]
        return []

    results = []
    for subtree in tree[1:]:
        # An arity check here would also skip two-element operator nodes
        # ('not', 'parentpost', 'rangepre', ...); testing the child type
        # keeps leaf payloads out while still visiting every real node.
        if isinstance(subtree, tuple):
            results += gethashlikesymbols(subtree)
    return results
General Comments 0
You need to be logged in to leave comments. Login now