##// END OF EJS Templates
py3: fix mixed bytes/unicode in revsetlang._aliassyminitletters
Yuya Nishihara -
r34071:e18119b1 default
parent child Browse files
Show More
@@ -1,679 +1,679 b''
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 util,
18 util,
19 )
19 )
20
20
21 elements = {
21 elements = {
22 # token-type: binding-strength, primary, prefix, infix, suffix
22 # token-type: binding-strength, primary, prefix, infix, suffix
23 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
23 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
24 "[": (21, None, None, ("subscript", 1, "]"), None),
24 "[": (21, None, None, ("subscript", 1, "]"), None),
25 "#": (21, None, None, ("relation", 21), None),
25 "#": (21, None, None, ("relation", 21), None),
26 "##": (20, None, None, ("_concat", 20), None),
26 "##": (20, None, None, ("_concat", 20), None),
27 "~": (18, None, None, ("ancestor", 18), None),
27 "~": (18, None, None, ("ancestor", 18), None),
28 "^": (18, None, None, ("parent", 18), "parentpost"),
28 "^": (18, None, None, ("parent", 18), "parentpost"),
29 "-": (5, None, ("negate", 19), ("minus", 5), None),
29 "-": (5, None, ("negate", 19), ("minus", 5), None),
30 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
30 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
31 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
31 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
32 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
32 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
33 "not": (10, None, ("not", 10), None, None),
33 "not": (10, None, ("not", 10), None, None),
34 "!": (10, None, ("not", 10), None, None),
34 "!": (10, None, ("not", 10), None, None),
35 "and": (5, None, None, ("and", 5), None),
35 "and": (5, None, None, ("and", 5), None),
36 "&": (5, None, None, ("and", 5), None),
36 "&": (5, None, None, ("and", 5), None),
37 "%": (5, None, None, ("only", 5), "onlypost"),
37 "%": (5, None, None, ("only", 5), "onlypost"),
38 "or": (4, None, None, ("or", 4), None),
38 "or": (4, None, None, ("or", 4), None),
39 "|": (4, None, None, ("or", 4), None),
39 "|": (4, None, None, ("or", 4), None),
40 "+": (4, None, None, ("or", 4), None),
40 "+": (4, None, None, ("or", 4), None),
41 "=": (3, None, None, ("keyvalue", 3), None),
41 "=": (3, None, None, ("keyvalue", 3), None),
42 ",": (2, None, None, ("list", 2), None),
42 ",": (2, None, None, ("list", 2), None),
43 ")": (0, None, None, None, None),
43 ")": (0, None, None, None, None),
44 "]": (0, None, None, None, None),
44 "]": (0, None, None, None, None),
45 "symbol": (0, "symbol", None, None, None),
45 "symbol": (0, "symbol", None, None, None),
46 "string": (0, "string", None, None, None),
46 "string": (0, "string", None, None, None),
47 "end": (0, None, None, None, None),
47 "end": (0, None, None, None, None),
48 }
48 }
49
49
50 keywords = {'and', 'or', 'not'}
50 keywords = {'and', 'or', 'not'}
51
51
52 _quoteletters = {'"', "'"}
52 _quoteletters = {'"', "'"}
53 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
53 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
54
54
55 # default set of valid characters for the initial letter of symbols
55 # default set of valid characters for the initial letter of symbols
56 _syminitletters = set(pycompat.iterbytestr(
56 _syminitletters = set(pycompat.iterbytestr(
57 string.ascii_letters.encode('ascii') +
57 string.ascii_letters.encode('ascii') +
58 string.digits.encode('ascii') +
58 string.digits.encode('ascii') +
59 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
59 '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))
60
60
61 # default set of valid characters for non-initial letters of symbols
61 # default set of valid characters for non-initial letters of symbols
62 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
62 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
63
63
64 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
64 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
65 '''
65 '''
66 Parse a revset statement into a stream of tokens
66 Parse a revset statement into a stream of tokens
67
67
68 ``syminitletters`` is the set of valid characters for the initial
68 ``syminitletters`` is the set of valid characters for the initial
69 letter of symbols.
69 letter of symbols.
70
70
71 By default, character ``c`` is recognized as valid for initial
71 By default, character ``c`` is recognized as valid for initial
72 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
72 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
73
73
74 ``symletters`` is the set of valid characters for non-initial
74 ``symletters`` is the set of valid characters for non-initial
75 letters of symbols.
75 letters of symbols.
76
76
77 By default, character ``c`` is recognized as valid for non-initial
77 By default, character ``c`` is recognized as valid for non-initial
78 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
78 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
79
79
80 Check that @ is a valid unquoted token character (issue3686):
80 Check that @ is a valid unquoted token character (issue3686):
81 >>> list(tokenize("@::"))
81 >>> list(tokenize("@::"))
82 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
82 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
83
83
84 '''
84 '''
85 program = pycompat.bytestr(program)
85 program = pycompat.bytestr(program)
86 if syminitletters is None:
86 if syminitletters is None:
87 syminitletters = _syminitletters
87 syminitletters = _syminitletters
88 if symletters is None:
88 if symletters is None:
89 symletters = _symletters
89 symletters = _symletters
90
90
91 if program and lookup:
91 if program and lookup:
92 # attempt to parse old-style ranges first to deal with
92 # attempt to parse old-style ranges first to deal with
93 # things like old-tag which contain query metacharacters
93 # things like old-tag which contain query metacharacters
94 parts = program.split(':', 1)
94 parts = program.split(':', 1)
95 if all(lookup(sym) for sym in parts if sym):
95 if all(lookup(sym) for sym in parts if sym):
96 if parts[0]:
96 if parts[0]:
97 yield ('symbol', parts[0], 0)
97 yield ('symbol', parts[0], 0)
98 if len(parts) > 1:
98 if len(parts) > 1:
99 s = len(parts[0])
99 s = len(parts[0])
100 yield (':', None, s)
100 yield (':', None, s)
101 if parts[1]:
101 if parts[1]:
102 yield ('symbol', parts[1], s + 1)
102 yield ('symbol', parts[1], s + 1)
103 yield ('end', None, len(program))
103 yield ('end', None, len(program))
104 return
104 return
105
105
106 pos, l = 0, len(program)
106 pos, l = 0, len(program)
107 while pos < l:
107 while pos < l:
108 c = program[pos]
108 c = program[pos]
109 if c.isspace(): # skip inter-token whitespace
109 if c.isspace(): # skip inter-token whitespace
110 pass
110 pass
111 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
111 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
112 yield ('::', None, pos)
112 yield ('::', None, pos)
113 pos += 1 # skip ahead
113 pos += 1 # skip ahead
114 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
114 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
115 yield ('..', None, pos)
115 yield ('..', None, pos)
116 pos += 1 # skip ahead
116 pos += 1 # skip ahead
117 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
117 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
118 yield ('##', None, pos)
118 yield ('##', None, pos)
119 pos += 1 # skip ahead
119 pos += 1 # skip ahead
120 elif c in _simpleopletters: # handle simple operators
120 elif c in _simpleopletters: # handle simple operators
121 yield (c, None, pos)
121 yield (c, None, pos)
122 elif (c in _quoteletters or c == 'r' and
122 elif (c in _quoteletters or c == 'r' and
123 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
123 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
124 if c == 'r':
124 if c == 'r':
125 pos += 1
125 pos += 1
126 c = program[pos]
126 c = program[pos]
127 decode = lambda x: x
127 decode = lambda x: x
128 else:
128 else:
129 decode = parser.unescapestr
129 decode = parser.unescapestr
130 pos += 1
130 pos += 1
131 s = pos
131 s = pos
132 while pos < l: # find closing quote
132 while pos < l: # find closing quote
133 d = program[pos]
133 d = program[pos]
134 if d == '\\': # skip over escaped characters
134 if d == '\\': # skip over escaped characters
135 pos += 2
135 pos += 2
136 continue
136 continue
137 if d == c:
137 if d == c:
138 yield ('string', decode(program[s:pos]), s)
138 yield ('string', decode(program[s:pos]), s)
139 break
139 break
140 pos += 1
140 pos += 1
141 else:
141 else:
142 raise error.ParseError(_("unterminated string"), s)
142 raise error.ParseError(_("unterminated string"), s)
143 # gather up a symbol/keyword
143 # gather up a symbol/keyword
144 elif c in syminitletters:
144 elif c in syminitletters:
145 s = pos
145 s = pos
146 pos += 1
146 pos += 1
147 while pos < l: # find end of symbol
147 while pos < l: # find end of symbol
148 d = program[pos]
148 d = program[pos]
149 if d not in symletters:
149 if d not in symletters:
150 break
150 break
151 if d == '.' and program[pos - 1] == '.': # special case for ..
151 if d == '.' and program[pos - 1] == '.': # special case for ..
152 pos -= 1
152 pos -= 1
153 break
153 break
154 pos += 1
154 pos += 1
155 sym = program[s:pos]
155 sym = program[s:pos]
156 if sym in keywords: # operator keywords
156 if sym in keywords: # operator keywords
157 yield (sym, None, s)
157 yield (sym, None, s)
158 elif '-' in sym:
158 elif '-' in sym:
159 # some jerk gave us foo-bar-baz, try to check if it's a symbol
159 # some jerk gave us foo-bar-baz, try to check if it's a symbol
160 if lookup and lookup(sym):
160 if lookup and lookup(sym):
161 # looks like a real symbol
161 # looks like a real symbol
162 yield ('symbol', sym, s)
162 yield ('symbol', sym, s)
163 else:
163 else:
164 # looks like an expression
164 # looks like an expression
165 parts = sym.split('-')
165 parts = sym.split('-')
166 for p in parts[:-1]:
166 for p in parts[:-1]:
167 if p: # possible consecutive -
167 if p: # possible consecutive -
168 yield ('symbol', p, s)
168 yield ('symbol', p, s)
169 s += len(p)
169 s += len(p)
170 yield ('-', None, pos)
170 yield ('-', None, pos)
171 s += 1
171 s += 1
172 if parts[-1]: # possible trailing -
172 if parts[-1]: # possible trailing -
173 yield ('symbol', parts[-1], s)
173 yield ('symbol', parts[-1], s)
174 else:
174 else:
175 yield ('symbol', sym, s)
175 yield ('symbol', sym, s)
176 pos -= 1
176 pos -= 1
177 else:
177 else:
178 raise error.ParseError(_("syntax error in revset '%s'") %
178 raise error.ParseError(_("syntax error in revset '%s'") %
179 program, pos)
179 program, pos)
180 pos += 1
180 pos += 1
181 yield ('end', None, pos)
181 yield ('end', None, pos)
182
182
183 # helpers
183 # helpers
184
184
185 _notset = object()
185 _notset = object()
186
186
187 def getsymbol(x):
187 def getsymbol(x):
188 if x and x[0] == 'symbol':
188 if x and x[0] == 'symbol':
189 return x[1]
189 return x[1]
190 raise error.ParseError(_('not a symbol'))
190 raise error.ParseError(_('not a symbol'))
191
191
192 def getstring(x, err):
192 def getstring(x, err):
193 if x and (x[0] == 'string' or x[0] == 'symbol'):
193 if x and (x[0] == 'string' or x[0] == 'symbol'):
194 return x[1]
194 return x[1]
195 raise error.ParseError(err)
195 raise error.ParseError(err)
196
196
197 def getinteger(x, err, default=_notset):
197 def getinteger(x, err, default=_notset):
198 if not x and default is not _notset:
198 if not x and default is not _notset:
199 return default
199 return default
200 try:
200 try:
201 return int(getstring(x, err))
201 return int(getstring(x, err))
202 except ValueError:
202 except ValueError:
203 raise error.ParseError(err)
203 raise error.ParseError(err)
204
204
205 def getboolean(x, err):
205 def getboolean(x, err):
206 value = util.parsebool(getsymbol(x))
206 value = util.parsebool(getsymbol(x))
207 if value is not None:
207 if value is not None:
208 return value
208 return value
209 raise error.ParseError(err)
209 raise error.ParseError(err)
210
210
211 def getlist(x):
211 def getlist(x):
212 if not x:
212 if not x:
213 return []
213 return []
214 if x[0] == 'list':
214 if x[0] == 'list':
215 return list(x[1:])
215 return list(x[1:])
216 return [x]
216 return [x]
217
217
218 def getrange(x, err):
218 def getrange(x, err):
219 if not x:
219 if not x:
220 raise error.ParseError(err)
220 raise error.ParseError(err)
221 op = x[0]
221 op = x[0]
222 if op == 'range':
222 if op == 'range':
223 return x[1], x[2]
223 return x[1], x[2]
224 elif op == 'rangepre':
224 elif op == 'rangepre':
225 return None, x[1]
225 return None, x[1]
226 elif op == 'rangepost':
226 elif op == 'rangepost':
227 return x[1], None
227 return x[1], None
228 elif op == 'rangeall':
228 elif op == 'rangeall':
229 return None, None
229 return None, None
230 raise error.ParseError(err)
230 raise error.ParseError(err)
231
231
232 def getargs(x, min, max, err):
232 def getargs(x, min, max, err):
233 l = getlist(x)
233 l = getlist(x)
234 if len(l) < min or (max >= 0 and len(l) > max):
234 if len(l) < min or (max >= 0 and len(l) > max):
235 raise error.ParseError(err)
235 raise error.ParseError(err)
236 return l
236 return l
237
237
238 def getargsdict(x, funcname, keys):
238 def getargsdict(x, funcname, keys):
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
240 keyvaluenode='keyvalue', keynode='symbol')
240 keyvaluenode='keyvalue', keynode='symbol')
241
241
242 # cache of {spec: raw parsed tree} built internally
242 # cache of {spec: raw parsed tree} built internally
243 _treecache = {}
243 _treecache = {}
244
244
245 def _cachedtree(spec):
245 def _cachedtree(spec):
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
247 tree = _treecache.get(spec)
247 tree = _treecache.get(spec)
248 if tree is None:
248 if tree is None:
249 _treecache[spec] = tree = parse(spec)
249 _treecache[spec] = tree = parse(spec)
250 return tree
250 return tree
251
251
252 def _build(tmplspec, *repls):
252 def _build(tmplspec, *repls):
253 """Create raw parsed tree from a template revset statement
253 """Create raw parsed tree from a template revset statement
254
254
255 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
255 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
257 """
257 """
258 template = _cachedtree(tmplspec)
258 template = _cachedtree(tmplspec)
259 return parser.buildtree(template, ('symbol', '_'), *repls)
259 return parser.buildtree(template, ('symbol', '_'), *repls)
260
260
261 def _match(patspec, tree):
261 def _match(patspec, tree):
262 """Test if a tree matches the given pattern statement; return the matches
262 """Test if a tree matches the given pattern statement; return the matches
263
263
264 >>> _match('f(_)', parse('f()'))
264 >>> _match('f(_)', parse('f()'))
265 >>> _match('f(_)', parse('f(1)'))
265 >>> _match('f(_)', parse('f(1)'))
266 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
266 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
267 >>> _match('f(_)', parse('f(1, 2)'))
267 >>> _match('f(_)', parse('f(1, 2)'))
268 """
268 """
269 pattern = _cachedtree(patspec)
269 pattern = _cachedtree(patspec)
270 return parser.matchtree(pattern, tree, ('symbol', '_'),
270 return parser.matchtree(pattern, tree, ('symbol', '_'),
271 {'keyvalue', 'list'})
271 {'keyvalue', 'list'})
272
272
273 def _matchonly(revs, bases):
273 def _matchonly(revs, bases):
274 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
274 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
275
275
276 def _fixops(x):
276 def _fixops(x):
277 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
277 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
278 handled well by our simple top-down parser"""
278 handled well by our simple top-down parser"""
279 if not isinstance(x, tuple):
279 if not isinstance(x, tuple):
280 return x
280 return x
281
281
282 op = x[0]
282 op = x[0]
283 if op == 'parent':
283 if op == 'parent':
284 # x^:y means (x^) : y, not x ^ (:y)
284 # x^:y means (x^) : y, not x ^ (:y)
285 # x^: means (x^) :, not x ^ (:)
285 # x^: means (x^) :, not x ^ (:)
286 post = ('parentpost', x[1])
286 post = ('parentpost', x[1])
287 if x[2][0] == 'dagrangepre':
287 if x[2][0] == 'dagrangepre':
288 return _fixops(('dagrange', post, x[2][1]))
288 return _fixops(('dagrange', post, x[2][1]))
289 elif x[2][0] == 'rangepre':
289 elif x[2][0] == 'rangepre':
290 return _fixops(('range', post, x[2][1]))
290 return _fixops(('range', post, x[2][1]))
291 elif x[2][0] == 'rangeall':
291 elif x[2][0] == 'rangeall':
292 return _fixops(('rangepost', post))
292 return _fixops(('rangepost', post))
293 elif op == 'or':
293 elif op == 'or':
294 # make number of arguments deterministic:
294 # make number of arguments deterministic:
295 # x + y + z -> (or x y z) -> (or (list x y z))
295 # x + y + z -> (or x y z) -> (or (list x y z))
296 return (op, _fixops(('list',) + x[1:]))
296 return (op, _fixops(('list',) + x[1:]))
297 elif op == 'subscript' and x[1][0] == 'relation':
297 elif op == 'subscript' and x[1][0] == 'relation':
298 # x#y[z] ternary
298 # x#y[z] ternary
299 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
299 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
300
300
301 return (op,) + tuple(_fixops(y) for y in x[1:])
301 return (op,) + tuple(_fixops(y) for y in x[1:])
302
302
303 def _analyze(x):
303 def _analyze(x):
304 if x is None:
304 if x is None:
305 return x
305 return x
306
306
307 op = x[0]
307 op = x[0]
308 if op == 'minus':
308 if op == 'minus':
309 return _analyze(_build('_ and not _', *x[1:]))
309 return _analyze(_build('_ and not _', *x[1:]))
310 elif op == 'only':
310 elif op == 'only':
311 return _analyze(_build('only(_, _)', *x[1:]))
311 return _analyze(_build('only(_, _)', *x[1:]))
312 elif op == 'onlypost':
312 elif op == 'onlypost':
313 return _analyze(_build('only(_)', x[1]))
313 return _analyze(_build('only(_)', x[1]))
314 elif op == 'dagrangepre':
314 elif op == 'dagrangepre':
315 return _analyze(_build('ancestors(_)', x[1]))
315 return _analyze(_build('ancestors(_)', x[1]))
316 elif op == 'dagrangepost':
316 elif op == 'dagrangepost':
317 return _analyze(_build('descendants(_)', x[1]))
317 return _analyze(_build('descendants(_)', x[1]))
318 elif op == 'negate':
318 elif op == 'negate':
319 s = getstring(x[1], _("can't negate that"))
319 s = getstring(x[1], _("can't negate that"))
320 return _analyze(('string', '-' + s))
320 return _analyze(('string', '-' + s))
321 elif op in ('string', 'symbol'):
321 elif op in ('string', 'symbol'):
322 return x
322 return x
323 elif op == 'rangeall':
323 elif op == 'rangeall':
324 return (op, None)
324 return (op, None)
325 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
325 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
326 return (op, _analyze(x[1]))
326 return (op, _analyze(x[1]))
327 elif op == 'group':
327 elif op == 'group':
328 return _analyze(x[1])
328 return _analyze(x[1])
329 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
329 elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
330 'subscript'}:
330 'subscript'}:
331 ta = _analyze(x[1])
331 ta = _analyze(x[1])
332 tb = _analyze(x[2])
332 tb = _analyze(x[2])
333 return (op, ta, tb)
333 return (op, ta, tb)
334 elif op == 'relsubscript':
334 elif op == 'relsubscript':
335 ta = _analyze(x[1])
335 ta = _analyze(x[1])
336 tb = _analyze(x[2])
336 tb = _analyze(x[2])
337 tc = _analyze(x[3])
337 tc = _analyze(x[3])
338 return (op, ta, tb, tc)
338 return (op, ta, tb, tc)
339 elif op == 'list':
339 elif op == 'list':
340 return (op,) + tuple(_analyze(y) for y in x[1:])
340 return (op,) + tuple(_analyze(y) for y in x[1:])
341 elif op == 'keyvalue':
341 elif op == 'keyvalue':
342 return (op, x[1], _analyze(x[2]))
342 return (op, x[1], _analyze(x[2]))
343 elif op == 'func':
343 elif op == 'func':
344 return (op, x[1], _analyze(x[2]))
344 return (op, x[1], _analyze(x[2]))
345 raise ValueError('invalid operator %r' % op)
345 raise ValueError('invalid operator %r' % op)
346
346
347 def analyze(x):
347 def analyze(x):
348 """Transform raw parsed tree to evaluatable tree which can be fed to
348 """Transform raw parsed tree to evaluatable tree which can be fed to
349 optimize() or getset()
349 optimize() or getset()
350
350
351 All pseudo operations should be mapped to real operations or functions
351 All pseudo operations should be mapped to real operations or functions
352 defined in methods or symbols table respectively.
352 defined in methods or symbols table respectively.
353 """
353 """
354 return _analyze(x)
354 return _analyze(x)
355
355
356 def _optimize(x, small):
356 def _optimize(x, small):
357 if x is None:
357 if x is None:
358 return 0, x
358 return 0, x
359
359
360 smallbonus = 1
360 smallbonus = 1
361 if small:
361 if small:
362 smallbonus = .5
362 smallbonus = .5
363
363
364 op = x[0]
364 op = x[0]
365 if op in ('string', 'symbol'):
365 if op in ('string', 'symbol'):
366 return smallbonus, x # single revisions are small
366 return smallbonus, x # single revisions are small
367 elif op == 'and':
367 elif op == 'and':
368 wa, ta = _optimize(x[1], True)
368 wa, ta = _optimize(x[1], True)
369 wb, tb = _optimize(x[2], True)
369 wb, tb = _optimize(x[2], True)
370 w = min(wa, wb)
370 w = min(wa, wb)
371
371
372 # (draft/secret/_notpublic() & ::x) have a fast path
372 # (draft/secret/_notpublic() & ::x) have a fast path
373 m = _match('_() & ancestors(_)', ('and', ta, tb))
373 m = _match('_() & ancestors(_)', ('and', ta, tb))
374 if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
374 if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
375 return w, _build('_phaseandancestors(_, _)', m[1], m[2])
375 return w, _build('_phaseandancestors(_, _)', m[1], m[2])
376
376
377 # (::x and not ::y)/(not ::y and ::x) have a fast path
377 # (::x and not ::y)/(not ::y and ::x) have a fast path
378 m = _matchonly(ta, tb) or _matchonly(tb, ta)
378 m = _matchonly(ta, tb) or _matchonly(tb, ta)
379 if m:
379 if m:
380 return w, _build('only(_, _)', *m[1:])
380 return w, _build('only(_, _)', *m[1:])
381
381
382 m = _match('not _', tb)
382 m = _match('not _', tb)
383 if m:
383 if m:
384 return wa, ('difference', ta, m[1])
384 return wa, ('difference', ta, m[1])
385 if wa > wb:
385 if wa > wb:
386 op = 'andsmally'
386 op = 'andsmally'
387 return w, (op, ta, tb)
387 return w, (op, ta, tb)
388 elif op == 'or':
388 elif op == 'or':
389 # fast path for machine-generated expression, that is likely to have
389 # fast path for machine-generated expression, that is likely to have
390 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
390 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
391 ws, ts, ss = [], [], []
391 ws, ts, ss = [], [], []
392 def flushss():
392 def flushss():
393 if not ss:
393 if not ss:
394 return
394 return
395 if len(ss) == 1:
395 if len(ss) == 1:
396 w, t = ss[0]
396 w, t = ss[0]
397 else:
397 else:
398 s = '\0'.join(t[1] for w, t in ss)
398 s = '\0'.join(t[1] for w, t in ss)
399 y = _build('_list(_)', ('string', s))
399 y = _build('_list(_)', ('string', s))
400 w, t = _optimize(y, False)
400 w, t = _optimize(y, False)
401 ws.append(w)
401 ws.append(w)
402 ts.append(t)
402 ts.append(t)
403 del ss[:]
403 del ss[:]
404 for y in getlist(x[1]):
404 for y in getlist(x[1]):
405 w, t = _optimize(y, False)
405 w, t = _optimize(y, False)
406 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
406 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
407 ss.append((w, t))
407 ss.append((w, t))
408 continue
408 continue
409 flushss()
409 flushss()
410 ws.append(w)
410 ws.append(w)
411 ts.append(t)
411 ts.append(t)
412 flushss()
412 flushss()
413 if len(ts) == 1:
413 if len(ts) == 1:
414 return ws[0], ts[0] # 'or' operation is fully optimized out
414 return ws[0], ts[0] # 'or' operation is fully optimized out
415 return max(ws), (op, ('list',) + tuple(ts))
415 return max(ws), (op, ('list',) + tuple(ts))
416 elif op == 'not':
416 elif op == 'not':
417 # Optimize not public() to _notpublic() because we have a fast version
417 # Optimize not public() to _notpublic() because we have a fast version
418 if _match('public()', x[1]):
418 if _match('public()', x[1]):
419 o = _optimize(_build('_notpublic()'), not small)
419 o = _optimize(_build('_notpublic()'), not small)
420 return o[0], o[1]
420 return o[0], o[1]
421 else:
421 else:
422 o = _optimize(x[1], not small)
422 o = _optimize(x[1], not small)
423 return o[0], (op, o[1])
423 return o[0], (op, o[1])
424 elif op == 'rangeall':
424 elif op == 'rangeall':
425 return smallbonus, x
425 return smallbonus, x
426 elif op in ('rangepre', 'rangepost', 'parentpost'):
426 elif op in ('rangepre', 'rangepost', 'parentpost'):
427 o = _optimize(x[1], small)
427 o = _optimize(x[1], small)
428 return o[0], (op, o[1])
428 return o[0], (op, o[1])
429 elif op in ('dagrange', 'range'):
429 elif op in ('dagrange', 'range'):
430 wa, ta = _optimize(x[1], small)
430 wa, ta = _optimize(x[1], small)
431 wb, tb = _optimize(x[2], small)
431 wb, tb = _optimize(x[2], small)
432 return wa + wb, (op, ta, tb)
432 return wa + wb, (op, ta, tb)
433 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
433 elif op in ('parent', 'ancestor', 'relation', 'subscript'):
434 w, t = _optimize(x[1], small)
434 w, t = _optimize(x[1], small)
435 return w, (op, t, x[2])
435 return w, (op, t, x[2])
436 elif op == 'relsubscript':
436 elif op == 'relsubscript':
437 w, t = _optimize(x[1], small)
437 w, t = _optimize(x[1], small)
438 return w, (op, t, x[2], x[3])
438 return w, (op, t, x[2], x[3])
439 elif op == 'list':
439 elif op == 'list':
440 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
440 ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
441 return sum(ws), (op,) + ts
441 return sum(ws), (op,) + ts
442 elif op == 'keyvalue':
442 elif op == 'keyvalue':
443 w, t = _optimize(x[2], small)
443 w, t = _optimize(x[2], small)
444 return w, (op, x[1], t)
444 return w, (op, x[1], t)
445 elif op == 'func':
445 elif op == 'func':
446 f = getsymbol(x[1])
446 f = getsymbol(x[1])
447 wa, ta = _optimize(x[2], small)
447 wa, ta = _optimize(x[2], small)
448 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
448 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
449 'keyword', 'outgoing', 'user', 'destination'):
449 'keyword', 'outgoing', 'user', 'destination'):
450 w = 10 # slow
450 w = 10 # slow
451 elif f in ('modifies', 'adds', 'removes'):
451 elif f in ('modifies', 'adds', 'removes'):
452 w = 30 # slower
452 w = 30 # slower
453 elif f == "contains":
453 elif f == "contains":
454 w = 100 # very slow
454 w = 100 # very slow
455 elif f == "ancestor":
455 elif f == "ancestor":
456 w = 1 * smallbonus
456 w = 1 * smallbonus
457 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
457 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
458 w = 0
458 w = 0
459 elif f == "sort":
459 elif f == "sort":
460 w = 10 # assume most sorts look at changelog
460 w = 10 # assume most sorts look at changelog
461 else:
461 else:
462 w = 1
462 w = 1
463 return w + wa, (op, x[1], ta)
463 return w + wa, (op, x[1], ta)
464 raise ValueError('invalid operator %r' % op)
464 raise ValueError('invalid operator %r' % op)
465
465
466 def optimize(tree):
466 def optimize(tree):
467 """Optimize evaluatable tree
467 """Optimize evaluatable tree
468
468
469 All pseudo operations should be transformed beforehand.
469 All pseudo operations should be transformed beforehand.
470 """
470 """
471 _weight, newtree = _optimize(tree, small=True)
471 _weight, newtree = _optimize(tree, small=True)
472 return newtree
472 return newtree
473
473
474 # the set of valid characters for the initial letter of symbols in
474 # the set of valid characters for the initial letter of symbols in
475 # alias declarations and definitions
475 # alias declarations and definitions
476 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
476 _aliassyminitletters = _syminitletters | {'$'}
477
477
478 def _parsewith(spec, lookup=None, syminitletters=None):
478 def _parsewith(spec, lookup=None, syminitletters=None):
479 """Generate a parse tree of given spec with given tokenizing options
479 """Generate a parse tree of given spec with given tokenizing options
480
480
481 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
481 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
482 ('func', ('symbol', 'foo'), ('symbol', '$1'))
482 ('func', ('symbol', 'foo'), ('symbol', '$1'))
483 >>> _parsewith('$1')
483 >>> _parsewith('$1')
484 Traceback (most recent call last):
484 Traceback (most recent call last):
485 ...
485 ...
486 ParseError: ("syntax error in revset '$1'", 0)
486 ParseError: ("syntax error in revset '$1'", 0)
487 >>> _parsewith('foo bar')
487 >>> _parsewith('foo bar')
488 Traceback (most recent call last):
488 Traceback (most recent call last):
489 ...
489 ...
490 ParseError: ('invalid token', 4)
490 ParseError: ('invalid token', 4)
491 """
491 """
492 p = parser.parser(elements)
492 p = parser.parser(elements)
493 tree, pos = p.parse(tokenize(spec, lookup=lookup,
493 tree, pos = p.parse(tokenize(spec, lookup=lookup,
494 syminitletters=syminitletters))
494 syminitletters=syminitletters))
495 if pos != len(spec):
495 if pos != len(spec):
496 raise error.ParseError(_('invalid token'), pos)
496 raise error.ParseError(_('invalid token'), pos)
497 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
497 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
498
498
499 class _aliasrules(parser.basealiasrules):
499 class _aliasrules(parser.basealiasrules):
500 """Parsing and expansion rule set of revset aliases"""
500 """Parsing and expansion rule set of revset aliases"""
501 _section = _('revset alias')
501 _section = _('revset alias')
502
502
503 @staticmethod
503 @staticmethod
504 def _parse(spec):
504 def _parse(spec):
505 """Parse alias declaration/definition ``spec``
505 """Parse alias declaration/definition ``spec``
506
506
507 This allows symbol names to use also ``$`` as an initial letter
507 This allows symbol names to use also ``$`` as an initial letter
508 (for backward compatibility), and callers of this function should
508 (for backward compatibility), and callers of this function should
509 examine whether ``$`` is used also for unexpected symbols or not.
509 examine whether ``$`` is used also for unexpected symbols or not.
510 """
510 """
511 return _parsewith(spec, syminitletters=_aliassyminitletters)
511 return _parsewith(spec, syminitletters=_aliassyminitletters)
512
512
513 @staticmethod
513 @staticmethod
514 def _trygetfunc(tree):
514 def _trygetfunc(tree):
515 if tree[0] == 'func' and tree[1][0] == 'symbol':
515 if tree[0] == 'func' and tree[1][0] == 'symbol':
516 return tree[1][1], getlist(tree[2])
516 return tree[1][1], getlist(tree[2])
517
517
518 def expandaliases(tree, aliases, warn=None):
518 def expandaliases(tree, aliases, warn=None):
519 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
519 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
520 aliases = _aliasrules.buildmap(aliases)
520 aliases = _aliasrules.buildmap(aliases)
521 tree = _aliasrules.expand(aliases, tree)
521 tree = _aliasrules.expand(aliases, tree)
522 # warn about problematic (but not referred) aliases
522 # warn about problematic (but not referred) aliases
523 if warn is not None:
523 if warn is not None:
524 for name, alias in sorted(aliases.iteritems()):
524 for name, alias in sorted(aliases.iteritems()):
525 if alias.error and not alias.warned:
525 if alias.error and not alias.warned:
526 warn(_('warning: %s\n') % (alias.error))
526 warn(_('warning: %s\n') % (alias.error))
527 alias.warned = True
527 alias.warned = True
528 return tree
528 return tree
529
529
530 def foldconcat(tree):
530 def foldconcat(tree):
531 """Fold elements to be concatenated by `##`
531 """Fold elements to be concatenated by `##`
532 """
532 """
533 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
533 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
534 return tree
534 return tree
535 if tree[0] == '_concat':
535 if tree[0] == '_concat':
536 pending = [tree]
536 pending = [tree]
537 l = []
537 l = []
538 while pending:
538 while pending:
539 e = pending.pop()
539 e = pending.pop()
540 if e[0] == '_concat':
540 if e[0] == '_concat':
541 pending.extend(reversed(e[1:]))
541 pending.extend(reversed(e[1:]))
542 elif e[0] in ('string', 'symbol'):
542 elif e[0] in ('string', 'symbol'):
543 l.append(e[1])
543 l.append(e[1])
544 else:
544 else:
545 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
545 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
546 raise error.ParseError(msg)
546 raise error.ParseError(msg)
547 return ('string', ''.join(l))
547 return ('string', ''.join(l))
548 else:
548 else:
549 return tuple(foldconcat(t) for t in tree)
549 return tuple(foldconcat(t) for t in tree)
550
550
551 def parse(spec, lookup=None):
551 def parse(spec, lookup=None):
552 return _parsewith(spec, lookup=lookup)
552 return _parsewith(spec, lookup=lookup)
553
553
554 def _quote(s):
554 def _quote(s):
555 r"""Quote a value in order to make it safe for the revset engine.
555 r"""Quote a value in order to make it safe for the revset engine.
556
556
557 >>> _quote('asdf')
557 >>> _quote('asdf')
558 "'asdf'"
558 "'asdf'"
559 >>> _quote("asdf'\"")
559 >>> _quote("asdf'\"")
560 '\'asdf\\\'"\''
560 '\'asdf\\\'"\''
561 >>> _quote('asdf\'')
561 >>> _quote('asdf\'')
562 "'asdf\\''"
562 "'asdf\\''"
563 >>> _quote(1)
563 >>> _quote(1)
564 "'1'"
564 "'1'"
565 """
565 """
566 return "'%s'" % util.escapestr(pycompat.bytestr(s))
566 return "'%s'" % util.escapestr(pycompat.bytestr(s))
567
567
568 def formatspec(expr, *args):
568 def formatspec(expr, *args):
569 '''
569 '''
570 This is a convenience function for using revsets internally, and
570 This is a convenience function for using revsets internally, and
571 escapes arguments appropriately. Aliases are intentionally ignored
571 escapes arguments appropriately. Aliases are intentionally ignored
572 so that intended expression behavior isn't accidentally subverted.
572 so that intended expression behavior isn't accidentally subverted.
573
573
574 Supported arguments:
574 Supported arguments:
575
575
576 %r = revset expression, parenthesized
576 %r = revset expression, parenthesized
577 %d = int(arg), no quoting
577 %d = int(arg), no quoting
578 %s = string(arg), escaped and single-quoted
578 %s = string(arg), escaped and single-quoted
579 %b = arg.branch(), escaped and single-quoted
579 %b = arg.branch(), escaped and single-quoted
580 %n = hex(arg), single-quoted
580 %n = hex(arg), single-quoted
581 %% = a literal '%'
581 %% = a literal '%'
582
582
583 Prefixing the type with 'l' specifies a parenthesized list of that type.
583 Prefixing the type with 'l' specifies a parenthesized list of that type.
584
584
585 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
585 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
586 '(10 or 11):: and ((this()) or (that()))'
586 '(10 or 11):: and ((this()) or (that()))'
587 >>> formatspec('%d:: and not %d::', 10, 20)
587 >>> formatspec('%d:: and not %d::', 10, 20)
588 '10:: and not 20::'
588 '10:: and not 20::'
589 >>> formatspec('%ld or %ld', [], [1])
589 >>> formatspec('%ld or %ld', [], [1])
590 "_list('') or 1"
590 "_list('') or 1"
591 >>> formatspec('keyword(%s)', 'foo\\xe9')
591 >>> formatspec('keyword(%s)', 'foo\\xe9')
592 "keyword('foo\\\\xe9')"
592 "keyword('foo\\\\xe9')"
593 >>> b = lambda: 'default'
593 >>> b = lambda: 'default'
594 >>> b.branch = b
594 >>> b.branch = b
595 >>> formatspec('branch(%b)', b)
595 >>> formatspec('branch(%b)', b)
596 "branch('default')"
596 "branch('default')"
597 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
597 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
598 "root(_list('a\\x00b\\x00c\\x00d'))"
598 "root(_list('a\\x00b\\x00c\\x00d'))"
599 '''
599 '''
600
600
601 def argtype(c, arg):
601 def argtype(c, arg):
602 if c == 'd':
602 if c == 'd':
603 return '%d' % int(arg)
603 return '%d' % int(arg)
604 elif c == 's':
604 elif c == 's':
605 return _quote(arg)
605 return _quote(arg)
606 elif c == 'r':
606 elif c == 'r':
607 parse(arg) # make sure syntax errors are confined
607 parse(arg) # make sure syntax errors are confined
608 return '(%s)' % arg
608 return '(%s)' % arg
609 elif c == 'n':
609 elif c == 'n':
610 return _quote(node.hex(arg))
610 return _quote(node.hex(arg))
611 elif c == 'b':
611 elif c == 'b':
612 return _quote(arg.branch())
612 return _quote(arg.branch())
613
613
614 def listexp(s, t):
614 def listexp(s, t):
615 l = len(s)
615 l = len(s)
616 if l == 0:
616 if l == 0:
617 return "_list('')"
617 return "_list('')"
618 elif l == 1:
618 elif l == 1:
619 return argtype(t, s[0])
619 return argtype(t, s[0])
620 elif t == 'd':
620 elif t == 'd':
621 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
621 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
622 elif t == 's':
622 elif t == 's':
623 return "_list('%s')" % "\0".join(s)
623 return "_list('%s')" % "\0".join(s)
624 elif t == 'n':
624 elif t == 'n':
625 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
625 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
626 elif t == 'b':
626 elif t == 'b':
627 return "_list('%s')" % "\0".join(a.branch() for a in s)
627 return "_list('%s')" % "\0".join(a.branch() for a in s)
628
628
629 m = l // 2
629 m = l // 2
630 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
630 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
631
631
632 expr = pycompat.bytestr(expr)
632 expr = pycompat.bytestr(expr)
633 ret = ''
633 ret = ''
634 pos = 0
634 pos = 0
635 arg = 0
635 arg = 0
636 while pos < len(expr):
636 while pos < len(expr):
637 c = expr[pos]
637 c = expr[pos]
638 if c == '%':
638 if c == '%':
639 pos += 1
639 pos += 1
640 d = expr[pos]
640 d = expr[pos]
641 if d == '%':
641 if d == '%':
642 ret += d
642 ret += d
643 elif d in 'dsnbr':
643 elif d in 'dsnbr':
644 ret += argtype(d, args[arg])
644 ret += argtype(d, args[arg])
645 arg += 1
645 arg += 1
646 elif d == 'l':
646 elif d == 'l':
647 # a list of some type
647 # a list of some type
648 pos += 1
648 pos += 1
649 d = expr[pos]
649 d = expr[pos]
650 ret += listexp(list(args[arg]), d)
650 ret += listexp(list(args[arg]), d)
651 arg += 1
651 arg += 1
652 else:
652 else:
653 raise error.Abort(_('unexpected revspec format character %s')
653 raise error.Abort(_('unexpected revspec format character %s')
654 % d)
654 % d)
655 else:
655 else:
656 ret += c
656 ret += c
657 pos += 1
657 pos += 1
658
658
659 return ret
659 return ret
660
660
661 def prettyformat(tree):
661 def prettyformat(tree):
662 return parser.prettyformat(tree, ('string', 'symbol'))
662 return parser.prettyformat(tree, ('string', 'symbol'))
663
663
664 def depth(tree):
664 def depth(tree):
665 if isinstance(tree, tuple):
665 if isinstance(tree, tuple):
666 return max(map(depth, tree)) + 1
666 return max(map(depth, tree)) + 1
667 else:
667 else:
668 return 0
668 return 0
669
669
670 def funcsused(tree):
670 def funcsused(tree):
671 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
671 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
672 return set()
672 return set()
673 else:
673 else:
674 funcs = set()
674 funcs = set()
675 for s in tree[1:]:
675 for s in tree[1:]:
676 funcs |= funcsused(s)
676 funcs |= funcsused(s)
677 if tree[0] == 'func':
677 if tree[0] == 'func':
678 funcs.add(tree[1][1])
678 funcs.add(tree[1][1])
679 return funcs
679 return funcs
General Comments 0
You need to be logged in to leave comments. Login now