##// END OF EJS Templates
revsetlang: fix quoting of %ls string...
Yuya Nishihara -
r35613:91201737 default
parent child Browse files
Show More
@@ -1,727 +1,729 b''
1 # revsetlang.py - parser, tokenizer and utility for revision set language
1 # revsetlang.py - parser, tokenizer and utility for revision set language
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import string
10 import string
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 error,
14 error,
15 node,
15 node,
16 parser,
16 parser,
17 pycompat,
17 pycompat,
18 util,
18 util,
19 )
19 )
20
20
# Operator table passed to parser.parser() by _parsewith(). Keys are token
# types as produced by tokenize(); the layout of each value is described by
# the column comment on the first line of the dict.
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}
51
51
# symbols that tokenize() emits as operator tokens instead of 'symbol' tokens
keywords = {'and', 'or', 'not'}

# table consulted by _optimize() (by function name) for an optional
# ``_weight`` attribute; empty here, presumably filled in by another module
symbols = {}

_quoteletters = {'"', "'"}
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = set(pycompat.iterbytestr(
    string.ascii_letters.encode('ascii') +
    string.digits.encode('ascii') +
    '._@')) | set(map(pycompat.bytechr, xrange(128, 256)))

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
67
67
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    This is a generator yielding ``(type, value, pos)`` tuples, where
    ``pos`` is an offset into ``program``. The stream always ends with an
    ``('end', None, pos)`` token.

    ``lookup``, if given, is called with a candidate name and should return
    a true value if that name exists. It is used to recognize old-style
    ``a:b`` ranges and names containing ``-``.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Raises error.ParseError on an unterminated string or on a character
    that cannot start any token.

    Check that @ is a valid unquoted token character (issue3686):
    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    '''
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step onto the quote and keep backslashes as-is
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end of the program without seeing the quote
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # "foo-bar-baz" may be one name or a chain of subtractions;
                # ask lookup() whether it is a real symbol
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        # report the offset of this '-' itself; ``pos``
                        # points past the end of the whole symbol here
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # the common "pos += 1" below re-advances onto the first
            # character after the symbol
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)
186
186
# helpers

# sentinel meaning "no default supplied", so that callers of getinteger()
# can still pass None (or any other value) as an explicit default
_notset = object()
190
190
def getsymbol(x):
    """Return the name stored in a ``('symbol', name)`` node

    Raises error.ParseError if ``x`` is empty or is not a symbol node.
    """
    if x and x[0] == 'symbol':
        return x[1]
    raise error.ParseError(_('not a symbol'))
195
195
def getstring(x, err):
    """Return the value of a ``'string'`` or ``'symbol'`` node

    Raises error.ParseError with the message ``err`` if ``x`` is empty or
    is any other kind of node.
    """
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)
200
200
def getinteger(x, err, default=_notset):
    """Return the integer value of a string or symbol node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned
    as-is. Otherwise error.ParseError is raised with the message ``err``
    when ``x`` is not a string/symbol node or its value is not an integer.
    """
    if not x and default is not _notset:
        return default
    try:
        return int(getstring(x, err))
    except ValueError:
        raise error.ParseError(err)
208
208
def getboolean(x, err):
    """Return the boolean value of a symbol node

    The symbol name is converted by util.parsebool(); if that returns None,
    error.ParseError is raised with the message ``err``. A node that is not
    a symbol is rejected by getsymbol() with its own ParseError.
    """
    value = util.parsebool(getsymbol(x))
    if value is not None:
        return value
    raise error.ParseError(err)
214
214
def getlist(x):
    """Return the elements of ``x`` as a Python list

    An empty tree yields ``[]``, a ``'list'`` node yields its children, and
    any other node yields a one-element list containing that node.
    """
    if not x:
        return []
    if x[0] == 'list':
        return list(x[1:])
    return [x]
221
221
def getrange(x, err):
    """Return the ``(first, last)`` pair of a range node

    A missing endpoint is returned as None: ``rangepre`` has no first
    element, ``rangepost`` has no last element and ``rangeall`` has neither.
    Raises error.ParseError with the message ``err`` if ``x`` is empty or
    is not one of the range nodes.
    """
    if not x:
        raise error.ParseError(err)
    op = x[0]
    if op == 'range':
        return x[1], x[2]
    elif op == 'rangepre':
        return None, x[1]
    elif op == 'rangepost':
        return x[1], None
    elif op == 'rangeall':
        return None, None
    raise error.ParseError(err)
235
235
def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    Raises error.ParseError with the message ``err`` if there are fewer
    than ``min`` arguments, or more than ``max`` arguments. A negative
    ``max`` disables the upper bound.
    """
    l = getlist(x)
    if len(l) < min or (max >= 0 and len(l) > max):
        raise error.ParseError(err)
    return l
241
241
def getargsdict(x, funcname, keys):
    """Build the arguments of ``funcname`` from the tree ``x``

    Delegates to parser.buildargsdict() with the argument spec ``keys``
    split by parser.splitargspec(), using ``'keyvalue'`` and ``'symbol'``
    as the key-value and key node types.
    """
    return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
                                keyvaluenode='keyvalue', keynode='symbol')
245
245
# cache of {spec: raw parsed tree} built internally
_treecache = {}

def _cachedtree(spec):
    """Return the parsed tree of ``spec``, parsing it only on a cache miss"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    tree = _treecache.get(spec)
    if tree is None:
        _treecache[spec] = tree = parse(spec)
    return tree
255
255
def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    ``tmplspec`` is parsed (through the tree cache) and handed to
    parser.buildtree() together with the placeholder ``('symbol', '_')``
    and the replacement trees ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    template = _cachedtree(tmplspec)
    return parser.buildtree(template, ('symbol', '_'), *repls)
264
264
def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    ``patspec`` is parsed (through the tree cache) and handed to
    parser.matchtree() with ``('symbol', '_')`` as the placeholder.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    pattern = _cachedtree(patspec)
    return parser.matchtree(pattern, tree, ('symbol', '_'),
                            {'keyvalue', 'list'})
276
276
def _matchonly(revs, bases):
    """Match ``revs and bases`` against ``ancestors(_) and not ancestors(_)``

    Returns whatever _match() returns for that pattern.
    """
    return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
279
279
def _fixops(x):
    """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
    handled well by our simple top-down parser

    Non-tuple values are returned unchanged; every other node is rebuilt
    with its children rewritten recursively.
    """
    if not isinstance(x, tuple):
        return x

    op = x[0]
    if op == 'parent':
        # x^:y means (x^) : y, not x ^ (:y)
        # x^: means (x^) :, not x ^ (:)
        post = ('parentpost', x[1])
        if x[2][0] == 'dagrangepre':
            return _fixops(('dagrange', post, x[2][1]))
        elif x[2][0] == 'dagrangeall':
            return _fixops(('dagrangepost', post))
        elif x[2][0] == 'rangepre':
            return _fixops(('range', post, x[2][1]))
        elif x[2][0] == 'rangeall':
            return _fixops(('rangepost', post))
        # any other right-hand side is a plain 'parent' node and is handled
        # by the generic rewrite at the bottom
    elif op == 'or':
        # make number of arguments deterministic:
        # x + y + z -> (or x y z) -> (or (list x y z))
        return (op, _fixops(('list',) + x[1:]))
    elif op == 'subscript' and x[1][0] == 'relation':
        # x#y[z] ternary
        return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))

    return (op,) + tuple(_fixops(y) for y in x[1:])
308
308
def _analyze(x):
    """Recursively rewrite a raw parsed tree into an evaluatable tree

    Pseudo operations are replaced by trees built from template statements
    (``minus``, ``only``, ``onlypost``, ``dagrangepre``, ``dagrangepost``),
    ``negate`` is folded into a string node, ``group`` nodes are unwrapped,
    and all remaining known nodes are rebuilt with analyzed children.

    Raises error.ParseError for a bare ``::`` (``dagrangeall``) and for a
    ``negate`` whose operand is not a string or symbol. Raises ValueError
    for an unknown operator.
    """
    if x is None:
        return x

    op = x[0]
    if op == 'minus':
        return _analyze(_build('_ and not _', *x[1:]))
    elif op == 'only':
        return _analyze(_build('only(_, _)', *x[1:]))
    elif op == 'onlypost':
        return _analyze(_build('only(_)', x[1]))
    elif op == 'dagrangeall':
        raise error.ParseError(_("can't use '::' in this context"))
    elif op == 'dagrangepre':
        return _analyze(_build('ancestors(_)', x[1]))
    elif op == 'dagrangepost':
        return _analyze(_build('descendants(_)', x[1]))
    elif op == 'negate':
        s = getstring(x[1], _("can't negate that"))
        return _analyze(('string', '-' + s))
    elif op in ('string', 'symbol'):
        return x
    elif op == 'rangeall':
        return (op, None)
    elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
        return (op, _analyze(x[1]))
    elif op == 'group':
        return _analyze(x[1])
    elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
                'subscript'}:
        ta = _analyze(x[1])
        tb = _analyze(x[2])
        return (op, ta, tb)
    elif op == 'relsubscript':
        ta = _analyze(x[1])
        tb = _analyze(x[2])
        tc = _analyze(x[3])
        return (op, ta, tb, tc)
    elif op == 'list':
        return (op,) + tuple(_analyze(y) for y in x[1:])
    elif op == 'keyvalue':
        # the key (x[1]) is kept as-is; only the value is analyzed
        return (op, x[1], _analyze(x[2]))
    elif op == 'func':
        # the function name (x[1]) is kept as-is; only arguments are analyzed
        return (op, x[1], _analyze(x[2]))
    raise ValueError('invalid operator %r' % op)
354
354
def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    return _analyze(x)
363
363
def _optimize(x):
    """Return ``(weight, tree)`` for the evaluatable tree ``x``

    ``weight`` is a number computed bottom-up from the node types (0 for an
    empty tree, 0.5 for a single string/symbol, the ``_weight`` attribute of
    the entry in ``symbols`` or 1 for a function, ...). ``tree`` is ``x``
    with several patterns replaced by forms that have a fast path.

    Raises ValueError for an unknown operator.
    """
    if x is None:
        return 0, x

    op = x[0]
    if op in ('string', 'symbol'):
        return 0.5, x # single revisions are small
    elif op == 'and':
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        w = min(wa, wb)

        # (draft/secret/_notpublic() & ::x) have a fast path
        m = _match('_() & ancestors(_)', ('and', ta, tb))
        if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
            return w, _build('_phaseandancestors(_, _)', m[1], m[2])

        # (::x and not ::y)/(not ::y and ::x) have a fast path
        m = _matchonly(ta, tb) or _matchonly(tb, ta)
        if m:
            return w, _build('only(_, _)', *m[1:])

        m = _match('not _', tb)
        if m:
            return wa, ('difference', ta, m[1])
        if wa > wb:
            # left operand is the heavier one
            op = 'andsmally'
        return w, (op, ta, tb)
    elif op == 'or':
        # fast path for machine-generated expression, that is likely to have
        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
        ws, ts, ss = [], [], []
        def flushss():
            # move the pending run of string/symbol operands in ``ss`` to
            # ``ws``/``ts``, merged into one _list() call if more than one
            if not ss:
                return
            if len(ss) == 1:
                w, t = ss[0]
            else:
                s = '\0'.join(t[1] for w, t in ss)
                y = _build('_list(_)', ('string', s))
                w, t = _optimize(y)
            ws.append(w)
            ts.append(t)
            del ss[:]
        for y in getlist(x[1]):
            w, t = _optimize(y)
            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                ss.append((w, t))
                continue
            flushss()
            ws.append(w)
            ts.append(t)
        flushss()
        if len(ts) == 1:
            return ws[0], ts[0] # 'or' operation is fully optimized out
        return max(ws), (op, ('list',) + tuple(ts))
    elif op == 'not':
        # Optimize not public() to _notpublic() because we have a fast version
        if _match('public()', x[1]):
            o = _optimize(_build('_notpublic()'))
            return o[0], o[1]
        else:
            o = _optimize(x[1])
            return o[0], (op, o[1])
    elif op == 'rangeall':
        return 1, x
    elif op in ('rangepre', 'rangepost', 'parentpost'):
        o = _optimize(x[1])
        return o[0], (op, o[1])
    elif op in ('dagrange', 'range'):
        wa, ta = _optimize(x[1])
        wb, tb = _optimize(x[2])
        return wa + wb, (op, ta, tb)
    elif op in ('parent', 'ancestor', 'relation', 'subscript'):
        # only the left operand is optimized; x[2] is passed through
        w, t = _optimize(x[1])
        return w, (op, t, x[2])
    elif op == 'relsubscript':
        w, t = _optimize(x[1])
        return w, (op, t, x[2], x[3])
    elif op == 'list':
        ws, ts = zip(*(_optimize(y) for y in x[1:]))
        return sum(ws), (op,) + ts
    elif op == 'keyvalue':
        w, t = _optimize(x[2])
        return w, (op, x[1], t)
    elif op == 'func':
        f = getsymbol(x[1])
        wa, ta = _optimize(x[2])
        w = getattr(symbols.get(f), '_weight', 1)
        return w + wa, (op, x[1], ta)
    raise ValueError('invalid operator %r' % op)
455
455
def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.

    Returns the rewritten tree only; the weight computed alongside it is
    discarded.
    """
    _weight, newtree = _optimize(tree)
    return newtree
463
463
# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions
_aliassyminitletters = _syminitletters | {'$'}
467
467
def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` and ``syminitletters`` are forwarded to tokenize(). Raises
    error.ParseError('invalid token') if the parser stops before the end
    of ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    p = parser.parser(elements)
    tree, pos = p.parse(tokenize(spec, lookup=lookup,
                                 syminitletters=syminitletters))
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
488
488
class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        This allows symbol names to use also ``$`` as an initial letter
        (for backward compatibility), and callers of this function should
        examine whether ``$`` is used also for unexpected symbols or not.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` is a call of a named function

        Returns None (implicitly) for any other tree.
        """
        if tree[0] == 'func' and tree[1][0] == 'symbol':
            return tree[1][1], getlist(tree[2])
507
507
def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    If ``warn`` is given, it is called once with a warning message for each
    alias that has an error and has not been warned about yet.
    """
    aliases = _aliasrules.buildmap(aliases)
    tree = _aliasrules.expand(aliases, tree)
    # warn about problematic (but not referred) aliases
    if warn is not None:
        for name, alias in sorted(aliases.iteritems()):
            if alias.error and not alias.warned:
                warn(_('warning: %s\n') % (alias.error))
                alias.warned = True
    return tree
519
519
def foldconcat(tree):
    """Collapse every `##` concatenation in ``tree`` into a single string

    Non-tuple values and 'string'/'symbol' leaves are returned unchanged.
    Other nodes are rebuilt with their elements folded recursively.
    Raises ParseError if a `##` operand is neither a string nor a symbol.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return tree
    if tree[0] != '_concat':
        return tuple(foldconcat(t) for t in tree)

    # walk nested '_concat' nodes depth-first, left to right, by means of
    # an explicit stack
    stack = [tree]
    parts = []
    while stack:
        elem = stack.pop()
        kind = elem[0]
        if kind == '_concat':
            # push operands in reverse so that the leftmost one pops first
            stack.extend(elem[:0:-1])
        elif kind in ('string', 'symbol'):
            parts.append(elem[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
    return ('string', ''.join(parts))
540
540
def parse(spec, lookup=None):
    """Parse the revset ``spec`` through ``_parsewith()``

    ``lookup`` is passed along unchanged.
    """
    tree = _parsewith(spec, lookup=lookup)
    return tree
543
543
def _quote(s):
    r"""Return ``s`` escaped and single-quoted for use in a revset

    The value is converted by ``pycompat.bytestr()`` first, so that
    non-string values such as integers can be passed as well.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = util.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped
557
557
def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = int(arg), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type.

    Returns the formatted revset expression.

    Raises error.ParseError if ``expr`` ends with an incomplete format
    character, if it uses an unknown format character, if an argument
    cannot be converted to the requested type, or if the number of
    ``args`` does not match the number of format characters.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    '10:: and not 20::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or 1"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    >>> formatspec(b'%lb', [b, b])
    "_list('default\\\\x00default')"
    '''

    def argtype(c, arg):
        # format the single argument ``arg`` per the type character ``c``
        if c == 'd':
            return '%d' % int(arg)
        elif c == 's':
            return _quote(arg)
        elif c == 'r':
            parse(arg) # make sure syntax errors are confined
            return '(%s)' % arg
        elif c == 'n':
            return _quote(node.hex(arg))
        elif c == 'b':
            try:
                return _quote(arg.branch())
            except AttributeError:
                # reported by the caller as an invalid argument
                raise TypeError
        raise error.ParseError(_('unexpected revspec format character %s') % c)

    def listexp(s, t):
        # format the list ``s`` whose items are of the type character ``t``
        l = len(s)
        if l == 0:
            return "_list('')"
        elif l == 1:
            return argtype(t, s[0])
        elif t == 'd':
            return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s)
        elif t == 's':
            # the joined value has to be escaped as a whole; otherwise a
            # quote or a backslash in any item would break the string literal
            return "_list(%s)" % _quote("\0".join(s))
        elif t == 'n':
            return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
        elif t == 'b':
            try:
                branches = "\0".join(a.branch() for a in s)
            except AttributeError:
                raise TypeError
            # branch names are arbitrary strings, so they need the same
            # escaping as the '%ls' case above
            return "_list(%s)" % _quote(branches)

        # any other type (e.g. 'r'): split the list in halves and combine
        # them by 'or', ending up with one argtype() call per item
        m = l // 2
        return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))

    expr = pycompat.bytestr(expr)
    argiter = iter(args)
    ret = []
    pos = 0
    while pos < len(expr):
        q = expr.find('%', pos)
        if q < 0:
            # no more format characters; copy the rest verbatim
            ret.append(expr[pos:])
            break
        ret.append(expr[pos:q])
        pos = q + 1
        try:
            d = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if d == '%':
            # '%%' consumes no argument
            ret.append(d)
            pos += 1
            continue

        try:
            arg = next(argiter)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))
        if d == 'l':
            # a list of some type
            pos += 1
            try:
                d = expr[pos]
            except IndexError:
                raise error.ParseError(_('incomplete revspec format character'))
            try:
                ret.append(listexp(list(arg), d))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        else:
            try:
                ret.append(argtype(d, arg))
            except (TypeError, ValueError):
                raise error.ParseError(_('invalid argument for revspec'))
        pos += 1

    # every argument must have been consumed by a format character
    try:
        next(argiter)
        raise error.ParseError(_('too many revspec arguments specified'))
    except StopIteration:
        pass
    return ''.join(ret)
677
679
def prettyformat(tree):
    """Format ``tree`` by ``parser.prettyformat()``

    'string' and 'symbol' are passed as the second argument.
    """
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)
680
682
def depth(tree):
    """Return the nesting depth of ``tree``

    A non-tuple value has depth 0.  A tuple is one level deeper than its
    deepest element.
    """
    if not isinstance(tree, tuple):
        return 0
    return 1 + max(depth(sub) for sub in tree)
686
688
def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``

    Non-tuple values and 'string'/'symbol' leaves contain no function call.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    found = set()
    for sub in tree[1:]:
        found.update(funcsused(sub))
    if tree[0] == 'func':
        # tree[1] is the node naming the function being called
        found.add(tree[1][1])
    return found
697
699
# 1 to 40 hexadecimal digits up to the end of the string
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')

def _ishashlikesymbol(symbol):
    """Return a match object if ``symbol`` looks like a hash, else None"""
    matched = _hashre.match(symbol)
    return matched
703
705
def gethashlikesymbols(tree):
    """Collect the symbols of ``tree`` which look like hashes

    An empty list is returned if ``tree`` is empty or has no such symbol.

    >>> gethashlikesymbols(('dagrange', ('symbol', '3'), ('symbol', 'abe3ff')))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(('func', ('symbol', 'precursors'), ('symbol', '.')))
    []
    >>> gethashlikesymbols(('func', ('symbol', 'precursors'), ('symbol', '34')))
    ['34']
    >>> gethashlikesymbols(('symbol', 'abe3ffZ'))
    []
    """
    if not tree:
        return []

    if tree[0] == "symbol":
        return [tree[1]] if _ishashlikesymbol(tree[1]) else []
    if len(tree) < 3:
        return []

    # tree[0] is the node type; look into the remaining elements only
    found = []
    for subtree in tree[1:]:
        found.extend(gethashlikesymbols(subtree))
    return found
General Comments 0
You need to be logged in to leave comments. Login now