##// END OF EJS Templates
parser: add helper to reduce nesting of chained infix operations...
Yuya Nishihara -
r25306:c87b0592 default
parent child Browse files
Show More
@@ -1,110 +1,187 b''
1 # parser.py - simple top-down operator precedence parser for mercurial
1 # parser.py - simple top-down operator precedence parser for mercurial
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 # for background
10 # for background
11
11
12 # takes a tokenizer and elements
12 # takes a tokenizer and elements
13 # tokenizer is an iterator that returns type, value pairs
13 # tokenizer is an iterator that returns type, value pairs
14 # elements is a mapping of types to binding strength, prefix and infix actions
14 # elements is a mapping of types to binding strength, prefix and infix actions
15 # an action is a tree node name, a tree label, and an optional match
15 # an action is a tree node name, a tree label, and an optional match
16 # __call__(program) parses program into a labeled tree
16 # __call__(program) parses program into a labeled tree
17
17
18 import error
18 import error
19 from i18n import _
19 from i18n import _
20
20
21 class parser(object):
21 class parser(object):
22 def __init__(self, tokenizer, elements, methods=None):
22 def __init__(self, tokenizer, elements, methods=None):
23 self._tokenizer = tokenizer
23 self._tokenizer = tokenizer
24 self._elements = elements
24 self._elements = elements
25 self._methods = methods
25 self._methods = methods
26 self.current = None
26 self.current = None
27 def _advance(self):
27 def _advance(self):
28 'advance the tokenizer'
28 'advance the tokenizer'
29 t = self.current
29 t = self.current
30 self.current = next(self._iter, None)
30 self.current = next(self._iter, None)
31 return t
31 return t
32 def _match(self, m, pos):
32 def _match(self, m, pos):
33 'make sure the tokenizer matches an end condition'
33 'make sure the tokenizer matches an end condition'
34 if self.current[0] != m:
34 if self.current[0] != m:
35 raise error.ParseError(_("unexpected token: %s") % self.current[0],
35 raise error.ParseError(_("unexpected token: %s") % self.current[0],
36 self.current[2])
36 self.current[2])
37 self._advance()
37 self._advance()
38 def _parse(self, bind=0):
38 def _parse(self, bind=0):
39 token, value, pos = self._advance()
39 token, value, pos = self._advance()
40 # handle prefix rules on current token
40 # handle prefix rules on current token
41 prefix = self._elements[token][1]
41 prefix = self._elements[token][1]
42 if not prefix:
42 if not prefix:
43 raise error.ParseError(_("not a prefix: %s") % token, pos)
43 raise error.ParseError(_("not a prefix: %s") % token, pos)
44 if len(prefix) == 1:
44 if len(prefix) == 1:
45 expr = (prefix[0], value)
45 expr = (prefix[0], value)
46 else:
46 else:
47 if len(prefix) > 2 and prefix[2] == self.current[0]:
47 if len(prefix) > 2 and prefix[2] == self.current[0]:
48 self._match(prefix[2], pos)
48 self._match(prefix[2], pos)
49 expr = (prefix[0], None)
49 expr = (prefix[0], None)
50 else:
50 else:
51 expr = (prefix[0], self._parse(prefix[1]))
51 expr = (prefix[0], self._parse(prefix[1]))
52 if len(prefix) > 2:
52 if len(prefix) > 2:
53 self._match(prefix[2], pos)
53 self._match(prefix[2], pos)
54 # gather tokens until we meet a lower binding strength
54 # gather tokens until we meet a lower binding strength
55 while bind < self._elements[self.current[0]][0]:
55 while bind < self._elements[self.current[0]][0]:
56 token, value, pos = self._advance()
56 token, value, pos = self._advance()
57 e = self._elements[token]
57 e = self._elements[token]
58 # check for suffix - next token isn't a valid prefix
58 # check for suffix - next token isn't a valid prefix
59 if len(e) == 4 and not self._elements[self.current[0]][1]:
59 if len(e) == 4 and not self._elements[self.current[0]][1]:
60 suffix = e[3]
60 suffix = e[3]
61 expr = (suffix[0], expr)
61 expr = (suffix[0], expr)
62 else:
62 else:
63 # handle infix rules
63 # handle infix rules
64 if len(e) < 3 or not e[2]:
64 if len(e) < 3 or not e[2]:
65 raise error.ParseError(_("not an infix: %s") % token, pos)
65 raise error.ParseError(_("not an infix: %s") % token, pos)
66 infix = e[2]
66 infix = e[2]
67 if len(infix) == 3 and infix[2] == self.current[0]:
67 if len(infix) == 3 and infix[2] == self.current[0]:
68 self._match(infix[2], pos)
68 self._match(infix[2], pos)
69 expr = (infix[0], expr, (None))
69 expr = (infix[0], expr, (None))
70 else:
70 else:
71 expr = (infix[0], expr, self._parse(infix[1]))
71 expr = (infix[0], expr, self._parse(infix[1]))
72 if len(infix) == 3:
72 if len(infix) == 3:
73 self._match(infix[2], pos)
73 self._match(infix[2], pos)
74 return expr
74 return expr
75 def parse(self, message, lookup=None):
75 def parse(self, message, lookup=None):
76 'generate a parse tree from a message'
76 'generate a parse tree from a message'
77 if lookup:
77 if lookup:
78 self._iter = self._tokenizer(message, lookup)
78 self._iter = self._tokenizer(message, lookup)
79 else:
79 else:
80 self._iter = self._tokenizer(message)
80 self._iter = self._tokenizer(message)
81 self._advance()
81 self._advance()
82 res = self._parse()
82 res = self._parse()
83 token, value, pos = self.current
83 token, value, pos = self.current
84 return res, pos
84 return res, pos
85 def eval(self, tree):
85 def eval(self, tree):
86 'recursively evaluate a parse tree using node methods'
86 'recursively evaluate a parse tree using node methods'
87 if not isinstance(tree, tuple):
87 if not isinstance(tree, tuple):
88 return tree
88 return tree
89 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
89 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
90 def __call__(self, message):
90 def __call__(self, message):
91 'parse a message into a parse tree and evaluate if methods given'
91 'parse a message into a parse tree and evaluate if methods given'
92 t = self.parse(message)
92 t = self.parse(message)
93 if self._methods:
93 if self._methods:
94 return self.eval(t)
94 return self.eval(t)
95 return t
95 return t
96
96
97 def _prettyformat(tree, leafnodes, level, lines):
97 def _prettyformat(tree, leafnodes, level, lines):
98 if not isinstance(tree, tuple) or tree[0] in leafnodes:
98 if not isinstance(tree, tuple) or tree[0] in leafnodes:
99 lines.append((level, str(tree)))
99 lines.append((level, str(tree)))
100 else:
100 else:
101 lines.append((level, '(%s' % tree[0]))
101 lines.append((level, '(%s' % tree[0]))
102 for s in tree[1:]:
102 for s in tree[1:]:
103 _prettyformat(s, leafnodes, level + 1, lines)
103 _prettyformat(s, leafnodes, level + 1, lines)
104 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
104 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
105
105
106 def prettyformat(tree, leafnodes):
106 def prettyformat(tree, leafnodes):
107 lines = []
107 lines = []
108 _prettyformat(tree, leafnodes, 0, lines)
108 _prettyformat(tree, leafnodes, 0, lines)
109 output = '\n'.join((' ' * l + s) for l, s in lines)
109 output = '\n'.join((' ' * l + s) for l, s in lines)
110 return output
110 return output
111
112 def simplifyinfixops(tree, targetnodes):
113 """Flatten chained infix operations to reduce usage of Python stack
114
115 >>> def f(tree):
116 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
117 >>> f(('or',
118 ... ('or',
119 ... ('symbol', '1'),
120 ... ('symbol', '2')),
121 ... ('symbol', '3')))
122 (or
123 ('symbol', '1')
124 ('symbol', '2')
125 ('symbol', '3'))
126 >>> f(('func',
127 ... ('symbol', 'p1'),
128 ... ('or',
129 ... ('or',
130 ... ('func',
131 ... ('symbol', 'sort'),
132 ... ('list',
133 ... ('or',
134 ... ('or',
135 ... ('symbol', '1'),
136 ... ('symbol', '2')),
137 ... ('symbol', '3')),
138 ... ('negate',
139 ... ('symbol', 'rev')))),
140 ... ('and',
141 ... ('symbol', '4'),
142 ... ('group',
143 ... ('or',
144 ... ('or',
145 ... ('symbol', '5'),
146 ... ('symbol', '6')),
147 ... ('symbol', '7'))))),
148 ... ('symbol', '8'))))
149 (func
150 ('symbol', 'p1')
151 (or
152 (func
153 ('symbol', 'sort')
154 (list
155 (or
156 ('symbol', '1')
157 ('symbol', '2')
158 ('symbol', '3'))
159 (negate
160 ('symbol', 'rev'))))
161 (and
162 ('symbol', '4')
163 (group
164 (or
165 ('symbol', '5')
166 ('symbol', '6')
167 ('symbol', '7'))))
168 ('symbol', '8')))
169 """
170 if not isinstance(tree, tuple):
171 return tree
172 op = tree[0]
173 if op not in targetnodes:
174 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
175
176 # walk down left nodes taking each right node. no recursion to left nodes
177 # because infix operators are left-associative, i.e. left tree is deep.
178 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
179 simplified = []
180 x = tree
181 while x[0] == op:
182 l, r = x[1:]
183 simplified.append(simplifyinfixops(r, targetnodes))
184 x = l
185 simplified.append(simplifyinfixops(x, targetnodes))
186 simplified.append(op)
187 return tuple(reversed(simplified))
@@ -1,35 +1,36 b''
1 # this is hack to make sure no escape characters are inserted into the output
1 # this is hack to make sure no escape characters are inserted into the output
2 import os, sys
2 import os, sys
3 if 'TERM' in os.environ:
3 if 'TERM' in os.environ:
4 del os.environ['TERM']
4 del os.environ['TERM']
5 import doctest
5 import doctest
6
6
7 def testmod(name, optionflags=0, testtarget=None):
7 def testmod(name, optionflags=0, testtarget=None):
8 __import__(name)
8 __import__(name)
9 mod = sys.modules[name]
9 mod = sys.modules[name]
10 if testtarget is not None:
10 if testtarget is not None:
11 mod = getattr(mod, testtarget)
11 mod = getattr(mod, testtarget)
12 doctest.testmod(mod, optionflags=optionflags)
12 doctest.testmod(mod, optionflags=optionflags)
13
13
14 testmod('mercurial.changelog')
14 testmod('mercurial.changelog')
15 testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
15 testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
16 testmod('mercurial.dispatch')
16 testmod('mercurial.dispatch')
17 testmod('mercurial.encoding')
17 testmod('mercurial.encoding')
18 testmod('mercurial.hg')
18 testmod('mercurial.hg')
19 testmod('mercurial.hgweb.hgwebdir_mod')
19 testmod('mercurial.hgweb.hgwebdir_mod')
20 testmod('mercurial.match')
20 testmod('mercurial.match')
21 testmod('mercurial.minirst')
21 testmod('mercurial.minirst')
22 testmod('mercurial.patch')
22 testmod('mercurial.patch')
23 testmod('mercurial.pathutil')
23 testmod('mercurial.pathutil')
24 testmod('mercurial.parser')
24 testmod('mercurial.revset')
25 testmod('mercurial.revset')
25 testmod('mercurial.store')
26 testmod('mercurial.store')
26 testmod('mercurial.subrepo')
27 testmod('mercurial.subrepo')
27 testmod('mercurial.templatefilters')
28 testmod('mercurial.templatefilters')
28 testmod('mercurial.ui')
29 testmod('mercurial.ui')
29 testmod('mercurial.url')
30 testmod('mercurial.url')
30 testmod('mercurial.util')
31 testmod('mercurial.util')
31 testmod('mercurial.util', testtarget='platform')
32 testmod('mercurial.util', testtarget='platform')
32 testmod('hgext.convert.cvsps')
33 testmod('hgext.convert.cvsps')
33 testmod('hgext.convert.filemap')
34 testmod('hgext.convert.filemap')
34 testmod('hgext.convert.subversion')
35 testmod('hgext.convert.subversion')
35 testmod('hgext.mq')
36 testmod('hgext.mq')
General Comments 0
You need to be logged in to leave comments. Login now