##// END OF EJS Templates
parser: add helper to reduce nesting of chained infix operations...
Yuya Nishihara -
r25306:c87b0592 default
parent child Browse files
Show More
@@ -1,110 +1,187 b''
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is an iterator that returns type, value pairs
14 14 # elements is a mapping of types to binding strength, prefix and infix actions
15 15 # an action is a tree node name, a tree label, and an optional match
16 16 # __call__(program) parses program into a labeled tree
17 17
18 18 import error
19 19 from i18n import _
20 20
21 21 class parser(object):
22 22 def __init__(self, tokenizer, elements, methods=None):
23 23 self._tokenizer = tokenizer
24 24 self._elements = elements
25 25 self._methods = methods
26 26 self.current = None
27 27 def _advance(self):
28 28 'advance the tokenizer'
29 29 t = self.current
30 30 self.current = next(self._iter, None)
31 31 return t
32 32 def _match(self, m, pos):
33 33 'make sure the tokenizer matches an end condition'
34 34 if self.current[0] != m:
35 35 raise error.ParseError(_("unexpected token: %s") % self.current[0],
36 36 self.current[2])
37 37 self._advance()
38 38 def _parse(self, bind=0):
39 39 token, value, pos = self._advance()
40 40 # handle prefix rules on current token
41 41 prefix = self._elements[token][1]
42 42 if not prefix:
43 43 raise error.ParseError(_("not a prefix: %s") % token, pos)
44 44 if len(prefix) == 1:
45 45 expr = (prefix[0], value)
46 46 else:
47 47 if len(prefix) > 2 and prefix[2] == self.current[0]:
48 48 self._match(prefix[2], pos)
49 49 expr = (prefix[0], None)
50 50 else:
51 51 expr = (prefix[0], self._parse(prefix[1]))
52 52 if len(prefix) > 2:
53 53 self._match(prefix[2], pos)
54 54 # gather tokens until we meet a lower binding strength
55 55 while bind < self._elements[self.current[0]][0]:
56 56 token, value, pos = self._advance()
57 57 e = self._elements[token]
58 58 # check for suffix - next token isn't a valid prefix
59 59 if len(e) == 4 and not self._elements[self.current[0]][1]:
60 60 suffix = e[3]
61 61 expr = (suffix[0], expr)
62 62 else:
63 63 # handle infix rules
64 64 if len(e) < 3 or not e[2]:
65 65 raise error.ParseError(_("not an infix: %s") % token, pos)
66 66 infix = e[2]
67 67 if len(infix) == 3 and infix[2] == self.current[0]:
68 68 self._match(infix[2], pos)
69 69 expr = (infix[0], expr, (None))
70 70 else:
71 71 expr = (infix[0], expr, self._parse(infix[1]))
72 72 if len(infix) == 3:
73 73 self._match(infix[2], pos)
74 74 return expr
75 75 def parse(self, message, lookup=None):
76 76 'generate a parse tree from a message'
77 77 if lookup:
78 78 self._iter = self._tokenizer(message, lookup)
79 79 else:
80 80 self._iter = self._tokenizer(message)
81 81 self._advance()
82 82 res = self._parse()
83 83 token, value, pos = self.current
84 84 return res, pos
85 85 def eval(self, tree):
86 86 'recursively evaluate a parse tree using node methods'
87 87 if not isinstance(tree, tuple):
88 88 return tree
89 89 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
90 90 def __call__(self, message):
91 91 'parse a message into a parse tree and evaluate if methods given'
92 92 t = self.parse(message)
93 93 if self._methods:
94 94 return self.eval(t)
95 95 return t
96 96
97 97 def _prettyformat(tree, leafnodes, level, lines):
98 98 if not isinstance(tree, tuple) or tree[0] in leafnodes:
99 99 lines.append((level, str(tree)))
100 100 else:
101 101 lines.append((level, '(%s' % tree[0]))
102 102 for s in tree[1:]:
103 103 _prettyformat(s, leafnodes, level + 1, lines)
104 104 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
105 105
106 106 def prettyformat(tree, leafnodes):
107 107 lines = []
108 108 _prettyformat(tree, leafnodes, 0, lines)
109 109 output = '\n'.join((' ' * l + s) for l, s in lines)
110 110 return output
111
112 def simplifyinfixops(tree, targetnodes):
113 """Flatten chained infix operations to reduce usage of Python stack
114
115 >>> def f(tree):
116 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
117 >>> f(('or',
118 ... ('or',
119 ... ('symbol', '1'),
120 ... ('symbol', '2')),
121 ... ('symbol', '3')))
122 (or
123 ('symbol', '1')
124 ('symbol', '2')
125 ('symbol', '3'))
126 >>> f(('func',
127 ... ('symbol', 'p1'),
128 ... ('or',
129 ... ('or',
130 ... ('func',
131 ... ('symbol', 'sort'),
132 ... ('list',
133 ... ('or',
134 ... ('or',
135 ... ('symbol', '1'),
136 ... ('symbol', '2')),
137 ... ('symbol', '3')),
138 ... ('negate',
139 ... ('symbol', 'rev')))),
140 ... ('and',
141 ... ('symbol', '4'),
142 ... ('group',
143 ... ('or',
144 ... ('or',
145 ... ('symbol', '5'),
146 ... ('symbol', '6')),
147 ... ('symbol', '7'))))),
148 ... ('symbol', '8'))))
149 (func
150 ('symbol', 'p1')
151 (or
152 (func
153 ('symbol', 'sort')
154 (list
155 (or
156 ('symbol', '1')
157 ('symbol', '2')
158 ('symbol', '3'))
159 (negate
160 ('symbol', 'rev'))))
161 (and
162 ('symbol', '4')
163 (group
164 (or
165 ('symbol', '5')
166 ('symbol', '6')
167 ('symbol', '7'))))
168 ('symbol', '8')))
169 """
170 if not isinstance(tree, tuple):
171 return tree
172 op = tree[0]
173 if op not in targetnodes:
174 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
175
176 # walk down left nodes taking each right node. no recursion to left nodes
177 # because infix operators are left-associative, i.e. left tree is deep.
178 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
179 simplified = []
180 x = tree
181 while x[0] == op:
182 l, r = x[1:]
183 simplified.append(simplifyinfixops(r, targetnodes))
184 x = l
185 simplified.append(simplifyinfixops(x, targetnodes))
186 simplified.append(op)
187 return tuple(reversed(simplified))
@@ -1,35 +1,36 b''
1 1 # this is hack to make sure no escape characters are inserted into the output
2 2 import os, sys
3 3 if 'TERM' in os.environ:
4 4 del os.environ['TERM']
5 5 import doctest
6 6
7 7 def testmod(name, optionflags=0, testtarget=None):
8 8 __import__(name)
9 9 mod = sys.modules[name]
10 10 if testtarget is not None:
11 11 mod = getattr(mod, testtarget)
12 12 doctest.testmod(mod, optionflags=optionflags)
13 13
14 14 testmod('mercurial.changelog')
15 15 testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
16 16 testmod('mercurial.dispatch')
17 17 testmod('mercurial.encoding')
18 18 testmod('mercurial.hg')
19 19 testmod('mercurial.hgweb.hgwebdir_mod')
20 20 testmod('mercurial.match')
21 21 testmod('mercurial.minirst')
22 22 testmod('mercurial.patch')
23 23 testmod('mercurial.pathutil')
24 testmod('mercurial.parser')
24 25 testmod('mercurial.revset')
25 26 testmod('mercurial.store')
26 27 testmod('mercurial.subrepo')
27 28 testmod('mercurial.templatefilters')
28 29 testmod('mercurial.ui')
29 30 testmod('mercurial.url')
30 31 testmod('mercurial.util')
31 32 testmod('mercurial.util', testtarget='platform')
32 33 testmod('hgext.convert.cvsps')
33 34 testmod('hgext.convert.filemap')
34 35 testmod('hgext.convert.subversion')
35 36 testmod('hgext.mq')
General Comments 0
You need to be logged in to leave comments. Login now