Show More
@@ -129,15 +129,39 elements = { | |||||
129 |
|
129 | |||
130 | keywords = set(['and', 'or', 'not']) |
|
130 | keywords = set(['and', 'or', 'not']) | |
131 |
|
131 | |||
132 | def tokenize(program, lookup=None): |
|
132 | # default set of valid characters for the initial letter of symbols | |
|
133 | _syminitletters = set(c for c in [chr(i) for i in xrange(256)] | |||
|
134 | if c.isalnum() or c in '._@' or ord(c) > 127) | |||
|
135 | ||||
|
136 | # default set of valid characters for non-initial letters of symbols | |||
|
137 | _symletters = set(c for c in [chr(i) for i in xrange(256)] | |||
|
138 | if c.isalnum() or c in '-._/@' or ord(c) > 127) | |||
|
139 | ||||
|
140 | def tokenize(program, lookup=None, syminitletters=None, symletters=None): | |||
133 | ''' |
|
141 | ''' | |
134 | Parse a revset statement into a stream of tokens |
|
142 | Parse a revset statement into a stream of tokens | |
135 |
|
143 | |||
|
144 | ``syminitletters`` is the set of valid characters for the initial | |||
|
145 | letter of symbols. | |||
|
146 | ||||
|
147 | By default, character ``c`` is recognized as valid for initial | |||
|
148 | letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``. | |||
|
149 | ||||
|
150 | ``symletters`` is the set of valid characters for non-initial | |||
|
151 | letters of symbols. | |||
|
152 | ||||
|
153 | By default, character ``c`` is recognized as valid for non-initial | |||
|
154 | letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``. | |||
|
155 | ||||
136 | Check that @ is a valid unquoted token character (issue3686): |
|
156 | Check that @ is a valid unquoted token character (issue3686): | |
137 | >>> list(tokenize("@::")) |
|
157 | >>> list(tokenize("@::")) | |
138 | [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)] |
|
158 | [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)] | |
139 |
|
159 | |||
140 | ''' |
|
160 | ''' | |
|
161 | if syminitletters is None: | |||
|
162 | syminitletters = _syminitletters | |||
|
163 | if symletters is None: | |||
|
164 | symletters = _symletters | |||
141 |
|
165 | |||
142 | pos, l = 0, len(program) |
|
166 | pos, l = 0, len(program) | |
143 | while pos < l: |
|
167 | while pos < l: | |
@@ -177,12 +201,12 def tokenize(program, lookup=None): | |||||
177 | else: |
|
201 | else: | |
178 | raise error.ParseError(_("unterminated string"), s) |
|
202 | raise error.ParseError(_("unterminated string"), s) | |
179 | # gather up a symbol/keyword |
|
203 | # gather up a symbol/keyword | |
180 | elif c.isalnum() or c in '._@' or ord(c) > 127: |
|
204 | elif c in syminitletters: | |
181 | s = pos |
|
205 | s = pos | |
182 | pos += 1 |
|
206 | pos += 1 | |
183 | while pos < l: # find end of symbol |
|
207 | while pos < l: # find end of symbol | |
184 | d = program[pos] |
|
208 | d = program[pos] | |
185 | if not (d.isalnum() or d in "-._/@" or ord(d) > 127): |
|
209 | if d not in symletters: | |
186 | break |
|
210 | break | |
187 | if d == '.' and program[pos - 1] == '.': # special case for .. |
|
211 | if d == '.' and program[pos - 1] == '.': # special case for .. | |
188 | pos -= 1 |
|
212 | pos -= 1 |
General Comments 0
You need to be logged in to leave comments.
Login now