##// END OF EJS Templates
rust-parsers: use the same error message as with the higher-level code...
rust-parsers: use the same error message as with the higher-level code This can happen at two places, but it's not really enough time to justify it being refactored. Let's ensure we have the same error message, the newer one being slightly more helpful.

File last commit:

r52755:607e94e0 default
r52945:db5c202e default
Show More
filesetlang.py
351 lines | 10.3 KiB | text/x-python | PythonLexer
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 # filesetlang.py - parser, tokenizer and utility for file set language
#
Raphaël Gomès
contributor: change mentions of mpm to olivia...
r47575 # Copyright 2010 Olivia Mackall <olivia@selenic.com>
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 #
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Harbison
format: add many "missing" comma...
r52755 from __future__ import annotations
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
from .i18n import _
from . import (
error,
parser,
pycompat,
)
Yuya Nishihara
fileset: introduce weight constants for readability...
r38899 # common weight constants for static optimization
# (see registrar.filesetpredicate for details)
WEIGHT_CHECK_FILENAME = 0.5
WEIGHT_READ_CONTENTS = 30
WEIGHT_STATUS = 10
WEIGHT_STATUS_THOROUGH = 50
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 elements = {
# token-type: binding-strength, primary, prefix, infix, suffix
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b"(": (20, None, (b"group", 1, b")"), (b"func", 1, b")"), None),
b":": (15, None, None, (b"kindpat", 15), None),
b"-": (5, None, (b"negate", 19), (b"minus", 5), None),
b"not": (10, None, (b"not", 10), None, None),
b"!": (10, None, (b"not", 10), None, None),
b"and": (5, None, None, (b"and", 5), None),
b"&": (5, None, None, (b"and", 5), None),
b"or": (4, None, None, (b"or", 4), None),
b"|": (4, None, None, (b"or", 4), None),
b"+": (4, None, None, (b"or", 4), None),
b",": (2, None, None, (b"list", 2), None),
b")": (0, None, None, None, None),
b"symbol": (0, b"symbol", None, None, None),
b"string": (0, b"string", None, None, None),
b"end": (0, None, None, None, None),
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 }
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 keywords = {b'and', b'or', b'not'}
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
symbols = {}
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 globchars = b".*{}[]?/\\_"
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def tokenize(program):
pos, l = 0, len(program)
program = pycompat.bytestr(program)
while pos < l:
c = program[pos]
Augie Fackler
formatting: blacken the codebase...
r43346 if c.isspace(): # skip inter-token whitespace
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pass
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 elif c in b"(),-:|&+!": # handle simple operators
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 yield (c, None, pos)
Augie Fackler
formatting: blacken the codebase...
r43346 elif (
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 c in b'"\''
or c == b'r'
and program[pos : pos + 2] in (b"r'", b'r"')
Augie Fackler
formatting: blacken the codebase...
r43346 ): # handle quoted strings
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if c == b'r':
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pos += 1
c = program[pos]
decode = lambda x: x
else:
decode = parser.unescapestr
pos += 1
s = pos
Augie Fackler
formatting: blacken the codebase...
r43346 while pos < l: # find closing quote
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 d = program[pos]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if d == b'\\': # skip over escaped characters
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pos += 2
continue
if d == c:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 yield (b'string', decode(program[s:pos]), s)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 break
pos += 1
else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b"unterminated string"), s)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 elif c.isalnum() or c in globchars or ord(c) > 127:
# gather up a symbol/keyword
s = pos
pos += 1
Augie Fackler
formatting: blacken the codebase...
r43346 while pos < l: # find end of symbol
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 d = program[pos]
if not (d.isalnum() or d in globchars or ord(d) > 127):
break
pos += 1
sym = program[s:pos]
Augie Fackler
formatting: blacken the codebase...
r43346 if sym in keywords: # operator keywords
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 yield (sym, None, s)
else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 yield (b'symbol', sym, s)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pos -= 1
else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b"syntax error"), pos)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pos += 1
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 yield (b'end', None, pos)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def parse(expr):
p = parser.parser(elements)
tree, pos = p.parse(tokenize(expr))
if pos != len(expr):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b"invalid token"), pos)
return parser.simplifyinfixops(tree, {b'list', b'or'})
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getsymbol(x):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if x and x[0] == b'symbol':
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 return x[1]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b'not a symbol'))
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getstring(x, err):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if x and (x[0] == b'string' or x[0] == b'symbol'):
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 return x[1]
raise error.ParseError(err)
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getkindpat(x, y, allkinds, err):
kind = getsymbol(x)
pat = getstring(y, err)
if kind not in allkinds:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b"invalid pattern kind: %s") % kind)
return b'%s:%s' % (kind, pat)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getpattern(x, allkinds, err):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if x and x[0] == b'kindpat':
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 return getkindpat(x[1], x[2], allkinds, err)
return getstring(x, err)
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getlist(x):
if not x:
return []
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if x[0] == b'list':
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 return list(x[1:])
return [x]
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getargs(x, min, max, err):
l = getlist(x)
if len(l) < min or len(l) > max:
raise error.ParseError(err)
return l
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 def _analyze(x):
if x is None:
return x
op = x[0]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'string', b'symbol'}:
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 return x
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'kindpat':
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 getsymbol(x[1]) # kind must be a symbol
t = _analyze(x[2])
return (op, x[1], t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'group':
Yuya Nishihara
fileset: drop 'group' node from tree to be evaluated...
r38863 return _analyze(x[1])
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'negate':
raise error.ParseError(_(b"can't use negate operator in this context"))
if op == b'not':
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 t = _analyze(x[1])
return (op, t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'and':
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 ta = _analyze(x[1])
tb = _analyze(x[2])
return (op, ta, tb)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'minus':
return _analyze((b'and', x[1], (b'not', x[2])))
if op in {b'list', b'or'}:
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 ts = tuple(_analyze(y) for y in x[1:])
return (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'func':
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 getsymbol(x[1]) # function name must be a symbol
ta = _analyze(x[2])
return (op, x[1], ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ProgrammingError(b'invalid operator %r' % op)
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 def _insertstatushints(x):
"""Insert hint nodes where status should be calculated (first path)
This works in bottom-up way, summing up status names and inserting hint
nodes at 'and' and 'or' as needed. Thus redundant hint nodes may be left.
Returns (status-names, new-tree) at the given subtree, where status-names
is a sum of status names referenced in the given subtree.
"""
if x is None:
return (), x
op = x[0]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'string', b'symbol', b'kindpat'}:
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return (), x
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'not':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 h, t = _insertstatushints(x[1])
return h, (op, t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'and':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 ha, ta = _insertstatushints(x[1])
hb, tb = _insertstatushints(x[2])
hr = ha + hb
if ha and hb:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return hr, (b'withstatus', (op, ta, tb), (b'string', b' '.join(hr)))
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return hr, (op, ta, tb)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'or':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 hs, ts = zip(*(_insertstatushints(y) for y in x[1:]))
hr = sum(hs, ())
if sum(bool(h) for h in hs) > 1:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return hr, (b'withstatus', (op,) + ts, (b'string', b' '.join(hr)))
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return hr, (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'list':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 hs, ts = zip(*(_insertstatushints(y) for y in x[1:]))
return sum(hs, ()), (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'func':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 f = getsymbol(x[1])
# don't propagate 'ha' crossing a function boundary
ha, ta = _insertstatushints(x[2])
if getattr(symbols.get(f), '_callstatus', False):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return (f,), (b'withstatus', (op, x[1], ta), (b'string', f))
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return (), (op, x[1], ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ProgrammingError(b'invalid operator %r' % op)
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 def _mergestatushints(x, instatus):
"""Remove redundant status hint nodes (second path)
This is the top-down path to eliminate inner hint nodes.
"""
if x is None:
return x
op = x[0]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'withstatus':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 if instatus:
# drop redundant hint node
return _mergestatushints(x[1], instatus)
t = _mergestatushints(x[1], instatus=True)
return (op, t, x[2])
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'string', b'symbol', b'kindpat'}:
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return x
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'not':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 t = _mergestatushints(x[1], instatus)
return (op, t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'and':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 ta = _mergestatushints(x[1], instatus)
tb = _mergestatushints(x[2], instatus)
return (op, ta, tb)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'list', b'or'}:
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 ts = tuple(_mergestatushints(y, instatus) for y in x[1:])
return (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'func':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 # don't propagate 'instatus' crossing a function boundary
ta = _mergestatushints(x[2], instatus=False)
return (op, x[1], ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ProgrammingError(b'invalid operator %r' % op)
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 def analyze(x):
"""Transform raw parsed tree to evaluatable tree which can be fed to
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 optimize() or getmatch()
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862
All pseudo operations should be mapped to real operations or functions
defined in methods or symbols table respectively.
"""
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 t = _analyze(x)
_h, t = _insertstatushints(t)
return _mergestatushints(t, instatus=False)
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: optimize 'x and not y' to 'x - y'...
r38868 def _optimizeandops(op, ta, tb):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if tb is not None and tb[0] == b'not':
return (b'minus', ta, tb[1])
Yuya Nishihara
fileset: optimize 'x and not y' to 'x - y'...
r38868 return (op, ta, tb)
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: combine union of basic patterns into single matcher...
r38901 def _optimizeunion(xs):
# collect string patterns so they can be compiled into a single regexp
ws, ts, ss = [], [], []
for x in xs:
w, t = _optimize(x)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if t is not None and t[0] in {b'string', b'symbol', b'kindpat'}:
Yuya Nishihara
fileset: combine union of basic patterns into single matcher...
r38901 ss.append(t)
continue
ws.append(w)
ts.append(t)
if ss:
ws.append(WEIGHT_CHECK_FILENAME)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 ts.append((b'patterns',) + tuple(ss))
Yuya Nishihara
fileset: combine union of basic patterns into single matcher...
r38901 return ws, ts
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 def _optimize(x):
if x is None:
return 0, x
op = x[0]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'withstatus':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 w, t = _optimize(x[1])
return w, (op, t, x[2])
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'string', b'symbol'}:
Yuya Nishihara
fileset: introduce weight constants for readability...
r38899 return WEIGHT_CHECK_FILENAME, x
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'kindpat':
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 w, t = _optimize(x[2])
return w, (op, x[1], t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'not':
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 w, t = _optimize(x[1])
return w, (op, t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'and':
Yuya Nishihara
fileset: reorder 'and' expression to evaluate basic patterns first...
r38867 wa, ta = _optimize(x[1])
wb, tb = _optimize(x[2])
if wa <= wb:
Yuya Nishihara
fileset: optimize 'x and not y' to 'x - y'...
r38868 return wa, _optimizeandops(op, ta, tb)
Yuya Nishihara
fileset: reorder 'and' expression to evaluate basic patterns first...
r38867 else:
Yuya Nishihara
fileset: optimize 'x and not y' to 'x - y'...
r38868 return wb, _optimizeandops(op, tb, ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'or':
Yuya Nishihara
fileset: combine union of basic patterns into single matcher...
r38901 ws, ts = _optimizeunion(x[1:])
if len(ts) == 1:
Augie Fackler
formatting: blacken the codebase...
r43346 return ws[0], ts[0] # 'or' operation is fully optimized out
ts = tuple(
it[1] for it in sorted(enumerate(ts), key=lambda it: ws[it[0]])
)
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 return max(ws), (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'list':
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 ws, ts = zip(*(_optimize(y) for y in x[1:]))
return sum(ws), (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'func':
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 f = getsymbol(x[1])
w = getattr(symbols.get(f), '_weight', 1)
wa, ta = _optimize(x[2])
return w + wa, (op, x[1], ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ProgrammingError(b'invalid operator %r' % op)
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 def optimize(x):
"""Reorder/rewrite evaluatable tree for optimization
All pseudo operations should be transformed beforehand.
"""
_w, t = _optimize(x)
return t
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def prettyformat(tree):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return parser.prettyformat(tree, (b'string', b'symbol'))