##// END OF EJS Templates
grep: move readfile() to grepsearcher class
grep: move readfile() to grepsearcher class

File last commit:

r43359:c59eb156 default
r46291:888e633f default
Show More
filesetlang.py
352 lines | 10.3 KiB | text/x-python | PythonLexer
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 # filesetlang.py - parser, tokenizer and utility for file set language
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
from .i18n import _
Gregory Szorc
py3: manually import getattr where it is needed...
r43359 from .pycompat import getattr
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 from . import (
error,
parser,
pycompat,
)
Yuya Nishihara
fileset: introduce weight constants for readability...
r38899 # common weight constants for static optimization
# (see registrar.filesetpredicate for details)
WEIGHT_CHECK_FILENAME = 0.5
WEIGHT_READ_CONTENTS = 30
WEIGHT_STATUS = 10
WEIGHT_STATUS_THOROUGH = 50
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 elements = {
# token-type: binding-strength, primary, prefix, infix, suffix
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 b"(": (20, None, (b"group", 1, b")"), (b"func", 1, b")"), None),
b":": (15, None, None, (b"kindpat", 15), None),
b"-": (5, None, (b"negate", 19), (b"minus", 5), None),
b"not": (10, None, (b"not", 10), None, None),
b"!": (10, None, (b"not", 10), None, None),
b"and": (5, None, None, (b"and", 5), None),
b"&": (5, None, None, (b"and", 5), None),
b"or": (4, None, None, (b"or", 4), None),
b"|": (4, None, None, (b"or", 4), None),
b"+": (4, None, None, (b"or", 4), None),
b",": (2, None, None, (b"list", 2), None),
b")": (0, None, None, None, None),
b"symbol": (0, b"symbol", None, None, None),
b"string": (0, b"string", None, None, None),
b"end": (0, None, None, None, None),
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 }
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 keywords = {b'and', b'or', b'not'}
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
symbols = {}
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 globchars = b".*{}[]?/\\_"
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def tokenize(program):
pos, l = 0, len(program)
program = pycompat.bytestr(program)
while pos < l:
c = program[pos]
Augie Fackler
formatting: blacken the codebase...
r43346 if c.isspace(): # skip inter-token whitespace
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pass
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 elif c in b"(),-:|&+!": # handle simple operators
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 yield (c, None, pos)
Augie Fackler
formatting: blacken the codebase...
r43346 elif (
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 c in b'"\''
or c == b'r'
and program[pos : pos + 2] in (b"r'", b'r"')
Augie Fackler
formatting: blacken the codebase...
r43346 ): # handle quoted strings
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if c == b'r':
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pos += 1
c = program[pos]
decode = lambda x: x
else:
decode = parser.unescapestr
pos += 1
s = pos
Augie Fackler
formatting: blacken the codebase...
r43346 while pos < l: # find closing quote
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 d = program[pos]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if d == b'\\': # skip over escaped characters
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pos += 2
continue
if d == c:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 yield (b'string', decode(program[s:pos]), s)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 break
pos += 1
else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b"unterminated string"), s)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 elif c.isalnum() or c in globchars or ord(c) > 127:
# gather up a symbol/keyword
s = pos
pos += 1
Augie Fackler
formatting: blacken the codebase...
r43346 while pos < l: # find end of symbol
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 d = program[pos]
if not (d.isalnum() or d in globchars or ord(d) > 127):
break
pos += 1
sym = program[s:pos]
Augie Fackler
formatting: blacken the codebase...
r43346 if sym in keywords: # operator keywords
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 yield (sym, None, s)
else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 yield (b'symbol', sym, s)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pos -= 1
else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b"syntax error"), pos)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 pos += 1
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 yield (b'end', None, pos)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def parse(expr):
p = parser.parser(elements)
tree, pos = p.parse(tokenize(expr))
if pos != len(expr):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b"invalid token"), pos)
return parser.simplifyinfixops(tree, {b'list', b'or'})
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getsymbol(x):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if x and x[0] == b'symbol':
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 return x[1]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b'not a symbol'))
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getstring(x, err):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if x and (x[0] == b'string' or x[0] == b'symbol'):
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 return x[1]
raise error.ParseError(err)
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getkindpat(x, y, allkinds, err):
kind = getsymbol(x)
pat = getstring(y, err)
if kind not in allkinds:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ParseError(_(b"invalid pattern kind: %s") % kind)
return b'%s:%s' % (kind, pat)
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getpattern(x, allkinds, err):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if x and x[0] == b'kindpat':
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 return getkindpat(x[1], x[2], allkinds, err)
return getstring(x, err)
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getlist(x):
if not x:
return []
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if x[0] == b'list':
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 return list(x[1:])
return [x]
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def getargs(x, min, max, err):
l = getlist(x)
if len(l) < min or len(l) > max:
raise error.ParseError(err)
return l
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 def _analyze(x):
if x is None:
return x
op = x[0]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'string', b'symbol'}:
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 return x
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'kindpat':
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 getsymbol(x[1]) # kind must be a symbol
t = _analyze(x[2])
return (op, x[1], t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'group':
Yuya Nishihara
fileset: drop 'group' node from tree to be evaluated...
r38863 return _analyze(x[1])
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'negate':
raise error.ParseError(_(b"can't use negate operator in this context"))
if op == b'not':
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 t = _analyze(x[1])
return (op, t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'and':
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 ta = _analyze(x[1])
tb = _analyze(x[2])
return (op, ta, tb)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'minus':
return _analyze((b'and', x[1], (b'not', x[2])))
if op in {b'list', b'or'}:
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 ts = tuple(_analyze(y) for y in x[1:])
return (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'func':
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 getsymbol(x[1]) # function name must be a symbol
ta = _analyze(x[2])
return (op, x[1], ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ProgrammingError(b'invalid operator %r' % op)
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 def _insertstatushints(x):
"""Insert hint nodes where status should be calculated (first path)
This works in bottom-up way, summing up status names and inserting hint
nodes at 'and' and 'or' as needed. Thus redundant hint nodes may be left.
Returns (status-names, new-tree) at the given subtree, where status-names
is a sum of status names referenced in the given subtree.
"""
if x is None:
return (), x
op = x[0]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'string', b'symbol', b'kindpat'}:
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return (), x
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'not':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 h, t = _insertstatushints(x[1])
return h, (op, t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'and':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 ha, ta = _insertstatushints(x[1])
hb, tb = _insertstatushints(x[2])
hr = ha + hb
if ha and hb:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return hr, (b'withstatus', (op, ta, tb), (b'string', b' '.join(hr)))
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return hr, (op, ta, tb)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'or':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 hs, ts = zip(*(_insertstatushints(y) for y in x[1:]))
hr = sum(hs, ())
if sum(bool(h) for h in hs) > 1:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return hr, (b'withstatus', (op,) + ts, (b'string', b' '.join(hr)))
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return hr, (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'list':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 hs, ts = zip(*(_insertstatushints(y) for y in x[1:]))
return sum(hs, ()), (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'func':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 f = getsymbol(x[1])
# don't propagate 'ha' crossing a function boundary
ha, ta = _insertstatushints(x[2])
if getattr(symbols.get(f), '_callstatus', False):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return (f,), (b'withstatus', (op, x[1], ta), (b'string', f))
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return (), (op, x[1], ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ProgrammingError(b'invalid operator %r' % op)
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 def _mergestatushints(x, instatus):
"""Remove redundant status hint nodes (second path)
This is the top-down path to eliminate inner hint nodes.
"""
if x is None:
return x
op = x[0]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'withstatus':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 if instatus:
# drop redundant hint node
return _mergestatushints(x[1], instatus)
t = _mergestatushints(x[1], instatus=True)
return (op, t, x[2])
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'string', b'symbol', b'kindpat'}:
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 return x
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'not':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 t = _mergestatushints(x[1], instatus)
return (op, t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'and':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 ta = _mergestatushints(x[1], instatus)
tb = _mergestatushints(x[2], instatus)
return (op, ta, tb)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'list', b'or'}:
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 ts = tuple(_mergestatushints(y, instatus) for y in x[1:])
return (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'func':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 # don't propagate 'instatus' crossing a function boundary
ta = _mergestatushints(x[2], instatus=False)
return (op, x[1], ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ProgrammingError(b'invalid operator %r' % op)
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862 def analyze(x):
"""Transform raw parsed tree to evaluatable tree which can be fed to
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 optimize() or getmatch()
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862
All pseudo operations should be mapped to real operations or functions
defined in methods or symbols table respectively.
"""
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 t = _analyze(x)
_h, t = _insertstatushints(t)
return _mergestatushints(t, instatus=False)
Yuya Nishihara
fileset: add phase to transform parsed tree...
r38862
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: optimize 'x and not y' to 'x - y'...
r38868 def _optimizeandops(op, ta, tb):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if tb is not None and tb[0] == b'not':
return (b'minus', ta, tb[1])
Yuya Nishihara
fileset: optimize 'x and not y' to 'x - y'...
r38868 return (op, ta, tb)
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: combine union of basic patterns into single matcher...
r38901 def _optimizeunion(xs):
# collect string patterns so they can be compiled into a single regexp
ws, ts, ss = [], [], []
for x in xs:
w, t = _optimize(x)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if t is not None and t[0] in {b'string', b'symbol', b'kindpat'}:
Yuya Nishihara
fileset: combine union of basic patterns into single matcher...
r38901 ss.append(t)
continue
ws.append(w)
ts.append(t)
if ss:
ws.append(WEIGHT_CHECK_FILENAME)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 ts.append((b'patterns',) + tuple(ss))
Yuya Nishihara
fileset: combine union of basic patterns into single matcher...
r38901 return ws, ts
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 def _optimize(x):
if x is None:
return 0, x
op = x[0]
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'withstatus':
Yuya Nishihara
fileset: insert hints where status should be computed...
r38915 w, t = _optimize(x[1])
return w, (op, t, x[2])
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op in {b'string', b'symbol'}:
Yuya Nishihara
fileset: introduce weight constants for readability...
r38899 return WEIGHT_CHECK_FILENAME, x
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'kindpat':
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 w, t = _optimize(x[2])
return w, (op, x[1], t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'not':
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 w, t = _optimize(x[1])
return w, (op, t)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'and':
Yuya Nishihara
fileset: reorder 'and' expression to evaluate basic patterns first...
r38867 wa, ta = _optimize(x[1])
wb, tb = _optimize(x[2])
if wa <= wb:
Yuya Nishihara
fileset: optimize 'x and not y' to 'x - y'...
r38868 return wa, _optimizeandops(op, ta, tb)
Yuya Nishihara
fileset: reorder 'and' expression to evaluate basic patterns first...
r38867 else:
Yuya Nishihara
fileset: optimize 'x and not y' to 'x - y'...
r38868 return wb, _optimizeandops(op, tb, ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'or':
Yuya Nishihara
fileset: combine union of basic patterns into single matcher...
r38901 ws, ts = _optimizeunion(x[1:])
if len(ts) == 1:
Augie Fackler
formatting: blacken the codebase...
r43346 return ws[0], ts[0] # 'or' operation is fully optimized out
ts = tuple(
it[1] for it in sorted(enumerate(ts), key=lambda it: ws[it[0]])
)
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 return max(ws), (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'list':
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 ws, ts = zip(*(_optimize(y) for y in x[1:]))
return sum(ws), (op,) + ts
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if op == b'func':
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 f = getsymbol(x[1])
w = getattr(symbols.get(f), '_weight', 1)
wa, ta = _optimize(x[2])
return w + wa, (op, x[1], ta)
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 raise error.ProgrammingError(b'invalid operator %r' % op)
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: add stub for weight-based optimization...
r38865 def optimize(x):
"""Reorder/rewrite evaluatable tree for optimization
All pseudo operations should be transformed beforehand.
"""
_w, t = _optimize(x)
return t
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
fileset: extract language processing part to new module (API)...
r38841 def prettyformat(tree):
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return parser.prettyformat(tree, (b'string', b'symbol'))