##// END OF EJS Templates
match: optimize matcher when all patterns are of rootfilesin kind...
match: optimize matcher when all patterns are of rootfilesin kind Internally at Google, we use narrowspecs with only rootfilesin-kind patterns. Sometimes there are thousands of such patterns (i.e. thousands of tracked directories). In such cases, it can take quite long to build and evaluate the resulting matcher. This patch optimizes matchers that have only patterns of rootfilesin so it instead of creating a regular expression, it matches the given file's directory against the set of directories. In a repo with ~3600 tracked directories, it takes about 1.35 s to build the matcher and 2.7 s to walk the dirstate before this patch. After, it takes 0.04 s to create the matcher and 0.87 s to walk the dirstate. It may be worthwhile to do similar optimizations for e.g. patterns of type "kind:", but that's not a priority for us right now. Differential Revision: https://phab.mercurial-scm.org/D5058

File last commit:

r38806:e7aa113b default
r40278:19ed212d default
Show More
diffhelper.py
78 lines | 2.2 KiB | text/x-python | PythonLexer
# diffhelper.py - helper routines for patch
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
from .i18n import _
from . import (
error,
pycompat,
)
def addlines(fp, hunk, lena, lenb, a, b):
"""Read lines from fp into the hunk
The hunk is parsed into two arrays, a and b. a gets the old state of
the text, b gets the new state. The control char from the hunk is saved
when inserting into a, but not b (for performance while deleting files.)
"""
while True:
todoa = lena - len(a)
todob = lenb - len(b)
num = max(todoa, todob)
if num == 0:
break
for i in pycompat.xrange(num):
s = fp.readline()
if not s:
raise error.ParseError(_('incomplete hunk'))
if s == "\\ No newline at end of file\n":
fixnewline(hunk, a, b)
continue
if s == '\n' or s == '\r\n':
# Some patches may be missing the control char
# on empty lines. Supply a leading space.
s = ' ' + s
hunk.append(s)
if s.startswith('+'):
b.append(s[1:])
elif s.startswith('-'):
a.append(s)
else:
b.append(s[1:])
a.append(s)
def fixnewline(hunk, a, b):
"""Fix up the last lines of a and b when the patch has no newline at EOF"""
l = hunk[-1]
# tolerate CRLF in last line
if l.endswith('\r\n'):
hline = l[:-2]
else:
hline = l[:-1]
if hline.startswith((' ', '+')):
b[-1] = hline[1:]
if hline.startswith((' ', '-')):
a[-1] = hline
hunk[-1] = hline
def testhunk(a, b, bstart):
"""Compare the lines in a with the lines in b
a is assumed to have a control char at the start of each line, this char
is ignored in the compare.
"""
alen = len(a)
blen = len(b)
if alen > blen - bstart or bstart < 0:
return False
for i in pycompat.xrange(alen):
if a[i][1:] != b[i + bstart]:
return False
return True