##// END OF EJS Templates
branchmap-v3: filter topo heads using node for performance reason...
branchmap-v3: filter topo heads using node for performance reason The branchmap currently contains heads as nodeid. If we build a set of revnum with the topological heads, we need to turn the nodeid in the branchmap to revnum to be able to check if they are topo-heads. That nodeid → revnum lookup is "expensive" and adds up to something noticeable if you do it hundreds of thousand of time. Instead we turn all the topo-heads revnums into nodes and build a set. So we can directly test membership of the nodeids stored in the branchmap. That is much faster. Ideally we would have revnum in the branchmap and could directly test revnum against a revnum set and that would be even faster. However that's an adventure for another time. Without this change, the branchmap format "v3" was significantly slower than the "v2" format. With this changes, some of that gap is recovered With rust + persistent nodemap, this overhead was smaller because the extra lookup did not had to to build the nodemap from scratch. In addition the mozilla-unified repository is able to use the "pure_top" mode of branchmap v3, so it was not really affected by this. Future changeset will work of the remaining of the performance gap. ### benchmark.name = hg.command.unbundle # bin-env-vars.hg.py-re2-module = default # benchmark.variants.issue6528 = disabled # benchmark.variants.resource-usage = default # benchmark.variants.reuse-external-delta-parent = yes # benchmark.variants.revs = any-1-extra-rev # benchmark.variants.source = unbundle # benchmark.variants.validate = default # benchmark.variants.verbosity = quiet ## data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.233711 ~~~~~ branch-v3 before: 0.380994 (+63.02%, +0.15) branch-v3 after: 0.368769 (+57.79%, +0.14) # bin-env-vars.hg.flavor = rust branch-v2: 0.235230 ~~~~~ branch-v3 before: 0.385060 (+63.70%, +0.15) branch-v3 after: 0.372460 (+58.34%, +0.14) ## data-env-vars.name = netbeans-2018-08-01-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.255586 ~~~~~ branch-v3 before: 0.317524 (+24.23%, +0.06) branch-v3 after: 0.318907 (+24.78%, +0.06) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.339010 ~~~~~ branch-v3 before: 0.410007 (+20.94%, +0.07) branch-v3 after: 0.349752 (+3.17%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.346525 ~~~~~ branch-v3 before: 0.410428 (+18.44%, +0.06) branch-v3 after: 0.354300 (+2.24%, +0.01) ## data-env-vars.name = mozilla-central-2024-03-22-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.380202 ~~~~~ branch-v3 before: 0.393871 (+3.60%, +0.01) branch-v3 after: 0.396293 (+4.23%, +0.02) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.412165 ~~~~~ branch-v3 before: 0.438105 (+6.29%, +0.03) branch-v3 after: 0.424769 (+3.06%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.412397 ~~~~~ branch-v3 before: 0.438405 (+6.31%, +0.03) branch-v3 after: 0.421796 (+2.28%, +0.01) ## data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.429501 ~~~~~ branch-v3 before: 0.452692 (+5.40%, +0.02) branch-v3 after: 0.443849 (+3.34%, +0.01) ## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 3.403171 ~~~~~ branch-v3 before: 6.562345 (+92.83%, +3.16) branch-v3 after: 6.234055 (+83.18%, +2.83) # bin-env-vars.hg.flavor = rust branch-v2: 3.454876 ~~~~~ branch-v3 before: 6.160248 (+78.31%, +2.71) branch-v3 after: 6.307813 (+82.58%, +2.85) ## data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 3.465435 ~~~~~ branch-v3 before: 5.381648 (+55.30%, +1.92) branch-v3 after: 5.176076 (+49.36%, +1.71)

File last commit:

r52756:f4733654 default
r52869:41b8892a default
Show More
grep.py
222 lines | 7.1 KiB | text/x-python | PythonLexer
# grep.py - logic for history walk and grep
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import annotations
import difflib
from .i18n import _
from . import (
error,
match as matchmod,
pycompat,
scmutil,
util,
)
def matchlines(body, regexp):
begin = 0
linenum = 0
while begin < len(body):
match = regexp.search(body, begin)
if not match:
break
mstart, mend = match.span()
linenum += body.count(b'\n', begin, mstart) + 1
lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
begin = body.find(b'\n', mend) + 1 or len(body) + 1
lend = begin - 1
yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
class linestate:
def __init__(self, line, linenum, colstart, colend):
self.line = line
self.linenum = linenum
self.colstart = colstart
self.colend = colend
def __hash__(self):
return hash(self.line)
def __eq__(self, other):
return self.line == other.line
def findpos(self, regexp):
"""Iterate all (start, end) indices of matches"""
yield self.colstart, self.colend
p = self.colend
while p < len(self.line):
m = regexp.search(self.line, p)
if not m:
break
if m.end() == p:
p += 1
else:
yield m.span()
p = m.end()
def difflinestates(a, b):
sm = difflib.SequenceMatcher(None, a, b)
for tag, alo, ahi, blo, bhi in sm.get_opcodes():
if tag == 'insert':
for i in range(blo, bhi):
yield (b'+', b[i])
elif tag == 'delete':
for i in range(alo, ahi):
yield (b'-', a[i])
elif tag == 'replace':
for i in range(alo, ahi):
yield (b'-', a[i])
for i in range(blo, bhi):
yield (b'+', b[i])
class grepsearcher:
"""Search files and revisions for lines matching the given pattern
Options:
- all_files to search unchanged files at that revision.
- diff to search files in the parent revision so diffs can be generated.
- follow to skip files across copies and renames.
"""
def __init__(
self, ui, repo, regexp, all_files=False, diff=False, follow=False
):
self._ui = ui
self._repo = repo
self._regexp = regexp
self._all_files = all_files
self._diff = diff
self._follow = follow
self._getfile = util.lrucachefunc(repo.file)
self._getrenamed = scmutil.getrenamedfn(repo)
self._matches = {}
self._copies = {}
self._skip = set()
self._revfiles = {}
def skipfile(self, fn, rev):
"""Exclude the given file (and the copy at the specified revision)
from future search"""
copy = self._copies.get(rev, {}).get(fn)
self._skip.add(fn)
if copy:
self._skip.add(copy)
def searchfiles(self, revs, makefilematcher):
"""Walk files and revisions to yield (fn, ctx, pstates, states)
matches
states is a list of linestate objects. pstates may be empty unless
diff is True.
"""
for ctx in scmutil.walkchangerevs(
self._repo, revs, makefilematcher, self._prep
):
rev = ctx.rev()
parent = ctx.p1().rev()
for fn in sorted(self._revfiles.get(rev, [])):
states = self._matches[rev][fn]
copy = self._copies.get(rev, {}).get(fn)
if fn in self._skip:
if copy:
self._skip.add(copy)
continue
pstates = self._matches.get(parent, {}).get(copy or fn, [])
if pstates or states:
yield fn, ctx, pstates, states
del self._revfiles[rev]
# We will keep the matches dict for the duration of the window
# clear the matches dict once the window is over
if not self._revfiles:
self._matches.clear()
def _grepbody(self, fn, rev, body):
self._matches[rev].setdefault(fn, [])
m = self._matches[rev][fn]
if body is None:
return
for lnum, cstart, cend, line in matchlines(body, self._regexp):
s = linestate(line, lnum, cstart, cend)
m.append(s)
def _readfile(self, ctx, fn):
rev = ctx.rev()
if rev is None:
fctx = ctx[fn]
try:
return fctx.data()
except FileNotFoundError:
pass
else:
flog = self._getfile(fn)
fnode = ctx.filenode(fn)
try:
return flog.read(fnode)
except error.CensoredNodeError:
self._ui.warn(
_(
b'cannot search in censored file: '
b'%(filename)s:%(revnum)s\n'
)
% {b'filename': fn, b'revnum': pycompat.bytestr(rev)}
)
def _prep(self, ctx, fmatch):
rev = ctx.rev()
pctx = ctx.p1()
self._matches.setdefault(rev, {})
if self._diff:
parent = pctx.rev()
self._matches.setdefault(parent, {})
files = self._revfiles.setdefault(rev, [])
if rev is None:
# in `hg grep pattern`, 2/3 of the time is spent is spent in
# pathauditor checks without this in mozilla-central
contextmanager = self._repo.wvfs.audit.cached
else:
contextmanager = util.nullcontextmanager
with contextmanager():
# TODO: maybe better to warn missing files?
if self._all_files:
fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
filenames = ctx.matches(fmatch)
else:
filenames = (f for f in ctx.files() if fmatch(f))
for fn in filenames:
# fn might not exist in the revision (could be a file removed by
# the revision). We could check `fn not in ctx` even when rev is
# None, but it's less racy to protect againt that in readfile.
if rev is not None and fn not in ctx:
continue
copy = None
if self._follow:
copy = self._getrenamed(fn, rev)
if copy:
self._copies.setdefault(rev, {})[fn] = copy
if fn in self._skip:
self._skip.add(copy)
if fn in self._skip:
continue
files.append(fn)
if fn not in self._matches[rev]:
self._grepbody(fn, rev, self._readfile(ctx, fn))
if self._diff:
pfn = copy or fn
if pfn not in self._matches[parent] and pfn in pctx:
self._grepbody(pfn, parent, self._readfile(pctx, pfn))