##// END OF EJS Templates
phases: large rewrite on retract boundary...
phases: large rewrite on retract boundary The new code is still pure Python, so we still have room to go significantly faster. However, the complexity of the complex part is now `O(|[min_new_draft, tip]|)` instead of `O(|[min_draft, tip]|)`, which should help tremendously on repositories with old drafts (like mercurial-devel or mozilla-try). This is especially useful as the most common "retract boundary" operation happens when we commit/rewrite new drafts or when we push new drafts to a non-publishing server. In this case, the smallest new_revs is very close to the tip and there is very little work to do. A few smaller optimisations could be done for these cases and will be introduced in later changesets. We still have to iterate over large sets of roots, but this is already a great improvement for a very small amount of work. We gather information on the affected changesets as we go, as we can put it to use in the next changesets. This extra data collection might slow down the `register_new` case a bit; however, for `register_new` it should not really matter. The set of new nodes is either small, so the impact is negligible, or the set of new nodes is large, and the amount of work needed to add them will dominate the overhead of collecting information in `changed_revs`. As this new code computes the changes on the fly, it unlocks other interesting improvements to be done in later changesets.

File last commit:

r50201:2e726c93 default
r52302:2f39c7ae default
Show More
grep.py
221 lines | 7.1 KiB | text/x-python | PythonLexer
Yuya Nishihara
grep: move match and diff logic to new module...
r46288 # grep.py - logic for history walk and grep
#
Raphaël Gomès
contributor: change mentions of mpm to olivia...
r47575 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
Yuya Nishihara
grep: move match and diff logic to new module...
r46288 #
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import difflib
Yuya Nishihara
grep: move readfile() to grepsearcher class
r46291
from .i18n import _
Yuya Nishihara
grep: move match and diff logic to new module...
r46288
Yuya Nishihara
grep: add stub class that maintains cache and states of grep operation...
r46289 from . import (
Yuya Nishihara
grep: move readfile() to grepsearcher class
r46291 error,
Yuya Nishihara
grep: move prep() to grepsearcher class
r46292 match as matchmod,
Yuya Nishihara
grep: add stub class that maintains cache and states of grep operation...
r46289 pycompat,
scmutil,
util,
)
Yuya Nishihara
grep: move match and diff logic to new module...
r46288
def matchlines(body, regexp):
    """Yield a ``(linenum, colstart, colend, line)`` tuple per matching line.

    ``body`` is the bytes content to scan and ``regexp`` a compiled bytes
    pattern.  For every reported match:

    - ``linenum`` is the 1-based number of the line on which the match
      starts,
    - ``colstart`` and ``colend`` are the match offsets relative to the
      start of that line,
    - ``line`` is the text from the start of that line up to (but not
      including) the first newline found at or after the end of the match.

    Only the first match on a line is reported; scanning resumes at the
    beginning of the line following the reported text.
    """
    begin = 0  # always the offset of the start of a line
    linenum = 0  # number of lines lying entirely before ``begin``
    while begin < len(body):
        match = regexp.search(body, begin)
        if not match:
            break
        mstart, mend = match.span()
        linenum += body.count(b'\n', begin, mstart) + 1
        # ``rfind`` gives -1 when the match sits on the line that starts at
        # ``begin``; -1 + 1 is falsy, hence the fallback to ``begin``.
        lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
        # Resume right after the newline that ends the reported text.  When
        # there is no such newline, step past the end of ``body`` so that
        # the loop terminates.
        begin = body.find(b'\n', mend) + 1 or len(body) + 1
        lend = begin - 1
        yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
        # The reported text may span several lines (the pattern can match
        # newlines).  Account for the newlines it contains, otherwise every
        # later match would be reported with a line number that is too low.
        linenum += body.count(b'\n', mstart, lend)
Gregory Szorc
py3: use class X: instead of class X(object):...
class linestate:
    """A matching line together with the position of its first match.

    Instances hash and compare by the content of ``line`` only; the line
    number and the column offsets are ignored for equality.
    """

    def __init__(self, line, linenum, colstart, colend):
        self.line = line
        self.linenum = linenum
        self.colstart = colstart
        self.colend = colend

    def __hash__(self):
        return hash(self.line)

    def __eq__(self, other):
        # Let Python fall back to its default handling instead of raising
        # AttributeError when compared with an unrelated object.
        if not isinstance(other, linestate):
            return NotImplemented
        return self.line == other.line

    def findpos(self, regexp):
        """Iterate all (start, end) indices of matches"""
        # The first match is already known from the constructor arguments.
        yield self.colstart, self.colend
        p = self.colend
        while p < len(self.line):
            m = regexp.search(self.line, p)
            if not m:
                break
            if m.end() == p:
                # Empty match at the current position: skip one character
                # so that the search makes progress.
                p += 1
            else:
                yield m.span()
                p = m.end()
def difflinestates(a, b):
    """Yield ``(b'-', item)`` / ``(b'+', item)`` pairs turning ``a`` into ``b``.

    ``a`` and ``b`` are sequences of hashable items compared with
    ``difflib.SequenceMatcher``.  Items only present in ``a`` are yielded
    with ``b'-'``, items only present in ``b`` with ``b'+'``.  For a
    replaced range, all removals are yielded before the additions.  Items
    common to both sequences are not reported.
    """
    sm = difflib.SequenceMatcher(None, a, b)
    for tag, alo, ahi, blo, bhi in sm.get_opcodes():
        # A 'replace' opcode is a deletion followed by an insertion, so it
        # goes through both branches; 'equal' goes through neither.
        if tag in ('delete', 'replace'):
            for i in range(alo, ahi):
                yield (b'-', a[i])
        if tag in ('insert', 'replace'):
            for i in range(blo, bhi):
                yield (b'+', b[i])
Yuya Nishihara
grep: add stub class that maintains cache and states of grep operation...
r46289
Gregory Szorc
py3: use class X: instead of class X(object):...
class grepsearcher:
    """Search files and revisions for lines matching the given pattern

    Options:
    - all_files to search unchanged files at that revision.
    - diff to search files in the parent revision so diffs can be generated.
    - follow to skip files across copies and renames.
    """

    def __init__(
        self, ui, repo, regexp, all_files=False, diff=False, follow=False
    ):
        self._ui = ui
        self._repo = repo
        self._regexp = regexp
        self._all_files = all_files
        self._diff = diff
        self._follow = follow

        self._getfile = util.lrucachefunc(repo.file)
        self._getrenamed = scmutil.getrenamedfn(repo)
        # {rev: {filename: [linestate, ...]}}
        self._matches = {}
        # {rev: {filename: copy source}}, only filled when following
        self._copies = {}
        # file names excluded from further search
        self._skip = set()
        # {rev: [filename, ...]} files selected by _prep() for each revision
        self._revfiles = {}

    def skipfile(self, fn, rev):
        """Exclude the given file (and the copy at the specified revision)
        from future search"""
        copy = self._copies.get(rev, {}).get(fn)
        self._skip.add(fn)
        if copy:
            self._skip.add(copy)

    def searchfiles(self, revs, makefilematcher):
        """Walk files and revisions to yield (fn, ctx, pstates, states)
        matches

        states is a list of linestate objects. pstates may be empty unless
        diff is True.
        """
        for ctx in scmutil.walkchangerevs(
            self._repo, revs, makefilematcher, self._prep
        ):
            rev = ctx.rev()
            parent = ctx.p1().rev()
            for fn in sorted(self._revfiles.get(rev, [])):
                states = self._matches[rev][fn]
                copy = self._copies.get(rev, {}).get(fn)
                if fn in self._skip:
                    # keep skipping the file across the copy
                    if copy:
                        self._skip.add(copy)
                    continue
                pstates = self._matches.get(parent, {}).get(copy or fn, [])
                if pstates or states:
                    yield fn, ctx, pstates, states
            # Tolerate a revision without a recorded file list, like the
            # lookup in the loop above does.
            self._revfiles.pop(rev, None)
            # We will keep the matches dict for the duration of the window
            # clear the matches dict once the window is over
            if not self._revfiles:
                self._matches.clear()

    def _grepbody(self, fn, rev, body):
        """Record the lines of ``body`` matching the pattern for (rev, fn)

        An (empty) entry is created for ``fn`` even when ``body`` is None.
        """
        m = self._matches[rev].setdefault(fn, [])
        if body is None:
            return
        for lnum, cstart, cend, line in matchlines(body, self._regexp):
            m.append(linestate(line, lnum, cstart, cend))

    def _readfile(self, ctx, fn):
        """Return the content of ``fn`` in ``ctx``, or None if unreadable

        None is returned when ``ctx.rev()`` is None and reading the file
        raises FileNotFoundError, or when the stored revision is censored
        (a warning is emitted in that case).
        """
        rev = ctx.rev()
        if rev is None:
            fctx = ctx[fn]
            try:
                return fctx.data()
            except FileNotFoundError:
                return None
        flog = self._getfile(fn)
        fnode = ctx.filenode(fn)
        try:
            return flog.read(fnode)
        except error.CensoredNodeError:
            self._ui.warn(
                _(
                    b'cannot search in censored file: '
                    b'%(filename)s:%(revnum)s\n'
                )
                % {b'filename': fn, b'revnum': pycompat.bytestr(rev)}
            )
        return None

    def _prep(self, ctx, fmatch):
        """Collect the files of ``ctx`` to search and grep their content

        Passed to scmutil.walkchangerevs() by searchfiles(). Fills
        self._revfiles, self._matches and self._copies for the revision
        (and self._matches for its first parent when diff is enabled).
        """
        rev = ctx.rev()
        pctx = ctx.p1()
        self._matches.setdefault(rev, {})
        if self._diff:
            parent = pctx.rev()
            self._matches.setdefault(parent, {})
        files = self._revfiles.setdefault(rev, [])
        if rev is None:
            # in `hg grep pattern`, 2/3 of the time is spent in
            # pathauditor checks without this in mozilla-central
            contextmanager = self._repo.wvfs.audit.cached
        else:
            contextmanager = util.nullcontextmanager
        with contextmanager():
            # TODO: maybe better to warn missing files?
            if self._all_files:
                fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
                filenames = ctx.matches(fmatch)
            else:
                filenames = (f for f in ctx.files() if fmatch(f))
            for fn in filenames:
                # fn might not exist in the revision (could be a file removed
                # by the revision). We could check `fn not in ctx` even when
                # rev is None, but it's less racy to protect against that in
                # readfile.
                if rev is not None and fn not in ctx:
                    continue

                copy = None
                if self._follow:
                    copy = self._getrenamed(fn, rev)
                    if copy:
                        self._copies.setdefault(rev, {})[fn] = copy
                        if fn in self._skip:
                            self._skip.add(copy)
                if fn in self._skip:
                    continue
                files.append(fn)

                if fn not in self._matches[rev]:
                    self._grepbody(fn, rev, self._readfile(ctx, fn))

                if self._diff:
                    pfn = copy or fn
                    if pfn not in self._matches[parent] and pfn in pctx:
                        self._grepbody(pfn, parent, self._readfile(pctx, pfn))