grep.py
222 lines
| 7.1 KiB
| text/x-python
|
PythonLexer
/ mercurial / grep.py
Yuya Nishihara
|
r46288 | # grep.py - logic for history walk and grep | ||
# | ||||
Raphaël Gomès
|
r47575 | # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> | ||
Yuya Nishihara
|
r46288 | # | ||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Matt Harbison
|
r52756 | from __future__ import annotations | ||
Yuya Nishihara
|
r46288 | |||
import difflib | ||||
Yuya Nishihara
|
r46291 | |||
from .i18n import _ | ||||
Yuya Nishihara
|
r46288 | |||
Yuya Nishihara
|
r46289 | from . import ( | ||
Yuya Nishihara
|
r46291 | error, | ||
Yuya Nishihara
|
r46292 | match as matchmod, | ||
Yuya Nishihara
|
r46289 | pycompat, | ||
scmutil, | ||||
util, | ||||
) | ||||
Yuya Nishihara
|
r46288 | |||
def matchlines(body, regexp): | ||||
begin = 0 | ||||
linenum = 0 | ||||
while begin < len(body): | ||||
match = regexp.search(body, begin) | ||||
if not match: | ||||
break | ||||
mstart, mend = match.span() | ||||
linenum += body.count(b'\n', begin, mstart) + 1 | ||||
lstart = body.rfind(b'\n', begin, mstart) + 1 or begin | ||||
begin = body.find(b'\n', mend) + 1 or len(body) + 1 | ||||
lend = begin - 1 | ||||
yield linenum, mstart - lstart, mend - lstart, body[lstart:lend] | ||||
Gregory Szorc
|
r49801 | class linestate: | ||
Yuya Nishihara
|
r46288 | def __init__(self, line, linenum, colstart, colend): | ||
self.line = line | ||||
self.linenum = linenum | ||||
self.colstart = colstart | ||||
self.colend = colend | ||||
def __hash__(self): | ||||
return hash(self.line) | ||||
def __eq__(self, other): | ||||
return self.line == other.line | ||||
def findpos(self, regexp): | ||||
"""Iterate all (start, end) indices of matches""" | ||||
yield self.colstart, self.colend | ||||
p = self.colend | ||||
while p < len(self.line): | ||||
m = regexp.search(self.line, p) | ||||
if not m: | ||||
break | ||||
if m.end() == p: | ||||
p += 1 | ||||
else: | ||||
yield m.span() | ||||
p = m.end() | ||||
def difflinestates(a, b): | ||||
sm = difflib.SequenceMatcher(None, a, b) | ||||
for tag, alo, ahi, blo, bhi in sm.get_opcodes(): | ||||
if tag == 'insert': | ||||
Manuel Jacob
|
r50179 | for i in range(blo, bhi): | ||
Yuya Nishihara
|
r46288 | yield (b'+', b[i]) | ||
elif tag == 'delete': | ||||
Manuel Jacob
|
r50179 | for i in range(alo, ahi): | ||
Yuya Nishihara
|
r46288 | yield (b'-', a[i]) | ||
elif tag == 'replace': | ||||
Manuel Jacob
|
r50179 | for i in range(alo, ahi): | ||
Yuya Nishihara
|
r46288 | yield (b'-', a[i]) | ||
Manuel Jacob
|
r50179 | for i in range(blo, bhi): | ||
Yuya Nishihara
|
r46288 | yield (b'+', b[i]) | ||
Yuya Nishihara
|
r46289 | |||
Gregory Szorc
|
r49801 | class grepsearcher: | ||
Yuya Nishihara
|
r46292 | """Search files and revisions for lines matching the given pattern | ||
Yuya Nishihara
|
r46289 | |||
Yuya Nishihara
|
r46292 | Options: | ||
- all_files to search unchanged files at that revision. | ||||
- diff to search files in the parent revision so diffs can be generated. | ||||
- follow to skip files across copies and renames. | ||||
""" | ||||
def __init__( | ||||
self, ui, repo, regexp, all_files=False, diff=False, follow=False | ||||
): | ||||
Yuya Nishihara
|
r46289 | self._ui = ui | ||
self._repo = repo | ||||
self._regexp = regexp | ||||
Yuya Nishihara
|
r46292 | self._all_files = all_files | ||
self._diff = diff | ||||
self._follow = follow | ||||
Yuya Nishihara
|
r46289 | |||
self._getfile = util.lrucachefunc(repo.file) | ||||
self._getrenamed = scmutil.getrenamedfn(repo) | ||||
self._matches = {} | ||||
self._copies = {} | ||||
self._skip = set() | ||||
self._revfiles = {} | ||||
Yuya Nishihara
|
r46290 | |||
Yuya Nishihara
|
r46311 | def skipfile(self, fn, rev): | ||
"""Exclude the given file (and the copy at the specified revision) | ||||
from future search""" | ||||
copy = self._copies.get(rev, {}).get(fn) | ||||
self._skip.add(fn) | ||||
if copy: | ||||
self._skip.add(copy) | ||||
Yuya Nishihara
|
r46313 | def searchfiles(self, revs, makefilematcher): | ||
"""Walk files and revisions to yield (fn, ctx, pstates, states) | ||||
matches | ||||
states is a list of linestate objects. pstates may be empty unless | ||||
diff is True. | ||||
""" | ||||
for ctx in scmutil.walkchangerevs( | ||||
self._repo, revs, makefilematcher, self._prep | ||||
): | ||||
rev = ctx.rev() | ||||
parent = ctx.p1().rev() | ||||
for fn in sorted(self._revfiles.get(rev, [])): | ||||
states = self._matches[rev][fn] | ||||
copy = self._copies.get(rev, {}).get(fn) | ||||
if fn in self._skip: | ||||
if copy: | ||||
self._skip.add(copy) | ||||
continue | ||||
pstates = self._matches.get(parent, {}).get(copy or fn, []) | ||||
if pstates or states: | ||||
yield fn, ctx, pstates, states | ||||
del self._revfiles[rev] | ||||
# We will keep the matches dict for the duration of the window | ||||
# clear the matches dict once the window is over | ||||
if not self._revfiles: | ||||
self._matches.clear() | ||||
Yuya Nishihara
|
r46290 | def _grepbody(self, fn, rev, body): | ||
self._matches[rev].setdefault(fn, []) | ||||
m = self._matches[rev][fn] | ||||
if body is None: | ||||
return | ||||
for lnum, cstart, cend, line in matchlines(body, self._regexp): | ||||
s = linestate(line, lnum, cstart, cend) | ||||
m.append(s) | ||||
Yuya Nishihara
|
r46291 | |||
def _readfile(self, ctx, fn): | ||||
rev = ctx.rev() | ||||
if rev is None: | ||||
fctx = ctx[fn] | ||||
try: | ||||
return fctx.data() | ||||
Manuel Jacob
|
r50201 | except FileNotFoundError: | ||
pass | ||||
Yuya Nishihara
|
r46291 | else: | ||
flog = self._getfile(fn) | ||||
fnode = ctx.filenode(fn) | ||||
try: | ||||
return flog.read(fnode) | ||||
except error.CensoredNodeError: | ||||
self._ui.warn( | ||||
_( | ||||
b'cannot search in censored file: ' | ||||
b'%(filename)s:%(revnum)s\n' | ||||
) | ||||
% {b'filename': fn, b'revnum': pycompat.bytestr(rev)} | ||||
) | ||||
Yuya Nishihara
|
r46292 | |||
def _prep(self, ctx, fmatch): | ||||
rev = ctx.rev() | ||||
pctx = ctx.p1() | ||||
self._matches.setdefault(rev, {}) | ||||
if self._diff: | ||||
parent = pctx.rev() | ||||
self._matches.setdefault(parent, {}) | ||||
files = self._revfiles.setdefault(rev, []) | ||||
if rev is None: | ||||
# in `hg grep pattern`, 2/3 of the time is spent is spent in | ||||
# pathauditor checks without this in mozilla-central | ||||
contextmanager = self._repo.wvfs.audit.cached | ||||
else: | ||||
contextmanager = util.nullcontextmanager | ||||
with contextmanager(): | ||||
# TODO: maybe better to warn missing files? | ||||
if self._all_files: | ||||
fmatch = matchmod.badmatch(fmatch, lambda f, msg: None) | ||||
filenames = ctx.matches(fmatch) | ||||
else: | ||||
filenames = (f for f in ctx.files() if fmatch(f)) | ||||
for fn in filenames: | ||||
# fn might not exist in the revision (could be a file removed by | ||||
# the revision). We could check `fn not in ctx` even when rev is | ||||
# None, but it's less racy to protect againt that in readfile. | ||||
if rev is not None and fn not in ctx: | ||||
continue | ||||
copy = None | ||||
if self._follow: | ||||
copy = self._getrenamed(fn, rev) | ||||
if copy: | ||||
self._copies.setdefault(rev, {})[fn] = copy | ||||
if fn in self._skip: | ||||
self._skip.add(copy) | ||||
if fn in self._skip: | ||||
continue | ||||
files.append(fn) | ||||
if fn not in self._matches[rev]: | ||||
self._grepbody(fn, rev, self._readfile(ctx, fn)) | ||||
if self._diff: | ||||
pfn = copy or fn | ||||
if pfn not in self._matches[parent] and pfn in pctx: | ||||
self._grepbody(pfn, parent, self._readfile(pctx, pfn)) | ||||