upstream/mercurial-mirror Files · mercurial/grep.py

statprof: remove superfluous sys.version_info check...

statprof: remove superfluous sys.version_info check Always true since we require Python 3 now. Differential Revision: https://phab.mercurial-scm.org/D12345

Gregory Szorc - - Load All Authors

File last commit:

r49730:6000f5b2 default


                r49794:37537a4d

default

Download file

             grep.py
        
                    223 lines
            
             | 7.2 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / grep.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Yuya Nishihara
    
grep: move match and diff logic to new module...

              r46288
            
      # grep.py - logic for history walk and grep

      #

        Raphaël Gomès
    
contributor: change mentions of mpm to olivia...

              r47575
            
      # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>

        Yuya Nishihara
    
grep: move match and diff logic to new module...

              r46288
            
      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      import difflib

        Yuya Nishihara
    
grep: move readfile() to grepsearcher class

              r46291
            
      import errno

      from .i18n import _

        Yuya Nishihara
    
grep: move match and diff logic to new module...

              r46288
            
        Yuya Nishihara
    
grep: add stub class that maintains cache and states of grep operation...

              r46289
            
      from . import (

        Yuya Nishihara
    
grep: move readfile() to grepsearcher class

              r46291
            
          error,

        Yuya Nishihara
    
grep: move prep() to grepsearcher class

              r46292
            
          match as matchmod,

        Yuya Nishihara
    
grep: add stub class that maintains cache and states of grep operation...

              r46289
            
          pycompat,

          scmutil,

          util,

      )

        Yuya Nishihara
    
grep: move match and diff logic to new module...

              r46288
            
      def matchlines(body, regexp):
          """Yield a record for every line of ``body`` in which ``regexp`` matches.

          ``body`` is a bytes string and ``regexp`` a compiled bytes pattern (only
          its ``search(body, pos)`` method is used). Each record is a tuple
          ``(linenum, colstart, colend, line)``:

          - ``linenum`` is the 1-based number of the line the match starts on,
          - ``colstart`` and ``colend`` are the match offsets relative to the
            start of that line,
          - ``line`` is the text from the start of that line up to (excluding)
            the first newline found at or after the end of the match.

          The search resumes right after that newline, so at most one record is
          produced per line.
          """
          begin = 0
          linenum = 0
          while begin < len(body):
              match = regexp.search(body, begin)
              if not match:
                  break
              mstart, mend = match.span()
              # "+ 1" accounts for the newline consumed at the end of the previous
              # record (or turns the initial 0 into a 1-based number).
              linenum += body.count(b'\n', begin, mstart) + 1
              # rfind() returns -1 when there is no newline between "begin" and
              # the match; "-1 + 1" is falsy, so fall back to "begin".
              lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
              # Without a trailing newline, step past the end of the body so
              # that the loop terminates and "lend" equals len(body).
              begin = body.find(b'\n', mend) + 1 or len(body) + 1
              lend = begin - 1
              yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
              # A match may itself contain newlines; count them so that the
              # line numbers of subsequent records do not fall behind.
              linenum += body.count(b'\n', mstart, lend)

      class linestate(object):
          """A matched line plus the location of its first match.

          Attributes:
          - line: content of the matched line
          - linenum: line number of the match
          - colstart, colend: start and end offsets of the first match in line

          Equality and hashing look only at ``line``, so two states with the
          same content compare equal regardless of line number or columns.
          """

          def __init__(self, line, linenum, colstart, colend):
              self.line = line
              self.linenum = linenum
              self.colstart = colstart
              self.colend = colend

          def __hash__(self):
              return hash(self.line)

          def __eq__(self, other):
              # Let Python fall back to its default handling instead of raising
              # AttributeError when compared with an unrelated object.
              if not isinstance(other, linestate):
                  return NotImplemented
              return self.line == other.line

          def findpos(self, regexp):
              """Iterate all (start, end) indices of matches

              The first pair is the stored (colstart, colend); the remaining
              pairs come from searching ``line`` with ``regexp`` from colend on.
              """
              yield self.colstart, self.colend
              p = self.colend
              while p < len(self.line):
                  m = regexp.search(self.line, p)
                  if not m:
                      break
                  if m.end() == p:
                      # nothing was consumed; step forward to guarantee progress
                      p += 1
                  else:
                      yield m.span()
                      p = m.end()

      def difflinestates(a, b):
          """Yield (sign, item) pairs describing how sequence ``a`` becomes ``b``.

          ``a`` and ``b`` are sequences of hashable items (compared through
          difflib.SequenceMatcher). Items only present in ``a`` are yielded with
          ``b'-'``, items only present in ``b`` with ``b'+'``; items common to
          both are not reported. For a replaced range all removals are yielded
          before the additions.
          """
          sm = difflib.SequenceMatcher(None, a, b)
          for tag, alo, ahi, blo, bhi in sm.get_opcodes():
              # 'replace' is a removal followed by an addition; 'equal' hits
              # neither branch.
              if tag in ('delete', 'replace'):
                  for item in a[alo:ahi]:
                      yield (b'-', item)
              if tag in ('insert', 'replace'):
                  for item in b[blo:bhi]:
                      yield (b'+', item)

        Yuya Nishihara
    
grep: add stub class that maintains cache and states of grep operation...

              r46289
            
      class grepsearcher(object):
          """Walk revisions and collect the lines matching a pattern

          Options:
          - all_files to search unchanged files at that revision.
          - diff to search files in the parent revision so diffs can be generated.
          - follow to skip files across copies and renames.
          """

          def __init__(
              self, ui, repo, regexp, all_files=False, diff=False, follow=False
          ):
              self._ui = ui
              self._repo = repo
              self._regexp = regexp
              self._all_files = all_files
              self._diff = diff
              self._follow = follow

              self._getfile = util.lrucachefunc(repo.file)
              self._getrenamed = scmutil.getrenamedfn(repo)

              # rev -> {filename: [linestate, ...]}
              self._matches = {}
              # rev -> {filename: copy source}
              self._copies = {}
              # filenames excluded from the search
              self._skip = set()
              # rev -> [filename, ...] collected by _prep()
              self._revfiles = {}

          def skipfile(self, fn, rev):
              """Stop searching fn, and the copy source recorded for it at rev
              (if any), from now on"""
              copysource = self._copies.get(rev, {}).get(fn)
              self._skip.add(fn)
              if copysource:
                  self._skip.add(copysource)

          def searchfiles(self, revs, makefilematcher):
              """Walk files and revisions to yield (fn, ctx, pstates, states)
              matches

              states holds the linestate objects found for fn at ctx, pstates
              the ones found in the first parent (for the copy source if one
              was recorded). pstates may be empty unless diff is True.
              """
              walker = scmutil.walkchangerevs(
                  self._repo, revs, makefilematcher, self._prep
              )
              for ctx in walker:
                  rev = ctx.rev()
                  parent = ctx.p1().rev()
                  for fn in sorted(self._revfiles.get(rev, [])):
                      curstates = self._matches[rev][fn]
                      copysource = self._copies.get(rev, {}).get(fn)
                      if fn not in self._skip:
                          parentstates = self._matches.get(parent, {}).get(
                              copysource or fn, []
                          )
                          if parentstates or curstates:
                              yield fn, ctx, parentstates, curstates
                      elif copysource:
                          # keep skipping the file under its previous name
                          self._skip.add(copysource)
                  del self._revfiles[rev]
                  # The matches dict is kept while revisions of the current
                  # window are still pending, and emptied once none is left.
                  if not self._revfiles:
                      self._matches.clear()

          def _grepbody(self, fn, rev, body):
              """Record the linestates of body under (rev, fn)

              An (empty) entry is created even when body is None.
              """
              found = self._matches[rev].setdefault(fn, [])
              if body is not None:
                  found.extend(
                      linestate(line, lnum, cstart, cend)
                      for lnum, cstart, cend, line in matchlines(
                          body, self._regexp
                      )
                  )

          def _readfile(self, ctx, fn):
              """Return the content of fn in ctx

              None is returned when ctx has no revision number and reading
              fails with ENOENT, or when the file revision is censored (a
              warning is then printed).
              """
              rev = ctx.rev()
              if rev is not None:
                  filelog = self._getfile(fn)
                  node = ctx.filenode(fn)
                  try:
                      return filelog.read(node)
                  except error.CensoredNodeError:
                      msg = _(
                          b'cannot search in censored file: '
                          b'%(filename)s:%(revnum)s\n'
                      )
                      self._ui.warn(
                          msg
                          % {b'filename': fn, b'revnum': pycompat.bytestr(rev)}
                      )
                      return None

              wfctx = ctx[fn]
              try:
                  return wfctx.data()
              except IOError as err:
                  if err.errno == errno.ENOENT:
                      return None
                  raise

          def _prep(self, ctx, fmatch):
              """Grep the files of ctx selected by fmatch and cache the result

              Fills self._revfiles, self._copies and self._matches for the
              revision (and for its first parent when diff is True).
              """
              rev = ctx.rev()
              pctx = ctx.p1()
              self._matches.setdefault(rev, {})
              if self._diff:
                  parent = pctx.rev()
                  self._matches.setdefault(parent, {})
              files = self._revfiles.setdefault(rev, [])
              # in `hg grep pattern`, 2/3 of the time is spent in pathauditor
              # checks without the cached auditor in mozilla-central
              contextmanager = (
                  self._repo.wvfs.audit.cached
                  if rev is None
                  else util.nullcontextmanager
              )
              with contextmanager():
                  # TODO: maybe better to warn missing files?
                  if self._all_files:
                      fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
                      filenames = ctx.matches(fmatch)
                  else:
                      filenames = (f for f in ctx.files() if fmatch(f))
                  for fn in filenames:
                      # fn might not exist in the revision (could be a file
                      # removed by the revision). We could check `fn not in ctx`
                      # even when rev is None, but it's less racy to protect
                      # against that in readfile.
                      if rev is not None and fn not in ctx:
                          continue

                      copy = self._getrenamed(fn, rev) if self._follow else None
                      if copy:
                          self._copies.setdefault(rev, {})[fn] = copy
                          if fn in self._skip:
                              self._skip.add(copy)
                      if fn in self._skip:
                          continue
                      files.append(fn)

                      if fn not in self._matches[rev]:
                          self._grepbody(fn, rev, self._readfile(ctx, fn))

                      if not self._diff:
                          continue
                      pfn = copy or fn
                      if pfn not in self._matches[parent] and pfn in pctx:
                          self._grepbody(pfn, parent, self._readfile(pctx, pfn))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Yuya Nishihara grep: move match and diff logic to new module...	r46288	# grep.py - logic for history walk and grep
		#
Raphaël Gomès contributor: change mentions of mpm to olivia...	r47575	# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
Yuya Nishihara grep: move match and diff logic to new module...	r46288	#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.


		import difflib
Yuya Nishihara grep: move readfile() to grepsearcher class	r46291	import errno

		from .i18n import _
Yuya Nishihara grep: move match and diff logic to new module...	r46288
Yuya Nishihara grep: add stub class that maintains cache and states of grep operation...	r46289	from . import (
Yuya Nishihara grep: move readfile() to grepsearcher class	r46291	error,
Yuya Nishihara grep: move prep() to grepsearcher class	r46292	match as matchmod,
Yuya Nishihara grep: add stub class that maintains cache and states of grep operation...	r46289	pycompat,
		scmutil,
		util,
		)
Yuya Nishihara grep: move match and diff logic to new module...	r46288

		def matchlines(body, regexp):
		    """Yield a record for every line of ``body`` in which ``regexp`` matches.

		    ``body`` is a bytes string and ``regexp`` a compiled bytes pattern (only
		    its ``search(body, pos)`` method is used). Each record is a tuple
		    ``(linenum, colstart, colend, line)``:

		    - ``linenum`` is the 1-based number of the line the match starts on,
		    - ``colstart`` and ``colend`` are the match offsets relative to the
		      start of that line,
		    - ``line`` is the text from the start of that line up to (excluding)
		      the first newline found at or after the end of the match.

		    The search resumes right after that newline, so at most one record is
		    produced per line.
		    """
		    begin = 0
		    linenum = 0
		    while begin < len(body):
		        match = regexp.search(body, begin)
		        if not match:
		            break
		        mstart, mend = match.span()
		        # "+ 1" accounts for the newline consumed at the end of the previous
		        # record (or turns the initial 0 into a 1-based number).
		        linenum += body.count(b'\n', begin, mstart) + 1
		        # rfind() returns -1 when there is no newline between "begin" and
		        # the match; "-1 + 1" is falsy, so fall back to "begin".
		        lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
		        # Without a trailing newline, step past the end of the body so
		        # that the loop terminates and "lend" equals len(body).
		        begin = body.find(b'\n', mend) + 1 or len(body) + 1
		        lend = begin - 1
		        yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
		        # A match may itself contain newlines; count them so that the
		        # line numbers of subsequent records do not fall behind.
		        linenum += body.count(b'\n', mstart, lend)


		class linestate(object):
		    """A matched line plus the location of its first match.

		    Attributes:
		    - line: content of the matched line
		    - linenum: line number of the match
		    - colstart, colend: start and end offsets of the first match in line

		    Equality and hashing look only at ``line``, so two states with the
		    same content compare equal regardless of line number or columns.
		    """

		    def __init__(self, line, linenum, colstart, colend):
		        self.line = line
		        self.linenum = linenum
		        self.colstart = colstart
		        self.colend = colend

		    def __hash__(self):
		        return hash(self.line)

		    def __eq__(self, other):
		        # Let Python fall back to its default handling instead of raising
		        # AttributeError when compared with an unrelated object.
		        if not isinstance(other, linestate):
		            return NotImplemented
		        return self.line == other.line

		    def findpos(self, regexp):
		        """Iterate all (start, end) indices of matches

		        The first pair is the stored (colstart, colend); the remaining
		        pairs come from searching ``line`` with ``regexp`` from colend on.
		        """
		        yield self.colstart, self.colend
		        p = self.colend
		        while p < len(self.line):
		            m = regexp.search(self.line, p)
		            if not m:
		                break
		            if m.end() == p:
		                # nothing was consumed; step forward to guarantee progress
		                p += 1
		            else:
		                yield m.span()
		                p = m.end()


		def difflinestates(a, b):
		    """Yield (sign, item) pairs describing how sequence ``a`` becomes ``b``.

		    ``a`` and ``b`` are sequences of hashable items (compared through
		    difflib.SequenceMatcher). Items only present in ``a`` are yielded with
		    ``b'-'``, items only present in ``b`` with ``b'+'``; items common to
		    both are not reported. For a replaced range all removals are yielded
		    before the additions.
		    """
		    sm = difflib.SequenceMatcher(None, a, b)
		    for tag, alo, ahi, blo, bhi in sm.get_opcodes():
		        # 'replace' is a removal followed by an addition; 'equal' hits
		        # neither branch.
		        if tag in ('delete', 'replace'):
		            for item in a[alo:ahi]:
		                yield (b'-', item)
		        if tag in ('insert', 'replace'):
		            for item in b[blo:bhi]:
		                yield (b'+', item)
Yuya Nishihara grep: add stub class that maintains cache and states of grep operation...	r46289

		class grepsearcher(object):
		    """Walk revisions and collect the lines matching a pattern

		    Options:
		    - all_files to search unchanged files at that revision.
		    - diff to search files in the parent revision so diffs can be generated.
		    - follow to skip files across copies and renames.
		    """

		    def __init__(
		        self, ui, repo, regexp, all_files=False, diff=False, follow=False
		    ):
		        self._ui = ui
		        self._repo = repo
		        self._regexp = regexp
		        self._all_files = all_files
		        self._diff = diff
		        self._follow = follow

		        self._getfile = util.lrucachefunc(repo.file)
		        self._getrenamed = scmutil.getrenamedfn(repo)

		        # rev -> {filename: [linestate, ...]}
		        self._matches = {}
		        # rev -> {filename: copy source}
		        self._copies = {}
		        # filenames excluded from the search
		        self._skip = set()
		        # rev -> [filename, ...] collected by _prep()
		        self._revfiles = {}

		    def skipfile(self, fn, rev):
		        """Stop searching fn, and the copy source recorded for it at rev
		        (if any), from now on"""
		        copysource = self._copies.get(rev, {}).get(fn)
		        self._skip.add(fn)
		        if copysource:
		            self._skip.add(copysource)

		    def searchfiles(self, revs, makefilematcher):
		        """Walk files and revisions to yield (fn, ctx, pstates, states)
		        matches

		        states holds the linestate objects found for fn at ctx, pstates
		        the ones found in the first parent (for the copy source if one
		        was recorded). pstates may be empty unless diff is True.
		        """
		        walker = scmutil.walkchangerevs(
		            self._repo, revs, makefilematcher, self._prep
		        )
		        for ctx in walker:
		            rev = ctx.rev()
		            parent = ctx.p1().rev()
		            for fn in sorted(self._revfiles.get(rev, [])):
		                curstates = self._matches[rev][fn]
		                copysource = self._copies.get(rev, {}).get(fn)
		                if fn not in self._skip:
		                    parentstates = self._matches.get(parent, {}).get(
		                        copysource or fn, []
		                    )
		                    if parentstates or curstates:
		                        yield fn, ctx, parentstates, curstates
		                elif copysource:
		                    # keep skipping the file under its previous name
		                    self._skip.add(copysource)
		            del self._revfiles[rev]
		            # The matches dict is kept while revisions of the current
		            # window are still pending, and emptied once none is left.
		            if not self._revfiles:
		                self._matches.clear()

		    def _grepbody(self, fn, rev, body):
		        """Record the linestates of body under (rev, fn)

		        An (empty) entry is created even when body is None.
		        """
		        found = self._matches[rev].setdefault(fn, [])
		        if body is not None:
		            found.extend(
		                linestate(line, lnum, cstart, cend)
		                for lnum, cstart, cend, line in matchlines(
		                    body, self._regexp
		                )
		            )

		    def _readfile(self, ctx, fn):
		        """Return the content of fn in ctx

		        None is returned when ctx has no revision number and reading
		        fails with ENOENT, or when the file revision is censored (a
		        warning is then printed).
		        """
		        rev = ctx.rev()
		        if rev is not None:
		            filelog = self._getfile(fn)
		            node = ctx.filenode(fn)
		            try:
		                return filelog.read(node)
		            except error.CensoredNodeError:
		                msg = _(
		                    b'cannot search in censored file: '
		                    b'%(filename)s:%(revnum)s\n'
		                )
		                self._ui.warn(
		                    msg
		                    % {b'filename': fn, b'revnum': pycompat.bytestr(rev)}
		                )
		                return None

		        wfctx = ctx[fn]
		        try:
		            return wfctx.data()
		        except IOError as err:
		            if err.errno == errno.ENOENT:
		                return None
		            raise

		    def _prep(self, ctx, fmatch):
		        """Grep the files of ctx selected by fmatch and cache the result

		        Fills self._revfiles, self._copies and self._matches for the
		        revision (and for its first parent when diff is True).
		        """
		        rev = ctx.rev()
		        pctx = ctx.p1()
		        self._matches.setdefault(rev, {})
		        if self._diff:
		            parent = pctx.rev()
		            self._matches.setdefault(parent, {})
		        files = self._revfiles.setdefault(rev, [])
		        # in `hg grep pattern`, 2/3 of the time is spent in pathauditor
		        # checks without the cached auditor in mozilla-central
		        contextmanager = (
		            self._repo.wvfs.audit.cached
		            if rev is None
		            else util.nullcontextmanager
		        )
		        with contextmanager():
		            # TODO: maybe better to warn missing files?
		            if self._all_files:
		                fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
		                filenames = ctx.matches(fmatch)
		            else:
		                filenames = (f for f in ctx.files() if fmatch(f))
		            for fn in filenames:
		                # fn might not exist in the revision (could be a file
		                # removed by the revision). We could check `fn not in ctx`
		                # even when rev is None, but it's less racy to protect
		                # against that in readfile.
		                if rev is not None and fn not in ctx:
		                    continue

		                copy = self._getrenamed(fn, rev) if self._follow else None
		                if copy:
		                    self._copies.setdefault(rev, {})[fn] = copy
		                    if fn in self._skip:
		                        self._skip.add(copy)
		                if fn in self._skip:
		                    continue
		                files.append(fn)

		                if fn not in self._matches[rev]:
		                    self._grepbody(fn, rev, self._readfile(ctx, fn))

		                if not self._diff:
		                    continue
		                pfn = copy or fn
		                if pfn not in self._matches[parent] and pfn in pctx:
		                    self._grepbody(pfn, parent, self._readfile(pctx, pfn))