upstream/mercurial-mirror Files · mercurial/grep.py

branchmap-v3: introduce a "stop_rev" argument to `headsrevs`...

branchmap-v3: introduce a "stop_rev" argument to `headsrevs` The `headsrevs` method of the revlog already has a `revs` argument to compute the headrevs of a limited set of heads. However, it disables the use of the native compiled code to compute the heads, which slows down the branchmap v3 code a lot. The branchmap v3 usage is actually quite constrained as we will always only ignore a part at the top of the graph. So we could be significantly faster. We start by making a small change to the python side to improve the situation and introduce the new API. More collaboration with the native code is coming later. This massively speeds up the operation and closes most of the remaining gaps between branchmap-v3 and branchmap-v2, especially on repositories with many revs like mozilla-try. A small overhead remains, mostly because the `headrevs` logic currently has some inefficiencies. We will look into them from there. ### benchmark.name = hg.command.unbundle # bin-env-vars.hg.py-re2-module = default # benchmark.variants.issue6528 = disabled # benchmark.variants.resource-usage = default # benchmark.variants.reuse-external-delta-parent = yes # benchmark.variants.revs = any-1-extra-rev # benchmark.variants.source = unbundle # benchmark.variants.validate = default # benchmark.variants.verbosity = quiet ## data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.233711 ~~~~~ branch-v3 before: 0.368769 (+57.79%, +0.14) branch-v3 after: 0.239857 (+2.63%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.235230 ~~~~~ branch-v3 before: 0.372460 (+58.34%, +0.14) branch-v3 after: 0.240972 (+2.44%, +0.01) ## data-env-vars.name = netbeans-2018-08-01-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.255586 ~~~~~ branch-v3 before: 0.318907 (+24.78%, +0.06) branch-v3 after: 0.268560 (+5.08%, +0.01) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.339010 ~~~~~ branch-v3 
before: 0.349752 (+3.17%, +0.01) branch-v3 after: 0.349389 (+3.06%, +0.01) # bin-env-vars.hg.flavor = rust branch-v2: 0.346525 ~~~~~ branch-v3 before: 0.354300 (+2.24%, +0.01) branch-v3 after: 0.355661 (+2.64%, +0.01) ## data-env-vars.name = mozilla-central-2024-03-22-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.380202 ~~~~~ branch-v3 before: 0.396293 (+4.23%, +0.02) branch-v3 after: 0.408851 (+7.54%, +0.03) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 0.412165 ~~~~~ branch-v3 before: 0.424769 (+3.06%, +0.01) branch-v3 after: 0.427782 (+3.79%, +0.02) # bin-env-vars.hg.flavor = rust branch-v2: 0.412397 ~~~~~ branch-v3 before: 0.421796 (+2.28%, +0.01) branch-v3 after: 0.422354 (+2.41%, +0.01) ## data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 0.429501 ~~~~~ branch-v3 before: 0.443849 (+3.34%, +0.01) branch-v3 after: 0.443197 (+3.19%, +0.01) ## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog # bin-env-vars.hg.flavor = default branch-v2: 3.403171 ~~~~~ branch-v3 before: 6.234055 (+83.18%, +2.83) branch-v3 after: 3.819477 (+12.23%, +0.42) # bin-env-vars.hg.flavor = rust branch-v2: 3.454876 ~~~~~ branch-v3 before: 6.307813 (+82.58%, +2.85) branch-v3 after: 3.590284 (+3.92%, +0.14) ## data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # bin-env-vars.hg.flavor = rust branch-v2: 3.465435 ~~~~~ branch-v3 before: 5.176076 (+49.36%, +1.71) branch-v3 after: 3.633278 (+4.84%, +0.17)

Matt Harbison - - Load All Authors

File last commit:

r52756:f4733654 default


                r52870:42a116f1

default

Download file

             grep.py
        
                    222 lines
            
             | 7.1 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / grep.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # grep.py - logic for history walk and grep

      #

      # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import annotations

      import difflib

      from .i18n import _

      from . import (

          error,

          match as matchmod,

          pycompat,

          scmutil,

          util,

      )

      def matchlines(body, regexp):
          """Yield one entry per line of ``body`` on which ``regexp`` matches

          Each entry is a ``(linenum, colstart, colend, line)`` tuple where:

          - ``linenum`` is the 1-based number of the line the match starts on,
          - ``colstart`` and ``colend`` are the match offsets relative to the
            start of that line,
          - ``line`` runs from the start of that line up to (excluding) the
            first newline found at or after the end of the match.

          Only the first match of a line is reported: the search resumes at the
          beginning of the line following the reported text.
          """
          begin = 0
          linenum = 0
          while begin < len(body):
              match = regexp.search(body, begin)
              if not match:
                  break
              mstart, mend = match.span()
              # The "+ 1" moves onto the line starting at ``begin``: either the
              # very first line, or the line after the newline that terminated
              # the previously reported text.
              linenum += body.count(b'\n', begin, mstart) + 1
              # ``rfind() + 1`` is 0 when there is no newline in the window, in
              # which case the line starts at ``begin``.
              lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
              # ``find() + 1`` is 0 when no newline follows the match; jump past
              # the end of ``body`` so that the loop terminates.
              begin = body.find(b'\n', mend) + 1 or len(body) + 1
              lend = begin - 1
              yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
              # A match may span several lines. Account for the newlines inside
              # the text just reported, otherwise every later line number would
              # be too small.
              linenum += body.count(b'\n', mstart, lend)

      class linestate:
          """A line of text together with the position of a match inside it

          Instances hash and compare on ``line`` only; ``linenum``, ``colstart``
          and ``colend`` do not take part in equality.
          """

          def __init__(self, line, linenum, colstart, colend):
              self.line = line
              self.linenum = linenum
              self.colstart = colstart
              self.colend = colend

          def __hash__(self):
              return hash(self.line)

          def __eq__(self, other):
              # Defer to the other operand (and ultimately to "not equal")
              # instead of raising AttributeError when ``other`` has no ``line``.
              try:
                  otherline = other.line
              except AttributeError:
                  return NotImplemented
              return self.line == otherline

          def findpos(self, regexp):
              """Iterate all (start, end) indices of matches

              The first pair is always the stored ``(colstart, colend)``; the
              rest of the line is then searched with ``regexp`` starting at
              ``colend``.
              """
              yield self.colstart, self.colend
              p = self.colend
              while p < len(self.line):
                  m = regexp.search(self.line, p)
                  if not m:
                      break
                  if m.end() == p:
                      # empty match at the current position: step over one
                      # character so that the loop makes progress
                      p += 1
                  else:
                      yield m.span()
                      p = m.end()

      def difflinestates(a, b):
          """Yield ``(sign, item)`` pairs describing how ``a`` turns into ``b``

          ``sign`` is ``b'-'`` for an item taken from ``a`` and ``b'+'`` for an
          item taken from ``b``. Items in ranges that difflib reports as equal
          are not yielded. For a replaced range, every removal is yielded before
          the additions.
          """
          matcher = difflib.SequenceMatcher(None, a, b)
          for op, a_from, a_to, b_from, b_to in matcher.get_opcodes():
              if op in ('delete', 'replace'):
                  for idx in range(a_from, a_to):
                      yield (b'-', a[idx])
              if op in ('insert', 'replace'):
                  for idx in range(b_from, b_to):
                      yield (b'+', b[idx])

      class grepsearcher:
          """Locate lines matching a pattern across files and revisions

          Options:
          - all_files: also search files the revision left unchanged.
          - diff: also search each file in the parent revision so diffs can be
            generated.
          - follow: carry skipped files across copies and renames.
          """

          def __init__(
              self, ui, repo, regexp, all_files=False, diff=False, follow=False
          ):
              # what to search for, and how
              self._ui, self._repo, self._regexp = ui, repo, regexp
              self._all_files, self._diff, self._follow = all_files, diff, follow

              # repository accessors
              self._getfile = util.lrucachefunc(repo.file)
              self._getrenamed = scmutil.getrenamedfn(repo)

              # state accumulated while walking
              self._matches = {}  # rev -> {filename: [linestate, ...]}
              self._copies = {}  # rev -> {filename: copy source}
              self._skip = set()  # filenames excluded from the search
              self._revfiles = {}  # rev -> [filename, ...] not yet reported

          def skipfile(self, fn, rev):
              """Stop searching the given file from now on

              The copy source recorded for the file at ``rev``, if any, is
              excluded as well."""
              renamed_from = self._copies.get(rev, {}).get(fn)
              self._skip.add(fn)
              if renamed_from:
                  self._skip.add(renamed_from)

          def searchfiles(self, revs, makefilematcher):
              """Walk revisions and yield (fn, ctx, pstates, states) per match

              states and pstates are lists of linestate objects for the file in
              ctx and in its first parent respectively. pstates may be empty
              unless diff is True.
              """
              walker = scmutil.walkchangerevs(
                  self._repo, revs, makefilematcher, self._prep
              )
              for ctx in walker:
                  rev = ctx.rev()
                  parent = ctx.p1().rev()
                  for fn in sorted(self._revfiles.get(rev, [])):
                      states = self._matches[rev][fn]
                      source = self._copies.get(rev, {}).get(fn)
                      if fn not in self._skip:
                          parentmatches = self._matches.get(parent, {})
                          pstates = parentmatches.get(source or fn, [])
                          if states or pstates:
                              yield fn, ctx, pstates, states
                      elif source:
                          # a skipped file drags its copy source along
                          self._skip.add(source)
                  del self._revfiles[rev]
                  # Matches are kept for as long as some prepared revision is
                  # still pending; drop them all once none is left.
                  if not self._revfiles:
                      self._matches.clear()

          def _grepbody(self, fn, rev, body):
              """Record the matching lines of ``body`` for ``fn`` at ``rev``

              An entry is created even when ``body`` is None, in which case it
              is left as it is.
              """
              found = self._matches[rev].setdefault(fn, [])
              if body is None:
                  return
              found.extend(
                  linestate(line, lnum, cstart, cend)
                  for lnum, cstart, cend, line in matchlines(body, self._regexp)
              )

          def _readfile(self, ctx, fn):
              """Return the content of ``fn`` in ``ctx``

              None is returned when the file is missing (``ctx.rev()`` is None)
              or censored (a warning is printed in that case).
              """
              rev = ctx.rev()
              if rev is not None:
                  flog = self._getfile(fn)
                  fnode = ctx.filenode(fn)
                  try:
                      return flog.read(fnode)
                  except error.CensoredNodeError:
                      msg = _(
                          b'cannot search in censored file: '
                          b'%(filename)s:%(revnum)s\n'
                      )
                      self._ui.warn(
                          msg
                          % {b'filename': fn, b'revnum': pycompat.bytestr(rev)}
                      )
                  return None

              fctx = ctx[fn]
              try:
                  return fctx.data()
              except FileNotFoundError:
                  return None

          def _prep(self, ctx, fmatch):
              """Search the files of ``ctx`` selected by ``fmatch``

              Results are stored in the instance state consumed by
              searchfiles().
              """
              rev = ctx.rev()
              pctx = ctx.p1()
              self._matches.setdefault(rev, {})
              if self._diff:
                  parent = pctx.rev()
                  self._matches.setdefault(parent, {})
              files = self._revfiles.setdefault(rev, [])

              in_wdir = rev is None
              # without the cached auditor, `hg grep pattern` spends 2/3 of its
              # time in pathauditor checks on mozilla-central
              auditscope = (
                  self._repo.wvfs.audit.cached
                  if in_wdir
                  else util.nullcontextmanager
              )
              with auditscope():
                  # TODO: maybe better to warn missing files?
                  if self._all_files:
                      fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
                      filenames = ctx.matches(fmatch)
                  else:
                      filenames = filter(fmatch, ctx.files())

                  for fn in filenames:
                      # fn might not exist in the revision (it could have been
                      # removed by it). The same check could be done when rev is
                      # None, but it is less racy to protect against that in
                      # _readfile.
                      if not in_wdir and fn not in ctx:
                          continue

                      source = self._getrenamed(fn, rev) if self._follow else None
                      if source:
                          self._copies.setdefault(rev, {})[fn] = source
                      if fn in self._skip:
                          if source:
                              self._skip.add(source)
                          continue
                      files.append(fn)

                      if fn not in self._matches[rev]:
                          self._grepbody(fn, rev, self._readfile(ctx, fn))

                      if not self._diff:
                          continue
                      pfn = source or fn
                      if pfn not in self._matches[parent] and pfn in pctx:
                          self._grepbody(pfn, parent, self._readfile(pctx, pfn))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# grep.py - logic for history walk and grep
				#
				# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				from __future__ import annotations

				import difflib

				from .i18n import _

				from . import (
				error,
				match as matchmod,
				pycompat,
				scmutil,
				util,
				)


				def matchlines(body, regexp):
				    """Yield one entry per line of ``body`` on which ``regexp`` matches

				    Each entry is a ``(linenum, colstart, colend, line)`` tuple where:

				    - ``linenum`` is the 1-based number of the line the match starts on,
				    - ``colstart`` and ``colend`` are the match offsets relative to the
				      start of that line,
				    - ``line`` runs from the start of that line up to (excluding) the
				      first newline found at or after the end of the match.

				    Only the first match of a line is reported: the search resumes at the
				    beginning of the line following the reported text.
				    """
				    begin = 0
				    linenum = 0
				    while begin < len(body):
				        match = regexp.search(body, begin)
				        if not match:
				            break
				        mstart, mend = match.span()
				        # The "+ 1" moves onto the line starting at ``begin``: either the
				        # very first line, or the line after the newline that terminated
				        # the previously reported text.
				        linenum += body.count(b'\n', begin, mstart) + 1
				        # ``rfind() + 1`` is 0 when there is no newline in the window, in
				        # which case the line starts at ``begin``.
				        lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
				        # ``find() + 1`` is 0 when no newline follows the match; jump past
				        # the end of ``body`` so that the loop terminates.
				        begin = body.find(b'\n', mend) + 1 or len(body) + 1
				        lend = begin - 1
				        yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
				        # A match may span several lines. Account for the newlines inside
				        # the text just reported, otherwise every later line number would
				        # be too small.
				        linenum += body.count(b'\n', mstart, lend)


				class linestate:
				    """A line of text together with the position of a match inside it

				    Instances hash and compare on ``line`` only; ``linenum``, ``colstart``
				    and ``colend`` do not take part in equality.
				    """

				    def __init__(self, line, linenum, colstart, colend):
				        self.line = line
				        self.linenum = linenum
				        self.colstart = colstart
				        self.colend = colend

				    def __hash__(self):
				        return hash(self.line)

				    def __eq__(self, other):
				        # Defer to the other operand (and ultimately to "not equal")
				        # instead of raising AttributeError when ``other`` has no ``line``.
				        try:
				            otherline = other.line
				        except AttributeError:
				            return NotImplemented
				        return self.line == otherline

				    def findpos(self, regexp):
				        """Iterate all (start, end) indices of matches

				        The first pair is always the stored ``(colstart, colend)``; the
				        rest of the line is then searched with ``regexp`` starting at
				        ``colend``.
				        """
				        yield self.colstart, self.colend
				        p = self.colend
				        while p < len(self.line):
				            m = regexp.search(self.line, p)
				            if not m:
				                break
				            if m.end() == p:
				                # empty match at the current position: step over one
				                # character so that the loop makes progress
				                p += 1
				            else:
				                yield m.span()
				                p = m.end()


				def difflinestates(a, b):
				    """Yield ``(sign, item)`` pairs describing how ``a`` turns into ``b``

				    ``sign`` is ``b'-'`` for an item taken from ``a`` and ``b'+'`` for an
				    item taken from ``b``. Items in ranges that difflib reports as equal
				    are not yielded. For a replaced range, every removal is yielded before
				    the additions.
				    """
				    matcher = difflib.SequenceMatcher(None, a, b)
				    for op, a_from, a_to, b_from, b_to in matcher.get_opcodes():
				        if op in ('delete', 'replace'):
				            for idx in range(a_from, a_to):
				                yield (b'-', a[idx])
				        if op in ('insert', 'replace'):
				            for idx in range(b_from, b_to):
				                yield (b'+', b[idx])


				class grepsearcher:
				    """Locate lines matching a pattern across files and revisions

				    Options:
				    - all_files: also search files the revision left unchanged.
				    - diff: also search each file in the parent revision so diffs can be
				      generated.
				    - follow: carry skipped files across copies and renames.
				    """

				    def __init__(
				        self, ui, repo, regexp, all_files=False, diff=False, follow=False
				    ):
				        # what to search for, and how
				        self._ui, self._repo, self._regexp = ui, repo, regexp
				        self._all_files, self._diff, self._follow = all_files, diff, follow

				        # repository accessors
				        self._getfile = util.lrucachefunc(repo.file)
				        self._getrenamed = scmutil.getrenamedfn(repo)

				        # state accumulated while walking
				        self._matches = {}  # rev -> {filename: [linestate, ...]}
				        self._copies = {}  # rev -> {filename: copy source}
				        self._skip = set()  # filenames excluded from the search
				        self._revfiles = {}  # rev -> [filename, ...] not yet reported

				    def skipfile(self, fn, rev):
				        """Stop searching the given file from now on

				        The copy source recorded for the file at ``rev``, if any, is
				        excluded as well."""
				        renamed_from = self._copies.get(rev, {}).get(fn)
				        self._skip.add(fn)
				        if renamed_from:
				            self._skip.add(renamed_from)

				    def searchfiles(self, revs, makefilematcher):
				        """Walk revisions and yield (fn, ctx, pstates, states) per match

				        states and pstates are lists of linestate objects for the file in
				        ctx and in its first parent respectively. pstates may be empty
				        unless diff is True.
				        """
				        walker = scmutil.walkchangerevs(
				            self._repo, revs, makefilematcher, self._prep
				        )
				        for ctx in walker:
				            rev = ctx.rev()
				            parent = ctx.p1().rev()
				            for fn in sorted(self._revfiles.get(rev, [])):
				                states = self._matches[rev][fn]
				                source = self._copies.get(rev, {}).get(fn)
				                if fn not in self._skip:
				                    parentmatches = self._matches.get(parent, {})
				                    pstates = parentmatches.get(source or fn, [])
				                    if states or pstates:
				                        yield fn, ctx, pstates, states
				                elif source:
				                    # a skipped file drags its copy source along
				                    self._skip.add(source)
				            del self._revfiles[rev]
				            # Matches are kept for as long as some prepared revision is
				            # still pending; drop them all once none is left.
				            if not self._revfiles:
				                self._matches.clear()

				    def _grepbody(self, fn, rev, body):
				        """Record the matching lines of ``body`` for ``fn`` at ``rev``

				        An entry is created even when ``body`` is None, in which case it
				        is left as it is.
				        """
				        found = self._matches[rev].setdefault(fn, [])
				        if body is None:
				            return
				        found.extend(
				            linestate(line, lnum, cstart, cend)
				            for lnum, cstart, cend, line in matchlines(body, self._regexp)
				        )

				    def _readfile(self, ctx, fn):
				        """Return the content of ``fn`` in ``ctx``

				        None is returned when the file is missing (``ctx.rev()`` is None)
				        or censored (a warning is printed in that case).
				        """
				        rev = ctx.rev()
				        if rev is not None:
				            flog = self._getfile(fn)
				            fnode = ctx.filenode(fn)
				            try:
				                return flog.read(fnode)
				            except error.CensoredNodeError:
				                msg = _(
				                    b'cannot search in censored file: '
				                    b'%(filename)s:%(revnum)s\n'
				                )
				                self._ui.warn(
				                    msg
				                    % {b'filename': fn, b'revnum': pycompat.bytestr(rev)}
				                )
				            return None

				        fctx = ctx[fn]
				        try:
				            return fctx.data()
				        except FileNotFoundError:
				            return None

				    def _prep(self, ctx, fmatch):
				        """Search the files of ``ctx`` selected by ``fmatch``

				        Results are stored in the instance state consumed by
				        searchfiles().
				        """
				        rev = ctx.rev()
				        pctx = ctx.p1()
				        self._matches.setdefault(rev, {})
				        if self._diff:
				            parent = pctx.rev()
				            self._matches.setdefault(parent, {})
				        files = self._revfiles.setdefault(rev, [])

				        in_wdir = rev is None
				        # without the cached auditor, `hg grep pattern` spends 2/3 of its
				        # time in pathauditor checks on mozilla-central
				        auditscope = (
				            self._repo.wvfs.audit.cached
				            if in_wdir
				            else util.nullcontextmanager
				        )
				        with auditscope():
				            # TODO: maybe better to warn missing files?
				            if self._all_files:
				                fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
				                filenames = ctx.matches(fmatch)
				            else:
				                filenames = filter(fmatch, ctx.files())

				            for fn in filenames:
				                # fn might not exist in the revision (it could have been
				                # removed by it). The same check could be done when rev is
				                # None, but it is less racy to protect against that in
				                # _readfile.
				                if not in_wdir and fn not in ctx:
				                    continue

				                source = self._getrenamed(fn, rev) if self._follow else None
				                if source:
				                    self._copies.setdefault(rev, {})[fn] = source
				                if fn in self._skip:
				                    if source:
				                        self._skip.add(source)
				                    continue
				                files.append(fn)

				                if fn not in self._matches[rev]:
				                    self._grepbody(fn, rev, self._readfile(ctx, fn))

				                if not self._diff:
				                    continue
				                pfn = source or fn
				                if pfn not in self._matches[parent] and pfn in pctx:
				                    self._grepbody(pfn, parent, self._readfile(pctx, pfn))