upstream/mercurial-mirror Files · mercurial/metadata.py

rust: start plugging the dirstate tree behind a feature gate...

rust: start plugging the dirstate tree behind a feature gate The previous patch added the `dirstate-tree` feature gate to enable the two dirstate implementations to co-habit while the tree-based one gets better. This patch copies over the code that differs, be it because the algorithm changed or because the borrowing rules are different. Indeed, `DirstateTree` is not observationally equivalent to the std `HashMap` in the APIs we use: it does not have the `Entry` API (yet?) and its iterator returns owned values instead of references. This last point is because the implementation needs to be changed to a more clever and efficient solution. Differential Revision: https://phab.mercurial-scm.org/D9133

marmoute - - Load All Authors

File last commit:

r46163:3d5b2b8e default


                r46185:496537c9

default

Download file

             metadata.py
        
                    519 lines
            
             | 16.6 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / metadata.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      # metadata.py -- code related to various metadata computation and access.

      #

      # Copyright 2019 Google, Inc <martinvonz@google.com>

      # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import, print_function

      import multiprocessing

      from . import (

          error,

        marmoute
    
files: extract code for extra filtering of the `removed` entry into copies...

              r45467
            
          node,

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
          pycompat,

          util,

      )

      from .revlogutils import (

          flagutil as sidedataflag,

          sidedata as sidedatamod,

      )

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
      class ChangingFiles(object):

        marmoute
    
changing-files: document the various sets...

              r46078
            
          """A class recording the changes made to a file by a changeset

          Actions performed on files are gathered into 3 sets:

          - added:   files actively added in the changeset.

          - removed: files removed in the revision

          - touched: files affected by the merge

          and copies information is held by 2 mappings

          - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies

          - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

          See their inline help for details.

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          """

          def __init__(

        marmoute
    
changing-files: move default constructor value to None...

              r46163
            
              self,

              touched=None,

              added=None,

              removed=None,

              p1_copies=None,

              p2_copies=None,

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          ):

        marmoute
    
changing-files: move default constructor value to None...

              r46163
            
              self._added = set(() if added is None else added)

              self._removed = set(() if removed is None else removed)

              self._touched = set(() if touched is None else touched)

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              self._touched.update(self._added)

              self._touched.update(self._removed)

        marmoute
    
changing-files: move default constructor value to None...

              r46163
            
              self._p1_copies = dict(() if p1_copies is None else p1_copies)

              self._p2_copies = dict(() if p2_copies is None else p2_copies)

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
        marmoute
    
changing-files: implement equality checking...

              r46079
            
          def __eq__(self, other):

              return (

                  self.added == other.added

                  and self.removed == other.removed

                  and self.touched == other.touched

                  and self.copied_from_p1 == other.copied_from_p1

                  and self.copied_from_p2 == other.copied_from_p2

              )

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          @property

          def added(self):

        marmoute
    
changing-files: document the various sets...

              r46078
            
              """files actively added in the changeset

              Any file present in that revision that was absent in all the changeset's

              parents.

              In case of merge, this means a file absent in one of the parents but

              existing in the other will *not* be contained in this set. (They were

              added by an ancestor)

              """

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              return frozenset(self._added)

          def mark_added(self, filename):

              self._added.add(filename)

              self._touched.add(filename)

          def update_added(self, filenames):

              for f in filenames:

                  self.mark_added(f)

          @property

          def removed(self):

        marmoute
    
changing-files: document the various sets...

              r46078
            
              """files actively removed by the changeset

              In case of merge this will only contain the set of files removing "new"

              content. For any file absent in the current changeset:

              a) If the file exists in both parents, it is clearly "actively" removed

              by this changeset.

              b) If a file exists in only one parent and in none of the common

              ancestors, then the file was newly added in one of the merged branches

              and then got "actively" removed.

              c) If a file exists in only one parent and at least one of the common

              ancestors using the same filenode, then the file was unchanged on one

              side and deleted on the other side. The merge "passively" propagated

              that deletion, but didn't "actively" remove the file. In this case the

              file is *not* included in the `removed` set.

              d) If a file exists in only one parent and at least one of the common

              ancestors using a different filenode, then the file was changed on one

              side and removed on the other side. The merge process "actively"

              decided to drop the new change and delete the file. Unlike in the

              previous case, (c), the file included in the `removed` set.

              Summary table for merge:

              case | exists in parents | exists in gca || removed

               (a) |       both        |     *         ||   yes

               (b) |       one         |     none      ||   yes

               (c) |       one         | same filenode ||   no

               (d) |       one         |  new filenode ||   yes

              """

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              return frozenset(self._removed)

          def mark_removed(self, filename):

              self._removed.add(filename)

              self._touched.add(filename)

          def update_removed(self, filenames):

              for f in filenames:

                  self.mark_removed(f)

          @property

          def touched(self):

        marmoute
    
changing-files: document the various sets...

              r46078
            
              """files either actively modified, added or removed"""

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              return frozenset(self._touched)

          def mark_touched(self, filename):

              self._touched.add(filename)

          def update_touched(self, filenames):

              for f in filenames:

                  self.mark_touched(f)

          @property

          def copied_from_p1(self):

              return self._p1_copies.copy()

          def mark_copied_from_p1(self, source, dest):

              self._p1_copies[dest] = source

          def update_copies_from_p1(self, copies):

              for dest, source in copies.items():

                  self.mark_copied_from_p1(source, dest)

          @property

          def copied_from_p2(self):

              return self._p2_copies.copy()

          def mark_copied_from_p2(self, source, dest):

              self._p2_copies[dest] = source

          def update_copies_from_p2(self, copies):

              for dest, source in copies.items():

                  self.mark_copied_from_p2(source, dest)

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      def computechangesetfilesadded(ctx):

          """return the list of files added in a changeset

          """

          added = []

          for f in ctx.files():

              if not any(f in p for p in ctx.parents()):

                  added.append(f)

          return added

        marmoute
    
files: extract code for extra filtering of the `removed` entry into copies...

              r45467
            
      def get_removal_filter(ctx, x=None):

          """return a function to detect files "wrongly" detected as `removed`

          When a file is removed relative to p1 in a merge, this

          function determines whether the absence is due to a

          deletion from a parent, or whether the merge commit

          itself deletes the file. We decide this by doing a

          simplified three way merge of the manifest entry for

          the file. There are two ways we decide the merge

          itself didn't delete a file:

          - neither parent (nor the merge) contain the file

          - exactly one parent contains the file, and that

            parent has the same filelog entry as the merge

            ancestor (or all of them if there two). In other

            words, that parent left the file unchanged while the

            other one deleted it.

          One way to think about this is that deleting a file is

          similar to emptying it, so the list of changed files

          should be similar either way. The computation

          described above is not done directly in _filecommit

          when creating the list of changed files, however

          it does something very similar by comparing filelog

          nodes.

          """

          if x is not None:

              p1, p2, m1, m2 = x

          else:

              p1 = ctx.p1()

              p2 = ctx.p2()

              m1 = p1.manifest()

              m2 = p2.manifest()

          @util.cachefunc

          def mas():

              p1n = p1.node()

              p2n = p2.node()

              cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)

              if not cahs:

                  cahs = [node.nullrev]

              return [ctx.repo()[r].manifest() for r in cahs]

          def deletionfromparent(f):

              if f in m1:

                  return f not in m2 and all(

                      f in ma and ma.find(f) == m1.find(f) for ma in mas()

                  )

              elif f in m2:

                  return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())

              else:

                  return True

          return deletionfromparent

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      def computechangesetfilesremoved(ctx):

          """return the list of files removed in a changeset

          """

          removed = []

          for f in ctx.files():

              if f not in ctx:

                  removed.append(f)

        marmoute
    
metadata: filter the `removed` set to only contains relevant data...

              r45468
            
          if removed:

              rf = get_removal_filter(ctx)

              removed = [r for r in removed if not rf(r)]

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
          return removed

      def computechangesetcopies(ctx):

          """return the copies data for a changeset

          The copies data are returned as a pair of dictionnary (p1copies, p2copies).

          Each dictionnary are in the form: `{newname: oldname}`

          """

          p1copies = {}

          p2copies = {}

          p1 = ctx.p1()

          p2 = ctx.p2()

          narrowmatch = ctx._repo.narrowmatch()

          for dst in ctx.files():

              if not narrowmatch(dst) or dst not in ctx:

                  continue

              copied = ctx[dst].renamed()

              if not copied:

                  continue

              src, srcnode = copied

              if src in p1 and p1[src].filenode() == srcnode:

                  p1copies[dst] = src

              elif src in p2 and p2[src].filenode() == srcnode:

                  p2copies[dst] = src

          return p1copies, p2copies

      def encodecopies(files, copies):

          items = []

          for i, dst in enumerate(files):

              if dst in copies:

                  items.append(b'%d\0%s' % (i, copies[dst]))

          if len(items) != len(copies):

              raise error.ProgrammingError(

                  b'some copy targets missing from file list'

              )

          return b"\n".join(items)

      def decodecopies(files, data):

          try:

              copies = {}

              if not data:

                  return copies

              for l in data.split(b'\n'):

                  strindex, src = l.split(b'\0')

                  i = int(strindex)

                  dst = files[i]

                  copies[dst] = src

              return copies

          except (ValueError, IndexError):

              # Perhaps someone had chosen the same key name (e.g. "p1copies") and

              # used different syntax for the value.

              return None

      def encodefileindices(files, subset):

          subset = set(subset)

          indices = []

          for i, f in enumerate(files):

              if f in subset:

                  indices.append(b'%d' % i)

          return b'\n'.join(indices)

      def decodefileindices(files, data):

          try:

              subset = []

              if not data:

                  return subset

              for strindex in data.split(b'\n'):

                  i = int(strindex)

                  if i < 0 or i >= len(files):

                      return None

                  subset.append(files[i])

              return subset

          except (ValueError, IndexError):

              # Perhaps someone had chosen the same key name (e.g. "added") and

              # used different syntax for the value.

              return None

        marmoute
    
sidedata: rename `encode_copies_sidedata` to `encode_files_sidedata`...

              r46143
            
      def encode_files_sidedata(files):

        marmoute
    
commitctx: extract sidedata encoding inside its own function...

              r45885
            
          sortedfiles = sorted(files.touched)

          sidedata = {}

          p1copies = files.copied_from_p1

          if p1copies:

              p1copies = encodecopies(sortedfiles, p1copies)

              sidedata[sidedatamod.SD_P1COPIES] = p1copies

          p2copies = files.copied_from_p2

          if p2copies:

              p2copies = encodecopies(sortedfiles, p2copies)

              sidedata[sidedatamod.SD_P2COPIES] = p2copies

          filesadded = files.added

          if filesadded:

              filesadded = encodefileindices(sortedfiles, filesadded)

              sidedata[sidedatamod.SD_FILESADDED] = filesadded

          filesremoved = files.removed

          if filesremoved:

              filesremoved = encodefileindices(sortedfiles, filesremoved)

              sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved

          if not sidedata:

              sidedata = None

          return sidedata

        marmoute
    
sidedata: add a `decode_files_sidedata` function...

              r46145
            
      def decode_files_sidedata(changelogrevision, sidedata):

          """Return a ChangingFiles instance from a changelogrevision using sidata

          """

          touched = changelogrevision.files

          rawindices = sidedata.get(sidedatamod.SD_FILESADDED)

          added = decodefileindices(touched, rawindices)

          rawindices = sidedata.get(sidedatamod.SD_FILESREMOVED)

          removed = decodefileindices(touched, rawindices)

          rawcopies = sidedata.get(sidedatamod.SD_P1COPIES)

          p1_copies = decodecopies(touched, rawcopies)

          rawcopies = sidedata.get(sidedatamod.SD_P2COPIES)

          p2_copies = decodecopies(touched, rawcopies)

          return ChangingFiles(

              touched=touched,

              added=added,

              removed=removed,

              p1_copies=p1_copies,

              p2_copies=p2_copies,

          )

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      def _getsidedata(srcrepo, rev):

          ctx = srcrepo[rev]

          filescopies = computechangesetcopies(ctx)

          filesadded = computechangesetfilesadded(ctx)

          filesremoved = computechangesetfilesremoved(ctx)

          sidedata = {}

          if any([filescopies, filesadded, filesremoved]):

              sortedfiles = sorted(ctx.files())

              p1copies, p2copies = filescopies

              p1copies = encodecopies(sortedfiles, p1copies)

              p2copies = encodecopies(sortedfiles, p2copies)

              filesadded = encodefileindices(sortedfiles, filesadded)

              filesremoved = encodefileindices(sortedfiles, filesremoved)

              if p1copies:

                  sidedata[sidedatamod.SD_P1COPIES] = p1copies

              if p2copies:

                  sidedata[sidedatamod.SD_P2COPIES] = p2copies

              if filesadded:

                  sidedata[sidedatamod.SD_FILESADDED] = filesadded

              if filesremoved:

                  sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved

          return sidedata

      def getsidedataadder(srcrepo, destrepo):

          use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')

          if pycompat.iswindows or not use_w:

              return _get_simple_sidedata_adder(srcrepo, destrepo)

          else:

              return _get_worker_sidedata_adder(srcrepo, destrepo)

      def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):

          """The function used by worker precomputing sidedata

          It read an input queue containing revision numbers

          It write in an output queue containing (rev, <sidedata-map>)

          The `None` input value is used as a stop signal.

          The `tokens` semaphore is user to avoid having too many unprocessed

          entries. The workers needs to acquire one token before fetching a task.

          They will be released by the consumer of the produced data.

          """

          tokens.acquire()

          rev = revs_queue.get()

          while rev is not None:

              data = _getsidedata(srcrepo, rev)

              sidedata_queue.put((rev, data))

              tokens.acquire()

              rev = revs_queue.get()

          # processing of `None` is completed, release the token.

          tokens.release()

      BUFF_PER_WORKER = 50

      def _get_worker_sidedata_adder(srcrepo, destrepo):

          """The parallel version of the sidedata computation

          This code spawn a pool of worker that precompute a buffer of sidedata

          before we actually need them"""

          # avoid circular import copies -> scmutil -> worker -> copies

          from . import worker

          nbworkers = worker._numworkers(srcrepo.ui)

          tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)

          revsq = multiprocessing.Queue()

          sidedataq = multiprocessing.Queue()

          assert srcrepo.filtername is None

          # queue all tasks beforehand, revision numbers are small and it make

          # synchronisation simpler

          #

          # Since the computation for each node can be quite expensive, the overhead

          # of using a single queue is not revelant. In practice, most computation

          # are fast but some are very expensive and dominate all the other smaller

          # cost.

          for r in srcrepo.changelog.revs():

              revsq.put(r)

          # queue the "no more tasks" markers

          for i in range(nbworkers):

              revsq.put(None)

          allworkers = []

          for i in range(nbworkers):

              args = (srcrepo, revsq, sidedataq, tokens)

              w = multiprocessing.Process(target=_sidedata_worker, args=args)

              allworkers.append(w)

              w.start()

          # dictionnary to store results for revision higher than we one we are

          # looking for. For example, if we need the sidedatamap for 42, and 43 is

          # received, when shelve 43 for later use.

          staging = {}

          def sidedata_companion(revlog, rev):

              sidedata = {}

              if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog

                  # Is the data previously shelved ?

                  sidedata = staging.pop(rev, None)

                  if sidedata is None:

                      # look at the queued result until we find the one we are lookig

                      # for (shelve the other ones)

                      r, sidedata = sidedataq.get()

                      while r != rev:

                          staging[r] = sidedata

                          r, sidedata = sidedataq.get()

                  tokens.release()

              return False, (), sidedata

          return sidedata_companion

      def _get_simple_sidedata_adder(srcrepo, destrepo):

          """The simple version of the sidedata computation

          It just compute it in the same thread on request"""

          def sidedatacompanion(revlog, rev):

              sidedata = {}

              if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog

                  sidedata = _getsidedata(srcrepo, rev)

              return False, (), sidedata

          return sidedatacompanion

      def getsidedataremover(srcrepo, destrepo):

          def sidedatacompanion(revlog, rev):

              f = ()

              if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog

                  if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:

                      f = (

                          sidedatamod.SD_P1COPIES,

                          sidedatamod.SD_P2COPIES,

                          sidedatamod.SD_FILESADDED,

                          sidedatamod.SD_FILESREMOVED,

                      )

              return False, f, {}

          return sidedatacompanion

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marmoute metadata: move computation related to files touched in a dedicated module...	r45466	# metadata.py -- code related to various metadata computation and access.
		#
		# Copyright 2019 Google, Inc <martinvonz@google.com>
		# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
		#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.
		from __future__ import absolute_import, print_function

		import multiprocessing

		from . import (
		error,
marmoute files: extract code for extra filtering of the `removed` entry into copies...	r45467	node,
marmoute metadata: move computation related to files touched in a dedicated module...	r45466	pycompat,
		util,
		)

		from .revlogutils import (
		flagutil as sidedataflag,
		sidedata as sidedatamod,
		)


marmoute commitctx: return a richer object from _prepare_files...	r45883	class ChangingFiles(object):
marmoute changing-files: document the various sets...	r46078	"""A class recording the changes made to a file by a changeset

		Actions performed on files are gathered into 3 sets:

		- added: files actively added in the changeset.
		- removed: files removed in the revision
		- touched: files affected by the merge

		and copies information is held by 2 mappings

		- copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
		- copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

		See their inline help for details.
marmoute commitctx: return a richer object from _prepare_files...	r45883	"""

		def __init__(
marmoute changing-files: move default constructor value to None...	r46163	self,
		touched=None,
		added=None,
		removed=None,
		p1_copies=None,
		p2_copies=None,
marmoute commitctx: return a richer object from _prepare_files...	r45883	):
marmoute changing-files: move default constructor value to None...	r46163	self._added = set(() if added is None else added)
		self._removed = set(() if removed is None else removed)
		self._touched = set(() if touched is None else touched)
marmoute commitctx: return a richer object from _prepare_files...	r45883	self._touched.update(self._added)
		self._touched.update(self._removed)
marmoute changing-files: move default constructor value to None...	r46163	self._p1_copies = dict(() if p1_copies is None else p1_copies)
		self._p2_copies = dict(() if p2_copies is None else p2_copies)
marmoute commitctx: return a richer object from _prepare_files...	r45883
marmoute changing-files: implement equality checking...	r46079	def __eq__(self, other):
		return (
		self.added == other.added
		and self.removed == other.removed
		and self.touched == other.touched
		and self.copied_from_p1 == other.copied_from_p1
		and self.copied_from_p2 == other.copied_from_p2
		)

marmoute commitctx: return a richer object from _prepare_files...	r45883	@property
		def added(self):
marmoute changing-files: document the various sets...	r46078	"""files actively added in the changeset

		Any file present in that revision that was absent in all the changeset's
		parents.

		In case of merge, this means a file absent in one of the parents but
		existing in the other will not be contained in this set. (They were
		added by an ancestor)
		"""
marmoute commitctx: return a richer object from _prepare_files...	r45883	return frozenset(self._added)

		def mark_added(self, filename):
		self._added.add(filename)
		self._touched.add(filename)

		def update_added(self, filenames):
		for f in filenames:
		self.mark_added(f)

		@property
		def removed(self):
marmoute changing-files: document the various sets...	r46078	"""files actively removed by the changeset

		In case of merge this will only contain the set of files removing "new"
		content. For any file absent in the current changeset:

		a) If the file exists in both parents, it is clearly "actively" removed
		by this changeset.

		b) If a file exists in only one parent and in none of the common
		ancestors, then the file was newly added in one of the merged branches
		and then got "actively" removed.

		c) If a file exists in only one parent and at least one of the common
		ancestors using the same filenode, then the file was unchanged on one
		side and deleted on the other side. The merge "passively" propagated
		that deletion, but didn't "actively" remove the file. In this case the
		file is not included in the `removed` set.

		d) If a file exists in only one parent and at least one of the common
		ancestors using a different filenode, then the file was changed on one
		side and removed on the other side. The merge process "actively"
		decided to drop the new change and delete the file. Unlike in the
		previous case, (c), the file included in the `removed` set.

		Summary table for merge:

		case \| exists in parents \| exists in gca \|\| removed
		(a) \| both \| * \|\| yes
		(b) \| one \| none \|\| yes
		(c) \| one \| same filenode \|\| no
		(d) \| one \| new filenode \|\| yes
		"""
marmoute commitctx: return a richer object from _prepare_files...	r45883	return frozenset(self._removed)

		def mark_removed(self, filename):
		self._removed.add(filename)
		self._touched.add(filename)

		def update_removed(self, filenames):
		for f in filenames:
		self.mark_removed(f)

		@property
		def touched(self):
marmoute changing-files: document the various sets...	r46078	"""files either actively modified, added or removed"""
marmoute commitctx: return a richer object from _prepare_files...	r45883	return frozenset(self._touched)

		def mark_touched(self, filename):
		self._touched.add(filename)

		def update_touched(self, filenames):
		for f in filenames:
		self.mark_touched(f)

		@property
		def copied_from_p1(self):
		return self._p1_copies.copy()

		def mark_copied_from_p1(self, source, dest):
		self._p1_copies[dest] = source

		def update_copies_from_p1(self, copies):
		for dest, source in copies.items():
		self.mark_copied_from_p1(source, dest)

		@property
		def copied_from_p2(self):
		return self._p2_copies.copy()

		def mark_copied_from_p2(self, source, dest):
		self._p2_copies[dest] = source

		def update_copies_from_p2(self, copies):
		for dest, source in copies.items():
		self.mark_copied_from_p2(source, dest)


marmoute metadata: move computation related to files touched in a dedicated module...	r45466	def computechangesetfilesadded(ctx):
		"""return the list of files added in a changeset
		"""
		added = []
		for f in ctx.files():
		if not any(f in p for p in ctx.parents()):
		added.append(f)
		return added


marmoute files: extract code for extra filtering of the `removed` entry into copies...	r45467	def get_removal_filter(ctx, x=None):
		"""return a function to detect files "wrongly" detected as `removed`

		When a file is removed relative to p1 in a merge, this
		function determines whether the absence is due to a
		deletion from a parent, or whether the merge commit
		itself deletes the file. We decide this by doing a
		simplified three way merge of the manifest entry for
		the file. There are two ways we decide the merge
		itself didn't delete a file:
		- neither parent (nor the merge) contain the file
		- exactly one parent contains the file, and that
		parent has the same filelog entry as the merge
		ancestor (or all of them if there two). In other
		words, that parent left the file unchanged while the
		other one deleted it.
		One way to think about this is that deleting a file is
		similar to emptying it, so the list of changed files
		should be similar either way. The computation
		described above is not done directly in _filecommit
		when creating the list of changed files, however
		it does something very similar by comparing filelog
		nodes.
		"""

		if x is not None:
		p1, p2, m1, m2 = x
		else:
		p1 = ctx.p1()
		p2 = ctx.p2()
		m1 = p1.manifest()
		m2 = p2.manifest()

		@util.cachefunc
		def mas():
		p1n = p1.node()
		p2n = p2.node()
		cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
		if not cahs:
		cahs = [node.nullrev]
		return [ctx.repo()[r].manifest() for r in cahs]

		def deletionfromparent(f):
		if f in m1:
		return f not in m2 and all(
		f in ma and ma.find(f) == m1.find(f) for ma in mas()
		)
		elif f in m2:
		return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
		else:
		return True

		return deletionfromparent


marmoute metadata: move computation related to files touched in a dedicated module...	r45466	def computechangesetfilesremoved(ctx):
		"""return the list of files removed in a changeset
		"""
		removed = []
		for f in ctx.files():
		if f not in ctx:
		removed.append(f)
marmoute metadata: filter the `removed` set to only contains relevant data...	r45468	if removed:
		rf = get_removal_filter(ctx)
		removed = [r for r in removed if not rf(r)]
marmoute metadata: move computation related to files touched in a dedicated module...	r45466	return removed


		def computechangesetcopies(ctx):
		"""return the copies data for a changeset

		The copies data are returned as a pair of dictionnary (p1copies, p2copies).

		Each dictionnary are in the form: `{newname: oldname}`
		"""
		p1copies = {}
		p2copies = {}
		p1 = ctx.p1()
		p2 = ctx.p2()
		narrowmatch = ctx._repo.narrowmatch()
		for dst in ctx.files():
		if not narrowmatch(dst) or dst not in ctx:
		continue
		copied = ctx[dst].renamed()
		if not copied:
		continue
		src, srcnode = copied
		if src in p1 and p1[src].filenode() == srcnode:
		p1copies[dst] = src
		elif src in p2 and p2[src].filenode() == srcnode:
		p2copies[dst] = src
		return p1copies, p2copies


		def encodecopies(files, copies):
		items = []
		for i, dst in enumerate(files):
		if dst in copies:
		items.append(b'%d\0%s' % (i, copies[dst]))
		if len(items) != len(copies):
		raise error.ProgrammingError(
		b'some copy targets missing from file list'
		)
		return b"\n".join(items)


		def decodecopies(files, data):
		try:
		copies = {}
		if not data:
		return copies
		for l in data.split(b'\n'):
		strindex, src = l.split(b'\0')
		i = int(strindex)
		dst = files[i]
		copies[dst] = src
		return copies
		except (ValueError, IndexError):
		# Perhaps someone had chosen the same key name (e.g. "p1copies") and
		# used different syntax for the value.
		return None


		def encodefileindices(files, subset):
		subset = set(subset)
		indices = []
		for i, f in enumerate(files):
		if f in subset:
		indices.append(b'%d' % i)
		return b'\n'.join(indices)


		def decodefileindices(files, data):
		try:
		subset = []
		if not data:
		return subset
		for strindex in data.split(b'\n'):
		i = int(strindex)
		if i < 0 or i >= len(files):
		return None
		subset.append(files[i])
		return subset
		except (ValueError, IndexError):
		# Perhaps someone had chosen the same key name (e.g. "added") and
		# used different syntax for the value.
		return None


marmoute sidedata: rename `encode_copies_sidedata` to `encode_files_sidedata`...	r46143	def encode_files_sidedata(files):
marmoute commitctx: extract sidedata encoding inside its own function...	r45885	sortedfiles = sorted(files.touched)
		sidedata = {}
		p1copies = files.copied_from_p1
		if p1copies:
		p1copies = encodecopies(sortedfiles, p1copies)
		sidedata[sidedatamod.SD_P1COPIES] = p1copies
		p2copies = files.copied_from_p2
		if p2copies:
		p2copies = encodecopies(sortedfiles, p2copies)
		sidedata[sidedatamod.SD_P2COPIES] = p2copies
		filesadded = files.added
		if filesadded:
		filesadded = encodefileindices(sortedfiles, filesadded)
		sidedata[sidedatamod.SD_FILESADDED] = filesadded
		filesremoved = files.removed
		if filesremoved:
		filesremoved = encodefileindices(sortedfiles, filesremoved)
		sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
		if not sidedata:
		sidedata = None
		return sidedata


marmoute sidedata: add a `decode_files_sidedata` function...	r46145	def decode_files_sidedata(changelogrevision, sidedata):
		"""Return a ChangingFiles instance from a changelogrevision using sidata
		"""
		touched = changelogrevision.files

		rawindices = sidedata.get(sidedatamod.SD_FILESADDED)
		added = decodefileindices(touched, rawindices)

		rawindices = sidedata.get(sidedatamod.SD_FILESREMOVED)
		removed = decodefileindices(touched, rawindices)

		rawcopies = sidedata.get(sidedatamod.SD_P1COPIES)
		p1_copies = decodecopies(touched, rawcopies)

		rawcopies = sidedata.get(sidedatamod.SD_P2COPIES)
		p2_copies = decodecopies(touched, rawcopies)

		return ChangingFiles(
		touched=touched,
		added=added,
		removed=removed,
		p1_copies=p1_copies,
		p2_copies=p2_copies,
		)


marmoute metadata: move computation related to files touched in a dedicated module...	r45466	def _getsidedata(srcrepo, rev):
		ctx = srcrepo[rev]
		filescopies = computechangesetcopies(ctx)
		filesadded = computechangesetfilesadded(ctx)
		filesremoved = computechangesetfilesremoved(ctx)
		sidedata = {}
		if any([filescopies, filesadded, filesremoved]):
		sortedfiles = sorted(ctx.files())
		p1copies, p2copies = filescopies
		p1copies = encodecopies(sortedfiles, p1copies)
		p2copies = encodecopies(sortedfiles, p2copies)
		filesadded = encodefileindices(sortedfiles, filesadded)
		filesremoved = encodefileindices(sortedfiles, filesremoved)
		if p1copies:
		sidedata[sidedatamod.SD_P1COPIES] = p1copies
		if p2copies:
		sidedata[sidedatamod.SD_P2COPIES] = p2copies
		if filesadded:
		sidedata[sidedatamod.SD_FILESADDED] = filesadded
		if filesremoved:
		sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
		return sidedata


		def getsidedataadder(srcrepo, destrepo):
		use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
		if pycompat.iswindows or not use_w:
		return _get_simple_sidedata_adder(srcrepo, destrepo)
		else:
		return _get_worker_sidedata_adder(srcrepo, destrepo)


		def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
		"""The function used by worker precomputing sidedata

		It read an input queue containing revision numbers
		It write in an output queue containing (rev, <sidedata-map>)

		The `None` input value is used as a stop signal.

		The `tokens` semaphore is user to avoid having too many unprocessed
		entries. The workers needs to acquire one token before fetching a task.
		They will be released by the consumer of the produced data.
		"""
		tokens.acquire()
		rev = revs_queue.get()
		while rev is not None:
		data = _getsidedata(srcrepo, rev)
		sidedata_queue.put((rev, data))
		tokens.acquire()
		rev = revs_queue.get()
		# processing of `None` is completed, release the token.
		tokens.release()


		BUFF_PER_WORKER = 50


		def _get_worker_sidedata_adder(srcrepo, destrepo):
		"""The parallel version of the sidedata computation

		This code spawn a pool of worker that precompute a buffer of sidedata
		before we actually need them"""
		# avoid circular import copies -> scmutil -> worker -> copies
		from . import worker

		nbworkers = worker._numworkers(srcrepo.ui)

		tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
		revsq = multiprocessing.Queue()
		sidedataq = multiprocessing.Queue()

		assert srcrepo.filtername is None
		# queue all tasks beforehand, revision numbers are small and it make
		# synchronisation simpler
		#
		# Since the computation for each node can be quite expensive, the overhead
		# of using a single queue is not revelant. In practice, most computation
		# are fast but some are very expensive and dominate all the other smaller
		# cost.
		for r in srcrepo.changelog.revs():
		revsq.put(r)
		# queue the "no more tasks" markers
		for i in range(nbworkers):
		revsq.put(None)

		allworkers = []
		for i in range(nbworkers):
		args = (srcrepo, revsq, sidedataq, tokens)
		w = multiprocessing.Process(target=_sidedata_worker, args=args)
		allworkers.append(w)
		w.start()

		# dictionnary to store results for revision higher than we one we are
		# looking for. For example, if we need the sidedatamap for 42, and 43 is
		# received, when shelve 43 for later use.
		staging = {}

		def sidedata_companion(revlog, rev):
		sidedata = {}
		if util.safehasattr(revlog, b'filteredrevs'): # this is a changelog
		# Is the data previously shelved ?
		sidedata = staging.pop(rev, None)
		if sidedata is None:
		# look at the queued result until we find the one we are lookig
		# for (shelve the other ones)
		r, sidedata = sidedataq.get()
		while r != rev:
		staging[r] = sidedata
		r, sidedata = sidedataq.get()
		tokens.release()
		return False, (), sidedata

		return sidedata_companion


		def _get_simple_sidedata_adder(srcrepo, destrepo):
		"""The simple version of the sidedata computation

		It just compute it in the same thread on request"""

		def sidedatacompanion(revlog, rev):
		sidedata = {}
		if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
		sidedata = _getsidedata(srcrepo, rev)
		return False, (), sidedata

		return sidedatacompanion


		def getsidedataremover(srcrepo, destrepo):
		def sidedatacompanion(revlog, rev):
		f = ()
		if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
		if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
		f = (
		sidedatamod.SD_P1COPIES,
		sidedatamod.SD_P2COPIES,
		sidedatamod.SD_FILESADDED,
		sidedatamod.SD_FILESREMOVED,
		)
		return False, f, {}

		return sidedatacompanion