upstream/mercurial-mirror Files · mercurial/metadata.py

push: indent the some part of the command...

push: indent the some part of the command That code will be put in a loop in the next changeset, pre-indenting make the next change clearer. Differential Revision: https://phab.mercurial-scm.org/D10160

Raphaël Gomès - - Load All Authors

File last commit:

r47447:bc251951 default


                r47535:25850879

default

Download file

             metadata.py
        
                    950 lines
            
             | 33.4 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / metadata.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Dan Villiom Podlaski Christiansen
    
i18n: fix coding tag unsupported by xgettext...

              r46391
            
      # coding: utf-8

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      # metadata.py -- code related to various metadata computation and access.

      #

      # Copyright 2019 Google, Inc <martinvonz@google.com>

      # Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import, print_function

      import multiprocessing

        marmoute
    
changing-files: rework the way we store changed files in side-data...

              r46211
            
      import struct

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
        Joerg Sonnenberger
    
node: import symbols explicitly...

              r46729
            
      from .node import (

          nullid,

          nullrev,

      )

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      from . import (

          error,

          pycompat,

        Raphaël Gomès
    
sidedata-exchange: add `wanted_sidedata` and `sidedata_computers` to repos...

              r47447
            
          requirements as requirementsmod,

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
          util,

      )

      from .revlogutils import (

          flagutil as sidedataflag,

          sidedata as sidedatamod,

      )

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
      class ChangingFiles(object):

        marmoute
    
changing-files: fix docstring...

              r46196
            
          """A class recording the changes made to files by a changeset

        marmoute
    
changing-files: document the various sets...

              r46078
            
          Actions performed on files are gathered into the following sets:

          - added:   files actively added in the changeset.

        marmoute
    
changing-files: add the ability to track merged files too...

              r46186
            
          - merged:  files whose history got merged

        marmoute
    
changing-files: document the various sets...

              r46078
            
          - removed: files removed in the revision

        marmoute
    
changing-files: add a "salvaged" set to track file that were not removed...

              r46251
            
          - salvaged: files that might have been deleted by a merge but were not

        marmoute
    
changing-files: document the various sets...

              r46078
            
          - touched: files affected by the changeset

          and copies information is held by 2 mappings

          - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies

          - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

          See their inline help for details.

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          """

          def __init__(

        marmoute
    
changing-files: move default constructor value to None...

              r46163
            
              self,

              touched=None,

              added=None,

              removed=None,

        marmoute
    
changing-files: add the ability to track merged files too...

              r46186
            
              merged=None,

        marmoute
    
changing-files: add a "salvaged" set to track file that were not removed...

              r46251
            
              salvaged=None,

        marmoute
    
changing-files: move default constructor value to None...

              r46163
            
              p1_copies=None,

              p2_copies=None,

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          ):

        marmoute
    
changing-files: move default constructor value to None...

              r46163
            
              self._added = set(() if added is None else added)

        marmoute
    
changing-files: add the ability to track merged files too...

              r46186
            
              self._merged = set(() if merged is None else merged)

        marmoute
    
changing-files: move default constructor value to None...

              r46163
            
              self._removed = set(() if removed is None else removed)

              self._touched = set(() if touched is None else touched)

        marmoute
    
changing-files: add a "salvaged" set to track file that were not removed...

              r46251
            
              self._salvaged = set(() if salvaged is None else salvaged)

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              self._touched.update(self._added)

        marmoute
    
changing-files: add the ability to track merged files too...

              r46186
            
              self._touched.update(self._merged)

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              self._touched.update(self._removed)

        marmoute
    
changing-files: move default constructor value to None...

              r46163
            
              self._p1_copies = dict(() if p1_copies is None else p1_copies)

              self._p2_copies = dict(() if p2_copies is None else p2_copies)

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
        marmoute
    
changing-files: implement equality checking...

              r46079
            
          def __eq__(self, other):

              return (

                  self.added == other.added

        marmoute
    
changing-files: add the ability to track merged files too...

              r46186
            
                  and self.merged == other.merged

        marmoute
    
changing-files: implement equality checking...

              r46079
            
                  and self.removed == other.removed

        marmoute
    
changing-files: add a "salvaged" set to track file that were not removed...

              r46251
            
                  and self.salvaged == other.salvaged

        marmoute
    
changing-files: implement equality checking...

              r46079
            
                  and self.touched == other.touched

                  and self.copied_from_p1 == other.copied_from_p1

                  and self.copied_from_p2 == other.copied_from_p2

              )

        marmoute
    
changing-files: add a shorthand property to check for copy relevant info...

              r46320
            
          @property
          def has_copies_info(self):
              """Tell whether this object carries any copy-relevant information

              The answer is True as soon as one of `removed`, `merged`,
              `salvaged`, `copied_from_p1` or `copied_from_p2` is non-empty.
              """
              if self.removed or self.merged or self.salvaged:
                  return True
              return bool(self.copied_from_p1 or self.copied_from_p2)

        marmoute
    
changing-files: cache the various property...

              r46198
            
          @util.propertycache

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          def added(self):

        marmoute
    
changing-files: document the various sets...

              r46078
            
              """files actively added in the changeset

              Any file present in that revision that was absent in all the changeset's

              parents.

              In case of merge, this means a file absent in one of the parents but

              existing in the other will *not* be contained in this set. (They were

              added by an ancestor)

              """

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              return frozenset(self._added)

          def mark_added(self, filename):

        marmoute
    
changing-files: cache the various property...

              r46198
            
              if 'added' in vars(self):

                  del self.added

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              self._added.add(filename)

        marmoute
    
changing-files: always use `mark_touched` to update the touched set...

              r46197
            
              self.mark_touched(filename)

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          def update_added(self, filenames):
              """Mark every entry of `filenames` as added (see `mark_added`)"""
              mark = self.mark_added
              for name in filenames:
                  mark(name)

        marmoute
    
changing-files: cache the various property...

              r46198
            
          @util.propertycache

        marmoute
    
changing-files: add the ability to track merged files too...

              r46186
            
          def merged(self):

              """files actively merged during a merge

              Any modified file which had modifications on both sides that needed merging.

              In this case a new filenode was created and it has two parents.

              """

              return frozenset(self._merged)

          def mark_merged(self, filename):

        marmoute
    
changing-files: cache the various property...

              r46198
            
              if 'merged' in vars(self):

                  del self.merged

        marmoute
    
changing-files: add the ability to track merged files too...

              r46186
            
              self._merged.add(filename)

        marmoute
    
changing-files: always use `mark_touched` to update the touched set...

              r46197
            
              self.mark_touched(filename)

        marmoute
    
changing-files: add the ability to track merged files too...

              r46186
            
          def update_merged(self, filenames):
              """Mark every entry of `filenames` as merged (see `mark_merged`)"""
              mark = self.mark_merged
              for name in filenames:
                  mark(name)

        marmoute
    
changing-files: cache the various property...

              r46198
            
          @util.propertycache

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          def removed(self):

        marmoute
    
changing-files: document the various sets...

              r46078
            
              """files actively removed by the changeset

              In case of merge this will only contain the set of files removing "new"

              content. For any file absent in the current changeset:

              a) If the file exists in both parents, it is clearly "actively" removed

              by this changeset.

              b) If a file exists in only one parent and in none of the common

              ancestors, then the file was newly added in one of the merged branches

              and then got "actively" removed.

              c) If a file exists in only one parent and at least one of the common

              ancestors using the same filenode, then the file was unchanged on one

              side and deleted on the other side. The merge "passively" propagated

              that deletion, but didn't "actively" remove the file. In this case the

              file is *not* included in the `removed` set.

              d) If a file exists in only one parent and at least one of the common

              ancestors using a different filenode, then the file was changed on one

              side and removed on the other side. The merge process "actively"

              decided to drop the new change and delete the file. Unlike in the

              previous case, (c), the file is included in the `removed` set.

              Summary table for merge:

              case | exists in parents | exists in gca || removed

               (a) |       both        |     *         ||   yes

               (b) |       one         |     none      ||   yes

               (c) |       one         | same filenode ||   no

               (d) |       one         |  new filenode ||   yes

              """

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              return frozenset(self._removed)

          def mark_removed(self, filename):

        marmoute
    
changing-files: cache the various property...

              r46198
            
              if 'removed' in vars(self):

                  del self.removed

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              self._removed.add(filename)

        marmoute
    
changing-files: always use `mark_touched` to update the touched set...

              r46197
            
              self.mark_touched(filename)

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          def update_removed(self, filenames):
              """Mark every entry of `filenames` as removed (see `mark_removed`)"""
              mark = self.mark_removed
              for name in filenames:
                  mark(name)

        marmoute
    
changing-files: cache the various property...

              r46198
            
          @util.propertycache

        marmoute
    
changing-files: add a "salvaged" set to track file that were not removed...

              r46251
            
          def salvaged(self):

              """files that might have been deleted by a merge, but still exist.

              During a merge, the manifest merging might select some files for

              removal, or for a removed/changed conflict. If at commit time the file

              still exists, its removal was "reverted" and the file is "salvaged"

              """

              return frozenset(self._salvaged)

          def mark_salvaged(self, filename):
              """Record `filename` as salvaged, and as touched too"""
              instance_attrs = vars(self)
              if "salvaged" in instance_attrs:
                  # a value is stored on the instance under `salvaged`;
                  # drop it before changing the underlying set
                  del self.salvaged
              self._salvaged.add(filename)
              self.mark_touched(filename)

          def update_salvaged(self, filenames):
              """Mark every entry of `filenames` as salvaged (see `mark_salvaged`)"""
              mark = self.mark_salvaged
              for name in filenames:
                  mark(name)

          @util.propertycache

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          def touched(self):

        marmoute
    
changing-files: document the various sets...

              r46078
            
              """files either actively modified, added or removed"""

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              return frozenset(self._touched)

          def mark_touched(self, filename):

        marmoute
    
changing-files: cache the various property...

              r46198
            
              if 'touched' in vars(self):

                  del self.touched

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              self._touched.add(filename)

          def update_touched(self, filenames):
              """Mark every entry of `filenames` as touched (see `mark_touched`)"""
              mark = self.mark_touched
              for name in filenames:
                  mark(name)

        marmoute
    
changing-files: cache the various property...

              r46198
            
          @util.propertycache

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          def copied_from_p1(self):

              return self._p1_copies.copy()

          def mark_copied_from_p1(self, source, dest):

        marmoute
    
changing-files: cache the various property...

              r46198
            
              if 'copied_from_p1' in vars(self):

                  del self.copied_from_p1

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              self._p1_copies[dest] = source

          def update_copies_from_p1(self, copies):
              """Record every `{destination: source}` entry of `copies` as a p1 copy"""
              record = self.mark_copied_from_p1
              for new_name, old_name in copies.items():
                  record(old_name, new_name)

        marmoute
    
changing-files: cache the various property...

              r46198
            
          @util.propertycache

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
          def copied_from_p2(self):

              return self._p2_copies.copy()

          def mark_copied_from_p2(self, source, dest):

        marmoute
    
changing-files: cache the various property...

              r46198
            
              if 'copied_from_p2' in vars(self):

                  del self.copied_from_p2

        marmoute
    
commitctx: return a richer object from _prepare_files...

              r45883
            
              self._p2_copies[dest] = source

          def update_copies_from_p2(self, copies):
              """Record every `{destination: source}` entry of `copies` as a p2 copy"""
              record = self.mark_copied_from_p2
              for new_name, old_name in copies.items():
                  record(old_name, new_name)

        marmoute
    
changing-files: split the changing files computation from encoding...

              r46255
            
      def compute_all_files_changes(ctx):

          """compute the files changed by a revision"""

        marmoute
    
changing-files: add clean computation of changed files for roots...

              r46258
            
          p1 = ctx.p1()

          p2 = ctx.p2()

        Joerg Sonnenberger
    
node: import symbols explicitly...

              r46729
            
          if p1.rev() == nullrev and p2.rev() == nullrev:

        marmoute
    
changing-files: add clean computation of changed files for roots...

              r46258
            
              return _process_root(ctx)

        Joerg Sonnenberger
    
node: import symbols explicitly...

              r46729
            
          elif p1.rev() != nullrev and p2.rev() == nullrev:

        marmoute
    
changing-files: add clean computation of changed files for linear changesets...

              r46259
            
              return _process_linear(p1, ctx)

        Joerg Sonnenberger
    
node: import symbols explicitly...

              r46729
            
          elif p1.rev() == nullrev and p2.rev() != nullrev:

        marmoute
    
changing-files: add clean computation of changed files for linear changesets...

              r46259
            
              # In the wild, one can encounter changeset where p1 is null but p2 is not

              return _process_linear(p1, ctx, parent=2)

          elif p1.rev() == p2.rev():

              # In the wild, one can encounter such "non-merge"

              return _process_linear(p1, ctx)

        marmoute
    
changing-files: add clean computation of changed file for merges...

              r46260
            
          else:

              return _process_merge(p1, p2, ctx)

        marmoute
    
changing-files: split the changing files computation from encoding...

              r46255
            
        marmoute
    
changing-files: add clean computation of changed files for roots...

              r46258
            
      def _process_root(ctx):

        Augie Fackler
    
formating: upgrade to black 20.8b1...

              r46554
            
          """compute the appropriate changed files for a changeset with no parents"""

        marmoute
    
changing-files: add clean computation of changed files for roots...

              r46258
            
          # Simple, there was nothing before it, so everything is added.

          md = ChangingFiles()

          manifest = ctx.manifest()

          for filename in manifest:

              md.mark_added(filename)

          return md

        marmoute
    
changing-files: add clean computation of changed files for linear changesets...

              r46259
            
      def _process_linear(parent_ctx, children_ctx, parent=1):

        Augie Fackler
    
formating: upgrade to black 20.8b1...

              r46554
            
          """compute the appropriate changed files for a changeset with a single parent"""

        marmoute
    
changing-files: add clean computation of changed files for linear changesets...

              r46259
            
          md = ChangingFiles()

          parent_manifest = parent_ctx.manifest()

          children_manifest = children_ctx.manifest()

          copies_candidate = []

          for filename, d in parent_manifest.diff(children_manifest).items():

              if d[1][0] is None:

                  # no filenode for the "new" value, file is absent

                  md.mark_removed(filename)

              else:

                  copies_candidate.append(filename)

                  if d[0][0] is None:

                      # no filenode for the "old" value, file was absent

                      md.mark_added(filename)

                  else:

                      # filenode for both "old" and "new"

                      md.mark_touched(filename)

          if parent == 1:

              copied = md.mark_copied_from_p1

          elif parent == 2:

              copied = md.mark_copied_from_p2

          else:

              assert False, "bad parent value %d" % parent

          for filename in copies_candidate:

              copy_info = children_ctx[filename].renamed()

              if copy_info:

                  source, srcnode = copy_info

                  copied(source, filename)

          return md

        marmoute
    
changing-files: add clean computation of changed file for merges...

              r46260
            
      def _process_merge(p1_ctx, p2_ctx, ctx):

          """compute the appropriate changed files for a changeset with two parents

          This is a more advanced case. The information we need to record is summarised

          in the following table:

          ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐

          │ diff ╲  diff │       ø      │ (Some, None) │ (None, Some) │ (Some, Some) │

          │  p2   ╲  p1  │              │              │              │              │

          ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤

          │              │              │🄱  No Changes │🄳  No Changes │              │

          │  ø           │🄰  No Changes │      OR      │     OR       │🄵  No Changes │

          │              │              │🄲  Deleted[1] │🄴  Salvaged[2]│     [3]      │

          ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤

          │              │🄶  No Changes │              │              │              │

          │ (Some, None) │      OR      │🄻  Deleted    │       ø      │      ø       │

          │              │🄷  Deleted[1] │              │              │              │

          ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤

          │              │🄸  No Changes │              │              │              │

          │ (None, Some) │     OR       │      ø       │🄼   Added     │🄽   Merged    │

          │              │🄹  Salvaged[2]│              │   (copied?)  │   (copied?)  │

          ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤

          │              │              │              │              │              │

          │ (Some, Some) │🄺  No Changes │      ø       │🄾   Merged    │🄿   Merged    │

          │              │     [3]      │              │   (copied?)  │   (copied?)  │

          └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘

          Special case [1]:

            The situation is:

              - parent-A:     file exists,

              - parent-B:     no file,

              - working-copy: no file.

            Detecting a "deletion" will depend on the presence of actual change on

            the "parent-A" branch:

            Subcase 🄱 or 🄶 : if the state of the file in "parent-A" is unchanged

            compared to the merge ancestors, then parent-A branch left the file

            untouched while parent-B deleted it. We simply apply the change from

            "parent-B" branch: the file was automatically dropped.

            The result is:

                - file is not recorded as touched by the merge.

            Subcase 🄲 or 🄷 : otherwise, the changes from the parent-A branch were explicitly dropped and

            the file was "deleted again". From a user perspective, the message

            about "locally changed" while "remotely deleted" (or the other way

            around) was issued and the user chose to delete the file.

            The result:

                - file is recorded as touched by the merge.

          Special case [2]:

            The situation is:

              - parent-A:     no file,

              - parent-B:     file,

              - working-copy: file (same content as parent-B).

            There are three subcases depending on the ancestors contents:

            - A) the file is missing in all ancestors,

            - B) at least one ancestor has the file with filenode ≠ from parent-B,

            - C) all ancestors use the same filenode as parent-B,

            Subcase (A) is the simplest: nothing happened on the parent-A side while

            parent-B added it.

              The result:

                  - the file is not marked as touched by the merge.

            Subcase (B) is the counter part of "Special case [1]", the file was

              modified on parent-B side, while parent-A side deleted it. However this

              time, the conflict was solved by keeping the file (and its

              modification). We consider the file as "salvaged".

              The result:

                  - the file is marked as "salvaged" by the merge.

            Subcase (C) is a subtle variation of the case above. In this case, the

              file is unchanged on the parent-B side and actively removed on the

              parent-A side. So the merge machinery correctly decides it should be

              removed. However, the file was explicitly restored to its parent-B

              content before the merge was committed. The file is marked

              as salvaged too. From the merge result perspective, this is similar to

              Subcase (B); however, from the merge resolution perspective they differ

              since in (C) there was no conflict, but an obvious solution to the

              merge (that got reversed).

          Special case [3]:

            The situation is:

              - parent-A:     file,

              - parent-B:     file (different filenode as parent-A),

              - working-copy: file (same filenode as parent-B).

            This case is in theory much simpler: for this to happen, the

            filenode in parent-B must purely replace the one in parent-A (either a

            descendant, or a full new file history, see changeset). So the merge

            introduces no changes, and the file is not affected by the merge...

            However, in the wild it is possible to find commits where the above is not

            true. For example, some repositories have commits where the *new* node is an

            ancestor of the node in parent-A, or where parent-A and parent-B are two

            branches of the same file history, yet no merge-filenode was created

            (while the "merge" should have led to a "modification").

            Detecting such cases (and not recording the file as modified) would be a

            nice bonus. However, we do not do any of this yet.

          """

          md = ChangingFiles()

          m = ctx.manifest()

          p1m = p1_ctx.manifest()

          p2m = p2_ctx.manifest()

          diff_p1 = p1m.diff(m)

          diff_p2 = p2m.diff(m)

          cahs = ctx.repo().changelog.commonancestorsheads(

              p1_ctx.node(), p2_ctx.node()

          )

          if not cahs:

        Joerg Sonnenberger
    
node: import symbols explicitly...

              r46729
            
              cahs = [nullrev]

        marmoute
    
changing-files: add clean computation of changed file for merges...

              r46260
            
          mas = [ctx.repo()[r].manifest() for r in cahs]

          copy_candidates = []

          # Dealing with case 🄰 happens automatically.  Since there are no entry in

          # d1 nor d2, we won't iterate on it ever.

          # Iteration over d1 content will deal with all cases, but the one in the

          # first column of the table.

          for filename, d1 in diff_p1.items():

              d2 = diff_p2.pop(filename, None)

              if d2 is None:

                  # this deal with the first line of the table.

                  _process_other_unchanged(md, mas, filename, d1)

              else:

                  if d1[0][0] is None and d2[0][0] is None:

                      # case 🄼 — file absent from both parents: added by the merge.

                      md.mark_added(filename)

                      copy_candidates.append(filename)

                  elif d1[1][0] is None and d2[1][0] is None:

                      # case 🄻 — both deleted the file.

                      md.mark_removed(filename)

                  elif d1[1][0] is not None and d2[1][0] is not None:

                      # case 🄽 🄾 🄿

                      md.mark_merged(filename)

                      copy_candidates.append(filename)

                  else:

                      # Impossible case, the post-merge file status cannot be None on

                      # one side and Something on the other side.

                      assert False, "unreachable"

          # Iteration over remaining d2 content deal with the first column of the

          # table.

          for filename, d2 in diff_p2.items():

              _process_other_unchanged(md, mas, filename, d2)

          for filename in copy_candidates:

              copy_info = ctx[filename].renamed()

              if copy_info:

                  source, srcnode = copy_info

                  if source in p1_ctx and p1_ctx[source].filenode() == srcnode:

                      md.mark_copied_from_p1(source, filename)

                  elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:

                      md.mark_copied_from_p2(source, filename)

          return md

      def _find(manifest, filename):

          """return the associate filenode or None"""

          if filename not in manifest:

              return None

          return manifest.find(filename)[0]

      def _process_other_unchanged(md, mas, filename, diff):
          """Record in `md` what happened to a file described by a single diff.

          `diff[0][0]` is the filenode on the source side of the diff and
          `diff[1][0]` the one on the target side; `None` means the file is
          missing on that side. `mas` is an iterable of manifests that are
          searched with `_find` (presumably the merge ancestors' manifests).

          Depending on the case, the file gets marked as removed or salvaged in
          `md`, or nothing is recorded at all.
          """
          old_node, new_node = diff[0][0], diff[1][0]
          had_file = old_node is not None
          has_file = new_node is not None

          if not (had_file or has_file):
              # An impossible case, the diff algorithm should not return entry if
              # the file is missing on both side.
              assert False, "unreachable"
              return

          if had_file and has_file:
              # case 🄵 or 🄺 : simple merge without intervention
              #
              # In buggy case where the source node is not an ancestors of the
              # target node. There should have a been a new filenode created,
              # recording this as "modified". We do not deal with them yet.
              return

          if had_file:
              # the file is gone on the target side
              if any(_find(ma, filename) != old_node for ma in mas):
                  # case 🄲 or 🄷
                  md.mark_removed(filename)
              # else, we have case 🄱 or 🄶 : no change need to be recorded
              return

          # the file only exists on the target side
          if any(_find(ma, filename) is not None for ma in mas):
              # case 🄴 or 🄹
              md.mark_salvaged(filename)
          # else, we have case 🄳 or 🄸 : simple merge without intervention

      def _missing_from_all_ancestors(mas, filename):
          """Tell whether no manifest in `mas` has an entry for `filename`."""
          for ma in mas:
              if _find(ma, filename) is not None:
                  return False
          return True

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      def computechangesetfilesadded(ctx):
          """return the list of files added in a changeset

          A file from `ctx.files()` is reported when none of the parents of
          `ctx` contains it. The order of `ctx.files()` is preserved.
          """
          return [
              f for f in ctx.files() if all(f not in p for p in ctx.parents())
          ]

        marmoute
    
files: extract code for extra filtering of the `removed` entry into copies...

              r45467
            
      def get_removal_filter(ctx, x=None):
          """return a function to detect files "wrongly" detected as `removed`

          In a merge, a file missing from the result may be missing because one
          of the parents had already deleted it, or because the merge commit
          itself deleted it. The returned predicate takes a filename and tells
          the two situations apart with a simplified three-way merge of the
          manifest entries. It returns True (the merge itself did not delete the
          file) when:

          - neither parent contains the file, or
          - exactly one parent contains the file, with the same manifest entry
            as every merge ancestor, i.e. that parent left the file untouched
            while the other one deleted it.

          `x`, when given, is a `(p1, p2, m1, m2)` tuple holding the two parent
          contexts and their manifests; otherwise they are read from `ctx`.
          """
          if x is None:
              p1 = ctx.p1()
              p2 = ctx.p2()
              m1 = p1.manifest()
              m2 = p2.manifest()
          else:
              p1, p2, m1, m2 = x

          @util.cachefunc
          def mas():
              """manifests of the common ancestor heads of both parents"""
              p1n = p1.node()
              p2n = p2.node()
              cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
              # no common ancestor at all: compare against the null revision
              cahs = cahs or [nullrev]
              return [ctx.repo()[r].manifest() for r in cahs]

          def deletionfromparent(f):
              if f in m1:
                  if f in m2:
                      # both parents still have the file
                      return False
                  survivor = m1
              elif f in m2:
                  survivor = m2
              else:
                  # neither parent has the file
                  return True
              # the parent that kept the file must not have changed it
              return all(
                  f in ma and ma.find(f) == survivor.find(f) for ma in mas()
              )

          return deletionfromparent

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      def computechangesetfilesremoved(ctx):
          """return the list of files removed in a changeset

          Candidates are the entries of `ctx.files()` that are not in `ctx`.
          Those for which the predicate from `get_removal_filter` returns True
          are dropped from the result.
          """
          removed = [f for f in ctx.files() if f not in ctx]
          if not removed:
              return removed
          rf = get_removal_filter(ctx)
          return [r for r in removed if not rf(r)]

        marmoute
    
changing-files: add a utility to compute the merged files post-commit...

              r46188
            
      def computechangesetfilesmerged(ctx):
          """return the list of files merged in a changeset

          Only changesets with at least two parents can report merged files. A
          file still present in `ctx` is reported when the second parent of its
          filelog entry is not `nullid`.
          """
          merged = []
          if len(ctx.parents()) >= 2:
              for f in ctx.files():
                  if f not in ctx:
                      continue
                  fctx = ctx[f]
                  second_parent = fctx._filelog.parents(fctx._filenode)[1]
                  if second_parent != nullid:
                      merged.append(f)
          return merged

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      def computechangesetcopies(ctx):
          """return the copies data for a changeset

          The result is a `(p1copies, p2copies)` pair of dictionaries, each of
          the form `{newname: oldname}`.

          A copy is attributed to the first parent (p1 is checked before p2)
          that contains the copy source with the recorded filenode. Files
          rejected by the narrow matcher or absent from `ctx` are ignored.
          """
          p1, p2 = ctx.p1(), ctx.p2()
          copies_by_parent = ((p1, {}), (p2, {}))
          narrowmatch = ctx._repo.narrowmatch()
          for dst in ctx.files():
              if not (narrowmatch(dst) and dst in ctx):
                  continue
              copied = ctx[dst].renamed()
              if not copied:
                  continue
              src, srcnode = copied
              for parent, copies in copies_by_parent:
                  if src in parent and parent[src].filenode() == srcnode:
                      copies[dst] = src
                      break
          return copies_by_parent[0][1], copies_by_parent[1][1]

      def encodecopies(files, copies):
          """Serialize the `copies` mapping (`{dst: src}`) relative to `files`.

          Each copy becomes a `<index of dst in files>\\0<src>` record and the
          records are joined with newlines.

          Raises `error.ProgrammingError` when the number of records differs
          from the number of entries in `copies`.
          """
          items = [
              b'%d\0%s' % (i, copies[dst])
              for i, dst in enumerate(files)
              if dst in copies
          ]
          if len(items) != len(copies):
              raise error.ProgrammingError(
                  b'some copy targets missing from file list'
              )
          return b"\n".join(items)

      def decodecopies(files, data):
          """Decode a copies mapping from newline separated `<index>\\0<src>` records.

          `files` is the sequence the indices refer to. The result is a
          `{dst: src}` dictionary, where `dst` is `files[index]`. Empty `data`
          decodes to an empty dictionary.

          `None` is returned when `data` cannot be decoded: a record that is
          not made of exactly two `\\0` separated fields, a non-integer index,
          or an index outside of `files`.
          """
          try:
              copies = {}
              if not data:
                  return copies
              for l in data.split(b'\n'):
                  strindex, src = l.split(b'\0')
                  i = int(strindex)
                  if i < 0 or i >= len(files):
                      # Reject out of range indices explicitly: a negative one
                      # would otherwise silently wrap around and pick an
                      # unrelated file (same check as `decodefileindices`).
                      return None
                  copies[files[i]] = src
              return copies
          except (ValueError, IndexError):
              # Perhaps someone had chosen the same key name (e.g. "p1copies") and
              # used different syntax for the value.
              return None

      def encodefileindices(files, subset):
          """Serialize `subset` as the newline separated indices of its members
          within `files`, in `files` order.

          Members of `subset` that do not appear in `files` are left out.
          """
          wanted = set(subset)
          return b'\n'.join(
              b'%d' % i for i, f in enumerate(files) if f in wanted
          )

      def decodefileindices(files, data):
          """Decode newline separated indices into the matching entries of `files`.

          Empty `data` decodes to an empty list. `None` is returned when an
          index is not an integer or falls outside of `files`.
          """
          try:
              if not data:
                  return []
              subset = []
              for strindex in data.split(b'\n'):
                  i = int(strindex)
                  if not 0 <= i < len(files):
                      return None
                  subset.append(files[i])
              return subset
          except (ValueError, IndexError):
              # Perhaps someone had chosen the same key name (e.g. "added") and
              # used different syntax for the value.
              return None

        marmoute
    
changing-files: rework the way we store changed files in side-data...

              r46211
            
      # see mercurial/helptext/internals/revlogs.txt for details about the format
      #
      # The action is stored in the three bits above the two copy bits.
      ACTION_MASK = 0b111 << 2
      # note: an untouched file used as copy source will have `000` for this mask.
      ADDED_FLAG = 0b001 << 2
      MERGED_FLAG = 0b010 << 2
      REMOVED_FLAG = 0b011 << 2
      SALVAGED_FLAG = 0b100 << 2
      TOUCHED_FLAG = 0b101 << 2

      # The copy information is stored in the two lowest bits.
      COPIED_MASK = 0b11
      COPIED_FROM_P1_FLAG = 0b10
      COPIED_FROM_P2_FLAG = 0b11

      # structure is <flag><filename-end><copy-source>
      INDEX_HEADER = struct.Struct(">L")
      INDEX_ENTRY = struct.Struct(">bLL")

        marmoute
    
sidedata: rename `encode_copies_sidedata` to `encode_files_sidedata`...

              r46143
            
      def encode_files_sidedata(files):
          """Encode the changed-files information `files` into a sidedata map.

          The encoded files are the touched ones plus every copy source, in
          sorted order. The binary payload is made of:

          - a header holding the number of files,
          - one index entry per file: a flag byte (action bits and copy bits),
            the cumulated length of the filenames up to and including this
            one, and the index of the copy source (0 when there is none),
          - all the filenames, concatenated.

          Returns a dictionary with the payload under `sidedatamod.SD_FILES`.
          """
          all_files = set(files.touched)
          all_files.update(files.copied_from_p1.values())
          all_files.update(files.copied_from_p2.values())
          all_files = sorted(all_files)

          file_idx = dict((f, i) for (i, f) in enumerate(all_files))
          # entries without copy source point to index 0
          file_idx[None] = 0

          header = INDEX_HEADER.pack(len(all_files))
          entries = []
          filename_end = 0
          for f in all_files:
              filename_end += len(f)

              # a file has a single action, the first matching category wins
              if f in files.added:
                  action = ADDED_FLAG
              elif f in files.merged:
                  action = MERGED_FLAG
              elif f in files.removed:
                  action = REMOVED_FLAG
              elif f in files.salvaged:
                  action = SALVAGED_FLAG
              elif f in files.touched:
                  action = TOUCHED_FLAG
              else:
                  # untouched file only listed as a copy source
                  action = 0

              if f in files.copied_from_p1:
                  copy_flag = COPIED_FROM_P1_FLAG
                  source = files.copied_from_p1.get(f)
              elif f in files.copied_from_p2:
                  source = files.copied_from_p2.get(f)
                  copy_flag = COPIED_FROM_P2_FLAG
              else:
                  copy_flag = 0
                  source = None

              entries.append(
                  INDEX_ENTRY.pack(
                      action | copy_flag, filename_end, file_idx[source]
                  )
              )

          return {sidedatamod.SD_FILES: b''.join([header] + entries + all_files)}

        marmoute
    
commitctx: extract sidedata encoding inside its own function...

              r45885
            
        marmoute
    
changing-files: drop the now useless changelogrevision argument...

              r46212
            
      def decode_files_sidedata(sidedata):
          """Rebuild a `ChangingFiles` object from a sidedata map.

          The payload stored under `sidedatamod.SD_FILES` is read as a header
          (number of files), followed by one index entry per file, followed by
          the concatenated filenames. When the key is missing, an empty
          `ChangingFiles` is returned.
          """
          md = ChangingFiles()
          raw = sidedata.get(sidedatamod.SD_FILES)
          if raw is None:
              return md

          assert len(raw) >= INDEX_HEADER.size
          total_files = INDEX_HEADER.unpack_from(raw, 0)[0]
          # the filenames start right after the last index entry
          names_base = INDEX_HEADER.size + (INDEX_ENTRY.size * total_files)
          assert len(raw) >= names_base

          action_markers = {
              ADDED_FLAG: md.mark_added,
              MERGED_FLAG: md.mark_merged,
              REMOVED_FLAG: md.mark_removed,
              SALVAGED_FLAG: md.mark_salvaged,
              TOUCHED_FLAG: md.mark_touched,
          }
          copy_markers = {
              COPIED_FROM_P1_FLAG: md.mark_copied_from_p1,
              COPIED_FROM_P2_FLAG: md.mark_copied_from_p2,
          }

          all_files = []
          pending_copies = []
          entry_offset = INDEX_HEADER.size
          name_start = names_base
          for idx in range(total_files):
              flag, name_end, copy_idx = INDEX_ENTRY.unpack_from(raw, entry_offset)
              entry_offset += INDEX_ENTRY.size

              # stored ends are relative to the start of the filenames area
              name_end += names_base
              filename = raw[name_start:name_end]
              assert len(filename) == name_end - name_start
              name_start = name_end
              all_files.append(filename)

              mark = action_markers.get(flag & ACTION_MASK)
              if mark is not None:
                  mark(filename)

              copied = copy_markers.get(flag & COPIED_MASK)
              if copied is not None:
                  pending_copies.append((copied, filename, copy_idx))

          # Copy sources are referenced by index, so they are only resolved once
          # every filename has been read.
          for copied, filename, copy_idx in pending_copies:
              copied(all_files[copy_idx], filename)

          return md

        marmoute
    
sidedata: add a `decode_files_sidedata` function...

              r46145
            
        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      def _getsidedata(srcrepo, rev):
          """Compute the files sidedata of revision `rev` of `srcrepo`.

          Returns a `(sidedata-map, has_copies_info)` pair.
          """
          changes = compute_all_files_changes(srcrepo[rev])
          return encode_files_sidedata(changes), changes.has_copies_info

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
        Raphaël Gomès
    
sidedata-exchange: add `wanted_sidedata` and `sidedata_computers` to repos...

              r47447
            
      def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
          """Return the files sidedata map computed for `rev` in `repo`.

          `revlog` and `existing_sidedata` are not used.
          """
          sidedata, _has_copies_info = _getsidedata(repo, rev)
          return sidedata

      def set_sidedata_spec_for_repo(repo):
          """Register the files sidedata and its computer on `repo`.

          Nothing is registered unless `repo` has the
          `requirementsmod.COPIESSDC_REQUIREMENT` requirement.
          """
          if requirementsmod.COPIESSDC_REQUIREMENT not in repo.requirements:
              return
          repo.register_wanted_sidedata(sidedatamod.SD_FILES)
          repo.register_sidedata_computer(
              b"changelog",
              sidedatamod.SD_FILES,
              (sidedatamod.SD_FILES,),
              copies_sidedata_computer,
          )

        marmoute
    
metadata: move computation related to files touched in a dedicated module...

              r45466
            
      def getsidedataadder(srcrepo, destrepo):
          """Return the sidedata companion used to add the files sidedata.

          The worker based version is picked when the
          `experimental.worker.repository-upgrade` option is set and we are not
          running on Windows; the simple version is used otherwise.
          """
          use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
          if use_w and not pycompat.iswindows:
              return _get_worker_sidedata_adder(srcrepo, destrepo)
          return _get_simple_sidedata_adder(srcrepo, destrepo)

      def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):

          """The function used by worker precomputing sidedata

          It read an input queue containing revision numbers

          It write in an output queue containing (rev, <sidedata-map>)

          The `None` input value is used as a stop signal.

          The `tokens` semaphore is user to avoid having too many unprocessed

          entries. The workers needs to acquire one token before fetching a task.

          They will be released by the consumer of the produced data.

          """

          tokens.acquire()

          rev = revs_queue.get()

          while rev is not None:

              data = _getsidedata(srcrepo, rev)

              sidedata_queue.put((rev, data))

              tokens.acquire()

              rev = revs_queue.get()

          # processing of `None` is completed, release the token.

          tokens.release()

      # number of tokens each worker adds to the shared semaphore
      BUFF_PER_WORKER = 50


      def _get_worker_sidedata_adder(srcrepo, destrepo):
          """The parallel version of the sidedata computation

          A pool of worker processes is spawned to precompute a buffer of
          sidedata before it is actually requested. The returned companion
          function hands the precomputed results out, revision by revision.
          """
          # avoid circular import copies -> scmutil -> worker -> copies
          from . import worker

          nbworkers = worker._numworkers(srcrepo.ui)

          tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
          revsq = multiprocessing.Queue()
          sidedataq = multiprocessing.Queue()

          assert srcrepo.filtername is None
          # queue all tasks beforehand, revision numbers are small and it make
          # synchronisation simpler
          #
          # Since the computation for each node can be quite expensive, the
          # overhead of using a single queue is not relevant. In practice, most
          # computation are fast but some are very expensive and dominate all
          # the other smaller cost.
          for r in srcrepo.changelog.revs():
              revsq.put(r)
          # queue the "no more tasks" markers, one per worker
          for _ in range(nbworkers):
              revsq.put(None)

          worker_args = (srcrepo, revsq, sidedataq, tokens)
          allworkers = []
          for _ in range(nbworkers):
              w = multiprocessing.Process(
                  target=_sidedata_worker, args=worker_args
              )
              allworkers.append(w)
              w.start()

          # dictionary storing results received for other revisions than the one
          # we are looking for. For example, if we need the sidedata map for 42
          # and 43 is received, we shelve 43 for later use.
          staging = {}

          def sidedata_companion(revlog, rev):
              data = {}, False
              if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
                  # Is the data previously shelved ?
                  data = staging.pop(rev, None)
                  if data is None:
                      # look at the queued results until we find the one we are
                      # looking for (shelve the other ones)
                      while True:
                          r, data = sidedataq.get()
                          if r == rev:
                              break
                          staging[r] = data
                  # one result has been consumed, let a worker fetch a new task
                  tokens.release()
              sidedata, has_copies_info = data
              new_flag = (
                  sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
              )
              return False, (), sidedata, new_flag, 0

          return sidedata_companion

      def _get_simple_sidedata_adder(srcrepo, destrepo):
          """The simple version of the sidedata computation

          The returned companion computes the sidedata on request, in the
          calling thread."""

          def sidedatacompanion(revlog, rev):
              if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
                  sidedata, has_copies_info = _getsidedata(srcrepo, rev)
              else:
                  # nothing to add to other revlogs
                  sidedata, has_copies_info = {}, False
              new_flag = (
                  sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
              )
              return False, (), sidedata, new_flag, 0

          return sidedatacompanion

      def getsidedataremover(srcrepo, destrepo):
          """Return a sidedata companion dropping the copies related sidedata.

          For changelog revisions flagged with `REVIDX_SIDEDATA`, the companion
          lists the sidedata keys below for removal. It always passes
          `REVIDX_HASCOPIESINFO` as last item of the returned tuple.
          """

          def sidedatacompanion(revlog, rev):
              is_changelog = util.safehasattr(revlog, 'filteredrevs')
              if is_changelog and revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                  # NOTE(review): `encode_files_sidedata` stores its data under
                  # `SD_FILES`, which is not listed here -- confirm whether it
                  # should be removed too.
                  f = (
                      sidedatamod.SD_P1COPIES,
                      sidedatamod.SD_P2COPIES,
                      sidedatamod.SD_FILESADDED,
                      sidedatamod.SD_FILESREMOVED,
                  )
              else:
                  f = ()
              return False, f, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO

          return sidedatacompanion

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages