changing-files: add a shorthand property to check for copy relevant info...
marmoute - r46320:232c88dd default
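In short, a hedged before/after sketch of the call site in changelog.add() (both versions appear verbatim in the diff below; this sketch adds nothing beyond the patch itself):

    # before: the call site enumerates every copy-relevant field by hand
    if (
        files.removed
        or files.merged
        or files.salvaged
        or files.copied_from_p1
        or files.copied_from_p2
    ):
        flags |= flagutil.REVIDX_HASCOPIESINFO

    # after: the new shorthand property on ChangingFiles performs the same check
    if files.has_copies_info:
        flags |= flagutil.REVIDX_HASCOPIESINFO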
mercurial/changelog.py
@@ -1,618 +1,612 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)
from .revlogutils import flagutil

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')

def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ... ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ... b'baz': chr(92) + chr(0) + b'2'})
    ... ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')

class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)

class _divertopener(object):
    def __init__(self, opener, target):
        self._opener = opener
        self._target = target

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        return self._opener(name + b".a", mode, **kwargs)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def _delay(name, mode=b'r', checkambig=False, **kwargs):
        if name != target:
            return opener(name, mode, **kwargs)
        assert not kwargs
        return appender(opener, name, mode, buf)

    return _delay

@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')

class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
        '_changes',
    )

    def __new__(cls, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd
        self._changes = None

        return self

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def changes(self):
        if self._changes is not None:
            return self._changes
        if self._cpsd:
            changes = metadata.decode_files_sidedata(self._sidedata)
        else:
            changes = metadata.ChangingFiles(
                touched=self.files or (),
                added=self.filesadded or (),
                removed=self.filesremoved or (),
                p1_copies=self.p1copies or {},
                p2_copies=self.p2copies or {},
            )
        self._changes = changes
        return changes

    @property
    def files(self):
        if self._cpsd:
            return sorted(self.changes.touched)
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            return self.changes.added
        else:
            rawindices = self.extra.get(b'filesadded')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            return self.changes.removed
        else:
            rawindices = self.extra.get(b'filesremoved')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            return self.changes.copied_from_p1
        else:
            rawcopies = self.extra.get(b'p1copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            return self.changes.copied_from_p2
        else:
            rawcopies = self.extra.get(b'p2copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])

class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hook processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists(b'00changelog.i.a'):
            indexfile = b'00changelog.i.a'
        else:
            indexfile = b'00changelog.i'

        datafile = b'00changelog.d'
        revlog.revlog.__init__(
            self,
            opener,
            indexfile,
            datafile=datafile,
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + b'.a'):
                    self._realopener.unlink(self.indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self.indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
        self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d, s = self._revisiondata(node)
        c = changelogrevision(
            d, s, self._copiesstorage == b'changeset-sidedata'
        )
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text, sidedata = self._revisiondata(nodeorrev)
        return changelogrevision(
            text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        flags = 0
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
-            if (
-                files.removed
-                or files.merged
-                or files.salvaged
-                or files.copied_from_p1
-                or files.copied_from_p2
-            ):
+            if files.has_copies_info:
                flags |= flagutil.REVIDX_HASCOPIESINFO
            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        return self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
        )

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", e.g.: unbundle of known rev.
        #
        # We track them in a list to preserve their order from the source bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(self.rev(node))
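For context between the two files: the revision text that changelog.add() assembles above is exactly what changelogrevision.__new__ parses back. A minimal sketch of that layout, with purely illustrative hashes, names and message (not taken from any real repository):

    # entry layout, per `l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]`
    # in add(): manifest hex, user, "time tz [extra]", one touched file per
    # line, a blank line, then the description.
    entry = b"\n".join(
        [
            b"0123456789abcdef0123456789abcdef01234567",  # manifest node (made up)
            b"Alice <alice@example.com>",  # user (made up)
            b"1600000000 0",  # time and timezone, no extra here
            b"README",  # touched files, sorted, one per line
            b"setup.py",
            b"",  # the empty element yields the "\n\n" separator
            b"an illustrative commit message",
        ]
    )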
mercurial/metadata.py
@@ -1,918 +1,928 @@
# coding: utf8
# metadata.py -- code related to various metadata computation and access.
#
# Copyright 2019 Google, Inc <martinvonz@google.com>
# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import multiprocessing
import struct

from . import (
    error,
    node,
    pycompat,
    util,
)

from .revlogutils import (
    flagutil as sidedataflag,
    sidedata as sidedatamod,
)

class ChangingFiles(object):
    """A class recording the changes made to files by a changeset

    Actions performed on files are gathered into the following sets:

    - added: files actively added in the changeset.
    - merged: files whose history got merged
    - removed: files removed in the revision
    - salvaged: files that might have been deleted by a merge but were not
    - touched: files affected by the merge

    and copies information is held by 2 mappings

    - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies
    - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies

    See their inline help for details.
    """

    def __init__(
        self,
        touched=None,
        added=None,
        removed=None,
        merged=None,
        salvaged=None,
        p1_copies=None,
        p2_copies=None,
    ):
        self._added = set(() if added is None else added)
        self._merged = set(() if merged is None else merged)
        self._removed = set(() if removed is None else removed)
        self._touched = set(() if touched is None else touched)
        self._salvaged = set(() if salvaged is None else salvaged)
        self._touched.update(self._added)
        self._touched.update(self._merged)
        self._touched.update(self._removed)
        self._p1_copies = dict(() if p1_copies is None else p1_copies)
        self._p2_copies = dict(() if p2_copies is None else p2_copies)

    def __eq__(self, other):
        return (
            self.added == other.added
            and self.merged == other.merged
            and self.removed == other.removed
            and self.salvaged == other.salvaged
            and self.touched == other.touched
            and self.copied_from_p1 == other.copied_from_p1
            and self.copied_from_p2 == other.copied_from_p2
        )

+    @property
+    def has_copies_info(self):
+        return bool(
+            self.removed
+            or self.merged
+            or self.salvaged
+            or self.copied_from_p1
+            or self.copied_from_p2
+        )
+
    @util.propertycache
    def added(self):
        """files actively added in the changeset

        Any file present in that revision that was absent in all the changeset's
        parents.

        In case of merge, this means a file absent in one of the parents but
        existing in the other will *not* be contained in this set. (They were
        added by an ancestor)
        """
        return frozenset(self._added)

    def mark_added(self, filename):
        if 'added' in vars(self):
            del self.added
        self._added.add(filename)
        self.mark_touched(filename)

    def update_added(self, filenames):
        for f in filenames:
            self.mark_added(f)

    @util.propertycache
    def merged(self):
        """files actively merged during a merge

        Any modified files which had modifications on both sides that needed merging.

        In this case a new filenode was created and it has two parents.
        """
        return frozenset(self._merged)

    def mark_merged(self, filename):
        if 'merged' in vars(self):
            del self.merged
        self._merged.add(filename)
        self.mark_touched(filename)

    def update_merged(self, filenames):
        for f in filenames:
            self.mark_merged(f)

    @util.propertycache
    def removed(self):
        """files actively removed by the changeset

        In case of merge this will only contain the set of files removing "new"
        content. For any file absent in the current changeset:

        a) If the file exists in both parents, it is clearly "actively" removed
        by this changeset.

        b) If a file exists in only one parent and in none of the common
        ancestors, then the file was newly added in one of the merged branches
        and then got "actively" removed.

        c) If a file exists in only one parent and at least one of the common
        ancestors using the same filenode, then the file was unchanged on one
        side and deleted on the other side. The merge "passively" propagated
        that deletion, but didn't "actively" remove the file. In this case the
        file is *not* included in the `removed` set.

        d) If a file exists in only one parent and at least one of the common
        ancestors using a different filenode, then the file was changed on one
        side and removed on the other side. The merge process "actively"
        decided to drop the new change and delete the file. Unlike in the
        previous case, (c), the file is included in the `removed` set.

        Summary table for merge:

        case | exists in parents | exists in gca || removed
         (a) |       both        |       *       ||   yes
         (b) |        one        |      none     ||   yes
         (c) |        one        | same filenode ||   no
         (d) |        one        |  new filenode ||   yes
        """
        return frozenset(self._removed)

    def mark_removed(self, filename):
        if 'removed' in vars(self):
            del self.removed
        self._removed.add(filename)
        self.mark_touched(filename)

    def update_removed(self, filenames):
        for f in filenames:
            self.mark_removed(f)

    @util.propertycache
    def salvaged(self):
        """files that might have been deleted by a merge, but still exist.

        During a merge, the manifest merging might select some files for
        removal, or for a removed/changed conflict. If at commit time the file
        still exists, its removal was "reverted" and the file is "salvaged"
        """
        return frozenset(self._salvaged)

    def mark_salvaged(self, filename):
        if "salvaged" in vars(self):
            del self.salvaged
        self._salvaged.add(filename)
        self.mark_touched(filename)

    def update_salvaged(self, filenames):
        for f in filenames:
            self.mark_salvaged(f)

    @util.propertycache
    def touched(self):
        """files either actively modified, added or removed"""
        return frozenset(self._touched)

    def mark_touched(self, filename):
        if 'touched' in vars(self):
            del self.touched
        self._touched.add(filename)

    def update_touched(self, filenames):
        for f in filenames:
            self.mark_touched(f)

    @util.propertycache
    def copied_from_p1(self):
        return self._p1_copies.copy()

    def mark_copied_from_p1(self, source, dest):
        if 'copied_from_p1' in vars(self):
            del self.copied_from_p1
        self._p1_copies[dest] = source

    def update_copies_from_p1(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p1(source, dest)

    @util.propertycache
    def copied_from_p2(self):
        return self._p2_copies.copy()

    def mark_copied_from_p2(self, source, dest):
        if 'copied_from_p2' in vars(self):
            del self.copied_from_p2
        self._p2_copies[dest] = source

    def update_copies_from_p2(self, copies):
        for dest, source in copies.items():
            self.mark_copied_from_p2(source, dest)


def compute_all_files_changes(ctx):
    """compute the files changed by a revision"""
    p1 = ctx.p1()
    p2 = ctx.p2()
    if p1.rev() == node.nullrev and p2.rev() == node.nullrev:
        return _process_root(ctx)
    elif p1.rev() != node.nullrev and p2.rev() == node.nullrev:
        return _process_linear(p1, ctx)
    elif p1.rev() == node.nullrev and p2.rev() != node.nullrev:
        # In the wild, one can encounter changesets where p1 is null but p2 is not
        return _process_linear(p1, ctx, parent=2)
    elif p1.rev() == p2.rev():
        # In the wild, one can encounter such "non-merge" changesets
        return _process_linear(p1, ctx)
    else:
        return _process_merge(p1, p2, ctx)


def _process_root(ctx):
    """compute the appropriate changed files for a changeset with no parents
    """
    # Simple, there was nothing before it, so everything is added.
    md = ChangingFiles()
    manifest = ctx.manifest()
    for filename in manifest:
        md.mark_added(filename)
    return md


def _process_linear(parent_ctx, children_ctx, parent=1):
    """compute the appropriate changed files for a changeset with a single parent
    """
    md = ChangingFiles()
    parent_manifest = parent_ctx.manifest()
    children_manifest = children_ctx.manifest()

    copies_candidate = []

    for filename, d in parent_manifest.diff(children_manifest).items():
        if d[1][0] is None:
            # no filenode for the "new" value, file is absent
            md.mark_removed(filename)
        else:
            copies_candidate.append(filename)
            if d[0][0] is None:
                # no filenode for the "old" value, file was absent
                md.mark_added(filename)
            else:
                # filenode for both "old" and "new"
                md.mark_touched(filename)

    if parent == 1:
        copied = md.mark_copied_from_p1
    elif parent == 2:
        copied = md.mark_copied_from_p2
    else:
        assert False, "bad parent value %d" % parent

    for filename in copies_candidate:
        copy_info = children_ctx[filename].renamed()
        if copy_info:
            source, srcnode = copy_info
            copied(source, filename)

    return md


def _process_merge(p1_ctx, p2_ctx, ctx):
    """compute the appropriate changed files for a changeset with two parents

    This is a more advanced case. The information we need to record is
    summarised in the following table:

    β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
    β”‚ diff β•² diff  β”‚      ΓΈ       β”‚ (Some, None) β”‚ (None, Some) β”‚ (Some, Some) β”‚
    β”‚  p2  β•²  p1   β”‚              β”‚              β”‚              β”‚              β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚              β”‚              β”‚πŸ„± No Changes β”‚πŸ„³ No Changes β”‚              β”‚
    β”‚      ΓΈ       β”‚πŸ„° No Changes β”‚      OR      β”‚      OR      β”‚πŸ„΅ No Changes β”‚
    β”‚              β”‚              β”‚πŸ„² Deleted[1] β”‚πŸ„΄ Salvaged[2]β”‚     [3]      β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚              β”‚πŸ„Ά No Changes β”‚              β”‚              β”‚              β”‚
    β”‚ (Some, None) β”‚      OR      β”‚πŸ„» Deleted    β”‚      ΓΈ       β”‚      ΓΈ       β”‚
    β”‚              β”‚πŸ„· Deleted[1] β”‚              β”‚              β”‚              β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚              β”‚πŸ„Έ No Changes β”‚              β”‚              β”‚              β”‚
    β”‚ (None, Some) β”‚      OR      β”‚      ΓΈ       β”‚πŸ„Ό Added      β”‚πŸ„½ Merged     β”‚
    β”‚              β”‚πŸ„Ή Salvaged[2]β”‚              β”‚  (copied?)   β”‚  (copied?)   β”‚
    β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
    β”‚              β”‚              β”‚              β”‚              β”‚              β”‚
    β”‚ (Some, Some) β”‚πŸ„Ί No Changes β”‚      ΓΈ       β”‚πŸ„Ύ Merged     β”‚πŸ„Ώ Merged     β”‚
    β”‚              β”‚     [3]      β”‚              β”‚  (copied?)   β”‚  (copied?)   β”‚
    β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜

    Special case [1]:

    The situation is:
    - parent-A: file exists,
    - parent-B: no file,
    - working-copy: no file.

    Detecting a "deletion" will depend on the presence of actual change on
    the "parent-A" branch:

    Subcase πŸ„± or πŸ„Ά : if the state of the file in "parent-A" is unchanged
    compared to the merge ancestors, then the parent-A branch left the file
    untouched while parent-B deleted it. We simply apply the change from the
    "parent-B" branch; the file was automatically dropped.
    The result is:
    - file is not recorded as touched by the merge.

    Subcase πŸ„² or πŸ„· : otherwise, the changes from the parent-A branch were
    explicitly dropped and the file was "deleted again". From a user
    perspective, the message about "locally changed" while "remotely deleted"
    (or the other way around) was issued and the user chose to delete the file.
    The result:
    - file is recorded as touched by the merge.


    Special case [2]:

    The situation is:
    - parent-A: no file,
    - parent-B: file,
    - working-copy: file (same content as parent-B).

    There are three subcases depending on the ancestors' contents
364 There are three subcases depending on the ancestors contents:
355
365
356 - A) the file is missing in all ancestors,
366 - A) the file is missing in all ancestors,
357 - B) at least one ancestor has the file with filenode β‰  from parent-B,
367 - B) at least one ancestor has the file with filenode β‰  from parent-B,
358 - C) all ancestors use the same filenode as parent-B,
368 - C) all ancestors use the same filenode as parent-B,
359
369
360 Subcase (A) is the simpler, nothing happend on parent-A side while
370 Subcase (A) is the simpler, nothing happend on parent-A side while
361 parent-B added it.
371 parent-B added it.
362
372
363 The result:
373 The result:
364 - the file is not marked as touched by the merge.
374 - the file is not marked as touched by the merge.
365
375
366 Subcase (B) is the counter part of "Special case [1]", the file was
376 Subcase (B) is the counter part of "Special case [1]", the file was
367 modified on parent-B side, while parent-A side deleted it. However this
377 modified on parent-B side, while parent-A side deleted it. However this
368 time, the conflict was solved by keeping the file (and its
378 time, the conflict was solved by keeping the file (and its
369 modification). We consider the file as "salvaged".
379 modification). We consider the file as "salvaged".
370
380
371 The result:
381 The result:
372 - the file is marked as "salvaged" by the merge.
382 - the file is marked as "salvaged" by the merge.
373
383
374 Subcase (C) is subtle variation of the case above. In this case, the
384 Subcase (C) is subtle variation of the case above. In this case, the
375 file in unchanged on the parent-B side and actively removed on the
385 file in unchanged on the parent-B side and actively removed on the
376 parent-A side. So the merge machinery correctly decide it should be
386 parent-A side. So the merge machinery correctly decide it should be
377 removed. However, the file was explicitly restored to its parent-B
387 removed. However, the file was explicitly restored to its parent-B
378 content before the merge was commited. The file is be marked
388 content before the merge was commited. The file is be marked
379 as salvaged too. From the merge result perspective, this is similar to
389 as salvaged too. From the merge result perspective, this is similar to
380 Subcase (B), however from the merge resolution perspective they differ
390 Subcase (B), however from the merge resolution perspective they differ
381 since in (C), there was some conflict not obvious solution to the
391 since in (C), there was some conflict not obvious solution to the
382 merge (That got reversed)
392 merge (That got reversed)
383
393
    Special case [3]:

    The situation is:
    - parent-A: file,
    - parent-B: file (different filenode than parent-A),
    - working-copy: file (same filenode as parent-B).

    This case is in theory much simpler: for this to happen, the filenode
    in parent-B must be purely replacing the one in parent-A (either a
    descendant, or a fully new file history; see changeset). So the merge
    introduces no changes, and the file is not affected by the merge...

    However, in the wild it is possible to find commits where the above
    does not hold. For example, some repositories have commits where the
    *new* node is an ancestor of the node in parent-A, or where parent-A
    and parent-B are two branches of the same file history, yet no merge
    filenode was created (while the "merge" should have led to a
    "modification").

    Detecting such cases (and not recording the file as modified) would be
    a nice bonus. However, we do not do any of this yet.
    """

    md = ChangingFiles()

    m = ctx.manifest()
    p1m = p1_ctx.manifest()
    p2m = p2_ctx.manifest()
    diff_p1 = p1m.diff(m)
    diff_p2 = p2m.diff(m)

    cahs = ctx.repo().changelog.commonancestorsheads(
        p1_ctx.node(), p2_ctx.node()
    )
    if not cahs:
        cahs = [node.nullrev]
    mas = [ctx.repo()[r].manifest() for r in cahs]

    copy_candidates = []

    # Dealing with case πŸ„° happens automatically. Since there are no entries
    # in diff_p1 nor diff_p2 for such files, we will never iterate over them.

    # Iteration over diff_p1 content will deal with all cases except those
    # in the first column of the table.
    for filename, d1 in diff_p1.items():

        d2 = diff_p2.pop(filename, None)

        if d2 is None:
            # this deals with the first line of the table.
            _process_other_unchanged(md, mas, filename, d1)
        else:

            if d1[0][0] is None and d2[0][0] is None:
                # case πŸ„Ό β€” the file is absent from both parents: it was
                # added by the merge.
                md.mark_added(filename)
                copy_candidates.append(filename)
            elif d1[1][0] is None and d2[1][0] is None:
                # case πŸ„» β€” both deleted the file.
                md.mark_removed(filename)
            elif d1[1][0] is not None and d2[1][0] is not None:
                # case πŸ„½ πŸ„Ύ πŸ„Ώ
                md.mark_merged(filename)
                copy_candidates.append(filename)
            else:
                # Impossible case: the post-merge file status cannot be None
                # on one side and Something on the other side.
                assert False, "unreachable"

    # Iteration over the remaining diff_p2 content deals with the first
    # column of the table.
    for filename, d2 in diff_p2.items():
        _process_other_unchanged(md, mas, filename, d2)

    for filename in copy_candidates:
        copy_info = ctx[filename].renamed()
        if copy_info:
            source, srcnode = copy_info
            if source in p1_ctx and p1_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p1(source, filename)
            elif source in p2_ctx and p2_ctx[source].filenode() == srcnode:
                md.mark_copied_from_p2(source, filename)
    return md

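# A hedged sketch of the data handled above (for illustration only; the
# exact entry shape comes from manifest.diff()): each diff entry maps a
# filename to `((old_node, old_flag), (new_node, new_flag))`, so
# `d1[0][0] is None` means "absent from p1" and `d1[1][0] is None` means
# "absent from the merge result":
#
#     d1 = ((None, b''), (b'\x12' * 20, b''))   # added relative to p1
#     d2 = ((None, b''), (b'\x12' * 20, b''))   # added relative to p2
#     # -> both source nodes are None: case πŸ„Ό, the merge adds the file.

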
def _find(manifest, filename):
    """return the associated filenode or None"""
    if filename not in manifest:
        return None
    return manifest.find(filename)[0]

def _process_other_unchanged(md, mas, filename, diff):
    source_node = diff[0][0]
    target_node = diff[1][0]

    if source_node is not None and target_node is None:
        if any(_find(ma, filename) != source_node for ma in mas):
            # case πŸ„² or πŸ„·
            md.mark_removed(filename)
        # else, we have case πŸ„± or πŸ„Ά : no change needs to be recorded
    elif source_node is None and target_node is not None:
        if any(_find(ma, filename) is not None for ma in mas):
            # case πŸ„΄ or πŸ„Ή
            md.mark_salvaged(filename)
        # else, we have case πŸ„³ or πŸ„Έ : simple merge without intervention
    elif source_node is not None and target_node is not None:
        # case πŸ„΅ or πŸ„Ί : simple merge without intervention
        #
        # In the buggy case where source_node is not an ancestor of
        # target_node, a new filenode should have been created, recording
        # this as "modified". We do not deal with this yet.
        pass
    else:
        # An impossible case: the diff algorithm should not return an entry
        # if the file is missing on both sides.
        assert False, "unreachable"

def _missing_from_all_ancestors(mas, filename):
    return all(_find(ma, filename) is None for ma in mas)

def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset"""
    added = []
    for f in ctx.files():
        if not any(f in p for p in ctx.parents()):
            added.append(f)
    return added

def get_removal_filter(ctx, x=None):
    """return a function to detect files "wrongly" detected as `removed`

    When a file is removed relative to p1 in a merge, this
    function determines whether the absence is due to a
    deletion from a parent, or whether the merge commit
    itself deletes the file. We decide this by doing a
    simplified three way merge of the manifest entry for
    the file. There are two ways we decide the merge
    itself didn't delete a file:
    - neither parent (nor the merge) contains the file
    - exactly one parent contains the file, and that
      parent has the same filelog entry as the merge
      ancestor (or all of them if there are two). In other
      words, that parent left the file unchanged while the
      other one deleted it.
    One way to think about this is that deleting a file is
    similar to emptying it, so the list of changed files
    should be similar either way. The computation
    described above is not done directly in _filecommit
    when creating the list of changed files, however
    it does something very similar by comparing filelog
    nodes.
    """

    if x is not None:
        p1, p2, m1, m2 = x
    else:
        p1 = ctx.p1()
        p2 = ctx.p2()
        m1 = p1.manifest()
        m2 = p2.manifest()

    @util.cachefunc
    def mas():
        p1n = p1.node()
        p2n = p2.node()
        cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n)
        if not cahs:
            cahs = [node.nullrev]
        return [ctx.repo()[r].manifest() for r in cahs]

    def deletionfromparent(f):
        if f in m1:
            return f not in m2 and all(
                f in ma and ma.find(f) == m1.find(f) for ma in mas()
            )
        elif f in m2:
            return all(f in ma and ma.find(f) == m2.find(f) for ma in mas())
        else:
            return True

    return deletionfromparent

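# A worked illustration of `deletionfromparent` (hypothetical filenodes,
# not from the source): suppose file `f` has node N in m1, is absent from
# m2, and every merge ancestor also has `f` with node N. Then parent-2
# deleted an otherwise untouched file, the merge itself did not, and
# `deletionfromparent(f)` returns True, so `f` is filtered out of the
# "removed" list computed below.

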
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset"""
    removed = []
    for f in ctx.files():
        if f not in ctx:
            removed.append(f)
    if removed:
        rf = get_removal_filter(ctx)
        removed = [r for r in removed if not rf(r)]
    return removed

def computechangesetfilesmerged(ctx):
    """return the list of files merged in a changeset"""
    merged = []
    if len(ctx.parents()) < 2:
        return merged
    for f in ctx.files():
        if f in ctx:
            fctx = ctx[f]
            parents = fctx._filelog.parents(fctx._filenode)
            if parents[1] != node.nullid:
                merged.append(f)
    return merged

def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies

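# Hedged usage sketch for the compute* helpers above (`repo` and the
# revision are placeholders, not from the source):
#
#     ctx = repo[b'tip']
#     added = computechangesetfilesadded(ctx)      # files in no parent
#     removed = computechangesetfilesremoved(ctx)  # files the merge removed
#     merged = computechangesetfilesmerged(ctx)    # filelog merge points
#     p1copies, p2copies = computechangesetcopies(ctx)

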
def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)

def decodecopies(files, data):
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None

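# A minimal round-trip sketch for the two helpers above (inputs are made
# up for illustration): copy targets are encoded as indices into `files`,
# so `files` must list every destination.
#
#     files = [b'a', b'b-renamed']
#     data = encodecopies(files, {b'b-renamed': b'b'})   # b'1\x00b'
#     assert decodecopies(files, data) == {b'b-renamed': b'b'}

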
def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append(b'%d' % i)
    return b'\n'.join(indices)

def decodefileindices(files, data):
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None

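# Round-trip sketch (made-up values): a subset of `files` is stored as a
# newline-separated list of indices.
#
#     files = [b'a', b'b', b'c']
#     data = encodefileindices(files, [b'a', b'c'])   # b'0\n2'
#     assert decodefileindices(files, data) == [b'a', b'c']

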
# see mercurial/helptext/internals/revlogs.txt for details about the format

ACTION_MASK = int("111" "00", 2)
# note: an untouched file used as a copy source will appear as `000` for
# this mask.
ADDED_FLAG = int("001" "00", 2)
MERGED_FLAG = int("010" "00", 2)
REMOVED_FLAG = int("011" "00", 2)
SALVAGED_FLAG = int("100" "00", 2)
TOUCHED_FLAG = int("101" "00", 2)

COPIED_MASK = int("11", 2)
COPIED_FROM_P1_FLAG = int("10", 2)
COPIED_FROM_P2_FLAG = int("11", 2)

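# How a flag byte decomposes (sketch): the action lives in bits 2-4 and
# the copy information in bits 0-1, so both survive being OR-ed together:
#
#     flag = MERGED_FLAG | COPIED_FROM_P1_FLAG     # 0b01010
#     assert flag & ACTION_MASK == MERGED_FLAG
#     assert flag & COPIED_MASK == COPIED_FROM_P1_FLAG
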
# structure is <flag><filename-end><copy-source>
INDEX_HEADER = struct.Struct(">L")
INDEX_ENTRY = struct.Struct(">bLL")

def encode_files_sidedata(files):
    all_files = set(files.touched)
    all_files.update(files.copied_from_p1.values())
    all_files.update(files.copied_from_p2.values())
    all_files = sorted(all_files)
    file_idx = {f: i for (i, f) in enumerate(all_files)}
    file_idx[None] = 0

    chunks = [INDEX_HEADER.pack(len(all_files))]

    filename_length = 0
    for f in all_files:
        filename_size = len(f)
        filename_length += filename_size
        flag = 0
        if f in files.added:
            flag |= ADDED_FLAG
        elif f in files.merged:
            flag |= MERGED_FLAG
        elif f in files.removed:
            flag |= REMOVED_FLAG
        elif f in files.salvaged:
            flag |= SALVAGED_FLAG
        elif f in files.touched:
            flag |= TOUCHED_FLAG

        copy = None
        if f in files.copied_from_p1:
            flag |= COPIED_FROM_P1_FLAG
            copy = files.copied_from_p1.get(f)
        elif f in files.copied_from_p2:
            copy = files.copied_from_p2.get(f)
            flag |= COPIED_FROM_P2_FLAG
        copy_idx = file_idx[copy]
        chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx))
    chunks.extend(all_files)
    return {sidedatamod.SD_FILES: b''.join(chunks)}

def decode_files_sidedata(sidedata):
    md = ChangingFiles()
    raw = sidedata.get(sidedatamod.SD_FILES)

    if raw is None:
        return md

    copies = []
    all_files = []

    assert len(raw) >= INDEX_HEADER.size
    total_files = INDEX_HEADER.unpack_from(raw, 0)[0]

    offset = INDEX_HEADER.size
    file_offset_base = offset + (INDEX_ENTRY.size * total_files)
    file_offset_last = file_offset_base

    assert len(raw) >= file_offset_base

    for idx in range(total_files):
        flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset)
        file_end += file_offset_base
        filename = raw[file_offset_last:file_end]
        filesize = file_end - file_offset_last
        assert len(filename) == filesize
        offset += INDEX_ENTRY.size
        file_offset_last = file_end
        all_files.append(filename)
        if flag & ACTION_MASK == ADDED_FLAG:
            md.mark_added(filename)
        elif flag & ACTION_MASK == MERGED_FLAG:
            md.mark_merged(filename)
        elif flag & ACTION_MASK == REMOVED_FLAG:
            md.mark_removed(filename)
        elif flag & ACTION_MASK == SALVAGED_FLAG:
            md.mark_salvaged(filename)
        elif flag & ACTION_MASK == TOUCHED_FLAG:
            md.mark_touched(filename)

        copied = None
        if flag & COPIED_MASK == COPIED_FROM_P1_FLAG:
            copied = md.mark_copied_from_p1
        elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG:
            copied = md.mark_copied_from_p2

        if copied is not None:
            copies.append((copied, filename, copy_idx))

    for copied, filename, copy_idx in copies:
        copied(all_files[copy_idx], filename)

    return md

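# A hedged round-trip sketch for the sidedata encoding (assumes the
# ChangingFiles API defined earlier in this module; values are made up):
#
#     files = ChangingFiles()
#     files.mark_added(b'new')
#     files.mark_copied_from_p1(b'old', b'new')
#     restored = decode_files_sidedata(encode_files_sidedata(files))
#     assert restored.copied_from_p1 == {b'new': b'old'}

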
def _getsidedata(srcrepo, rev):
    ctx = srcrepo[rev]
    files = compute_all_files_changes(ctx)
    return encode_files_sidedata(files)

def getsidedataadder(srcrepo, destrepo):
    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
    if pycompat.iswindows or not use_w:
        return _get_simple_sidedata_adder(srcrepo, destrepo)
    else:
        return _get_worker_sidedata_adder(srcrepo, destrepo)

def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
    """The function used by a worker to precompute sidedata

    It reads an input queue containing revision numbers.
    It writes to an output queue containing (rev, <sidedata-map>) pairs.

    The `None` input value is used as a stop signal.

    The `tokens` semaphore is used to avoid having too many unprocessed
    entries. A worker needs to acquire one token before fetching a task.
    Tokens will be released by the consumer of the produced data.
    """
    tokens.acquire()
    rev = revs_queue.get()
    while rev is not None:
        data = _getsidedata(srcrepo, rev)
        sidedata_queue.put((rev, data))
        tokens.acquire()
        rev = revs_queue.get()
    # processing of `None` is completed, release the token.
    tokens.release()

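# Token accounting sketch (informal): with nbworkers * BUFF_PER_WORKER
# tokens in the semaphore created below, at most that many results can be
# queued or in flight at once; the consumer (sidedata_companion) releases
# one token per result it pops, letting a worker fetch its next task.

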
BUFF_PER_WORKER = 50

def _get_worker_sidedata_adder(srcrepo, destrepo):
    """The parallel version of the sidedata computation

    This code spawns a pool of workers that precompute a buffer of sidedata
    before we actually need them"""
    # avoid circular import copies -> scmutil -> worker -> copies
    from . import worker

    nbworkers = worker._numworkers(srcrepo.ui)

    tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER)
    revsq = multiprocessing.Queue()
    sidedataq = multiprocessing.Queue()

    assert srcrepo.filtername is None
    # queue all tasks beforehand, revision numbers are small and it makes
    # synchronisation simpler
    #
    # Since the computation for each node can be quite expensive, the
    # overhead of using a single queue is not relevant. In practice, most
    # computations are fast but some are very expensive and dominate all
    # the other smaller costs.
    for r in srcrepo.changelog.revs():
        revsq.put(r)
    # queue the "no more tasks" markers
    for i in range(nbworkers):
        revsq.put(None)

    allworkers = []
    for i in range(nbworkers):
        args = (srcrepo, revsq, sidedataq, tokens)
        w = multiprocessing.Process(target=_sidedata_worker, args=args)
        allworkers.append(w)
        w.start()

    # dictionary to store results for revisions higher than the one we are
    # looking for. For example, if we need the sidedata map for 42 and 43
    # is received, we shelve 43 for later use.
    staging = {}

    def sidedata_companion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
            # Is the data previously shelved?
            sidedata = staging.pop(rev, None)
            if sidedata is None:
                # look at the queued results until we find the one we are
                # looking for (shelve the other ones)
                r, sidedata = sidedataq.get()
                while r != rev:
                    staging[r] = sidedata
                    r, sidedata = sidedataq.get()
            tokens.release()
        return False, (), sidedata

    return sidedata_companion

def _get_simple_sidedata_adder(srcrepo, destrepo):
    """The simple version of the sidedata computation

    It just computes it in the same thread on request"""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion

def getsidedataremover(srcrepo, destrepo):
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion