# HG changeset patch # User Mads Kiilerich # Date 2014-08-26 20:03:32 # Node ID 35ab037de9892791fd6108027376ce53289be6b9 # Parent 98aafdf4cbf61410d817aceba5dda7b633ff15b3 convert: introduce --full for converting all files Convert will normally only process files that were changed in a source revision, apply the filemap, and record it as a change in the target repository. (If it ends up not really changing anything, nothing changes.) That means that _if_ the filemap is changed before continuing an incremental convert, the change will only kick in when the files it affects are modified in a source revision and thus processed. With --full, convert will make a full conversion every time and process all files in the source repo and remove target repo files that shouldn't be there. Filemap changes will thus kick in on the first converted revision, no matter what is changed. This flag should in most cases not make any difference but will make convert significantly slower. Other names have been considered for this feature, such as "resync", "sync", "checkunmodified", "all" or "allfiles", but I found that they were less obvious and required more explanation than "full" and were harder to describe consistently. diff --git a/hgext/convert/__init__.py b/hgext/convert/__init__.py --- a/hgext/convert/__init__.py +++ b/hgext/convert/__init__.py @@ -29,6 +29,8 @@ testedwith = 'internal' ('A', 'authormap', '', _('remap usernames using this file'), _('FILE')), ('', 'filemap', '', _('remap file names using contents of file'), _('FILE')), + ('', 'full', None, + _('apply filemap changes by converting all files again')), ('', 'splicemap', '', _('splice synthesized history into place'), _('FILE')), ('', 'branchmap', '', _('change branch names while converting'), @@ -131,6 +133,14 @@ def convert(ui, src, dest=None, revmapfi it is converted. To rename from a subdirectory into the root of the repository, use ``.`` as the path to rename to. 
+ ``--full`` will make sure the converted changesets contain exactly + the right files with the right content. It will make a full + conversion of all files, not just the ones that have + changed. Files that already are correct will not be changed. This + can be used to apply filemap changes when converting + incrementally. This is currently only supported for Mercurial and + Subversion. + The splicemap is a file that allows insertion of synthetic history, letting you specify the parents of a revision. This is useful if you want to e.g. give a Subversion merge two parents, or diff --git a/hgext/convert/bzr.py b/hgext/convert/bzr.py --- a/hgext/convert/bzr.py +++ b/hgext/convert/bzr.py @@ -134,8 +134,9 @@ class bzr_source(converter_source): sio = revtree.get_file(fileid) return sio.read(), mode - def getchanges(self, version): - # set up caches: modecache and revtree + def getchanges(self, version, full): + if full: + raise util.Abort(_("convert from bzr do not support --full")) self._modecache = {} self._revtree = self.sourcerepo.revision_tree(version) # get the parentids from the cache diff --git a/hgext/convert/common.py b/hgext/convert/common.py --- a/hgext/convert/common.py +++ b/hgext/convert/common.py @@ -93,12 +93,13 @@ class converter_source(object): """ raise NotImplementedError - def getchanges(self, version): + def getchanges(self, version, full): """Returns a tuple of (files, copies). files is a sorted list of (filename, id) tuples for all files changed between version and its first parent returned by - getcommit(). If full, all files in that revision are returned. + id is the source revision id of the file. 
copies is a dictionary of dest: source """ @@ -204,7 +205,7 @@ class converter_sink(object): mapping equivalent authors identifiers for each system.""" return None - def putcommit(self, files, copies, parents, commit, source, revmap): + def putcommit(self, files, copies, parents, commit, source, revmap, full): """Create a revision with all changed files listed in 'files' and having listed parents. 'commit' is a commit object containing at a minimum the author, date, and message for this @@ -212,7 +213,8 @@ class converter_sink(object): 'copies' is a dictionary mapping destinations to sources, 'source' is the source repository, and 'revmap' is a mapfile of source revisions to converted revisions. Only getfile() and - lookuprev() should be called on 'source'. + lookuprev() should be called on 'source'. 'full' means that 'files' + is complete and all other files should be removed. Note that the sink repository is not told to update itself to a particular revision (or even what that revision would be) diff --git a/hgext/convert/convcmd.py b/hgext/convert/convcmd.py --- a/hgext/convert/convcmd.py +++ b/hgext/convert/convcmd.py @@ -386,8 +386,8 @@ class converter(object): def copy(self, rev): commit = self.commitcache[rev] - - changes = self.source.getchanges(rev) + full = self.opts.get('full') + changes = self.source.getchanges(rev, full) if isinstance(changes, basestring): if changes == SKIPREV: dest = SKIPREV @@ -413,7 +413,7 @@ class converter(object): parents = [b[0] for b in pbranches] source = progresssource(self.ui, self.source, len(files)) newnode = self.dest.putcommit(files, copies, parents, commit, - source, self.map) + source, self.map, full) source.close() self.source.converted(rev, newnode) self.map[rev] = newnode diff --git a/hgext/convert/cvs.py b/hgext/convert/cvs.py --- a/hgext/convert/cvs.py +++ b/hgext/convert/cvs.py @@ -258,7 +258,9 @@ class convert_cvs(converter_source): else: raise util.Abort(_("unknown CVS response: %s") % line) - def 
getchanges(self, rev): + def getchanges(self, rev, full): + if full: + raise util.Abort(_("convert from cvs do not support --full")) self._parse() return sorted(self.files[rev].iteritems()), {} diff --git a/hgext/convert/darcs.py b/hgext/convert/darcs.py --- a/hgext/convert/darcs.py +++ b/hgext/convert/darcs.py @@ -156,7 +156,9 @@ class darcs_source(converter_source, com output, status = self.run('revert', all=True, repodir=self.tmppath) self.checkexit(status, output) - def getchanges(self, rev): + def getchanges(self, rev, full): + if full: + raise util.Abort(_("convert from darcs do not support --full")) copies = {} changes = [] man = None diff --git a/hgext/convert/filemap.py b/hgext/convert/filemap.py --- a/hgext/convert/filemap.py +++ b/hgext/convert/filemap.py @@ -304,7 +304,7 @@ class filemap_source(converter_source): wrev.add(rev) self.wantedancestors[rev] = wrev - def getchanges(self, rev): + def getchanges(self, rev, full): parents = self.commits[rev].parents if len(parents) > 1: self.rebuild() @@ -384,7 +384,7 @@ class filemap_source(converter_source): # Get the real changes and do the filtering/mapping. To be # able to get the files later on in getfile, we hide the # original filename in the rev part of the return value. 
- changes, copies = self.base.getchanges(rev) + changes, copies = self.base.getchanges(rev, full) files = {} for f, r in changes: newf = self.filemapper(f) diff --git a/hgext/convert/git.py b/hgext/convert/git.py --- a/hgext/convert/git.py +++ b/hgext/convert/git.py @@ -180,7 +180,9 @@ class convert_git(converter_source): continue m.node = node.strip() - def getchanges(self, version): + def getchanges(self, version, full): + if full: + raise util.Abort(_("convert from git do not support --full")) self.modecache = {} fh = self.gitopen("git diff-tree -z --root -m -r %s" % version) changes = [] diff --git a/hgext/convert/gnuarch.py b/hgext/convert/gnuarch.py --- a/hgext/convert/gnuarch.py +++ b/hgext/convert/gnuarch.py @@ -142,7 +142,9 @@ class gnuarch_source(converter_source, c return self._getfile(name, rev) - def getchanges(self, rev): + def getchanges(self, rev, full): + if full: + raise util.Abort(_("convert from arch do not support --full")) self._update(rev) changes = [] copies = {} diff --git a/hgext/convert/hg.py b/hgext/convert/hg.py --- a/hgext/convert/hg.py +++ b/hgext/convert/hg.py @@ -128,11 +128,13 @@ class mercurial_sink(converter_sink): fp.write('%s %s\n' % (revid, s[1])) return fp.getvalue() - def putcommit(self, files, copies, parents, commit, source, revmap): - + def putcommit(self, files, copies, parents, commit, source, revmap, full): files = dict(files) def getfilectx(repo, memctx, f): - v = files[f] + try: + v = files[f] + except KeyError: + return None data, mode = source.getfile(f, v) if data is None: return None @@ -193,7 +195,10 @@ class mercurial_sink(converter_sink): while parents: p1 = p2 p2 = parents.pop(0) - ctx = context.memctx(self.repo, (p1, p2), text, files.keys(), + fileset = set(files) + if full: + fileset.update(self.repo[p1], self.repo[p2]) + ctx = context.memctx(self.repo, (p1, p2), text, fileset, getfilectx, commit.author, commit.date, extra) self.repo.commitctx(ctx) text = "(octopus merge fixup)\n" @@ -356,17 +361,18 @@ 
class mercurial_source(converter_source) except error.LookupError: return None, None - def getchanges(self, rev): + def getchanges(self, rev, full): ctx = self.changectx(rev) parents = self.parents(ctx) - if not parents: + if full or not parents: files = copyfiles = ctx.manifest() - else: + if parents: if self._changescache[0] == rev: m, a, r = self._changescache[1] else: m, a, r = self.repo.status(parents[0].node(), ctx.node())[:3] - files = m + a + r + if not full: + files = m + a + r copyfiles = m + a # getcopies() is also run for roots and before filtering so missing # revlogs are detected early diff --git a/hgext/convert/monotone.py b/hgext/convert/monotone.py --- a/hgext/convert/monotone.py +++ b/hgext/convert/monotone.py @@ -224,7 +224,9 @@ class monotone_source(converter_source, else: return [self.rev] - def getchanges(self, rev): + def getchanges(self, rev, full): + if full: + raise util.Abort(_("convert from monotone do not support --full")) revision = self.mtnrun("get_revision", rev).split("\n\n") files = {} ignoremove = {} diff --git a/hgext/convert/p4.py b/hgext/convert/p4.py --- a/hgext/convert/p4.py +++ b/hgext/convert/p4.py @@ -192,7 +192,9 @@ class p4_source(converter_source): return contents, mode - def getchanges(self, rev): + def getchanges(self, rev, full): + if full: + raise util.Abort(_("convert from p4 do not support --full")) return self.files[rev], {} def getcommit(self, rev): diff --git a/hgext/convert/subversion.py b/hgext/convert/subversion.py --- a/hgext/convert/subversion.py +++ b/hgext/convert/subversion.py @@ -444,37 +444,37 @@ class svn_source(converter_source): return self.heads - def _getchanges(self, rev): + def _getchanges(self, rev, full): (paths, parents) = self.paths[rev] + copies = {} if parents: files, self.removed, copies = self.expandpaths(rev, paths, parents) - else: + if full or not parents: # Perform a full checkout on roots uuid, module, revnum = revsplit(rev) entries = svn.client.ls(self.baseurl + quote(module), 
optrev(revnum), True, self.ctx) files = [n for n, e in entries.iteritems() if e.kind == svn.core.svn_node_file] - copies = {} self.removed = set() files.sort() files = zip(files, [rev] * len(files)) return (files, copies) - def getchanges(self, rev): + def getchanges(self, rev, full): # reuse cache from getchangedfiles - if self._changescache[0] == rev: + if self._changescache[0] == rev and not full: (files, copies) = self._changescache[1] else: - (files, copies) = self._getchanges(rev) + (files, copies) = self._getchanges(rev, full) # caller caches the result, so free it here to release memory del self.paths[rev] return (files, copies) def getchangedfiles(self, rev, i): # called from filemap - cache computed values for reuse in getchanges - (files, copies) = self._getchanges(rev) + (files, copies) = self._getchanges(rev, False) self._changescache = (rev, (files, copies)) return [f[0] for f in files] @@ -1222,7 +1222,7 @@ class svn_sink(converter_sink, commandli def revid(self, rev): return u"svn:%s@%s" % (self.uuid, rev) - def putcommit(self, files, copies, parents, commit, source, revmap): + def putcommit(self, files, copies, parents, commit, source, revmap, full): for parent in parents: try: return self.revid(self.childmap[parent]) @@ -1238,6 +1238,8 @@ class svn_sink(converter_sink, commandli self.putfile(f, mode, data) if f in copies: self.copies.append([copies[f], f]) + if full: + self.delete.extend(sorted(self.manifest.difference(files))) files = [f[0] for f in files] entries = set(self.delete) diff --git a/tests/test-convert-hg-sink.t b/tests/test-convert-hg-sink.t --- a/tests/test-convert-hg-sink.t +++ b/tests/test-convert-hg-sink.t @@ -537,3 +537,16 @@ Conversion after rollback | o 0 0 (a-only f) +Convert with --full adds and removes files that didn't change + + $ echo f >> 0/f + $ hg -R 0 ci -m "f" + $ hg convert --filemap filemap-b --full 0 a --config convert.hg.revs=1:: + scanning source... + sorting... + converting... 
+ 0 f + $ hg -R a status --change tip + M f + A b-only + R a-only diff --git a/tests/test-convert-svn-sink.t b/tests/test-convert-svn-sink.t --- a/tests/test-convert-svn-sink.t +++ b/tests/test-convert-svn-sink.t @@ -247,6 +247,31 @@ Symlinks #endif +Convert with --full adds and removes files that didn't change + + $ touch a/f + $ hg -R a ci -Aqmf + $ echo "rename c d" > filemap + $ hg convert -d svn a --filemap filemap --full + assuming destination a-hg + initializing svn working copy 'a-hg-wc' + scanning source... + sorting... + converting... + 0 f + $ svnupanddisplay a-hg-wc 1 + 9 9 test . + 9 9 test d + 9 9 test f + revision: 9 + author: test + msg: f + D /c + A /d + D /d1 + A /f + D /newlink + $ rm -rf a a-hg a-hg-wc diff --git a/tests/test-convert-svn-source.t b/tests/test-convert-svn-source.t --- a/tests/test-convert-svn-source.t +++ b/tests/test-convert-svn-source.t @@ -168,6 +168,27 @@ Test filemap | o 0 second letter files: letter2.txt +Convert with --full adds and removes files that didn't change + + $ cd B + $ echo >> "letter .txt" + $ svn ci -m 'nothing' + Sending letter .txt + Transmitting file data . + Committed revision 9. + $ cd .. + + $ echo 'rename letter2.txt letter3.txt' > filemap + $ hg convert --filemap filemap --full "$SVNREPOURL/proj%20B/mytrunk" fmap + scanning source... + sorting... + converting... + 0 nothing + $ hg -R fmap st --change tip + A letter .txt + A letter3.txt + R letter2.txt + test invalid splicemap1 $ cat > splicemap <