# HG changeset patch # User Patrick Mezard # Date 2010-01-01 19:54:05 # Node ID e451e599fbcf5e98a791558c99a0c6dbdceb0e74 # Parent fd6e9c7cd98c245e439fdee0cda343e4b2ba4262 patch: support diff data loss detection and upgrade In the worst case, generating a diff in upgrade mode can be twice as expensive as generating it in git mode directly: we may have to regenerate the whole diff whenever a git feature is detected. Also, the first diff attempt is completely buffered instead of being streamed. That said, even without having profiled it yet, I am convinced we can fast-path the upgrade mode, if necessary, were it to be used in regular diff commands and not only in mq, where avoiding data loss is worth the price. diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py --- a/mercurial/mdiff.py +++ b/mercurial/mdiff.py @@ -27,7 +27,9 @@ class diffopts(object): nodates removes dates from diff headers ignorews ignores all whitespace changes in the diff ignorewsamount ignores changes in the amount of whitespace - ignoreblanklines ignores changes whose lines are all blank''' + ignoreblanklines ignores changes whose lines are all blank + upgrade generates git diffs to avoid data loss + ''' defaults = { 'context': 3, @@ -38,6 +40,7 @@ class diffopts(object): 'ignorews': False, 'ignorewsamount': False, 'ignoreblanklines': False, + 'upgrade': False, } __slots__ = defaults.keys() diff --git a/mercurial/patch.py b/mercurial/patch.py --- a/mercurial/patch.py +++ b/mercurial/patch.py @@ -1246,17 +1246,25 @@ def b85diff(to, tn): ret.append('\n') return ''.join(ret) -def _addmodehdr(header, omode, nmode): - if omode != nmode: - header.append('old mode %s\n' % omode) - header.append('new mode %s\n' % nmode) +class GitDiffRequired(Exception): + pass -def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None): +def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None, + losedatafn=None): '''yields diff of changes to files between two nodes, or node 
and working directory. if node1 is None, use first dirstate parent instead. - if node2 is None, compare node1 with working directory.''' + if node2 is None, compare node1 with working directory. + + losedatafn(**kwarg) is a callable run when opts.upgrade=True and + every time some change cannot be represented with the current + patch format. Return False to upgrade to git patch format, True to + accept the loss or raise an exception to abort the diff. It is + called with the name of current file being diffed as 'fn'. If set + to None, patches will always be upgraded to git format when + necessary. + ''' if opts is None: opts = mdiff.defaultopts @@ -1288,24 +1296,50 @@ def diff(repo, node1=None, node2=None, m modified, added, removed = changes[:3] if not modified and not added and not removed: - return + return [] + + revs = None + if not repo.ui.quiet: + hexfunc = repo.ui.debugflag and hex or short + revs = [hexfunc(node) for node in [node1, node2] if node] + + copy = {} + if opts.git or opts.upgrade: + copy = copies.copies(repo, ctx1, ctx2, repo[nullid])[0] + copy = copy.copy() + for k, v in copy.items(): + copy[v] = k + + difffn = lambda opts, losedata: trydiff(repo, revs, ctx1, ctx2, + modified, added, removed, copy, getfilectx, opts, losedata) + if opts.upgrade and not opts.git: + try: + def losedata(fn): + if not losedatafn or not losedatafn(fn=fn): + raise GitDiffRequired() + # Buffer the whole output until we are sure it can be generated + return list(difffn(opts.copy(git=False), losedata)) + except GitDiffRequired: + return difffn(opts.copy(git=True), None) + else: + return difffn(opts, None) + +def _addmodehdr(header, omode, nmode): + if omode != nmode: + header.append('old mode %s\n' % omode) + header.append('new mode %s\n' % nmode) + +def trydiff(repo, revs, ctx1, ctx2, modified, added, removed, + copy, getfilectx, opts, losedatafn): date1 = util.datestr(ctx1.date()) man1 = ctx1.manifest() - revs = None - if not repo.ui.quiet and not opts.git: - hexfunc 
= repo.ui.debugflag and hex or short - revs = [hexfunc(node) for node in [node1, node2] if node] + gone = set() + gitmode = {'l': '120000', 'x': '100755', '': '100644'} if opts.git: - copy, diverge = copies.copies(repo, ctx1, ctx2, repo[nullid]) - copy = copy.copy() - for k, v in copy.items(): - copy[v] = k - - gone = set() - gitmode = {'l': '120000', 'x': '100755', '': '100644'} + revs = None for f in sorted(modified + added + removed): to = None @@ -1317,39 +1351,61 @@ def diff(repo, node1=None, node2=None, m if f not in removed: tn = getfilectx(f, ctx2).data() a, b = f, f - if opts.git: + if opts.git or losedatafn: if f in added: mode = gitmode[ctx2.flags(f)] if f in copy: - a = copy[f] - omode = gitmode[man1.flags(a)] - _addmodehdr(header, omode, mode) - if a in removed and a not in gone: - op = 'rename' - gone.add(a) + if opts.git: + a = copy[f] + omode = gitmode[man1.flags(a)] + _addmodehdr(header, omode, mode) + if a in removed and a not in gone: + op = 'rename' + gone.add(a) + else: + op = 'copy' + header.append('%s from %s\n' % (op, a)) + header.append('%s to %s\n' % (op, f)) + to = getfilectx(a, ctx1).data() else: - op = 'copy' - header.append('%s from %s\n' % (op, a)) - header.append('%s to %s\n' % (op, f)) - to = getfilectx(a, ctx1).data() + losedatafn(f) else: - header.append('new file mode %s\n' % mode) + if opts.git: + header.append('new file mode %s\n' % mode) + elif ctx2.flags(f): + losedatafn(f) if util.binary(tn): - dodiff = 'binary' + if opts.git: + dodiff = 'binary' + else: + losedatafn(f) + if not opts.git and not tn: + # regular diffs cannot represent new empty file + losedatafn(f) elif f in removed: - # have we already reported a copy above? - if f in copy and copy[f] in added and copy[copy[f]] == f: - dodiff = False - else: - header.append('deleted file mode %s\n' % - gitmode[man1.flags(f)]) + if opts.git: + # have we already reported a copy above? 
+ if f in copy and copy[f] in added and copy[copy[f]] == f: + dodiff = False + else: + header.append('deleted file mode %s\n' % + gitmode[man1.flags(f)]) + elif not to: + # regular diffs cannot represent empty file deletion + losedatafn(f) else: - omode = gitmode[man1.flags(f)] - nmode = gitmode[ctx2.flags(f)] - _addmodehdr(header, omode, nmode) - if util.binary(to) or util.binary(tn): - dodiff = 'binary' - header.insert(0, mdiff.diffline(revs, a, b, opts)) + oflag = man1.flags(f) + nflag = ctx2.flags(f) + binary = util.binary(to) or util.binary(tn) + if opts.git: + _addmodehdr(header, gitmode[oflag], gitmode[nflag]) + if binary: + dodiff = 'binary' + elif binary or nflag != oflag: + losedatafn(f) + if opts.git: + header.insert(0, mdiff.diffline(revs, a, b, opts)) + if dodiff: if dodiff == 'binary': text = b85diff(to, tn) diff --git a/tests/autodiff.py b/tests/autodiff.py new file mode 100644 --- /dev/null +++ b/tests/autodiff.py @@ -0,0 +1,46 @@ +# Extension dedicated to test patch.diff() upgrade modes +# +# +from mercurial import cmdutil, patch, util + +def autodiff(ui, repo, *pats, **opts): + diffopts = patch.diffopts(ui, opts) + git = opts.get('git', 'no') + brokenfiles = set() + losedatafn = None + if git in ('yes', 'no'): + diffopts.git = git == 'yes' + diffopts.upgrade = False + elif git == 'auto': + diffopts.git = False + diffopts.upgrade = True + elif git == 'warn': + diffopts.git = False + diffopts.upgrade = True + def losedatafn(fn=None, **kwargs): + brokenfiles.add(fn) + return True + elif git == 'abort': + diffopts.git = False + diffopts.upgrade = True + def losedatafn(fn=None, **kwargs): + raise util.Abort('losing data for %s' % fn) + else: + raise util.Abort('--git must be yes, no or auto') + + node1, node2 = cmdutil.revpair(repo, []) + m = cmdutil.match(repo, pats, opts) + it = patch.diff(repo, node1, node2, match=m, opts=diffopts, + losedatafn=losedatafn) + for chunk in it: + ui.write(chunk) + for fn in sorted(brokenfiles): + ui.write('data lost 
for: %s\n' % fn) + +cmdtable = { + "autodiff": + (autodiff, + [('', 'git', '', 'git upgrade mode (yes/no/auto/warn/abort)'), + ], + '[OPTION]... [FILE]...'), +} diff --git a/tests/test-diff-upgrade b/tests/test-diff-upgrade new file mode 100755 --- /dev/null +++ b/tests/test-diff-upgrade @@ -0,0 +1,63 @@ +#!/bin/sh + +echo "[extensions]" >> $HGRCPATH +echo "autodiff=$TESTDIR/autodiff.py" >> $HGRCPATH +echo "[diff]" >> $HGRCPATH +echo "nodates=1" >> $HGRCPATH + +hg init repo +cd repo +echo '% make a combination of new, changed and deleted file' +echo regular > regular +echo rmregular > rmregular +touch rmempty +echo exec > exec +chmod +x exec +echo rmexec > rmexec +chmod +x rmexec +echo setexec > setexec +echo unsetexec > unsetexec +chmod +x unsetexec +echo binary > binary +python -c "file('rmbinary', 'wb').write('\0')" +hg ci -Am addfiles +echo regular >> regular +echo newregular >> newregular +rm rmempty +touch newempty +rm rmregular +echo exec >> exec +echo newexec > newexec +chmod +x newexec +rm rmexec +chmod +x setexec +chmod -x unsetexec +python -c "file('binary', 'wb').write('\0\0')" +python -c "file('newbinary', 'wb').write('\0')" +rm rmbinary +hg addremove + +echo '% git=no: regular diff for all files' +hg autodiff --git=no + +echo '% git=no: git diff for single regular file' +hg autodiff --git=yes regular + +echo '% git=auto: regular diff for regular files and removals' +hg autodiff --git=auto regular newregular rmregular rmbinary rmexec + +for f in exec newexec setexec unsetexec binary newbinary newempty rmempty; do + echo '% git=auto: git diff for' $f + hg autodiff --git=auto $f +done + +echo '% git=warn: regular diff with data loss warnings' +hg autodiff --git=warn + +echo '% git=abort: fail on execute bit change' +hg autodiff --git=abort regular setexec + +echo '% git=abort: succeed on regular file' +hg autodiff --git=abort regular + +cd .. 
diff --git a/tests/test-diff-upgrade.out b/tests/test-diff-upgrade.out new file mode 100644 --- /dev/null +++ b/tests/test-diff-upgrade.out @@ -0,0 +1,186 @@ +% make a combination of new, changed and deleted file +adding binary +adding exec +adding regular +adding rmbinary +adding rmempty +adding rmexec +adding rmregular +adding setexec +adding unsetexec +adding newbinary +adding newempty +adding newexec +adding newregular +removing rmbinary +removing rmempty +removing rmexec +removing rmregular +% git=no: regular diff for all files +diff -r b3f053cd7c7f binary +Binary file binary has changed +diff -r b3f053cd7c7f exec +--- a/exec ++++ b/exec +@@ -1,1 +1,2 @@ + exec ++exec +diff -r b3f053cd7c7f newbinary +Binary file newbinary has changed +diff -r b3f053cd7c7f newexec +--- /dev/null ++++ b/newexec +@@ -0,0 +1,1 @@ ++newexec +diff -r b3f053cd7c7f newregular +--- /dev/null ++++ b/newregular +@@ -0,0 +1,1 @@ ++newregular +diff -r b3f053cd7c7f regular +--- a/regular ++++ b/regular +@@ -1,1 +1,2 @@ + regular ++regular +diff -r b3f053cd7c7f rmbinary +Binary file rmbinary has changed +diff -r b3f053cd7c7f rmexec +--- a/rmexec ++++ /dev/null +@@ -1,1 +0,0 @@ +-rmexec +diff -r b3f053cd7c7f rmregular +--- a/rmregular ++++ /dev/null +@@ -1,1 +0,0 @@ +-rmregular +% git=no: git diff for single regular file +diff --git a/regular b/regular +--- a/regular ++++ b/regular +@@ -1,1 +1,2 @@ + regular ++regular +% git=auto: regular diff for regular files and removals +diff -r b3f053cd7c7f newregular +--- /dev/null ++++ b/newregular +@@ -0,0 +1,1 @@ ++newregular +diff -r b3f053cd7c7f regular +--- a/regular ++++ b/regular +@@ -1,1 +1,2 @@ + regular ++regular +diff -r b3f053cd7c7f rmbinary +Binary file rmbinary has changed +diff -r b3f053cd7c7f rmexec +--- a/rmexec ++++ /dev/null +@@ -1,1 +0,0 @@ +-rmexec +diff -r b3f053cd7c7f rmregular +--- a/rmregular ++++ /dev/null +@@ -1,1 +0,0 @@ +-rmregular +% git=auto: git diff for exec +diff -r b3f053cd7c7f exec +--- a/exec ++++ b/exec +@@ -1,1 
+1,2 @@ + exec ++exec +% git=auto: git diff for newexec +diff --git a/newexec b/newexec +new file mode 100755 +--- /dev/null ++++ b/newexec +@@ -0,0 +1,1 @@ ++newexec +% git=auto: git diff for setexec +diff --git a/setexec b/setexec +old mode 100644 +new mode 100755 +% git=auto: git diff for unsetexec +diff --git a/unsetexec b/unsetexec +old mode 100755 +new mode 100644 +% git=auto: git diff for binary +diff --git a/binary b/binary +index a9128c283485202893f5af379dd9beccb6e79486..09f370e38f498a462e1ca0faa724559b6630c04f +GIT binary patch +literal 2 +Jc${Nk0000200961 + +% git=auto: git diff for newbinary +diff --git a/newbinary b/newbinary +new file mode 100644 +index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d +GIT binary patch +literal 1 +Ic${MZ000310RR91 + +% git=auto: git diff for newempty +diff --git a/newempty b/newempty +new file mode 100644 +% git=auto: git diff for rmempty +diff --git a/rmempty b/rmempty +deleted file mode 100644 +% git=warn: regular diff with data loss warnings +diff -r b3f053cd7c7f binary +Binary file binary has changed +diff -r b3f053cd7c7f exec +--- a/exec ++++ b/exec +@@ -1,1 +1,2 @@ + exec ++exec +diff -r b3f053cd7c7f newbinary +Binary file newbinary has changed +diff -r b3f053cd7c7f newexec +--- /dev/null ++++ b/newexec +@@ -0,0 +1,1 @@ ++newexec +diff -r b3f053cd7c7f newregular +--- /dev/null ++++ b/newregular +@@ -0,0 +1,1 @@ ++newregular +diff -r b3f053cd7c7f regular +--- a/regular ++++ b/regular +@@ -1,1 +1,2 @@ + regular ++regular +diff -r b3f053cd7c7f rmbinary +Binary file rmbinary has changed +diff -r b3f053cd7c7f rmexec +--- a/rmexec ++++ /dev/null +@@ -1,1 +0,0 @@ +-rmexec +diff -r b3f053cd7c7f rmregular +--- a/rmregular ++++ /dev/null +@@ -1,1 +0,0 @@ +-rmregular +data lost for: binary +data lost for: newbinary +data lost for: newempty +data lost for: newexec +data lost for: rmempty +data lost for: setexec +data lost for: unsetexec +% git=abort: fail on execute bit change +abort: 
losing data for setexec +% git=abort: succeed on regular file +diff -r b3f053cd7c7f regular +--- a/regular ++++ b/regular +@@ -1,1 +1,2 @@ + regular ++regular