# HG changeset patch # User Matt Mackall # Date 2012-03-12 18:37:39 # Node ID 0d175ac527c1d1b51b1de6dff25907ae377b9280 # Parent 51e6f318bdf1c090783109c7051d22cad5d1306c pvec: introduce pvecs diff --git a/contrib/check-code.py b/contrib/check-code.py --- a/contrib/check-code.py +++ b/contrib/check-code.py @@ -169,7 +169,7 @@ pypats = [ "missing whitespace around operator"), (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=)\S', "missing whitespace around operator"), - (r'[^+=*/!<>&| -](\s=|=\s)[^= ]', + (r'[^^+=*/!<>&| -](\s=|=\s)[^= ]', "wrong whitespace around ="), (r'raise Exception', "don't raise generic exceptions"), (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"), diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -16,7 +16,7 @@ import sshserver, hgweb, hgweb.server, c import merge as mergemod import minirst, revset, fileset import dagparser, context, simplemerge -import random, setdiscovery, treediscovery, dagutil +import random, setdiscovery, treediscovery, dagutil, pvec import phases table = {} @@ -1963,6 +1963,27 @@ def debugpushkey(ui, repopath, namespace ui.write("%s\t%s\n" % (k.encode('string-escape'), v.encode('string-escape'))) +@command('debugpvec', [], _('A B')) +def debugpvec(ui, repo, a, b=None): + ca = scmutil.revsingle(repo, a) + cb = scmutil.revsingle(repo, b) + pa = pvec.ctxpvec(ca) + pb = pvec.ctxpvec(cb) + if pa == pb: + rel = "=" + elif pa > pb: + rel = ">" + elif pa < pb: + rel = "<" + elif pa | pb: + rel = "|" + ui.write(_("a: %s\n") % pa) + ui.write(_("b: %s\n") % pb) + ui.write(_("depth(a): %d depth(b): %d\n") % (pa._depth, pb._depth)) + ui.write(_("delta: %d hdist: %d distance: %d relation: %s\n") % + (abs(pa._depth - pb._depth), pvec._hamming(pa._vec, pb._vec), + pa.distance(pb), rel)) + @command('debugrebuildstate', [('r', 'rev', '', _('revision to rebuild to'), _('REV'))], _('[-r REV] [REV]')) diff --git a/mercurial/pvec.py b/mercurial/pvec.py new file mode 100644 --- /dev/null +++ b/mercurial/pvec.py @@ -0,0 +1,210 @@ +# pvec.py - probabilistic vector clocks for Mercurial +# +# Copyright 2012 Matt Mackall +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +''' +A "pvec" is a changeset property based on the theory of vector clocks +that can be compared to discover relatedness without consulting a +graph. This can be useful for tasks like determining how a +disconnected patch relates to a repository. + +Currently a pvec consist of 448 bits, of which 24 are 'depth' and the +remainder are a bit vector. It is represented as a 70-character base85 +string. + +Construction: + +- a root changeset has a depth of 0 and a bit vector based on its hash +- a normal commit has a changeset where depth is increased by one and + one bit vector bit is flipped based on its hash +- a merge changeset pvec is constructed by copying changes from one pvec into + the other to balance its depth + +Properties: + +- for linear changes, difference in depth is always <= hamming distance +- otherwise, changes are probably divergent +- when hamming distance is < 200, we can reliably detect when pvecs are near + +Issues: + +- hamming distance ceases to work over distances of ~ 200 +- detecting divergence is less accurate when the common ancestor is very close + to either revision or total distance is high +- this could probably be improved by modeling the relation between + delta and hdist + +Uses: + +- a patch pvec can be used to locate the nearest available common ancestor for + resolving conflicts +- ordering of patches can be established without a DAG +- two head pvecs can be compared to determine whether push/pull/merge is needed + and approximately how many changesets are involved +- can be used to find a heuristic divergence measure between changesets on + different branches +''' + +import base85, util +from node import nullrev + +_size = 448 # 70 chars b85-encoded +_bytes = _size / 8 +_depthbits = 24 +_depthbytes = _depthbits / 8 +_vecbytes = _bytes - _depthbytes +_vecbits = _vecbytes * 8 +_radius = (_vecbits - 30) / 2 # high probability vecs are related + +def _bin(bs): + '''convert a bytestring to a long''' + v = 0 + for b in bs: + v = v * 256 + ord(b) + return v + +def _str(v, l): + bs = "" + for p in xrange(l): + bs = chr(v & 255) + bs + v >>= 8 + return bs + +def _split(b): + '''depth and bitvec''' + return _bin(b[:_depthbytes]), _bin(b[_depthbytes:]) + +def _join(depth, bitvec): + return _str(depth, _depthbytes) + _str(bitvec, _vecbytes) + +def _hweight(x): + c = 0 + while x: + if x & 1: + c += 1 + x >>= 1 + return c +_htab = [_hweight(x) for x in xrange(256)] + +def _hamming(a, b): + '''find the hamming distance between two longs''' + d = a ^ b + c = 0 + while d: + c += _htab[d & 0xff] + d >>= 8 + return c + +def _mergevec(x, y, c): + # Ideally, this function would be x ^ y ^ ancestor, but finding + # ancestors is a nuisance. So instead we find the minimal number + # of changes to balance the depth and hamming distance + + d1, v1 = x + d2, v2 = y + if d1 < d2: + d1, d2, v1, v2 = d2, d1, v2, v1 + + hdist = _hamming(v1, v2) + ddist = d1 - d2 + v = v1 + m = v1 ^ v2 # mask of different bits + i = 1 + + if hdist > ddist: + # if delta = 10 and hdist = 100, then we need to go up 55 steps + # to the ancestor and down 45 + changes = (hdist - ddist + 1) / 2 + else: + # must make at least one change + changes = 1 + depth = d1 + changes + + # copy changes from v2 + if m: + while changes: + if m & i: + v ^= i + changes -= 1 + i <<= 1 + else: + v = _flipbit(v, c) + + return depth, v + +def _flipbit(v, node): + # converting bit strings to longs is slow + bit = (hash(node) & 0xffffffff) % _vecbits + return v ^ (1< delta: + return False + return True + + def __gt__(self, b): + return b < self + + def __or__(self, b): + delta = abs(b._depth - self._depth) + if _hamming(self._vec, b._vec) <= delta: + return False + return True + + def __sub__(self, b): + if self | b: + raise ValueError("concurrent pvecs") + return self._depth - b._depth + + def distance(self, b): + d = abs(b._depth - self._depth) + h = _hamming(self._vec, b._vec) + return max(d, h) + + def near(self, b): + dist = abs(b.depth - self._depth) + if dist > _radius or _hamming(self._vec, b._vec) > _radius: + return False diff --git a/tests/test-debugcomplete.t b/tests/test-debugcomplete.t --- a/tests/test-debugcomplete.t +++ b/tests/test-debugcomplete.t @@ -87,6 +87,7 @@ Show debug commands if there are no othe debuginstall debugknown debugpushkey + debugpvec debugrebuildstate debugrename debugrevlog @@ -236,6 +237,7 @@ Show all commands + options debuginstall: debugknown: debugpushkey: + debugpvec: debugrebuildstate: rev debugrename: rev debugrevlog: changelog, manifest, dump