# HG changeset patch # User pacien # Date 2023-03-30 20:22:44 # Node ID f0d2b18f0274b7d68788b1014c55834562dc4bb3 # Parent 9fa3cda7449e1f2b576d3a8034c070c1e8e61681 stabletailgraph: implement stable-tail sort This adds the computation of the "stable-tail sort", an incremental node sorting method. It is a stepping stone for the implementation of faster label discovery (for example for obs markers) and more caching. diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -93,6 +93,7 @@ from . import ( wireprotoserver, ) from .interfaces import repository +from .stabletailgraph import stabletailsort from .utils import ( cborutil, compression, @@ -3644,6 +3645,30 @@ def debugssl(ui, repo, source=None, **op @command( + b'debug::stable-tail-sort', + [ + ( + b'T', + b'template', + b'{rev}\n', + _(b'display with template'), + _(b'TEMPLATE'), + ), + ], + b'REV', +) +def debug_stable_tail_sort(ui, repo, revspec, template, **opts): + """display the stable-tail sort of the ancestors of a given node""" + rev = logcmdutil.revsingle(repo, revspec).rev() + cl = repo.changelog + + displayer = logcmdutil.maketemplater(ui, repo, template) + sorted_revs = stabletailsort._stable_tail_sort(cl, rev) + for ancestor_rev in sorted_revs: + displayer.show(repo[ancestor_rev]) + + +@command( b"debugbackupbundle", [ ( diff --git a/mercurial/stabletailgraph/__init__.py b/mercurial/stabletailgraph/__init__.py new file mode 100644 diff --git a/mercurial/stabletailgraph/stabletailsort.py b/mercurial/stabletailgraph/stabletailsort.py new file mode 100644 --- /dev/null +++ b/mercurial/stabletailgraph/stabletailsort.py @@ -0,0 +1,110 @@ +# stabletailsort.py - stable ordering of revisions +# +# Copyright 2021-2023 Pacien TRAN-GIRARD +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +""" +Stable-tail sort computation. + +The "stable-tail sort", or STS, is a reverse topological ordering of the +ancestors of a node, which tends to share large suffixes with the stable-tail +sort of ancestors and other nodes, giving it its name. + +Its properties should make it suitable for making chunks of ancestors with high +reuse and incrementality for example. + +This module and implementation are experimental. Most functions are not yet +optimised to operate on large production graphs. +""" + +import itertools +from ..node import nullrev +from .. import ancestor + + +def _sorted_parents(cl, p1, p2): + """ + Chooses and returns the pair (px, pt) from (p1, p2). + + Where + "px" denotes the parent starting the "exclusive" part, and + "pt" denotes the parent starting the "Tail" part. + + "px" is chosen as the parent with the lowest rank with the goal of + minimising the size of the exclusive part and maximise the size of the + tail part, hopefully reducing the overall complexity of the stable sort. + + In case of equal ranks, the stable node ID is used as a tie-breaker. + """ + r1, r2 = cl.fast_rank(p1), cl.fast_rank(p2) + if r1 < r2: + return (p1, p2) + elif r1 > r2: + return (p2, p1) + elif cl.node(p1) < cl.node(p2): + return (p1, p2) + else: + return (p2, p1) + + +def _nonoedipal_parent_revs(cl, rev): + """ + Returns the non-œdipal parent pair of the given revision. + + An œdipal merge is a merge with parents p1, p2 with either + p1 in ancestors(p2) or p2 in ancestors(p1). + In the first case, p1 is the œdipal parent. + In the second case, p2 is the œdipal parent. + + Œdipal edges start empty exclusive parts. They do not bring new ancestors. + As such, they can be skipped when computing any topological sort or any + iteration over the ancestors of a node. + + The œdipal edges are eliminated here using the rank information. + """ + p1, p2 = cl.parentrevs(rev) + if p1 == nullrev or cl.fast_rank(p2) == cl.fast_rank(rev) - 1: + return p2, nullrev + elif p2 == nullrev or cl.fast_rank(p1) == cl.fast_rank(rev) - 1: + return p1, nullrev + else: + return p1, p2 + + +def _stable_tail_sort(cl, head_rev): + """ + Naive topological iterator of the ancestors given by the stable-tail sort. + + The stable-tail sort of a node "h" is defined as the sequence: + sts(h) := [h] + excl(h) + sts(pt(h)) + where excl(h) := u for u in sts(px(h)) if u not in ancestors(pt(h)) + + This implementation uses a call-stack whose size is + O(number of open merges). + + As such, this implementation exists mainly as a defining reference. + """ + cursor_rev = head_rev + while cursor_rev != nullrev: + yield cursor_rev + + p1, p2 = _nonoedipal_parent_revs(cl, cursor_rev) + if p1 == nullrev: + cursor_rev = p2 + elif p2 == nullrev: + cursor_rev = p1 + else: + px, pt = _sorted_parents(cl, p1, p2) + + tail_ancestors = ancestor.lazyancestors( + cl.parentrevs, (pt,), inclusive=True + ) + exclusive_ancestors = ( + a for a in _stable_tail_sort(cl, px) if a not in tail_ancestors + ) + + excl_part_size = cl.fast_rank(cursor_rev) - cl.fast_rank(pt) - 1 + yield from itertools.islice(exclusive_ancestors, excl_part_size) + cursor_rev = pt diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -1299,6 +1299,7 @@ packages = [ 'mercurial.hgweb', 'mercurial.interfaces', 'mercurial.pure', + 'mercurial.stabletailgraph', 'mercurial.templates', 'mercurial.thirdparty', 'mercurial.thirdparty.attr', diff --git a/tests/test-completion.t b/tests/test-completion.t --- a/tests/test-completion.t +++ b/tests/test-completion.t @@ -78,6 +78,7 @@ Show debug commands if there are no othe debug-repair-issue6528 debug-revlog-index debug-revlog-stats + debug::stable-tail-sort debugancestor debugantivirusrunning debugapplystreamclonebundle @@ -273,6 +274,7 @@ Show all commands + options debug-repair-issue6528: to-report, from-report, paranoid, dry-run debug-revlog-index: changelog, manifest, dir, template debug-revlog-stats: changelog, manifest, filelogs, template + debug::stable-tail-sort: template debugancestor: debugantivirusrunning: debugapplystreamclonebundle: diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -987,6 +987,8 @@ Test list of internal help commands dump index data for a revlog debug-revlog-stats display statistics about revlogs in the store + debug::stable-tail-sort + display the stable-tail sort of the ancestors of a given node debugancestor find the ancestor revision of two revisions in a given index debugantivirusrunning diff --git a/tests/test-stabletailgraph.t b/tests/test-stabletailgraph.t new file mode 100644 --- /dev/null +++ b/tests/test-stabletailgraph.t @@ -0,0 +1,710 @@ +==================================== +Test for the stabletailgraph package +==================================== + +This test file contains a bunch of small test graphs with some minimal yet +non-trivial structure, on which the various stable-tail graph and stable-tail +sort functions are tested. + +Each case consists of the creation of the interesting graph structure, followed +by a check, for each noteworthy node, of: +- the stable-tail sort output. + +In the ASCII art of the diagrams, the side of the exclusive part which is +followed in priority is denoted with "<" or ">" if it is on the left or right +respectively. + +The intermediary linear parts in the example graph are there to force the +exclusive part choice (made on a min rank condition). + + +Setup +===== + +Enable the rank computation to test sorting based on the rank. + + $ cat << EOF >> $HGRCPATH + > [format] + > exp-use-changelog-v2=enable-unstable-format-and-corrupt-my-data + > + > [alias] + > test-sts = debug::stable-tail-sort -T '{tags},' + > test-log = log --graph -T '{tags} rank={_fast_rank}' + > EOF + + +Example 1: single merge node +============================ + +A base case with one branchpoint "b" and one merge node "e". + +The exclusive part, starting with the lowest-ranking parent "c" of "e", +appears first in stable-tail sort of "e" and "f". + +# f +# | +# e +# | +# --<-- +# | | +# c d +# | | +# --+-- <- at this point, the sort of "e" is done consuming its +# | exclusive part [c] and jumps back to its other parent "d" +# b +# | +# a + + $ hg init example-1 + $ cd example-1 + $ hg debugbuilddag '.:a*a:b*b:c-- | <- in the sort of "f", we need to skip "c" and leap to the +# | | | inherited part of "d" +# | +---- +# b | +# | c +# | | +# --+-- +# | +# a + + $ hg init example-4 + $ cd example-4 + $ hg debugbuilddag '.:a*a+1:b-- | +# | | | +# | g | +# | | | +# | +---- <- in the sort of "f", leaping from "g" to "b" +# b | +# | c +# | | +# --+-- +# | +# a + + $ hg init example-5 + $ cd example-5 + $ hg debugbuilddag '.:a*a+2:b-- --<-- +# | | | | +# g e h i +# | | | | +# | --+-- | <- at this point, for the sort of "l", the iteration on +# f | | the end of excl(j) is postponed to the iteration of +# | d | excl(k) +# | | | +# | c | +# | | | +# ---+--- | +# | | +# b | +# | | +# ----+----- +# | +# a + + $ hg init example-7 + $ cd example-7 + $ hg debugbuilddag \ + > '.:a*a:b*b:c*c:d*d:e*b:f