##// END OF EJS Templates
graphmod: attempt to clarify documentation of groupbranchiter()...
Augie Fackler -
r23570:3f86fe9b default
parent child Browse files
Show More
@@ -1,566 +1,576
1 1 # Revision graph generator for Mercurial
2 2 #
3 3 # Copyright 2008 Dirkjan Ochtman <dirkjan@ochtman.nl>
4 4 # Copyright 2007 Joel Rosdahl <joel@rosdahl.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """supports walking the history as DAGs suitable for graphical output
10 10
11 11 The most basic format we use is that of::
12 12
13 13 (id, type, data, [parentids])
14 14
15 15 The node and parent ids are arbitrary integers which identify a node in the
16 16 context of the graph returned. Type is a constant specifying the node type.
17 17 Data depends on type.
18 18 """
19 19
20 20 from mercurial.node import nullrev
21 21 import util
22 22
23 23 import heapq
24 24
25 25 CHANGESET = 'C'
26 26
def groupbranchiter(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to
    minimize the number of parallel branches and their interleaving.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revision already emitted
    # B) revision in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
    #   available, if such subgroup exist, we add REV to it and the subgroup is
    #   now awaiting for REV.parents() to be available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revision that have been emitted. They can be considered
    # unblocked as the graph generator is already aware of them so there is no
    # need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling this
    # set will force all other branches to wait until this branch is ready to
    # be emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') correspond to parents of any revision in
    # the group ('revs') that is not itself contained in the group. The main
    # idea of this algorithm is to delay as much as possible the emission of
    # any revision. This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this mean we properly
    # retains new branches but gives up on any special ordering for ancestors
    # of merges. The implementation can be improved to handle this better.
    #
    # The first subgroup is special. It corresponds to all the revision that
    # were already emitted. The 'revs' lists is expected to be empty and the
    # 'blocked' set contains the parents revisions of already emitted revision.
    #
    # You could pre-seed the <parents> set of groups[0] to a specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterates on pending rev until after the current rev have been
        # processed.
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek for a subgroup blocked, waiting for the current revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are
                # blocked on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                #     revs [5, 4] waiting for 1
                #     revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines their drawing. This is currently not done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], set([rev])))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroups. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev: # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given)
            # or if we emitted a whole disconnected set of the graph (reached
            # a root). In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1:  # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going to
    # iterate over
    for g in groups:
        for r in g[0]:
            yield r
214 224
def dagwalker(repo, revs):
    """cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples

    This generator function walks through revisions (which should be ordered
    from bigger to lower). It returns a tuple for each node. The node and
    parent ids are arbitrary integers which identify a node in the context of
    the graph returned.
    """
    if not revs:
        return

    cl = repo.changelog
    lowestrev = revs.min()
    gpcache = {}  # memoizes grandparent() lookups for filtered merge parents

    if repo.ui.configbool('experimental', 'graph-group-branches', False):
        firstbranch = ()
        firstbranchrevset = repo.ui.config(
            'experimental', 'graph-group-branches.firstbranch', '')
        if firstbranchrevset:
            firstbranch = repo.revs(firstbranchrevset)
        parentrevs = repo.changelog.parentrevs
        revs = list(groupbranchiter(revs, parentrevs, firstbranch))

    for rev in revs:
        ctx = repo[rev]
        # direct parents that are part of the requested revision set
        parents = sorted(set([p.rev() for p in ctx.parents()
                              if p.rev() in revs]))
        # parents filtered out of 'revs'; we walk past them to the nearest
        # visible ancestors ("grandparents") so edges stay connected
        mpars = [p.rev() for p in ctx.parents() if
                 p.rev() != nullrev and p.rev() not in parents]

        for mpar in mpars:
            gp = gpcache.get(mpar)
            if gp is None:
                gp = gpcache[mpar] = grandparent(cl, lowestrev, revs, mpar)
            if not gp:
                parents.append(mpar)
            else:
                parents.extend(g for g in gp if g not in parents)

        yield (ctx.rev(), CHANGESET, ctx, parents)
256 266
def nodes(repo, nodes):
    """cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples

    This generator function walks the given nodes. It only returns parents
    that are in nodes, too.
    """
    include = set(nodes)
    for node in nodes:
        ctx = repo[node]
        # keep only parents whose node is part of the requested set
        parents = set([p.rev() for p in ctx.parents() if p.node() in include])
        yield (ctx.rev(), CHANGESET, ctx, sorted(parents))
268 278
def colored(dag, repo):
    """annotates a DAG with colored edge information

    For each DAG node this function emits tuples::

      (id, type, data, (col, color), [(col, nextcol, color)])

    with the following new elements:

      - Tuple (col, color) with column and color index for the current node
      - A list of tuples indicating the edges between the current node and its
        parents.
    """
    seen = []
    colors = {}
    newcolor = 1
    config = {}

    # collect per-branch width/color overrides from the [graph] config section
    for key, val in repo.ui.configitems('graph'):
        if '.' in key:
            branch, setting = key.rsplit('.', 1)
            # Validation
            if setting == "width" and val.isdigit():
                config.setdefault(branch, {})[setting] = int(val)
            elif setting == "color" and val.isalnum():
                config.setdefault(branch, {})[setting] = val

    if config:
        getconf = util.lrucachefunc(
            lambda rev: config.get(repo[rev].branch(), {}))
    else:
        getconf = lambda rev: {}

    for (cur, type, data, parents) in dag:

        # Compute seen and next
        if cur not in seen:
            seen.append(cur) # new head
            colors[cur] = newcolor
            newcolor += 1

        col = seen.index(cur)
        color = colors.pop(cur)
        next = seen[:]

        # Add parents to next
        addparents = [p for p in parents if p not in next]
        next[col:col + 1] = addparents

        # Set colors for the parents: first parent inherits the node's color,
        # the others start fresh colors
        for i, p in enumerate(addparents):
            if not i:
                colors[p] = color
            else:
                colors[p] = newcolor
                newcolor += 1

        # Add edges to the graph
        edges = []
        for ecol, eid in enumerate(seen):
            if eid in next:
                bconf = getconf(eid)
                edges.append((
                    ecol, next.index(eid), colors[eid],
                    bconf.get('width', -1),
                    bconf.get('color', '')))
            elif eid == cur:
                for p in parents:
                    bconf = getconf(p)
                    edges.append((
                        ecol, next.index(p), color,
                        bconf.get('width', -1),
                        bconf.get('color', '')))

        # Yield and move on
        yield (cur, type, data, (col, color), edges)
        seen = next
346 356
def grandparent(cl, lowestrev, roots, head):
    """Return all ancestors of head in roots which revision is
    greater or equal to lowestrev.
    """
    pending = set([head])
    seen = set()
    kept = set()
    # never walk below the null revision
    llowestrev = max(nullrev, lowestrev)
    while pending:
        r = pending.pop()
        if r >= llowestrev and r not in seen:
            if r in roots:
                kept.add(r)
            else:
                # not a wanted ancestor itself: keep walking its parents
                pending.update([p for p in cl.parentrevs(r)])
            seen.add(r)
    return sorted(kept)
364 374
def asciiedges(type, char, lines, seen, rev, parents):
    """adds edge info to changelog DAG walk suitable for ascii()"""
    if rev not in seen:
        seen.append(rev)
    nodeidx = seen.index(rev)

    knownparents = []
    newparents = []
    for parent in parents:
        if parent in seen:
            knownparents.append(parent)
        else:
            newparents.append(parent)

    ncols = len(seen)
    nextseen = seen[:]
    # the node's column is replaced by its new (unseen) parents
    nextseen[nodeidx:nodeidx + 1] = newparents
    edges = [(nodeidx, nextseen.index(p)) for p in knownparents if p != nullrev]

    while len(newparents) > 2:
        # ascii() only knows how to add or remove a single column between two
        # calls. Nodes with more than two parents break this constraint so we
        # introduce intermediate expansion lines to grow the active node list
        # slowly.
        edges.append((nodeidx, nodeidx))
        edges.append((nodeidx, nodeidx + 1))
        nmorecols = 1
        yield (type, char, lines, (nodeidx, edges, ncols, nmorecols))
        char = '\\'
        lines = []
        nodeidx += 1
        ncols += 1
        edges = []
        del newparents[0]

    if len(newparents) > 0:
        edges.append((nodeidx, nodeidx))
    if len(newparents) > 1:
        edges.append((nodeidx, nodeidx + 1))
    nmorecols = len(nextseen) - ncols
    seen[:] = nextseen
    yield (type, char, lines, (nodeidx, edges, ncols, nmorecols))
407 417
408 418 def _fixlongrightedges(edges):
409 419 for (i, (start, end)) in enumerate(edges):
410 420 if end > start:
411 421 edges[i] = (start, end + 1)
412 422
413 423 def _getnodelineedgestail(
414 424 node_index, p_node_index, n_columns, n_columns_diff, p_diff, fix_tail):
415 425 if fix_tail and n_columns_diff == p_diff and n_columns_diff != 0:
416 426 # Still going in the same non-vertical direction.
417 427 if n_columns_diff == -1:
418 428 start = max(node_index + 1, p_node_index)
419 429 tail = ["|", " "] * (start - node_index - 1)
420 430 tail.extend(["/", " "] * (n_columns - start))
421 431 return tail
422 432 else:
423 433 return ["\\", " "] * (n_columns - node_index - 1)
424 434 else:
425 435 return ["|", " "] * (n_columns - node_index - 1)
426 436
427 437 def _drawedges(edges, nodeline, interline):
428 438 for (start, end) in edges:
429 439 if start == end + 1:
430 440 interline[2 * end + 1] = "/"
431 441 elif start == end - 1:
432 442 interline[2 * start + 1] = "\\"
433 443 elif start == end:
434 444 interline[2 * start] = "|"
435 445 else:
436 446 if 2 * end >= len(nodeline):
437 447 continue
438 448 nodeline[2 * end] = "+"
439 449 if start > end:
440 450 (start, end) = (end, start)
441 451 for i in range(2 * start + 1, 2 * end):
442 452 if nodeline[i] != "+":
443 453 nodeline[i] = "-"
444 454
445 455 def _getpaddingline(ni, n_columns, edges):
446 456 line = []
447 457 line.extend(["|", " "] * ni)
448 458 if (ni, ni - 1) in edges or (ni, ni) in edges:
449 459 # (ni, ni - 1) (ni, ni)
450 460 # | | | | | | | |
451 461 # +---o | | o---+
452 462 # | | c | | c | |
453 463 # | |/ / | |/ /
454 464 # | | | | | |
455 465 c = "|"
456 466 else:
457 467 c = " "
458 468 line.extend([c, " "])
459 469 line.extend(["|", " "] * (n_columns - ni - 1))
460 470 return line
461 471
def asciistate():
    """returns the initial value for the "state" argument to ascii()"""
    # [previous coldiff, previous node index]
    return [0, 0]
465 475
def ascii(ui, state, type, char, text, coldata):
    """prints an ASCII graph of the DAG

    takes the following arguments (one call per node in the graph):

      - ui to write to
      - Somewhere to keep the needed state in (init to asciistate())
      - Column of the current node in the set of ongoing edges.
      - Type indicator of node data, usually 'C' for changesets.
      - Payload: (char, lines):
        - Character to use as node's symbol.
        - List of lines to display as the node's text.
      - Edges; a list of (col, next_col) indicating the edges between
        the current node and its parents.
      - Number of columns (ongoing edges) in the current revision.
      - The difference between the number of columns (ongoing edges)
        in the next revision and the number of columns (ongoing edges)
        in the current revision. That is: -1 means one column removed;
        0 means no columns added or removed; 1 means one column added.
    """

    idx, edges, ncols, coldiff = coldata
    assert -2 < coldiff < 2
    if coldiff == -1:
        # Transform
        #
        #     | | |        | | |
        #     o | |  into  o---+
        #     |X /         |/ /
        #     | |          | |
        _fixlongrightedges(edges)

    # add_padding_line says whether to rewrite
    #
    #     | | | |        | | | |
    #     | o---+  into  | o---+
    #     |  / /         |   | |  # <--- padding line
    #     o | |          |  / /
    #                    o | |
    add_padding_line = (len(text) > 2 and coldiff == -1 and
                        [x for (x, y) in edges if x + 1 < y])

    # fix_nodeline_tail says whether to rewrite
    #
    #     | | o | |        | | o | |
    #     | | |/ /         | | |/ /
    #     | o | |    into  | o / /   # <--- fixed nodeline tail
    #     | |/ /           | |/ /
    #     o | |            o | |
    fix_nodeline_tail = len(text) <= 2 and not add_padding_line

    # nodeline is the line containing the node character (typically o)
    nodeline = ["|", " "] * idx
    nodeline.extend([char, " "])

    nodeline.extend(
        _getnodelineedgestail(idx, state[1], ncols, coldiff,
                              state[0], fix_nodeline_tail))

    # shift_interline is the line containing the non-vertical
    # edges between this entry and the next
    shift_interline = ["|", " "] * idx
    if coldiff == -1:
        n_spaces = 1
        edge_ch = "/"
    elif coldiff == 0:
        n_spaces = 2
        edge_ch = "|"
    else:
        n_spaces = 3
        edge_ch = "\\"
    shift_interline.extend(n_spaces * [" "])
    shift_interline.extend([edge_ch, " "] * (ncols - idx - 1))

    # draw edges from the current node to its parents
    _drawedges(edges, nodeline, shift_interline)

    # lines is the list of all graph lines to print
    lines = [nodeline]
    if add_padding_line:
        lines.append(_getpaddingline(idx, ncols, edges))
    lines.append(shift_interline)

    # make sure that there are as many graph lines as there are
    # log strings
    while len(text) < len(lines):
        text.append("")
    if len(lines) < len(text):
        extra_interline = ["|", " "] * (ncols + coldiff)
        while len(lines) < len(text):
            lines.append(extra_interline)

    # print lines
    indentation_level = max(ncols, ncols + coldiff)
    for (line, logstr) in zip(lines, text):
        ln = "%-*s %s" % (2 * indentation_level, "".join(line), logstr)
        ui.write(ln.rstrip() + '\n')

    # ... and start over
    state[0] = coldiff
    state[1] = idx
General Comments 0
You need to be logged in to leave comments. Login now