##// END OF EJS Templates
dagop: factor out pfunc from revancestors() generator...
Yuya Nishihara -
r33077:58ebb384 default
parent child Browse files
Show More
@@ -1,454 +1,454 b''
1 1 # dagop.py - graph ancestry and topology algorithm for revset
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 error,
14 14 mdiff,
15 15 node,
16 16 patch,
17 17 smartset,
18 18 )
19 19
20 20 baseset = smartset.baseset
21 21 generatorset = smartset.generatorset
22 22
23 23 # possible maximum depth between null and wdir()
24 24 _maxlogdepth = 0x80000000
25 25
def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth):
    """Yield revisions in ``revs`` and their ancestors, highest rev first.

    If ``followfirst`` is true, only the first parent of each revision is
    followed.  Revisions whose depth is smaller than ``startdepth`` are
    skipped (a rev in ``revs`` itself has depth 0), and the scan stops at
    ``stopdepth`` (exclusive).  Either depth bound may be None for "no
    bound".  Raises ProgrammingError on a negative stopdepth.
    """
    if followfirst:
        cut = 1  # slice bound for parent lists: keep first parent only
    else:
        cut = None
    if startdepth is None:
        startdepth = 0
    if stopdepth is None:
        stopdepth = _maxlogdepth
    if stopdepth == 0:
        # empty depth window: nothing can be yielded
        return
    if stopdepth < 0:
        raise error.ProgrammingError('negative stopdepth')

    cl = repo.changelog
    def pfunc(rev):
        # Return the parent revisions of rev, honoring the followfirst cut.
        try:
            return cl.parentrevs(rev)[:cut]
        except error.WdirUnsupported:
            # wdir() has no changelog entry; query the context objects
            # instead -- presumably only the working-directory rev hits
            # this path
            return (pctx.rev() for pctx in repo[rev].parents()[:cut])

    # load input revs lazily to heap so earlier revisions can be yielded
    # without fully computing the input revs
    revs.sort(reverse=True)
    irevs = iter(revs)
    pendingheap = []  # [(-rev, depth), ...] (i.e. lower depth first)

    inputrev = next(irevs, None)
    if inputrev is not None:
        heapq.heappush(pendingheap, (-inputrev, 0))

    lastrev = None
    while pendingheap:
        currev, curdepth = heapq.heappop(pendingheap)
        currev = -currev  # heap is a min-heap, so revs are stored negated
        if currev == inputrev:
            # the just-popped rev was the frontier input rev; pull the next
            # input rev onto the heap so input is consumed lazily
            inputrev = next(irevs, None)
            if inputrev is not None:
                heapq.heappush(pendingheap, (-inputrev, 0))
        # rescan parents until curdepth >= startdepth because queued entries
        # of the same revision are iterated from the lowest depth
        foundnew = (currev != lastrev)
        if foundnew and curdepth >= startdepth:
            lastrev = currev
            yield currev
        pdepth = curdepth + 1
        if foundnew and pdepth < stopdepth:
            for prev in pfunc(currev):
                if prev != node.nullrev:
                    heapq.heappush(pendingheap, (-prev, pdepth))
76 76
def revancestors(repo, revs, followfirst, startdepth=None, stopdepth=None):
    """Like revlog.ancestors(), but supports additional options, includes
    the given revs themselves, and returns a smartset

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.
    """
    return generatorset(
        _genrevancestors(repo, revs, followfirst, startdepth, stopdepth),
        iterasc=False)
86 86
def _genrevdescendants(repo, revs, followfirst):
    """Yield descendants of ``revs`` in ascending order, including the
    revs themselves; only first parents are followed when ``followfirst``
    is true."""
    cut = 1 if followfirst else None
    cl = repo.changelog
    lowest = revs.min()
    nullrev = node.nullrev
    if lowest == nullrev:
        # Everything descends from the null revision, so emit it and then
        # the entire changelog.
        # Are there nodes with a null first parent and a non-null
        # second one? Maybe. Do we care? Probably not.
        yield lowest
        for rev in cl:
            yield rev
        return
    descendants = set(revs)
    for rev in cl.revs(lowest):
        if rev in descendants:
            yield rev
            continue
        # a rev is a descendant iff one of its (followed) parents is
        if any(p != nullrev and p in descendants
               for p in cl.parentrevs(rev)[:cut]):
            descendants.add(rev)
            yield rev
113 113
def revdescendants(repo, revs, followfirst):
    """Like revlog.descendants() but supports additional options, includes
    the given revs themselves, and returns a smartset"""
    return generatorset(_genrevdescendants(repo, revs, followfirst),
                        iterasc=True)
119 119
def _reachablerootspure(repo, minroot, roots, heads, includepath):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>).

    Pure-Python fallback used when the changelog lacks a native
    reachableroots implementation.  NOTE(review): returns a plain list when
    roots is empty but a baseset/set otherwise -- callers appear to
    normalize via baseset() anyway.
    """
    if not roots:
        return []
    parentrevs = repo.changelog.parentrevs
    roots = set(roots)
    visit = list(heads)
    reachable = set()
    seen = {}  # rev -> its parentrevs tuple, reused by the second pass
    # prefetch all the things! (because python is slow)
    reached = reachable.add
    dovisit = visit.append
    nextvisit = visit.pop
    # open-code the post-order traversal due to the tiny size of
    # sys.getrecursionlimit()
    while visit:
        rev = nextvisit()
        if rev in roots:
            reached(rev)
            if not includepath:
                # heads-only query: no need to walk past a root
                continue
        parents = parentrevs(rev)
        seen[rev] = parents
        for parent in parents:
            # minroot bounds the walk: nothing below it can be a root
            if parent >= minroot and parent not in seen:
                dovisit(parent)
    if not reachable:
        return baseset()
    if not includepath:
        return reachable
    # second pass (ascending): mark every seen rev whose parent is
    # reachable, filling in the <roots>::<heads> paths
    for rev in sorted(seen):
        for parent in seen[rev]:
            if parent in reachable:
                reached(rev)
    return reachable
157 157
def reachableroots(repo, roots, heads, includepath=False):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return baseset()
    minroot = roots.min()
    rootlist = list(roots)
    headlist = list(heads)
    try:
        # prefer the native (C) implementation when the changelog has one
        revs = repo.changelog.reachableroots(
            minroot, headlist, rootlist, includepath)
    except AttributeError:
        revs = _reachablerootspure(
            repo, minroot, rootlist, headlist, includepath)
    result = baseset(revs)
    result.sort()
    return result
174 174
def _changesrange(fctx1, fctx2, linerange2, diffopts):
    """Return `(diffinrange, linerange1)` where `diffinrange` is True
    if diff from fctx2 to fctx1 has changes in linerange2 and
    `linerange1` is the new line range for fctx1.
    """
    allblocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
    inrangeblocks, linerange1 = mdiff.blocksinrange(allblocks, linerange2)
    # '!' marks a changed block (as opposed to an unchanged '=' block)
    changed = any(btype == '!' for _, btype in inrangeblocks)
    return changed, linerange1
184 184
def blockancestors(fctx, fromline, toline, followfirst=False):
    """Yield ancestors of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.

    Each item is a ``(fctx, (fromline, toline))`` pair: the ancestor file
    context and the block's line range in that revision.  Only first
    parents are followed when ``followfirst`` is true.
    """
    diffopts = patch.diffopts(fctx._repo.ui)
    introrev = fctx.introrev()
    if fctx.rev() != introrev:
        # rebase fctx onto the changeset that actually introduced this file
        # revision, so linkrev-based bookkeeping below is consistent
        fctx = fctx.filectx(fctx.filenode(), changeid=introrev)
    visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
    while visit:
        # pop the entry with the highest (linkrev, filenode) key so
        # ancestors are emitted newest-first
        c, linerange2 = visit.pop(max(visit))
        pl = c.parents()
        if followfirst:
            pl = pl[:1]
        if not pl:
            # The block originates from the initial revision.
            yield c, linerange2
            continue
        inrange = False
        for p in pl:
            inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
            inrange = inrange or inrangep
            if linerange1[0] == linerange1[1]:
                # Parent's linerange is empty, meaning that the block got
                # introduced in this revision; no need to go further in this
                # branch.
                continue
            # Set _descendantrev with 'c' (a known descendant) so that, when
            # _adjustlinkrev is called for 'p', it receives this descendant
            # (as srcrev) instead of possibly the topmost introrev.
            p._descendantrev = c.rev()
            visit[p.linkrev(), p.filenode()] = p, linerange1
        if inrange:
            # the block changed between c and at least one parent
            yield c, linerange2
219 219
def blockdescendants(fctx, fromline, toline):
    """Yield descendants of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.

    Each item is a ``(fctx, (fromline, toline))`` pair, emitted in
    ascending filelog-revision order.
    """
    # First possibly yield 'fctx' if it has changes in range with respect to
    # its parents.
    try:
        c, linerange1 = next(blockancestors(fctx, fromline, toline))
    except StopIteration:
        pass
    else:
        # blockancestors yields fctx first only when fctx itself touched
        # the block
        if c == fctx:
            yield c, linerange1

    diffopts = patch.diffopts(fctx._repo.ui)
    fl = fctx.filelog()
    # seen: filelog rev -> (filectx, linerange of the block in that rev)
    seen = {fctx.filerev(): (fctx, (fromline, toline))}
    for i in fl.descendants([fctx.filerev()]):
        c = fctx.filectx(i)
        inrange = False
        for x in fl.parentrevs(i):
            try:
                p, linerange2 = seen[x]
            except KeyError:
                # nullrev or other branch
                continue
            inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
            inrange = inrange or inrangep
            # If revision 'i' has been seen (it's a merge), we assume that its
            # line range is the same independently of which parents was used
            # to compute it.
            assert i not in seen or seen[i][1] == linerange1, (
                'computed line range for %s is not consistent between '
                'ancestor branches' % c)
            seen[i] = c, linerange1
        if inrange:
            yield c, linerange1
257 257
def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revision already emitted
    # B) revision in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
    #   available, if such subgroup exist, we add REV to it and the subgroup is
    #   now awaiting for REV.parents() to be available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revision that have been emitted. They can be considered
    # unblocked as the graph generator is already aware of them so there is no
    # need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling this
    # set will force all other branches to wait until this branch is ready to be
    # emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') correspond to parents of any revision in the
    # group ('revs') that is not itself contained in the group. The main idea
    # of this algorithm is to delay as much as possible the emission of any
    # revision.  This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this mean we properly
    # retains new branches but gives up on any special ordering for ancestors
    # of merges. The implementation can be improved to handle this better.
    #
    # The first subgroup is special. It corresponds to all the revision that
    # were already emitted. The 'revs' lists is expected to be empty and the
    # 'blocked' set contains the parents revisions of already emitted revision.
    #
    # You could pre-seed the <parents> set of groups[0] to a specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []  # min-heap of negated revs awaiting processing
    pendingset = set()  # same revs, for O(1) membership tests

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterates on pending rev until after the current rev have been
        # processed.
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek for a subgroup blocked, waiting for the current revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are blocked
                # on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                # revs [5, 4] waiting for 1
                # revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines their drawing. This is currently not done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroups. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev:  # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given) or
            # if we emitted a whole disconnected set of the graph (reached a
            # root).  In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1:  # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going to
    # iterate over
    for g in groups:
        for r in g[0]:
            yield r
General Comments 0
You need to be logged in to leave comments. Login now