obsstore: add a flag for sha256 hashes...
Pierre-Yves David
r22850:b078e4dc default
@@ -1,1008 +1,1011 b''
1 1 # obsolete.py - obsolete markers handling
2 2 #
3 3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 4 # Logilab SA <contact@logilab.fr>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Obsolete marker handling
10 10
11 11 An obsolete marker maps an old changeset to a list of new
12 12 changesets. If the list of new changesets is empty, the old changeset
13 13 is said to be "killed". Otherwise, the old changeset is being
14 14 "replaced" by the new changesets.
15 15
16 16 Obsolete markers can be used to record and distribute changeset graph
17 17 transformations performed by history rewrite operations, and help
18 18 build new tools to reconcile conflicting rewrite actions. To
19 19 facilitate conflict resolution, markers include various annotations
20 20 besides old and new changeset identifiers, such as creation date or
21 21 author name.
22 22
23 23 The old obsoleted changeset is called a "precursor" and possible
24 24 replacements are called "successors". Markers that use changeset X as
25 25 a precursor are called "successor markers of X" because they hold
26 26 information about the successors of X. Markers that use changeset Y as
27 27 a successor are called "precursor markers of Y" because they hold
28 28 information about the precursors of Y.
29 29
30 30 Examples:
31 31
32 32 - When changeset A is replaced by changeset A', one marker is stored:
33 33
34 34 (A, (A',))
35 35
36 36 - When changesets A and B are folded into a new changeset C, two markers are
37 37 stored:
38 38
39 39 (A, (C,)) and (B, (C,))
40 40
41 41 - When changeset A is simply "pruned" from the graph, a marker is created:
42 42
43 43 (A, ())
44 44
45 45 - When changeset A is split into B and C, a single marker is used:
46 46
47 47 (A, (B, C))
48 48
49 49 We use a single marker to distinguish the "split" case from the "divergence"
50 50 case. If two independent operations rewrite the same changeset A into A' and
51 51 A'', we have an error case: divergent rewriting. We can detect it because
52 52 two markers will be created independently:
53 53
54 54 (A, (B,)) and (A, (C,))
55 55
56 56 Format
57 57 ------
58 58
59 59 Markers are stored in an append-only file in
60 60 '.hg/store/obsstore'.
61 61
62 62 The file starts with a version header:
63 63
64 64 - 1 unsigned byte: version number, starting at zero.
65 65
66 66 The header is followed by the markers. The marker format depends on the
67 67 version. See the comment associated with each format for details.
68 68
69 69 """
70 70 import struct
71 71 import util, base85, node
72 72 import phases
73 73 from i18n import _
74 74
75 75 _pack = struct.pack
76 76 _unpack = struct.unpack
77 77
78 78 _SEEK_END = 2 # os.SEEK_END was introduced in Python 2.5
79 79
80 80 # the obsolete feature is not mature enough to be enabled by default.
81 81 # you have to rely on a third party extension to enable this.
82 82 _enabled = False
83 83
84 84 ### obsolescence marker flag
85 85
86 86 ## bumpedfix flag
87 87 #
88 88 # When a changeset A' succeeds a changeset A which became public, we call A'
89 89 # "bumped" because it's a successor of a public changeset
90 90 #
91 91 # o A' (bumped)
92 92 # |`:
93 93 # | o A
94 94 # |/
95 95 # o Z
96 96 #
97 97 # The way to solve this situation is to create a new changeset Ad as a child
98 98 # of A. This changeset has the same content as A'. So the diff from A to A'
99 99 # is the same as the diff from A to Ad. Ad is marked as a successor of A'
100 100 #
101 101 # o Ad
102 102 # |`:
103 103 # | x A'
104 104 # |'|
105 105 # o | A
106 106 # |/
107 107 # o Z
108 108 #
109 109 # But by transitivity Ad is also a successor of A. To avoid having Ad marked
110 110 # as bumped too, we add the `bumpedfix` flag to the marker: <A', (Ad,)>.
111 111 # This flag means that the successor expresses the changes between the public
112 112 # and bumped versions and fixes the situation, breaking the transitivity of
113 113 # "bumped" here.
114 114 bumpedfix = 1
115 usingsha256 = 2
115 116
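Since the flags field is a bit field, flags combine with bitwise OR and are tested with bitwise AND. A small hypothetical sketch using the two flags defined above:

    flags = bumpedfix | usingsha256   # == 3, both flags set
    if flags & usingsha256:
        pass  # marker uses 32-byte hashes; the v0 encoder below rejects this
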
116 117 ## Parsing and writing of version "0"
117 118 #
118 119 # The header is followed by the markers. Each marker is made of:
119 120 #
120 121 # - 1 uint8 : number of new changesets "N", can be zero.
121 122 #
122 123 # - 1 uint32: metadata size "M" in bytes.
123 124 #
124 125 # - 1 byte: a bit field. It is reserved for flags used in common
125 126 # obsolete marker operations, to avoid repeated decoding of metadata
126 127 # entries.
127 128 #
128 129 # - 20 bytes: obsoleted changeset identifier.
129 130 #
130 131 # - N*20 bytes: new changesets identifiers.
131 132 #
132 133 # - M bytes: metadata as a sequence of nul-terminated strings. Each
133 134 # string contains a key and a value, separated by a colon ':', without
134 135 # additional encoding. Keys cannot contain '\0' or ':' and values
135 136 # cannot contain '\0'.
136 137 _fm0version = 0
137 138 _fm0fixed = '>BIB20s'
138 139 _fm0node = '20s'
139 140 _fm0fsize = struct.calcsize(_fm0fixed)
140 141 _fm0fnodesize = struct.calcsize(_fm0node)
141 142
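As a sketch of the fixed-size layout described above (assumed toy values, Python 2 byte strings), a marker's fixed part plus one successor can be packed and unpacked with the formats just defined:

    prec = 'p' * 20                  # fake 20-byte precursor id
    succ = 's' * 20                  # fake 20-byte successor id
    meta = 'user:alice'              # already-encoded metadata blob
    raw = _pack(_fm0fixed + _fm0node, 1, len(meta), 0, prec, succ) + meta
    numsuc, mdsize, flags, pre = _unpack(_fm0fixed, raw[:_fm0fsize])
    assert (numsuc, mdsize, flags, pre) == (1, len(meta), 0, prec)
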
142 143 def _fm0readmarkers(data, off=0):
143 144 # Loop on markers
144 145 l = len(data)
145 146 while off + _fm0fsize <= l:
146 147 # read fixed part
147 148 cur = data[off:off + _fm0fsize]
148 149 off += _fm0fsize
149 150 numsuc, mdsize, flags, pre = _unpack(_fm0fixed, cur)
150 151 # read replacement
151 152 sucs = ()
152 153 if numsuc:
153 154 s = (_fm0fnodesize * numsuc)
154 155 cur = data[off:off + s]
155 156 sucs = _unpack(_fm0node * numsuc, cur)
156 157 off += s
157 158 # read metadata
158 159 # (metadata will be decoded on demand)
159 160 metadata = data[off:off + mdsize]
160 161 if len(metadata) != mdsize:
161 162 raise util.Abort(_('parsing obsolete marker: metadata is too '
162 163 'short, %d bytes expected, got %d')
163 164 % (mdsize, len(metadata)))
164 165 off += mdsize
165 166 metadata = _fm0decodemeta(metadata)
166 167 try:
167 168 when, offset = metadata.pop('date', '0 0').split(' ')
168 169 date = float(when), int(offset)
169 170 except ValueError:
170 171 date = (0., 0)
171 172 parents = None
172 173 if 'p2' in metadata:
173 174 parents = (metadata.pop('p1', None), metadata.pop('p2', None))
174 175 elif 'p1' in metadata:
175 176 parents = (metadata.pop('p1', None),)
176 177 elif 'p0' in metadata:
177 178 parents = ()
178 179 if parents is not None:
179 180 try:
180 181 parents = tuple(node.bin(p) for p in parents)
181 182 # if parent content is not a nodeid, drop the data
182 183 for p in parents:
183 184 if len(p) != 20:
184 185 parents = None
185 186 break
186 187 except TypeError:
187 188 # if content cannot be translated to nodeid drop the data.
188 189 parents = None
189 190
190 191 metadata = tuple(sorted(metadata.iteritems()))
191 192
192 193 yield (pre, sucs, flags, metadata, date, parents)
193 194
194 195 def _fm0encodeonemarker(marker):
195 196 pre, sucs, flags, metadata, date, parents = marker
197 if flags & usingsha256:
198 raise util.Abort(_('cannot handle sha256 with old obsstore format'))
196 199 metadata = dict(metadata)
197 200 metadata['date'] = '%d %i' % date
198 201 if parents is not None:
199 202 if not parents:
200 203 # mark that we explicitly recorded no parents
201 204 metadata['p0'] = ''
202 205 for i, p in enumerate(parents):
203 206 metadata['p%i' % (i + 1)] = node.hex(p)
204 207 metadata = _fm0encodemeta(metadata)
205 208 numsuc = len(sucs)
206 209 format = _fm0fixed + (_fm0node * numsuc)
207 210 data = [numsuc, len(metadata), flags, pre]
208 211 data.extend(sucs)
209 212 return _pack(format, *data) + metadata
210 213
211 214 def _fm0encodemeta(meta):
212 215 """Return encoded metadata string to string mapping.
213 216
214 217 Assume no ':' in key and no '\0' in both key and value."""
215 218 for key, value in meta.iteritems():
216 219 if ':' in key or '\0' in key:
217 220 raise ValueError("':' and '\0' are forbidden in metadata key'")
218 221 if '\0' in value:
219 222 raise ValueError("':' is forbidden in metadata value'")
220 223 return '\0'.join(['%s:%s' % (k, meta[k]) for k in sorted(meta)])
221 224
222 225 def _fm0decodemeta(data):
223 226 """Return string to string dictionary from encoded version."""
224 227 d = {}
225 228 for l in data.split('\0'):
226 229 if l:
227 230 key, value = l.split(':')
228 231 d[key] = value
229 232 return d
230 233
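The two helpers above are inverses of each other; a quick sketch with assumed values:

    # round-trips any mapping that respects the '\0'/':' restrictions
    meta = {'user': 'alice', 'p1': '0' * 40}
    assert _fm0decodemeta(_fm0encodemeta(meta)) == meta
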
231 234
232 235 # mapping to read/write various marker formats
233 236 # <version> -> (decoder, encoder)
234 237 formats = {_fm0version: (_fm0readmarkers, _fm0encodeonemarker)}
235 238
236 239 def _readmarkers(data):
237 240 """Read and enumerate markers from raw data"""
238 241 off = 0
239 242 diskversion = _unpack('>B', data[off:off + 1])[0]
240 243 off += 1
241 244 if diskversion not in formats:
242 245 raise util.Abort(_('parsing obsolete marker: unknown version %r')
243 246 % diskversion)
244 247 return diskversion, formats[diskversion][0](data, off)
245 248
246 249 def encodemarkers(markers, addheader=False, version=_fm0version):
247 250 # Kept separate from flushmarkers(), it will be reused for
248 251 # markers exchange.
249 252 encodeone = formats[version][1]
250 253 if addheader:
251 254 yield _pack('>B', version)
252 255 for marker in markers:
253 256 yield encodeone(marker)
254 257
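A sketch of a full encode/decode round trip through `encodemarkers` and `_readmarkers`, using an assumed toy marker tuple (a prune marker: no successors):

    m = ('p' * 20, (), 0, (), (0.0, 0), None)
    data = ''.join(encodemarkers([m], addheader=True))
    version, decoded = _readmarkers(data)
    assert version == _fm0version and list(decoded) == [m]
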
255 258
256 259 class marker(object):
257 260 """Wrap obsolete marker raw data"""
258 261
259 262 def __init__(self, repo, data):
260 263 # the repo argument will be used to create changectx in later version
261 264 self._repo = repo
262 265 self._data = data
263 266 self._decodedmeta = None
264 267
265 268 def __hash__(self):
266 269 return hash(self._data)
267 270
268 271 def __eq__(self, other):
269 272 if type(other) != type(self):
270 273 return False
271 274 return self._data == other._data
272 275
273 276 def precnode(self):
274 277 """Precursor changeset node identifier"""
275 278 return self._data[0]
276 279
277 280 def succnodes(self):
278 281 """List of successor changesets node identifiers"""
279 282 return self._data[1]
280 283
281 284 def parentnodes(self):
282 285 """Parents of the precursors (None if not recorded)"""
283 286 return self._data[5]
284 287
285 288 def metadata(self):
286 289 """Decoded metadata dictionary"""
287 290 return dict(self._data[3])
288 291
289 292 def date(self):
290 293 """Creation date as (unixtime, offset)"""
291 294 return self._data[4]
292 295
293 296 def flags(self):
294 297 """The flags field of the marker"""
295 298 return self._data[2]
296 299
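A sketch of the accessors below with an assumed raw tuple (the repo argument is unused for plain field access, so None suffices here):

    m = marker(None, ('p' * 20, ('s' * 20,), 0, (), (0.0, 0), None))
    assert m.precnode() == 'p' * 20
    assert m.succnodes() == ('s' * 20,)
    assert m.flags() == 0
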
297 300 class obsstore(object):
298 301 """Store obsolete markers
299 302
300 303 Markers can be accessed with three mappings:
301 304 - precursors[x] -> set(markers on precursors edges of x)
302 305 - successors[x] -> set(markers on successors edges of x)
303 306 - children[x] -> set(markers on precursors edges of children(x))
304 307 """
305 308
306 309 fields = ('prec', 'succs', 'flag', 'meta', 'date', 'parents')
307 310 # prec: nodeid, precursor changesets
308 311 # succs: tuple of nodeid, successor changesets (0-N length)
309 312 # flag: integer, flag field carrying modifier for the markers (see doc)
310 313 # meta: binary blob, encoded metadata dictionary
311 314 # date: (float, int) tuple, date of marker creation
312 315 # parents: (tuple of nodeid) or None, parents of precursors
313 316 # None is used when no data has been recorded
314 317
315 318 def __init__(self, sopener):
316 319 # caches for various obsolescence related data
317 320 self.caches = {}
318 321 self._all = []
319 322 self.precursors = {}
320 323 self.successors = {}
321 324 self.children = {}
322 325 self.sopener = sopener
323 326 data = sopener.tryread('obsstore')
324 327 self._version = _fm0version
325 328 if data:
326 329 self._version, markers = _readmarkers(data)
327 330 self._load(markers)
328 331
329 332 def __iter__(self):
330 333 return iter(self._all)
331 334
332 335 def __len__(self):
333 336 return len(self._all)
334 337
335 338 def __nonzero__(self):
336 339 return bool(self._all)
337 340
338 341 def create(self, transaction, prec, succs=(), flag=0, parents=None,
339 342 date=None, metadata=None):
340 343 """obsolete: add a new obsolete marker
341 344
342 345 * ensure it is hashable
343 346 * check mandatory metadata
344 347 * encode metadata
345 348
346 349 If you are a human writing code creating markers you want to use the
347 350 `createmarkers` function in this module instead.
348 351
349 352 Return True if a new marker has been added, False if the marker
350 353 already existed (no-op).
351 354 """
352 355 if metadata is None:
353 356 metadata = {}
354 357 if date is None:
355 358 if 'date' in metadata:
356 359 # as a courtesy for out-of-tree extensions
357 360 date = util.parsedate(metadata.pop('date'))
358 361 else:
359 362 date = util.makedate()
360 363 if len(prec) != 20:
361 364 raise ValueError(prec)
362 365 for succ in succs:
363 366 if len(succ) != 20:
364 367 raise ValueError(succ)
365 368 if prec in succs:
366 369 raise ValueError(_('in-marker cycle with %s') % node.hex(prec))
367 370
368 371 metadata = tuple(sorted(metadata.iteritems()))
369 372
370 373 marker = (str(prec), tuple(succs), int(flag), metadata, date, parents)
371 374 return bool(self.add(transaction, [marker]))
372 375
373 376 def add(self, transaction, markers):
374 377 """Add new markers to the store
375 378
376 379 Take care of filtering out duplicates.
377 380 Return the number of new markers."""
378 381 if not _enabled:
379 382 raise util.Abort('obsolete feature is not enabled on this repo')
380 383 known = set(self._all)
381 384 new = []
382 385 for m in markers:
383 386 if m not in known:
384 387 known.add(m)
385 388 new.append(m)
386 389 if new:
387 390 f = self.sopener('obsstore', 'ab')
388 391 try:
389 392 # Whether the file's current position is at the beginning or at
390 393 # the end after opening a file for appending is implementation
391 394 # defined. So we must seek to the end before calling tell(),
392 395 # or we may get a zero offset for non-zero sized files on
393 396 # some platforms (issue3543).
394 397 f.seek(0, _SEEK_END)
395 398 offset = f.tell()
396 399 transaction.add('obsstore', offset)
397 400 # offset == 0: new file - add the version header
398 401 for bytes in encodemarkers(new, offset == 0, self._version):
399 402 f.write(bytes)
400 403 finally:
401 404 # XXX: f.close() == filecache invalidation == obsstore rebuilt.
402 405 # call 'filecacheentry.refresh()' here
403 406 f.close()
404 407 self._load(new)
405 408 # new markers *may* have changed several sets. invalidate the cache.
406 409 self.caches.clear()
407 410 # records the number of new markers for the transaction hooks
408 411 previous = int(transaction.hookargs.get('new_obsmarkers', '0'))
409 412 transaction.hookargs['new_obsmarkers'] = str(previous + len(new))
410 413 return len(new)
411 414
412 415 def mergemarkers(self, transaction, data):
413 416 """merge a binary stream of markers inside the obsstore
414 417
415 418 Returns the number of new markers added."""
416 419 version, markers = _readmarkers(data)
417 420 return self.add(transaction, markers)
418 421
419 422 def _load(self, markers):
420 423 for mark in markers:
421 424 self._all.append(mark)
422 425 pre, sucs = mark[:2]
423 426 self.successors.setdefault(pre, set()).add(mark)
424 427 for suc in sucs:
425 428 self.precursors.setdefault(suc, set()).add(mark)
426 429 parents = mark[5]
427 430 if parents is not None:
428 431 for p in parents:
429 432 self.children.setdefault(p, set()).add(mark)
430 433 if node.nullid in self.precursors:
431 434 raise util.Abort(_('bad obsolescence marker detected: '
432 435 'invalid successors nullid'))
433 436 def relevantmarkers(self, nodes):
434 437 """return a set of all obsolescence markers relevant to a set of nodes.
435 438
436 439 "relevant" to a set of nodes mean:
437 440
438 441 - marker that use this changeset as successor
439 442 - prune marker of direct children on this changeset
440 443 - recursive application of the two rules on precursors of these markers
441 444
442 445 It is a set so you cannot rely on order."""
443 446
444 447 pendingnodes = set(nodes)
445 448 seenmarkers = set()
446 449 seennodes = set(pendingnodes)
447 450 precursorsmarkers = self.precursors
448 451 children = self.children
449 452 while pendingnodes:
450 453 direct = set()
451 454 for current in pendingnodes:
452 455 direct.update(precursorsmarkers.get(current, ()))
453 456 pruned = [m for m in children.get(current, ()) if not m[1]]
454 457 direct.update(pruned)
455 458 direct -= seenmarkers
456 459 pendingnodes = set([m[0] for m in direct])
457 460 seenmarkers |= direct
458 461 pendingnodes -= seennodes
459 462 seennodes |= pendingnodes
460 463 return seenmarkers
461 464
462 465 def commonversion(versions):
463 466 """Return the newest version listed in both versions and our local formats.
464 467
465 468 Returns None if no common version exists.
466 469 """
467 470 versions.sort(reverse=True)
468 471 # search for the highest version known on both sides
469 472 for v in versions:
470 473 if v in formats:
471 474 return v
472 475 return None
473 476
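For example, with only the version-0 codec registered in `formats` above:

    assert commonversion([0, 1, 2]) == 0   # v0 is the only shared version
    assert commonversion([1, 2]) is None   # no common version
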
474 477 # arbitrarily picked to fit into the 8K limit from HTTP servers
475 478 # you have to take into account:
476 479 # - the version header
477 480 # - the base85 encoding
478 481 _maxpayload = 5300
479 482
480 483 def _pushkeyescape(markers):
481 484 """encode markers into a dict suitable for pushkey exchange
482 485
483 486 - binary data is base85 encoded
484 487 - split into chunks smaller than 5300 bytes"""
485 488 keys = {}
486 489 parts = []
487 490 currentlen = _maxpayload * 2 # ensure we create a new part
488 491 for marker in markers:
489 492 nextdata = _fm0encodeonemarker(marker)
490 493 if (len(nextdata) + currentlen > _maxpayload):
491 494 currentpart = []
492 495 currentlen = 0
493 496 parts.append(currentpart)
494 497 currentpart.append(nextdata)
495 498 currentlen += len(nextdata)
496 499 for idx, part in enumerate(reversed(parts)):
497 500 data = ''.join([_pack('>B', _fm0version)] + part)
498 501 keys['dump%i' % idx] = base85.b85encode(data)
499 502 return keys
500 503
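A sketch of the escape/unescape round trip, using an assumed toy marker (a single marker easily fits in one chunk):

    m = ('p' * 20, (), 0, (), (0.0, 0), None)
    keys = _pushkeyescape([m])
    assert keys.keys() == ['dump0']
    version, decoded = _readmarkers(base85.b85decode(keys['dump0']))
    assert list(decoded) == [m]
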
501 504 def listmarkers(repo):
502 505 """List markers over pushkey"""
503 506 if not repo.obsstore:
504 507 return {}
505 508 return _pushkeyescape(repo.obsstore)
506 509
507 510 def pushmarker(repo, key, old, new):
508 511 """Push markers over pushkey"""
509 512 if not key.startswith('dump'):
510 513 repo.ui.warn(_('unknown key: %r') % key)
511 514 return 0
512 515 if old:
513 516 repo.ui.warn(_('unexpected old value for %r') % key)
514 517 return 0
515 518 data = base85.b85decode(new)
516 519 lock = repo.lock()
517 520 try:
518 521 tr = repo.transaction('pushkey: obsolete markers')
519 522 try:
520 523 repo.obsstore.mergemarkers(tr, data)
521 524 tr.close()
522 525 return 1
523 526 finally:
524 527 tr.release()
525 528 finally:
526 529 lock.release()
527 530
528 531 def getmarkers(repo, nodes=None):
529 532 """returns markers known in a repository
530 533
531 534 If <nodes> is specified, only markers "relevant" to those nodes are
532 535 returned"""
533 536 if nodes is None:
534 537 rawmarkers = repo.obsstore
535 538 else:
536 539 rawmarkers = repo.obsstore.relevantmarkers(nodes)
537 540
538 541 for markerdata in rawmarkers:
539 542 yield marker(repo, markerdata)
540 543
541 544 def relevantmarkers(repo, node):
542 545 """all obsolete markers relevant to some revision"""
543 546 for markerdata in repo.obsstore.relevantmarkers(node):
544 547 yield marker(repo, markerdata)
545 548
546 549
547 550 def precursormarkers(ctx):
548 551 """obsolete marker marking this changeset as a successors"""
549 552 for data in ctx._repo.obsstore.precursors.get(ctx.node(), ()):
550 553 yield marker(ctx._repo, data)
551 554
552 555 def successormarkers(ctx):
553 556 """obsolete marker making this changeset obsolete"""
554 557 for data in ctx._repo.obsstore.successors.get(ctx.node(), ()):
555 558 yield marker(ctx._repo, data)
556 559
557 560 def allsuccessors(obsstore, nodes, ignoreflags=0):
558 561 """Yield node for every successor of <nodes>.
559 562
560 563 Some successors may be unknown locally.
561 564
562 565 This is a linear yield unsuited to detecting split changesets. It includes
563 566 initial nodes too."""
564 567 remaining = set(nodes)
565 568 seen = set(remaining)
566 569 while remaining:
567 570 current = remaining.pop()
568 571 yield current
569 572 for mark in obsstore.successors.get(current, ()):
570 573 # ignore marker flagged with specified flag
571 574 if mark[2] & ignoreflags:
572 575 continue
573 576 for suc in mark[1]:
574 577 if suc not in seen:
575 578 seen.add(suc)
576 579 remaining.add(suc)
577 580
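A toy sketch with a stand-in store object (short strings instead of real 20-byte nodes): one marker records that A was rewritten into B, and the traversal yields both:

    class fakestore(object):
        successors = {'A': [('A', ('B',), 0, (), (0.0, 0), None)]}
    assert set(allsuccessors(fakestore(), ['A'])) == set(['A', 'B'])
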
578 581 def allprecursors(obsstore, nodes, ignoreflags=0):
579 582 """Yield node for every precursors of <nodes>.
580 583
581 584 Some precursors may be unknown locally.
582 585
583 586 This is a linear yield unsuited to detecting folded changesets. It includes
584 587 initial nodes too."""
585 588
586 589 remaining = set(nodes)
587 590 seen = set(remaining)
588 591 while remaining:
589 592 current = remaining.pop()
590 593 yield current
591 594 for mark in obsstore.precursors.get(current, ()):
592 595 # ignore marker flagged with specified flag
593 596 if mark[2] & ignoreflags:
594 597 continue
595 598 suc = mark[0]
596 599 if suc not in seen:
597 600 seen.add(suc)
598 601 remaining.add(suc)
599 602
600 603 def foreground(repo, nodes):
601 604 """return all nodes in the "foreground" of other node
602 605
603 606 The foreground of a revision is anything reachable using parent -> children
604 607 or precursor -> successor relation. It is very similar to "descendant" but
605 608 augmented with obsolescence information.
606 609
607 610 Beware that obsolescence cycles may appear in complex situations.
608 611 """
609 612 repo = repo.unfiltered()
610 613 foreground = set(repo.set('%ln::', nodes))
611 614 if repo.obsstore:
612 615 # We only need this complicated logic if there is obsolescence
613 616 # XXX will probably deserve an optimised revset.
614 617 nm = repo.changelog.nodemap
615 618 plen = -1
616 619 # compute the whole set of successors or descendants
617 620 while len(foreground) != plen:
618 621 plen = len(foreground)
619 622 succs = set(c.node() for c in foreground)
620 623 mutable = [c.node() for c in foreground if c.mutable()]
621 624 succs.update(allsuccessors(repo.obsstore, mutable))
622 625 known = (n for n in succs if n in nm)
623 626 foreground = set(repo.set('%ln::', known))
624 627 return set(c.node() for c in foreground)
625 628
626 629
627 630 def successorssets(repo, initialnode, cache=None):
628 631 """Return all set of successors of initial nodes
629 632
629 632 The successors set of a changeset A is a group of revisions that succeed
631 634 A. It succeeds A as a consistent whole, each revision being only a partial
632 635 replacement. The successors set contains non-obsolete changesets only.
633 636
634 637 This function returns the full list of successor sets which is why it
635 638 returns a list of tuples and not just a single tuple. Each tuple is a valid
635 638 successors set. Note that (A,) may be a valid successors set for changeset A
637 640 (see below).
638 641
639 642 In most cases, a changeset A will have a single element (e.g. the changeset
640 643 A is replaced by A') in its successors set. Though, it is also common for a
641 644 changeset A to have no elements in its successor set (e.g. the changeset
642 645 has been pruned). Therefore, the returned list of successors sets will be
643 646 [(A',)] or [], respectively.
644 647
645 648 When a changeset A is split into A' and B', however, it will result in a
646 649 successors set containing more than a single element, i.e. [(A',B')].
647 650 Divergent changesets will result in multiple successors sets, i.e. [(A',),
648 651 (A'')].
649 652
650 653 If a changeset A is not obsolete, then it will conceptually have no
651 654 successors set. To distinguish this from a pruned changeset, the successor
652 655 set will only contain itself, i.e. [(A,)].
653 656
654 657 Finally, successors unknown locally are considered to be pruned (obsoleted
655 658 without any successors).
656 659
657 660 The optional `cache` parameter is a dictionary that may contain precomputed
658 661 successors sets. It is meant to reuse the computation of a previous call to
659 662 `successorssets` when multiple calls are made at the same time. The cache
660 663 dictionary is updated in place. The caller is responsible for its live
661 664 spawn. Code that makes multiple calls to `successorssets` *must* use this
662 665 cache mechanism or suffer terrible performances.
663 666
664 667 """
665 668
666 669 succmarkers = repo.obsstore.successors
667 670
668 671 # Stack of nodes we search successors sets for
669 672 toproceed = [initialnode]
670 673 # set version of above list for fast loop detection
671 674 # element added to "toproceed" must be added here
672 675 stackedset = set(toproceed)
673 676 if cache is None:
674 677 cache = {}
675 678
676 679 # This while loop is the flattened version of a recursive search for
677 680 # successors sets
678 681 #
679 682 # def successorssets(x):
680 683 # successors = directsuccessors(x)
681 684 # ss = [[]]
682 685 # for succ in directsuccessors(x):
683 686 # # product as in itertools cartesian product
684 687 # ss = product(ss, successorssets(succ))
685 688 # return ss
686 689 #
687 690 # But we can not use plain recursive calls here:
688 691 # - that would blow the python call stack
689 692 # - obsolescence markers may have cycles, we need to handle them.
690 693 #
691 694 # The `toproceed` list acts as our call stack. Every node we search
692 695 # successors sets for is stacked there.
693 696 #
694 697 # The `stackedset` is the set version of this stack, used to check if a
695 698 # node is already stacked. This check is used to detect cycles and
696 699 # prevent infinite loops.
697 700 #
698 701 # successors sets of all nodes are stored in the `cache` dictionary.
699 702 #
700 703 # After this while loop ends we use the cache to return the successors sets
701 704 # for the node requested by the caller.
702 705 while toproceed:
703 706 # Every iteration tries to compute the successors sets of the topmost
704 707 # node of the stack: CURRENT.
705 708 #
706 709 # There are four possible outcomes:
707 710 #
708 711 # 1) We already know the successors sets of CURRENT:
709 712 # -> mission accomplished, pop it from the stack.
710 713 # 2) Node is not obsolete:
711 714 # -> the node is its own successors sets. Add it to the cache.
712 715 # 3) We do not know successors set of direct successors of CURRENT:
713 716 # -> We add those successors to the stack.
714 717 # 4) We know successors sets of all direct successors of CURRENT:
715 718 # -> We can compute CURRENT successors set and add it to the
716 719 # cache.
717 720 #
718 721 current = toproceed[-1]
719 722 if current in cache:
720 723 # case (1): We already know the successors sets
721 724 stackedset.remove(toproceed.pop())
722 725 elif current not in succmarkers:
723 726 # case (2): The node is not obsolete.
724 727 if current in repo:
725 728 # We have a valid last successor.
726 729 cache[current] = [(current,)]
727 730 else:
728 731 # Final obsolete version is unknown locally.
729 732 # Do not count that as a valid successor
730 733 cache[current] = []
731 734 else:
732 735 # cases (3) and (4)
733 736 #
734 737 # We proceed in two phases. Phase 1 aims to distinguish case (3)
735 738 # from case (4):
736 739 #
737 740 # For each direct successors of CURRENT, we check whether its
738 741 # successors sets are known. If they are not, we stack the
739 742 # unknown node and proceed to the next iteration of the while
740 743 # loop. (case 3)
741 744 #
742 745 # During this step, we may detect obsolescence cycles: a node
743 746 # with unknown successors sets but already in the call stack.
744 747 # In such a situation, we arbitrarily set the successors sets of
745 748 # the node to nothing (node pruned) to break the cycle.
746 749 #
747 750 # If no break was encountered we proceed to phase 2.
748 751 #
749 752 # Phase 2 computes successors sets of CURRENT (case 4); see details
750 753 # in phase 2 itself.
751 754 #
752 755 # Note the two levels of iteration in each phase.
753 756 # - The first one handles obsolescence markers using CURRENT as
754 757 # precursor (successors markers of CURRENT).
755 758 #
756 759 # Having multiple entries here means divergence.
757 760 #
758 761 # - The second one handles successors defined in each marker.
759 762 #
760 763 # Having none means a pruned node, multiple successors mean a split,
761 764 # and a single successor is a standard replacement.
762 765 #
763 766 for mark in sorted(succmarkers[current]):
764 767 for suc in mark[1]:
765 768 if suc not in cache:
766 769 if suc in stackedset:
767 770 # cycle breaking
768 771 cache[suc] = []
769 772 else:
770 773 # case (3) If we have not computed successors sets
771 774 # of one of those successors we add it to the
772 775 # `toproceed` stack and stop all work for this
773 776 # iteration.
774 777 toproceed.append(suc)
775 778 stackedset.add(suc)
776 779 break
777 780 else:
778 781 continue
779 782 break
780 783 else:
781 784 # case (4): we know all successors sets of all direct
782 785 # successors
783 786 #
784 787 # Successors set contributed by each marker depends on the
785 788 # successors sets of all its "successors" nodes.
786 789 #
787 790 # Each different marker is a divergence in the obsolescence
788 791 # history. It contributes successors sets distinct from other
789 792 # markers.
790 793 #
791 794 # Within a marker, a successor may have divergent successors
792 795 # sets. In such a case, the marker will contribute multiple
793 796 # divergent successors sets. If multiple successors have
794 797 # divergent successors sets, a Cartesian product is used.
795 798 #
796 799 # At the end we post-process successors sets to remove
797 800 # duplicated entries and successors sets that are strict subsets of
798 801 # another one.
799 802 succssets = []
800 803 for mark in sorted(succmarkers[current]):
801 804 # successors sets contributed by this marker
802 805 markss = [[]]
803 806 for suc in mark[1]:
804 807 # Cartesian product with previous successors
805 808 productresult = []
806 809 for prefix in markss:
807 810 for suffix in cache[suc]:
808 811 newss = list(prefix)
809 812 for part in suffix:
810 813 # do not duplicate entries in the successors set;
811 814 # the first entry wins.
812 815 if part not in newss:
813 816 newss.append(part)
814 817 productresult.append(newss)
815 818 markss = productresult
816 819 succssets.extend(markss)
817 820 # remove duplicates and subsets
818 821 seen = []
819 822 final = []
820 823 candidate = sorted(((set(s), s) for s in succssets if s),
821 824 key=lambda x: len(x[1]), reverse=True)
822 825 for setversion, listversion in candidate:
823 826 for seenset in seen:
824 827 if setversion.issubset(seenset):
825 828 break
826 829 else:
827 830 final.append(listversion)
828 831 seen.append(setversion)
829 832 final.reverse() # put small successors set first
830 833 cache[current] = final
831 834 return cache[initialnode]
832 835
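A toy sketch of the base case, with stand-in repo and obsstore objects: a node with no successor markers that exists in the repo is its own successors set:

    class fakeobsstore(object):
        successors = {}
    class fakerepo(object):
        obsstore = fakeobsstore()
        def __contains__(self, node):
            return True
    assert successorssets(fakerepo(), 'A') == [('A',)]
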
833 836 def _knownrevs(repo, nodes):
834 837 """yield revision numbers of known nodes passed in parameters
835 838
836 839 Unknown revisions are silently ignored."""
837 840 torev = repo.changelog.nodemap.get
838 841 for n in nodes:
839 842 rev = torev(n)
840 843 if rev is not None:
841 844 yield rev
842 845
843 846 # mapping of 'set-name' -> <function to compute this set>
844 847 cachefuncs = {}
845 848 def cachefor(name):
846 849 """Decorator to register a function as computing the cache for a set"""
847 850 def decorator(func):
848 851 assert name not in cachefuncs
849 852 cachefuncs[name] = func
850 853 return func
851 854 return decorator
852 855
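A sketch of registering an additional set through the decorator (the name 'example' is hypothetical, not one of the real sets below):

    @cachefor('example')
    def _computeexampleset(repo):
        """an always-empty set, just to show the registration pattern"""
        return set()
    assert cachefuncs['example'] is _computeexampleset
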
853 856 def getrevs(repo, name):
854 857 """Return the set of revision that belong to the <name> set
855 858
856 859 Such access may compute the set and cache it for future use"""
857 860 repo = repo.unfiltered()
858 861 if not repo.obsstore:
859 862 return frozenset()
860 863 if name not in repo.obsstore.caches:
861 864 repo.obsstore.caches[name] = cachefuncs[name](repo)
862 865 return repo.obsstore.caches[name]
863 866
864 867 # To be simple we need to invalidate obsolescence caches when:
865 868 #
866 869 # - a new changeset is added
867 870 # - a public phase is changed
868 871 # - obsolescence markers are added
869 872 # - strip is used on a repo
870 873 def clearobscaches(repo):
871 874 """Remove all obsolescence related cache from a repo
872 875
873 876 This removes all caches in the obsstore if the obsstore already exists
874 877 on the repo.
875 878
876 879 (We could be smarter here given the exact event that trigger the cache
877 880 clearing)"""
878 881 # only clear the cache if there is obsstore data in this repo
879 882 if 'obsstore' in repo._filecache:
880 883 repo.obsstore.caches.clear()
881 884
882 885 @cachefor('obsolete')
883 886 def _computeobsoleteset(repo):
884 887 """the set of obsolete revisions"""
885 888 obs = set()
886 889 getrev = repo.changelog.nodemap.get
887 890 getphase = repo._phasecache.phase
888 891 for n in repo.obsstore.successors:
889 892 rev = getrev(n)
890 893 if rev is not None and getphase(repo, rev):
891 894 obs.add(rev)
892 895 return obs
893 896
894 897 @cachefor('unstable')
895 898 def _computeunstableset(repo):
896 899 """the set of non obsolete revisions with obsolete parents"""
897 900 # revset is not efficient enough here
898 901 # we do (obsolete()::) - obsolete() by hand
899 902 obs = getrevs(repo, 'obsolete')
900 903 if not obs:
901 904 return set()
902 905 cl = repo.changelog
903 906 return set(r for r in cl.descendants(obs) if r not in obs)
904 907
905 908 @cachefor('suspended')
906 909 def _computesuspendedset(repo):
907 910 """the set of obsolete parents with non obsolete descendants"""
908 911 suspended = repo.changelog.ancestors(getrevs(repo, 'unstable'))
909 912 return set(r for r in getrevs(repo, 'obsolete') if r in suspended)
910 913
911 914 @cachefor('extinct')
912 915 def _computeextinctset(repo):
913 916 """the set of obsolete parents without non obsolete descendants"""
914 917 return getrevs(repo, 'obsolete') - getrevs(repo, 'suspended')
915 918
916 919
917 920 @cachefor('bumped')
918 921 def _computebumpedset(repo):
919 922 """the set of revs trying to obsolete public revisions"""
920 923 bumped = set()
921 924 # util function (avoid attribute lookup in the loop)
922 925 phase = repo._phasecache.phase # would be faster to grab the full list
923 926 public = phases.public
924 927 cl = repo.changelog
925 928 torev = cl.nodemap.get
926 929 obs = getrevs(repo, 'obsolete')
927 930 for rev in repo:
928 931 # We only evaluate mutable, non-obsolete revisions
929 932 if (public < phase(repo, rev)) and (rev not in obs):
930 933 node = cl.node(rev)
931 934 # (future) A cache of precursors may be worth it if split is very common
932 935 for pnode in allprecursors(repo.obsstore, [node],
933 936 ignoreflags=bumpedfix):
934 937 prev = torev(pnode) # unfiltered! but so is phasecache
935 938 if (prev is not None) and (phase(repo, prev) <= public):
936 939 # we have a public precursor
937 940 bumped.add(rev)
938 941 break # Next draft!
939 942 return bumped
940 943
941 944 @cachefor('divergent')
942 945 def _computedivergentset(repo):
943 946 """the set of rev that compete to be the final successors of some revision.
944 947 """
945 948 divergent = set()
946 949 obsstore = repo.obsstore
947 950 newermap = {}
948 951 for ctx in repo.set('(not public()) - obsolete()'):
949 952 mark = obsstore.precursors.get(ctx.node(), ())
950 953 toprocess = set(mark)
951 954 while toprocess:
952 955 prec = toprocess.pop()[0]
953 956 if prec not in newermap:
954 957 successorssets(repo, prec, newermap)
955 958 newer = [n for n in newermap[prec] if n]
956 959 if len(newer) > 1:
957 960 divergent.add(ctx.rev())
958 961 break
959 962 toprocess.update(obsstore.precursors.get(prec, ()))
960 963 return divergent
961 964
962 965
963 966 def createmarkers(repo, relations, flag=0, date=None, metadata=None):
964 967 """Add obsolete markers between changesets in a repo
965 968
966 969 <relations> must be an iterable of (<old>, (<new>, ...)[,{metadata}])
967 970 tuples. `old` and `new` are changectxs. metadata is an optional dictionary
968 971 containing metadata for this marker only. It is merged with the global
969 972 metadata specified through the `metadata` argument of this function.
970 973
971 974 Trying to obsolete a public changeset will raise an exception.
972 975
973 976 Current user and date are used except if specified otherwise in the
974 977 metadata attribute.
975 978
976 979 This function operates within a transaction of its own, but does
977 980 not take any lock on the repo.
978 981 """
979 982 # prepare metadata
980 983 if metadata is None:
981 984 metadata = {}
982 985 if 'user' not in metadata:
983 986 metadata['user'] = repo.ui.username()
984 987 tr = repo.transaction('add-obsolescence-marker')
985 988 try:
986 989 for rel in relations:
987 990 prec = rel[0]
988 991 sucs = rel[1]
989 992 localmetadata = metadata.copy()
990 993 if 2 < len(rel):
991 994 localmetadata.update(rel[2])
992 995
993 996 if not prec.mutable():
994 997 raise util.Abort("cannot obsolete immutable changeset: %s"
995 998 % prec)
996 999 nprec = prec.node()
997 1000 nsucs = tuple(s.node() for s in sucs)
998 1001 npare = None
999 1002 if not nsucs:
1000 1003 npare = tuple(p.node() for p in prec.parents())
1001 1004 if nprec in nsucs:
1002 1005 raise util.Abort("changeset %s cannot obsolete itself" % prec)
1003 1006 repo.obsstore.create(tr, nprec, nsucs, flag, parents=npare,
1004 1007 date=date, metadata=localmetadata)
1005 1008 repo.filteredrevcache.clear()
1006 1009 tr.close()
1007 1010 finally:
1008 1011 tr.release()
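A hypothetical usage sketch (not runnable standalone: `repo` is assumed to be a locked localrepo and `old`/`new` changectx objects), recording that `old` was rewritten into `new`:

    # mark `old` as rewritten into `new`, with per-marker metadata
    createmarkers(repo, [(old, (new,))], metadata={'note': 'amended'})
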