revlog: improve documentation...
Gregory Szorc
r27070:7860366b default
@@ -1,1684 +1,1737 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
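A minimal sketch of the offset/type packing performed by these helpers (illustrative only, assuming the three functions above are in scope):

    # The offset lives in the high bits; the low 16 bits carry type flags.
    packed = offset_type(4096, 1)       # offset 4096, flag bit 1 set
    assert getoffset(packed) == 4096
    assert gettype(packed) == 1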
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent nodes is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
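For the single-parent case handled above, the digest can be reproduced directly with hashlib; an illustrative sketch (the 20-byte values are stand-ins, not real nodes):

    import hashlib
    nullid20 = '\0' * 20                 # nullid is twenty zero bytes
    p1 = 'x' * 20                        # stand-in parent node
    s = hashlib.sha1(nullid20)           # same state as _nullhash.copy()
    s.update(p1)
    s.update('some file content')
    node = s.digest()                    # 20-byte node id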
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error as e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
97 97
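A short sketch of the three chunk encodings recognized above (assuming this decompress() is in scope; zlib output begins with 'x' at default settings):

    import zlib
    assert decompress('') == ''                                # empty chunk
    assert decompress('u' + 'raw bytes') == 'raw bytes'        # stored as-is
    assert decompress(zlib.compress('payload')) == 'payload'   # 'x' header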
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 20 bytes: parent 1 nodeid
104 104 # 20 bytes: parent 2 nodeid
105 105 # 20 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 versionformat = ">I"
153 153
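An illustrative round trip through the RevlogNG record format (a sketch, assuming indexformatng is in scope; the field values are made up):

    import struct
    # offset+flags, comp. length, uncomp. length, base rev, link rev,
    # p1 rev, p2 rev, node (padded to 32 bytes by the trailing 12x)
    entry = (0, 120, 250, 0, 0, -1, -1, '\x11' * 20)
    record = struct.pack(indexformatng, *entry)
    assert len(record) == 64                   # fixed 64-byte records
    assert struct.unpack(indexformatng, record) == entry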
154 154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
155 155 # signed integer)
156 156 _maxentrysize = 0x7fffffff
157 157
158 158 class revlogio(object):
159 159 def __init__(self):
160 160 self.size = struct.calcsize(indexformatng)
161 161
162 162 def parseindex(self, data, inline):
163 163 # call the C implementation to parse the index data
164 164 index, cache = parsers.parse_index2(data, inline)
165 165 return index, getattr(index, 'nodemap', None), cache
166 166
167 167 def packentry(self, entry, node, version, rev):
168 168 p = _pack(indexformatng, *entry)
169 169 if rev == 0:
170 170 p = _pack(versionformat, version) + p[4:]
171 171 return p
172 172
173 173 class revlog(object):
174 174 """
175 175 the underlying revision storage object
176 176
177 177 A revlog consists of two parts, an index and the revision data.
178 178
179 179 The index is a file with a fixed record size containing
180 180 information on each revision, including its nodeid (hash), the
181 181 nodeids of its parents, the position and offset of its data within
182 182 the data file, and the revision it's based on. Finally, each entry
183 183 contains a linkrev entry that can serve as a pointer to external
184 184 data.
185 185
186 186 The revision data itself is a linear collection of data chunks.
187 187 Each chunk represents a revision and is usually represented as a
188 188 delta against the previous chunk. To bound lookup time, runs of
189 189 deltas are limited to about 2 times the length of the original
190 190 version data. This makes retrieval of a version proportional to
191 191 its size, or O(1) relative to the number of revisions.
192 192
193 193 Both pieces of the revlog are written to in an append-only
194 194 fashion, which means we never need to rewrite a file to insert or
195 195 remove data, and can use some simple techniques to avoid the need
196 196 for locking while reading.
197 197 """
198 198 def __init__(self, opener, indexfile):
199 199 """
200 200 create a revlog object
201 201
202 202 opener is a function that abstracts the file opening operation
203 203 and can be used to implement COW semantics or the like.
204 204 """
205 205 self.indexfile = indexfile
206 206 self.datafile = indexfile[:-2] + ".d"
207 207 self.opener = opener
208 # 3-tuple of (node, rev, text) for a raw revision.
208 209 self._cache = None
210 # 2-tuple of (rev, baserev) defining the base revision the delta chain
211 # begins at for a revision.
209 212 self._basecache = None
213 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
210 214 self._chunkcache = (0, '')
215 # How much data to read and cache into the raw revlog data cache.
211 216 self._chunkcachesize = 65536
212 217 self._maxchainlen = None
213 218 self._aggressivemergedeltas = False
214 219 self.index = []
220 # Mapping of partial identifiers to full nodes.
215 221 self._pcache = {}
222 # Mapping of revision integer to full node.
216 223 self._nodecache = {nullid: nullrev}
217 224 self._nodepos = None
218 225
219 226 v = REVLOG_DEFAULT_VERSION
220 227 opts = getattr(opener, 'options', None)
221 228 if opts is not None:
222 229 if 'revlogv1' in opts:
223 230 if 'generaldelta' in opts:
224 231 v |= REVLOGGENERALDELTA
225 232 else:
226 233 v = 0
227 234 if 'chunkcachesize' in opts:
228 235 self._chunkcachesize = opts['chunkcachesize']
229 236 if 'maxchainlen' in opts:
230 237 self._maxchainlen = opts['maxchainlen']
231 238 if 'aggressivemergedeltas' in opts:
232 239 self._aggressivemergedeltas = opts['aggressivemergedeltas']
233 240 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
234 241
235 242 if self._chunkcachesize <= 0:
236 243 raise RevlogError(_('revlog chunk cache size %r is not greater '
237 244 'than 0') % self._chunkcachesize)
238 245 elif self._chunkcachesize & (self._chunkcachesize - 1):
239 246 raise RevlogError(_('revlog chunk cache size %r is not a power '
240 247 'of 2') % self._chunkcachesize)
241 248
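The power-of-two test above uses a standard bit trick; a brief self-contained sketch:

    # n & (n - 1) clears the lowest set bit, so it is zero exactly
    # when n has a single bit set, i.e. when n is a power of two.
    for n, is_pow2 in [(1, True), (65536, True), (65537, False)]:
        assert (n & (n - 1) == 0) == is_pow2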
242 249 indexdata = ''
243 250 self._initempty = True
244 251 try:
245 252 f = self.opener(self.indexfile)
246 253 indexdata = f.read()
247 254 f.close()
248 255 if len(indexdata) > 0:
249 256 v = struct.unpack(versionformat, indexdata[:4])[0]
250 257 self._initempty = False
251 258 except IOError as inst:
252 259 if inst.errno != errno.ENOENT:
253 260 raise
254 261
255 262 self.version = v
256 263 self._inline = v & REVLOGNGINLINEDATA
257 264 self._generaldelta = v & REVLOGGENERALDELTA
258 265 flags = v & ~0xFFFF
259 266 fmt = v & 0xFFFF
260 267 if fmt == REVLOGV0 and flags:
261 268 raise RevlogError(_("index %s unknown flags %#04x for format v0")
262 269 % (self.indexfile, flags >> 16))
263 270 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
264 271 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
265 272 % (self.indexfile, flags >> 16))
266 273 elif fmt > REVLOGNG:
267 274 raise RevlogError(_("index %s unknown format %d")
268 275 % (self.indexfile, fmt))
269 276
270 277 self._io = revlogio()
271 278 if self.version == REVLOGV0:
272 279 self._io = revlogoldio()
273 280 try:
274 281 d = self._io.parseindex(indexdata, self._inline)
275 282 except (ValueError, IndexError):
276 283 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
277 284 self.index, nodemap, self._chunkcache = d
278 285 if nodemap is not None:
279 286 self.nodemap = self._nodecache = nodemap
280 287 if not self._chunkcache:
281 288 self._chunkclear()
282 289 # revnum -> (chain-length, sum-delta-length)
283 290 self._chaininfocache = {}
284 291
285 292 def tip(self):
286 293 return self.node(len(self.index) - 2)
287 294 def __contains__(self, rev):
288 295 return 0 <= rev < len(self)
289 296 def __len__(self):
290 297 return len(self.index) - 1
291 298 def __iter__(self):
292 299 return iter(xrange(len(self)))
293 300 def revs(self, start=0, stop=None):
294 301 """iterate over all revs in this revlog (from start to stop)"""
295 302 step = 1
296 303 if stop is not None:
297 304 if start > stop:
298 305 step = -1
299 306 stop += step
300 307 else:
301 308 stop = len(self)
302 309 return xrange(start, stop, step)
303 310
304 311 @util.propertycache
305 312 def nodemap(self):
306 313 self.rev(self.node(0))
307 314 return self._nodecache
308 315
309 316 def hasnode(self, node):
310 317 try:
311 318 self.rev(node)
312 319 return True
313 320 except KeyError:
314 321 return False
315 322
316 323 def clearcaches(self):
317 324 try:
318 325 self._nodecache.clearcaches()
319 326 except AttributeError:
320 327 self._nodecache = {nullid: nullrev}
321 328 self._nodepos = None
322 329
323 330 def rev(self, node):
324 331 try:
325 332 return self._nodecache[node]
326 333 except TypeError:
327 334 raise
328 335 except RevlogError:
329 336 # parsers.c radix tree lookup failed
330 337 raise LookupError(node, self.indexfile, _('no node'))
331 338 except KeyError:
332 339 # pure python cache lookup failed
333 340 n = self._nodecache
334 341 i = self.index
335 342 p = self._nodepos
336 343 if p is None:
337 344 p = len(i) - 2
338 345 for r in xrange(p, -1, -1):
339 346 v = i[r][7]
340 347 n[v] = r
341 348 if v == node:
342 349 self._nodepos = r - 1
343 350 return r
344 351 raise LookupError(node, self.indexfile, _('no node'))
345 352
346 353 def node(self, rev):
347 354 return self.index[rev][7]
348 355 def linkrev(self, rev):
349 356 return self.index[rev][4]
350 357 def parents(self, node):
351 358 i = self.index
352 359 d = i[self.rev(node)]
353 360 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
354 361 def parentrevs(self, rev):
355 362 return self.index[rev][5:7]
356 363 def start(self, rev):
357 364 return int(self.index[rev][0] >> 16)
358 365 def end(self, rev):
359 366 return self.start(rev) + self.length(rev)
360 367 def length(self, rev):
361 368 return self.index[rev][1]
362 369 def chainbase(self, rev):
363 370 index = self.index
364 371 base = index[rev][3]
365 372 while base != rev:
366 373 rev = base
367 374 base = index[rev][3]
368 375 return base
369 376 def chainlen(self, rev):
370 377 return self._chaininfo(rev)[0]
371 378
372 379 def _chaininfo(self, rev):
373 380 chaininfocache = self._chaininfocache
374 381 if rev in chaininfocache:
375 382 return chaininfocache[rev]
376 383 index = self.index
377 384 generaldelta = self._generaldelta
378 385 iterrev = rev
379 386 e = index[iterrev]
380 387 clen = 0
381 388 compresseddeltalen = 0
382 389 while iterrev != e[3]:
383 390 clen += 1
384 391 compresseddeltalen += e[1]
385 392 if generaldelta:
386 393 iterrev = e[3]
387 394 else:
388 395 iterrev -= 1
389 396 if iterrev in chaininfocache:
390 397 t = chaininfocache[iterrev]
391 398 clen += t[0]
392 399 compresseddeltalen += t[1]
393 400 break
394 401 e = index[iterrev]
395 402 else:
396 403 # Add text length of base since decompressing that also takes
397 404 # work. For cache hits the length is already included.
398 405 compresseddeltalen += e[1]
399 406 r = (clen, compresseddeltalen)
400 407 chaininfocache[rev] = r
401 408 return r
402 409
403 410 def flags(self, rev):
404 411 return self.index[rev][0] & 0xFFFF
405 412 def rawsize(self, rev):
406 413 """return the length of the uncompressed text for a given revision"""
407 414 l = self.index[rev][2]
408 415 if l >= 0:
409 416 return l
410 417
411 418 t = self.revision(self.node(rev))
412 419 return len(t)
413 420 size = rawsize
414 421
415 422 def ancestors(self, revs, stoprev=0, inclusive=False):
416 423 """Generate the ancestors of 'revs' in reverse topological order.
417 424 Does not generate revs lower than stoprev.
418 425
419 426 See the documentation for ancestor.lazyancestors for more details."""
420 427
421 428 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
422 429 inclusive=inclusive)
423 430
424 431 def descendants(self, revs):
425 432 """Generate the descendants of 'revs' in revision order.
426 433
427 434 Yield a sequence of revision numbers starting with a child of
428 435 some rev in revs, i.e., each revision is *not* considered a
429 436 descendant of itself. Results are ordered by revision number (a
430 437 topological sort)."""
431 438 first = min(revs)
432 439 if first == nullrev:
433 440 for i in self:
434 441 yield i
435 442 return
436 443
437 444 seen = set(revs)
438 445 for i in self.revs(start=first + 1):
439 446 for x in self.parentrevs(i):
440 447 if x != nullrev and x in seen:
441 448 seen.add(i)
442 449 yield i
443 450 break
444 451
445 452 def findcommonmissing(self, common=None, heads=None):
446 453 """Return a tuple of the ancestors of common and the ancestors of heads
447 454 that are not ancestors of common. In revset terminology, we return the
448 455 tuple:
449 456
450 457 ::common, (::heads) - (::common)
451 458
452 459 The list is sorted by revision number, meaning it is
453 460 topologically sorted.
454 461
455 462 'heads' and 'common' are both lists of node IDs. If heads is
456 463 not supplied, uses all of the revlog's heads. If common is not
457 464 supplied, uses nullid."""
458 465 if common is None:
459 466 common = [nullid]
460 467 if heads is None:
461 468 heads = self.heads()
462 469
463 470 common = [self.rev(n) for n in common]
464 471 heads = [self.rev(n) for n in heads]
465 472
466 473 # we want the ancestors, but inclusive
467 474 class lazyset(object):
468 475 def __init__(self, lazyvalues):
469 476 self.addedvalues = set()
470 477 self.lazyvalues = lazyvalues
471 478
472 479 def __contains__(self, value):
473 480 return value in self.addedvalues or value in self.lazyvalues
474 481
475 482 def __iter__(self):
476 483 added = self.addedvalues
477 484 for r in added:
478 485 yield r
479 486 for r in self.lazyvalues:
480 487 if not r in added:
481 488 yield r
482 489
483 490 def add(self, value):
484 491 self.addedvalues.add(value)
485 492
486 493 def update(self, values):
487 494 self.addedvalues.update(values)
488 495
489 496 has = lazyset(self.ancestors(common))
490 497 has.add(nullrev)
491 498 has.update(common)
492 499
493 500 # take all ancestors from heads that aren't in has
494 501 missing = set()
495 502 visit = collections.deque(r for r in heads if r not in has)
496 503 while visit:
497 504 r = visit.popleft()
498 505 if r in missing:
499 506 continue
500 507 else:
501 508 missing.add(r)
502 509 for p in self.parentrevs(r):
503 510 if p not in has:
504 511 visit.append(p)
505 512 missing = list(missing)
506 513 missing.sort()
507 514 return has, [self.node(r) for r in missing]
508 515
509 516 def incrementalmissingrevs(self, common=None):
510 517 """Return an object that can be used to incrementally compute the
511 518 revision numbers of the ancestors of arbitrary sets that are not
512 519 ancestors of common. This is an ancestor.incrementalmissingancestors
513 520 object.
514 521
515 522 'common' is a list of revision numbers. If common is not supplied, uses
516 523 nullrev.
517 524 """
518 525 if common is None:
519 526 common = [nullrev]
520 527
521 528 return ancestor.incrementalmissingancestors(self.parentrevs, common)
522 529
523 530 def findmissingrevs(self, common=None, heads=None):
524 531 """Return the revision numbers of the ancestors of heads that
525 532 are not ancestors of common.
526 533
527 534 More specifically, return a list of revision numbers corresponding to
528 535 nodes N such that every N satisfies the following constraints:
529 536
530 537 1. N is an ancestor of some node in 'heads'
531 538 2. N is not an ancestor of any node in 'common'
532 539
533 540 The list is sorted by revision number, meaning it is
534 541 topologically sorted.
535 542
536 543 'heads' and 'common' are both lists of revision numbers. If heads is
537 544 not supplied, uses all of the revlog's heads. If common is not
538 545 supplied, uses nullid."""
539 546 if common is None:
540 547 common = [nullrev]
541 548 if heads is None:
542 549 heads = self.headrevs()
543 550
544 551 inc = self.incrementalmissingrevs(common=common)
545 552 return inc.missingancestors(heads)
546 553
547 554 def findmissing(self, common=None, heads=None):
548 555 """Return the ancestors of heads that are not ancestors of common.
549 556
550 557 More specifically, return a list of nodes N such that every N
551 558 satisfies the following constraints:
552 559
553 560 1. N is an ancestor of some node in 'heads'
554 561 2. N is not an ancestor of any node in 'common'
555 562
556 563 The list is sorted by revision number, meaning it is
557 564 topologically sorted.
558 565
559 566 'heads' and 'common' are both lists of node IDs. If heads is
560 567 not supplied, uses all of the revlog's heads. If common is not
561 568 supplied, uses nullid."""
562 569 if common is None:
563 570 common = [nullid]
564 571 if heads is None:
565 572 heads = self.heads()
566 573
567 574 common = [self.rev(n) for n in common]
568 575 heads = [self.rev(n) for n in heads]
569 576
570 577 inc = self.incrementalmissingrevs(common=common)
571 578 return [self.node(r) for r in inc.missingancestors(heads)]
572 579
573 580 def nodesbetween(self, roots=None, heads=None):
574 581 """Return a topological path from 'roots' to 'heads'.
575 582
576 583 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
577 584 topologically sorted list of all nodes N that satisfy both of
578 585 these constraints:
579 586
580 587 1. N is a descendant of some node in 'roots'
581 588 2. N is an ancestor of some node in 'heads'
582 589
583 590 Every node is considered to be both a descendant and an ancestor
584 591 of itself, so every reachable node in 'roots' and 'heads' will be
585 592 included in 'nodes'.
586 593
587 594 'outroots' is the list of reachable nodes in 'roots', i.e., the
588 595 subset of 'roots' that is returned in 'nodes'. Likewise,
589 596 'outheads' is the subset of 'heads' that is also in 'nodes'.
590 597
591 598 'roots' and 'heads' are both lists of node IDs. If 'roots' is
592 599 unspecified, uses nullid as the only root. If 'heads' is
593 600 unspecified, uses list of all of the revlog's heads."""
594 601 nonodes = ([], [], [])
595 602 if roots is not None:
596 603 roots = list(roots)
597 604 if not roots:
598 605 return nonodes
599 606 lowestrev = min([self.rev(n) for n in roots])
600 607 else:
601 608 roots = [nullid] # Everybody's a descendant of nullid
602 609 lowestrev = nullrev
603 610 if (lowestrev == nullrev) and (heads is None):
604 611 # We want _all_ the nodes!
605 612 return ([self.node(r) for r in self], [nullid], list(self.heads()))
606 613 if heads is None:
607 614 # All nodes are ancestors, so the latest ancestor is the last
608 615 # node.
609 616 highestrev = len(self) - 1
610 617 # Set ancestors to None to signal that every node is an ancestor.
611 618 ancestors = None
612 619 # Set heads to an empty dictionary for later discovery of heads
613 620 heads = {}
614 621 else:
615 622 heads = list(heads)
616 623 if not heads:
617 624 return nonodes
618 625 ancestors = set()
619 626 # Turn heads into a dictionary so we can remove 'fake' heads.
620 627 # Also, later we will be using it to filter out the heads we can't
621 628 # find from roots.
622 629 heads = dict.fromkeys(heads, False)
623 630 # Start at the top and keep marking parents until we're done.
624 631 nodestotag = set(heads)
625 632 # Remember where the top was so we can use it as a limit later.
626 633 highestrev = max([self.rev(n) for n in nodestotag])
627 634 while nodestotag:
628 635 # grab a node to tag
629 636 n = nodestotag.pop()
630 637 # Never tag nullid
631 638 if n == nullid:
632 639 continue
633 640 # A node's revision number represents its place in a
634 641 # topologically sorted list of nodes.
635 642 r = self.rev(n)
636 643 if r >= lowestrev:
637 644 if n not in ancestors:
638 645 # If we are possibly a descendant of one of the roots
639 646 # and we haven't already been marked as an ancestor
640 647 ancestors.add(n) # Mark as ancestor
641 648 # Add non-nullid parents to list of nodes to tag.
642 649 nodestotag.update([p for p in self.parents(n) if
643 650 p != nullid])
644 651 elif n in heads: # We've seen it before, is it a fake head?
645 652 # So it is, real heads should not be the ancestors of
646 653 # any other heads.
647 654 heads.pop(n)
648 655 if not ancestors:
649 656 return nonodes
650 657 # Now that we have our set of ancestors, we want to remove any
651 658 # roots that are not ancestors.
652 659
653 660 # If one of the roots was nullid, everything is included anyway.
654 661 if lowestrev > nullrev:
655 662 # But, since we weren't, let's recompute the lowest rev to not
656 663 # include roots that aren't ancestors.
657 664
658 665 # Filter out roots that aren't ancestors of heads
659 666 roots = [n for n in roots if n in ancestors]
660 667 # Recompute the lowest revision
661 668 if roots:
662 669 lowestrev = min([self.rev(n) for n in roots])
663 670 else:
664 671 # No more roots? Return empty list
665 672 return nonodes
666 673 else:
667 674 # We are descending from nullid, and don't need to care about
668 675 # any other roots.
669 676 lowestrev = nullrev
670 677 roots = [nullid]
671 678 # Transform our roots list into a set.
672 679 descendants = set(roots)
673 680 # Also, keep the original roots so we can filter out roots that aren't
674 681 # 'real' roots (i.e. are descended from other roots).
675 682 roots = descendants.copy()
676 683 # Our topologically sorted list of output nodes.
677 684 orderedout = []
678 685 # Don't start at nullid since we don't want nullid in our output list,
679 686 # and if nullid shows up in descendants, empty parents will look like
680 687 # they're descendants.
681 688 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
682 689 n = self.node(r)
683 690 isdescendant = False
684 691 if lowestrev == nullrev: # Everybody is a descendant of nullid
685 692 isdescendant = True
686 693 elif n in descendants:
687 694 # n is already a descendant
688 695 isdescendant = True
689 696 # This check only needs to be done here because all the roots
690 697 # will start being marked as descendants before the loop.
691 698 if n in roots:
692 699 # If n was a root, check if it's a 'real' root.
693 700 p = tuple(self.parents(n))
694 701 # If any of its parents are descendants, it's not a root.
695 702 if (p[0] in descendants) or (p[1] in descendants):
696 703 roots.remove(n)
697 704 else:
698 705 p = tuple(self.parents(n))
699 706 # A node is a descendant if either of its parents are
700 707 # descendants. (We seeded the descendants set with the roots
701 708 # up there, remember?)
702 709 if (p[0] in descendants) or (p[1] in descendants):
703 710 descendants.add(n)
704 711 isdescendant = True
705 712 if isdescendant and ((ancestors is None) or (n in ancestors)):
706 713 # Only include nodes that are both descendants and ancestors.
707 714 orderedout.append(n)
708 715 if (ancestors is not None) and (n in heads):
709 716 # We're trying to figure out which heads are reachable
710 717 # from roots.
711 718 # Mark this head as having been reached
712 719 heads[n] = True
713 720 elif ancestors is None:
714 721 # Otherwise, we're trying to discover the heads.
715 722 # Assume this is a head because if it isn't, the next step
716 723 # will eventually remove it.
717 724 heads[n] = True
718 725 # But, obviously its parents aren't.
719 726 for p in self.parents(n):
720 727 heads.pop(p, None)
721 728 heads = [n for n, flag in heads.iteritems() if flag]
722 729 roots = list(roots)
723 730 assert orderedout
724 731 assert roots
725 732 assert heads
726 733 return (orderedout, roots, heads)
727 734
728 735 def headrevs(self):
729 736 try:
730 737 return self.index.headrevs()
731 738 except AttributeError:
732 739 return self._headrevs()
733 740
734 741 def computephases(self, roots):
735 742 return self.index.computephasesmapsets(roots)
736 743
737 744 def _headrevs(self):
738 745 count = len(self)
739 746 if not count:
740 747 return [nullrev]
741 748 # we won't iter over filtered rev so nobody is a head at start
742 749 ishead = [0] * (count + 1)
743 750 index = self.index
744 751 for r in self:
745 752 ishead[r] = 1 # I may be a head
746 753 e = index[r]
747 754 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
748 755 return [r for r, val in enumerate(ishead) if val]
749 756
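A self-contained toy version of the head-marking pass above (a sketch only; -1 stands for nullrev and the parent table is made up):

    # Graph: 0 <- 1 <- 2 and 0 <- 3, so the heads are revs 2 and 3.
    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
    ishead = [1] * len(parents)        # optimistically mark every rev
    for r in range(len(parents)):
        for p in parents[r]:
            if p >= 0:
                ishead[p] = 0          # a parent can never be a head
    assert [r for r, v in enumerate(ishead) if v] == [2, 3]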
750 757 def heads(self, start=None, stop=None):
751 758 """return the list of all nodes that have no children
752 759
753 760 if start is specified, only heads that are descendants of
754 761 start will be returned
755 762 if stop is specified, it will consider all the revs from stop
756 763 as if they had no children
757 764 """
758 765 if start is None and stop is None:
759 766 if not len(self):
760 767 return [nullid]
761 768 return [self.node(r) for r in self.headrevs()]
762 769
763 770 if start is None:
764 771 start = nullid
765 772 if stop is None:
766 773 stop = []
767 774 stoprevs = set([self.rev(n) for n in stop])
768 775 startrev = self.rev(start)
769 776 reachable = set((startrev,))
770 777 heads = set((startrev,))
771 778
772 779 parentrevs = self.parentrevs
773 780 for r in self.revs(start=startrev + 1):
774 781 for p in parentrevs(r):
775 782 if p in reachable:
776 783 if r not in stoprevs:
777 784 reachable.add(r)
778 785 heads.add(r)
779 786 if p in heads and p not in stoprevs:
780 787 heads.remove(p)
781 788
782 789 return [self.node(r) for r in heads]
783 790
784 791 def children(self, node):
785 792 """find the children of a given node"""
786 793 c = []
787 794 p = self.rev(node)
788 795 for r in self.revs(start=p + 1):
789 796 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
790 797 if prevs:
791 798 for pr in prevs:
792 799 if pr == p:
793 800 c.append(self.node(r))
794 801 elif p == nullrev:
795 802 c.append(self.node(r))
796 803 return c
797 804
798 805 def descendant(self, start, end):
799 806 if start == nullrev:
800 807 return True
801 808 for i in self.descendants([start]):
802 809 if i == end:
803 810 return True
804 811 elif i > end:
805 812 break
806 813 return False
807 814
808 815 def commonancestorsheads(self, a, b):
809 816 """calculate all the heads of the common ancestors of nodes a and b"""
810 817 a, b = self.rev(a), self.rev(b)
811 818 try:
812 819 ancs = self.index.commonancestorsheads(a, b)
813 820 except (AttributeError, OverflowError): # C implementation failed
814 821 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
815 822 return map(self.node, ancs)
816 823
817 824 def isancestor(self, a, b):
818 825 """return True if node a is an ancestor of node b
819 826
820 827 The implementation of this is trivial but the use of
821 828 commonancestorsheads is not."""
822 829 return a in self.commonancestorsheads(a, b)
823 830
824 831 def ancestor(self, a, b):
825 832 """calculate the "best" common ancestor of nodes a and b"""
826 833
827 834 a, b = self.rev(a), self.rev(b)
828 835 try:
829 836 ancs = self.index.ancestors(a, b)
830 837 except (AttributeError, OverflowError):
831 838 ancs = ancestor.ancestors(self.parentrevs, a, b)
832 839 if ancs:
833 840 # choose a consistent winner when there's a tie
834 841 return min(map(self.node, ancs))
835 842 return nullid
836 843
837 844 def _match(self, id):
838 845 if isinstance(id, int):
839 846 # rev
840 847 return self.node(id)
841 848 if len(id) == 20:
842 849 # possibly a binary node
843 850 # odds of a binary node being all hex in ASCII are 1 in 10**25
844 851 try:
845 852 node = id
846 853 self.rev(node) # quick search the index
847 854 return node
848 855 except LookupError:
849 856 pass # may be partial hex id
850 857 try:
851 858 # str(rev)
852 859 rev = int(id)
853 860 if str(rev) != id:
854 861 raise ValueError
855 862 if rev < 0:
856 863 rev = len(self) + rev
857 864 if rev < 0 or rev >= len(self):
858 865 raise ValueError
859 866 return self.node(rev)
860 867 except (ValueError, OverflowError):
861 868 pass
862 869 if len(id) == 40:
863 870 try:
864 871 # a full hex nodeid?
865 872 node = bin(id)
866 873 self.rev(node)
867 874 return node
868 875 except (TypeError, LookupError):
869 876 pass
870 877
871 878 def _partialmatch(self, id):
872 879 try:
873 880 n = self.index.partialmatch(id)
874 881 if n and self.hasnode(n):
875 882 return n
876 883 return None
877 884 except RevlogError:
878 885 # parsers.c radix tree lookup gave multiple matches
879 886 # fall through to slow path that filters hidden revisions
880 887 pass
881 888 except (AttributeError, ValueError):
882 889 # we are pure python, or key was too short to search radix tree
883 890 pass
884 891
885 892 if id in self._pcache:
886 893 return self._pcache[id]
887 894
888 895 if len(id) < 40:
889 896 try:
890 897 # hex(node)[:...]
891 898 l = len(id) // 2 # grab an even number of digits
892 899 prefix = bin(id[:l * 2])
893 900 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
894 901 nl = [n for n in nl if hex(n).startswith(id) and
895 902 self.hasnode(n)]
896 903 if len(nl) > 0:
897 904 if len(nl) == 1:
898 905 self._pcache[id] = nl[0]
899 906 return nl[0]
900 907 raise LookupError(id, self.indexfile,
901 908 _('ambiguous identifier'))
902 909 return None
903 910 except TypeError:
904 911 pass
905 912
906 913 def lookup(self, id):
907 914 """locate a node based on:
908 915 - revision number or str(revision number)
909 916 - nodeid or subset of hex nodeid
910 917 """
911 918 n = self._match(id)
912 919 if n is not None:
913 920 return n
914 921 n = self._partialmatch(id)
915 922 if n:
916 923 return n
917 924
918 925 raise LookupError(id, self.indexfile, _('no match found'))
919 926
920 927 def cmp(self, node, text):
921 928 """compare text with a given file revision
922 929
923 930 returns True if text is different than what is stored.
924 931 """
925 932 p1, p2 = self.parents(node)
926 933 return hash(text, p1, p2) != node
927 934
928 935 def _addchunk(self, offset, data):
936 """Add a segment to the revlog cache.
937
938 Accepts an absolute offset and the data that is at that location.
939 """
929 940 o, d = self._chunkcache
930 941 # try to add to existing cache
931 942 if o + len(d) == offset and len(d) + len(data) < _chunksize:
932 943 self._chunkcache = o, d + data
933 944 else:
934 945 self._chunkcache = offset, data
935 946
936 947 def _loadchunk(self, offset, length, df=None):
937 """Load a chunk/segment from the revlog.
948 """Load a segment of raw data from the revlog.
938 949
939 Accepts absolute offset, length to read, and an optional existing
950 Accepts an absolute offset, length to read, and an optional existing
940 951 file handle to read from.
941 952
942 953 If an existing file handle is passed, it will be seeked and the
943 954 original seek position will NOT be restored.
955
956 Returns a str or buffer of raw byte data.
944 957 """
945 958 if df is not None:
946 959 closehandle = False
947 960 else:
948 961 if self._inline:
949 962 df = self.opener(self.indexfile)
950 963 else:
951 964 df = self.opener(self.datafile)
952 965 closehandle = True
953 966
954 967 # Cache data both forward and backward around the requested
955 968 # data, in a fixed size window. This helps speed up operations
956 969 # involving reading the revlog backwards.
957 970 cachesize = self._chunkcachesize
958 971 realoffset = offset & ~(cachesize - 1)
959 972 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
960 973 - realoffset)
961 974 df.seek(realoffset)
962 975 d = df.read(reallength)
963 976 if closehandle:
964 977 df.close()
965 978 self._addchunk(realoffset, d)
966 979 if offset != realoffset or reallength != length:
967 980 return util.buffer(d, offset - realoffset, length)
968 981 return d
969 982
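The window arithmetic above rounds the request out to cache-size boundaries; a numeric sketch with the default 64KB cache size (values are illustrative):

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)              # rounds down to 65536
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)    # one aligned window
    assert (realoffset, reallength) == (65536, 65536)
    # the requested bytes are then sliced out at offset - realoffset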
970 983 def _getchunk(self, offset, length, df=None):
984 """Obtain a segment of raw data from the revlog.
985
986 Accepts an absolute offset, length of bytes to obtain, and an
987 optional file handle to the already-opened revlog. If the file
988 handle is used, its original seek position will not be preserved.
989
990 Requests for data may be returned from a cache.
991
992 Returns a str or a buffer instance of raw byte data.
993 """
971 994 o, d = self._chunkcache
972 995 l = len(d)
973 996
974 997 # is it in the cache?
975 998 cachestart = offset - o
976 999 cacheend = cachestart + length
977 1000 if cachestart >= 0 and cacheend <= l:
978 1001 if cachestart == 0 and cacheend == l:
979 1002 return d # avoid a copy
980 1003 return util.buffer(d, cachestart, cacheend - cachestart)
981 1004
982 1005 return self._loadchunk(offset, length, df=df)
983 1006
984 1007 def _chunkraw(self, startrev, endrev, df=None):
1008 """Obtain a segment of raw data corresponding to a range of revisions.
1009
1010 Accepts the start and end revisions and an optional already-open
1011 file handle to be used for reading. If the file handle is used, its
1012 seek position will not be preserved.
1013
1014 Requests for data may be satisfied by a cache.
1015
1016 Returns a str or a buffer instance of raw byte data. Callers will
1017 need to call ``self.start(rev)`` and ``self.length(rev)`` to determine
1018 where each revision's data begins and ends.
1019 """
985 1020 start = self.start(startrev)
986 1021 end = self.end(endrev)
987 1022 if self._inline:
988 1023 start += (startrev + 1) * self._io.size
989 1024 end += (endrev + 1) * self._io.size
990 1025 length = end - start
991 1026 return self._getchunk(start, length, df=df)
992 1027
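Because an inline revlog interleaves each 64-byte index record with its revision data, the start offset shifts as computed above; a sketch with a hypothetical helper (inlinestart is not part of the module):

    def inlinestart(datastart, rev, iosize=64):
        # every revision up to and including rev contributes one
        # index record ahead of rev's data, hence (rev + 1) * iosize
        return datastart + (rev + 1) * iosize
    assert inlinestart(0, 0) == 64     # rev 0's data follows its own record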
993 1028 def _chunk(self, rev, df=None):
1029 """Obtain a single decompressed chunk for a revision.
1030
1031 Accepts an integer revision and an optional already-open file handle
1032 to be used for reading. If used, the seek position of the file will not
1033 be preserved.
1034
1035 Returns a str holding uncompressed data for the requested revision.
1036 """
994 1037 return decompress(self._chunkraw(rev, rev, df=df))
995 1038
996 1039 def _chunks(self, revs, df=None):
997 '''faster version of [self._chunk(rev) for rev in revs]
1040 """Obtain decompressed chunks for the specified revisions.
998 1041
999 Assumes that revs is in ascending order.'''
1042 Accepts an iterable of numeric revisions that are assumed to be in
1043 ascending order. Also accepts an optional already-open file handle
1044 to be used for reading. If used, the seek position of the file will
1045 not be preserved.
1046
1047 This function is similar to calling ``self._chunk()`` multiple times,
1048 but is faster.
1049
1050 Returns a list with decompressed data for each requested revision.
1051 """
1000 1052 if not revs:
1001 1053 return []
1002 1054 start = self.start
1003 1055 length = self.length
1004 1056 inline = self._inline
1005 1057 iosize = self._io.size
1006 1058 buffer = util.buffer
1007 1059
1008 1060 l = []
1009 1061 ladd = l.append
1010 1062
1011 1063 # preload the cache
1012 1064 try:
1013 1065 while True:
1014 1066 # ensure that the cache doesn't change out from under us
1015 1067 _cache = self._chunkcache
1016 1068 self._chunkraw(revs[0], revs[-1], df=df)
1017 1069 if _cache == self._chunkcache:
1018 1070 break
1019 1071 offset, data = _cache
1020 1072 except OverflowError:
1021 1073 # issue4215 - we can't cache a run of chunks greater than
1022 1074 # 2G on Windows
1023 1075 return [self._chunk(rev, df=df) for rev in revs]
1024 1076
1025 1077 for rev in revs:
1026 1078 chunkstart = start(rev)
1027 1079 if inline:
1028 1080 chunkstart += (rev + 1) * iosize
1029 1081 chunklength = length(rev)
1030 1082 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1031 1083
1032 1084 return l
1033 1085
1034 1086 def _chunkclear(self):
1087 """Clear the raw chunk cache."""
1035 1088 self._chunkcache = (0, '')
1036 1089
1037 1090 def deltaparent(self, rev):
1038 1091 """return deltaparent of the given revision"""
1039 1092 base = self.index[rev][3]
1040 1093 if base == rev:
1041 1094 return nullrev
1042 1095 elif self._generaldelta:
1043 1096 return base
1044 1097 else:
1045 1098 return rev - 1
1046 1099
1047 1100 def revdiff(self, rev1, rev2):
1048 1101 """return or calculate a delta between two revisions"""
1049 1102 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1050 1103 return str(self._chunk(rev2))
1051 1104
1052 1105 return mdiff.textdiff(self.revision(rev1),
1053 1106 self.revision(rev2))
1054 1107
1055 1108 def revision(self, nodeorrev, _df=None):
1056 1109 """return an uncompressed revision of a given node or revision
1057 1110 number.
1058 1111
1059 1112 _df is an existing file handle to read from. It is meant to only be
1060 1113 used internally.
1061 1114 """
1062 1115 if isinstance(nodeorrev, int):
1063 1116 rev = nodeorrev
1064 1117 node = self.node(rev)
1065 1118 else:
1066 1119 node = nodeorrev
1067 1120 rev = None
1068 1121
1069 1122 cachedrev = None
1070 1123 if node == nullid:
1071 1124 return ""
1072 1125 if self._cache:
1073 1126 if self._cache[0] == node:
1074 1127 return self._cache[2]
1075 1128 cachedrev = self._cache[1]
1076 1129
1077 1130 # look up what we need to read
1078 1131 text = None
1079 1132 if rev is None:
1080 1133 rev = self.rev(node)
1081 1134
1082 1135 # check rev flags
1083 1136 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1084 1137 raise RevlogError(_('incompatible revision flag %x') %
1085 1138 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1086 1139
1087 1140 # build delta chain
1088 1141 chain = []
1089 1142 index = self.index # for performance
1090 1143 generaldelta = self._generaldelta
1091 1144 iterrev = rev
1092 1145 e = index[iterrev]
1093 1146 while iterrev != e[3] and iterrev != cachedrev:
1094 1147 chain.append(iterrev)
1095 1148 if generaldelta:
1096 1149 iterrev = e[3]
1097 1150 else:
1098 1151 iterrev -= 1
1099 1152 e = index[iterrev]
1100 1153
1101 1154 if iterrev == cachedrev:
1102 1155 # cache hit
1103 1156 text = self._cache[2]
1104 1157 else:
1105 1158 chain.append(iterrev)
1106 1159 chain.reverse()
1107 1160
1108 1161 # drop cache to save memory
1109 1162 self._cache = None
1110 1163
1111 1164 bins = self._chunks(chain, df=_df)
1112 1165 if text is None:
1113 1166 text = str(bins[0])
1114 1167 bins = bins[1:]
1115 1168
1116 1169 text = mdiff.patches(text, bins)
1117 1170
1118 1171 text = self._checkhash(text, node, rev)
1119 1172
1120 1173 self._cache = (node, rev, text)
1121 1174 return text
1122 1175
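The shape of the chain walk and patch step above can be shown with a toy delta store (a sketch only: toyrevision and its appending 'patch' stand in for the index walk and mdiff.patches):

    # rev -> (baserev, payload); a rev whose base is itself is a fulltext,
    # mirroring the 'iterrev != e[3]' loop condition above.
    deltas = {0: (0, 'base'), 1: (0, '+v1'), 2: (1, '+v2')}
    def toyrevision(rev):
        chain = []
        while deltas[rev][0] != rev:       # walk back to the fulltext
            chain.append(rev)
            rev = deltas[rev][0]
        text = deltas[rev][1]
        for r in reversed(chain):          # apply deltas oldest-first
            text += deltas[r][1]           # stand-in for mdiff.patches
        return text
    assert toyrevision(2) == 'base+v1+v2'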
1123 1176 def hash(self, text, p1, p2):
1124 1177 """Compute a node hash.
1125 1178
1126 1179 Available as a function so that subclasses can replace the hash
1127 1180 as needed.
1128 1181 """
1129 1182 return hash(text, p1, p2)
1130 1183
1131 1184 def _checkhash(self, text, node, rev):
1132 1185 p1, p2 = self.parents(node)
1133 1186 self.checkhash(text, p1, p2, node, rev)
1134 1187 return text
1135 1188
1136 1189 def checkhash(self, text, p1, p2, node, rev=None):
1137 1190 if node != self.hash(text, p1, p2):
1138 1191 revornode = rev
1139 1192 if revornode is None:
1140 1193 revornode = templatefilters.short(hex(node))
1141 1194 raise RevlogError(_("integrity check failed on %s:%s")
1142 1195 % (self.indexfile, revornode))
1143 1196
1144 1197 def checkinlinesize(self, tr, fp=None):
1145 1198 """Check if the revlog is too big for inline and convert if so.
1146 1199
1147 1200 This should be called after revisions are added to the revlog. If the
1148 1201 revlog has grown too large to be an inline revlog, it will convert it
1149 1202 to use multiple index and data files.
1150 1203 """
1151 1204 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1152 1205 return
1153 1206
1154 1207 trinfo = tr.find(self.indexfile)
1155 1208 if trinfo is None:
1156 1209 raise RevlogError(_("%s not found in the transaction")
1157 1210 % self.indexfile)
1158 1211
1159 1212 trindex = trinfo[2]
1160 1213 if trindex is not None:
1161 1214 dataoff = self.start(trindex)
1162 1215 else:
1163 1216 # revlog was stripped at start of transaction, use all leftover data
1164 1217 trindex = len(self) - 1
1165 1218 dataoff = self.end(-2)
1166 1219
1167 1220 tr.add(self.datafile, dataoff)
1168 1221
1169 1222 if fp:
1170 1223 fp.flush()
1171 1224 fp.close()
1172 1225
1173 1226 df = self.opener(self.datafile, 'w')
1174 1227 try:
1175 1228 for r in self:
1176 1229 df.write(self._chunkraw(r, r))
1177 1230 finally:
1178 1231 df.close()
1179 1232
1180 1233 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1181 1234 self.version &= ~(REVLOGNGINLINEDATA)
1182 1235 self._inline = False
1183 1236 for i in self:
1184 1237 e = self._io.packentry(self.index[i], self.node, self.version, i)
1185 1238 fp.write(e)
1186 1239
1187 1240 # if we don't call close, the temp file will never replace the
1188 1241 # real index
1189 1242 fp.close()
1190 1243
1191 1244 tr.replace(self.indexfile, trindex * self._io.size)
1192 1245 self._chunkclear()
1193 1246
1194 1247 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1195 1248 node=None):
1196 1249 """add a revision to the log
1197 1250
1198 1251 text - the revision data to add
1199 1252 transaction - the transaction object used for rollback
1200 1253 link - the linkrev data to add
1201 1254 p1, p2 - the parent nodeids of the revision
1202 1255 cachedelta - an optional precomputed delta
1203 1256 node - nodeid of revision; typically node is not specified, and it is
1204 1257 computed by default as hash(text, p1, p2), however subclasses might
1205 1258 use different hashing method (and override checkhash() in such case)
1206 1259 """
1207 1260 if link == nullrev:
1208 1261 raise RevlogError(_("attempted to add linkrev -1 to %s")
1209 1262 % self.indexfile)
1210 1263
1211 1264 if len(text) > _maxentrysize:
1212 1265 raise RevlogError(
1213 1266 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1214 1267 % (self.indexfile, len(text)))
1215 1268
1216 1269 node = node or self.hash(text, p1, p2)
1217 1270 if node in self.nodemap:
1218 1271 return node
1219 1272
1220 1273 dfh = None
1221 1274 if not self._inline:
1222 1275 dfh = self.opener(self.datafile, "a+")
1223 1276 ifh = self.opener(self.indexfile, "a+")
1224 1277 try:
1225 1278 return self._addrevision(node, text, transaction, link, p1, p2,
1226 1279 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1227 1280 finally:
1228 1281 if dfh:
1229 1282 dfh.close()
1230 1283 ifh.close()
1231 1284
1232 1285 def compress(self, text):
1233 1286 """ generate a possibly-compressed representation of text """
1234 1287 if not text:
1235 1288 return ("", text)
1236 1289 l = len(text)
1237 1290 bin = None
1238 1291 if l < 44:
1239 1292 pass
1240 1293 elif l > 1000000:
1241 1294 # zlib makes an internal copy, thus doubling memory usage for
1242 1295 # large files, so let's do this in pieces
1243 1296 z = zlib.compressobj()
1244 1297 p = []
1245 1298 pos = 0
1246 1299 while pos < l:
1247 1300 pos2 = pos + 2**20
1248 1301 p.append(z.compress(text[pos:pos2]))
1249 1302 pos = pos2
1250 1303 p.append(z.flush())
1251 1304 if sum(map(len, p)) < l:
1252 1305 bin = "".join(p)
1253 1306 else:
1254 1307 bin = _compress(text)
1255 1308 if bin is None or len(bin) > l:
1256 1309 if text[0] == '\0':
1257 1310 return ("", text)
1258 1311 return ('u', text)
1259 1312 return ("", bin)
1260 1313
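A sketch of the header convention shared by compress() and decompress() (assuming the module-level decompress() is in scope; the sample strings are made up):

    import zlib
    text = 'x' * 100                        # compresses well
    header, data = '', zlib.compress(text)  # what compress() returns here
    assert decompress(header + data) == text
    assert decompress('u' + 'tiny') == 'tiny'   # stored, 'u'-prefixed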
1261 1314 def _isgooddelta(self, d, textlen):
1262 1315 """Returns True if the given delta is good. Good means that it is within
1263 1316 the disk span, disk size, and chain length bounds that we know to be
1264 1317 performant."""
1265 1318 if d is None:
1266 1319 return False
1267 1320
1268 1321 # - 'dist' is the distance from the base revision -- bounding it limits
1269 1322 # the amount of I/O we need to do.
1270 1323 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1271 1324 # to apply -- bounding it limits the amount of CPU we consume.
1272 1325 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1273 1326 if (dist > textlen * 4 or l > textlen or
1274 1327 compresseddeltalen > textlen * 2 or
1275 1328 (self._maxchainlen and chainlen > self._maxchainlen)):
1276 1329 return False
1277 1330
1278 1331 return True
1279 1332
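A numeric sketch of the three bounds checked above (illustrative values for a 1000-byte fulltext):

    textlen = 1000
    dist, l, compresseddeltalen = 3500, 400, 1800
    # rejected if the read span exceeds 4x the fulltext, the delta
    # exceeds the fulltext, or the summed deltas exceed 2x the fulltext
    good = not (dist > textlen * 4 or l > textlen or
                compresseddeltalen > textlen * 2)
    assert good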
1280 1333 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1281 1334 cachedelta, ifh, dfh, alwayscache=False):
1282 1335 """internal function to add revisions to the log
1283 1336
1284 1337 see addrevision for argument descriptions.
1285 1338 invariants:
1286 1339 - text is optional (can be None); if not set, cachedelta must be set.
1287 1340 if both are set, they must correspond to each other.
1288 1341 """
1289 1342 btext = [text]
1290 1343 def buildtext():
1291 1344 if btext[0] is not None:
1292 1345 return btext[0]
1293 1346 baserev = cachedelta[0]
1294 1347 delta = cachedelta[1]
1295 1348 # special case deltas which replace entire base; no need to decode
1296 1349 # base revision. this neatly avoids censored bases, which throw when
1297 1350 # they're decoded.
1298 1351 hlen = struct.calcsize(">lll")
1299 1352 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1300 1353 len(delta) - hlen):
1301 1354 btext[0] = delta[hlen:]
1302 1355 else:
1303 1356 if self._inline:
1304 1357 fh = ifh
1305 1358 else:
1306 1359 fh = dfh
1307 1360 basetext = self.revision(self.node(baserev), _df=fh)
1308 1361 btext[0] = mdiff.patch(basetext, delta)
1309 1362 try:
1310 1363 self.checkhash(btext[0], p1, p2, node)
1311 1364 if flags & REVIDX_ISCENSORED:
1312 1365 raise RevlogError(_('node %s is not censored') % node)
1313 1366 except CensoredNodeError:
1314 1367 # must pass the censored index flag to add censored revisions
1315 1368 if not flags & REVIDX_ISCENSORED:
1316 1369 raise
1317 1370 return btext[0]
1318 1371
1319 1372 def builddelta(rev):
1320 1373 # can we use the cached delta?
1321 1374 if cachedelta and cachedelta[0] == rev:
1322 1375 delta = cachedelta[1]
1323 1376 else:
1324 1377 t = buildtext()
1325 1378 if self.iscensored(rev):
1326 1379 # deltas based on a censored revision must replace the
1327 1380 # full content in one patch, so delta works everywhere
1328 1381 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1329 1382 delta = header + t
1330 1383 else:
1331 1384 if self._inline:
1332 1385 fh = ifh
1333 1386 else:
1334 1387 fh = dfh
1335 1388 ptext = self.revision(self.node(rev), _df=fh)
1336 1389 delta = mdiff.textdiff(ptext, t)
1337 1390 data = self.compress(delta)
1338 1391 l = len(data[1]) + len(data[0])
1339 1392 if basecache[0] == rev:
1340 1393 chainbase = basecache[1]
1341 1394 else:
1342 1395 chainbase = self.chainbase(rev)
1343 1396 dist = l + offset - self.start(chainbase)
1344 1397 if self._generaldelta:
1345 1398 base = rev
1346 1399 else:
1347 1400 base = chainbase
1348 1401 chainlen, compresseddeltalen = self._chaininfo(rev)
1349 1402 chainlen += 1
1350 1403 compresseddeltalen += l
1351 1404 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1352 1405
1353 1406 curr = len(self)
1354 1407 prev = curr - 1
1355 1408 base = chainbase = curr
1356 1409 chainlen = None
1357 1410 offset = self.end(prev)
1358 1411 d = None
1359 1412 if self._basecache is None:
1360 1413 self._basecache = (prev, self.chainbase(prev))
1361 1414 basecache = self._basecache
1362 1415 p1r, p2r = self.rev(p1), self.rev(p2)
1363 1416
1364 1417 # full versions are inserted when the needed deltas
1365 1418 # become comparable to the uncompressed text
1366 1419 if text is None:
1367 1420 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1368 1421 cachedelta[1])
1369 1422 else:
1370 1423 textlen = len(text)
1371 1424
1372 1425 # should we try to build a delta?
1373 1426 if prev != nullrev:
1374 1427 if cachedelta and self._generaldelta and self._lazydeltabase:
1375 1428 # Assume what we received from the server is a good choice
1376 1429 # build delta will reuse the cache
1377 1430 d = builddelta(cachedelta[0])
1378 1431 elif self._generaldelta:
1379 1432 if p2r != nullrev and self._aggressivemergedeltas:
1380 1433 d = builddelta(p1r)
1381 1434 d2 = builddelta(p2r)
1382 1435 p1good = self._isgooddelta(d, textlen)
1383 1436 p2good = self._isgooddelta(d2, textlen)
1384 1437 if p1good and p2good:
1385 1438 # If both are good deltas, choose the smallest
1386 1439 if d2[1] < d[1]:
1387 1440 d = d2
1388 1441 elif p2good:
1389 1442 # If only p2 is good, use it
1390 1443 d = d2
1391 1444 elif p1good:
1392 1445 pass
1393 1446 else:
1394 1447 # Neither is good, try against prev to hopefully save us
1395 1448 # a fulltext.
1396 1449 d = builddelta(prev)
1397 1450 else:
1398 1451 # Pick whichever parent is closer to us (to minimize the
1399 1452 # chance of having to build a fulltext). Since
1400 1453 # nullrev == -1, any non-merge commit will always pick p1r.
1401 1454 drev = p2r if p2r > p1r else p1r
1402 1455 d = builddelta(drev)
1403 1456 # If the chosen delta will result in us making a full text,
1404 1457 # give it one last try against prev.
1405 1458 if drev != prev and not self._isgooddelta(d, textlen):
1406 1459 d = builddelta(prev)
1407 1460 else:
1408 1461 d = builddelta(prev)
1409 1462 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1410 1463
1411 1464 if not self._isgooddelta(d, textlen):
1412 1465 text = buildtext()
1413 1466 data = self.compress(text)
1414 1467 l = len(data[1]) + len(data[0])
1415 1468 base = chainbase = curr
1416 1469
1417 1470 e = (offset_type(offset, flags), l, textlen,
1418 1471 base, link, p1r, p2r, node)
1419 1472 self.index.insert(-1, e)
1420 1473 self.nodemap[node] = curr
1421 1474
1422 1475 entry = self._io.packentry(e, self.node, self.version, curr)
1423 1476 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1424 1477
1425 1478 if alwayscache and text is None:
1426 1479 text = buildtext()
1427 1480
1428 1481 if type(text) == str: # only accept immutable objects
1429 1482 self._cache = (node, curr, text)
1430 1483 self._basecache = (curr, chainbase)
1431 1484 return node
1432 1485
1433 1486 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1434 1487 curr = len(self) - 1
1435 1488 if not self._inline:
1436 1489 transaction.add(self.datafile, offset)
1437 1490 transaction.add(self.indexfile, curr * len(entry))
1438 1491 if data[0]:
1439 1492 dfh.write(data[0])
1440 1493 dfh.write(data[1])
1441 1494 ifh.write(entry)
1442 1495 else:
1443 1496 offset += curr * self._io.size
1444 1497 transaction.add(self.indexfile, offset, curr)
1445 1498 ifh.write(entry)
1446 1499 ifh.write(data[0])
1447 1500 ifh.write(data[1])
1448 1501 self.checkinlinesize(transaction, ifh)
1449 1502
1450 1503 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1451 1504 """
1452 1505 add a delta group
1453 1506
1454 1507 given a set of deltas, add them to the revision log. the
1455 1508 first delta is against its parent, which should be in our
1456 1509 log, the rest are against the previous delta.
1457 1510
1458 1511 If ``addrevisioncb`` is defined, it will be called with arguments of
1459 1512 this revlog and the node that was added.
1460 1513 """
1461 1514
1462 1515 # track the base of the current delta log
1463 1516 content = []
1464 1517 node = None
1465 1518
1466 1519 r = len(self)
1467 1520 end = 0
1468 1521 if r:
1469 1522 end = self.end(r - 1)
1470 1523 ifh = self.opener(self.indexfile, "a+")
1471 1524 isize = r * self._io.size
1472 1525 if self._inline:
1473 1526 transaction.add(self.indexfile, end + isize, r)
1474 1527 dfh = None
1475 1528 else:
1476 1529 transaction.add(self.indexfile, isize, r)
1477 1530 transaction.add(self.datafile, end)
1478 1531 dfh = self.opener(self.datafile, "a+")
1479 1532 def flush():
1480 1533 if dfh:
1481 1534 dfh.flush()
1482 1535 ifh.flush()
1483 1536 try:
1484 1537 # loop through our set of deltas
1485 1538 chain = None
1486 1539 while True:
1487 1540 chunkdata = cg.deltachunk(chain)
1488 1541 if not chunkdata:
1489 1542 break
1490 1543 node = chunkdata['node']
1491 1544 p1 = chunkdata['p1']
1492 1545 p2 = chunkdata['p2']
1493 1546 cs = chunkdata['cs']
1494 1547 deltabase = chunkdata['deltabase']
1495 1548 delta = chunkdata['delta']
1496 1549
1497 1550 content.append(node)
1498 1551
1499 1552 link = linkmapper(cs)
1500 1553 if node in self.nodemap:
1501 1554 # this can happen if two branches make the same change
1502 1555 chain = node
1503 1556 continue
1504 1557
1505 1558 for p in (p1, p2):
1506 1559 if p not in self.nodemap:
1507 1560 raise LookupError(p, self.indexfile,
1508 1561 _('unknown parent'))
1509 1562
1510 1563 if deltabase not in self.nodemap:
1511 1564 raise LookupError(deltabase, self.indexfile,
1512 1565 _('unknown delta base'))
1513 1566
1514 1567 baserev = self.rev(deltabase)
1515 1568
1516 1569 if baserev != nullrev and self.iscensored(baserev):
1517 1570 # if base is censored, delta must be full replacement in a
1518 1571 # single patch operation
1519 1572 hlen = struct.calcsize(">lll")
1520 1573 oldlen = self.rawsize(baserev)
1521 1574 newlen = len(delta) - hlen
1522 1575 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1523 1576 raise error.CensoredBaseError(self.indexfile,
1524 1577 self.node(baserev))
1525 1578
1526 1579 flags = REVIDX_DEFAULT_FLAGS
1527 1580 if self._peek_iscensored(baserev, delta, flush):
1528 1581 flags |= REVIDX_ISCENSORED
1529 1582
1530 1583 # We assume consumers of addrevisioncb will want to retrieve
1531 1584 # the added revision, which will require a call to
1532 1585 # revision(). revision() will fast path if there is a cache
1533 1586 # hit. So, we tell _addrevision() to always cache in this case.
1534 1587 chain = self._addrevision(node, None, transaction, link,
1535 1588 p1, p2, flags, (baserev, delta),
1536 1589 ifh, dfh,
1537 1590 alwayscache=bool(addrevisioncb))
1538 1591
1539 1592 if addrevisioncb:
1540 1593 addrevisioncb(self, chain)
1541 1594
1542 1595 if not dfh and not self._inline:
1543 1596 # addrevision switched from inline to conventional
1544 1597 # reopen the index
1545 1598 ifh.close()
1546 1599 dfh = self.opener(self.datafile, "a+")
1547 1600 ifh = self.opener(self.indexfile, "a+")
1548 1601 finally:
1549 1602 if dfh:
1550 1603 dfh.close()
1551 1604 ifh.close()
1552 1605
1553 1606 return content
1554 1607
1555 1608 def iscensored(self, rev):
1556 1609 """Check if a file revision is censored."""
1557 1610 return False
1558 1611
1559 1612 def _peek_iscensored(self, baserev, delta, flush):
1560 1613 """Quickly check if a delta produces a censored revision."""
1561 1614 return False
1562 1615
1563 1616 def getstrippoint(self, minlink):
1564 1617 """find the minimum rev that must be stripped to strip the linkrev
1565 1618
1566 1619 Returns a tuple containing the minimum rev and a set of all revs that
1567 1620 have linkrevs that will be broken by this strip.
1568 1621 """
1569 1622 brokenrevs = set()
1570 1623 strippoint = len(self)
1571 1624
1572 1625 heads = {}
1573 1626 futurelargelinkrevs = set()
1574 1627 for head in self.headrevs():
1575 1628 headlinkrev = self.linkrev(head)
1576 1629 heads[head] = headlinkrev
1577 1630 if headlinkrev >= minlink:
1578 1631 futurelargelinkrevs.add(headlinkrev)
1579 1632
1580 1633 # This algorithm involves walking down the rev graph, starting at the
1581 1634 # heads. Since the revs are topologically sorted according to linkrev,
1582 1635 # once all head linkrevs are below the minlink, we know there are
1583 1636 # no more revs that could have a linkrev greater than minlink.
1584 1637 # So we can stop walking.
1585 1638 while futurelargelinkrevs:
1586 1639 strippoint -= 1
1587 1640 linkrev = heads.pop(strippoint)
1588 1641
1589 1642 if linkrev < minlink:
1590 1643 brokenrevs.add(strippoint)
1591 1644 else:
1592 1645 futurelargelinkrevs.remove(linkrev)
1593 1646
1594 1647 for p in self.parentrevs(strippoint):
1595 1648 if p != nullrev:
1596 1649 plinkrev = self.linkrev(p)
1597 1650 heads[p] = plinkrev
1598 1651 if plinkrev >= minlink:
1599 1652 futurelargelinkrevs.add(plinkrev)
1600 1653
1601 1654 return strippoint, brokenrevs
1602 1655
1603 1656 def strip(self, minlink, transaction):
1604 1657 """truncate the revlog on the first revision with a linkrev >= minlink
1605 1658
1606 1659 This function is called when we're stripping revision minlink and
1607 1660 its descendants from the repository.
1608 1661
1609 1662 We have to remove all revisions with linkrev >= minlink, because
1610 1663 the equivalent changelog revisions will be renumbered after the
1611 1664 strip.
1612 1665
1613 1666 So we truncate the revlog on the first of these revisions, and
1614 1667 trust that the caller has saved the revisions that shouldn't be
1615 1668 removed and that it'll re-add them after this truncation.
1616 1669 """
1617 1670 if len(self) == 0:
1618 1671 return
1619 1672
1620 1673 rev, _ = self.getstrippoint(minlink)
1621 1674 if rev == len(self):
1622 1675 return
1623 1676
1624 1677 # first truncate the files on disk
1625 1678 end = self.start(rev)
1626 1679 if not self._inline:
1627 1680 transaction.add(self.datafile, end)
1628 1681 end = rev * self._io.size
1629 1682 else:
1630 1683 end += rev * self._io.size
1631 1684
1632 1685 transaction.add(self.indexfile, end)
1633 1686
1634 1687 # then reset internal state in memory to forget those revisions
1635 1688 self._cache = None
1636 1689 self._chaininfocache = {}
1637 1690 self._chunkclear()
1638 1691 for x in xrange(rev, len(self)):
1639 1692 del self.nodemap[self.node(x)]
1640 1693
1641 1694 del self.index[rev:-1]
1642 1695
1643 1696 def checksize(self):
1644 1697 expected = 0
1645 1698 if len(self):
1646 1699 expected = max(0, self.end(len(self) - 1))
1647 1700
1648 1701 try:
1649 1702 f = self.opener(self.datafile)
1650 1703 f.seek(0, 2)
1651 1704 actual = f.tell()
1652 1705 f.close()
1653 1706 dd = actual - expected
1654 1707 except IOError as inst:
1655 1708 if inst.errno != errno.ENOENT:
1656 1709 raise
1657 1710 dd = 0
1658 1711
1659 1712 try:
1660 1713 f = self.opener(self.indexfile)
1661 1714 f.seek(0, 2)
1662 1715 actual = f.tell()
1663 1716 f.close()
1664 1717 s = self._io.size
1665 1718 i = max(0, actual // s)
1666 1719 di = actual - (i * s)
1667 1720 if self._inline:
1668 1721 databytes = 0
1669 1722 for r in self:
1670 1723 databytes += max(0, self.length(r))
1671 1724 dd = 0
1672 1725 di = actual - len(self) * s - databytes
1673 1726 except IOError as inst:
1674 1727 if inst.errno != errno.ENOENT:
1675 1728 raise
1676 1729 di = 0
1677 1730
1678 1731 return (dd, di)
1679 1732
1680 1733 def files(self):
1681 1734 res = [self.indexfile]
1682 1735 if not self._inline:
1683 1736 res.append(self.datafile)
1684 1737 return res