revlog: change generaldelta delta parent heuristic...
Durham Goode
r26117:4dc5b51f default
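
This commit replaces the generaldelta delta-parent selection in _addrevision. Previously a parent was used as the delta base only if its revision was no older than basecache[1] (the chain base of the previously added revision); otherwise the delta was taken against prev. Now the closer parent is always tried first, and prev is used only as a fallback when the parent-based delta fails the _isgooddelta bounds. A minimal standalone sketch of the new selection order (the function names here are stand-ins for the revlog methods in the diff below):

    def choosedelta(p1r, p2r, prev, builddelta, isgooddelta, textlen):
        # Pick whichever parent is closer (the higher revision number).
        # Since nullrev == -1, a non-merge commit always picks p1r.
        drev = p2r if p2r > p1r else p1r
        d = builddelta(drev)
        # If the parent-based delta would force a fulltext, give prev
        # one last try before storing the full revision.
        if drev != prev and not isgooddelta(d, textlen):
            d = builddelta(prev)
        return d
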
@@ -1,1626 +1,1629
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent nodes is null, it is always p2
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # neither of the parent nodes is nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error as e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
97 97
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 20 bytes: parent 1 nodeid
104 104 # 20 bytes: parent 2 nodeid
105 105 # 20 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 versionformat = ">I"
153 153
154 154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
155 155 # signed integer)
156 156 _maxentrysize = 0x7fffffff
157 157
158 158 class revlogio(object):
159 159 def __init__(self):
160 160 self.size = struct.calcsize(indexformatng)
161 161
162 162 def parseindex(self, data, inline):
163 163 # call the C implementation to parse the index data
164 164 index, cache = parsers.parse_index2(data, inline)
165 165 return index, getattr(index, 'nodemap', None), cache
166 166
167 167 def packentry(self, entry, node, version, rev):
168 168 p = _pack(indexformatng, *entry)
169 169 if rev == 0:
170 170 p = _pack(versionformat, version) + p[4:]
171 171 return p
172 172
173 173 class revlog(object):
174 174 """
175 175 the underlying revision storage object
176 176
177 177 A revlog consists of two parts, an index and the revision data.
178 178
179 179 The index is a file with a fixed record size containing
180 180 information on each revision, including its nodeid (hash), the
181 181 nodeids of its parents, the position and offset of its data within
182 182 the data file, and the revision it's based on. Finally, each entry
183 183 contains a linkrev entry that can serve as a pointer to external
184 184 data.
185 185
186 186 The revision data itself is a linear collection of data chunks.
187 187 Each chunk represents a revision and is usually represented as a
188 188 delta against the previous chunk. To bound lookup time, runs of
189 189 deltas are limited to about 2 times the length of the original
190 190 version data. This makes retrieval of a version proportional to
191 191 its size, or O(1) relative to the number of revisions.
192 192
193 193 Both pieces of the revlog are written to in an append-only
194 194 fashion, which means we never need to rewrite a file to insert or
195 195 remove data, and can use some simple techniques to avoid the need
196 196 for locking while reading.
197 197 """
198 198 def __init__(self, opener, indexfile):
199 199 """
200 200 create a revlog object
201 201
202 202 opener is a function that abstracts the file opening operation
203 203 and can be used to implement COW semantics or the like.
204 204 """
205 205 self.indexfile = indexfile
206 206 self.datafile = indexfile[:-2] + ".d"
207 207 self.opener = opener
208 208 self._cache = None
209 209 self._basecache = None
210 210 self._chunkcache = (0, '')
211 211 self._chunkcachesize = 65536
212 212 self._maxchainlen = None
213 213 self.index = []
214 214 self._pcache = {}
215 215 self._nodecache = {nullid: nullrev}
216 216 self._nodepos = None
217 217
218 218 v = REVLOG_DEFAULT_VERSION
219 219 opts = getattr(opener, 'options', None)
220 220 if opts is not None:
221 221 if 'revlogv1' in opts:
222 222 if 'generaldelta' in opts:
223 223 v |= REVLOGGENERALDELTA
224 224 else:
225 225 v = 0
226 226 if 'chunkcachesize' in opts:
227 227 self._chunkcachesize = opts['chunkcachesize']
228 228 if 'maxchainlen' in opts:
229 229 self._maxchainlen = opts['maxchainlen']
230 230
231 231 if self._chunkcachesize <= 0:
232 232 raise RevlogError(_('revlog chunk cache size %r is not greater '
233 233 'than 0') % self._chunkcachesize)
234 234 elif self._chunkcachesize & (self._chunkcachesize - 1):
235 235 raise RevlogError(_('revlog chunk cache size %r is not a power '
236 236 'of 2') % self._chunkcachesize)
237 237
238 238 i = ''
239 239 self._initempty = True
240 240 try:
241 241 f = self.opener(self.indexfile)
242 242 i = f.read()
243 243 f.close()
244 244 if len(i) > 0:
245 245 v = struct.unpack(versionformat, i[:4])[0]
246 246 self._initempty = False
247 247 except IOError as inst:
248 248 if inst.errno != errno.ENOENT:
249 249 raise
250 250
251 251 self.version = v
252 252 self._inline = v & REVLOGNGINLINEDATA
253 253 self._generaldelta = v & REVLOGGENERALDELTA
254 254 flags = v & ~0xFFFF
255 255 fmt = v & 0xFFFF
256 256 if fmt == REVLOGV0 and flags:
257 257 raise RevlogError(_("index %s unknown flags %#04x for format v0")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
260 260 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
261 261 % (self.indexfile, flags >> 16))
262 262 elif fmt > REVLOGNG:
263 263 raise RevlogError(_("index %s unknown format %d")
264 264 % (self.indexfile, fmt))
265 265
266 266 self._io = revlogio()
267 267 if self.version == REVLOGV0:
268 268 self._io = revlogoldio()
269 269 try:
270 270 d = self._io.parseindex(i, self._inline)
271 271 except (ValueError, IndexError):
272 272 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
273 273 self.index, nodemap, self._chunkcache = d
274 274 if nodemap is not None:
275 275 self.nodemap = self._nodecache = nodemap
276 276 if not self._chunkcache:
277 277 self._chunkclear()
278 278 # revnum -> (chain-length, sum-delta-length)
279 279 self._chaininfocache = {}
280 280
281 281 def tip(self):
282 282 return self.node(len(self.index) - 2)
283 283 def __contains__(self, rev):
284 284 return 0 <= rev < len(self)
285 285 def __len__(self):
286 286 return len(self.index) - 1
287 287 def __iter__(self):
288 288 return iter(xrange(len(self)))
289 289 def revs(self, start=0, stop=None):
290 290 """iterate over all rev in this revlog (from start to stop)"""
291 291 step = 1
292 292 if stop is not None:
293 293 if start > stop:
294 294 step = -1
295 295 stop += step
296 296 else:
297 297 stop = len(self)
298 298 return xrange(start, stop, step)
299 299
300 300 @util.propertycache
301 301 def nodemap(self):
302 302 self.rev(self.node(0))
303 303 return self._nodecache
304 304
305 305 def hasnode(self, node):
306 306 try:
307 307 self.rev(node)
308 308 return True
309 309 except KeyError:
310 310 return False
311 311
312 312 def clearcaches(self):
313 313 try:
314 314 self._nodecache.clearcaches()
315 315 except AttributeError:
316 316 self._nodecache = {nullid: nullrev}
317 317 self._nodepos = None
318 318
319 319 def rev(self, node):
320 320 try:
321 321 return self._nodecache[node]
322 322 except TypeError:
323 323 raise
324 324 except RevlogError:
325 325 # parsers.c radix tree lookup failed
326 326 raise LookupError(node, self.indexfile, _('no node'))
327 327 except KeyError:
328 328 # pure python cache lookup failed
329 329 n = self._nodecache
330 330 i = self.index
331 331 p = self._nodepos
332 332 if p is None:
333 333 p = len(i) - 2
334 334 for r in xrange(p, -1, -1):
335 335 v = i[r][7]
336 336 n[v] = r
337 337 if v == node:
338 338 self._nodepos = r - 1
339 339 return r
340 340 raise LookupError(node, self.indexfile, _('no node'))
341 341
342 342 def node(self, rev):
343 343 return self.index[rev][7]
344 344 def linkrev(self, rev):
345 345 return self.index[rev][4]
346 346 def parents(self, node):
347 347 i = self.index
348 348 d = i[self.rev(node)]
349 349 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
350 350 def parentrevs(self, rev):
351 351 return self.index[rev][5:7]
352 352 def start(self, rev):
353 353 return int(self.index[rev][0] >> 16)
354 354 def end(self, rev):
355 355 return self.start(rev) + self.length(rev)
356 356 def length(self, rev):
357 357 return self.index[rev][1]
358 358 def chainbase(self, rev):
359 359 index = self.index
360 360 base = index[rev][3]
361 361 while base != rev:
362 362 rev = base
363 363 base = index[rev][3]
364 364 return base
365 365 def chainlen(self, rev):
366 366 return self._chaininfo(rev)[0]
367 367
368 368 def _chaininfo(self, rev):
369 369 chaininfocache = self._chaininfocache
370 370 if rev in chaininfocache:
371 371 return chaininfocache[rev]
372 372 index = self.index
373 373 generaldelta = self._generaldelta
374 374 iterrev = rev
375 375 e = index[iterrev]
376 376 clen = 0
377 377 compresseddeltalen = 0
378 378 while iterrev != e[3]:
379 379 clen += 1
380 380 compresseddeltalen += e[1]
381 381 if generaldelta:
382 382 iterrev = e[3]
383 383 else:
384 384 iterrev -= 1
385 385 if iterrev in chaininfocache:
386 386 t = chaininfocache[iterrev]
387 387 clen += t[0]
388 388 compresseddeltalen += t[1]
389 389 break
390 390 e = index[iterrev]
391 391 else:
392 392 # Add text length of base since decompressing that also takes
393 393 # work. For cache hits the length is already included.
394 394 compresseddeltalen += e[1]
395 395 r = (clen, compresseddeltalen)
396 396 chaininfocache[rev] = r
397 397 return r
398 398
399 399 def flags(self, rev):
400 400 return self.index[rev][0] & 0xFFFF
401 401 def rawsize(self, rev):
402 402 """return the length of the uncompressed text for a given revision"""
403 403 l = self.index[rev][2]
404 404 if l >= 0:
405 405 return l
406 406
407 407 t = self.revision(self.node(rev))
408 408 return len(t)
409 409 size = rawsize
410 410
411 411 def ancestors(self, revs, stoprev=0, inclusive=False):
412 412 """Generate the ancestors of 'revs' in reverse topological order.
413 413 Does not generate revs lower than stoprev.
414 414
415 415 See the documentation for ancestor.lazyancestors for more details."""
416 416
417 417 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
418 418 inclusive=inclusive)
419 419
420 420 def descendants(self, revs):
421 421 """Generate the descendants of 'revs' in revision order.
422 422
423 423 Yield a sequence of revision numbers starting with a child of
424 424 some rev in revs, i.e., each revision is *not* considered a
425 425 descendant of itself. Results are ordered by revision number (a
426 426 topological sort)."""
427 427 first = min(revs)
428 428 if first == nullrev:
429 429 for i in self:
430 430 yield i
431 431 return
432 432
433 433 seen = set(revs)
434 434 for i in self.revs(start=first + 1):
435 435 for x in self.parentrevs(i):
436 436 if x != nullrev and x in seen:
437 437 seen.add(i)
438 438 yield i
439 439 break
440 440
441 441 def findcommonmissing(self, common=None, heads=None):
442 442 """Return a tuple of the ancestors of common and the ancestors of heads
443 443 that are not ancestors of common. In revset terminology, we return the
444 444 tuple:
445 445
446 446 ::common, (::heads) - (::common)
447 447
448 448 The list is sorted by revision number, meaning it is
449 449 topologically sorted.
450 450
451 451 'heads' and 'common' are both lists of node IDs. If heads is
452 452 not supplied, uses all of the revlog's heads. If common is not
453 453 supplied, uses nullid."""
454 454 if common is None:
455 455 common = [nullid]
456 456 if heads is None:
457 457 heads = self.heads()
458 458
459 459 common = [self.rev(n) for n in common]
460 460 heads = [self.rev(n) for n in heads]
461 461
462 462 # we want the ancestors, but inclusive
463 463 class lazyset(object):
464 464 def __init__(self, lazyvalues):
465 465 self.addedvalues = set()
466 466 self.lazyvalues = lazyvalues
467 467
468 468 def __contains__(self, value):
469 469 return value in self.addedvalues or value in self.lazyvalues
470 470
471 471 def __iter__(self):
472 472 added = self.addedvalues
473 473 for r in added:
474 474 yield r
475 475 for r in self.lazyvalues:
476 476 if r not in added:
477 477 yield r
478 478
479 479 def add(self, value):
480 480 self.addedvalues.add(value)
481 481
482 482 def update(self, values):
483 483 self.addedvalues.update(values)
484 484
485 485 has = lazyset(self.ancestors(common))
486 486 has.add(nullrev)
487 487 has.update(common)
488 488
489 489 # take all ancestors from heads that aren't in has
490 490 missing = set()
491 491 visit = collections.deque(r for r in heads if r not in has)
492 492 while visit:
493 493 r = visit.popleft()
494 494 if r in missing:
495 495 continue
496 496 else:
497 497 missing.add(r)
498 498 for p in self.parentrevs(r):
499 499 if p not in has:
500 500 visit.append(p)
501 501 missing = list(missing)
502 502 missing.sort()
503 503 return has, [self.node(r) for r in missing]
504 504
505 505 def incrementalmissingrevs(self, common=None):
506 506 """Return an object that can be used to incrementally compute the
507 507 revision numbers of the ancestors of arbitrary sets that are not
508 508 ancestors of common. This is an ancestor.incrementalmissingancestors
509 509 object.
510 510
511 511 'common' is a list of revision numbers. If common is not supplied, uses
512 512 nullrev.
513 513 """
514 514 if common is None:
515 515 common = [nullrev]
516 516
517 517 return ancestor.incrementalmissingancestors(self.parentrevs, common)
518 518
519 519 def findmissingrevs(self, common=None, heads=None):
520 520 """Return the revision numbers of the ancestors of heads that
521 521 are not ancestors of common.
522 522
523 523 More specifically, return a list of revision numbers corresponding to
524 524 nodes N such that every N satisfies the following constraints:
525 525
526 526 1. N is an ancestor of some node in 'heads'
527 527 2. N is not an ancestor of any node in 'common'
528 528
529 529 The list is sorted by revision number, meaning it is
530 530 topologically sorted.
531 531
532 532 'heads' and 'common' are both lists of revision numbers. If heads is
533 533 not supplied, uses all of the revlog's heads. If common is not
534 534 supplied, uses nullid."""
535 535 if common is None:
536 536 common = [nullrev]
537 537 if heads is None:
538 538 heads = self.headrevs()
539 539
540 540 inc = self.incrementalmissingrevs(common=common)
541 541 return inc.missingancestors(heads)
542 542
543 543 def findmissing(self, common=None, heads=None):
544 544 """Return the ancestors of heads that are not ancestors of common.
545 545
546 546 More specifically, return a list of nodes N such that every N
547 547 satisfies the following constraints:
548 548
549 549 1. N is an ancestor of some node in 'heads'
550 550 2. N is not an ancestor of any node in 'common'
551 551
552 552 The list is sorted by revision number, meaning it is
553 553 topologically sorted.
554 554
555 555 'heads' and 'common' are both lists of node IDs. If heads is
556 556 not supplied, uses all of the revlog's heads. If common is not
557 557 supplied, uses nullid."""
558 558 if common is None:
559 559 common = [nullid]
560 560 if heads is None:
561 561 heads = self.heads()
562 562
563 563 common = [self.rev(n) for n in common]
564 564 heads = [self.rev(n) for n in heads]
565 565
566 566 inc = self.incrementalmissingrevs(common=common)
567 567 return [self.node(r) for r in inc.missingancestors(heads)]
568 568
569 569 def nodesbetween(self, roots=None, heads=None):
570 570 """Return a topological path from 'roots' to 'heads'.
571 571
572 572 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
573 573 topologically sorted list of all nodes N that satisfy both of
574 574 these constraints:
575 575
576 576 1. N is a descendant of some node in 'roots'
577 577 2. N is an ancestor of some node in 'heads'
578 578
579 579 Every node is considered to be both a descendant and an ancestor
580 580 of itself, so every reachable node in 'roots' and 'heads' will be
581 581 included in 'nodes'.
582 582
583 583 'outroots' is the list of reachable nodes in 'roots', i.e., the
584 584 subset of 'roots' that is returned in 'nodes'. Likewise,
585 585 'outheads' is the subset of 'heads' that is also in 'nodes'.
586 586
587 587 'roots' and 'heads' are both lists of node IDs. If 'roots' is
588 588 unspecified, uses nullid as the only root. If 'heads' is
589 589 unspecified, uses list of all of the revlog's heads."""
590 590 nonodes = ([], [], [])
591 591 if roots is not None:
592 592 roots = list(roots)
593 593 if not roots:
594 594 return nonodes
595 595 lowestrev = min([self.rev(n) for n in roots])
596 596 else:
597 597 roots = [nullid] # Everybody's a descendant of nullid
598 598 lowestrev = nullrev
599 599 if (lowestrev == nullrev) and (heads is None):
600 600 # We want _all_ the nodes!
601 601 return ([self.node(r) for r in self], [nullid], list(self.heads()))
602 602 if heads is None:
603 603 # All nodes are ancestors, so the latest ancestor is the last
604 604 # node.
605 605 highestrev = len(self) - 1
606 606 # Set ancestors to None to signal that every node is an ancestor.
607 607 ancestors = None
608 608 # Set heads to an empty dictionary for later discovery of heads
609 609 heads = {}
610 610 else:
611 611 heads = list(heads)
612 612 if not heads:
613 613 return nonodes
614 614 ancestors = set()
615 615 # Turn heads into a dictionary so we can remove 'fake' heads.
616 616 # Also, later we will be using it to filter out the heads we can't
617 617 # find from roots.
618 618 heads = dict.fromkeys(heads, False)
619 619 # Start at the top and keep marking parents until we're done.
620 620 nodestotag = set(heads)
621 621 # Remember where the top was so we can use it as a limit later.
622 622 highestrev = max([self.rev(n) for n in nodestotag])
623 623 while nodestotag:
624 624 # grab a node to tag
625 625 n = nodestotag.pop()
626 626 # Never tag nullid
627 627 if n == nullid:
628 628 continue
629 629 # A node's revision number represents its place in a
630 630 # topologically sorted list of nodes.
631 631 r = self.rev(n)
632 632 if r >= lowestrev:
633 633 if n not in ancestors:
634 634 # If we are possibly a descendant of one of the roots
635 635 # and we haven't already been marked as an ancestor
636 636 ancestors.add(n) # Mark as ancestor
637 637 # Add non-nullid parents to list of nodes to tag.
638 638 nodestotag.update([p for p in self.parents(n) if
639 639 p != nullid])
640 640 elif n in heads: # We've seen it before, is it a fake head?
641 641 # So it is, real heads should not be the ancestors of
642 642 # any other heads.
643 643 heads.pop(n)
644 644 if not ancestors:
645 645 return nonodes
646 646 # Now that we have our set of ancestors, we want to remove any
647 647 # roots that are not ancestors.
648 648
649 649 # If one of the roots was nullid, everything is included anyway.
650 650 if lowestrev > nullrev:
651 651 # But since it wasn't, recompute the lowest rev to exclude
652 652 # roots that aren't ancestors.
653 653
654 654 # Filter out roots that aren't ancestors of heads
655 655 roots = [n for n in roots if n in ancestors]
656 656 # Recompute the lowest revision
657 657 if roots:
658 658 lowestrev = min([self.rev(n) for n in roots])
659 659 else:
660 660 # No more roots? Return empty list
661 661 return nonodes
662 662 else:
663 663 # We are descending from nullid, and don't need to care about
664 664 # any other roots.
665 665 lowestrev = nullrev
666 666 roots = [nullid]
667 667 # Transform our roots list into a set.
668 668 descendants = set(roots)
669 669 # Also, keep the original roots so we can filter out roots that aren't
670 670 # 'real' roots (i.e. are descended from other roots).
671 671 roots = descendants.copy()
672 672 # Our topologically sorted list of output nodes.
673 673 orderedout = []
674 674 # Don't start at nullid since we don't want nullid in our output list,
675 675 # and if nullid shows up in descendants, empty parents will look like
676 676 # they're descendants.
677 677 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
678 678 n = self.node(r)
679 679 isdescendant = False
680 680 if lowestrev == nullrev: # Everybody is a descendant of nullid
681 681 isdescendant = True
682 682 elif n in descendants:
683 683 # n is already a descendant
684 684 isdescendant = True
685 685 # This check only needs to be done here because all the roots
686 686 # will start being marked as descendants before the loop.
687 687 if n in roots:
688 688 # If n was a root, check if it's a 'real' root.
689 689 p = tuple(self.parents(n))
690 690 # If any of its parents are descendants, it's not a root.
691 691 if (p[0] in descendants) or (p[1] in descendants):
692 692 roots.remove(n)
693 693 else:
694 694 p = tuple(self.parents(n))
695 695 # A node is a descendant if either of its parents are
696 696 # descendants. (We seeded the descendants set with the roots
697 697 # up there, remember?)
698 698 if (p[0] in descendants) or (p[1] in descendants):
699 699 descendants.add(n)
700 700 isdescendant = True
701 701 if isdescendant and ((ancestors is None) or (n in ancestors)):
702 702 # Only include nodes that are both descendants and ancestors.
703 703 orderedout.append(n)
704 704 if (ancestors is not None) and (n in heads):
705 705 # We're trying to figure out which heads are reachable
706 706 # from roots.
707 707 # Mark this head as having been reached
708 708 heads[n] = True
709 709 elif ancestors is None:
710 710 # Otherwise, we're trying to discover the heads.
711 711 # Assume this is a head because if it isn't, the next step
712 712 # will eventually remove it.
713 713 heads[n] = True
714 714 # But, obviously its parents aren't.
715 715 for p in self.parents(n):
716 716 heads.pop(p, None)
717 717 heads = [n for n, flag in heads.iteritems() if flag]
718 718 roots = list(roots)
719 719 assert orderedout
720 720 assert roots
721 721 assert heads
722 722 return (orderedout, roots, heads)
723 723
724 724 def headrevs(self):
725 725 try:
726 726 return self.index.headrevs()
727 727 except AttributeError:
728 728 return self._headrevs()
729 729
730 730 def computephases(self, roots):
731 731 return self.index.computephasesmapsets(roots)
732 732
733 733 def _headrevs(self):
734 734 count = len(self)
735 735 if not count:
736 736 return [nullrev]
737 737 # we won't iterate over filtered revs, so nobody is a head at the start
738 738 ishead = [0] * (count + 1)
739 739 index = self.index
740 740 for r in self:
741 741 ishead[r] = 1 # I may be a head
742 742 e = index[r]
743 743 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
744 744 return [r for r, val in enumerate(ishead) if val]
745 745
746 746 def heads(self, start=None, stop=None):
747 747 """return the list of all nodes that have no children
748 748
749 749 if start is specified, only heads that are descendants of
750 750 start will be returned
751 751 if stop is specified, it will consider all the revs from stop
752 752 as if they had no children
753 753 """
754 754 if start is None and stop is None:
755 755 if not len(self):
756 756 return [nullid]
757 757 return [self.node(r) for r in self.headrevs()]
758 758
759 759 if start is None:
760 760 start = nullid
761 761 if stop is None:
762 762 stop = []
763 763 stoprevs = set([self.rev(n) for n in stop])
764 764 startrev = self.rev(start)
765 765 reachable = set((startrev,))
766 766 heads = set((startrev,))
767 767
768 768 parentrevs = self.parentrevs
769 769 for r in self.revs(start=startrev + 1):
770 770 for p in parentrevs(r):
771 771 if p in reachable:
772 772 if r not in stoprevs:
773 773 reachable.add(r)
774 774 heads.add(r)
775 775 if p in heads and p not in stoprevs:
776 776 heads.remove(p)
777 777
778 778 return [self.node(r) for r in heads]
779 779
780 780 def children(self, node):
781 781 """find the children of a given node"""
782 782 c = []
783 783 p = self.rev(node)
784 784 for r in self.revs(start=p + 1):
785 785 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
786 786 if prevs:
787 787 for pr in prevs:
788 788 if pr == p:
789 789 c.append(self.node(r))
790 790 elif p == nullrev:
791 791 c.append(self.node(r))
792 792 return c
793 793
794 794 def descendant(self, start, end):
795 795 if start == nullrev:
796 796 return True
797 797 for i in self.descendants([start]):
798 798 if i == end:
799 799 return True
800 800 elif i > end:
801 801 break
802 802 return False
803 803
804 804 def commonancestorsheads(self, a, b):
805 805 """calculate all the heads of the common ancestors of nodes a and b"""
806 806 a, b = self.rev(a), self.rev(b)
807 807 try:
808 808 ancs = self.index.commonancestorsheads(a, b)
809 809 except (AttributeError, OverflowError): # C implementation failed
810 810 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
811 811 return map(self.node, ancs)
812 812
813 813 def isancestor(self, a, b):
814 814 """return True if node a is an ancestor of node b
815 815
816 816 The implementation of this is trivial but the use of
817 817 commonancestorsheads is not."""
818 818 return a in self.commonancestorsheads(a, b)
819 819
820 820 def ancestor(self, a, b):
821 821 """calculate the "best" common ancestor of nodes a and b"""
822 822
823 823 a, b = self.rev(a), self.rev(b)
824 824 try:
825 825 ancs = self.index.ancestors(a, b)
826 826 except (AttributeError, OverflowError):
827 827 ancs = ancestor.ancestors(self.parentrevs, a, b)
828 828 if ancs:
829 829 # choose a consistent winner when there's a tie
830 830 return min(map(self.node, ancs))
831 831 return nullid
832 832
833 833 def _match(self, id):
834 834 if isinstance(id, int):
835 835 # rev
836 836 return self.node(id)
837 837 if len(id) == 20:
838 838 # possibly a binary node
839 839 # odds of a binary node being all hex in ASCII are 1 in 10**25
840 840 try:
841 841 node = id
842 842 self.rev(node) # quick search the index
843 843 return node
844 844 except LookupError:
845 845 pass # may be partial hex id
846 846 try:
847 847 # str(rev)
848 848 rev = int(id)
849 849 if str(rev) != id:
850 850 raise ValueError
851 851 if rev < 0:
852 852 rev = len(self) + rev
853 853 if rev < 0 or rev >= len(self):
854 854 raise ValueError
855 855 return self.node(rev)
856 856 except (ValueError, OverflowError):
857 857 pass
858 858 if len(id) == 40:
859 859 try:
860 860 # a full hex nodeid?
861 861 node = bin(id)
862 862 self.rev(node)
863 863 return node
864 864 except (TypeError, LookupError):
865 865 pass
866 866
867 867 def _partialmatch(self, id):
868 868 try:
869 869 n = self.index.partialmatch(id)
870 870 if n and self.hasnode(n):
871 871 return n
872 872 return None
873 873 except RevlogError:
874 874 # parsers.c radix tree lookup gave multiple matches
875 875 # fall through to slow path that filters hidden revisions
876 876 pass
877 877 except (AttributeError, ValueError):
878 878 # we are pure python, or key was too short to search radix tree
879 879 pass
880 880
881 881 if id in self._pcache:
882 882 return self._pcache[id]
883 883
884 884 if len(id) < 40:
885 885 try:
886 886 # hex(node)[:...]
887 887 l = len(id) // 2 # grab an even number of digits
888 888 prefix = bin(id[:l * 2])
889 889 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
890 890 nl = [n for n in nl if hex(n).startswith(id) and
891 891 self.hasnode(n)]
892 892 if len(nl) > 0:
893 893 if len(nl) == 1:
894 894 self._pcache[id] = nl[0]
895 895 return nl[0]
896 896 raise LookupError(id, self.indexfile,
897 897 _('ambiguous identifier'))
898 898 return None
899 899 except TypeError:
900 900 pass
901 901
902 902 def lookup(self, id):
903 903 """locate a node based on:
904 904 - revision number or str(revision number)
905 905 - nodeid or subset of hex nodeid
906 906 """
907 907 n = self._match(id)
908 908 if n is not None:
909 909 return n
910 910 n = self._partialmatch(id)
911 911 if n:
912 912 return n
913 913
914 914 raise LookupError(id, self.indexfile, _('no match found'))
915 915
916 916 def cmp(self, node, text):
917 917 """compare text with a given file revision
918 918
919 919 returns True if text is different than what is stored.
920 920 """
921 921 p1, p2 = self.parents(node)
922 922 return hash(text, p1, p2) != node
923 923
924 924 def _addchunk(self, offset, data):
925 925 o, d = self._chunkcache
926 926 # try to add to existing cache
927 927 if o + len(d) == offset and len(d) + len(data) < _chunksize:
928 928 self._chunkcache = o, d + data
929 929 else:
930 930 self._chunkcache = offset, data
931 931
932 932 def _loadchunk(self, offset, length):
933 933 if self._inline:
934 934 df = self.opener(self.indexfile)
935 935 else:
936 936 df = self.opener(self.datafile)
937 937
938 938 # Cache data both forward and backward around the requested
939 939 # data, in a fixed size window. This helps speed up operations
940 940 # involving reading the revlog backwards.
941 941 cachesize = self._chunkcachesize
942 942 realoffset = offset & ~(cachesize - 1)
943 943 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
944 944 - realoffset)
945 945 df.seek(realoffset)
946 946 d = df.read(reallength)
947 947 df.close()
948 948 self._addchunk(realoffset, d)
949 949 if offset != realoffset or reallength != length:
950 950 return util.buffer(d, offset - realoffset, length)
951 951 return d
952 952
953 953 def _getchunk(self, offset, length):
954 954 o, d = self._chunkcache
955 955 l = len(d)
956 956
957 957 # is it in the cache?
958 958 cachestart = offset - o
959 959 cacheend = cachestart + length
960 960 if cachestart >= 0 and cacheend <= l:
961 961 if cachestart == 0 and cacheend == l:
962 962 return d # avoid a copy
963 963 return util.buffer(d, cachestart, cacheend - cachestart)
964 964
965 965 return self._loadchunk(offset, length)
966 966
967 967 def _chunkraw(self, startrev, endrev):
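# In an inline revlog the index entries and data chunks are
# interleaved in the .i file, so every data offset is shifted by
# (rev + 1) index-entry sizes.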
968 968 start = self.start(startrev)
969 969 end = self.end(endrev)
970 970 if self._inline:
971 971 start += (startrev + 1) * self._io.size
972 972 end += (endrev + 1) * self._io.size
973 973 length = end - start
974 974 return self._getchunk(start, length)
975 975
976 976 def _chunk(self, rev):
977 977 return decompress(self._chunkraw(rev, rev))
978 978
979 979 def _chunks(self, revs):
980 980 '''faster version of [self._chunk(rev) for rev in revs]
981 981
982 982 Assumes that revs is in ascending order.'''
983 983 if not revs:
984 984 return []
985 985 start = self.start
986 986 length = self.length
987 987 inline = self._inline
988 988 iosize = self._io.size
989 989 buffer = util.buffer
990 990
991 991 l = []
992 992 ladd = l.append
993 993
994 994 # preload the cache
995 995 try:
996 996 while True:
997 997 # ensure that the cache doesn't change out from under us
998 998 _cache = self._chunkcache
999 999 self._chunkraw(revs[0], revs[-1])
1000 1000 if _cache == self._chunkcache:
1001 1001 break
1002 1002 offset, data = _cache
1003 1003 except OverflowError:
1004 1004 # issue4215 - we can't cache a run of chunks greater than
1005 1005 # 2G on Windows
1006 1006 return [self._chunk(rev) for rev in revs]
1007 1007
1008 1008 for rev in revs:
1009 1009 chunkstart = start(rev)
1010 1010 if inline:
1011 1011 chunkstart += (rev + 1) * iosize
1012 1012 chunklength = length(rev)
1013 1013 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1014 1014
1015 1015 return l
1016 1016
1017 1017 def _chunkclear(self):
1018 1018 self._chunkcache = (0, '')
1019 1019
1020 1020 def deltaparent(self, rev):
1021 1021 """return deltaparent of the given revision"""
1022 1022 base = self.index[rev][3]
1023 1023 if base == rev:
1024 1024 return nullrev
1025 1025 elif self._generaldelta:
1026 1026 return base
1027 1027 else:
1028 1028 return rev - 1
1029 1029
1030 1030 def revdiff(self, rev1, rev2):
1031 1031 """return or calculate a delta between two revisions"""
1032 1032 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1033 1033 return str(self._chunk(rev2))
1034 1034
1035 1035 return mdiff.textdiff(self.revision(rev1),
1036 1036 self.revision(rev2))
1037 1037
1038 1038 def revision(self, nodeorrev):
1039 1039 """return an uncompressed revision of a given node or revision
1040 1040 number.
1041 1041 """
1042 1042 if isinstance(nodeorrev, int):
1043 1043 rev = nodeorrev
1044 1044 node = self.node(rev)
1045 1045 else:
1046 1046 node = nodeorrev
1047 1047 rev = None
1048 1048
1049 1049 _cache = self._cache # grab local copy of cache to avoid thread race
1050 1050 cachedrev = None
1051 1051 if node == nullid:
1052 1052 return ""
1053 1053 if _cache:
1054 1054 if _cache[0] == node:
1055 1055 return _cache[2]
1056 1056 cachedrev = _cache[1]
1057 1057
1058 1058 # look up what we need to read
1059 1059 text = None
1060 1060 if rev is None:
1061 1061 rev = self.rev(node)
1062 1062
1063 1063 # check rev flags
1064 1064 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1065 1065 raise RevlogError(_('incompatible revision flag %x') %
1066 1066 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1067 1067
1068 1068 # build delta chain
1069 1069 chain = []
1070 1070 index = self.index # for performance
1071 1071 generaldelta = self._generaldelta
1072 1072 iterrev = rev
1073 1073 e = index[iterrev]
1074 1074 while iterrev != e[3] and iterrev != cachedrev:
1075 1075 chain.append(iterrev)
1076 1076 if generaldelta:
1077 1077 iterrev = e[3]
1078 1078 else:
1079 1079 iterrev -= 1
1080 1080 e = index[iterrev]
1081 1081
1082 1082 if iterrev == cachedrev:
1083 1083 # cache hit
1084 1084 text = _cache[2]
1085 1085 else:
1086 1086 chain.append(iterrev)
1087 1087 chain.reverse()
1088 1088
1089 1089 # drop cache to save memory
1090 1090 self._cache = None
1091 1091
1092 1092 bins = self._chunks(chain)
1093 1093 if text is None:
1094 1094 text = str(bins[0])
1095 1095 bins = bins[1:]
1096 1096
1097 1097 text = mdiff.patches(text, bins)
1098 1098
1099 1099 text = self._checkhash(text, node, rev)
1100 1100
1101 1101 self._cache = (node, rev, text)
1102 1102 return text
1103 1103
1104 1104 def hash(self, text, p1, p2):
1105 1105 """Compute a node hash.
1106 1106
1107 1107 Available as a function so that subclasses can replace the hash
1108 1108 as needed.
1109 1109 """
1110 1110 return hash(text, p1, p2)
1111 1111
1112 1112 def _checkhash(self, text, node, rev):
1113 1113 p1, p2 = self.parents(node)
1114 1114 self.checkhash(text, p1, p2, node, rev)
1115 1115 return text
1116 1116
1117 1117 def checkhash(self, text, p1, p2, node, rev=None):
1118 1118 if node != self.hash(text, p1, p2):
1119 1119 revornode = rev
1120 1120 if revornode is None:
1121 1121 revornode = templatefilters.short(hex(node))
1122 1122 raise RevlogError(_("integrity check failed on %s:%s")
1123 1123 % (self.indexfile, revornode))
1124 1124
1125 1125 def checkinlinesize(self, tr, fp=None):
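# Once an inline revlog grows past _maxinline, migrate it to a
# separate index/data pair: stream every chunk into the new .d file,
# then rewrite the index without the inline flag.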
1126 1126 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1127 1127 return
1128 1128
1129 1129 trinfo = tr.find(self.indexfile)
1130 1130 if trinfo is None:
1131 1131 raise RevlogError(_("%s not found in the transaction")
1132 1132 % self.indexfile)
1133 1133
1134 1134 trindex = trinfo[2]
1135 1135 if trindex is not None:
1136 1136 dataoff = self.start(trindex)
1137 1137 else:
1138 1138 # revlog was stripped at start of transaction, use all leftover data
1139 1139 trindex = len(self) - 1
1140 1140 dataoff = self.end(-2)
1141 1141
1142 1142 tr.add(self.datafile, dataoff)
1143 1143
1144 1144 if fp:
1145 1145 fp.flush()
1146 1146 fp.close()
1147 1147
1148 1148 df = self.opener(self.datafile, 'w')
1149 1149 try:
1150 1150 for r in self:
1151 1151 df.write(self._chunkraw(r, r))
1152 1152 finally:
1153 1153 df.close()
1154 1154
1155 1155 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1156 1156 self.version &= ~(REVLOGNGINLINEDATA)
1157 1157 self._inline = False
1158 1158 for i in self:
1159 1159 e = self._io.packentry(self.index[i], self.node, self.version, i)
1160 1160 fp.write(e)
1161 1161
1162 1162 # if we don't call close, the temp file will never replace the
1163 1163 # real index
1164 1164 fp.close()
1165 1165
1166 1166 tr.replace(self.indexfile, trindex * self._io.size)
1167 1167 self._chunkclear()
1168 1168
1169 1169 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1170 1170 node=None):
1171 1171 """add a revision to the log
1172 1172
1173 1173 text - the revision data to add
1174 1174 transaction - the transaction object used for rollback
1175 1175 link - the linkrev data to add
1176 1176 p1, p2 - the parent nodeids of the revision
1177 1177 cachedelta - an optional precomputed delta
1178 1178 node - nodeid of revision; typically node is not specified, and it is
1179 1179 computed by default as hash(text, p1, p2), however subclasses might
1180 1180 use different hashing method (and override checkhash() in such case)
1181 1181 """
1182 1182 if link == nullrev:
1183 1183 raise RevlogError(_("attempted to add linkrev -1 to %s")
1184 1184 % self.indexfile)
1185 1185
1186 1186 if len(text) > _maxentrysize:
1187 1187 raise RevlogError(
1188 1188 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1189 1189 % (self.indexfile, len(text)))
1190 1190
1191 1191 node = node or self.hash(text, p1, p2)
1192 1192 if node in self.nodemap:
1193 1193 return node
1194 1194
1195 1195 dfh = None
1196 1196 if not self._inline:
1197 1197 dfh = self.opener(self.datafile, "a")
1198 1198 ifh = self.opener(self.indexfile, "a+")
1199 1199 try:
1200 1200 return self._addrevision(node, text, transaction, link, p1, p2,
1201 1201 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1202 1202 finally:
1203 1203 if dfh:
1204 1204 dfh.close()
1205 1205 ifh.close()
1206 1206
1207 1207 def compress(self, text):
1208 1208 """ generate a possibly-compressed representation of text """
1209 1209 if not text:
1210 1210 return ("", text)
1211 1211 l = len(text)
1212 1212 bin = None
1213 1213 if l < 44:
1214 1214 pass
1215 1215 elif l > 1000000:
1216 1216 # zlib makes an internal copy, thus doubling memory usage for
1217 1217 # large files, so let's do this in pieces
1218 1218 z = zlib.compressobj()
1219 1219 p = []
1220 1220 pos = 0
1221 1221 while pos < l:
1222 1222 pos2 = pos + 2**20
1223 1223 p.append(z.compress(text[pos:pos2]))
1224 1224 pos = pos2
1225 1225 p.append(z.flush())
1226 1226 if sum(map(len, p)) < l:
1227 1227 bin = "".join(p)
1228 1228 else:
1229 1229 bin = _compress(text)
1230 1230 if bin is None or len(bin) > l:
1231 1231 if text[0] == '\0':
1232 1232 return ("", text)
1233 1233 return ('u', text)
1234 1234 return ("", bin)
1235 1235
1236 1236 def _isgooddelta(self, d, textlen):
1237 1237 """Returns True if the given delta is good. Good means that it is within
1238 1238 the disk span, disk size, and chain length bounds that we know to be
1239 1239 performant."""
1240 1240 if d is None:
1241 1241 return False
1242 1242
1243 1243 # - 'dist' is the distance from the base revision -- bounding it limits
1244 1244 # the amount of I/O we need to do.
1245 1245 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1246 1246 # to apply -- bounding it limits the amount of CPU we consume.
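# For example, with textlen == 1000 a delta is rejected if the read
# span from the chain base exceeds 4000 bytes, the delta itself
# exceeds 1000 bytes, the chain's summed delta size exceeds 2000
# bytes, or the chain is longer than _maxchainlen.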
1247 1247 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1248 1248 if (dist > textlen * 4 or l > textlen or
1249 1249 compresseddeltalen > textlen * 2 or
1250 1250 (self._maxchainlen and chainlen > self._maxchainlen)):
1251 1251 return False
1252 1252
1253 1253 return True
1254 1254
1255 1255 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1256 1256 cachedelta, ifh, dfh):
1257 1257 """internal function to add revisions to the log
1258 1258
1259 1259 see addrevision for argument descriptions.
1260 1260 invariants:
1261 1261 - text is optional (can be None); if not set, cachedelta must be set.
1262 1262 if both are set, they must correspond to each other.
1263 1263 """
1264 1264 btext = [text]
1265 1265 def buildtext():
1266 1266 if btext[0] is not None:
1267 1267 return btext[0]
1268 1268 # flush any pending writes here so we can read it in revision
1269 1269 if dfh:
1270 1270 dfh.flush()
1271 1271 ifh.flush()
1272 1272 baserev = cachedelta[0]
1273 1273 delta = cachedelta[1]
1274 1274 # special case deltas which replace entire base; no need to decode
1275 1275 # base revision. This neatly avoids censored bases, which throw when
1276 1276 # they're decoded.
1277 1277 hlen = struct.calcsize(">lll")
1278 1278 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1279 1279 len(delta) - hlen):
1280 1280 btext[0] = delta[hlen:]
1281 1281 else:
1282 1282 basetext = self.revision(self.node(baserev))
1283 1283 btext[0] = mdiff.patch(basetext, delta)
1284 1284 try:
1285 1285 self.checkhash(btext[0], p1, p2, node)
1286 1286 if flags & REVIDX_ISCENSORED:
1287 1287 raise RevlogError(_('node %s is not censored') % node)
1288 1288 except CensoredNodeError:
1289 1289 # must pass the censored index flag to add censored revisions
1290 1290 if not flags & REVIDX_ISCENSORED:
1291 1291 raise
1292 1292 return btext[0]
1293 1293
1294 1294 def builddelta(rev):
1295 1295 # can we use the cached delta?
1296 1296 if cachedelta and cachedelta[0] == rev:
1297 1297 delta = cachedelta[1]
1298 1298 else:
1299 1299 t = buildtext()
1300 1300 if self.iscensored(rev):
1301 1301 # deltas based on a censored revision must replace the
1302 1302 # full content in one patch, so delta works everywhere
1303 1303 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1304 1304 delta = header + t
1305 1305 else:
1306 1306 ptext = self.revision(self.node(rev))
1307 1307 delta = mdiff.textdiff(ptext, t)
1308 1308 data = self.compress(delta)
1309 1309 l = len(data[1]) + len(data[0])
1310 1310 if basecache[0] == rev:
1311 1311 chainbase = basecache[1]
1312 1312 else:
1313 1313 chainbase = self.chainbase(rev)
1314 1314 dist = l + offset - self.start(chainbase)
1315 1315 if self._generaldelta:
1316 1316 base = rev
1317 1317 else:
1318 1318 base = chainbase
1319 1319 chainlen, compresseddeltalen = self._chaininfo(rev)
1320 1320 chainlen += 1
1321 1321 compresseddeltalen += l
1322 1322 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1323 1323
1324 1324 curr = len(self)
1325 1325 prev = curr - 1
1326 1326 base = chainbase = curr
1327 1327 chainlen = None
1328 1328 offset = self.end(prev)
1329 1329 d = None
1330 1330 if self._basecache is None:
1331 1331 self._basecache = (prev, self.chainbase(prev))
1332 1332 basecache = self._basecache
1333 1333 p1r, p2r = self.rev(p1), self.rev(p2)
1334 1334
1335 1335 # full versions are inserted when the needed deltas
1336 1336 # become comparable to the uncompressed text
1337 1337 if text is None:
1338 1338 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1339 1339 cachedelta[1])
1340 1340 else:
1341 1341 textlen = len(text)
1342 1342
1343 1343 # should we try to build a delta?
1344 1344 if prev != nullrev:
1345 1345 if self._generaldelta:
1346 if p1r >= basecache[1]:
1347 d = builddelta(p1r)
1348 elif p2r >= basecache[1]:
1349 d = builddelta(p2r)
1350 else:
1346 # Pick whichever parent is closer to us (to minimize the
1347 # chance of having to build a fulltext). Since
1348 # nullrev == -1, any non-merge commit will always pick p1r.
1349 drev = p2r if p2r > p1r else p1r
1350 d = builddelta(drev)
1351 # If the chosen delta will result in us making a full text,
1352 # give it one last try against prev.
1353 if drev != prev and not self._isgooddelta(d, textlen):
1351 1354 d = builddelta(prev)
1352 1355 else:
1353 1356 d = builddelta(prev)
1354 1357 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1355 1358
1356 1359 if not self._isgooddelta(d, textlen):
1357 1360 text = buildtext()
1358 1361 data = self.compress(text)
1359 1362 l = len(data[1]) + len(data[0])
1360 1363 base = chainbase = curr
1361 1364
1362 1365 e = (offset_type(offset, flags), l, textlen,
1363 1366 base, link, p1r, p2r, node)
1364 1367 self.index.insert(-1, e)
1365 1368 self.nodemap[node] = curr
1366 1369
1367 1370 entry = self._io.packentry(e, self.node, self.version, curr)
1368 1371 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1369 1372
1370 1373 if type(text) == str: # only accept immutable objects
1371 1374 self._cache = (node, curr, text)
1372 1375 self._basecache = (curr, chainbase)
1373 1376 return node
1374 1377
1375 1378 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1376 1379 curr = len(self) - 1
1377 1380 if not self._inline:
1378 1381 transaction.add(self.datafile, offset)
1379 1382 transaction.add(self.indexfile, curr * len(entry))
1380 1383 if data[0]:
1381 1384 dfh.write(data[0])
1382 1385 dfh.write(data[1])
1383 1386 dfh.flush()
1384 1387 ifh.write(entry)
1385 1388 else:
1386 1389 offset += curr * self._io.size
1387 1390 transaction.add(self.indexfile, offset, curr)
1388 1391 ifh.write(entry)
1389 1392 ifh.write(data[0])
1390 1393 ifh.write(data[1])
1391 1394 self.checkinlinesize(transaction, ifh)
1392 1395
1393 1396 def addgroup(self, bundle, linkmapper, transaction, addrevisioncb=None):
1394 1397 """
1395 1398 add a delta group
1396 1399
1397 1400 given a set of deltas, add them to the revision log. the
1398 1401 first delta is against its parent, which should be in our
1399 1402 log, the rest are against the previous delta.
1400 1403
1401 1404 If ``addrevisioncb`` is defined, it will be called with arguments of
1402 1405 this revlog and the node that was added.
1403 1406 """
1404 1407
1405 1408 # track the base of the current delta log
1406 1409 content = []
1407 1410 node = None
1408 1411
1409 1412 r = len(self)
1410 1413 end = 0
1411 1414 if r:
1412 1415 end = self.end(r - 1)
1413 1416 ifh = self.opener(self.indexfile, "a+")
1414 1417 isize = r * self._io.size
1415 1418 if self._inline:
1416 1419 transaction.add(self.indexfile, end + isize, r)
1417 1420 dfh = None
1418 1421 else:
1419 1422 transaction.add(self.indexfile, isize, r)
1420 1423 transaction.add(self.datafile, end)
1421 1424 dfh = self.opener(self.datafile, "a")
1422 1425 def flush():
1423 1426 if dfh:
1424 1427 dfh.flush()
1425 1428 ifh.flush()
1426 1429 try:
1427 1430 # loop through our set of deltas
1428 1431 chain = None
1429 1432 while True:
1430 1433 chunkdata = bundle.deltachunk(chain)
1431 1434 if not chunkdata:
1432 1435 break
1433 1436 node = chunkdata['node']
1434 1437 p1 = chunkdata['p1']
1435 1438 p2 = chunkdata['p2']
1436 1439 cs = chunkdata['cs']
1437 1440 deltabase = chunkdata['deltabase']
1438 1441 delta = chunkdata['delta']
1439 1442
1440 1443 content.append(node)
1441 1444
1442 1445 link = linkmapper(cs)
1443 1446 if node in self.nodemap:
1444 1447 # this can happen if two branches make the same change
1445 1448 chain = node
1446 1449 continue
1447 1450
1448 1451 for p in (p1, p2):
1449 1452 if p not in self.nodemap:
1450 1453 raise LookupError(p, self.indexfile,
1451 1454 _('unknown parent'))
1452 1455
1453 1456 if deltabase not in self.nodemap:
1454 1457 raise LookupError(deltabase, self.indexfile,
1455 1458 _('unknown delta base'))
1456 1459
1457 1460 baserev = self.rev(deltabase)
1458 1461
1459 1462 if baserev != nullrev and self.iscensored(baserev):
1460 1463 # if base is censored, delta must be full replacement in a
1461 1464 # single patch operation
1462 1465 hlen = struct.calcsize(">lll")
1463 1466 oldlen = self.rawsize(baserev)
1464 1467 newlen = len(delta) - hlen
1465 1468 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1466 1469 raise error.CensoredBaseError(self.indexfile,
1467 1470 self.node(baserev))
1468 1471
1469 1472 flags = REVIDX_DEFAULT_FLAGS
1470 1473 if self._peek_iscensored(baserev, delta, flush):
1471 1474 flags |= REVIDX_ISCENSORED
1472 1475
1473 1476 chain = self._addrevision(node, None, transaction, link,
1474 1477 p1, p2, flags, (baserev, delta),
1475 1478 ifh, dfh)
1476 1479
1477 1480 if addrevisioncb:
1478 1481 # Data for the added revision can't be read until it is
1479 1482 # flushed, because _loadchunk always opens a new file handle
1480 1483 # and there is no guarantee the data was actually written yet.
1481 1484 flush()
1482 1485 addrevisioncb(self, chain)
1483 1486
1484 1487 if not dfh and not self._inline:
1485 1488 # addrevision switched from inline to conventional
1486 1489 # reopen the index
1487 1490 ifh.close()
1488 1491 dfh = self.opener(self.datafile, "a")
1489 1492 ifh = self.opener(self.indexfile, "a")
1490 1493 finally:
1491 1494 if dfh:
1492 1495 dfh.close()
1493 1496 ifh.close()
1494 1497
1495 1498 return content
1496 1499
1497 1500 def iscensored(self, rev):
1498 1501 """Check if a file revision is censored."""
1499 1502 return False
1500 1503
1501 1504 def _peek_iscensored(self, baserev, delta, flush):
1502 1505 """Quickly check if a delta produces a censored revision."""
1503 1506 return False
1504 1507
1505 1508 def getstrippoint(self, minlink):
1506 1509 """find the minimum rev that must be stripped to strip the linkrev
1507 1510
1508 1511 Returns a tuple containing the minimum rev and a set of all revs that
1509 1512 have linkrevs that will be broken by this strip.
1510 1513 """
1511 1514 brokenrevs = set()
1512 1515 strippoint = len(self)
1513 1516
1514 1517 heads = {}
1515 1518 futurelargelinkrevs = set()
1516 1519 for head in self.headrevs():
1517 1520 headlinkrev = self.linkrev(head)
1518 1521 heads[head] = headlinkrev
1519 1522 if headlinkrev >= minlink:
1520 1523 futurelargelinkrevs.add(headlinkrev)
1521 1524
1522 1525 # This algorithm involves walking down the rev graph, starting at the
1523 1526 # heads. Since the revs are topologically sorted according to linkrev,
1524 1527 # once all head linkrevs are below the minlink, we know there are
1525 1528 # no more revs that could have a linkrev greater than minlink.
1526 1529 # So we can stop walking.
1527 1530 while futurelargelinkrevs:
1528 1531 strippoint -= 1
1529 1532 linkrev = heads.pop(strippoint)
1530 1533
1531 1534 if linkrev < minlink:
1532 1535 brokenrevs.add(strippoint)
1533 1536 else:
1534 1537 futurelargelinkrevs.remove(linkrev)
1535 1538
1536 1539 for p in self.parentrevs(strippoint):
1537 1540 if p != nullrev:
1538 1541 plinkrev = self.linkrev(p)
1539 1542 heads[p] = plinkrev
1540 1543 if plinkrev >= minlink:
1541 1544 futurelargelinkrevs.add(plinkrev)
1542 1545
1543 1546 return strippoint, brokenrevs
1544 1547
1545 1548 def strip(self, minlink, transaction):
1546 1549 """truncate the revlog on the first revision with a linkrev >= minlink
1547 1550
1548 1551 This function is called when we're stripping revision minlink and
1549 1552 its descendants from the repository.
1550 1553
1551 1554 We have to remove all revisions with linkrev >= minlink, because
1552 1555 the equivalent changelog revisions will be renumbered after the
1553 1556 strip.
1554 1557
1555 1558 So we truncate the revlog on the first of these revisions, and
1556 1559 trust that the caller has saved the revisions that shouldn't be
1557 1560 removed and that it'll re-add them after this truncation.
1558 1561 """
1559 1562 if len(self) == 0:
1560 1563 return
1561 1564
1562 1565 rev, _ = self.getstrippoint(minlink)
1563 1566 if rev == len(self):
1564 1567 return
1565 1568
1566 1569 # first truncate the files on disk
1567 1570 end = self.start(rev)
1568 1571 if not self._inline:
1569 1572 transaction.add(self.datafile, end)
1570 1573 end = rev * self._io.size
1571 1574 else:
1572 1575 end += rev * self._io.size
1573 1576
1574 1577 transaction.add(self.indexfile, end)
1575 1578
1576 1579 # then reset internal state in memory to forget those revisions
1577 1580 self._cache = None
1578 1581 self._chaininfocache = {}
1579 1582 self._chunkclear()
1580 1583 for x in xrange(rev, len(self)):
1581 1584 del self.nodemap[self.node(x)]
1582 1585
1583 1586 del self.index[rev:-1]
1584 1587
1585 1588 def checksize(self):
1586 1589 expected = 0
1587 1590 if len(self):
1588 1591 expected = max(0, self.end(len(self) - 1))
1589 1592
1590 1593 try:
1591 1594 f = self.opener(self.datafile)
1592 1595 f.seek(0, 2)
1593 1596 actual = f.tell()
1594 1597 f.close()
1595 1598 dd = actual - expected
1596 1599 except IOError as inst:
1597 1600 if inst.errno != errno.ENOENT:
1598 1601 raise
1599 1602 dd = 0
1600 1603
1601 1604 try:
1602 1605 f = self.opener(self.indexfile)
1603 1606 f.seek(0, 2)
1604 1607 actual = f.tell()
1605 1608 f.close()
1606 1609 s = self._io.size
1607 1610 i = max(0, actual // s)
1608 1611 di = actual - (i * s)
1609 1612 if self._inline:
1610 1613 databytes = 0
1611 1614 for r in self:
1612 1615 databytes += max(0, self.length(r))
1613 1616 dd = 0
1614 1617 di = actual - len(self) * s - databytes
1615 1618 except IOError as inst:
1616 1619 if inst.errno != errno.ENOENT:
1617 1620 raise
1618 1621 di = 0
1619 1622
1620 1623 return (dd, di)
1621 1624
1622 1625 def files(self):
1623 1626 res = [self.indexfile]
1624 1627 if not self._inline:
1625 1628 res.append(self.datafile)
1626 1629 return res