revlog: verify censored flag when hashing added revision fulltext...
Mike Edgar
r23857:8a3c132f default
@@ -1,1537 +1,1541 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
38 38 REVIDX_DEFAULT_FLAGS = 0
39 39 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
40 40
41 41 # max size of revlog with inline data
42 42 _maxinline = 131072
43 43 _chunksize = 1048576
44 44
45 45 RevlogError = error.RevlogError
46 46 LookupError = error.LookupError
47 47 CensoredNodeError = error.CensoredNodeError
48 48
49 49 def getoffset(q):
50 50 return int(q >> 16)
51 51
52 52 def gettype(q):
53 53 return int(q & 0xFFFF)
54 54
55 55 def offset_type(offset, type):
56 56 return long(long(offset) << 16 | type)
57 57
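The three helpers above pack a 48-bit byte offset and a 16-bit flag field into a single integer, which is how the first index field stores both values. A quick round-trip sketch, using the literal value of REVIDX_ISCENSORED defined earlier:

```python
# round-trip of offset_type() / getoffset() / gettype()
q = (123456 << 16) | (1 << 15)       # offset_type(123456, REVIDX_ISCENSORED)
assert int(q >> 16) == 123456        # getoffset(q)
assert int(q & 0xFFFF) == (1 << 15)  # gettype(q)
```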
58 58 _nullhash = _sha(nullid)
59 59
60 60 def hash(text, p1, p2):
61 61 """generate a hash from the given text and its parent hashes
62 62
63 63 This hash combines both the current file contents and its history
64 64 in a manner that makes it easy to distinguish nodes with the same
65 65 content in the revision graph.
66 66 """
67 67 # As of now, if one of the parent nodes is null, it is p2
68 68 if p2 == nullid:
69 69 # deep copy of a hash is faster than creating one
70 70 s = _nullhash.copy()
71 71 s.update(p1)
72 72 else:
73 73 # none of the parent nodes are nullid
74 74 l = [p1, p2]
75 75 l.sort()
76 76 s = _sha(l[0])
77 77 s.update(l[1])
78 78 s.update(text)
79 79 return s.digest()
80 80
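The node hash can be recomputed outside Mercurial with nothing but hashlib. A minimal sketch mirroring the function above; since nullid is twenty zero bytes it always sorts first, which is why the null-parent fast path and the sorted general path agree:

```python
import hashlib

NULLID = b"\x00" * 20

def node_hash(text, p1, p2):
    # sha1 over the two parent ids in sorted order, then the fulltext
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

# same text, different parentage => different node id
assert node_hash(b"a", NULLID, NULLID) != node_hash(b"a", b"\x01" * 20, NULLID)
```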
81 81 def decompress(bin):
82 82 """ decompress the given input """
83 83 if not bin:
84 84 return bin
85 85 t = bin[0]
86 86 if t == '\0':
87 87 return bin
88 88 if t == 'x':
89 89 try:
90 90 return _decompress(bin)
91 91 except zlib.error, e:
92 92 raise RevlogError(_("revlog decompress error: %s") % str(e))
93 93 if t == 'u':
94 94 return bin[1:]
95 95 raise RevlogError(_("unknown compression type %r") % t)
96 96
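Each stored chunk is self-describing: its first byte selects the decoder ('x' is a zlib stream, 'u' a literal, '\0' an empty or raw chunk). A sketch of a matching encoder/decoder pair (hypothetical helper names; the real encoder is the compress() method further down):

```python
import zlib

def encode_chunk(text):
    # store zlib output only when it saves space; zlib output already
    # begins with 'x', literals get an explicit 'u' marker
    if not text:
        return text
    out = zlib.compress(text)
    if len(out) < len(text):
        return out
    if text[:1] == b"\x00":
        return text
    return b"u" + text

def decode_chunk(data):
    if not data or data[:1] == b"\x00":
        return data
    if data[:1] == b"x":
        return zlib.decompress(data)
    if data[:1] == b"u":
        return data[1:]
    raise ValueError("unknown compression type %r" % data[:1])

blob = b"many similar revisions compress well " * 8
assert decode_chunk(encode_chunk(blob)) == blob
```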
97 97 # index v0:
98 98 # 4 bytes: offset
99 99 # 4 bytes: compressed length
100 100 # 4 bytes: base rev
101 101 # 4 bytes: link rev
102 102 # 20 bytes: parent 1 nodeid
103 103 # 20 bytes: parent 2 nodeid
104 104 # 20 bytes: nodeid
105 105 indexformatv0 = ">4l20s20s20s"
106 106 v0shaoffset = 56
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid (20 bytes used, 12 bytes of zero padding)
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 ngshaoffset = 32
153 153 versionformat = ">I"
154 154
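The RevlogNG entry layout can be exercised directly with struct to make the 64-byte record concrete (a sketch with made-up field values; the trailing 12x pads the 20-byte nodeid to a fixed record size):

```python
import struct

fmt = ">Qiiiiii20s12x"
assert struct.calcsize(fmt) == 64

raw = struct.pack(fmt,
                  (4096 << 16) | 0,   # offset_type(offset=4096, flags=0)
                  90,                 # compressed length
                  230,                # uncompressed length
                  0,                  # base rev of the delta chain
                  5,                  # link rev
                  0, -1,              # parent revs (p2 is nullrev)
                  b"\x12" * 20)       # nodeid
offset_flags, clen, ulen, base, link, p1, p2, node = struct.unpack(fmt, raw)
assert offset_flags >> 16 == 4096 and node == b"\x12" * 20
```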
155 155 class revlogio(object):
156 156 def __init__(self):
157 157 self.size = struct.calcsize(indexformatng)
158 158
159 159 def parseindex(self, data, inline):
160 160 # call the C implementation to parse the index data
161 161 index, cache = parsers.parse_index2(data, inline)
162 162 return index, getattr(index, 'nodemap', None), cache
163 163
164 164 def packentry(self, entry, node, version, rev):
165 165 p = _pack(indexformatng, *entry)
166 166 if rev == 0:
167 167 p = _pack(versionformat, version) + p[4:]
168 168 return p
169 169
170 170 class revlog(object):
171 171 """
172 172 the underlying revision storage object
173 173
174 174 A revlog consists of two parts, an index and the revision data.
175 175
176 176 The index is a file with a fixed record size containing
177 177 information on each revision, including its nodeid (hash), the
178 178 nodeids of its parents, the position and offset of its data within
179 179 the data file, and the revision it's based on. Finally, each entry
180 180 contains a linkrev entry that can serve as a pointer to external
181 181 data.
182 182
183 183 The revision data itself is a linear collection of data chunks.
184 184 Each chunk represents a revision and is usually represented as a
185 185 delta against the previous chunk. To bound lookup time, runs of
186 186 deltas are limited to about 2 times the length of the original
187 187 version data. This makes retrieval of a version proportional to
188 188 its size, or O(1) relative to the number of revisions.
189 189
190 190 Both pieces of the revlog are written to in an append-only
191 191 fashion, which means we never need to rewrite a file to insert or
192 192 remove data, and can use some simple techniques to avoid the need
193 193 for locking while reading.
194 194 """
195 195 def __init__(self, opener, indexfile):
196 196 """
197 197 create a revlog object
198 198
199 199 opener is a function that abstracts the file opening operation
200 200 and can be used to implement COW semantics or the like.
201 201 """
202 202 self.indexfile = indexfile
203 203 self.datafile = indexfile[:-2] + ".d"
204 204 self.opener = opener
205 205 self._cache = None
206 206 self._basecache = None
207 207 self._chunkcache = (0, '')
208 208 self._chunkcachesize = 65536
209 209 self._maxchainlen = None
210 210 self.index = []
211 211 self._pcache = {}
212 212 self._nodecache = {nullid: nullrev}
213 213 self._nodepos = None
214 214
215 215 v = REVLOG_DEFAULT_VERSION
216 216 opts = getattr(opener, 'options', None)
217 217 if opts is not None:
218 218 if 'revlogv1' in opts:
219 219 if 'generaldelta' in opts:
220 220 v |= REVLOGGENERALDELTA
221 221 else:
222 222 v = 0
223 223 if 'chunkcachesize' in opts:
224 224 self._chunkcachesize = opts['chunkcachesize']
225 225 if 'maxchainlen' in opts:
226 226 self._maxchainlen = opts['maxchainlen']
227 227
228 228 if self._chunkcachesize <= 0:
229 229 raise RevlogError(_('revlog chunk cache size %r is not greater '
230 230 'than 0') % self._chunkcachesize)
231 231 elif self._chunkcachesize & (self._chunkcachesize - 1):
232 232 raise RevlogError(_('revlog chunk cache size %r is not a power '
233 233 'of 2') % self._chunkcachesize)
234 234
235 235 i = ''
236 236 self._initempty = True
237 237 try:
238 238 f = self.opener(self.indexfile)
239 239 i = f.read()
240 240 f.close()
241 241 if len(i) > 0:
242 242 v = struct.unpack(versionformat, i[:4])[0]
243 243 self._initempty = False
244 244 except IOError, inst:
245 245 if inst.errno != errno.ENOENT:
246 246 raise
247 247
248 248 self.version = v
249 249 self._inline = v & REVLOGNGINLINEDATA
250 250 self._generaldelta = v & REVLOGGENERALDELTA
251 251 flags = v & ~0xFFFF
252 252 fmt = v & 0xFFFF
253 253 if fmt == REVLOGV0 and flags:
254 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
255 255 % (self.indexfile, flags >> 16))
256 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
257 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt > REVLOGNG:
260 260 raise RevlogError(_("index %s unknown format %d")
261 261 % (self.indexfile, fmt))
262 262
263 263 self._io = revlogio()
264 264 if self.version == REVLOGV0:
265 265 self._io = revlogoldio()
266 266 try:
267 267 d = self._io.parseindex(i, self._inline)
268 268 except (ValueError, IndexError):
269 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
270 270 self.index, nodemap, self._chunkcache = d
271 271 if nodemap is not None:
272 272 self.nodemap = self._nodecache = nodemap
273 273 if not self._chunkcache:
274 274 self._chunkclear()
275 275 # revnum -> (chain-length, sum-delta-length)
276 276 self._chaininfocache = {}
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __len__(self):
281 281 return len(self.index) - 1
282 282 def __iter__(self):
283 283 return iter(xrange(len(self)))
284 284 def revs(self, start=0, stop=None):
285 285 """iterate over all revs in this revlog (from start to stop)"""
286 286 step = 1
287 287 if stop is not None:
288 288 if start > stop:
289 289 step = -1
290 290 stop += step
291 291 else:
292 292 stop = len(self)
293 293 return xrange(start, stop, step)
294 294
295 295 @util.propertycache
296 296 def nodemap(self):
297 297 self.rev(self.node(0))
298 298 return self._nodecache
299 299
300 300 def hasnode(self, node):
301 301 try:
302 302 self.rev(node)
303 303 return True
304 304 except KeyError:
305 305 return False
306 306
307 307 def clearcaches(self):
308 308 try:
309 309 self._nodecache.clearcaches()
310 310 except AttributeError:
311 311 self._nodecache = {nullid: nullrev}
312 312 self._nodepos = None
313 313
314 314 def rev(self, node):
315 315 try:
316 316 return self._nodecache[node]
317 317 except TypeError:
318 318 raise
319 319 except RevlogError:
320 320 # parsers.c radix tree lookup failed
321 321 raise LookupError(node, self.indexfile, _('no node'))
322 322 except KeyError:
323 323 # pure python cache lookup failed
324 324 n = self._nodecache
325 325 i = self.index
326 326 p = self._nodepos
327 327 if p is None:
328 328 p = len(i) - 2
329 329 for r in xrange(p, -1, -1):
330 330 v = i[r][7]
331 331 n[v] = r
332 332 if v == node:
333 333 self._nodepos = r - 1
334 334 return r
335 335 raise LookupError(node, self.indexfile, _('no node'))
336 336
337 337 def node(self, rev):
338 338 return self.index[rev][7]
339 339 def linkrev(self, rev):
340 340 return self.index[rev][4]
341 341 def parents(self, node):
342 342 i = self.index
343 343 d = i[self.rev(node)]
344 344 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
345 345 def parentrevs(self, rev):
346 346 return self.index[rev][5:7]
347 347 def start(self, rev):
348 348 return int(self.index[rev][0] >> 16)
349 349 def end(self, rev):
350 350 return self.start(rev) + self.length(rev)
351 351 def length(self, rev):
352 352 return self.index[rev][1]
353 353 def chainbase(self, rev):
354 354 index = self.index
355 355 base = index[rev][3]
356 356 while base != rev:
357 357 rev = base
358 358 base = index[rev][3]
359 359 return base
360 360 def chainlen(self, rev):
361 361 return self._chaininfo(rev)[0]
362 362
363 363 def _chaininfo(self, rev):
364 364 chaininfocache = self._chaininfocache
365 365 if rev in chaininfocache:
366 366 return chaininfocache[rev]
367 367 index = self.index
368 368 generaldelta = self._generaldelta
369 369 iterrev = rev
370 370 e = index[iterrev]
371 371 clen = 0
372 372 compresseddeltalen = 0
373 373 while iterrev != e[3]:
374 374 clen += 1
375 375 compresseddeltalen += e[1]
376 376 if generaldelta:
377 377 iterrev = e[3]
378 378 else:
379 379 iterrev -= 1
380 380 if iterrev in chaininfocache:
381 381 t = chaininfocache[iterrev]
382 382 clen += t[0]
383 383 compresseddeltalen += t[1]
384 384 break
385 385 e = index[iterrev]
386 386 else:
387 387 # Add text length of base since decompressing that also takes
388 388 # work. For cache hits the length is already included.
389 389 compresseddeltalen += e[1]
390 390 r = (clen, compresseddeltalen)
391 391 chaininfocache[rev] = r
392 392 return r
393 393
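The base field walked by _chaininfo() links each delta to its parent in the chain. A toy model with made-up sizes shows the two values it accumulates, chain length and total compressed bytes to read:

```python
# toy index: entry = (base rev, compressed length); an entry whose base
# is itself stores a fulltext, the others store deltas (generaldelta)
index = [(0, 120), (0, 30), (1, 25), (2, 10)]

def chaininfo(rev):
    clen, compresseddeltalen = 0, 0
    while rev != index[rev][0]:
        clen += 1
        compresseddeltalen += index[rev][1]
        rev = index[rev][0]
    # count the base fulltext too, as the method above does
    compresseddeltalen += index[rev][1]
    return clen, compresseddeltalen

assert chaininfo(3) == (3, 185)   # three deltas plus the rev-0 fulltext
```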
394 394 def flags(self, rev):
395 395 return self.index[rev][0] & 0xFFFF
396 396 def rawsize(self, rev):
397 397 """return the length of the uncompressed text for a given revision"""
398 398 l = self.index[rev][2]
399 399 if l >= 0:
400 400 return l
401 401
402 402 t = self.revision(self.node(rev))
403 403 return len(t)
404 404 size = rawsize
405 405
406 406 def ancestors(self, revs, stoprev=0, inclusive=False):
407 407 """Generate the ancestors of 'revs' in reverse topological order.
408 408 Does not generate revs lower than stoprev.
409 409
410 410 See the documentation for ancestor.lazyancestors for more details."""
411 411
412 412 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
413 413 inclusive=inclusive)
414 414
415 415 def descendants(self, revs):
416 416 """Generate the descendants of 'revs' in revision order.
417 417
418 418 Yield a sequence of revision numbers starting with a child of
419 419 some rev in revs, i.e., each revision is *not* considered a
420 420 descendant of itself. Results are ordered by revision number (a
421 421 topological sort)."""
422 422 first = min(revs)
423 423 if first == nullrev:
424 424 for i in self:
425 425 yield i
426 426 return
427 427
428 428 seen = set(revs)
429 429 for i in self.revs(start=first + 1):
430 430 for x in self.parentrevs(i):
431 431 if x != nullrev and x in seen:
432 432 seen.add(i)
433 433 yield i
434 434 break
435 435
436 436 def findcommonmissing(self, common=None, heads=None):
437 437 """Return a tuple of the ancestors of common and the ancestors of heads
438 438 that are not ancestors of common. In revset terminology, we return the
439 439 tuple:
440 440
441 441 ::common, (::heads) - (::common)
442 442
443 443 The list is sorted by revision number, meaning it is
444 444 topologically sorted.
445 445
446 446 'heads' and 'common' are both lists of node IDs. If heads is
447 447 not supplied, uses all of the revlog's heads. If common is not
448 448 supplied, uses nullid."""
449 449 if common is None:
450 450 common = [nullid]
451 451 if heads is None:
452 452 heads = self.heads()
453 453
454 454 common = [self.rev(n) for n in common]
455 455 heads = [self.rev(n) for n in heads]
456 456
457 457 # we want the ancestors, but inclusive
458 458 class lazyset(object):
459 459 def __init__(self, lazyvalues):
460 460 self.addedvalues = set()
461 461 self.lazyvalues = lazyvalues
462 462
463 463 def __contains__(self, value):
464 464 return value in self.addedvalues or value in self.lazyvalues
465 465
466 466 def __iter__(self):
467 467 added = self.addedvalues
468 468 for r in added:
469 469 yield r
470 470 for r in self.lazyvalues:
471 471 if r not in added:
472 472 yield r
473 473
474 474 def add(self, value):
475 475 self.addedvalues.add(value)
476 476
477 477 def update(self, values):
478 478 self.addedvalues.update(values)
479 479
480 480 has = lazyset(self.ancestors(common))
481 481 has.add(nullrev)
482 482 has.update(common)
483 483
484 484 # take all ancestors from heads that aren't in has
485 485 missing = set()
486 486 visit = util.deque(r for r in heads if r not in has)
487 487 while visit:
488 488 r = visit.popleft()
489 489 if r in missing:
490 490 continue
491 491 else:
492 492 missing.add(r)
493 493 for p in self.parentrevs(r):
494 494 if p not in has:
495 495 visit.append(p)
496 496 missing = list(missing)
497 497 missing.sort()
498 498 return has, [self.node(r) for r in missing]
499 499
500 500 def incrementalmissingrevs(self, common=None):
501 501 """Return an object that can be used to incrementally compute the
502 502 revision numbers of the ancestors of arbitrary sets that are not
503 503 ancestors of common. This is an ancestor.incrementalmissingancestors
504 504 object.
505 505
506 506 'common' is a list of revision numbers. If common is not supplied, uses
507 507 nullrev.
508 508 """
509 509 if common is None:
510 510 common = [nullrev]
511 511
512 512 return ancestor.incrementalmissingancestors(self.parentrevs, common)
513 513
514 514 def findmissingrevs(self, common=None, heads=None):
515 515 """Return the revision numbers of the ancestors of heads that
516 516 are not ancestors of common.
517 517
518 518 More specifically, return a list of revision numbers corresponding to
519 519 nodes N such that every N satisfies the following constraints:
520 520
521 521 1. N is an ancestor of some node in 'heads'
522 522 2. N is not an ancestor of any node in 'common'
523 523
524 524 The list is sorted by revision number, meaning it is
525 525 topologically sorted.
526 526
527 527 'heads' and 'common' are both lists of revision numbers. If heads is
528 528 not supplied, uses all of the revlog's heads. If common is not
529 529 supplied, uses nullid."""
530 530 if common is None:
531 531 common = [nullrev]
532 532 if heads is None:
533 533 heads = self.headrevs()
534 534
535 535 inc = self.incrementalmissingrevs(common=common)
536 536 return inc.missingancestors(heads)
537 537
538 538 def findmissing(self, common=None, heads=None):
539 539 """Return the ancestors of heads that are not ancestors of common.
540 540
541 541 More specifically, return a list of nodes N such that every N
542 542 satisfies the following constraints:
543 543
544 544 1. N is an ancestor of some node in 'heads'
545 545 2. N is not an ancestor of any node in 'common'
546 546
547 547 The list is sorted by revision number, meaning it is
548 548 topologically sorted.
549 549
550 550 'heads' and 'common' are both lists of node IDs. If heads is
551 551 not supplied, uses all of the revlog's heads. If common is not
552 552 supplied, uses nullid."""
553 553 if common is None:
554 554 common = [nullid]
555 555 if heads is None:
556 556 heads = self.heads()
557 557
558 558 common = [self.rev(n) for n in common]
559 559 heads = [self.rev(n) for n in heads]
560 560
561 561 inc = self.incrementalmissingrevs(common=common)
562 562 return [self.node(r) for r in inc.missingancestors(heads)]
563 563
564 564 def nodesbetween(self, roots=None, heads=None):
565 565 """Return a topological path from 'roots' to 'heads'.
566 566
567 567 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
568 568 topologically sorted list of all nodes N that satisfy both of
569 569 these constraints:
570 570
571 571 1. N is a descendant of some node in 'roots'
572 572 2. N is an ancestor of some node in 'heads'
573 573
574 574 Every node is considered to be both a descendant and an ancestor
575 575 of itself, so every reachable node in 'roots' and 'heads' will be
576 576 included in 'nodes'.
577 577
578 578 'outroots' is the list of reachable nodes in 'roots', i.e., the
579 579 subset of 'roots' that is returned in 'nodes'. Likewise,
580 580 'outheads' is the subset of 'heads' that is also in 'nodes'.
581 581
582 582 'roots' and 'heads' are both lists of node IDs. If 'roots' is
583 583 unspecified, uses nullid as the only root. If 'heads' is
584 584 unspecified, uses list of all of the revlog's heads."""
585 585 nonodes = ([], [], [])
586 586 if roots is not None:
587 587 roots = list(roots)
588 588 if not roots:
589 589 return nonodes
590 590 lowestrev = min([self.rev(n) for n in roots])
591 591 else:
592 592 roots = [nullid] # Everybody's a descendant of nullid
593 593 lowestrev = nullrev
594 594 if (lowestrev == nullrev) and (heads is None):
595 595 # We want _all_ the nodes!
596 596 return ([self.node(r) for r in self], [nullid], list(self.heads()))
597 597 if heads is None:
598 598 # All nodes are ancestors, so the latest ancestor is the last
599 599 # node.
600 600 highestrev = len(self) - 1
601 601 # Set ancestors to None to signal that every node is an ancestor.
602 602 ancestors = None
603 603 # Set heads to an empty dictionary for later discovery of heads
604 604 heads = {}
605 605 else:
606 606 heads = list(heads)
607 607 if not heads:
608 608 return nonodes
609 609 ancestors = set()
610 610 # Turn heads into a dictionary so we can remove 'fake' heads.
611 611 # Also, later we will be using it to filter out the heads we can't
612 612 # find from roots.
613 613 heads = dict.fromkeys(heads, False)
614 614 # Start at the top and keep marking parents until we're done.
615 615 nodestotag = set(heads)
616 616 # Remember where the top was so we can use it as a limit later.
617 617 highestrev = max([self.rev(n) for n in nodestotag])
618 618 while nodestotag:
619 619 # grab a node to tag
620 620 n = nodestotag.pop()
621 621 # Never tag nullid
622 622 if n == nullid:
623 623 continue
624 624 # A node's revision number represents its place in a
625 625 # topologically sorted list of nodes.
626 626 r = self.rev(n)
627 627 if r >= lowestrev:
628 628 if n not in ancestors:
629 629 # If we are possibly a descendant of one of the roots
630 630 # and we haven't already been marked as an ancestor
631 631 ancestors.add(n) # Mark as ancestor
632 632 # Add non-nullid parents to list of nodes to tag.
633 633 nodestotag.update([p for p in self.parents(n) if
634 634 p != nullid])
635 635 elif n in heads: # We've seen it before, is it a fake head?
636 636 # So it is, real heads should not be the ancestors of
637 637 # any other heads.
638 638 heads.pop(n)
639 639 if not ancestors:
640 640 return nonodes
641 641 # Now that we have our set of ancestors, we want to remove any
642 642 # roots that are not ancestors.
643 643
644 644 # If one of the roots was nullid, everything is included anyway.
645 645 if lowestrev > nullrev:
646 646 # But, since we weren't, let's recompute the lowest rev to not
647 647 # include roots that aren't ancestors.
648 648
649 649 # Filter out roots that aren't ancestors of heads
650 650 roots = [n for n in roots if n in ancestors]
651 651 # Recompute the lowest revision
652 652 if roots:
653 653 lowestrev = min([self.rev(n) for n in roots])
654 654 else:
655 655 # No more roots? Return empty list
656 656 return nonodes
657 657 else:
658 658 # We are descending from nullid, and don't need to care about
659 659 # any other roots.
660 660 lowestrev = nullrev
661 661 roots = [nullid]
662 662 # Transform our roots list into a set.
663 663 descendants = set(roots)
664 664 # Also, keep the original roots so we can filter out roots that aren't
665 665 # 'real' roots (i.e. are descended from other roots).
666 666 roots = descendants.copy()
667 667 # Our topologically sorted list of output nodes.
668 668 orderedout = []
669 669 # Don't start at nullid since we don't want nullid in our output list,
670 670 # and if nullid shows up in descendants, empty parents will look like
671 671 # they're descendants.
672 672 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
673 673 n = self.node(r)
674 674 isdescendant = False
675 675 if lowestrev == nullrev: # Everybody is a descendant of nullid
676 676 isdescendant = True
677 677 elif n in descendants:
678 678 # n is already a descendant
679 679 isdescendant = True
680 680 # This check only needs to be done here because all the roots
681 681 # will start being marked as descendants before the loop.
682 682 if n in roots:
683 683 # If n was a root, check if it's a 'real' root.
684 684 p = tuple(self.parents(n))
685 685 # If any of its parents are descendants, it's not a root.
686 686 if (p[0] in descendants) or (p[1] in descendants):
687 687 roots.remove(n)
688 688 else:
689 689 p = tuple(self.parents(n))
690 690 # A node is a descendant if either of its parents is a
691 691 # descendant. (We seeded the descendants set with the roots
692 692 # up there, remember?)
693 693 if (p[0] in descendants) or (p[1] in descendants):
694 694 descendants.add(n)
695 695 isdescendant = True
696 696 if isdescendant and ((ancestors is None) or (n in ancestors)):
697 697 # Only include nodes that are both descendants and ancestors.
698 698 orderedout.append(n)
699 699 if (ancestors is not None) and (n in heads):
700 700 # We're trying to figure out which heads are reachable
701 701 # from roots.
702 702 # Mark this head as having been reached
703 703 heads[n] = True
704 704 elif ancestors is None:
705 705 # Otherwise, we're trying to discover the heads.
706 706 # Assume this is a head because if it isn't, the next step
707 707 # will eventually remove it.
708 708 heads[n] = True
709 709 # But, obviously its parents aren't.
710 710 for p in self.parents(n):
711 711 heads.pop(p, None)
712 712 heads = [n for n, flag in heads.iteritems() if flag]
713 713 roots = list(roots)
714 714 assert orderedout
715 715 assert roots
716 716 assert heads
717 717 return (orderedout, roots, heads)
718 718
719 719 def headrevs(self):
720 720 try:
721 721 return self.index.headrevs()
722 722 except AttributeError:
723 723 return self._headrevs()
724 724
725 725 def _headrevs(self):
726 726 count = len(self)
727 727 if not count:
728 728 return [nullrev]
729 729 # we won't iterate over filtered revs, so nobody is a head at the start
730 730 ishead = [0] * (count + 1)
731 731 index = self.index
732 732 for r in self:
733 733 ishead[r] = 1 # I may be a head
734 734 e = index[r]
735 735 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
736 736 return [r for r, val in enumerate(ishead) if val]
737 737
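_headrevs() finds heads in a single pass: every rev starts as a candidate and is struck out as soon as it appears as someone's parent. The same idea on a hypothetical four-rev graph:

```python
# parents of each rev; rev 3 merges 1 and 2, so only rev 3 is a head
parentrevs = [(-1, -1), (0, -1), (0, -1), (1, 2)]
ishead = [1] * len(parentrevs)
for r in range(len(parentrevs)):
    for p in parentrevs[r]:
        if p != -1:
            ishead[p] = 0
assert [r for r, v in enumerate(ishead) if v] == [3]
```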
738 738 def heads(self, start=None, stop=None):
739 739 """return the list of all nodes that have no children
740 740
741 741 if start is specified, only heads that are descendants of
742 742 start will be returned
743 743 if stop is specified, it will consider all the revs from stop
744 744 as if they had no children
745 745 """
746 746 if start is None and stop is None:
747 747 if not len(self):
748 748 return [nullid]
749 749 return [self.node(r) for r in self.headrevs()]
750 750
751 751 if start is None:
752 752 start = nullid
753 753 if stop is None:
754 754 stop = []
755 755 stoprevs = set([self.rev(n) for n in stop])
756 756 startrev = self.rev(start)
757 757 reachable = set((startrev,))
758 758 heads = set((startrev,))
759 759
760 760 parentrevs = self.parentrevs
761 761 for r in self.revs(start=startrev + 1):
762 762 for p in parentrevs(r):
763 763 if p in reachable:
764 764 if r not in stoprevs:
765 765 reachable.add(r)
766 766 heads.add(r)
767 767 if p in heads and p not in stoprevs:
768 768 heads.remove(p)
769 769
770 770 return [self.node(r) for r in heads]
771 771
772 772 def children(self, node):
773 773 """find the children of a given node"""
774 774 c = []
775 775 p = self.rev(node)
776 776 for r in self.revs(start=p + 1):
777 777 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
778 778 if prevs:
779 779 for pr in prevs:
780 780 if pr == p:
781 781 c.append(self.node(r))
782 782 elif p == nullrev:
783 783 c.append(self.node(r))
784 784 return c
785 785
786 786 def descendant(self, start, end):
787 787 if start == nullrev:
788 788 return True
789 789 for i in self.descendants([start]):
790 790 if i == end:
791 791 return True
792 792 elif i > end:
793 793 break
794 794 return False
795 795
796 796 def commonancestorsheads(self, a, b):
797 797 """calculate all the heads of the common ancestors of nodes a and b"""
798 798 a, b = self.rev(a), self.rev(b)
799 799 try:
800 800 ancs = self.index.commonancestorsheads(a, b)
801 801 except (AttributeError, OverflowError): # C implementation failed
802 802 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
803 803 return map(self.node, ancs)
804 804
805 805 def isancestor(self, a, b):
806 806 """return True if node a is an ancestor of node b
807 807
808 808 The implementation of this is trivial but the use of
809 809 commonancestorsheads is not."""
810 810 return a in self.commonancestorsheads(a, b)
811 811
812 812 def ancestor(self, a, b):
813 813 """calculate the "best" common ancestor of nodes a and b"""
814 814
815 815 a, b = self.rev(a), self.rev(b)
816 816 try:
817 817 ancs = self.index.ancestors(a, b)
818 818 except (AttributeError, OverflowError):
819 819 ancs = ancestor.ancestors(self.parentrevs, a, b)
820 820 if ancs:
821 821 # choose a consistent winner when there's a tie
822 822 return min(map(self.node, ancs))
823 823 return nullid
824 824
825 825 def _match(self, id):
826 826 if isinstance(id, int):
827 827 # rev
828 828 return self.node(id)
829 829 if len(id) == 20:
830 830 # possibly a binary node
831 831 # odds of a binary node being all hex in ASCII are 1 in 10**25
832 832 try:
833 833 node = id
834 834 self.rev(node) # quick search the index
835 835 return node
836 836 except LookupError:
837 837 pass # may be partial hex id
838 838 try:
839 839 # str(rev)
840 840 rev = int(id)
841 841 if str(rev) != id:
842 842 raise ValueError
843 843 if rev < 0:
844 844 rev = len(self) + rev
845 845 if rev < 0 or rev >= len(self):
846 846 raise ValueError
847 847 return self.node(rev)
848 848 except (ValueError, OverflowError):
849 849 pass
850 850 if len(id) == 40:
851 851 try:
852 852 # a full hex nodeid?
853 853 node = bin(id)
854 854 self.rev(node)
855 855 return node
856 856 except (TypeError, LookupError):
857 857 pass
858 858
859 859 def _partialmatch(self, id):
860 860 try:
861 861 n = self.index.partialmatch(id)
862 862 if n and self.hasnode(n):
863 863 return n
864 864 return None
865 865 except RevlogError:
866 866 # parsers.c radix tree lookup gave multiple matches
867 867 # fall through to slow path that filters hidden revisions
868 868 pass
869 869 except (AttributeError, ValueError):
870 870 # we are pure python, or key was too short to search radix tree
871 871 pass
872 872
873 873 if id in self._pcache:
874 874 return self._pcache[id]
875 875
876 876 if len(id) < 40:
877 877 try:
878 878 # hex(node)[:...]
879 879 l = len(id) // 2 # grab an even number of digits
880 880 prefix = bin(id[:l * 2])
881 881 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
882 882 nl = [n for n in nl if hex(n).startswith(id) and
883 883 self.hasnode(n)]
884 884 if len(nl) > 0:
885 885 if len(nl) == 1:
886 886 self._pcache[id] = nl[0]
887 887 return nl[0]
888 888 raise LookupError(id, self.indexfile,
889 889 _('ambiguous identifier'))
890 890 return None
891 891 except TypeError:
892 892 pass
893 893
894 894 def lookup(self, id):
895 895 """locate a node based on:
896 896 - revision number or str(revision number)
897 897 - nodeid or subset of hex nodeid
898 898 """
899 899 n = self._match(id)
900 900 if n is not None:
901 901 return n
902 902 n = self._partialmatch(id)
903 903 if n:
904 904 return n
905 905
906 906 raise LookupError(id, self.indexfile, _('no match found'))
907 907
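lookup() tries progressively weaker readings of an identifier: integer rev, 20-byte binary node, decimal rev string, full 40-digit hex node, and finally an unambiguous hex prefix. A sketch of the prefix step alone (match_prefix is a hypothetical stand-in for _partialmatch, written against Python 3's bytes.hex()):

```python
from binascii import unhexlify

def match_prefix(hexid, nodes):
    # compare on whole bytes first (an even number of hex digits),
    # then on the full hex string, as _partialmatch() does above
    prefix = unhexlify(hexid[:(len(hexid) // 2) * 2])
    candidates = [n for n in nodes
                  if n.startswith(prefix) and n.hex().startswith(hexid)]
    if len(candidates) > 1:
        raise LookupError("ambiguous identifier")
    return candidates[0] if candidates else None

nodes = [unhexlify("12ab" + "00" * 18), unhexlify("12ff" + "00" * 18)]
assert match_prefix("12a", nodes) == nodes[0]
```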
908 908 def cmp(self, node, text):
909 909 """compare text with a given file revision
910 910
911 911 returns True if text is different from what is stored.
912 912 """
913 913 p1, p2 = self.parents(node)
914 914 return hash(text, p1, p2) != node
915 915
916 916 def _addchunk(self, offset, data):
917 917 o, d = self._chunkcache
918 918 # try to add to existing cache
919 919 if o + len(d) == offset and len(d) + len(data) < _chunksize:
920 920 self._chunkcache = o, d + data
921 921 else:
922 922 self._chunkcache = offset, data
923 923
924 924 def _loadchunk(self, offset, length):
925 925 if self._inline:
926 926 df = self.opener(self.indexfile)
927 927 else:
928 928 df = self.opener(self.datafile)
929 929
930 930 # Cache data both forward and backward around the requested
931 931 # data, in a fixed size window. This helps speed up operations
932 932 # involving reading the revlog backwards.
933 933 cachesize = self._chunkcachesize
934 934 realoffset = offset & ~(cachesize - 1)
935 935 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
936 936 - realoffset)
937 937 df.seek(realoffset)
938 938 d = df.read(reallength)
939 939 df.close()
940 940 self._addchunk(realoffset, d)
941 941 if offset != realoffset or reallength != length:
942 942 return util.buffer(d, offset - realoffset, length)
943 943 return d
944 944
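Since __init__ rejects any _chunkcachesize that is not a power of two, the masking below can align the read window to cache-size boundaries with plain bit operations. Worked through for the default 64 KiB cache:

```python
cachesize = 65536                      # default _chunkcachesize
offset, length = 70000, 100            # requested slice
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)
assert realoffset <= offset and realoffset + reallength >= offset + length
```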
945 945 def _getchunk(self, offset, length):
946 946 o, d = self._chunkcache
947 947 l = len(d)
948 948
949 949 # is it in the cache?
950 950 cachestart = offset - o
951 951 cacheend = cachestart + length
952 952 if cachestart >= 0 and cacheend <= l:
953 953 if cachestart == 0 and cacheend == l:
954 954 return d # avoid a copy
955 955 return util.buffer(d, cachestart, cacheend - cachestart)
956 956
957 957 return self._loadchunk(offset, length)
958 958
959 959 def _chunkraw(self, startrev, endrev):
960 960 start = self.start(startrev)
961 961 end = self.end(endrev)
962 962 if self._inline:
963 963 start += (startrev + 1) * self._io.size
964 964 end += (endrev + 1) * self._io.size
965 965 length = end - start
966 966 return self._getchunk(start, length)
967 967
968 968 def _chunk(self, rev):
969 969 return decompress(self._chunkraw(rev, rev))
970 970
971 971 def _chunks(self, revs):
972 972 '''faster version of [self._chunk(rev) for rev in revs]
973 973
974 974 Assumes that revs is in ascending order.'''
975 975 if not revs:
976 976 return []
977 977 start = self.start
978 978 length = self.length
979 979 inline = self._inline
980 980 iosize = self._io.size
981 981 buffer = util.buffer
982 982
983 983 l = []
984 984 ladd = l.append
985 985
986 986 # preload the cache
987 987 try:
988 988 while True:
989 989 # ensure that the cache doesn't change out from under us
990 990 _cache = self._chunkcache
991 991 self._chunkraw(revs[0], revs[-1])
992 992 if _cache == self._chunkcache:
993 993 break
994 994 offset, data = _cache
995 995 except OverflowError:
996 996 # issue4215 - we can't cache a run of chunks greater than
997 997 # 2G on Windows
998 998 return [self._chunk(rev) for rev in revs]
999 999
1000 1000 for rev in revs:
1001 1001 chunkstart = start(rev)
1002 1002 if inline:
1003 1003 chunkstart += (rev + 1) * iosize
1004 1004 chunklength = length(rev)
1005 1005 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1006 1006
1007 1007 return l
1008 1008
1009 1009 def _chunkclear(self):
1010 1010 self._chunkcache = (0, '')
1011 1011
1012 1012 def deltaparent(self, rev):
1013 1013 """return deltaparent of the given revision"""
1014 1014 base = self.index[rev][3]
1015 1015 if base == rev:
1016 1016 return nullrev
1017 1017 elif self._generaldelta:
1018 1018 return base
1019 1019 else:
1020 1020 return rev - 1
1021 1021
1022 1022 def revdiff(self, rev1, rev2):
1023 1023 """return or calculate a delta between two revisions"""
1024 1024 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1025 1025 return str(self._chunk(rev2))
1026 1026
1027 1027 return mdiff.textdiff(self.revision(rev1),
1028 1028 self.revision(rev2))
1029 1029
1030 1030 def revision(self, nodeorrev):
1031 1031 """return an uncompressed revision of a given node or revision
1032 1032 number.
1033 1033 """
1034 1034 if isinstance(nodeorrev, int):
1035 1035 rev = nodeorrev
1036 1036 node = self.node(rev)
1037 1037 else:
1038 1038 node = nodeorrev
1039 1039 rev = None
1040 1040
1041 1041 _cache = self._cache # grab local copy of cache to avoid thread race
1042 1042 cachedrev = None
1043 1043 if node == nullid:
1044 1044 return ""
1045 1045 if _cache:
1046 1046 if _cache[0] == node:
1047 1047 return _cache[2]
1048 1048 cachedrev = _cache[1]
1049 1049
1050 1050 # look up what we need to read
1051 1051 text = None
1052 1052 if rev is None:
1053 1053 rev = self.rev(node)
1054 1054
1055 1055 # check rev flags
1056 1056 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1057 1057 raise RevlogError(_('incompatible revision flag %x') %
1058 1058 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1059 1059
1060 1060 # build delta chain
1061 1061 chain = []
1062 1062 index = self.index # for performance
1063 1063 generaldelta = self._generaldelta
1064 1064 iterrev = rev
1065 1065 e = index[iterrev]
1066 1066 while iterrev != e[3] and iterrev != cachedrev:
1067 1067 chain.append(iterrev)
1068 1068 if generaldelta:
1069 1069 iterrev = e[3]
1070 1070 else:
1071 1071 iterrev -= 1
1072 1072 e = index[iterrev]
1073 1073
1074 1074 if iterrev == cachedrev:
1075 1075 # cache hit
1076 1076 text = _cache[2]
1077 1077 else:
1078 1078 chain.append(iterrev)
1079 1079 chain.reverse()
1080 1080
1081 1081 # drop cache to save memory
1082 1082 self._cache = None
1083 1083
1084 1084 bins = self._chunks(chain)
1085 1085 if text is None:
1086 1086 text = str(bins[0])
1087 1087 bins = bins[1:]
1088 1088
1089 1089 text = mdiff.patches(text, bins)
1090 1090
1091 1091 text = self._checkhash(text, node, rev)
1092 1092
1093 1093 self._cache = (node, rev, text)
1094 1094 return text
1095 1095
1096 1096 def hash(self, text, p1, p2):
1097 1097 """Compute a node hash.
1098 1098
1099 1099 Available as a function so that subclasses can replace the hash
1100 1100 as needed.
1101 1101 """
1102 1102 return hash(text, p1, p2)
1103 1103
1104 1104 def _checkhash(self, text, node, rev):
1105 1105 p1, p2 = self.parents(node)
1106 1106 self.checkhash(text, p1, p2, node, rev)
1107 1107 return text
1108 1108
1109 1109 def checkhash(self, text, p1, p2, node, rev=None):
1110 1110 if node != self.hash(text, p1, p2):
1111 1111 revornode = rev
1112 1112 if revornode is None:
1113 1113 revornode = templatefilters.short(hex(node))
1114 1114 raise RevlogError(_("integrity check failed on %s:%s")
1115 1115 % (self.indexfile, revornode))
1116 1116
1117 1117 def checkinlinesize(self, tr, fp=None):
1118 1118 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1119 1119 return
1120 1120
1121 1121 trinfo = tr.find(self.indexfile)
1122 1122 if trinfo is None:
1123 1123 raise RevlogError(_("%s not found in the transaction")
1124 1124 % self.indexfile)
1125 1125
1126 1126 trindex = trinfo[2]
1127 1127 dataoff = self.start(trindex)
1128 1128
1129 1129 tr.add(self.datafile, dataoff)
1130 1130
1131 1131 if fp:
1132 1132 fp.flush()
1133 1133 fp.close()
1134 1134
1135 1135 df = self.opener(self.datafile, 'w')
1136 1136 try:
1137 1137 for r in self:
1138 1138 df.write(self._chunkraw(r, r))
1139 1139 finally:
1140 1140 df.close()
1141 1141
1142 1142 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1143 1143 self.version &= ~(REVLOGNGINLINEDATA)
1144 1144 self._inline = False
1145 1145 for i in self:
1146 1146 e = self._io.packentry(self.index[i], self.node, self.version, i)
1147 1147 fp.write(e)
1148 1148
1149 1149 # if we don't call close, the temp file will never replace the
1150 1150 # real index
1151 1151 fp.close()
1152 1152
1153 1153 tr.replace(self.indexfile, trindex * self._io.size)
1154 1154 self._chunkclear()
1155 1155
1156 1156 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1157 1157 node=None):
1158 1158 """add a revision to the log
1159 1159
1160 1160 text - the revision data to add
1161 1161 transaction - the transaction object used for rollback
1162 1162 link - the linkrev data to add
1163 1163 p1, p2 - the parent nodeids of the revision
1164 1164 cachedelta - an optional precomputed delta
1165 1165 node - nodeid of revision; typically node is not specified, and it is
1166 1166 computed by default as hash(text, p1, p2), however subclasses might
1167 1167 use different hashing method (and override checkhash() in such case)
1168 1168 """
1169 1169 if link == nullrev:
1170 1170 raise RevlogError(_("attempted to add linkrev -1 to %s")
1171 1171 % self.indexfile)
1172 1172 node = node or self.hash(text, p1, p2)
1173 1173 if node in self.nodemap:
1174 1174 return node
1175 1175
1176 1176 dfh = None
1177 1177 if not self._inline:
1178 1178 dfh = self.opener(self.datafile, "a")
1179 1179 ifh = self.opener(self.indexfile, "a+")
1180 1180 try:
1181 1181 return self._addrevision(node, text, transaction, link, p1, p2,
1182 1182 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1183 1183 finally:
1184 1184 if dfh:
1185 1185 dfh.close()
1186 1186 ifh.close()
1187 1187
1188 1188 def compress(self, text):
1189 1189 """ generate a possibly-compressed representation of text """
1190 1190 if not text:
1191 1191 return ("", text)
1192 1192 l = len(text)
1193 1193 bin = None
1194 1194 if l < 44:
1195 1195 pass
1196 1196 elif l > 1000000:
1197 1197 # zlib makes an internal copy, thus doubling memory usage for
1198 1198 # large files, so let's do this in pieces
1199 1199 z = zlib.compressobj()
1200 1200 p = []
1201 1201 pos = 0
1202 1202 while pos < l:
1203 1203 pos2 = pos + 2**20
1204 1204 p.append(z.compress(text[pos:pos2]))
1205 1205 pos = pos2
1206 1206 p.append(z.flush())
1207 1207 if sum(map(len, p)) < l:
1208 1208 bin = "".join(p)
1209 1209 else:
1210 1210 bin = _compress(text)
1211 1211 if bin is None or len(bin) > l:
1212 1212 if text[0] == '\0':
1213 1213 return ("", text)
1214 1214 return ('u', text)
1215 1215 return ("", bin)
1216 1216
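For texts over a megabyte, compress() above swaps the one-shot _compress call for a streaming compressobj so zlib never holds a second full copy of the input. A sketch of just that loop:

```python
import zlib

def compress_large(text, blocksize=2 ** 20):
    # feed zlib one block at a time instead of the whole text at once
    z = zlib.compressobj()
    parts = []
    for pos in range(0, len(text), blocksize):
        parts.append(z.compress(text[pos:pos + blocksize]))
    parts.append(z.flush())
    return b"".join(parts)

data = b"x" * (3 * 2 ** 20)
assert zlib.decompress(compress_large(data)) == data
```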
1217 1217 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1218 1218 cachedelta, ifh, dfh):
1219 1219 """internal function to add revisions to the log
1220 1220
1221 1221 see addrevision for argument descriptions.
1222 1222 invariants:
1223 1223 - text is optional (can be None); if not set, cachedelta must be set.
1224 1224 if both are set, they must correspond to each other.
1225 1225 """
1226 1226 btext = [text]
1227 1227 def buildtext():
1228 1228 if btext[0] is not None:
1229 1229 return btext[0]
1230 1230 # flush any pending writes here so we can read it in revision
1231 1231 if dfh:
1232 1232 dfh.flush()
1233 1233 ifh.flush()
1234 1234 basetext = self.revision(self.node(cachedelta[0]))
1235 1235 btext[0] = mdiff.patch(basetext, cachedelta[1])
1236 1236 try:
1237 1237 self.checkhash(btext[0], p1, p2, node)
1238 if flags & REVIDX_ISCENSORED:
1239 raise RevlogError(_('node %s is not censored') % node)
1238 1240 except CensoredNodeError:
1239 pass # always import a censor tombstone.
1241 # must pass the censored index flag to add censored revisions
1242 if not flags & REVIDX_ISCENSORED:
1243 raise
1240 1244 return btext[0]
1241 1245
1242 1246 def builddelta(rev):
1243 1247 # can we use the cached delta?
1244 1248 if cachedelta and cachedelta[0] == rev:
1245 1249 delta = cachedelta[1]
1246 1250 else:
1247 1251 t = buildtext()
1248 1252 ptext = self.revision(self.node(rev))
1249 1253 delta = mdiff.textdiff(ptext, t)
1250 1254 data = self.compress(delta)
1251 1255 l = len(data[1]) + len(data[0])
1252 1256 if basecache[0] == rev:
1253 1257 chainbase = basecache[1]
1254 1258 else:
1255 1259 chainbase = self.chainbase(rev)
1256 1260 dist = l + offset - self.start(chainbase)
1257 1261 if self._generaldelta:
1258 1262 base = rev
1259 1263 else:
1260 1264 base = chainbase
1261 1265 chainlen, compresseddeltalen = self._chaininfo(rev)
1262 1266 chainlen += 1
1263 1267 compresseddeltalen += l
1264 1268 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1265 1269
1266 1270 curr = len(self)
1267 1271 prev = curr - 1
1268 1272 base = chainbase = curr
1269 1273 chainlen = None
1270 1274 offset = self.end(prev)
1271 1275 d = None
1272 1276 if self._basecache is None:
1273 1277 self._basecache = (prev, self.chainbase(prev))
1274 1278 basecache = self._basecache
1275 1279 p1r, p2r = self.rev(p1), self.rev(p2)
1276 1280
1277 1281 # should we try to build a delta?
1278 1282 if prev != nullrev:
1279 1283 if self._generaldelta:
1280 1284 if p1r >= basecache[1]:
1281 1285 d = builddelta(p1r)
1282 1286 elif p2r >= basecache[1]:
1283 1287 d = builddelta(p2r)
1284 1288 else:
1285 1289 d = builddelta(prev)
1286 1290 else:
1287 1291 d = builddelta(prev)
1288 1292 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1289 1293
1290 1294 # full versions are inserted when the needed deltas
1291 1295 # become comparable to the uncompressed text
1292 1296 if text is None:
1293 1297 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1294 1298 cachedelta[1])
1295 1299 else:
1296 1300 textlen = len(text)
1297 1301
1298 1302 # - 'dist' is the distance from the base revision -- bounding it limits
1299 1303 # the amount of I/O we need to do.
1300 1304 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1301 1305 # to apply -- bounding it limits the amount of CPU we consume.
1302 1306 if (d is None or dist > textlen * 4 or l > textlen or
1303 1307 compresseddeltalen > textlen * 2 or
1304 1308 (self._maxchainlen and chainlen > self._maxchainlen)):
1305 1309 text = buildtext()
1306 1310 data = self.compress(text)
1307 1311 l = len(data[1]) + len(data[0])
1308 1312 base = chainbase = curr
1309 1313
1310 1314 e = (offset_type(offset, flags), l, textlen,
1311 1315 base, link, p1r, p2r, node)
1312 1316 self.index.insert(-1, e)
1313 1317 self.nodemap[node] = curr
1314 1318
1315 1319 entry = self._io.packentry(e, self.node, self.version, curr)
1316 1320 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1317 1321
1318 1322 if type(text) == str: # only accept immutable objects
1319 1323 self._cache = (node, curr, text)
1320 1324 self._basecache = (curr, chainbase)
1321 1325 return node
1322 1326
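The lines added in this changeset tighten buildtext(): a fulltext that hashes cleanly must not carry the censored flag, and a censor tombstone (checkhash() raising CensoredNodeError, as filelog subclasses do) is only accepted when the flag is present. The decision table isolated as a sketch (verify_fulltext is hypothetical, not Mercurial API):

```python
REVIDX_ISCENSORED = 1 << 15

def verify_fulltext(flags, hash_ok, is_tombstone):
    # hash_ok: checkhash() succeeded; is_tombstone: it raised
    # CensoredNodeError instead
    censored = bool(flags & REVIDX_ISCENSORED)
    if hash_ok:
        if censored:
            raise ValueError("node is not censored")
    elif is_tombstone:
        if not censored:
            raise ValueError("censored revision requires the censored flag")
    else:
        raise ValueError("integrity check failed")
```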
1323 1327 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1324 1328 curr = len(self) - 1
1325 1329 if not self._inline:
1326 1330 transaction.add(self.datafile, offset)
1327 1331 transaction.add(self.indexfile, curr * len(entry))
1328 1332 if data[0]:
1329 1333 dfh.write(data[0])
1330 1334 dfh.write(data[1])
1331 1335 dfh.flush()
1332 1336 ifh.write(entry)
1333 1337 else:
1334 1338 offset += curr * self._io.size
1335 1339 transaction.add(self.indexfile, offset, curr)
1336 1340 ifh.write(entry)
1337 1341 ifh.write(data[0])
1338 1342 ifh.write(data[1])
1339 1343 self.checkinlinesize(transaction, ifh)
1340 1344
1341 1345 def addgroup(self, bundle, linkmapper, transaction):
1342 1346 """
1343 1347 add a delta group
1344 1348
1345 1349 given a set of deltas, add them to the revision log. the
1346 1350 first delta is against its parent, which should be in our
1347 1351 log, the rest are against the previous delta.
1348 1352 """
1349 1353
1350 1354 # track the base of the current delta log
1351 1355 content = []
1352 1356 node = None
1353 1357
1354 1358 r = len(self)
1355 1359 end = 0
1356 1360 if r:
1357 1361 end = self.end(r - 1)
1358 1362 ifh = self.opener(self.indexfile, "a+")
1359 1363 isize = r * self._io.size
1360 1364 if self._inline:
1361 1365 transaction.add(self.indexfile, end + isize, r)
1362 1366 dfh = None
1363 1367 else:
1364 1368 transaction.add(self.indexfile, isize, r)
1365 1369 transaction.add(self.datafile, end)
1366 1370 dfh = self.opener(self.datafile, "a")
1367 1371
1368 1372 try:
1369 1373 # loop through our set of deltas
1370 1374 chain = None
1371 1375 while True:
1372 1376 chunkdata = bundle.deltachunk(chain)
1373 1377 if not chunkdata:
1374 1378 break
1375 1379 node = chunkdata['node']
1376 1380 p1 = chunkdata['p1']
1377 1381 p2 = chunkdata['p2']
1378 1382 cs = chunkdata['cs']
1379 1383 deltabase = chunkdata['deltabase']
1380 1384 delta = chunkdata['delta']
1381 1385
1382 1386 content.append(node)
1383 1387
1384 1388 link = linkmapper(cs)
1385 1389 if node in self.nodemap:
1386 1390 # this can happen if two branches make the same change
1387 1391 chain = node
1388 1392 continue
1389 1393
1390 1394 for p in (p1, p2):
1391 1395 if p not in self.nodemap:
1392 1396 raise LookupError(p, self.indexfile,
1393 1397 _('unknown parent'))
1394 1398
1395 1399 if deltabase not in self.nodemap:
1396 1400 raise LookupError(deltabase, self.indexfile,
1397 1401 _('unknown delta base'))
1398 1402
1399 1403 baserev = self.rev(deltabase)
1400 1404 chain = self._addrevision(node, None, transaction, link,
1401 1405 p1, p2, REVIDX_DEFAULT_FLAGS,
1402 1406 (baserev, delta), ifh, dfh)
1403 1407 if not dfh and not self._inline:
1404 1408 # addrevision switched from inline to conventional
1405 1409 # reopen the index
1406 1410 ifh.close()
1407 1411 dfh = self.opener(self.datafile, "a")
1408 1412 ifh = self.opener(self.indexfile, "a")
1409 1413 finally:
1410 1414 if dfh:
1411 1415 dfh.close()
1412 1416 ifh.close()
1413 1417
1414 1418 return content
1415 1419
1416 1420 def getstrippoint(self, minlink):
1417 1421 """find the minimum rev that must be stripped to strip the linkrev
1418 1422
1419 1423 Returns a tuple containing the minimum rev and a set of all revs that
1420 1424 have linkrevs that will be broken by this strip.
1421 1425 """
1422 1426 brokenrevs = set()
1423 1427 strippoint = len(self)
1424 1428
1425 1429 heads = {}
1426 1430 futurelargelinkrevs = set()
1427 1431 for head in self.headrevs():
1428 1432 headlinkrev = self.linkrev(head)
1429 1433 heads[head] = headlinkrev
1430 1434 if headlinkrev >= minlink:
1431 1435 futurelargelinkrevs.add(headlinkrev)
1432 1436
1433 1437 # This algorithm involves walking down the rev graph, starting at the
1434 1438 # heads. Since the revs are topologically sorted according to linkrev,
1435 1439 # once all head linkrevs are below the minlink, we know there are
1436 1440 # no more revs that could have a linkrev greater than minlink.
1437 1441 # So we can stop walking.
1438 1442 while futurelargelinkrevs:
1439 1443 strippoint -= 1
1440 1444 linkrev = heads.pop(strippoint)
1441 1445
1442 1446 if linkrev < minlink:
1443 1447 brokenrevs.add(strippoint)
1444 1448 else:
1445 1449 futurelargelinkrevs.remove(linkrev)
1446 1450
1447 1451 for p in self.parentrevs(strippoint):
1448 1452 if p != nullrev:
1449 1453 plinkrev = self.linkrev(p)
1450 1454 heads[p] = plinkrev
1451 1455 if plinkrev >= minlink:
1452 1456 futurelargelinkrevs.add(plinkrev)
1453 1457
1454 1458 return strippoint, brokenrevs
1455 1459
1456 1460 def strip(self, minlink, transaction):
1457 1461 """truncate the revlog on the first revision with a linkrev >= minlink
1458 1462
1459 1463 This function is called when we're stripping revision minlink and
1460 1464 its descendants from the repository.
1461 1465
1462 1466 We have to remove all revisions with linkrev >= minlink, because
1463 1467 the equivalent changelog revisions will be renumbered after the
1464 1468 strip.
1465 1469
1466 1470 So we truncate the revlog on the first of these revisions, and
1467 1471 trust that the caller has saved the revisions that shouldn't be
1468 1472 removed and that it'll re-add them after this truncation.
1469 1473 """
1470 1474 if len(self) == 0:
1471 1475 return
1472 1476
1473 1477 rev, _ = self.getstrippoint(minlink)
1474 1478 if rev == len(self):
1475 1479 return
1476 1480
1477 1481 # first truncate the files on disk
1478 1482 end = self.start(rev)
1479 1483 if not self._inline:
1480 1484 transaction.add(self.datafile, end)
1481 1485 end = rev * self._io.size
1482 1486 else:
1483 1487 end += rev * self._io.size
1484 1488
1485 1489 transaction.add(self.indexfile, end)
1486 1490
1487 1491 # then reset internal state in memory to forget those revisions
1488 1492 self._cache = None
1489 1493 self._chaininfocache = {}
1490 1494 self._chunkclear()
1491 1495 for x in xrange(rev, len(self)):
1492 1496 del self.nodemap[self.node(x)]
1493 1497
1494 1498 del self.index[rev:-1]
1495 1499
1496 1500 def checksize(self):
1497 1501 expected = 0
1498 1502 if len(self):
1499 1503 expected = max(0, self.end(len(self) - 1))
1500 1504
1501 1505 try:
1502 1506 f = self.opener(self.datafile)
1503 1507 f.seek(0, 2)
1504 1508 actual = f.tell()
1505 1509 f.close()
1506 1510 dd = actual - expected
1507 1511 except IOError, inst:
1508 1512 if inst.errno != errno.ENOENT:
1509 1513 raise
1510 1514 dd = 0
1511 1515
1512 1516 try:
1513 1517 f = self.opener(self.indexfile)
1514 1518 f.seek(0, 2)
1515 1519 actual = f.tell()
1516 1520 f.close()
1517 1521 s = self._io.size
1518 1522 i = max(0, actual // s)
1519 1523 di = actual - (i * s)
1520 1524 if self._inline:
1521 1525 databytes = 0
1522 1526 for r in self:
1523 1527 databytes += max(0, self.length(r))
1524 1528 dd = 0
1525 1529 di = actual - len(self) * s - databytes
1526 1530 except IOError, inst:
1527 1531 if inst.errno != errno.ENOENT:
1528 1532 raise
1529 1533 di = 0
1530 1534
1531 1535 return (dd, di)
1532 1536
1533 1537 def files(self):
1534 1538 res = [self.indexfile]
1535 1539 if not self._inline:
1536 1540 res.append(self.datafile)
1537 1541 return res