##// END OF EJS Templates
addrevision: only use the incoming base if it is a good delta (issue4975)...
Pierre-Yves David -
r27180:8e7db961 default
parent child Browse files
Show More
@@ -1,1738 +1,1743 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 import collections
16 16 from node import bin, hex, nullid, nullrev
17 17 from i18n import _
18 18 import ancestor, mdiff, parsers, error, util, templatefilters
19 19 import struct, zlib, errno
20 20
21 21 _pack = struct.pack
22 22 _unpack = struct.unpack
23 23 _compress = zlib.compress
24 24 _decompress = zlib.decompress
25 25 _sha = util.sha1
26 26
27 27 # revlog header flags
28 28 REVLOGV0 = 0
29 29 REVLOGNG = 1
30 30 REVLOGNGINLINEDATA = (1 << 16)
31 31 REVLOGGENERALDELTA = (1 << 17)
32 32 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 33 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 34 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 35 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
36 36
37 37 # revlog index flags
38 38 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
39 39 REVIDX_DEFAULT_FLAGS = 0
40 40 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
41 41
42 42 # max size of revlog with inline data
43 43 _maxinline = 131072
44 44 _chunksize = 1048576
45 45
46 46 RevlogError = error.RevlogError
47 47 LookupError = error.LookupError
48 48 CensoredNodeError = error.CensoredNodeError
49 49
50 50 def getoffset(q):
51 51 return int(q >> 16)
52 52
53 53 def gettype(q):
54 54 return int(q & 0xFFFF)
55 55
56 56 def offset_type(offset, type):
57 57 return long(long(offset) << 16 | type)
58 58
59 59 _nullhash = _sha(nullid)
60 60
61 61 def hash(text, p1, p2):
62 62 """generate a hash from the given text and its parent hashes
63 63
64 64 This hash combines both the current file contents and its history
65 65 in a manner that makes it easy to distinguish nodes with the same
66 66 content in the revision graph.
67 67 """
68 68 # As of now, if one of the parent node is null, p2 is null
69 69 if p2 == nullid:
70 70 # deep copy of a hash is faster than creating one
71 71 s = _nullhash.copy()
72 72 s.update(p1)
73 73 else:
74 74 # none of the parent nodes are nullid
75 75 l = [p1, p2]
76 76 l.sort()
77 77 s = _sha(l[0])
78 78 s.update(l[1])
79 79 s.update(text)
80 80 return s.digest()
81 81
82 82 def decompress(bin):
83 83 """ decompress the given input """
84 84 if not bin:
85 85 return bin
86 86 t = bin[0]
87 87 if t == '\0':
88 88 return bin
89 89 if t == 'x':
90 90 try:
91 91 return _decompress(bin)
92 92 except zlib.error as e:
93 93 raise RevlogError(_("revlog decompress error: %s") % str(e))
94 94 if t == 'u':
95 95 return bin[1:]
96 96 raise RevlogError(_("unknown compression type %r") % t)
97 97
98 98 # index v0:
99 99 # 4 bytes: offset
100 100 # 4 bytes: compressed length
101 101 # 4 bytes: base rev
102 102 # 4 bytes: link rev
103 103 # 20 bytes: parent 1 nodeid
104 104 # 20 bytes: parent 2 nodeid
105 105 # 20 bytes: nodeid
106 106 indexformatv0 = ">4l20s20s20s"
107 107
108 108 class revlogoldio(object):
109 109 def __init__(self):
110 110 self.size = struct.calcsize(indexformatv0)
111 111
112 112 def parseindex(self, data, inline):
113 113 s = self.size
114 114 index = []
115 115 nodemap = {nullid: nullrev}
116 116 n = off = 0
117 117 l = len(data)
118 118 while off + s <= l:
119 119 cur = data[off:off + s]
120 120 off += s
121 121 e = _unpack(indexformatv0, cur)
122 122 # transform to revlogv1 format
123 123 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
124 124 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
125 125 index.append(e2)
126 126 nodemap[e[6]] = n
127 127 n += 1
128 128
129 129 # add the magic null revision at -1
130 130 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
131 131
132 132 return index, nodemap, None
133 133
134 134 def packentry(self, entry, node, version, rev):
135 135 if gettype(entry[0]):
136 136 raise RevlogError(_("index entry flags need RevlogNG"))
137 137 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
138 138 node(entry[5]), node(entry[6]), entry[7])
139 139 return _pack(indexformatv0, *e2)
140 140
141 141 # index ng:
142 142 # 6 bytes: offset
143 143 # 2 bytes: flags
144 144 # 4 bytes: compressed length
145 145 # 4 bytes: uncompressed length
146 146 # 4 bytes: base rev
147 147 # 4 bytes: link rev
148 148 # 4 bytes: parent 1 rev
149 149 # 4 bytes: parent 2 rev
150 150 # 32 bytes: nodeid
151 151 indexformatng = ">Qiiiiii20s12x"
152 152 versionformat = ">I"
153 153
154 154 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
155 155 # signed integer)
156 156 _maxentrysize = 0x7fffffff
157 157
158 158 class revlogio(object):
159 159 def __init__(self):
160 160 self.size = struct.calcsize(indexformatng)
161 161
162 162 def parseindex(self, data, inline):
163 163 # call the C implementation to parse the index data
164 164 index, cache = parsers.parse_index2(data, inline)
165 165 return index, getattr(index, 'nodemap', None), cache
166 166
167 167 def packentry(self, entry, node, version, rev):
168 168 p = _pack(indexformatng, *entry)
169 169 if rev == 0:
170 170 p = _pack(versionformat, version) + p[4:]
171 171 return p
172 172
173 173 class revlog(object):
174 174 """
175 175 the underlying revision storage object
176 176
177 177 A revlog consists of two parts, an index and the revision data.
178 178
179 179 The index is a file with a fixed record size containing
180 180 information on each revision, including its nodeid (hash), the
181 181 nodeids of its parents, the position and offset of its data within
182 182 the data file, and the revision it's based on. Finally, each entry
183 183 contains a linkrev entry that can serve as a pointer to external
184 184 data.
185 185
186 186 The revision data itself is a linear collection of data chunks.
187 187 Each chunk represents a revision and is usually represented as a
188 188 delta against the previous chunk. To bound lookup time, runs of
189 189 deltas are limited to about 2 times the length of the original
190 190 version data. This makes retrieval of a version proportional to
191 191 its size, or O(1) relative to the number of revisions.
192 192
193 193 Both pieces of the revlog are written to in an append-only
194 194 fashion, which means we never need to rewrite a file to insert or
195 195 remove data, and can use some simple techniques to avoid the need
196 196 for locking while reading.
197 197 """
198 198 def __init__(self, opener, indexfile):
199 199 """
200 200 create a revlog object
201 201
202 202 opener is a function that abstracts the file opening operation
203 203 and can be used to implement COW semantics or the like.
204 204 """
205 205 self.indexfile = indexfile
206 206 self.datafile = indexfile[:-2] + ".d"
207 207 self.opener = opener
208 208 # 3-tuple of (node, rev, text) for a raw revision.
209 209 self._cache = None
210 210 # 2-tuple of (rev, baserev) defining the base revision the delta chain
211 211 # begins at for a revision.
212 212 self._basecache = None
213 213 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
214 214 self._chunkcache = (0, '')
215 215 # How much data to read and cache into the raw revlog data cache.
216 216 self._chunkcachesize = 65536
217 217 self._maxchainlen = None
218 218 self._aggressivemergedeltas = False
219 219 self.index = []
220 220 # Mapping of partial identifiers to full nodes.
221 221 self._pcache = {}
222 222 # Mapping of revision integer to full node.
223 223 self._nodecache = {nullid: nullrev}
224 224 self._nodepos = None
225 225
226 226 v = REVLOG_DEFAULT_VERSION
227 227 opts = getattr(opener, 'options', None)
228 228 if opts is not None:
229 229 if 'revlogv1' in opts:
230 230 if 'generaldelta' in opts:
231 231 v |= REVLOGGENERALDELTA
232 232 else:
233 233 v = 0
234 234 if 'chunkcachesize' in opts:
235 235 self._chunkcachesize = opts['chunkcachesize']
236 236 if 'maxchainlen' in opts:
237 237 self._maxchainlen = opts['maxchainlen']
238 238 if 'aggressivemergedeltas' in opts:
239 239 self._aggressivemergedeltas = opts['aggressivemergedeltas']
240 240 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
241 241
242 242 if self._chunkcachesize <= 0:
243 243 raise RevlogError(_('revlog chunk cache size %r is not greater '
244 244 'than 0') % self._chunkcachesize)
245 245 elif self._chunkcachesize & (self._chunkcachesize - 1):
246 246 raise RevlogError(_('revlog chunk cache size %r is not a power '
247 247 'of 2') % self._chunkcachesize)
248 248
249 249 indexdata = ''
250 250 self._initempty = True
251 251 try:
252 252 f = self.opener(self.indexfile)
253 253 indexdata = f.read()
254 254 f.close()
255 255 if len(indexdata) > 0:
256 256 v = struct.unpack(versionformat, indexdata[:4])[0]
257 257 self._initempty = False
258 258 except IOError as inst:
259 259 if inst.errno != errno.ENOENT:
260 260 raise
261 261
262 262 self.version = v
263 263 self._inline = v & REVLOGNGINLINEDATA
264 264 self._generaldelta = v & REVLOGGENERALDELTA
265 265 flags = v & ~0xFFFF
266 266 fmt = v & 0xFFFF
267 267 if fmt == REVLOGV0 and flags:
268 268 raise RevlogError(_("index %s unknown flags %#04x for format v0")
269 269 % (self.indexfile, flags >> 16))
270 270 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
271 271 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
272 272 % (self.indexfile, flags >> 16))
273 273 elif fmt > REVLOGNG:
274 274 raise RevlogError(_("index %s unknown format %d")
275 275 % (self.indexfile, fmt))
276 276
277 277 self._io = revlogio()
278 278 if self.version == REVLOGV0:
279 279 self._io = revlogoldio()
280 280 try:
281 281 d = self._io.parseindex(indexdata, self._inline)
282 282 except (ValueError, IndexError):
283 283 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
284 284 self.index, nodemap, self._chunkcache = d
285 285 if nodemap is not None:
286 286 self.nodemap = self._nodecache = nodemap
287 287 if not self._chunkcache:
288 288 self._chunkclear()
289 289 # revnum -> (chain-length, sum-delta-length)
290 290 self._chaininfocache = {}
291 291
292 292 def tip(self):
293 293 return self.node(len(self.index) - 2)
294 294 def __contains__(self, rev):
295 295 return 0 <= rev < len(self)
296 296 def __len__(self):
297 297 return len(self.index) - 1
298 298 def __iter__(self):
299 299 return iter(xrange(len(self)))
300 300 def revs(self, start=0, stop=None):
301 301 """iterate over all rev in this revlog (from start to stop)"""
302 302 step = 1
303 303 if stop is not None:
304 304 if start > stop:
305 305 step = -1
306 306 stop += step
307 307 else:
308 308 stop = len(self)
309 309 return xrange(start, stop, step)
310 310
311 311 @util.propertycache
312 312 def nodemap(self):
313 313 self.rev(self.node(0))
314 314 return self._nodecache
315 315
316 316 def hasnode(self, node):
317 317 try:
318 318 self.rev(node)
319 319 return True
320 320 except KeyError:
321 321 return False
322 322
323 323 def clearcaches(self):
324 324 try:
325 325 self._nodecache.clearcaches()
326 326 except AttributeError:
327 327 self._nodecache = {nullid: nullrev}
328 328 self._nodepos = None
329 329
330 330 def rev(self, node):
331 331 try:
332 332 return self._nodecache[node]
333 333 except TypeError:
334 334 raise
335 335 except RevlogError:
336 336 # parsers.c radix tree lookup failed
337 337 raise LookupError(node, self.indexfile, _('no node'))
338 338 except KeyError:
339 339 # pure python cache lookup failed
340 340 n = self._nodecache
341 341 i = self.index
342 342 p = self._nodepos
343 343 if p is None:
344 344 p = len(i) - 2
345 345 for r in xrange(p, -1, -1):
346 346 v = i[r][7]
347 347 n[v] = r
348 348 if v == node:
349 349 self._nodepos = r - 1
350 350 return r
351 351 raise LookupError(node, self.indexfile, _('no node'))
352 352
353 353 def node(self, rev):
354 354 return self.index[rev][7]
355 355 def linkrev(self, rev):
356 356 return self.index[rev][4]
357 357 def parents(self, node):
358 358 i = self.index
359 359 d = i[self.rev(node)]
360 360 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
361 361 def parentrevs(self, rev):
362 362 return self.index[rev][5:7]
363 363 def start(self, rev):
364 364 return int(self.index[rev][0] >> 16)
365 365 def end(self, rev):
366 366 return self.start(rev) + self.length(rev)
367 367 def length(self, rev):
368 368 return self.index[rev][1]
369 369 def chainbase(self, rev):
370 370 index = self.index
371 371 base = index[rev][3]
372 372 while base != rev:
373 373 rev = base
374 374 base = index[rev][3]
375 375 return base
376 376 def chainlen(self, rev):
377 377 return self._chaininfo(rev)[0]
378 378
379 379 def _chaininfo(self, rev):
380 380 chaininfocache = self._chaininfocache
381 381 if rev in chaininfocache:
382 382 return chaininfocache[rev]
383 383 index = self.index
384 384 generaldelta = self._generaldelta
385 385 iterrev = rev
386 386 e = index[iterrev]
387 387 clen = 0
388 388 compresseddeltalen = 0
389 389 while iterrev != e[3]:
390 390 clen += 1
391 391 compresseddeltalen += e[1]
392 392 if generaldelta:
393 393 iterrev = e[3]
394 394 else:
395 395 iterrev -= 1
396 396 if iterrev in chaininfocache:
397 397 t = chaininfocache[iterrev]
398 398 clen += t[0]
399 399 compresseddeltalen += t[1]
400 400 break
401 401 e = index[iterrev]
402 402 else:
403 403 # Add text length of base since decompressing that also takes
404 404 # work. For cache hits the length is already included.
405 405 compresseddeltalen += e[1]
406 406 r = (clen, compresseddeltalen)
407 407 chaininfocache[rev] = r
408 408 return r
409 409
410 410 def flags(self, rev):
411 411 return self.index[rev][0] & 0xFFFF
412 412 def rawsize(self, rev):
413 413 """return the length of the uncompressed text for a given revision"""
414 414 l = self.index[rev][2]
415 415 if l >= 0:
416 416 return l
417 417
418 418 t = self.revision(self.node(rev))
419 419 return len(t)
420 420 size = rawsize
421 421
422 422 def ancestors(self, revs, stoprev=0, inclusive=False):
423 423 """Generate the ancestors of 'revs' in reverse topological order.
424 424 Does not generate revs lower than stoprev.
425 425
426 426 See the documentation for ancestor.lazyancestors for more details."""
427 427
428 428 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
429 429 inclusive=inclusive)
430 430
431 431 def descendants(self, revs):
432 432 """Generate the descendants of 'revs' in revision order.
433 433
434 434 Yield a sequence of revision numbers starting with a child of
435 435 some rev in revs, i.e., each revision is *not* considered a
436 436 descendant of itself. Results are ordered by revision number (a
437 437 topological sort)."""
438 438 first = min(revs)
439 439 if first == nullrev:
440 440 for i in self:
441 441 yield i
442 442 return
443 443
444 444 seen = set(revs)
445 445 for i in self.revs(start=first + 1):
446 446 for x in self.parentrevs(i):
447 447 if x != nullrev and x in seen:
448 448 seen.add(i)
449 449 yield i
450 450 break
451 451
452 452 def findcommonmissing(self, common=None, heads=None):
453 453 """Return a tuple of the ancestors of common and the ancestors of heads
454 454 that are not ancestors of common. In revset terminology, we return the
455 455 tuple:
456 456
457 457 ::common, (::heads) - (::common)
458 458
459 459 The list is sorted by revision number, meaning it is
460 460 topologically sorted.
461 461
462 462 'heads' and 'common' are both lists of node IDs. If heads is
463 463 not supplied, uses all of the revlog's heads. If common is not
464 464 supplied, uses nullid."""
465 465 if common is None:
466 466 common = [nullid]
467 467 if heads is None:
468 468 heads = self.heads()
469 469
470 470 common = [self.rev(n) for n in common]
471 471 heads = [self.rev(n) for n in heads]
472 472
473 473 # we want the ancestors, but inclusive
474 474 class lazyset(object):
475 475 def __init__(self, lazyvalues):
476 476 self.addedvalues = set()
477 477 self.lazyvalues = lazyvalues
478 478
479 479 def __contains__(self, value):
480 480 return value in self.addedvalues or value in self.lazyvalues
481 481
482 482 def __iter__(self):
483 483 added = self.addedvalues
484 484 for r in added:
485 485 yield r
486 486 for r in self.lazyvalues:
487 487 if not r in added:
488 488 yield r
489 489
490 490 def add(self, value):
491 491 self.addedvalues.add(value)
492 492
493 493 def update(self, values):
494 494 self.addedvalues.update(values)
495 495
496 496 has = lazyset(self.ancestors(common))
497 497 has.add(nullrev)
498 498 has.update(common)
499 499
500 500 # take all ancestors from heads that aren't in has
501 501 missing = set()
502 502 visit = collections.deque(r for r in heads if r not in has)
503 503 while visit:
504 504 r = visit.popleft()
505 505 if r in missing:
506 506 continue
507 507 else:
508 508 missing.add(r)
509 509 for p in self.parentrevs(r):
510 510 if p not in has:
511 511 visit.append(p)
512 512 missing = list(missing)
513 513 missing.sort()
514 514 return has, [self.node(r) for r in missing]
515 515
516 516 def incrementalmissingrevs(self, common=None):
517 517 """Return an object that can be used to incrementally compute the
518 518 revision numbers of the ancestors of arbitrary sets that are not
519 519 ancestors of common. This is an ancestor.incrementalmissingancestors
520 520 object.
521 521
522 522 'common' is a list of revision numbers. If common is not supplied, uses
523 523 nullrev.
524 524 """
525 525 if common is None:
526 526 common = [nullrev]
527 527
528 528 return ancestor.incrementalmissingancestors(self.parentrevs, common)
529 529
530 530 def findmissingrevs(self, common=None, heads=None):
531 531 """Return the revision numbers of the ancestors of heads that
532 532 are not ancestors of common.
533 533
534 534 More specifically, return a list of revision numbers corresponding to
535 535 nodes N such that every N satisfies the following constraints:
536 536
537 537 1. N is an ancestor of some node in 'heads'
538 538 2. N is not an ancestor of any node in 'common'
539 539
540 540 The list is sorted by revision number, meaning it is
541 541 topologically sorted.
542 542
543 543 'heads' and 'common' are both lists of revision numbers. If heads is
544 544 not supplied, uses all of the revlog's heads. If common is not
545 545 supplied, uses nullid."""
546 546 if common is None:
547 547 common = [nullrev]
548 548 if heads is None:
549 549 heads = self.headrevs()
550 550
551 551 inc = self.incrementalmissingrevs(common=common)
552 552 return inc.missingancestors(heads)
553 553
554 554 def findmissing(self, common=None, heads=None):
555 555 """Return the ancestors of heads that are not ancestors of common.
556 556
557 557 More specifically, return a list of nodes N such that every N
558 558 satisfies the following constraints:
559 559
560 560 1. N is an ancestor of some node in 'heads'
561 561 2. N is not an ancestor of any node in 'common'
562 562
563 563 The list is sorted by revision number, meaning it is
564 564 topologically sorted.
565 565
566 566 'heads' and 'common' are both lists of node IDs. If heads is
567 567 not supplied, uses all of the revlog's heads. If common is not
568 568 supplied, uses nullid."""
569 569 if common is None:
570 570 common = [nullid]
571 571 if heads is None:
572 572 heads = self.heads()
573 573
574 574 common = [self.rev(n) for n in common]
575 575 heads = [self.rev(n) for n in heads]
576 576
577 577 inc = self.incrementalmissingrevs(common=common)
578 578 return [self.node(r) for r in inc.missingancestors(heads)]
579 579
580 580 def nodesbetween(self, roots=None, heads=None):
581 581 """Return a topological path from 'roots' to 'heads'.
582 582
583 583 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
584 584 topologically sorted list of all nodes N that satisfy both of
585 585 these constraints:
586 586
587 587 1. N is a descendant of some node in 'roots'
588 588 2. N is an ancestor of some node in 'heads'
589 589
590 590 Every node is considered to be both a descendant and an ancestor
591 591 of itself, so every reachable node in 'roots' and 'heads' will be
592 592 included in 'nodes'.
593 593
594 594 'outroots' is the list of reachable nodes in 'roots', i.e., the
595 595 subset of 'roots' that is returned in 'nodes'. Likewise,
596 596 'outheads' is the subset of 'heads' that is also in 'nodes'.
597 597
598 598 'roots' and 'heads' are both lists of node IDs. If 'roots' is
599 599 unspecified, uses nullid as the only root. If 'heads' is
600 600 unspecified, uses list of all of the revlog's heads."""
601 601 nonodes = ([], [], [])
602 602 if roots is not None:
603 603 roots = list(roots)
604 604 if not roots:
605 605 return nonodes
606 606 lowestrev = min([self.rev(n) for n in roots])
607 607 else:
608 608 roots = [nullid] # Everybody's a descendant of nullid
609 609 lowestrev = nullrev
610 610 if (lowestrev == nullrev) and (heads is None):
611 611 # We want _all_ the nodes!
612 612 return ([self.node(r) for r in self], [nullid], list(self.heads()))
613 613 if heads is None:
614 614 # All nodes are ancestors, so the latest ancestor is the last
615 615 # node.
616 616 highestrev = len(self) - 1
617 617 # Set ancestors to None to signal that every node is an ancestor.
618 618 ancestors = None
619 619 # Set heads to an empty dictionary for later discovery of heads
620 620 heads = {}
621 621 else:
622 622 heads = list(heads)
623 623 if not heads:
624 624 return nonodes
625 625 ancestors = set()
626 626 # Turn heads into a dictionary so we can remove 'fake' heads.
627 627 # Also, later we will be using it to filter out the heads we can't
628 628 # find from roots.
629 629 heads = dict.fromkeys(heads, False)
630 630 # Start at the top and keep marking parents until we're done.
631 631 nodestotag = set(heads)
632 632 # Remember where the top was so we can use it as a limit later.
633 633 highestrev = max([self.rev(n) for n in nodestotag])
634 634 while nodestotag:
635 635 # grab a node to tag
636 636 n = nodestotag.pop()
637 637 # Never tag nullid
638 638 if n == nullid:
639 639 continue
640 640 # A node's revision number represents its place in a
641 641 # topologically sorted list of nodes.
642 642 r = self.rev(n)
643 643 if r >= lowestrev:
644 644 if n not in ancestors:
645 645 # If we are possibly a descendant of one of the roots
646 646 # and we haven't already been marked as an ancestor
647 647 ancestors.add(n) # Mark as ancestor
648 648 # Add non-nullid parents to list of nodes to tag.
649 649 nodestotag.update([p for p in self.parents(n) if
650 650 p != nullid])
651 651 elif n in heads: # We've seen it before, is it a fake head?
652 652 # So it is, real heads should not be the ancestors of
653 653 # any other heads.
654 654 heads.pop(n)
655 655 if not ancestors:
656 656 return nonodes
657 657 # Now that we have our set of ancestors, we want to remove any
658 658 # roots that are not ancestors.
659 659
660 660 # If one of the roots was nullid, everything is included anyway.
661 661 if lowestrev > nullrev:
662 662 # But, since we weren't, let's recompute the lowest rev to not
663 663 # include roots that aren't ancestors.
664 664
665 665 # Filter out roots that aren't ancestors of heads
666 666 roots = [n for n in roots if n in ancestors]
667 667 # Recompute the lowest revision
668 668 if roots:
669 669 lowestrev = min([self.rev(n) for n in roots])
670 670 else:
671 671 # No more roots? Return empty list
672 672 return nonodes
673 673 else:
674 674 # We are descending from nullid, and don't need to care about
675 675 # any other roots.
676 676 lowestrev = nullrev
677 677 roots = [nullid]
678 678 # Transform our roots list into a set.
679 679 descendants = set(roots)
680 680 # Also, keep the original roots so we can filter out roots that aren't
681 681 # 'real' roots (i.e. are descended from other roots).
682 682 roots = descendants.copy()
683 683 # Our topologically sorted list of output nodes.
684 684 orderedout = []
685 685 # Don't start at nullid since we don't want nullid in our output list,
686 686 # and if nullid shows up in descendants, empty parents will look like
687 687 # they're descendants.
688 688 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
689 689 n = self.node(r)
690 690 isdescendant = False
691 691 if lowestrev == nullrev: # Everybody is a descendant of nullid
692 692 isdescendant = True
693 693 elif n in descendants:
694 694 # n is already a descendant
695 695 isdescendant = True
696 696 # This check only needs to be done here because all the roots
697 697 # will start being marked is descendants before the loop.
698 698 if n in roots:
699 699 # If n was a root, check if it's a 'real' root.
700 700 p = tuple(self.parents(n))
701 701 # If any of its parents are descendants, it's not a root.
702 702 if (p[0] in descendants) or (p[1] in descendants):
703 703 roots.remove(n)
704 704 else:
705 705 p = tuple(self.parents(n))
706 706 # A node is a descendant if either of its parents are
707 707 # descendants. (We seeded the dependents list with the roots
708 708 # up there, remember?)
709 709 if (p[0] in descendants) or (p[1] in descendants):
710 710 descendants.add(n)
711 711 isdescendant = True
712 712 if isdescendant and ((ancestors is None) or (n in ancestors)):
713 713 # Only include nodes that are both descendants and ancestors.
714 714 orderedout.append(n)
715 715 if (ancestors is not None) and (n in heads):
716 716 # We're trying to figure out which heads are reachable
717 717 # from roots.
718 718 # Mark this head as having been reached
719 719 heads[n] = True
720 720 elif ancestors is None:
721 721 # Otherwise, we're trying to discover the heads.
722 722 # Assume this is a head because if it isn't, the next step
723 723 # will eventually remove it.
724 724 heads[n] = True
725 725 # But, obviously its parents aren't.
726 726 for p in self.parents(n):
727 727 heads.pop(p, None)
728 728 heads = [n for n, flag in heads.iteritems() if flag]
729 729 roots = list(roots)
730 730 assert orderedout
731 731 assert roots
732 732 assert heads
733 733 return (orderedout, roots, heads)
734 734
735 735 def headrevs(self):
736 736 try:
737 737 return self.index.headrevs()
738 738 except AttributeError:
739 739 return self._headrevs()
740 740
741 741 def computephases(self, roots):
742 742 return self.index.computephasesmapsets(roots)
743 743
744 744 def _headrevs(self):
745 745 count = len(self)
746 746 if not count:
747 747 return [nullrev]
748 748 # we won't iter over filtered rev so nobody is a head at start
749 749 ishead = [0] * (count + 1)
750 750 index = self.index
751 751 for r in self:
752 752 ishead[r] = 1 # I may be an head
753 753 e = index[r]
754 754 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
755 755 return [r for r, val in enumerate(ishead) if val]
756 756
757 757 def heads(self, start=None, stop=None):
758 758 """return the list of all nodes that have no children
759 759
760 760 if start is specified, only heads that are descendants of
761 761 start will be returned
762 762 if stop is specified, it will consider all the revs from stop
763 763 as if they had no children
764 764 """
765 765 if start is None and stop is None:
766 766 if not len(self):
767 767 return [nullid]
768 768 return [self.node(r) for r in self.headrevs()]
769 769
770 770 if start is None:
771 771 start = nullid
772 772 if stop is None:
773 773 stop = []
774 774 stoprevs = set([self.rev(n) for n in stop])
775 775 startrev = self.rev(start)
776 776 reachable = set((startrev,))
777 777 heads = set((startrev,))
778 778
779 779 parentrevs = self.parentrevs
780 780 for r in self.revs(start=startrev + 1):
781 781 for p in parentrevs(r):
782 782 if p in reachable:
783 783 if r not in stoprevs:
784 784 reachable.add(r)
785 785 heads.add(r)
786 786 if p in heads and p not in stoprevs:
787 787 heads.remove(p)
788 788
789 789 return [self.node(r) for r in heads]
790 790
791 791 def children(self, node):
792 792 """find the children of a given node"""
793 793 c = []
794 794 p = self.rev(node)
795 795 for r in self.revs(start=p + 1):
796 796 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
797 797 if prevs:
798 798 for pr in prevs:
799 799 if pr == p:
800 800 c.append(self.node(r))
801 801 elif p == nullrev:
802 802 c.append(self.node(r))
803 803 return c
804 804
805 805 def descendant(self, start, end):
806 806 if start == nullrev:
807 807 return True
808 808 for i in self.descendants([start]):
809 809 if i == end:
810 810 return True
811 811 elif i > end:
812 812 break
813 813 return False
814 814
815 815 def commonancestorsheads(self, a, b):
816 816 """calculate all the heads of the common ancestors of nodes a and b"""
817 817 a, b = self.rev(a), self.rev(b)
818 818 try:
819 819 ancs = self.index.commonancestorsheads(a, b)
820 820 except (AttributeError, OverflowError): # C implementation failed
821 821 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
822 822 return map(self.node, ancs)
823 823
824 824 def isancestor(self, a, b):
825 825 """return True if node a is an ancestor of node b
826 826
827 827 The implementation of this is trivial but the use of
828 828 commonancestorsheads is not."""
829 829 return a in self.commonancestorsheads(a, b)
830 830
831 831 def ancestor(self, a, b):
832 832 """calculate the "best" common ancestor of nodes a and b"""
833 833
834 834 a, b = self.rev(a), self.rev(b)
835 835 try:
836 836 ancs = self.index.ancestors(a, b)
837 837 except (AttributeError, OverflowError):
838 838 ancs = ancestor.ancestors(self.parentrevs, a, b)
839 839 if ancs:
840 840 # choose a consistent winner when there's a tie
841 841 return min(map(self.node, ancs))
842 842 return nullid
843 843
844 844 def _match(self, id):
845 845 if isinstance(id, int):
846 846 # rev
847 847 return self.node(id)
848 848 if len(id) == 20:
849 849 # possibly a binary node
850 850 # odds of a binary node being all hex in ASCII are 1 in 10**25
851 851 try:
852 852 node = id
853 853 self.rev(node) # quick search the index
854 854 return node
855 855 except LookupError:
856 856 pass # may be partial hex id
857 857 try:
858 858 # str(rev)
859 859 rev = int(id)
860 860 if str(rev) != id:
861 861 raise ValueError
862 862 if rev < 0:
863 863 rev = len(self) + rev
864 864 if rev < 0 or rev >= len(self):
865 865 raise ValueError
866 866 return self.node(rev)
867 867 except (ValueError, OverflowError):
868 868 pass
869 869 if len(id) == 40:
870 870 try:
871 871 # a full hex nodeid?
872 872 node = bin(id)
873 873 self.rev(node)
874 874 return node
875 875 except (TypeError, LookupError):
876 876 pass
877 877
878 878 def _partialmatch(self, id):
879 879 try:
880 880 n = self.index.partialmatch(id)
881 881 if n and self.hasnode(n):
882 882 return n
883 883 return None
884 884 except RevlogError:
885 885 # parsers.c radix tree lookup gave multiple matches
886 886 # fall through to slow path that filters hidden revisions
887 887 pass
888 888 except (AttributeError, ValueError):
889 889 # we are pure python, or key was too short to search radix tree
890 890 pass
891 891
892 892 if id in self._pcache:
893 893 return self._pcache[id]
894 894
895 895 if len(id) < 40:
896 896 try:
897 897 # hex(node)[:...]
898 898 l = len(id) // 2 # grab an even number of digits
899 899 prefix = bin(id[:l * 2])
900 900 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
901 901 nl = [n for n in nl if hex(n).startswith(id) and
902 902 self.hasnode(n)]
903 903 if len(nl) > 0:
904 904 if len(nl) == 1:
905 905 self._pcache[id] = nl[0]
906 906 return nl[0]
907 907 raise LookupError(id, self.indexfile,
908 908 _('ambiguous identifier'))
909 909 return None
910 910 except TypeError:
911 911 pass
912 912
913 913 def lookup(self, id):
914 914 """locate a node based on:
915 915 - revision number or str(revision number)
916 916 - nodeid or subset of hex nodeid
917 917 """
918 918 n = self._match(id)
919 919 if n is not None:
920 920 return n
921 921 n = self._partialmatch(id)
922 922 if n:
923 923 return n
924 924
925 925 raise LookupError(id, self.indexfile, _('no match found'))
926 926
927 927 def cmp(self, node, text):
928 928 """compare text with a given file revision
929 929
930 930 returns True if text is different than what is stored.
931 931 """
932 932 p1, p2 = self.parents(node)
933 933 return hash(text, p1, p2) != node
934 934
935 935 def _addchunk(self, offset, data):
936 936 """Add a segment to the revlog cache.
937 937
938 938 Accepts an absolute offset and the data that is at that location.
939 939 """
940 940 o, d = self._chunkcache
941 941 # try to add to existing cache
942 942 if o + len(d) == offset and len(d) + len(data) < _chunksize:
943 943 self._chunkcache = o, d + data
944 944 else:
945 945 self._chunkcache = offset, data
946 946
947 947 def _loadchunk(self, offset, length, df=None):
948 948 """Load a segment of raw data from the revlog.
949 949
950 950 Accepts an absolute offset, length to read, and an optional existing
951 951 file handle to read from.
952 952
953 953 If an existing file handle is passed, it will be seeked and the
954 954 original seek position will NOT be restored.
955 955
956 956 Returns a str or buffer of raw byte data.
957 957 """
958 958 if df is not None:
959 959 closehandle = False
960 960 else:
961 961 if self._inline:
962 962 df = self.opener(self.indexfile)
963 963 else:
964 964 df = self.opener(self.datafile)
965 965 closehandle = True
966 966
967 967 # Cache data both forward and backward around the requested
968 968 # data, in a fixed size window. This helps speed up operations
969 969 # involving reading the revlog backwards.
970 970 cachesize = self._chunkcachesize
971 971 realoffset = offset & ~(cachesize - 1)
972 972 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
973 973 - realoffset)
974 974 df.seek(realoffset)
975 975 d = df.read(reallength)
976 976 if closehandle:
977 977 df.close()
978 978 self._addchunk(realoffset, d)
979 979 if offset != realoffset or reallength != length:
980 980 return util.buffer(d, offset - realoffset, length)
981 981 return d
982 982
983 983 def _getchunk(self, offset, length, df=None):
984 984 """Obtain a segment of raw data from the revlog.
985 985
986 986 Accepts an absolute offset, length of bytes to obtain, and an
987 987 optional file handle to the already-opened revlog. If the file
988 988 handle is used, it's original seek position will not be preserved.
989 989
990 990 Requests for data may be returned from a cache.
991 991
992 992 Returns a str or a buffer instance of raw byte data.
993 993 """
994 994 o, d = self._chunkcache
995 995 l = len(d)
996 996
997 997 # is it in the cache?
998 998 cachestart = offset - o
999 999 cacheend = cachestart + length
1000 1000 if cachestart >= 0 and cacheend <= l:
1001 1001 if cachestart == 0 and cacheend == l:
1002 1002 return d # avoid a copy
1003 1003 return util.buffer(d, cachestart, cacheend - cachestart)
1004 1004
1005 1005 return self._loadchunk(offset, length, df=df)
1006 1006
1007 1007 def _chunkraw(self, startrev, endrev, df=None):
1008 1008 """Obtain a segment of raw data corresponding to a range of revisions.
1009 1009
1010 1010 Accepts the start and end revisions and an optional already-open
1011 1011 file handle to be used for reading. If the file handle is read, its
1012 1012 seek position will not be preserved.
1013 1013
1014 1014 Requests for data may be satisfied by a cache.
1015 1015
1016 1016 Returns a str or a buffer instance of raw byte data. Callers will
1017 1017 need to call ``self.start(rev)`` and ``self.length()`` to determine
1018 1018 where each revision's data begins and ends.
1019 1019 """
1020 1020 start = self.start(startrev)
1021 1021 end = self.end(endrev)
1022 1022 if self._inline:
1023 1023 start += (startrev + 1) * self._io.size
1024 1024 end += (endrev + 1) * self._io.size
1025 1025 length = end - start
1026 1026 return self._getchunk(start, length, df=df)
1027 1027
1028 1028 def _chunk(self, rev, df=None):
1029 1029 """Obtain a single decompressed chunk for a revision.
1030 1030
1031 1031 Accepts an integer revision and an optional already-open file handle
1032 1032 to be used for reading. If used, the seek position of the file will not
1033 1033 be preserved.
1034 1034
1035 1035 Returns a str holding uncompressed data for the requested revision.
1036 1036 """
1037 1037 return decompress(self._chunkraw(rev, rev, df=df))
1038 1038
1039 1039 def _chunks(self, revs, df=None):
1040 1040 """Obtain decompressed chunks for the specified revisions.
1041 1041
1042 1042 Accepts an iterable of numeric revisions that are assumed to be in
1043 1043 ascending order. Also accepts an optional already-open file handle
1044 1044 to be used for reading. If used, the seek position of the file will
1045 1045 not be preserved.
1046 1046
1047 1047 This function is similar to calling ``self._chunk()`` multiple times,
1048 1048 but is faster.
1049 1049
1050 1050 Returns a list with decompressed data for each requested revision.
1051 1051 """
1052 1052 if not revs:
1053 1053 return []
1054 1054 start = self.start
1055 1055 length = self.length
1056 1056 inline = self._inline
1057 1057 iosize = self._io.size
1058 1058 buffer = util.buffer
1059 1059
1060 1060 l = []
1061 1061 ladd = l.append
1062 1062
1063 1063 # preload the cache
1064 1064 try:
1065 1065 while True:
1066 1066 # ensure that the cache doesn't change out from under us
1067 1067 _cache = self._chunkcache
1068 1068 self._chunkraw(revs[0], revs[-1], df=df)
1069 1069 if _cache == self._chunkcache:
1070 1070 break
1071 1071 offset, data = _cache
1072 1072 except OverflowError:
1073 1073 # issue4215 - we can't cache a run of chunks greater than
1074 1074 # 2G on Windows
1075 1075 return [self._chunk(rev, df=df) for rev in revs]
1076 1076
1077 1077 for rev in revs:
1078 1078 chunkstart = start(rev)
1079 1079 if inline:
1080 1080 chunkstart += (rev + 1) * iosize
1081 1081 chunklength = length(rev)
1082 1082 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1083 1083
1084 1084 return l
1085 1085
1086 1086 def _chunkclear(self):
1087 1087 """Clear the raw chunk cache."""
1088 1088 self._chunkcache = (0, '')
1089 1089
1090 1090 def deltaparent(self, rev):
1091 1091 """return deltaparent of the given revision"""
1092 1092 base = self.index[rev][3]
1093 1093 if base == rev:
1094 1094 return nullrev
1095 1095 elif self._generaldelta:
1096 1096 return base
1097 1097 else:
1098 1098 return rev - 1
1099 1099
1100 1100 def revdiff(self, rev1, rev2):
1101 1101 """return or calculate a delta between two revisions"""
1102 1102 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1103 1103 return str(self._chunk(rev2))
1104 1104
1105 1105 return mdiff.textdiff(self.revision(rev1),
1106 1106 self.revision(rev2))
1107 1107
1108 1108 def revision(self, nodeorrev, _df=None):
1109 1109 """return an uncompressed revision of a given node or revision
1110 1110 number.
1111 1111
1112 1112 _df is an existing file handle to read from. It is meant to only be
1113 1113 used internally.
1114 1114 """
1115 1115 if isinstance(nodeorrev, int):
1116 1116 rev = nodeorrev
1117 1117 node = self.node(rev)
1118 1118 else:
1119 1119 node = nodeorrev
1120 1120 rev = None
1121 1121
1122 1122 cachedrev = None
1123 1123 if node == nullid:
1124 1124 return ""
1125 1125 if self._cache:
1126 1126 if self._cache[0] == node:
1127 1127 return self._cache[2]
1128 1128 cachedrev = self._cache[1]
1129 1129
1130 1130 # look up what we need to read
1131 1131 text = None
1132 1132 if rev is None:
1133 1133 rev = self.rev(node)
1134 1134
1135 1135 # check rev flags
1136 1136 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1137 1137 raise RevlogError(_('incompatible revision flag %x') %
1138 1138 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1139 1139
1140 1140 # build delta chain
1141 1141 chain = []
1142 1142 index = self.index # for performance
1143 1143 generaldelta = self._generaldelta
1144 1144 iterrev = rev
1145 1145 e = index[iterrev]
1146 1146 while iterrev != e[3] and iterrev != cachedrev:
1147 1147 chain.append(iterrev)
1148 1148 if generaldelta:
1149 1149 iterrev = e[3]
1150 1150 else:
1151 1151 iterrev -= 1
1152 1152 e = index[iterrev]
1153 1153
1154 1154 if iterrev == cachedrev:
1155 1155 # cache hit
1156 1156 text = self._cache[2]
1157 1157 else:
1158 1158 chain.append(iterrev)
1159 1159 chain.reverse()
1160 1160
1161 1161 # drop cache to save memory
1162 1162 self._cache = None
1163 1163
1164 1164 bins = self._chunks(chain, df=_df)
1165 1165 if text is None:
1166 1166 text = str(bins[0])
1167 1167 bins = bins[1:]
1168 1168
1169 1169 text = mdiff.patches(text, bins)
1170 1170
1171 1171 text = self._checkhash(text, node, rev)
1172 1172
1173 1173 self._cache = (node, rev, text)
1174 1174 return text
1175 1175
1176 1176 def hash(self, text, p1, p2):
1177 1177 """Compute a node hash.
1178 1178
1179 1179 Available as a function so that subclasses can replace the hash
1180 1180 as needed.
1181 1181 """
1182 1182 return hash(text, p1, p2)
1183 1183
1184 1184 def _checkhash(self, text, node, rev):
1185 1185 p1, p2 = self.parents(node)
1186 1186 self.checkhash(text, p1, p2, node, rev)
1187 1187 return text
1188 1188
1189 1189 def checkhash(self, text, p1, p2, node, rev=None):
1190 1190 if node != self.hash(text, p1, p2):
1191 1191 revornode = rev
1192 1192 if revornode is None:
1193 1193 revornode = templatefilters.short(hex(node))
1194 1194 raise RevlogError(_("integrity check failed on %s:%s")
1195 1195 % (self.indexfile, revornode))
1196 1196
1197 1197 def checkinlinesize(self, tr, fp=None):
1198 1198 """Check if the revlog is too big for inline and convert if so.
1199 1199
1200 1200 This should be called after revisions are added to the revlog. If the
1201 1201 revlog has grown too large to be an inline revlog, it will convert it
1202 1202 to use multiple index and data files.
1203 1203 """
1204 1204 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1205 1205 return
1206 1206
1207 1207 trinfo = tr.find(self.indexfile)
1208 1208 if trinfo is None:
1209 1209 raise RevlogError(_("%s not found in the transaction")
1210 1210 % self.indexfile)
1211 1211
1212 1212 trindex = trinfo[2]
1213 1213 if trindex is not None:
1214 1214 dataoff = self.start(trindex)
1215 1215 else:
1216 1216 # revlog was stripped at start of transaction, use all leftover data
1217 1217 trindex = len(self) - 1
1218 1218 dataoff = self.end(-2)
1219 1219
1220 1220 tr.add(self.datafile, dataoff)
1221 1221
1222 1222 if fp:
1223 1223 fp.flush()
1224 1224 fp.close()
1225 1225
1226 1226 df = self.opener(self.datafile, 'w')
1227 1227 try:
1228 1228 for r in self:
1229 1229 df.write(self._chunkraw(r, r))
1230 1230 finally:
1231 1231 df.close()
1232 1232
1233 1233 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1234 1234 self.version &= ~(REVLOGNGINLINEDATA)
1235 1235 self._inline = False
1236 1236 for i in self:
1237 1237 e = self._io.packentry(self.index[i], self.node, self.version, i)
1238 1238 fp.write(e)
1239 1239
1240 1240 # if we don't call close, the temp file will never replace the
1241 1241 # real index
1242 1242 fp.close()
1243 1243
1244 1244 tr.replace(self.indexfile, trindex * self._io.size)
1245 1245 self._chunkclear()
1246 1246
1247 1247 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1248 1248 node=None):
1249 1249 """add a revision to the log
1250 1250
1251 1251 text - the revision data to add
1252 1252 transaction - the transaction object used for rollback
1253 1253 link - the linkrev data to add
1254 1254 p1, p2 - the parent nodeids of the revision
1255 1255 cachedelta - an optional precomputed delta
1256 1256 node - nodeid of revision; typically node is not specified, and it is
1257 1257 computed by default as hash(text, p1, p2), however subclasses might
1258 1258 use different hashing method (and override checkhash() in such case)
1259 1259 """
1260 1260 if link == nullrev:
1261 1261 raise RevlogError(_("attempted to add linkrev -1 to %s")
1262 1262 % self.indexfile)
1263 1263
1264 1264 if len(text) > _maxentrysize:
1265 1265 raise RevlogError(
1266 1266 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1267 1267 % (self.indexfile, len(text)))
1268 1268
1269 1269 node = node or self.hash(text, p1, p2)
1270 1270 if node in self.nodemap:
1271 1271 return node
1272 1272
1273 1273 dfh = None
1274 1274 if not self._inline:
1275 1275 dfh = self.opener(self.datafile, "a+")
1276 1276 ifh = self.opener(self.indexfile, "a+")
1277 1277 try:
1278 1278 return self._addrevision(node, text, transaction, link, p1, p2,
1279 1279 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1280 1280 finally:
1281 1281 if dfh:
1282 1282 dfh.close()
1283 1283 ifh.close()
1284 1284
1285 1285 def compress(self, text):
1286 1286 """ generate a possibly-compressed representation of text """
1287 1287 if not text:
1288 1288 return ("", text)
1289 1289 l = len(text)
1290 1290 bin = None
1291 1291 if l < 44:
1292 1292 pass
1293 1293 elif l > 1000000:
1294 1294 # zlib makes an internal copy, thus doubling memory usage for
1295 1295 # large files, so lets do this in pieces
1296 1296 z = zlib.compressobj()
1297 1297 p = []
1298 1298 pos = 0
1299 1299 while pos < l:
1300 1300 pos2 = pos + 2**20
1301 1301 p.append(z.compress(text[pos:pos2]))
1302 1302 pos = pos2
1303 1303 p.append(z.flush())
1304 1304 if sum(map(len, p)) < l:
1305 1305 bin = "".join(p)
1306 1306 else:
1307 1307 bin = _compress(text)
1308 1308 if bin is None or len(bin) > l:
1309 1309 if text[0] == '\0':
1310 1310 return ("", text)
1311 1311 return ('u', text)
1312 1312 return ("", bin)
1313 1313
1314 1314 def _isgooddelta(self, d, textlen):
1315 1315 """Returns True if the given delta is good. Good means that it is within
1316 1316 the disk span, disk size, and chain length bounds that we know to be
1317 1317 performant."""
1318 1318 if d is None:
1319 1319 return False
1320 1320
1321 1321 # - 'dist' is the distance from the base revision -- bounding it limits
1322 1322 # the amount of I/O we need to do.
1323 1323 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1324 1324 # to apply -- bounding it limits the amount of CPU we consume.
1325 1325 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1326 1326 if (dist > textlen * 4 or l > textlen or
1327 1327 compresseddeltalen > textlen * 2 or
1328 1328 (self._maxchainlen and chainlen > self._maxchainlen)):
1329 1329 return False
1330 1330
1331 1331 return True
1332 1332
1333 1333 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1334 1334 cachedelta, ifh, dfh, alwayscache=False):
1335 1335 """internal function to add revisions to the log
1336 1336
1337 1337 see addrevision for argument descriptions.
1338 1338 invariants:
1339 1339 - text is optional (can be None); if not set, cachedelta must be set.
1340 1340 if both are set, they must correspond to each other.
1341 1341 """
1342 1342 btext = [text]
1343 1343 def buildtext():
1344 1344 if btext[0] is not None:
1345 1345 return btext[0]
1346 1346 baserev = cachedelta[0]
1347 1347 delta = cachedelta[1]
1348 1348 # special case deltas which replace entire base; no need to decode
1349 1349 # base revision. this neatly avoids censored bases, which throw when
1350 1350 # they're decoded.
1351 1351 hlen = struct.calcsize(">lll")
1352 1352 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1353 1353 len(delta) - hlen):
1354 1354 btext[0] = delta[hlen:]
1355 1355 else:
1356 1356 if self._inline:
1357 1357 fh = ifh
1358 1358 else:
1359 1359 fh = dfh
1360 1360 basetext = self.revision(self.node(baserev), _df=fh)
1361 1361 btext[0] = mdiff.patch(basetext, delta)
1362 1362 try:
1363 1363 self.checkhash(btext[0], p1, p2, node)
1364 1364 if flags & REVIDX_ISCENSORED:
1365 1365 raise RevlogError(_('node %s is not censored') % node)
1366 1366 except CensoredNodeError:
1367 1367 # must pass the censored index flag to add censored revisions
1368 1368 if not flags & REVIDX_ISCENSORED:
1369 1369 raise
1370 1370 return btext[0]
1371 1371
1372 1372 def builddelta(rev):
1373 1373 # can we use the cached delta?
1374 1374 if cachedelta and cachedelta[0] == rev:
1375 1375 delta = cachedelta[1]
1376 1376 else:
1377 1377 t = buildtext()
1378 1378 if self.iscensored(rev):
1379 1379 # deltas based on a censored revision must replace the
1380 1380 # full content in one patch, so delta works everywhere
1381 1381 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1382 1382 delta = header + t
1383 1383 else:
1384 1384 if self._inline:
1385 1385 fh = ifh
1386 1386 else:
1387 1387 fh = dfh
1388 1388 ptext = self.revision(self.node(rev), _df=fh)
1389 1389 delta = mdiff.textdiff(ptext, t)
1390 1390 data = self.compress(delta)
1391 1391 l = len(data[1]) + len(data[0])
1392 1392 if basecache[0] == rev:
1393 1393 chainbase = basecache[1]
1394 1394 else:
1395 1395 chainbase = self.chainbase(rev)
1396 1396 dist = l + offset - self.start(chainbase)
1397 1397 if self._generaldelta:
1398 1398 base = rev
1399 1399 else:
1400 1400 base = chainbase
1401 1401 chainlen, compresseddeltalen = self._chaininfo(rev)
1402 1402 chainlen += 1
1403 1403 compresseddeltalen += l
1404 1404 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1405 1405
1406 1406 curr = len(self)
1407 1407 prev = curr - 1
1408 1408 base = chainbase = curr
1409 1409 chainlen = None
1410 1410 offset = self.end(prev)
1411 1411 delta = None
1412 1412 if self._basecache is None:
1413 1413 self._basecache = (prev, self.chainbase(prev))
1414 1414 basecache = self._basecache
1415 1415 p1r, p2r = self.rev(p1), self.rev(p2)
1416 1416
1417 1417 # full versions are inserted when the needed deltas
1418 1418 # become comparable to the uncompressed text
1419 1419 if text is None:
1420 1420 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1421 1421 cachedelta[1])
1422 1422 else:
1423 1423 textlen = len(text)
1424 1424
1425 1425 # should we try to build a delta?
1426 1426 if prev != nullrev:
1427 1427 if cachedelta and self._generaldelta and self._lazydeltabase:
1428 1428 # Assume what we received from the server is a good choice
1429 1429 # build delta will reuse the cache
1430 delta = builddelta(cachedelta[0])
1430 candidatedelta = builddelta(cachedelta[0])
1431 if self._isgooddelta(candidatedelta, textlen):
1432 delta = candidatedelta
1433 elif prev != candidatedelta[3]:
1434 # Try against prev to hopefully save us a fulltext.
1435 delta = builddelta(prev)
1431 1436 elif self._generaldelta:
1432 1437 if p2r != nullrev and self._aggressivemergedeltas:
1433 1438 delta = builddelta(p1r)
1434 1439 delta2 = builddelta(p2r)
1435 1440 p1good = self._isgooddelta(delta, textlen)
1436 1441 p2good = self._isgooddelta(delta2, textlen)
1437 1442 if p1good and p2good:
1438 1443 # If both are good deltas, choose the smallest
1439 1444 if delta2[1] < delta[1]:
1440 1445 delta = delta2
1441 1446 elif p2good:
1442 1447 # If only p2 is good, use it
1443 1448 delta = delta2
1444 1449 elif p1good:
1445 1450 pass
1446 1451 else:
1447 1452 # Neither is good, try against prev to hopefully save us
1448 1453 # a fulltext.
1449 1454 delta = builddelta(prev)
1450 1455 else:
1451 1456 # Pick whichever parent is closer to us (to minimize the
1452 1457 # chance of having to build a fulltext). Since
1453 1458 # nullrev == -1, any non-merge commit will always pick p1r.
1454 1459 drev = p2r if p2r > p1r else p1r
1455 1460 delta = builddelta(drev)
1456 1461 # If the chosen delta will result in us making a full text,
1457 1462 # give it one last try against prev.
1458 1463 if drev != prev and not self._isgooddelta(delta, textlen):
1459 1464 delta = builddelta(prev)
1460 1465 else:
1461 1466 delta = builddelta(prev)
1462 1467 if delta is not None:
1463 1468 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1464 1469
1465 1470 if not self._isgooddelta(delta, textlen):
1466 1471 text = buildtext()
1467 1472 data = self.compress(text)
1468 1473 l = len(data[1]) + len(data[0])
1469 1474 base = chainbase = curr
1470 1475
1471 1476 e = (offset_type(offset, flags), l, textlen,
1472 1477 base, link, p1r, p2r, node)
1473 1478 self.index.insert(-1, e)
1474 1479 self.nodemap[node] = curr
1475 1480
1476 1481 entry = self._io.packentry(e, self.node, self.version, curr)
1477 1482 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1478 1483
1479 1484 if alwayscache and text is None:
1480 1485 text = buildtext()
1481 1486
1482 1487 if type(text) == str: # only accept immutable objects
1483 1488 self._cache = (node, curr, text)
1484 1489 self._basecache = (curr, chainbase)
1485 1490 return node
1486 1491
1487 1492 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1488 1493 curr = len(self) - 1
1489 1494 if not self._inline:
1490 1495 transaction.add(self.datafile, offset)
1491 1496 transaction.add(self.indexfile, curr * len(entry))
1492 1497 if data[0]:
1493 1498 dfh.write(data[0])
1494 1499 dfh.write(data[1])
1495 1500 ifh.write(entry)
1496 1501 else:
1497 1502 offset += curr * self._io.size
1498 1503 transaction.add(self.indexfile, offset, curr)
1499 1504 ifh.write(entry)
1500 1505 ifh.write(data[0])
1501 1506 ifh.write(data[1])
1502 1507 self.checkinlinesize(transaction, ifh)
1503 1508
1504 1509 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1505 1510 """
1506 1511 add a delta group
1507 1512
1508 1513 given a set of deltas, add them to the revision log. the
1509 1514 first delta is against its parent, which should be in our
1510 1515 log, the rest are against the previous delta.
1511 1516
1512 1517 If ``addrevisioncb`` is defined, it will be called with arguments of
1513 1518 this revlog and the node that was added.
1514 1519 """
1515 1520
1516 1521 # track the base of the current delta log
1517 1522 content = []
1518 1523 node = None
1519 1524
1520 1525 r = len(self)
1521 1526 end = 0
1522 1527 if r:
1523 1528 end = self.end(r - 1)
1524 1529 ifh = self.opener(self.indexfile, "a+")
1525 1530 isize = r * self._io.size
1526 1531 if self._inline:
1527 1532 transaction.add(self.indexfile, end + isize, r)
1528 1533 dfh = None
1529 1534 else:
1530 1535 transaction.add(self.indexfile, isize, r)
1531 1536 transaction.add(self.datafile, end)
1532 1537 dfh = self.opener(self.datafile, "a+")
1533 1538 def flush():
1534 1539 if dfh:
1535 1540 dfh.flush()
1536 1541 ifh.flush()
1537 1542 try:
1538 1543 # loop through our set of deltas
1539 1544 chain = None
1540 1545 while True:
1541 1546 chunkdata = cg.deltachunk(chain)
1542 1547 if not chunkdata:
1543 1548 break
1544 1549 node = chunkdata['node']
1545 1550 p1 = chunkdata['p1']
1546 1551 p2 = chunkdata['p2']
1547 1552 cs = chunkdata['cs']
1548 1553 deltabase = chunkdata['deltabase']
1549 1554 delta = chunkdata['delta']
1550 1555
1551 1556 content.append(node)
1552 1557
1553 1558 link = linkmapper(cs)
1554 1559 if node in self.nodemap:
1555 1560 # this can happen if two branches make the same change
1556 1561 chain = node
1557 1562 continue
1558 1563
1559 1564 for p in (p1, p2):
1560 1565 if p not in self.nodemap:
1561 1566 raise LookupError(p, self.indexfile,
1562 1567 _('unknown parent'))
1563 1568
1564 1569 if deltabase not in self.nodemap:
1565 1570 raise LookupError(deltabase, self.indexfile,
1566 1571 _('unknown delta base'))
1567 1572
1568 1573 baserev = self.rev(deltabase)
1569 1574
1570 1575 if baserev != nullrev and self.iscensored(baserev):
1571 1576 # if base is censored, delta must be full replacement in a
1572 1577 # single patch operation
1573 1578 hlen = struct.calcsize(">lll")
1574 1579 oldlen = self.rawsize(baserev)
1575 1580 newlen = len(delta) - hlen
1576 1581 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1577 1582 raise error.CensoredBaseError(self.indexfile,
1578 1583 self.node(baserev))
1579 1584
1580 1585 flags = REVIDX_DEFAULT_FLAGS
1581 1586 if self._peek_iscensored(baserev, delta, flush):
1582 1587 flags |= REVIDX_ISCENSORED
1583 1588
1584 1589 # We assume consumers of addrevisioncb will want to retrieve
1585 1590 # the added revision, which will require a call to
1586 1591 # revision(). revision() will fast path if there is a cache
1587 1592 # hit. So, we tell _addrevision() to always cache in this case.
1588 1593 chain = self._addrevision(node, None, transaction, link,
1589 1594 p1, p2, flags, (baserev, delta),
1590 1595 ifh, dfh,
1591 1596 alwayscache=bool(addrevisioncb))
1592 1597
1593 1598 if addrevisioncb:
1594 1599 addrevisioncb(self, chain)
1595 1600
1596 1601 if not dfh and not self._inline:
1597 1602 # addrevision switched from inline to conventional
1598 1603 # reopen the index
1599 1604 ifh.close()
1600 1605 dfh = self.opener(self.datafile, "a+")
1601 1606 ifh = self.opener(self.indexfile, "a+")
1602 1607 finally:
1603 1608 if dfh:
1604 1609 dfh.close()
1605 1610 ifh.close()
1606 1611
1607 1612 return content
1608 1613
1609 1614 def iscensored(self, rev):
1610 1615 """Check if a file revision is censored."""
1611 1616 return False
1612 1617
1613 1618 def _peek_iscensored(self, baserev, delta, flush):
1614 1619 """Quickly check if a delta produces a censored revision."""
1615 1620 return False
1616 1621
1617 1622 def getstrippoint(self, minlink):
1618 1623 """find the minimum rev that must be stripped to strip the linkrev
1619 1624
1620 1625 Returns a tuple containing the minimum rev and a set of all revs that
1621 1626 have linkrevs that will be broken by this strip.
1622 1627 """
1623 1628 brokenrevs = set()
1624 1629 strippoint = len(self)
1625 1630
1626 1631 heads = {}
1627 1632 futurelargelinkrevs = set()
1628 1633 for head in self.headrevs():
1629 1634 headlinkrev = self.linkrev(head)
1630 1635 heads[head] = headlinkrev
1631 1636 if headlinkrev >= minlink:
1632 1637 futurelargelinkrevs.add(headlinkrev)
1633 1638
1634 1639 # This algorithm involves walking down the rev graph, starting at the
1635 1640 # heads. Since the revs are topologically sorted according to linkrev,
1636 1641 # once all head linkrevs are below the minlink, we know there are
1637 1642 # no more revs that could have a linkrev greater than minlink.
1638 1643 # So we can stop walking.
1639 1644 while futurelargelinkrevs:
1640 1645 strippoint -= 1
1641 1646 linkrev = heads.pop(strippoint)
1642 1647
1643 1648 if linkrev < minlink:
1644 1649 brokenrevs.add(strippoint)
1645 1650 else:
1646 1651 futurelargelinkrevs.remove(linkrev)
1647 1652
1648 1653 for p in self.parentrevs(strippoint):
1649 1654 if p != nullrev:
1650 1655 plinkrev = self.linkrev(p)
1651 1656 heads[p] = plinkrev
1652 1657 if plinkrev >= minlink:
1653 1658 futurelargelinkrevs.add(plinkrev)
1654 1659
1655 1660 return strippoint, brokenrevs
1656 1661
1657 1662 def strip(self, minlink, transaction):
1658 1663 """truncate the revlog on the first revision with a linkrev >= minlink
1659 1664
1660 1665 This function is called when we're stripping revision minlink and
1661 1666 its descendants from the repository.
1662 1667
1663 1668 We have to remove all revisions with linkrev >= minlink, because
1664 1669 the equivalent changelog revisions will be renumbered after the
1665 1670 strip.
1666 1671
1667 1672 So we truncate the revlog on the first of these revisions, and
1668 1673 trust that the caller has saved the revisions that shouldn't be
1669 1674 removed and that it'll re-add them after this truncation.
1670 1675 """
1671 1676 if len(self) == 0:
1672 1677 return
1673 1678
1674 1679 rev, _ = self.getstrippoint(minlink)
1675 1680 if rev == len(self):
1676 1681 return
1677 1682
1678 1683 # first truncate the files on disk
1679 1684 end = self.start(rev)
1680 1685 if not self._inline:
1681 1686 transaction.add(self.datafile, end)
1682 1687 end = rev * self._io.size
1683 1688 else:
1684 1689 end += rev * self._io.size
1685 1690
1686 1691 transaction.add(self.indexfile, end)
1687 1692
1688 1693 # then reset internal state in memory to forget those revisions
1689 1694 self._cache = None
1690 1695 self._chaininfocache = {}
1691 1696 self._chunkclear()
1692 1697 for x in xrange(rev, len(self)):
1693 1698 del self.nodemap[self.node(x)]
1694 1699
1695 1700 del self.index[rev:-1]
1696 1701
1697 1702 def checksize(self):
1698 1703 expected = 0
1699 1704 if len(self):
1700 1705 expected = max(0, self.end(len(self) - 1))
1701 1706
1702 1707 try:
1703 1708 f = self.opener(self.datafile)
1704 1709 f.seek(0, 2)
1705 1710 actual = f.tell()
1706 1711 f.close()
1707 1712 dd = actual - expected
1708 1713 except IOError as inst:
1709 1714 if inst.errno != errno.ENOENT:
1710 1715 raise
1711 1716 dd = 0
1712 1717
1713 1718 try:
1714 1719 f = self.opener(self.indexfile)
1715 1720 f.seek(0, 2)
1716 1721 actual = f.tell()
1717 1722 f.close()
1718 1723 s = self._io.size
1719 1724 i = max(0, actual // s)
1720 1725 di = actual - (i * s)
1721 1726 if self._inline:
1722 1727 databytes = 0
1723 1728 for r in self:
1724 1729 databytes += max(0, self.length(r))
1725 1730 dd = 0
1726 1731 di = actual - len(self) * s - databytes
1727 1732 except IOError as inst:
1728 1733 if inst.errno != errno.ENOENT:
1729 1734 raise
1730 1735 di = 0
1731 1736
1732 1737 return (dd, di)
1733 1738
1734 1739 def files(self):
1735 1740 res = [self.indexfile]
1736 1741 if not self._inline:
1737 1742 res.append(self.datafile)
1738 1743 return res
General Comments 0
You need to be logged in to leave comments. Login now