revlog: reorder index accessors to match data structure order...
Gregory Szorc
r30287:0986f225 default
@@ -1,1796 +1,1811
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
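# Annotation (not part of the original file): the version header packs the
# format number into the low 16 bits and feature flags into the high 16
# bits, so the default header is REVLOGNG with inline data enabled:
#
#   assert REVLOG_DEFAULT_VERSION == 1 | (1 << 16) == 0x10001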
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 75 return long(long(offset) << 16 | type)
76 76
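# Annotation (not part of the original file): offset_type() packs a 48-bit
# data offset and 16 bits of flags into the first field of an index entry;
# getoffset()/gettype() invert it. A quick round-trip check:
#
#   q = offset_type(1048576, REVIDX_ISCENSORED)
#   assert getoffset(q) == 1048576
#   assert gettype(q) == REVIDX_ISCENSORED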
77 77 _nullhash = hashlib.sha1(nullid)
78 78
79 79 def hash(text, p1, p2):
80 80 """generate a hash from the given text and its parent hashes
81 81
82 82 This hash combines both the current file contents and its history
83 83 in a manner that makes it easy to distinguish nodes with the same
84 84 content in the revision graph.
85 85 """
86 86 # As of now, if one of the parent nodes is null, p2 is null
87 87 if p2 == nullid:
88 88 # deep copy of a hash is faster than creating one
89 89 s = _nullhash.copy()
90 90 s.update(p1)
91 91 else:
92 92 # none of the parent nodes are nullid
93 93 l = [p1, p2]
94 94 l.sort()
95 95 s = hashlib.sha1(l[0])
96 96 s.update(l[1])
97 97 s.update(text)
98 98 return s.digest()
99 99
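# Annotation (not part of the original file): equivalently, with two
# non-null parents the node id is sha1(min(p1, p2) + max(p1, p2) + text);
# sorting makes the hash independent of parent order. A minimal check:
#
#   p1, p2 = hashlib.sha1('a').digest(), hashlib.sha1('b').digest()
#   expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + 'text').digest()
#   assert hash('text', p1, p2) == expected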
100 100 def decompress(bin):
101 101 """ decompress the given input """
102 102 if not bin:
103 103 return bin
104 104 t = bin[0]
105 105 if t == '\0':
106 106 return bin
107 107 if t == 'x':
108 108 try:
109 109 return _decompress(bin)
110 110 except zlib.error as e:
111 111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 112 if t == 'u':
113 113 return util.buffer(bin, 1)
114 114 raise RevlogError(_("unknown compression type %r") % t)
115 115
116 116 # index v0:
117 117 # 4 bytes: offset
118 118 # 4 bytes: compressed length
119 119 # 4 bytes: base rev
120 120 # 4 bytes: link rev
121 121 # 20 bytes: parent 1 nodeid
122 122 # 20 bytes: parent 2 nodeid
123 123 # 20 bytes: nodeid
124 124 indexformatv0 = ">4l20s20s20s"
125 125
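# Annotation (not part of the original file): each v0 record is a fixed 76
# bytes -- four big-endian 32-bit integers followed by three 20-byte nodes:
#
#   assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20 == 76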
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 versionformat = ">I"
171 171
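# Annotation (not part of the original file): a RevlogNG record is a fixed
# 64 bytes (8 + 6 * 4 + 20 + 12 bytes of padding). There is no separate
# file header: packentry() below splices the version word over the first
# 4 bytes of entry 0, whose offset field is always zero anyway.
#
#   assert struct.calcsize(indexformatng) == 64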
172 172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 173 # signed integer)
174 174 _maxentrysize = 0x7fffffff
175 175
176 176 class revlogio(object):
177 177 def __init__(self):
178 178 self.size = struct.calcsize(indexformatng)
179 179
180 180 def parseindex(self, data, inline):
181 181 # call the C implementation to parse the index data
182 182 index, cache = parsers.parse_index2(data, inline)
183 183 return index, getattr(index, 'nodemap', None), cache
184 184
185 185 def packentry(self, entry, node, version, rev):
186 186 p = _pack(indexformatng, *entry)
187 187 if rev == 0:
188 188 p = _pack(versionformat, version) + p[4:]
189 189 return p
190 190
191 191 class revlog(object):
192 192 """
193 193 the underlying revision storage object
194 194
195 195 A revlog consists of two parts, an index and the revision data.
196 196
197 197 The index is a file with a fixed record size containing
198 198 information on each revision, including its nodeid (hash), the
199 199 nodeids of its parents, the position and offset of its data within
200 200 the data file, and the revision it's based on. Finally, each entry
201 201 contains a linkrev entry that can serve as a pointer to external
202 202 data.
203 203
204 204 The revision data itself is a linear collection of data chunks.
205 205 Each chunk represents a revision and is usually represented as a
206 206 delta against the previous chunk. To bound lookup time, runs of
207 207 deltas are limited to about 2 times the length of the original
208 208 version data. This makes retrieval of a version proportional to
209 209 its size, or O(1) relative to the number of revisions.
210 210
211 211 Both pieces of the revlog are written to in an append-only
212 212 fashion, which means we never need to rewrite a file to insert or
213 213 remove data, and can use some simple techniques to avoid the need
214 214 for locking while reading.
215 215
216 216 If checkambig, indexfile is opened with checkambig=True at
217 217 writing, to avoid file stat ambiguity.
218 218 """
219 219 def __init__(self, opener, indexfile, checkambig=False):
220 220 """
221 221 create a revlog object
222 222
223 223 opener is a function that abstracts the file opening operation
224 224 and can be used to implement COW semantics or the like.
225 225 """
226 226 self.indexfile = indexfile
227 227 self.datafile = indexfile[:-2] + ".d"
228 228 self.opener = opener
229 229 # When True, indexfile is opened with checkambig=True at writing, to
230 230 # avoid file stat ambiguity.
231 231 self._checkambig = checkambig
232 232 # 3-tuple of (node, rev, text) for a raw revision.
233 233 self._cache = None
234 234 # Maps rev to chain base rev.
235 235 self._chainbasecache = util.lrucachedict(100)
236 236 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
237 237 self._chunkcache = (0, '')
238 238 # How much data to read and cache into the raw revlog data cache.
239 239 self._chunkcachesize = 65536
240 240 self._maxchainlen = None
241 241 self._aggressivemergedeltas = False
242 242 self.index = []
243 243 # Mapping of partial identifiers to full nodes.
244 244 self._pcache = {}
245 245 # Mapping of revision integer to full node.
246 246 self._nodecache = {nullid: nullrev}
247 247 self._nodepos = None
248 248
249 249 v = REVLOG_DEFAULT_VERSION
250 250 opts = getattr(opener, 'options', None)
251 251 if opts is not None:
252 252 if 'revlogv1' in opts:
253 253 if 'generaldelta' in opts:
254 254 v |= REVLOGGENERALDELTA
255 255 else:
256 256 v = 0
257 257 if 'chunkcachesize' in opts:
258 258 self._chunkcachesize = opts['chunkcachesize']
259 259 if 'maxchainlen' in opts:
260 260 self._maxchainlen = opts['maxchainlen']
261 261 if 'aggressivemergedeltas' in opts:
262 262 self._aggressivemergedeltas = opts['aggressivemergedeltas']
263 263 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
264 264
265 265 if self._chunkcachesize <= 0:
266 266 raise RevlogError(_('revlog chunk cache size %r is not greater '
267 267 'than 0') % self._chunkcachesize)
268 268 elif self._chunkcachesize & (self._chunkcachesize - 1):
269 269 raise RevlogError(_('revlog chunk cache size %r is not a power '
270 270 'of 2') % self._chunkcachesize)
271 271
272 272 indexdata = ''
273 273 self._initempty = True
274 274 try:
275 275 f = self.opener(self.indexfile)
276 276 indexdata = f.read()
277 277 f.close()
278 278 if len(indexdata) > 0:
279 279 v = struct.unpack(versionformat, indexdata[:4])[0]
280 280 self._initempty = False
281 281 except IOError as inst:
282 282 if inst.errno != errno.ENOENT:
283 283 raise
284 284
285 285 self.version = v
286 286 self._inline = v & REVLOGNGINLINEDATA
287 287 self._generaldelta = v & REVLOGGENERALDELTA
288 288 flags = v & ~0xFFFF
289 289 fmt = v & 0xFFFF
290 290 if fmt == REVLOGV0 and flags:
291 291 raise RevlogError(_("index %s unknown flags %#04x for format v0")
292 292 % (self.indexfile, flags >> 16))
293 293 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
294 294 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
295 295 % (self.indexfile, flags >> 16))
296 296 elif fmt > REVLOGNG:
297 297 raise RevlogError(_("index %s unknown format %d")
298 298 % (self.indexfile, fmt))
299 299
300 300 self.storedeltachains = True
301 301
302 302 self._io = revlogio()
303 303 if self.version == REVLOGV0:
304 304 self._io = revlogoldio()
305 305 try:
306 306 d = self._io.parseindex(indexdata, self._inline)
307 307 except (ValueError, IndexError):
308 308 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
309 309 self.index, nodemap, self._chunkcache = d
310 310 if nodemap is not None:
311 311 self.nodemap = self._nodecache = nodemap
312 312 if not self._chunkcache:
313 313 self._chunkclear()
314 314 # revnum -> (chain-length, sum-delta-length)
315 315 self._chaininfocache = {}
316 316
317 317 def tip(self):
318 318 return self.node(len(self.index) - 2)
319 319 def __contains__(self, rev):
320 320 return 0 <= rev < len(self)
321 321 def __len__(self):
322 322 return len(self.index) - 1
323 323 def __iter__(self):
324 324 return iter(xrange(len(self)))
325 325 def revs(self, start=0, stop=None):
326 326 """iterate over all revs in this revlog (from start to stop)"""
327 327 step = 1
328 328 if stop is not None:
329 329 if start > stop:
330 330 step = -1
331 331 stop += step
332 332 else:
333 333 stop = len(self)
334 334 return xrange(start, stop, step)
335 335
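# Annotation (not part of the original file): unlike xrange, revs() treats
# stop as inclusive and iterates backwards when start > stop, e.g. for a
# hypothetical revlog rl with three revisions:
#
#   list(rl.revs(start=0, stop=2)) == [0, 1, 2]
#   list(rl.revs(start=2, stop=0)) == [2, 1, 0]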
336 336 @util.propertycache
337 337 def nodemap(self):
338 338 self.rev(self.node(0))
339 339 return self._nodecache
340 340
341 341 def hasnode(self, node):
342 342 try:
343 343 self.rev(node)
344 344 return True
345 345 except KeyError:
346 346 return False
347 347
348 348 def clearcaches(self):
349 349 self._cache = None
350 350 self._chainbasecache.clear()
351 351 self._chunkcache = (0, '')
352 352 self._pcache = {}
353 353
354 354 try:
355 355 self._nodecache.clearcaches()
356 356 except AttributeError:
357 357 self._nodecache = {nullid: nullrev}
358 358 self._nodepos = None
359 359
360 360 def rev(self, node):
361 361 try:
362 362 return self._nodecache[node]
363 363 except TypeError:
364 364 raise
365 365 except RevlogError:
366 366 # parsers.c radix tree lookup failed
367 367 raise LookupError(node, self.indexfile, _('no node'))
368 368 except KeyError:
369 369 # pure python cache lookup failed
370 370 n = self._nodecache
371 371 i = self.index
372 372 p = self._nodepos
373 373 if p is None:
374 374 p = len(i) - 2
375 375 for r in xrange(p, -1, -1):
376 376 v = i[r][7]
377 377 n[v] = r
378 378 if v == node:
379 379 self._nodepos = r - 1
380 380 return r
381 381 raise LookupError(node, self.indexfile, _('no node'))
382 382
383 def node(self, rev):
384 return self.index[rev][7]
385 def linkrev(self, rev):
386 return self.index[rev][4]
387 def parents(self, node):
388 i = self.index
389 d = i[self.rev(node)]
390 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
391 def parentrevs(self, rev):
392 return self.index[rev][5:7]
383 # Accessors for index entries.
384
385 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
386 # are flags.
393 387 def start(self, rev):
394 388 return int(self.index[rev][0] >> 16)
395 def end(self, rev):
396 return self.start(rev) + self.length(rev)
389
390 def flags(self, rev):
391 return self.index[rev][0] & 0xFFFF
392
397 393 def length(self, rev):
398 394 return self.index[rev][1]
395
396 def rawsize(self, rev):
397 """return the length of the uncompressed text for a given revision"""
398 l = self.index[rev][2]
399 if l >= 0:
400 return l
401
402 t = self.revision(self.node(rev))
403 return len(t)
404 size = rawsize
405
399 406 def chainbase(self, rev):
400 407 base = self._chainbasecache.get(rev)
401 408 if base is not None:
402 409 return base
403 410
404 411 index = self.index
405 412 base = index[rev][3]
406 413 while base != rev:
407 414 rev = base
408 415 base = index[rev][3]
409 416
410 417 self._chainbasecache[rev] = base
411 418 return base
419
420 def linkrev(self, rev):
421 return self.index[rev][4]
422
423 def parentrevs(self, rev):
424 return self.index[rev][5:7]
425
426 def node(self, rev):
427 return self.index[rev][7]
428
429 # Derived from index values.
430
431 def end(self, rev):
432 return self.start(rev) + self.length(rev)
433
434 def parents(self, node):
435 i = self.index
436 d = i[self.rev(node)]
437 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
438
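# Annotation (not part of the original file): the accessors above index
# into the 8-tuple produced by parseindex():
#
#   (offset_and_flags, compressed_len, uncompressed_len,
#    delta_base_rev, link_rev, parent1_rev, parent2_rev, nodeid)
#
# e.g. length() returns the on-disk chunk size at [1], while rawsize()
# reads the fulltext size at [2].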
412 439 def chainlen(self, rev):
413 440 return self._chaininfo(rev)[0]
414 441
415 442 def _chaininfo(self, rev):
416 443 chaininfocache = self._chaininfocache
417 444 if rev in chaininfocache:
418 445 return chaininfocache[rev]
419 446 index = self.index
420 447 generaldelta = self._generaldelta
421 448 iterrev = rev
422 449 e = index[iterrev]
423 450 clen = 0
424 451 compresseddeltalen = 0
425 452 while iterrev != e[3]:
426 453 clen += 1
427 454 compresseddeltalen += e[1]
428 455 if generaldelta:
429 456 iterrev = e[3]
430 457 else:
431 458 iterrev -= 1
432 459 if iterrev in chaininfocache:
433 460 t = chaininfocache[iterrev]
434 461 clen += t[0]
435 462 compresseddeltalen += t[1]
436 463 break
437 464 e = index[iterrev]
438 465 else:
439 466 # Add text length of base since decompressing that also takes
440 467 # work. For cache hits the length is already included.
441 468 compresseddeltalen += e[1]
442 469 r = (clen, compresseddeltalen)
443 470 chaininfocache[rev] = r
444 471 return r
445 472
446 473 def _deltachain(self, rev, stoprev=None):
447 474 """Obtain the delta chain for a revision.
448 475
449 476 ``stoprev`` specifies a revision to stop at. If not specified, we
450 477 stop at the base of the chain.
451 478
452 479 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
453 480 revs in ascending order and ``stopped`` is a bool indicating whether
454 481 ``stoprev`` was hit.
455 482 """
456 483 chain = []
457 484
458 485 # Alias to prevent attribute lookup in tight loop.
459 486 index = self.index
460 487 generaldelta = self._generaldelta
461 488
462 489 iterrev = rev
463 490 e = index[iterrev]
464 491 while iterrev != e[3] and iterrev != stoprev:
465 492 chain.append(iterrev)
466 493 if generaldelta:
467 494 iterrev = e[3]
468 495 else:
469 496 iterrev -= 1
470 497 e = index[iterrev]
471 498
472 499 if iterrev == stoprev:
473 500 stopped = True
474 501 else:
475 502 chain.append(iterrev)
476 503 stopped = False
477 504
478 505 chain.reverse()
479 506 return chain, stopped
480 507
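# Annotation (not part of the original file): a sketch of the semantics,
# assuming a hypothetical revlog rl where rev 5 deltas against rev 2,
# which deltas against the full text stored at rev 0:
#
#   chain, stopped = rl._deltachain(5)
#   assert chain == [0, 2, 5] and not stopped
#   chain, stopped = rl._deltachain(5, stoprev=2)
#   assert chain == [5] and stopped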
481 def flags(self, rev):
482 return self.index[rev][0] & 0xFFFF
483 def rawsize(self, rev):
484 """return the length of the uncompressed text for a given revision"""
485 l = self.index[rev][2]
486 if l >= 0:
487 return l
488
489 t = self.revision(self.node(rev))
490 return len(t)
491 size = rawsize
492
493 508 def ancestors(self, revs, stoprev=0, inclusive=False):
494 509 """Generate the ancestors of 'revs' in reverse topological order.
495 510 Does not generate revs lower than stoprev.
496 511
497 512 See the documentation for ancestor.lazyancestors for more details."""
498 513
499 514 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
500 515 inclusive=inclusive)
501 516
502 517 def descendants(self, revs):
503 518 """Generate the descendants of 'revs' in revision order.
504 519
505 520 Yield a sequence of revision numbers starting with a child of
506 521 some rev in revs, i.e., each revision is *not* considered a
507 522 descendant of itself. Results are ordered by revision number (a
508 523 topological sort)."""
509 524 first = min(revs)
510 525 if first == nullrev:
511 526 for i in self:
512 527 yield i
513 528 return
514 529
515 530 seen = set(revs)
516 531 for i in self.revs(start=first + 1):
517 532 for x in self.parentrevs(i):
518 533 if x != nullrev and x in seen:
519 534 seen.add(i)
520 535 yield i
521 536 break
522 537
523 538 def findcommonmissing(self, common=None, heads=None):
524 539 """Return a tuple of the ancestors of common and the ancestors of heads
525 540 that are not ancestors of common. In revset terminology, we return the
526 541 tuple:
527 542
528 543 ::common, (::heads) - (::common)
529 544
530 545 The list is sorted by revision number, meaning it is
531 546 topologically sorted.
532 547
533 548 'heads' and 'common' are both lists of node IDs. If heads is
534 549 not supplied, uses all of the revlog's heads. If common is not
535 550 supplied, uses nullid."""
536 551 if common is None:
537 552 common = [nullid]
538 553 if heads is None:
539 554 heads = self.heads()
540 555
541 556 common = [self.rev(n) for n in common]
542 557 heads = [self.rev(n) for n in heads]
543 558
544 559 # we want the ancestors, but inclusive
545 560 class lazyset(object):
546 561 def __init__(self, lazyvalues):
547 562 self.addedvalues = set()
548 563 self.lazyvalues = lazyvalues
549 564
550 565 def __contains__(self, value):
551 566 return value in self.addedvalues or value in self.lazyvalues
552 567
553 568 def __iter__(self):
554 569 added = self.addedvalues
555 570 for r in added:
556 571 yield r
557 572 for r in self.lazyvalues:
558 573 if not r in added:
559 574 yield r
560 575
561 576 def add(self, value):
562 577 self.addedvalues.add(value)
563 578
564 579 def update(self, values):
565 580 self.addedvalues.update(values)
566 581
567 582 has = lazyset(self.ancestors(common))
568 583 has.add(nullrev)
569 584 has.update(common)
570 585
571 586 # take all ancestors from heads that aren't in has
572 587 missing = set()
573 588 visit = collections.deque(r for r in heads if r not in has)
574 589 while visit:
575 590 r = visit.popleft()
576 591 if r in missing:
577 592 continue
578 593 else:
579 594 missing.add(r)
580 595 for p in self.parentrevs(r):
581 596 if p not in has:
582 597 visit.append(p)
583 598 missing = list(missing)
584 599 missing.sort()
585 600 return has, [self.node(r) for r in missing]
586 601
587 602 def incrementalmissingrevs(self, common=None):
588 603 """Return an object that can be used to incrementally compute the
589 604 revision numbers of the ancestors of arbitrary sets that are not
590 605 ancestors of common. This is an ancestor.incrementalmissingancestors
591 606 object.
592 607
593 608 'common' is a list of revision numbers. If common is not supplied, uses
594 609 nullrev.
595 610 """
596 611 if common is None:
597 612 common = [nullrev]
598 613
599 614 return ancestor.incrementalmissingancestors(self.parentrevs, common)
600 615
601 616 def findmissingrevs(self, common=None, heads=None):
602 617 """Return the revision numbers of the ancestors of heads that
603 618 are not ancestors of common.
604 619
605 620 More specifically, return a list of revision numbers corresponding to
606 621 nodes N such that every N satisfies the following constraints:
607 622
608 623 1. N is an ancestor of some node in 'heads'
609 624 2. N is not an ancestor of any node in 'common'
610 625
611 626 The list is sorted by revision number, meaning it is
612 627 topologically sorted.
613 628
614 629 'heads' and 'common' are both lists of revision numbers. If heads is
615 630 not supplied, uses all of the revlog's heads. If common is not
616 631 supplied, uses nullid."""
617 632 if common is None:
618 633 common = [nullrev]
619 634 if heads is None:
620 635 heads = self.headrevs()
621 636
622 637 inc = self.incrementalmissingrevs(common=common)
623 638 return inc.missingancestors(heads)
624 639
625 640 def findmissing(self, common=None, heads=None):
626 641 """Return the ancestors of heads that are not ancestors of common.
627 642
628 643 More specifically, return a list of nodes N such that every N
629 644 satisfies the following constraints:
630 645
631 646 1. N is an ancestor of some node in 'heads'
632 647 2. N is not an ancestor of any node in 'common'
633 648
634 649 The list is sorted by revision number, meaning it is
635 650 topologically sorted.
636 651
637 652 'heads' and 'common' are both lists of node IDs. If heads is
638 653 not supplied, uses all of the revlog's heads. If common is not
639 654 supplied, uses nullid."""
640 655 if common is None:
641 656 common = [nullid]
642 657 if heads is None:
643 658 heads = self.heads()
644 659
645 660 common = [self.rev(n) for n in common]
646 661 heads = [self.rev(n) for n in heads]
647 662
648 663 inc = self.incrementalmissingrevs(common=common)
649 664 return [self.node(r) for r in inc.missingancestors(heads)]
650 665
651 666 def nodesbetween(self, roots=None, heads=None):
652 667 """Return a topological path from 'roots' to 'heads'.
653 668
654 669 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
655 670 topologically sorted list of all nodes N that satisfy both of
656 671 these constraints:
657 672
658 673 1. N is a descendant of some node in 'roots'
659 674 2. N is an ancestor of some node in 'heads'
660 675
661 676 Every node is considered to be both a descendant and an ancestor
662 677 of itself, so every reachable node in 'roots' and 'heads' will be
663 678 included in 'nodes'.
664 679
665 680 'outroots' is the list of reachable nodes in 'roots', i.e., the
666 681 subset of 'roots' that is returned in 'nodes'. Likewise,
667 682 'outheads' is the subset of 'heads' that is also in 'nodes'.
668 683
669 684 'roots' and 'heads' are both lists of node IDs. If 'roots' is
670 685 unspecified, uses nullid as the only root. If 'heads' is
671 686 unspecified, uses list of all of the revlog's heads."""
672 687 nonodes = ([], [], [])
673 688 if roots is not None:
674 689 roots = list(roots)
675 690 if not roots:
676 691 return nonodes
677 692 lowestrev = min([self.rev(n) for n in roots])
678 693 else:
679 694 roots = [nullid] # Everybody's a descendant of nullid
680 695 lowestrev = nullrev
681 696 if (lowestrev == nullrev) and (heads is None):
682 697 # We want _all_ the nodes!
683 698 return ([self.node(r) for r in self], [nullid], list(self.heads()))
684 699 if heads is None:
685 700 # All nodes are ancestors, so the latest ancestor is the last
686 701 # node.
687 702 highestrev = len(self) - 1
688 703 # Set ancestors to None to signal that every node is an ancestor.
689 704 ancestors = None
690 705 # Set heads to an empty dictionary for later discovery of heads
691 706 heads = {}
692 707 else:
693 708 heads = list(heads)
694 709 if not heads:
695 710 return nonodes
696 711 ancestors = set()
697 712 # Turn heads into a dictionary so we can remove 'fake' heads.
698 713 # Also, later we will be using it to filter out the heads we can't
699 714 # find from roots.
700 715 heads = dict.fromkeys(heads, False)
701 716 # Start at the top and keep marking parents until we're done.
702 717 nodestotag = set(heads)
703 718 # Remember where the top was so we can use it as a limit later.
704 719 highestrev = max([self.rev(n) for n in nodestotag])
705 720 while nodestotag:
706 721 # grab a node to tag
707 722 n = nodestotag.pop()
708 723 # Never tag nullid
709 724 if n == nullid:
710 725 continue
711 726 # A node's revision number represents its place in a
712 727 # topologically sorted list of nodes.
713 728 r = self.rev(n)
714 729 if r >= lowestrev:
715 730 if n not in ancestors:
716 731 # If we are possibly a descendant of one of the roots
717 732 # and we haven't already been marked as an ancestor
718 733 ancestors.add(n) # Mark as ancestor
719 734 # Add non-nullid parents to list of nodes to tag.
720 735 nodestotag.update([p for p in self.parents(n) if
721 736 p != nullid])
722 737 elif n in heads: # We've seen it before, is it a fake head?
723 738 # So it is, real heads should not be the ancestors of
724 739 # any other heads.
725 740 heads.pop(n)
726 741 if not ancestors:
727 742 return nonodes
728 743 # Now that we have our set of ancestors, we want to remove any
729 744 # roots that are not ancestors.
730 745
731 746 # If one of the roots was nullid, everything is included anyway.
732 747 if lowestrev > nullrev:
733 748 # But, since we weren't, let's recompute the lowest rev to not
734 749 # include roots that aren't ancestors.
735 750
736 751 # Filter out roots that aren't ancestors of heads
737 752 roots = [n for n in roots if n in ancestors]
738 753 # Recompute the lowest revision
739 754 if roots:
740 755 lowestrev = min([self.rev(n) for n in roots])
741 756 else:
742 757 # No more roots? Return empty list
743 758 return nonodes
744 759 else:
745 760 # We are descending from nullid, and don't need to care about
746 761 # any other roots.
747 762 lowestrev = nullrev
748 763 roots = [nullid]
749 764 # Transform our roots list into a set.
750 765 descendants = set(roots)
751 766 # Also, keep the original roots so we can filter out roots that aren't
752 767 # 'real' roots (i.e. are descended from other roots).
753 768 roots = descendants.copy()
754 769 # Our topologically sorted list of output nodes.
755 770 orderedout = []
756 771 # Don't start at nullid since we don't want nullid in our output list,
757 772 # and if nullid shows up in descendants, empty parents will look like
758 773 # they're descendants.
759 774 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
760 775 n = self.node(r)
761 776 isdescendant = False
762 777 if lowestrev == nullrev: # Everybody is a descendant of nullid
763 778 isdescendant = True
764 779 elif n in descendants:
765 780 # n is already a descendant
766 781 isdescendant = True
767 782 # This check only needs to be done here because all the roots
768 783 # will start being marked as descendants before the loop.
769 784 if n in roots:
770 785 # If n was a root, check if it's a 'real' root.
771 786 p = tuple(self.parents(n))
772 787 # If any of its parents are descendants, it's not a root.
773 788 if (p[0] in descendants) or (p[1] in descendants):
774 789 roots.remove(n)
775 790 else:
776 791 p = tuple(self.parents(n))
777 792 # A node is a descendant if either of its parents are
778 793 # descendants. (We seeded the dependents list with the roots
779 794 # up there, remember?)
780 795 if (p[0] in descendants) or (p[1] in descendants):
781 796 descendants.add(n)
782 797 isdescendant = True
783 798 if isdescendant and ((ancestors is None) or (n in ancestors)):
784 799 # Only include nodes that are both descendants and ancestors.
785 800 orderedout.append(n)
786 801 if (ancestors is not None) and (n in heads):
787 802 # We're trying to figure out which heads are reachable
788 803 # from roots.
789 804 # Mark this head as having been reached
790 805 heads[n] = True
791 806 elif ancestors is None:
792 807 # Otherwise, we're trying to discover the heads.
793 808 # Assume this is a head because if it isn't, the next step
794 809 # will eventually remove it.
795 810 heads[n] = True
796 811 # But, obviously its parents aren't.
797 812 for p in self.parents(n):
798 813 heads.pop(p, None)
799 814 heads = [n for n, flag in heads.iteritems() if flag]
800 815 roots = list(roots)
801 816 assert orderedout
802 817 assert roots
803 818 assert heads
804 819 return (orderedout, roots, heads)
805 820
806 821 def headrevs(self):
807 822 try:
808 823 return self.index.headrevs()
809 824 except AttributeError:
810 825 return self._headrevs()
811 826
812 827 def computephases(self, roots):
813 828 return self.index.computephasesmapsets(roots)
814 829
815 830 def _headrevs(self):
816 831 count = len(self)
817 832 if not count:
818 833 return [nullrev]
819 834 # we won't iterate over filtered revs, so nobody is a head at start
820 835 ishead = [0] * (count + 1)
821 836 index = self.index
822 837 for r in self:
823 838 ishead[r] = 1 # I may be a head
824 839 e = index[r]
825 840 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
826 841 return [r for r, val in enumerate(ishead) if val]
827 842
828 843 def heads(self, start=None, stop=None):
829 844 """return the list of all nodes that have no children
830 845
831 846 if start is specified, only heads that are descendants of
832 847 start will be returned
833 848 if stop is specified, it will consider all the revs from stop
834 849 as if they had no children
835 850 """
836 851 if start is None and stop is None:
837 852 if not len(self):
838 853 return [nullid]
839 854 return [self.node(r) for r in self.headrevs()]
840 855
841 856 if start is None:
842 857 start = nullid
843 858 if stop is None:
844 859 stop = []
845 860 stoprevs = set([self.rev(n) for n in stop])
846 861 startrev = self.rev(start)
847 862 reachable = set((startrev,))
848 863 heads = set((startrev,))
849 864
850 865 parentrevs = self.parentrevs
851 866 for r in self.revs(start=startrev + 1):
852 867 for p in parentrevs(r):
853 868 if p in reachable:
854 869 if r not in stoprevs:
855 870 reachable.add(r)
856 871 heads.add(r)
857 872 if p in heads and p not in stoprevs:
858 873 heads.remove(p)
859 874
860 875 return [self.node(r) for r in heads]
861 876
862 877 def children(self, node):
863 878 """find the children of a given node"""
864 879 c = []
865 880 p = self.rev(node)
866 881 for r in self.revs(start=p + 1):
867 882 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
868 883 if prevs:
869 884 for pr in prevs:
870 885 if pr == p:
871 886 c.append(self.node(r))
872 887 elif p == nullrev:
873 888 c.append(self.node(r))
874 889 return c
875 890
876 891 def descendant(self, start, end):
877 892 if start == nullrev:
878 893 return True
879 894 for i in self.descendants([start]):
880 895 if i == end:
881 896 return True
882 897 elif i > end:
883 898 break
884 899 return False
885 900
886 901 def commonancestorsheads(self, a, b):
887 902 """calculate all the heads of the common ancestors of nodes a and b"""
888 903 a, b = self.rev(a), self.rev(b)
889 904 try:
890 905 ancs = self.index.commonancestorsheads(a, b)
891 906 except (AttributeError, OverflowError): # C implementation failed
892 907 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
893 908 return map(self.node, ancs)
894 909
895 910 def isancestor(self, a, b):
896 911 """return True if node a is an ancestor of node b
897 912
898 913 The implementation of this is trivial but the use of
899 914 commonancestorsheads is not."""
900 915 return a in self.commonancestorsheads(a, b)
901 916
902 917 def ancestor(self, a, b):
903 918 """calculate the "best" common ancestor of nodes a and b"""
904 919
905 920 a, b = self.rev(a), self.rev(b)
906 921 try:
907 922 ancs = self.index.ancestors(a, b)
908 923 except (AttributeError, OverflowError):
909 924 ancs = ancestor.ancestors(self.parentrevs, a, b)
910 925 if ancs:
911 926 # choose a consistent winner when there's a tie
912 927 return min(map(self.node, ancs))
913 928 return nullid
914 929
915 930 def _match(self, id):
916 931 if isinstance(id, int):
917 932 # rev
918 933 return self.node(id)
919 934 if len(id) == 20:
920 935 # possibly a binary node
921 936 # odds of a binary node being all hex in ASCII are 1 in 10**25
922 937 try:
923 938 node = id
924 939 self.rev(node) # quick search the index
925 940 return node
926 941 except LookupError:
927 942 pass # may be partial hex id
928 943 try:
929 944 # str(rev)
930 945 rev = int(id)
931 946 if str(rev) != id:
932 947 raise ValueError
933 948 if rev < 0:
934 949 rev = len(self) + rev
935 950 if rev < 0 or rev >= len(self):
936 951 raise ValueError
937 952 return self.node(rev)
938 953 except (ValueError, OverflowError):
939 954 pass
940 955 if len(id) == 40:
941 956 try:
942 957 # a full hex nodeid?
943 958 node = bin(id)
944 959 self.rev(node)
945 960 return node
946 961 except (TypeError, LookupError):
947 962 pass
948 963
949 964 def _partialmatch(self, id):
950 965 try:
951 966 n = self.index.partialmatch(id)
952 967 if n and self.hasnode(n):
953 968 return n
954 969 return None
955 970 except RevlogError:
956 971 # parsers.c radix tree lookup gave multiple matches
957 972 # fast path: for unfiltered changelog, radix tree is accurate
958 973 if not getattr(self, 'filteredrevs', None):
959 974 raise LookupError(id, self.indexfile,
960 975 _('ambiguous identifier'))
961 976 # fall through to slow path that filters hidden revisions
962 977 except (AttributeError, ValueError):
963 978 # we are pure python, or key was too short to search radix tree
964 979 pass
965 980
966 981 if id in self._pcache:
967 982 return self._pcache[id]
968 983
969 984 if len(id) < 40:
970 985 try:
971 986 # hex(node)[:...]
972 987 l = len(id) // 2 # grab an even number of digits
973 988 prefix = bin(id[:l * 2])
974 989 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
975 990 nl = [n for n in nl if hex(n).startswith(id) and
976 991 self.hasnode(n)]
977 992 if len(nl) > 0:
978 993 if len(nl) == 1:
979 994 self._pcache[id] = nl[0]
980 995 return nl[0]
981 996 raise LookupError(id, self.indexfile,
982 997 _('ambiguous identifier'))
983 998 return None
984 999 except TypeError:
985 1000 pass
986 1001
987 1002 def lookup(self, id):
988 1003 """locate a node based on:
989 1004 - revision number or str(revision number)
990 1005 - nodeid or subset of hex nodeid
991 1006 """
992 1007 n = self._match(id)
993 1008 if n is not None:
994 1009 return n
995 1010 n = self._partialmatch(id)
996 1011 if n:
997 1012 return n
998 1013
999 1014 raise LookupError(id, self.indexfile, _('no match found'))
1000 1015
1001 1016 def cmp(self, node, text):
1002 1017 """compare text with a given file revision
1003 1018
1004 1019 returns True if text is different than what is stored.
1005 1020 """
1006 1021 p1, p2 = self.parents(node)
1007 1022 return hash(text, p1, p2) != node
1008 1023
1009 1024 def _addchunk(self, offset, data):
1010 1025 """Add a segment to the revlog cache.
1011 1026
1012 1027 Accepts an absolute offset and the data that is at that location.
1013 1028 """
1014 1029 o, d = self._chunkcache
1015 1030 # try to add to existing cache
1016 1031 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1017 1032 self._chunkcache = o, d + data
1018 1033 else:
1019 1034 self._chunkcache = offset, data
1020 1035
1021 1036 def _loadchunk(self, offset, length, df=None):
1022 1037 """Load a segment of raw data from the revlog.
1023 1038
1024 1039 Accepts an absolute offset, length to read, and an optional existing
1025 1040 file handle to read from.
1026 1041
1027 1042 If an existing file handle is passed, it will be seeked and the
1028 1043 original seek position will NOT be restored.
1029 1044
1030 1045 Returns a str or buffer of raw byte data.
1031 1046 """
1032 1047 if df is not None:
1033 1048 closehandle = False
1034 1049 else:
1035 1050 if self._inline:
1036 1051 df = self.opener(self.indexfile)
1037 1052 else:
1038 1053 df = self.opener(self.datafile)
1039 1054 closehandle = True
1040 1055
1041 1056 # Cache data both forward and backward around the requested
1042 1057 # data, in a fixed size window. This helps speed up operations
1043 1058 # involving reading the revlog backwards.
1044 1059 cachesize = self._chunkcachesize
1045 1060 realoffset = offset & ~(cachesize - 1)
1046 1061 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1047 1062 - realoffset)
1048 1063 df.seek(realoffset)
1049 1064 d = df.read(reallength)
1050 1065 if closehandle:
1051 1066 df.close()
1052 1067 self._addchunk(realoffset, d)
1053 1068 if offset != realoffset or reallength != length:
1054 1069 return util.buffer(d, offset - realoffset, length)
1055 1070 return d
1056 1071
1057 1072 def _getchunk(self, offset, length, df=None):
1058 1073 """Obtain a segment of raw data from the revlog.
1059 1074
1060 1075 Accepts an absolute offset, length of bytes to obtain, and an
1061 1076 optional file handle to the already-opened revlog. If the file
1062 1077 handle is used, its original seek position will not be preserved.
1063 1078
1064 1079 Requests for data may be returned from a cache.
1065 1080
1066 1081 Returns a str or a buffer instance of raw byte data.
1067 1082 """
1068 1083 o, d = self._chunkcache
1069 1084 l = len(d)
1070 1085
1071 1086 # is it in the cache?
1072 1087 cachestart = offset - o
1073 1088 cacheend = cachestart + length
1074 1089 if cachestart >= 0 and cacheend <= l:
1075 1090 if cachestart == 0 and cacheend == l:
1076 1091 return d # avoid a copy
1077 1092 return util.buffer(d, cachestart, cacheend - cachestart)
1078 1093
1079 1094 return self._loadchunk(offset, length, df=df)
1080 1095
1081 1096 def _chunkraw(self, startrev, endrev, df=None):
1082 1097 """Obtain a segment of raw data corresponding to a range of revisions.
1083 1098
1084 1099 Accepts the start and end revisions and an optional already-open
1085 1100 file handle to be used for reading. If the file handle is read, its
1086 1101 seek position will not be preserved.
1087 1102
1088 1103 Requests for data may be satisfied by a cache.
1089 1104
1090 1105 Returns a 2-tuple of (offset, data) for the requested range of
1091 1106 revisions. Offset is the integer offset from the beginning of the
1092 1107 revlog and data is a str or buffer of the raw byte data.
1093 1108
1094 1109 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1095 1110 to determine where each revision's data begins and ends.
1096 1111 """
1097 1112 start = self.start(startrev)
1098 1113 end = self.end(endrev)
1099 1114 if self._inline:
1100 1115 start += (startrev + 1) * self._io.size
1101 1116 end += (endrev + 1) * self._io.size
1102 1117 length = end - start
1103 1118
1104 1119 return start, self._getchunk(start, length, df=df)
1105 1120
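# Annotation (not part of the original file): in an inline revlog the index
# file interleaves one index record before each revision's data, so the
# logical data offsets from start()/end() are shifted by (rev + 1) records
# -- entries 0..rev all precede rev's data on disk.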
1106 1121 def _chunk(self, rev, df=None):
1107 1122 """Obtain a single decompressed chunk for a revision.
1108 1123
1109 1124 Accepts an integer revision and an optional already-open file handle
1110 1125 to be used for reading. If used, the seek position of the file will not
1111 1126 be preserved.
1112 1127
1113 1128 Returns a str holding uncompressed data for the requested revision.
1114 1129 """
1115 1130 return decompress(self._chunkraw(rev, rev, df=df)[1])
1116 1131
1117 1132 def _chunks(self, revs, df=None):
1118 1133 """Obtain decompressed chunks for the specified revisions.
1119 1134
1120 1135 Accepts an iterable of numeric revisions that are assumed to be in
1121 1136 ascending order. Also accepts an optional already-open file handle
1122 1137 to be used for reading. If used, the seek position of the file will
1123 1138 not be preserved.
1124 1139
1125 1140 This function is similar to calling ``self._chunk()`` multiple times,
1126 1141 but is faster.
1127 1142
1128 1143 Returns a list with decompressed data for each requested revision.
1129 1144 """
1130 1145 if not revs:
1131 1146 return []
1132 1147 start = self.start
1133 1148 length = self.length
1134 1149 inline = self._inline
1135 1150 iosize = self._io.size
1136 1151 buffer = util.buffer
1137 1152
1138 1153 l = []
1139 1154 ladd = l.append
1140 1155
1141 1156 try:
1142 1157 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1143 1158 except OverflowError:
1144 1159 # issue4215 - we can't cache a run of chunks greater than
1145 1160 # 2G on Windows
1146 1161 return [self._chunk(rev, df=df) for rev in revs]
1147 1162
1148 1163 for rev in revs:
1149 1164 chunkstart = start(rev)
1150 1165 if inline:
1151 1166 chunkstart += (rev + 1) * iosize
1152 1167 chunklength = length(rev)
1153 1168 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1154 1169
1155 1170 return l
1156 1171
1157 1172 def _chunkclear(self):
1158 1173 """Clear the raw chunk cache."""
1159 1174 self._chunkcache = (0, '')
1160 1175
1161 1176 def deltaparent(self, rev):
1162 1177 """return deltaparent of the given revision"""
1163 1178 base = self.index[rev][3]
1164 1179 if base == rev:
1165 1180 return nullrev
1166 1181 elif self._generaldelta:
1167 1182 return base
1168 1183 else:
1169 1184 return rev - 1
1170 1185
1171 1186 def revdiff(self, rev1, rev2):
1172 1187 """return or calculate a delta between two revisions"""
1173 1188 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1174 1189 return str(self._chunk(rev2))
1175 1190
1176 1191 return mdiff.textdiff(self.revision(rev1),
1177 1192 self.revision(rev2))
1178 1193
1179 1194 def revision(self, nodeorrev, _df=None):
1180 1195 """return an uncompressed revision of a given node or revision
1181 1196 number.
1182 1197
1183 1198 _df is an existing file handle to read from. It is meant to only be
1184 1199 used internally.
1185 1200 """
1186 1201 if isinstance(nodeorrev, int):
1187 1202 rev = nodeorrev
1188 1203 node = self.node(rev)
1189 1204 else:
1190 1205 node = nodeorrev
1191 1206 rev = None
1192 1207
1193 1208 cachedrev = None
1194 1209 if node == nullid:
1195 1210 return ""
1196 1211 if self._cache:
1197 1212 if self._cache[0] == node:
1198 1213 return self._cache[2]
1199 1214 cachedrev = self._cache[1]
1200 1215
1201 1216 # look up what we need to read
1202 1217 text = None
1203 1218 if rev is None:
1204 1219 rev = self.rev(node)
1205 1220
1206 1221 # check rev flags
1207 1222 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1208 1223 raise RevlogError(_('incompatible revision flag %x') %
1209 1224 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1210 1225
1211 1226 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1212 1227 if stopped:
1213 1228 text = self._cache[2]
1214 1229
1215 1230 # drop cache to save memory
1216 1231 self._cache = None
1217 1232
1218 1233 bins = self._chunks(chain, df=_df)
1219 1234 if text is None:
1220 1235 text = str(bins[0])
1221 1236 bins = bins[1:]
1222 1237
1223 1238 text = mdiff.patches(text, bins)
1224 1239
1225 1240 text = self._checkhash(text, node, rev)
1226 1241
1227 1242 self._cache = (node, rev, text)
1228 1243 return text
1229 1244
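# Annotation (not part of the original file): the hot path above boils down
# to "fetch the chain base's fulltext, then apply each delta in order". A
# rough equivalent for a hypothetical revlog rl:
#
#   chain, stopped = rl._deltachain(rev)
#   bins = rl._chunks(chain)
#   text = mdiff.patches(str(bins[0]), bins[1:])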
1230 1245 def hash(self, text, p1, p2):
1231 1246 """Compute a node hash.
1232 1247
1233 1248 Available as a function so that subclasses can replace the hash
1234 1249 as needed.
1235 1250 """
1236 1251 return hash(text, p1, p2)
1237 1252
1238 1253 def _checkhash(self, text, node, rev):
1239 1254 p1, p2 = self.parents(node)
1240 1255 self.checkhash(text, p1, p2, node, rev)
1241 1256 return text
1242 1257
1243 1258 def checkhash(self, text, p1, p2, node, rev=None):
1244 1259 if node != self.hash(text, p1, p2):
1245 1260 revornode = rev
1246 1261 if revornode is None:
1247 1262 revornode = templatefilters.short(hex(node))
1248 1263 raise RevlogError(_("integrity check failed on %s:%s")
1249 1264 % (self.indexfile, revornode))
1250 1265
1251 1266 def checkinlinesize(self, tr, fp=None):
1252 1267 """Check if the revlog is too big for inline and convert if so.
1253 1268
1254 1269 This should be called after revisions are added to the revlog. If the
1255 1270 revlog has grown too large to be an inline revlog, it will convert it
1256 1271 to use multiple index and data files.
1257 1272 """
1258 1273 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1259 1274 return
1260 1275
1261 1276 trinfo = tr.find(self.indexfile)
1262 1277 if trinfo is None:
1263 1278 raise RevlogError(_("%s not found in the transaction")
1264 1279 % self.indexfile)
1265 1280
1266 1281 trindex = trinfo[2]
1267 1282 if trindex is not None:
1268 1283 dataoff = self.start(trindex)
1269 1284 else:
1270 1285 # revlog was stripped at start of transaction, use all leftover data
1271 1286 trindex = len(self) - 1
1272 1287 dataoff = self.end(-2)
1273 1288
1274 1289 tr.add(self.datafile, dataoff)
1275 1290
1276 1291 if fp:
1277 1292 fp.flush()
1278 1293 fp.close()
1279 1294
1280 1295 df = self.opener(self.datafile, 'w')
1281 1296 try:
1282 1297 for r in self:
1283 1298 df.write(self._chunkraw(r, r)[1])
1284 1299 finally:
1285 1300 df.close()
1286 1301
1287 1302 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1288 1303 checkambig=self._checkambig)
1289 1304 self.version &= ~(REVLOGNGINLINEDATA)
1290 1305 self._inline = False
1291 1306 for i in self:
1292 1307 e = self._io.packentry(self.index[i], self.node, self.version, i)
1293 1308 fp.write(e)
1294 1309
1295 1310 # if we don't call close, the temp file will never replace the
1296 1311 # real index
1297 1312 fp.close()
1298 1313
1299 1314 tr.replace(self.indexfile, trindex * self._io.size)
1300 1315 self._chunkclear()
1301 1316
1302 1317 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1303 1318 node=None):
1304 1319 """add a revision to the log
1305 1320
1306 1321 text - the revision data to add
1307 1322 transaction - the transaction object used for rollback
1308 1323 link - the linkrev data to add
1309 1324 p1, p2 - the parent nodeids of the revision
1310 1325 cachedelta - an optional precomputed delta
1311 1326 node - nodeid of revision; typically node is not specified, and it is
1312 1327 computed by default as hash(text, p1, p2), however subclasses might
1313 1328 use different hashing method (and override checkhash() in such case)
1314 1329 """
1315 1330 if link == nullrev:
1316 1331 raise RevlogError(_("attempted to add linkrev -1 to %s")
1317 1332 % self.indexfile)
1318 1333
1319 1334 if len(text) > _maxentrysize:
1320 1335 raise RevlogError(
1321 1336 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1322 1337 % (self.indexfile, len(text)))
1323 1338
1324 1339 node = node or self.hash(text, p1, p2)
1325 1340 if node in self.nodemap:
1326 1341 return node
1327 1342
1328 1343 dfh = None
1329 1344 if not self._inline:
1330 1345 dfh = self.opener(self.datafile, "a+")
1331 1346 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1332 1347 try:
1333 1348 return self._addrevision(node, text, transaction, link, p1, p2,
1334 1349 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1335 1350 finally:
1336 1351 if dfh:
1337 1352 dfh.close()
1338 1353 ifh.close()
1339 1354
1340 1355 def compress(self, text):
1341 1356 """ generate a possibly-compressed representation of text """
1342 1357 if not text:
1343 1358 return ("", text)
1344 1359 l = len(text)
1345 1360 bin = None
1346 1361 if l < 44:
1347 1362 pass
1348 1363 elif l > 1000000:
1349 1364 # zlib makes an internal copy, thus doubling memory usage for
1350 1365 # large files, so let's do this in pieces
1351 1366 z = zlib.compressobj()
1352 1367 p = []
1353 1368 pos = 0
1354 1369 while pos < l:
1355 1370 pos2 = pos + 2**20
1356 1371 p.append(z.compress(text[pos:pos2]))
1357 1372 pos = pos2
1358 1373 p.append(z.flush())
1359 1374 if sum(map(len, p)) < l:
1360 1375 bin = "".join(p)
1361 1376 else:
1362 1377 bin = _compress(text)
1363 1378 if bin is None or len(bin) > l:
1364 1379 if text[0] == '\0':
1365 1380 return ("", text)
1366 1381 return ('u', text)
1367 1382 return ("", bin)
1368 1383
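# Annotation (not part of the original file): compress() and decompress()
# share a one-byte type tag: an empty header means zlib data (which starts
# with 'x') or a literal fulltext starting with '\0', while 'u' marks text
# stored uncompressed because zlib did not help. For a compressible input:
#
#   header, data = rl.compress('x' * 1000)   # hypothetical revlog rl
#   assert header == '' and data[0] == 'x'   # zlib magic byte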
1369 1384 def _isgooddelta(self, d, textlen):
1370 1385 """Returns True if the given delta is good. Good means that it is within
1371 1386 the disk span, disk size, and chain length bounds that we know to be
1372 1387 performant."""
1373 1388 if d is None:
1374 1389 return False
1375 1390
1376 1391 # - 'dist' is the distance from the base revision -- bounding it limits
1377 1392 # the amount of I/O we need to do.
1378 1393 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1379 1394 # to apply -- bounding it limits the amount of CPU we consume.
1380 1395 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1381 1396 if (dist > textlen * 4 or l > textlen or
1382 1397 compresseddeltalen > textlen * 2 or
1383 1398 (self._maxchainlen and chainlen > self._maxchainlen)):
1384 1399 return False
1385 1400
1386 1401 return True
1387 1402
1388 1403 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1389 1404 cachedelta, ifh, dfh, alwayscache=False):
1390 1405 """internal function to add revisions to the log
1391 1406
1392 1407 see addrevision for argument descriptions.
1393 1408 invariants:
1394 1409 - text is optional (can be None); if not set, cachedelta must be set.
1395 1410 if both are set, they must correspond to each other.
1396 1411 """
1397 1412 btext = [text]
1398 1413 def buildtext():
1399 1414 if btext[0] is not None:
1400 1415 return btext[0]
1401 1416 baserev = cachedelta[0]
1402 1417 delta = cachedelta[1]
1403 1418 # special case deltas which replace entire base; no need to decode
1404 1419 # base revision. this neatly avoids censored bases, which throw when
1405 1420 # they're decoded.
1406 1421 hlen = struct.calcsize(">lll")
1407 1422 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1408 1423 len(delta) - hlen):
1409 1424 btext[0] = delta[hlen:]
1410 1425 else:
1411 1426 if self._inline:
1412 1427 fh = ifh
1413 1428 else:
1414 1429 fh = dfh
1415 1430 basetext = self.revision(self.node(baserev), _df=fh)
1416 1431 btext[0] = mdiff.patch(basetext, delta)
1417 1432 try:
1418 1433 self.checkhash(btext[0], p1, p2, node)
1419 1434 if flags & REVIDX_ISCENSORED:
1420 1435 raise RevlogError(_('node %s is not censored') % node)
1421 1436 except CensoredNodeError:
1422 1437 # must pass the censored index flag to add censored revisions
1423 1438 if not flags & REVIDX_ISCENSORED:
1424 1439 raise
1425 1440 return btext[0]
1426 1441
1427 1442 def builddelta(rev):
1428 1443 # can we use the cached delta?
1429 1444 if cachedelta and cachedelta[0] == rev:
1430 1445 delta = cachedelta[1]
1431 1446 else:
1432 1447 t = buildtext()
1433 1448 if self.iscensored(rev):
1434 1449 # deltas based on a censored revision must replace the
1435 1450 # full content in one patch, so delta works everywhere
1436 1451 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1437 1452 delta = header + t
1438 1453 else:
1439 1454 if self._inline:
1440 1455 fh = ifh
1441 1456 else:
1442 1457 fh = dfh
1443 1458 ptext = self.revision(self.node(rev), _df=fh)
1444 1459 delta = mdiff.textdiff(ptext, t)
1445 1460 header, data = self.compress(delta)
1446 1461 deltalen = len(header) + len(data)
1447 1462 chainbase = self.chainbase(rev)
1448 1463 dist = deltalen + offset - self.start(chainbase)
1449 1464 if self._generaldelta:
1450 1465 base = rev
1451 1466 else:
1452 1467 base = chainbase
1453 1468 chainlen, compresseddeltalen = self._chaininfo(rev)
1454 1469 chainlen += 1
1455 1470 compresseddeltalen += deltalen
1456 1471 return (dist, deltalen, (header, data), base,
1457 1472 chainbase, chainlen, compresseddeltalen)
1458 1473
1459 1474 curr = len(self)
1460 1475 prev = curr - 1
1461 1476 offset = self.end(prev)
1462 1477 delta = None
1463 1478 p1r, p2r = self.rev(p1), self.rev(p2)
1464 1479
1465 1480 # full versions are inserted when the needed deltas
1466 1481 # become comparable to the uncompressed text
1467 1482 if text is None:
1468 1483 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1469 1484 cachedelta[1])
1470 1485 else:
1471 1486 textlen = len(text)
1472 1487
1473 1488 # should we try to build a delta?
1474 1489 if prev != nullrev and self.storedeltachains:
1475 1490 tested = set()
1476 1491 # This condition is true most of the time when processing
1477 1492 # changegroup data into a generaldelta repo. The only time it
1478 1493 # isn't true is if this is the first revision in a delta chain
1479 1494 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1480 1495 if cachedelta and self._generaldelta and self._lazydeltabase:
1481 1496 # Assume what we received from the server is a good choice
1482 1497 # build delta will reuse the cache
1483 1498 candidatedelta = builddelta(cachedelta[0])
1484 1499 tested.add(cachedelta[0])
1485 1500 if self._isgooddelta(candidatedelta, textlen):
1486 1501 delta = candidatedelta
1487 1502 if delta is None and self._generaldelta:
1488 1503 # exclude already lazy tested base if any
1489 1504 parents = [p for p in (p1r, p2r)
1490 1505 if p != nullrev and p not in tested]
1491 1506 if parents and not self._aggressivemergedeltas:
1492 1507 # Pick whichever parent is closer to us (to minimize the
1493 1508 # chance of having to build a fulltext).
1494 1509 parents = [max(parents)]
1495 1510 tested.update(parents)
1496 1511 pdeltas = []
1497 1512 for p in parents:
1498 1513 pd = builddelta(p)
1499 1514 if self._isgooddelta(pd, textlen):
1500 1515 pdeltas.append(pd)
1501 1516 if pdeltas:
1502 1517 delta = min(pdeltas, key=lambda x: x[1])
1503 1518 if delta is None and prev not in tested:
1504 1519 # other approaches failed; try a delta against prev to
1505 1520 # hopefully save us a fulltext.
1506 1521 candidatedelta = builddelta(prev)
1507 1522 if self._isgooddelta(candidatedelta, textlen):
1508 1523 delta = candidatedelta
1509 1524 if delta is not None:
1510 1525 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1511 1526 else:
1512 1527 text = buildtext()
1513 1528 data = self.compress(text)
1514 1529 l = len(data[1]) + len(data[0])
1515 1530 base = chainbase = curr
1516 1531
1517 1532 e = (offset_type(offset, flags), l, textlen,
1518 1533 base, link, p1r, p2r, node)
1519 1534 self.index.insert(-1, e)
1520 1535 self.nodemap[node] = curr
1521 1536
1522 1537 entry = self._io.packentry(e, self.node, self.version, curr)
1523 1538 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1524 1539
1525 1540 if alwayscache and text is None:
1526 1541 text = buildtext()
1527 1542
1528 1543 if type(text) == str: # only accept immutable objects
1529 1544 self._cache = (node, curr, text)
1530 1545 self._chainbasecache[curr] = chainbase
1531 1546 return node
1532 1547
1533 1548 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1534 1549 # Files opened in a+ mode have inconsistent behavior on various
1535 1550 # platforms. Windows requires that a file positioning call be made
1536 1551 # when the file handle transitions between reads and writes. See
1537 1552 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1538 1553 # platforms, Python or the platform itself can be buggy. Some versions
1539 1554 # of Solaris have been observed to not append at the end of the file
1540 1555 # if the file was seeked to before the end. See issue4943 for more.
1541 1556 #
1542 1557 # We work around this issue by inserting a seek() before writing.
1543 1558 # Note: This is likely not necessary on Python 3.
1544 1559 ifh.seek(0, os.SEEK_END)
1545 1560 if dfh:
1546 1561 dfh.seek(0, os.SEEK_END)
1547 1562
1548 1563 curr = len(self) - 1
1549 1564 if not self._inline:
1550 1565 transaction.add(self.datafile, offset)
1551 1566 transaction.add(self.indexfile, curr * len(entry))
1552 1567 if data[0]:
1553 1568 dfh.write(data[0])
1554 1569 dfh.write(data[1])
1555 1570 ifh.write(entry)
1556 1571 else:
1557 1572 offset += curr * self._io.size
1558 1573 transaction.add(self.indexfile, offset, curr)
1559 1574 ifh.write(entry)
1560 1575 ifh.write(data[0])
1561 1576 ifh.write(data[1])
1562 1577 self.checkinlinesize(transaction, ifh)
1563 1578
    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should already be in our log;
        the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # _addrevision switched from inline to conventional;
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

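    # A hedged usage sketch, not part of the original file: a caller that
    # wants to observe each added node can pass a callback, which receives
    # this revlog and the new node:
    #
    #   added = []
    #   def onrevision(rl, node):
    #       added.append(node)
    #   rl.addgroup(cg, linkmapper, tr, addrevisioncb=onrevision)
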
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

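    # A hedged note, not in the original file: both censoring hooks are
    # no-ops at this layer and are meant to be overridden. A filelog-style
    # subclass might, for example, answer from the index flags:
    #
    #   def iscensored(self, rev):
    #       return self.flags(rev) & REVIDX_ISCENSORED
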
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip all revisions
        with a linkrev >= minlink

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev >= minlink, so we can stop
        # walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

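    # A hedged worked example, not part of the original file. Suppose rev 0
    # (linkrev 0) has two child heads: rev 1 (linkrev 3) and rev 2
    # (linkrev 1). With minlink=2 the walk visits rev 2 first (its linkrev 1
    # is below minlink, so it becomes "broken" collateral), then rev 1:
    #
    #   >>> rl.getstrippoint(2)
    #   (1, set([2]))
    #
    # i.e. truncating at rev 1 also discards rev 2, whose linkrev is below
    # minlink.
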
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1] # keep the trailing sentinel entry

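    # A hedged usage sketch, not part of the original file: the expected
    # calling pattern pairs getstrippoint() with strip() under a transaction,
    # with the caller saving any "broken" revisions it wants to re-add:
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   # save anything in `broken` that must survive, then:
    #   rl.strip(minlink, tr)
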
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

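    # A hedged note, not in the original file: checksize() returns the
    # difference between expected and actual data-file size, and the number
    # of stray bytes past the last whole index entry. A verifier might
    # surface them roughly like:
    #
    #   dd, di = rl.checksize()
    #   if dd:
    #       print 'data length off by %d bytes' % dd
    #   if di:
    #       print 'index contains %d extra bytes' % di
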
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res