revlog: add a fast method for getting a list of chunks...
Siddharth Agarwal
r19713:c2e27e57 default
@@ -1,1318 +1,1340
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
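The version word stored in the first index entry packs the format number into the low 16 bits and feature flags into the high bits, mirroring the decoding done in revlog.__init__ below. A small sketch:

    v = REVLOG_DEFAULT_VERSION
    fmt = v & 0xFFFF            # format number: REVLOGNG == 1
    flags = v & ~0xFFFF         # feature flags: REVLOGNGINLINEDATA here
    assert fmt == REVLOGNG and flags == REVLOGNGINLINEDATA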
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
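offset_type packs a 48-bit data-file offset and a 16-bit flag field into a single integer, which getoffset and gettype recover. For example:

    q = offset_type(8192, 0)    # offset 8192, no flags
    assert getoffset(q) == 8192
    assert gettype(q) == 0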
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
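The node hash covers both parents (sorted) and the text, so two revisions with identical content but different history get distinct nodes. An unoptimized but equivalent form, using the module's _sha:

    def slowhash(text, p1, p2):
        # same digest as hash(): sorted parents, then the text
        s = _sha(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()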
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
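The first byte of stored data selects the encoding: empty or '\0'-prefixed data is returned verbatim, a 'u' header marks an uncompressed revision, and 'x' is the first byte of a zlib stream. A quick sketch of the dispatch:

    import zlib
    assert decompress('') == ''
    assert decompress('\0literal') == '\0literal'       # stored verbatim
    assert decompress('utext') == 'text'                # 'u' header stripped
    assert decompress(zlib.compress('text')) == 'text'  # zlib data begins with 'x'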
94 94 # index v0:
95 95 # 4 bytes: offset
96 96 # 4 bytes: compressed length
97 97 # 4 bytes: base rev
98 98 # 4 bytes: link rev
99 99 # 32 bytes: parent 1 nodeid
100 100 # 32 bytes: parent 2 nodeid
101 101 # 32 bytes: nodeid
102 102 indexformatv0 = ">4l20s20s20s"
103 103 v0shaoffset = 56
104 104
105 105 class revlogoldio(object):
106 106 def __init__(self):
107 107 self.size = struct.calcsize(indexformatv0)
108 108
109 109 def parseindex(self, data, inline):
110 110 s = self.size
111 111 index = []
112 112 nodemap = {nullid: nullrev}
113 113 n = off = 0
114 114 l = len(data)
115 115 while off + s <= l:
116 116 cur = data[off:off + s]
117 117 off += s
118 118 e = _unpack(indexformatv0, cur)
119 119 # transform to revlogv1 format
120 120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 122 index.append(e2)
123 123 nodemap[e[6]] = n
124 124 n += 1
125 125
126 126 # add the magic null revision at -1
127 127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 128
129 129 return index, nodemap, None
130 130
131 131 def packentry(self, entry, node, version, rev):
132 132 if gettype(entry[0]):
133 133 raise RevlogError(_("index entry flags need RevlogNG"))
134 134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 135 node(entry[5]), node(entry[6]), entry[7])
136 136 return _pack(indexformatv0, *e2)
137 137
138 138 # index ng:
139 139 # 6 bytes: offset
140 140 # 2 bytes: flags
141 141 # 4 bytes: compressed length
142 142 # 4 bytes: uncompressed length
143 143 # 4 bytes: base rev
144 144 # 4 bytes: link rev
145 145 # 4 bytes: parent 1 rev
146 146 # 4 bytes: parent 2 rev
147 147 # 32 bytes: nodeid
148 148 indexformatng = ">Qiiiiii20s12x"
149 149 ngshaoffset = 32
150 150 versionformat = ">I"
151 151
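Each RevlogNG index entry is a fixed 64-byte record described by the layout comment above. A sketch of decoding one raw entry (raw64 is a hypothetical 64-byte slice of the index file):

    import struct
    assert struct.calcsize(indexformatng) == 64
    # fields: offset/flags, compressed len, uncompressed len,
    #         base rev, link rev, p1 rev, p2 rev, nodeid
    entry = struct.unpack(indexformatng, raw64)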
152 152 class revlogio(object):
153 153 def __init__(self):
154 154 self.size = struct.calcsize(indexformatng)
155 155
156 156 def parseindex(self, data, inline):
157 157 # call the C implementation to parse the index data
158 158 index, cache = parsers.parse_index2(data, inline)
159 159 return index, getattr(index, 'nodemap', None), cache
160 160
161 161 def packentry(self, entry, node, version, rev):
162 162 p = _pack(indexformatng, *entry)
163 163 if rev == 0:
164 164 p = _pack(versionformat, version) + p[4:]
165 165 return p
166 166
167 167 class revlog(object):
168 168 """
169 169 the underlying revision storage object
170 170
171 171 A revlog consists of two parts, an index and the revision data.
172 172
173 173 The index is a file with a fixed record size containing
174 174 information on each revision, including its nodeid (hash), the
175 175 nodeids of its parents, the position and offset of its data within
176 176 the data file, and the revision it's based on. Finally, each entry
177 177 contains a linkrev entry that can serve as a pointer to external
178 178 data.
179 179
180 180 The revision data itself is a linear collection of data chunks.
181 181 Each chunk represents a revision and is usually represented as a
182 182 delta against the previous chunk. To bound lookup time, runs of
183 183 deltas are limited to about 2 times the length of the original
184 184 version data. This makes retrieval of a version proportional to
185 185 its size, or O(1) relative to the number of revisions.
186 186
187 187 Both pieces of the revlog are written to in an append-only
188 188 fashion, which means we never need to rewrite a file to insert or
189 189 remove data, and can use some simple techniques to avoid the need
190 190 for locking while reading.
191 191 """
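The "about 2 times" bound in the docstring is enforced by a heuristic in _addrevision further down: once the delta run back to the chain base outgrows twice the uncompressed text, a full version is stored instead of another delta. Schematically:

    def needs_full_version(dist, textlen):
        # dist: bytes of chunk data from the chain base through the new delta
        # textlen: uncompressed size of the revision being added
        return dist > textlen * 2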
192 192 def __init__(self, opener, indexfile):
193 193 """
194 194 create a revlog object
195 195
196 196 opener is a function that abstracts the file opening operation
197 197 and can be used to implement COW semantics or the like.
198 198 """
199 199 self.indexfile = indexfile
200 200 self.datafile = indexfile[:-2] + ".d"
201 201 self.opener = opener
202 202 self._cache = None
203 203 self._basecache = (0, 0)
204 204 self._chunkcache = (0, '')
205 205 self.index = []
206 206 self._pcache = {}
207 207 self._nodecache = {nullid: nullrev}
208 208 self._nodepos = None
209 209
210 210 v = REVLOG_DEFAULT_VERSION
211 211 opts = getattr(opener, 'options', None)
212 212 if opts is not None:
213 213 if 'revlogv1' in opts:
214 214 if 'generaldelta' in opts:
215 215 v |= REVLOGGENERALDELTA
216 216 else:
217 217 v = 0
218 218
219 219 i = ''
220 220 self._initempty = True
221 221 try:
222 222 f = self.opener(self.indexfile)
223 223 i = f.read()
224 224 f.close()
225 225 if len(i) > 0:
226 226 v = struct.unpack(versionformat, i[:4])[0]
227 227 self._initempty = False
228 228 except IOError, inst:
229 229 if inst.errno != errno.ENOENT:
230 230 raise
231 231
232 232 self.version = v
233 233 self._inline = v & REVLOGNGINLINEDATA
234 234 self._generaldelta = v & REVLOGGENERALDELTA
235 235 flags = v & ~0xFFFF
236 236 fmt = v & 0xFFFF
237 237 if fmt == REVLOGV0 and flags:
238 238 raise RevlogError(_("index %s unknown flags %#04x for format v0")
239 239 % (self.indexfile, flags >> 16))
240 240 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
241 241 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
242 242 % (self.indexfile, flags >> 16))
243 243 elif fmt > REVLOGNG:
244 244 raise RevlogError(_("index %s unknown format %d")
245 245 % (self.indexfile, fmt))
246 246
247 247 self._io = revlogio()
248 248 if self.version == REVLOGV0:
249 249 self._io = revlogoldio()
250 250 try:
251 251 d = self._io.parseindex(i, self._inline)
252 252 except (ValueError, IndexError):
253 253 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
254 254 self.index, nodemap, self._chunkcache = d
255 255 if nodemap is not None:
256 256 self.nodemap = self._nodecache = nodemap
257 257 if not self._chunkcache:
258 258 self._chunkclear()
259 259
260 260 def tip(self):
261 261 return self.node(len(self.index) - 2)
262 262 def __len__(self):
263 263 return len(self.index) - 1
264 264 def __iter__(self):
265 265 return iter(xrange(len(self)))
266 266 def revs(self, start=0, stop=None):
267 267 """iterate over all revs in this revlog (from start to stop)"""
268 268 step = 1
269 269 if stop is not None:
270 270 if start > stop:
271 271 step = -1
272 272 stop += step
273 273 else:
274 274 stop = len(self)
275 275 return xrange(start, stop, step)
276 276
277 277 @util.propertycache
278 278 def nodemap(self):
279 279 self.rev(self.node(0))
280 280 return self._nodecache
281 281
282 282 def hasnode(self, node):
283 283 try:
284 284 self.rev(node)
285 285 return True
286 286 except KeyError:
287 287 return False
288 288
289 289 def clearcaches(self):
290 290 try:
291 291 self._nodecache.clearcaches()
292 292 except AttributeError:
293 293 self._nodecache = {nullid: nullrev}
294 294 self._nodepos = None
295 295
296 296 def rev(self, node):
297 297 try:
298 298 return self._nodecache[node]
299 299 except RevlogError:
300 300 # parsers.c radix tree lookup failed
301 301 raise LookupError(node, self.indexfile, _('no node'))
302 302 except KeyError:
303 303 # pure python cache lookup failed
304 304 n = self._nodecache
305 305 i = self.index
306 306 p = self._nodepos
307 307 if p is None:
308 308 p = len(i) - 2
309 309 for r in xrange(p, -1, -1):
310 310 v = i[r][7]
311 311 n[v] = r
312 312 if v == node:
313 313 self._nodepos = r - 1
314 314 return r
315 315 raise LookupError(node, self.indexfile, _('no node'))
316 316
317 317 def node(self, rev):
318 318 return self.index[rev][7]
319 319 def linkrev(self, rev):
320 320 return self.index[rev][4]
321 321 def parents(self, node):
322 322 i = self.index
323 323 d = i[self.rev(node)]
324 324 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
325 325 def parentrevs(self, rev):
326 326 return self.index[rev][5:7]
327 327 def start(self, rev):
328 328 return int(self.index[rev][0] >> 16)
329 329 def end(self, rev):
330 330 return self.start(rev) + self.length(rev)
331 331 def length(self, rev):
332 332 return self.index[rev][1]
333 333 def chainbase(self, rev):
334 334 index = self.index
335 335 base = index[rev][3]
336 336 while base != rev:
337 337 rev = base
338 338 base = index[rev][3]
339 339 return base
340 340 def flags(self, rev):
341 341 return self.index[rev][0] & 0xFFFF
342 342 def rawsize(self, rev):
343 343 """return the length of the uncompressed text for a given revision"""
344 344 l = self.index[rev][2]
345 345 if l >= 0:
346 346 return l
347 347
348 348 t = self.revision(self.node(rev))
349 349 return len(t)
350 350 size = rawsize
351 351
352 352 def ancestors(self, revs, stoprev=0, inclusive=False):
353 353 """Generate the ancestors of 'revs' in reverse topological order.
354 354 Does not generate revs lower than stoprev.
355 355
356 356 See the documentation for ancestor.lazyancestors for more details."""
357 357
358 358 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
359 359 inclusive=inclusive)
360 360
361 361 def descendants(self, revs):
362 362 """Generate the descendants of 'revs' in revision order.
363 363
364 364 Yield a sequence of revision numbers starting with a child of
365 365 some rev in revs, i.e., each revision is *not* considered a
366 366 descendant of itself. Results are ordered by revision number (a
367 367 topological sort)."""
368 368 first = min(revs)
369 369 if first == nullrev:
370 370 for i in self:
371 371 yield i
372 372 return
373 373
374 374 seen = set(revs)
375 375 for i in self.revs(start=first + 1):
376 376 for x in self.parentrevs(i):
377 377 if x != nullrev and x in seen:
378 378 seen.add(i)
379 379 yield i
380 380 break
381 381
382 382 def findcommonmissing(self, common=None, heads=None):
383 383 """Return a tuple of the ancestors of common and the ancestors of heads
384 384 that are not ancestors of common. In revset terminology, we return the
385 385 tuple:
386 386
387 387 ::common, (::heads) - (::common)
388 388
389 389 The list is sorted by revision number, meaning it is
390 390 topologically sorted.
391 391
392 392 'heads' and 'common' are both lists of node IDs. If heads is
393 393 not supplied, uses all of the revlog's heads. If common is not
394 394 supplied, uses nullid."""
395 395 if common is None:
396 396 common = [nullid]
397 397 if heads is None:
398 398 heads = self.heads()
399 399
400 400 common = [self.rev(n) for n in common]
401 401 heads = [self.rev(n) for n in heads]
402 402
403 403 # we want the ancestors, but inclusive
404 404 has = set(self.ancestors(common))
405 405 has.add(nullrev)
406 406 has.update(common)
407 407
408 408 # take all ancestors from heads that aren't in has
409 409 missing = set()
410 410 visit = util.deque(r for r in heads if r not in has)
411 411 while visit:
412 412 r = visit.popleft()
413 413 if r in missing:
414 414 continue
415 415 else:
416 416 missing.add(r)
417 417 for p in self.parentrevs(r):
418 418 if p not in has:
419 419 visit.append(p)
420 420 missing = list(missing)
421 421 missing.sort()
422 422 return has, [self.node(r) for r in missing]
423 423
424 424 def findmissingrevs(self, common=None, heads=None):
425 425 """Return the revision numbers of the ancestors of heads that
426 426 are not ancestors of common.
427 427
428 428 More specifically, return a list of revision numbers corresponding to
429 429 nodes N such that every N satisfies the following constraints:
430 430
431 431 1. N is an ancestor of some node in 'heads'
432 432 2. N is not an ancestor of any node in 'common'
433 433
434 434 The list is sorted by revision number, meaning it is
435 435 topologically sorted.
436 436
437 437 'heads' and 'common' are both lists of revision numbers. If heads is
438 438 not supplied, uses all of the revlog's heads. If common is not
439 439 supplied, uses nullrev."""
440 440 if common is None:
441 441 common = [nullrev]
442 442 if heads is None:
443 443 heads = self.headrevs()
444 444
445 445 return ancestor.missingancestors(heads, common, self.parentrevs)
446 446
447 447 def findmissing(self, common=None, heads=None):
448 448 """Return the ancestors of heads that are not ancestors of common.
449 449
450 450 More specifically, return a list of nodes N such that every N
451 451 satisfies the following constraints:
452 452
453 453 1. N is an ancestor of some node in 'heads'
454 454 2. N is not an ancestor of any node in 'common'
455 455
456 456 The list is sorted by revision number, meaning it is
457 457 topologically sorted.
458 458
459 459 'heads' and 'common' are both lists of node IDs. If heads is
460 460 not supplied, uses all of the revlog's heads. If common is not
461 461 supplied, uses nullid."""
462 462 if common is None:
463 463 common = [nullid]
464 464 if heads is None:
465 465 heads = self.heads()
466 466
467 467 common = [self.rev(n) for n in common]
468 468 heads = [self.rev(n) for n in heads]
469 469
470 470 return [self.node(r) for r in
471 471 ancestor.missingancestors(heads, common, self.parentrevs)]
472 472
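For example, in a linear history 0 -> 1 -> 2 -> 3 with heads=[3] and common=[1], the missing nodes are those of revisions 2 and 3: each is an ancestor of a head (every node is its own ancestor) but not an ancestor of 1. A sketch, assuming rl is a revlog with that shape:

    missing = rl.findmissing(common=[rl.node(1)], heads=[rl.node(3)])
    assert missing == [rl.node(2), rl.node(3)]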
473 473 def nodesbetween(self, roots=None, heads=None):
474 474 """Return a topological path from 'roots' to 'heads'.
475 475
476 476 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
477 477 topologically sorted list of all nodes N that satisfy both of
478 478 these constraints:
479 479
480 480 1. N is a descendant of some node in 'roots'
481 481 2. N is an ancestor of some node in 'heads'
482 482
483 483 Every node is considered to be both a descendant and an ancestor
484 484 of itself, so every reachable node in 'roots' and 'heads' will be
485 485 included in 'nodes'.
486 486
487 487 'outroots' is the list of reachable nodes in 'roots', i.e., the
488 488 subset of 'roots' that is returned in 'nodes'. Likewise,
489 489 'outheads' is the subset of 'heads' that is also in 'nodes'.
490 490
491 491 'roots' and 'heads' are both lists of node IDs. If 'roots' is
492 492 unspecified, uses nullid as the only root. If 'heads' is
493 493 unspecified, uses list of all of the revlog's heads."""
494 494 nonodes = ([], [], [])
495 495 if roots is not None:
496 496 roots = list(roots)
497 497 if not roots:
498 498 return nonodes
499 499 lowestrev = min([self.rev(n) for n in roots])
500 500 else:
501 501 roots = [nullid] # Everybody's a descendant of nullid
502 502 lowestrev = nullrev
503 503 if (lowestrev == nullrev) and (heads is None):
504 504 # We want _all_ the nodes!
505 505 return ([self.node(r) for r in self], [nullid], list(self.heads()))
506 506 if heads is None:
507 507 # All nodes are ancestors, so the latest ancestor is the last
508 508 # node.
509 509 highestrev = len(self) - 1
510 510 # Set ancestors to None to signal that every node is an ancestor.
511 511 ancestors = None
512 512 # Set heads to an empty dictionary for later discovery of heads
513 513 heads = {}
514 514 else:
515 515 heads = list(heads)
516 516 if not heads:
517 517 return nonodes
518 518 ancestors = set()
519 519 # Turn heads into a dictionary so we can remove 'fake' heads.
520 520 # Also, later we will be using it to filter out the heads we can't
521 521 # find from roots.
522 522 heads = dict.fromkeys(heads, False)
523 523 # Start at the top and keep marking parents until we're done.
524 524 nodestotag = set(heads)
525 525 # Remember where the top was so we can use it as a limit later.
526 526 highestrev = max([self.rev(n) for n in nodestotag])
527 527 while nodestotag:
528 528 # grab a node to tag
529 529 n = nodestotag.pop()
530 530 # Never tag nullid
531 531 if n == nullid:
532 532 continue
533 533 # A node's revision number represents its place in a
534 534 # topologically sorted list of nodes.
535 535 r = self.rev(n)
536 536 if r >= lowestrev:
537 537 if n not in ancestors:
538 538 # If we are possibly a descendant of one of the roots
539 539 # and we haven't already been marked as an ancestor
540 540 ancestors.add(n) # Mark as ancestor
541 541 # Add non-nullid parents to list of nodes to tag.
542 542 nodestotag.update([p for p in self.parents(n) if
543 543 p != nullid])
544 544 elif n in heads: # We've seen it before, is it a fake head?
545 545 # So it is, real heads should not be the ancestors of
546 546 # any other heads.
547 547 heads.pop(n)
548 548 if not ancestors:
549 549 return nonodes
550 550 # Now that we have our set of ancestors, we want to remove any
551 551 # roots that are not ancestors.
552 552
553 553 # If one of the roots was nullid, everything is included anyway.
554 554 if lowestrev > nullrev:
555 555 # But, since we weren't, let's recompute the lowest rev to not
556 556 # include roots that aren't ancestors.
557 557
558 558 # Filter out roots that aren't ancestors of heads
559 559 roots = [n for n in roots if n in ancestors]
560 560 # Recompute the lowest revision
561 561 if roots:
562 562 lowestrev = min([self.rev(n) for n in roots])
563 563 else:
564 564 # No more roots? Return empty list
565 565 return nonodes
566 566 else:
567 567 # We are descending from nullid, and don't need to care about
568 568 # any other roots.
569 569 lowestrev = nullrev
570 570 roots = [nullid]
571 571 # Transform our roots list into a set.
572 572 descendants = set(roots)
573 573 # Also, keep the original roots so we can filter out roots that aren't
574 574 # 'real' roots (i.e. are descended from other roots).
575 575 roots = descendants.copy()
576 576 # Our topologically sorted list of output nodes.
577 577 orderedout = []
578 578 # Don't start at nullid since we don't want nullid in our output list,
579 579 # and if nullid shows up in descendants, empty parents will look like
580 580 # they're descendants.
581 581 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
582 582 n = self.node(r)
583 583 isdescendant = False
584 584 if lowestrev == nullrev: # Everybody is a descendant of nullid
585 585 isdescendant = True
586 586 elif n in descendants:
587 587 # n is already a descendant
588 588 isdescendant = True
589 589 # This check only needs to be done here because all the roots
590 590 # will start being marked as descendants before the loop.
591 591 if n in roots:
592 592 # If n was a root, check if it's a 'real' root.
593 593 p = tuple(self.parents(n))
594 594 # If any of its parents are descendants, it's not a root.
595 595 if (p[0] in descendants) or (p[1] in descendants):
596 596 roots.remove(n)
597 597 else:
598 598 p = tuple(self.parents(n))
599 599 # A node is a descendant if either of its parents are
600 600 # descendants. (We seeded the descendants set with the roots
601 601 # up there, remember?)
602 602 if (p[0] in descendants) or (p[1] in descendants):
603 603 descendants.add(n)
604 604 isdescendant = True
605 605 if isdescendant and ((ancestors is None) or (n in ancestors)):
606 606 # Only include nodes that are both descendants and ancestors.
607 607 orderedout.append(n)
608 608 if (ancestors is not None) and (n in heads):
609 609 # We're trying to figure out which heads are reachable
610 610 # from roots.
611 611 # Mark this head as having been reached
612 612 heads[n] = True
613 613 elif ancestors is None:
614 614 # Otherwise, we're trying to discover the heads.
615 615 # Assume this is a head because if it isn't, the next step
616 616 # will eventually remove it.
617 617 heads[n] = True
618 618 # But, obviously its parents aren't.
619 619 for p in self.parents(n):
620 620 heads.pop(p, None)
621 621 heads = [n for n, flag in heads.iteritems() if flag]
622 622 roots = list(roots)
623 623 assert orderedout
624 624 assert roots
625 625 assert heads
626 626 return (orderedout, roots, heads)
627 627
628 628 def headrevs(self):
629 629 try:
630 630 return self.index.headrevs()
631 631 except AttributeError:
632 632 return self._headrevs()
633 633
634 634 def _headrevs(self):
635 635 count = len(self)
636 636 if not count:
637 637 return [nullrev]
638 638 # we won't iterate over filtered revs, so nobody is a head at start
639 639 ishead = [0] * (count + 1)
640 640 index = self.index
641 641 for r in self:
642 642 ishead[r] = 1 # I may be a head
643 643 e = index[r]
644 644 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
645 645 return [r for r, val in enumerate(ishead) if val]
646 646
647 647 def heads(self, start=None, stop=None):
648 648 """return the list of all nodes that have no children
649 649
650 650 if start is specified, only heads that are descendants of
651 651 start will be returned
652 652 if stop is specified, it will consider all the revs from stop
653 653 as if they had no children
654 654 """
655 655 if start is None and stop is None:
656 656 if not len(self):
657 657 return [nullid]
658 658 return [self.node(r) for r in self.headrevs()]
659 659
660 660 if start is None:
661 661 start = nullid
662 662 if stop is None:
663 663 stop = []
664 664 stoprevs = set([self.rev(n) for n in stop])
665 665 startrev = self.rev(start)
666 666 reachable = set((startrev,))
667 667 heads = set((startrev,))
668 668
669 669 parentrevs = self.parentrevs
670 670 for r in self.revs(start=startrev + 1):
671 671 for p in parentrevs(r):
672 672 if p in reachable:
673 673 if r not in stoprevs:
674 674 reachable.add(r)
675 675 heads.add(r)
676 676 if p in heads and p not in stoprevs:
677 677 heads.remove(p)
678 678
679 679 return [self.node(r) for r in heads]
680 680
681 681 def children(self, node):
682 682 """find the children of a given node"""
683 683 c = []
684 684 p = self.rev(node)
685 685 for r in self.revs(start=p + 1):
686 686 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
687 687 if prevs:
688 688 for pr in prevs:
689 689 if pr == p:
690 690 c.append(self.node(r))
691 691 elif p == nullrev:
692 692 c.append(self.node(r))
693 693 return c
694 694
695 695 def descendant(self, start, end):
696 696 if start == nullrev:
697 697 return True
698 698 for i in self.descendants([start]):
699 699 if i == end:
700 700 return True
701 701 elif i > end:
702 702 break
703 703 return False
704 704
705 705 def ancestor(self, a, b):
706 706 """calculate the least common ancestor of nodes a and b"""
707 707
708 708 a, b = self.rev(a), self.rev(b)
709 709 try:
710 710 ancs = self.index.ancestors(a, b)
711 711 except (AttributeError, OverflowError):
712 712 ancs = ancestor.ancestors(self.parentrevs, a, b)
713 713 if ancs:
714 714 # choose a consistent winner when there's a tie
715 715 return min(map(self.node, ancs))
716 716 return nullid
717 717
718 718 def _match(self, id):
719 719 if isinstance(id, int):
720 720 # rev
721 721 return self.node(id)
722 722 if len(id) == 20:
723 723 # possibly a binary node
724 724 # odds of a binary node being all hex in ASCII are 1 in 10**25
725 725 try:
726 726 node = id
727 727 self.rev(node) # quick search the index
728 728 return node
729 729 except LookupError:
730 730 pass # may be partial hex id
731 731 try:
732 732 # str(rev)
733 733 rev = int(id)
734 734 if str(rev) != id:
735 735 raise ValueError
736 736 if rev < 0:
737 737 rev = len(self) + rev
738 738 if rev < 0 or rev >= len(self):
739 739 raise ValueError
740 740 return self.node(rev)
741 741 except (ValueError, OverflowError):
742 742 pass
743 743 if len(id) == 40:
744 744 try:
745 745 # a full hex nodeid?
746 746 node = bin(id)
747 747 self.rev(node)
748 748 return node
749 749 except (TypeError, LookupError):
750 750 pass
751 751
752 752 def _partialmatch(self, id):
753 753 try:
754 754 n = self.index.partialmatch(id)
755 755 if n and self.hasnode(n):
756 756 return n
757 757 return None
758 758 except RevlogError:
759 759 # parsers.c radix tree lookup gave multiple matches
760 760 # fall through to slow path that filters hidden revisions
761 761 pass
762 762 except (AttributeError, ValueError):
763 763 # we are pure python, or key was too short to search radix tree
764 764 pass
765 765
766 766 if id in self._pcache:
767 767 return self._pcache[id]
768 768
769 769 if len(id) < 40:
770 770 try:
771 771 # hex(node)[:...]
772 772 l = len(id) // 2 # grab an even number of digits
773 773 prefix = bin(id[:l * 2])
774 774 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
775 775 nl = [n for n in nl if hex(n).startswith(id) and
776 776 self.hasnode(n)]
777 777 if len(nl) > 0:
778 778 if len(nl) == 1:
779 779 self._pcache[id] = nl[0]
780 780 return nl[0]
781 781 raise LookupError(id, self.indexfile,
782 782 _('ambiguous identifier'))
783 783 return None
784 784 except TypeError:
785 785 pass
786 786
787 787 def lookup(self, id):
788 788 """locate a node based on:
789 789 - revision number or str(revision number)
790 790 - nodeid or subset of hex nodeid
791 791 """
792 792 n = self._match(id)
793 793 if n is not None:
794 794 return n
795 795 n = self._partialmatch(id)
796 796 if n:
797 797 return n
798 798
799 799 raise LookupError(id, self.indexfile, _('no match found'))
800 800
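lookup resolves several identifier forms through _match and _partialmatch. A sketch of the accepted keys, assuming rl is a revlog instance:

    n = rl.lookup(0)                   # revision number
    assert rl.lookup('0') == n         # str(revision number)
    assert rl.lookup(hex(n)) == n      # full 40-character hex nodeid
    assert rl.lookup(hex(n)[:8]) == n  # unambiguous hex prefix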
801 801 def cmp(self, node, text):
802 802 """compare text with a given file revision
803 803
804 804 returns True if text is different than what is stored.
805 805 """
806 806 p1, p2 = self.parents(node)
807 807 return hash(text, p1, p2) != node
808 808
809 809 def _addchunk(self, offset, data):
810 810 o, d = self._chunkcache
811 811 # try to add to existing cache
812 812 if o + len(d) == offset and len(d) + len(data) < _chunksize:
813 813 self._chunkcache = o, d + data
814 814 else:
815 815 self._chunkcache = offset, data
816 816
817 817 def _loadchunk(self, offset, length):
818 818 if self._inline:
819 819 df = self.opener(self.indexfile)
820 820 else:
821 821 df = self.opener(self.datafile)
822 822
823 823 readahead = max(65536, length)
824 824 df.seek(offset)
825 825 d = df.read(readahead)
826 826 df.close()
827 827 self._addchunk(offset, d)
828 828 if readahead > length:
829 829 return util.buffer(d, 0, length)
830 830 return d
831 831
832 832 def _getchunk(self, offset, length):
833 833 o, d = self._chunkcache
834 834 l = len(d)
835 835
836 836 # is it in the cache?
837 837 cachestart = offset - o
838 838 cacheend = cachestart + length
839 839 if cachestart >= 0 and cacheend <= l:
840 840 if cachestart == 0 and cacheend == l:
841 841 return d # avoid a copy
842 842 return util.buffer(d, cachestart, cacheend - cachestart)
843 843
844 844 return self._loadchunk(offset, length)
845 845
846 846 def _chunkraw(self, startrev, endrev):
847 847 start = self.start(startrev)
848 848 length = self.end(endrev) - start
849 849 if self._inline:
850 850 start += (startrev + 1) * self._io.size
851 851 return self._getchunk(start, length)
852 852
853 853 def _chunk(self, rev):
854 854 return decompress(self._chunkraw(rev, rev))
855 855
856 def _chunks(self, revs):
857 '''faster version of [self._chunk(rev) for rev in revs]
858
859 Assumes that revs is in ascending order.'''
860 start = self.start
861 length = self.length
862 inline = self._inline
863 iosize = self._io.size
864 getchunk = self._getchunk
865
866 l = []
867 ladd = l.append
868
869 for rev in revs:
870 chunkstart = start(rev)
871 if inline:
872 chunkstart += (rev + 1) * iosize
873 chunklength = length(rev)
874 ladd(decompress(getchunk(chunkstart, chunklength)))
875
876 return l
877
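This is the new fast path added by this changeset: it hoists the attribute lookups (start, length, _getchunk) out of the loop instead of re-resolving them per revision, and revision() below is switched over to it when rebuilding delta chains. A minimal sketch of the equivalence, assuming rl is a revlog and revs is an ascending list of revision numbers:

    bins_slow = [rl._chunk(r) for r in revs]   # old per-revision form
    bins_fast = rl._chunks(revs)               # new batched form
    assert map(str, bins_slow) == map(str, bins_fast)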
856 878 def _chunkbase(self, rev):
857 879 return self._chunk(rev)
858 880
859 881 def _chunkclear(self):
860 882 self._chunkcache = (0, '')
861 883
862 884 def deltaparent(self, rev):
863 885 """return deltaparent of the given revision"""
864 886 base = self.index[rev][3]
865 887 if base == rev:
866 888 return nullrev
867 889 elif self._generaldelta:
868 890 return base
869 891 else:
870 892 return rev - 1
871 893
872 894 def revdiff(self, rev1, rev2):
873 895 """return or calculate a delta between two revisions"""
874 896 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
875 897 return str(self._chunk(rev2))
876 898
877 899 return mdiff.textdiff(self.revision(rev1),
878 900 self.revision(rev2))
879 901
880 902 def revision(self, nodeorrev):
881 903 """return an uncompressed revision of a given node or revision
882 904 number.
883 905 """
884 906 if isinstance(nodeorrev, int):
885 907 rev = nodeorrev
886 908 node = self.node(rev)
887 909 else:
888 910 node = nodeorrev
889 911 rev = None
890 912
891 913 cachedrev = None
892 914 if node == nullid:
893 915 return ""
894 916 if self._cache:
895 917 if self._cache[0] == node:
896 918 return self._cache[2]
897 919 cachedrev = self._cache[1]
898 920
899 921 # look up what we need to read
900 922 text = None
901 923 if rev is None:
902 924 rev = self.rev(node)
903 925
904 926 # check rev flags
905 927 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
906 928 raise RevlogError(_('incompatible revision flag %x') %
907 929 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
908 930
909 931 # build delta chain
910 932 chain = []
911 933 index = self.index # for performance
912 934 generaldelta = self._generaldelta
913 935 iterrev = rev
914 936 e = index[iterrev]
915 937 while iterrev != e[3] and iterrev != cachedrev:
916 938 chain.append(iterrev)
917 939 if generaldelta:
918 940 iterrev = e[3]
919 941 else:
920 942 iterrev -= 1
921 943 e = index[iterrev]
922 944 chain.reverse()
923 945 base = iterrev
924 946
925 947 if iterrev == cachedrev:
926 948 # cache hit
927 949 text = self._cache[2]
928 950
929 951 # drop cache to save memory
930 952 self._cache = None
931 953
932 954 self._chunkraw(base, rev)
933 955 if text is None:
934 956 text = str(self._chunkbase(base))
935 957
936 bins = [self._chunk(r) for r in chain]
958 bins = self._chunks(chain)
937 959 text = mdiff.patches(text, bins)
938 960
939 961 text = self._checkhash(text, node, rev)
940 962
941 963 self._cache = (node, rev, text)
942 964 return text
943 965
944 966 def _checkhash(self, text, node, rev):
945 967 p1, p2 = self.parents(node)
946 968 self.checkhash(text, p1, p2, node, rev)
947 969 return text
948 970
949 971 def checkhash(self, text, p1, p2, node, rev=None):
950 972 if node != hash(text, p1, p2):
951 973 revornode = rev
952 974 if revornode is None:
953 975 revornode = templatefilters.short(hex(node))
954 976 raise RevlogError(_("integrity check failed on %s:%s")
955 977 % (self.indexfile, revornode))
956 978
957 979 def checkinlinesize(self, tr, fp=None):
958 980 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
959 981 return
960 982
961 983 trinfo = tr.find(self.indexfile)
962 984 if trinfo is None:
963 985 raise RevlogError(_("%s not found in the transaction")
964 986 % self.indexfile)
965 987
966 988 trindex = trinfo[2]
967 989 dataoff = self.start(trindex)
968 990
969 991 tr.add(self.datafile, dataoff)
970 992
971 993 if fp:
972 994 fp.flush()
973 995 fp.close()
974 996
975 997 df = self.opener(self.datafile, 'w')
976 998 try:
977 999 for r in self:
978 1000 df.write(self._chunkraw(r, r))
979 1001 finally:
980 1002 df.close()
981 1003
982 1004 fp = self.opener(self.indexfile, 'w', atomictemp=True)
983 1005 self.version &= ~(REVLOGNGINLINEDATA)
984 1006 self._inline = False
985 1007 for i in self:
986 1008 e = self._io.packentry(self.index[i], self.node, self.version, i)
987 1009 fp.write(e)
988 1010
989 1011 # if we don't call close, the temp file will never replace the
990 1012 # real index
991 1013 fp.close()
992 1014
993 1015 tr.replace(self.indexfile, trindex * self._io.size)
994 1016 self._chunkclear()
995 1017
996 1018 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
997 1019 node=None):
998 1020 """add a revision to the log
999 1021
1000 1022 text - the revision data to add
1001 1023 transaction - the transaction object used for rollback
1002 1024 link - the linkrev data to add
1003 1025 p1, p2 - the parent nodeids of the revision
1004 1026 cachedelta - an optional precomputed delta
1005 1027 node - nodeid of revision; typically node is not specified, and it is
1006 1028 computed by default as hash(text, p1, p2), however subclasses might
1007 1029 use a different hashing method (and override checkhash() in that case)
1008 1030 """
1009 1031 if link == nullrev:
1010 1032 raise RevlogError(_("attempted to add linkrev -1 to %s")
1011 1033 % self.indexfile)
1012 1034 node = node or hash(text, p1, p2)
1013 1035 if node in self.nodemap:
1014 1036 return node
1015 1037
1016 1038 dfh = None
1017 1039 if not self._inline:
1018 1040 dfh = self.opener(self.datafile, "a")
1019 1041 ifh = self.opener(self.indexfile, "a+")
1020 1042 try:
1021 1043 return self._addrevision(node, text, transaction, link, p1, p2,
1022 1044 cachedelta, ifh, dfh)
1023 1045 finally:
1024 1046 if dfh:
1025 1047 dfh.close()
1026 1048 ifh.close()
1027 1049
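A typical call shape from higher layers, with hypothetical names (tr is an open transaction, linkrev the changelog revision this entry belongs to):

    node = rl.addrevision(text, tr, linkrev, p1node, p2node)
    assert node in rl.nodemap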
1028 1050 def compress(self, text):
1029 1051 """ generate a possibly-compressed representation of text """
1030 1052 if not text:
1031 1053 return ("", text)
1032 1054 l = len(text)
1033 1055 bin = None
1034 1056 if l < 44:
1035 1057 pass
1036 1058 elif l > 1000000:
1037 1059 # zlib makes an internal copy, thus doubling memory usage for
1038 1060 # large files, so let's do this in pieces
1039 1061 z = zlib.compressobj()
1040 1062 p = []
1041 1063 pos = 0
1042 1064 while pos < l:
1043 1065 pos2 = pos + 2**20
1044 1066 p.append(z.compress(text[pos:pos2]))
1045 1067 pos = pos2
1046 1068 p.append(z.flush())
1047 1069 if sum(map(len, p)) < l:
1048 1070 bin = "".join(p)
1049 1071 else:
1050 1072 bin = _compress(text)
1051 1073 if bin is None or len(bin) > l:
1052 1074 if text[0] == '\0':
1053 1075 return ("", text)
1054 1076 return ('u', text)
1055 1077 return ("", bin)
1056 1078
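compress returns a (header, data) pair whose concatenation decompress inverts: short texts (under 44 bytes) and texts zlib cannot shrink are stored with a 'u' (or empty) header rather than compressed. A round-trip sketch, assuming rl is a revlog instance:

    text = 'revision text long enough to be worth running through zlib...'
    header, data = rl.compress(text)
    assert decompress(header + data) == text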
1057 1079 def _addrevision(self, node, text, transaction, link, p1, p2,
1058 1080 cachedelta, ifh, dfh):
1059 1081 """internal function to add revisions to the log
1060 1082
1061 1083 see addrevision for argument descriptions.
1062 1084 invariants:
1063 1085 - text is optional (can be None); if not set, cachedelta must be set.
1064 1086 if both are set, they must correspond to each other.
1065 1087 """
1066 1088 btext = [text]
1067 1089 def buildtext():
1068 1090 if btext[0] is not None:
1069 1091 return btext[0]
1070 1092 # flush any pending writes here so we can read it in revision
1071 1093 if dfh:
1072 1094 dfh.flush()
1073 1095 ifh.flush()
1074 1096 basetext = self.revision(self.node(cachedelta[0]))
1075 1097 btext[0] = mdiff.patch(basetext, cachedelta[1])
1076 1098 self.checkhash(btext[0], p1, p2, node)
1077 1099 return btext[0]
1078 1100
1079 1101 def builddelta(rev):
1080 1102 # can we use the cached delta?
1081 1103 if cachedelta and cachedelta[0] == rev:
1082 1104 delta = cachedelta[1]
1083 1105 else:
1084 1106 t = buildtext()
1085 1107 ptext = self.revision(self.node(rev))
1086 1108 delta = mdiff.textdiff(ptext, t)
1087 1109 data = self.compress(delta)
1088 1110 l = len(data[1]) + len(data[0])
1089 1111 if basecache[0] == rev:
1090 1112 chainbase = basecache[1]
1091 1113 else:
1092 1114 chainbase = self.chainbase(rev)
1093 1115 dist = l + offset - self.start(chainbase)
1094 1116 if self._generaldelta:
1095 1117 base = rev
1096 1118 else:
1097 1119 base = chainbase
1098 1120 return dist, l, data, base, chainbase
1099 1121
1100 1122 curr = len(self)
1101 1123 prev = curr - 1
1102 1124 base = chainbase = curr
1103 1125 offset = self.end(prev)
1104 1126 flags = 0
1105 1127 d = None
1106 1128 basecache = self._basecache
1107 1129 p1r, p2r = self.rev(p1), self.rev(p2)
1108 1130
1109 1131 # should we try to build a delta?
1110 1132 if prev != nullrev:
1111 1133 if self._generaldelta:
1112 1134 if p1r >= basecache[1]:
1113 1135 d = builddelta(p1r)
1114 1136 elif p2r >= basecache[1]:
1115 1137 d = builddelta(p2r)
1116 1138 else:
1117 1139 d = builddelta(prev)
1118 1140 else:
1119 1141 d = builddelta(prev)
1120 1142 dist, l, data, base, chainbase = d
1121 1143
1122 1144 # full versions are inserted when the needed deltas
1123 1145 # become comparable to the uncompressed text
1124 1146 if text is None:
1125 1147 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1126 1148 cachedelta[1])
1127 1149 else:
1128 1150 textlen = len(text)
1129 1151 if d is None or dist > textlen * 2:
1130 1152 text = buildtext()
1131 1153 data = self.compress(text)
1132 1154 l = len(data[1]) + len(data[0])
1133 1155 base = chainbase = curr
1134 1156
1135 1157 e = (offset_type(offset, flags), l, textlen,
1136 1158 base, link, p1r, p2r, node)
1137 1159 self.index.insert(-1, e)
1138 1160 self.nodemap[node] = curr
1139 1161
1140 1162 entry = self._io.packentry(e, self.node, self.version, curr)
1141 1163 if not self._inline:
1142 1164 transaction.add(self.datafile, offset)
1143 1165 transaction.add(self.indexfile, curr * len(entry))
1144 1166 if data[0]:
1145 1167 dfh.write(data[0])
1146 1168 dfh.write(data[1])
1147 1169 dfh.flush()
1148 1170 ifh.write(entry)
1149 1171 else:
1150 1172 offset += curr * self._io.size
1151 1173 transaction.add(self.indexfile, offset, curr)
1152 1174 ifh.write(entry)
1153 1175 ifh.write(data[0])
1154 1176 ifh.write(data[1])
1155 1177 self.checkinlinesize(transaction, ifh)
1156 1178
1157 1179 if type(text) == str: # only accept immutable objects
1158 1180 self._cache = (node, curr, text)
1159 1181 self._basecache = (curr, chainbase)
1160 1182 return node
1161 1183
1162 1184 def addgroup(self, bundle, linkmapper, transaction):
1163 1185 """
1164 1186 add a delta group
1165 1187
1166 1188 given a set of deltas, add them to the revision log. the
1167 1189 first delta is against its parent, which should be in our
1168 1190 log, the rest are against the previous delta.
1169 1191 """
1170 1192
1171 1193 # track the base of the current delta log
1172 1194 content = []
1173 1195 node = None
1174 1196
1175 1197 r = len(self)
1176 1198 end = 0
1177 1199 if r:
1178 1200 end = self.end(r - 1)
1179 1201 ifh = self.opener(self.indexfile, "a+")
1180 1202 isize = r * self._io.size
1181 1203 if self._inline:
1182 1204 transaction.add(self.indexfile, end + isize, r)
1183 1205 dfh = None
1184 1206 else:
1185 1207 transaction.add(self.indexfile, isize, r)
1186 1208 transaction.add(self.datafile, end)
1187 1209 dfh = self.opener(self.datafile, "a")
1188 1210
1189 1211 try:
1190 1212 # loop through our set of deltas
1191 1213 chain = None
1192 1214 while True:
1193 1215 chunkdata = bundle.deltachunk(chain)
1194 1216 if not chunkdata:
1195 1217 break
1196 1218 node = chunkdata['node']
1197 1219 p1 = chunkdata['p1']
1198 1220 p2 = chunkdata['p2']
1199 1221 cs = chunkdata['cs']
1200 1222 deltabase = chunkdata['deltabase']
1201 1223 delta = chunkdata['delta']
1202 1224
1203 1225 content.append(node)
1204 1226
1205 1227 link = linkmapper(cs)
1206 1228 if node in self.nodemap:
1207 1229 # this can happen if two branches make the same change
1208 1230 chain = node
1209 1231 continue
1210 1232
1211 1233 for p in (p1, p2):
1212 1234 if p not in self.nodemap:
1213 1235 raise LookupError(p, self.indexfile,
1214 1236 _('unknown parent'))
1215 1237
1216 1238 if deltabase not in self.nodemap:
1217 1239 raise LookupError(deltabase, self.indexfile,
1218 1240 _('unknown delta base'))
1219 1241
1220 1242 baserev = self.rev(deltabase)
1221 1243 chain = self._addrevision(node, None, transaction, link,
1222 1244 p1, p2, (baserev, delta), ifh, dfh)
1223 1245 if not dfh and not self._inline:
1224 1246 # addrevision switched from inline to conventional
1225 1247 # reopen the index
1226 1248 ifh.close()
1227 1249 dfh = self.opener(self.datafile, "a")
1228 1250 ifh = self.opener(self.indexfile, "a")
1229 1251 finally:
1230 1252 if dfh:
1231 1253 dfh.close()
1232 1254 ifh.close()
1233 1255
1234 1256 return content
1235 1257
1236 1258 def strip(self, minlink, transaction):
1237 1259 """truncate the revlog on the first revision with a linkrev >= minlink
1238 1260
1239 1261 This function is called when we're stripping revision minlink and
1240 1262 its descendants from the repository.
1241 1263
1242 1264 We have to remove all revisions with linkrev >= minlink, because
1243 1265 the equivalent changelog revisions will be renumbered after the
1244 1266 strip.
1245 1267
1246 1268 So we truncate the revlog on the first of these revisions, and
1247 1269 trust that the caller has saved the revisions that shouldn't be
1248 1270 removed and that it'll re-add them after this truncation.
1249 1271 """
1250 1272 if len(self) == 0:
1251 1273 return
1252 1274
1253 1275 for rev in self:
1254 1276 if self.index[rev][4] >= minlink:
1255 1277 break
1256 1278 else:
1257 1279 return
1258 1280
1259 1281 # first truncate the files on disk
1260 1282 end = self.start(rev)
1261 1283 if not self._inline:
1262 1284 transaction.add(self.datafile, end)
1263 1285 end = rev * self._io.size
1264 1286 else:
1265 1287 end += rev * self._io.size
1266 1288
1267 1289 transaction.add(self.indexfile, end)
1268 1290
1269 1291 # then reset internal state in memory to forget those revisions
1270 1292 self._cache = None
1271 1293 self._chunkclear()
1272 1294 for x in xrange(rev, len(self)):
1273 1295 del self.nodemap[self.node(x)]
1274 1296
1275 1297 del self.index[rev:-1]
1276 1298
1277 1299 def checksize(self):
1278 1300 expected = 0
1279 1301 if len(self):
1280 1302 expected = max(0, self.end(len(self) - 1))
1281 1303
1282 1304 try:
1283 1305 f = self.opener(self.datafile)
1284 1306 f.seek(0, 2)
1285 1307 actual = f.tell()
1286 1308 f.close()
1287 1309 dd = actual - expected
1288 1310 except IOError, inst:
1289 1311 if inst.errno != errno.ENOENT:
1290 1312 raise
1291 1313 dd = 0
1292 1314
1293 1315 try:
1294 1316 f = self.opener(self.indexfile)
1295 1317 f.seek(0, 2)
1296 1318 actual = f.tell()
1297 1319 f.close()
1298 1320 s = self._io.size
1299 1321 i = max(0, actual // s)
1300 1322 di = actual - (i * s)
1301 1323 if self._inline:
1302 1324 databytes = 0
1303 1325 for r in self:
1304 1326 databytes += max(0, self.length(r))
1305 1327 dd = 0
1306 1328 di = actual - len(self) * s - databytes
1307 1329 except IOError, inst:
1308 1330 if inst.errno != errno.ENOENT:
1309 1331 raise
1310 1332 di = 0
1311 1333
1312 1334 return (dd, di)
1313 1335
1314 1336 def files(self):
1315 1337 res = [self.indexfile]
1316 1338 if not self._inline:
1317 1339 res.append(self.datafile)
1318 1340 return res