revlog: support reading generaldelta revlogs...
Sune Foldager
r14253:c28d5200 default
@@ -1,1240 +1,1249 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGGENERALDELTA = (1 << 17)
30 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOGNG_FLAGS = REVLOGNGINLINEDATA
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
34 35
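For orientation: the version word in the first four bytes of an index file carries the format number in its low 16 bits and feature flags above that, so the constants just defined compose as below (illustrative check, not part of the module):

    assert REVLOGNG | REVLOGNGINLINEDATA | REVLOGGENERALDELTA == 0x30001
    assert 0x30001 & 0xFFFF == REVLOGNG         # format number
    assert 0x30001 & ~0xFFFF == REVLOGNG_FLAGS  # feature flag bits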
35 36 # revlog index flags
36 37 REVIDX_KNOWN_FLAGS = 0
37 38
38 39 # max size of revlog with inline data
39 40 _maxinline = 131072
40 41 _chunksize = 1048576
41 42
42 43 RevlogError = error.RevlogError
43 44 LookupError = error.LookupError
44 45
45 46 def getoffset(q):
46 47 return int(q >> 16)
47 48
48 49 def gettype(q):
49 50 return int(q & 0xFFFF)
50 51
51 52 def offset_type(offset, type):
52 53 return long(long(offset) << 16 | type)
53 54
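These three helpers pack a file offset and 16 bits of per-revision flags into a single integer; a quick illustrative round trip using the functions just defined:

    q = offset_type(1024, 1)   # offset in the high bits, flags in the low 16
    assert getoffset(q) == 1024
    assert gettype(q) == 1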
54 55 nullhash = _sha(nullid)
55 56
56 57 def hash(text, p1, p2):
57 58 """generate a hash from the given text and its parent hashes
58 59
59 60 This hash combines both the current file contents and its history
60 61 in a manner that makes it easy to distinguish nodes with the same
61 62 content in the revision graph.
62 63 """
63 64 # As of now, if one of the parent nodes is null, p2 is null
64 65 if p2 == nullid:
65 66 # deep copy of a hash is faster than creating one
66 67 s = nullhash.copy()
67 68 s.update(p1)
68 69 else:
69 70 # none of the parent nodes are nullid
70 71 l = [p1, p2]
71 72 l.sort()
72 73 s = _sha(l[0])
73 74 s.update(l[1])
74 75 s.update(text)
75 76 return s.digest()
76 77
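For two non-null parents the hash defined above reduces to sha1 over the sorted parents followed by the text; an equivalent standalone computation (illustrative only, Python 2 byte strings, fabricated parent ids):

    import hashlib
    p1, p2 = 'a' * 20, 'b' * 20   # fabricated parent nodes for the demo
    assert hash('data', p1, p2) == hashlib.sha1(p1 + p2 + 'data').digest()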
77 78 def compress(text):
78 79 """ generate a possibly-compressed representation of text """
79 80 if not text:
80 81 return ("", text)
81 82 l = len(text)
82 83 bin = None
83 84 if l < 44:
84 85 pass
85 86 elif l > 1000000:
86 87 # zlib makes an internal copy, thus doubling memory usage for
87 88 # large files, so let's do this in pieces
88 89 z = zlib.compressobj()
89 90 p = []
90 91 pos = 0
91 92 while pos < l:
92 93 pos2 = pos + 2**20
93 94 p.append(z.compress(text[pos:pos2]))
94 95 pos = pos2
95 96 p.append(z.flush())
96 97 if sum(map(len, p)) < l:
97 98 bin = "".join(p)
98 99 else:
99 100 bin = _compress(text)
100 101 if bin is None or len(bin) > l:
101 102 if text[0] == '\0':
102 103 return ("", text)
103 104 return ('u', text)
104 105 return ("", bin)
105 106
106 107 def decompress(bin):
107 108 """ decompress the given input """
108 109 if not bin:
109 110 return bin
110 111 t = bin[0]
111 112 if t == '\0':
112 113 return bin
113 114 if t == 'x':
114 115 return _decompress(bin)
115 116 if t == 'u':
116 117 return bin[1:]
117 118 raise RevlogError(_("unknown compression type %r") % t)
118 119
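The pair above tags each stored chunk so the reader can tell raw text from zlib data: zlib output starts with 'x', verbatim text gets a 'u' prefix, and text already beginning with '\0' needs no tag. A sketch of the round trip using the two functions just defined:

    h, d = compress('a' * 1000)        # compressible -> zlib, empty tag
    assert h == '' and d.startswith('x')
    assert decompress(h + d) == 'a' * 1000
    h, d = compress('abc')             # under 44 bytes -> stored verbatim
    assert (h, d) == ('u', 'abc')
    assert decompress(h + d) == 'abc'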
119 120 indexformatv0 = ">4l20s20s20s"
120 121 v0shaoffset = 56
121 122
122 123 class revlogoldio(object):
123 124 def __init__(self):
124 125 self.size = struct.calcsize(indexformatv0)
125 126
126 127 def parseindex(self, data, inline):
127 128 s = self.size
128 129 index = []
129 130 nodemap = {nullid: nullrev}
130 131 n = off = 0
131 132 l = len(data)
132 133 while off + s <= l:
133 134 cur = data[off:off + s]
134 135 off += s
135 136 e = _unpack(indexformatv0, cur)
136 137 # transform to revlogv1 format
137 138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
138 139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
139 140 index.append(e2)
140 141 nodemap[e[6]] = n
141 142 n += 1
142 143
143 144 # add the magic null revision at -1
144 145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
145 146
146 147 return index, nodemap, None
147 148
148 149 def packentry(self, entry, node, version, rev):
149 150 if gettype(entry[0]):
150 151 raise RevlogError(_("index entry flags need RevlogNG"))
151 152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
152 153 node(entry[5]), node(entry[6]), entry[7])
153 154 return _pack(indexformatv0, *e2)
154 155
155 156 # index ng:
156 157 # 6 bytes: offset
157 158 # 2 bytes: flags
158 159 # 4 bytes: compressed length
159 160 # 4 bytes: uncompressed length
160 161 # 4 bytes: base rev
161 162 # 4 bytes: link rev
162 163 # 4 bytes: parent 1 rev
163 164 # 4 bytes: parent 2 rev
164 165 # 32 bytes: nodeid
165 166 indexformatng = ">Qiiiiii20s12x"
166 167 ngshaoffset = 32
167 168 versionformat = ">I"
168 169
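Each RevlogNG entry thus packs to a fixed 64 bytes: 8 for offset/flags, six 4-byte integers, the 20-byte nodeid, and 12 bytes of padding. A one-line sanity check against the format string defined above:

    import struct
    assert struct.calcsize(indexformatng) == 64   # 8 + 6*4 + 20 + 12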
169 170 class revlogio(object):
170 171 def __init__(self):
171 172 self.size = struct.calcsize(indexformatng)
172 173
173 174 def parseindex(self, data, inline):
174 175 # call the C implementation to parse the index data
175 176 index, cache = parsers.parse_index2(data, inline)
176 177 return index, None, cache
177 178
178 179 def packentry(self, entry, node, version, rev):
179 180 p = _pack(indexformatng, *entry)
180 181 if rev == 0:
181 182 p = _pack(versionformat, version) + p[4:]
182 183 return p
183 184
184 185 class revlog(object):
185 186 """
186 187 the underlying revision storage object
187 188
188 189 A revlog consists of two parts, an index and the revision data.
189 190
190 191 The index is a file with a fixed record size containing
191 192 information on each revision, including its nodeid (hash), the
192 193 nodeids of its parents, the position and offset of its data within
193 194 the data file, and the revision it's based on. Finally, each entry
194 195 contains a linkrev entry that can serve as a pointer to external
195 196 data.
196 197
197 198 The revision data itself is a linear collection of data chunks.
198 199 Each chunk represents a revision and is usually represented as a
199 200 delta against the previous chunk. To bound lookup time, runs of
200 201 deltas are limited to about 2 times the length of the original
201 202 version data. This makes retrieval of a version proportional to
202 203 its size, or O(1) relative to the number of revisions.
203 204
204 205 Both pieces of the revlog are written to in an append-only
205 206 fashion, which means we never need to rewrite a file to insert or
206 207 remove data, and can use some simple techniques to avoid the need
207 208 for locking while reading.
208 209 """
209 210 def __init__(self, opener, indexfile):
210 211 """
211 212 create a revlog object
212 213
213 214 opener is a function that abstracts the file opening operation
214 215 and can be used to implement COW semantics or the like.
215 216 """
216 217 self.indexfile = indexfile
217 218 self.datafile = indexfile[:-2] + ".d"
218 219 self.opener = opener
219 220 self._cache = None
220 221 self._basecache = None
221 222 self._chunkcache = (0, '')
222 223 self.index = []
223 224 self._pcache = {}
224 225 self._nodecache = {nullid: nullrev}
225 226 self._nodepos = None
226 227
227 228 v = REVLOG_DEFAULT_VERSION
228 229 if hasattr(opener, 'options') and 'defversion' in opener.options:
229 230 v = opener.options['defversion']
230 231 if v & REVLOGNG:
231 232 v |= REVLOGNGINLINEDATA
232 233
233 234 i = ''
234 235 try:
235 236 f = self.opener(self.indexfile)
236 237 i = f.read()
237 238 f.close()
238 239 if len(i) > 0:
239 240 v = struct.unpack(versionformat, i[:4])[0]
240 241 except IOError, inst:
241 242 if inst.errno != errno.ENOENT:
242 243 raise
243 244
244 245 self.version = v
245 246 self._inline = v & REVLOGNGINLINEDATA
247 self._generaldelta = v & REVLOGGENERALDELTA
246 248 flags = v & ~0xFFFF
247 249 fmt = v & 0xFFFF
248 250 if fmt == REVLOGV0 and flags:
249 251 raise RevlogError(_("index %s unknown flags %#04x for format v0")
250 252 % (self.indexfile, flags >> 16))
251 253 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
252 254 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
253 255 % (self.indexfile, flags >> 16))
254 256 elif fmt > REVLOGNG:
255 257 raise RevlogError(_("index %s unknown format %d")
256 258 % (self.indexfile, fmt))
257 259
258 260 self._io = revlogio()
259 261 if self.version == REVLOGV0:
260 262 self._io = revlogoldio()
261 263 try:
262 264 d = self._io.parseindex(i, self._inline)
263 265 except (ValueError, IndexError):
264 266 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
265 267 self.index, nodemap, self._chunkcache = d
266 268 if nodemap is not None:
267 269 self.nodemap = self._nodecache = nodemap
268 270 if not self._chunkcache:
269 271 self._chunkclear()
270 272
271 273 def tip(self):
272 274 return self.node(len(self.index) - 2)
273 275 def __len__(self):
274 276 return len(self.index) - 1
275 277 def __iter__(self):
276 278 for i in xrange(len(self)):
277 279 yield i
278 280
279 281 @util.propertycache
280 282 def nodemap(self):
281 283 self.rev(self.node(0))
282 284 return self._nodecache
283 285
284 286 def rev(self, node):
285 287 try:
286 288 return self._nodecache[node]
287 289 except KeyError:
288 290 n = self._nodecache
289 291 i = self.index
290 292 p = self._nodepos
291 293 if p is None:
292 294 p = len(i) - 2
293 295 for r in xrange(p, -1, -1):
294 296 v = i[r][7]
295 297 n[v] = r
296 298 if v == node:
297 299 self._nodepos = r - 1
298 300 return r
299 301 raise LookupError(node, self.indexfile, _('no node'))
300 302
301 303 def node(self, rev):
302 304 return self.index[rev][7]
303 305 def linkrev(self, rev):
304 306 return self.index[rev][4]
305 307 def parents(self, node):
306 308 i = self.index
307 309 d = i[self.rev(node)]
308 310 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
309 311 def parentrevs(self, rev):
310 312 return self.index[rev][5:7]
311 313 def start(self, rev):
312 314 return int(self.index[rev][0] >> 16)
313 315 def end(self, rev):
314 316 return self.start(rev) + self.length(rev)
315 317 def length(self, rev):
316 318 return self.index[rev][1]
317 319 def chainbase(self, rev):
318 320 index = self.index
319 321 base = index[rev][3]
320 322 while base != rev:
321 323 rev = base
322 324 base = index[rev][3]
323 325 return base
324 326 def flags(self, rev):
325 327 return self.index[rev][0] & 0xFFFF
326 328 def rawsize(self, rev):
327 329 """return the length of the uncompressed text for a given revision"""
328 330 l = self.index[rev][2]
329 331 if l >= 0:
330 332 return l
331 333
332 334 t = self.revision(self.node(rev))
333 335 return len(t)
334 336 size = rawsize
335 337
336 338 def reachable(self, node, stop=None):
337 339 """return the set of all nodes ancestral to a given node, including
338 340 the node itself, stopping when stop is matched"""
339 341 reachable = set((node,))
340 342 visit = [node]
341 343 if stop:
342 344 stopn = self.rev(stop)
343 345 else:
344 346 stopn = 0
345 347 while visit:
346 348 n = visit.pop(0)
347 349 if n == stop:
348 350 continue
349 351 if n == nullid:
350 352 continue
351 353 for p in self.parents(n):
352 354 if self.rev(p) < stopn:
353 355 continue
354 356 if p not in reachable:
355 357 reachable.add(p)
356 358 visit.append(p)
357 359 return reachable
358 360
359 361 def ancestors(self, *revs):
360 362 """Generate the ancestors of 'revs' in reverse topological order.
361 363
362 364 Yield a sequence of revision numbers starting with the parents
363 365 of each revision in revs, i.e., each revision is *not* considered
364 366 an ancestor of itself. Results are in breadth-first order:
365 367 parents of each rev in revs, then parents of those, etc. Result
366 368 does not include the null revision."""
367 369 visit = list(revs)
368 370 seen = set([nullrev])
369 371 while visit:
370 372 for parent in self.parentrevs(visit.pop(0)):
371 373 if parent not in seen:
372 374 visit.append(parent)
373 375 seen.add(parent)
374 376 yield parent
375 377
376 378 def descendants(self, *revs):
377 379 """Generate the descendants of 'revs' in revision order.
378 380
379 381 Yield a sequence of revision numbers starting with a child of
380 382 some rev in revs, i.e., each revision is *not* considered a
381 383 descendant of itself. Results are ordered by revision number (a
382 384 topological sort)."""
383 385 first = min(revs)
384 386 if first == nullrev:
385 387 for i in self:
386 388 yield i
387 389 return
388 390
389 391 seen = set(revs)
390 392 for i in xrange(first + 1, len(self)):
391 393 for x in self.parentrevs(i):
392 394 if x != nullrev and x in seen:
393 395 seen.add(i)
394 396 yield i
395 397 break
396 398
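On a hypothetical linear history 0 -> 1 -> 2 the two generators above behave as follows (rl stands for any revlog with that shape; illustrative only):

    assert list(rl.ancestors(2)) == [1, 0]    # parents first, 2 itself excluded
    assert list(rl.descendants(0)) == [1, 2]  # revision order, 0 itself excluded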
397 399 def findcommonmissing(self, common=None, heads=None):
398 400 """Return a tuple of the ancestors of common and the ancestors of heads
399 401 that are not ancestors of common.
400 402
401 403 More specifically, the second element is a list of nodes N such that
402 404 every N satisfies the following constraints:
403 405
404 406 1. N is an ancestor of some node in 'heads'
405 407 2. N is not an ancestor of any node in 'common'
406 408
407 409 The list is sorted by revision number, meaning it is
408 410 topologically sorted.
409 411
410 412 'heads' and 'common' are both lists of node IDs. If heads is
411 413 not supplied, uses all of the revlog's heads. If common is not
412 414 supplied, uses nullid."""
413 415 if common is None:
414 416 common = [nullid]
415 417 if heads is None:
416 418 heads = self.heads()
417 419
418 420 common = [self.rev(n) for n in common]
419 421 heads = [self.rev(n) for n in heads]
420 422
421 423 # we want the ancestors, but inclusive
422 424 has = set(self.ancestors(*common))
423 425 has.add(nullrev)
424 426 has.update(common)
425 427
426 428 # take all ancestors from heads that aren't in has
427 429 missing = set()
428 430 visit = [r for r in heads if r not in has]
429 431 while visit:
430 432 r = visit.pop(0)
431 433 if r in missing:
432 434 continue
433 435 else:
434 436 missing.add(r)
435 437 for p in self.parentrevs(r):
436 438 if p not in has:
437 439 visit.append(p)
438 440 missing = list(missing)
439 441 missing.sort()
440 442 return has, [self.node(r) for r in missing]
441 443
442 444 def findmissing(self, common=None, heads=None):
443 445 """Return the ancestors of heads that are not ancestors of common.
444 446
445 447 More specifically, return a list of nodes N such that every N
446 448 satisfies the following constraints:
447 449
448 450 1. N is an ancestor of some node in 'heads'
449 451 2. N is not an ancestor of any node in 'common'
450 452
451 453 The list is sorted by revision number, meaning it is
452 454 topologically sorted.
453 455
454 456 'heads' and 'common' are both lists of node IDs. If heads is
455 457 not supplied, uses all of the revlog's heads. If common is not
456 458 supplied, uses nullid."""
457 459 _common, missing = self.findcommonmissing(common, heads)
458 460 return missing
459 461
460 462 def nodesbetween(self, roots=None, heads=None):
461 463 """Return a topological path from 'roots' to 'heads'.
462 464
463 465 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
464 466 topologically sorted list of all nodes N that satisfy both of
465 467 these constraints:
466 468
467 469 1. N is a descendant of some node in 'roots'
468 470 2. N is an ancestor of some node in 'heads'
469 471
470 472 Every node is considered to be both a descendant and an ancestor
471 473 of itself, so every reachable node in 'roots' and 'heads' will be
472 474 included in 'nodes'.
473 475
474 476 'outroots' is the list of reachable nodes in 'roots', i.e., the
475 477 subset of 'roots' that is returned in 'nodes'. Likewise,
476 478 'outheads' is the subset of 'heads' that is also in 'nodes'.
477 479
478 480 'roots' and 'heads' are both lists of node IDs. If 'roots' is
479 481 unspecified, uses nullid as the only root. If 'heads' is
480 482 unspecified, uses list of all of the revlog's heads."""
481 483 nonodes = ([], [], [])
482 484 if roots is not None:
483 485 roots = list(roots)
484 486 if not roots:
485 487 return nonodes
486 488 lowestrev = min([self.rev(n) for n in roots])
487 489 else:
488 490 roots = [nullid] # Everybody's a descendent of nullid
489 491 lowestrev = nullrev
490 492 if (lowestrev == nullrev) and (heads is None):
491 493 # We want _all_ the nodes!
492 494 return ([self.node(r) for r in self], [nullid], list(self.heads()))
493 495 if heads is None:
494 496 # All nodes are ancestors, so the latest ancestor is the last
495 497 # node.
496 498 highestrev = len(self) - 1
497 499 # Set ancestors to None to signal that every node is an ancestor.
498 500 ancestors = None
499 501 # Set heads to an empty dictionary for later discovery of heads
500 502 heads = {}
501 503 else:
502 504 heads = list(heads)
503 505 if not heads:
504 506 return nonodes
505 507 ancestors = set()
506 508 # Turn heads into a dictionary so we can remove 'fake' heads.
507 509 # Also, later we will be using it to filter out the heads we can't
508 510 # find from roots.
509 511 heads = dict.fromkeys(heads, False)
510 512 # Start at the top and keep marking parents until we're done.
511 513 nodestotag = set(heads)
512 514 # Remember where the top was so we can use it as a limit later.
513 515 highestrev = max([self.rev(n) for n in nodestotag])
514 516 while nodestotag:
515 517 # grab a node to tag
516 518 n = nodestotag.pop()
517 519 # Never tag nullid
518 520 if n == nullid:
519 521 continue
520 522 # A node's revision number represents its place in a
521 523 # topologically sorted list of nodes.
522 524 r = self.rev(n)
523 525 if r >= lowestrev:
524 526 if n not in ancestors:
525 527 # If we are possibly a descendent of one of the roots
526 528 # and we haven't already been marked as an ancestor
527 529 ancestors.add(n) # Mark as ancestor
528 530 # Add non-nullid parents to list of nodes to tag.
529 531 nodestotag.update([p for p in self.parents(n) if
530 532 p != nullid])
531 533 elif n in heads: # We've seen it before, is it a fake head?
532 534 # So it is, real heads should not be the ancestors of
533 535 # any other heads.
534 536 heads.pop(n)
535 537 if not ancestors:
536 538 return nonodes
537 539 # Now that we have our set of ancestors, we want to remove any
538 540 # roots that are not ancestors.
539 541
540 542 # If one of the roots was nullid, everything is included anyway.
541 543 if lowestrev > nullrev:
542 544 # But, since we weren't, let's recompute the lowest rev to not
543 545 # include roots that aren't ancestors.
544 546
545 547 # Filter out roots that aren't ancestors of heads
546 548 roots = [n for n in roots if n in ancestors]
547 549 # Recompute the lowest revision
548 550 if roots:
549 551 lowestrev = min([self.rev(n) for n in roots])
550 552 else:
551 553 # No more roots? Return empty list
552 554 return nonodes
553 555 else:
554 556 # We are descending from nullid, and don't need to care about
555 557 # any other roots.
556 558 lowestrev = nullrev
557 559 roots = [nullid]
558 560 # Transform our roots list into a set.
559 561 descendents = set(roots)
560 562 # Also, keep the original roots so we can filter out roots that aren't
561 563 # 'real' roots (i.e. are descended from other roots).
562 564 roots = descendents.copy()
563 565 # Our topologically sorted list of output nodes.
564 566 orderedout = []
565 567 # Don't start at nullid since we don't want nullid in our output list,
566 568 # and if nullid shows up in descendents, empty parents will look like
567 569 # they're descendents.
568 570 for r in xrange(max(lowestrev, 0), highestrev + 1):
569 571 n = self.node(r)
570 572 isdescendent = False
571 573 if lowestrev == nullrev: # Everybody is a descendent of nullid
572 574 isdescendent = True
573 575 elif n in descendents:
574 576 # n is already a descendent
575 577 isdescendent = True
576 578 # This check only needs to be done here because all the roots
577 579 # will start being marked as descendents before the loop.
578 580 if n in roots:
579 581 # If n was a root, check if it's a 'real' root.
580 582 p = tuple(self.parents(n))
581 583 # If any of its parents are descendents, it's not a root.
582 584 if (p[0] in descendents) or (p[1] in descendents):
583 585 roots.remove(n)
584 586 else:
585 587 p = tuple(self.parents(n))
586 588 # A node is a descendent if either of its parents are
587 589 # descendents. (We seeded the descendents set with the roots
588 590 # up there, remember?)
589 591 if (p[0] in descendents) or (p[1] in descendents):
590 592 descendents.add(n)
591 593 isdescendent = True
592 594 if isdescendent and ((ancestors is None) or (n in ancestors)):
593 595 # Only include nodes that are both descendents and ancestors.
594 596 orderedout.append(n)
595 597 if (ancestors is not None) and (n in heads):
596 598 # We're trying to figure out which heads are reachable
597 599 # from roots.
598 600 # Mark this head as having been reached
599 601 heads[n] = True
600 602 elif ancestors is None:
601 603 # Otherwise, we're trying to discover the heads.
602 604 # Assume this is a head because if it isn't, the next step
603 605 # will eventually remove it.
604 606 heads[n] = True
605 607 # But, obviously its parents aren't.
606 608 for p in self.parents(n):
607 609 heads.pop(p, None)
608 610 heads = [n for n, flag in heads.iteritems() if flag]
609 611 roots = list(roots)
610 612 assert orderedout
611 613 assert roots
612 614 assert heads
613 615 return (orderedout, roots, heads)
614 616
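A usage sketch for nodesbetween on the same hypothetical linear history, with n0, n1, n2 standing for the three node ids (illustrative only):

    nodes, outroots, outheads = rl.nodesbetween([n0], [n2])
    assert nodes == [n0, n1, n2]                  # topologically sorted path
    assert outroots == [n0] and outheads == [n2]  # reachable subset of inputs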
615 617 def headrevs(self):
616 618 count = len(self)
617 619 if not count:
618 620 return [nullrev]
619 621 ishead = [1] * (count + 1)
620 622 index = self.index
621 623 for r in xrange(count):
622 624 e = index[r]
623 625 ishead[e[5]] = ishead[e[6]] = 0
624 626 return [r for r in xrange(count) if ishead[r]]
625 627
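headrevs clears a flag for every revision that appears as a parent; ishead deliberately has count + 1 slots so a nullrev parent (-1) harmlessly clears the spare last slot. The same pass as a standalone sketch (illustrative, not part of the module):

    def _headrevs(parentrevs, count):
        ishead = [1] * (count + 1)        # spare slot absorbs nullrev (-1)
        for r in xrange(count):
            p1, p2 = parentrevs(r)
            ishead[p1] = ishead[p2] = 0
        return [r for r in xrange(count) if ishead[r]]

    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
    assert _headrevs(parents.get, 4) == [2, 3]   # two branches off rev 0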
626 628 def heads(self, start=None, stop=None):
627 629 """return the list of all nodes that have no children
628 630
629 631 if start is specified, only heads that are descendants of
630 632 start will be returned
631 633 if stop is specified, it will consider all the revs from stop
632 634 as if they had no children
633 635 """
634 636 if start is None and stop is None:
635 637 if not len(self):
636 638 return [nullid]
637 639 return [self.node(r) for r in self.headrevs()]
638 640
639 641 if start is None:
640 642 start = nullid
641 643 if stop is None:
642 644 stop = []
643 645 stoprevs = set([self.rev(n) for n in stop])
644 646 startrev = self.rev(start)
645 647 reachable = set((startrev,))
646 648 heads = set((startrev,))
647 649
648 650 parentrevs = self.parentrevs
649 651 for r in xrange(startrev + 1, len(self)):
650 652 for p in parentrevs(r):
651 653 if p in reachable:
652 654 if r not in stoprevs:
653 655 reachable.add(r)
654 656 heads.add(r)
655 657 if p in heads and p not in stoprevs:
656 658 heads.remove(p)
657 659
658 660 return [self.node(r) for r in heads]
659 661
660 662 def children(self, node):
661 663 """find the children of a given node"""
662 664 c = []
663 665 p = self.rev(node)
664 666 for r in range(p + 1, len(self)):
665 667 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
666 668 if prevs:
667 669 for pr in prevs:
668 670 if pr == p:
669 671 c.append(self.node(r))
670 672 elif p == nullrev:
671 673 c.append(self.node(r))
672 674 return c
673 675
674 676 def descendant(self, start, end):
675 677 if start == nullrev:
676 678 return True
677 679 for i in self.descendants(start):
678 680 if i == end:
679 681 return True
680 682 elif i > end:
681 683 break
682 684 return False
683 685
684 686 def ancestor(self, a, b):
685 687 """calculate the least common ancestor of nodes a and b"""
686 688
687 689 # fast path, check if it is a descendant
688 690 a, b = self.rev(a), self.rev(b)
689 691 start, end = sorted((a, b))
690 692 if self.descendant(start, end):
691 693 return self.node(start)
692 694
693 695 def parents(rev):
694 696 return [p for p in self.parentrevs(rev) if p != nullrev]
695 697
696 698 c = ancestor.ancestor(a, b, parents)
697 699 if c is None:
698 700 return nullid
699 701
700 702 return self.node(c)
701 703
702 704 def _match(self, id):
703 705 if isinstance(id, (long, int)):
704 706 # rev
705 707 return self.node(id)
706 708 if len(id) == 20:
707 709 # possibly a binary node
708 710 # odds of a binary node being all hex in ASCII are 1 in 10**25
709 711 try:
710 712 node = id
711 713 self.rev(node) # quick search the index
712 714 return node
713 715 except LookupError:
714 716 pass # may be partial hex id
715 717 try:
716 718 # str(rev)
717 719 rev = int(id)
718 720 if str(rev) != id:
719 721 raise ValueError
720 722 if rev < 0:
721 723 rev = len(self) + rev
722 724 if rev < 0 or rev >= len(self):
723 725 raise ValueError
724 726 return self.node(rev)
725 727 except (ValueError, OverflowError):
726 728 pass
727 729 if len(id) == 40:
728 730 try:
729 731 # a full hex nodeid?
730 732 node = bin(id)
731 733 self.rev(node)
732 734 return node
733 735 except (TypeError, LookupError):
734 736 pass
735 737
736 738 def _partialmatch(self, id):
737 739 if id in self._pcache:
738 740 return self._pcache[id]
739 741
740 742 if len(id) < 40:
741 743 try:
742 744 # hex(node)[:...]
743 745 l = len(id) // 2 # grab an even number of digits
744 746 prefix = bin(id[:l * 2])
745 747 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
746 748 nl = [n for n in nl if hex(n).startswith(id)]
747 749 if len(nl) > 0:
748 750 if len(nl) == 1:
749 751 self._pcache[id] = nl[0]
750 752 return nl[0]
751 753 raise LookupError(id, self.indexfile,
752 754 _('ambiguous identifier'))
753 755 return None
754 756 except TypeError:
755 757 pass
756 758
757 759 def lookup(self, id):
758 760 """locate a node based on:
759 761 - revision number or str(revision number)
760 762 - nodeid or subset of hex nodeid
761 763 """
762 764 n = self._match(id)
763 765 if n is not None:
764 766 return n
765 767 n = self._partialmatch(id)
766 768 if n:
767 769 return n
768 770
769 771 raise LookupError(id, self.indexfile, _('no match found'))
770 772
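The resolvers above give lookup() its flexibility; on a hypothetical revlog rl all of the following return the same binary node (illustrative only):

    node = rl.lookup('0')                    # str(revision number)
    assert rl.lookup(hex(node)) == node      # full 40-digit hex id
    assert rl.lookup(hex(node)[:8]) == node  # unambiguous hex prefix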
771 773 def cmp(self, node, text):
772 774 """compare text with a given file revision
773 775
774 776 returns True if text is different than what is stored.
775 777 """
776 778 p1, p2 = self.parents(node)
777 779 return hash(text, p1, p2) != node
778 780
779 781 def _addchunk(self, offset, data):
780 782 o, d = self._chunkcache
781 783 # try to add to existing cache
782 784 if o + len(d) == offset and len(d) + len(data) < _chunksize:
783 785 self._chunkcache = o, d + data
784 786 else:
785 787 self._chunkcache = offset, data
786 788
787 789 def _loadchunk(self, offset, length):
788 790 if self._inline:
789 791 df = self.opener(self.indexfile)
790 792 else:
791 793 df = self.opener(self.datafile)
792 794
793 795 readahead = max(65536, length)
794 796 df.seek(offset)
795 797 d = df.read(readahead)
796 798 self._addchunk(offset, d)
797 799 if readahead > length:
798 800 return d[:length]
799 801 return d
800 802
801 803 def _getchunk(self, offset, length):
802 804 o, d = self._chunkcache
803 805 l = len(d)
804 806
805 807 # is it in the cache?
806 808 cachestart = offset - o
807 809 cacheend = cachestart + length
808 810 if cachestart >= 0 and cacheend <= l:
809 811 if cachestart == 0 and cacheend == l:
810 812 return d # avoid a copy
811 813 return d[cachestart:cacheend]
812 814
813 815 return self._loadchunk(offset, length)
814 816
815 817 def _chunkraw(self, startrev, endrev):
816 818 start = self.start(startrev)
817 819 length = self.end(endrev) - start
818 820 if self._inline:
819 821 start += (startrev + 1) * self._io.size
820 822 return self._getchunk(start, length)
821 823
822 824 def _chunk(self, rev):
823 825 return decompress(self._chunkraw(rev, rev))
824 826
825 827 def _chunkbase(self, rev):
826 828 return self._chunk(rev)
827 829
828 830 def _chunkclear(self):
829 831 self._chunkcache = (0, '')
830 832
831 833 def deltaparent(self, rev):
832 834 """return deltaparent of the given revision"""
833 if self.index[rev][3] == rev:
835 base = self.index[rev][3]
836 if base == rev:
834 837 return nullrev
838 elif self._generaldelta:
839 return base
835 840 else:
836 841 return rev - 1
837 842
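This method is the heart of the change: a classic revlog always deltas against rev - 1, so a chain based at rev 0 runs 0, 1, 2, ...; with generaldelta the base field names the delta parent directly, letting chains follow the revision graph instead of file order. A small illustrative walker built only on deltaparent (not part of this change):

    def deltachain(rl, rev):
        # collect the revisions whose deltas rebuild rev, base first
        chain = [rev]
        while rl.deltaparent(rev) != nullrev:
            rev = rl.deltaparent(rev)
            chain.append(rev)
        chain.reverse()
        return chain

    # classic revlog, rev 3 based at 0:       deltachain(rl, 3) -> [0, 1, 2, 3]
    # generaldelta, bases 3 -> 1 -> 0 (full): deltachain(rl, 3) -> [0, 1, 3]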
838 843 def revdiff(self, rev1, rev2):
839 844 """return or calculate a delta between two revisions"""
840 845 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
841 846 return self._chunk(rev2)
842 847
843 848 return mdiff.textdiff(self.revision(self.node(rev1)),
844 849 self.revision(self.node(rev2)))
845 850
846 851 def revision(self, node):
847 852 """return an uncompressed revision of a given node"""
848 853 cachedrev = None
849 854 if node == nullid:
850 855 return ""
851 856 if self._cache:
852 857 if self._cache[0] == node:
853 858 return self._cache[2]
854 859 cachedrev = self._cache[1]
855 860
856 861 # look up what we need to read
857 862 text = None
858 863 rev = self.rev(node)
859 864
860 865 # check rev flags
861 866 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
862 867 raise RevlogError(_('incompatible revision flag %x') %
863 868 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
864 869
865 870 # build delta chain
866 871 chain = []
867 872 index = self.index # for performance
873 generaldelta = self._generaldelta
868 874 iterrev = rev
869 875 e = index[iterrev]
870 876 while iterrev != e[3] and iterrev != cachedrev:
871 877 chain.append(iterrev)
872 iterrev -= 1
878 if generaldelta:
879 iterrev = e[3]
880 else:
881 iterrev -= 1
873 882 e = index[iterrev]
874 883 chain.reverse()
875 884 base = iterrev
876 885
877 886 if iterrev == cachedrev:
878 887 # cache hit
879 888 text = self._cache[2]
880 889
881 890 # drop cache to save memory
882 891 self._cache = None
883 892
884 893 self._chunkraw(base, rev)
885 894 if text is None:
886 895 text = self._chunkbase(base)
887 896
888 897 bins = [self._chunk(r) for r in chain]
889 898 text = mdiff.patches(text, bins)
890 899
891 900 text = self._checkhash(text, node, rev)
892 901
893 902 self._cache = (node, rev, text)
894 903 return text
895 904
896 905 def _checkhash(self, text, node, rev):
897 906 p1, p2 = self.parents(node)
898 907 if node != hash(text, p1, p2):
899 908 raise RevlogError(_("integrity check failed on %s:%d")
900 909 % (self.indexfile, rev))
901 910 return text
902 911
903 912 def checkinlinesize(self, tr, fp=None):
904 913 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
905 914 return
906 915
907 916 trinfo = tr.find(self.indexfile)
908 917 if trinfo is None:
909 918 raise RevlogError(_("%s not found in the transaction")
910 919 % self.indexfile)
911 920
912 921 trindex = trinfo[2]
913 922 dataoff = self.start(trindex)
914 923
915 924 tr.add(self.datafile, dataoff)
916 925
917 926 if fp:
918 927 fp.flush()
919 928 fp.close()
920 929
921 930 df = self.opener(self.datafile, 'w')
922 931 try:
923 932 for r in self:
924 933 df.write(self._chunkraw(r, r))
925 934 finally:
926 935 df.close()
927 936
928 937 fp = self.opener(self.indexfile, 'w', atomictemp=True)
929 938 self.version &= ~(REVLOGNGINLINEDATA)
930 939 self._inline = False
931 940 for i in self:
932 941 e = self._io.packentry(self.index[i], self.node, self.version, i)
933 942 fp.write(e)
934 943
935 944 # if we don't call rename, the temp file will never replace the
936 945 # real index
937 946 fp.rename()
938 947
939 948 tr.replace(self.indexfile, trindex * self._io.size)
940 949 self._chunkclear()
941 950
942 951 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
943 952 """add a revision to the log
944 953
945 954 text - the revision data to add
946 955 transaction - the transaction object used for rollback
947 956 link - the linkrev data to add
948 957 p1, p2 - the parent nodeids of the revision
949 958 cachedelta - an optional precomputed delta
950 959 """
951 960 node = hash(text, p1, p2)
952 961 if node in self.nodemap:
953 962 return node
954 963
955 964 dfh = None
956 965 if not self._inline:
957 966 dfh = self.opener(self.datafile, "a")
958 967 ifh = self.opener(self.indexfile, "a+")
959 968 try:
960 969 return self._addrevision(node, text, transaction, link, p1, p2,
961 970 cachedelta, ifh, dfh)
962 971 finally:
963 972 if dfh:
964 973 dfh.close()
965 974 ifh.close()
966 975
967 976 def _addrevision(self, node, text, transaction, link, p1, p2,
968 977 cachedelta, ifh, dfh):
969 978
970 979 btext = [text]
971 980 def buildtext():
972 981 if btext[0] is not None:
973 982 return btext[0]
974 983 # flush any pending writes here so we can read it in revision
975 984 if dfh:
976 985 dfh.flush()
977 986 ifh.flush()
978 987 basetext = self.revision(self.node(cachedelta[0]))
979 988 btext[0] = mdiff.patch(basetext, cachedelta[1])
980 989 chk = hash(btext[0], p1, p2)
981 990 if chk != node:
982 991 raise RevlogError(_("consistency error in delta"))
983 992 return btext[0]
984 993
985 994 def builddelta(rev):
986 995 # can we use the cached delta?
987 996 if cachedelta and cachedelta[0] == rev:
988 997 delta = cachedelta[1]
989 998 else:
990 999 t = buildtext()
991 1000 ptext = self.revision(self.node(rev))
992 1001 delta = mdiff.textdiff(ptext, t)
993 1002 data = compress(delta)
994 1003 l = len(data[1]) + len(data[0])
995 1004 basecache = self._basecache
996 1005 if basecache and basecache[0] == rev:
997 1006 base = basecache[1]
998 1007 else:
999 1008 base = self.chainbase(rev)
1000 1009 dist = l + offset - self.start(base)
1001 1010 return dist, l, data, base
1002 1011
1003 1012 curr = len(self)
1004 1013 prev = curr - 1
1005 1014 base = curr
1006 1015 offset = self.end(prev)
1007 1016 flags = 0
1008 1017 d = None
1009 1018 p1r, p2r = self.rev(p1), self.rev(p2)
1010 1019
1011 1020 # should we try to build a delta?
1012 1021 if prev != nullrev:
1013 1022 d = builddelta(prev)
1014 1023 dist, l, data, base = d
1015 1024
1016 1025 # full versions are inserted when the needed deltas
1017 1026 # become comparable to the uncompressed text
1018 1027 if text is None:
1019 1028 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1020 1029 cachedelta[1])
1021 1030 else:
1022 1031 textlen = len(text)
1023 1032 if d is None or dist > textlen * 2:
1024 1033 text = buildtext()
1025 1034 data = compress(text)
1026 1035 l = len(data[1]) + len(data[0])
1027 1036 base = curr
1028 1037
1029 1038 e = (offset_type(offset, flags), l, textlen,
1030 1039 base, link, p1r, p2r, node)
1031 1040 self.index.insert(-1, e)
1032 1041 self.nodemap[node] = curr
1033 1042
1034 1043 entry = self._io.packentry(e, self.node, self.version, curr)
1035 1044 if not self._inline:
1036 1045 transaction.add(self.datafile, offset)
1037 1046 transaction.add(self.indexfile, curr * len(entry))
1038 1047 if data[0]:
1039 1048 dfh.write(data[0])
1040 1049 dfh.write(data[1])
1041 1050 dfh.flush()
1042 1051 ifh.write(entry)
1043 1052 else:
1044 1053 offset += curr * self._io.size
1045 1054 transaction.add(self.indexfile, offset, curr)
1046 1055 ifh.write(entry)
1047 1056 ifh.write(data[0])
1048 1057 ifh.write(data[1])
1049 1058 self.checkinlinesize(transaction, ifh)
1050 1059
1051 1060 if type(text) == str: # only accept immutable objects
1052 1061 self._cache = (node, curr, text)
1053 1062 self._basecache = (curr, base)
1054 1063 return node
1055 1064
1056 1065 def group(self, nodelist, bundler):
1057 1066 """Calculate a delta group, yielding a sequence of changegroup chunks
1058 1067 (strings).
1059 1068
1060 1069 Given a list of changeset revs, return a set of deltas and
1061 1070 metadata corresponding to nodes. The first delta is
1062 1071 first parent(nodelist[0]) -> nodelist[0], the receiver is
1063 1072 guaranteed to have this parent as it has all history before
1064 1073 these changesets. If the first parent is nullrev, the
1065 1074 changegroup starts with a full revision.
1066 1075 """
1067 1076
1068 1077 revs = sorted([self.rev(n) for n in nodelist])
1069 1078
1070 1079 # if we don't have any revisions touched by these changesets, bail
1071 1080 if not revs:
1072 1081 yield bundler.close()
1073 1082 return
1074 1083
1075 1084 # add the parent of the first rev
1076 1085 p = self.parentrevs(revs[0])[0]
1077 1086 revs.insert(0, p)
1078 1087
1079 1088 # build deltas
1080 1089 for r in xrange(len(revs) - 1):
1081 1090 prev, curr = revs[r], revs[r + 1]
1082 1091 for c in bundler.revchunk(self, curr, prev):
1083 1092 yield c
1084 1093
1085 1094 yield bundler.close()
1086 1095
1087 1096 def addgroup(self, bundle, linkmapper, transaction):
1088 1097 """
1089 1098 add a delta group
1090 1099
1091 1100 given a set of deltas, add them to the revision log. the
1092 1101 first delta is against its parent, which should be in our
1093 1102 log, the rest are against the previous delta.
1094 1103 """
1095 1104
1096 1105 # track the base of the current delta log
1097 1106 node = None
1098 1107
1099 1108 r = len(self)
1100 1109 end = 0
1101 1110 if r:
1102 1111 end = self.end(r - 1)
1103 1112 ifh = self.opener(self.indexfile, "a+")
1104 1113 isize = r * self._io.size
1105 1114 if self._inline:
1106 1115 transaction.add(self.indexfile, end + isize, r)
1107 1116 dfh = None
1108 1117 else:
1109 1118 transaction.add(self.indexfile, isize, r)
1110 1119 transaction.add(self.datafile, end)
1111 1120 dfh = self.opener(self.datafile, "a")
1112 1121
1113 1122 try:
1114 1123 # loop through our set of deltas
1115 1124 chain = None
1116 1125 while 1:
1117 1126 chunkdata = bundle.deltachunk(chain)
1118 1127 if not chunkdata:
1119 1128 break
1120 1129 node = chunkdata['node']
1121 1130 p1 = chunkdata['p1']
1122 1131 p2 = chunkdata['p2']
1123 1132 cs = chunkdata['cs']
1124 1133 deltabase = chunkdata['deltabase']
1125 1134 delta = chunkdata['delta']
1126 1135
1127 1136 link = linkmapper(cs)
1128 1137 if node in self.nodemap:
1129 1138 # this can happen if two branches make the same change
1130 1139 chain = node
1131 1140 continue
1132 1141
1133 1142 for p in (p1, p2):
1134 1143 if p not in self.nodemap:
1135 1144 raise LookupError(p, self.indexfile,
1136 1145 _('unknown parent'))
1137 1146
1138 1147 if deltabase not in self.nodemap:
1139 1148 raise LookupError(deltabase, self.indexfile,
1140 1149 _('unknown delta base'))
1141 1150
1142 1151 baserev = self.rev(deltabase)
1143 1152 chain = self._addrevision(node, None, transaction, link,
1144 1153 p1, p2, (baserev, delta), ifh, dfh)
1145 1154 if not dfh and not self._inline:
1146 1155 # addrevision switched from inline to conventional
1147 1156 # reopen the index
1148 1157 ifh.close()
1149 1158 dfh = self.opener(self.datafile, "a")
1150 1159 ifh = self.opener(self.indexfile, "a")
1151 1160 finally:
1152 1161 if dfh:
1153 1162 dfh.close()
1154 1163 ifh.close()
1155 1164
1156 1165 return node
1157 1166
1158 1167 def strip(self, minlink, transaction):
1159 1168 """truncate the revlog on the first revision with a linkrev >= minlink
1160 1169
1161 1170 This function is called when we're stripping revision minlink and
1162 1171 its descendants from the repository.
1163 1172
1164 1173 We have to remove all revisions with linkrev >= minlink, because
1165 1174 the equivalent changelog revisions will be renumbered after the
1166 1175 strip.
1167 1176
1168 1177 So we truncate the revlog on the first of these revisions, and
1169 1178 trust that the caller has saved the revisions that shouldn't be
1170 1179 removed and that it'll re-add them after this truncation.
1171 1180 """
1172 1181 if len(self) == 0:
1173 1182 return
1174 1183
1175 1184 for rev in self:
1176 1185 if self.index[rev][4] >= minlink:
1177 1186 break
1178 1187 else:
1179 1188 return
1180 1189
1181 1190 # first truncate the files on disk
1182 1191 end = self.start(rev)
1183 1192 if not self._inline:
1184 1193 transaction.add(self.datafile, end)
1185 1194 end = rev * self._io.size
1186 1195 else:
1187 1196 end += rev * self._io.size
1188 1197
1189 1198 transaction.add(self.indexfile, end)
1190 1199
1191 1200 # then reset internal state in memory to forget those revisions
1192 1201 self._cache = None
1193 1202 self._chunkclear()
1194 1203 for x in xrange(rev, len(self)):
1195 1204 del self.nodemap[self.node(x)]
1196 1205
1197 1206 del self.index[rev:-1]
1198 1207
1199 1208 def checksize(self):
1200 1209 expected = 0
1201 1210 if len(self):
1202 1211 expected = max(0, self.end(len(self) - 1))
1203 1212
1204 1213 try:
1205 1214 f = self.opener(self.datafile)
1206 1215 f.seek(0, 2)
1207 1216 actual = f.tell()
1208 1217 f.close()
1209 1218 dd = actual - expected
1210 1219 except IOError, inst:
1211 1220 if inst.errno != errno.ENOENT:
1212 1221 raise
1213 1222 dd = 0
1214 1223
1215 1224 try:
1216 1225 f = self.opener(self.indexfile)
1217 1226 f.seek(0, 2)
1218 1227 actual = f.tell()
1219 1228 f.close()
1220 1229 s = self._io.size
1221 1230 i = max(0, actual // s)
1222 1231 di = actual - (i * s)
1223 1232 if self._inline:
1224 1233 databytes = 0
1225 1234 for r in self:
1226 1235 databytes += max(0, self.length(r))
1227 1236 dd = 0
1228 1237 di = actual - len(self) * s - databytes
1229 1238 except IOError, inst:
1230 1239 if inst.errno != errno.ENOENT:
1231 1240 raise
1232 1241 di = 0
1233 1242
1234 1243 return (dd, di)
1235 1244
1236 1245 def files(self):
1237 1246 res = [self.indexfile]
1238 1247 if not self._inline:
1239 1248 res.append(self.datafile)
1240 1249 return res
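Finally, a usage sketch showing how the class is typically driven; the store path and the scmutil opener are assumptions for illustration, not part of this change:

    from mercurial import scmutil, revlog

    opener = scmutil.opener('/path/to/repo/.hg/store')  # hypothetical path
    rl = revlog.revlog(opener, '00changelog.i')
    tip = rl.tip()                    # node id of the newest revision
    text = rl.revision(tip)           # full text, delta chain applied
    print len(rl), rl.deltaparent(len(rl) - 1)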