revlog: remove support for punched/shallow...
Sune Foldager
r14196:e7483ec3 default
@@ -1,1252 +1,1230 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 REVIDX_PUNCHED_FLAG = 2
38 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG
37 REVIDX_KNOWN_FLAGS = 0
39 38
40 39 # max size of revlog with inline data
41 40 _maxinline = 131072
42 41 _chunksize = 1048576
43 42
44 43 RevlogError = error.RevlogError
45 44 LookupError = error.LookupError
46 45
47 46 def getoffset(q):
48 47 return int(q >> 16)
49 48
50 49 def gettype(q):
51 50 return int(q & 0xFFFF)
52 51
53 52 def offset_type(offset, type):
54 53 return long(long(offset) << 16 | type)
55 54
56 55 nullhash = _sha(nullid)
57 56
58 57 def hash(text, p1, p2):
59 58 """generate a hash from the given text and its parent hashes
60 59
61 60 This hash combines both the current file contents and its history
62 61 in a manner that makes it easy to distinguish nodes with the same
63 62 content in the revision graph.
64 63 """
65 64 # As of now, if one of the parent nodes is null, p2 is null
66 65 if p2 == nullid:
67 66 # deep copy of a hash is faster than creating one
68 67 s = nullhash.copy()
69 68 s.update(p1)
70 69 else:
71 70 # none of the parent nodes are nullid
72 71 l = [p1, p2]
73 72 l.sort()
74 73 s = _sha(l[0])
75 74 s.update(l[1])
76 75 s.update(text)
77 76 return s.digest()
78 77
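# Illustrative sketch, not part of revlog.py: the scheme above restated with
# hashlib alone. sha1 runs over the two parent nodeids in sorted order, then
# the text; the nullhash.copy() branch above is only a precomputed fast path
# for the common all-zero p2 (nullid always sorts first).
import hashlib

def samplehash(text, p1, p2=nullid):
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

# identical text under different parents yields different nodeids, which is
# what lets the revision graph distinguish such revisions (see above)
n1 = samplehash("same text", nullid)
n2 = samplehash("same text", n1)
assert n1 != n2
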
79 78 def compress(text):
80 79 """ generate a possibly-compressed representation of text """
81 80 if not text:
82 81 return ("", text)
83 82 l = len(text)
84 83 bin = None
85 84 if l < 44:
86 85 pass
87 86 elif l > 1000000:
88 87 # zlib makes an internal copy, thus doubling memory usage for
89 88 # large files, so let's do this in pieces
90 89 z = zlib.compressobj()
91 90 p = []
92 91 pos = 0
93 92 while pos < l:
94 93 pos2 = pos + 2**20
95 94 p.append(z.compress(text[pos:pos2]))
96 95 pos = pos2
97 96 p.append(z.flush())
98 97 if sum(map(len, p)) < l:
99 98 bin = "".join(p)
100 99 else:
101 100 bin = _compress(text)
102 101 if bin is None or len(bin) > l:
103 102 if text[0] == '\0':
104 103 return ("", text)
105 104 return ('u', text)
106 105 return ("", bin)
107 106
108 107 def decompress(bin):
109 108 """ decompress the given input """
110 109 if not bin:
111 110 return bin
112 111 t = bin[0]
113 112 if t == '\0':
114 113 return bin
115 114 if t == 'x':
116 115 return _decompress(bin)
117 116 if t == 'u':
118 117 return bin[1:]
119 118 raise RevlogError(_("unknown compression type %r") % t)
120 119
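# Illustrative sketch, not part of revlog.py: compress() returns a
# (header, data) pair whose concatenation decompress() accepts. 'u' marks
# text stored verbatim, 'x' is the first byte zlib emits, and text that
# already starts with '\0' is its own marker.
def _roundtrip(text):
    header, data = compress(text)
    return decompress(header + data)

assert _roundtrip("") == ""
assert _roundtrip("short") == "short"            # under 44 bytes: stored as 'u' + text
assert _roundtrip("abc" * 1000) == "abc" * 1000  # deflated: payload starts with 'x'
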
121 120 indexformatv0 = ">4l20s20s20s"
122 121 v0shaoffset = 56
123 122
124 123 class revlogoldio(object):
125 124 def __init__(self):
126 125 self.size = struct.calcsize(indexformatv0)
127 126
128 127 def parseindex(self, data, inline):
129 128 s = self.size
130 129 index = []
131 130 nodemap = {nullid: nullrev}
132 131 n = off = 0
133 132 l = len(data)
134 133 while off + s <= l:
135 134 cur = data[off:off + s]
136 135 off += s
137 136 e = _unpack(indexformatv0, cur)
138 137 # transform to revlogv1 format
139 138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
140 139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
141 140 index.append(e2)
142 141 nodemap[e[6]] = n
143 142 n += 1
144 143
145 144 # add the magic null revision at -1
146 145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
147 146
148 147 return index, nodemap, None
149 148
150 149 def packentry(self, entry, node, version, rev):
151 150 if gettype(entry[0]):
152 151 raise RevlogError(_("index entry flags need RevlogNG"))
153 152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
154 153 node(entry[5]), node(entry[6]), entry[7])
155 154 return _pack(indexformatv0, *e2)
156 155
157 156 # index ng:
158 157 # 6 bytes: offset
159 158 # 2 bytes: flags
160 159 # 4 bytes: compressed length
161 160 # 4 bytes: uncompressed length
162 161 # 4 bytes: base rev
163 162 # 4 bytes: link rev
164 163 # 4 bytes: parent 1 rev
165 164 # 4 bytes: parent 2 rev
166 165 # 32 bytes: nodeid
167 166 indexformatng = ">Qiiiiii20s12x"
168 167 ngshaoffset = 32
169 168 versionformat = ">I"
170 169
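# Illustrative sketch, not part of revlog.py: one RevlogNG index entry packed
# and unpacked by hand with the format defined above. Offset and flags share
# the first quadword, exactly as offset_type(), getoffset() and gettype()
# encode them; the trailing 12 pad bytes round each entry up to 64 bytes.
sampleentry = (offset_type(4096, 0),   # data offset 4096, no flags
               120,                    # compressed length
               300,                    # uncompressed length
               5,                      # base rev of the delta chain
               7,                      # link rev
               5, -1,                  # parent 1 rev, parent 2 rev
               "\xaa" * 20)            # placeholder 20-byte nodeid
packed = _pack(indexformatng, *sampleentry)
assert len(packed) == struct.calcsize(indexformatng) == 64

fields = _unpack(indexformatng, packed)
assert getoffset(fields[0]) == 4096 and gettype(fields[0]) == 0
assert fields[7] == "\xaa" * 20
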
171 170 class revlogio(object):
172 171 def __init__(self):
173 172 self.size = struct.calcsize(indexformatng)
174 173
175 174 def parseindex(self, data, inline):
176 175 # call the C implementation to parse the index data
177 176 index, cache = parsers.parse_index2(data, inline)
178 177 return index, None, cache
179 178
180 179 def packentry(self, entry, node, version, rev):
181 180 p = _pack(indexformatng, *entry)
182 181 if rev == 0:
183 182 p = _pack(versionformat, version) + p[4:]
184 183 return p
185 184
186 185 class revlog(object):
187 186 """
188 187 the underlying revision storage object
189 188
190 189 A revlog consists of two parts, an index and the revision data.
191 190
192 191 The index is a file with a fixed record size containing
193 192 information on each revision, including its nodeid (hash), the
194 193 nodeids of its parents, the position and offset of its data within
195 194 the data file, and the revision it's based on. Finally, each entry
196 195 contains a linkrev entry that can serve as a pointer to external
197 196 data.
198 197
199 198 The revision data itself is a linear collection of data chunks.
200 199 Each chunk represents a revision and is usually represented as a
201 200 delta against the previous chunk. To bound lookup time, runs of
202 201 deltas are limited to about 2 times the length of the original
203 202 version data. This makes retrieval of a version proportional to
204 203 its size, or O(1) relative to the number of revisions.
205 204
206 205 Both pieces of the revlog are written to in an append-only
207 206 fashion, which means we never need to rewrite a file to insert or
208 207 remove data, and can use some simple techniques to avoid the need
209 208 for locking while reading.
210 209 """
211 210 def __init__(self, opener, indexfile, shallowroot=None):
212 211 """
213 212 create a revlog object
214 213
215 214 opener is a function that abstracts the file opening operation
216 215 and can be used to implement COW semantics or the like.
217 216 """
218 217 self.indexfile = indexfile
219 218 self.datafile = indexfile[:-2] + ".d"
220 219 self.opener = opener
221 220 self._cache = None
222 221 self._chunkcache = (0, '')
223 222 self.index = []
224 223 self._shallowroot = shallowroot
225 224 self._pcache = {}
226 225 self._nodecache = {nullid: nullrev}
227 226 self._nodepos = None
228 227
229 228 v = REVLOG_DEFAULT_VERSION
230 229 if hasattr(opener, 'options') and 'defversion' in opener.options:
231 230 v = opener.options['defversion']
232 231 if v & REVLOGNG:
233 232 v |= REVLOGNGINLINEDATA
234 233
235 234 if shallowroot:
236 235 v |= REVLOGSHALLOW
237 236
238 237 i = ''
239 238 try:
240 239 f = self.opener(self.indexfile)
241 240 i = f.read()
242 241 f.close()
243 242 if len(i) > 0:
244 243 v = struct.unpack(versionformat, i[:4])[0]
245 244 except IOError, inst:
246 245 if inst.errno != errno.ENOENT:
247 246 raise
248 247
249 248 self.version = v
250 249 self._inline = v & REVLOGNGINLINEDATA
251 250 self._shallow = v & REVLOGSHALLOW
252 251 flags = v & ~0xFFFF
253 252 fmt = v & 0xFFFF
254 253 if fmt == REVLOGV0 and flags:
255 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
256 255 % (self.indexfile, flags >> 16))
257 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
258 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
259 258 % (self.indexfile, flags >> 16))
260 259 elif fmt > REVLOGNG:
261 260 raise RevlogError(_("index %s unknown format %d")
262 261 % (self.indexfile, fmt))
263 262
264 263 self._io = revlogio()
265 264 if self.version == REVLOGV0:
266 265 self._io = revlogoldio()
267 266 try:
268 267 d = self._io.parseindex(i, self._inline)
269 268 except (ValueError, IndexError):
270 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
271 270 self.index, nodemap, self._chunkcache = d
272 271 if nodemap is not None:
273 272 self.nodemap = self._nodecache = nodemap
274 273 if not self._chunkcache:
275 274 self._chunkclear()
276 275
277 276 def tip(self):
278 277 return self.node(len(self.index) - 2)
279 278 def __len__(self):
280 279 return len(self.index) - 1
281 280 def __iter__(self):
282 281 for i in xrange(len(self)):
283 282 yield i
284 283
285 284 @util.propertycache
286 285 def nodemap(self):
287 286 self.rev(self.node(0))
288 287 return self._nodecache
289 288
290 289 def rev(self, node):
291 290 try:
292 291 return self._nodecache[node]
293 292 except KeyError:
294 293 n = self._nodecache
295 294 i = self.index
296 295 p = self._nodepos
297 296 if p is None:
298 297 p = len(i) - 2
299 298 for r in xrange(p, -1, -1):
300 299 v = i[r][7]
301 300 n[v] = r
302 301 if v == node:
303 302 self._nodepos = r - 1
304 303 return r
305 304 raise LookupError(node, self.indexfile, _('no node'))
306 305
307 306 def node(self, rev):
308 307 return self.index[rev][7]
309 308 def linkrev(self, rev):
310 309 return self.index[rev][4]
311 310 def parents(self, node):
312 311 i = self.index
313 312 d = i[self.rev(node)]
314 313 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
315 314 def parentrevs(self, rev):
316 315 return self.index[rev][5:7]
317 316 def start(self, rev):
318 317 return int(self.index[rev][0] >> 16)
319 318 def end(self, rev):
320 319 return self.start(rev) + self.length(rev)
321 320 def length(self, rev):
322 321 return self.index[rev][1]
323 322 def base(self, rev):
324 323 return self.index[rev][3]
325 324 def flags(self, rev):
326 325 return self.index[rev][0] & 0xFFFF
327 326 def rawsize(self, rev):
328 327 """return the length of the uncompressed text for a given revision"""
329 328 l = self.index[rev][2]
330 329 if l >= 0:
331 330 return l
332 331
333 332 t = self.revision(self.node(rev))
334 333 return len(t)
335 334 size = rawsize
336 335
337 336 def reachable(self, node, stop=None):
338 337 """return the set of all nodes ancestral to a given node, including
339 338 the node itself, stopping when stop is matched"""
340 339 reachable = set((node,))
341 340 visit = [node]
342 341 if stop:
343 342 stopn = self.rev(stop)
344 343 else:
345 344 stopn = 0
346 345 while visit:
347 346 n = visit.pop(0)
348 347 if n == stop:
349 348 continue
350 349 if n == nullid:
351 350 continue
352 351 for p in self.parents(n):
353 352 if self.rev(p) < stopn:
354 353 continue
355 354 if p not in reachable:
356 355 reachable.add(p)
357 356 visit.append(p)
358 357 return reachable
359 358
360 359 def ancestors(self, *revs):
361 360 """Generate the ancestors of 'revs' in reverse topological order.
362 361
363 362 Yield a sequence of revision numbers starting with the parents
364 363 of each revision in revs, i.e., each revision is *not* considered
365 364 an ancestor of itself. Results are in breadth-first order:
366 365 parents of each rev in revs, then parents of those, etc. Result
367 366 does not include the null revision."""
368 367 visit = list(revs)
369 368 seen = set([nullrev])
370 369 while visit:
371 370 for parent in self.parentrevs(visit.pop(0)):
372 371 if parent not in seen:
373 372 visit.append(parent)
374 373 seen.add(parent)
375 374 yield parent
376 375
377 376 def descendants(self, *revs):
378 377 """Generate the descendants of 'revs' in revision order.
379 378
380 379 Yield a sequence of revision numbers starting with a child of
381 380 some rev in revs, i.e., each revision is *not* considered a
382 381 descendant of itself. Results are ordered by revision number (a
383 382 topological sort)."""
384 383 first = min(revs)
385 384 if first == nullrev:
386 385 for i in self:
387 386 yield i
388 387 return
389 388
390 389 seen = set(revs)
391 390 for i in xrange(first + 1, len(self)):
392 391 for x in self.parentrevs(i):
393 392 if x != nullrev and x in seen:
394 393 seen.add(i)
395 394 yield i
396 395 break
397 396
398 397 def findcommonmissing(self, common=None, heads=None):
399 398 """Return a tuple of the ancestors of common and the ancestors of heads
400 399 that are not ancestors of common.
401 400
402 401 More specifically, the second element is a list of nodes N such that
403 402 every N satisfies the following constraints:
404 403
405 404 1. N is an ancestor of some node in 'heads'
406 405 2. N is not an ancestor of any node in 'common'
407 406
408 407 The list is sorted by revision number, meaning it is
409 408 topologically sorted.
410 409
411 410 'heads' and 'common' are both lists of node IDs. If heads is
412 411 not supplied, uses all of the revlog's heads. If common is not
413 412 supplied, uses nullid."""
414 413 if common is None:
415 414 common = [nullid]
416 415 if heads is None:
417 416 heads = self.heads()
418 417
419 418 common = [self.rev(n) for n in common]
420 419 heads = [self.rev(n) for n in heads]
421 420
422 421 # we want the ancestors, but inclusive
423 422 has = set(self.ancestors(*common))
424 423 has.add(nullrev)
425 424 has.update(common)
426 425
427 426 # take all ancestors from heads that aren't in has
428 427 missing = set()
429 428 visit = [r for r in heads if r not in has]
430 429 while visit:
431 430 r = visit.pop(0)
432 431 if r in missing:
433 432 continue
434 433 else:
435 434 missing.add(r)
436 435 for p in self.parentrevs(r):
437 436 if p not in has:
438 437 visit.append(p)
439 438 missing = list(missing)
440 439 missing.sort()
441 440 return has, [self.node(r) for r in missing]
442 441
443 442 def findmissing(self, common=None, heads=None):
444 443 """Return the ancestors of heads that are not ancestors of common.
445 444
446 445 More specifically, return a list of nodes N such that every N
447 446 satisfies the following constraints:
448 447
449 448 1. N is an ancestor of some node in 'heads'
450 449 2. N is not an ancestor of any node in 'common'
451 450
452 451 The list is sorted by revision number, meaning it is
453 452 topologically sorted.
454 453
455 454 'heads' and 'common' are both lists of node IDs. If heads is
456 455 not supplied, uses all of the revlog's heads. If common is not
457 456 supplied, uses nullid."""
458 457 _common, missing = self.findcommonmissing(common, heads)
459 458 return missing
460 459
461 460 def nodesbetween(self, roots=None, heads=None):
462 461 """Return a topological path from 'roots' to 'heads'.
463 462
464 463 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
465 464 topologically sorted list of all nodes N that satisfy both of
466 465 these constraints:
467 466
468 467 1. N is a descendant of some node in 'roots'
469 468 2. N is an ancestor of some node in 'heads'
470 469
471 470 Every node is considered to be both a descendant and an ancestor
472 471 of itself, so every reachable node in 'roots' and 'heads' will be
473 472 included in 'nodes'.
474 473
475 474 'outroots' is the list of reachable nodes in 'roots', i.e., the
476 475 subset of 'roots' that is returned in 'nodes'. Likewise,
477 476 'outheads' is the subset of 'heads' that is also in 'nodes'.
478 477
479 478 'roots' and 'heads' are both lists of node IDs. If 'roots' is
480 479 unspecified, uses nullid as the only root. If 'heads' is
481 480 unspecified, uses list of all of the revlog's heads."""
482 481 nonodes = ([], [], [])
483 482 if roots is not None:
484 483 roots = list(roots)
485 484 if not roots:
486 485 return nonodes
487 486 lowestrev = min([self.rev(n) for n in roots])
488 487 else:
489 488 roots = [nullid] # Everybody's a descendent of nullid
490 489 lowestrev = nullrev
491 490 if (lowestrev == nullrev) and (heads is None):
492 491 # We want _all_ the nodes!
493 492 return ([self.node(r) for r in self], [nullid], list(self.heads()))
494 493 if heads is None:
495 494 # All nodes are ancestors, so the latest ancestor is the last
496 495 # node.
497 496 highestrev = len(self) - 1
498 497 # Set ancestors to None to signal that every node is an ancestor.
499 498 ancestors = None
500 499 # Set heads to an empty dictionary for later discovery of heads
501 500 heads = {}
502 501 else:
503 502 heads = list(heads)
504 503 if not heads:
505 504 return nonodes
506 505 ancestors = set()
507 506 # Turn heads into a dictionary so we can remove 'fake' heads.
508 507 # Also, later we will be using it to filter out the heads we can't
509 508 # find from roots.
510 509 heads = dict.fromkeys(heads, 0)
511 510 # Start at the top and keep marking parents until we're done.
512 511 nodestotag = set(heads)
513 512 # Remember where the top was so we can use it as a limit later.
514 513 highestrev = max([self.rev(n) for n in nodestotag])
515 514 while nodestotag:
516 515 # grab a node to tag
517 516 n = nodestotag.pop()
518 517 # Never tag nullid
519 518 if n == nullid:
520 519 continue
521 520 # A node's revision number represents its place in a
522 521 # topologically sorted list of nodes.
523 522 r = self.rev(n)
524 523 if r >= lowestrev:
525 524 if n not in ancestors:
526 525 # If we are possibly a descendent of one of the roots
527 526 # and we haven't already been marked as an ancestor
528 527 ancestors.add(n) # Mark as ancestor
529 528 # Add non-nullid parents to list of nodes to tag.
530 529 nodestotag.update([p for p in self.parents(n) if
531 530 p != nullid])
532 531 elif n in heads: # We've seen it before, is it a fake head?
533 532 # So it is; real heads should not be the ancestors of
534 533 # any other heads.
535 534 heads.pop(n)
536 535 if not ancestors:
537 536 return nonodes
538 537 # Now that we have our set of ancestors, we want to remove any
539 538 # roots that are not ancestors.
540 539
541 540 # If one of the roots was nullid, everything is included anyway.
542 541 if lowestrev > nullrev:
543 542 # But, since we weren't, let's recompute the lowest rev to not
544 543 # include roots that aren't ancestors.
545 544
546 545 # Filter out roots that aren't ancestors of heads
547 546 roots = [n for n in roots if n in ancestors]
548 547 # Recompute the lowest revision
549 548 if roots:
550 549 lowestrev = min([self.rev(n) for n in roots])
551 550 else:
552 551 # No more roots? Return empty list
553 552 return nonodes
554 553 else:
555 554 # We are descending from nullid, and don't need to care about
556 555 # any other roots.
557 556 lowestrev = nullrev
558 557 roots = [nullid]
559 558 # Transform our roots list into a set.
560 559 descendents = set(roots)
561 560 # Also, keep the original roots so we can filter out roots that aren't
562 561 # 'real' roots (i.e. are descended from other roots).
563 562 roots = descendents.copy()
564 563 # Our topologically sorted list of output nodes.
565 564 orderedout = []
566 565 # Don't start at nullid since we don't want nullid in our output list,
567 566 # and if nullid shows up in descedents, empty parents will look like
568 567 # they're descendents.
569 568 for r in xrange(max(lowestrev, 0), highestrev + 1):
570 569 n = self.node(r)
571 570 isdescendent = False
572 571 if lowestrev == nullrev: # Everybody is a descendent of nullid
573 572 isdescendent = True
574 573 elif n in descendents:
575 574 # n is already a descendent
576 575 isdescendent = True
577 576 # This check only needs to be done here because all the roots
578 577 # will start being marked as descendents before the loop.
579 578 if n in roots:
580 579 # If n was a root, check if it's a 'real' root.
581 580 p = tuple(self.parents(n))
582 581 # If any of its parents are descendents, it's not a root.
583 582 if (p[0] in descendents) or (p[1] in descendents):
584 583 roots.remove(n)
585 584 else:
586 585 p = tuple(self.parents(n))
587 586 # A node is a descendent if either of its parents is a descendent.
588 587 # (We seeded the descendents set with the roots
589 588 # up there, remember?)
590 589 if (p[0] in descendents) or (p[1] in descendents):
591 590 descendents.add(n)
592 591 isdescendent = True
593 592 if isdescendent and ((ancestors is None) or (n in ancestors)):
594 593 # Only include nodes that are both descendents and ancestors.
595 594 orderedout.append(n)
596 595 if (ancestors is not None) and (n in heads):
597 596 # We're trying to figure out which heads are reachable
598 597 # from roots.
599 598 # Mark this head as having been reached
600 599 heads[n] = 1
601 600 elif ancestors is None:
602 601 # Otherwise, we're trying to discover the heads.
603 602 # Assume this is a head because if it isn't, the next step
604 603 # will eventually remove it.
605 604 heads[n] = 1
606 605 # But, obviously its parents aren't.
607 606 for p in self.parents(n):
608 607 heads.pop(p, None)
609 608 heads = [n for n in heads.iterkeys() if heads[n] != 0]
610 609 roots = list(roots)
611 610 assert orderedout
612 611 assert roots
613 612 assert heads
614 613 return (orderedout, roots, heads)
615 614
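# Worked example (illustrative): in a linear revlog 0 -> 1 -> 2 -> 3, with
# n = self.node, nodesbetween([n(1)], [n(3)]) returns
#     ([n(1), n(2), n(3)], [n(1)], [n(3)])
# i.e. the inclusive path, plus the subsets of roots and heads it reaches.
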
616 615 def headrevs(self):
617 616 count = len(self)
618 617 if not count:
619 618 return [nullrev]
620 619 ishead = [1] * (count + 1)
621 620 index = self.index
622 621 for r in xrange(count):
623 622 e = index[r]
624 623 ishead[e[5]] = ishead[e[6]] = 0
625 624 return [r for r in xrange(count) if ishead[r]]
626 625
627 626 def heads(self, start=None, stop=None):
628 627 """return the list of all nodes that have no children
629 628
630 629 if start is specified, only heads that are descendants of
631 630 start will be returned
632 631 if stop is specified, it will consider all the revs from stop
633 632 as if they had no children
634 633 """
635 634 if start is None and stop is None:
636 635 if not len(self):
637 636 return [nullid]
638 637 return [self.node(r) for r in self.headrevs()]
639 638
640 639 if start is None:
641 640 start = nullid
642 641 if stop is None:
643 642 stop = []
644 643 stoprevs = set([self.rev(n) for n in stop])
645 644 startrev = self.rev(start)
646 645 reachable = set((startrev,))
647 646 heads = set((startrev,))
648 647
649 648 parentrevs = self.parentrevs
650 649 for r in xrange(startrev + 1, len(self)):
651 650 for p in parentrevs(r):
652 651 if p in reachable:
653 652 if r not in stoprevs:
654 653 reachable.add(r)
655 654 heads.add(r)
656 655 if p in heads and p not in stoprevs:
657 656 heads.remove(p)
658 657
659 658 return [self.node(r) for r in heads]
660 659
661 660 def children(self, node):
662 661 """find the children of a given node"""
663 662 c = []
664 663 p = self.rev(node)
665 664 for r in range(p + 1, len(self)):
666 665 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
667 666 if prevs:
668 667 for pr in prevs:
669 668 if pr == p:
670 669 c.append(self.node(r))
671 670 elif p == nullrev:
672 671 c.append(self.node(r))
673 672 return c
674 673
675 674 def descendant(self, start, end):
676 675 if start == nullrev:
677 676 return True
678 677 for i in self.descendants(start):
679 678 if i == end:
680 679 return True
681 680 elif i > end:
682 681 break
683 682 return False
684 683
685 684 def ancestor(self, a, b):
686 685 """calculate the least common ancestor of nodes a and b"""
687 686
688 687 # fast path, check if it is a descendant
689 688 a, b = self.rev(a), self.rev(b)
690 689 start, end = sorted((a, b))
691 690 if self.descendant(start, end):
692 691 return self.node(start)
693 692
694 693 def parents(rev):
695 694 return [p for p in self.parentrevs(rev) if p != nullrev]
696 695
697 696 c = ancestor.ancestor(a, b, parents)
698 697 if c is None:
699 698 return nullid
700 699
701 700 return self.node(c)
702 701
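# Worked example (illustrative): with revisions 0 -> 1 and heads 2 and 3
# both parented on 1, ancestor(n(2), n(3)) is n(1), the least common
# ancestor; the descendant() fast path covers the linear case instead.
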
703 702 def _match(self, id):
704 703 if isinstance(id, (long, int)):
705 704 # rev
706 705 return self.node(id)
707 706 if len(id) == 20:
708 707 # possibly a binary node
709 708 # odds of a binary node being all hex in ASCII are 1 in 10**25
710 709 try:
711 710 node = id
712 711 self.rev(node) # quick search the index
713 712 return node
714 713 except LookupError:
715 714 pass # may be partial hex id
716 715 try:
717 716 # str(rev)
718 717 rev = int(id)
719 718 if str(rev) != id:
720 719 raise ValueError
721 720 if rev < 0:
722 721 rev = len(self) + rev
723 722 if rev < 0 or rev >= len(self):
724 723 raise ValueError
725 724 return self.node(rev)
726 725 except (ValueError, OverflowError):
727 726 pass
728 727 if len(id) == 40:
729 728 try:
730 729 # a full hex nodeid?
731 730 node = bin(id)
732 731 self.rev(node)
733 732 return node
734 733 except (TypeError, LookupError):
735 734 pass
736 735
737 736 def _partialmatch(self, id):
738 737 if id in self._pcache:
739 738 return self._pcache[id]
740 739
741 740 if len(id) < 40:
742 741 try:
743 742 # hex(node)[:...]
744 743 l = len(id) // 2 # grab an even number of digits
745 744 prefix = bin(id[:l * 2])
746 745 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
747 746 nl = [n for n in nl if hex(n).startswith(id)]
748 747 if len(nl) > 0:
749 748 if len(nl) == 1:
750 749 self._pcache[id] = nl[0]
751 750 return nl[0]
752 751 raise LookupError(id, self.indexfile,
753 752 _('ambiguous identifier'))
754 753 return None
755 754 except TypeError:
756 755 pass
757 756
758 757 def lookup(self, id):
759 758 """locate a node based on:
760 759 - revision number or str(revision number)
761 760 - nodeid or subset of hex nodeid
762 761 """
763 762 n = self._match(id)
764 763 if n is not None:
765 764 return n
766 765 n = self._partialmatch(id)
767 766 if n:
768 767 return n
769 768
770 769 raise LookupError(id, self.indexfile, _('no match found'))
771 770
772 771 def cmp(self, node, text):
773 772 """compare text with a given file revision
774 773
775 774 returns True if text is different than what is stored.
776 775 """
777 776 p1, p2 = self.parents(node)
778 777 return hash(text, p1, p2) != node
779 778
780 779 def _addchunk(self, offset, data):
781 780 o, d = self._chunkcache
782 781 # try to add to existing cache
783 782 if o + len(d) == offset and len(d) + len(data) < _chunksize:
784 783 self._chunkcache = o, d + data
785 784 else:
786 785 self._chunkcache = offset, data
787 786
788 787 def _loadchunk(self, offset, length):
789 788 if self._inline:
790 789 df = self.opener(self.indexfile)
791 790 else:
792 791 df = self.opener(self.datafile)
793 792
794 793 readahead = max(65536, length)
795 794 df.seek(offset)
796 795 d = df.read(readahead)
797 796 self._addchunk(offset, d)
798 797 if readahead > length:
799 798 return d[:length]
800 799 return d
801 800
802 801 def _getchunk(self, offset, length):
803 802 o, d = self._chunkcache
804 803 l = len(d)
805 804
806 805 # is it in the cache?
807 806 cachestart = offset - o
808 807 cacheend = cachestart + length
809 808 if cachestart >= 0 and cacheend <= l:
810 809 if cachestart == 0 and cacheend == l:
811 810 return d # avoid a copy
812 811 return d[cachestart:cacheend]
813 812
814 813 return self._loadchunk(offset, length)
815 814
816 815 def _chunkraw(self, startrev, endrev):
817 816 start = self.start(startrev)
818 817 length = self.end(endrev) - start
819 818 if self._inline:
820 819 start += (startrev + 1) * self._io.size
821 820 return self._getchunk(start, length)
822 821
823 822 def _chunk(self, rev):
824 823 return decompress(self._chunkraw(rev, rev))
825 824
826 825 def _chunkbase(self, rev):
827 826 return self._chunk(rev)
828 827
829 828 def _chunkclear(self):
830 829 self._chunkcache = (0, '')
831 830
832 831 def deltaparent(self, rev):
833 832 """return deltaparent of the given revision"""
834 833 return rev - 1
835 834
836 835 def revdiff(self, rev1, rev2):
837 836 """return or calculate a delta between two revisions"""
838 837 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
839 838 return self._chunk(rev2)
840 839
841 840 return mdiff.textdiff(self.revision(self.node(rev1)),
842 841 self.revision(self.node(rev2)))
843 842
844 843 def revision(self, node):
845 844 """return an uncompressed revision of a given node"""
846 845 cachedrev = None
847 846 if node == nullid:
848 847 return ""
849 848 if self._cache:
850 849 if self._cache[0] == node:
851 850 return self._cache[2]
852 851 cachedrev = self._cache[1]
853 852
854 853 # look up what we need to read
855 854 text = None
856 855 rev = self.rev(node)
857 856 base = self.base(rev)
858 857
859 858 # check rev flags
860 859 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
861 860 raise RevlogError(_('incompatible revision flag %x') %
862 861 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
863 862
864 863 # build delta chain
865 864 chain = []
866 865 iterrev = rev
867 866 while iterrev != base and iterrev != cachedrev:
868 867 chain.append(iterrev)
869 868 iterrev -= 1
870 869 chain.reverse()
871 870 base = iterrev
872 871
873 872 if iterrev == cachedrev:
874 873 # cache hit
875 874 text = self._cache[2]
876 875
877 876 # drop cache to save memory
878 877 self._cache = None
879 878
880 879 self._chunkraw(base, rev)
881 880 if text is None:
882 881 text = self._chunkbase(base)
883 882
884 883 bins = [self._chunk(r) for r in chain]
885 884 text = mdiff.patches(text, bins)
886 885
887 886 text = self._checkhash(text, node, rev)
888 887
889 888 self._cache = (node, rev, text)
890 889 return text
891 890
892 891 def _checkhash(self, text, node, rev):
893 892 p1, p2 = self.parents(node)
894 if (node != hash(text, p1, p2) and
895 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
893 if node != hash(text, p1, p2):
896 894 raise RevlogError(_("integrity check failed on %s:%d")
897 895 % (self.indexfile, rev))
898 896 return text
899 897
900 898 def checkinlinesize(self, tr, fp=None):
901 899 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
902 900 return
903 901
904 902 trinfo = tr.find(self.indexfile)
905 903 if trinfo is None:
906 904 raise RevlogError(_("%s not found in the transaction")
907 905 % self.indexfile)
908 906
909 907 trindex = trinfo[2]
910 908 dataoff = self.start(trindex)
911 909
912 910 tr.add(self.datafile, dataoff)
913 911
914 912 if fp:
915 913 fp.flush()
916 914 fp.close()
917 915
918 916 df = self.opener(self.datafile, 'w')
919 917 try:
920 918 for r in self:
921 919 df.write(self._chunkraw(r, r))
922 920 finally:
923 921 df.close()
924 922
925 923 fp = self.opener(self.indexfile, 'w', atomictemp=True)
926 924 self.version &= ~(REVLOGNGINLINEDATA)
927 925 self._inline = False
928 926 for i in self:
929 927 e = self._io.packentry(self.index[i], self.node, self.version, i)
930 928 fp.write(e)
931 929
932 930 # if we don't call rename, the temp file will never replace the
933 931 # real index
934 932 fp.rename()
935 933
936 934 tr.replace(self.indexfile, trindex * self._io.size)
937 935 self._chunkclear()
938 936
939 937 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
940 938 """add a revision to the log
941 939
942 940 text - the revision data to add
943 941 transaction - the transaction object used for rollback
944 942 link - the linkrev data to add
945 943 p1, p2 - the parent nodeids of the revision
946 944 cachedelta - an optional precomputed delta
947 945 """
948 946 node = hash(text, p1, p2)
949 if (node in self.nodemap and
950 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
947 if node in self.nodemap:
951 948 return node
952 949
953 950 dfh = None
954 951 if not self._inline:
955 952 dfh = self.opener(self.datafile, "a")
956 953 ifh = self.opener(self.indexfile, "a+")
957 954 try:
958 955 return self._addrevision(node, text, transaction, link, p1, p2,
959 956 cachedelta, ifh, dfh)
960 957 finally:
961 958 if dfh:
962 959 dfh.close()
963 960 ifh.close()
964 961
965 962 def _addrevision(self, node, text, transaction, link, p1, p2,
966 963 cachedelta, ifh, dfh):
967 964
968 965 btext = [text]
969 966 def buildtext():
970 967 if btext[0] is not None:
971 968 return btext[0]
972 969 # flush any pending writes here so we can read it in revision
973 970 if dfh:
974 971 dfh.flush()
975 972 ifh.flush()
976 973 basetext = self.revision(self.node(cachedelta[0]))
977 974 btext[0] = mdiff.patch(basetext, cachedelta[1])
978 975 chk = hash(btext[0], p1, p2)
979 976 if chk != node:
980 977 raise RevlogError(_("consistency error in delta"))
981 978 return btext[0]
982 979
983 980 def builddelta(rev):
984 981 # can we use the cached delta?
985 982 if cachedelta and cachedelta[0] == rev:
986 983 delta = cachedelta[1]
987 984 else:
988 985 t = buildtext()
989 986 ptext = self.revision(self.node(rev))
990 987 delta = mdiff.textdiff(ptext, t)
991 988 data = compress(delta)
992 989 l = len(data[1]) + len(data[0])
993 990 base = self.base(rev)
994 991 dist = l + offset - self.start(base)
995 992 return dist, l, data, base
996 993
997 994 curr = len(self)
998 995 prev = curr - 1
999 996 base = curr
1000 997 offset = self.end(prev)
1001 998 flags = 0
1002 999 d = None
1003 1000 p1r, p2r = self.rev(p1), self.rev(p2)
1004 1001
1005 1002 # should we try to build a delta?
1006 1003 if prev != nullrev:
1007 1004 d = builddelta(prev)
1008 1005 dist, l, data, base = d
1009 1006
1010 1007 # full versions are inserted when the needed deltas
1011 1008 # become comparable to the uncompressed text
1012 # or the base revision is punched
1013 1009 if text is None:
1014 1010 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1015 1011 cachedelta[1])
1016 1012 else:
1017 1013 textlen = len(text)
1018 if (d is None or dist > textlen * 2 or
1019 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1014 if d is None or dist > textlen * 2:
1020 1015 text = buildtext()
1021 1016 data = compress(text)
1022 1017 l = len(data[1]) + len(data[0])
1023 1018 base = curr
1024 1019
1025 1020 e = (offset_type(offset, flags), l, textlen,
1026 1021 base, link, p1r, p2r, node)
1027 1022 self.index.insert(-1, e)
1028 1023 self.nodemap[node] = curr
1029 1024
1030 1025 entry = self._io.packentry(e, self.node, self.version, curr)
1031 1026 if not self._inline:
1032 1027 transaction.add(self.datafile, offset)
1033 1028 transaction.add(self.indexfile, curr * len(entry))
1034 1029 if data[0]:
1035 1030 dfh.write(data[0])
1036 1031 dfh.write(data[1])
1037 1032 dfh.flush()
1038 1033 ifh.write(entry)
1039 1034 else:
1040 1035 offset += curr * self._io.size
1041 1036 transaction.add(self.indexfile, offset, curr)
1042 1037 ifh.write(entry)
1043 1038 ifh.write(data[0])
1044 1039 ifh.write(data[1])
1045 1040 self.checkinlinesize(transaction, ifh)
1046 1041
1047 1042 if type(text) == str: # only accept immutable objects
1048 1043 self._cache = (node, curr, text)
1049 1044 return node
1050 1045
1051 1046 def group(self, nodelist, bundler):
1052 1047 """Calculate a delta group, yielding a sequence of changegroup chunks
1053 1048 (strings).
1054 1049
1055 1050 Given a list of changeset revs, return a set of deltas and
1056 1051 metadata corresponding to nodes. The first delta is
1057 1052 first parent(nodelist[0]) -> nodelist[0], the receiver is
1058 1053 guaranteed to have this parent as it has all history before
1059 1054 these changesets. If the first parent is nullrev, the
1060 1055 changegroup starts with a full revision.
1061 1056 """
1062 1057
1063 1058 revs = sorted([self.rev(n) for n in nodelist])
1064 1059
1065 1060 # if we don't have any revisions touched by these changesets, bail
1066 1061 if not revs:
1067 1062 yield bundler.close()
1068 1063 return
1069 1064
1070 1065 # add the parent of the first rev
1071 1066 p = self.parentrevs(revs[0])[0]
1072 1067 revs.insert(0, p)
1073 1068
1074 1069 # build deltas
1075 1070 for r in xrange(len(revs) - 1):
1076 1071 prev, curr = revs[r], revs[r + 1]
1077 1072 for c in bundler.revchunk(self, curr, prev):
1078 1073 yield c
1079 1074
1080 1075 yield bundler.close()
1081 1076
1082 1077 def addgroup(self, bundle, linkmapper, transaction):
1083 1078 """
1084 1079 add a delta group
1085 1080
1086 1081 given a set of deltas, add them to the revision log. the
1087 1082 first delta is against its parent, which should be in our
1088 1083 log, the rest are against the previous delta.
1089 1084 """
1090 1085
1091 1086 # track the base of the current delta log
1092 1087 node = None
1093 1088
1094 1089 r = len(self)
1095 1090 end = 0
1096 1091 if r:
1097 1092 end = self.end(r - 1)
1098 1093 ifh = self.opener(self.indexfile, "a+")
1099 1094 isize = r * self._io.size
1100 1095 if self._inline:
1101 1096 transaction.add(self.indexfile, end + isize, r)
1102 1097 dfh = None
1103 1098 else:
1104 1099 transaction.add(self.indexfile, isize, r)
1105 1100 transaction.add(self.datafile, end)
1106 1101 dfh = self.opener(self.datafile, "a")
1107 1102
1108 1103 try:
1109 1104 # loop through our set of deltas
1110 1105 chain = None
1111 1106 while 1:
1112 1107 chunkdata = bundle.deltachunk(chain)
1113 1108 if not chunkdata:
1114 1109 break
1115 1110 node = chunkdata['node']
1116 1111 p1 = chunkdata['p1']
1117 1112 p2 = chunkdata['p2']
1118 1113 cs = chunkdata['cs']
1119 1114 deltabase = chunkdata['deltabase']
1120 1115 delta = chunkdata['delta']
1121 1116
1122 1117 link = linkmapper(cs)
1123 if (node in self.nodemap and
1124 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1118 if node in self.nodemap:
1125 1119 # this can happen if two branches make the same change
1126 1120 chain = node
1127 1121 continue
1128 1122
1129 1123 for p in (p1, p2):
1130 1124 if not p in self.nodemap:
1131 if self._shallow:
1132 # add null entries for missing parents
1133 # XXX FIXME
1134 #if base == nullrev:
1135 # base = len(self)
1136 #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1137 # 0, 0, base, nullrev, nullrev, nullrev, p)
1138 #self.index.insert(-1, e)
1139 #self.nodemap[p] = r
1140 #entry = self._io.packentry(e, self.node,
1141 # self.version, r)
1142 #ifh.write(entry)
1143 #t, r = r, r + 1
1144 raise LookupError(p, self.indexfile,
1145 _('unknown parent'))
1146 else:
1147 raise LookupError(p, self.indexfile,
1148 _('unknown parent'))
1125 raise LookupError(p, self.indexfile,
1126 _('unknown parent'))
1149 1127
1150 1128 if deltabase not in self.nodemap:
1151 1129 raise LookupError(deltabase, self.indexfile,
1152 1130 _('unknown delta base'))
1153 1131
1154 1132 baserev = self.rev(deltabase)
1155 1133 chain = self._addrevision(node, None, transaction, link,
1156 1134 p1, p2, (baserev, delta), ifh, dfh)
1157 1135 if not dfh and not self._inline:
1158 1136 # addrevision switched from inline to conventional
1159 1137 # reopen the index
1160 1138 ifh.close()
1161 1139 dfh = self.opener(self.datafile, "a")
1162 1140 ifh = self.opener(self.indexfile, "a")
1163 1141 finally:
1164 1142 if dfh:
1165 1143 dfh.close()
1166 1144 ifh.close()
1167 1145
1168 1146 return node
1169 1147
1170 1148 def strip(self, minlink, transaction):
1171 1149 """truncate the revlog on the first revision with a linkrev >= minlink
1172 1150
1173 1151 This function is called when we're stripping revision minlink and
1174 1152 its descendants from the repository.
1175 1153
1176 1154 We have to remove all revisions with linkrev >= minlink, because
1177 1155 the equivalent changelog revisions will be renumbered after the
1178 1156 strip.
1179 1157
1180 1158 So we truncate the revlog on the first of these revisions, and
1181 1159 trust that the caller has saved the revisions that shouldn't be
1182 1160 removed and that it'll re-add them after this truncation.
1183 1161 """
1184 1162 if len(self) == 0:
1185 1163 return
1186 1164
1187 1165 for rev in self:
1188 1166 if self.index[rev][4] >= minlink:
1189 1167 break
1190 1168 else:
1191 1169 return
1192 1170
1193 1171 # first truncate the files on disk
1194 1172 end = self.start(rev)
1195 1173 if not self._inline:
1196 1174 transaction.add(self.datafile, end)
1197 1175 end = rev * self._io.size
1198 1176 else:
1199 1177 end += rev * self._io.size
1200 1178
1201 1179 transaction.add(self.indexfile, end)
1202 1180
1203 1181 # then reset internal state in memory to forget those revisions
1204 1182 self._cache = None
1205 1183 self._chunkclear()
1206 1184 for x in xrange(rev, len(self)):
1207 1185 del self.nodemap[self.node(x)]
1208 1186
1209 1187 del self.index[rev:-1]
1210 1188
1211 1189 def checksize(self):
1212 1190 expected = 0
1213 1191 if len(self):
1214 1192 expected = max(0, self.end(len(self) - 1))
1215 1193
1216 1194 try:
1217 1195 f = self.opener(self.datafile)
1218 1196 f.seek(0, 2)
1219 1197 actual = f.tell()
1220 1198 f.close()
1221 1199 dd = actual - expected
1222 1200 except IOError, inst:
1223 1201 if inst.errno != errno.ENOENT:
1224 1202 raise
1225 1203 dd = 0
1226 1204
1227 1205 try:
1228 1206 f = self.opener(self.indexfile)
1229 1207 f.seek(0, 2)
1230 1208 actual = f.tell()
1231 1209 f.close()
1232 1210 s = self._io.size
1233 1211 i = max(0, actual // s)
1234 1212 di = actual - (i * s)
1235 1213 if self._inline:
1236 1214 databytes = 0
1237 1215 for r in self:
1238 1216 databytes += max(0, self.length(r))
1239 1217 dd = 0
1240 1218 di = actual - len(self) * s - databytes
1241 1219 except IOError, inst:
1242 1220 if inst.errno != errno.ENOENT:
1243 1221 raise
1244 1222 di = 0
1245 1223
1246 1224 return (dd, di)
1247 1225
1248 1226 def files(self):
1249 1227 res = [self.indexfile]
1250 1228 if not self._inline:
1251 1229 res.append(self.datafile)
1252 1230 return res
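For orientation, here is a minimal read-only sketch of driving the revlog class above outside Mercurial. simpleopener is a hypothetical stand-in for Mercurial's real opener (which adds path auditing, atomic temp files and more), the store path is a placeholder, and a Python 2 runtime is assumed; write paths such as checkinlinesize() would need the real atomictemp machinery.

import os

def simpleopener(base):
    def opener(path, mode='r', atomictemp=False):
        # atomictemp is accepted but ignored: this sketch only reads
        return open(os.path.join(base, path), mode + 'b')
    return opener

rl = revlog(simpleopener('/path/to/repo/.hg/store'), '00changelog.i')
tip = rl.tip()
print hex(tip), rl.rev(tip)    # nodeid (hex) and revision number of tip
text = rl.revision(tip)        # uncompressed, integrity-checked revision data
p1, p2 = rl.parents(tip)       # parent nodeids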