revlog: introduce _chunkbase to allow filelog to override...
Sune Foldager
r14075:bc101902 default
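The change below adds a small template-method hook: revision() now fetches the base chunk of a delta chain through a new _chunkbase() method, which by default simply delegates to _chunk(). A subclass such as filelog can then substitute its own base-chunk handling without duplicating revision(). A minimal sketch of the pattern, assuming a hypothetical subclass (the filelog side is not part of this diff):

# Minimal sketch of the hook introduced in this changeset.  Only _chunk and
# _chunkbase correspond to real revlog methods; the rest is illustrative
# scaffolding, not Mercurial code.
class minirevlog(object):
    def __init__(self, chunks):
        self._chunks = chunks            # stand-in for the on-disk chunk data

    def _chunk(self, rev):
        # the real method returns decompress(self._chunkraw(rev, rev))
        return self._chunks[rev]

    def _chunkbase(self, rev):
        # new hook: by default the base chunk is fetched like any other chunk
        return self._chunk(rev)

class minifilelog(minirevlog):
    def _chunkbase(self, rev):
        # hypothetical override: a filelog-style subclass can special-case how
        # the base text of a delta chain is produced; revision(), which now
        # calls _chunkbase(), picks the override up automatically
        return self._chunk(rev)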
@@ -1,1272 +1,1275
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 37 REVIDX_PARENTDELTA = 1
38 38 REVIDX_PUNCHED_FLAG = 2
39 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 41 # max size of revlog with inline data
42 42 _maxinline = 131072
43 43 _chunksize = 1048576
44 44
45 45 RevlogError = error.RevlogError
46 46 LookupError = error.LookupError
47 47
48 48 def getoffset(q):
49 49 return int(q >> 16)
50 50
51 51 def gettype(q):
52 52 return int(q & 0xFFFF)
53 53
54 54 def offset_type(offset, type):
55 55 return long(long(offset) << 16 | type)
56 56
57 57 nullhash = _sha(nullid)
58 58
59 59 def hash(text, p1, p2):
60 60 """generate a hash from the given text and its parent hashes
61 61
62 62 This hash combines both the current file contents and its history
63 63 in a manner that makes it easy to distinguish nodes with the same
64 64 content in the revision graph.
65 65 """
66 66 # As of now, if one of the parent nodes is null, p2 is null
67 67 if p2 == nullid:
68 68 # deep copy of a hash is faster than creating one
69 69 s = nullhash.copy()
70 70 s.update(p1)
71 71 else:
72 72 # none of the parent nodes are nullid
73 73 l = [p1, p2]
74 74 l.sort()
75 75 s = _sha(l[0])
76 76 s.update(l[1])
77 77 s.update(text)
78 78 return s.digest()
79 79
80 80 def compress(text):
81 81 """ generate a possibly-compressed representation of text """
82 82 if not text:
83 83 return ("", text)
84 84 l = len(text)
85 85 bin = None
86 86 if l < 44:
87 87 pass
88 88 elif l > 1000000:
89 89 # zlib makes an internal copy, thus doubling memory usage for
90 90 # large files, so lets do this in pieces
91 91 z = zlib.compressobj()
92 92 p = []
93 93 pos = 0
94 94 while pos < l:
95 95 pos2 = pos + 2**20
96 96 p.append(z.compress(text[pos:pos2]))
97 97 pos = pos2
98 98 p.append(z.flush())
99 99 if sum(map(len, p)) < l:
100 100 bin = "".join(p)
101 101 else:
102 102 bin = _compress(text)
103 103 if bin is None or len(bin) > l:
104 104 if text[0] == '\0':
105 105 return ("", text)
106 106 return ('u', text)
107 107 return ("", bin)
108 108
109 109 def decompress(bin):
110 110 """ decompress the given input """
111 111 if not bin:
112 112 return bin
113 113 t = bin[0]
114 114 if t == '\0':
115 115 return bin
116 116 if t == 'x':
117 117 return _decompress(bin)
118 118 if t == 'u':
119 119 return bin[1:]
120 120 raise RevlogError(_("unknown compression type %r") % t)
121 121
122 122 indexformatv0 = ">4l20s20s20s"
123 123 v0shaoffset = 56
124 124
125 125 class revlogoldio(object):
126 126 def __init__(self):
127 127 self.size = struct.calcsize(indexformatv0)
128 128
129 129 def parseindex(self, data, inline):
130 130 s = self.size
131 131 index = []
132 132 nodemap = {nullid: nullrev}
133 133 n = off = 0
134 134 l = len(data)
135 135 while off + s <= l:
136 136 cur = data[off:off + s]
137 137 off += s
138 138 e = _unpack(indexformatv0, cur)
139 139 # transform to revlogv1 format
140 140 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
141 141 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
142 142 index.append(e2)
143 143 nodemap[e[6]] = n
144 144 n += 1
145 145
146 146 # add the magic null revision at -1
147 147 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
148 148
149 149 return index, nodemap, None
150 150
151 151 def packentry(self, entry, node, version, rev):
152 152 if gettype(entry[0]):
153 153 raise RevlogError(_("index entry flags need RevlogNG"))
154 154 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
155 155 node(entry[5]), node(entry[6]), entry[7])
156 156 return _pack(indexformatv0, *e2)
157 157
158 158 # index ng:
159 159 # 6 bytes: offset
160 160 # 2 bytes: flags
161 161 # 4 bytes: compressed length
162 162 # 4 bytes: uncompressed length
163 163 # 4 bytes: base rev
164 164 # 4 bytes: link rev
165 165 # 4 bytes: parent 1 rev
166 166 # 4 bytes: parent 2 rev
167 167 # 32 bytes: nodeid
168 168 indexformatng = ">Qiiiiii20s12x"
169 169 ngshaoffset = 32
170 170 versionformat = ">I"
171 171
172 172 class revlogio(object):
173 173 def __init__(self):
174 174 self.size = struct.calcsize(indexformatng)
175 175
176 176 def parseindex(self, data, inline):
177 177 # call the C implementation to parse the index data
178 178 index, cache = parsers.parse_index2(data, inline)
179 179 return index, None, cache
180 180
181 181 def packentry(self, entry, node, version, rev):
182 182 p = _pack(indexformatng, *entry)
183 183 if rev == 0:
184 184 p = _pack(versionformat, version) + p[4:]
185 185 return p
186 186
187 187 class revlog(object):
188 188 """
189 189 the underlying revision storage object
190 190
191 191 A revlog consists of two parts, an index and the revision data.
192 192
193 193 The index is a file with a fixed record size containing
194 194 information on each revision, including its nodeid (hash), the
195 195 nodeids of its parents, the position and offset of its data within
196 196 the data file, and the revision it's based on. Finally, each entry
197 197 contains a linkrev entry that can serve as a pointer to external
198 198 data.
199 199
200 200 The revision data itself is a linear collection of data chunks.
201 201 Each chunk represents a revision and is usually represented as a
202 202 delta against the previous chunk. To bound lookup time, runs of
203 203 deltas are limited to about 2 times the length of the original
204 204 version data. This makes retrieval of a version proportional to
205 205 its size, or O(1) relative to the number of revisions.
206 206
207 207 Both pieces of the revlog are written to in an append-only
208 208 fashion, which means we never need to rewrite a file to insert or
209 209 remove data, and can use some simple techniques to avoid the need
210 210 for locking while reading.
211 211 """
212 212 def __init__(self, opener, indexfile, shallowroot=None):
213 213 """
214 214 create a revlog object
215 215
216 216 opener is a function that abstracts the file opening operation
217 217 and can be used to implement COW semantics or the like.
218 218 """
219 219 self.indexfile = indexfile
220 220 self.datafile = indexfile[:-2] + ".d"
221 221 self.opener = opener
222 222 self._cache = None
223 223 self._chunkcache = (0, '')
224 224 self.index = []
225 225 self._shallowroot = shallowroot
226 226 self._parentdelta = 0
227 227 self._pcache = {}
228 228 self._nodecache = {nullid: nullrev}
229 229 self._nodepos = None
230 230
231 231 v = REVLOG_DEFAULT_VERSION
232 232 if hasattr(opener, 'options') and 'defversion' in opener.options:
233 233 v = opener.options['defversion']
234 234 if v & REVLOGNG:
235 235 v |= REVLOGNGINLINEDATA
236 236 if v & REVLOGNG and 'parentdelta' in opener.options:
237 237 self._parentdelta = 1
238 238
239 239 if shallowroot:
240 240 v |= REVLOGSHALLOW
241 241
242 242 i = ''
243 243 try:
244 244 f = self.opener(self.indexfile)
245 245 i = f.read()
246 246 f.close()
247 247 if len(i) > 0:
248 248 v = struct.unpack(versionformat, i[:4])[0]
249 249 except IOError, inst:
250 250 if inst.errno != errno.ENOENT:
251 251 raise
252 252
253 253 self.version = v
254 254 self._inline = v & REVLOGNGINLINEDATA
255 255 self._shallow = v & REVLOGSHALLOW
256 256 flags = v & ~0xFFFF
257 257 fmt = v & 0xFFFF
258 258 if fmt == REVLOGV0 and flags:
259 259 raise RevlogError(_("index %s unknown flags %#04x for format v0")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
262 262 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
263 263 % (self.indexfile, flags >> 16))
264 264 elif fmt > REVLOGNG:
265 265 raise RevlogError(_("index %s unknown format %d")
266 266 % (self.indexfile, fmt))
267 267
268 268 self._io = revlogio()
269 269 if self.version == REVLOGV0:
270 270 self._io = revlogoldio()
271 271 try:
272 272 d = self._io.parseindex(i, self._inline)
273 273 except (ValueError, IndexError):
274 274 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
275 275 self.index, nodemap, self._chunkcache = d
276 276 if nodemap is not None:
277 277 self.nodemap = self._nodecache = nodemap
278 278 if not self._chunkcache:
279 279 self._chunkclear()
280 280
281 281 def tip(self):
282 282 return self.node(len(self.index) - 2)
283 283 def __len__(self):
284 284 return len(self.index) - 1
285 285 def __iter__(self):
286 286 for i in xrange(len(self)):
287 287 yield i
288 288
289 289 @util.propertycache
290 290 def nodemap(self):
291 291 self.rev(self.node(0))
292 292 return self._nodecache
293 293
294 294 def rev(self, node):
295 295 try:
296 296 return self._nodecache[node]
297 297 except KeyError:
298 298 n = self._nodecache
299 299 i = self.index
300 300 p = self._nodepos
301 301 if p is None:
302 302 p = len(i) - 2
303 303 for r in xrange(p, -1, -1):
304 304 v = i[r][7]
305 305 n[v] = r
306 306 if v == node:
307 307 self._nodepos = r - 1
308 308 return r
309 309 raise LookupError(node, self.indexfile, _('no node'))
310 310
311 311 def node(self, rev):
312 312 return self.index[rev][7]
313 313 def linkrev(self, rev):
314 314 return self.index[rev][4]
315 315 def parents(self, node):
316 316 i = self.index
317 317 d = i[self.rev(node)]
318 318 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
319 319 def parentrevs(self, rev):
320 320 return self.index[rev][5:7]
321 321 def start(self, rev):
322 322 return int(self.index[rev][0] >> 16)
323 323 def end(self, rev):
324 324 return self.start(rev) + self.length(rev)
325 325 def length(self, rev):
326 326 return self.index[rev][1]
327 327 def base(self, rev):
328 328 return self.index[rev][3]
329 329 def flags(self, rev):
330 330 return self.index[rev][0] & 0xFFFF
331 331 def rawsize(self, rev):
332 332 """return the length of the uncompressed text for a given revision"""
333 333 l = self.index[rev][2]
334 334 if l >= 0:
335 335 return l
336 336
337 337 t = self.revision(self.node(rev))
338 338 return len(t)
339 339 size = rawsize
340 340
341 341 def reachable(self, node, stop=None):
342 342 """return the set of all nodes ancestral to a given node, including
343 343 the node itself, stopping when stop is matched"""
344 344 reachable = set((node,))
345 345 visit = [node]
346 346 if stop:
347 347 stopn = self.rev(stop)
348 348 else:
349 349 stopn = 0
350 350 while visit:
351 351 n = visit.pop(0)
352 352 if n == stop:
353 353 continue
354 354 if n == nullid:
355 355 continue
356 356 for p in self.parents(n):
357 357 if self.rev(p) < stopn:
358 358 continue
359 359 if p not in reachable:
360 360 reachable.add(p)
361 361 visit.append(p)
362 362 return reachable
363 363
364 364 def ancestors(self, *revs):
365 365 """Generate the ancestors of 'revs' in reverse topological order.
366 366
367 367 Yield a sequence of revision numbers starting with the parents
368 368 of each revision in revs, i.e., each revision is *not* considered
369 369 an ancestor of itself. Results are in breadth-first order:
370 370 parents of each rev in revs, then parents of those, etc. Result
371 371 does not include the null revision."""
372 372 visit = list(revs)
373 373 seen = set([nullrev])
374 374 while visit:
375 375 for parent in self.parentrevs(visit.pop(0)):
376 376 if parent not in seen:
377 377 visit.append(parent)
378 378 seen.add(parent)
379 379 yield parent
380 380
381 381 def descendants(self, *revs):
382 382 """Generate the descendants of 'revs' in revision order.
383 383
384 384 Yield a sequence of revision numbers starting with a child of
385 385 some rev in revs, i.e., each revision is *not* considered a
386 386 descendant of itself. Results are ordered by revision number (a
387 387 topological sort)."""
388 388 first = min(revs)
389 389 if first == nullrev:
390 390 for i in self:
391 391 yield i
392 392 return
393 393
394 394 seen = set(revs)
395 395 for i in xrange(first + 1, len(self)):
396 396 for x in self.parentrevs(i):
397 397 if x != nullrev and x in seen:
398 398 seen.add(i)
399 399 yield i
400 400 break
401 401
402 402 def findcommonmissing(self, common=None, heads=None):
403 403 """Return a tuple of the ancestors of common and the ancestors of heads
404 404 that are not ancestors of common.
405 405
406 406 More specifically, the second element is a list of nodes N such that
407 407 every N satisfies the following constraints:
408 408
409 409 1. N is an ancestor of some node in 'heads'
410 410 2. N is not an ancestor of any node in 'common'
411 411
412 412 The list is sorted by revision number, meaning it is
413 413 topologically sorted.
414 414
415 415 'heads' and 'common' are both lists of node IDs. If heads is
416 416 not supplied, uses all of the revlog's heads. If common is not
417 417 supplied, uses nullid."""
418 418 if common is None:
419 419 common = [nullid]
420 420 if heads is None:
421 421 heads = self.heads()
422 422
423 423 common = [self.rev(n) for n in common]
424 424 heads = [self.rev(n) for n in heads]
425 425
426 426 # we want the ancestors, but inclusive
427 427 has = set(self.ancestors(*common))
428 428 has.add(nullrev)
429 429 has.update(common)
430 430
431 431 # take all ancestors from heads that aren't in has
432 432 missing = set()
433 433 visit = [r for r in heads if r not in has]
434 434 while visit:
435 435 r = visit.pop(0)
436 436 if r in missing:
437 437 continue
438 438 else:
439 439 missing.add(r)
440 440 for p in self.parentrevs(r):
441 441 if p not in has:
442 442 visit.append(p)
443 443 missing = list(missing)
444 444 missing.sort()
445 445 return has, [self.node(r) for r in missing]
446 446
447 447 def findmissing(self, common=None, heads=None):
448 448 """Return the ancestors of heads that are not ancestors of common.
449 449
450 450 More specifically, return a list of nodes N such that every N
451 451 satisfies the following constraints:
452 452
453 453 1. N is an ancestor of some node in 'heads'
454 454 2. N is not an ancestor of any node in 'common'
455 455
456 456 The list is sorted by revision number, meaning it is
457 457 topologically sorted.
458 458
459 459 'heads' and 'common' are both lists of node IDs. If heads is
460 460 not supplied, uses all of the revlog's heads. If common is not
461 461 supplied, uses nullid."""
462 462 _common, missing = self.findcommonmissing(common, heads)
463 463 return missing
464 464
465 465 def nodesbetween(self, roots=None, heads=None):
466 466 """Return a topological path from 'roots' to 'heads'.
467 467
468 468 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
469 469 topologically sorted list of all nodes N that satisfy both of
470 470 these constraints:
471 471
472 472 1. N is a descendant of some node in 'roots'
473 473 2. N is an ancestor of some node in 'heads'
474 474
475 475 Every node is considered to be both a descendant and an ancestor
476 476 of itself, so every reachable node in 'roots' and 'heads' will be
477 477 included in 'nodes'.
478 478
479 479 'outroots' is the list of reachable nodes in 'roots', i.e., the
480 480 subset of 'roots' that is returned in 'nodes'. Likewise,
481 481 'outheads' is the subset of 'heads' that is also in 'nodes'.
482 482
483 483 'roots' and 'heads' are both lists of node IDs. If 'roots' is
484 484 unspecified, uses nullid as the only root. If 'heads' is
485 485 unspecified, uses list of all of the revlog's heads."""
486 486 nonodes = ([], [], [])
487 487 if roots is not None:
488 488 roots = list(roots)
489 489 if not roots:
490 490 return nonodes
491 491 lowestrev = min([self.rev(n) for n in roots])
492 492 else:
493 493 roots = [nullid] # Everybody's a descendent of nullid
494 494 lowestrev = nullrev
495 495 if (lowestrev == nullrev) and (heads is None):
496 496 # We want _all_ the nodes!
497 497 return ([self.node(r) for r in self], [nullid], list(self.heads()))
498 498 if heads is None:
499 499 # All nodes are ancestors, so the latest ancestor is the last
500 500 # node.
501 501 highestrev = len(self) - 1
502 502 # Set ancestors to None to signal that every node is an ancestor.
503 503 ancestors = None
504 504 # Set heads to an empty dictionary for later discovery of heads
505 505 heads = {}
506 506 else:
507 507 heads = list(heads)
508 508 if not heads:
509 509 return nonodes
510 510 ancestors = set()
511 511 # Turn heads into a dictionary so we can remove 'fake' heads.
512 512 # Also, later we will be using it to filter out the heads we can't
513 513 # find from roots.
514 514 heads = dict.fromkeys(heads, 0)
515 515 # Start at the top and keep marking parents until we're done.
516 516 nodestotag = set(heads)
517 517 # Remember where the top was so we can use it as a limit later.
518 518 highestrev = max([self.rev(n) for n in nodestotag])
519 519 while nodestotag:
520 520 # grab a node to tag
521 521 n = nodestotag.pop()
522 522 # Never tag nullid
523 523 if n == nullid:
524 524 continue
525 525 # A node's revision number represents its place in a
526 526 # topologically sorted list of nodes.
527 527 r = self.rev(n)
528 528 if r >= lowestrev:
529 529 if n not in ancestors:
530 530 # If we are possibly a descendent of one of the roots
531 531 # and we haven't already been marked as an ancestor
532 532 ancestors.add(n) # Mark as ancestor
533 533 # Add non-nullid parents to list of nodes to tag.
534 534 nodestotag.update([p for p in self.parents(n) if
535 535 p != nullid])
536 536 elif n in heads: # We've seen it before, is it a fake head?
537 537 # So it is, real heads should not be the ancestors of
538 538 # any other heads.
539 539 heads.pop(n)
540 540 if not ancestors:
541 541 return nonodes
542 542 # Now that we have our set of ancestors, we want to remove any
543 543 # roots that are not ancestors.
544 544
545 545 # If one of the roots was nullid, everything is included anyway.
546 546 if lowestrev > nullrev:
547 547 # But, since we weren't, let's recompute the lowest rev to not
548 548 # include roots that aren't ancestors.
549 549
550 550 # Filter out roots that aren't ancestors of heads
551 551 roots = [n for n in roots if n in ancestors]
552 552 # Recompute the lowest revision
553 553 if roots:
554 554 lowestrev = min([self.rev(n) for n in roots])
555 555 else:
556 556 # No more roots? Return empty list
557 557 return nonodes
558 558 else:
559 559 # We are descending from nullid, and don't need to care about
560 560 # any other roots.
561 561 lowestrev = nullrev
562 562 roots = [nullid]
563 563 # Transform our roots list into a set.
564 564 descendents = set(roots)
565 565 # Also, keep the original roots so we can filter out roots that aren't
566 566 # 'real' roots (i.e. are descended from other roots).
567 567 roots = descendents.copy()
568 568 # Our topologically sorted list of output nodes.
569 569 orderedout = []
570 570 # Don't start at nullid since we don't want nullid in our output list,
571 571 # and if nullid shows up in descendents, empty parents will look like
572 572 # they're descendents.
573 573 for r in xrange(max(lowestrev, 0), highestrev + 1):
574 574 n = self.node(r)
575 575 isdescendent = False
576 576 if lowestrev == nullrev: # Everybody is a descendent of nullid
577 577 isdescendent = True
578 578 elif n in descendents:
579 579 # n is already a descendent
580 580 isdescendent = True
581 581 # This check only needs to be done here because all the roots
582 582 # will start being marked as descendents before the loop.
583 583 if n in roots:
584 584 # If n was a root, check if it's a 'real' root.
585 585 p = tuple(self.parents(n))
586 586 # If any of its parents are descendents, it's not a root.
587 587 if (p[0] in descendents) or (p[1] in descendents):
588 588 roots.remove(n)
589 589 else:
590 590 p = tuple(self.parents(n))
591 591 # A node is a descendent if either of its parents are
592 592 # descendents. (We seeded the descendents list with the roots
593 593 # up there, remember?)
594 594 if (p[0] in descendents) or (p[1] in descendents):
595 595 descendents.add(n)
596 596 isdescendent = True
597 597 if isdescendent and ((ancestors is None) or (n in ancestors)):
598 598 # Only include nodes that are both descendents and ancestors.
599 599 orderedout.append(n)
600 600 if (ancestors is not None) and (n in heads):
601 601 # We're trying to figure out which heads are reachable
602 602 # from roots.
603 603 # Mark this head as having been reached
604 604 heads[n] = 1
605 605 elif ancestors is None:
606 606 # Otherwise, we're trying to discover the heads.
607 607 # Assume this is a head because if it isn't, the next step
608 608 # will eventually remove it.
609 609 heads[n] = 1
610 610 # But, obviously its parents aren't.
611 611 for p in self.parents(n):
612 612 heads.pop(p, None)
613 613 heads = [n for n in heads.iterkeys() if heads[n] != 0]
614 614 roots = list(roots)
615 615 assert orderedout
616 616 assert roots
617 617 assert heads
618 618 return (orderedout, roots, heads)
619 619
620 620 def heads(self, start=None, stop=None):
621 621 """return the list of all nodes that have no children
622 622
623 623 if start is specified, only heads that are descendants of
624 624 start will be returned
625 625 if stop is specified, it will consider all the revs from stop
626 626 as if they had no children
627 627 """
628 628 if start is None and stop is None:
629 629 count = len(self)
630 630 if not count:
631 631 return [nullid]
632 632 ishead = [1] * (count + 1)
633 633 index = self.index
634 634 for r in xrange(count):
635 635 e = index[r]
636 636 ishead[e[5]] = ishead[e[6]] = 0
637 637 return [self.node(r) for r in xrange(count) if ishead[r]]
638 638
639 639 if start is None:
640 640 start = nullid
641 641 if stop is None:
642 642 stop = []
643 643 stoprevs = set([self.rev(n) for n in stop])
644 644 startrev = self.rev(start)
645 645 reachable = set((startrev,))
646 646 heads = set((startrev,))
647 647
648 648 parentrevs = self.parentrevs
649 649 for r in xrange(startrev + 1, len(self)):
650 650 for p in parentrevs(r):
651 651 if p in reachable:
652 652 if r not in stoprevs:
653 653 reachable.add(r)
654 654 heads.add(r)
655 655 if p in heads and p not in stoprevs:
656 656 heads.remove(p)
657 657
658 658 return [self.node(r) for r in heads]
659 659
660 660 def children(self, node):
661 661 """find the children of a given node"""
662 662 c = []
663 663 p = self.rev(node)
664 664 for r in range(p + 1, len(self)):
665 665 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
666 666 if prevs:
667 667 for pr in prevs:
668 668 if pr == p:
669 669 c.append(self.node(r))
670 670 elif p == nullrev:
671 671 c.append(self.node(r))
672 672 return c
673 673
674 674 def descendant(self, start, end):
675 675 if start == nullrev:
676 676 return True
677 677 for i in self.descendants(start):
678 678 if i == end:
679 679 return True
680 680 elif i > end:
681 681 break
682 682 return False
683 683
684 684 def ancestor(self, a, b):
685 685 """calculate the least common ancestor of nodes a and b"""
686 686
687 687 # fast path, check if it is a descendant
688 688 a, b = self.rev(a), self.rev(b)
689 689 start, end = sorted((a, b))
690 690 if self.descendant(start, end):
691 691 return self.node(start)
692 692
693 693 def parents(rev):
694 694 return [p for p in self.parentrevs(rev) if p != nullrev]
695 695
696 696 c = ancestor.ancestor(a, b, parents)
697 697 if c is None:
698 698 return nullid
699 699
700 700 return self.node(c)
701 701
702 702 def _match(self, id):
703 703 if isinstance(id, (long, int)):
704 704 # rev
705 705 return self.node(id)
706 706 if len(id) == 20:
707 707 # possibly a binary node
708 708 # odds of a binary node being all hex in ASCII are 1 in 10**25
709 709 try:
710 710 node = id
711 711 self.rev(node) # quick search the index
712 712 return node
713 713 except LookupError:
714 714 pass # may be partial hex id
715 715 try:
716 716 # str(rev)
717 717 rev = int(id)
718 718 if str(rev) != id:
719 719 raise ValueError
720 720 if rev < 0:
721 721 rev = len(self) + rev
722 722 if rev < 0 or rev >= len(self):
723 723 raise ValueError
724 724 return self.node(rev)
725 725 except (ValueError, OverflowError):
726 726 pass
727 727 if len(id) == 40:
728 728 try:
729 729 # a full hex nodeid?
730 730 node = bin(id)
731 731 self.rev(node)
732 732 return node
733 733 except (TypeError, LookupError):
734 734 pass
735 735
736 736 def _partialmatch(self, id):
737 737 if id in self._pcache:
738 738 return self._pcache[id]
739 739
740 740 if len(id) < 40:
741 741 try:
742 742 # hex(node)[:...]
743 743 l = len(id) // 2 # grab an even number of digits
744 744 prefix = bin(id[:l * 2])
745 745 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
746 746 nl = [n for n in nl if hex(n).startswith(id)]
747 747 if len(nl) > 0:
748 748 if len(nl) == 1:
749 749 self._pcache[id] = nl[0]
750 750 return nl[0]
751 751 raise LookupError(id, self.indexfile,
752 752 _('ambiguous identifier'))
753 753 return None
754 754 except TypeError:
755 755 pass
756 756
757 757 def lookup(self, id):
758 758 """locate a node based on:
759 759 - revision number or str(revision number)
760 760 - nodeid or subset of hex nodeid
761 761 """
762 762 n = self._match(id)
763 763 if n is not None:
764 764 return n
765 765 n = self._partialmatch(id)
766 766 if n:
767 767 return n
768 768
769 769 raise LookupError(id, self.indexfile, _('no match found'))
770 770
771 771 def cmp(self, node, text):
772 772 """compare text with a given file revision
773 773
774 774 returns True if text is different than what is stored.
775 775 """
776 776 p1, p2 = self.parents(node)
777 777 return hash(text, p1, p2) != node
778 778
779 779 def _addchunk(self, offset, data):
780 780 o, d = self._chunkcache
781 781 # try to add to existing cache
782 782 if o + len(d) == offset and len(d) + len(data) < _chunksize:
783 783 self._chunkcache = o, d + data
784 784 else:
785 785 self._chunkcache = offset, data
786 786
787 787 def _loadchunk(self, offset, length):
788 788 if self._inline:
789 789 df = self.opener(self.indexfile)
790 790 else:
791 791 df = self.opener(self.datafile)
792 792
793 793 readahead = max(65536, length)
794 794 df.seek(offset)
795 795 d = df.read(readahead)
796 796 self._addchunk(offset, d)
797 797 if readahead > length:
798 798 return d[:length]
799 799 return d
800 800
801 801 def _getchunk(self, offset, length):
802 802 o, d = self._chunkcache
803 803 l = len(d)
804 804
805 805 # is it in the cache?
806 806 cachestart = offset - o
807 807 cacheend = cachestart + length
808 808 if cachestart >= 0 and cacheend <= l:
809 809 if cachestart == 0 and cacheend == l:
810 810 return d # avoid a copy
811 811 return d[cachestart:cacheend]
812 812
813 813 return self._loadchunk(offset, length)
814 814
815 815 def _chunkraw(self, startrev, endrev):
816 816 start = self.start(startrev)
817 817 length = self.end(endrev) - start
818 818 if self._inline:
819 819 start += (startrev + 1) * self._io.size
820 820 return self._getchunk(start, length)
821 821
822 822 def _chunk(self, rev):
823 823 return decompress(self._chunkraw(rev, rev))
824 824
825 def _chunkbase(self, rev):
826 return self._chunk(rev)
827
825 828 def _chunkclear(self):
826 829 self._chunkcache = (0, '')
827 830
828 831 def deltaparent(self, rev):
829 832 """return previous revision or parentrev according to flags"""
830 833 if self.flags(rev) & REVIDX_PARENTDELTA:
831 834 return self.parentrevs(rev)[0]
832 835 else:
833 836 return rev - 1
834 837
835 838 def revdiff(self, rev1, rev2):
836 839 """return or calculate a delta between two revisions"""
837 840 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
838 841 return self._chunk(rev2)
839 842
840 843 return mdiff.textdiff(self.revision(self.node(rev1)),
841 844 self.revision(self.node(rev2)))
842 845
843 846 def revision(self, node):
844 847 """return an uncompressed revision of a given node"""
845 848 cachedrev = None
846 849 if node == nullid:
847 850 return ""
848 851 if self._cache:
849 852 if self._cache[0] == node:
850 853 return self._cache[2]
851 854 cachedrev = self._cache[1]
852 855
853 856 # look up what we need to read
854 857 text = None
855 858 rev = self.rev(node)
856 859 base = self.base(rev)
857 860
858 861 # check rev flags
859 862 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
860 863 raise RevlogError(_('incompatible revision flag %x') %
861 864 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
862 865
863 866 # build delta chain
864 867 chain = []
865 868 index = self.index # for performance
866 869 iterrev = rev
867 870 e = index[iterrev]
868 871 while iterrev != base and iterrev != cachedrev:
869 872 chain.append(iterrev)
870 873 if e[0] & REVIDX_PARENTDELTA:
871 874 iterrev = e[5]
872 875 else:
873 876 iterrev -= 1
874 877 e = index[iterrev]
875 878 chain.reverse()
876 879 base = iterrev
877 880
878 881 if iterrev == cachedrev:
879 882 # cache hit
880 883 text = self._cache[2]
881 884
882 885 # drop cache to save memory
883 886 self._cache = None
884 887
885 888 self._chunkraw(base, rev)
886 889 if text is None:
887 text = self._chunk(base)
890 text = self._chunkbase(base)
888 891
889 892 bins = [self._chunk(r) for r in chain]
890 893 text = mdiff.patches(text, bins)
891 894
892 895 text = self._checkhash(text, node, rev)
893 896
894 897 self._cache = (node, rev, text)
895 898 return text
896 899
897 900 def _checkhash(self, text, node, rev):
898 901 p1, p2 = self.parents(node)
899 902 if (node != hash(text, p1, p2) and
900 903 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
901 904 raise RevlogError(_("integrity check failed on %s:%d")
902 905 % (self.indexfile, rev))
903 906 return text
904 907
905 908 def checkinlinesize(self, tr, fp=None):
906 909 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
907 910 return
908 911
909 912 trinfo = tr.find(self.indexfile)
910 913 if trinfo is None:
911 914 raise RevlogError(_("%s not found in the transaction")
912 915 % self.indexfile)
913 916
914 917 trindex = trinfo[2]
915 918 dataoff = self.start(trindex)
916 919
917 920 tr.add(self.datafile, dataoff)
918 921
919 922 if fp:
920 923 fp.flush()
921 924 fp.close()
922 925
923 926 df = self.opener(self.datafile, 'w')
924 927 try:
925 928 for r in self:
926 929 df.write(self._chunkraw(r, r))
927 930 finally:
928 931 df.close()
929 932
930 933 fp = self.opener(self.indexfile, 'w', atomictemp=True)
931 934 self.version &= ~(REVLOGNGINLINEDATA)
932 935 self._inline = False
933 936 for i in self:
934 937 e = self._io.packentry(self.index[i], self.node, self.version, i)
935 938 fp.write(e)
936 939
937 940 # if we don't call rename, the temp file will never replace the
938 941 # real index
939 942 fp.rename()
940 943
941 944 tr.replace(self.indexfile, trindex * self._io.size)
942 945 self._chunkclear()
943 946
944 947 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
945 948 """add a revision to the log
946 949
947 950 text - the revision data to add
948 951 transaction - the transaction object used for rollback
949 952 link - the linkrev data to add
950 953 p1, p2 - the parent nodeids of the revision
951 954 cachedelta - an optional precomputed delta
952 955 """
953 956 node = hash(text, p1, p2)
954 957 if (node in self.nodemap and
955 958 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
956 959 return node
957 960
958 961 dfh = None
959 962 if not self._inline:
960 963 dfh = self.opener(self.datafile, "a")
961 964 ifh = self.opener(self.indexfile, "a+")
962 965 try:
963 966 return self._addrevision(node, text, transaction, link, p1, p2,
964 967 cachedelta, ifh, dfh)
965 968 finally:
966 969 if dfh:
967 970 dfh.close()
968 971 ifh.close()
969 972
970 973 def _addrevision(self, node, text, transaction, link, p1, p2,
971 974 cachedelta, ifh, dfh):
972 975
973 976 btext = [text]
974 977 def buildtext():
975 978 if btext[0] is not None:
976 979 return btext[0]
977 980 # flush any pending writes here so we can read it in revision
978 981 if dfh:
979 982 dfh.flush()
980 983 ifh.flush()
981 984 basetext = self.revision(self.node(cachedelta[0]))
982 985 btext[0] = mdiff.patch(basetext, cachedelta[1])
983 986 chk = hash(btext[0], p1, p2)
984 987 if chk != node:
985 988 raise RevlogError(_("consistency error in delta"))
986 989 return btext[0]
987 990
988 991 def builddelta(rev):
989 992 # can we use the cached delta?
990 993 if cachedelta and cachedelta[0] == rev:
991 994 delta = cachedelta[1]
992 995 else:
993 996 t = buildtext()
994 997 ptext = self.revision(self.node(rev))
995 998 delta = mdiff.textdiff(ptext, t)
996 999 data = compress(delta)
997 1000 l = len(data[1]) + len(data[0])
998 1001 base = self.base(rev)
999 1002 dist = l + offset - self.start(base)
1000 1003 return dist, l, data, base
1001 1004
1002 1005 curr = len(self)
1003 1006 prev = curr - 1
1004 1007 base = curr
1005 1008 offset = self.end(prev)
1006 1009 flags = 0
1007 1010 d = None
1008 1011 p1r, p2r = self.rev(p1), self.rev(p2)
1009 1012
1010 1013 # should we try to build a delta?
1011 1014 if prev != nullrev:
1012 1015 d = builddelta(prev)
1013 1016 if self._parentdelta and prev != p1r:
1014 1017 d2 = builddelta(p1r)
1015 1018 if d2 < d:
1016 1019 d = d2
1017 1020 flags = REVIDX_PARENTDELTA
1018 1021 dist, l, data, base = d
1019 1022
1020 1023 # full versions are inserted when the needed deltas
1021 1024 # become comparable to the uncompressed text
1022 1025 # or the base revision is punched
1023 1026 if text is None:
1024 1027 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1025 1028 cachedelta[1])
1026 1029 else:
1027 1030 textlen = len(text)
1028 1031 if (d is None or dist > textlen * 2 or
1029 1032 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1030 1033 text = buildtext()
1031 1034 data = compress(text)
1032 1035 l = len(data[1]) + len(data[0])
1033 1036 base = curr
1034 1037
1035 1038 e = (offset_type(offset, flags), l, textlen,
1036 1039 base, link, p1r, p2r, node)
1037 1040 self.index.insert(-1, e)
1038 1041 self.nodemap[node] = curr
1039 1042
1040 1043 entry = self._io.packentry(e, self.node, self.version, curr)
1041 1044 if not self._inline:
1042 1045 transaction.add(self.datafile, offset)
1043 1046 transaction.add(self.indexfile, curr * len(entry))
1044 1047 if data[0]:
1045 1048 dfh.write(data[0])
1046 1049 dfh.write(data[1])
1047 1050 dfh.flush()
1048 1051 ifh.write(entry)
1049 1052 else:
1050 1053 offset += curr * self._io.size
1051 1054 transaction.add(self.indexfile, offset, curr)
1052 1055 ifh.write(entry)
1053 1056 ifh.write(data[0])
1054 1057 ifh.write(data[1])
1055 1058 self.checkinlinesize(transaction, ifh)
1056 1059
1057 1060 if type(text) == str: # only accept immutable objects
1058 1061 self._cache = (node, curr, text)
1059 1062 return node
1060 1063
1061 1064 def group(self, nodelist, bundler):
1062 1065 """Calculate a delta group, yielding a sequence of changegroup chunks
1063 1066 (strings).
1064 1067
1065 1068 Given a list of changeset revs, return a set of deltas and
1066 1069 metadata corresponding to nodes. The first delta is
1067 1070 first parent(nodelist[0]) -> nodelist[0], the receiver is
1068 1071 guaranteed to have this parent as it has all history before
1069 1072 these changesets. In the case firstparent is nullrev the
1070 1073 changegroup starts with a full revision.
1071 1074 """
1072 1075
1073 1076 revs = sorted([self.rev(n) for n in nodelist])
1074 1077
1075 1078 # if we don't have any revisions touched by these changesets, bail
1076 1079 if not revs:
1077 1080 yield bundler.close()
1078 1081 return
1079 1082
1080 1083 # add the parent of the first rev
1081 1084 p = self.parentrevs(revs[0])[0]
1082 1085 revs.insert(0, p)
1083 1086
1084 1087 # build deltas
1085 1088 for r in xrange(len(revs) - 1):
1086 1089 a, b = revs[r], revs[r + 1]
1087 1090 nb = self.node(b)
1088 1091 p1, p2 = self.parents(nb)
1089 1092 prefix = ''
1090 1093
1091 1094 if a == nullrev:
1092 1095 d = self.revision(nb)
1093 1096 prefix = mdiff.trivialdiffheader(len(d))
1094 1097 else:
1095 1098 d = self.revdiff(a, b)
1096 1099 for c in bundler.revchunk(self, nb, p1, p2, prefix, d):
1097 1100 yield c
1098 1101
1099 1102 yield bundler.close()
1100 1103
1101 1104 def addgroup(self, bundle, linkmapper, transaction):
1102 1105 """
1103 1106 add a delta group
1104 1107
1105 1108 given a set of deltas, add them to the revision log. the
1106 1109 first delta is against its parent, which should be in our
1107 1110 log, the rest are against the previous delta.
1108 1111 """
1109 1112
1110 1113 # track the base of the current delta log
1111 1114 node = None
1112 1115
1113 1116 r = len(self)
1114 1117 end = 0
1115 1118 if r:
1116 1119 end = self.end(r - 1)
1117 1120 ifh = self.opener(self.indexfile, "a+")
1118 1121 isize = r * self._io.size
1119 1122 if self._inline:
1120 1123 transaction.add(self.indexfile, end + isize, r)
1121 1124 dfh = None
1122 1125 else:
1123 1126 transaction.add(self.indexfile, isize, r)
1124 1127 transaction.add(self.datafile, end)
1125 1128 dfh = self.opener(self.datafile, "a")
1126 1129
1127 1130 try:
1128 1131 # loop through our set of deltas
1129 1132 chain = None
1130 1133 while 1:
1131 1134 chunkdata = bundle.parsechunk()
1132 1135 if not chunkdata:
1133 1136 break
1134 1137 node = chunkdata['node']
1135 1138 p1 = chunkdata['p1']
1136 1139 p2 = chunkdata['p2']
1137 1140 cs = chunkdata['cs']
1138 1141 delta = chunkdata['data']
1139 1142
1140 1143 link = linkmapper(cs)
1141 1144 if (node in self.nodemap and
1142 1145 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1143 1146 # this can happen if two branches make the same change
1144 1147 chain = node
1145 1148 continue
1146 1149
1147 1150 for p in (p1, p2):
1148 1151 if not p in self.nodemap:
1149 1152 if self._shallow:
1150 1153 # add null entries for missing parents
1151 1154 # XXX FIXME
1152 1155 #if base == nullrev:
1153 1156 # base = len(self)
1154 1157 #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1155 1158 # 0, 0, base, nullrev, nullrev, nullrev, p)
1156 1159 #self.index.insert(-1, e)
1157 1160 #self.nodemap[p] = r
1158 1161 #entry = self._io.packentry(e, self.node,
1159 1162 # self.version, r)
1160 1163 #ifh.write(entry)
1161 1164 #t, r = r, r + 1
1162 1165 raise LookupError(p, self.indexfile,
1163 1166 _('unknown parent'))
1164 1167 else:
1165 1168 raise LookupError(p, self.indexfile,
1166 1169 _('unknown parent'))
1167 1170
1168 1171 if not chain:
1169 1172 # retrieve the parent revision of the delta chain
1170 1173 chain = p1
1171 1174 if not chain in self.nodemap:
1172 1175 raise LookupError(chain, self.indexfile, _('unknown base'))
1173 1176
1174 1177 chainrev = self.rev(chain)
1175 1178 chain = self._addrevision(node, None, transaction, link,
1176 1179 p1, p2, (chainrev, delta), ifh, dfh)
1177 1180 if not dfh and not self._inline:
1178 1181 # addrevision switched from inline to conventional
1179 1182 # reopen the index
1180 1183 ifh.close()
1181 1184 dfh = self.opener(self.datafile, "a")
1182 1185 ifh = self.opener(self.indexfile, "a")
1183 1186 finally:
1184 1187 if dfh:
1185 1188 dfh.close()
1186 1189 ifh.close()
1187 1190
1188 1191 return node
1189 1192
1190 1193 def strip(self, minlink, transaction):
1191 1194 """truncate the revlog on the first revision with a linkrev >= minlink
1192 1195
1193 1196 This function is called when we're stripping revision minlink and
1194 1197 its descendants from the repository.
1195 1198
1196 1199 We have to remove all revisions with linkrev >= minlink, because
1197 1200 the equivalent changelog revisions will be renumbered after the
1198 1201 strip.
1199 1202
1200 1203 So we truncate the revlog on the first of these revisions, and
1201 1204 trust that the caller has saved the revisions that shouldn't be
1202 1205 removed and that it'll readd them after this truncation.
1203 1206 """
1204 1207 if len(self) == 0:
1205 1208 return
1206 1209
1207 1210 for rev in self:
1208 1211 if self.index[rev][4] >= minlink:
1209 1212 break
1210 1213 else:
1211 1214 return
1212 1215
1213 1216 # first truncate the files on disk
1214 1217 end = self.start(rev)
1215 1218 if not self._inline:
1216 1219 transaction.add(self.datafile, end)
1217 1220 end = rev * self._io.size
1218 1221 else:
1219 1222 end += rev * self._io.size
1220 1223
1221 1224 transaction.add(self.indexfile, end)
1222 1225
1223 1226 # then reset internal state in memory to forget those revisions
1224 1227 self._cache = None
1225 1228 self._chunkclear()
1226 1229 for x in xrange(rev, len(self)):
1227 1230 del self.nodemap[self.node(x)]
1228 1231
1229 1232 del self.index[rev:-1]
1230 1233
1231 1234 def checksize(self):
1232 1235 expected = 0
1233 1236 if len(self):
1234 1237 expected = max(0, self.end(len(self) - 1))
1235 1238
1236 1239 try:
1237 1240 f = self.opener(self.datafile)
1238 1241 f.seek(0, 2)
1239 1242 actual = f.tell()
1240 1243 f.close()
1241 1244 dd = actual - expected
1242 1245 except IOError, inst:
1243 1246 if inst.errno != errno.ENOENT:
1244 1247 raise
1245 1248 dd = 0
1246 1249
1247 1250 try:
1248 1251 f = self.opener(self.indexfile)
1249 1252 f.seek(0, 2)
1250 1253 actual = f.tell()
1251 1254 f.close()
1252 1255 s = self._io.size
1253 1256 i = max(0, actual // s)
1254 1257 di = actual - (i * s)
1255 1258 if self._inline:
1256 1259 databytes = 0
1257 1260 for r in self:
1258 1261 databytes += max(0, self.length(r))
1259 1262 dd = 0
1260 1263 di = actual - len(self) * s - databytes
1261 1264 except IOError, inst:
1262 1265 if inst.errno != errno.ENOENT:
1263 1266 raise
1264 1267 di = 0
1265 1268
1266 1269 return (dd, di)
1267 1270
1268 1271 def files(self):
1269 1272 res = [self.indexfile]
1270 1273 if not self._inline:
1271 1274 res.append(self.datafile)
1272 1275 return res
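The path touched by this change sits in revision(): build the delta chain, fetch the base text (now via _chunkbase instead of _chunk), then fold the remaining deltas over it with mdiff.patches. A self-contained sketch of that flow, with the accessors passed in as callables and applydelta standing in for mdiff.patches:

# Conceptual sketch of the reconstruction path above, not Mercurial's code.
# 'getbase' corresponds to self._chunkbase, 'getchunk' to self._chunk, and
# 'applydelta' stands in for mdiff.patches (which applies all deltas at once).
def reconstruct(getbase, getchunk, applydelta, base, chain):
    text = getbase(base)                 # base text of the delta chain
    for r in chain:                      # apply each stored delta in order
        text = applydelta(text, getchunk(r))
    return text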