revlog: remove the last bits of punched/shallow...
Sune Foldager
r14251:258fbccf default
@@ -1,1233 +1,1227 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGSHALLOW = (1 << 17)
31 30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 31 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
33 REVLOGNG_FLAGS = REVLOGNGINLINEDATA
35 34
36 35 # revlog index flags
37 36 REVIDX_KNOWN_FLAGS = 0
38 37
39 38 # max size of revlog with inline data
40 39 _maxinline = 131072
41 40 _chunksize = 1048576
42 41
43 42 RevlogError = error.RevlogError
44 43 LookupError = error.LookupError
45 44
46 45 def getoffset(q):
47 46 return int(q >> 16)
48 47
49 48 def gettype(q):
50 49 return int(q & 0xFFFF)
51 50
52 51 def offset_type(offset, type):
53 52 return long(long(offset) << 16 | type)
54 53
55 54 nullhash = _sha(nullid)
56 55
57 56 def hash(text, p1, p2):
58 57 """generate a hash from the given text and its parent hashes
59 58
60 59 This hash combines both the current file contents and its history
61 60 in a manner that makes it easy to distinguish nodes with the same
62 61 content in the revision graph.
63 62 """
64 63 # As of now, if one of the parent nodes is null, p2 is null
65 64 if p2 == nullid:
66 65 # deep copy of a hash is faster than creating one
67 66 s = nullhash.copy()
68 67 s.update(p1)
69 68 else:
70 69 # neither of the parent nodes is nullid
71 70 l = [p1, p2]
72 71 l.sort()
73 72 s = _sha(l[0])
74 73 s.update(l[1])
75 74 s.update(text)
76 75 return s.digest()
77 76
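For illustration, the parent-sorting scheme can be reproduced with plain hashlib; nullid (20 zero bytes) always sorts first, so the p2 == nullid fast path above is just an optimization of the general case. A minimal sketch, with names of my own choosing:

    import hashlib

    nullid = b"\x00" * 20

    def node_hash(text, p1, p2):
        # hash the two parent nodeids in sorted order, then the text;
        # sorting makes the result independent of parent order
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()

    # identical contents with different history yield different nodeids
    n1 = node_hash(b"data", nullid, nullid)
    n2 = node_hash(b"data", n1, nullid)
    assert n1 != n2
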
78 77 def compress(text):
79 78 """ generate a possibly-compressed representation of text """
80 79 if not text:
81 80 return ("", text)
82 81 l = len(text)
83 82 bin = None
84 83 if l < 44:
85 84 pass
86 85 elif l > 1000000:
87 86 # zlib makes an internal copy, thus doubling memory usage for
88 87 # large files, so let's do this in pieces
89 88 z = zlib.compressobj()
90 89 p = []
91 90 pos = 0
92 91 while pos < l:
93 92 pos2 = pos + 2**20
94 93 p.append(z.compress(text[pos:pos2]))
95 94 pos = pos2
96 95 p.append(z.flush())
97 96 if sum(map(len, p)) < l:
98 97 bin = "".join(p)
99 98 else:
100 99 bin = _compress(text)
101 100 if bin is None or len(bin) > l:
102 101 if text[0] == '\0':
103 102 return ("", text)
104 103 return ('u', text)
105 104 return ("", bin)
106 105
107 106 def decompress(bin):
108 107 """ decompress the given input """
109 108 if not bin:
110 109 return bin
111 110 t = bin[0]
112 111 if t == '\0':
113 112 return bin
114 113 if t == 'x':
115 114 return _decompress(bin)
116 115 if t == 'u':
117 116 return bin[1:]
118 117 raise RevlogError(_("unknown compression type %r") % t)
119 118
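A simplified sketch of the one-byte tagging convention shared by compress and decompress above: zlib output happens to start with 'x', so compressed chunks need no extra header, while literal data is tagged 'u' (or left alone when it already starts with '\0'). This sketch folds the (header, data) pair the real code returns into one byte string and skips the small-input and chunked-compression cases:

    import zlib

    def compress_chunk(text):
        if not text:
            return text
        bin = zlib.compress(text)
        if len(bin) < len(text):
            return bin                  # zlib stream, first byte is 'x'
        if text[:1] == b"\0":
            return text                 # '\0' already marks literal data
        return b"u" + text              # 'u' tags uncompressed data

    def decompress_chunk(data):
        if not data:
            return data
        t = data[:1]
        if t == b"\0":
            return data
        if t == b"x":
            return zlib.decompress(data)
        if t == b"u":
            return data[1:]
        raise ValueError("unknown compression type %r" % t)

    assert decompress_chunk(compress_chunk(b"a" * 100)) == b"a" * 100
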
120 119 indexformatv0 = ">4l20s20s20s"
121 120 v0shaoffset = 56
122 121
123 122 class revlogoldio(object):
124 123 def __init__(self):
125 124 self.size = struct.calcsize(indexformatv0)
126 125
127 126 def parseindex(self, data, inline):
128 127 s = self.size
129 128 index = []
130 129 nodemap = {nullid: nullrev}
131 130 n = off = 0
132 131 l = len(data)
133 132 while off + s <= l:
134 133 cur = data[off:off + s]
135 134 off += s
136 135 e = _unpack(indexformatv0, cur)
137 136 # transform to revlogv1 format
138 137 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 138 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 139 index.append(e2)
141 140 nodemap[e[6]] = n
142 141 n += 1
143 142
144 143 # add the magic null revision at -1
145 144 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146 145
147 146 return index, nodemap, None
148 147
149 148 def packentry(self, entry, node, version, rev):
150 149 if gettype(entry[0]):
151 150 raise RevlogError(_("index entry flags need RevlogNG"))
152 151 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 152 node(entry[5]), node(entry[6]), entry[7])
154 153 return _pack(indexformatv0, *e2)
155 154
156 155 # index ng:
157 156 # 6 bytes: offset
158 157 # 2 bytes: flags
159 158 # 4 bytes: compressed length
160 159 # 4 bytes: uncompressed length
161 160 # 4 bytes: base rev
162 161 # 4 bytes: link rev
163 162 # 4 bytes: parent 1 rev
164 163 # 4 bytes: parent 2 rev
165 164 # 32 bytes: nodeid
166 165 indexformatng = ">Qiiiiii20s12x"
167 166 ngshaoffset = 32
168 167 versionformat = ">I"
169 168
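The layout comment above maps field by field onto the format string: one quad holding offset and flags, six 4-byte ints, the 20-byte nodeid, and 12 bytes of padding, 64 bytes in all. A small packing sketch (the helper name is mine):

    import struct

    indexformatng = ">Qiiiiii20s12x"    # 64 bytes per index entry
    assert struct.calcsize(indexformatng) == 64

    def pack_entry(offset, flags, complen, rawlen, base, link, p1, p2, node):
        # offset and flags share the leading quad: offset in the high
        # 48 bits, per-revision flags in the low 16 bits
        offset_flags = (offset << 16) | flags
        return struct.pack(indexformatng, offset_flags, complen, rawlen,
                           base, link, p1, p2, node)

    entry = pack_entry(0, 0, 11, 11, 0, 0, -1, -1, b"\x00" * 20)
    fields = struct.unpack(indexformatng, entry)
    assert fields[0] >> 16 == 0 and fields[0] & 0xFFFF == 0
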
170 169 class revlogio(object):
171 170 def __init__(self):
172 171 self.size = struct.calcsize(indexformatng)
173 172
174 173 def parseindex(self, data, inline):
175 174 # call the C implementation to parse the index data
176 175 index, cache = parsers.parse_index2(data, inline)
177 176 return index, None, cache
178 177
179 178 def packentry(self, entry, node, version, rev):
180 179 p = _pack(indexformatng, *entry)
181 180 if rev == 0:
182 181 p = _pack(versionformat, version) + p[4:]
183 182 return p
184 183
185 184 class revlog(object):
186 185 """
187 186 the underlying revision storage object
188 187
189 188 A revlog consists of two parts, an index and the revision data.
190 189
191 190 The index is a file with a fixed record size containing
192 191 information on each revision, including its nodeid (hash), the
193 192 nodeids of its parents, the position and offset of its data within
194 193 the data file, and the revision it's based on. Finally, each entry
195 194 contains a linkrev entry that can serve as a pointer to external
196 195 data.
197 196
198 197 The revision data itself is a linear collection of data chunks.
199 198 Each chunk represents a revision and is usually represented as a
200 199 delta against the previous chunk. To bound lookup time, runs of
201 200 deltas are limited to about 2 times the length of the original
202 201 version data. This makes retrieval of a version proportional to
203 202 its size, or O(1) relative to the number of revisions.
204 203
205 204 Both pieces of the revlog are written to in an append-only
206 205 fashion, which means we never need to rewrite a file to insert or
207 206 remove data, and can use some simple techniques to avoid the need
208 207 for locking while reading.
209 208 """
210 def __init__(self, opener, indexfile, shallowroot=None):
209 def __init__(self, opener, indexfile):
211 210 """
212 211 create a revlog object
213 212
214 213 opener is a function that abstracts the file opening operation
215 214 and can be used to implement COW semantics or the like.
216 215 """
217 216 self.indexfile = indexfile
218 217 self.datafile = indexfile[:-2] + ".d"
219 218 self.opener = opener
220 219 self._cache = None
221 220 self._chunkcache = (0, '')
222 221 self.index = []
223 self._shallowroot = shallowroot
224 222 self._pcache = {}
225 223 self._nodecache = {nullid: nullrev}
226 224 self._nodepos = None
227 225
228 226 v = REVLOG_DEFAULT_VERSION
229 227 if hasattr(opener, 'options') and 'defversion' in opener.options:
230 228 v = opener.options['defversion']
231 229 if v & REVLOGNG:
232 230 v |= REVLOGNGINLINEDATA
233 231
234 if shallowroot:
235 v |= REVLOGSHALLOW
236
237 232 i = ''
238 233 try:
239 234 f = self.opener(self.indexfile)
240 235 i = f.read()
241 236 f.close()
242 237 if len(i) > 0:
243 238 v = struct.unpack(versionformat, i[:4])[0]
244 239 except IOError, inst:
245 240 if inst.errno != errno.ENOENT:
246 241 raise
247 242
248 243 self.version = v
249 244 self._inline = v & REVLOGNGINLINEDATA
250 self._shallow = v & REVLOGSHALLOW
251 245 flags = v & ~0xFFFF
252 246 fmt = v & 0xFFFF
253 247 if fmt == REVLOGV0 and flags:
254 248 raise RevlogError(_("index %s unknown flags %#04x for format v0")
255 249 % (self.indexfile, flags >> 16))
256 250 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
257 251 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
258 252 % (self.indexfile, flags >> 16))
259 253 elif fmt > REVLOGNG:
260 254 raise RevlogError(_("index %s unknown format %d")
261 255 % (self.indexfile, fmt))
262 256
263 257 self._io = revlogio()
264 258 if self.version == REVLOGV0:
265 259 self._io = revlogoldio()
266 260 try:
267 261 d = self._io.parseindex(i, self._inline)
268 262 except (ValueError, IndexError):
269 263 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
270 264 self.index, nodemap, self._chunkcache = d
271 265 if nodemap is not None:
272 266 self.nodemap = self._nodecache = nodemap
273 267 if not self._chunkcache:
274 268 self._chunkclear()
275 269
276 270 def tip(self):
277 271 return self.node(len(self.index) - 2)
278 272 def __len__(self):
279 273 return len(self.index) - 1
280 274 def __iter__(self):
281 275 for i in xrange(len(self)):
282 276 yield i
283 277
284 278 @util.propertycache
285 279 def nodemap(self):
286 280 self.rev(self.node(0))
287 281 return self._nodecache
288 282
289 283 def rev(self, node):
290 284 try:
291 285 return self._nodecache[node]
292 286 except KeyError:
293 287 n = self._nodecache
294 288 i = self.index
295 289 p = self._nodepos
296 290 if p is None:
297 291 p = len(i) - 2
298 292 for r in xrange(p, -1, -1):
299 293 v = i[r][7]
300 294 n[v] = r
301 295 if v == node:
302 296 self._nodepos = r - 1
303 297 return r
304 298 raise LookupError(node, self.indexfile, _('no node'))
305 299
306 300 def node(self, rev):
307 301 return self.index[rev][7]
308 302 def linkrev(self, rev):
309 303 return self.index[rev][4]
310 304 def parents(self, node):
311 305 i = self.index
312 306 d = i[self.rev(node)]
313 307 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
314 308 def parentrevs(self, rev):
315 309 return self.index[rev][5:7]
316 310 def start(self, rev):
317 311 return int(self.index[rev][0] >> 16)
318 312 def end(self, rev):
319 313 return self.start(rev) + self.length(rev)
320 314 def length(self, rev):
321 315 return self.index[rev][1]
322 316 def base(self, rev):
323 317 return self.index[rev][3]
324 318 def flags(self, rev):
325 319 return self.index[rev][0] & 0xFFFF
326 320 def rawsize(self, rev):
327 321 """return the length of the uncompressed text for a given revision"""
328 322 l = self.index[rev][2]
329 323 if l >= 0:
330 324 return l
331 325
332 326 t = self.revision(self.node(rev))
333 327 return len(t)
334 328 size = rawsize
335 329
336 330 def reachable(self, node, stop=None):
337 331 """return the set of all nodes ancestral to a given node, including
338 332 the node itself, stopping when stop is matched"""
339 333 reachable = set((node,))
340 334 visit = [node]
341 335 if stop:
342 336 stopn = self.rev(stop)
343 337 else:
344 338 stopn = 0
345 339 while visit:
346 340 n = visit.pop(0)
347 341 if n == stop:
348 342 continue
349 343 if n == nullid:
350 344 continue
351 345 for p in self.parents(n):
352 346 if self.rev(p) < stopn:
353 347 continue
354 348 if p not in reachable:
355 349 reachable.add(p)
356 350 visit.append(p)
357 351 return reachable
358 352
359 353 def ancestors(self, *revs):
360 354 """Generate the ancestors of 'revs' in reverse topological order.
361 355
362 356 Yield a sequence of revision numbers starting with the parents
363 357 of each revision in revs, i.e., each revision is *not* considered
364 358 an ancestor of itself. Results are in breadth-first order:
365 359 parents of each rev in revs, then parents of those, etc. Result
366 360 does not include the null revision."""
367 361 visit = list(revs)
368 362 seen = set([nullrev])
369 363 while visit:
370 364 for parent in self.parentrevs(visit.pop(0)):
371 365 if parent not in seen:
372 366 visit.append(parent)
373 367 seen.add(parent)
374 368 yield parent
375 369
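The generator above is a plain breadth-first walk over parent pointers. A standalone sketch against any parentrevs-style callable (the sample graph is made up):

    def walk_ancestors(parentrevs, *revs):
        # breadth-first: parents of revs, then grandparents, and so on;
        # nullrev (-1) is pre-seeded as seen so it is never yielded
        visit = list(revs)
        seen = set([-1])
        while visit:
            for parent in parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    # linear history 0 <- 1 <- 2; a rev's parents are (rev - 1, -1)
    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
    assert list(walk_ancestors(parents.get, 2)) == [1, 0]
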
376 370 def descendants(self, *revs):
377 371 """Generate the descendants of 'revs' in revision order.
378 372
379 373 Yield a sequence of revision numbers starting with a child of
380 374 some rev in revs, i.e., each revision is *not* considered a
381 375 descendant of itself. Results are ordered by revision number (a
382 376 topological sort)."""
383 377 first = min(revs)
384 378 if first == nullrev:
385 379 for i in self:
386 380 yield i
387 381 return
388 382
389 383 seen = set(revs)
390 384 for i in xrange(first + 1, len(self)):
391 385 for x in self.parentrevs(i):
392 386 if x != nullrev and x in seen:
393 387 seen.add(i)
394 388 yield i
395 389 break
396 390
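descendants runs in the opposite direction: since a revision number is always greater than its parents', a single forward scan suffices. Another standalone sketch:

    def walk_descendants(parentrevs, count, *revs):
        # a rev whose parent is already marked is itself a descendant;
        # one pass works because parents have smaller numbers
        seen = set(revs)
        for i in range(min(revs) + 1, count):
            if any(p in seen for p in parentrevs(i) if p != -1):
                seen.add(i)
                yield i

    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
    assert list(walk_descendants(parents.get, 4, 1)) == [2, 3]
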
397 391 def findcommonmissing(self, common=None, heads=None):
398 392 """Return a tuple of the ancestors of common and the ancestors of heads
399 393 that are not ancestors of common.
400 394
401 395 More specifically, the second element is a list of nodes N such that
402 396 every N satisfies the following constraints:
403 397
404 398 1. N is an ancestor of some node in 'heads'
405 399 2. N is not an ancestor of any node in 'common'
406 400
407 401 The list is sorted by revision number, meaning it is
408 402 topologically sorted.
409 403
410 404 'heads' and 'common' are both lists of node IDs. If heads is
411 405 not supplied, uses all of the revlog's heads. If common is not
412 406 supplied, uses nullid."""
413 407 if common is None:
414 408 common = [nullid]
415 409 if heads is None:
416 410 heads = self.heads()
417 411
418 412 common = [self.rev(n) for n in common]
419 413 heads = [self.rev(n) for n in heads]
420 414
421 415 # we want the ancestors, but inclusive
422 416 has = set(self.ancestors(*common))
423 417 has.add(nullrev)
424 418 has.update(common)
425 419
426 420 # take all ancestors from heads that aren't in has
427 421 missing = set()
428 422 visit = [r for r in heads if r not in has]
429 423 while visit:
430 424 r = visit.pop(0)
431 425 if r in missing:
432 426 continue
433 427 else:
434 428 missing.add(r)
435 429 for p in self.parentrevs(r):
436 430 if p not in has:
437 431 visit.append(p)
438 432 missing = list(missing)
439 433 missing.sort()
440 434 return has, [self.node(r) for r in missing]
441 435
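In set terms, the second return value is ancestors(heads) minus ancestors(common), both taken inclusively; the method computes it by refusing to walk past anything already in 'has'. A compact self-contained sketch:

    def missing_revs(parentrevs, common, heads):
        # 'has' = common plus everything reachable from it, inclusive
        has = set(common) | set([-1])
        stack = list(common)
        while stack:
            for p in parentrevs(stack.pop()):
                if p not in has:
                    has.add(p)
                    stack.append(p)
        # walk back from heads, stopping at the 'has' frontier
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r not in missing:
                missing.add(r)
                visit.extend(p for p in parentrevs(r) if p not in has)
        return sorted(missing)

    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
    assert missing_revs(parents.get, [1], [3]) == [3]
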
442 436 def findmissing(self, common=None, heads=None):
443 437 """Return the ancestors of heads that are not ancestors of common.
444 438
445 439 More specifically, return a list of nodes N such that every N
446 440 satisfies the following constraints:
447 441
448 442 1. N is an ancestor of some node in 'heads'
449 443 2. N is not an ancestor of any node in 'common'
450 444
451 445 The list is sorted by revision number, meaning it is
452 446 topologically sorted.
453 447
454 448 'heads' and 'common' are both lists of node IDs. If heads is
455 449 not supplied, uses all of the revlog's heads. If common is not
456 450 supplied, uses nullid."""
457 451 _common, missing = self.findcommonmissing(common, heads)
458 452 return missing
459 453
460 454 def nodesbetween(self, roots=None, heads=None):
461 455 """Return a topological path from 'roots' to 'heads'.
462 456
463 457 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
464 458 topologically sorted list of all nodes N that satisfy both of
465 459 these constraints:
466 460
467 461 1. N is a descendant of some node in 'roots'
468 462 2. N is an ancestor of some node in 'heads'
469 463
470 464 Every node is considered to be both a descendant and an ancestor
471 465 of itself, so every reachable node in 'roots' and 'heads' will be
472 466 included in 'nodes'.
473 467
474 468 'outroots' is the list of reachable nodes in 'roots', i.e., the
475 469 subset of 'roots' that is returned in 'nodes'. Likewise,
476 470 'outheads' is the subset of 'heads' that is also in 'nodes'.
477 471
478 472 'roots' and 'heads' are both lists of node IDs. If 'roots' is
479 473 unspecified, uses nullid as the only root. If 'heads' is
480 474 unspecified, uses list of all of the revlog's heads."""
481 475 nonodes = ([], [], [])
482 476 if roots is not None:
483 477 roots = list(roots)
484 478 if not roots:
485 479 return nonodes
486 480 lowestrev = min([self.rev(n) for n in roots])
487 481 else:
488 482 roots = [nullid] # Everybody's a descendent of nullid
489 483 lowestrev = nullrev
490 484 if (lowestrev == nullrev) and (heads is None):
491 485 # We want _all_ the nodes!
492 486 return ([self.node(r) for r in self], [nullid], list(self.heads()))
493 487 if heads is None:
494 488 # All nodes are ancestors, so the latest ancestor is the last
495 489 # node.
496 490 highestrev = len(self) - 1
497 491 # Set ancestors to None to signal that every node is an ancestor.
498 492 ancestors = None
499 493 # Set heads to an empty dictionary for later discovery of heads
500 494 heads = {}
501 495 else:
502 496 heads = list(heads)
503 497 if not heads:
504 498 return nonodes
505 499 ancestors = set()
506 500 # Turn heads into a dictionary so we can remove 'fake' heads.
507 501 # Also, later we will be using it to filter out the heads we can't
508 502 # find from roots.
509 503 heads = dict.fromkeys(heads, False)
510 504 # Start at the top and keep marking parents until we're done.
511 505 nodestotag = set(heads)
512 506 # Remember where the top was so we can use it as a limit later.
513 507 highestrev = max([self.rev(n) for n in nodestotag])
514 508 while nodestotag:
515 509 # grab a node to tag
516 510 n = nodestotag.pop()
517 511 # Never tag nullid
518 512 if n == nullid:
519 513 continue
520 514 # A node's revision number represents its place in a
521 515 # topologically sorted list of nodes.
522 516 r = self.rev(n)
523 517 if r >= lowestrev:
524 518 if n not in ancestors:
525 519 # If we are possibly a descendent of one of the roots
526 520 # and we haven't already been marked as an ancestor
527 521 ancestors.add(n) # Mark as ancestor
528 522 # Add non-nullid parents to list of nodes to tag.
529 523 nodestotag.update([p for p in self.parents(n) if
530 524 p != nullid])
531 525 elif n in heads: # We've seen it before, is it a fake head?
532 526 # So it is, real heads should not be the ancestors of
533 527 # any other heads.
534 528 heads.pop(n)
535 529 if not ancestors:
536 530 return nonodes
537 531 # Now that we have our set of ancestors, we want to remove any
538 532 # roots that are not ancestors.
539 533
540 534 # If one of the roots was nullid, everything is included anyway.
541 535 if lowestrev > nullrev:
542 536 # But, since we weren't, let's recompute the lowest rev to not
543 537 # include roots that aren't ancestors.
544 538
545 539 # Filter out roots that aren't ancestors of heads
546 540 roots = [n for n in roots if n in ancestors]
547 541 # Recompute the lowest revision
548 542 if roots:
549 543 lowestrev = min([self.rev(n) for n in roots])
550 544 else:
551 545 # No more roots? Return empty list
552 546 return nonodes
553 547 else:
554 548 # We are descending from nullid, and don't need to care about
555 549 # any other roots.
556 550 lowestrev = nullrev
557 551 roots = [nullid]
558 552 # Transform our roots list into a set.
559 553 descendents = set(roots)
560 554 # Also, keep the original roots so we can filter out roots that aren't
561 555 # 'real' roots (i.e. are descended from other roots).
562 556 roots = descendents.copy()
563 557 # Our topologically sorted list of output nodes.
564 558 orderedout = []
565 559 # Don't start at nullid since we don't want nullid in our output list,
566 560 # and if nullid shows up in descedents, empty parents will look like
567 561 # they're descendents.
568 562 for r in xrange(max(lowestrev, 0), highestrev + 1):
569 563 n = self.node(r)
570 564 isdescendent = False
571 565 if lowestrev == nullrev: # Everybody is a descendent of nullid
572 566 isdescendent = True
573 567 elif n in descendents:
574 568 # n is already a descendent
575 569 isdescendent = True
576 570 # This check only needs to be done here because all the roots
577 571 # will start being marked as descendents before the loop.
578 572 if n in roots:
579 573 # If n was a root, check if it's a 'real' root.
580 574 p = tuple(self.parents(n))
581 575 # If any of its parents are descendents, it's not a root.
582 576 if (p[0] in descendents) or (p[1] in descendents):
583 577 roots.remove(n)
584 578 else:
585 579 p = tuple(self.parents(n))
586 580 # A node is a descendent if either of its parents are
587 581 # descendents. (We seeded the descendents set with the roots
588 582 # up there, remember?)
589 583 if (p[0] in descendents) or (p[1] in descendents):
590 584 descendents.add(n)
591 585 isdescendent = True
592 586 if isdescendent and ((ancestors is None) or (n in ancestors)):
593 587 # Only include nodes that are both descendents and ancestors.
594 588 orderedout.append(n)
595 589 if (ancestors is not None) and (n in heads):
596 590 # We're trying to figure out which heads are reachable
597 591 # from roots.
598 592 # Mark this head as having been reached
599 593 heads[n] = True
600 594 elif ancestors is None:
601 595 # Otherwise, we're trying to discover the heads.
602 596 # Assume this is a head because if it isn't, the next step
603 597 # will eventually remove it.
604 598 heads[n] = True
605 599 # But, obviously its parents aren't.
606 600 for p in self.parents(n):
607 601 heads.pop(p, None)
608 602 heads = [n for n, flag in heads.iteritems() if flag]
609 603 roots = list(roots)
610 604 assert orderedout
611 605 assert roots
612 606 assert heads
613 607 return (orderedout, roots, heads)
614 608
615 609 def headrevs(self):
616 610 count = len(self)
617 611 if not count:
618 612 return [nullrev]
619 613 ishead = [1] * (count + 1)
620 614 index = self.index
621 615 for r in xrange(count):
622 616 e = index[r]
623 617 ishead[e[5]] = ishead[e[6]] = 0
624 618 return [r for r in xrange(count) if ishead[r]]
625 619
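headrevs inverts the obvious question: rather than asking which revisions have children, it marks every revision that appears as someone's parent and keeps whatever is left unmarked. The original writes into a count + 1 array so nullrev (-1) harmlessly lands in the extra slot; the sketch below guards explicitly instead:

    def head_revs(parentrevs, count):
        if not count:
            return [-1]
        ishead = [True] * count
        for r in range(count):
            for p in parentrevs(r):
                if p >= 0:
                    ishead[p] = False
        return [r for r in range(count) if ishead[r]]

    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
    assert head_revs(parents.get, 4) == [2, 3]
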
626 620 def heads(self, start=None, stop=None):
627 621 """return the list of all nodes that have no children
628 622
629 623 if start is specified, only heads that are descendants of
630 624 start will be returned
631 625 if stop is specified, it will consider all the revs from stop
632 626 as if they had no children
633 627 """
634 628 if start is None and stop is None:
635 629 if not len(self):
636 630 return [nullid]
637 631 return [self.node(r) for r in self.headrevs()]
638 632
639 633 if start is None:
640 634 start = nullid
641 635 if stop is None:
642 636 stop = []
643 637 stoprevs = set([self.rev(n) for n in stop])
644 638 startrev = self.rev(start)
645 639 reachable = set((startrev,))
646 640 heads = set((startrev,))
647 641
648 642 parentrevs = self.parentrevs
649 643 for r in xrange(startrev + 1, len(self)):
650 644 for p in parentrevs(r):
651 645 if p in reachable:
652 646 if r not in stoprevs:
653 647 reachable.add(r)
654 648 heads.add(r)
655 649 if p in heads and p not in stoprevs:
656 650 heads.remove(p)
657 651
658 652 return [self.node(r) for r in heads]
659 653
660 654 def children(self, node):
661 655 """find the children of a given node"""
662 656 c = []
663 657 p = self.rev(node)
664 658 for r in range(p + 1, len(self)):
665 659 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
666 660 if prevs:
667 661 for pr in prevs:
668 662 if pr == p:
669 663 c.append(self.node(r))
670 664 elif p == nullrev:
671 665 c.append(self.node(r))
672 666 return c
673 667
674 668 def descendant(self, start, end):
675 669 if start == nullrev:
676 670 return True
677 671 for i in self.descendants(start):
678 672 if i == end:
679 673 return True
680 674 elif i > end:
681 675 break
682 676 return False
683 677
684 678 def ancestor(self, a, b):
685 679 """calculate the least common ancestor of nodes a and b"""
686 680
687 681 # fast path, check if it is a descendant
688 682 a, b = self.rev(a), self.rev(b)
689 683 start, end = sorted((a, b))
690 684 if self.descendant(start, end):
691 685 return self.node(start)
692 686
693 687 def parents(rev):
694 688 return [p for p in self.parentrevs(rev) if p != nullrev]
695 689
696 690 c = ancestor.ancestor(a, b, parents)
697 691 if c is None:
698 692 return nullid
699 693
700 694 return self.node(c)
701 695
702 696 def _match(self, id):
703 697 if isinstance(id, (long, int)):
704 698 # rev
705 699 return self.node(id)
706 700 if len(id) == 20:
707 701 # possibly a binary node
708 702 # odds of a binary node being all hex in ASCII are 1 in 10**25
709 703 try:
710 704 node = id
711 705 self.rev(node) # quick search the index
712 706 return node
713 707 except LookupError:
714 708 pass # may be partial hex id
715 709 try:
716 710 # str(rev)
717 711 rev = int(id)
718 712 if str(rev) != id:
719 713 raise ValueError
720 714 if rev < 0:
721 715 rev = len(self) + rev
722 716 if rev < 0 or rev >= len(self):
723 717 raise ValueError
724 718 return self.node(rev)
725 719 except (ValueError, OverflowError):
726 720 pass
727 721 if len(id) == 40:
728 722 try:
729 723 # a full hex nodeid?
730 724 node = bin(id)
731 725 self.rev(node)
732 726 return node
733 727 except (TypeError, LookupError):
734 728 pass
735 729
736 730 def _partialmatch(self, id):
737 731 if id in self._pcache:
738 732 return self._pcache[id]
739 733
740 734 if len(id) < 40:
741 735 try:
742 736 # hex(node)[:...]
743 737 l = len(id) // 2 # grab an even number of digits
744 738 prefix = bin(id[:l * 2])
745 739 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
746 740 nl = [n for n in nl if hex(n).startswith(id)]
747 741 if len(nl) > 0:
748 742 if len(nl) == 1:
749 743 self._pcache[id] = nl[0]
750 744 return nl[0]
751 745 raise LookupError(id, self.indexfile,
752 746 _('ambiguous identifier'))
753 747 return None
754 748 except TypeError:
755 749 pass
756 750
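The trick in _partialmatch: take an even number of hex digits, convert them to a binary prefix for a cheap startswith scan over the index, then re-filter on the full (possibly odd-length) hex string. A sketch over a plain list of binary nodeids, with a bare LookupError standing in for Mercurial's richer one:

    def partial_match(nodes, hexid):
        # an even number of hex digits gives a whole-byte binary prefix
        prefix = bytes.fromhex(hexid[:len(hexid) // 2 * 2])
        nl = [n for n in nodes if n.startswith(prefix)
              and n.hex().startswith(hexid)]
        if len(nl) > 1:
            raise LookupError("ambiguous identifier: %s" % hexid)
        return nl[0] if nl else None

    nodes = [bytes.fromhex("ab" * 20), bytes.fromhex("cd" * 20)]
    assert partial_match(nodes, "aba") == nodes[0]
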
757 751 def lookup(self, id):
758 752 """locate a node based on:
759 753 - revision number or str(revision number)
760 754 - nodeid or subset of hex nodeid
761 755 """
762 756 n = self._match(id)
763 757 if n is not None:
764 758 return n
765 759 n = self._partialmatch(id)
766 760 if n:
767 761 return n
768 762
769 763 raise LookupError(id, self.indexfile, _('no match found'))
770 764
771 765 def cmp(self, node, text):
772 766 """compare text with a given file revision
773 767
774 768 returns True if text is different from what is stored.
775 769 """
776 770 p1, p2 = self.parents(node)
777 771 return hash(text, p1, p2) != node
778 772
779 773 def _addchunk(self, offset, data):
780 774 o, d = self._chunkcache
781 775 # try to add to existing cache
782 776 if o + len(d) == offset and len(d) + len(data) < _chunksize:
783 777 self._chunkcache = o, d + data
784 778 else:
785 779 self._chunkcache = offset, data
786 780
787 781 def _loadchunk(self, offset, length):
788 782 if self._inline:
789 783 df = self.opener(self.indexfile)
790 784 else:
791 785 df = self.opener(self.datafile)
792 786
793 787 readahead = max(65536, length)
794 788 df.seek(offset)
795 789 d = df.read(readahead)
796 790 self._addchunk(offset, d)
797 791 if readahead > length:
798 792 return d[:length]
799 793 return d
800 794
801 795 def _getchunk(self, offset, length):
802 796 o, d = self._chunkcache
803 797 l = len(d)
804 798
805 799 # is it in the cache?
806 800 cachestart = offset - o
807 801 cacheend = cachestart + length
808 802 if cachestart >= 0 and cacheend <= l:
809 803 if cachestart == 0 and cacheend == l:
810 804 return d # avoid a copy
811 805 return d[cachestart:cacheend]
812 806
813 807 return self._loadchunk(offset, length)
814 808
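The chunk cache is a single (offset, data) window: _getchunk serves a request when the byte range falls entirely inside it, and otherwise reloads with read-ahead. The containment test, spelled out with illustrative names:

    def from_cache(cache, offset, length):
        # translate the absolute request into positions relative to
        # the cached window; negative start or overlong end is a miss
        o, d = cache
        start = offset - o
        end = start + length
        if start >= 0 and end <= len(d):
            return d if (start == 0 and end == len(d)) else d[start:end]
        return None                     # miss: caller reads from disk

    assert from_cache((100, b"abcdef"), 102, 3) == b"cde"
    assert from_cache((100, b"abcdef"), 99, 3) is None
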
815 809 def _chunkraw(self, startrev, endrev):
816 810 start = self.start(startrev)
817 811 length = self.end(endrev) - start
818 812 if self._inline:
819 813 start += (startrev + 1) * self._io.size
820 814 return self._getchunk(start, length)
821 815
822 816 def _chunk(self, rev):
823 817 return decompress(self._chunkraw(rev, rev))
824 818
825 819 def _chunkbase(self, rev):
826 820 return self._chunk(rev)
827 821
828 822 def _chunkclear(self):
829 823 self._chunkcache = (0, '')
830 824
831 825 def deltaparent(self, rev):
832 826 """return deltaparent of the given revision"""
833 827 if self.index[rev][3] == rev:
834 828 return nullrev
835 829 else:
836 830 return rev - 1
837 831
838 832 def revdiff(self, rev1, rev2):
839 833 """return or calculate a delta between two revisions"""
840 834 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
841 835 return self._chunk(rev2)
842 836
843 837 return mdiff.textdiff(self.revision(self.node(rev1)),
844 838 self.revision(self.node(rev2)))
845 839
846 840 def revision(self, node):
847 841 """return an uncompressed revision of a given node"""
848 842 cachedrev = None
849 843 if node == nullid:
850 844 return ""
851 845 if self._cache:
852 846 if self._cache[0] == node:
853 847 return self._cache[2]
854 848 cachedrev = self._cache[1]
855 849
856 850 # look up what we need to read
857 851 text = None
858 852 rev = self.rev(node)
859 853 base = self.base(rev)
860 854
861 855 # check rev flags
862 856 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
863 857 raise RevlogError(_('incompatible revision flag %x') %
864 858 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
865 859
866 860 # build delta chain
867 861 chain = []
868 862 iterrev = rev
869 863 while iterrev != base and iterrev != cachedrev:
870 864 chain.append(iterrev)
871 865 iterrev -= 1
872 866 chain.reverse()
873 867 base = iterrev
874 868
875 869 if iterrev == cachedrev:
876 870 # cache hit
877 871 text = self._cache[2]
878 872
879 873 # drop cache to save memory
880 874 self._cache = None
881 875
882 876 self._chunkraw(base, rev)
883 877 if text is None:
884 878 text = self._chunkbase(base)
885 879
886 880 bins = [self._chunk(r) for r in chain]
887 881 text = mdiff.patches(text, bins)
888 882
889 883 text = self._checkhash(text, node, rev)
890 884
891 885 self._cache = (node, rev, text)
892 886 return text
893 887
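Reading a revision, in outline: locate the chain base (a full snapshot), then patch forward one delta at a time; in this format the delta parent is always rev - 1, so the chain is simply base..rev. A schematic sketch with plain functions standing in for mdiff.patches and its binary delta format:

    def reconstruct(snapshots, deltas, base, rev):
        # snapshots[base] is a full text; deltas[r] patches the text
        # of r - 1 into the text of r
        text = snapshots[base]
        for r in range(base + 1, rev + 1):
            text = deltas[r](text)
        return text

    snapshots = {0: b"line1\n"}
    deltas = {1: lambda t: t + b"line2\n",
              2: lambda t: t + b"line3\n"}
    assert reconstruct(snapshots, deltas, 0, 2) == b"line1\nline2\nline3\n"
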
894 888 def _checkhash(self, text, node, rev):
895 889 p1, p2 = self.parents(node)
896 890 if node != hash(text, p1, p2):
897 891 raise RevlogError(_("integrity check failed on %s:%d")
898 892 % (self.indexfile, rev))
899 893 return text
900 894
901 895 def checkinlinesize(self, tr, fp=None):
902 896 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
903 897 return
904 898
905 899 trinfo = tr.find(self.indexfile)
906 900 if trinfo is None:
907 901 raise RevlogError(_("%s not found in the transaction")
908 902 % self.indexfile)
909 903
910 904 trindex = trinfo[2]
911 905 dataoff = self.start(trindex)
912 906
913 907 tr.add(self.datafile, dataoff)
914 908
915 909 if fp:
916 910 fp.flush()
917 911 fp.close()
918 912
919 913 df = self.opener(self.datafile, 'w')
920 914 try:
921 915 for r in self:
922 916 df.write(self._chunkraw(r, r))
923 917 finally:
924 918 df.close()
925 919
926 920 fp = self.opener(self.indexfile, 'w', atomictemp=True)
927 921 self.version &= ~(REVLOGNGINLINEDATA)
928 922 self._inline = False
929 923 for i in self:
930 924 e = self._io.packentry(self.index[i], self.node, self.version, i)
931 925 fp.write(e)
932 926
933 927 # if we don't call rename, the temp file will never replace the
934 928 # real index
935 929 fp.rename()
936 930
937 931 tr.replace(self.indexfile, trindex * self._io.size)
938 932 self._chunkclear()
939 933
940 934 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
941 935 """add a revision to the log
942 936
943 937 text - the revision data to add
944 938 transaction - the transaction object used for rollback
945 939 link - the linkrev data to add
946 940 p1, p2 - the parent nodeids of the revision
947 941 cachedelta - an optional precomputed delta
948 942 """
949 943 node = hash(text, p1, p2)
950 944 if node in self.nodemap:
951 945 return node
952 946
953 947 dfh = None
954 948 if not self._inline:
955 949 dfh = self.opener(self.datafile, "a")
956 950 ifh = self.opener(self.indexfile, "a+")
957 951 try:
958 952 return self._addrevision(node, text, transaction, link, p1, p2,
959 953 cachedelta, ifh, dfh)
960 954 finally:
961 955 if dfh:
962 956 dfh.close()
963 957 ifh.close()
964 958
965 959 def _addrevision(self, node, text, transaction, link, p1, p2,
966 960 cachedelta, ifh, dfh):
967 961
968 962 btext = [text]
969 963 def buildtext():
970 964 if btext[0] is not None:
971 965 return btext[0]
972 966 # flush any pending writes here so we can read it in revision
973 967 if dfh:
974 968 dfh.flush()
975 969 ifh.flush()
976 970 basetext = self.revision(self.node(cachedelta[0]))
977 971 btext[0] = mdiff.patch(basetext, cachedelta[1])
978 972 chk = hash(btext[0], p1, p2)
979 973 if chk != node:
980 974 raise RevlogError(_("consistency error in delta"))
981 975 return btext[0]
982 976
983 977 def builddelta(rev):
984 978 # can we use the cached delta?
985 979 if cachedelta and cachedelta[0] == rev:
986 980 delta = cachedelta[1]
987 981 else:
988 982 t = buildtext()
989 983 ptext = self.revision(self.node(rev))
990 984 delta = mdiff.textdiff(ptext, t)
991 985 data = compress(delta)
992 986 l = len(data[1]) + len(data[0])
993 987 base = self.base(rev)
994 988 dist = l + offset - self.start(base)
995 989 return dist, l, data, base
996 990
997 991 curr = len(self)
998 992 prev = curr - 1
999 993 base = curr
1000 994 offset = self.end(prev)
1001 995 flags = 0
1002 996 d = None
1003 997 p1r, p2r = self.rev(p1), self.rev(p2)
1004 998
1005 999 # should we try to build a delta?
1006 1000 if prev != nullrev:
1007 1001 d = builddelta(prev)
1008 1002 dist, l, data, base = d
1009 1003
1010 1004 # full versions are inserted when the needed deltas
1011 1005 # become comparable to the uncompressed text
1012 1006 if text is None:
1013 1007 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1014 1008 cachedelta[1])
1015 1009 else:
1016 1010 textlen = len(text)
1017 1011 if d is None or dist > textlen * 2:
1018 1012 text = buildtext()
1019 1013 data = compress(text)
1020 1014 l = len(data[1]) + len(data[0])
1021 1015 base = curr
1022 1016
1023 1017 e = (offset_type(offset, flags), l, textlen,
1024 1018 base, link, p1r, p2r, node)
1025 1019 self.index.insert(-1, e)
1026 1020 self.nodemap[node] = curr
1027 1021
1028 1022 entry = self._io.packentry(e, self.node, self.version, curr)
1029 1023 if not self._inline:
1030 1024 transaction.add(self.datafile, offset)
1031 1025 transaction.add(self.indexfile, curr * len(entry))
1032 1026 if data[0]:
1033 1027 dfh.write(data[0])
1034 1028 dfh.write(data[1])
1035 1029 dfh.flush()
1036 1030 ifh.write(entry)
1037 1031 else:
1038 1032 offset += curr * self._io.size
1039 1033 transaction.add(self.indexfile, offset, curr)
1040 1034 ifh.write(entry)
1041 1035 ifh.write(data[0])
1042 1036 ifh.write(data[1])
1043 1037 self.checkinlinesize(transaction, ifh)
1044 1038
1045 1039 if type(text) == str: # only accept immutable objects
1046 1040 self._cache = (node, curr, text)
1047 1041 return node
1048 1042
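The pivotal test in _addrevision is dist > textlen * 2: dist counts the compressed bytes from the chain base through the would-be delta, i.e. what a reader must fetch to reconstruct this revision. Once that exceeds twice the plain text size, a full snapshot is stored instead, which is exactly the "about 2 times" bound promised in the class docstring. The decision, reduced to a sketch:

    def choose_storage(dist, textlen, has_delta):
        # keep appending deltas only while reconstruction cost stays
        # under twice the size of the text itself
        if not has_delta or dist > textlen * 2:
            return "snapshot"
        return "delta"

    # a 10 KB text whose chain already spans 25 KB gets re-snapshotted
    assert choose_storage(25000, 10000, True) == "snapshot"
    assert choose_storage(15000, 10000, True) == "delta"
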
1049 1043 def group(self, nodelist, bundler):
1050 1044 """Calculate a delta group, yielding a sequence of changegroup chunks
1051 1045 (strings).
1052 1046
1053 1047 Given a list of changeset revs, return a set of deltas and
1054 1048 metadata corresponding to nodes. The first delta is
1055 1049 first parent(nodelist[0]) -> nodelist[0], the receiver is
1056 1050 guaranteed to have this parent as it has all history before
1057 1051 these changesets. In the case the first parent is nullrev the
1058 1052 changegroup starts with a full revision.
1059 1053 """
1060 1054
1061 1055 revs = sorted([self.rev(n) for n in nodelist])
1062 1056
1063 1057 # if we don't have any revisions touched by these changesets, bail
1064 1058 if not revs:
1065 1059 yield bundler.close()
1066 1060 return
1067 1061
1068 1062 # add the parent of the first rev
1069 1063 p = self.parentrevs(revs[0])[0]
1070 1064 revs.insert(0, p)
1071 1065
1072 1066 # build deltas
1073 1067 for r in xrange(len(revs) - 1):
1074 1068 prev, curr = revs[r], revs[r + 1]
1075 1069 for c in bundler.revchunk(self, curr, prev):
1076 1070 yield c
1077 1071
1078 1072 yield bundler.close()
1079 1073
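group sends each revision as a delta against its predecessor in the sorted list, after seeding the list with the first revision's parent (which the receiver is guaranteed to have). The pairing reduces to zipping the list against itself, shown here with made-up revision numbers:

    revs = [2, 5, 6]     # revisions touched by the outgoing changesets
    revs.insert(0, 1)    # parent of the first rev, known to the receiver
    pairs = list(zip(revs, revs[1:]))
    # each (prev, curr) pair becomes one delta chunk in the bundle
    assert pairs == [(1, 2), (2, 5), (5, 6)]
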
1080 1074 def addgroup(self, bundle, linkmapper, transaction):
1081 1075 """
1082 1076 add a delta group
1083 1077
1084 1078 given a set of deltas, add them to the revision log. the
1085 1079 first delta is against its parent, which should be in our
1086 1080 log, the rest are against the previous delta.
1087 1081 """
1088 1082
1089 1083 # track the base of the current delta log
1090 1084 node = None
1091 1085
1092 1086 r = len(self)
1093 1087 end = 0
1094 1088 if r:
1095 1089 end = self.end(r - 1)
1096 1090 ifh = self.opener(self.indexfile, "a+")
1097 1091 isize = r * self._io.size
1098 1092 if self._inline:
1099 1093 transaction.add(self.indexfile, end + isize, r)
1100 1094 dfh = None
1101 1095 else:
1102 1096 transaction.add(self.indexfile, isize, r)
1103 1097 transaction.add(self.datafile, end)
1104 1098 dfh = self.opener(self.datafile, "a")
1105 1099
1106 1100 try:
1107 1101 # loop through our set of deltas
1108 1102 chain = None
1109 1103 while 1:
1110 1104 chunkdata = bundle.deltachunk(chain)
1111 1105 if not chunkdata:
1112 1106 break
1113 1107 node = chunkdata['node']
1114 1108 p1 = chunkdata['p1']
1115 1109 p2 = chunkdata['p2']
1116 1110 cs = chunkdata['cs']
1117 1111 deltabase = chunkdata['deltabase']
1118 1112 delta = chunkdata['delta']
1119 1113
1120 1114 link = linkmapper(cs)
1121 1115 if node in self.nodemap:
1122 1116 # this can happen if two branches make the same change
1123 1117 chain = node
1124 1118 continue
1125 1119
1126 1120 for p in (p1, p2):
1127 1121 if not p in self.nodemap:
1128 1122 raise LookupError(p, self.indexfile,
1129 1123 _('unknown parent'))
1130 1124
1131 1125 if deltabase not in self.nodemap:
1132 1126 raise LookupError(deltabase, self.indexfile,
1133 1127 _('unknown delta base'))
1134 1128
1135 1129 baserev = self.rev(deltabase)
1136 1130 chain = self._addrevision(node, None, transaction, link,
1137 1131 p1, p2, (baserev, delta), ifh, dfh)
1138 1132 if not dfh and not self._inline:
1139 1133 # addrevision switched from inline to conventional
1140 1134 # reopen the index
1141 1135 ifh.close()
1142 1136 dfh = self.opener(self.datafile, "a")
1143 1137 ifh = self.opener(self.indexfile, "a")
1144 1138 finally:
1145 1139 if dfh:
1146 1140 dfh.close()
1147 1141 ifh.close()
1148 1142
1149 1143 return node
1150 1144
1151 1145 def strip(self, minlink, transaction):
1152 1146 """truncate the revlog on the first revision with a linkrev >= minlink
1153 1147
1154 1148 This function is called when we're stripping revision minlink and
1155 1149 its descendants from the repository.
1156 1150
1157 1151 We have to remove all revisions with linkrev >= minlink, because
1158 1152 the equivalent changelog revisions will be renumbered after the
1159 1153 strip.
1160 1154
1161 1155 So we truncate the revlog on the first of these revisions, and
1162 1156 trust that the caller has saved the revisions that shouldn't be
1163 1157 removed and that it'll re-add them after this truncation.
1164 1158 """
1165 1159 if len(self) == 0:
1166 1160 return
1167 1161
1168 1162 for rev in self:
1169 1163 if self.index[rev][4] >= minlink:
1170 1164 break
1171 1165 else:
1172 1166 return
1173 1167
1174 1168 # first truncate the files on disk
1175 1169 end = self.start(rev)
1176 1170 if not self._inline:
1177 1171 transaction.add(self.datafile, end)
1178 1172 end = rev * self._io.size
1179 1173 else:
1180 1174 end += rev * self._io.size
1181 1175
1182 1176 transaction.add(self.indexfile, end)
1183 1177
1184 1178 # then reset internal state in memory to forget those revisions
1185 1179 self._cache = None
1186 1180 self._chunkclear()
1187 1181 for x in xrange(rev, len(self)):
1188 1182 del self.nodemap[self.node(x)]
1189 1183
1190 1184 del self.index[rev:-1]
1191 1185
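The truncation offsets in strip fall out of the two on-disk layouts: with a separate .d file the index is just rev fixed-size entries and the data file is cut at start(rev); inline, index entries and data chunks are interleaved in one file, so the cut point is the data offset plus the space taken by rev entries. The arithmetic, using the 64-byte RevlogNG entry size from above:

    ENTRY_SIZE = 64

    def truncation_points(data_start, rev, inline):
        # data_start is start(rev), the first byte of data that must go
        if inline:
            return {"index": data_start + rev * ENTRY_SIZE}
        return {"data": data_start, "index": rev * ENTRY_SIZE}

    assert truncation_points(1000, 10, False) == {"data": 1000, "index": 640}
    assert truncation_points(1000, 10, True) == {"index": 1640}
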
1192 1186 def checksize(self):
1193 1187 expected = 0
1194 1188 if len(self):
1195 1189 expected = max(0, self.end(len(self) - 1))
1196 1190
1197 1191 try:
1198 1192 f = self.opener(self.datafile)
1199 1193 f.seek(0, 2)
1200 1194 actual = f.tell()
1201 1195 f.close()
1202 1196 dd = actual - expected
1203 1197 except IOError, inst:
1204 1198 if inst.errno != errno.ENOENT:
1205 1199 raise
1206 1200 dd = 0
1207 1201
1208 1202 try:
1209 1203 f = self.opener(self.indexfile)
1210 1204 f.seek(0, 2)
1211 1205 actual = f.tell()
1212 1206 f.close()
1213 1207 s = self._io.size
1214 1208 i = max(0, actual // s)
1215 1209 di = actual - (i * s)
1216 1210 if self._inline:
1217 1211 databytes = 0
1218 1212 for r in self:
1219 1213 databytes += max(0, self.length(r))
1220 1214 dd = 0
1221 1215 di = actual - len(self) * s - databytes
1222 1216 except IOError, inst:
1223 1217 if inst.errno != errno.ENOENT:
1224 1218 raise
1225 1219 di = 0
1226 1220
1227 1221 return (dd, di)
1228 1222
1229 1223 def files(self):
1230 1224 res = [self.indexfile]
1231 1225 if not self._inline:
1232 1226 res.append(self.datafile)
1233 1227 return res