revlog: avoid an expensive string copy...
Bryan O'Sullivan
r16423:a150923b default
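This commit swaps string slicing for buffer views in revlog's chunk cache (_loadchunk and _getchunk below), so serving a sub-range of a cached readahead chunk no longer copies the underlying bytes. A hedged sketch of the idea, not part of the commit: it assumes Python 2, where the built-in buffer() returns a read-only view without copying, and assumes util.buffer (defined in util.py, which this diff does not show) behaves like that builtin.

# stand-in for a multi-megabyte readahead chunk
data = 'x' * (16 * 1024 * 1024)

copied = data[:4096]             # slicing allocates a fresh 4 KB string
viewed = buffer(data, 0, 4096)   # buffer() merely references the original

assert str(viewed) == copied     # callers needing a real str convert once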
@@ -1,1306 +1,1306 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, it is p2
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # neither parent node is nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
78 78 def compress(text):
79 79 """ generate a possibly-compressed representation of text """
80 80 if not text:
81 81 return ("", text)
82 82 l = len(text)
83 83 bin = None
84 84 if l < 44:
85 85 pass
86 86 elif l > 1000000:
87 87 # zlib makes an internal copy, thus doubling memory usage for
88 88 # large files, so let's do this in pieces
89 89 z = zlib.compressobj()
90 90 p = []
91 91 pos = 0
92 92 while pos < l:
93 93 pos2 = pos + 2**20
94 94 p.append(z.compress(text[pos:pos2]))
95 95 pos = pos2
96 96 p.append(z.flush())
97 97 if sum(map(len, p)) < l:
98 98 bin = "".join(p)
99 99 else:
100 100 bin = _compress(text)
101 101 if bin is None or len(bin) > l:
102 102 if text[0] == '\0':
103 103 return ("", text)
104 104 return ('u', text)
105 105 return ("", bin)
106 106
107 107 def decompress(bin):
108 108 """ decompress the given input """
109 109 if not bin:
110 110 return bin
111 111 t = bin[0]
112 112 if t == '\0':
113 113 return bin
114 114 if t == 'x':
115 115 return _decompress(bin)
116 116 if t == 'u':
117 117 return bin[1:]
118 118 raise RevlogError(_("unknown compression type %r") % t)
119 119
120 120 indexformatv0 = ">4l20s20s20s"
121 121 v0shaoffset = 56
122 122
123 123 class revlogoldio(object):
124 124 def __init__(self):
125 125 self.size = struct.calcsize(indexformatv0)
126 126
127 127 def parseindex(self, data, inline):
128 128 s = self.size
129 129 index = []
130 130 nodemap = {nullid: nullrev}
131 131 n = off = 0
132 132 l = len(data)
133 133 while off + s <= l:
134 134 cur = data[off:off + s]
135 135 off += s
136 136 e = _unpack(indexformatv0, cur)
137 137 # transform to revlogv1 format
138 138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 140 index.append(e2)
141 141 nodemap[e[6]] = n
142 142 n += 1
143 143
144 144 # add the magic null revision at -1
145 145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146 146
147 147 return index, nodemap, None
148 148
149 149 def packentry(self, entry, node, version, rev):
150 150 if gettype(entry[0]):
151 151 raise RevlogError(_("index entry flags need RevlogNG"))
152 152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 153 node(entry[5]), node(entry[6]), entry[7])
154 154 return _pack(indexformatv0, *e2)
155 155
156 156 # index ng:
157 157 # 6 bytes: offset
158 158 # 2 bytes: flags
159 159 # 4 bytes: compressed length
160 160 # 4 bytes: uncompressed length
161 161 # 4 bytes: base rev
162 162 # 4 bytes: link rev
163 163 # 4 bytes: parent 1 rev
164 164 # 4 bytes: parent 2 rev
165 165 # 32 bytes: nodeid
166 166 indexformatng = ">Qiiiiii20s12x"
167 167 ngshaoffset = 32
168 168 versionformat = ">I"
169 169
170 170 class revlogio(object):
171 171 def __init__(self):
172 172 self.size = struct.calcsize(indexformatng)
173 173
174 174 def parseindex(self, data, inline):
175 175 # call the C implementation to parse the index data
176 176 index, cache = parsers.parse_index2(data, inline)
177 177 return index, getattr(index, 'nodemap', None), cache
178 178
179 179 def packentry(self, entry, node, version, rev):
180 180 p = _pack(indexformatng, *entry)
181 181 if rev == 0:
182 182 p = _pack(versionformat, version) + p[4:]
183 183 return p
184 184
185 185 class revlog(object):
186 186 """
187 187 the underlying revision storage object
188 188
189 189 A revlog consists of two parts, an index and the revision data.
190 190
191 191 The index is a file with a fixed record size containing
192 192 information on each revision, including its nodeid (hash), the
193 193 nodeids of its parents, the position and offset of its data within
194 194 the data file, and the revision it's based on. Finally, each entry
195 195 contains a linkrev entry that can serve as a pointer to external
196 196 data.
197 197
198 198 The revision data itself is a linear collection of data chunks.
199 199 Each chunk represents a revision and is usually represented as a
200 200 delta against the previous chunk. To bound lookup time, runs of
201 201 deltas are limited to about 2 times the length of the original
202 202 version data. This makes retrieval of a version proportional to
203 203 its size, or O(1) relative to the number of revisions.
204 204
205 205 Both pieces of the revlog are written to in an append-only
206 206 fashion, which means we never need to rewrite a file to insert or
207 207 remove data, and can use some simple techniques to avoid the need
208 208 for locking while reading.
209 209 """
210 210 def __init__(self, opener, indexfile):
211 211 """
212 212 create a revlog object
213 213
214 214 opener is a function that abstracts the file opening operation
215 215 and can be used to implement COW semantics or the like.
216 216 """
217 217 self.indexfile = indexfile
218 218 self.datafile = indexfile[:-2] + ".d"
219 219 self.opener = opener
220 220 self._cache = None
221 221 self._basecache = (0, 0)
222 222 self._chunkcache = (0, '')
223 223 self.index = []
224 224 self._pcache = {}
225 225 self._nodecache = {nullid: nullrev}
226 226 self._nodepos = None
227 227
228 228 v = REVLOG_DEFAULT_VERSION
229 229 opts = getattr(opener, 'options', None)
230 230 if opts is not None:
231 231 if 'revlogv1' in opts:
232 232 if 'generaldelta' in opts:
233 233 v |= REVLOGGENERALDELTA
234 234 else:
235 235 v = 0
236 236
237 237 i = ''
238 238 self._initempty = True
239 239 try:
240 240 f = self.opener(self.indexfile)
241 241 i = f.read()
242 242 f.close()
243 243 if len(i) > 0:
244 244 v = struct.unpack(versionformat, i[:4])[0]
245 245 self._initempty = False
246 246 except IOError, inst:
247 247 if inst.errno != errno.ENOENT:
248 248 raise
249 249
250 250 self.version = v
251 251 self._inline = v & REVLOGNGINLINEDATA
252 252 self._generaldelta = v & REVLOGGENERALDELTA
253 253 flags = v & ~0xFFFF
254 254 fmt = v & 0xFFFF
255 255 if fmt == REVLOGV0 and flags:
256 256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
257 257 % (self.indexfile, flags >> 16))
258 258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
259 259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt > REVLOGNG:
262 262 raise RevlogError(_("index %s unknown format %d")
263 263 % (self.indexfile, fmt))
264 264
265 265 self._io = revlogio()
266 266 if self.version == REVLOGV0:
267 267 self._io = revlogoldio()
268 268 try:
269 269 d = self._io.parseindex(i, self._inline)
270 270 except (ValueError, IndexError):
271 271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
272 272 self.index, nodemap, self._chunkcache = d
273 273 if nodemap is not None:
274 274 self.nodemap = self._nodecache = nodemap
275 275 if not self._chunkcache:
276 276 self._chunkclear()
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __len__(self):
281 281 return len(self.index) - 1
282 282 def __iter__(self):
283 283 for i in xrange(len(self)):
284 284 yield i
285 285
286 286 @util.propertycache
287 287 def nodemap(self):
288 288 self.rev(self.node(0))
289 289 return self._nodecache
290 290
291 291 def hasnode(self, node):
292 292 try:
293 293 self.rev(node)
294 294 return True
295 295 except KeyError:
296 296 return False
297 297
298 298 def clearcaches(self):
299 299 try:
300 300 self._nodecache.clearcaches()
301 301 except AttributeError:
302 302 self._nodecache = {nullid: nullrev}
303 303 self._nodepos = None
304 304
305 305 def rev(self, node):
306 306 try:
307 307 return self._nodecache[node]
308 308 except RevlogError:
309 309 # parsers.c radix tree lookup failed
310 310 raise LookupError(node, self.indexfile, _('no node'))
311 311 except KeyError:
312 312 # pure python cache lookup failed
313 313 n = self._nodecache
314 314 i = self.index
315 315 p = self._nodepos
316 316 if p is None:
317 317 p = len(i) - 2
318 318 for r in xrange(p, -1, -1):
319 319 v = i[r][7]
320 320 n[v] = r
321 321 if v == node:
322 322 self._nodepos = r - 1
323 323 return r
324 324 raise LookupError(node, self.indexfile, _('no node'))
325 325
326 326 def node(self, rev):
327 327 return self.index[rev][7]
328 328 def linkrev(self, rev):
329 329 return self.index[rev][4]
330 330 def parents(self, node):
331 331 i = self.index
332 332 d = i[self.rev(node)]
333 333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
334 334 def parentrevs(self, rev):
335 335 return self.index[rev][5:7]
336 336 def start(self, rev):
337 337 return int(self.index[rev][0] >> 16)
338 338 def end(self, rev):
339 339 return self.start(rev) + self.length(rev)
340 340 def length(self, rev):
341 341 return self.index[rev][1]
342 342 def chainbase(self, rev):
343 343 index = self.index
344 344 base = index[rev][3]
345 345 while base != rev:
346 346 rev = base
347 347 base = index[rev][3]
348 348 return base
349 349 def flags(self, rev):
350 350 return self.index[rev][0] & 0xFFFF
351 351 def rawsize(self, rev):
352 352 """return the length of the uncompressed text for a given revision"""
353 353 l = self.index[rev][2]
354 354 if l >= 0:
355 355 return l
356 356
357 357 t = self.revision(self.node(rev))
358 358 return len(t)
359 359 size = rawsize
360 360
361 361 def reachable(self, node, stop=None):
362 362 """return the set of all nodes ancestral to a given node, including
363 363 the node itself, stopping when stop is matched"""
364 364 reachable = set((node,))
365 365 visit = [node]
366 366 if stop:
367 367 stopn = self.rev(stop)
368 368 else:
369 369 stopn = 0
370 370 while visit:
371 371 n = visit.pop(0)
372 372 if n == stop:
373 373 continue
374 374 if n == nullid:
375 375 continue
376 376 for p in self.parents(n):
377 377 if self.rev(p) < stopn:
378 378 continue
379 379 if p not in reachable:
380 380 reachable.add(p)
381 381 visit.append(p)
382 382 return reachable
383 383
384 384 def ancestors(self, *revs):
385 385 """Generate the ancestors of 'revs' in reverse topological order.
386 386
387 387 Yield a sequence of revision numbers starting with the parents
388 388 of each revision in revs, i.e., each revision is *not* considered
389 389 an ancestor of itself. Results are in breadth-first order:
390 390 parents of each rev in revs, then parents of those, etc. Result
391 391 does not include the null revision."""
392 392 visit = list(revs)
393 393 seen = set([nullrev])
394 394 while visit:
395 395 for parent in self.parentrevs(visit.pop(0)):
396 396 if parent not in seen:
397 397 visit.append(parent)
398 398 seen.add(parent)
399 399 yield parent
400 400
401 401 def descendants(self, *revs):
402 402 """Generate the descendants of 'revs' in revision order.
403 403
404 404 Yield a sequence of revision numbers starting with a child of
405 405 some rev in revs, i.e., each revision is *not* considered a
406 406 descendant of itself. Results are ordered by revision number (a
407 407 topological sort)."""
408 408 first = min(revs)
409 409 if first == nullrev:
410 410 for i in self:
411 411 yield i
412 412 return
413 413
414 414 seen = set(revs)
415 415 for i in xrange(first + 1, len(self)):
416 416 for x in self.parentrevs(i):
417 417 if x != nullrev and x in seen:
418 418 seen.add(i)
419 419 yield i
420 420 break
421 421
422 422 def findcommonmissing(self, common=None, heads=None):
423 423 """Return a tuple of the ancestors of common and the ancestors of heads
424 424 that are not ancestors of common. In revset terminology, we return the
425 425 tuple:
426 426
427 427 ::common, (::heads) - (::common)
428 428
429 429 The list is sorted by revision number, meaning it is
430 430 topologically sorted.
431 431
432 432 'heads' and 'common' are both lists of node IDs. If heads is
433 433 not supplied, uses all of the revlog's heads. If common is not
434 434 supplied, uses nullid."""
435 435 if common is None:
436 436 common = [nullid]
437 437 if heads is None:
438 438 heads = self.heads()
439 439
440 440 common = [self.rev(n) for n in common]
441 441 heads = [self.rev(n) for n in heads]
442 442
443 443 # we want the ancestors, but inclusive
444 444 has = set(self.ancestors(*common))
445 445 has.add(nullrev)
446 446 has.update(common)
447 447
448 448 # take all ancestors from heads that aren't in has
449 449 missing = set()
450 450 visit = [r for r in heads if r not in has]
451 451 while visit:
452 452 r = visit.pop(0)
453 453 if r in missing:
454 454 continue
455 455 else:
456 456 missing.add(r)
457 457 for p in self.parentrevs(r):
458 458 if p not in has:
459 459 visit.append(p)
460 460 missing = list(missing)
461 461 missing.sort()
462 462 return has, [self.node(r) for r in missing]
463 463
464 464 def findmissing(self, common=None, heads=None):
465 465 """Return the ancestors of heads that are not ancestors of common.
466 466
467 467 More specifically, return a list of nodes N such that every N
468 468 satisfies the following constraints:
469 469
470 470 1. N is an ancestor of some node in 'heads'
471 471 2. N is not an ancestor of any node in 'common'
472 472
473 473 The list is sorted by revision number, meaning it is
474 474 topologically sorted.
475 475
476 476 'heads' and 'common' are both lists of node IDs. If heads is
477 477 not supplied, uses all of the revlog's heads. If common is not
478 478 supplied, uses nullid."""
479 479 _common, missing = self.findcommonmissing(common, heads)
480 480 return missing
481 481
482 482 def nodesbetween(self, roots=None, heads=None):
483 483 """Return a topological path from 'roots' to 'heads'.
484 484
485 485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
486 486 topologically sorted list of all nodes N that satisfy both of
487 487 these constraints:
488 488
489 489 1. N is a descendant of some node in 'roots'
490 490 2. N is an ancestor of some node in 'heads'
491 491
492 492 Every node is considered to be both a descendant and an ancestor
493 493 of itself, so every reachable node in 'roots' and 'heads' will be
494 494 included in 'nodes'.
495 495
496 496 'outroots' is the list of reachable nodes in 'roots', i.e., the
497 497 subset of 'roots' that is returned in 'nodes'. Likewise,
498 498 'outheads' is the subset of 'heads' that is also in 'nodes'.
499 499
500 500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
501 501 unspecified, uses nullid as the only root. If 'heads' is
502 502 unspecified, uses list of all of the revlog's heads."""
503 503 nonodes = ([], [], [])
504 504 if roots is not None:
505 505 roots = list(roots)
506 506 if not roots:
507 507 return nonodes
508 508 lowestrev = min([self.rev(n) for n in roots])
509 509 else:
510 510 roots = [nullid] # Everybody's a descendant of nullid
511 511 lowestrev = nullrev
512 512 if (lowestrev == nullrev) and (heads is None):
513 513 # We want _all_ the nodes!
514 514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
515 515 if heads is None:
516 516 # All nodes are ancestors, so the latest ancestor is the last
517 517 # node.
518 518 highestrev = len(self) - 1
519 519 # Set ancestors to None to signal that every node is an ancestor.
520 520 ancestors = None
521 521 # Set heads to an empty dictionary for later discovery of heads
522 522 heads = {}
523 523 else:
524 524 heads = list(heads)
525 525 if not heads:
526 526 return nonodes
527 527 ancestors = set()
528 528 # Turn heads into a dictionary so we can remove 'fake' heads.
529 529 # Also, later we will be using it to filter out the heads we can't
530 530 # find from roots.
531 531 heads = dict.fromkeys(heads, False)
532 532 # Start at the top and keep marking parents until we're done.
533 533 nodestotag = set(heads)
534 534 # Remember where the top was so we can use it as a limit later.
535 535 highestrev = max([self.rev(n) for n in nodestotag])
536 536 while nodestotag:
537 537 # grab a node to tag
538 538 n = nodestotag.pop()
539 539 # Never tag nullid
540 540 if n == nullid:
541 541 continue
542 542 # A node's revision number represents its place in a
543 543 # topologically sorted list of nodes.
544 544 r = self.rev(n)
545 545 if r >= lowestrev:
546 546 if n not in ancestors:
547 547 # If we are possibly a descendant of one of the roots
548 548 # and we haven't already been marked as an ancestor
549 549 ancestors.add(n) # Mark as ancestor
550 550 # Add non-nullid parents to list of nodes to tag.
551 551 nodestotag.update([p for p in self.parents(n) if
552 552 p != nullid])
553 553 elif n in heads: # We've seen it before, is it a fake head?
554 554 # So it is; real heads should not be the ancestors of
555 555 # any other heads.
556 556 heads.pop(n)
557 557 if not ancestors:
558 558 return nonodes
559 559 # Now that we have our set of ancestors, we want to remove any
560 560 # roots that are not ancestors.
561 561
562 562 # If one of the roots was nullid, everything is included anyway.
563 563 if lowestrev > nullrev:
564 564 # But, since we weren't, let's recompute the lowest rev to not
565 565 # include roots that aren't ancestors.
566 566
567 567 # Filter out roots that aren't ancestors of heads
568 568 roots = [n for n in roots if n in ancestors]
569 569 # Recompute the lowest revision
570 570 if roots:
571 571 lowestrev = min([self.rev(n) for n in roots])
572 572 else:
573 573 # No more roots? Return empty list
574 574 return nonodes
575 575 else:
576 576 # We are descending from nullid, and don't need to care about
577 577 # any other roots.
578 578 lowestrev = nullrev
579 579 roots = [nullid]
580 580 # Transform our roots list into a set.
581 581 descendants = set(roots)
582 582 # Also, keep the original roots so we can filter out roots that aren't
583 583 # 'real' roots (i.e. are descended from other roots).
584 584 roots = descendants.copy()
585 585 # Our topologically sorted list of output nodes.
586 586 orderedout = []
587 587 # Don't start at nullid since we don't want nullid in our output list,
588 588 # and if nullid shows up in descendants, empty parents will look like
589 589 # they're descendants.
590 590 for r in xrange(max(lowestrev, 0), highestrev + 1):
591 591 n = self.node(r)
592 592 isdescendant = False
593 593 if lowestrev == nullrev: # Everybody is a descendant of nullid
594 594 isdescendant = True
595 595 elif n in descendants:
596 596 # n is already a descendant
597 597 isdescendant = True
598 598 # This check only needs to be done here because all the roots
599 599 # will start being marked as descendants before the loop.
600 600 if n in roots:
601 601 # If n was a root, check if it's a 'real' root.
602 602 p = tuple(self.parents(n))
603 603 # If any of its parents are descendants, it's not a root.
604 604 if (p[0] in descendants) or (p[1] in descendants):
605 605 roots.remove(n)
606 606 else:
607 607 p = tuple(self.parents(n))
608 608 # A node is a descendant if either of its parents are
609 609 # descendants. (We seeded the descendants list with the roots
610 610 # up there, remember?)
611 611 if (p[0] in descendants) or (p[1] in descendants):
612 612 descendants.add(n)
613 613 isdescendant = True
614 614 if isdescendant and ((ancestors is None) or (n in ancestors)):
615 615 # Only include nodes that are both descendants and ancestors.
616 616 orderedout.append(n)
617 617 if (ancestors is not None) and (n in heads):
618 618 # We're trying to figure out which heads are reachable
619 619 # from roots.
620 620 # Mark this head as having been reached
621 621 heads[n] = True
622 622 elif ancestors is None:
623 623 # Otherwise, we're trying to discover the heads.
624 624 # Assume this is a head because if it isn't, the next step
625 625 # will eventually remove it.
626 626 heads[n] = True
627 627 # But, obviously its parents aren't.
628 628 for p in self.parents(n):
629 629 heads.pop(p, None)
630 630 heads = [n for n, flag in heads.iteritems() if flag]
631 631 roots = list(roots)
632 632 assert orderedout
633 633 assert roots
634 634 assert heads
635 635 return (orderedout, roots, heads)
636 636
637 637 def headrevs(self):
638 638 count = len(self)
639 639 if not count:
640 640 return [nullrev]
641 641 ishead = [1] * (count + 1)
642 642 index = self.index
643 643 for r in xrange(count):
644 644 e = index[r]
645 645 ishead[e[5]] = ishead[e[6]] = 0
646 646 return [r for r in xrange(count) if ishead[r]]
647 647
648 648 def heads(self, start=None, stop=None):
649 649 """return the list of all nodes that have no children
650 650
651 651 if start is specified, only heads that are descendants of
652 652 start will be returned
653 653 if stop is specified, it will consider all the revs from stop
654 654 as if they had no children
655 655 """
656 656 if start is None and stop is None:
657 657 if not len(self):
658 658 return [nullid]
659 659 return [self.node(r) for r in self.headrevs()]
660 660
661 661 if start is None:
662 662 start = nullid
663 663 if stop is None:
664 664 stop = []
665 665 stoprevs = set([self.rev(n) for n in stop])
666 666 startrev = self.rev(start)
667 667 reachable = set((startrev,))
668 668 heads = set((startrev,))
669 669
670 670 parentrevs = self.parentrevs
671 671 for r in xrange(startrev + 1, len(self)):
672 672 for p in parentrevs(r):
673 673 if p in reachable:
674 674 if r not in stoprevs:
675 675 reachable.add(r)
676 676 heads.add(r)
677 677 if p in heads and p not in stoprevs:
678 678 heads.remove(p)
679 679
680 680 return [self.node(r) for r in heads]
681 681
682 682 def children(self, node):
683 683 """find the children of a given node"""
684 684 c = []
685 685 p = self.rev(node)
686 686 for r in range(p + 1, len(self)):
687 687 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
688 688 if prevs:
689 689 for pr in prevs:
690 690 if pr == p:
691 691 c.append(self.node(r))
692 692 elif p == nullrev:
693 693 c.append(self.node(r))
694 694 return c
695 695
696 696 def descendant(self, start, end):
697 697 if start == nullrev:
698 698 return True
699 699 for i in self.descendants(start):
700 700 if i == end:
701 701 return True
702 702 elif i > end:
703 703 break
704 704 return False
705 705
706 706 def ancestor(self, a, b):
707 707 """calculate the least common ancestor of nodes a and b"""
708 708
709 709 # fast path, check if it is a descendant
710 710 a, b = self.rev(a), self.rev(b)
711 711 start, end = sorted((a, b))
712 712 if self.descendant(start, end):
713 713 return self.node(start)
714 714
715 715 def parents(rev):
716 716 return [p for p in self.parentrevs(rev) if p != nullrev]
717 717
718 718 c = ancestor.ancestor(a, b, parents)
719 719 if c is None:
720 720 return nullid
721 721
722 722 return self.node(c)
723 723
724 724 def _match(self, id):
725 725 if isinstance(id, (long, int)):
726 726 # rev
727 727 return self.node(id)
728 728 if len(id) == 20:
729 729 # possibly a binary node
730 730 # odds of a binary node being all hex in ASCII are 1 in 10**25
731 731 try:
732 732 node = id
733 733 self.rev(node) # quick search the index
734 734 return node
735 735 except LookupError:
736 736 pass # may be partial hex id
737 737 try:
738 738 # str(rev)
739 739 rev = int(id)
740 740 if str(rev) != id:
741 741 raise ValueError
742 742 if rev < 0:
743 743 rev = len(self) + rev
744 744 if rev < 0 or rev >= len(self):
745 745 raise ValueError
746 746 return self.node(rev)
747 747 except (ValueError, OverflowError):
748 748 pass
749 749 if len(id) == 40:
750 750 try:
751 751 # a full hex nodeid?
752 752 node = bin(id)
753 753 self.rev(node)
754 754 return node
755 755 except (TypeError, LookupError):
756 756 pass
757 757
758 758 def _partialmatch(self, id):
759 759 if id in self._pcache:
760 760 return self._pcache[id]
761 761
762 762 if len(id) < 40:
763 763 try:
764 764 # hex(node)[:...]
765 765 l = len(id) // 2 # grab an even number of digits
766 766 prefix = bin(id[:l * 2])
767 767 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
768 768 nl = [n for n in nl if hex(n).startswith(id)]
769 769 if len(nl) > 0:
770 770 if len(nl) == 1:
771 771 self._pcache[id] = nl[0]
772 772 return nl[0]
773 773 raise LookupError(id, self.indexfile,
774 774 _('ambiguous identifier'))
775 775 return None
776 776 except TypeError:
777 777 pass
778 778
779 779 def lookup(self, id):
780 780 """locate a node based on:
781 781 - revision number or str(revision number)
782 782 - nodeid or subset of hex nodeid
783 783 """
784 784 n = self._match(id)
785 785 if n is not None:
786 786 return n
787 787 n = self._partialmatch(id)
788 788 if n:
789 789 return n
790 790
791 791 raise LookupError(id, self.indexfile, _('no match found'))
792 792
793 793 def cmp(self, node, text):
794 794 """compare text with a given file revision
795 795
796 796 returns True if text is different than what is stored.
797 797 """
798 798 p1, p2 = self.parents(node)
799 799 return hash(text, p1, p2) != node
800 800
801 801 def _addchunk(self, offset, data):
802 802 o, d = self._chunkcache
803 803 # try to add to existing cache
804 804 if o + len(d) == offset and len(d) + len(data) < _chunksize:
805 805 self._chunkcache = o, d + data
806 806 else:
807 807 self._chunkcache = offset, data
808 808
809 809 def _loadchunk(self, offset, length):
810 810 if self._inline:
811 811 df = self.opener(self.indexfile)
812 812 else:
813 813 df = self.opener(self.datafile)
814 814
815 815 readahead = max(_chunksize, length)
816 816 df.seek(offset)
817 817 d = df.read(readahead)
818 818 df.close()
819 819 self._addchunk(offset, d)
820 820 if readahead > length:
821 return d[:length]
821 return util.buffer(d, 0, length)
822 822 return d
823 823
824 824 def _getchunk(self, offset, length):
825 825 o, d = self._chunkcache
826 826 l = len(d)
827 827
828 828 # is it in the cache?
829 829 cachestart = offset - o
830 830 cacheend = cachestart + length
831 831 if cachestart >= 0 and cacheend <= l:
832 832 if cachestart == 0 and cacheend == l:
833 833 return d # avoid a copy
834 return d[cachestart:cacheend]
834 return util.buffer(d, cachestart, cacheend - cachestart)
835 835
836 836 return self._loadchunk(offset, length)
837 837
838 838 def _chunkraw(self, startrev, endrev):
839 839 start = self.start(startrev)
840 840 length = self.end(endrev) - start
841 841 if self._inline:
842 842 start += (startrev + 1) * self._io.size
843 843 return self._getchunk(start, length)
844 844
845 845 def _chunk(self, rev):
846 846 return decompress(self._chunkraw(rev, rev))
847 847
848 848 def _chunkbase(self, rev):
849 849 return self._chunk(rev)
850 850
851 851 def _chunkclear(self):
852 852 self._chunkcache = (0, '')
853 853
854 854 def deltaparent(self, rev):
855 855 """return deltaparent of the given revision"""
856 856 base = self.index[rev][3]
857 857 if base == rev:
858 858 return nullrev
859 859 elif self._generaldelta:
860 860 return base
861 861 else:
862 862 return rev - 1
863 863
864 864 def revdiff(self, rev1, rev2):
865 865 """return or calculate a delta between two revisions"""
866 866 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
867 return self._chunk(rev2)
867 return str(self._chunk(rev2))
868 868
869 869 return mdiff.textdiff(self.revision(self.node(rev1)),
870 870 self.revision(self.node(rev2)))
871 871
872 872 def revision(self, nodeorrev):
873 873 """return an uncompressed revision of a given node or"""
874 874 if isinstance(nodeorrev, int):
875 875 rev = nodeorrev
876 876 node = self.node(rev)
877 877 else:
878 878 node = nodeorrev
879 879 rev = None
880 880
881 881 cachedrev = None
882 882 if node == nullid:
883 883 return ""
884 884 if self._cache:
885 885 if self._cache[0] == node:
886 886 return self._cache[2]
887 887 cachedrev = self._cache[1]
888 888
889 889 # look up what we need to read
890 890 text = None
891 891 if rev is None:
892 892 rev = self.rev(node)
893 893
894 894 # check rev flags
895 895 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
896 896 raise RevlogError(_('incompatible revision flag %x') %
897 897 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
898 898
899 899 # build delta chain
900 900 chain = []
901 901 index = self.index # for performance
902 902 generaldelta = self._generaldelta
903 903 iterrev = rev
904 904 e = index[iterrev]
905 905 while iterrev != e[3] and iterrev != cachedrev:
906 906 chain.append(iterrev)
907 907 if generaldelta:
908 908 iterrev = e[3]
909 909 else:
910 910 iterrev -= 1
911 911 e = index[iterrev]
912 912 chain.reverse()
913 913 base = iterrev
914 914
915 915 if iterrev == cachedrev:
916 916 # cache hit
917 917 text = self._cache[2]
918 918
919 919 # drop cache to save memory
920 920 self._cache = None
921 921
922 922 self._chunkraw(base, rev)
923 923 if text is None:
924 text = self._chunkbase(base)
924 text = str(self._chunkbase(base))
925 925
926 926 bins = [self._chunk(r) for r in chain]
927 927 text = mdiff.patches(text, bins)
928 928
929 929 text = self._checkhash(text, node, rev)
930 930
931 931 self._cache = (node, rev, text)
932 932 return text
933 933
934 934 def _checkhash(self, text, node, rev):
935 935 p1, p2 = self.parents(node)
936 936 if node != hash(text, p1, p2):
937 937 raise RevlogError(_("integrity check failed on %s:%d")
938 938 % (self.indexfile, rev))
939 939 return text
940 940
941 941 def checkinlinesize(self, tr, fp=None):
942 942 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
943 943 return
944 944
945 945 trinfo = tr.find(self.indexfile)
946 946 if trinfo is None:
947 947 raise RevlogError(_("%s not found in the transaction")
948 948 % self.indexfile)
949 949
950 950 trindex = trinfo[2]
951 951 dataoff = self.start(trindex)
952 952
953 953 tr.add(self.datafile, dataoff)
954 954
955 955 if fp:
956 956 fp.flush()
957 957 fp.close()
958 958
959 959 df = self.opener(self.datafile, 'w')
960 960 try:
961 961 for r in self:
962 962 df.write(self._chunkraw(r, r))
963 963 finally:
964 964 df.close()
965 965
966 966 fp = self.opener(self.indexfile, 'w', atomictemp=True)
967 967 self.version &= ~(REVLOGNGINLINEDATA)
968 968 self._inline = False
969 969 for i in self:
970 970 e = self._io.packentry(self.index[i], self.node, self.version, i)
971 971 fp.write(e)
972 972
973 973 # if we don't call close, the temp file will never replace the
974 974 # real index
975 975 fp.close()
976 976
977 977 tr.replace(self.indexfile, trindex * self._io.size)
978 978 self._chunkclear()
979 979
980 980 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
981 981 """add a revision to the log
982 982
983 983 text - the revision data to add
984 984 transaction - the transaction object used for rollback
985 985 link - the linkrev data to add
986 986 p1, p2 - the parent nodeids of the revision
987 987 cachedelta - an optional precomputed delta
988 988 """
989 989 node = hash(text, p1, p2)
990 990 if node in self.nodemap:
991 991 return node
992 992
993 993 dfh = None
994 994 if not self._inline:
995 995 dfh = self.opener(self.datafile, "a")
996 996 ifh = self.opener(self.indexfile, "a+")
997 997 try:
998 998 return self._addrevision(node, text, transaction, link, p1, p2,
999 999 cachedelta, ifh, dfh)
1000 1000 finally:
1001 1001 if dfh:
1002 1002 dfh.close()
1003 1003 ifh.close()
1004 1004
1005 1005 def _addrevision(self, node, text, transaction, link, p1, p2,
1006 1006 cachedelta, ifh, dfh):
1007 1007 """internal function to add revisions to the log
1008 1008
1009 1009 see addrevision for argument descriptions.
1010 1010 invariants:
1011 1011 - text is optional (can be None); if not set, cachedelta must be set.
1012 1012 if both are set, they must correspond to each other.
1013 1013 """
1014 1014 btext = [text]
1015 1015 def buildtext():
1016 1016 if btext[0] is not None:
1017 1017 return btext[0]
1018 1018 # flush any pending writes here so we can read it in revision
1019 1019 if dfh:
1020 1020 dfh.flush()
1021 1021 ifh.flush()
1022 1022 basetext = self.revision(self.node(cachedelta[0]))
1023 1023 btext[0] = mdiff.patch(basetext, cachedelta[1])
1024 1024 chk = hash(btext[0], p1, p2)
1025 1025 if chk != node:
1026 1026 raise RevlogError(_("consistency error in delta"))
1027 1027 return btext[0]
1028 1028
1029 1029 def builddelta(rev):
1030 1030 # can we use the cached delta?
1031 1031 if cachedelta and cachedelta[0] == rev:
1032 1032 delta = cachedelta[1]
1033 1033 else:
1034 1034 t = buildtext()
1035 1035 ptext = self.revision(self.node(rev))
1036 1036 delta = mdiff.textdiff(ptext, t)
1037 1037 data = compress(delta)
1038 1038 l = len(data[1]) + len(data[0])
1039 1039 if basecache[0] == rev:
1040 1040 chainbase = basecache[1]
1041 1041 else:
1042 1042 chainbase = self.chainbase(rev)
1043 1043 dist = l + offset - self.start(chainbase)
1044 1044 if self._generaldelta:
1045 1045 base = rev
1046 1046 else:
1047 1047 base = chainbase
1048 1048 return dist, l, data, base, chainbase
1049 1049
1050 1050 curr = len(self)
1051 1051 prev = curr - 1
1052 1052 base = chainbase = curr
1053 1053 offset = self.end(prev)
1054 1054 flags = 0
1055 1055 d = None
1056 1056 basecache = self._basecache
1057 1057 p1r, p2r = self.rev(p1), self.rev(p2)
1058 1058
1059 1059 # should we try to build a delta?
1060 1060 if prev != nullrev:
1061 1061 if self._generaldelta:
1062 1062 if p1r >= basecache[1]:
1063 1063 d = builddelta(p1r)
1064 1064 elif p2r >= basecache[1]:
1065 1065 d = builddelta(p2r)
1066 1066 else:
1067 1067 d = builddelta(prev)
1068 1068 else:
1069 1069 d = builddelta(prev)
1070 1070 dist, l, data, base, chainbase = d
1071 1071
1072 1072 # full versions are inserted when the needed deltas
1073 1073 # become comparable to the uncompressed text
1074 1074 if text is None:
1075 1075 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1076 1076 cachedelta[1])
1077 1077 else:
1078 1078 textlen = len(text)
1079 1079 if d is None or dist > textlen * 2:
1080 1080 text = buildtext()
1081 1081 data = compress(text)
1082 1082 l = len(data[1]) + len(data[0])
1083 1083 base = chainbase = curr
1084 1084
1085 1085 e = (offset_type(offset, flags), l, textlen,
1086 1086 base, link, p1r, p2r, node)
1087 1087 self.index.insert(-1, e)
1088 1088 self.nodemap[node] = curr
1089 1089
1090 1090 entry = self._io.packentry(e, self.node, self.version, curr)
1091 1091 if not self._inline:
1092 1092 transaction.add(self.datafile, offset)
1093 1093 transaction.add(self.indexfile, curr * len(entry))
1094 1094 if data[0]:
1095 1095 dfh.write(data[0])
1096 1096 dfh.write(data[1])
1097 1097 dfh.flush()
1098 1098 ifh.write(entry)
1099 1099 else:
1100 1100 offset += curr * self._io.size
1101 1101 transaction.add(self.indexfile, offset, curr)
1102 1102 ifh.write(entry)
1103 1103 ifh.write(data[0])
1104 1104 ifh.write(data[1])
1105 1105 self.checkinlinesize(transaction, ifh)
1106 1106
1107 1107 if type(text) == str: # only accept immutable objects
1108 1108 self._cache = (node, curr, text)
1109 1109 self._basecache = (curr, chainbase)
1110 1110 return node
1111 1111
1112 1112 def group(self, nodelist, bundler, reorder=None):
1113 1113 """Calculate a delta group, yielding a sequence of changegroup chunks
1114 1114 (strings).
1115 1115
1116 1116 Given a list of changeset revs, return a set of deltas and
1117 1117 metadata corresponding to nodes. The first delta is
1118 1118 first parent(nodelist[0]) -> nodelist[0], the receiver is
1119 1119 guaranteed to have this parent as it has all history before
1120 1120 these changesets. In the case where firstparent is nullrev, the
1121 1121 changegroup starts with a full revision.
1122 1122 """
1123 1123
1124 1124 # if we don't have any revisions touched by these changesets, bail
1125 1125 if len(nodelist) == 0:
1126 1126 yield bundler.close()
1127 1127 return
1128 1128
1129 1129 # for generaldelta revlogs, we linearize the revs; this will both be
1130 1130 # much quicker and generate a much smaller bundle
1131 1131 if (self._generaldelta and reorder is not False) or reorder:
1132 1132 dag = dagutil.revlogdag(self)
1133 1133 revs = set(self.rev(n) for n in nodelist)
1134 1134 revs = dag.linearize(revs)
1135 1135 else:
1136 1136 revs = sorted([self.rev(n) for n in nodelist])
1137 1137
1138 1138 # add the parent of the first rev
1139 1139 p = self.parentrevs(revs[0])[0]
1140 1140 revs.insert(0, p)
1141 1141
1142 1142 # build deltas
1143 1143 for r in xrange(len(revs) - 1):
1144 1144 prev, curr = revs[r], revs[r + 1]
1145 1145 for c in bundler.revchunk(self, curr, prev):
1146 1146 yield c
1147 1147
1148 1148 yield bundler.close()
1149 1149
1150 1150 def addgroup(self, bundle, linkmapper, transaction):
1151 1151 """
1152 1152 add a delta group
1153 1153
1154 1154 given a set of deltas, add them to the revision log. the
1155 1155 first delta is against its parent, which should be in our
1156 1156 log; the rest are against the previous delta.
1157 1157 """
1158 1158
1159 1159 # track the base of the current delta log
1160 1160 content = []
1161 1161 node = None
1162 1162
1163 1163 r = len(self)
1164 1164 end = 0
1165 1165 if r:
1166 1166 end = self.end(r - 1)
1167 1167 ifh = self.opener(self.indexfile, "a+")
1168 1168 isize = r * self._io.size
1169 1169 if self._inline:
1170 1170 transaction.add(self.indexfile, end + isize, r)
1171 1171 dfh = None
1172 1172 else:
1173 1173 transaction.add(self.indexfile, isize, r)
1174 1174 transaction.add(self.datafile, end)
1175 1175 dfh = self.opener(self.datafile, "a")
1176 1176
1177 1177 try:
1178 1178 # loop through our set of deltas
1179 1179 chain = None
1180 1180 while True:
1181 1181 chunkdata = bundle.deltachunk(chain)
1182 1182 if not chunkdata:
1183 1183 break
1184 1184 node = chunkdata['node']
1185 1185 p1 = chunkdata['p1']
1186 1186 p2 = chunkdata['p2']
1187 1187 cs = chunkdata['cs']
1188 1188 deltabase = chunkdata['deltabase']
1189 1189 delta = chunkdata['delta']
1190 1190
1191 1191 content.append(node)
1192 1192
1193 1193 link = linkmapper(cs)
1194 1194 if node in self.nodemap:
1195 1195 # this can happen if two branches make the same change
1196 1196 chain = node
1197 1197 continue
1198 1198
1199 1199 for p in (p1, p2):
1200 1200 if not p in self.nodemap:
1201 1201 raise LookupError(p, self.indexfile,
1202 1202 _('unknown parent'))
1203 1203
1204 1204 if deltabase not in self.nodemap:
1205 1205 raise LookupError(deltabase, self.indexfile,
1206 1206 _('unknown delta base'))
1207 1207
1208 1208 baserev = self.rev(deltabase)
1209 1209 chain = self._addrevision(node, None, transaction, link,
1210 1210 p1, p2, (baserev, delta), ifh, dfh)
1211 1211 if not dfh and not self._inline:
1212 1212 # addrevision switched from inline to conventional
1213 1213 # reopen the index
1214 1214 ifh.close()
1215 1215 dfh = self.opener(self.datafile, "a")
1216 1216 ifh = self.opener(self.indexfile, "a")
1217 1217 finally:
1218 1218 if dfh:
1219 1219 dfh.close()
1220 1220 ifh.close()
1221 1221
1222 1222 return content
1223 1223
1224 1224 def strip(self, minlink, transaction):
1225 1225 """truncate the revlog on the first revision with a linkrev >= minlink
1226 1226
1227 1227 This function is called when we're stripping revision minlink and
1228 1228 its descendants from the repository.
1229 1229
1230 1230 We have to remove all revisions with linkrev >= minlink, because
1231 1231 the equivalent changelog revisions will be renumbered after the
1232 1232 strip.
1233 1233
1234 1234 So we truncate the revlog on the first of these revisions, and
1235 1235 trust that the caller has saved the revisions that shouldn't be
1236 1236 removed and that it'll re-add them after this truncation.
1237 1237 """
1238 1238 if len(self) == 0:
1239 1239 return
1240 1240
1241 1241 for rev in self:
1242 1242 if self.index[rev][4] >= minlink:
1243 1243 break
1244 1244 else:
1245 1245 return
1246 1246
1247 1247 # first truncate the files on disk
1248 1248 end = self.start(rev)
1249 1249 if not self._inline:
1250 1250 transaction.add(self.datafile, end)
1251 1251 end = rev * self._io.size
1252 1252 else:
1253 1253 end += rev * self._io.size
1254 1254
1255 1255 transaction.add(self.indexfile, end)
1256 1256
1257 1257 # then reset internal state in memory to forget those revisions
1258 1258 self._cache = None
1259 1259 self._chunkclear()
1260 1260 for x in xrange(rev, len(self)):
1261 1261 del self.nodemap[self.node(x)]
1262 1262
1263 1263 del self.index[rev:-1]
1264 1264
1265 1265 def checksize(self):
1266 1266 expected = 0
1267 1267 if len(self):
1268 1268 expected = max(0, self.end(len(self) - 1))
1269 1269
1270 1270 try:
1271 1271 f = self.opener(self.datafile)
1272 1272 f.seek(0, 2)
1273 1273 actual = f.tell()
1274 1274 f.close()
1275 1275 dd = actual - expected
1276 1276 except IOError, inst:
1277 1277 if inst.errno != errno.ENOENT:
1278 1278 raise
1279 1279 dd = 0
1280 1280
1281 1281 try:
1282 1282 f = self.opener(self.indexfile)
1283 1283 f.seek(0, 2)
1284 1284 actual = f.tell()
1285 1285 f.close()
1286 1286 s = self._io.size
1287 1287 i = max(0, actual // s)
1288 1288 di = actual - (i * s)
1289 1289 if self._inline:
1290 1290 databytes = 0
1291 1291 for r in self:
1292 1292 databytes += max(0, self.length(r))
1293 1293 dd = 0
1294 1294 di = actual - len(self) * s - databytes
1295 1295 except IOError, inst:
1296 1296 if inst.errno != errno.ENOENT:
1297 1297 raise
1298 1298 di = 0
1299 1299
1300 1300 return (dd, di)
1301 1301
1302 1302 def files(self):
1303 1303 res = [self.indexfile]
1304 1304 if not self._inline:
1305 1305 res.append(self.datafile)
1306 1306 return res
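The flip side of the change shows up in revdiff() and revision() above: once _chunk() may hand back a buffer rather than a str, callers that feed the result to code expecting real strings (the mdiff patching path, the revision cache) materialize it with str(...) at the boundary. A minimal sketch of that pattern, with hypothetical names, again assuming Python 2 buffer semantics:

def _cached_view(data, offset, length):
    # internal fast path: return a cheap read-only view, no copy
    return buffer(data, offset, length)

def text_for_patching(data, offset, length):
    # boundary: materialize a real str only where an API requires one,
    # mirroring str(self._chunk(rev2)) and str(self._chunkbase(base))
    return str(_cached_view(data, offset, length))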