##// END OF EJS Templates
revlog: improve delta generation heuristics for generaldelta...
Sune Foldager -
r14301:f9499376 default
parent child Browse files
Show More
@@ -1,1265 +1,1270
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
# local aliases to avoid repeated attribute lookups in hot code paths
_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)  # revision data interleaved in the index file
REVLOGGENERALDELTA = (1 << 17)  # deltas may be against an arbitrary base rev
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_KNOWN_FLAGS = 0

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576  # upper bound on the chunk read-ahead cache size

RevlogError = error.RevlogError
LookupError = error.LookupError
45 45
def getoffset(q):
    """Return the data offset stored in the high bits of packed value q."""
    offset = q >> 16
    return int(offset)
48 48
def gettype(q):
    """Return the flag bits stored in the low 16 bits of packed value q."""
    flagmask = 0xFFFF
    return int(q & flagmask)
51 51
def offset_type(offset, type):
    """Pack an offset and 16-bit type flags into a single long value."""
    packed = long(offset) << 16
    return long(packed | type)
54 54
# pre-computed hash state seeded with the null node; cloning it is cheaper
# than re-hashing nullid for every single-parent revision
nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # callers guarantee that when only one parent is real, it is p1
    if p2 != nullid:
        # two real parents: hash them in sorted order so the result is
        # independent of parent ordering
        pmin, pmax = sorted([p1, p2])
        s = _sha(pmin)
        s.update(pmax)
    else:
        # single (or no) parent: clone the precomputed null-seeded state
        s = nullhash.copy()
        s.update(p1)
    s.update(text)
    return s.digest()
77 77
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    size = len(text)
    bin = None
    if size >= 44:
        if size <= 1000000:
            bin = _compress(text)
        else:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so lets do this in pieces
            z = zlib.compressobj()
            pieces = []
            pos = 0
            while pos < size:
                end = pos + 2**20
                pieces.append(z.compress(text[pos:end]))
                pos = end
            pieces.append(z.flush())
            if sum(map(len, pieces)) < size:
                bin = "".join(pieces)
    # fall back to storing the text literally when compression was skipped
    # (tiny input) or did not actually shrink the data
    if bin is None or len(bin) > size:
        if text[0] == '\0':
            # a leading NUL cannot be mistaken for a type marker
            return ("", text)
        return ('u', text)
    return ("", bin)
106 106
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    marker = bin[0]
    if marker == '\0':
        # raw data that happened to begin with NUL; stored untouched
        return bin
    if marker == 'u':
        # stored literally behind a one-byte marker
        return bin[1:]
    if marker == 'x':
        # a zlib stream (zlib headers begin with 'x')
        return _decompress(bin)
    raise RevlogError(_("unknown compression type %r") % marker)
119 119
# index v0 on-disk entry: 4 longs (offset, compressed length, base rev,
# link rev) followed by three 20-byte nodeids (p1, p2, self)
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56  # byte offset of the nodeid field within a packed entry

class revlogoldio(object):
    """Parser/packer for the original (v0) revlog index format."""
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        """Parse raw v0 index data into (index, nodemap, chunkcache).

        Entries are converted to the in-memory revlogng tuple layout:
        (offset/flags, comp. length, uncomp. length (-1: unknown),
        base rev, link rev, p1 rev, p2 rev, nodeid).
        """
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Pack an in-memory index entry back into v0 on-disk bytes.

        'node' is a rev -> nodeid mapping function used to convert the
        parent revision numbers back to nodeids.
        """
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)
155 155
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32  # byte offset of the nodeid field within a packed entry
versionformat = ">I"

class revlogio(object):
    """Parser/packer for the RevlogNG index format."""
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, None, cache

    def packentry(self, entry, node, version, rev):
        """Pack an index entry; the first entry doubles as the file header."""
        p = _pack(indexformatng, *entry)
        if rev == 0:
            # the offset of the first entry is always 0, so its leading four
            # bytes are reused to store the revlog version and flags
            p = _pack(versionformat, version) + p[4:]
        return p
184 184
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    With the optional generaldelta format, a chunk's delta may instead
    be stored against an arbitrary base revision recorded in the index
    entry's base field.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
210 210 def __init__(self, opener, indexfile):
211 211 """
212 212 create a revlog object
213 213
214 214 opener is a function that abstracts the file opening operation
215 215 and can be used to implement COW semantics or the like.
216 216 """
217 217 self.indexfile = indexfile
218 218 self.datafile = indexfile[:-2] + ".d"
219 219 self.opener = opener
220 220 self._cache = None
221 221 self._basecache = (0, 0)
222 222 self._chunkcache = (0, '')
223 223 self.index = []
224 224 self._pcache = {}
225 225 self._nodecache = {nullid: nullrev}
226 226 self._nodepos = None
227 227
228 228 v = REVLOG_DEFAULT_VERSION
229 229 if hasattr(opener, 'options'):
230 230 if 'defversion' in opener.options:
231 231 v = opener.options['defversion']
232 232 if v & REVLOGNG:
233 233 v |= REVLOGNGINLINEDATA
234 234 if v & REVLOGNG and 'generaldelta' in opener.options:
235 235 v |= REVLOGGENERALDELTA
236 236
237 237 i = ''
238 238 try:
239 239 f = self.opener(self.indexfile)
240 240 i = f.read()
241 241 f.close()
242 242 if len(i) > 0:
243 243 v = struct.unpack(versionformat, i[:4])[0]
244 244 except IOError, inst:
245 245 if inst.errno != errno.ENOENT:
246 246 raise
247 247
248 248 self.version = v
249 249 self._inline = v & REVLOGNGINLINEDATA
250 250 self._generaldelta = v & REVLOGGENERALDELTA
251 251 flags = v & ~0xFFFF
252 252 fmt = v & 0xFFFF
253 253 if fmt == REVLOGV0 and flags:
254 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
255 255 % (self.indexfile, flags >> 16))
256 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
257 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt > REVLOGNG:
260 260 raise RevlogError(_("index %s unknown format %d")
261 261 % (self.indexfile, fmt))
262 262
263 263 self._io = revlogio()
264 264 if self.version == REVLOGV0:
265 265 self._io = revlogoldio()
266 266 try:
267 267 d = self._io.parseindex(i, self._inline)
268 268 except (ValueError, IndexError):
269 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
270 270 self.index, nodemap, self._chunkcache = d
271 271 if nodemap is not None:
272 272 self.nodemap = self._nodecache = nodemap
273 273 if not self._chunkcache:
274 274 self._chunkclear()
275 275
    def tip(self):
        # index[-1] is the magic null-revision sentinel, so tip is at len - 2
        return self.node(len(self.index) - 2)
    def __len__(self):
        # exclude the trailing null-revision sentinel
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i

    @util.propertycache
    def nodemap(self):
        # calling rev() fills self._nodecache as a side effect; the result
        # is then cached by util.propertycache on first access
        self.rev(self.node(0))
        return self._nodecache
288 288
    def rev(self, node):
        """Return the revision number for a nodeid.

        Raises LookupError when the node is not in this revlog.
        """
        try:
            return self._nodecache[node]
        except KeyError:
            # cache miss: scan the index backwards from the last unscanned
            # position, memoizing every node visited along the way
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                # start from the tip (len(i) - 2 skips the null sentinel)
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    # remember where to resume on the next cache miss
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))
305 305
    # index entry tuple layout (see indexformatng):
    # [0] offset/flags, [1] compressed length, [2] uncompressed length,
    # [3] base rev, [4] link rev, [5] p1 rev, [6] p2 rev, [7] nodeid
    def node(self, rev):
        """Return the nodeid for a revision number."""
        return self.index[rev][7]
    def linkrev(self, rev):
        """Return the linkrev (pointer to external data) for rev."""
        return self.index[rev][4]
    def parents(self, node):
        """Return the two parent nodeids of node."""
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        """Return the two parent revision numbers of rev."""
        return self.index[rev][5:7]
    def start(self, rev):
        """Return the byte offset of rev's data within the data file."""
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        """Return the byte offset just past rev's compressed data."""
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        """Return the length of rev's compressed data."""
        return self.index[rev][1]
    def chainbase(self, rev):
        """Follow base pointers back to the start of rev's delta chain."""
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def flags(self, rev):
        """Return the 16 flag bits of rev's index entry."""
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        # a negative stored size means "unknown": reconstruct the full text
        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize
340 340
341 341 def reachable(self, node, stop=None):
342 342 """return the set of all nodes ancestral to a given node, including
343 343 the node itself, stopping when stop is matched"""
344 344 reachable = set((node,))
345 345 visit = [node]
346 346 if stop:
347 347 stopn = self.rev(stop)
348 348 else:
349 349 stopn = 0
350 350 while visit:
351 351 n = visit.pop(0)
352 352 if n == stop:
353 353 continue
354 354 if n == nullid:
355 355 continue
356 356 for p in self.parents(n):
357 357 if self.rev(p) < stopn:
358 358 continue
359 359 if p not in reachable:
360 360 reachable.add(p)
361 361 visit.append(p)
362 362 return reachable
363 363
364 364 def ancestors(self, *revs):
365 365 """Generate the ancestors of 'revs' in reverse topological order.
366 366
367 367 Yield a sequence of revision numbers starting with the parents
368 368 of each revision in revs, i.e., each revision is *not* considered
369 369 an ancestor of itself. Results are in breadth-first order:
370 370 parents of each rev in revs, then parents of those, etc. Result
371 371 does not include the null revision."""
372 372 visit = list(revs)
373 373 seen = set([nullrev])
374 374 while visit:
375 375 for parent in self.parentrevs(visit.pop(0)):
376 376 if parent not in seen:
377 377 visit.append(parent)
378 378 seen.add(parent)
379 379 yield parent
380 380
381 381 def descendants(self, *revs):
382 382 """Generate the descendants of 'revs' in revision order.
383 383
384 384 Yield a sequence of revision numbers starting with a child of
385 385 some rev in revs, i.e., each revision is *not* considered a
386 386 descendant of itself. Results are ordered by revision number (a
387 387 topological sort)."""
388 388 first = min(revs)
389 389 if first == nullrev:
390 390 for i in self:
391 391 yield i
392 392 return
393 393
394 394 seen = set(revs)
395 395 for i in xrange(first + 1, len(self)):
396 396 for x in self.parentrevs(i):
397 397 if x != nullrev and x in seen:
398 398 seen.add(i)
399 399 yield i
400 400 break
401 401
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common.

        More specifically, the second element is a list of nodes N such that
        every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        # (breadth-first walk; 'missing' doubles as the visited set)
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        # sorted by revision number == topologically sorted
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]
446 446
447 447 def findmissing(self, common=None, heads=None):
448 448 """Return the ancestors of heads that are not ancestors of common.
449 449
450 450 More specifically, return a list of nodes N such that every N
451 451 satisfies the following constraints:
452 452
453 453 1. N is an ancestor of some node in 'heads'
454 454 2. N is not an ancestor of any node in 'common'
455 455
456 456 The list is sorted by revision number, meaning it is
457 457 topologically sorted.
458 458
459 459 'heads' and 'common' are both lists of node IDs. If heads is
460 460 not supplied, uses all of the revlog's heads. If common is not
461 461 supplied, uses nullid."""
462 462 _common, missing = self.findcommonmissing(common, heads)
463 463 return missing
464 464
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        # the empty result returned whenever the path is provably empty
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descedents, empty parents will look like
        # they're descendents.
        # Walk in increasing rev order, which is a topological order.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # keep only the heads that were actually reached from the roots
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
619 619
620 620 def headrevs(self):
621 621 count = len(self)
622 622 if not count:
623 623 return [nullrev]
624 624 ishead = [1] * (count + 1)
625 625 index = self.index
626 626 for r in xrange(count):
627 627 e = index[r]
628 628 ishead[e[5]] = ishead[e[6]] = 0
629 629 return [r for r in xrange(count) if ishead[r]]
630 630
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            # common case: delegate to the index-based fast path
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        # walk forward; each reachable rev becomes a head and displaces
        # any of its parents that were provisionally heads
        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
664 664
665 665 def children(self, node):
666 666 """find the children of a given node"""
667 667 c = []
668 668 p = self.rev(node)
669 669 for r in range(p + 1, len(self)):
670 670 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
671 671 if prevs:
672 672 for pr in prevs:
673 673 if pr == p:
674 674 c.append(self.node(r))
675 675 elif p == nullrev:
676 676 c.append(self.node(r))
677 677 return c
678 678
679 679 def descendant(self, start, end):
680 680 if start == nullrev:
681 681 return True
682 682 for i in self.descendants(start):
683 683 if i == end:
684 684 return True
685 685 elif i > end:
686 686 break
687 687 return False
688 688
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # fast path, check if it is a descendant
        a, b = self.rev(a), self.rev(b)
        start, end = sorted((a, b))
        if self.descendant(start, end):
            # one revision is an ancestor of the other
            return self.node(start)

        def parents(rev):
            # adapter for ancestor.ancestor: only real (non-null) parents
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(a, b, parents)
        if c is None:
            # no common ancestor besides the null revision
            return nullid

        return self.node(c)
706 706
    def _match(self, id):
        """Resolve an exact revision identifier to a nodeid, or None.

        Accepts a revision number, a 20-byte binary nodeid, str(rev), or
        a full 40-character hex nodeid; prefixes are handled elsewhere
        (see _partialmatch).
        """
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                # negative revs count back from the tip
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
740 740
741 741 def _partialmatch(self, id):
742 742 if id in self._pcache:
743 743 return self._pcache[id]
744 744
745 745 if len(id) < 40:
746 746 try:
747 747 # hex(node)[:...]
748 748 l = len(id) // 2 # grab an even number of digits
749 749 prefix = bin(id[:l * 2])
750 750 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
751 751 nl = [n for n in nl if hex(n).startswith(id)]
752 752 if len(nl) > 0:
753 753 if len(nl) == 1:
754 754 self._pcache[id] = nl[0]
755 755 return nl[0]
756 756 raise LookupError(id, self.indexfile,
757 757 _('ambiguous identifier'))
758 758 return None
759 759 except TypeError:
760 760 pass
761 761
762 762 def lookup(self, id):
763 763 """locate a node based on:
764 764 - revision number or str(revision number)
765 765 - nodeid or subset of hex nodeid
766 766 """
767 767 n = self._match(id)
768 768 if n is not None:
769 769 return n
770 770 n = self._partialmatch(id)
771 771 if n:
772 772 return n
773 773
774 774 raise LookupError(id, self.indexfile, _('no match found'))
775 775
776 776 def cmp(self, node, text):
777 777 """compare text with a given file revision
778 778
779 779 returns True if text is different than what is stored.
780 780 """
781 781 p1, p2 = self.parents(node)
782 782 return hash(text, p1, p2) != node
783 783
784 784 def _addchunk(self, offset, data):
785 785 o, d = self._chunkcache
786 786 # try to add to existing cache
787 787 if o + len(d) == offset and len(d) + len(data) < _chunksize:
788 788 self._chunkcache = o, d + data
789 789 else:
790 790 self._chunkcache = offset, data
791 791
    def _loadchunk(self, offset, length):
        """Read 'length' bytes at 'offset' from disk, with read-ahead.

        The (possibly larger) raw read is folded into the chunk cache.
        """
        if self._inline:
            # inline revlogs keep their data inside the index file
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        # read at least 64k to amortize per-read overhead
        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d
805 805
806 806 def _getchunk(self, offset, length):
807 807 o, d = self._chunkcache
808 808 l = len(d)
809 809
810 810 # is it in the cache?
811 811 cachestart = offset - o
812 812 cacheend = cachestart + length
813 813 if cachestart >= 0 and cacheend <= l:
814 814 if cachestart == 0 and cacheend == l:
815 815 return d # avoid a copy
816 816 return d[cachestart:cacheend]
817 817
818 818 return self._loadchunk(offset, length)
819 819
    def _chunkraw(self, startrev, endrev):
        """Read the raw on-disk bytes spanning startrev through endrev."""
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            # skip the index entries interleaved with the inline data
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        """Return the decompressed data chunk for a single revision."""
        return decompress(self._chunkraw(rev, rev))

    def _chunkbase(self, rev):
        """Return the chunk for the base revision of a delta chain."""
        return self._chunk(rev)

    def _chunkclear(self):
        """Reset the chunk cache to empty."""
        self._chunkcache = (0, '')
835 835
836 836 def deltaparent(self, rev):
837 837 """return deltaparent of the given revision"""
838 838 base = self.index[rev][3]
839 839 if base == rev:
840 840 return nullrev
841 841 elif self._generaldelta:
842 842 return base
843 843 else:
844 844 return rev - 1
845 845
846 846 def revdiff(self, rev1, rev2):
847 847 """return or calculate a delta between two revisions"""
848 848 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
849 849 return self._chunk(rev2)
850 850
851 851 return mdiff.textdiff(self.revision(self.node(rev1)),
852 852 self.revision(self.node(rev2)))
853 853
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            # the full text of the last requested revision is kept around
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain: walk backwards until we reach a full snapshot
        # (an entry whose base is itself) or the cached revision, recording
        # every rev whose delta must be applied on the way back
        chain = []
        index = self.index # for performance
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != cachedrev:
            chain.append(iterrev)
            if generaldelta:
                # the base field holds the actual delta parent
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]
        chain.reverse()
        base = iterrev

        if iterrev == cachedrev:
            # cache hit
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        # prime the chunk cache with one read covering the base..rev span
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunkbase(base)

        # apply the chain of deltas on top of the base text
        bins = [self._chunk(r) for r in chain]
        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text
907 907
908 908 def _checkhash(self, text, node, rev):
909 909 p1, p2 = self.parents(node)
910 910 if node != hash(text, p1, p2):
911 911 raise RevlogError(_("integrity check failed on %s:%d")
912 912 % (self.indexfile, rev))
913 913 return text
914 914
    def checkinlinesize(self, tr, fp=None):
        """Split an inline revlog into separate index and data files once
        the inline data outgrows _maxinline.

        'tr' is the active transaction; 'fp' is an optional open handle on
        the index file, which is flushed and closed before rewriting.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        # register the new data file with the transaction for rollback
        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        # copy every revision's raw chunk out into the new data file
        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        # rewrite the index without the interleaved inline data
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        # cached offsets refer to the old inline layout; discard them
        self._chunkclear()
953 953
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta

        Returns the node of the (possibly already present) revision.
        """
        node = hash(text, p1, p2)
        if node in self.nodemap:
            # the revision is already stored; adding it again is a no-op
            return node

        # open the data file only for non-inline revlogs; inline revlogs
        # append both index entries and data to the index file
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()
978 978
def _addrevision(self, node, text, transaction, link, p1, p2,
                 cachedelta, ifh, dfh):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.
    invariants:
    - text is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to eachother.
    """
    btext = [text]
    def buildtext():
        # lazily materialize the full text from cachedelta when needed
        if btext[0] is not None:
            return btext[0]
        # flush any pending writes here so we can read it in revision
        if dfh:
            dfh.flush()
        ifh.flush()
        basetext = self.revision(self.node(cachedelta[0]))
        btext[0] = mdiff.patch(basetext, cachedelta[1])
        chk = hash(btext[0], p1, p2)
        if chk != node:
            raise RevlogError(_("consistency error in delta"))
        return btext[0]

    def builddelta(rev):
        # returns (dist, compressed-length, data, base, chainbase) for a
        # delta against rev
        # can we use the cached delta?
        if cachedelta and cachedelta[0] == rev:
            delta = cachedelta[1]
        else:
            t = buildtext()
            ptext = self.revision(self.node(rev))
            delta = mdiff.textdiff(ptext, t)
        data = compress(delta)
        l = len(data[1]) + len(data[0])
        if basecache[0] == rev:
            chainbase = basecache[1]
        else:
            chainbase = self.chainbase(rev)
        dist = l + offset - self.start(chainbase)
        if self._generaldelta:
            base = rev
        else:
            base = chainbase
        return dist, l, data, base, chainbase

    curr = len(self)
    prev = curr - 1
    base = chainbase = curr
    offset = self.end(prev)
    flags = 0
    d = None
    basecache = self._basecache
    p1r, p2r = self.rev(p1), self.rev(p2)

    # should we try to build a delta?
    if prev != nullrev:
        if self._generaldelta:
            # prefer a delta against a parent, but only if the parent's
            # chain base is not older than the last added revision's base
            # (basecache is (rev, chainbase) of the last revision added)
            if p1r >= basecache[1]:
                d = builddelta(p1r)
            elif p2r >= basecache[1]:
                d = builddelta(p2r)
            else:
                d = builddelta(prev)
        else:
            d = builddelta(prev)
        dist, l, data, base, chainbase = d

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if text is None:
        textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                    cachedelta[1])
    else:
        textlen = len(text)
    if d is None or dist > textlen * 2:
        text = buildtext()
        data = compress(text)
        l = len(data[1]) + len(data[0])
        base = chainbase = curr

    e = (offset_type(offset, flags), l, textlen,
         base, link, p1r, p2r, node)
    self.index.insert(-1, e)
    self.nodemap[node] = curr

    entry = self._io.packentry(e, self.node, self.version, curr)
    if not self._inline:
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        dfh.flush()
        ifh.write(entry)
    else:
        offset += curr * self._io.size
        transaction.add(self.indexfile, offset, curr)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        self.checkinlinesize(transaction, ifh)

    if type(text) == str: # only accept immutable objects
        self._cache = (node, curr, text)
    self._basecache = (curr, chainbase)
    return node
1080 1085
def group(self, nodelist, bundler):
    """Calculate a delta group, yielding a sequence of changegroup chunks
    (strings).

    Given a list of changeset revs, return a set of deltas and
    metadata corresponding to nodes. The first delta is
    first parent(nodelist[0]) -> nodelist[0], the receiver is
    guaranteed to have this parent as it has all history before
    these changesets. In the case firstparent is nullrev the
    changegroup starts with a full revision.
    """

    revs = sorted(self.rev(n) for n in nodelist)

    # if we don't have any revisions touched by these changesets, bail
    if not revs:
        yield bundler.close()
        return

    # add the parent of the first rev
    revs.insert(0, self.parentrevs(revs[0])[0])

    # build deltas between each consecutive pair of revisions
    for prev, curr in zip(revs, revs[1:]):
        for chunk in bundler.revchunk(self, curr, prev):
            yield chunk

    yield bundler.close()
1111 1116
def addgroup(self, bundle, linkmapper, transaction):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    Returns the node of the last revision added (or None if the
    bundle was empty).
    """

    # track the base of the current delta log
    node = None

    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self.opener(self.indexfile, "a+")
    isize = r * self._io.size
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self.opener(self.datafile, "a")

    try:
        # loop through our set of deltas
        chain = None
        while True:
            chunkdata = bundle.deltachunk(chain)
            if not chunkdata:
                break
            node = chunkdata['node']
            p1 = chunkdata['p1']
            p2 = chunkdata['p2']
            cs = chunkdata['cs']
            deltabase = chunkdata['deltabase']
            delta = chunkdata['delta']

            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                chain = node
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(p, self.indexfile,
                                      _('unknown parent'))

            if deltabase not in self.nodemap:
                raise LookupError(deltabase, self.indexfile,
                                  _('unknown delta base'))

            baserev = self.rev(deltabase)
            chain = self._addrevision(node, None, transaction, link,
                                      p1, p2, (baserev, delta), ifh, dfh)
            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self.opener(self.datafile, "a")
                ifh = self.opener(self.indexfile, "a")
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return node
1182 1187
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll readd them after this truncation.
    """
    if len(self) == 0:
        return

    # find the first revision to drop; bail out if none qualifies
    for rev in self:
        if self.index[rev][4] >= minlink:
            break
    else:
        return

    # first truncate the files on disk
    dataend = self.start(rev)
    if self._inline:
        indexend = dataend + rev * self._io.size
    else:
        transaction.add(self.datafile, dataend)
        indexend = rev * self._io.size

    transaction.add(self.indexfile, indexend)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chunkclear()
    for x in xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    del self.index[rev:-1]
1223 1228
1224 1229 def checksize(self):
1225 1230 expected = 0
1226 1231 if len(self):
1227 1232 expected = max(0, self.end(len(self) - 1))
1228 1233
1229 1234 try:
1230 1235 f = self.opener(self.datafile)
1231 1236 f.seek(0, 2)
1232 1237 actual = f.tell()
1233 1238 f.close()
1234 1239 dd = actual - expected
1235 1240 except IOError, inst:
1236 1241 if inst.errno != errno.ENOENT:
1237 1242 raise
1238 1243 dd = 0
1239 1244
1240 1245 try:
1241 1246 f = self.opener(self.indexfile)
1242 1247 f.seek(0, 2)
1243 1248 actual = f.tell()
1244 1249 f.close()
1245 1250 s = self._io.size
1246 1251 i = max(0, actual // s)
1247 1252 di = actual - (i * s)
1248 1253 if self._inline:
1249 1254 databytes = 0
1250 1255 for r in self:
1251 1256 databytes += max(0, self.length(r))
1252 1257 dd = 0
1253 1258 di = actual - len(self) * s - databytes
1254 1259 except IOError, inst:
1255 1260 if inst.errno != errno.ENOENT:
1256 1261 raise
1257 1262 di = 0
1258 1263
1259 1264 return (dd, di)
1260 1265
def files(self):
    """Return the list of on-disk files backing this revlog."""
    if self._inline:
        return [self.indexfile]
    return [self.indexfile, self.datafile]
General Comments 0
You need to be logged in to leave comments. Login now