revlog: fix for generaldelta distance calculation...
Friedrich Kastner-Masilko
r17134:e7167007 default
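This changeset targets the heuristic that decides whether to store a new revision as a delta or as a full snapshot. Before the fix, the distance was always computed as dist = l + offset - self.start(chainbase): the span of the data file from the start of the delta chain's base to the end of the proposed delta. That span only reflects real read cost when a chain's chunks are contiguous; with generaldelta, chunks from unrelated chains can be interleaved, so the span also counts bytes that are never read. The new chainlength method sums the compressed chunk lengths along the chain itself, and generaldelta revlogs now use dist = l + self.chainlength(rev). Below is a minimal standalone sketch of both measures; the (start_offset, compressed_length, base_rev) index tuples and the helper names are illustrative only, not the real revlog layout.

# Index entries are modeled here as (start_offset, compressed_length,
# base_rev) tuples; the real revlog packs entries differently.

def chainlength(index, rev):
    # Mirrors the new revlog.chainlength(): walk base pointers until a
    # revision is its own base (a full snapshot), summing the
    # compressed chunk lengths along the way.
    base = index[rev][2]
    length = index[rev][1]
    while base != rev:
        rev = base
        base = index[rev][2]
        length += index[rev][1]
    return length

def dist_old(index, rev, l, offset):
    # Old measure: data-file span from the chain base's start to the
    # end of the proposed delta (length l, to be written at offset).
    # A fair read-cost estimate only when the chain is contiguous.
    base = index[rev][2]
    while base != rev:
        rev = base
        base = index[rev][2]
    return l + offset - index[rev][0]

def dist_new(index, rev, l):
    # New measure for generaldelta: proposed delta size plus the total
    # compressed size of the chain it extends.
    return l + chainlength(index, rev)

In _addrevision below, this distance is compared against textlen * 2; a worked example after the diff shows how far the two measures can diverge.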
@@ -1,1312 +1,1322 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
78 78 def compress(text):
79 79 """ generate a possibly-compressed representation of text """
80 80 if not text:
81 81 return ("", text)
82 82 l = len(text)
83 83 bin = None
84 84 if l < 44:
85 85 pass
86 86 elif l > 1000000:
87 87 # zlib makes an internal copy, thus doubling memory usage for
88 88 # large files, so let's do this in pieces
89 89 z = zlib.compressobj()
90 90 p = []
91 91 pos = 0
92 92 while pos < l:
93 93 pos2 = pos + 2**20
94 94 p.append(z.compress(text[pos:pos2]))
95 95 pos = pos2
96 96 p.append(z.flush())
97 97 if sum(map(len, p)) < l:
98 98 bin = "".join(p)
99 99 else:
100 100 bin = _compress(text)
101 101 if bin is None or len(bin) > l:
102 102 if text[0] == '\0':
103 103 return ("", text)
104 104 return ('u', text)
105 105 return ("", bin)
106 106
107 107 def decompress(bin):
108 108 """ decompress the given input """
109 109 if not bin:
110 110 return bin
111 111 t = bin[0]
112 112 if t == '\0':
113 113 return bin
114 114 if t == 'x':
115 115 try:
116 116 return _decompress(bin)
117 117 except zlib.error, e:
118 118 raise RevlogError(_("revlog decompress error: %s") % str(e))
119 119 if t == 'u':
120 120 return bin[1:]
121 121 raise RevlogError(_("unknown compression type %r") % t)
122 122
123 123 indexformatv0 = ">4l20s20s20s"
124 124 v0shaoffset = 56
125 125
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 ngshaoffset = 32
171 171 versionformat = ">I"
172 172
173 173 class revlogio(object):
174 174 def __init__(self):
175 175 self.size = struct.calcsize(indexformatng)
176 176
177 177 def parseindex(self, data, inline):
178 178 # call the C implementation to parse the index data
179 179 index, cache = parsers.parse_index2(data, inline)
180 180 return index, getattr(index, 'nodemap', None), cache
181 181
182 182 def packentry(self, entry, node, version, rev):
183 183 p = _pack(indexformatng, *entry)
184 184 if rev == 0:
185 185 p = _pack(versionformat, version) + p[4:]
186 186 return p
187 187
188 188 class revlog(object):
189 189 """
190 190 the underlying revision storage object
191 191
192 192 A revlog consists of two parts, an index and the revision data.
193 193
194 194 The index is a file with a fixed record size containing
195 195 information on each revision, including its nodeid (hash), the
196 196 nodeids of its parents, the position and offset of its data within
197 197 the data file, and the revision it's based on. Finally, each entry
198 198 contains a linkrev entry that can serve as a pointer to external
199 199 data.
200 200
201 201 The revision data itself is a linear collection of data chunks.
202 202 Each chunk represents a revision and is usually represented as a
203 203 delta against the previous chunk. To bound lookup time, runs of
204 204 deltas are limited to about 2 times the length of the original
205 205 version data. This makes retrieval of a version proportional to
206 206 its size, or O(1) relative to the number of revisions.
207 207
208 208 Both pieces of the revlog are written to in an append-only
209 209 fashion, which means we never need to rewrite a file to insert or
210 210 remove data, and can use some simple techniques to avoid the need
211 211 for locking while reading.
212 212 """
213 213 def __init__(self, opener, indexfile):
214 214 """
215 215 create a revlog object
216 216
217 217 opener is a function that abstracts the file opening operation
218 218 and can be used to implement COW semantics or the like.
219 219 """
220 220 self.indexfile = indexfile
221 221 self.datafile = indexfile[:-2] + ".d"
222 222 self.opener = opener
223 223 self._cache = None
224 224 self._basecache = (0, 0)
225 225 self._chunkcache = (0, '')
226 226 self.index = []
227 227 self._pcache = {}
228 228 self._nodecache = {nullid: nullrev}
229 229 self._nodepos = None
230 230
231 231 v = REVLOG_DEFAULT_VERSION
232 232 opts = getattr(opener, 'options', None)
233 233 if opts is not None:
234 234 if 'revlogv1' in opts:
235 235 if 'generaldelta' in opts:
236 236 v |= REVLOGGENERALDELTA
237 237 else:
238 238 v = 0
239 239
240 240 i = ''
241 241 self._initempty = True
242 242 try:
243 243 f = self.opener(self.indexfile)
244 244 i = f.read()
245 245 f.close()
246 246 if len(i) > 0:
247 247 v = struct.unpack(versionformat, i[:4])[0]
248 248 self._initempty = False
249 249 except IOError, inst:
250 250 if inst.errno != errno.ENOENT:
251 251 raise
252 252
253 253 self.version = v
254 254 self._inline = v & REVLOGNGINLINEDATA
255 255 self._generaldelta = v & REVLOGGENERALDELTA
256 256 flags = v & ~0xFFFF
257 257 fmt = v & 0xFFFF
258 258 if fmt == REVLOGV0 and flags:
259 259 raise RevlogError(_("index %s unknown flags %#04x for format v0")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
262 262 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
263 263 % (self.indexfile, flags >> 16))
264 264 elif fmt > REVLOGNG:
265 265 raise RevlogError(_("index %s unknown format %d")
266 266 % (self.indexfile, fmt))
267 267
268 268 self._io = revlogio()
269 269 if self.version == REVLOGV0:
270 270 self._io = revlogoldio()
271 271 try:
272 272 d = self._io.parseindex(i, self._inline)
273 273 except (ValueError, IndexError):
274 274 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
275 275 self.index, nodemap, self._chunkcache = d
276 276 if nodemap is not None:
277 277 self.nodemap = self._nodecache = nodemap
278 278 if not self._chunkcache:
279 279 self._chunkclear()
280 280
281 281 def tip(self):
282 282 return self.node(len(self.index) - 2)
283 283 def __len__(self):
284 284 return len(self.index) - 1
285 285 def __iter__(self):
286 286 for i in xrange(len(self)):
287 287 yield i
288 288
289 289 @util.propertycache
290 290 def nodemap(self):
291 291 self.rev(self.node(0))
292 292 return self._nodecache
293 293
294 294 def hasnode(self, node):
295 295 try:
296 296 self.rev(node)
297 297 return True
298 298 except KeyError:
299 299 return False
300 300
301 301 def clearcaches(self):
302 302 try:
303 303 self._nodecache.clearcaches()
304 304 except AttributeError:
305 305 self._nodecache = {nullid: nullrev}
306 306 self._nodepos = None
307 307
308 308 def rev(self, node):
309 309 try:
310 310 return self._nodecache[node]
311 311 except RevlogError:
312 312 # parsers.c radix tree lookup failed
313 313 raise LookupError(node, self.indexfile, _('no node'))
314 314 except KeyError:
315 315 # pure python cache lookup failed
316 316 n = self._nodecache
317 317 i = self.index
318 318 p = self._nodepos
319 319 if p is None:
320 320 p = len(i) - 2
321 321 for r in xrange(p, -1, -1):
322 322 v = i[r][7]
323 323 n[v] = r
324 324 if v == node:
325 325 self._nodepos = r - 1
326 326 return r
327 327 raise LookupError(node, self.indexfile, _('no node'))
328 328
329 329 def node(self, rev):
330 330 return self.index[rev][7]
331 331 def linkrev(self, rev):
332 332 return self.index[rev][4]
333 333 def parents(self, node):
334 334 i = self.index
335 335 d = i[self.rev(node)]
336 336 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
337 337 def parentrevs(self, rev):
338 338 return self.index[rev][5:7]
339 339 def start(self, rev):
340 340 return int(self.index[rev][0] >> 16)
341 341 def end(self, rev):
342 342 return self.start(rev) + self.length(rev)
343 343 def length(self, rev):
344 344 return self.index[rev][1]
345 345 def chainbase(self, rev):
346 346 index = self.index
347 347 base = index[rev][3]
348 348 while base != rev:
349 349 rev = base
350 350 base = index[rev][3]
351 351 return base
352 def chainlength(self, rev):
353 index = self.index
354 base = index[rev][3]
355 length = index[rev][1]
356 while base != rev:
357 rev = base
358 base = index[rev][3]
359 length = length + index[rev][1]
360 return length
352 361 def flags(self, rev):
353 362 return self.index[rev][0] & 0xFFFF
354 363 def rawsize(self, rev):
355 364 """return the length of the uncompressed text for a given revision"""
356 365 l = self.index[rev][2]
357 366 if l >= 0:
358 367 return l
359 368
360 369 t = self.revision(self.node(rev))
361 370 return len(t)
362 371 size = rawsize
363 372
364 373 def ancestors(self, revs, stoprev=0):
365 374 """Generate the ancestors of 'revs' in reverse topological order.
366 375 Does not generate revs lower than stoprev.
367 376
368 377 Yield a sequence of revision numbers starting with the parents
369 378 of each revision in revs, i.e., each revision is *not* considered
370 379 an ancestor of itself. Results are in breadth-first order:
371 380 parents of each rev in revs, then parents of those, etc. Result
372 381 does not include the null revision."""
373 382 visit = util.deque(revs)
374 383 seen = set([nullrev])
375 384 while visit:
376 385 for parent in self.parentrevs(visit.popleft()):
377 386 if parent < stoprev:
378 387 continue
379 388 if parent not in seen:
380 389 visit.append(parent)
381 390 seen.add(parent)
382 391 yield parent
383 392
384 393 def incancestors(self, revs, stoprev=0):
385 394 """Identical to ancestors() except it also generates the
386 395 revisions, 'revs'"""
387 396 for rev in revs:
388 397 yield rev
389 398 for rev in self.ancestors(revs, stoprev):
390 399 yield rev
391 400
392 401 def descendants(self, revs):
393 402 """Generate the descendants of 'revs' in revision order.
394 403
395 404 Yield a sequence of revision numbers starting with a child of
396 405 some rev in revs, i.e., each revision is *not* considered a
397 406 descendant of itself. Results are ordered by revision number (a
398 407 topological sort)."""
399 408 first = min(revs)
400 409 if first == nullrev:
401 410 for i in self:
402 411 yield i
403 412 return
404 413
405 414 seen = set(revs)
406 415 for i in xrange(first + 1, len(self)):
407 416 for x in self.parentrevs(i):
408 417 if x != nullrev and x in seen:
409 418 seen.add(i)
410 419 yield i
411 420 break
412 421
413 422 def findcommonmissing(self, common=None, heads=None):
414 423 """Return a tuple of the ancestors of common and the ancestors of heads
415 424 that are not ancestors of common. In revset terminology, we return the
416 425 tuple:
417 426
418 427 ::common, (::heads) - (::common)
419 428
420 429 The list is sorted by revision number, meaning it is
421 430 topologically sorted.
422 431
423 432 'heads' and 'common' are both lists of node IDs. If heads is
424 433 not supplied, uses all of the revlog's heads. If common is not
425 434 supplied, uses nullid."""
426 435 if common is None:
427 436 common = [nullid]
428 437 if heads is None:
429 438 heads = self.heads()
430 439
431 440 common = [self.rev(n) for n in common]
432 441 heads = [self.rev(n) for n in heads]
433 442
434 443 # we want the ancestors, but inclusive
435 444 has = set(self.ancestors(common))
436 445 has.add(nullrev)
437 446 has.update(common)
438 447
439 448 # take all ancestors from heads that aren't in has
440 449 missing = set()
441 450 visit = util.deque(r for r in heads if r not in has)
442 451 while visit:
443 452 r = visit.popleft()
444 453 if r in missing:
445 454 continue
446 455 else:
447 456 missing.add(r)
448 457 for p in self.parentrevs(r):
449 458 if p not in has:
450 459 visit.append(p)
451 460 missing = list(missing)
452 461 missing.sort()
453 462 return has, [self.node(r) for r in missing]
454 463
455 464 def findmissing(self, common=None, heads=None):
456 465 """Return the ancestors of heads that are not ancestors of common.
457 466
458 467 More specifically, return a list of nodes N such that every N
459 468 satisfies the following constraints:
460 469
461 470 1. N is an ancestor of some node in 'heads'
462 471 2. N is not an ancestor of any node in 'common'
463 472
464 473 The list is sorted by revision number, meaning it is
465 474 topologically sorted.
466 475
467 476 'heads' and 'common' are both lists of node IDs. If heads is
468 477 not supplied, uses all of the revlog's heads. If common is not
469 478 supplied, uses nullid."""
470 479 _common, missing = self.findcommonmissing(common, heads)
471 480 return missing
472 481
473 482 def nodesbetween(self, roots=None, heads=None):
474 483 """Return a topological path from 'roots' to 'heads'.
475 484
476 485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
477 486 topologically sorted list of all nodes N that satisfy both of
478 487 these constraints:
479 488
480 489 1. N is a descendant of some node in 'roots'
481 490 2. N is an ancestor of some node in 'heads'
482 491
483 492 Every node is considered to be both a descendant and an ancestor
484 493 of itself, so every reachable node in 'roots' and 'heads' will be
485 494 included in 'nodes'.
486 495
487 496 'outroots' is the list of reachable nodes in 'roots', i.e., the
488 497 subset of 'roots' that is returned in 'nodes'. Likewise,
489 498 'outheads' is the subset of 'heads' that is also in 'nodes'.
490 499
491 500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
492 501 unspecified, uses nullid as the only root. If 'heads' is
493 502 unspecified, uses list of all of the revlog's heads."""
494 503 nonodes = ([], [], [])
495 504 if roots is not None:
496 505 roots = list(roots)
497 506 if not roots:
498 507 return nonodes
499 508 lowestrev = min([self.rev(n) for n in roots])
500 509 else:
501 510 roots = [nullid] # Everybody's a descendant of nullid
502 511 lowestrev = nullrev
503 512 if (lowestrev == nullrev) and (heads is None):
504 513 # We want _all_ the nodes!
505 514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
506 515 if heads is None:
507 516 # All nodes are ancestors, so the latest ancestor is the last
508 517 # node.
509 518 highestrev = len(self) - 1
510 519 # Set ancestors to None to signal that every node is an ancestor.
511 520 ancestors = None
512 521 # Set heads to an empty dictionary for later discovery of heads
513 522 heads = {}
514 523 else:
515 524 heads = list(heads)
516 525 if not heads:
517 526 return nonodes
518 527 ancestors = set()
519 528 # Turn heads into a dictionary so we can remove 'fake' heads.
520 529 # Also, later we will be using it to filter out the heads we can't
521 530 # find from roots.
522 531 heads = dict.fromkeys(heads, False)
523 532 # Start at the top and keep marking parents until we're done.
524 533 nodestotag = set(heads)
525 534 # Remember where the top was so we can use it as a limit later.
526 535 highestrev = max([self.rev(n) for n in nodestotag])
527 536 while nodestotag:
528 537 # grab a node to tag
529 538 n = nodestotag.pop()
530 539 # Never tag nullid
531 540 if n == nullid:
532 541 continue
533 542 # A node's revision number represents its place in a
534 543 # topologically sorted list of nodes.
535 544 r = self.rev(n)
536 545 if r >= lowestrev:
537 546 if n not in ancestors:
538 547 # If we are possibly a descendant of one of the roots
539 548 # and we haven't already been marked as an ancestor
540 549 ancestors.add(n) # Mark as ancestor
541 550 # Add non-nullid parents to list of nodes to tag.
542 551 nodestotag.update([p for p in self.parents(n) if
543 552 p != nullid])
544 553 elif n in heads: # We've seen it before, is it a fake head?
545 554 # So it is; real heads should not be the ancestors of
546 555 # any other heads.
547 556 heads.pop(n)
548 557 if not ancestors:
549 558 return nonodes
550 559 # Now that we have our set of ancestors, we want to remove any
551 560 # roots that are not ancestors.
552 561
553 562 # If one of the roots was nullid, everything is included anyway.
554 563 if lowestrev > nullrev:
555 564 # But, since we weren't, let's recompute the lowest rev to not
556 565 # include roots that aren't ancestors.
557 566
558 567 # Filter out roots that aren't ancestors of heads
559 568 roots = [n for n in roots if n in ancestors]
560 569 # Recompute the lowest revision
561 570 if roots:
562 571 lowestrev = min([self.rev(n) for n in roots])
563 572 else:
564 573 # No more roots? Return empty list
565 574 return nonodes
566 575 else:
567 576 # We are descending from nullid, and don't need to care about
568 577 # any other roots.
569 578 lowestrev = nullrev
570 579 roots = [nullid]
571 580 # Transform our roots list into a set.
572 581 descendants = set(roots)
573 582 # Also, keep the original roots so we can filter out roots that aren't
574 583 # 'real' roots (i.e. are descended from other roots).
575 584 roots = descendants.copy()
576 585 # Our topologically sorted list of output nodes.
577 586 orderedout = []
578 587 # Don't start at nullid since we don't want nullid in our output list,
579 588 # and if nullid shows up in descendants, empty parents will look like
580 589 # they're descendants.
581 590 for r in xrange(max(lowestrev, 0), highestrev + 1):
582 591 n = self.node(r)
583 592 isdescendant = False
584 593 if lowestrev == nullrev: # Everybody is a descendant of nullid
585 594 isdescendant = True
586 595 elif n in descendants:
587 596 # n is already a descendant
588 597 isdescendant = True
589 598 # This check only needs to be done here because all the roots
590 599 # will start being marked as descendants before the loop.
591 600 if n in roots:
592 601 # If n was a root, check if it's a 'real' root.
593 602 p = tuple(self.parents(n))
594 603 # If any of its parents are descendants, it's not a root.
595 604 if (p[0] in descendants) or (p[1] in descendants):
596 605 roots.remove(n)
597 606 else:
598 607 p = tuple(self.parents(n))
599 608 # A node is a descendant if either of its parents are
600 609 # descendants. (We seeded the descendants set with the roots
601 610 # up there, remember?)
602 611 if (p[0] in descendants) or (p[1] in descendants):
603 612 descendants.add(n)
604 613 isdescendant = True
605 614 if isdescendant and ((ancestors is None) or (n in ancestors)):
606 615 # Only include nodes that are both descendants and ancestors.
607 616 orderedout.append(n)
608 617 if (ancestors is not None) and (n in heads):
609 618 # We're trying to figure out which heads are reachable
610 619 # from roots.
611 620 # Mark this head as having been reached
612 621 heads[n] = True
613 622 elif ancestors is None:
614 623 # Otherwise, we're trying to discover the heads.
615 624 # Assume this is a head because if it isn't, the next step
616 625 # will eventually remove it.
617 626 heads[n] = True
618 627 # But, obviously its parents aren't.
619 628 for p in self.parents(n):
620 629 heads.pop(p, None)
621 630 heads = [n for n, flag in heads.iteritems() if flag]
622 631 roots = list(roots)
623 632 assert orderedout
624 633 assert roots
625 634 assert heads
626 635 return (orderedout, roots, heads)
627 636
628 637 def headrevs(self):
629 638 try:
630 639 return self.index.headrevs()
631 640 except AttributeError:
632 641 pass
633 642 count = len(self)
634 643 if not count:
635 644 return [nullrev]
636 645 ishead = [1] * (count + 1)
637 646 index = self.index
638 647 for r in xrange(count):
639 648 e = index[r]
640 649 ishead[e[5]] = ishead[e[6]] = 0
641 650 return [r for r in xrange(count) if ishead[r]]
642 651
643 652 def heads(self, start=None, stop=None):
644 653 """return the list of all nodes that have no children
645 654
646 655 if start is specified, only heads that are descendants of
647 656 start will be returned
648 657 if stop is specified, it will consider all the revs from stop
649 658 as if they had no children
650 659 """
651 660 if start is None and stop is None:
652 661 if not len(self):
653 662 return [nullid]
654 663 return [self.node(r) for r in self.headrevs()]
655 664
656 665 if start is None:
657 666 start = nullid
658 667 if stop is None:
659 668 stop = []
660 669 stoprevs = set([self.rev(n) for n in stop])
661 670 startrev = self.rev(start)
662 671 reachable = set((startrev,))
663 672 heads = set((startrev,))
664 673
665 674 parentrevs = self.parentrevs
666 675 for r in xrange(startrev + 1, len(self)):
667 676 for p in parentrevs(r):
668 677 if p in reachable:
669 678 if r not in stoprevs:
670 679 reachable.add(r)
671 680 heads.add(r)
672 681 if p in heads and p not in stoprevs:
673 682 heads.remove(p)
674 683
675 684 return [self.node(r) for r in heads]
676 685
677 686 def children(self, node):
678 687 """find the children of a given node"""
679 688 c = []
680 689 p = self.rev(node)
681 690 for r in range(p + 1, len(self)):
682 691 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
683 692 if prevs:
684 693 for pr in prevs:
685 694 if pr == p:
686 695 c.append(self.node(r))
687 696 elif p == nullrev:
688 697 c.append(self.node(r))
689 698 return c
690 699
691 700 def descendant(self, start, end):
692 701 if start == nullrev:
693 702 return True
694 703 for i in self.descendants([start]):
695 704 if i == end:
696 705 return True
697 706 elif i > end:
698 707 break
699 708 return False
700 709
701 710 def ancestor(self, a, b):
702 711 """calculate the least common ancestor of nodes a and b"""
703 712
704 713 # fast path, check if it is a descendant
705 714 a, b = self.rev(a), self.rev(b)
706 715 start, end = sorted((a, b))
707 716 if self.descendant(start, end):
708 717 return self.node(start)
709 718
710 719 def parents(rev):
711 720 return [p for p in self.parentrevs(rev) if p != nullrev]
712 721
713 722 c = ancestor.ancestor(a, b, parents)
714 723 if c is None:
715 724 return nullid
716 725
717 726 return self.node(c)
718 727
719 728 def _match(self, id):
720 729 if isinstance(id, int):
721 730 # rev
722 731 return self.node(id)
723 732 if len(id) == 20:
724 733 # possibly a binary node
725 734 # odds of a binary node being all hex in ASCII are 1 in 10**25
726 735 try:
727 736 node = id
728 737 self.rev(node) # quick search the index
729 738 return node
730 739 except LookupError:
731 740 pass # may be partial hex id
732 741 try:
733 742 # str(rev)
734 743 rev = int(id)
735 744 if str(rev) != id:
736 745 raise ValueError
737 746 if rev < 0:
738 747 rev = len(self) + rev
739 748 if rev < 0 or rev >= len(self):
740 749 raise ValueError
741 750 return self.node(rev)
742 751 except (ValueError, OverflowError):
743 752 pass
744 753 if len(id) == 40:
745 754 try:
746 755 # a full hex nodeid?
747 756 node = bin(id)
748 757 self.rev(node)
749 758 return node
750 759 except (TypeError, LookupError):
751 760 pass
752 761
753 762 def _partialmatch(self, id):
754 763 try:
755 764 return self.index.partialmatch(id)
756 765 except RevlogError:
757 766 # parsers.c radix tree lookup gave multiple matches
758 767 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
759 768 except (AttributeError, ValueError):
760 769 # we are pure python, or key was too short to search radix tree
761 770 pass
762 771
763 772 if id in self._pcache:
764 773 return self._pcache[id]
765 774
766 775 if len(id) < 40:
767 776 try:
768 777 # hex(node)[:...]
769 778 l = len(id) // 2 # grab an even number of digits
770 779 prefix = bin(id[:l * 2])
771 780 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
772 781 nl = [n for n in nl if hex(n).startswith(id)]
773 782 if len(nl) > 0:
774 783 if len(nl) == 1:
775 784 self._pcache[id] = nl[0]
776 785 return nl[0]
777 786 raise LookupError(id, self.indexfile,
778 787 _('ambiguous identifier'))
779 788 return None
780 789 except TypeError:
781 790 pass
782 791
783 792 def lookup(self, id):
784 793 """locate a node based on:
785 794 - revision number or str(revision number)
786 795 - nodeid or subset of hex nodeid
787 796 """
788 797 n = self._match(id)
789 798 if n is not None:
790 799 return n
791 800 n = self._partialmatch(id)
792 801 if n:
793 802 return n
794 803
795 804 raise LookupError(id, self.indexfile, _('no match found'))
796 805
797 806 def cmp(self, node, text):
798 807 """compare text with a given file revision
799 808
800 809 returns True if text is different than what is stored.
801 810 """
802 811 p1, p2 = self.parents(node)
803 812 return hash(text, p1, p2) != node
804 813
805 814 def _addchunk(self, offset, data):
806 815 o, d = self._chunkcache
807 816 # try to add to existing cache
808 817 if o + len(d) == offset and len(d) + len(data) < _chunksize:
809 818 self._chunkcache = o, d + data
810 819 else:
811 820 self._chunkcache = offset, data
812 821
813 822 def _loadchunk(self, offset, length):
814 823 if self._inline:
815 824 df = self.opener(self.indexfile)
816 825 else:
817 826 df = self.opener(self.datafile)
818 827
819 828 readahead = max(65536, length)
820 829 df.seek(offset)
821 830 d = df.read(readahead)
822 831 df.close()
823 832 self._addchunk(offset, d)
824 833 if readahead > length:
825 834 return util.buffer(d, 0, length)
826 835 return d
827 836
828 837 def _getchunk(self, offset, length):
829 838 o, d = self._chunkcache
830 839 l = len(d)
831 840
832 841 # is it in the cache?
833 842 cachestart = offset - o
834 843 cacheend = cachestart + length
835 844 if cachestart >= 0 and cacheend <= l:
836 845 if cachestart == 0 and cacheend == l:
837 846 return d # avoid a copy
838 847 return util.buffer(d, cachestart, cacheend - cachestart)
839 848
840 849 return self._loadchunk(offset, length)
841 850
842 851 def _chunkraw(self, startrev, endrev):
843 852 start = self.start(startrev)
844 853 length = self.end(endrev) - start
845 854 if self._inline:
846 855 start += (startrev + 1) * self._io.size
847 856 return self._getchunk(start, length)
848 857
849 858 def _chunk(self, rev):
850 859 return decompress(self._chunkraw(rev, rev))
851 860
852 861 def _chunkbase(self, rev):
853 862 return self._chunk(rev)
854 863
855 864 def _chunkclear(self):
856 865 self._chunkcache = (0, '')
857 866
858 867 def deltaparent(self, rev):
859 868 """return deltaparent of the given revision"""
860 869 base = self.index[rev][3]
861 870 if base == rev:
862 871 return nullrev
863 872 elif self._generaldelta:
864 873 return base
865 874 else:
866 875 return rev - 1
867 876
868 877 def revdiff(self, rev1, rev2):
869 878 """return or calculate a delta between two revisions"""
870 879 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
871 880 return str(self._chunk(rev2))
872 881
873 882 return mdiff.textdiff(self.revision(rev1),
874 883 self.revision(rev2))
875 884
876 885 def revision(self, nodeorrev):
877 886 """return an uncompressed revision of a given node or revision
878 887 number.
879 888 """
880 889 if isinstance(nodeorrev, int):
881 890 rev = nodeorrev
882 891 node = self.node(rev)
883 892 else:
884 893 node = nodeorrev
885 894 rev = None
886 895
887 896 cachedrev = None
888 897 if node == nullid:
889 898 return ""
890 899 if self._cache:
891 900 if self._cache[0] == node:
892 901 return self._cache[2]
893 902 cachedrev = self._cache[1]
894 903
895 904 # look up what we need to read
896 905 text = None
897 906 if rev is None:
898 907 rev = self.rev(node)
899 908
900 909 # check rev flags
901 910 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
902 911 raise RevlogError(_('incompatible revision flag %x') %
903 912 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
904 913
905 914 # build delta chain
906 915 chain = []
907 916 index = self.index # for performance
908 917 generaldelta = self._generaldelta
909 918 iterrev = rev
910 919 e = index[iterrev]
911 920 while iterrev != e[3] and iterrev != cachedrev:
912 921 chain.append(iterrev)
913 922 if generaldelta:
914 923 iterrev = e[3]
915 924 else:
916 925 iterrev -= 1
917 926 e = index[iterrev]
918 927 chain.reverse()
919 928 base = iterrev
920 929
921 930 if iterrev == cachedrev:
922 931 # cache hit
923 932 text = self._cache[2]
924 933
925 934 # drop cache to save memory
926 935 self._cache = None
927 936
928 937 self._chunkraw(base, rev)
929 938 if text is None:
930 939 text = str(self._chunkbase(base))
931 940
932 941 bins = [self._chunk(r) for r in chain]
933 942 text = mdiff.patches(text, bins)
934 943
935 944 text = self._checkhash(text, node, rev)
936 945
937 946 self._cache = (node, rev, text)
938 947 return text
939 948
940 949 def _checkhash(self, text, node, rev):
941 950 p1, p2 = self.parents(node)
942 951 if node != hash(text, p1, p2):
943 952 raise RevlogError(_("integrity check failed on %s:%d")
944 953 % (self.indexfile, rev))
945 954 return text
946 955
947 956 def checkinlinesize(self, tr, fp=None):
948 957 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
949 958 return
950 959
951 960 trinfo = tr.find(self.indexfile)
952 961 if trinfo is None:
953 962 raise RevlogError(_("%s not found in the transaction")
954 963 % self.indexfile)
955 964
956 965 trindex = trinfo[2]
957 966 dataoff = self.start(trindex)
958 967
959 968 tr.add(self.datafile, dataoff)
960 969
961 970 if fp:
962 971 fp.flush()
963 972 fp.close()
964 973
965 974 df = self.opener(self.datafile, 'w')
966 975 try:
967 976 for r in self:
968 977 df.write(self._chunkraw(r, r))
969 978 finally:
970 979 df.close()
971 980
972 981 fp = self.opener(self.indexfile, 'w', atomictemp=True)
973 982 self.version &= ~(REVLOGNGINLINEDATA)
974 983 self._inline = False
975 984 for i in self:
976 985 e = self._io.packentry(self.index[i], self.node, self.version, i)
977 986 fp.write(e)
978 987
979 988 # if we don't call close, the temp file will never replace the
980 989 # real index
981 990 fp.close()
982 991
983 992 tr.replace(self.indexfile, trindex * self._io.size)
984 993 self._chunkclear()
985 994
986 995 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
987 996 """add a revision to the log
988 997
989 998 text - the revision data to add
990 999 transaction - the transaction object used for rollback
991 1000 link - the linkrev data to add
992 1001 p1, p2 - the parent nodeids of the revision
993 1002 cachedelta - an optional precomputed delta
994 1003 """
995 1004 node = hash(text, p1, p2)
996 1005 if node in self.nodemap:
997 1006 return node
998 1007
999 1008 dfh = None
1000 1009 if not self._inline:
1001 1010 dfh = self.opener(self.datafile, "a")
1002 1011 ifh = self.opener(self.indexfile, "a+")
1003 1012 try:
1004 1013 return self._addrevision(node, text, transaction, link, p1, p2,
1005 1014 cachedelta, ifh, dfh)
1006 1015 finally:
1007 1016 if dfh:
1008 1017 dfh.close()
1009 1018 ifh.close()
1010 1019
1011 1020 def _addrevision(self, node, text, transaction, link, p1, p2,
1012 1021 cachedelta, ifh, dfh):
1013 1022 """internal function to add revisions to the log
1014 1023
1015 1024 see addrevision for argument descriptions.
1016 1025 invariants:
1017 1026 - text is optional (can be None); if not set, cachedelta must be set.
1018 1027 if both are set, they must correspond to each other.
1019 1028 """
1020 1029 btext = [text]
1021 1030 def buildtext():
1022 1031 if btext[0] is not None:
1023 1032 return btext[0]
1024 1033 # flush any pending writes here so we can read it in revision
1025 1034 if dfh:
1026 1035 dfh.flush()
1027 1036 ifh.flush()
1028 1037 basetext = self.revision(self.node(cachedelta[0]))
1029 1038 btext[0] = mdiff.patch(basetext, cachedelta[1])
1030 1039 chk = hash(btext[0], p1, p2)
1031 1040 if chk != node:
1032 1041 raise RevlogError(_("consistency error in delta"))
1033 1042 return btext[0]
1034 1043
1035 1044 def builddelta(rev):
1036 1045 # can we use the cached delta?
1037 1046 if cachedelta and cachedelta[0] == rev:
1038 1047 delta = cachedelta[1]
1039 1048 else:
1040 1049 t = buildtext()
1041 1050 ptext = self.revision(self.node(rev))
1042 1051 delta = mdiff.textdiff(ptext, t)
1043 1052 data = compress(delta)
1044 1053 l = len(data[1]) + len(data[0])
1045 1054 if basecache[0] == rev:
1046 1055 chainbase = basecache[1]
1047 1056 else:
1048 1057 chainbase = self.chainbase(rev)
1049 dist = l + offset - self.start(chainbase)
1050 1058 if self._generaldelta:
1059 dist = l + self.chainlength(rev)
1051 1060 base = rev
1052 1061 else:
1062 dist = l + offset - self.start(chainbase)
1053 1063 base = chainbase
1054 1064 return dist, l, data, base, chainbase
1055 1065
1056 1066 curr = len(self)
1057 1067 prev = curr - 1
1058 1068 base = chainbase = curr
1059 1069 offset = self.end(prev)
1060 1070 flags = 0
1061 1071 d = None
1062 1072 basecache = self._basecache
1063 1073 p1r, p2r = self.rev(p1), self.rev(p2)
1064 1074
1065 1075 # should we try to build a delta?
1066 1076 if prev != nullrev:
1067 1077 if self._generaldelta:
1068 1078 if p1r >= basecache[1]:
1069 1079 d = builddelta(p1r)
1070 1080 elif p2r >= basecache[1]:
1071 1081 d = builddelta(p2r)
1072 1082 else:
1073 1083 d = builddelta(prev)
1074 1084 else:
1075 1085 d = builddelta(prev)
1076 1086 dist, l, data, base, chainbase = d
1077 1087
1078 1088 # full versions are inserted when the needed deltas
1079 1089 # become comparable to the uncompressed text
1080 1090 if text is None:
1081 1091 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1082 1092 cachedelta[1])
1083 1093 else:
1084 1094 textlen = len(text)
1085 1095 if d is None or dist > textlen * 2:
1086 1096 text = buildtext()
1087 1097 data = compress(text)
1088 1098 l = len(data[1]) + len(data[0])
1089 1099 base = chainbase = curr
1090 1100
1091 1101 e = (offset_type(offset, flags), l, textlen,
1092 1102 base, link, p1r, p2r, node)
1093 1103 self.index.insert(-1, e)
1094 1104 self.nodemap[node] = curr
1095 1105
1096 1106 entry = self._io.packentry(e, self.node, self.version, curr)
1097 1107 if not self._inline:
1098 1108 transaction.add(self.datafile, offset)
1099 1109 transaction.add(self.indexfile, curr * len(entry))
1100 1110 if data[0]:
1101 1111 dfh.write(data[0])
1102 1112 dfh.write(data[1])
1103 1113 dfh.flush()
1104 1114 ifh.write(entry)
1105 1115 else:
1106 1116 offset += curr * self._io.size
1107 1117 transaction.add(self.indexfile, offset, curr)
1108 1118 ifh.write(entry)
1109 1119 ifh.write(data[0])
1110 1120 ifh.write(data[1])
1111 1121 self.checkinlinesize(transaction, ifh)
1112 1122
1113 1123 if type(text) == str: # only accept immutable objects
1114 1124 self._cache = (node, curr, text)
1115 1125 self._basecache = (curr, chainbase)
1116 1126 return node
1117 1127
1118 1128 def group(self, nodelist, bundler, reorder=None):
1119 1129 """Calculate a delta group, yielding a sequence of changegroup chunks
1120 1130 (strings).
1121 1131
1122 1132 Given a list of changeset revs, return a set of deltas and
1123 1133 metadata corresponding to nodes. The first delta is
1124 1134 first parent(nodelist[0]) -> nodelist[0], the receiver is
1125 1135 guaranteed to have this parent as it has all history before
1126 1136 these changesets. If the first parent is nullrev, the
1127 1137 changegroup starts with a full revision.
1128 1138 """
1129 1139
1130 1140 # if we don't have any revisions touched by these changesets, bail
1131 1141 if len(nodelist) == 0:
1132 1142 yield bundler.close()
1133 1143 return
1134 1144
1135 1145 # for generaldelta revlogs, we linearize the revs; this will both be
1136 1146 # much quicker and generate a much smaller bundle
1137 1147 if (self._generaldelta and reorder is not False) or reorder:
1138 1148 dag = dagutil.revlogdag(self)
1139 1149 revs = set(self.rev(n) for n in nodelist)
1140 1150 revs = dag.linearize(revs)
1141 1151 else:
1142 1152 revs = sorted([self.rev(n) for n in nodelist])
1143 1153
1144 1154 # add the parent of the first rev
1145 1155 p = self.parentrevs(revs[0])[0]
1146 1156 revs.insert(0, p)
1147 1157
1148 1158 # build deltas
1149 1159 for r in xrange(len(revs) - 1):
1150 1160 prev, curr = revs[r], revs[r + 1]
1151 1161 for c in bundler.revchunk(self, curr, prev):
1152 1162 yield c
1153 1163
1154 1164 yield bundler.close()
1155 1165
1156 1166 def addgroup(self, bundle, linkmapper, transaction):
1157 1167 """
1158 1168 add a delta group
1159 1169
1160 1170 given a set of deltas, add them to the revision log. the
1161 1171 first delta is against its parent, which should be in our
1162 1172 log; the rest are against the previous delta.
1163 1173 """
1164 1174
1165 1175 # track the base of the current delta log
1166 1176 content = []
1167 1177 node = None
1168 1178
1169 1179 r = len(self)
1170 1180 end = 0
1171 1181 if r:
1172 1182 end = self.end(r - 1)
1173 1183 ifh = self.opener(self.indexfile, "a+")
1174 1184 isize = r * self._io.size
1175 1185 if self._inline:
1176 1186 transaction.add(self.indexfile, end + isize, r)
1177 1187 dfh = None
1178 1188 else:
1179 1189 transaction.add(self.indexfile, isize, r)
1180 1190 transaction.add(self.datafile, end)
1181 1191 dfh = self.opener(self.datafile, "a")
1182 1192
1183 1193 try:
1184 1194 # loop through our set of deltas
1185 1195 chain = None
1186 1196 while True:
1187 1197 chunkdata = bundle.deltachunk(chain)
1188 1198 if not chunkdata:
1189 1199 break
1190 1200 node = chunkdata['node']
1191 1201 p1 = chunkdata['p1']
1192 1202 p2 = chunkdata['p2']
1193 1203 cs = chunkdata['cs']
1194 1204 deltabase = chunkdata['deltabase']
1195 1205 delta = chunkdata['delta']
1196 1206
1197 1207 content.append(node)
1198 1208
1199 1209 link = linkmapper(cs)
1200 1210 if node in self.nodemap:
1201 1211 # this can happen if two branches make the same change
1202 1212 chain = node
1203 1213 continue
1204 1214
1205 1215 for p in (p1, p2):
1206 1216 if p not in self.nodemap:
1207 1217 raise LookupError(p, self.indexfile,
1208 1218 _('unknown parent'))
1209 1219
1210 1220 if deltabase not in self.nodemap:
1211 1221 raise LookupError(deltabase, self.indexfile,
1212 1222 _('unknown delta base'))
1213 1223
1214 1224 baserev = self.rev(deltabase)
1215 1225 chain = self._addrevision(node, None, transaction, link,
1216 1226 p1, p2, (baserev, delta), ifh, dfh)
1217 1227 if not dfh and not self._inline:
1218 1228 # addrevision switched from inline to conventional
1219 1229 # reopen the index
1220 1230 ifh.close()
1221 1231 dfh = self.opener(self.datafile, "a")
1222 1232 ifh = self.opener(self.indexfile, "a")
1223 1233 finally:
1224 1234 if dfh:
1225 1235 dfh.close()
1226 1236 ifh.close()
1227 1237
1228 1238 return content
1229 1239
1230 1240 def strip(self, minlink, transaction):
1231 1241 """truncate the revlog on the first revision with a linkrev >= minlink
1232 1242
1233 1243 This function is called when we're stripping revision minlink and
1234 1244 its descendants from the repository.
1235 1245
1236 1246 We have to remove all revisions with linkrev >= minlink, because
1237 1247 the equivalent changelog revisions will be renumbered after the
1238 1248 strip.
1239 1249
1240 1250 So we truncate the revlog on the first of these revisions, and
1241 1251 trust that the caller has saved the revisions that shouldn't be
1242 1252 removed and that it'll re-add them after this truncation.
1243 1253 """
1244 1254 if len(self) == 0:
1245 1255 return
1246 1256
1247 1257 for rev in self:
1248 1258 if self.index[rev][4] >= minlink:
1249 1259 break
1250 1260 else:
1251 1261 return
1252 1262
1253 1263 # first truncate the files on disk
1254 1264 end = self.start(rev)
1255 1265 if not self._inline:
1256 1266 transaction.add(self.datafile, end)
1257 1267 end = rev * self._io.size
1258 1268 else:
1259 1269 end += rev * self._io.size
1260 1270
1261 1271 transaction.add(self.indexfile, end)
1262 1272
1263 1273 # then reset internal state in memory to forget those revisions
1264 1274 self._cache = None
1265 1275 self._chunkclear()
1266 1276 for x in xrange(rev, len(self)):
1267 1277 del self.nodemap[self.node(x)]
1268 1278
1269 1279 del self.index[rev:-1]
1270 1280
1271 1281 def checksize(self):
1272 1282 expected = 0
1273 1283 if len(self):
1274 1284 expected = max(0, self.end(len(self) - 1))
1275 1285
1276 1286 try:
1277 1287 f = self.opener(self.datafile)
1278 1288 f.seek(0, 2)
1279 1289 actual = f.tell()
1280 1290 f.close()
1281 1291 dd = actual - expected
1282 1292 except IOError, inst:
1283 1293 if inst.errno != errno.ENOENT:
1284 1294 raise
1285 1295 dd = 0
1286 1296
1287 1297 try:
1288 1298 f = self.opener(self.indexfile)
1289 1299 f.seek(0, 2)
1290 1300 actual = f.tell()
1291 1301 f.close()
1292 1302 s = self._io.size
1293 1303 i = max(0, actual // s)
1294 1304 di = actual - (i * s)
1295 1305 if self._inline:
1296 1306 databytes = 0
1297 1307 for r in self:
1298 1308 databytes += max(0, self.length(r))
1299 1309 dd = 0
1300 1310 di = actual - len(self) * s - databytes
1301 1311 except IOError, inst:
1302 1312 if inst.errno != errno.ENOENT:
1303 1313 raise
1304 1314 di = 0
1305 1315
1306 1316 return (dd, di)
1307 1317
1308 1318 def files(self):
1309 1319 res = [self.indexfile]
1310 1320 if not self._inline:
1311 1321 res.append(self.datafile)
1312 1322 return res
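To see why the span-based distance misbehaves under generaldelta, plug toy numbers into the dist > textlen * 2 snapshot rule from _addrevision above. All offsets and sizes here are invented for illustration.

# rev 0: 1000-byte full snapshot at offset 0.
# rev 1: 100-byte delta against rev 0, written at offset 9000
#        (generaldelta ordering can leave unrelated chunks in between).
# Proposal: a 100-byte delta for rev 2, uncompressed size 1100,
# to be appended at offset 9100.

l, offset, textlen = 100, 9100, 1100
chainbase_start = 0              # self.start(chainbase)
chain_compressed = 1000 + 100    # chainlength(rev): rev 0 + rev 1 chunks

dist_old = l + offset - chainbase_start   # 9200, inflated by the file gap
dist_new = l + chain_compressed           # 1200, the bytes actually read

assert dist_old > textlen * 2    # old rule: forces a needless snapshot
assert dist_new <= textlen * 2   # new rule: keeps the small delta

Under the old measure, any delta whose base sat far back in the data file looked expensive, so generaldelta revlogs stored far more full snapshots than necessary and grew accordingly; summing only the chain's own chunk lengths restores the heuristic's intent.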