revlog: extract 'checkhash' method...
Wojciech Lopata
r19624:55749cb1 default
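This changeset splits revlog integrity checking in two: _checkhash() keeps
the parent lookup, while the comparison against hash(text, p1, p2) moves
into a new public checkhash(text, p1, p2, node, rev=None) method, which
both _checkhash() and _addrevision()'s buildtext() now call instead of
open-coding the check. When no revision number is available, the error
message falls back to the short hex node id (hence the new templatefilters
import). The resulting shape, condensed from the diff below:

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))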
@@ -1,1310 +1,1314 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 import ancestor, mdiff, parsers, error, util
17 import ancestor, mdiff, parsers, error, util, templatefilters
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
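# Illustrative sketch (editor's addition, not part of the changeset): the
# version word stores the format number in its low 16 bits and the feature
# flags above them, which is how __init__ below splits it apart with
# v & 0xFFFF and v & ~0xFFFF.
assert REVLOG_DEFAULT_VERSION == 0x10001
assert REVLOG_DEFAULT_VERSION & 0xFFFF == REVLOGNG
assert REVLOG_DEFAULT_VERSION & ~0xFFFF == REVLOGNGINLINEDATA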
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
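# Illustrative sketch (editor's addition, not part of the changeset): a
# standalone restatement of hash() above, assuming util.sha1 is hashlib.sha1
# as in stock Mercurial. Both branches reduce to hashing the sorted parents
# followed by the text, because nullid ('\0' * 20) always sorts first.
import hashlib

def nodehash(text, p1, p2):
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()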
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
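# Illustrative sketch (editor's addition, not part of the changeset): a
# round trip of the one-byte chunk tag handled above -- 'u' marks data
# stored uncompressed, 'x' is the first byte of any default zlib stream,
# and '\0' or empty chunks pass through untouched.
assert decompress("") == ""
assert decompress("u" + "some raw text") == "some raw text"
payload = "hello revlog, hello revlog"
assert decompress(zlib.compress(payload)) == payload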
94 94 # index v0:
95 95 # 4 bytes: offset
96 96 # 4 bytes: compressed length
97 97 # 4 bytes: base rev
98 98 # 4 bytes: link rev
99 99 # 32 bytes: parent 1 nodeid
100 100 # 32 bytes: parent 2 nodeid
101 101 # 32 bytes: nodeid
102 102 indexformatv0 = ">4l20s20s20s"
103 103 v0shaoffset = 56
104 104
105 105 class revlogoldio(object):
106 106 def __init__(self):
107 107 self.size = struct.calcsize(indexformatv0)
108 108
109 109 def parseindex(self, data, inline):
110 110 s = self.size
111 111 index = []
112 112 nodemap = {nullid: nullrev}
113 113 n = off = 0
114 114 l = len(data)
115 115 while off + s <= l:
116 116 cur = data[off:off + s]
117 117 off += s
118 118 e = _unpack(indexformatv0, cur)
119 119 # transform to revlogv1 format
120 120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 122 index.append(e2)
123 123 nodemap[e[6]] = n
124 124 n += 1
125 125
126 126 # add the magic null revision at -1
127 127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 128
129 129 return index, nodemap, None
130 130
131 131 def packentry(self, entry, node, version, rev):
132 132 if gettype(entry[0]):
133 133 raise RevlogError(_("index entry flags need RevlogNG"))
134 134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 135 node(entry[5]), node(entry[6]), entry[7])
136 136 return _pack(indexformatv0, *e2)
137 137
138 138 # index ng:
139 139 # 6 bytes: offset
140 140 # 2 bytes: flags
141 141 # 4 bytes: compressed length
142 142 # 4 bytes: uncompressed length
143 143 # 4 bytes: base rev
144 144 # 4 bytes: link rev
145 145 # 4 bytes: parent 1 rev
146 146 # 4 bytes: parent 2 rev
147 147 # 32 bytes: nodeid
148 148 indexformatng = ">Qiiiiii20s12x"
149 149 ngshaoffset = 32
150 150 versionformat = ">I"
151 151
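# Illustrative sketch (editor's addition, not part of the changeset):
# packing and unpacking one RevlogNG index entry with the format above.
# entry[0] multiplexes the 48-bit offset and 16-bit flags as
# (offset << 16) | flags, which is what offset_type(), getoffset() and
# gettype() manipulate.
entry = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, "\xaa" * 20)
packed = _pack(indexformatng, *entry)
assert struct.calcsize(indexformatng) == len(packed) == 64
assert _unpack(indexformatng, packed) == entry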
152 152 class revlogio(object):
153 153 def __init__(self):
154 154 self.size = struct.calcsize(indexformatng)
155 155
156 156 def parseindex(self, data, inline):
157 157 # call the C implementation to parse the index data
158 158 index, cache = parsers.parse_index2(data, inline)
159 159 return index, getattr(index, 'nodemap', None), cache
160 160
161 161 def packentry(self, entry, node, version, rev):
162 162 p = _pack(indexformatng, *entry)
163 163 if rev == 0:
164 164 p = _pack(versionformat, version) + p[4:]
165 165 return p
166 166
167 167 class revlog(object):
168 168 """
169 169 the underlying revision storage object
170 170
171 171 A revlog consists of two parts, an index and the revision data.
172 172
173 173 The index is a file with a fixed record size containing
174 174 information on each revision, including its nodeid (hash), the
175 175 nodeids of its parents, the position and offset of its data within
176 176 the data file, and the revision it's based on. Finally, each entry
177 177 contains a linkrev entry that can serve as a pointer to external
178 178 data.
179 179
180 180 The revision data itself is a linear collection of data chunks.
181 181 Each chunk represents a revision and is usually represented as a
182 182 delta against the previous chunk. To bound lookup time, runs of
183 183 deltas are limited to about 2 times the length of the original
184 184 version data. This makes retrieval of a version proportional to
185 185 its size, or O(1) relative to the number of revisions.
186 186
187 187 Both pieces of the revlog are written to in an append-only
188 188 fashion, which means we never need to rewrite a file to insert or
189 189 remove data, and can use some simple techniques to avoid the need
190 190 for locking while reading.
191 191 """
192 192 def __init__(self, opener, indexfile):
193 193 """
194 194 create a revlog object
195 195
196 196 opener is a function that abstracts the file opening operation
197 197 and can be used to implement COW semantics or the like.
198 198 """
199 199 self.indexfile = indexfile
200 200 self.datafile = indexfile[:-2] + ".d"
201 201 self.opener = opener
202 202 self._cache = None
203 203 self._basecache = (0, 0)
204 204 self._chunkcache = (0, '')
205 205 self.index = []
206 206 self._pcache = {}
207 207 self._nodecache = {nullid: nullrev}
208 208 self._nodepos = None
209 209
210 210 v = REVLOG_DEFAULT_VERSION
211 211 opts = getattr(opener, 'options', None)
212 212 if opts is not None:
213 213 if 'revlogv1' in opts:
214 214 if 'generaldelta' in opts:
215 215 v |= REVLOGGENERALDELTA
216 216 else:
217 217 v = 0
218 218
219 219 i = ''
220 220 self._initempty = True
221 221 try:
222 222 f = self.opener(self.indexfile)
223 223 i = f.read()
224 224 f.close()
225 225 if len(i) > 0:
226 226 v = struct.unpack(versionformat, i[:4])[0]
227 227 self._initempty = False
228 228 except IOError, inst:
229 229 if inst.errno != errno.ENOENT:
230 230 raise
231 231
232 232 self.version = v
233 233 self._inline = v & REVLOGNGINLINEDATA
234 234 self._generaldelta = v & REVLOGGENERALDELTA
235 235 flags = v & ~0xFFFF
236 236 fmt = v & 0xFFFF
237 237 if fmt == REVLOGV0 and flags:
238 238 raise RevlogError(_("index %s unknown flags %#04x for format v0")
239 239 % (self.indexfile, flags >> 16))
240 240 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
241 241 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
242 242 % (self.indexfile, flags >> 16))
243 243 elif fmt > REVLOGNG:
244 244 raise RevlogError(_("index %s unknown format %d")
245 245 % (self.indexfile, fmt))
246 246
247 247 self._io = revlogio()
248 248 if self.version == REVLOGV0:
249 249 self._io = revlogoldio()
250 250 try:
251 251 d = self._io.parseindex(i, self._inline)
252 252 except (ValueError, IndexError):
253 253 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
254 254 self.index, nodemap, self._chunkcache = d
255 255 if nodemap is not None:
256 256 self.nodemap = self._nodecache = nodemap
257 257 if not self._chunkcache:
258 258 self._chunkclear()
259 259
260 260 def tip(self):
261 261 return self.node(len(self.index) - 2)
262 262 def __len__(self):
263 263 return len(self.index) - 1
264 264 def __iter__(self):
265 265 return iter(xrange(len(self)))
266 266 def revs(self, start=0, stop=None):
267 267 """iterate over all rev in this revlog (from start to stop)"""
268 268 step = 1
269 269 if stop is not None:
270 270 if start > stop:
271 271 step = -1
272 272 stop += step
273 273 else:
274 274 stop = len(self)
275 275 return xrange(start, stop, step)
276 276
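    # Note (editor's addition): 'stop' is inclusive, and start > stop
    # iterates backwards, e.g.
    #   revs(start=2, stop=5) -> 2, 3, 4, 5
    #   revs(start=5, stop=2) -> 5, 4, 3, 2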
277 277 @util.propertycache
278 278 def nodemap(self):
279 279 self.rev(self.node(0))
280 280 return self._nodecache
281 281
282 282 def hasnode(self, node):
283 283 try:
284 284 self.rev(node)
285 285 return True
286 286 except KeyError:
287 287 return False
288 288
289 289 def clearcaches(self):
290 290 try:
291 291 self._nodecache.clearcaches()
292 292 except AttributeError:
293 293 self._nodecache = {nullid: nullrev}
294 294 self._nodepos = None
295 295
296 296 def rev(self, node):
297 297 try:
298 298 return self._nodecache[node]
299 299 except RevlogError:
300 300 # parsers.c radix tree lookup failed
301 301 raise LookupError(node, self.indexfile, _('no node'))
302 302 except KeyError:
303 303 # pure python cache lookup failed
304 304 n = self._nodecache
305 305 i = self.index
306 306 p = self._nodepos
307 307 if p is None:
308 308 p = len(i) - 2
309 309 for r in xrange(p, -1, -1):
310 310 v = i[r][7]
311 311 n[v] = r
312 312 if v == node:
313 313 self._nodepos = r - 1
314 314 return r
315 315 raise LookupError(node, self.indexfile, _('no node'))
316 316
317 317 def node(self, rev):
318 318 return self.index[rev][7]
319 319 def linkrev(self, rev):
320 320 return self.index[rev][4]
321 321 def parents(self, node):
322 322 i = self.index
323 323 d = i[self.rev(node)]
324 324 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
325 325 def parentrevs(self, rev):
326 326 return self.index[rev][5:7]
327 327 def start(self, rev):
328 328 return int(self.index[rev][0] >> 16)
329 329 def end(self, rev):
330 330 return self.start(rev) + self.length(rev)
331 331 def length(self, rev):
332 332 return self.index[rev][1]
333 333 def chainbase(self, rev):
334 334 index = self.index
335 335 base = index[rev][3]
336 336 while base != rev:
337 337 rev = base
338 338 base = index[rev][3]
339 339 return base
340 340 def flags(self, rev):
341 341 return self.index[rev][0] & 0xFFFF
342 342 def rawsize(self, rev):
343 343 """return the length of the uncompressed text for a given revision"""
344 344 l = self.index[rev][2]
345 345 if l >= 0:
346 346 return l
347 347
348 348 t = self.revision(self.node(rev))
349 349 return len(t)
350 350 size = rawsize
351 351
352 352 def ancestors(self, revs, stoprev=0, inclusive=False):
353 353 """Generate the ancestors of 'revs' in reverse topological order.
354 354 Does not generate revs lower than stoprev.
355 355
356 356 See the documentation for ancestor.lazyancestors for more details."""
357 357
358 358 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
359 359 inclusive=inclusive)
360 360
361 361 def descendants(self, revs):
362 362 """Generate the descendants of 'revs' in revision order.
363 363
364 364 Yield a sequence of revision numbers starting with a child of
365 365 some rev in revs, i.e., each revision is *not* considered a
366 366 descendant of itself. Results are ordered by revision number (a
367 367 topological sort)."""
368 368 first = min(revs)
369 369 if first == nullrev:
370 370 for i in self:
371 371 yield i
372 372 return
373 373
374 374 seen = set(revs)
375 375 for i in self.revs(start=first + 1):
376 376 for x in self.parentrevs(i):
377 377 if x != nullrev and x in seen:
378 378 seen.add(i)
379 379 yield i
380 380 break
381 381
382 382 def findcommonmissing(self, common=None, heads=None):
383 383 """Return a tuple of the ancestors of common and the ancestors of heads
384 384 that are not ancestors of common. In revset terminology, we return the
385 385 tuple:
386 386
387 387 ::common, (::heads) - (::common)
388 388
389 389 The list is sorted by revision number, meaning it is
390 390 topologically sorted.
391 391
392 392 'heads' and 'common' are both lists of node IDs. If heads is
393 393 not supplied, uses all of the revlog's heads. If common is not
394 394 supplied, uses nullid."""
395 395 if common is None:
396 396 common = [nullid]
397 397 if heads is None:
398 398 heads = self.heads()
399 399
400 400 common = [self.rev(n) for n in common]
401 401 heads = [self.rev(n) for n in heads]
402 402
403 403 # we want the ancestors, but inclusive
404 404 has = set(self.ancestors(common))
405 405 has.add(nullrev)
406 406 has.update(common)
407 407
408 408 # take all ancestors from heads that aren't in has
409 409 missing = set()
410 410 visit = util.deque(r for r in heads if r not in has)
411 411 while visit:
412 412 r = visit.popleft()
413 413 if r in missing:
414 414 continue
415 415 else:
416 416 missing.add(r)
417 417 for p in self.parentrevs(r):
418 418 if p not in has:
419 419 visit.append(p)
420 420 missing = list(missing)
421 421 missing.sort()
422 422 return has, [self.node(r) for r in missing]
423 423
424 424 def findmissingrevs(self, common=None, heads=None):
425 425 """Return the revision numbers of the ancestors of heads that
426 426 are not ancestors of common.
427 427
428 428 More specifically, return a list of revision numbers corresponding to
429 429 nodes N such that every N satisfies the following constraints:
430 430
431 431 1. N is an ancestor of some node in 'heads'
432 432 2. N is not an ancestor of any node in 'common'
433 433
434 434 The list is sorted by revision number, meaning it is
435 435 topologically sorted.
436 436
437 437 'heads' and 'common' are both lists of revision numbers. If heads is
438 438 not supplied, uses all of the revlog's heads. If common is not
439 439 supplied, uses nullid."""
440 440 if common is None:
441 441 common = [nullrev]
442 442 if heads is None:
443 443 heads = self.headrevs()
444 444
445 445 return ancestor.missingancestors(heads, common, self.parentrevs)
446 446
447 447 def findmissing(self, common=None, heads=None):
448 448 """Return the ancestors of heads that are not ancestors of common.
449 449
450 450 More specifically, return a list of nodes N such that every N
451 451 satisfies the following constraints:
452 452
453 453 1. N is an ancestor of some node in 'heads'
454 454 2. N is not an ancestor of any node in 'common'
455 455
456 456 The list is sorted by revision number, meaning it is
457 457 topologically sorted.
458 458
459 459 'heads' and 'common' are both lists of node IDs. If heads is
460 460 not supplied, uses all of the revlog's heads. If common is not
461 461 supplied, uses nullid."""
462 462 if common is None:
463 463 common = [nullid]
464 464 if heads is None:
465 465 heads = self.heads()
466 466
467 467 common = [self.rev(n) for n in common]
468 468 heads = [self.rev(n) for n in heads]
469 469
470 470 return [self.node(r) for r in
471 471 ancestor.missingancestors(heads, common, self.parentrevs)]
472 472
473 473 def nodesbetween(self, roots=None, heads=None):
474 474 """Return a topological path from 'roots' to 'heads'.
475 475
476 476 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
477 477 topologically sorted list of all nodes N that satisfy both of
478 478 these constraints:
479 479
480 480 1. N is a descendant of some node in 'roots'
481 481 2. N is an ancestor of some node in 'heads'
482 482
483 483 Every node is considered to be both a descendant and an ancestor
484 484 of itself, so every reachable node in 'roots' and 'heads' will be
485 485 included in 'nodes'.
486 486
487 487 'outroots' is the list of reachable nodes in 'roots', i.e., the
488 488 subset of 'roots' that is returned in 'nodes'. Likewise,
489 489 'outheads' is the subset of 'heads' that is also in 'nodes'.
490 490
491 491 'roots' and 'heads' are both lists of node IDs. If 'roots' is
492 492 unspecified, uses nullid as the only root. If 'heads' is
493 493 unspecified, uses list of all of the revlog's heads."""
494 494 nonodes = ([], [], [])
495 495 if roots is not None:
496 496 roots = list(roots)
497 497 if not roots:
498 498 return nonodes
499 499 lowestrev = min([self.rev(n) for n in roots])
500 500 else:
501 501 roots = [nullid] # Everybody's a descendant of nullid
502 502 lowestrev = nullrev
503 503 if (lowestrev == nullrev) and (heads is None):
504 504 # We want _all_ the nodes!
505 505 return ([self.node(r) for r in self], [nullid], list(self.heads()))
506 506 if heads is None:
507 507 # All nodes are ancestors, so the latest ancestor is the last
508 508 # node.
509 509 highestrev = len(self) - 1
510 510 # Set ancestors to None to signal that every node is an ancestor.
511 511 ancestors = None
512 512 # Set heads to an empty dictionary for later discovery of heads
513 513 heads = {}
514 514 else:
515 515 heads = list(heads)
516 516 if not heads:
517 517 return nonodes
518 518 ancestors = set()
519 519 # Turn heads into a dictionary so we can remove 'fake' heads.
520 520 # Also, later we will be using it to filter out the heads we can't
521 521 # find from roots.
522 522 heads = dict.fromkeys(heads, False)
523 523 # Start at the top and keep marking parents until we're done.
524 524 nodestotag = set(heads)
525 525 # Remember where the top was so we can use it as a limit later.
526 526 highestrev = max([self.rev(n) for n in nodestotag])
527 527 while nodestotag:
528 528 # grab a node to tag
529 529 n = nodestotag.pop()
530 530 # Never tag nullid
531 531 if n == nullid:
532 532 continue
533 533 # A node's revision number represents its place in a
534 534 # topologically sorted list of nodes.
535 535 r = self.rev(n)
536 536 if r >= lowestrev:
537 537 if n not in ancestors:
538 538 # If we are possibly a descendant of one of the roots
539 539 # and we haven't already been marked as an ancestor
540 540 ancestors.add(n) # Mark as ancestor
541 541 # Add non-nullid parents to list of nodes to tag.
542 542 nodestotag.update([p for p in self.parents(n) if
543 543 p != nullid])
544 544 elif n in heads: # We've seen it before, is it a fake head?
545 545 # So it is, real heads should not be the ancestors of
546 546 # any other heads.
547 547 heads.pop(n)
548 548 if not ancestors:
549 549 return nonodes
550 550 # Now that we have our set of ancestors, we want to remove any
551 551 # roots that are not ancestors.
552 552
553 553 # If one of the roots was nullid, everything is included anyway.
554 554 if lowestrev > nullrev:
555 555 # But, since we weren't, let's recompute the lowest rev to not
556 556 # include roots that aren't ancestors.
557 557
558 558 # Filter out roots that aren't ancestors of heads
559 559 roots = [n for n in roots if n in ancestors]
560 560 # Recompute the lowest revision
561 561 if roots:
562 562 lowestrev = min([self.rev(n) for n in roots])
563 563 else:
564 564 # No more roots? Return empty list
565 565 return nonodes
566 566 else:
567 567 # We are descending from nullid, and don't need to care about
568 568 # any other roots.
569 569 lowestrev = nullrev
570 570 roots = [nullid]
571 571 # Transform our roots list into a set.
572 572 descendants = set(roots)
573 573 # Also, keep the original roots so we can filter out roots that aren't
574 574 # 'real' roots (i.e. are descended from other roots).
575 575 roots = descendants.copy()
576 576 # Our topologically sorted list of output nodes.
577 577 orderedout = []
578 578 # Don't start at nullid since we don't want nullid in our output list,
579 579 # and if nullid shows up in descendants, empty parents will look like
580 580 # they're descendants.
581 581 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
582 582 n = self.node(r)
583 583 isdescendant = False
584 584 if lowestrev == nullrev: # Everybody is a descendant of nullid
585 585 isdescendant = True
586 586 elif n in descendants:
587 587 # n is already a descendant
588 588 isdescendant = True
589 589 # This check only needs to be done here because all the roots
590 590 # will start being marked as descendants before the loop.
591 591 if n in roots:
592 592 # If n was a root, check if it's a 'real' root.
593 593 p = tuple(self.parents(n))
594 594 # If any of its parents are descendants, it's not a root.
595 595 if (p[0] in descendants) or (p[1] in descendants):
596 596 roots.remove(n)
597 597 else:
598 598 p = tuple(self.parents(n))
599 599 # A node is a descendant if either of its parents are
600 600 # descendants. (We seeded the descendants list with the roots
601 601 # up there, remember?)
602 602 if (p[0] in descendants) or (p[1] in descendants):
603 603 descendants.add(n)
604 604 isdescendant = True
605 605 if isdescendant and ((ancestors is None) or (n in ancestors)):
606 606 # Only include nodes that are both descendants and ancestors.
607 607 orderedout.append(n)
608 608 if (ancestors is not None) and (n in heads):
609 609 # We're trying to figure out which heads are reachable
610 610 # from roots.
611 611 # Mark this head as having been reached
612 612 heads[n] = True
613 613 elif ancestors is None:
614 614 # Otherwise, we're trying to discover the heads.
615 615 # Assume this is a head because if it isn't, the next step
616 616 # will eventually remove it.
617 617 heads[n] = True
618 618 # But, obviously its parents aren't.
619 619 for p in self.parents(n):
620 620 heads.pop(p, None)
621 621 heads = [n for n, flag in heads.iteritems() if flag]
622 622 roots = list(roots)
623 623 assert orderedout
624 624 assert roots
625 625 assert heads
626 626 return (orderedout, roots, heads)
627 627
628 628 def headrevs(self):
629 629 try:
630 630 return self.index.headrevs()
631 631 except AttributeError:
632 632 return self._headrevs()
633 633
634 634 def _headrevs(self):
635 635 count = len(self)
636 636 if not count:
637 637 return [nullrev]
638 638 # we won't iterate over filtered revs, so nobody is a head at start
639 639 ishead = [0] * (count + 1)
640 640 index = self.index
641 641 for r in self:
642 642 ishead[r] = 1 # I may be a head
643 643 e = index[r]
644 644 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
645 645 return [r for r, val in enumerate(ishead) if val]
646 646
647 647 def heads(self, start=None, stop=None):
648 648 """return the list of all nodes that have no children
649 649
650 650 if start is specified, only heads that are descendants of
651 651 start will be returned
652 652 if stop is specified, it will consider all the revs from stop
653 653 as if they had no children
654 654 """
655 655 if start is None and stop is None:
656 656 if not len(self):
657 657 return [nullid]
658 658 return [self.node(r) for r in self.headrevs()]
659 659
660 660 if start is None:
661 661 start = nullid
662 662 if stop is None:
663 663 stop = []
664 664 stoprevs = set([self.rev(n) for n in stop])
665 665 startrev = self.rev(start)
666 666 reachable = set((startrev,))
667 667 heads = set((startrev,))
668 668
669 669 parentrevs = self.parentrevs
670 670 for r in self.revs(start=startrev + 1):
671 671 for p in parentrevs(r):
672 672 if p in reachable:
673 673 if r not in stoprevs:
674 674 reachable.add(r)
675 675 heads.add(r)
676 676 if p in heads and p not in stoprevs:
677 677 heads.remove(p)
678 678
679 679 return [self.node(r) for r in heads]
680 680
681 681 def children(self, node):
682 682 """find the children of a given node"""
683 683 c = []
684 684 p = self.rev(node)
685 685 for r in self.revs(start=p + 1):
686 686 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
687 687 if prevs:
688 688 for pr in prevs:
689 689 if pr == p:
690 690 c.append(self.node(r))
691 691 elif p == nullrev:
692 692 c.append(self.node(r))
693 693 return c
694 694
695 695 def descendant(self, start, end):
696 696 if start == nullrev:
697 697 return True
698 698 for i in self.descendants([start]):
699 699 if i == end:
700 700 return True
701 701 elif i > end:
702 702 break
703 703 return False
704 704
705 705 def ancestor(self, a, b):
706 706 """calculate the least common ancestor of nodes a and b"""
707 707
708 708 a, b = self.rev(a), self.rev(b)
709 709 try:
710 710 ancs = self.index.ancestors(a, b)
711 711 except (AttributeError, OverflowError):
712 712 ancs = ancestor.ancestors(self.parentrevs, a, b)
713 713 if ancs:
714 714 # choose a consistent winner when there's a tie
715 715 return min(map(self.node, ancs))
716 716 return nullid
717 717
718 718 def _match(self, id):
719 719 if isinstance(id, int):
720 720 # rev
721 721 return self.node(id)
722 722 if len(id) == 20:
723 723 # possibly a binary node
724 724 # odds of a binary node being all hex in ASCII are 1 in 10**25
725 725 try:
726 726 node = id
727 727 self.rev(node) # quick search the index
728 728 return node
729 729 except LookupError:
730 730 pass # may be partial hex id
731 731 try:
732 732 # str(rev)
733 733 rev = int(id)
734 734 if str(rev) != id:
735 735 raise ValueError
736 736 if rev < 0:
737 737 rev = len(self) + rev
738 738 if rev < 0 or rev >= len(self):
739 739 raise ValueError
740 740 return self.node(rev)
741 741 except (ValueError, OverflowError):
742 742 pass
743 743 if len(id) == 40:
744 744 try:
745 745 # a full hex nodeid?
746 746 node = bin(id)
747 747 self.rev(node)
748 748 return node
749 749 except (TypeError, LookupError):
750 750 pass
751 751
752 752 def _partialmatch(self, id):
753 753 try:
754 754 n = self.index.partialmatch(id)
755 755 if n and self.hasnode(n):
756 756 return n
757 757 return None
758 758 except RevlogError:
759 759 # parsers.c radix tree lookup gave multiple matches
760 760 # fall through to slow path that filters hidden revisions
761 761 pass
762 762 except (AttributeError, ValueError):
763 763 # we are pure python, or key was too short to search radix tree
764 764 pass
765 765
766 766 if id in self._pcache:
767 767 return self._pcache[id]
768 768
769 769 if len(id) < 40:
770 770 try:
771 771 # hex(node)[:...]
772 772 l = len(id) // 2 # grab an even number of digits
773 773 prefix = bin(id[:l * 2])
774 774 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
775 775 nl = [n for n in nl if hex(n).startswith(id) and
776 776 self.hasnode(n)]
777 777 if len(nl) > 0:
778 778 if len(nl) == 1:
779 779 self._pcache[id] = nl[0]
780 780 return nl[0]
781 781 raise LookupError(id, self.indexfile,
782 782 _('ambiguous identifier'))
783 783 return None
784 784 except TypeError:
785 785 pass
786 786
787 787 def lookup(self, id):
788 788 """locate a node based on:
789 789 - revision number or str(revision number)
790 790 - nodeid or subset of hex nodeid
791 791 """
792 792 n = self._match(id)
793 793 if n is not None:
794 794 return n
795 795 n = self._partialmatch(id)
796 796 if n:
797 797 return n
798 798
799 799 raise LookupError(id, self.indexfile, _('no match found'))
800 800
801 801 def cmp(self, node, text):
802 802 """compare text with a given file revision
803 803
804 804 returns True if text is different than what is stored.
805 805 """
806 806 p1, p2 = self.parents(node)
807 807 return hash(text, p1, p2) != node
808 808
809 809 def _addchunk(self, offset, data):
810 810 o, d = self._chunkcache
811 811 # try to add to existing cache
812 812 if o + len(d) == offset and len(d) + len(data) < _chunksize:
813 813 self._chunkcache = o, d + data
814 814 else:
815 815 self._chunkcache = offset, data
816 816
817 817 def _loadchunk(self, offset, length):
818 818 if self._inline:
819 819 df = self.opener(self.indexfile)
820 820 else:
821 821 df = self.opener(self.datafile)
822 822
823 823 readahead = max(65536, length)
824 824 df.seek(offset)
825 825 d = df.read(readahead)
826 826 df.close()
827 827 self._addchunk(offset, d)
828 828 if readahead > length:
829 829 return util.buffer(d, 0, length)
830 830 return d
831 831
832 832 def _getchunk(self, offset, length):
833 833 o, d = self._chunkcache
834 834 l = len(d)
835 835
836 836 # is it in the cache?
837 837 cachestart = offset - o
838 838 cacheend = cachestart + length
839 839 if cachestart >= 0 and cacheend <= l:
840 840 if cachestart == 0 and cacheend == l:
841 841 return d # avoid a copy
842 842 return util.buffer(d, cachestart, cacheend - cachestart)
843 843
844 844 return self._loadchunk(offset, length)
845 845
846 846 def _chunkraw(self, startrev, endrev):
847 847 start = self.start(startrev)
848 848 length = self.end(endrev) - start
849 849 if self._inline:
850 850 start += (startrev + 1) * self._io.size
851 851 return self._getchunk(start, length)
852 852
853 853 def _chunk(self, rev):
854 854 return decompress(self._chunkraw(rev, rev))
855 855
856 856 def _chunkbase(self, rev):
857 857 return self._chunk(rev)
858 858
859 859 def _chunkclear(self):
860 860 self._chunkcache = (0, '')
861 861
862 862 def deltaparent(self, rev):
863 863 """return deltaparent of the given revision"""
864 864 base = self.index[rev][3]
865 865 if base == rev:
866 866 return nullrev
867 867 elif self._generaldelta:
868 868 return base
869 869 else:
870 870 return rev - 1
871 871
872 872 def revdiff(self, rev1, rev2):
873 873 """return or calculate a delta between two revisions"""
874 874 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
875 875 return str(self._chunk(rev2))
876 876
877 877 return mdiff.textdiff(self.revision(rev1),
878 878 self.revision(rev2))
879 879
880 880 def revision(self, nodeorrev):
881 881 """return an uncompressed revision of a given node or revision
882 882 number.
883 883 """
884 884 if isinstance(nodeorrev, int):
885 885 rev = nodeorrev
886 886 node = self.node(rev)
887 887 else:
888 888 node = nodeorrev
889 889 rev = None
890 890
891 891 cachedrev = None
892 892 if node == nullid:
893 893 return ""
894 894 if self._cache:
895 895 if self._cache[0] == node:
896 896 return self._cache[2]
897 897 cachedrev = self._cache[1]
898 898
899 899 # look up what we need to read
900 900 text = None
901 901 if rev is None:
902 902 rev = self.rev(node)
903 903
904 904 # check rev flags
905 905 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
906 906 raise RevlogError(_('incompatible revision flag %x') %
907 907 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
908 908
909 909 # build delta chain
910 910 chain = []
911 911 index = self.index # for performance
912 912 generaldelta = self._generaldelta
913 913 iterrev = rev
914 914 e = index[iterrev]
915 915 while iterrev != e[3] and iterrev != cachedrev:
916 916 chain.append(iterrev)
917 917 if generaldelta:
918 918 iterrev = e[3]
919 919 else:
920 920 iterrev -= 1
921 921 e = index[iterrev]
922 922 chain.reverse()
923 923 base = iterrev
924 924
925 925 if iterrev == cachedrev:
926 926 # cache hit
927 927 text = self._cache[2]
928 928
929 929 # drop cache to save memory
930 930 self._cache = None
931 931
932 932 self._chunkraw(base, rev)
933 933 if text is None:
934 934 text = str(self._chunkbase(base))
935 935
936 936 bins = [self._chunk(r) for r in chain]
937 937 text = mdiff.patches(text, bins)
938 938
939 939 text = self._checkhash(text, node, rev)
940 940
941 941 self._cache = (node, rev, text)
942 942 return text
943 943
944 944 def _checkhash(self, text, node, rev):
945 945 p1, p2 = self.parents(node)
946 self.checkhash(text, p1, p2, node, rev)
947 return text
948
949 def checkhash(self, text, p1, p2, node, rev=None):
946 950 if node != hash(text, p1, p2):
947 raise RevlogError(_("integrity check failed on %s:%d")
948 % (self.indexfile, rev))
949 return text
951 revornode = rev
952 if revornode is None:
953 revornode = templatefilters.short(hex(node))
954 raise RevlogError(_("integrity check failed on %s:%s")
955 % (self.indexfile, revornode))
950 956
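    # Illustrative sketch (editor's addition, not part of the changeset):
    # with the comparison behind a public method, a hypothetical subclass
    # can swap the verification policy without copying _checkhash()'s
    # plumbing:
    #
    #   class myrevlog(revlog):
    #       def checkhash(self, text, p1, p2, node, rev=None):
    #           pass  # custom policy; the base class raises on mismatch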
951 957 def checkinlinesize(self, tr, fp=None):
952 958 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
953 959 return
954 960
955 961 trinfo = tr.find(self.indexfile)
956 962 if trinfo is None:
957 963 raise RevlogError(_("%s not found in the transaction")
958 964 % self.indexfile)
959 965
960 966 trindex = trinfo[2]
961 967 dataoff = self.start(trindex)
962 968
963 969 tr.add(self.datafile, dataoff)
964 970
965 971 if fp:
966 972 fp.flush()
967 973 fp.close()
968 974
969 975 df = self.opener(self.datafile, 'w')
970 976 try:
971 977 for r in self:
972 978 df.write(self._chunkraw(r, r))
973 979 finally:
974 980 df.close()
975 981
976 982 fp = self.opener(self.indexfile, 'w', atomictemp=True)
977 983 self.version &= ~(REVLOGNGINLINEDATA)
978 984 self._inline = False
979 985 for i in self:
980 986 e = self._io.packentry(self.index[i], self.node, self.version, i)
981 987 fp.write(e)
982 988
983 989 # if we don't call close, the temp file will never replace the
984 990 # real index
985 991 fp.close()
986 992
987 993 tr.replace(self.indexfile, trindex * self._io.size)
988 994 self._chunkclear()
989 995
990 996 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
991 997 """add a revision to the log
992 998
993 999 text - the revision data to add
994 1000 transaction - the transaction object used for rollback
995 1001 link - the linkrev data to add
996 1002 p1, p2 - the parent nodeids of the revision
997 1003 cachedelta - an optional precomputed delta
998 1004 """
999 1005 if link == nullrev:
1000 1006 raise RevlogError(_("attempted to add linkrev -1 to %s")
1001 1007 % self.indexfile)
1002 1008 node = hash(text, p1, p2)
1003 1009 if node in self.nodemap:
1004 1010 return node
1005 1011
1006 1012 dfh = None
1007 1013 if not self._inline:
1008 1014 dfh = self.opener(self.datafile, "a")
1009 1015 ifh = self.opener(self.indexfile, "a+")
1010 1016 try:
1011 1017 return self._addrevision(node, text, transaction, link, p1, p2,
1012 1018 cachedelta, ifh, dfh)
1013 1019 finally:
1014 1020 if dfh:
1015 1021 dfh.close()
1016 1022 ifh.close()
1017 1023
1018 1024 def compress(self, text):
1019 1025 """ generate a possibly-compressed representation of text """
1020 1026 if not text:
1021 1027 return ("", text)
1022 1028 l = len(text)
1023 1029 bin = None
1024 1030 if l < 44:
1025 1031 pass
1026 1032 elif l > 1000000:
1027 1033 # zlib makes an internal copy, thus doubling memory usage for
1028 1034 # large files, so let's do this in pieces
1029 1035 z = zlib.compressobj()
1030 1036 p = []
1031 1037 pos = 0
1032 1038 while pos < l:
1033 1039 pos2 = pos + 2**20
1034 1040 p.append(z.compress(text[pos:pos2]))
1035 1041 pos = pos2
1036 1042 p.append(z.flush())
1037 1043 if sum(map(len, p)) < l:
1038 1044 bin = "".join(p)
1039 1045 else:
1040 1046 bin = _compress(text)
1041 1047 if bin is None or len(bin) > l:
1042 1048 if text[0] == '\0':
1043 1049 return ("", text)
1044 1050 return ('u', text)
1045 1051 return ("", bin)
1046 1052
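    # Note (editor's addition): compress() pairs with the module-level
    # decompress() above -- it returns (tag, data) where tag is 'u' for
    # text kept uncompressed and '' otherwise; _addrevision() writes
    # tag + data, i.e. "".join(pair), into the revlog.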
1047 1053 def _addrevision(self, node, text, transaction, link, p1, p2,
1048 1054 cachedelta, ifh, dfh):
1049 1055 """internal function to add revisions to the log
1050 1056
1051 1057 see addrevision for argument descriptions.
1052 1058 invariants:
1053 1059 - text is optional (can be None); if not set, cachedelta must be set.
1054 1060 if both are set, they must correspond to each other.
1055 1061 """
1056 1062 btext = [text]
1057 1063 def buildtext():
1058 1064 if btext[0] is not None:
1059 1065 return btext[0]
1060 1066 # flush any pending writes here so we can read it in revision
1061 1067 if dfh:
1062 1068 dfh.flush()
1063 1069 ifh.flush()
1064 1070 basetext = self.revision(self.node(cachedelta[0]))
1065 1071 btext[0] = mdiff.patch(basetext, cachedelta[1])
1066 chk = hash(btext[0], p1, p2)
1067 if chk != node:
1068 raise RevlogError(_("consistency error in delta"))
1072 self.checkhash(btext[0], p1, p2, node)
1069 1073 return btext[0]
1070 1074
1071 1075 def builddelta(rev):
1072 1076 # can we use the cached delta?
1073 1077 if cachedelta and cachedelta[0] == rev:
1074 1078 delta = cachedelta[1]
1075 1079 else:
1076 1080 t = buildtext()
1077 1081 ptext = self.revision(self.node(rev))
1078 1082 delta = mdiff.textdiff(ptext, t)
1079 1083 data = self.compress(delta)
1080 1084 l = len(data[1]) + len(data[0])
1081 1085 if basecache[0] == rev:
1082 1086 chainbase = basecache[1]
1083 1087 else:
1084 1088 chainbase = self.chainbase(rev)
1085 1089 dist = l + offset - self.start(chainbase)
1086 1090 if self._generaldelta:
1087 1091 base = rev
1088 1092 else:
1089 1093 base = chainbase
1090 1094 return dist, l, data, base, chainbase
1091 1095
1092 1096 curr = len(self)
1093 1097 prev = curr - 1
1094 1098 base = chainbase = curr
1095 1099 offset = self.end(prev)
1096 1100 flags = 0
1097 1101 d = None
1098 1102 basecache = self._basecache
1099 1103 p1r, p2r = self.rev(p1), self.rev(p2)
1100 1104
1101 1105 # should we try to build a delta?
1102 1106 if prev != nullrev:
1103 1107 if self._generaldelta:
1104 1108 if p1r >= basecache[1]:
1105 1109 d = builddelta(p1r)
1106 1110 elif p2r >= basecache[1]:
1107 1111 d = builddelta(p2r)
1108 1112 else:
1109 1113 d = builddelta(prev)
1110 1114 else:
1111 1115 d = builddelta(prev)
1112 1116 dist, l, data, base, chainbase = d
1113 1117
1114 1118 # full versions are inserted when the needed deltas
1115 1119 # become comparable to the uncompressed text
1116 1120 if text is None:
1117 1121 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1118 1122 cachedelta[1])
1119 1123 else:
1120 1124 textlen = len(text)
1121 1125 if d is None or dist > textlen * 2:
1122 1126 text = buildtext()
1123 1127 data = self.compress(text)
1124 1128 l = len(data[1]) + len(data[0])
1125 1129 base = chainbase = curr
1126 1130
1127 1131 e = (offset_type(offset, flags), l, textlen,
1128 1132 base, link, p1r, p2r, node)
1129 1133 self.index.insert(-1, e)
1130 1134 self.nodemap[node] = curr
1131 1135
1132 1136 entry = self._io.packentry(e, self.node, self.version, curr)
1133 1137 if not self._inline:
1134 1138 transaction.add(self.datafile, offset)
1135 1139 transaction.add(self.indexfile, curr * len(entry))
1136 1140 if data[0]:
1137 1141 dfh.write(data[0])
1138 1142 dfh.write(data[1])
1139 1143 dfh.flush()
1140 1144 ifh.write(entry)
1141 1145 else:
1142 1146 offset += curr * self._io.size
1143 1147 transaction.add(self.indexfile, offset, curr)
1144 1148 ifh.write(entry)
1145 1149 ifh.write(data[0])
1146 1150 ifh.write(data[1])
1147 1151 self.checkinlinesize(transaction, ifh)
1148 1152
1149 1153 if type(text) == str: # only accept immutable objects
1150 1154 self._cache = (node, curr, text)
1151 1155 self._basecache = (curr, chainbase)
1152 1156 return node
1153 1157
1154 1158 def addgroup(self, bundle, linkmapper, transaction):
1155 1159 """
1156 1160 add a delta group
1157 1161
1158 1162 given a set of deltas, add them to the revision log. the
1159 1163 first delta is against its parent, which should be in our
1160 1164 log, the rest are against the previous delta.
1161 1165 """
1162 1166
1163 1167 # track the base of the current delta log
1164 1168 content = []
1165 1169 node = None
1166 1170
1167 1171 r = len(self)
1168 1172 end = 0
1169 1173 if r:
1170 1174 end = self.end(r - 1)
1171 1175 ifh = self.opener(self.indexfile, "a+")
1172 1176 isize = r * self._io.size
1173 1177 if self._inline:
1174 1178 transaction.add(self.indexfile, end + isize, r)
1175 1179 dfh = None
1176 1180 else:
1177 1181 transaction.add(self.indexfile, isize, r)
1178 1182 transaction.add(self.datafile, end)
1179 1183 dfh = self.opener(self.datafile, "a")
1180 1184
1181 1185 try:
1182 1186 # loop through our set of deltas
1183 1187 chain = None
1184 1188 while True:
1185 1189 chunkdata = bundle.deltachunk(chain)
1186 1190 if not chunkdata:
1187 1191 break
1188 1192 node = chunkdata['node']
1189 1193 p1 = chunkdata['p1']
1190 1194 p2 = chunkdata['p2']
1191 1195 cs = chunkdata['cs']
1192 1196 deltabase = chunkdata['deltabase']
1193 1197 delta = chunkdata['delta']
1194 1198
1195 1199 content.append(node)
1196 1200
1197 1201 link = linkmapper(cs)
1198 1202 if node in self.nodemap:
1199 1203 # this can happen if two branches make the same change
1200 1204 chain = node
1201 1205 continue
1202 1206
1203 1207 for p in (p1, p2):
1204 1208 if p not in self.nodemap:
1205 1209 raise LookupError(p, self.indexfile,
1206 1210 _('unknown parent'))
1207 1211
1208 1212 if deltabase not in self.nodemap:
1209 1213 raise LookupError(deltabase, self.indexfile,
1210 1214 _('unknown delta base'))
1211 1215
1212 1216 baserev = self.rev(deltabase)
1213 1217 chain = self._addrevision(node, None, transaction, link,
1214 1218 p1, p2, (baserev, delta), ifh, dfh)
1215 1219 if not dfh and not self._inline:
1216 1220 # addrevision switched from inline to conventional
1217 1221 # reopen the index
1218 1222 ifh.close()
1219 1223 dfh = self.opener(self.datafile, "a")
1220 1224 ifh = self.opener(self.indexfile, "a")
1221 1225 finally:
1222 1226 if dfh:
1223 1227 dfh.close()
1224 1228 ifh.close()
1225 1229
1226 1230 return content
1227 1231
1228 1232 def strip(self, minlink, transaction):
1229 1233 """truncate the revlog on the first revision with a linkrev >= minlink
1230 1234
1231 1235 This function is called when we're stripping revision minlink and
1232 1236 its descendants from the repository.
1233 1237
1234 1238 We have to remove all revisions with linkrev >= minlink, because
1235 1239 the equivalent changelog revisions will be renumbered after the
1236 1240 strip.
1237 1241
1238 1242 So we truncate the revlog on the first of these revisions, and
1239 1243 trust that the caller has saved the revisions that shouldn't be
1240 1244 removed and that it'll re-add them after this truncation.
1241 1245 """
1242 1246 if len(self) == 0:
1243 1247 return
1244 1248
1245 1249 for rev in self:
1246 1250 if self.index[rev][4] >= minlink:
1247 1251 break
1248 1252 else:
1249 1253 return
1250 1254
1251 1255 # first truncate the files on disk
1252 1256 end = self.start(rev)
1253 1257 if not self._inline:
1254 1258 transaction.add(self.datafile, end)
1255 1259 end = rev * self._io.size
1256 1260 else:
1257 1261 end += rev * self._io.size
1258 1262
1259 1263 transaction.add(self.indexfile, end)
1260 1264
1261 1265 # then reset internal state in memory to forget those revisions
1262 1266 self._cache = None
1263 1267 self._chunkclear()
1264 1268 for x in xrange(rev, len(self)):
1265 1269 del self.nodemap[self.node(x)]
1266 1270
1267 1271 del self.index[rev:-1]
1268 1272
1269 1273 def checksize(self):
1270 1274 expected = 0
1271 1275 if len(self):
1272 1276 expected = max(0, self.end(len(self) - 1))
1273 1277
1274 1278 try:
1275 1279 f = self.opener(self.datafile)
1276 1280 f.seek(0, 2)
1277 1281 actual = f.tell()
1278 1282 f.close()
1279 1283 dd = actual - expected
1280 1284 except IOError, inst:
1281 1285 if inst.errno != errno.ENOENT:
1282 1286 raise
1283 1287 dd = 0
1284 1288
1285 1289 try:
1286 1290 f = self.opener(self.indexfile)
1287 1291 f.seek(0, 2)
1288 1292 actual = f.tell()
1289 1293 f.close()
1290 1294 s = self._io.size
1291 1295 i = max(0, actual // s)
1292 1296 di = actual - (i * s)
1293 1297 if self._inline:
1294 1298 databytes = 0
1295 1299 for r in self:
1296 1300 databytes += max(0, self.length(r))
1297 1301 dd = 0
1298 1302 di = actual - len(self) * s - databytes
1299 1303 except IOError, inst:
1300 1304 if inst.errno != errno.ENOENT:
1301 1305 raise
1302 1306 di = 0
1303 1307
1304 1308 return (dd, di)
1305 1309
1306 1310 def files(self):
1307 1311 res = [self.indexfile]
1308 1312 if not self._inline:
1309 1313 res.append(self.datafile)
1310 1314 return res
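Callers that have no revision number at hand can now invoke the check
directly; on failure the error names the short node id rather than a
revision. A usage sketch (rl stands for any revlog instance; hypothetical
values):

    rl.checkhash(text, p1, p2, node)          # failure reports short node
    rl.checkhash(text, p1, p2, node, rev=7)   # failure reports rev 7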