backout e7167007c083...
Matt Mackall
r17150:3ac9592b default
@@ -1,1322 +1,1312 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
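
The first index field packs two values into one integer: the byte offset of the revision's data in the high bits and the 16-bit flag field in the low bits, which is what the three helpers above encode and decode. A minimal standalone round trip (plain Python, independent of this module):

    def offset_type(offset, type):
        return (offset << 16) | type   # offset in the high bits, flags low

    def getoffset(q):
        return q >> 16

    def gettype(q):
        return q & 0xFFFF

    q = offset_type(4096, 3)
    assert getoffset(q) == 4096 and gettype(q) == 3
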
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
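
The node hash is SHA-1 over the two parent nodes followed by the text; sorting the parents makes the result independent of parent order, and the nullhash.copy() branch above is just a fast path for the common single-parent case. A self-contained sketch using hashlib (not this module's util.sha1 wrapper):

    import hashlib

    nullid = b"\0" * 20

    def nodehash(text, p1, p2=nullid):
        a, b = sorted([p1, p2])     # sort so the hash is order-independent
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    p1, p2 = b"\x01" * 20, b"\x02" * 20
    assert nodehash(b"text", p1, p2) == nodehash(b"text", p2, p1)
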
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
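
decompress() dispatches on the first byte of the chunk: an empty chunk or one starting with NUL is literal data, 'x' marks a zlib stream (zlib output starts with byte 0x78, 'x', at the default settings), and 'u' prefixes text that was stored uncompressed because compression did not pay off. A standalone sketch of the same convention:

    import zlib

    def decode_chunk(bin):
        if not bin or bin[:1] == b"\0":
            return bin                   # literal: empty or starts with NUL
        if bin[:1] == b"x":
            return zlib.decompress(bin)  # zlib stream
        if bin[:1] == b"u":
            return bin[1:]               # stored raw, marker stripped
        raise ValueError("unknown compression type %r" % bin[:1])

    assert decode_chunk(zlib.compress(b"hello hello hello")) == b"hello hello hello"
    assert decode_chunk(b"uhello") == b"hello"
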
94 94 indexformatv0 = ">4l20s20s20s"
95 95 v0shaoffset = 56
96 96
97 97 class revlogoldio(object):
98 98 def __init__(self):
99 99 self.size = struct.calcsize(indexformatv0)
100 100
101 101 def parseindex(self, data, inline):
102 102 s = self.size
103 103 index = []
104 104 nodemap = {nullid: nullrev}
105 105 n = off = 0
106 106 l = len(data)
107 107 while off + s <= l:
108 108 cur = data[off:off + s]
109 109 off += s
110 110 e = _unpack(indexformatv0, cur)
111 111 # transform to revlogv1 format
112 112 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
113 113 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
114 114 index.append(e2)
115 115 nodemap[e[6]] = n
116 116 n += 1
117 117
118 118 # add the magic null revision at -1
119 119 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
120 120
121 121 return index, nodemap, None
122 122
123 123 def packentry(self, entry, node, version, rev):
124 124 if gettype(entry[0]):
125 125 raise RevlogError(_("index entry flags need RevlogNG"))
126 126 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
127 127 node(entry[5]), node(entry[6]), entry[7])
128 128 return _pack(indexformatv0, *e2)
129 129
130 130 # index ng:
131 131 # 6 bytes: offset
132 132 # 2 bytes: flags
133 133 # 4 bytes: compressed length
134 134 # 4 bytes: uncompressed length
135 135 # 4 bytes: base rev
136 136 # 4 bytes: link rev
137 137 # 4 bytes: parent 1 rev
138 138 # 4 bytes: parent 2 rev
139 139 # 32 bytes: nodeid
140 140 indexformatng = ">Qiiiiii20s12x"
141 141 ngshaoffset = 32
142 142 versionformat = ">I"
143 143
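
Each RevlogNG index record is therefore 64 bytes: an 8-byte field holding the packed offset/flags (see offset_type above), six 4-byte integers, the 20-byte nodeid, and 12 bytes of padding. A sketch that packs and re-reads one record, with made-up field values:

    import struct

    indexformatng = ">Qiiiiii20s12x"
    assert struct.calcsize(indexformatng) == 64

    node = b"\x12" * 20
    entry = ((4096 << 16) | 0,  # offset 4096, flags 0
             120,               # compressed length
             300,               # uncompressed length
             5,                 # base rev of the delta chain
             7,                 # link rev
             5, -1,             # parent 1 and parent 2 revs
             node)
    record = struct.pack(indexformatng, *entry)
    assert struct.unpack(indexformatng, record) == entry
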
144 144 class revlogio(object):
145 145 def __init__(self):
146 146 self.size = struct.calcsize(indexformatng)
147 147
148 148 def parseindex(self, data, inline):
149 149 # call the C implementation to parse the index data
150 150 index, cache = parsers.parse_index2(data, inline)
151 151 return index, getattr(index, 'nodemap', None), cache
152 152
153 153 def packentry(self, entry, node, version, rev):
154 154 p = _pack(indexformatng, *entry)
155 155 if rev == 0:
156 156 p = _pack(versionformat, version) + p[4:]
157 157 return p
158 158
159 159 class revlog(object):
160 160 """
161 161 the underlying revision storage object
162 162
163 163 A revlog consists of two parts, an index and the revision data.
164 164
165 165 The index is a file with a fixed record size containing
166 166 information on each revision, including its nodeid (hash), the
167 167 nodeids of its parents, the position and offset of its data within
168 168 the data file, and the revision it's based on. Finally, each entry
169 169 contains a linkrev entry that can serve as a pointer to external
170 170 data.
171 171
172 172 The revision data itself is a linear collection of data chunks.
173 173 Each chunk represents a revision and is usually represented as a
174 174 delta against the previous chunk. To bound lookup time, runs of
175 175 deltas are limited to about 2 times the length of the original
176 176 version data. This makes retrieval of a version proportional to
177 177 its size, or O(1) relative to the number of revisions.
178 178
179 179 Both pieces of the revlog are written to in an append-only
180 180 fashion, which means we never need to rewrite a file to insert or
181 181 remove data, and can use some simple techniques to avoid the need
182 182 for locking while reading.
183 183 """
184 184 def __init__(self, opener, indexfile):
185 185 """
186 186 create a revlog object
187 187
188 188 opener is a function that abstracts the file opening operation
189 189 and can be used to implement COW semantics or the like.
190 190 """
191 191 self.indexfile = indexfile
192 192 self.datafile = indexfile[:-2] + ".d"
193 193 self.opener = opener
194 194 self._cache = None
195 195 self._basecache = (0, 0)
196 196 self._chunkcache = (0, '')
197 197 self.index = []
198 198 self._pcache = {}
199 199 self._nodecache = {nullid: nullrev}
200 200 self._nodepos = None
201 201
202 202 v = REVLOG_DEFAULT_VERSION
203 203 opts = getattr(opener, 'options', None)
204 204 if opts is not None:
205 205 if 'revlogv1' in opts:
206 206 if 'generaldelta' in opts:
207 207 v |= REVLOGGENERALDELTA
208 208 else:
209 209 v = 0
210 210
211 211 i = ''
212 212 self._initempty = True
213 213 try:
214 214 f = self.opener(self.indexfile)
215 215 i = f.read()
216 216 f.close()
217 217 if len(i) > 0:
218 218 v = struct.unpack(versionformat, i[:4])[0]
219 219 self._initempty = False
220 220 except IOError, inst:
221 221 if inst.errno != errno.ENOENT:
222 222 raise
223 223
224 224 self.version = v
225 225 self._inline = v & REVLOGNGINLINEDATA
226 226 self._generaldelta = v & REVLOGGENERALDELTA
227 227 flags = v & ~0xFFFF
228 228 fmt = v & 0xFFFF
229 229 if fmt == REVLOGV0 and flags:
230 230 raise RevlogError(_("index %s unknown flags %#04x for format v0")
231 231 % (self.indexfile, flags >> 16))
232 232 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
233 233 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
234 234 % (self.indexfile, flags >> 16))
235 235 elif fmt > REVLOGNG:
236 236 raise RevlogError(_("index %s unknown format %d")
237 237 % (self.indexfile, fmt))
238 238
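
The version and flags share the first 4 bytes of the index file: the low 16 bits carry the format number and the high 16 bits the flags, which is exactly what the checks above split apart. A hedged sketch of reading that header:

    import struct

    REVLOGNG = 1
    REVLOGNGINLINEDATA = 1 << 16
    REVLOGGENERALDELTA = 1 << 17

    def parseversion(data):
        v = struct.unpack(">I", data[:4])[0]
        fmt, flags = v & 0xFFFF, v & ~0xFFFF
        return fmt, bool(flags & REVLOGNGINLINEDATA), bool(flags & REVLOGGENERALDELTA)

    header = struct.pack(">I", REVLOGNG | REVLOGNGINLINEDATA)
    assert parseversion(header) == (1, True, False)
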
239 239 self._io = revlogio()
240 240 if self.version == REVLOGV0:
241 241 self._io = revlogoldio()
242 242 try:
243 243 d = self._io.parseindex(i, self._inline)
244 244 except (ValueError, IndexError):
245 245 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
246 246 self.index, nodemap, self._chunkcache = d
247 247 if nodemap is not None:
248 248 self.nodemap = self._nodecache = nodemap
249 249 if not self._chunkcache:
250 250 self._chunkclear()
251 251
252 252 def tip(self):
253 253 return self.node(len(self.index) - 2)
254 254 def __len__(self):
255 255 return len(self.index) - 1
256 256 def __iter__(self):
257 257 for i in xrange(len(self)):
258 258 yield i
259 259
260 260 @util.propertycache
261 261 def nodemap(self):
262 262 self.rev(self.node(0))
263 263 return self._nodecache
264 264
265 265 def hasnode(self, node):
266 266 try:
267 267 self.rev(node)
268 268 return True
269 269 except KeyError:
270 270 return False
271 271
272 272 def clearcaches(self):
273 273 try:
274 274 self._nodecache.clearcaches()
275 275 except AttributeError:
276 276 self._nodecache = {nullid: nullrev}
277 277 self._nodepos = None
278 278
279 279 def rev(self, node):
280 280 try:
281 281 return self._nodecache[node]
282 282 except RevlogError:
283 283 # parsers.c radix tree lookup failed
284 284 raise LookupError(node, self.indexfile, _('no node'))
285 285 except KeyError:
286 286 # pure python cache lookup failed
287 287 n = self._nodecache
288 288 i = self.index
289 289 p = self._nodepos
290 290 if p is None:
291 291 p = len(i) - 2
292 292 for r in xrange(p, -1, -1):
293 293 v = i[r][7]
294 294 n[v] = r
295 295 if v == node:
296 296 self._nodepos = r - 1
297 297 return r
298 298 raise LookupError(node, self.indexfile, _('no node'))
299 299
300 300 def node(self, rev):
301 301 return self.index[rev][7]
302 302 def linkrev(self, rev):
303 303 return self.index[rev][4]
304 304 def parents(self, node):
305 305 i = self.index
306 306 d = i[self.rev(node)]
307 307 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
308 308 def parentrevs(self, rev):
309 309 return self.index[rev][5:7]
310 310 def start(self, rev):
311 311 return int(self.index[rev][0] >> 16)
312 312 def end(self, rev):
313 313 return self.start(rev) + self.length(rev)
314 314 def length(self, rev):
315 315 return self.index[rev][1]
316 316 def chainbase(self, rev):
317 317 index = self.index
318 318 base = index[rev][3]
319 319 while base != rev:
320 320 rev = base
321 321 base = index[rev][3]
322 322 return base
323 def chainlength(self, rev):
324 index = self.index
325 base = index[rev][3]
326 length = index[rev][1]
327 while base != rev:
328 rev = base
329 base = index[rev][3]
330 length = length + index[rev][1]
331 return length
332 323 def flags(self, rev):
333 324 return self.index[rev][0] & 0xFFFF
334 325 def rawsize(self, rev):
335 326 """return the length of the uncompressed text for a given revision"""
336 327 l = self.index[rev][2]
337 328 if l >= 0:
338 329 return l
339 330
340 331 t = self.revision(self.node(rev))
341 332 return len(t)
342 333 size = rawsize
343 334
344 335 def ancestors(self, revs, stoprev=0):
345 336 """Generate the ancestors of 'revs' in reverse topological order.
346 337 Does not generate revs lower than stoprev.
347 338
348 339 Yield a sequence of revision numbers starting with the parents
349 340 of each revision in revs, i.e., each revision is *not* considered
350 341 an ancestor of itself. Results are in breadth-first order:
351 342 parents of each rev in revs, then parents of those, etc. Result
352 343 does not include the null revision."""
353 344 visit = util.deque(revs)
354 345 seen = set([nullrev])
355 346 while visit:
356 347 for parent in self.parentrevs(visit.popleft()):
357 348 if parent < stoprev:
358 349 continue
359 350 if parent not in seen:
360 351 visit.append(parent)
361 352 seen.add(parent)
362 353 yield parent
363 354
364 355 def incancestors(self, revs, stoprev=0):
365 356 """Identical to ancestors() except it also generates the
366 357 revisions in 'revs' themselves."""
367 358 for rev in revs:
368 359 yield rev
369 360 for rev in self.ancestors(revs, stoprev):
370 361 yield rev
371 362
372 363 def descendants(self, revs):
373 364 """Generate the descendants of 'revs' in revision order.
374 365
375 366 Yield a sequence of revision numbers starting with a child of
376 367 some rev in revs, i.e., each revision is *not* considered a
377 368 descendant of itself. Results are ordered by revision number (a
378 369 topological sort)."""
379 370 first = min(revs)
380 371 if first == nullrev:
381 372 for i in self:
382 373 yield i
383 374 return
384 375
385 376 seen = set(revs)
386 377 for i in xrange(first + 1, len(self)):
387 378 for x in self.parentrevs(i):
388 379 if x != nullrev and x in seen:
389 380 seen.add(i)
390 381 yield i
391 382 break
392 383
393 384 def findcommonmissing(self, common=None, heads=None):
394 385 """Return a tuple of the ancestors of common and the ancestors of heads
395 386 that are not ancestors of common. In revset terminology, we return the
396 387 tuple:
397 388
398 389 ::common, (::heads) - (::common)
399 390
400 391 The list is sorted by revision number, meaning it is
401 392 topologically sorted.
402 393
403 394 'heads' and 'common' are both lists of node IDs. If heads is
404 395 not supplied, uses all of the revlog's heads. If common is not
405 396 supplied, uses nullid."""
406 397 if common is None:
407 398 common = [nullid]
408 399 if heads is None:
409 400 heads = self.heads()
410 401
411 402 common = [self.rev(n) for n in common]
412 403 heads = [self.rev(n) for n in heads]
413 404
414 405 # we want the ancestors, but inclusive
415 406 has = set(self.ancestors(common))
416 407 has.add(nullrev)
417 408 has.update(common)
418 409
419 410 # take all ancestors from heads that aren't in has
420 411 missing = set()
421 412 visit = util.deque(r for r in heads if r not in has)
422 413 while visit:
423 414 r = visit.popleft()
424 415 if r in missing:
425 416 continue
426 417 else:
427 418 missing.add(r)
428 419 for p in self.parentrevs(r):
429 420 if p not in has:
430 421 visit.append(p)
431 422 missing = list(missing)
432 423 missing.sort()
433 424 return has, [self.node(r) for r in missing]
434 425
435 426 def findmissing(self, common=None, heads=None):
436 427 """Return the ancestors of heads that are not ancestors of common.
437 428
438 429 More specifically, return a list of nodes N such that every N
439 430 satisfies the following constraints:
440 431
441 432 1. N is an ancestor of some node in 'heads'
442 433 2. N is not an ancestor of any node in 'common'
443 434
444 435 The list is sorted by revision number, meaning it is
445 436 topologically sorted.
446 437
447 438 'heads' and 'common' are both lists of node IDs. If heads is
448 439 not supplied, uses all of the revlog's heads. If common is not
449 440 supplied, uses nullid."""
450 441 _common, missing = self.findcommonmissing(common, heads)
451 442 return missing
452 443
453 444 def nodesbetween(self, roots=None, heads=None):
454 445 """Return a topological path from 'roots' to 'heads'.
455 446
456 447 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
457 448 topologically sorted list of all nodes N that satisfy both of
458 449 these constraints:
459 450
460 451 1. N is a descendant of some node in 'roots'
461 452 2. N is an ancestor of some node in 'heads'
462 453
463 454 Every node is considered to be both a descendant and an ancestor
464 455 of itself, so every reachable node in 'roots' and 'heads' will be
465 456 included in 'nodes'.
466 457
467 458 'outroots' is the list of reachable nodes in 'roots', i.e., the
468 459 subset of 'roots' that is returned in 'nodes'. Likewise,
469 460 'outheads' is the subset of 'heads' that is also in 'nodes'.
470 461
471 462 'roots' and 'heads' are both lists of node IDs. If 'roots' is
472 463 unspecified, uses nullid as the only root. If 'heads' is
473 464 unspecified, uses list of all of the revlog's heads."""
474 465 nonodes = ([], [], [])
475 466 if roots is not None:
476 467 roots = list(roots)
477 468 if not roots:
478 469 return nonodes
479 470 lowestrev = min([self.rev(n) for n in roots])
480 471 else:
481 472 roots = [nullid] # Everybody's a descendant of nullid
482 473 lowestrev = nullrev
483 474 if (lowestrev == nullrev) and (heads is None):
484 475 # We want _all_ the nodes!
485 476 return ([self.node(r) for r in self], [nullid], list(self.heads()))
486 477 if heads is None:
487 478 # All nodes are ancestors, so the latest ancestor is the last
488 479 # node.
489 480 highestrev = len(self) - 1
490 481 # Set ancestors to None to signal that every node is an ancestor.
491 482 ancestors = None
492 483 # Set heads to an empty dictionary for later discovery of heads
493 484 heads = {}
494 485 else:
495 486 heads = list(heads)
496 487 if not heads:
497 488 return nonodes
498 489 ancestors = set()
499 490 # Turn heads into a dictionary so we can remove 'fake' heads.
500 491 # Also, later we will be using it to filter out the heads we can't
501 492 # find from roots.
502 493 heads = dict.fromkeys(heads, False)
503 494 # Start at the top and keep marking parents until we're done.
504 495 nodestotag = set(heads)
505 496 # Remember where the top was so we can use it as a limit later.
506 497 highestrev = max([self.rev(n) for n in nodestotag])
507 498 while nodestotag:
508 499 # grab a node to tag
509 500 n = nodestotag.pop()
510 501 # Never tag nullid
511 502 if n == nullid:
512 503 continue
513 504 # A node's revision number represents its place in a
514 505 # topologically sorted list of nodes.
515 506 r = self.rev(n)
516 507 if r >= lowestrev:
517 508 if n not in ancestors:
518 509 # If we are possibly a descendant of one of the roots
519 510 # and we haven't already been marked as an ancestor
520 511 ancestors.add(n) # Mark as ancestor
521 512 # Add non-nullid parents to list of nodes to tag.
522 513 nodestotag.update([p for p in self.parents(n) if
523 514 p != nullid])
524 515 elif n in heads: # We've seen it before, is it a fake head?
525 516 # So it is; real heads should not be ancestors of
526 517 # any other heads.
527 518 heads.pop(n)
528 519 if not ancestors:
529 520 return nonodes
530 521 # Now that we have our set of ancestors, we want to remove any
531 522 # roots that are not ancestors.
532 523
533 524 # If one of the roots was nullid, everything is included anyway.
534 525 if lowestrev > nullrev:
535 526 # But, since we weren't, let's recompute the lowest rev to not
536 527 # include roots that aren't ancestors.
537 528
538 529 # Filter out roots that aren't ancestors of heads
539 530 roots = [n for n in roots if n in ancestors]
540 531 # Recompute the lowest revision
541 532 if roots:
542 533 lowestrev = min([self.rev(n) for n in roots])
543 534 else:
544 535 # No more roots? Return empty list
545 536 return nonodes
546 537 else:
547 538 # We are descending from nullid, and don't need to care about
548 539 # any other roots.
549 540 lowestrev = nullrev
550 541 roots = [nullid]
551 542 # Transform our roots list into a set.
552 543 descendants = set(roots)
553 544 # Also, keep the original roots so we can filter out roots that aren't
554 545 # 'real' roots (i.e. are descended from other roots).
555 546 roots = descendants.copy()
556 547 # Our topologically sorted list of output nodes.
557 548 orderedout = []
558 549 # Don't start at nullid since we don't want nullid in our output list,
559 550 # and if nullid shows up in descedents, empty parents will look like
560 551 # they're descendants.
561 552 for r in xrange(max(lowestrev, 0), highestrev + 1):
562 553 n = self.node(r)
563 554 isdescendant = False
564 555 if lowestrev == nullrev: # Everybody is a descendant of nullid
565 556 isdescendant = True
566 557 elif n in descendants:
567 558 # n is already a descendant
568 559 isdescendant = True
569 560 # This check only needs to be done here because all the roots
570 561 # will start being marked as descendants before the loop.
571 562 if n in roots:
572 563 # If n was a root, check if it's a 'real' root.
573 564 p = tuple(self.parents(n))
574 565 # If any of its parents are descendants, it's not a root.
575 566 if (p[0] in descendants) or (p[1] in descendants):
576 567 roots.remove(n)
577 568 else:
578 569 p = tuple(self.parents(n))
580 571 # A node is a descendant if either of its parents is a
581 572 # descendant. (We seeded the descendants set with the roots
581 572 # up there, remember?)
582 573 if (p[0] in descendants) or (p[1] in descendants):
583 574 descendants.add(n)
584 575 isdescendant = True
585 576 if isdescendant and ((ancestors is None) or (n in ancestors)):
586 577 # Only include nodes that are both descendants and ancestors.
587 578 orderedout.append(n)
588 579 if (ancestors is not None) and (n in heads):
589 580 # We're trying to figure out which heads are reachable
590 581 # from roots.
591 582 # Mark this head as having been reached
592 583 heads[n] = True
593 584 elif ancestors is None:
594 585 # Otherwise, we're trying to discover the heads.
595 586 # Assume this is a head because if it isn't, the next step
596 587 # will eventually remove it.
597 588 heads[n] = True
598 589 # But, obviously its parents aren't.
599 590 for p in self.parents(n):
600 591 heads.pop(p, None)
601 592 heads = [n for n, flag in heads.iteritems() if flag]
602 593 roots = list(roots)
603 594 assert orderedout
604 595 assert roots
605 596 assert heads
606 597 return (orderedout, roots, heads)
607 598
608 599 def headrevs(self):
609 600 try:
610 601 return self.index.headrevs()
611 602 except AttributeError:
612 603 pass
613 604 count = len(self)
614 605 if not count:
615 606 return [nullrev]
616 607 ishead = [1] * (count + 1)
617 608 index = self.index
618 609 for r in xrange(count):
619 610 e = index[r]
620 611 ishead[e[5]] = ishead[e[6]] = 0
621 612 return [r for r in xrange(count) if ishead[r]]
622 613
623 614 def heads(self, start=None, stop=None):
624 615 """return the list of all nodes that have no children
625 616
626 617 if start is specified, only heads that are descendants of
627 618 start will be returned
628 619 if stop is specified, it will consider all the revs from stop
629 620 as if they had no children
630 621 """
631 622 if start is None and stop is None:
632 623 if not len(self):
633 624 return [nullid]
634 625 return [self.node(r) for r in self.headrevs()]
635 626
636 627 if start is None:
637 628 start = nullid
638 629 if stop is None:
639 630 stop = []
640 631 stoprevs = set([self.rev(n) for n in stop])
641 632 startrev = self.rev(start)
642 633 reachable = set((startrev,))
643 634 heads = set((startrev,))
644 635
645 636 parentrevs = self.parentrevs
646 637 for r in xrange(startrev + 1, len(self)):
647 638 for p in parentrevs(r):
648 639 if p in reachable:
649 640 if r not in stoprevs:
650 641 reachable.add(r)
651 642 heads.add(r)
652 643 if p in heads and p not in stoprevs:
653 644 heads.remove(p)
654 645
655 646 return [self.node(r) for r in heads]
656 647
657 648 def children(self, node):
658 649 """find the children of a given node"""
659 650 c = []
660 651 p = self.rev(node)
661 652 for r in range(p + 1, len(self)):
662 653 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
663 654 if prevs:
664 655 for pr in prevs:
665 656 if pr == p:
666 657 c.append(self.node(r))
667 658 elif p == nullrev:
668 659 c.append(self.node(r))
669 660 return c
670 661
671 662 def descendant(self, start, end):
672 663 if start == nullrev:
673 664 return True
674 665 for i in self.descendants([start]):
675 666 if i == end:
676 667 return True
677 668 elif i > end:
678 669 break
679 670 return False
680 671
681 672 def ancestor(self, a, b):
682 673 """calculate the least common ancestor of nodes a and b"""
683 674
684 675 # fast path, check if it is a descendant
685 676 a, b = self.rev(a), self.rev(b)
686 677 start, end = sorted((a, b))
687 678 if self.descendant(start, end):
688 679 return self.node(start)
689 680
690 681 def parents(rev):
691 682 return [p for p in self.parentrevs(rev) if p != nullrev]
692 683
693 684 c = ancestor.ancestor(a, b, parents)
694 685 if c is None:
695 686 return nullid
696 687
697 688 return self.node(c)
698 689
699 690 def _match(self, id):
700 691 if isinstance(id, int):
701 692 # rev
702 693 return self.node(id)
703 694 if len(id) == 20:
704 695 # possibly a binary node
705 696 # odds of a binary node being all hex in ASCII are 1 in 10**25
706 697 try:
707 698 node = id
708 699 self.rev(node) # quick search the index
709 700 return node
710 701 except LookupError:
711 702 pass # may be partial hex id
712 703 try:
713 704 # str(rev)
714 705 rev = int(id)
715 706 if str(rev) != id:
716 707 raise ValueError
717 708 if rev < 0:
718 709 rev = len(self) + rev
719 710 if rev < 0 or rev >= len(self):
720 711 raise ValueError
721 712 return self.node(rev)
722 713 except (ValueError, OverflowError):
723 714 pass
724 715 if len(id) == 40:
725 716 try:
726 717 # a full hex nodeid?
727 718 node = bin(id)
728 719 self.rev(node)
729 720 return node
730 721 except (TypeError, LookupError):
731 722 pass
732 723
733 724 def _partialmatch(self, id):
734 725 try:
735 726 return self.index.partialmatch(id)
736 727 except RevlogError:
737 728 # parsers.c radix tree lookup gave multiple matches
738 729 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
739 730 except (AttributeError, ValueError):
740 731 # we are pure python, or key was too short to search radix tree
741 732 pass
742 733
743 734 if id in self._pcache:
744 735 return self._pcache[id]
745 736
746 737 if len(id) < 40:
747 738 try:
748 739 # hex(node)[:...]
749 740 l = len(id) // 2 # grab an even number of digits
750 741 prefix = bin(id[:l * 2])
751 742 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
752 743 nl = [n for n in nl if hex(n).startswith(id)]
753 744 if len(nl) > 0:
754 745 if len(nl) == 1:
755 746 self._pcache[id] = nl[0]
756 747 return nl[0]
757 748 raise LookupError(id, self.indexfile,
758 749 _('ambiguous identifier'))
759 750 return None
760 751 except TypeError:
761 752 pass
762 753
763 754 def lookup(self, id):
764 755 """locate a node based on:
765 756 - revision number or str(revision number)
766 757 - nodeid or subset of hex nodeid
767 758 """
768 759 n = self._match(id)
769 760 if n is not None:
770 761 return n
771 762 n = self._partialmatch(id)
772 763 if n:
773 764 return n
774 765
775 766 raise LookupError(id, self.indexfile, _('no match found'))
776 767
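
Taken together, _match() and _partialmatch() mean lookup() accepts an integer rev, a str(rev) (negative values count back from the tip), a 20-byte binary node, a full 40-digit hex nodeid, or an unambiguous hex prefix. A hedged usage sketch, where rl is any instance of this class and the values are hypothetical:

    node = rl.lookup(5)           # integer revision number
    node = rl.lookup("5")         # str(rev); "-1" resolves to the tip
    node = rl.lookup(node)        # binary node: verified and returned as-is
    hexid = hex(node)             # full 40-char hex form (node.hex above)
    node = rl.lookup(hexid)       # full hex nodeid
    node = rl.lookup(hexid[:8])   # unambiguous hex prefix; an ambiguous
                                  # one raises LookupError('ambiguous identifier')
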
777 768 def cmp(self, node, text):
778 769 """compare text with a given file revision
779 770
780 771 returns True if text is different than what is stored.
781 772 """
782 773 p1, p2 = self.parents(node)
783 774 return hash(text, p1, p2) != node
784 775
785 776 def _addchunk(self, offset, data):
786 777 o, d = self._chunkcache
787 778 # try to add to existing cache
788 779 if o + len(d) == offset and len(d) + len(data) < _chunksize:
789 780 self._chunkcache = o, d + data
790 781 else:
791 782 self._chunkcache = offset, data
792 783
793 784 def _loadchunk(self, offset, length):
794 785 if self._inline:
795 786 df = self.opener(self.indexfile)
796 787 else:
797 788 df = self.opener(self.datafile)
798 789
799 790 readahead = max(65536, length)
800 791 df.seek(offset)
801 792 d = df.read(readahead)
802 793 df.close()
803 794 self._addchunk(offset, d)
804 795 if readahead > length:
805 796 return util.buffer(d, 0, length)
806 797 return d
807 798
808 799 def _getchunk(self, offset, length):
809 800 o, d = self._chunkcache
810 801 l = len(d)
811 802
812 803 # is it in the cache?
813 804 cachestart = offset - o
814 805 cacheend = cachestart + length
815 806 if cachestart >= 0 and cacheend <= l:
816 807 if cachestart == 0 and cacheend == l:
817 808 return d # avoid a copy
818 809 return util.buffer(d, cachestart, cacheend - cachestart)
819 810
820 811 return self._loadchunk(offset, length)
821 812
822 813 def _chunkraw(self, startrev, endrev):
823 814 start = self.start(startrev)
824 815 length = self.end(endrev) - start
825 816 if self._inline:
826 817 start += (startrev + 1) * self._io.size
827 818 return self._getchunk(start, length)
828 819
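
In an inline revlog the data chunks are interleaved with the index records in one file, so a revision's physical offset is its logical data offset plus one index-record size for every entry up to and including it, which is the (startrev + 1) * self._io.size adjustment above. A sketch of that arithmetic (64-byte RevlogNG records assumed):

    recsize = 64   # self._io.size for RevlogNG

    def inline_offset(rev, logical_start):
        # index entries 0..rev precede this revision's data; the data of
        # earlier revisions is already counted in logical_start
        return logical_start + (rev + 1) * recsize

    # rev 0's data sits right after its own index record:
    assert inline_offset(0, 0) == 64
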
829 820 def _chunk(self, rev):
830 821 return decompress(self._chunkraw(rev, rev))
831 822
832 823 def _chunkbase(self, rev):
833 824 return self._chunk(rev)
834 825
835 826 def _chunkclear(self):
836 827 self._chunkcache = (0, '')
837 828
838 829 def deltaparent(self, rev):
839 830 """return deltaparent of the given revision"""
840 831 base = self.index[rev][3]
841 832 if base == rev:
842 833 return nullrev
843 834 elif self._generaldelta:
844 835 return base
845 836 else:
846 837 return rev - 1
847 838
848 839 def revdiff(self, rev1, rev2):
849 840 """return or calculate a delta between two revisions"""
850 841 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
851 842 return str(self._chunk(rev2))
852 843
853 844 return mdiff.textdiff(self.revision(rev1),
854 845 self.revision(rev2))
855 846
856 847 def revision(self, nodeorrev):
857 848 """return an uncompressed revision of a given node or revision
858 849 number.
859 850 """
860 851 if isinstance(nodeorrev, int):
861 852 rev = nodeorrev
862 853 node = self.node(rev)
863 854 else:
864 855 node = nodeorrev
865 856 rev = None
866 857
867 858 cachedrev = None
868 859 if node == nullid:
869 860 return ""
870 861 if self._cache:
871 862 if self._cache[0] == node:
872 863 return self._cache[2]
873 864 cachedrev = self._cache[1]
874 865
875 866 # look up what we need to read
876 867 text = None
877 868 if rev is None:
878 869 rev = self.rev(node)
879 870
880 871 # check rev flags
881 872 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
882 873 raise RevlogError(_('incompatible revision flag %x') %
883 874 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
884 875
885 876 # build delta chain
886 877 chain = []
887 878 index = self.index # for performance
888 879 generaldelta = self._generaldelta
889 880 iterrev = rev
890 881 e = index[iterrev]
891 882 while iterrev != e[3] and iterrev != cachedrev:
892 883 chain.append(iterrev)
893 884 if generaldelta:
894 885 iterrev = e[3]
895 886 else:
896 887 iterrev -= 1
897 888 e = index[iterrev]
898 889 chain.reverse()
899 890 base = iterrev
900 891
901 892 if iterrev == cachedrev:
902 893 # cache hit
903 894 text = self._cache[2]
904 895
905 896 # drop cache to save memory
906 897 self._cache = None
907 898
908 899 self._chunkraw(base, rev)
909 900 if text is None:
910 901 text = str(self._chunkbase(base))
911 902
912 903 bins = [self._chunk(r) for r in chain]
913 904 text = mdiff.patches(text, bins)
914 905
915 906 text = self._checkhash(text, node, rev)
916 907
917 908 self._cache = (node, rev, text)
918 909 return text
919 910
920 911 def _checkhash(self, text, node, rev):
921 912 p1, p2 = self.parents(node)
922 913 if node != hash(text, p1, p2):
923 914 raise RevlogError(_("integrity check failed on %s:%d")
924 915 % (self.indexfile, rev))
925 916 return text
926 917
927 918 def checkinlinesize(self, tr, fp=None):
928 919 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
929 920 return
930 921
931 922 trinfo = tr.find(self.indexfile)
932 923 if trinfo is None:
933 924 raise RevlogError(_("%s not found in the transaction")
934 925 % self.indexfile)
935 926
936 927 trindex = trinfo[2]
937 928 dataoff = self.start(trindex)
938 929
939 930 tr.add(self.datafile, dataoff)
940 931
941 932 if fp:
942 933 fp.flush()
943 934 fp.close()
944 935
945 936 df = self.opener(self.datafile, 'w')
946 937 try:
947 938 for r in self:
948 939 df.write(self._chunkraw(r, r))
949 940 finally:
950 941 df.close()
951 942
952 943 fp = self.opener(self.indexfile, 'w', atomictemp=True)
953 944 self.version &= ~(REVLOGNGINLINEDATA)
954 945 self._inline = False
955 946 for i in self:
956 947 e = self._io.packentry(self.index[i], self.node, self.version, i)
957 948 fp.write(e)
958 949
959 950 # if we don't call close, the temp file will never replace the
960 951 # real index
961 952 fp.close()
962 953
963 954 tr.replace(self.indexfile, trindex * self._io.size)
964 955 self._chunkclear()
965 956
966 957 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
967 958 """add a revision to the log
968 959
969 960 text - the revision data to add
970 961 transaction - the transaction object used for rollback
971 962 link - the linkrev data to add
972 963 p1, p2 - the parent nodeids of the revision
973 964 cachedelta - an optional precomputed delta
974 965 """
975 966 node = hash(text, p1, p2)
976 967 if node in self.nodemap:
977 968 return node
978 969
979 970 dfh = None
980 971 if not self._inline:
981 972 dfh = self.opener(self.datafile, "a")
982 973 ifh = self.opener(self.indexfile, "a+")
983 974 try:
984 975 return self._addrevision(node, text, transaction, link, p1, p2,
985 976 cachedelta, ifh, dfh)
986 977 finally:
987 978 if dfh:
988 979 dfh.close()
989 980 ifh.close()
990 981
991 982 def compress(self, text):
992 983 """ generate a possibly-compressed representation of text """
993 984 if not text:
994 985 return ("", text)
995 986 l = len(text)
996 987 bin = None
997 988 if l < 44:
998 989 pass
999 990 elif l > 1000000:
1000 991 # zlib makes an internal copy, thus doubling memory usage for
1001 992 # large files, so let's do this in pieces
1002 993 z = zlib.compressobj()
1003 994 p = []
1004 995 pos = 0
1005 996 while pos < l:
1006 997 pos2 = pos + 2**20
1007 998 p.append(z.compress(text[pos:pos2]))
1008 999 pos = pos2
1009 1000 p.append(z.flush())
1010 1001 if sum(map(len, p)) < l:
1011 1002 bin = "".join(p)
1012 1003 else:
1013 1004 bin = _compress(text)
1014 1005 if bin is None or len(bin) > l:
1015 1006 if text[0] == '\0':
1016 1007 return ("", text)
1017 1008 return ('u', text)
1018 1009 return ("", bin)
1019 1010
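
compress() returns a (header, body) pair in exactly the shape decompress() expects: texts under 44 bytes are never worth compressing, texts over 1 MB go through a compressobj in 1 MB slices to avoid zlib's internal full-buffer copy, and whenever zlib fails to shrink the text it is stored raw, behind a 'u' marker unless it already starts with NUL. A hedged round trip under Python 2 semantics (rl is any revlog instance; compress touches no instance state):

    body = b"a" * 4096
    hdr, data = rl.compress(body)
    assert hdr == "" and decompress(hdr + data) == body   # zlib path, 'x...'

    tiny = b"tiny"
    assert rl.compress(tiny) == ("u", tiny)               # stored uncompressed
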
1020 1011 def _addrevision(self, node, text, transaction, link, p1, p2,
1021 1012 cachedelta, ifh, dfh):
1022 1013 """internal function to add revisions to the log
1023 1014
1024 1015 see addrevision for argument descriptions.
1025 1016 invariants:
1026 1017 - text is optional (can be None); if not set, cachedelta must be set.
1027 1018 if both are set, they must correspond to each other.
1028 1019 """
1029 1020 btext = [text]
1030 1021 def buildtext():
1031 1022 if btext[0] is not None:
1032 1023 return btext[0]
1033 1024 # flush any pending writes here so we can read it in revision
1034 1025 if dfh:
1035 1026 dfh.flush()
1036 1027 ifh.flush()
1037 1028 basetext = self.revision(self.node(cachedelta[0]))
1038 1029 btext[0] = mdiff.patch(basetext, cachedelta[1])
1039 1030 chk = hash(btext[0], p1, p2)
1040 1031 if chk != node:
1041 1032 raise RevlogError(_("consistency error in delta"))
1042 1033 return btext[0]
1043 1034
1044 1035 def builddelta(rev):
1045 1036 # can we use the cached delta?
1046 1037 if cachedelta and cachedelta[0] == rev:
1047 1038 delta = cachedelta[1]
1048 1039 else:
1049 1040 t = buildtext()
1050 1041 ptext = self.revision(self.node(rev))
1051 1042 delta = mdiff.textdiff(ptext, t)
1052 1043 data = self.compress(delta)
1053 1044 l = len(data[1]) + len(data[0])
1054 1045 if basecache[0] == rev:
1055 1046 chainbase = basecache[1]
1056 1047 else:
1057 1048 chainbase = self.chainbase(rev)
1049 dist = l + offset - self.start(chainbase)
1058 1050 if self._generaldelta:
1059 dist = l + self.chainlength(rev)
1060 1051 base = rev
1061 1052 else:
1062 dist = l + offset - self.start(chainbase)
1063 1053 base = chainbase
1064 1054 return dist, l, data, base, chainbase
1065 1055
1066 1056 curr = len(self)
1067 1057 prev = curr - 1
1068 1058 base = chainbase = curr
1069 1059 offset = self.end(prev)
1070 1060 flags = 0
1071 1061 d = None
1072 1062 basecache = self._basecache
1073 1063 p1r, p2r = self.rev(p1), self.rev(p2)
1074 1064
1075 1065 # should we try to build a delta?
1076 1066 if prev != nullrev:
1077 1067 if self._generaldelta:
1078 1068 if p1r >= basecache[1]:
1079 1069 d = builddelta(p1r)
1080 1070 elif p2r >= basecache[1]:
1081 1071 d = builddelta(p2r)
1082 1072 else:
1083 1073 d = builddelta(prev)
1084 1074 else:
1085 1075 d = builddelta(prev)
1086 1076 dist, l, data, base, chainbase = d
1087 1077
1088 1078 # full versions are inserted when the needed deltas
1089 1079 # become comparable to the uncompressed text
1090 1080 if text is None:
1091 1081 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1092 1082 cachedelta[1])
1093 1083 else:
1094 1084 textlen = len(text)
1095 1085 if d is None or dist > textlen * 2:
1096 1086 text = buildtext()
1097 1087 data = self.compress(text)
1098 1088 l = len(data[1]) + len(data[0])
1099 1089 base = chainbase = curr
1100 1090
1101 1091 e = (offset_type(offset, flags), l, textlen,
1102 1092 base, link, p1r, p2r, node)
1103 1093 self.index.insert(-1, e)
1104 1094 self.nodemap[node] = curr
1105 1095
1106 1096 entry = self._io.packentry(e, self.node, self.version, curr)
1107 1097 if not self._inline:
1108 1098 transaction.add(self.datafile, offset)
1109 1099 transaction.add(self.indexfile, curr * len(entry))
1110 1100 if data[0]:
1111 1101 dfh.write(data[0])
1112 1102 dfh.write(data[1])
1113 1103 dfh.flush()
1114 1104 ifh.write(entry)
1115 1105 else:
1116 1106 offset += curr * self._io.size
1117 1107 transaction.add(self.indexfile, offset, curr)
1118 1108 ifh.write(entry)
1119 1109 ifh.write(data[0])
1120 1110 ifh.write(data[1])
1121 1111 self.checkinlinesize(transaction, ifh)
1122 1112
1123 1113 if type(text) == str: # only accept immutable objects
1124 1114 self._cache = (node, curr, text)
1125 1115 self._basecache = (curr, chainbase)
1126 1116 return node
1127 1117
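
The key decision in _addrevision is the one the comment above flags: a delta is kept only while dist, the span of file that must be read to rebuild the revision from its chain base, stays within twice the new text's length; otherwise a full compressed text is stored and a fresh chain starts. A condensed sketch of that test:

    def choosestorage(dist, textlen):
        # mirrors "if d is None or dist > textlen * 2" above: a delta is
        # only worthwhile while rebuilding costs at most ~2x a plain read
        if dist is None or dist > textlen * 2:
            return "fulltext"
        return "delta"

    assert choosestorage(dist=150, textlen=100) == "delta"
    assert choosestorage(dist=500, textlen=100) == "fulltext"
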
1128 1118 def group(self, nodelist, bundler, reorder=None):
1129 1119 """Calculate a delta group, yielding a sequence of changegroup chunks
1130 1120 (strings).
1131 1121
1132 1122 Given a list of changeset revs, return a set of deltas and
1133 1123 metadata corresponding to nodes. The first delta is
1134 1124 first parent(nodelist[0]) -> nodelist[0], the receiver is
1135 1125 guaranteed to have this parent as it has all history before
1136 1126 these changesets. If the first parent is nullrev, the
1137 1127 changegroup starts with a full revision.
1138 1128 """
1139 1129
1140 1130 # if we don't have any revisions touched by these changesets, bail
1141 1131 if len(nodelist) == 0:
1142 1132 yield bundler.close()
1143 1133 return
1144 1134
1145 1135 # for generaldelta revlogs, we linearize the revs; this will both be
1146 1136 # much quicker and generate a much smaller bundle
1147 1137 if (self._generaldelta and reorder is not False) or reorder:
1148 1138 dag = dagutil.revlogdag(self)
1149 1139 revs = set(self.rev(n) for n in nodelist)
1150 1140 revs = dag.linearize(revs)
1151 1141 else:
1152 1142 revs = sorted([self.rev(n) for n in nodelist])
1153 1143
1154 1144 # add the parent of the first rev
1155 1145 p = self.parentrevs(revs[0])[0]
1156 1146 revs.insert(0, p)
1157 1147
1158 1148 # build deltas
1159 1149 for r in xrange(len(revs) - 1):
1160 1150 prev, curr = revs[r], revs[r + 1]
1161 1151 for c in bundler.revchunk(self, curr, prev):
1162 1152 yield c
1163 1153
1164 1154 yield bundler.close()
1165 1155
1166 1156 def addgroup(self, bundle, linkmapper, transaction):
1167 1157 """
1168 1158 add a delta group
1169 1159
1170 1160 given a set of deltas, add them to the revision log. the
1171 1161 first delta is against its parent, which should be in our
1172 1162 log, the rest are against the previous delta.
1173 1163 """
1174 1164
1175 1165 # track the base of the current delta log
1176 1166 content = []
1177 1167 node = None
1178 1168
1179 1169 r = len(self)
1180 1170 end = 0
1181 1171 if r:
1182 1172 end = self.end(r - 1)
1183 1173 ifh = self.opener(self.indexfile, "a+")
1184 1174 isize = r * self._io.size
1185 1175 if self._inline:
1186 1176 transaction.add(self.indexfile, end + isize, r)
1187 1177 dfh = None
1188 1178 else:
1189 1179 transaction.add(self.indexfile, isize, r)
1190 1180 transaction.add(self.datafile, end)
1191 1181 dfh = self.opener(self.datafile, "a")
1192 1182
1193 1183 try:
1194 1184 # loop through our set of deltas
1195 1185 chain = None
1196 1186 while True:
1197 1187 chunkdata = bundle.deltachunk(chain)
1198 1188 if not chunkdata:
1199 1189 break
1200 1190 node = chunkdata['node']
1201 1191 p1 = chunkdata['p1']
1202 1192 p2 = chunkdata['p2']
1203 1193 cs = chunkdata['cs']
1204 1194 deltabase = chunkdata['deltabase']
1205 1195 delta = chunkdata['delta']
1206 1196
1207 1197 content.append(node)
1208 1198
1209 1199 link = linkmapper(cs)
1210 1200 if node in self.nodemap:
1211 1201 # this can happen if two branches make the same change
1212 1202 chain = node
1213 1203 continue
1214 1204
1215 1205 for p in (p1, p2):
1216 1206 if p not in self.nodemap:
1217 1207 raise LookupError(p, self.indexfile,
1218 1208 _('unknown parent'))
1219 1209
1220 1210 if deltabase not in self.nodemap:
1221 1211 raise LookupError(deltabase, self.indexfile,
1222 1212 _('unknown delta base'))
1223 1213
1224 1214 baserev = self.rev(deltabase)
1225 1215 chain = self._addrevision(node, None, transaction, link,
1226 1216 p1, p2, (baserev, delta), ifh, dfh)
1227 1217 if not dfh and not self._inline:
1228 1218 # addrevision switched from inline to conventional
1229 1219 # reopen the index
1230 1220 ifh.close()
1231 1221 dfh = self.opener(self.datafile, "a")
1232 1222 ifh = self.opener(self.indexfile, "a")
1233 1223 finally:
1234 1224 if dfh:
1235 1225 dfh.close()
1236 1226 ifh.close()
1237 1227
1238 1228 return content
1239 1229
1240 1230 def strip(self, minlink, transaction):
1241 1231 """truncate the revlog on the first revision with a linkrev >= minlink
1242 1232
1243 1233 This function is called when we're stripping revision minlink and
1244 1234 its descendants from the repository.
1245 1235
1246 1236 We have to remove all revisions with linkrev >= minlink, because
1247 1237 the equivalent changelog revisions will be renumbered after the
1248 1238 strip.
1249 1239
1250 1240 So we truncate the revlog on the first of these revisions, and
1251 1241 trust that the caller has saved the revisions that shouldn't be
1252 1242 removed and that it'll re-add them after this truncation.
1253 1243 """
1254 1244 if len(self) == 0:
1255 1245 return
1256 1246
1257 1247 for rev in self:
1258 1248 if self.index[rev][4] >= minlink:
1259 1249 break
1260 1250 else:
1261 1251 return
1262 1252
1263 1253 # first truncate the files on disk
1264 1254 end = self.start(rev)
1265 1255 if not self._inline:
1266 1256 transaction.add(self.datafile, end)
1267 1257 end = rev * self._io.size
1268 1258 else:
1269 1259 end += rev * self._io.size
1270 1260
1271 1261 transaction.add(self.indexfile, end)
1272 1262
1273 1263 # then reset internal state in memory to forget those revisions
1274 1264 self._cache = None
1275 1265 self._chunkclear()
1276 1266 for x in xrange(rev, len(self)):
1277 1267 del self.nodemap[self.node(x)]
1278 1268
1279 1269 del self.index[rev:-1]
1280 1270
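
The truncation offsets strip() computes differ by layout: a split revlog cuts the index at rev * recordsize and the data file at that revision's start offset, while an inline revlog cuts its single interleaved file at start + rev * recordsize. A sketch with the 64-byte RevlogNG record size assumed:

    def truncation_points(rev, start, inline, recsize=64):
        # returns (index_truncate_at, data_truncate_at_or_None)
        if inline:
            return (start + rev * recsize, None)   # one interleaved file
        return (rev * recsize, start)              # separate .i and .d files

    assert truncation_points(3, start=1000, inline=False) == (192, 1000)
    assert truncation_points(3, start=1000, inline=True) == (1192, None)
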
1281 1271 def checksize(self):
1282 1272 expected = 0
1283 1273 if len(self):
1284 1274 expected = max(0, self.end(len(self) - 1))
1285 1275
1286 1276 try:
1287 1277 f = self.opener(self.datafile)
1288 1278 f.seek(0, 2)
1289 1279 actual = f.tell()
1290 1280 f.close()
1291 1281 dd = actual - expected
1292 1282 except IOError, inst:
1293 1283 if inst.errno != errno.ENOENT:
1294 1284 raise
1295 1285 dd = 0
1296 1286
1297 1287 try:
1298 1288 f = self.opener(self.indexfile)
1299 1289 f.seek(0, 2)
1300 1290 actual = f.tell()
1301 1291 f.close()
1302 1292 s = self._io.size
1303 1293 i = max(0, actual // s)
1304 1294 di = actual - (i * s)
1305 1295 if self._inline:
1306 1296 databytes = 0
1307 1297 for r in self:
1308 1298 databytes += max(0, self.length(r))
1309 1299 dd = 0
1310 1300 di = actual - len(self) * s - databytes
1311 1301 except IOError, inst:
1312 1302 if inst.errno != errno.ENOENT:
1313 1303 raise
1314 1304 di = 0
1315 1305
1316 1306 return (dd, di)
1317 1307
1318 1308 def files(self):
1319 1309 res = [self.indexfile]
1320 1310 if not self._inline:
1321 1311 res.append(self.datafile)
1322 1312 return res