revlog: add incancestors, a version of ancestors that includes revs listed...
Joshua Redstone
r17008:553e8f5a default
@@ -1,1327 +1,1335 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
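
The three helpers above pack a 48-bit byte offset and a 16-bit flag field into the single integer stored as the first element of every index entry. A minimal round-trip sketch, assuming the helpers above are in scope:

    q = offset_type(4096, 0)   # offset 4096 bytes, no flags
    assert getoffset(q) == 4096
    assert gettype(q) == 0
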
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
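
Because the parents are sorted before hashing, the node id is independent of parent order, and the nullid fast path above is just a precomputed instance of the same scheme (nullid sorts first, being all zero bytes). A self-contained sketch of the idea, using hashlib directly; nodehash is a hypothetical stand-in for the hash() function above, and nodes are the 20-byte binary strings used throughout this file:

    import hashlib

    nullid = "\0" * 20

    def nodehash(text, p1, p2):
        # sort the parents so (p1, p2) and (p2, p1) hash identically
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()

    # same text, different history -> different node ids
    n1 = nodehash("data", nullid, nullid)
    n2 = nodehash("data", n1, nullid)
    assert n1 != n2
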
78 78 def compress(text):
79 79 """ generate a possibly-compressed representation of text """
80 80 if not text:
81 81 return ("", text)
82 82 l = len(text)
83 83 bin = None
84 84 if l < 44:
85 85 pass
86 86 elif l > 1000000:
87 87 # zlib makes an internal copy, thus doubling memory usage for
88 88 # large files, so let's do this in pieces
89 89 z = zlib.compressobj()
90 90 p = []
91 91 pos = 0
92 92 while pos < l:
93 93 pos2 = pos + 2**20
94 94 p.append(z.compress(text[pos:pos2]))
95 95 pos = pos2
96 96 p.append(z.flush())
97 97 if sum(map(len, p)) < l:
98 98 bin = "".join(p)
99 99 else:
100 100 bin = _compress(text)
101 101 if bin is None or len(bin) > l:
102 102 if text[0] == '\0':
103 103 return ("", text)
104 104 return ('u', text)
105 105 return ("", bin)
106 106
107 107 def decompress(bin):
108 108 """ decompress the given input """
109 109 if not bin:
110 110 return bin
111 111 t = bin[0]
112 112 if t == '\0':
113 113 return bin
114 114 if t == 'x':
115 115 try:
116 116 return _decompress(bin)
117 117 except zlib.error, e:
118 118 raise RevlogError(_("revlog decompress error: %s") % str(e))
119 119 if t == 'u':
120 120 return bin[1:]
121 121 raise RevlogError(_("unknown compression type %r") % t)
122 122
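
A usage sketch of the envelope the two functions above agree on: an empty header means zlib data (whose first byte is 'x'), 'u' marks text stored uncompressed, and a leading '\0' passes through untouched. Assuming compress() and decompress() as defined above:

    h, d = compress("some text" * 100)   # compressible -> ("", <zlib data>)
    assert decompress(h + d) == "some text" * 100

    h, d = compress("xy")                # below the 44-byte floor -> stored raw
    assert (h, d) == ("u", "xy")
    assert decompress(h + d) == "xy"
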
123 123 indexformatv0 = ">4l20s20s20s"
124 124 v0shaoffset = 56
125 125
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 ngshaoffset = 32
171 171 versionformat = ">I"
172 172
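
To make the layout concrete, here is a sketch that packs one entry with the format string above and reads the node id back out at ngshaoffset; the field values are made up for illustration, and the module-level names (_pack, indexformatng, ngshaoffset, offset_type) are assumed in scope. Note that rev 0 is special-cased in packentry() below, where the first four bytes are overwritten with the version stamp:

    import struct

    node = "\xaa" * 20
    entry = (offset_type(0, 0), 15, 30, 0, 0, -1, -1, node)  # rev 0, no parents
    packed = _pack(indexformatng, *entry)
    assert len(packed) == struct.calcsize(indexformatng)     # 64 bytes on disk
    assert packed[ngshaoffset:ngshaoffset + 20] == node
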
173 173 class revlogio(object):
174 174 def __init__(self):
175 175 self.size = struct.calcsize(indexformatng)
176 176
177 177 def parseindex(self, data, inline):
178 178 # call the C implementation to parse the index data
179 179 index, cache = parsers.parse_index2(data, inline)
180 180 return index, getattr(index, 'nodemap', None), cache
181 181
182 182 def packentry(self, entry, node, version, rev):
183 183 p = _pack(indexformatng, *entry)
184 184 if rev == 0:
185 185 p = _pack(versionformat, version) + p[4:]
186 186 return p
187 187
188 188 class revlog(object):
189 189 """
190 190 the underlying revision storage object
191 191
192 192 A revlog consists of two parts, an index and the revision data.
193 193
194 194 The index is a file with a fixed record size containing
195 195 information on each revision, including its nodeid (hash), the
196 196 nodeids of its parents, the position and offset of its data within
197 197 the data file, and the revision it's based on. Finally, each entry
198 198 contains a linkrev entry that can serve as a pointer to external
199 199 data.
200 200
201 201 The revision data itself is a linear collection of data chunks.
202 202 Each chunk represents a revision and is usually represented as a
203 203 delta against the previous chunk. To bound lookup time, runs of
204 204 deltas are limited to about 2 times the length of the original
205 205 version data. This makes retrieval of a version proportional to
206 206 its size, or O(1) relative to the number of revisions.
207 207
208 208 Both pieces of the revlog are written to in an append-only
209 209 fashion, which means we never need to rewrite a file to insert or
210 210 remove data, and can use some simple techniques to avoid the need
211 211 for locking while reading.
212 212 """
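
The 2x bound mentioned above is enforced at write time: _addrevision() below falls back to storing a full text whenever the accumulated delta chain would grow past twice the new text's length (the 'dist > textlen * 2' check). A conceptual sketch of the read path this bounds; reconstruct() and patch() are hypothetical stand-ins for the chain walk that revision() performs via mdiff.patches:

    def reconstruct(chunks, base, rev, patch):
        # chunks[base] holds a full text; each later chunk is a delta
        # against its predecessor, so rebuilding rev applies at most a
        # bounded run of patches regardless of how many revisions exist
        text = chunks[base]
        for r in range(base + 1, rev + 1):
            text = patch(text, chunks[r])
        return text
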
213 213 def __init__(self, opener, indexfile):
214 214 """
215 215 create a revlog object
216 216
217 217 opener is a function that abstracts the file opening operation
218 218 and can be used to implement COW semantics or the like.
219 219 """
220 220 self.indexfile = indexfile
221 221 self.datafile = indexfile[:-2] + ".d"
222 222 self.opener = opener
223 223 self._cache = None
224 224 self._basecache = (0, 0)
225 225 self._chunkcache = (0, '')
226 226 self.index = []
227 227 self._pcache = {}
228 228 self._nodecache = {nullid: nullrev}
229 229 self._nodepos = None
230 230
231 231 v = REVLOG_DEFAULT_VERSION
232 232 opts = getattr(opener, 'options', None)
233 233 if opts is not None:
234 234 if 'revlogv1' in opts:
235 235 if 'generaldelta' in opts:
236 236 v |= REVLOGGENERALDELTA
237 237 else:
238 238 v = 0
239 239
240 240 i = ''
241 241 self._initempty = True
242 242 try:
243 243 f = self.opener(self.indexfile)
244 244 i = f.read()
245 245 f.close()
246 246 if len(i) > 0:
247 247 v = struct.unpack(versionformat, i[:4])[0]
248 248 self._initempty = False
249 249 except IOError, inst:
250 250 if inst.errno != errno.ENOENT:
251 251 raise
252 252
253 253 self.version = v
254 254 self._inline = v & REVLOGNGINLINEDATA
255 255 self._generaldelta = v & REVLOGGENERALDELTA
256 256 flags = v & ~0xFFFF
257 257 fmt = v & 0xFFFF
258 258 if fmt == REVLOGV0 and flags:
259 259 raise RevlogError(_("index %s unknown flags %#04x for format v0")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
262 262 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
263 263 % (self.indexfile, flags >> 16))
264 264 elif fmt > REVLOGNG:
265 265 raise RevlogError(_("index %s unknown format %d")
266 266 % (self.indexfile, fmt))
267 267
268 268 self._io = revlogio()
269 269 if self.version == REVLOGV0:
270 270 self._io = revlogoldio()
271 271 try:
272 272 d = self._io.parseindex(i, self._inline)
273 273 except (ValueError, IndexError):
274 274 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
275 275 self.index, nodemap, self._chunkcache = d
276 276 if nodemap is not None:
277 277 self.nodemap = self._nodecache = nodemap
278 278 if not self._chunkcache:
279 279 self._chunkclear()
280 280
281 281 def tip(self):
282 282 return self.node(len(self.index) - 2)
283 283 def __len__(self):
284 284 return len(self.index) - 1
285 285 def __iter__(self):
286 286 for i in xrange(len(self)):
287 287 yield i
288 288
289 289 @util.propertycache
290 290 def nodemap(self):
291 291 self.rev(self.node(0))
292 292 return self._nodecache
293 293
294 294 def hasnode(self, node):
295 295 try:
296 296 self.rev(node)
297 297 return True
298 298 except KeyError:
299 299 return False
300 300
301 301 def clearcaches(self):
302 302 try:
303 303 self._nodecache.clearcaches()
304 304 except AttributeError:
305 305 self._nodecache = {nullid: nullrev}
306 306 self._nodepos = None
307 307
308 308 def rev(self, node):
309 309 try:
310 310 return self._nodecache[node]
311 311 except RevlogError:
312 312 # parsers.c radix tree lookup failed
313 313 raise LookupError(node, self.indexfile, _('no node'))
314 314 except KeyError:
315 315 # pure python cache lookup failed
316 316 n = self._nodecache
317 317 i = self.index
318 318 p = self._nodepos
319 319 if p is None:
320 320 p = len(i) - 2
321 321 for r in xrange(p, -1, -1):
322 322 v = i[r][7]
323 323 n[v] = r
324 324 if v == node:
325 325 self._nodepos = r - 1
326 326 return r
327 327 raise LookupError(node, self.indexfile, _('no node'))
328 328
329 329 def node(self, rev):
330 330 return self.index[rev][7]
331 331 def linkrev(self, rev):
332 332 return self.index[rev][4]
333 333 def parents(self, node):
334 334 i = self.index
335 335 d = i[self.rev(node)]
336 336 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
337 337 def parentrevs(self, rev):
338 338 return self.index[rev][5:7]
339 339 def start(self, rev):
340 340 return int(self.index[rev][0] >> 16)
341 341 def end(self, rev):
342 342 return self.start(rev) + self.length(rev)
343 343 def length(self, rev):
344 344 return self.index[rev][1]
345 345 def chainbase(self, rev):
346 346 index = self.index
347 347 base = index[rev][3]
348 348 while base != rev:
349 349 rev = base
350 350 base = index[rev][3]
351 351 return base
352 352 def flags(self, rev):
353 353 return self.index[rev][0] & 0xFFFF
354 354 def rawsize(self, rev):
355 355 """return the length of the uncompressed text for a given revision"""
356 356 l = self.index[rev][2]
357 357 if l >= 0:
358 358 return l
359 359
360 360 t = self.revision(self.node(rev))
361 361 return len(t)
362 362 size = rawsize
363 363
364 364 def reachable(self, node, stop=None):
365 365 """return the set of all nodes ancestral to a given node, including
366 366 the node itself, stopping when stop is matched"""
367 367 reachable = set((node,))
368 368 visit = util.deque([node])
369 369 if stop:
370 370 stopn = self.rev(stop)
371 371 else:
372 372 stopn = 0
373 373 while visit:
374 374 n = visit.popleft()
375 375 if n == stop:
376 376 continue
377 377 if n == nullid:
378 378 continue
379 379 for p in self.parents(n):
380 380 if self.rev(p) < stopn:
381 381 continue
382 382 if p not in reachable:
383 383 reachable.add(p)
384 384 visit.append(p)
385 385 return reachable
386 386
387 387 def ancestors(self, revs, stoprev=0):
388 388 """Generate the ancestors of 'revs' in reverse topological order.
389 389 Does not generate revs lower than stoprev.
390 390
391 391 Yield a sequence of revision numbers starting with the parents
392 392 of each revision in revs, i.e., each revision is *not* considered
393 393 an ancestor of itself. Results are in breadth-first order:
394 394 parents of each rev in revs, then parents of those, etc. Result
395 395 does not include the null revision."""
396 396 visit = util.deque(revs)
397 397 seen = set([nullrev])
398 398 while visit:
399 399 for parent in self.parentrevs(visit.popleft()):
400 400 if parent < stoprev:
401 401 continue
402 402 if parent not in seen:
403 403 visit.append(parent)
404 404 seen.add(parent)
405 405 yield parent
406 406
407 def incancestors(self, revs, stoprev=0):
408 """Identical to ancestors() except it also generates the
409 revisions, 'revs'"""
410 for rev in revs:
411 yield rev
412 for rev in self.ancestors(revs, stoprev):
413 yield rev
414
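
A standalone mirror of the two generators, showing the only difference: incancestors() yields the starting revs themselves before handing off to the ancestor walk. The parents mapping stands in for parentrevs() on a toy graph, and the toy* names are hypothetical (nullrev is -1):

    import collections

    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}

    def toyancestors(revs):
        # breadth-first walk over parents, skipping nullrev, as above
        visit = collections.deque(revs)
        seen = set([-1])
        while visit:
            for parent in parents[visit.popleft()]:
                if parent >= 0 and parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def toyincancestors(revs):
        for rev in revs:
            yield rev
        for rev in toyancestors(revs):
            yield rev

    assert list(toyancestors([3])) == [1, 0]        # 3 itself is excluded
    assert list(toyincancestors([3])) == [3, 1, 0]  # 3 is included
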
407 415 def descendants(self, revs):
408 416 """Generate the descendants of 'revs' in revision order.
409 417
410 418 Yield a sequence of revision numbers starting with a child of
411 419 some rev in revs, i.e., each revision is *not* considered a
412 420 descendant of itself. Results are ordered by revision number (a
413 421 topological sort)."""
414 422 first = min(revs)
415 423 if first == nullrev:
416 424 for i in self:
417 425 yield i
418 426 return
419 427
420 428 seen = set(revs)
421 429 for i in xrange(first + 1, len(self)):
422 430 for x in self.parentrevs(i):
423 431 if x != nullrev and x in seen:
424 432 seen.add(i)
425 433 yield i
426 434 break
427 435
428 436 def findcommonmissing(self, common=None, heads=None):
429 437 """Return a tuple of the ancestors of common and the ancestors of heads
430 438 that are not ancestors of common. In revset terminology, we return the
431 439 tuple:
432 440
433 441 ::common, (::heads) - (::common)
434 442
435 443 The list is sorted by revision number, meaning it is
436 444 topologically sorted.
437 445
438 446 'heads' and 'common' are both lists of node IDs. If heads is
439 447 not supplied, uses all of the revlog's heads. If common is not
440 448 supplied, uses nullid."""
441 449 if common is None:
442 450 common = [nullid]
443 451 if heads is None:
444 452 heads = self.heads()
445 453
446 454 common = [self.rev(n) for n in common]
447 455 heads = [self.rev(n) for n in heads]
448 456
449 457 # we want the ancestors, but inclusive
450 458 has = set(self.ancestors(common))
451 459 has.add(nullrev)
452 460 has.update(common)
453 461
454 462 # take all ancestors from heads that aren't in has
455 463 missing = set()
456 464 visit = util.deque(r for r in heads if r not in has)
457 465 while visit:
458 466 r = visit.popleft()
459 467 if r in missing:
460 468 continue
461 469 else:
462 470 missing.add(r)
463 471 for p in self.parentrevs(r):
464 472 if p not in has:
465 473 visit.append(p)
466 474 missing = list(missing)
467 475 missing.sort()
468 476 return has, [self.node(r) for r in missing]
469 477
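
As a concrete illustration of the revset above: on a linear history 0..4 with common pointing at rev 1 and heads at rev 4, 'has' comes back as {nullrev, 0, 1} and the missing list holds the nodes of revs 2, 3 and 4, already in revision order.
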
470 478 def findmissing(self, common=None, heads=None):
471 479 """Return the ancestors of heads that are not ancestors of common.
472 480
473 481 More specifically, return a list of nodes N such that every N
474 482 satisfies the following constraints:
475 483
476 484 1. N is an ancestor of some node in 'heads'
477 485 2. N is not an ancestor of any node in 'common'
478 486
479 487 The list is sorted by revision number, meaning it is
480 488 topologically sorted.
481 489
482 490 'heads' and 'common' are both lists of node IDs. If heads is
483 491 not supplied, uses all of the revlog's heads. If common is not
484 492 supplied, uses nullid."""
485 493 _common, missing = self.findcommonmissing(common, heads)
486 494 return missing
487 495
488 496 def nodesbetween(self, roots=None, heads=None):
489 497 """Return a topological path from 'roots' to 'heads'.
490 498
491 499 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
492 500 topologically sorted list of all nodes N that satisfy both of
493 501 these constraints:
494 502
495 503 1. N is a descendant of some node in 'roots'
496 504 2. N is an ancestor of some node in 'heads'
497 505
498 506 Every node is considered to be both a descendant and an ancestor
499 507 of itself, so every reachable node in 'roots' and 'heads' will be
500 508 included in 'nodes'.
501 509
502 510 'outroots' is the list of reachable nodes in 'roots', i.e., the
503 511 subset of 'roots' that is returned in 'nodes'. Likewise,
504 512 'outheads' is the subset of 'heads' that is also in 'nodes'.
505 513
506 514 'roots' and 'heads' are both lists of node IDs. If 'roots' is
507 515 unspecified, uses nullid as the only root. If 'heads' is
508 516 unspecified, uses list of all of the revlog's heads."""
509 517 nonodes = ([], [], [])
510 518 if roots is not None:
511 519 roots = list(roots)
512 520 if not roots:
513 521 return nonodes
514 522 lowestrev = min([self.rev(n) for n in roots])
515 523 else:
516 524 roots = [nullid] # Everybody's a descendant of nullid
517 525 lowestrev = nullrev
518 526 if (lowestrev == nullrev) and (heads is None):
519 527 # We want _all_ the nodes!
520 528 return ([self.node(r) for r in self], [nullid], list(self.heads()))
521 529 if heads is None:
522 530 # All nodes are ancestors, so the latest ancestor is the last
523 531 # node.
524 532 highestrev = len(self) - 1
525 533 # Set ancestors to None to signal that every node is an ancestor.
526 534 ancestors = None
527 535 # Set heads to an empty dictionary for later discovery of heads
528 536 heads = {}
529 537 else:
530 538 heads = list(heads)
531 539 if not heads:
532 540 return nonodes
533 541 ancestors = set()
534 542 # Turn heads into a dictionary so we can remove 'fake' heads.
535 543 # Also, later we will be using it to filter out the heads we can't
536 544 # find from roots.
537 545 heads = dict.fromkeys(heads, False)
538 546 # Start at the top and keep marking parents until we're done.
539 547 nodestotag = set(heads)
540 548 # Remember where the top was so we can use it as a limit later.
541 549 highestrev = max([self.rev(n) for n in nodestotag])
542 550 while nodestotag:
543 551 # grab a node to tag
544 552 n = nodestotag.pop()
545 553 # Never tag nullid
546 554 if n == nullid:
547 555 continue
548 556 # A node's revision number represents its place in a
549 557 # topologically sorted list of nodes.
550 558 r = self.rev(n)
551 559 if r >= lowestrev:
552 560 if n not in ancestors:
553 561 # If we are possibly a descendant of one of the roots
554 562 # and we haven't already been marked as an ancestor
555 563 ancestors.add(n) # Mark as ancestor
556 564 # Add non-nullid parents to list of nodes to tag.
557 565 nodestotag.update([p for p in self.parents(n) if
558 566 p != nullid])
559 567 elif n in heads: # We've seen it before, is it a fake head?
560 568 # So it is, real heads should not be the ancestors of
561 569 # any other heads.
562 570 heads.pop(n)
563 571 if not ancestors:
564 572 return nonodes
565 573 # Now that we have our set of ancestors, we want to remove any
566 574 # roots that are not ancestors.
567 575
568 576 # If one of the roots was nullid, everything is included anyway.
569 577 if lowestrev > nullrev:
570 578 # But, since we weren't, let's recompute the lowest rev to not
571 579 # include roots that aren't ancestors.
572 580
573 581 # Filter out roots that aren't ancestors of heads
574 582 roots = [n for n in roots if n in ancestors]
575 583 # Recompute the lowest revision
576 584 if roots:
577 585 lowestrev = min([self.rev(n) for n in roots])
578 586 else:
579 587 # No more roots? Return empty list
580 588 return nonodes
581 589 else:
582 590 # We are descending from nullid, and don't need to care about
583 591 # any other roots.
584 592 lowestrev = nullrev
585 593 roots = [nullid]
586 594 # Transform our roots list into a set.
587 595 descendants = set(roots)
588 596 # Also, keep the original roots so we can filter out roots that aren't
589 597 # 'real' roots (i.e. are descended from other roots).
590 598 roots = descendants.copy()
591 599 # Our topologically sorted list of output nodes.
592 600 orderedout = []
593 601 # Don't start at nullid since we don't want nullid in our output list,
594 602 # and if nullid shows up in descedents, empty parents will look like
595 603 # and if nullid shows up in descendants, empty parents will look like
596 604 for r in xrange(max(lowestrev, 0), highestrev + 1):
597 605 n = self.node(r)
598 606 isdescendant = False
599 607 if lowestrev == nullrev: # Everybody is a descendant of nullid
600 608 isdescendant = True
601 609 elif n in descendants:
602 610 # n is already a descendant
603 611 isdescendant = True
604 612 # This check only needs to be done here because all the roots
605 613 # will start being marked as descendants before the loop.
606 614 if n in roots:
607 615 # If n was a root, check if it's a 'real' root.
608 616 p = tuple(self.parents(n))
609 617 # If any of its parents are descendants, it's not a root.
610 618 if (p[0] in descendants) or (p[1] in descendants):
611 619 roots.remove(n)
612 620 else:
613 621 p = tuple(self.parents(n))
614 622 # A node is a descendant if either of its parents are
615 623 # descendants. (We seeded the descendants set with the roots
616 624 # up there, remember?)
617 625 if (p[0] in descendants) or (p[1] in descendants):
618 626 descendants.add(n)
619 627 isdescendant = True
620 628 if isdescendant and ((ancestors is None) or (n in ancestors)):
621 629 # Only include nodes that are both descendants and ancestors.
622 630 orderedout.append(n)
623 631 if (ancestors is not None) and (n in heads):
624 632 # We're trying to figure out which heads are reachable
625 633 # from roots.
626 634 # Mark this head as having been reached
627 635 heads[n] = True
628 636 elif ancestors is None:
629 637 # Otherwise, we're trying to discover the heads.
630 638 # Assume this is a head because if it isn't, the next step
631 639 # will eventually remove it.
632 640 heads[n] = True
633 641 # But, obviously its parents aren't.
634 642 for p in self.parents(n):
635 643 heads.pop(p, None)
636 644 heads = [n for n, flag in heads.iteritems() if flag]
637 645 roots = list(roots)
638 646 assert orderedout
639 647 assert roots
640 648 assert heads
641 649 return (orderedout, roots, heads)
642 650
643 651 def headrevs(self):
644 652 try:
645 653 return self.index.headrevs()
646 654 except AttributeError:
647 655 pass
648 656 count = len(self)
649 657 if not count:
650 658 return [nullrev]
651 659 ishead = [1] * (count + 1)
652 660 index = self.index
653 661 for r in xrange(count):
654 662 e = index[r]
655 663 ishead[e[5]] = ishead[e[6]] = 0
656 664 return [r for r in xrange(count) if ishead[r]]
657 665
658 666 def heads(self, start=None, stop=None):
659 667 """return the list of all nodes that have no children
660 668
661 669 if start is specified, only heads that are descendants of
662 670 start will be returned
663 671 if stop is specified, it will consider all the revs from stop
664 672 as if they had no children
665 673 """
666 674 if start is None and stop is None:
667 675 if not len(self):
668 676 return [nullid]
669 677 return [self.node(r) for r in self.headrevs()]
670 678
671 679 if start is None:
672 680 start = nullid
673 681 if stop is None:
674 682 stop = []
675 683 stoprevs = set([self.rev(n) for n in stop])
676 684 startrev = self.rev(start)
677 685 reachable = set((startrev,))
678 686 heads = set((startrev,))
679 687
680 688 parentrevs = self.parentrevs
681 689 for r in xrange(startrev + 1, len(self)):
682 690 for p in parentrevs(r):
683 691 if p in reachable:
684 692 if r not in stoprevs:
685 693 reachable.add(r)
686 694 heads.add(r)
687 695 if p in heads and p not in stoprevs:
688 696 heads.remove(p)
689 697
690 698 return [self.node(r) for r in heads]
691 699
692 700 def children(self, node):
693 701 """find the children of a given node"""
694 702 c = []
695 703 p = self.rev(node)
696 704 for r in range(p + 1, len(self)):
697 705 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
698 706 if prevs:
699 707 for pr in prevs:
700 708 if pr == p:
701 709 c.append(self.node(r))
702 710 elif p == nullrev:
703 711 c.append(self.node(r))
704 712 return c
705 713
706 714 def descendant(self, start, end):
707 715 if start == nullrev:
708 716 return True
709 717 for i in self.descendants([start]):
710 718 if i == end:
711 719 return True
712 720 elif i > end:
713 721 break
714 722 return False
715 723
716 724 def ancestor(self, a, b):
717 725 """calculate the least common ancestor of nodes a and b"""
718 726
719 727 # fast path, check if it is a descendant
720 728 a, b = self.rev(a), self.rev(b)
721 729 start, end = sorted((a, b))
722 730 if self.descendant(start, end):
723 731 return self.node(start)
724 732
725 733 def parents(rev):
726 734 return [p for p in self.parentrevs(rev) if p != nullrev]
727 735
728 736 c = ancestor.ancestor(a, b, parents)
729 737 if c is None:
730 738 return nullid
731 739
732 740 return self.node(c)
733 741
734 742 def _match(self, id):
735 743 if isinstance(id, int):
736 744 # rev
737 745 return self.node(id)
738 746 if len(id) == 20:
739 747 # possibly a binary node
740 748 # odds of a binary node being all hex in ASCII are 1 in 10**25
741 749 try:
742 750 node = id
743 751 self.rev(node) # quick search the index
744 752 return node
745 753 except LookupError:
746 754 pass # may be partial hex id
747 755 try:
748 756 # str(rev)
749 757 rev = int(id)
750 758 if str(rev) != id:
751 759 raise ValueError
752 760 if rev < 0:
753 761 rev = len(self) + rev
754 762 if rev < 0 or rev >= len(self):
755 763 raise ValueError
756 764 return self.node(rev)
757 765 except (ValueError, OverflowError):
758 766 pass
759 767 if len(id) == 40:
760 768 try:
761 769 # a full hex nodeid?
762 770 node = bin(id)
763 771 self.rev(node)
764 772 return node
765 773 except (TypeError, LookupError):
766 774 pass
767 775
768 776 def _partialmatch(self, id):
769 777 try:
770 778 return self.index.partialmatch(id)
771 779 except RevlogError:
772 780 # parsers.c radix tree lookup gave multiple matches
773 781 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
774 782 except (AttributeError, ValueError):
775 783 # we are pure python, or key was too short to search radix tree
776 784 pass
777 785
778 786 if id in self._pcache:
779 787 return self._pcache[id]
780 788
781 789 if len(id) < 40:
782 790 try:
783 791 # hex(node)[:...]
784 792 l = len(id) // 2 # grab an even number of digits
785 793 prefix = bin(id[:l * 2])
786 794 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
787 795 nl = [n for n in nl if hex(n).startswith(id)]
788 796 if len(nl) > 0:
789 797 if len(nl) == 1:
790 798 self._pcache[id] = nl[0]
791 799 return nl[0]
792 800 raise LookupError(id, self.indexfile,
793 801 _('ambiguous identifier'))
794 802 return None
795 803 except TypeError:
796 804 pass
797 805
798 806 def lookup(self, id):
799 807 """locate a node based on:
800 808 - revision number or str(revision number)
801 809 - nodeid or subset of hex nodeid
802 810 """
803 811 n = self._match(id)
804 812 if n is not None:
805 813 return n
806 814 n = self._partialmatch(id)
807 815 if n:
808 816 return n
809 817
810 818 raise LookupError(id, self.indexfile, _('no match found'))
811 819
812 820 def cmp(self, node, text):
813 821 """compare text with a given file revision
814 822
815 823 returns True if text is different from what is stored.
816 824 """
817 825 p1, p2 = self.parents(node)
818 826 return hash(text, p1, p2) != node
819 827
820 828 def _addchunk(self, offset, data):
821 829 o, d = self._chunkcache
822 830 # try to add to existing cache
823 831 if o + len(d) == offset and len(d) + len(data) < _chunksize:
824 832 self._chunkcache = o, d + data
825 833 else:
826 834 self._chunkcache = offset, data
827 835
828 836 def _loadchunk(self, offset, length):
829 837 if self._inline:
830 838 df = self.opener(self.indexfile)
831 839 else:
832 840 df = self.opener(self.datafile)
833 841
834 842 readahead = max(65536, length)
835 843 df.seek(offset)
836 844 d = df.read(readahead)
837 845 df.close()
838 846 self._addchunk(offset, d)
839 847 if readahead > length:
840 848 return util.buffer(d, 0, length)
841 849 return d
842 850
843 851 def _getchunk(self, offset, length):
844 852 o, d = self._chunkcache
845 853 l = len(d)
846 854
847 855 # is it in the cache?
848 856 cachestart = offset - o
849 857 cacheend = cachestart + length
850 858 if cachestart >= 0 and cacheend <= l:
851 859 if cachestart == 0 and cacheend == l:
852 860 return d # avoid a copy
853 861 return util.buffer(d, cachestart, cacheend - cachestart)
854 862
855 863 return self._loadchunk(offset, length)
856 864
857 865 def _chunkraw(self, startrev, endrev):
858 866 start = self.start(startrev)
859 867 length = self.end(endrev) - start
860 868 if self._inline:
861 869 start += (startrev + 1) * self._io.size
862 870 return self._getchunk(start, length)
863 871
864 872 def _chunk(self, rev):
865 873 return decompress(self._chunkraw(rev, rev))
866 874
867 875 def _chunkbase(self, rev):
868 876 return self._chunk(rev)
869 877
870 878 def _chunkclear(self):
871 879 self._chunkcache = (0, '')
872 880
873 881 def deltaparent(self, rev):
874 882 """return deltaparent of the given revision"""
875 883 base = self.index[rev][3]
876 884 if base == rev:
877 885 return nullrev
878 886 elif self._generaldelta:
879 887 return base
880 888 else:
881 889 return rev - 1
882 890
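
With generaldelta, the base field of an index entry records the actual delta parent, so deltas can follow the revision graph; in classic RevlogNG every delta is against the immediately preceding revision, and the base field only marks where the chain bottoms out. A small sketch of the distinction, with the index stubbed down to the base-rev field (entry[3] in the format above); the names here are hypothetical:

    basemap = {5: 3, 6: 6}   # rev -> base rev from its index entry

    def toydeltaparent(rev, generaldelta):
        base = basemap[rev]
        if base == rev:
            return -1                    # nullrev: rev is stored as a full text
        return base if generaldelta else rev - 1

    assert toydeltaparent(5, True) == 3    # generaldelta: recorded delta parent
    assert toydeltaparent(5, False) == 4   # classic: always the previous rev
    assert toydeltaparent(6, True) == -1   # full snapshot in either mode
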
883 891 def revdiff(self, rev1, rev2):
884 892 """return or calculate a delta between two revisions"""
885 893 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
886 894 return str(self._chunk(rev2))
887 895
888 896 return mdiff.textdiff(self.revision(rev1),
889 897 self.revision(rev2))
890 898
891 899 def revision(self, nodeorrev):
892 900 """return an uncompressed revision of a given node or revision
893 901 number.
894 902 """
895 903 if isinstance(nodeorrev, int):
896 904 rev = nodeorrev
897 905 node = self.node(rev)
898 906 else:
899 907 node = nodeorrev
900 908 rev = None
901 909
902 910 cachedrev = None
903 911 if node == nullid:
904 912 return ""
905 913 if self._cache:
906 914 if self._cache[0] == node:
907 915 return self._cache[2]
908 916 cachedrev = self._cache[1]
909 917
910 918 # look up what we need to read
911 919 text = None
912 920 if rev is None:
913 921 rev = self.rev(node)
914 922
915 923 # check rev flags
916 924 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
917 925 raise RevlogError(_('incompatible revision flag %x') %
918 926 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
919 927
920 928 # build delta chain
921 929 chain = []
922 930 index = self.index # for performance
923 931 generaldelta = self._generaldelta
924 932 iterrev = rev
925 933 e = index[iterrev]
926 934 while iterrev != e[3] and iterrev != cachedrev:
927 935 chain.append(iterrev)
928 936 if generaldelta:
929 937 iterrev = e[3]
930 938 else:
931 939 iterrev -= 1
932 940 e = index[iterrev]
933 941 chain.reverse()
934 942 base = iterrev
935 943
936 944 if iterrev == cachedrev:
937 945 # cache hit
938 946 text = self._cache[2]
939 947
940 948 # drop cache to save memory
941 949 self._cache = None
942 950
943 951 self._chunkraw(base, rev)
944 952 if text is None:
945 953 text = str(self._chunkbase(base))
946 954
947 955 bins = [self._chunk(r) for r in chain]
948 956 text = mdiff.patches(text, bins)
949 957
950 958 text = self._checkhash(text, node, rev)
951 959
952 960 self._cache = (node, rev, text)
953 961 return text
954 962
955 963 def _checkhash(self, text, node, rev):
956 964 p1, p2 = self.parents(node)
957 965 if node != hash(text, p1, p2):
958 966 raise RevlogError(_("integrity check failed on %s:%d")
959 967 % (self.indexfile, rev))
960 968 return text
961 969
962 970 def checkinlinesize(self, tr, fp=None):
963 971 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
964 972 return
965 973
966 974 trinfo = tr.find(self.indexfile)
967 975 if trinfo is None:
968 976 raise RevlogError(_("%s not found in the transaction")
969 977 % self.indexfile)
970 978
971 979 trindex = trinfo[2]
972 980 dataoff = self.start(trindex)
973 981
974 982 tr.add(self.datafile, dataoff)
975 983
976 984 if fp:
977 985 fp.flush()
978 986 fp.close()
979 987
980 988 df = self.opener(self.datafile, 'w')
981 989 try:
982 990 for r in self:
983 991 df.write(self._chunkraw(r, r))
984 992 finally:
985 993 df.close()
986 994
987 995 fp = self.opener(self.indexfile, 'w', atomictemp=True)
988 996 self.version &= ~(REVLOGNGINLINEDATA)
989 997 self._inline = False
990 998 for i in self:
991 999 e = self._io.packentry(self.index[i], self.node, self.version, i)
992 1000 fp.write(e)
993 1001
994 1002 # if we don't call close, the temp file will never replace the
995 1003 # real index
996 1004 fp.close()
997 1005
998 1006 tr.replace(self.indexfile, trindex * self._io.size)
999 1007 self._chunkclear()
1000 1008
1001 1009 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
1002 1010 """add a revision to the log
1003 1011
1004 1012 text - the revision data to add
1005 1013 transaction - the transaction object used for rollback
1006 1014 link - the linkrev data to add
1007 1015 p1, p2 - the parent nodeids of the revision
1008 1016 cachedelta - an optional precomputed delta
1009 1017 """
1010 1018 node = hash(text, p1, p2)
1011 1019 if node in self.nodemap:
1012 1020 return node
1013 1021
1014 1022 dfh = None
1015 1023 if not self._inline:
1016 1024 dfh = self.opener(self.datafile, "a")
1017 1025 ifh = self.opener(self.indexfile, "a+")
1018 1026 try:
1019 1027 return self._addrevision(node, text, transaction, link, p1, p2,
1020 1028 cachedelta, ifh, dfh)
1021 1029 finally:
1022 1030 if dfh:
1023 1031 dfh.close()
1024 1032 ifh.close()
1025 1033
1026 1034 def _addrevision(self, node, text, transaction, link, p1, p2,
1027 1035 cachedelta, ifh, dfh):
1028 1036 """internal function to add revisions to the log
1029 1037
1030 1038 see addrevision for argument descriptions.
1031 1039 invariants:
1032 1040 - text is optional (can be None); if not set, cachedelta must be set.
1033 1041 if both are set, they must correspond to each other.
1034 1042 """
1035 1043 btext = [text]
1036 1044 def buildtext():
1037 1045 if btext[0] is not None:
1038 1046 return btext[0]
1039 1047 # flush any pending writes here so we can read it in revision
1040 1048 if dfh:
1041 1049 dfh.flush()
1042 1050 ifh.flush()
1043 1051 basetext = self.revision(self.node(cachedelta[0]))
1044 1052 btext[0] = mdiff.patch(basetext, cachedelta[1])
1045 1053 chk = hash(btext[0], p1, p2)
1046 1054 if chk != node:
1047 1055 raise RevlogError(_("consistency error in delta"))
1048 1056 return btext[0]
1049 1057
1050 1058 def builddelta(rev):
1051 1059 # can we use the cached delta?
1052 1060 if cachedelta and cachedelta[0] == rev:
1053 1061 delta = cachedelta[1]
1054 1062 else:
1055 1063 t = buildtext()
1056 1064 ptext = self.revision(self.node(rev))
1057 1065 delta = mdiff.textdiff(ptext, t)
1058 1066 data = compress(delta)
1059 1067 l = len(data[1]) + len(data[0])
1060 1068 if basecache[0] == rev:
1061 1069 chainbase = basecache[1]
1062 1070 else:
1063 1071 chainbase = self.chainbase(rev)
1064 1072 dist = l + offset - self.start(chainbase)
1065 1073 if self._generaldelta:
1066 1074 base = rev
1067 1075 else:
1068 1076 base = chainbase
1069 1077 return dist, l, data, base, chainbase
1070 1078
1071 1079 curr = len(self)
1072 1080 prev = curr - 1
1073 1081 base = chainbase = curr
1074 1082 offset = self.end(prev)
1075 1083 flags = 0
1076 1084 d = None
1077 1085 basecache = self._basecache
1078 1086 p1r, p2r = self.rev(p1), self.rev(p2)
1079 1087
1080 1088 # should we try to build a delta?
1081 1089 if prev != nullrev:
1082 1090 if self._generaldelta:
1083 1091 if p1r >= basecache[1]:
1084 1092 d = builddelta(p1r)
1085 1093 elif p2r >= basecache[1]:
1086 1094 d = builddelta(p2r)
1087 1095 else:
1088 1096 d = builddelta(prev)
1089 1097 else:
1090 1098 d = builddelta(prev)
1091 1099 dist, l, data, base, chainbase = d
1092 1100
1093 1101 # full versions are inserted when the needed deltas
1094 1102 # become comparable to the uncompressed text
1095 1103 if text is None:
1096 1104 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1097 1105 cachedelta[1])
1098 1106 else:
1099 1107 textlen = len(text)
1100 1108 if d is None or dist > textlen * 2:
1101 1109 text = buildtext()
1102 1110 data = compress(text)
1103 1111 l = len(data[1]) + len(data[0])
1104 1112 base = chainbase = curr
1105 1113
1106 1114 e = (offset_type(offset, flags), l, textlen,
1107 1115 base, link, p1r, p2r, node)
1108 1116 self.index.insert(-1, e)
1109 1117 self.nodemap[node] = curr
1110 1118
1111 1119 entry = self._io.packentry(e, self.node, self.version, curr)
1112 1120 if not self._inline:
1113 1121 transaction.add(self.datafile, offset)
1114 1122 transaction.add(self.indexfile, curr * len(entry))
1115 1123 if data[0]:
1116 1124 dfh.write(data[0])
1117 1125 dfh.write(data[1])
1118 1126 dfh.flush()
1119 1127 ifh.write(entry)
1120 1128 else:
1121 1129 offset += curr * self._io.size
1122 1130 transaction.add(self.indexfile, offset, curr)
1123 1131 ifh.write(entry)
1124 1132 ifh.write(data[0])
1125 1133 ifh.write(data[1])
1126 1134 self.checkinlinesize(transaction, ifh)
1127 1135
1128 1136 if type(text) == str: # only accept immutable objects
1129 1137 self._cache = (node, curr, text)
1130 1138 self._basecache = (curr, chainbase)
1131 1139 return node
1132 1140
1133 1141 def group(self, nodelist, bundler, reorder=None):
1134 1142 """Calculate a delta group, yielding a sequence of changegroup chunks
1135 1143 (strings).
1136 1144
1137 1145 Given a list of changeset revs, return a set of deltas and
1138 1146 metadata corresponding to nodes. The first delta is
1139 1147 first parent(nodelist[0]) -> nodelist[0]; the receiver is
1140 1148 guaranteed to have this parent as it has all history before
1141 1149 these changesets. In the case the first parent is nullrev the
1142 1150 changegroup starts with a full revision.
1143 1151 """
1144 1152
1145 1153 # if we don't have any revisions touched by these changesets, bail
1146 1154 if len(nodelist) == 0:
1147 1155 yield bundler.close()
1148 1156 return
1149 1157
1150 1158 # for generaldelta revlogs, we linearize the revs; this will both be
1151 1159 # much quicker and generate a much smaller bundle
1152 1160 if (self._generaldelta and reorder is not False) or reorder:
1153 1161 dag = dagutil.revlogdag(self)
1154 1162 revs = set(self.rev(n) for n in nodelist)
1155 1163 revs = dag.linearize(revs)
1156 1164 else:
1157 1165 revs = sorted([self.rev(n) for n in nodelist])
1158 1166
1159 1167 # add the parent of the first rev
1160 1168 p = self.parentrevs(revs[0])[0]
1161 1169 revs.insert(0, p)
1162 1170
1163 1171 # build deltas
1164 1172 for r in xrange(len(revs) - 1):
1165 1173 prev, curr = revs[r], revs[r + 1]
1166 1174 for c in bundler.revchunk(self, curr, prev):
1167 1175 yield c
1168 1176
1169 1177 yield bundler.close()
1170 1178
1171 1179 def addgroup(self, bundle, linkmapper, transaction):
1172 1180 """
1173 1181 add a delta group
1174 1182
1175 1183 given a set of deltas, add them to the revision log. the
1176 1184 first delta is against its parent, which should be in our
1177 1185 log, the rest are against the previous delta.
1178 1186 """
1179 1187
1180 1188 # track the base of the current delta log
1181 1189 content = []
1182 1190 node = None
1183 1191
1184 1192 r = len(self)
1185 1193 end = 0
1186 1194 if r:
1187 1195 end = self.end(r - 1)
1188 1196 ifh = self.opener(self.indexfile, "a+")
1189 1197 isize = r * self._io.size
1190 1198 if self._inline:
1191 1199 transaction.add(self.indexfile, end + isize, r)
1192 1200 dfh = None
1193 1201 else:
1194 1202 transaction.add(self.indexfile, isize, r)
1195 1203 transaction.add(self.datafile, end)
1196 1204 dfh = self.opener(self.datafile, "a")
1197 1205
1198 1206 try:
1199 1207 # loop through our set of deltas
1200 1208 chain = None
1201 1209 while True:
1202 1210 chunkdata = bundle.deltachunk(chain)
1203 1211 if not chunkdata:
1204 1212 break
1205 1213 node = chunkdata['node']
1206 1214 p1 = chunkdata['p1']
1207 1215 p2 = chunkdata['p2']
1208 1216 cs = chunkdata['cs']
1209 1217 deltabase = chunkdata['deltabase']
1210 1218 delta = chunkdata['delta']
1211 1219
1212 1220 content.append(node)
1213 1221
1214 1222 link = linkmapper(cs)
1215 1223 if node in self.nodemap:
1216 1224 # this can happen if two branches make the same change
1217 1225 chain = node
1218 1226 continue
1219 1227
1220 1228 for p in (p1, p2):
1221 1229 if p not in self.nodemap:
1222 1230 raise LookupError(p, self.indexfile,
1223 1231 _('unknown parent'))
1224 1232
1225 1233 if deltabase not in self.nodemap:
1226 1234 raise LookupError(deltabase, self.indexfile,
1227 1235 _('unknown delta base'))
1228 1236
1229 1237 baserev = self.rev(deltabase)
1230 1238 chain = self._addrevision(node, None, transaction, link,
1231 1239 p1, p2, (baserev, delta), ifh, dfh)
1232 1240 if not dfh and not self._inline:
1233 1241 # addrevision switched from inline to conventional
1234 1242 # reopen the index
1235 1243 ifh.close()
1236 1244 dfh = self.opener(self.datafile, "a")
1237 1245 ifh = self.opener(self.indexfile, "a")
1238 1246 finally:
1239 1247 if dfh:
1240 1248 dfh.close()
1241 1249 ifh.close()
1242 1250
1243 1251 return content
1244 1252
1245 1253 def strip(self, minlink, transaction):
1246 1254 """truncate the revlog on the first revision with a linkrev >= minlink
1247 1255
1248 1256 This function is called when we're stripping revision minlink and
1249 1257 its descendants from the repository.
1250 1258
1251 1259 We have to remove all revisions with linkrev >= minlink, because
1252 1260 the equivalent changelog revisions will be renumbered after the
1253 1261 strip.
1254 1262
1255 1263 So we truncate the revlog on the first of these revisions, and
1256 1264 trust that the caller has saved the revisions that shouldn't be
1257 1265 removed and that it'll re-add them after this truncation.
1258 1266 """
1259 1267 if len(self) == 0:
1260 1268 return
1261 1269
1262 1270 for rev in self:
1263 1271 if self.index[rev][4] >= minlink:
1264 1272 break
1265 1273 else:
1266 1274 return
1267 1275
1268 1276 # first truncate the files on disk
1269 1277 end = self.start(rev)
1270 1278 if not self._inline:
1271 1279 transaction.add(self.datafile, end)
1272 1280 end = rev * self._io.size
1273 1281 else:
1274 1282 end += rev * self._io.size
1275 1283
1276 1284 transaction.add(self.indexfile, end)
1277 1285
1278 1286 # then reset internal state in memory to forget those revisions
1279 1287 self._cache = None
1280 1288 self._chunkclear()
1281 1289 for x in xrange(rev, len(self)):
1282 1290 del self.nodemap[self.node(x)]
1283 1291
1284 1292 del self.index[rev:-1]
1285 1293
1286 1294 def checksize(self):
1287 1295 expected = 0
1288 1296 if len(self):
1289 1297 expected = max(0, self.end(len(self) - 1))
1290 1298
1291 1299 try:
1292 1300 f = self.opener(self.datafile)
1293 1301 f.seek(0, 2)
1294 1302 actual = f.tell()
1295 1303 f.close()
1296 1304 dd = actual - expected
1297 1305 except IOError, inst:
1298 1306 if inst.errno != errno.ENOENT:
1299 1307 raise
1300 1308 dd = 0
1301 1309
1302 1310 try:
1303 1311 f = self.opener(self.indexfile)
1304 1312 f.seek(0, 2)
1305 1313 actual = f.tell()
1306 1314 f.close()
1307 1315 s = self._io.size
1308 1316 i = max(0, actual // s)
1309 1317 di = actual - (i * s)
1310 1318 if self._inline:
1311 1319 databytes = 0
1312 1320 for r in self:
1313 1321 databytes += max(0, self.length(r))
1314 1322 dd = 0
1315 1323 di = actual - len(self) * s - databytes
1316 1324 except IOError, inst:
1317 1325 if inst.errno != errno.ENOENT:
1318 1326 raise
1319 1327 di = 0
1320 1328
1321 1329 return (dd, di)
1322 1330
1323 1331 def files(self):
1324 1332 res = [self.indexfile]
1325 1333 if not self._inline:
1326 1334 res.append(self.datafile)
1327 1335 return res