revlog: use real Booleans instead of 0/1 in nodesbetween
Martin Geisler
r14219:c3342708 default
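The change itself is a small cleanup: nodesbetween seeded its heads dict with the integers 0/1 and later filtered it with an explicit != 0 comparison; after this commit it uses real Booleans and filters on truth value directly. A minimal before/after sketch of the idiom (plain Python 2 in keeping with the code below; the names are illustrative, not Mercurial's):

heads = ['a', 'b', 'c']

# old form: integer flags, explicit comparison against 0
reached = dict.fromkeys(heads, 0)
reached['a'] = 1
old = [n for n in reached.iterkeys() if reached[n] != 0]

# new form: real Booleans, filtered by truth value
reached = dict.fromkeys(heads, False)
reached['a'] = True
new = [n for n, flag in reached.iteritems() if flag]

assert sorted(old) == sorted(new) == ['a']

The behaviour is unchanged in all four hunks below; only the intent is stated more directly.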
@@ -1,1233 +1,1233 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, it is p2
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
78 78 def compress(text):
79 79 """ generate a possibly-compressed representation of text """
80 80 if not text:
81 81 return ("", text)
82 82 l = len(text)
83 83 bin = None
84 84 if l < 44:
85 85 pass
86 86 elif l > 1000000:
87 87 # zlib makes an internal copy, thus doubling memory usage for
88 88 # large files, so let's do this in pieces
89 89 z = zlib.compressobj()
90 90 p = []
91 91 pos = 0
92 92 while pos < l:
93 93 pos2 = pos + 2**20
94 94 p.append(z.compress(text[pos:pos2]))
95 95 pos = pos2
96 96 p.append(z.flush())
97 97 if sum(map(len, p)) < l:
98 98 bin = "".join(p)
99 99 else:
100 100 bin = _compress(text)
101 101 if bin is None or len(bin) > l:
102 102 if text[0] == '\0':
103 103 return ("", text)
104 104 return ('u', text)
105 105 return ("", bin)
106 106
107 107 def decompress(bin):
108 108 """ decompress the given input """
109 109 if not bin:
110 110 return bin
111 111 t = bin[0]
112 112 if t == '\0':
113 113 return bin
114 114 if t == 'x':
115 115 return _decompress(bin)
116 116 if t == 'u':
117 117 return bin[1:]
118 118 raise RevlogError(_("unknown compression type %r") % t)
119 119
120 120 indexformatv0 = ">4l20s20s20s"
121 121 v0shaoffset = 56
122 122
123 123 class revlogoldio(object):
124 124 def __init__(self):
125 125 self.size = struct.calcsize(indexformatv0)
126 126
127 127 def parseindex(self, data, inline):
128 128 s = self.size
129 129 index = []
130 130 nodemap = {nullid: nullrev}
131 131 n = off = 0
132 132 l = len(data)
133 133 while off + s <= l:
134 134 cur = data[off:off + s]
135 135 off += s
136 136 e = _unpack(indexformatv0, cur)
137 137 # transform to revlogv1 format
138 138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 140 index.append(e2)
141 141 nodemap[e[6]] = n
142 142 n += 1
143 143
144 144 # add the magic null revision at -1
145 145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146 146
147 147 return index, nodemap, None
148 148
149 149 def packentry(self, entry, node, version, rev):
150 150 if gettype(entry[0]):
151 151 raise RevlogError(_("index entry flags need RevlogNG"))
152 152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 153 node(entry[5]), node(entry[6]), entry[7])
154 154 return _pack(indexformatv0, *e2)
155 155
156 156 # index ng:
157 157 # 6 bytes: offset
158 158 # 2 bytes: flags
159 159 # 4 bytes: compressed length
160 160 # 4 bytes: uncompressed length
161 161 # 4 bytes: base rev
162 162 # 4 bytes: link rev
163 163 # 4 bytes: parent 1 rev
164 164 # 4 bytes: parent 2 rev
165 165 # 32 bytes: nodeid
166 166 indexformatng = ">Qiiiiii20s12x"
167 167 ngshaoffset = 32
168 168 versionformat = ">I"
169 169
170 170 class revlogio(object):
171 171 def __init__(self):
172 172 self.size = struct.calcsize(indexformatng)
173 173
174 174 def parseindex(self, data, inline):
175 175 # call the C implementation to parse the index data
176 176 index, cache = parsers.parse_index2(data, inline)
177 177 return index, None, cache
178 178
179 179 def packentry(self, entry, node, version, rev):
180 180 p = _pack(indexformatng, *entry)
181 181 if rev == 0:
182 182 p = _pack(versionformat, version) + p[4:]
183 183 return p
184 184
185 185 class revlog(object):
186 186 """
187 187 the underlying revision storage object
188 188
189 189 A revlog consists of two parts, an index and the revision data.
190 190
191 191 The index is a file with a fixed record size containing
192 192 information on each revision, including its nodeid (hash), the
193 193 nodeids of its parents, the position and offset of its data within
194 194 the data file, and the revision it's based on. Finally, each entry
195 195 contains a linkrev entry that can serve as a pointer to external
196 196 data.
197 197
198 198 The revision data itself is a linear collection of data chunks.
199 199 Each chunk represents a revision and is usually represented as a
200 200 delta against the previous chunk. To bound lookup time, runs of
201 201 deltas are limited to about 2 times the length of the original
202 202 version data. This makes retrieval of a version proportional to
203 203 its size, or O(1) relative to the number of revisions.
204 204
205 205 Both pieces of the revlog are written to in an append-only
206 206 fashion, which means we never need to rewrite a file to insert or
207 207 remove data, and can use some simple techniques to avoid the need
208 208 for locking while reading.
209 209 """
210 210 def __init__(self, opener, indexfile, shallowroot=None):
211 211 """
212 212 create a revlog object
213 213
214 214 opener is a function that abstracts the file opening operation
215 215 and can be used to implement COW semantics or the like.
216 216 """
217 217 self.indexfile = indexfile
218 218 self.datafile = indexfile[:-2] + ".d"
219 219 self.opener = opener
220 220 self._cache = None
221 221 self._chunkcache = (0, '')
222 222 self.index = []
223 223 self._shallowroot = shallowroot
224 224 self._pcache = {}
225 225 self._nodecache = {nullid: nullrev}
226 226 self._nodepos = None
227 227
228 228 v = REVLOG_DEFAULT_VERSION
229 229 if hasattr(opener, 'options') and 'defversion' in opener.options:
230 230 v = opener.options['defversion']
231 231 if v & REVLOGNG:
232 232 v |= REVLOGNGINLINEDATA
233 233
234 234 if shallowroot:
235 235 v |= REVLOGSHALLOW
236 236
237 237 i = ''
238 238 try:
239 239 f = self.opener(self.indexfile)
240 240 i = f.read()
241 241 f.close()
242 242 if len(i) > 0:
243 243 v = struct.unpack(versionformat, i[:4])[0]
244 244 except IOError, inst:
245 245 if inst.errno != errno.ENOENT:
246 246 raise
247 247
248 248 self.version = v
249 249 self._inline = v & REVLOGNGINLINEDATA
250 250 self._shallow = v & REVLOGSHALLOW
251 251 flags = v & ~0xFFFF
252 252 fmt = v & 0xFFFF
253 253 if fmt == REVLOGV0 and flags:
254 254 raise RevlogError(_("index %s unknown flags %#04x for format v0")
255 255 % (self.indexfile, flags >> 16))
256 256 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
257 257 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
258 258 % (self.indexfile, flags >> 16))
259 259 elif fmt > REVLOGNG:
260 260 raise RevlogError(_("index %s unknown format %d")
261 261 % (self.indexfile, fmt))
262 262
263 263 self._io = revlogio()
264 264 if self.version == REVLOGV0:
265 265 self._io = revlogoldio()
266 266 try:
267 267 d = self._io.parseindex(i, self._inline)
268 268 except (ValueError, IndexError):
269 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
270 270 self.index, nodemap, self._chunkcache = d
271 271 if nodemap is not None:
272 272 self.nodemap = self._nodecache = nodemap
273 273 if not self._chunkcache:
274 274 self._chunkclear()
275 275
276 276 def tip(self):
277 277 return self.node(len(self.index) - 2)
278 278 def __len__(self):
279 279 return len(self.index) - 1
280 280 def __iter__(self):
281 281 for i in xrange(len(self)):
282 282 yield i
283 283
284 284 @util.propertycache
285 285 def nodemap(self):
286 286 self.rev(self.node(0))
287 287 return self._nodecache
288 288
289 289 def rev(self, node):
290 290 try:
291 291 return self._nodecache[node]
292 292 except KeyError:
293 293 n = self._nodecache
294 294 i = self.index
295 295 p = self._nodepos
296 296 if p is None:
297 297 p = len(i) - 2
298 298 for r in xrange(p, -1, -1):
299 299 v = i[r][7]
300 300 n[v] = r
301 301 if v == node:
302 302 self._nodepos = r - 1
303 303 return r
304 304 raise LookupError(node, self.indexfile, _('no node'))
305 305
306 306 def node(self, rev):
307 307 return self.index[rev][7]
308 308 def linkrev(self, rev):
309 309 return self.index[rev][4]
310 310 def parents(self, node):
311 311 i = self.index
312 312 d = i[self.rev(node)]
313 313 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
314 314 def parentrevs(self, rev):
315 315 return self.index[rev][5:7]
316 316 def start(self, rev):
317 317 return int(self.index[rev][0] >> 16)
318 318 def end(self, rev):
319 319 return self.start(rev) + self.length(rev)
320 320 def length(self, rev):
321 321 return self.index[rev][1]
322 322 def base(self, rev):
323 323 return self.index[rev][3]
324 324 def flags(self, rev):
325 325 return self.index[rev][0] & 0xFFFF
326 326 def rawsize(self, rev):
327 327 """return the length of the uncompressed text for a given revision"""
328 328 l = self.index[rev][2]
329 329 if l >= 0:
330 330 return l
331 331
332 332 t = self.revision(self.node(rev))
333 333 return len(t)
334 334 size = rawsize
335 335
336 336 def reachable(self, node, stop=None):
337 337 """return the set of all nodes ancestral to a given node, including
338 338 the node itself, stopping when stop is matched"""
339 339 reachable = set((node,))
340 340 visit = [node]
341 341 if stop:
342 342 stopn = self.rev(stop)
343 343 else:
344 344 stopn = 0
345 345 while visit:
346 346 n = visit.pop(0)
347 347 if n == stop:
348 348 continue
349 349 if n == nullid:
350 350 continue
351 351 for p in self.parents(n):
352 352 if self.rev(p) < stopn:
353 353 continue
354 354 if p not in reachable:
355 355 reachable.add(p)
356 356 visit.append(p)
357 357 return reachable
358 358
359 359 def ancestors(self, *revs):
360 360 """Generate the ancestors of 'revs' in reverse topological order.
361 361
362 362 Yield a sequence of revision numbers starting with the parents
363 363 of each revision in revs, i.e., each revision is *not* considered
364 364 an ancestor of itself. Results are in breadth-first order:
365 365 parents of each rev in revs, then parents of those, etc. Result
366 366 does not include the null revision."""
367 367 visit = list(revs)
368 368 seen = set([nullrev])
369 369 while visit:
370 370 for parent in self.parentrevs(visit.pop(0)):
371 371 if parent not in seen:
372 372 visit.append(parent)
373 373 seen.add(parent)
374 374 yield parent
375 375
376 376 def descendants(self, *revs):
377 377 """Generate the descendants of 'revs' in revision order.
378 378
379 379 Yield a sequence of revision numbers starting with a child of
380 380 some rev in revs, i.e., each revision is *not* considered a
381 381 descendant of itself. Results are ordered by revision number (a
382 382 topological sort)."""
383 383 first = min(revs)
384 384 if first == nullrev:
385 385 for i in self:
386 386 yield i
387 387 return
388 388
389 389 seen = set(revs)
390 390 for i in xrange(first + 1, len(self)):
391 391 for x in self.parentrevs(i):
392 392 if x != nullrev and x in seen:
393 393 seen.add(i)
394 394 yield i
395 395 break
396 396
397 397 def findcommonmissing(self, common=None, heads=None):
398 398 """Return a tuple of the ancestors of common and the ancestors of heads
399 399 that are not ancestors of common.
400 400
401 401 More specifically, the second element is a list of nodes N such that
402 402 every N satisfies the following constraints:
403 403
404 404 1. N is an ancestor of some node in 'heads'
405 405 2. N is not an ancestor of any node in 'common'
406 406
407 407 The list is sorted by revision number, meaning it is
408 408 topologically sorted.
409 409
410 410 'heads' and 'common' are both lists of node IDs. If heads is
411 411 not supplied, uses all of the revlog's heads. If common is not
412 412 supplied, uses nullid."""
413 413 if common is None:
414 414 common = [nullid]
415 415 if heads is None:
416 416 heads = self.heads()
417 417
418 418 common = [self.rev(n) for n in common]
419 419 heads = [self.rev(n) for n in heads]
420 420
421 421 # we want the ancestors, but inclusive
422 422 has = set(self.ancestors(*common))
423 423 has.add(nullrev)
424 424 has.update(common)
425 425
426 426 # take all ancestors from heads that aren't in has
427 427 missing = set()
428 428 visit = [r for r in heads if r not in has]
429 429 while visit:
430 430 r = visit.pop(0)
431 431 if r in missing:
432 432 continue
433 433 else:
434 434 missing.add(r)
435 435 for p in self.parentrevs(r):
436 436 if p not in has:
437 437 visit.append(p)
438 438 missing = list(missing)
439 439 missing.sort()
440 440 return has, [self.node(r) for r in missing]
441 441
442 442 def findmissing(self, common=None, heads=None):
443 443 """Return the ancestors of heads that are not ancestors of common.
444 444
445 445 More specifically, return a list of nodes N such that every N
446 446 satisfies the following constraints:
447 447
448 448 1. N is an ancestor of some node in 'heads'
449 449 2. N is not an ancestor of any node in 'common'
450 450
451 451 The list is sorted by revision number, meaning it is
452 452 topologically sorted.
453 453
454 454 'heads' and 'common' are both lists of node IDs. If heads is
455 455 not supplied, uses all of the revlog's heads. If common is not
456 456 supplied, uses nullid."""
457 457 _common, missing = self.findcommonmissing(common, heads)
458 458 return missing
459 459
460 460 def nodesbetween(self, roots=None, heads=None):
461 461 """Return a topological path from 'roots' to 'heads'.
462 462
463 463 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
464 464 topologically sorted list of all nodes N that satisfy both of
465 465 these constraints:
466 466
467 467 1. N is a descendant of some node in 'roots'
468 468 2. N is an ancestor of some node in 'heads'
469 469
470 470 Every node is considered to be both a descendant and an ancestor
471 471 of itself, so every reachable node in 'roots' and 'heads' will be
472 472 included in 'nodes'.
473 473
474 474 'outroots' is the list of reachable nodes in 'roots', i.e., the
475 475 subset of 'roots' that is returned in 'nodes'. Likewise,
476 476 'outheads' is the subset of 'heads' that is also in 'nodes'.
477 477
478 478 'roots' and 'heads' are both lists of node IDs. If 'roots' is
479 479 unspecified, uses nullid as the only root. If 'heads' is
480 480 unspecified, uses list of all of the revlog's heads."""
481 481 nonodes = ([], [], [])
482 482 if roots is not None:
483 483 roots = list(roots)
484 484 if not roots:
485 485 return nonodes
486 486 lowestrev = min([self.rev(n) for n in roots])
487 487 else:
488 488 roots = [nullid] # Everybody's a descendent of nullid
489 489 lowestrev = nullrev
490 490 if (lowestrev == nullrev) and (heads is None):
491 491 # We want _all_ the nodes!
492 492 return ([self.node(r) for r in self], [nullid], list(self.heads()))
493 493 if heads is None:
494 494 # All nodes are ancestors, so the latest ancestor is the last
495 495 # node.
496 496 highestrev = len(self) - 1
497 497 # Set ancestors to None to signal that every node is an ancestor.
498 498 ancestors = None
499 499 # Set heads to an empty dictionary for later discovery of heads
500 500 heads = {}
501 501 else:
502 502 heads = list(heads)
503 503 if not heads:
504 504 return nonodes
505 505 ancestors = set()
506 506 # Turn heads into a dictionary so we can remove 'fake' heads.
507 507 # Also, later we will be using it to filter out the heads we can't
508 508 # find from roots.
509 heads = dict.fromkeys(heads, 0)
509 heads = dict.fromkeys(heads, False)
510 510 # Start at the top and keep marking parents until we're done.
511 511 nodestotag = set(heads)
512 512 # Remember where the top was so we can use it as a limit later.
513 513 highestrev = max([self.rev(n) for n in nodestotag])
514 514 while nodestotag:
515 515 # grab a node to tag
516 516 n = nodestotag.pop()
517 517 # Never tag nullid
518 518 if n == nullid:
519 519 continue
520 520 # A node's revision number represents its place in a
521 521 # topologically sorted list of nodes.
522 522 r = self.rev(n)
523 523 if r >= lowestrev:
524 524 if n not in ancestors:
525 525 # If we are possibly a descendent of one of the roots
526 526 # and we haven't already been marked as an ancestor
527 527 ancestors.add(n) # Mark as ancestor
528 528 # Add non-nullid parents to list of nodes to tag.
529 529 nodestotag.update([p for p in self.parents(n) if
530 530 p != nullid])
531 531 elif n in heads: # We've seen it before, is it a fake head?
532 532 # So it is, real heads should not be the ancestors of
533 533 # any other heads.
534 534 heads.pop(n)
535 535 if not ancestors:
536 536 return nonodes
537 537 # Now that we have our set of ancestors, we want to remove any
538 538 # roots that are not ancestors.
539 539
540 540 # If one of the roots was nullid, everything is included anyway.
541 541 if lowestrev > nullrev:
542 542 # But, since we weren't, let's recompute the lowest rev to not
543 543 # include roots that aren't ancestors.
544 544
545 545 # Filter out roots that aren't ancestors of heads
546 546 roots = [n for n in roots if n in ancestors]
547 547 # Recompute the lowest revision
548 548 if roots:
549 549 lowestrev = min([self.rev(n) for n in roots])
550 550 else:
551 551 # No more roots? Return empty list
552 552 return nonodes
553 553 else:
554 554 # We are descending from nullid, and don't need to care about
555 555 # any other roots.
556 556 lowestrev = nullrev
557 557 roots = [nullid]
558 558 # Transform our roots list into a set.
559 559 descendents = set(roots)
560 560 # Also, keep the original roots so we can filter out roots that aren't
561 561 # 'real' roots (i.e. are descended from other roots).
562 562 roots = descendents.copy()
563 563 # Our topologically sorted list of output nodes.
564 564 orderedout = []
565 565 # Don't start at nullid since we don't want nullid in our output list,
566 566 # and if nullid shows up in descendents, empty parents will look like
567 567 # they're descendents.
568 568 for r in xrange(max(lowestrev, 0), highestrev + 1):
569 569 n = self.node(r)
570 570 isdescendent = False
571 571 if lowestrev == nullrev: # Everybody is a descendent of nullid
572 572 isdescendent = True
573 573 elif n in descendents:
574 574 # n is already a descendent
575 575 isdescendent = True
576 576 # This check only needs to be done here because all the roots
577 577 # will start being marked as descendents before the loop.
578 578 if n in roots:
579 579 # If n was a root, check if it's a 'real' root.
580 580 p = tuple(self.parents(n))
581 581 # If any of its parents are descendents, it's not a root.
582 582 if (p[0] in descendents) or (p[1] in descendents):
583 583 roots.remove(n)
584 584 else:
585 585 p = tuple(self.parents(n))
586 586 # A node is a descendent if either of its parents are
587 587 # descendents. (We seeded the descendents set with the roots
588 588 # up there, remember?)
589 589 if (p[0] in descendents) or (p[1] in descendents):
590 590 descendents.add(n)
591 591 isdescendent = True
592 592 if isdescendent and ((ancestors is None) or (n in ancestors)):
593 593 # Only include nodes that are both descendents and ancestors.
594 594 orderedout.append(n)
595 595 if (ancestors is not None) and (n in heads):
596 596 # We're trying to figure out which heads are reachable
597 597 # from roots.
598 598 # Mark this head as having been reached
599 heads[n] = 1
599 heads[n] = True
600 600 elif ancestors is None:
601 601 # Otherwise, we're trying to discover the heads.
602 602 # Assume this is a head because if it isn't, the next step
603 603 # will eventually remove it.
604 heads[n] = 1
604 heads[n] = True
605 605 # But, obviously its parents aren't.
606 606 for p in self.parents(n):
607 607 heads.pop(p, None)
608 heads = [n for n in heads.iterkeys() if heads[n] != 0]
608 heads = [n for n, flag in heads.iteritems() if flag]
609 609 roots = list(roots)
610 610 assert orderedout
611 611 assert roots
612 612 assert heads
613 613 return (orderedout, roots, heads)
614 614
615 615 def headrevs(self):
616 616 count = len(self)
617 617 if not count:
618 618 return [nullrev]
619 619 ishead = [1] * (count + 1)
620 620 index = self.index
621 621 for r in xrange(count):
622 622 e = index[r]
623 623 ishead[e[5]] = ishead[e[6]] = 0
624 624 return [r for r in xrange(count) if ishead[r]]
625 625
626 626 def heads(self, start=None, stop=None):
627 627 """return the list of all nodes that have no children
628 628
629 629 if start is specified, only heads that are descendants of
630 630 start will be returned
631 631 if stop is specified, it will consider all the revs from stop
632 632 as if they had no children
633 633 """
634 634 if start is None and stop is None:
635 635 if not len(self):
636 636 return [nullid]
637 637 return [self.node(r) for r in self.headrevs()]
638 638
639 639 if start is None:
640 640 start = nullid
641 641 if stop is None:
642 642 stop = []
643 643 stoprevs = set([self.rev(n) for n in stop])
644 644 startrev = self.rev(start)
645 645 reachable = set((startrev,))
646 646 heads = set((startrev,))
647 647
648 648 parentrevs = self.parentrevs
649 649 for r in xrange(startrev + 1, len(self)):
650 650 for p in parentrevs(r):
651 651 if p in reachable:
652 652 if r not in stoprevs:
653 653 reachable.add(r)
654 654 heads.add(r)
655 655 if p in heads and p not in stoprevs:
656 656 heads.remove(p)
657 657
658 658 return [self.node(r) for r in heads]
659 659
660 660 def children(self, node):
661 661 """find the children of a given node"""
662 662 c = []
663 663 p = self.rev(node)
664 664 for r in range(p + 1, len(self)):
665 665 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
666 666 if prevs:
667 667 for pr in prevs:
668 668 if pr == p:
669 669 c.append(self.node(r))
670 670 elif p == nullrev:
671 671 c.append(self.node(r))
672 672 return c
673 673
674 674 def descendant(self, start, end):
675 675 if start == nullrev:
676 676 return True
677 677 for i in self.descendants(start):
678 678 if i == end:
679 679 return True
680 680 elif i > end:
681 681 break
682 682 return False
683 683
684 684 def ancestor(self, a, b):
685 685 """calculate the least common ancestor of nodes a and b"""
686 686
687 687 # fast path, check if it is a descendant
688 688 a, b = self.rev(a), self.rev(b)
689 689 start, end = sorted((a, b))
690 690 if self.descendant(start, end):
691 691 return self.node(start)
692 692
693 693 def parents(rev):
694 694 return [p for p in self.parentrevs(rev) if p != nullrev]
695 695
696 696 c = ancestor.ancestor(a, b, parents)
697 697 if c is None:
698 698 return nullid
699 699
700 700 return self.node(c)
701 701
702 702 def _match(self, id):
703 703 if isinstance(id, (long, int)):
704 704 # rev
705 705 return self.node(id)
706 706 if len(id) == 20:
707 707 # possibly a binary node
708 708 # odds of a binary node being all hex in ASCII are 1 in 10**25
709 709 try:
710 710 node = id
711 711 self.rev(node) # quick search the index
712 712 return node
713 713 except LookupError:
714 714 pass # may be partial hex id
715 715 try:
716 716 # str(rev)
717 717 rev = int(id)
718 718 if str(rev) != id:
719 719 raise ValueError
720 720 if rev < 0:
721 721 rev = len(self) + rev
722 722 if rev < 0 or rev >= len(self):
723 723 raise ValueError
724 724 return self.node(rev)
725 725 except (ValueError, OverflowError):
726 726 pass
727 727 if len(id) == 40:
728 728 try:
729 729 # a full hex nodeid?
730 730 node = bin(id)
731 731 self.rev(node)
732 732 return node
733 733 except (TypeError, LookupError):
734 734 pass
735 735
736 736 def _partialmatch(self, id):
737 737 if id in self._pcache:
738 738 return self._pcache[id]
739 739
740 740 if len(id) < 40:
741 741 try:
742 742 # hex(node)[:...]
743 743 l = len(id) // 2 # grab an even number of digits
744 744 prefix = bin(id[:l * 2])
745 745 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
746 746 nl = [n for n in nl if hex(n).startswith(id)]
747 747 if len(nl) > 0:
748 748 if len(nl) == 1:
749 749 self._pcache[id] = nl[0]
750 750 return nl[0]
751 751 raise LookupError(id, self.indexfile,
752 752 _('ambiguous identifier'))
753 753 return None
754 754 except TypeError:
755 755 pass
756 756
757 757 def lookup(self, id):
758 758 """locate a node based on:
759 759 - revision number or str(revision number)
760 760 - nodeid or subset of hex nodeid
761 761 """
762 762 n = self._match(id)
763 763 if n is not None:
764 764 return n
765 765 n = self._partialmatch(id)
766 766 if n:
767 767 return n
768 768
769 769 raise LookupError(id, self.indexfile, _('no match found'))
770 770
771 771 def cmp(self, node, text):
772 772 """compare text with a given file revision
773 773
774 774 returns True if text is different than what is stored.
775 775 """
776 776 p1, p2 = self.parents(node)
777 777 return hash(text, p1, p2) != node
778 778
779 779 def _addchunk(self, offset, data):
780 780 o, d = self._chunkcache
781 781 # try to add to existing cache
782 782 if o + len(d) == offset and len(d) + len(data) < _chunksize:
783 783 self._chunkcache = o, d + data
784 784 else:
785 785 self._chunkcache = offset, data
786 786
787 787 def _loadchunk(self, offset, length):
788 788 if self._inline:
789 789 df = self.opener(self.indexfile)
790 790 else:
791 791 df = self.opener(self.datafile)
792 792
793 793 readahead = max(65536, length)
794 794 df.seek(offset)
795 795 d = df.read(readahead)
796 796 self._addchunk(offset, d)
797 797 if readahead > length:
798 798 return d[:length]
799 799 return d
800 800
801 801 def _getchunk(self, offset, length):
802 802 o, d = self._chunkcache
803 803 l = len(d)
804 804
805 805 # is it in the cache?
806 806 cachestart = offset - o
807 807 cacheend = cachestart + length
808 808 if cachestart >= 0 and cacheend <= l:
809 809 if cachestart == 0 and cacheend == l:
810 810 return d # avoid a copy
811 811 return d[cachestart:cacheend]
812 812
813 813 return self._loadchunk(offset, length)
814 814
815 815 def _chunkraw(self, startrev, endrev):
816 816 start = self.start(startrev)
817 817 length = self.end(endrev) - start
818 818 if self._inline:
819 819 start += (startrev + 1) * self._io.size
820 820 return self._getchunk(start, length)
821 821
822 822 def _chunk(self, rev):
823 823 return decompress(self._chunkraw(rev, rev))
824 824
825 825 def _chunkbase(self, rev):
826 826 return self._chunk(rev)
827 827
828 828 def _chunkclear(self):
829 829 self._chunkcache = (0, '')
830 830
831 831 def deltaparent(self, rev):
832 832 """return deltaparent of the given revision"""
833 833 if self.index[rev][3] == rev:
834 834 return nullrev
835 835 else:
836 836 return rev - 1
837 837
838 838 def revdiff(self, rev1, rev2):
839 839 """return or calculate a delta between two revisions"""
840 840 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
841 841 return self._chunk(rev2)
842 842
843 843 return mdiff.textdiff(self.revision(self.node(rev1)),
844 844 self.revision(self.node(rev2)))
845 845
846 846 def revision(self, node):
847 847 """return an uncompressed revision of a given node"""
848 848 cachedrev = None
849 849 if node == nullid:
850 850 return ""
851 851 if self._cache:
852 852 if self._cache[0] == node:
853 853 return self._cache[2]
854 854 cachedrev = self._cache[1]
855 855
856 856 # look up what we need to read
857 857 text = None
858 858 rev = self.rev(node)
859 859 base = self.base(rev)
860 860
861 861 # check rev flags
862 862 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
863 863 raise RevlogError(_('incompatible revision flag %x') %
864 864 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
865 865
866 866 # build delta chain
867 867 chain = []
868 868 iterrev = rev
869 869 while iterrev != base and iterrev != cachedrev:
870 870 chain.append(iterrev)
871 871 iterrev -= 1
872 872 chain.reverse()
873 873 base = iterrev
874 874
875 875 if iterrev == cachedrev:
876 876 # cache hit
877 877 text = self._cache[2]
878 878
879 879 # drop cache to save memory
880 880 self._cache = None
881 881
882 882 self._chunkraw(base, rev)
883 883 if text is None:
884 884 text = self._chunkbase(base)
885 885
886 886 bins = [self._chunk(r) for r in chain]
887 887 text = mdiff.patches(text, bins)
888 888
889 889 text = self._checkhash(text, node, rev)
890 890
891 891 self._cache = (node, rev, text)
892 892 return text
893 893
894 894 def _checkhash(self, text, node, rev):
895 895 p1, p2 = self.parents(node)
896 896 if node != hash(text, p1, p2):
897 897 raise RevlogError(_("integrity check failed on %s:%d")
898 898 % (self.indexfile, rev))
899 899 return text
900 900
901 901 def checkinlinesize(self, tr, fp=None):
902 902 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
903 903 return
904 904
905 905 trinfo = tr.find(self.indexfile)
906 906 if trinfo is None:
907 907 raise RevlogError(_("%s not found in the transaction")
908 908 % self.indexfile)
909 909
910 910 trindex = trinfo[2]
911 911 dataoff = self.start(trindex)
912 912
913 913 tr.add(self.datafile, dataoff)
914 914
915 915 if fp:
916 916 fp.flush()
917 917 fp.close()
918 918
919 919 df = self.opener(self.datafile, 'w')
920 920 try:
921 921 for r in self:
922 922 df.write(self._chunkraw(r, r))
923 923 finally:
924 924 df.close()
925 925
926 926 fp = self.opener(self.indexfile, 'w', atomictemp=True)
927 927 self.version &= ~(REVLOGNGINLINEDATA)
928 928 self._inline = False
929 929 for i in self:
930 930 e = self._io.packentry(self.index[i], self.node, self.version, i)
931 931 fp.write(e)
932 932
933 933 # if we don't call rename, the temp file will never replace the
934 934 # real index
935 935 fp.rename()
936 936
937 937 tr.replace(self.indexfile, trindex * self._io.size)
938 938 self._chunkclear()
939 939
940 940 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
941 941 """add a revision to the log
942 942
943 943 text - the revision data to add
944 944 transaction - the transaction object used for rollback
945 945 link - the linkrev data to add
946 946 p1, p2 - the parent nodeids of the revision
947 947 cachedelta - an optional precomputed delta
948 948 """
949 949 node = hash(text, p1, p2)
950 950 if node in self.nodemap:
951 951 return node
952 952
953 953 dfh = None
954 954 if not self._inline:
955 955 dfh = self.opener(self.datafile, "a")
956 956 ifh = self.opener(self.indexfile, "a+")
957 957 try:
958 958 return self._addrevision(node, text, transaction, link, p1, p2,
959 959 cachedelta, ifh, dfh)
960 960 finally:
961 961 if dfh:
962 962 dfh.close()
963 963 ifh.close()
964 964
965 965 def _addrevision(self, node, text, transaction, link, p1, p2,
966 966 cachedelta, ifh, dfh):
967 967
968 968 btext = [text]
969 969 def buildtext():
970 970 if btext[0] is not None:
971 971 return btext[0]
972 972 # flush any pending writes here so we can read it in revision
973 973 if dfh:
974 974 dfh.flush()
975 975 ifh.flush()
976 976 basetext = self.revision(self.node(cachedelta[0]))
977 977 btext[0] = mdiff.patch(basetext, cachedelta[1])
978 978 chk = hash(btext[0], p1, p2)
979 979 if chk != node:
980 980 raise RevlogError(_("consistency error in delta"))
981 981 return btext[0]
982 982
983 983 def builddelta(rev):
984 984 # can we use the cached delta?
985 985 if cachedelta and cachedelta[0] == rev:
986 986 delta = cachedelta[1]
987 987 else:
988 988 t = buildtext()
989 989 ptext = self.revision(self.node(rev))
990 990 delta = mdiff.textdiff(ptext, t)
991 991 data = compress(delta)
992 992 l = len(data[1]) + len(data[0])
993 993 base = self.base(rev)
994 994 dist = l + offset - self.start(base)
995 995 return dist, l, data, base
996 996
997 997 curr = len(self)
998 998 prev = curr - 1
999 999 base = curr
1000 1000 offset = self.end(prev)
1001 1001 flags = 0
1002 1002 d = None
1003 1003 p1r, p2r = self.rev(p1), self.rev(p2)
1004 1004
1005 1005 # should we try to build a delta?
1006 1006 if prev != nullrev:
1007 1007 d = builddelta(prev)
1008 1008 dist, l, data, base = d
1009 1009
1010 1010 # full versions are inserted when the needed deltas
1011 1011 # become comparable to the uncompressed text
1012 1012 if text is None:
1013 1013 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1014 1014 cachedelta[1])
1015 1015 else:
1016 1016 textlen = len(text)
1017 1017 if d is None or dist > textlen * 2:
1018 1018 text = buildtext()
1019 1019 data = compress(text)
1020 1020 l = len(data[1]) + len(data[0])
1021 1021 base = curr
1022 1022
1023 1023 e = (offset_type(offset, flags), l, textlen,
1024 1024 base, link, p1r, p2r, node)
1025 1025 self.index.insert(-1, e)
1026 1026 self.nodemap[node] = curr
1027 1027
1028 1028 entry = self._io.packentry(e, self.node, self.version, curr)
1029 1029 if not self._inline:
1030 1030 transaction.add(self.datafile, offset)
1031 1031 transaction.add(self.indexfile, curr * len(entry))
1032 1032 if data[0]:
1033 1033 dfh.write(data[0])
1034 1034 dfh.write(data[1])
1035 1035 dfh.flush()
1036 1036 ifh.write(entry)
1037 1037 else:
1038 1038 offset += curr * self._io.size
1039 1039 transaction.add(self.indexfile, offset, curr)
1040 1040 ifh.write(entry)
1041 1041 ifh.write(data[0])
1042 1042 ifh.write(data[1])
1043 1043 self.checkinlinesize(transaction, ifh)
1044 1044
1045 1045 if type(text) == str: # only accept immutable objects
1046 1046 self._cache = (node, curr, text)
1047 1047 return node
1048 1048
1049 1049 def group(self, nodelist, bundler):
1050 1050 """Calculate a delta group, yielding a sequence of changegroup chunks
1051 1051 (strings).
1052 1052
1053 1053 Given a list of changeset revs, return a set of deltas and
1054 1054 metadata corresponding to nodes. The first delta is
1055 1055 first parent(nodelist[0]) -> nodelist[0], the receiver is
1056 1056 guaranteed to have this parent as it has all history before
1057 1057 these changesets. In the case firstparent is nullrev the
1058 1058 changegroup starts with a full revision.
1059 1059 """
1060 1060
1061 1061 revs = sorted([self.rev(n) for n in nodelist])
1062 1062
1063 1063 # if we don't have any revisions touched by these changesets, bail
1064 1064 if not revs:
1065 1065 yield bundler.close()
1066 1066 return
1067 1067
1068 1068 # add the parent of the first rev
1069 1069 p = self.parentrevs(revs[0])[0]
1070 1070 revs.insert(0, p)
1071 1071
1072 1072 # build deltas
1073 1073 for r in xrange(len(revs) - 1):
1074 1074 prev, curr = revs[r], revs[r + 1]
1075 1075 for c in bundler.revchunk(self, curr, prev):
1076 1076 yield c
1077 1077
1078 1078 yield bundler.close()
1079 1079
1080 1080 def addgroup(self, bundle, linkmapper, transaction):
1081 1081 """
1082 1082 add a delta group
1083 1083
1084 1084 given a set of deltas, add them to the revision log. the
1085 1085 first delta is against its parent, which should be in our
1086 1086 log, the rest are against the previous delta.
1087 1087 """
1088 1088
1089 1089 # track the base of the current delta log
1090 1090 node = None
1091 1091
1092 1092 r = len(self)
1093 1093 end = 0
1094 1094 if r:
1095 1095 end = self.end(r - 1)
1096 1096 ifh = self.opener(self.indexfile, "a+")
1097 1097 isize = r * self._io.size
1098 1098 if self._inline:
1099 1099 transaction.add(self.indexfile, end + isize, r)
1100 1100 dfh = None
1101 1101 else:
1102 1102 transaction.add(self.indexfile, isize, r)
1103 1103 transaction.add(self.datafile, end)
1104 1104 dfh = self.opener(self.datafile, "a")
1105 1105
1106 1106 try:
1107 1107 # loop through our set of deltas
1108 1108 chain = None
1109 1109 while 1:
1110 1110 chunkdata = bundle.deltachunk(chain)
1111 1111 if not chunkdata:
1112 1112 break
1113 1113 node = chunkdata['node']
1114 1114 p1 = chunkdata['p1']
1115 1115 p2 = chunkdata['p2']
1116 1116 cs = chunkdata['cs']
1117 1117 deltabase = chunkdata['deltabase']
1118 1118 delta = chunkdata['delta']
1119 1119
1120 1120 link = linkmapper(cs)
1121 1121 if node in self.nodemap:
1122 1122 # this can happen if two branches make the same change
1123 1123 chain = node
1124 1124 continue
1125 1125
1126 1126 for p in (p1, p2):
1127 1127 if not p in self.nodemap:
1128 1128 raise LookupError(p, self.indexfile,
1129 1129 _('unknown parent'))
1130 1130
1131 1131 if deltabase not in self.nodemap:
1132 1132 raise LookupError(deltabase, self.indexfile,
1133 1133 _('unknown delta base'))
1134 1134
1135 1135 baserev = self.rev(deltabase)
1136 1136 chain = self._addrevision(node, None, transaction, link,
1137 1137 p1, p2, (baserev, delta), ifh, dfh)
1138 1138 if not dfh and not self._inline:
1139 1139 # addrevision switched from inline to conventional
1140 1140 # reopen the index
1141 1141 ifh.close()
1142 1142 dfh = self.opener(self.datafile, "a")
1143 1143 ifh = self.opener(self.indexfile, "a")
1144 1144 finally:
1145 1145 if dfh:
1146 1146 dfh.close()
1147 1147 ifh.close()
1148 1148
1149 1149 return node
1150 1150
1151 1151 def strip(self, minlink, transaction):
1152 1152 """truncate the revlog on the first revision with a linkrev >= minlink
1153 1153
1154 1154 This function is called when we're stripping revision minlink and
1155 1155 its descendants from the repository.
1156 1156
1157 1157 We have to remove all revisions with linkrev >= minlink, because
1158 1158 the equivalent changelog revisions will be renumbered after the
1159 1159 strip.
1160 1160
1161 1161 So we truncate the revlog on the first of these revisions, and
1162 1162 trust that the caller has saved the revisions that shouldn't be
1163 1163 removed and that it'll readd them after this truncation.
1164 1164 """
1165 1165 if len(self) == 0:
1166 1166 return
1167 1167
1168 1168 for rev in self:
1169 1169 if self.index[rev][4] >= minlink:
1170 1170 break
1171 1171 else:
1172 1172 return
1173 1173
1174 1174 # first truncate the files on disk
1175 1175 end = self.start(rev)
1176 1176 if not self._inline:
1177 1177 transaction.add(self.datafile, end)
1178 1178 end = rev * self._io.size
1179 1179 else:
1180 1180 end += rev * self._io.size
1181 1181
1182 1182 transaction.add(self.indexfile, end)
1183 1183
1184 1184 # then reset internal state in memory to forget those revisions
1185 1185 self._cache = None
1186 1186 self._chunkclear()
1187 1187 for x in xrange(rev, len(self)):
1188 1188 del self.nodemap[self.node(x)]
1189 1189
1190 1190 del self.index[rev:-1]
1191 1191
1192 1192 def checksize(self):
1193 1193 expected = 0
1194 1194 if len(self):
1195 1195 expected = max(0, self.end(len(self) - 1))
1196 1196
1197 1197 try:
1198 1198 f = self.opener(self.datafile)
1199 1199 f.seek(0, 2)
1200 1200 actual = f.tell()
1201 1201 f.close()
1202 1202 dd = actual - expected
1203 1203 except IOError, inst:
1204 1204 if inst.errno != errno.ENOENT:
1205 1205 raise
1206 1206 dd = 0
1207 1207
1208 1208 try:
1209 1209 f = self.opener(self.indexfile)
1210 1210 f.seek(0, 2)
1211 1211 actual = f.tell()
1212 1212 f.close()
1213 1213 s = self._io.size
1214 1214 i = max(0, actual // s)
1215 1215 di = actual - (i * s)
1216 1216 if self._inline:
1217 1217 databytes = 0
1218 1218 for r in self:
1219 1219 databytes += max(0, self.length(r))
1220 1220 dd = 0
1221 1221 di = actual - len(self) * s - databytes
1222 1222 except IOError, inst:
1223 1223 if inst.errno != errno.ENOENT:
1224 1224 raise
1225 1225 di = 0
1226 1226
1227 1227 return (dd, di)
1228 1228
1229 1229 def files(self):
1230 1230 res = [self.indexfile]
1231 1231 if not self._inline:
1232 1232 res.append(self.datafile)
1233 1233 return res
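For reference, the nodesbetween docstring above promises a topological slice: every returned node is a descendant of some root and an ancestor of some head, with each node counting as its own ancestor and descendant. A toy sketch of that contract over a child-to-parents map (an independent illustration, not Mercurial's revision-number-based algorithm):

parents = {
    'a': [],          # root of the toy DAG
    'b': ['a'],
    'c': ['b'],
    'd': ['b'],
}

def ancestors_of(node):
    # inclusive ancestors: a node counts as its own ancestor
    seen = set([node])
    stack = [node]
    while stack:
        for p in parents[stack.pop()]:
            if p not in seen:
                seen.add(p)
                stack.append(p)
    return seen

def toy_nodesbetween(roots, heads):
    # descendants of some root: nodes whose inclusive ancestors hit a root
    below = set(n for n in parents if ancestors_of(n) & set(roots))
    # ancestors of some head
    above = set()
    for h in heads:
        above |= ancestors_of(h)
    return sorted(below & above)

assert toy_nodesbetween(['b'], ['c']) == ['b', 'c']

On this DAG, 'a' is excluded because it is not a descendant of the root 'b', and 'd' because it is not an ancestor of the head 'c'.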