# Scraped commit-viewer header (cleaned up):
# revlog: use getattr instead of hasattr
# Author: Augie Fackler
# Changeset: r14960:49781981 (default branch)
@@ -1,1278 +1,1279 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
# revlog header flags (stored in the first 4 bytes of the index file)
REVLOGV0 = 0                    # original, pre-NG index format
REVLOGNG = 1                    # "NG" index format
REVLOGNGINLINEDATA = (1 << 16)  # revision data interleaved with the index
REVLOGGENERALDELTA = (1 << 17)  # delta parent is stored in the index's base
                                # field instead of implicitly being rev - 1
                                # (see deltaparent())
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA  # all valid NG flags

# per-revision index flags; no flags are defined yet
REVIDX_KNOWN_FLAGS = 0

# max size of revlog with inline data (bytes)
_maxinline = 131072
# upper bound for the cached chunk window (bytes), see _addchunk
_chunksize = 1048576

# re-exported here so callers can catch them without importing error
RevlogError = error.RevlogError
LookupError = error.LookupError
45 45
def getoffset(q):
    """Return the data-file offset packed into the high bits of *q*."""
    return int(q >> 16)
48 48
def gettype(q):
    """Return the per-revision flag bits from the low 16 bits of *q*."""
    return int(q & 0xFFFF)
51 51
def offset_type(offset, type):
    """Pack *offset* and the 16-bit *type* flags into one index field."""
    return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    if p2 == nullid:
        # at most one parent is set, and by convention it is p1;
        # deep copy of the precomputed null hash is faster than reseeding
        s = nullhash.copy()
        s.update(p1)
    else:
        # both parents present: feed them in sorted order so the result
        # does not depend on parent ordering
        lo, hi = min(p1, p2), max(p1, p2)
        s = _sha(lo)
        s.update(hi)
    s.update(text)
    return s.digest()
77 77
def compress(text):
    """generate a possibly-compressed representation of text

    Returns a (header, data) pair: header is '' when data is empty, raw
    (starting with NUL) or zlib-compressed, and 'u' when the text had to
    be stored uncompressed but needs a tag to disambiguate it.
    """
    if not text:
        return ("", text)
    size = len(text)
    candidate = None
    if size < 44:
        # too small for compression to pay off
        pass
    elif size > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so lets do this in pieces
        z = zlib.compressobj()
        pieces = []
        offset = 0
        while offset < size:
            end = offset + 2 ** 20
            pieces.append(z.compress(text[offset:end]))
            offset = end
        pieces.append(z.flush())
        if sum(map(len, pieces)) < size:
            candidate = "".join(pieces)
    else:
        candidate = _compress(text)
    if candidate is None or len(candidate) > size:
        # compression did not help; store literally, tagging with 'u'
        # unless the text already begins with the NUL literal marker
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", candidate)
106 106
def decompress(bin):
    """ decompress the given input

    The first byte tags the encoding: NUL means stored literally,
    'u' means literal with the tag stripped, 'x' means zlib.
    """
    if not bin:
        return bin
    tag = bin[0]
    if tag == '\0':
        return bin
    if tag == 'x':
        return _decompress(bin)
    if tag == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % tag)
119 119
# v0 index entry: four 32-bit ints (offset, size, base, linkrev) followed
# by three 20-byte hashes (p1, p2, node)
indexformatv0 = ">4l20s20s20s"
# byte offset of the node hash within a packed v0 entry (4 * 4 + 2 * 20)
v0shaoffset = 56
122 122
class revlogoldio(object):
    """Parser/packer for the original (v0) revlog index format."""

    def __init__(self):
        # size in bytes of one packed v0 index entry
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        """Parse raw v0 index *data* into (index, nodemap, cache)."""
        entrysize = self.size
        index = []
        nodemap = {nullid: nullrev}
        rev = 0
        offset = 0
        total = len(data)
        while offset + entrysize <= total:
            e = _unpack(indexformatv0, data[offset:offset + entrysize])
            offset += entrysize
            # transform to revlogv1 format: v0 has no flags and no
            # uncompressed-length field (-1 signals "unknown"), and
            # parents are stored as nodes rather than revision numbers
            index.append((offset_type(e[0], 0), e[1], -1, e[2], e[3],
                          nodemap.get(e[4], nullrev),
                          nodemap.get(e[5], nullrev), e[6]))
            nodemap[e[6]] = rev
            rev += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize a v1-style index tuple back into the v0 wire format."""
        if gettype(entry[0]):
            # v0 has nowhere to store per-revision flags
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)
155 155
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
# byte offset of the node hash within a packed NG entry
ngshaoffset = 32
# the version/flags header occupies the first 4 bytes of the index file
versionformat = ">I"
169 169
class revlogio(object):
    """Parser/packer for the RevlogNG index format."""

    def __init__(self):
        # size in bytes of one packed NG index entry
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, None, cache

    def packentry(self, entry, node, version, rev):
        """Serialize one index tuple; entry 0 doubles as the file header."""
        packed = _pack(indexformatng, *entry)
        if rev == 0:
            # overwrite the first entry's offset field with the version
            # header, since rev 0 always starts at offset 0 anyway
            packed = _pack(versionformat, version) + packed[4:]
        return packed
184 184
185 185 class revlog(object):
186 186 """
187 187 the underlying revision storage object
188 188
189 189 A revlog consists of two parts, an index and the revision data.
190 190
191 191 The index is a file with a fixed record size containing
192 192 information on each revision, including its nodeid (hash), the
193 193 nodeids of its parents, the position and offset of its data within
194 194 the data file, and the revision it's based on. Finally, each entry
195 195 contains a linkrev entry that can serve as a pointer to external
196 196 data.
197 197
198 198 The revision data itself is a linear collection of data chunks.
199 199 Each chunk represents a revision and is usually represented as a
200 200 delta against the previous chunk. To bound lookup time, runs of
201 201 deltas are limited to about 2 times the length of the original
202 202 version data. This makes retrieval of a version proportional to
203 203 its size, or O(1) relative to the number of revisions.
204 204
205 205 Both pieces of the revlog are written to in an append-only
206 206 fashion, which means we never need to rewrite a file to insert or
207 207 remove data, and can use some simple techniques to avoid the need
208 208 for locking while reading.
209 209 """
210 210 def __init__(self, opener, indexfile):
211 211 """
212 212 create a revlog object
213 213
214 214 opener is a function that abstracts the file opening operation
215 215 and can be used to implement COW semantics or the like.
216 216 """
217 217 self.indexfile = indexfile
218 218 self.datafile = indexfile[:-2] + ".d"
219 219 self.opener = opener
220 220 self._cache = None
221 221 self._basecache = (0, 0)
222 222 self._chunkcache = (0, '')
223 223 self.index = []
224 224 self._pcache = {}
225 225 self._nodecache = {nullid: nullrev}
226 226 self._nodepos = None
227 227
228 228 v = REVLOG_DEFAULT_VERSION
229 if hasattr(opener, 'options'):
230 if 'revlogv1' in opener.options:
231 if 'generaldelta' in opener.options:
229 opts = getattr(opener, 'options', None)
230 if opts is not None:
231 if 'revlogv1' in opts:
232 if 'generaldelta' in opts:
232 233 v |= REVLOGGENERALDELTA
233 234 else:
234 235 v = 0
235 236
236 237 i = ''
237 238 self._initempty = True
238 239 try:
239 240 f = self.opener(self.indexfile)
240 241 i = f.read()
241 242 f.close()
242 243 if len(i) > 0:
243 244 v = struct.unpack(versionformat, i[:4])[0]
244 245 self._initempty = False
245 246 except IOError, inst:
246 247 if inst.errno != errno.ENOENT:
247 248 raise
248 249
249 250 self.version = v
250 251 self._inline = v & REVLOGNGINLINEDATA
251 252 self._generaldelta = v & REVLOGGENERALDELTA
252 253 flags = v & ~0xFFFF
253 254 fmt = v & 0xFFFF
254 255 if fmt == REVLOGV0 and flags:
255 256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
256 257 % (self.indexfile, flags >> 16))
257 258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
258 259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
259 260 % (self.indexfile, flags >> 16))
260 261 elif fmt > REVLOGNG:
261 262 raise RevlogError(_("index %s unknown format %d")
262 263 % (self.indexfile, fmt))
263 264
264 265 self._io = revlogio()
265 266 if self.version == REVLOGV0:
266 267 self._io = revlogoldio()
267 268 try:
268 269 d = self._io.parseindex(i, self._inline)
269 270 except (ValueError, IndexError):
270 271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
271 272 self.index, nodemap, self._chunkcache = d
272 273 if nodemap is not None:
273 274 self.nodemap = self._nodecache = nodemap
274 275 if not self._chunkcache:
275 276 self._chunkclear()
276 277
277 278 def tip(self):
278 279 return self.node(len(self.index) - 2)
279 280 def __len__(self):
280 281 return len(self.index) - 1
281 282 def __iter__(self):
282 283 for i in xrange(len(self)):
283 284 yield i
284 285
285 286 @util.propertycache
286 287 def nodemap(self):
287 288 self.rev(self.node(0))
288 289 return self._nodecache
289 290
290 291 def rev(self, node):
291 292 try:
292 293 return self._nodecache[node]
293 294 except KeyError:
294 295 n = self._nodecache
295 296 i = self.index
296 297 p = self._nodepos
297 298 if p is None:
298 299 p = len(i) - 2
299 300 for r in xrange(p, -1, -1):
300 301 v = i[r][7]
301 302 n[v] = r
302 303 if v == node:
303 304 self._nodepos = r - 1
304 305 return r
305 306 raise LookupError(node, self.indexfile, _('no node'))
306 307
307 308 def node(self, rev):
308 309 return self.index[rev][7]
309 310 def linkrev(self, rev):
310 311 return self.index[rev][4]
311 312 def parents(self, node):
312 313 i = self.index
313 314 d = i[self.rev(node)]
314 315 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
315 316 def parentrevs(self, rev):
316 317 return self.index[rev][5:7]
317 318 def start(self, rev):
318 319 return int(self.index[rev][0] >> 16)
319 320 def end(self, rev):
320 321 return self.start(rev) + self.length(rev)
321 322 def length(self, rev):
322 323 return self.index[rev][1]
323 324 def chainbase(self, rev):
324 325 index = self.index
325 326 base = index[rev][3]
326 327 while base != rev:
327 328 rev = base
328 329 base = index[rev][3]
329 330 return base
330 331 def flags(self, rev):
331 332 return self.index[rev][0] & 0xFFFF
332 333 def rawsize(self, rev):
333 334 """return the length of the uncompressed text for a given revision"""
334 335 l = self.index[rev][2]
335 336 if l >= 0:
336 337 return l
337 338
338 339 t = self.revision(self.node(rev))
339 340 return len(t)
340 341 size = rawsize
341 342
342 343 def reachable(self, node, stop=None):
343 344 """return the set of all nodes ancestral to a given node, including
344 345 the node itself, stopping when stop is matched"""
345 346 reachable = set((node,))
346 347 visit = [node]
347 348 if stop:
348 349 stopn = self.rev(stop)
349 350 else:
350 351 stopn = 0
351 352 while visit:
352 353 n = visit.pop(0)
353 354 if n == stop:
354 355 continue
355 356 if n == nullid:
356 357 continue
357 358 for p in self.parents(n):
358 359 if self.rev(p) < stopn:
359 360 continue
360 361 if p not in reachable:
361 362 reachable.add(p)
362 363 visit.append(p)
363 364 return reachable
364 365
365 366 def ancestors(self, *revs):
366 367 """Generate the ancestors of 'revs' in reverse topological order.
367 368
368 369 Yield a sequence of revision numbers starting with the parents
369 370 of each revision in revs, i.e., each revision is *not* considered
370 371 an ancestor of itself. Results are in breadth-first order:
371 372 parents of each rev in revs, then parents of those, etc. Result
372 373 does not include the null revision."""
373 374 visit = list(revs)
374 375 seen = set([nullrev])
375 376 while visit:
376 377 for parent in self.parentrevs(visit.pop(0)):
377 378 if parent not in seen:
378 379 visit.append(parent)
379 380 seen.add(parent)
380 381 yield parent
381 382
382 383 def descendants(self, *revs):
383 384 """Generate the descendants of 'revs' in revision order.
384 385
385 386 Yield a sequence of revision numbers starting with a child of
386 387 some rev in revs, i.e., each revision is *not* considered a
387 388 descendant of itself. Results are ordered by revision number (a
388 389 topological sort)."""
389 390 first = min(revs)
390 391 if first == nullrev:
391 392 for i in self:
392 393 yield i
393 394 return
394 395
395 396 seen = set(revs)
396 397 for i in xrange(first + 1, len(self)):
397 398 for x in self.parentrevs(i):
398 399 if x != nullrev and x in seen:
399 400 seen.add(i)
400 401 yield i
401 402 break
402 403
403 404 def findcommonmissing(self, common=None, heads=None):
404 405 """Return a tuple of the ancestors of common and the ancestors of heads
405 406 that are not ancestors of common.
406 407
407 408 More specifically, the second element is a list of nodes N such that
408 409 every N satisfies the following constraints:
409 410
410 411 1. N is an ancestor of some node in 'heads'
411 412 2. N is not an ancestor of any node in 'common'
412 413
413 414 The list is sorted by revision number, meaning it is
414 415 topologically sorted.
415 416
416 417 'heads' and 'common' are both lists of node IDs. If heads is
417 418 not supplied, uses all of the revlog's heads. If common is not
418 419 supplied, uses nullid."""
419 420 if common is None:
420 421 common = [nullid]
421 422 if heads is None:
422 423 heads = self.heads()
423 424
424 425 common = [self.rev(n) for n in common]
425 426 heads = [self.rev(n) for n in heads]
426 427
427 428 # we want the ancestors, but inclusive
428 429 has = set(self.ancestors(*common))
429 430 has.add(nullrev)
430 431 has.update(common)
431 432
432 433 # take all ancestors from heads that aren't in has
433 434 missing = set()
434 435 visit = [r for r in heads if r not in has]
435 436 while visit:
436 437 r = visit.pop(0)
437 438 if r in missing:
438 439 continue
439 440 else:
440 441 missing.add(r)
441 442 for p in self.parentrevs(r):
442 443 if p not in has:
443 444 visit.append(p)
444 445 missing = list(missing)
445 446 missing.sort()
446 447 return has, [self.node(r) for r in missing]
447 448
448 449 def findmissing(self, common=None, heads=None):
449 450 """Return the ancestors of heads that are not ancestors of common.
450 451
451 452 More specifically, return a list of nodes N such that every N
452 453 satisfies the following constraints:
453 454
454 455 1. N is an ancestor of some node in 'heads'
455 456 2. N is not an ancestor of any node in 'common'
456 457
457 458 The list is sorted by revision number, meaning it is
458 459 topologically sorted.
459 460
460 461 'heads' and 'common' are both lists of node IDs. If heads is
461 462 not supplied, uses all of the revlog's heads. If common is not
462 463 supplied, uses nullid."""
463 464 _common, missing = self.findcommonmissing(common, heads)
464 465 return missing
465 466
466 467 def nodesbetween(self, roots=None, heads=None):
467 468 """Return a topological path from 'roots' to 'heads'.
468 469
469 470 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
470 471 topologically sorted list of all nodes N that satisfy both of
471 472 these constraints:
472 473
473 474 1. N is a descendant of some node in 'roots'
474 475 2. N is an ancestor of some node in 'heads'
475 476
476 477 Every node is considered to be both a descendant and an ancestor
477 478 of itself, so every reachable node in 'roots' and 'heads' will be
478 479 included in 'nodes'.
479 480
480 481 'outroots' is the list of reachable nodes in 'roots', i.e., the
481 482 subset of 'roots' that is returned in 'nodes'. Likewise,
482 483 'outheads' is the subset of 'heads' that is also in 'nodes'.
483 484
484 485 'roots' and 'heads' are both lists of node IDs. If 'roots' is
485 486 unspecified, uses nullid as the only root. If 'heads' is
486 487 unspecified, uses list of all of the revlog's heads."""
487 488 nonodes = ([], [], [])
488 489 if roots is not None:
489 490 roots = list(roots)
490 491 if not roots:
491 492 return nonodes
492 493 lowestrev = min([self.rev(n) for n in roots])
493 494 else:
494 495 roots = [nullid] # Everybody's a descendant of nullid
495 496 lowestrev = nullrev
496 497 if (lowestrev == nullrev) and (heads is None):
497 498 # We want _all_ the nodes!
498 499 return ([self.node(r) for r in self], [nullid], list(self.heads()))
499 500 if heads is None:
500 501 # All nodes are ancestors, so the latest ancestor is the last
501 502 # node.
502 503 highestrev = len(self) - 1
503 504 # Set ancestors to None to signal that every node is an ancestor.
504 505 ancestors = None
505 506 # Set heads to an empty dictionary for later discovery of heads
506 507 heads = {}
507 508 else:
508 509 heads = list(heads)
509 510 if not heads:
510 511 return nonodes
511 512 ancestors = set()
512 513 # Turn heads into a dictionary so we can remove 'fake' heads.
513 514 # Also, later we will be using it to filter out the heads we can't
514 515 # find from roots.
515 516 heads = dict.fromkeys(heads, False)
516 517 # Start at the top and keep marking parents until we're done.
517 518 nodestotag = set(heads)
518 519 # Remember where the top was so we can use it as a limit later.
519 520 highestrev = max([self.rev(n) for n in nodestotag])
520 521 while nodestotag:
521 522 # grab a node to tag
522 523 n = nodestotag.pop()
523 524 # Never tag nullid
524 525 if n == nullid:
525 526 continue
526 527 # A node's revision number represents its place in a
527 528 # topologically sorted list of nodes.
528 529 r = self.rev(n)
529 530 if r >= lowestrev:
530 531 if n not in ancestors:
531 532 # If we are possibly a descendant of one of the roots
532 533 # and we haven't already been marked as an ancestor
533 534 ancestors.add(n) # Mark as ancestor
534 535 # Add non-nullid parents to list of nodes to tag.
535 536 nodestotag.update([p for p in self.parents(n) if
536 537 p != nullid])
537 538 elif n in heads: # We've seen it before, is it a fake head?
538 539 # So it is, real heads should not be the ancestors of
539 540 # any other heads.
540 541 heads.pop(n)
541 542 if not ancestors:
542 543 return nonodes
543 544 # Now that we have our set of ancestors, we want to remove any
544 545 # roots that are not ancestors.
545 546
546 547 # If one of the roots was nullid, everything is included anyway.
547 548 if lowestrev > nullrev:
548 549 # But, since we weren't, let's recompute the lowest rev to not
549 550 # include roots that aren't ancestors.
550 551
551 552 # Filter out roots that aren't ancestors of heads
552 553 roots = [n for n in roots if n in ancestors]
553 554 # Recompute the lowest revision
554 555 if roots:
555 556 lowestrev = min([self.rev(n) for n in roots])
556 557 else:
557 558 # No more roots? Return empty list
558 559 return nonodes
559 560 else:
560 561 # We are descending from nullid, and don't need to care about
561 562 # any other roots.
562 563 lowestrev = nullrev
563 564 roots = [nullid]
564 565 # Transform our roots list into a set.
565 566 descendants = set(roots)
566 567 # Also, keep the original roots so we can filter out roots that aren't
567 568 # 'real' roots (i.e. are descended from other roots).
568 569 roots = descendants.copy()
569 570 # Our topologically sorted list of output nodes.
570 571 orderedout = []
571 572 # Don't start at nullid since we don't want nullid in our output list,
572 573 # and if nullid shows up in descedents, empty parents will look like
573 574 # they're descendants.
574 575 for r in xrange(max(lowestrev, 0), highestrev + 1):
575 576 n = self.node(r)
576 577 isdescendant = False
577 578 if lowestrev == nullrev: # Everybody is a descendant of nullid
578 579 isdescendant = True
579 580 elif n in descendants:
580 581 # n is already a descendant
581 582 isdescendant = True
582 583 # This check only needs to be done here because all the roots
583 584 # will start being marked is descendants before the loop.
584 585 if n in roots:
585 586 # If n was a root, check if it's a 'real' root.
586 587 p = tuple(self.parents(n))
587 588 # If any of its parents are descendants, it's not a root.
588 589 if (p[0] in descendants) or (p[1] in descendants):
589 590 roots.remove(n)
590 591 else:
591 592 p = tuple(self.parents(n))
592 593 # A node is a descendant if either of its parents are
593 594 # descendants. (We seeded the dependents list with the roots
594 595 # up there, remember?)
595 596 if (p[0] in descendants) or (p[1] in descendants):
596 597 descendants.add(n)
597 598 isdescendant = True
598 599 if isdescendant and ((ancestors is None) or (n in ancestors)):
599 600 # Only include nodes that are both descendants and ancestors.
600 601 orderedout.append(n)
601 602 if (ancestors is not None) and (n in heads):
602 603 # We're trying to figure out which heads are reachable
603 604 # from roots.
604 605 # Mark this head as having been reached
605 606 heads[n] = True
606 607 elif ancestors is None:
607 608 # Otherwise, we're trying to discover the heads.
608 609 # Assume this is a head because if it isn't, the next step
609 610 # will eventually remove it.
610 611 heads[n] = True
611 612 # But, obviously its parents aren't.
612 613 for p in self.parents(n):
613 614 heads.pop(p, None)
614 615 heads = [n for n, flag in heads.iteritems() if flag]
615 616 roots = list(roots)
616 617 assert orderedout
617 618 assert roots
618 619 assert heads
619 620 return (orderedout, roots, heads)
620 621
621 622 def headrevs(self):
622 623 count = len(self)
623 624 if not count:
624 625 return [nullrev]
625 626 ishead = [1] * (count + 1)
626 627 index = self.index
627 628 for r in xrange(count):
628 629 e = index[r]
629 630 ishead[e[5]] = ishead[e[6]] = 0
630 631 return [r for r in xrange(count) if ishead[r]]
631 632
632 633 def heads(self, start=None, stop=None):
633 634 """return the list of all nodes that have no children
634 635
635 636 if start is specified, only heads that are descendants of
636 637 start will be returned
637 638 if stop is specified, it will consider all the revs from stop
638 639 as if they had no children
639 640 """
640 641 if start is None and stop is None:
641 642 if not len(self):
642 643 return [nullid]
643 644 return [self.node(r) for r in self.headrevs()]
644 645
645 646 if start is None:
646 647 start = nullid
647 648 if stop is None:
648 649 stop = []
649 650 stoprevs = set([self.rev(n) for n in stop])
650 651 startrev = self.rev(start)
651 652 reachable = set((startrev,))
652 653 heads = set((startrev,))
653 654
654 655 parentrevs = self.parentrevs
655 656 for r in xrange(startrev + 1, len(self)):
656 657 for p in parentrevs(r):
657 658 if p in reachable:
658 659 if r not in stoprevs:
659 660 reachable.add(r)
660 661 heads.add(r)
661 662 if p in heads and p not in stoprevs:
662 663 heads.remove(p)
663 664
664 665 return [self.node(r) for r in heads]
665 666
666 667 def children(self, node):
667 668 """find the children of a given node"""
668 669 c = []
669 670 p = self.rev(node)
670 671 for r in range(p + 1, len(self)):
671 672 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
672 673 if prevs:
673 674 for pr in prevs:
674 675 if pr == p:
675 676 c.append(self.node(r))
676 677 elif p == nullrev:
677 678 c.append(self.node(r))
678 679 return c
679 680
680 681 def descendant(self, start, end):
681 682 if start == nullrev:
682 683 return True
683 684 for i in self.descendants(start):
684 685 if i == end:
685 686 return True
686 687 elif i > end:
687 688 break
688 689 return False
689 690
690 691 def ancestor(self, a, b):
691 692 """calculate the least common ancestor of nodes a and b"""
692 693
693 694 # fast path, check if it is a descendant
694 695 a, b = self.rev(a), self.rev(b)
695 696 start, end = sorted((a, b))
696 697 if self.descendant(start, end):
697 698 return self.node(start)
698 699
699 700 def parents(rev):
700 701 return [p for p in self.parentrevs(rev) if p != nullrev]
701 702
702 703 c = ancestor.ancestor(a, b, parents)
703 704 if c is None:
704 705 return nullid
705 706
706 707 return self.node(c)
707 708
708 709 def _match(self, id):
709 710 if isinstance(id, (long, int)):
710 711 # rev
711 712 return self.node(id)
712 713 if len(id) == 20:
713 714 # possibly a binary node
714 715 # odds of a binary node being all hex in ASCII are 1 in 10**25
715 716 try:
716 717 node = id
717 718 self.rev(node) # quick search the index
718 719 return node
719 720 except LookupError:
720 721 pass # may be partial hex id
721 722 try:
722 723 # str(rev)
723 724 rev = int(id)
724 725 if str(rev) != id:
725 726 raise ValueError
726 727 if rev < 0:
727 728 rev = len(self) + rev
728 729 if rev < 0 or rev >= len(self):
729 730 raise ValueError
730 731 return self.node(rev)
731 732 except (ValueError, OverflowError):
732 733 pass
733 734 if len(id) == 40:
734 735 try:
735 736 # a full hex nodeid?
736 737 node = bin(id)
737 738 self.rev(node)
738 739 return node
739 740 except (TypeError, LookupError):
740 741 pass
741 742
742 743 def _partialmatch(self, id):
743 744 if id in self._pcache:
744 745 return self._pcache[id]
745 746
746 747 if len(id) < 40:
747 748 try:
748 749 # hex(node)[:...]
749 750 l = len(id) // 2 # grab an even number of digits
750 751 prefix = bin(id[:l * 2])
751 752 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
752 753 nl = [n for n in nl if hex(n).startswith(id)]
753 754 if len(nl) > 0:
754 755 if len(nl) == 1:
755 756 self._pcache[id] = nl[0]
756 757 return nl[0]
757 758 raise LookupError(id, self.indexfile,
758 759 _('ambiguous identifier'))
759 760 return None
760 761 except TypeError:
761 762 pass
762 763
763 764 def lookup(self, id):
764 765 """locate a node based on:
765 766 - revision number or str(revision number)
766 767 - nodeid or subset of hex nodeid
767 768 """
768 769 n = self._match(id)
769 770 if n is not None:
770 771 return n
771 772 n = self._partialmatch(id)
772 773 if n:
773 774 return n
774 775
775 776 raise LookupError(id, self.indexfile, _('no match found'))
776 777
777 778 def cmp(self, node, text):
778 779 """compare text with a given file revision
779 780
780 781 returns True if text is different than what is stored.
781 782 """
782 783 p1, p2 = self.parents(node)
783 784 return hash(text, p1, p2) != node
784 785
785 786 def _addchunk(self, offset, data):
786 787 o, d = self._chunkcache
787 788 # try to add to existing cache
788 789 if o + len(d) == offset and len(d) + len(data) < _chunksize:
789 790 self._chunkcache = o, d + data
790 791 else:
791 792 self._chunkcache = offset, data
792 793
793 794 def _loadchunk(self, offset, length):
794 795 if self._inline:
795 796 df = self.opener(self.indexfile)
796 797 else:
797 798 df = self.opener(self.datafile)
798 799
799 800 readahead = max(65536, length)
800 801 df.seek(offset)
801 802 d = df.read(readahead)
802 803 self._addchunk(offset, d)
803 804 if readahead > length:
804 805 return d[:length]
805 806 return d
806 807
807 808 def _getchunk(self, offset, length):
808 809 o, d = self._chunkcache
809 810 l = len(d)
810 811
811 812 # is it in the cache?
812 813 cachestart = offset - o
813 814 cacheend = cachestart + length
814 815 if cachestart >= 0 and cacheend <= l:
815 816 if cachestart == 0 and cacheend == l:
816 817 return d # avoid a copy
817 818 return d[cachestart:cacheend]
818 819
819 820 return self._loadchunk(offset, length)
820 821
821 822 def _chunkraw(self, startrev, endrev):
822 823 start = self.start(startrev)
823 824 length = self.end(endrev) - start
824 825 if self._inline:
825 826 start += (startrev + 1) * self._io.size
826 827 return self._getchunk(start, length)
827 828
828 829 def _chunk(self, rev):
829 830 return decompress(self._chunkraw(rev, rev))
830 831
831 832 def _chunkbase(self, rev):
832 833 return self._chunk(rev)
833 834
834 835 def _chunkclear(self):
835 836 self._chunkcache = (0, '')
836 837
837 838 def deltaparent(self, rev):
838 839 """return deltaparent of the given revision"""
839 840 base = self.index[rev][3]
840 841 if base == rev:
841 842 return nullrev
842 843 elif self._generaldelta:
843 844 return base
844 845 else:
845 846 return rev - 1
846 847
847 848 def revdiff(self, rev1, rev2):
848 849 """return or calculate a delta between two revisions"""
849 850 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
850 851 return self._chunk(rev2)
851 852
852 853 return mdiff.textdiff(self.revision(self.node(rev1)),
853 854 self.revision(self.node(rev2)))
854 855
855 856 def revision(self, node):
856 857 """return an uncompressed revision of a given node"""
857 858 cachedrev = None
858 859 if node == nullid:
859 860 return ""
860 861 if self._cache:
861 862 if self._cache[0] == node:
862 863 return self._cache[2]
863 864 cachedrev = self._cache[1]
864 865
865 866 # look up what we need to read
866 867 text = None
867 868 rev = self.rev(node)
868 869
869 870 # check rev flags
870 871 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
871 872 raise RevlogError(_('incompatible revision flag %x') %
872 873 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
873 874
874 875 # build delta chain
875 876 chain = []
876 877 index = self.index # for performance
877 878 generaldelta = self._generaldelta
878 879 iterrev = rev
879 880 e = index[iterrev]
880 881 while iterrev != e[3] and iterrev != cachedrev:
881 882 chain.append(iterrev)
882 883 if generaldelta:
883 884 iterrev = e[3]
884 885 else:
885 886 iterrev -= 1
886 887 e = index[iterrev]
887 888 chain.reverse()
888 889 base = iterrev
889 890
890 891 if iterrev == cachedrev:
891 892 # cache hit
892 893 text = self._cache[2]
893 894
894 895 # drop cache to save memory
895 896 self._cache = None
896 897
897 898 self._chunkraw(base, rev)
898 899 if text is None:
899 900 text = self._chunkbase(base)
900 901
901 902 bins = [self._chunk(r) for r in chain]
902 903 text = mdiff.patches(text, bins)
903 904
904 905 text = self._checkhash(text, node, rev)
905 906
906 907 self._cache = (node, rev, text)
907 908 return text
908 909
909 910 def _checkhash(self, text, node, rev):
910 911 p1, p2 = self.parents(node)
911 912 if node != hash(text, p1, p2):
912 913 raise RevlogError(_("integrity check failed on %s:%d")
913 914 % (self.indexfile, rev))
914 915 return text
915 916
916 917 def checkinlinesize(self, tr, fp=None):
917 918 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
918 919 return
919 920
920 921 trinfo = tr.find(self.indexfile)
921 922 if trinfo is None:
922 923 raise RevlogError(_("%s not found in the transaction")
923 924 % self.indexfile)
924 925
925 926 trindex = trinfo[2]
926 927 dataoff = self.start(trindex)
927 928
928 929 tr.add(self.datafile, dataoff)
929 930
930 931 if fp:
931 932 fp.flush()
932 933 fp.close()
933 934
934 935 df = self.opener(self.datafile, 'w')
935 936 try:
936 937 for r in self:
937 938 df.write(self._chunkraw(r, r))
938 939 finally:
939 940 df.close()
940 941
941 942 fp = self.opener(self.indexfile, 'w', atomictemp=True)
942 943 self.version &= ~(REVLOGNGINLINEDATA)
943 944 self._inline = False
944 945 for i in self:
945 946 e = self._io.packentry(self.index[i], self.node, self.version, i)
946 947 fp.write(e)
947 948
948 949 # if we don't call rename, the temp file will never replace the
949 950 # real index
950 951 fp.rename()
951 952
952 953 tr.replace(self.indexfile, trindex * self._io.size)
953 954 self._chunkclear()
954 955
955 956 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
956 957 """add a revision to the log
957 958
958 959 text - the revision data to add
959 960 transaction - the transaction object used for rollback
960 961 link - the linkrev data to add
961 962 p1, p2 - the parent nodeids of the revision
962 963 cachedelta - an optional precomputed delta
963 964 """
964 965 node = hash(text, p1, p2)
965 966 if node in self.nodemap:
966 967 return node
967 968
968 969 dfh = None
969 970 if not self._inline:
970 971 dfh = self.opener(self.datafile, "a")
971 972 ifh = self.opener(self.indexfile, "a+")
972 973 try:
973 974 return self._addrevision(node, text, transaction, link, p1, p2,
974 975 cachedelta, ifh, dfh)
975 976 finally:
976 977 if dfh:
977 978 dfh.close()
978 979 ifh.close()
979 980
    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to eachother.
        """
        # one-element list so the closures below can lazily fill it in
        btext = [text]
        def buildtext():
            # return the full revision text, reconstructing it from
            # cachedelta if it was not supplied directly
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            chk = hash(btext[0], p1, p2)
            if chk != node:
                raise RevlogError(_("consistency error in delta"))
            return btext[0]

        def builddelta(rev):
            # build a delta against revision rev; returns
            # (dist, compressed length, data, delta base, chain base)
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            # dist: total bytes from the chain base through this delta,
            # used below to decide when to store a full text instead
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            return dist, l, data, base, chainbase

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        offset = self.end(prev)
        flags = 0
        d = None
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                # prefer deltaing against a parent that is at or past the
                # cached base revision
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if d is None or dist > textlen * 2:
            text = buildtext()
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        # insert before the trailing sentinel entry
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            # inline: entry and data both go to the index file
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            # splitting may be required now that we grew the file
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node
1086 1087
1087 1088 def group(self, nodelist, bundler, reorder=None):
1088 1089 """Calculate a delta group, yielding a sequence of changegroup chunks
1089 1090 (strings).
1090 1091
1091 1092 Given a list of changeset revs, return a set of deltas and
1092 1093 metadata corresponding to nodes. The first delta is
1093 1094 first parent(nodelist[0]) -> nodelist[0], the receiver is
1094 1095 guaranteed to have this parent as it has all history before
1095 1096 these changesets. In the case firstparent is nullrev the
1096 1097 changegroup starts with a full revision.
1097 1098 """
1098 1099
1099 1100 # if we don't have any revisions touched by these changesets, bail
1100 1101 if len(nodelist) == 0:
1101 1102 yield bundler.close()
1102 1103 return
1103 1104
1104 1105 # for generaldelta revlogs, we linearize the revs; this will both be
1105 1106 # much quicker and generate a much smaller bundle
1106 1107 if (self._generaldelta and reorder is not False) or reorder:
1107 1108 dag = dagutil.revlogdag(self)
1108 1109 revs = set(self.rev(n) for n in nodelist)
1109 1110 revs = dag.linearize(revs)
1110 1111 else:
1111 1112 revs = sorted([self.rev(n) for n in nodelist])
1112 1113
1113 1114 # add the parent of the first rev
1114 1115 p = self.parentrevs(revs[0])[0]
1115 1116 revs.insert(0, p)
1116 1117
1117 1118 # build deltas
1118 1119 for r in xrange(len(revs) - 1):
1119 1120 prev, curr = revs[r], revs[r + 1]
1120 1121 for c in bundler.revchunk(self, curr, prev):
1121 1122 yield c
1122 1123
1123 1124 yield bundler.close()
1124 1125
    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        Returns the node of the last revision processed, or None if the
        bundle contained no chunks for this revlog.
        """

        # track the base of the current delta log
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            # inline: revision data follows the entries in the index file
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                # chain is the node the next delta is expected against
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                # both parents must already be known to this revlog
                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node
1195 1196
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll readd them after this truncation.
        """
        if len(self) == 0:
            return

        # find the first revision to strip; index entry slot 4 holds the
        # linkrev (see the tuple built in _addrevision)
        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            # no revision has linkrev >= minlink: nothing to strip
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            # inline: entries and revision data share the index file
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        # drop stripped entries but keep the trailing sentinel entry
        # (new entries are inserted before it in _addrevision)
        del self.index[rev:-1]
1236 1237
1237 1238 def checksize(self):
1238 1239 expected = 0
1239 1240 if len(self):
1240 1241 expected = max(0, self.end(len(self) - 1))
1241 1242
1242 1243 try:
1243 1244 f = self.opener(self.datafile)
1244 1245 f.seek(0, 2)
1245 1246 actual = f.tell()
1246 1247 f.close()
1247 1248 dd = actual - expected
1248 1249 except IOError, inst:
1249 1250 if inst.errno != errno.ENOENT:
1250 1251 raise
1251 1252 dd = 0
1252 1253
1253 1254 try:
1254 1255 f = self.opener(self.indexfile)
1255 1256 f.seek(0, 2)
1256 1257 actual = f.tell()
1257 1258 f.close()
1258 1259 s = self._io.size
1259 1260 i = max(0, actual // s)
1260 1261 di = actual - (i * s)
1261 1262 if self._inline:
1262 1263 databytes = 0
1263 1264 for r in self:
1264 1265 databytes += max(0, self.length(r))
1265 1266 dd = 0
1266 1267 di = actual - len(self) * s - databytes
1267 1268 except IOError, inst:
1268 1269 if inst.errno != errno.ENOENT:
1269 1270 raise
1270 1271 di = 0
1271 1272
1272 1273 return (dd, di)
1273 1274
1274 1275 def files(self):
1275 1276 res = [self.indexfile]
1276 1277 if not self._inline:
1277 1278 res.append(self.datafile)
1278 1279 return res
General Comments 0
You need to be logged in to leave comments. Login now