revlog: introduce a cache for partial lookups...
Matt Mackall
r13258:c2661863 default
@@ -1,1244 +1,1249 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGSHALLOW = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
35 35
36 36 # revlog index flags
37 37 REVIDX_PARENTDELTA = 1
38 38 REVIDX_PUNCHED_FLAG = 2
39 39 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
40 40
41 41 # max size of revlog with inline data
42 42 _maxinline = 131072
43 43 _chunksize = 1048576
44 44
45 45 RevlogError = error.RevlogError
46 46 LookupError = error.LookupError
47 47
48 48 def getoffset(q):
49 49 return int(q >> 16)
50 50
51 51 def gettype(q):
52 52 return int(q & 0xFFFF)
53 53
54 54 def offset_type(offset, type):
55 55 return long(long(offset) << 16 | type)
56 56
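The index packs each revision's data-file offset and its flags into one 64-bit value: the high 48 bits carry the offset, the low 16 bits the flags. A minimal round trip using the helpers above (REVIDX_PUNCHED_FLAG is defined earlier in this file):

    packed = offset_type(1048576, REVIDX_PUNCHED_FLAG)  # offset 1 MiB, punched flag
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_PUNCHED_FLAG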
57 57 nullhash = _sha(nullid)
58 58
59 59 def hash(text, p1, p2):
60 60 """generate a hash from the given text and its parent hashes
61 61
62 62 This hash combines both the current file contents and its history
63 63 in a manner that makes it easy to distinguish nodes with the same
64 64 content in the revision graph.
65 65 """
66 66 # As of now, if one of the parent nodes is null, p2 is null
67 67 if p2 == nullid:
68 68 # deep copy of a hash is faster than creating one
69 69 s = nullhash.copy()
70 70 s.update(p1)
71 71 else:
72 72 # neither of the parent nodes is nullid
73 73 l = [p1, p2]
74 74 l.sort()
75 75 s = _sha(l[0])
76 76 s.update(l[1])
77 77 s.update(text)
78 78 return s.digest()
79 79
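hash() makes a node's identity cover history as well as content: the parents are sorted first, so the result is independent of parent order, and identical text under different parents hashes to different nodes. A standalone sketch of the same scheme (node_hash is an illustrative name, not revlog API):

    import hashlib

    nullid = "\0" * 20

    def node_hash(text, p1, p2):
        s = hashlib.sha1(min(p1, p2))  # sorted parents: order-independent
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()

    root = node_hash("hello\n", nullid, nullid)
    child = node_hash("hello\n", root, nullid)
    assert root != child  # same text, different history, different node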
80 80 def compress(text):
81 81 """ generate a possibly-compressed representation of text """
82 82 if not text:
83 83 return ("", text)
84 84 l = len(text)
85 85 bin = None
86 86 if l < 44:
87 87 pass
88 88 elif l > 1000000:
89 89 # zlib makes an internal copy, thus doubling memory usage for
90 90 # large files, so let's do this in pieces
91 91 z = zlib.compressobj()
92 92 p = []
93 93 pos = 0
94 94 while pos < l:
95 95 pos2 = pos + 2**20
96 96 p.append(z.compress(text[pos:pos2]))
97 97 pos = pos2
98 98 p.append(z.flush())
99 99 if sum(map(len, p)) < l:
100 100 bin = "".join(p)
101 101 else:
102 102 bin = _compress(text)
103 103 if bin is None or len(bin) > l:
104 104 if text[0] == '\0':
105 105 return ("", text)
106 106 return ('u', text)
107 107 return ("", bin)
108 108
109 109 def decompress(bin):
110 110 """ decompress the given input """
111 111 if not bin:
112 112 return bin
113 113 t = bin[0]
114 114 if t == '\0':
115 115 return bin
116 116 if t == 'x':
117 117 return _decompress(bin)
118 118 if t == 'u':
119 119 return bin[1:]
120 120 raise RevlogError(_("unknown compression type %r") % t)
121 121
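decompress() dispatches on the first stored byte: '\0' means literal text that happens to begin with NUL (compress() returns it untagged), 'x' is the byte zlib itself emits at the start of a compressed stream, and 'u' marks text kept uncompressed because compression would not have saved space. A round-trip sketch over the two functions above (Python 2 string semantics, as in the surrounding code):

    def roundtrip(text):
        tag, data = compress(text)
        stored = tag + data  # what is actually written to the revlog
        return decompress(stored)

    assert roundtrip("") == ""
    assert roundtrip("short text") == "short text"  # < 44 bytes: stored with 'u'
    assert roundtrip("a" * 1000) == "a" * 1000      # zlib wins: stream starts with 'x'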
122 122 indexformatv0 = ">4l20s20s20s"
123 123 v0shaoffset = 56
124 124
125 125 class revlogoldio(object):
126 126 def __init__(self):
127 127 self.size = struct.calcsize(indexformatv0)
128 128
129 129 def parseindex(self, fp, data, inline):
130 130 s = self.size
131 131 index = []
132 132 nodemap = {nullid: nullrev}
133 133 n = off = 0
134 134 l = len(data)
135 135 while off + s <= l:
136 136 cur = data[off:off + s]
137 137 off += s
138 138 e = _unpack(indexformatv0, cur)
139 139 # transform to revlogv1 format
140 140 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
141 141 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
142 142 index.append(e2)
143 143 nodemap[e[6]] = n
144 144 n += 1
145 145
146 146 return index, nodemap, None
147 147
148 148 def packentry(self, entry, node, version, rev):
149 149 if gettype(entry[0]):
150 150 raise RevlogError(_("index entry flags need RevlogNG"))
151 151 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
152 152 node(entry[5]), node(entry[6]), entry[7])
153 153 return _pack(indexformatv0, *e2)
154 154
155 155 # index ng:
156 156 # 6 bytes: offset
157 157 # 2 bytes: flags
158 158 # 4 bytes: compressed length
159 159 # 4 bytes: uncompressed length
160 160 # 4 bytes: base rev
161 161 # 4 bytes: link rev
162 162 # 4 bytes: parent 1 rev
163 163 # 4 bytes: parent 2 rev
164 164 # 32 bytes: nodeid
165 165 indexformatng = ">Qiiiiii20s12x"
166 166 ngshaoffset = 32
167 167 versionformat = ">I"
168 168
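Each RevlogNG index record is exactly 64 bytes in the layout the comment above describes; the trailing 12x pads the 52 bytes of fields out to 64. A sketch that packs and unpacks one entry (note that for rev 0, packentry() overwrites the first 4 bytes with the version header):

    import struct

    fmt = ">Qiiiiii20s12x"      # 8 + 4*6 + 20 + 12 = 64 bytes
    node = "\xaa" * 20          # fake 20-byte nodeid
    entry = ((0 << 16) | 0,     # offset 0, no flags (see offset_type above)
             11, 11,            # compressed length, uncompressed length
             0, 0,              # base rev, link rev
             -1, -1,            # parent revs (nullrev)
             node)
    record = struct.pack(fmt, *entry)
    assert len(record) == 64
    assert struct.unpack(fmt, record) == entry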
169 169 class revlogio(object):
170 170 def __init__(self):
171 171 self.size = struct.calcsize(indexformatng)
172 172
173 173 def parseindex(self, fp, data, inline):
174 174 # call the C implementation to parse the index data
175 175 index, cache = parsers.parse_index2(data, inline)
176 176 return index, None, cache
177 177
178 178 def packentry(self, entry, node, version, rev):
179 179 p = _pack(indexformatng, *entry)
180 180 if rev == 0:
181 181 p = _pack(versionformat, version) + p[4:]
182 182 return p
183 183
184 184 class revlog(object):
185 185 """
186 186 the underlying revision storage object
187 187
188 188 A revlog consists of two parts, an index and the revision data.
189 189
190 190 The index is a file with a fixed record size containing
191 191 information on each revision, including its nodeid (hash), the
192 192 nodeids of its parents, the position and offset of its data within
193 193 the data file, and the revision it's based on. Finally, each entry
194 194 contains a linkrev entry that can serve as a pointer to external
195 195 data.
196 196
197 197 The revision data itself is a linear collection of data chunks.
198 198 Each chunk represents a revision and is usually represented as a
199 199 delta against the previous chunk. To bound lookup time, runs of
200 200 deltas are limited to about 2 times the length of the original
201 201 version data. This makes retrieval of a version proportional to
202 202 its size, or O(1) relative to the number of revisions.
203 203
204 204 Both pieces of the revlog are written to in an append-only
205 205 fashion, which means we never need to rewrite a file to insert or
206 206 remove data, and can use some simple techniques to avoid the need
207 207 for locking while reading.
208 208 """
209 209 def __init__(self, opener, indexfile, shallowroot=None):
210 210 """
211 211 create a revlog object
212 212
213 213 opener is a function that abstracts the file opening operation
214 214 and can be used to implement COW semantics or the like.
215 215 """
216 216 self.indexfile = indexfile
217 217 self.datafile = indexfile[:-2] + ".d"
218 218 self.opener = opener
219 219 self._cache = None
220 220 self._chunkcache = (0, '')
221 221 self.index = []
222 222 self._shallowroot = shallowroot
223 223 self._parentdelta = 0
224 self._pcache = {}
224 225
225 226 v = REVLOG_DEFAULT_VERSION
226 227 if hasattr(opener, 'options') and 'defversion' in opener.options:
227 228 v = opener.options['defversion']
228 229 if v & REVLOGNG:
229 230 v |= REVLOGNGINLINEDATA
230 231 if v & REVLOGNG and 'parentdelta' in opener.options:
231 232 self._parentdelta = 1
232 233
233 234 if shallowroot:
234 235 v |= REVLOGSHALLOW
235 236
236 237 i = ''
237 238 try:
238 239 f = self.opener(self.indexfile)
239 240 i = f.read()
240 241 if len(i) > 0:
241 242 v = struct.unpack(versionformat, i[:4])[0]
242 243 except IOError, inst:
243 244 if inst.errno != errno.ENOENT:
244 245 raise
245 246
246 247 self.version = v
247 248 self._inline = v & REVLOGNGINLINEDATA
248 249 self._shallow = v & REVLOGSHALLOW
249 250 flags = v & ~0xFFFF
250 251 fmt = v & 0xFFFF
251 252 if fmt == REVLOGV0 and flags:
252 253 raise RevlogError(_("index %s unknown flags %#04x for format v0")
253 254 % (self.indexfile, flags >> 16))
254 255 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
255 256 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
256 257 % (self.indexfile, flags >> 16))
257 258 elif fmt > REVLOGNG:
258 259 raise RevlogError(_("index %s unknown format %d")
259 260 % (self.indexfile, fmt))
260 261
261 262 self._io = revlogio()
262 263 if self.version == REVLOGV0:
263 264 self._io = revlogoldio()
264 265 if i:
265 266 try:
266 267 d = self._io.parseindex(f, i, self._inline)
267 268 except (ValueError, IndexError):
268 269 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
269 270 self.index, n, self._chunkcache = d
270 271 if n:
271 272 self.nodemap = n
272 273 if not self._chunkcache:
273 274 self._chunkclear()
274 275
275 276 # add the magic null revision at -1 (if it hasn't been done already)
276 277 if self.index == [] or self.index[-1][7] != nullid:
277 278 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
278 279
279 280 @util.propertycache
280 281 def nodemap(self):
281 282 n = {nullid: nullrev}
282 283 i = self.index
283 284 for r in xrange(len(i) - 1):
284 285 n[i[r][7]] = r
285 286 return n
286 287
287 288 def tip(self):
288 289 return self.node(len(self.index) - 2)
289 290 def __len__(self):
290 291 return len(self.index) - 1
291 292 def __iter__(self):
292 293 for i in xrange(len(self)):
293 294 yield i
294 295 def rev(self, node):
295 296 try:
296 297 return self.nodemap[node]
297 298 except KeyError:
298 299 raise LookupError(node, self.indexfile, _('no node'))
299 300 def node(self, rev):
300 301 return self.index[rev][7]
301 302 def linkrev(self, rev):
302 303 return self.index[rev][4]
303 304 def parents(self, node):
304 305 i = self.index
305 306 d = i[self.rev(node)]
306 307 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
307 308 def parentrevs(self, rev):
308 309 return self.index[rev][5:7]
309 310 def start(self, rev):
310 311 return int(self.index[rev][0] >> 16)
311 312 def end(self, rev):
312 313 return self.start(rev) + self.length(rev)
313 314 def length(self, rev):
314 315 return self.index[rev][1]
315 316 def base(self, rev):
316 317 return self.index[rev][3]
317 318 def flags(self, rev):
318 319 return self.index[rev][0] & 0xFFFF
319 320 def rawsize(self, rev):
320 321 """return the length of the uncompressed text for a given revision"""
321 322 l = self.index[rev][2]
322 323 if l >= 0:
323 324 return l
324 325
325 326 t = self.revision(self.node(rev))
326 327 return len(t)
327 328 size = rawsize
328 329
329 330 def reachable(self, node, stop=None):
330 331 """return the set of all nodes ancestral to a given node, including
331 332 the node itself, stopping when stop is matched"""
332 333 reachable = set((node,))
333 334 visit = [node]
334 335 if stop:
335 336 stopn = self.rev(stop)
336 337 else:
337 338 stopn = 0
338 339 while visit:
339 340 n = visit.pop(0)
340 341 if n == stop:
341 342 continue
342 343 if n == nullid:
343 344 continue
344 345 for p in self.parents(n):
345 346 if self.rev(p) < stopn:
346 347 continue
347 348 if p not in reachable:
348 349 reachable.add(p)
349 350 visit.append(p)
350 351 return reachable
351 352
352 353 def ancestors(self, *revs):
353 354 """Generate the ancestors of 'revs' in reverse topological order.
354 355
355 356 Yield a sequence of revision numbers starting with the parents
356 357 of each revision in revs, i.e., each revision is *not* considered
357 358 an ancestor of itself. Results are in breadth-first order:
358 359 parents of each rev in revs, then parents of those, etc. Result
359 360 does not include the null revision."""
360 361 visit = list(revs)
361 362 seen = set([nullrev])
362 363 while visit:
363 364 for parent in self.parentrevs(visit.pop(0)):
364 365 if parent not in seen:
365 366 visit.append(parent)
366 367 seen.add(parent)
367 368 yield parent
368 369
369 370 def descendants(self, *revs):
370 371 """Generate the descendants of 'revs' in revision order.
371 372
372 373 Yield a sequence of revision numbers starting with a child of
373 374 some rev in revs, i.e., each revision is *not* considered a
374 375 descendant of itself. Results are ordered by revision number (a
375 376 topological sort)."""
376 377 first = min(revs)
377 378 if first == nullrev:
378 379 for i in self:
379 380 yield i
380 381 return
381 382
382 383 seen = set(revs)
383 384 for i in xrange(first + 1, len(self)):
384 385 for x in self.parentrevs(i):
385 386 if x != nullrev and x in seen:
386 387 seen.add(i)
387 388 yield i
388 389 break
389 390
390 391 def findmissing(self, common=None, heads=None):
391 392 """Return the ancestors of heads that are not ancestors of common.
392 393
393 394 More specifically, return a list of nodes N such that every N
394 395 satisfies the following constraints:
395 396
396 397 1. N is an ancestor of some node in 'heads'
397 398 2. N is not an ancestor of any node in 'common'
398 399
399 400 The list is sorted by revision number, meaning it is
400 401 topologically sorted.
401 402
402 403 'heads' and 'common' are both lists of node IDs. If heads is
403 404 not supplied, uses all of the revlog's heads. If common is not
404 405 supplied, uses nullid."""
405 406 if common is None:
406 407 common = [nullid]
407 408 if heads is None:
408 409 heads = self.heads()
409 410
410 411 common = [self.rev(n) for n in common]
411 412 heads = [self.rev(n) for n in heads]
412 413
413 414 # we want the ancestors, but inclusive
414 415 has = set(self.ancestors(*common))
415 416 has.add(nullrev)
416 417 has.update(common)
417 418
418 419 # take all ancestors from heads that aren't in has
419 420 missing = set()
420 421 visit = [r for r in heads if r not in has]
421 422 while visit:
422 423 r = visit.pop(0)
423 424 if r in missing:
424 425 continue
425 426 else:
426 427 missing.add(r)
427 428 for p in self.parentrevs(r):
428 429 if p not in has:
429 430 visit.append(p)
430 431 missing = list(missing)
431 432 missing.sort()
432 433 return [self.node(r) for r in missing]
433 434
434 435 def nodesbetween(self, roots=None, heads=None):
435 436 """Return a topological path from 'roots' to 'heads'.
436 437
437 438 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
438 439 topologically sorted list of all nodes N that satisfy both of
439 440 these constraints:
440 441
441 442 1. N is a descendant of some node in 'roots'
442 443 2. N is an ancestor of some node in 'heads'
443 444
444 445 Every node is considered to be both a descendant and an ancestor
445 446 of itself, so every reachable node in 'roots' and 'heads' will be
446 447 included in 'nodes'.
447 448
448 449 'outroots' is the list of reachable nodes in 'roots', i.e., the
449 450 subset of 'roots' that is returned in 'nodes'. Likewise,
450 451 'outheads' is the subset of 'heads' that is also in 'nodes'.
451 452
452 453 'roots' and 'heads' are both lists of node IDs. If 'roots' is
453 454 unspecified, uses nullid as the only root. If 'heads' is
454 455 unspecified, uses list of all of the revlog's heads."""
455 456 nonodes = ([], [], [])
456 457 if roots is not None:
457 458 roots = list(roots)
458 459 if not roots:
459 460 return nonodes
460 461 lowestrev = min([self.rev(n) for n in roots])
461 462 else:
462 463 roots = [nullid] # Everybody's a descendent of nullid
463 464 lowestrev = nullrev
464 465 if (lowestrev == nullrev) and (heads is None):
465 466 # We want _all_ the nodes!
466 467 return ([self.node(r) for r in self], [nullid], list(self.heads()))
467 468 if heads is None:
468 469 # All nodes are ancestors, so the latest ancestor is the last
469 470 # node.
470 471 highestrev = len(self) - 1
471 472 # Set ancestors to None to signal that every node is an ancestor.
472 473 ancestors = None
473 474 # Set heads to an empty dictionary for later discovery of heads
474 475 heads = {}
475 476 else:
476 477 heads = list(heads)
477 478 if not heads:
478 479 return nonodes
479 480 ancestors = set()
480 481 # Turn heads into a dictionary so we can remove 'fake' heads.
481 482 # Also, later we will be using it to filter out the heads we can't
482 483 # find from roots.
483 484 heads = dict.fromkeys(heads, 0)
484 485 # Start at the top and keep marking parents until we're done.
485 486 nodestotag = set(heads)
486 487 # Remember where the top was so we can use it as a limit later.
487 488 highestrev = max([self.rev(n) for n in nodestotag])
488 489 while nodestotag:
489 490 # grab a node to tag
490 491 n = nodestotag.pop()
491 492 # Never tag nullid
492 493 if n == nullid:
493 494 continue
494 495 # A node's revision number represents its place in a
495 496 # topologically sorted list of nodes.
496 497 r = self.rev(n)
497 498 if r >= lowestrev:
498 499 if n not in ancestors:
499 500 # If we are possibly a descendent of one of the roots
500 501 # and we haven't already been marked as an ancestor
501 502 ancestors.add(n) # Mark as ancestor
502 503 # Add non-nullid parents to list of nodes to tag.
503 504 nodestotag.update([p for p in self.parents(n) if
504 505 p != nullid])
505 506 elif n in heads: # We've seen it before, is it a fake head?
506 507 # So it is, real heads should not be the ancestors of
507 508 # any other heads.
508 509 heads.pop(n)
509 510 if not ancestors:
510 511 return nonodes
511 512 # Now that we have our set of ancestors, we want to remove any
512 513 # roots that are not ancestors.
513 514
514 515 # If one of the roots was nullid, everything is included anyway.
515 516 if lowestrev > nullrev:
516 517 # But, since we weren't, let's recompute the lowest rev to not
517 518 # include roots that aren't ancestors.
518 519
519 520 # Filter out roots that aren't ancestors of heads
520 521 roots = [n for n in roots if n in ancestors]
521 522 # Recompute the lowest revision
522 523 if roots:
523 524 lowestrev = min([self.rev(n) for n in roots])
524 525 else:
525 526 # No more roots? Return empty list
526 527 return nonodes
527 528 else:
528 529 # We are descending from nullid, and don't need to care about
529 530 # any other roots.
530 531 lowestrev = nullrev
531 532 roots = [nullid]
532 533 # Transform our roots list into a set.
533 534 descendents = set(roots)
534 535 # Also, keep the original roots so we can filter out roots that aren't
535 536 # 'real' roots (i.e. are descended from other roots).
536 537 roots = descendents.copy()
537 538 # Our topologically sorted list of output nodes.
538 539 orderedout = []
539 540 # Don't start at nullid since we don't want nullid in our output list,
540 541 # and if nullid shows up in descendents, empty parents will look like
541 542 # they're descendents.
542 543 for r in xrange(max(lowestrev, 0), highestrev + 1):
543 544 n = self.node(r)
544 545 isdescendent = False
545 546 if lowestrev == nullrev: # Everybody is a descendent of nullid
546 547 isdescendent = True
547 548 elif n in descendents:
548 549 # n is already a descendent
549 550 isdescendent = True
550 551 # This check only needs to be done here because all the roots
551 552 # will start being marked as descendents before the loop.
552 553 if n in roots:
553 554 # If n was a root, check if it's a 'real' root.
554 555 p = tuple(self.parents(n))
555 556 # If any of its parents are descendents, it's not a root.
556 557 if (p[0] in descendents) or (p[1] in descendents):
557 558 roots.remove(n)
558 559 else:
559 560 p = tuple(self.parents(n))
560 561 # A node is a descendent if either of its parents are
561 562 # descendents. (We seeded the descendents set with the roots
562 563 # up there, remember?)
563 564 if (p[0] in descendents) or (p[1] in descendents):
564 565 descendents.add(n)
565 566 isdescendent = True
566 567 if isdescendent and ((ancestors is None) or (n in ancestors)):
567 568 # Only include nodes that are both descendents and ancestors.
568 569 orderedout.append(n)
569 570 if (ancestors is not None) and (n in heads):
570 571 # We're trying to figure out which heads are reachable
571 572 # from roots.
572 573 # Mark this head as having been reached
573 574 heads[n] = 1
574 575 elif ancestors is None:
575 576 # Otherwise, we're trying to discover the heads.
576 577 # Assume this is a head because if it isn't, the next step
577 578 # will eventually remove it.
578 579 heads[n] = 1
579 580 # But, obviously its parents aren't.
580 581 for p in self.parents(n):
581 582 heads.pop(p, None)
582 583 heads = [n for n in heads.iterkeys() if heads[n] != 0]
583 584 roots = list(roots)
584 585 assert orderedout
585 586 assert roots
586 587 assert heads
587 588 return (orderedout, roots, heads)
588 589
589 590 def heads(self, start=None, stop=None):
590 591 """return the list of all nodes that have no children
591 592
592 593 if start is specified, only heads that are descendants of
593 594 start will be returned
594 595 if stop is specified, it will consider all the revs from stop
595 596 as if they had no children
596 597 """
597 598 if start is None and stop is None:
598 599 count = len(self)
599 600 if not count:
600 601 return [nullid]
601 602 ishead = [1] * (count + 1)
602 603 index = self.index
603 604 for r in xrange(count):
604 605 e = index[r]
605 606 ishead[e[5]] = ishead[e[6]] = 0
606 607 return [self.node(r) for r in xrange(count) if ishead[r]]
607 608
608 609 if start is None:
609 610 start = nullid
610 611 if stop is None:
611 612 stop = []
612 613 stoprevs = set([self.rev(n) for n in stop])
613 614 startrev = self.rev(start)
614 615 reachable = set((startrev,))
615 616 heads = set((startrev,))
616 617
617 618 parentrevs = self.parentrevs
618 619 for r in xrange(startrev + 1, len(self)):
619 620 for p in parentrevs(r):
620 621 if p in reachable:
621 622 if r not in stoprevs:
622 623 reachable.add(r)
623 624 heads.add(r)
624 625 if p in heads and p not in stoprevs:
625 626 heads.remove(p)
626 627
627 628 return [self.node(r) for r in heads]
628 629
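For the common no-argument case, heads() needs no graph traversal at all: every rev starts out marked as a head and a single pass over the index unmarks each rev's parents, so only childless revs survive. A standalone sketch of the trick (parentrevs is any callable mapping rev -> (p1rev, p2rev)):

    def simpleheads(parentrevs, count):
        ishead = [True] * count
        for r in range(count):
            for p in parentrevs(r):
                if p >= 0:             # skip nullrev
                    ishead[p] = False  # p has a child, so it is not a head
        return [r for r in range(count) if ishead[r]]

    # linear history 0 <- 1 <- 2: only the tip is a head
    assert simpleheads(lambda r: (r - 1, -1), 3) == [2]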
629 630 def children(self, node):
630 631 """find the children of a given node"""
631 632 c = []
632 633 p = self.rev(node)
633 634 for r in range(p + 1, len(self)):
634 635 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
635 636 if prevs:
636 637 for pr in prevs:
637 638 if pr == p:
638 639 c.append(self.node(r))
639 640 elif p == nullrev:
640 641 c.append(self.node(r))
641 642 return c
642 643
643 644 def descendant(self, start, end):
644 645 if start == nullrev:
645 646 return True
646 647 for i in self.descendants(start):
647 648 if i == end:
648 649 return True
649 650 elif i > end:
650 651 break
651 652 return False
652 653
653 654 def ancestor(self, a, b):
654 655 """calculate the least common ancestor of nodes a and b"""
655 656
656 657 # fast path, check if it is a descendant
657 658 a, b = self.rev(a), self.rev(b)
658 659 start, end = sorted((a, b))
659 660 if self.descendant(start, end):
660 661 return self.node(start)
661 662
662 663 def parents(rev):
663 664 return [p for p in self.parentrevs(rev) if p != nullrev]
664 665
665 666 c = ancestor.ancestor(a, b, parents)
666 667 if c is None:
667 668 return nullid
668 669
669 670 return self.node(c)
670 671
671 672 def _match(self, id):
672 673 if isinstance(id, (long, int)):
673 674 # rev
674 675 return self.node(id)
675 676 if len(id) == 20:
676 677 # possibly a binary node
677 678 # odds of a binary node being all hex in ASCII are 1 in 10**25
678 679 try:
679 680 node = id
680 681 self.rev(node) # quick search the index
681 682 return node
682 683 except LookupError:
683 684 pass # may be partial hex id
684 685 try:
685 686 # str(rev)
686 687 rev = int(id)
687 688 if str(rev) != id:
688 689 raise ValueError
689 690 if rev < 0:
690 691 rev = len(self) + rev
691 692 if rev < 0 or rev >= len(self):
692 693 raise ValueError
693 694 return self.node(rev)
694 695 except (ValueError, OverflowError):
695 696 pass
696 697 if len(id) == 40:
697 698 try:
698 699 # a full hex nodeid?
699 700 node = bin(id)
700 701 self.rev(node)
701 702 return node
702 703 except (TypeError, LookupError):
703 704 pass
704 705
705 706 def _partialmatch(self, id):
707 if id in self._pcache:
708 return self._pcache[id]
709
706 710 if len(id) < 40:
707 711 try:
708 712 # hex(node)[:...]
709 713 l = len(id) // 2 # grab an even number of digits
710 714 bin_id = bin(id[:l * 2])
711 715 nl = [n for n in self.nodemap if n[:l] == bin_id]
712 716 nl = [n for n in nl if hex(n).startswith(id)]
713 717 if len(nl) > 0:
714 718 if len(nl) == 1:
719 self._pcache[id] = nl[0]
715 720 return nl[0]
716 721 raise LookupError(id, self.indexfile,
717 722 _('ambiguous identifier'))
718 723 return None
719 724 except TypeError:
720 725 pass
721 726
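This is the change the commit introduces: resolving an abbreviated hex id costs a scan over the whole nodemap, so _partialmatch now consults and fills self._pcache (created empty in __init__), turning repeated lookups of the same prefix into dictionary hits. A standalone model of the idea (prefixcache and its names are illustrative, not revlog API):

    import binascii

    class prefixcache(object):
        def __init__(self, nodemap):
            self.nodemap = nodemap  # binary node -> rev
            self._pcache = {}       # hex prefix -> binary node

        def partialmatch(self, id):
            if id in self._pcache:
                return self._pcache[id]  # cache hit: no scan
            nl = [n for n in self.nodemap
                  if binascii.hexlify(n).startswith(id)]
            if len(nl) == 1:
                self._pcache[id] = nl[0]  # memoize the unique match
                return nl[0]
            if nl:
                raise LookupError("ambiguous identifier")
            return None

Only unique matches are cached, so an ambiguous or missing prefix is still re-scanned on the next call, mirroring the code above.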
722 727 def lookup(self, id):
723 728 """locate a node based on:
724 729 - revision number or str(revision number)
725 730 - nodeid or subset of hex nodeid
726 731 """
727 732 n = self._match(id)
728 733 if n is not None:
729 734 return n
730 735 n = self._partialmatch(id)
731 736 if n:
732 737 return n
733 738
734 739 raise LookupError(id, self.indexfile, _('no match found'))
735 740
736 741 def cmp(self, node, text):
737 742 """compare text with a given file revision
738 743
739 744 returns True if text is different than what is stored.
740 745 """
741 746 p1, p2 = self.parents(node)
742 747 return hash(text, p1, p2) != node
743 748
744 749 def _addchunk(self, offset, data):
745 750 o, d = self._chunkcache
746 751 # try to add to existing cache
747 752 if o + len(d) == offset and len(d) + len(data) < _chunksize:
748 753 self._chunkcache = o, d + data
749 754 else:
750 755 self._chunkcache = offset, data
751 756
752 757 def _loadchunk(self, offset, length):
753 758 if self._inline:
754 759 df = self.opener(self.indexfile)
755 760 else:
756 761 df = self.opener(self.datafile)
757 762
758 763 readahead = max(65536, length)
759 764 df.seek(offset)
760 765 d = df.read(readahead)
761 766 self._addchunk(offset, d)
762 767 if readahead > length:
763 768 return d[:length]
764 769 return d
765 770
766 771 def _getchunk(self, offset, length):
767 772 o, d = self._chunkcache
768 773 l = len(d)
769 774
770 775 # is it in the cache?
771 776 cachestart = offset - o
772 777 cacheend = cachestart + length
773 778 if cachestart >= 0 and cacheend <= l:
774 779 if cachestart == 0 and cacheend == l:
775 780 return d # avoid a copy
776 781 return d[cachestart:cacheend]
777 782
778 783 return self._loadchunk(offset, length)
779 784
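The chunk cache is a single (offset, data) window over the data file: _getchunk answers from it only when the requested byte range lies entirely inside the window, and _loadchunk otherwise reads at least 64 KiB ahead so that nearby requests hit next time. A sketch of the containment test:

    def cachedslice(cache, offset, length):
        # cache is (fileoffset, data); None signals a miss
        o, d = cache
        start = offset - o
        end = start + length
        if start >= 0 and end <= len(d):
            if start == 0 and end == len(d):
                return d               # whole window: avoid a copy
            return d[start:end]
        return None

    assert cachedslice((100, "abcdef"), 102, 3) == "cde"
    assert cachedslice((100, "abcdef"), 99, 3) is None  # starts before the window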
780 785 def _chunkraw(self, startrev, endrev):
781 786 start = self.start(startrev)
782 787 length = self.end(endrev) - start
783 788 if self._inline:
784 789 start += (startrev + 1) * self._io.size
785 790 return self._getchunk(start, length)
786 791
787 792 def _chunk(self, rev):
788 793 return decompress(self._chunkraw(rev, rev))
789 794
790 795 def _chunkclear(self):
791 796 self._chunkcache = (0, '')
792 797
793 798 def deltaparent(self, rev):
794 799 """return previous revision or parentrev according to flags"""
795 800 if self.flags(rev) & REVIDX_PARENTDELTA:
796 801 return self.parentrevs(rev)[0]
797 802 else:
798 803 return rev - 1
799 804
800 805 def revdiff(self, rev1, rev2):
801 806 """return or calculate a delta between two revisions"""
802 807 if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
803 808 return self._chunk(rev2)
804 809
805 810 return mdiff.textdiff(self.revision(self.node(rev1)),
806 811 self.revision(self.node(rev2)))
807 812
808 813 def revision(self, node):
809 814 """return an uncompressed revision of a given node"""
810 815 cachedrev = None
811 816 if node == nullid:
812 817 return ""
813 818 if self._cache:
814 819 if self._cache[0] == node:
815 820 return self._cache[2]
816 821 cachedrev = self._cache[1]
817 822
818 823 # look up what we need to read
819 824 text = None
820 825 rev = self.rev(node)
821 826 base = self.base(rev)
822 827
823 828 # check rev flags
824 829 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
825 830 raise RevlogError(_('incompatible revision flag %x') %
826 831 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
827 832
828 833 # build delta chain
829 834 chain = []
830 835 index = self.index # for performance
831 836 iterrev = rev
832 837 e = index[iterrev]
833 838 while iterrev != base and iterrev != cachedrev:
834 839 chain.append(iterrev)
835 840 if e[0] & REVIDX_PARENTDELTA:
836 841 iterrev = e[5]
837 842 else:
838 843 iterrev -= 1
839 844 e = index[iterrev]
840 845 chain.reverse()
841 846 base = iterrev
842 847
843 848 if iterrev == cachedrev:
844 849 # cache hit
845 850 text = self._cache[2]
846 851
847 852 # drop cache to save memory
848 853 self._cache = None
849 854
850 855 self._chunkraw(base, rev)
851 856 if text is None:
852 857 text = self._chunk(base)
853 858
854 859 bins = [self._chunk(r) for r in chain]
855 860 text = mdiff.patches(text, bins)
856 861
857 862 text = self._checkhash(text, node)
858 863
859 864 self._cache = (node, rev, text)
860 865 return text
861 866
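revision() walks backwards from the requested rev, following either the parent-delta flag or rev - 1 until it hits a base revision (stored as full text) or the rev held in self._cache, then replays the collected deltas oldest-first. A toy model of that reconstruction, with patch() standing in for mdiff.patches:

    def reconstruct(fulltexts, deltas, deltaparent, rev, patch):
        # fulltexts: rev -> full text (the chain bases)
        # deltas: rev -> delta against deltaparent(rev)
        chain = []
        while rev not in fulltexts:   # collect the chain, newest first
            chain.append(rev)
            rev = deltaparent(rev)
        text = fulltexts[rev]
        for r in reversed(chain):     # replay deltas oldest first
            text = patch(text, deltas[r])
        return text

    texts = {0: "v0"}
    deltas = {1: "+a", 2: "+b"}
    patch = lambda t, d: t + d        # toy patcher: just append
    assert reconstruct(texts, deltas, lambda r: r - 1, 2, patch) == "v0+a+b"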
862 867 def _checkhash(self, text, node):
863 868 p1, p2 = self.parents(node)
rev = self.rev(node) # bug fix: 'rev' was referenced below but never bound
864 869 if (node != hash(text, p1, p2) and
865 870 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
866 871 raise RevlogError(_("integrity check failed on %s:%d")
867 872 % (self.indexfile, rev))
868 873 return text
869 874
870 875 def checkinlinesize(self, tr, fp=None):
871 876 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
872 877 return
873 878
874 879 trinfo = tr.find(self.indexfile)
875 880 if trinfo is None:
876 881 raise RevlogError(_("%s not found in the transaction")
877 882 % self.indexfile)
878 883
879 884 trindex = trinfo[2]
880 885 dataoff = self.start(trindex)
881 886
882 887 tr.add(self.datafile, dataoff)
883 888
884 889 if fp:
885 890 fp.flush()
886 891 fp.close()
887 892
888 893 df = self.opener(self.datafile, 'w')
889 894 try:
890 895 for r in self:
891 896 df.write(self._chunkraw(r, r))
892 897 finally:
893 898 df.close()
894 899
895 900 fp = self.opener(self.indexfile, 'w', atomictemp=True)
896 901 self.version &= ~(REVLOGNGINLINEDATA)
897 902 self._inline = False
898 903 for i in self:
899 904 e = self._io.packentry(self.index[i], self.node, self.version, i)
900 905 fp.write(e)
901 906
902 907 # if we don't call rename, the temp file will never replace the
903 908 # real index
904 909 fp.rename()
905 910
906 911 tr.replace(self.indexfile, trindex * self._io.size)
907 912 self._chunkclear()
908 913
909 914 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
910 915 """add a revision to the log
911 916
912 917 text - the revision data to add
913 918 transaction - the transaction object used for rollback
914 919 link - the linkrev data to add
915 920 p1, p2 - the parent nodeids of the revision
916 921 cachedelta - an optional precomputed delta
917 922 """
918 923 node = hash(text, p1, p2)
919 924 if (node in self.nodemap and
920 925 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
921 926 return node
922 927
923 928 dfh = None
924 929 if not self._inline:
925 930 dfh = self.opener(self.datafile, "a")
926 931 ifh = self.opener(self.indexfile, "a+")
927 932 try:
928 933 return self._addrevision(node, text, transaction, link, p1, p2,
929 934 cachedelta, ifh, dfh)
930 935 finally:
931 936 if dfh:
932 937 dfh.close()
933 938 ifh.close()
934 939
935 940 def _addrevision(self, node, text, transaction, link, p1, p2,
936 941 cachedelta, ifh, dfh):
937 942
938 943 btext = [text]
939 944 def buildtext():
940 945 if btext[0] is not None:
941 946 return btext[0]
942 947 # flush any pending writes here so we can read it in revision
943 948 if dfh:
944 949 dfh.flush()
945 950 ifh.flush()
946 951 basetext = self.revision(self.node(cachedelta[0]))
947 952 btext[0] = mdiff.patch(basetext, cachedelta[1])
948 953 chk = hash(btext[0], p1, p2)
949 954 if chk != node:
950 955 raise RevlogError(_("consistency error in delta"))
951 956 return btext[0]
952 957
953 958 def builddelta(rev):
954 959 # can we use the cached delta?
955 960 if cachedelta and cachedelta[0] == rev:
956 961 delta = cachedelta[1]
957 962 else:
958 963 t = buildtext()
959 964 ptext = self.revision(self.node(rev))
960 965 delta = mdiff.textdiff(ptext, t)
961 966 data = compress(delta)
962 967 l = len(data[1]) + len(data[0])
963 968 base = self.base(rev)
964 969 dist = l + offset - self.start(base)
965 970 return dist, l, data, base
966 971
967 972 curr = len(self)
968 973 prev = curr - 1
969 974 base = curr
970 975 offset = self.end(prev)
971 976 flags = 0
972 977 d = None
973 978 p1r, p2r = self.rev(p1), self.rev(p2)
974 979
975 980 # should we try to build a delta?
976 981 if prev != nullrev:
977 982 d = builddelta(prev)
978 983 if self._parentdelta and prev != p1r:
979 984 d2 = builddelta(p1r)
980 985 if d2 < d:
981 986 d = d2
982 987 flags = REVIDX_PARENTDELTA
983 988 dist, l, data, base = d
984 989
985 990 # full versions are inserted when the needed deltas
986 991 # become comparable to the uncompressed text
987 992 # or the base revision is punched
988 993 if text is None:
989 994 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
990 995 cachedelta[1])
991 996 else:
992 997 textlen = len(text)
993 998 if (d is None or dist > textlen * 2 or
994 999 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
995 1000 text = buildtext()
996 1001 data = compress(text)
997 1002 l = len(data[1]) + len(data[0])
998 1003 base = curr
999 1004
1000 1005 e = (offset_type(offset, flags), l, textlen,
1001 1006 base, link, p1r, p2r, node)
1002 1007 self.index.insert(-1, e)
1003 1008 self.nodemap[node] = curr
1004 1009
1005 1010 entry = self._io.packentry(e, self.node, self.version, curr)
1006 1011 if not self._inline:
1007 1012 transaction.add(self.datafile, offset)
1008 1013 transaction.add(self.indexfile, curr * len(entry))
1009 1014 if data[0]:
1010 1015 dfh.write(data[0])
1011 1016 dfh.write(data[1])
1012 1017 dfh.flush()
1013 1018 ifh.write(entry)
1014 1019 else:
1015 1020 offset += curr * self._io.size
1016 1021 transaction.add(self.indexfile, offset, curr)
1017 1022 ifh.write(entry)
1018 1023 ifh.write(data[0])
1019 1024 ifh.write(data[1])
1020 1025 self.checkinlinesize(transaction, ifh)
1021 1026
1022 1027 if type(text) == str: # only accept immutable objects
1023 1028 self._cache = (node, curr, text)
1024 1029 return node
1025 1030
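The 'dist > textlen * 2' test above is what bounds delta chains: dist counts the bytes from the start of the chain's base full text through the new delta, so once the accumulated run outgrows twice the new text a fresh full version is stored and the chain resets. A sketch of just that decision (including the punched-base case):

    def storage_choice(dist, textlen, base_is_punched):
        # dist: bytes from the chain's base revision through this delta
        if dist > textlen * 2 or base_is_punched:
            return 'fulltext'          # reset the delta chain
        return 'delta'

    assert storage_choice(500, 1000, False) == 'delta'
    assert storage_choice(2500, 1000, False) == 'fulltext'
    assert storage_choice(10, 1000, True) == 'fulltext'  # punched base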
1026 1031 def group(self, nodelist, lookup, infocollect=None, fullrev=False):
1027 1032 """Calculate a delta group, yielding a sequence of changegroup chunks
1028 1033 (strings).
1029 1034
1030 1035 Given a list of changeset revs, return a set of deltas and
1031 1036 metadata corresponding to nodes. The first delta is
1032 1037 first parent(nodelist[0]) -> nodelist[0], the receiver is
1033 1038 guaranteed to have this parent as it has all history before
1034 1039 these changesets. If the first parent is nullrev, the
1035 1040 changegroup starts with a full revision.
1036 1041 fullrev forces the insertion of the full revision, necessary
1037 1042 in the case of shallow clones where the first parent might
1038 1043 not exist at the receiver.
1039 1044 """
1040 1045
1041 1046 revs = [self.rev(n) for n in nodelist]
1042 1047
1043 1048 # if we don't have any revisions touched by these changesets, bail
1044 1049 if not revs:
1045 1050 yield changegroup.closechunk()
1046 1051 return
1047 1052
1048 1053 # add the parent of the first rev
1049 1054 p = self.parentrevs(revs[0])[0]
1050 1055 revs.insert(0, p)
1051 1056 if p == nullrev:
1052 1057 fullrev = True
1053 1058
1054 1059 # build deltas
1055 1060 for d in xrange(len(revs) - 1):
1056 1061 a, b = revs[d], revs[d + 1]
1057 1062 nb = self.node(b)
1058 1063
1059 1064 if infocollect is not None:
1060 1065 infocollect(nb)
1061 1066
1062 1067 p = self.parents(nb)
1063 1068 meta = nb + p[0] + p[1] + lookup(nb)
1064 1069 if fullrev:
1065 1070 d = self.revision(nb)
1066 1071 meta += mdiff.trivialdiffheader(len(d))
1067 1072 fullrev = False
1068 1073 else:
1069 1074 d = self.revdiff(a, b)
1070 1075 yield changegroup.chunkheader(len(meta) + len(d))
1071 1076 yield meta
1072 1077 yield d
1073 1078
1074 1079 yield changegroup.closechunk()
1075 1080
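Every piece group() yields is length-prefix framed: changegroup.chunkheader(n) emits a big-endian 4-byte length that counts itself plus the n payload bytes, and a zero-length chunk terminates the group. A sketch of that framing (matching how the helpers are used above; treat it as illustrative):

    import struct

    def chunkheader(length):
        # total chunk length, including these 4 header bytes
        return struct.pack(">l", length + 4)

    def closechunk():
        return struct.pack(">l", 0)  # end-of-group marker

    frame = chunkheader(5) + "hello" + closechunk()
    assert frame[:4] == struct.pack(">l", 9)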
1076 1081 def addgroup(self, bundle, linkmapper, transaction):
1077 1082 """
1078 1083 add a delta group
1079 1084
1080 1085 given a set of deltas, add them to the revision log. the
1081 1086 first delta is against its parent, which should be in our
1082 1087 log, the rest are against the previous delta.
1083 1088 """
1084 1089
1085 1090 # track the base of the current delta log
1086 1091 node = None
1087 1092
1088 1093 r = len(self)
1089 1094 end = 0
1090 1095 if r:
1091 1096 end = self.end(r - 1)
1092 1097 ifh = self.opener(self.indexfile, "a+")
1093 1098 isize = r * self._io.size
1094 1099 if self._inline:
1095 1100 transaction.add(self.indexfile, end + isize, r)
1096 1101 dfh = None
1097 1102 else:
1098 1103 transaction.add(self.indexfile, isize, r)
1099 1104 transaction.add(self.datafile, end)
1100 1105 dfh = self.opener(self.datafile, "a")
1101 1106
1102 1107 try:
1103 1108 # loop through our set of deltas
1104 1109 chain = None
1105 1110 while 1:
1106 1111 chunkdata = bundle.parsechunk()
1107 1112 if not chunkdata:
1108 1113 break
1109 1114 node = chunkdata['node']
1110 1115 p1 = chunkdata['p1']
1111 1116 p2 = chunkdata['p2']
1112 1117 cs = chunkdata['cs']
1113 1118 delta = chunkdata['data']
1114 1119
1115 1120 link = linkmapper(cs)
1116 1121 if (node in self.nodemap and
1117 1122 (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
1118 1123 # this can happen if two branches make the same change
1119 1124 chain = node
1120 1125 continue
1121 1126
1122 1127 for p in (p1, p2):
1123 1128 if p not in self.nodemap:
1124 1129 if self._shallow:
1125 1130 # add null entries for missing parents
1126 1131 # XXX FIXME
1127 1132 #if base == nullrev:
1128 1133 # base = len(self)
1129 1134 #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
1130 1135 # 0, 0, base, nullrev, nullrev, nullrev, p)
1131 1136 #self.index.insert(-1, e)
1132 1137 #self.nodemap[p] = r
1133 1138 #entry = self._io.packentry(e, self.node,
1134 1139 # self.version, r)
1135 1140 #ifh.write(entry)
1136 1141 #t, r = r, r + 1
1137 1142 raise LookupError(p, self.indexfile,
1138 1143 _('unknown parent'))
1139 1144 else:
1140 1145 raise LookupError(p, self.indexfile,
1141 1146 _('unknown parent'))
1142 1147
1143 1148 if not chain:
1144 1149 # retrieve the parent revision of the delta chain
1145 1150 chain = p1
1146 1151 if chain not in self.nodemap:
1147 1152 raise LookupError(chain, self.indexfile, _('unknown base'))
1148 1153
1149 1154 chainrev = self.rev(chain)
1150 1155 chain = self._addrevision(node, None, transaction, link,
1151 1156 p1, p2, (chainrev, delta), ifh, dfh)
1152 1157 if not dfh and not self._inline:
1153 1158 # addrevision switched from inline to conventional
1154 1159 # reopen the index
1155 1160 dfh = self.opener(self.datafile, "a")
1156 1161 ifh = self.opener(self.indexfile, "a")
1157 1162 finally:
1158 1163 if dfh:
1159 1164 dfh.close()
1160 1165 ifh.close()
1161 1166
1162 1167 return node
1163 1168
1164 1169 def strip(self, minlink, transaction):
1165 1170 """truncate the revlog on the first revision with a linkrev >= minlink
1166 1171
1167 1172 This function is called when we're stripping revision minlink and
1168 1173 its descendants from the repository.
1169 1174
1170 1175 We have to remove all revisions with linkrev >= minlink, because
1171 1176 the equivalent changelog revisions will be renumbered after the
1172 1177 strip.
1173 1178
1174 1179 So we truncate the revlog on the first of these revisions, and
1175 1180 trust that the caller has saved the revisions that shouldn't be
1176 1181 removed and that it'll re-add them after this truncation.
1177 1182 """
1178 1183 if len(self) == 0:
1179 1184 return
1180 1185
1181 1186 for rev in self:
1182 1187 if self.index[rev][4] >= minlink:
1183 1188 break
1184 1189 else:
1185 1190 return
1186 1191
1187 1192 # first truncate the files on disk
1188 1193 end = self.start(rev)
1189 1194 if not self._inline:
1190 1195 transaction.add(self.datafile, end)
1191 1196 end = rev * self._io.size
1192 1197 else:
1193 1198 end += rev * self._io.size
1194 1199
1195 1200 transaction.add(self.indexfile, end)
1196 1201
1197 1202 # then reset internal state in memory to forget those revisions
1198 1203 self._cache = None
1199 1204 self._chunkclear()
1200 1205 for x in xrange(rev, len(self)):
1201 1206 del self.nodemap[self.node(x)]
1202 1207
1203 1208 del self.index[rev:-1]
1204 1209
1205 1210 def checksize(self):
1206 1211 expected = 0
1207 1212 if len(self):
1208 1213 expected = max(0, self.end(len(self) - 1))
1209 1214
1210 1215 try:
1211 1216 f = self.opener(self.datafile)
1212 1217 f.seek(0, 2)
1213 1218 actual = f.tell()
1214 1219 dd = actual - expected
1215 1220 except IOError, inst:
1216 1221 if inst.errno != errno.ENOENT:
1217 1222 raise
1218 1223 dd = 0
1219 1224
1220 1225 try:
1221 1226 f = self.opener(self.indexfile)
1222 1227 f.seek(0, 2)
1223 1228 actual = f.tell()
1224 1229 s = self._io.size
1225 1230 i = max(0, actual // s)
1226 1231 di = actual - (i * s)
1227 1232 if self._inline:
1228 1233 databytes = 0
1229 1234 for r in self:
1230 1235 databytes += max(0, self.length(r))
1231 1236 dd = 0
1232 1237 di = actual - len(self) * s - databytes
1233 1238 except IOError, inst:
1234 1239 if inst.errno != errno.ENOENT:
1235 1240 raise
1236 1241 di = 0
1237 1242
1238 1243 return (dd, di)
1239 1244
1240 1245 def files(self):
1241 1246 res = [self.indexfile]
1242 1247 if not self._inline:
1243 1248 res.append(self.datafile)
1244 1249 return res