# Changeset: revlog: pass rev to _checkhash
# Author: Matt Mackall
# Revision: r13276:ba6a6333 (default branch)
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 16 from i18n import _
17 17 import changegroup, ancestor, mdiff, parsers, error, util
18 18 import struct, zlib, errno
19 19
# local aliases for frequently-called library functions
_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)  # revision data stored inline in the index
REVLOGSHALLOW = (1 << 17)       # shallow (partial-history) revlog
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW  # flags valid for NG

# revlog index flags (low 16 bits of each entry's offset/flags field)
REVIDX_PARENTDELTA = 1   # delta base is the first parent, not rev - 1
REVIDX_PUNCHED_FLAG = 2  # revision content has been punched out
REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576  # upper bound on the in-memory chunk cache

# re-export the revlog exceptions at module level for callers
RevlogError = error.RevlogError
LookupError = error.LookupError
def getoffset(q):
    """Extract the data-file byte offset from a packed offset/flags field."""
    # the offset lives in the bits above the 16-bit flags field
    shifted = q >> 16
    return int(shifted)
50 50
def gettype(q):
    """Extract the 16-bit flags/type field from a packed offset/flags value."""
    # only the low 16 bits carry per-revision flags
    masked = q & 0xFFFF
    return int(masked)
53 53
def offset_type(offset, type):
    # pack offset and flags into one field: offset in the high bits, 16-bit
    # type/flags in the low bits; long() guards against py2 int overflow
    return long(long(offset) << 16 | type)
56 56
# precomputed sha1 state seeded with nullid; hash() below copies it instead
# of re-hashing the null parent on every call
nullhash = _sha(nullid)
58 58
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # by convention, when only one parent exists it is p1 and p2 is nullid
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        digester = nullhash.copy()
        digester.update(p1)
    else:
        # hash the two real parents in a canonical (sorted) order
        lo, hi = sorted([p1, p2])
        digester = _sha(lo)
        digester.update(hi)
    digester.update(text)
    return digester.digest()
79 79
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    size = len(text)
    compressed = None
    # below 44 bytes compression can never win, so don't even try
    if size >= 44:
        if size <= 1000000:
            compressed = zlib.compress(text)
        else:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so lets do this in pieces
            z = zlib.compressobj()
            pieces = []
            offset = 0
            while offset < size:
                end = offset + 2**20
                pieces.append(z.compress(text[offset:end]))
                offset = end
            pieces.append(z.flush())
            if sum(map(len, pieces)) < size:
                compressed = "".join(pieces)
    if compressed is None or len(compressed) > size:
        # compression did not pay off: store verbatim, tagging with 'u'
        # unless the text already starts with the NUL "stored" marker
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", compressed)
108 108
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    marker = bin[0]
    if marker == '\0':
        # NUL-prefixed data is stored uncompressed, verbatim
        return bin
    if marker == 'x':
        # 'x' is the first byte of a zlib stream
        return _decompress(bin)
    if marker == 'u':
        # explicit "uncompressed" tag: strip the marker byte
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % marker)
121 121
indexformatv0 = ">4l20s20s20s"  # v0 entry: 4 longs + parent/parent/node hashes
v0shaoffset = 56                # byte offset of the nodeid within a v0 entry
124 124
class revlogoldio(object):
    """Parser/packer for the legacy (version 0) revlog index format."""

    def __init__(self):
        # fixed byte size of one on-disk v0 index entry
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        """Parse raw v0 index data into (index, nodemap, chunkcache)."""
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        # v0 has no chunk cache to prime, hence None
        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Pack a v1-style entry tuple back into v0 on-disk bytes.

        node is a rev->node callable used to expand parent revisions."""
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)
157 157
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"  # Q packs offset+flags into one 64-bit field
ngshaoffset = 32                  # byte offset of the nodeid within an entry
versionformat = ">I"              # header word: revlog version + feature flags
171 171
class revlogio(object):
    """Parser/packer for RevlogNG on-disk index entries."""

    def __init__(self):
        # fixed byte size of one on-disk NG index entry
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        """Parse raw NG index data into (index, nodemap, chunkcache)."""
        # the C parser returns the index list plus a chunk-cache hint;
        # the nodemap is built lazily, hence None here
        index, cache = parsers.parse_index2(data, inline)
        return index, None, cache

    def packentry(self, entry, node, version, rev):
        """Pack an index entry tuple into its on-disk byte form."""
        packed = _pack(indexformatng, *entry)
        if rev == 0:
            # the first entry doubles as the revlog header: splice the
            # version/flags word over the offset's high bytes
            packed = _pack(versionformat, version) + packed[4:]
        return packed
186 186
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile, shallowroot=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # (node, rev, text) of the most recently reconstructed revision
        self._cache = None
        # (offset, data) window over the data file
        self._chunkcache = (0, '')
        self.index = []
        self._shallowroot = shallowroot
        self._parentdelta = 0
        # cache of abbreviated-hex-id lookups
        self._pcache = {}
        # lazily-populated node -> rev map
        self._nodecache = {nullid: nullrev}
        # resume point for the backward scan in rev()
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA
            if v & REVLOGNG and 'parentdelta' in opener.options:
                self._parentdelta = 1

        if shallowroot:
            v |= REVLOGSHALLOW

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            if len(i) > 0:
                # an existing on-disk index overrides the requested version
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            # a missing index just means an empty revlog
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._shallow = v & REVLOGSHALLOW
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
279 279
    def tip(self):
        # last real revision; the index carries a trailing null-sentinel entry
        return self.node(len(self.index) - 2)
    def __len__(self):
        # number of revisions, excluding the null-sentinel entry
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i

    @util.propertycache
    def nodemap(self):
        # doing a rev lookup populates self._nodecache as a side effect
        n = self.rev(self.node(0))
        return self._nodecache
292 292
    def rev(self, node):
        """Return the revision number for node, filling _nodecache lazily.

        Raises LookupError if node is not in this revlog."""
        try:
            return self._nodecache[node]
        except KeyError:
            n = self._nodecache
            if node in n:
                return n[node]
            i = self.index
            p = self._nodepos
            if p is None:
                # start from the tip, skipping the null-sentinel entry
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    # remember where to resume the backward scan next time
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))
311 311
    def node(self, rev):
        # index entry field 7 is the binary nodeid
        return self.index[rev][7]
    def linkrev(self, rev):
        # field 4 links this revision to its changelog revision
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        # fields 5 and 6 are the two parent revision numbers
        return self.index[rev][5:7]
    def start(self, rev):
        # the high bits of field 0 hold the data-file byte offset
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        # field 1 is the length of the compressed chunk
        return self.index[rev][1]
    def base(self, rev):
        # field 3 is the base revision of this entry's delta chain
        return self.index[rev][3]
    def flags(self, rev):
        # the low 16 bits of field 0 are the per-revision flags
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        # a negative stored size means "unknown": reconstruct the text
        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize
341 341
    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        # breadth-first walk from node toward the roots
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                # anything at or below the stop revision is out of range
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable
364 364
    def ancestors(self, *revs):
        """Generate the ancestors of 'revs' in reverse topological order.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself. Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc. Result
        does not include the null revision."""
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            # consume from the front, appending newly-found parents (BFS)
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent
381 381
    def descendants(self, *revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            # everything descends from the null revision
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in xrange(first + 1, len(self)):
            for x in self.parentrevs(i):
                # a revision with an already-seen parent is a descendant
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break
402 402
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]
446 446
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descedents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # only heads actually reached from the roots survive
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
601 601
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            # fast path: any rev referenced as a parent is not a head
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        # sweep forward; each reachable rev displaces its reachable parents
        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]
641 641
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        # children always have higher revision numbers than their parents
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                # parentless revisions count as children of the null rev
                c.append(self.node(r))
        return c
655 655
656 656 def descendant(self, start, end):
657 657 if start == nullrev:
658 658 return True
659 659 for i in self.descendants(start):
660 660 if i == end:
661 661 return True
662 662 elif i > end:
663 663 break
664 664 return False
665 665
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # fast path, check if it is a descendant
        a, b = self.rev(a), self.rev(b)
        start, end = sorted((a, b))
        if self.descendant(start, end):
            return self.node(start)

        def parents(rev):
            # real (non-null) parent revisions only
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(a, b, parents)
        if c is None:
            return nullid

        return self.node(c)
683 683
    def _match(self, id):
        """Resolve an exact identifier (rev number, str(rev), binary node
        or full hex node) to a binary node; return None on no match."""
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
717 717
    def _partialmatch(self, id):
        """Resolve an abbreviated hex nodeid, caching the result.

        Returns the matching binary node, or None if nothing matches;
        raises LookupError when the prefix is ambiguous."""
        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                # narrow binary-prefix matches down to full hex-string matches
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass
738 738
739 739 def lookup(self, id):
740 740 """locate a node based on:
741 741 - revision number or str(revision number)
742 742 - nodeid or subset of hex nodeid
743 743 """
744 744 n = self._match(id)
745 745 if n is not None:
746 746 return n
747 747 n = self._partialmatch(id)
748 748 if n:
749 749 return n
750 750
751 751 raise LookupError(id, self.indexfile, _('no match found'))
752 752
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        # comparing hashes avoids reconstructing the stored text
        return hash(text, p1, p2) != node
760 760
761 761 def _addchunk(self, offset, data):
762 762 o, d = self._chunkcache
763 763 # try to add to existing cache
764 764 if o + len(d) == offset and len(d) + len(data) < _chunksize:
765 765 self._chunkcache = o, d + data
766 766 else:
767 767 self._chunkcache = offset, data
768 768
    def _loadchunk(self, offset, length):
        """Read length bytes at offset from the data file (or the index
        file when inline), priming the chunk cache with readahead."""
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        # read at least 64k to amortize the cost of small chunk requests
        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d
782 782
    def _getchunk(self, offset, length):
        """Return length bytes at offset, from the cache when possible."""
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        # cache miss: go to disk (which also refreshes the cache)
        return self._loadchunk(offset, length)
796 796
    def _chunkraw(self, startrev, endrev):
        """Return the raw (compressed) data spanning revs startrev..endrev."""
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            # skip the index entries interleaved with the inline data
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)
803 803
    def _chunk(self, rev):
        # decompressed chunk (delta or full text) for a single revision
        return decompress(self._chunkraw(rev, rev))
806 806
    def _chunkclear(self):
        # reset the chunk cache to an empty window at offset 0
        self._chunkcache = (0, '')
809 809
810 810 def deltaparent(self, rev):
811 811 """return previous revision or parentrev according to flags"""
812 812 if self.flags(rev) & REVIDX_PARENTDELTA:
813 813 return self.parentrevs(rev)[0]
814 814 else:
815 815 return rev - 1
816 816
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
            # the stored chunk already is exactly the rev1->rev2 delta
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))
824 824
825 825 def revision(self, node):
826 826 """return an uncompressed revision of a given node"""
827 827 cachedrev = None
828 828 if node == nullid:
829 829 return ""
830 830 if self._cache:
831 831 if self._cache[0] == node:
832 832 return self._cache[2]
833 833 cachedrev = self._cache[1]
834 834
835 835 # look up what we need to read
836 836 text = None
837 837 rev = self.rev(node)
838 838 base = self.base(rev)
839 839
840 840 # check rev flags
841 841 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
842 842 raise RevlogError(_('incompatible revision flag %x') %
843 843 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
844 844
845 845 # build delta chain
846 846 chain = []
847 847 index = self.index # for performance
848 848 iterrev = rev
849 849 e = index[iterrev]
850 850 while iterrev != base and iterrev != cachedrev:
851 851 chain.append(iterrev)
852 852 if e[0] & REVIDX_PARENTDELTA:
853 853 iterrev = e[5]
854 854 else:
855 855 iterrev -= 1
856 856 e = index[iterrev]
857 857 chain.reverse()
858 858 base = iterrev
859 859
860 860 if iterrev == cachedrev:
861 861 # cache hit
862 862 text = self._cache[2]
863 863
864 864 # drop cache to save memory
865 865 self._cache = None
866 866
867 867 self._chunkraw(base, rev)
868 868 if text is None:
869 869 text = self._chunk(base)
870 870
871 871 bins = [self._chunk(r) for r in chain]
872 872 text = mdiff.patches(text, bins)
873 873
874 text = self._checkhash(text, node)
874 text = self._checkhash(text, node, rev)
875 875
876 876 self._cache = (node, rev, text)
877 877 return text
878 878
879 def _checkhash(self, text, node):
879 def _checkhash(self, text, node, rev):
880 880 p1, p2 = self.parents(node)
881 881 if (node != hash(text, p1, p2) and
882 882 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
883 883 raise RevlogError(_("integrity check failed on %s:%d")
884 884 % (self.indexfile, rev))
885 885 return text
886 886
    def checkinlinesize(self, tr, fp=None):
        """Split an inline revlog into separate index and data files once
        it outgrows _maxinline.

        tr is the active transaction; fp, if given, is an open index file
        pointer that will be flushed and closed before rewriting."""
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            # copy each revision's raw chunk out to the new data file
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
925 925
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        """
        node = hash(text, p1, p2)
        if (node in self.nodemap and
            (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
            # already stored (and not punched): nothing to do
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()
951 951
    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """Internal worker shared by addrevision() and addgroup().

        node - expected hash of the revision (hash(text, p1, p2))
        text - full revision text, or None if only cachedelta is known
        transaction - transaction used to journal index/data offsets
        link - linkrev to record for the new revision
        p1, p2 - parent nodeids
        cachedelta - optional (baserev, delta) pair to avoid recomputing
        ifh, dfh - open index / data file handles (dfh is None when inline)

        Returns node.
        """
        # one-element list so the closures below can lazily fill in and
        # share the reconstructed full text with this scope
        btext = [text]
        def buildtext():
            # return the full text, reconstructing it from cachedelta
            # (and verifying its hash) on first use
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            chk = hash(btext[0], p1, p2)
            if chk != node:
                raise RevlogError(_("consistency error in delta"))
            return btext[0]

        def builddelta(rev):
            # build a (dist, length, compressed-data, base) candidate for
            # storing this revision as a delta against rev
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = compress(delta)
            l = len(data[1]) + len(data[0])
            base = self.base(rev)
            # dist: bytes that must be read to rebuild the revision from
            # the start of its delta chain
            dist = l + offset - self.start(base)
            return dist, l, data, base

        curr = len(self)        # rev number the new revision will get
        prev = curr - 1         # current tip
        base = curr
        offset = self.end(prev)
        flags = 0
        d = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            d = builddelta(prev)
            if self._parentdelta and prev != p1r:
                # also try a delta against p1; tuple comparison picks the
                # candidate with the smaller chain distance
                d2 = builddelta(p1r)
                if d2 < d:
                    d = d2
                    flags = REVIDX_PARENTDELTA
            dist, l, data, base = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        # or the base revision is punched
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if (d is None or dist > textlen * 2 or
            (self.flags(base) & REVIDX_PUNCHED_FLAG)):
            # store the full text instead of a delta
            text = buildtext()
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        # insert just before the final index entry (the index keeps a
        # sentinel at the end, hence insert(-1, ...))
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            # inline: index entries and data are interleaved in one file
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node
1042 1042
    def group(self, nodelist, lookup, infocollect=None, fullrev=False):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.
        fullrev forces the insertion of the full revision, necessary
        in the case of shallow clones where the first parent might
        not exist at the receiver.

        nodelist - nodes to bundle, in revlog order
        lookup - callable mapping a node to extra bytes appended to the
                 chunk metadata (presumably the linknode; verify at callers)
        infocollect - optional callable invoked with each node visited
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)
        if p == nullrev:
            # nothing to delta against: emit the first revision in full
            fullrev = True

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if fullrev:
                d = self.revision(nb)
                # a trivial diff header makes the full text look like a delta
                meta += mdiff.trivialdiffheader(len(d))
                fullrev = False
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            yield d

        yield changegroup.closechunk()
1092 1092
    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        bundle - object yielding parsed chunks via parsechunk()
        linkmapper - maps a changeset node (chunk 'cs') to a linkrev
        transaction - transaction used to journal index/data writes

        Returns the node of the last chunk processed, or None if the
        bundle contained no chunks.
        """

        # track the base of the current delta log
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            # inline: index entries and data share one file
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while 1:
                chunkdata = bundle.parsechunk()
                if not chunkdata:
                    # end of the chunk stream
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                delta = chunkdata['data']

                link = linkmapper(cs)
                if (node in self.nodemap and
                    (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                # both parents must already be known
                for p in (p1, p2):
                    if not p in self.nodemap:
                        if self._shallow:
                            # add null entries for missing parents
                            # XXX FIXME
                            #if base == nullrev:
                            #    base = len(self)
                            #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
                            #     0, 0, base, nullrev, nullrev, nullrev, p)
                            #self.index.insert(-1, e)
                            #self.nodemap[p] = r
                            #entry = self._io.packentry(e, self.node,
                            #                           self.version, r)
                            #ifh.write(entry)
                            #t, r = r, r + 1
                            raise LookupError(p, self.indexfile,
                                              _('unknown parent'))
                        else:
                            raise LookupError(p, self.indexfile,
                                              _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                chainrev = self.rev(chain)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (chainrev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    # NOTE(review): the previous ifh handle is rebound here
                    # without being closed first -- confirm this is intended
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node
1180 1180
1181 1181 def strip(self, minlink, transaction):
1182 1182 """truncate the revlog on the first revision with a linkrev >= minlink
1183 1183
1184 1184 This function is called when we're stripping revision minlink and
1185 1185 its descendants from the repository.
1186 1186
1187 1187 We have to remove all revisions with linkrev >= minlink, because
1188 1188 the equivalent changelog revisions will be renumbered after the
1189 1189 strip.
1190 1190
1191 1191 So we truncate the revlog on the first of these revisions, and
1192 1192 trust that the caller has saved the revisions that shouldn't be
1193 1193 removed and that it'll readd them after this truncation.
1194 1194 """
1195 1195 if len(self) == 0:
1196 1196 return
1197 1197
1198 1198 for rev in self:
1199 1199 if self.index[rev][4] >= minlink:
1200 1200 break
1201 1201 else:
1202 1202 return
1203 1203
1204 1204 # first truncate the files on disk
1205 1205 end = self.start(rev)
1206 1206 if not self._inline:
1207 1207 transaction.add(self.datafile, end)
1208 1208 end = rev * self._io.size
1209 1209 else:
1210 1210 end += rev * self._io.size
1211 1211
1212 1212 transaction.add(self.indexfile, end)
1213 1213
1214 1214 # then reset internal state in memory to forget those revisions
1215 1215 self._cache = None
1216 1216 self._chunkclear()
1217 1217 for x in xrange(rev, len(self)):
1218 1218 del self.nodemap[self.node(x)]
1219 1219
1220 1220 del self.index[rev:-1]
1221 1221
1222 1222 def checksize(self):
1223 1223 expected = 0
1224 1224 if len(self):
1225 1225 expected = max(0, self.end(len(self) - 1))
1226 1226
1227 1227 try:
1228 1228 f = self.opener(self.datafile)
1229 1229 f.seek(0, 2)
1230 1230 actual = f.tell()
1231 1231 dd = actual - expected
1232 1232 except IOError, inst:
1233 1233 if inst.errno != errno.ENOENT:
1234 1234 raise
1235 1235 dd = 0
1236 1236
1237 1237 try:
1238 1238 f = self.opener(self.indexfile)
1239 1239 f.seek(0, 2)
1240 1240 actual = f.tell()
1241 1241 s = self._io.size
1242 1242 i = max(0, actual // s)
1243 1243 di = actual - (i * s)
1244 1244 if self._inline:
1245 1245 databytes = 0
1246 1246 for r in self:
1247 1247 databytes += max(0, self.length(r))
1248 1248 dd = 0
1249 1249 di = actual - len(self) * s - databytes
1250 1250 except IOError, inst:
1251 1251 if inst.errno != errno.ENOENT:
1252 1252 raise
1253 1253 di = 0
1254 1254
1255 1255 return (dd, di)
1256 1256
1257 1257 def files(self):
1258 1258 res = [self.indexfile]
1259 1259 if not self._inline:
1260 1260 res.append(self.datafile)
1261 1261 return res
General Comments 0
You need to be logged in to leave comments. Login now