revlog: raise error.WdirUnsupported from revlog.node() if wdirrev is passed...
Pulkit Goyal
r32443:34e9b8b9 default
@@ -1,2162 +1,2167 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 wdirrev,
30 30 )
31 31 from .i18n import _
32 32 from . import (
33 33 ancestor,
34 34 error,
35 35 mdiff,
36 36 policy,
37 37 pycompat,
38 38 templatefilters,
39 39 util,
40 40 )
41 41
42 42 parsers = policy.importmod(r'parsers')
43 43
44 44 _pack = struct.pack
45 45 _unpack = struct.unpack
46 46 # Aliased for performance.
47 47 _zlibdecompress = zlib.decompress
48 48
49 49 # revlog header flags
50 50 REVLOGV0 = 0
51 51 REVLOGV1 = 1
52 52 FLAG_INLINE_DATA = (1 << 16)
53 53 FLAG_GENERALDELTA = (1 << 17)
54 54 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
55 55 REVLOG_DEFAULT_FORMAT = REVLOGV1
56 56 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
57 57 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
58 58
59 59 # revlog index flags
60 60 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
61 61 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
62 62 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
63 63 REVIDX_DEFAULT_FLAGS = 0
64 64 # stable order in which flags need to be processed and their processors applied
65 65 REVIDX_FLAGS_ORDER = [
66 66 REVIDX_ISCENSORED,
67 67 REVIDX_ELLIPSIS,
68 68 REVIDX_EXTSTORED,
69 69 ]
70 70 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
71 71
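# Editor's sketch (not part of the diff), assuming util.bitsfrom simply
# ORs its inputs together, which is how the constant is used below:
#   REVIDX_KNOWN_FLAGS == (1 << 15) | (1 << 14) | (1 << 13) == 0xe000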
72 72 # max size of revlog with inline data
73 73 _maxinline = 131072
74 74 _chunksize = 1048576
75 75
76 76 RevlogError = error.RevlogError
77 77 LookupError = error.LookupError
78 78 CensoredNodeError = error.CensoredNodeError
79 79 ProgrammingError = error.ProgrammingError
80 80
81 81 # Store flag processors (cf. 'addflagprocessor()' to register)
82 82 _flagprocessors = {
83 83 REVIDX_ISCENSORED: None,
84 84 }
85 85
86 86 def addflagprocessor(flag, processor):
87 87 """Register a flag processor on a revision data flag.
88 88
89 89 Invariant:
90 90 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
91 91 - Only one flag processor can be registered on a specific flag.
92 92 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
93 93 following signatures:
94 94 - (read) f(self, rawtext) -> text, bool
95 95 - (write) f(self, text) -> rawtext, bool
96 96 - (raw) f(self, rawtext) -> bool
97 97 "text" is presented to the user. "rawtext" is stored in revlog data, not
98 98 directly visible to the user.
99 99 The boolean returned by these transforms is used to determine whether
100 100 the returned text can be used for hash integrity checking. For example,
101 101 if "write" returns False, then "text" is used to generate hash. If
102 102 "write" returns True, that basically means "rawtext" returned by "write"
103 103 should be used to generate hash. Usually, "write" and "read" return
104 104 different booleans. And "raw" returns the same boolean as "write".
105 105
106 106 Note: The 'raw' transform is used for changegroup generation and in some
107 107 debug commands. In this case the transform only indicates whether the
108 108 contents can be used for hash integrity checks.
109 109 """
110 110 if not flag & REVIDX_KNOWN_FLAGS:
111 111 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
112 112 raise ProgrammingError(msg)
113 113 if flag not in REVIDX_FLAGS_ORDER:
114 114 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
115 115 raise ProgrammingError(msg)
116 116 if flag in _flagprocessors:
117 117 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
118 118 raise error.Abort(msg)
119 119 _flagprocessors[flag] = processor
120 120
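# Illustrative sketch (not part of the diff): registering a processor on
# REVIDX_EXTSTORED, assuming no extension has already claimed that flag.
# The hypothetical callables follow the (read, write, raw) contract
# documented above:
#
#   def _extread(self, rawtext):
#       return rawtext, True    # "text" == "rawtext"; result is hashable
#   def _extwrite(self, text):
#       return text, False      # hash from "text", not from "rawtext"
#   def _extraw(self, rawtext):
#       return False            # rawtext alone cannot be hash-checked
#
#   addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))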
121 121 def getoffset(q):
122 122 return int(q >> 16)
123 123
124 124 def gettype(q):
125 125 return int(q & 0xFFFF)
126 126
127 127 def offset_type(offset, type):
128 128 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
129 129 raise ValueError('unknown revlog index flags')
130 130 return int(int(offset) << 16 | type)
131 131
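# Round-trip sketch (not part of the diff): the first index field packs a
# 48-bit offset above 16 bits of flags, so getoffset/gettype invert
# offset_type:
#   v = offset_type(1024, REVIDX_ISCENSORED)
#   assert getoffset(v) == 1024 and gettype(v) == REVIDX_ISCENSORED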
132 132 _nullhash = hashlib.sha1(nullid)
133 133
134 134 def hash(text, p1, p2):
135 135 """generate a hash from the given text and its parent hashes
136 136
137 137 This hash combines both the current file contents and its history
138 138 in a manner that makes it easy to distinguish nodes with the same
139 139 content in the revision graph.
140 140 """
141 141 # As of now, if one of the parent nodes is null, it is always p2
142 142 if p2 == nullid:
143 143 # deep copy of a hash is faster than creating one
144 144 s = _nullhash.copy()
145 145 s.update(p1)
146 146 else:
147 147 # none of the parent nodes are nullid
148 148 l = [p1, p2]
149 149 l.sort()
150 150 s = hashlib.sha1(l[0])
151 151 s.update(l[1])
152 152 s.update(text)
153 153 return s.digest()
154 154
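# Spelled-out equivalent (not part of the diff): since nullid is twenty
# zero bytes and therefore always sorts first, both branches above compute
# sha1(min(p1, p2) + max(p1, p2) + text):
#   s = hashlib.sha1(min(p1, p2))
#   s.update(max(p1, p2))
#   s.update(text)
#   node = s.digest()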
155 155 # index v0:
156 156 # 4 bytes: offset
157 157 # 4 bytes: compressed length
158 158 # 4 bytes: base rev
159 159 # 4 bytes: link rev
160 160 # 20 bytes: parent 1 nodeid
161 161 # 20 bytes: parent 2 nodeid
162 162 # 20 bytes: nodeid
163 163 indexformatv0 = ">4l20s20s20s"
164 164
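# Size check (not part of the diff): four 4-byte big-endian ints plus
# three 20-byte nodeids give a fixed 76-byte v0 record:
#   assert struct.calcsize(indexformatv0) == 76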
165 165 class revlogoldio(object):
166 166 def __init__(self):
167 167 self.size = struct.calcsize(indexformatv0)
168 168
169 169 def parseindex(self, data, inline):
170 170 s = self.size
171 171 index = []
172 172 nodemap = {nullid: nullrev}
173 173 n = off = 0
174 174 l = len(data)
175 175 while off + s <= l:
176 176 cur = data[off:off + s]
177 177 off += s
178 178 e = _unpack(indexformatv0, cur)
179 179 # transform to revlogv1 format
180 180 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
181 181 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
182 182 index.append(e2)
183 183 nodemap[e[6]] = n
184 184 n += 1
185 185
186 186 # add the magic null revision at -1
187 187 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
188 188
189 189 return index, nodemap, None
190 190
191 191 def packentry(self, entry, node, version, rev):
192 192 if gettype(entry[0]):
193 193 raise RevlogError(_('index entry flags need revlog version 1'))
194 194 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
195 195 node(entry[5]), node(entry[6]), entry[7])
196 196 return _pack(indexformatv0, *e2)
197 197
198 198 # index ng:
199 199 # 6 bytes: offset
200 200 # 2 bytes: flags
201 201 # 4 bytes: compressed length
202 202 # 4 bytes: uncompressed length
203 203 # 4 bytes: base rev
204 204 # 4 bytes: link rev
205 205 # 4 bytes: parent 1 rev
206 206 # 4 bytes: parent 2 rev
207 207 # 32 bytes: nodeid
208 208 indexformatng = ">Qiiiiii20s12x"
209 209 versionformat = ">I"
210 210
211 211 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
212 212 # signed integer)
213 213 _maxentrysize = 0x7fffffff
214 214
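# Size check (not part of the diff): 8 bytes of offset+flags, six 4-byte
# ints, a 20-byte nodeid and 12 pad bytes give a fixed 64-byte record:
#   assert struct.calcsize(indexformatng) == 64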
215 215 class revlogio(object):
216 216 def __init__(self):
217 217 self.size = struct.calcsize(indexformatng)
218 218
219 219 def parseindex(self, data, inline):
220 220 # call the C implementation to parse the index data
221 221 index, cache = parsers.parse_index2(data, inline)
222 222 return index, getattr(index, 'nodemap', None), cache
223 223
224 224 def packentry(self, entry, node, version, rev):
225 225 p = _pack(indexformatng, *entry)
226 226 if rev == 0:
227 227 p = _pack(versionformat, version) + p[4:]
228 228 return p
229 229
230 230 class revlog(object):
231 231 """
232 232 the underlying revision storage object
233 233
234 234 A revlog consists of two parts, an index and the revision data.
235 235
236 236 The index is a file with a fixed record size containing
237 237 information on each revision, including its nodeid (hash), the
238 238 nodeids of its parents, the position and offset of its data within
239 239 the data file, and the revision it's based on. Finally, each entry
240 240 contains a linkrev entry that can serve as a pointer to external
241 241 data.
242 242
243 243 The revision data itself is a linear collection of data chunks.
244 244 Each chunk represents a revision and is usually represented as a
245 245 delta against the previous chunk. To bound lookup time, runs of
246 246 deltas are limited to about 2 times the length of the original
247 247 version data. This makes retrieval of a version proportional to
248 248 its size, or O(1) relative to the number of revisions.
249 249
250 250 Both pieces of the revlog are written to in an append-only
251 251 fashion, which means we never need to rewrite a file to insert or
252 252 remove data, and can use some simple techniques to avoid the need
253 253 for locking while reading.
254 254
255 255 If checkambig, indexfile is opened with checkambig=True at
256 256 writing, to avoid file stat ambiguity.
257 257 """
258 258 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
259 259 """
260 260 create a revlog object
261 261
262 262 opener is a function that abstracts the file opening operation
263 263 and can be used to implement COW semantics or the like.
264 264 """
265 265 self.indexfile = indexfile
266 266 self.datafile = datafile or (indexfile[:-2] + ".d")
267 267 self.opener = opener
268 268 # When True, indexfile is opened with checkambig=True at writing, to
269 269 # avoid file stat ambiguity.
270 270 self._checkambig = checkambig
271 271 # 3-tuple of (node, rev, text) for a raw revision.
272 272 self._cache = None
273 273 # Maps rev to chain base rev.
274 274 self._chainbasecache = util.lrucachedict(100)
275 275 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
276 276 self._chunkcache = (0, '')
277 277 # How much data to read and cache into the raw revlog data cache.
278 278 self._chunkcachesize = 65536
279 279 self._maxchainlen = None
280 280 self._aggressivemergedeltas = False
281 281 self.index = []
282 282 # Mapping of partial identifiers to full nodes.
283 283 self._pcache = {}
284 284 # Mapping of revision integer to full node.
285 285 self._nodecache = {nullid: nullrev}
286 286 self._nodepos = None
287 287 self._compengine = 'zlib'
288 288
289 289 v = REVLOG_DEFAULT_VERSION
290 290 opts = getattr(opener, 'options', None)
291 291 if opts is not None:
292 292 if 'revlogv1' in opts:
293 293 if 'generaldelta' in opts:
294 294 v |= FLAG_GENERALDELTA
295 295 else:
296 296 v = 0
297 297 if 'chunkcachesize' in opts:
298 298 self._chunkcachesize = opts['chunkcachesize']
299 299 if 'maxchainlen' in opts:
300 300 self._maxchainlen = opts['maxchainlen']
301 301 if 'aggressivemergedeltas' in opts:
302 302 self._aggressivemergedeltas = opts['aggressivemergedeltas']
303 303 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
304 304 if 'compengine' in opts:
305 305 self._compengine = opts['compengine']
306 306
307 307 if self._chunkcachesize <= 0:
308 308 raise RevlogError(_('revlog chunk cache size %r is not greater '
309 309 'than 0') % self._chunkcachesize)
310 310 elif self._chunkcachesize & (self._chunkcachesize - 1):
311 311 raise RevlogError(_('revlog chunk cache size %r is not a power '
312 312 'of 2') % self._chunkcachesize)
313 313
314 314 indexdata = ''
315 315 self._initempty = True
316 316 try:
317 317 f = self.opener(self.indexfile)
318 318 indexdata = f.read()
319 319 f.close()
320 320 if len(indexdata) > 0:
321 321 v = struct.unpack(versionformat, indexdata[:4])[0]
322 322 self._initempty = False
323 323 except IOError as inst:
324 324 if inst.errno != errno.ENOENT:
325 325 raise
326 326
327 327 self.version = v
328 328 self._inline = v & FLAG_INLINE_DATA
329 329 self._generaldelta = v & FLAG_GENERALDELTA
330 330 flags = v & ~0xFFFF
331 331 fmt = v & 0xFFFF
332 332 if fmt == REVLOGV0:
333 333 if flags:
334 334 raise RevlogError(_('unknown flags (%#04x) in version %d '
335 335 'revlog %s') %
336 336 (flags >> 16, fmt, self.indexfile))
337 337 elif fmt == REVLOGV1:
338 338 if flags & ~REVLOGV1_FLAGS:
339 339 raise RevlogError(_('unknown flags (%#04x) in version %d '
340 340 'revlog %s') %
341 341 (flags >> 16, fmt, self.indexfile))
342 342 else:
343 343 raise RevlogError(_('unknown version (%d) in revlog %s') %
344 344 (fmt, self.indexfile))
345 345
346 346 self.storedeltachains = True
347 347
348 348 self._io = revlogio()
349 349 if self.version == REVLOGV0:
350 350 self._io = revlogoldio()
351 351 try:
352 352 d = self._io.parseindex(indexdata, self._inline)
353 353 except (ValueError, IndexError):
354 354 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
355 355 self.index, nodemap, self._chunkcache = d
356 356 if nodemap is not None:
357 357 self.nodemap = self._nodecache = nodemap
358 358 if not self._chunkcache:
359 359 self._chunkclear()
360 360 # revnum -> (chain-length, sum-delta-length)
361 361 self._chaininfocache = {}
362 362 # revlog header -> revlog compressor
363 363 self._decompressors = {}
364 364
365 365 @util.propertycache
366 366 def _compressor(self):
367 367 return util.compengines[self._compengine].revlogcompressor()
368 368
369 369 def tip(self):
370 370 return self.node(len(self.index) - 2)
371 371 def __contains__(self, rev):
372 372 return 0 <= rev < len(self)
373 373 def __len__(self):
374 374 return len(self.index) - 1
375 375 def __iter__(self):
376 376 return iter(xrange(len(self)))
377 377 def revs(self, start=0, stop=None):
378 378 """iterate over all rev in this revlog (from start to stop)"""
379 379 step = 1
380 380 if stop is not None:
381 381 if start > stop:
382 382 step = -1
383 383 stop += step
384 384 else:
385 385 stop = len(self)
386 386 return xrange(start, stop, step)
387 387
388 388 @util.propertycache
389 389 def nodemap(self):
390 390 self.rev(self.node(0))
391 391 return self._nodecache
392 392
393 393 def hasnode(self, node):
394 394 try:
395 395 self.rev(node)
396 396 return True
397 397 except KeyError:
398 398 return False
399 399
400 400 def clearcaches(self):
401 401 self._cache = None
402 402 self._chainbasecache.clear()
403 403 self._chunkcache = (0, '')
404 404 self._pcache = {}
405 405
406 406 try:
407 407 self._nodecache.clearcaches()
408 408 except AttributeError:
409 409 self._nodecache = {nullid: nullrev}
410 410 self._nodepos = None
411 411
412 412 def rev(self, node):
413 413 try:
414 414 return self._nodecache[node]
415 415 except TypeError:
416 416 raise
417 417 except RevlogError:
418 418 # parsers.c radix tree lookup failed
419 419 raise LookupError(node, self.indexfile, _('no node'))
420 420 except KeyError:
421 421 # pure python cache lookup failed
422 422 n = self._nodecache
423 423 i = self.index
424 424 p = self._nodepos
425 425 if p is None:
426 426 p = len(i) - 2
427 427 for r in xrange(p, -1, -1):
428 428 v = i[r][7]
429 429 n[v] = r
430 430 if v == node:
431 431 self._nodepos = r - 1
432 432 return r
433 433 raise LookupError(node, self.indexfile, _('no node'))
434 434
435 435 # Accessors for index entries.
436 436
437 437 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
438 438 # are flags.
439 439 def start(self, rev):
440 440 return int(self.index[rev][0] >> 16)
441 441
442 442 def flags(self, rev):
443 443 return self.index[rev][0] & 0xFFFF
444 444
445 445 def length(self, rev):
446 446 return self.index[rev][1]
447 447
448 448 def rawsize(self, rev):
449 449 """return the length of the uncompressed text for a given revision"""
450 450 l = self.index[rev][2]
451 451 if l >= 0:
452 452 return l
453 453
454 454 t = self.revision(rev, raw=True)
455 455 return len(t)
456 456
457 457 def size(self, rev):
458 458 """length of non-raw text (processed by a "read" flag processor)"""
459 459 # fast path: if no "read" flag processor could change the content,
460 460 # size is rawsize. note: ELLIPSIS is known to not change the content.
461 461 flags = self.flags(rev)
462 462 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
463 463 return self.rawsize(rev)
464 464
465 465 return len(self.revision(rev, raw=False))
466 466
467 467 def chainbase(self, rev):
468 468 base = self._chainbasecache.get(rev)
469 469 if base is not None:
470 470 return base
471 471
472 472 index = self.index
473 473 base = index[rev][3]
474 474 while base != rev:
475 475 rev = base
476 476 base = index[rev][3]
477 477
478 478 self._chainbasecache[rev] = base
479 479 return base
480 480
481 481 def linkrev(self, rev):
482 482 return self.index[rev][4]
483 483
484 484 def parentrevs(self, rev):
485 485 try:
486 486 return self.index[rev][5:7]
487 487 except IndexError:
488 488 if rev == wdirrev:
489 489 raise error.WdirUnsupported
490 490 raise
491 491
492 492 def node(self, rev):
493 return self.index[rev][7]
493 try:
494 return self.index[rev][7]
495 except IndexError:
496 if rev == wdirrev:
497 raise error.WdirUnsupported
498 raise
494 499
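# Behavior sketch for this change (not part of the diff): node(), like
# parentrevs() above, now converts the bare IndexError for the magic
# working-directory revision into an explicit, catchable error:
#   from mercurial.node import wdirrev
#   try:
#       rl.node(wdirrev)            # rl: any revlog instance (hypothetical)
#   except error.WdirUnsupported:
#       pass                        # callers must special-case wdir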
495 500 # Derived from index values.
496 501
497 502 def end(self, rev):
498 503 return self.start(rev) + self.length(rev)
499 504
500 505 def parents(self, node):
501 506 i = self.index
502 507 d = i[self.rev(node)]
503 508 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
504 509
505 510 def chainlen(self, rev):
506 511 return self._chaininfo(rev)[0]
507 512
508 513 def _chaininfo(self, rev):
509 514 chaininfocache = self._chaininfocache
510 515 if rev in chaininfocache:
511 516 return chaininfocache[rev]
512 517 index = self.index
513 518 generaldelta = self._generaldelta
514 519 iterrev = rev
515 520 e = index[iterrev]
516 521 clen = 0
517 522 compresseddeltalen = 0
518 523 while iterrev != e[3]:
519 524 clen += 1
520 525 compresseddeltalen += e[1]
521 526 if generaldelta:
522 527 iterrev = e[3]
523 528 else:
524 529 iterrev -= 1
525 530 if iterrev in chaininfocache:
526 531 t = chaininfocache[iterrev]
527 532 clen += t[0]
528 533 compresseddeltalen += t[1]
529 534 break
530 535 e = index[iterrev]
531 536 else:
532 537 # Add text length of base since decompressing that also takes
533 538 # work. For cache hits the length is already included.
534 539 compresseddeltalen += e[1]
535 540 r = (clen, compresseddeltalen)
536 541 chaininfocache[rev] = r
537 542 return r
538 543
539 544 def _deltachain(self, rev, stoprev=None):
540 545 """Obtain the delta chain for a revision.
541 546
542 547 ``stoprev`` specifies a revision to stop at. If not specified, we
543 548 stop at the base of the chain.
544 549
545 550 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
546 551 revs in ascending order and ``stopped`` is a bool indicating whether
547 552 ``stoprev`` was hit.
548 553 """
549 554 chain = []
550 555
551 556 # Alias to prevent attribute lookup in tight loop.
552 557 index = self.index
553 558 generaldelta = self._generaldelta
554 559
555 560 iterrev = rev
556 561 e = index[iterrev]
557 562 while iterrev != e[3] and iterrev != stoprev:
558 563 chain.append(iterrev)
559 564 if generaldelta:
560 565 iterrev = e[3]
561 566 else:
562 567 iterrev -= 1
563 568 e = index[iterrev]
564 569
565 570 if iterrev == stoprev:
566 571 stopped = True
567 572 else:
568 573 chain.append(iterrev)
569 574 stopped = False
570 575
571 576 chain.reverse()
572 577 return chain, stopped
573 578
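# Usage sketch (not part of the diff): revision reconstruction walks the
# chain returned here in ascending order:
#   chain, stopped = rl._deltachain(rev)    # rl: hypothetical revlog
#   # chain[0]'s chunk is the full text (unless "stopped", in which case
#   # the caller already holds the base text); later revs are deltas
#   # patched on top of it.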
574 579 def ancestors(self, revs, stoprev=0, inclusive=False):
575 580 """Generate the ancestors of 'revs' in reverse topological order.
576 581 Does not generate revs lower than stoprev.
577 582
578 583 See the documentation for ancestor.lazyancestors for more details."""
579 584
580 585 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
581 586 inclusive=inclusive)
582 587
583 588 def descendants(self, revs):
584 589 """Generate the descendants of 'revs' in revision order.
585 590
586 591 Yield a sequence of revision numbers starting with a child of
587 592 some rev in revs, i.e., each revision is *not* considered a
588 593 descendant of itself. Results are ordered by revision number (a
589 594 topological sort)."""
590 595 first = min(revs)
591 596 if first == nullrev:
592 597 for i in self:
593 598 yield i
594 599 return
595 600
596 601 seen = set(revs)
597 602 for i in self.revs(start=first + 1):
598 603 for x in self.parentrevs(i):
599 604 if x != nullrev and x in seen:
600 605 seen.add(i)
601 606 yield i
602 607 break
603 608
604 609 def findcommonmissing(self, common=None, heads=None):
605 610 """Return a tuple of the ancestors of common and the ancestors of heads
606 611 that are not ancestors of common. In revset terminology, we return the
607 612 tuple:
608 613
609 614 ::common, (::heads) - (::common)
610 615
611 616 The list is sorted by revision number, meaning it is
612 617 topologically sorted.
613 618
614 619 'heads' and 'common' are both lists of node IDs. If heads is
615 620 not supplied, uses all of the revlog's heads. If common is not
616 621 supplied, uses nullid."""
617 622 if common is None:
618 623 common = [nullid]
619 624 if heads is None:
620 625 heads = self.heads()
621 626
622 627 common = [self.rev(n) for n in common]
623 628 heads = [self.rev(n) for n in heads]
624 629
625 630 # we want the ancestors, but inclusive
626 631 class lazyset(object):
627 632 def __init__(self, lazyvalues):
628 633 self.addedvalues = set()
629 634 self.lazyvalues = lazyvalues
630 635
631 636 def __contains__(self, value):
632 637 return value in self.addedvalues or value in self.lazyvalues
633 638
634 639 def __iter__(self):
635 640 added = self.addedvalues
636 641 for r in added:
637 642 yield r
638 643 for r in self.lazyvalues:
639 644 if not r in added:
640 645 yield r
641 646
642 647 def add(self, value):
643 648 self.addedvalues.add(value)
644 649
645 650 def update(self, values):
646 651 self.addedvalues.update(values)
647 652
648 653 has = lazyset(self.ancestors(common))
649 654 has.add(nullrev)
650 655 has.update(common)
651 656
652 657 # take all ancestors from heads that aren't in has
653 658 missing = set()
654 659 visit = collections.deque(r for r in heads if r not in has)
655 660 while visit:
656 661 r = visit.popleft()
657 662 if r in missing:
658 663 continue
659 664 else:
660 665 missing.add(r)
661 666 for p in self.parentrevs(r):
662 667 if p not in has:
663 668 visit.append(p)
664 669 missing = list(missing)
665 670 missing.sort()
666 671 return has, [self.node(miss) for miss in missing]
667 672
668 673 def incrementalmissingrevs(self, common=None):
669 674 """Return an object that can be used to incrementally compute the
670 675 revision numbers of the ancestors of arbitrary sets that are not
671 676 ancestors of common. This is an ancestor.incrementalmissingancestors
672 677 object.
673 678
674 679 'common' is a list of revision numbers. If common is not supplied, uses
675 680 nullrev.
676 681 """
677 682 if common is None:
678 683 common = [nullrev]
679 684
680 685 return ancestor.incrementalmissingancestors(self.parentrevs, common)
681 686
682 687 def findmissingrevs(self, common=None, heads=None):
683 688 """Return the revision numbers of the ancestors of heads that
684 689 are not ancestors of common.
685 690
686 691 More specifically, return a list of revision numbers corresponding to
687 692 nodes N such that every N satisfies the following constraints:
688 693
689 694 1. N is an ancestor of some node in 'heads'
690 695 2. N is not an ancestor of any node in 'common'
691 696
692 697 The list is sorted by revision number, meaning it is
693 698 topologically sorted.
694 699
695 700 'heads' and 'common' are both lists of revision numbers. If heads is
696 701 not supplied, uses all of the revlog's heads. If common is not
697 702 supplied, uses nullid."""
698 703 if common is None:
699 704 common = [nullrev]
700 705 if heads is None:
701 706 heads = self.headrevs()
702 707
703 708 inc = self.incrementalmissingrevs(common=common)
704 709 return inc.missingancestors(heads)
705 710
706 711 def findmissing(self, common=None, heads=None):
707 712 """Return the ancestors of heads that are not ancestors of common.
708 713
709 714 More specifically, return a list of nodes N such that every N
710 715 satisfies the following constraints:
711 716
712 717 1. N is an ancestor of some node in 'heads'
713 718 2. N is not an ancestor of any node in 'common'
714 719
715 720 The list is sorted by revision number, meaning it is
716 721 topologically sorted.
717 722
718 723 'heads' and 'common' are both lists of node IDs. If heads is
719 724 not supplied, uses all of the revlog's heads. If common is not
720 725 supplied, uses nullid."""
721 726 if common is None:
722 727 common = [nullid]
723 728 if heads is None:
724 729 heads = self.heads()
725 730
726 731 common = [self.rev(n) for n in common]
727 732 heads = [self.rev(n) for n in heads]
728 733
729 734 inc = self.incrementalmissingrevs(common=common)
730 735 return [self.node(r) for r in inc.missingancestors(heads)]
731 736
732 737 def nodesbetween(self, roots=None, heads=None):
733 738 """Return a topological path from 'roots' to 'heads'.
734 739
735 740 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
736 741 topologically sorted list of all nodes N that satisfy both of
737 742 these constraints:
738 743
739 744 1. N is a descendant of some node in 'roots'
740 745 2. N is an ancestor of some node in 'heads'
741 746
742 747 Every node is considered to be both a descendant and an ancestor
743 748 of itself, so every reachable node in 'roots' and 'heads' will be
744 749 included in 'nodes'.
745 750
746 751 'outroots' is the list of reachable nodes in 'roots', i.e., the
747 752 subset of 'roots' that is returned in 'nodes'. Likewise,
748 753 'outheads' is the subset of 'heads' that is also in 'nodes'.
749 754
750 755 'roots' and 'heads' are both lists of node IDs. If 'roots' is
751 756 unspecified, uses nullid as the only root. If 'heads' is
752 757 unspecified, uses list of all of the revlog's heads."""
753 758 nonodes = ([], [], [])
754 759 if roots is not None:
755 760 roots = list(roots)
756 761 if not roots:
757 762 return nonodes
758 763 lowestrev = min([self.rev(n) for n in roots])
759 764 else:
760 765 roots = [nullid] # Everybody's a descendant of nullid
761 766 lowestrev = nullrev
762 767 if (lowestrev == nullrev) and (heads is None):
763 768 # We want _all_ the nodes!
764 769 return ([self.node(r) for r in self], [nullid], list(self.heads()))
765 770 if heads is None:
766 771 # All nodes are ancestors, so the latest ancestor is the last
767 772 # node.
768 773 highestrev = len(self) - 1
769 774 # Set ancestors to None to signal that every node is an ancestor.
770 775 ancestors = None
771 776 # Set heads to an empty dictionary for later discovery of heads
772 777 heads = {}
773 778 else:
774 779 heads = list(heads)
775 780 if not heads:
776 781 return nonodes
777 782 ancestors = set()
778 783 # Turn heads into a dictionary so we can remove 'fake' heads.
779 784 # Also, later we will be using it to filter out the heads we can't
780 785 # find from roots.
781 786 heads = dict.fromkeys(heads, False)
782 787 # Start at the top and keep marking parents until we're done.
783 788 nodestotag = set(heads)
784 789 # Remember where the top was so we can use it as a limit later.
785 790 highestrev = max([self.rev(n) for n in nodestotag])
786 791 while nodestotag:
787 792 # grab a node to tag
788 793 n = nodestotag.pop()
789 794 # Never tag nullid
790 795 if n == nullid:
791 796 continue
792 797 # A node's revision number represents its place in a
793 798 # topologically sorted list of nodes.
794 799 r = self.rev(n)
795 800 if r >= lowestrev:
796 801 if n not in ancestors:
797 802 # If we are possibly a descendant of one of the roots
798 803 # and we haven't already been marked as an ancestor
799 804 ancestors.add(n) # Mark as ancestor
800 805 # Add non-nullid parents to list of nodes to tag.
801 806 nodestotag.update([p for p in self.parents(n) if
802 807 p != nullid])
803 808 elif n in heads: # We've seen it before, is it a fake head?
804 809 # So it is, real heads should not be the ancestors of
805 810 # any other heads.
806 811 heads.pop(n)
807 812 if not ancestors:
808 813 return nonodes
809 814 # Now that we have our set of ancestors, we want to remove any
810 815 # roots that are not ancestors.
811 816
812 817 # If one of the roots was nullid, everything is included anyway.
813 818 if lowestrev > nullrev:
814 819 # But, since we weren't, let's recompute the lowest rev to not
815 820 # include roots that aren't ancestors.
816 821
817 822 # Filter out roots that aren't ancestors of heads
818 823 roots = [root for root in roots if root in ancestors]
819 824 # Recompute the lowest revision
820 825 if roots:
821 826 lowestrev = min([self.rev(root) for root in roots])
822 827 else:
823 828 # No more roots? Return empty list
824 829 return nonodes
825 830 else:
826 831 # We are descending from nullid, and don't need to care about
827 832 # any other roots.
828 833 lowestrev = nullrev
829 834 roots = [nullid]
830 835 # Transform our roots list into a set.
831 836 descendants = set(roots)
832 837 # Also, keep the original roots so we can filter out roots that aren't
833 838 # 'real' roots (i.e. are descended from other roots).
834 839 roots = descendants.copy()
835 840 # Our topologically sorted list of output nodes.
836 841 orderedout = []
837 842 # Don't start at nullid since we don't want nullid in our output list,
838 843 # and if nullid shows up in descendants, empty parents will look like
839 844 # they're descendants.
840 845 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
841 846 n = self.node(r)
842 847 isdescendant = False
843 848 if lowestrev == nullrev: # Everybody is a descendant of nullid
844 849 isdescendant = True
845 850 elif n in descendants:
846 851 # n is already a descendant
847 852 isdescendant = True
848 853 # This check only needs to be done here because all the roots
849 854 # will start being marked as descendants before the loop.
850 855 if n in roots:
851 856 # If n was a root, check if it's a 'real' root.
852 857 p = tuple(self.parents(n))
853 858 # If any of its parents are descendants, it's not a root.
854 859 if (p[0] in descendants) or (p[1] in descendants):
855 860 roots.remove(n)
856 861 else:
857 862 p = tuple(self.parents(n))
858 863 # A node is a descendant if either of its parents are
859 864 # descendants. (We seeded the descendants set with the roots
860 865 # up there, remember?)
861 866 if (p[0] in descendants) or (p[1] in descendants):
862 867 descendants.add(n)
863 868 isdescendant = True
864 869 if isdescendant and ((ancestors is None) or (n in ancestors)):
865 870 # Only include nodes that are both descendants and ancestors.
866 871 orderedout.append(n)
867 872 if (ancestors is not None) and (n in heads):
868 873 # We're trying to figure out which heads are reachable
869 874 # from roots.
870 875 # Mark this head as having been reached
871 876 heads[n] = True
872 877 elif ancestors is None:
873 878 # Otherwise, we're trying to discover the heads.
874 879 # Assume this is a head because if it isn't, the next step
875 880 # will eventually remove it.
876 881 heads[n] = True
877 882 # But, obviously its parents aren't.
878 883 for p in self.parents(n):
879 884 heads.pop(p, None)
880 885 heads = [head for head, flag in heads.iteritems() if flag]
881 886 roots = list(roots)
882 887 assert orderedout
883 888 assert roots
884 889 assert heads
885 890 return (orderedout, roots, heads)
886 891
887 892 def headrevs(self):
888 893 try:
889 894 return self.index.headrevs()
890 895 except AttributeError:
891 896 return self._headrevs()
892 897
893 898 def computephases(self, roots):
894 899 return self.index.computephasesmapsets(roots)
895 900
896 901 def _headrevs(self):
897 902 count = len(self)
898 903 if not count:
899 904 return [nullrev]
900 905 # we won't iterate over filtered revs, so nobody is a head at start
901 906 ishead = [0] * (count + 1)
902 907 index = self.index
903 908 for r in self:
904 909 ishead[r] = 1 # I may be a head
905 910 e = index[r]
906 911 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
907 912 return [r for r, val in enumerate(ishead) if val]
908 913
909 914 def heads(self, start=None, stop=None):
910 915 """return the list of all nodes that have no children
911 916
912 917 if start is specified, only heads that are descendants of
913 918 start will be returned
914 919 if stop is specified, it will consider all the revs from stop
915 920 as if they had no children
916 921 """
917 922 if start is None and stop is None:
918 923 if not len(self):
919 924 return [nullid]
920 925 return [self.node(r) for r in self.headrevs()]
921 926
922 927 if start is None:
923 928 start = nullid
924 929 if stop is None:
925 930 stop = []
926 931 stoprevs = set([self.rev(n) for n in stop])
927 932 startrev = self.rev(start)
928 933 reachable = {startrev}
929 934 heads = {startrev}
930 935
931 936 parentrevs = self.parentrevs
932 937 for r in self.revs(start=startrev + 1):
933 938 for p in parentrevs(r):
934 939 if p in reachable:
935 940 if r not in stoprevs:
936 941 reachable.add(r)
937 942 heads.add(r)
938 943 if p in heads and p not in stoprevs:
939 944 heads.remove(p)
940 945
941 946 return [self.node(r) for r in heads]
942 947
943 948 def children(self, node):
944 949 """find the children of a given node"""
945 950 c = []
946 951 p = self.rev(node)
947 952 for r in self.revs(start=p + 1):
948 953 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
949 954 if prevs:
950 955 for pr in prevs:
951 956 if pr == p:
952 957 c.append(self.node(r))
953 958 elif p == nullrev:
954 959 c.append(self.node(r))
955 960 return c
956 961
957 962 def descendant(self, start, end):
958 963 if start == nullrev:
959 964 return True
960 965 for i in self.descendants([start]):
961 966 if i == end:
962 967 return True
963 968 elif i > end:
964 969 break
965 970 return False
966 971
967 972 def commonancestorsheads(self, a, b):
968 973 """calculate all the heads of the common ancestors of nodes a and b"""
969 974 a, b = self.rev(a), self.rev(b)
970 975 try:
971 976 ancs = self.index.commonancestorsheads(a, b)
972 977 except (AttributeError, OverflowError): # C implementation failed
973 978 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
974 979 return pycompat.maplist(self.node, ancs)
975 980
976 981 def isancestor(self, a, b):
977 982 """return True if node a is an ancestor of node b
978 983
979 984 The implementation of this is trivial but the use of
980 985 commonancestorsheads is not."""
981 986 return a in self.commonancestorsheads(a, b)
982 987
983 988 def ancestor(self, a, b):
984 989 """calculate the "best" common ancestor of nodes a and b"""
985 990
986 991 a, b = self.rev(a), self.rev(b)
987 992 try:
988 993 ancs = self.index.ancestors(a, b)
989 994 except (AttributeError, OverflowError):
990 995 ancs = ancestor.ancestors(self.parentrevs, a, b)
991 996 if ancs:
992 997 # choose a consistent winner when there's a tie
993 998 return min(map(self.node, ancs))
994 999 return nullid
995 1000
996 1001 def _match(self, id):
997 1002 if isinstance(id, int):
998 1003 # rev
999 1004 return self.node(id)
1000 1005 if len(id) == 20:
1001 1006 # possibly a binary node
1002 1007 # odds of a binary node being all hex in ASCII are 1 in 10**25
1003 1008 try:
1004 1009 node = id
1005 1010 self.rev(node) # quick search the index
1006 1011 return node
1007 1012 except LookupError:
1008 1013 pass # may be partial hex id
1009 1014 try:
1010 1015 # str(rev)
1011 1016 rev = int(id)
1012 1017 if str(rev) != id:
1013 1018 raise ValueError
1014 1019 if rev < 0:
1015 1020 rev = len(self) + rev
1016 1021 if rev < 0 or rev >= len(self):
1017 1022 raise ValueError
1018 1023 return self.node(rev)
1019 1024 except (ValueError, OverflowError):
1020 1025 pass
1021 1026 if len(id) == 40:
1022 1027 try:
1023 1028 # a full hex nodeid?
1024 1029 node = bin(id)
1025 1030 self.rev(node)
1026 1031 return node
1027 1032 except (TypeError, LookupError):
1028 1033 pass
1029 1034
1030 1035 def _partialmatch(self, id):
1031 1036 try:
1032 1037 partial = self.index.partialmatch(id)
1033 1038 if partial and self.hasnode(partial):
1034 1039 return partial
1035 1040 return None
1036 1041 except RevlogError:
1037 1042 # parsers.c radix tree lookup gave multiple matches
1038 1043 # fast path: for unfiltered changelog, radix tree is accurate
1039 1044 if not getattr(self, 'filteredrevs', None):
1040 1045 raise LookupError(id, self.indexfile,
1041 1046 _('ambiguous identifier'))
1042 1047 # fall through to slow path that filters hidden revisions
1043 1048 except (AttributeError, ValueError):
1044 1049 # we are pure python, or key was too short to search radix tree
1045 1050 pass
1046 1051
1047 1052 if id in self._pcache:
1048 1053 return self._pcache[id]
1049 1054
1050 1055 if len(id) < 40:
1051 1056 try:
1052 1057 # hex(node)[:...]
1053 1058 l = len(id) // 2 # grab an even number of digits
1054 1059 prefix = bin(id[:l * 2])
1055 1060 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1056 1061 nl = [n for n in nl if hex(n).startswith(id) and
1057 1062 self.hasnode(n)]
1058 1063 if len(nl) > 0:
1059 1064 if len(nl) == 1:
1060 1065 self._pcache[id] = nl[0]
1061 1066 return nl[0]
1062 1067 raise LookupError(id, self.indexfile,
1063 1068 _('ambiguous identifier'))
1064 1069 return None
1065 1070 except TypeError:
1066 1071 pass
1067 1072
1068 1073 def lookup(self, id):
1069 1074 """locate a node based on:
1070 1075 - revision number or str(revision number)
1071 1076 - nodeid or subset of hex nodeid
1072 1077 """
1073 1078 n = self._match(id)
1074 1079 if n is not None:
1075 1080 return n
1076 1081 n = self._partialmatch(id)
1077 1082 if n:
1078 1083 return n
1079 1084
1080 1085 raise LookupError(id, self.indexfile, _('no match found'))
1081 1086
1082 1087 def cmp(self, node, text):
1083 1088 """compare text with a given file revision
1084 1089
1085 1090 returns True if text is different from what is stored.
1086 1091 """
1087 1092 p1, p2 = self.parents(node)
1088 1093 return hash(text, p1, p2) != node
1089 1094
1090 1095 def _cachesegment(self, offset, data):
1091 1096 """Add a segment to the revlog cache.
1092 1097
1093 1098 Accepts an absolute offset and the data that is at that location.
1094 1099 """
1095 1100 o, d = self._chunkcache
1096 1101 # try to add to existing cache
1097 1102 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1098 1103 self._chunkcache = o, d + data
1099 1104 else:
1100 1105 self._chunkcache = offset, data
1101 1106
1102 1107 def _readsegment(self, offset, length, df=None):
1103 1108 """Load a segment of raw data from the revlog.
1104 1109
1105 1110 Accepts an absolute offset, length to read, and an optional existing
1106 1111 file handle to read from.
1107 1112
1108 1113 If an existing file handle is passed, it will be seeked and the
1109 1114 original seek position will NOT be restored.
1110 1115
1111 1116 Returns a str or buffer of raw byte data.
1112 1117 """
1113 1118 if df is not None:
1114 1119 closehandle = False
1115 1120 else:
1116 1121 if self._inline:
1117 1122 df = self.opener(self.indexfile)
1118 1123 else:
1119 1124 df = self.opener(self.datafile)
1120 1125 closehandle = True
1121 1126
1122 1127 # Cache data both forward and backward around the requested
1123 1128 # data, in a fixed size window. This helps speed up operations
1124 1129 # involving reading the revlog backwards.
1125 1130 cachesize = self._chunkcachesize
1126 1131 realoffset = offset & ~(cachesize - 1)
1127 1132 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1128 1133 - realoffset)
1129 1134 df.seek(realoffset)
1130 1135 d = df.read(reallength)
1131 1136 if closehandle:
1132 1137 df.close()
1133 1138 self._cachesegment(realoffset, d)
1134 1139 if offset != realoffset or reallength != length:
1135 1140 return util.buffer(d, offset - realoffset, length)
1136 1141 return d
1137 1142
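# Worked example (not part of the diff): with the default 64 KiB chunk
# cache, a 100-byte read at offset 70000 is widened to an aligned window,
# so neighbouring reads (including backwards scans) hit the cache:
#   realoffset = 70000 & ~65535                              # -> 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536    # -> 65536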
1138 1143 def _getsegment(self, offset, length, df=None):
1139 1144 """Obtain a segment of raw data from the revlog.
1140 1145
1141 1146 Accepts an absolute offset, length of bytes to obtain, and an
1142 1147 optional file handle to the already-opened revlog. If the file
1143 1148 handle is used, its original seek position will not be preserved.
1144 1149
1145 1150 Requests for data may be returned from a cache.
1146 1151
1147 1152 Returns a str or a buffer instance of raw byte data.
1148 1153 """
1149 1154 o, d = self._chunkcache
1150 1155 l = len(d)
1151 1156
1152 1157 # is it in the cache?
1153 1158 cachestart = offset - o
1154 1159 cacheend = cachestart + length
1155 1160 if cachestart >= 0 and cacheend <= l:
1156 1161 if cachestart == 0 and cacheend == l:
1157 1162 return d # avoid a copy
1158 1163 return util.buffer(d, cachestart, cacheend - cachestart)
1159 1164
1160 1165 return self._readsegment(offset, length, df=df)
1161 1166
1162 1167 def _getsegmentforrevs(self, startrev, endrev, df=None):
1163 1168 """Obtain a segment of raw data corresponding to a range of revisions.
1164 1169
1165 1170 Accepts the start and end revisions and an optional already-open
1166 1171 file handle to be used for reading. If the file handle is read, its
1167 1172 seek position will not be preserved.
1168 1173
1169 1174 Requests for data may be satisfied by a cache.
1170 1175
1171 1176 Returns a 2-tuple of (offset, data) for the requested range of
1172 1177 revisions. Offset is the integer offset from the beginning of the
1173 1178 revlog and data is a str or buffer of the raw byte data.
1174 1179
1175 1180 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1176 1181 to determine where each revision's data begins and ends.
1177 1182 """
1178 1183 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1179 1184 # (functions are expensive).
1180 1185 index = self.index
1181 1186 istart = index[startrev]
1182 1187 start = int(istart[0] >> 16)
1183 1188 if startrev == endrev:
1184 1189 end = start + istart[1]
1185 1190 else:
1186 1191 iend = index[endrev]
1187 1192 end = int(iend[0] >> 16) + iend[1]
1188 1193
1189 1194 if self._inline:
1190 1195 start += (startrev + 1) * self._io.size
1191 1196 end += (endrev + 1) * self._io.size
1192 1197 length = end - start
1193 1198
1194 1199 return start, self._getsegment(start, length, df=df)
1195 1200
1196 1201 def _chunk(self, rev, df=None):
1197 1202 """Obtain a single decompressed chunk for a revision.
1198 1203
1199 1204 Accepts an integer revision and an optional already-open file handle
1200 1205 to be used for reading. If used, the seek position of the file will not
1201 1206 be preserved.
1202 1207
1203 1208 Returns a str holding uncompressed data for the requested revision.
1204 1209 """
1205 1210 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1206 1211
1207 1212 def _chunks(self, revs, df=None):
1208 1213 """Obtain decompressed chunks for the specified revisions.
1209 1214
1210 1215 Accepts an iterable of numeric revisions that are assumed to be in
1211 1216 ascending order. Also accepts an optional already-open file handle
1212 1217 to be used for reading. If used, the seek position of the file will
1213 1218 not be preserved.
1214 1219
1215 1220 This function is similar to calling ``self._chunk()`` multiple times,
1216 1221 but is faster.
1217 1222
1218 1223 Returns a list with decompressed data for each requested revision.
1219 1224 """
1220 1225 if not revs:
1221 1226 return []
1222 1227 start = self.start
1223 1228 length = self.length
1224 1229 inline = self._inline
1225 1230 iosize = self._io.size
1226 1231 buffer = util.buffer
1227 1232
1228 1233 l = []
1229 1234 ladd = l.append
1230 1235
1231 1236 try:
1232 1237 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1233 1238 except OverflowError:
1234 1239 # issue4215 - we can't cache a run of chunks greater than
1235 1240 # 2G on Windows
1236 1241 return [self._chunk(rev, df=df) for rev in revs]
1237 1242
1238 1243 decomp = self.decompress
1239 1244 for rev in revs:
1240 1245 chunkstart = start(rev)
1241 1246 if inline:
1242 1247 chunkstart += (rev + 1) * iosize
1243 1248 chunklength = length(rev)
1244 1249 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1245 1250
1246 1251 return l
1247 1252
1248 1253 def _chunkclear(self):
1249 1254 """Clear the raw chunk cache."""
1250 1255 self._chunkcache = (0, '')
1251 1256
1252 1257 def deltaparent(self, rev):
1253 1258 """return deltaparent of the given revision"""
1254 1259 base = self.index[rev][3]
1255 1260 if base == rev:
1256 1261 return nullrev
1257 1262 elif self._generaldelta:
1258 1263 return base
1259 1264 else:
1260 1265 return rev - 1
1261 1266
1262 1267 def revdiff(self, rev1, rev2):
1263 1268 """return or calculate a delta between two revisions
1264 1269
1265 1270 The delta calculated is in binary form and is intended to be written to
1266 1271 revlog data directly. So this function needs raw revision data.
1267 1272 """
1268 1273 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1269 1274 return bytes(self._chunk(rev2))
1270 1275
1271 1276 return mdiff.textdiff(self.revision(rev1, raw=True),
1272 1277 self.revision(rev2, raw=True))
1273 1278
1274 1279 def revision(self, nodeorrev, _df=None, raw=False):
1275 1280 """return an uncompressed revision of a given node or revision
1276 1281 number.
1277 1282
1278 1283 _df - an existing file handle to read from. (internal-only)
1279 1284 raw - an optional argument specifying if the revision data is to be
1280 1285 treated as raw data when applying flag transforms. 'raw' should be set
1281 1286 to True when generating changegroups or in debug commands.
1282 1287 """
1283 1288 if isinstance(nodeorrev, int):
1284 1289 rev = nodeorrev
1285 1290 node = self.node(rev)
1286 1291 else:
1287 1292 node = nodeorrev
1288 1293 rev = None
1289 1294
1290 1295 cachedrev = None
1291 1296 flags = None
1292 1297 rawtext = None
1293 1298 if node == nullid:
1294 1299 return ""
1295 1300 if self._cache:
1296 1301 if self._cache[0] == node:
1297 1302 # _cache only stores rawtext
1298 1303 if raw:
1299 1304 return self._cache[2]
1300 1305 # duplicated, but good for perf
1301 1306 if rev is None:
1302 1307 rev = self.rev(node)
1303 1308 if flags is None:
1304 1309 flags = self.flags(rev)
1305 1310 # no extra flags set, no flag processor runs, text = rawtext
1306 1311 if flags == REVIDX_DEFAULT_FLAGS:
1307 1312 return self._cache[2]
1308 1313 # rawtext is reusable. need to run flag processor
1309 1314 rawtext = self._cache[2]
1310 1315
1311 1316 cachedrev = self._cache[1]
1312 1317
1313 1318 # look up what we need to read
1314 1319 if rawtext is None:
1315 1320 if rev is None:
1316 1321 rev = self.rev(node)
1317 1322
1318 1323 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1319 1324 if stopped:
1320 1325 rawtext = self._cache[2]
1321 1326
1322 1327 # drop cache to save memory
1323 1328 self._cache = None
1324 1329
1325 1330 bins = self._chunks(chain, df=_df)
1326 1331 if rawtext is None:
1327 1332 rawtext = bytes(bins[0])
1328 1333 bins = bins[1:]
1329 1334
1330 1335 rawtext = mdiff.patches(rawtext, bins)
1331 1336 self._cache = (node, rev, rawtext)
1332 1337
1333 1338 if flags is None:
1334 1339 if rev is None:
1335 1340 rev = self.rev(node)
1336 1341 flags = self.flags(rev)
1337 1342
1338 1343 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1339 1344 if validatehash:
1340 1345 self.checkhash(text, node, rev=rev)
1341 1346
1342 1347 return text
1343 1348
1344 1349 def hash(self, text, p1, p2):
1345 1350 """Compute a node hash.
1346 1351
1347 1352 Available as a function so that subclasses can replace the hash
1348 1353 as needed.
1349 1354 """
1350 1355 return hash(text, p1, p2)
1351 1356
1352 1357 def _processflags(self, text, flags, operation, raw=False):
1353 1358 """Inspect revision data flags and applies transforms defined by
1354 1359 registered flag processors.
1355 1360
1356 1361 ``text`` - the revision data to process
1357 1362 ``flags`` - the revision flags
1358 1363 ``operation`` - the operation being performed (read or write)
1359 1364 ``raw`` - an optional argument describing if the raw transform should be
1360 1365 applied.
1361 1366
1362 1367 This method processes the flags in the order (or reverse order if
1363 1368 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1364 1369 flag processors registered for present flags. The order of flags defined
1365 1370 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1366 1371
1367 1372 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1368 1373 processed text and ``validatehash`` is a bool indicating whether the
1369 1374 returned text should be checked for hash integrity.
1370 1375
1371 1376 Note: If the ``raw`` argument is set, it has precedence over the
1372 1377 operation and will only update the value of ``validatehash``.
1373 1378 """
1374 1379 # fast path: no flag processors will run
1375 1380 if flags == 0:
1376 1381 return text, True
1377 1382 if not operation in ('read', 'write'):
1378 1383 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1379 1384 # Check all flags are known.
1380 1385 if flags & ~REVIDX_KNOWN_FLAGS:
1381 1386 raise RevlogError(_("incompatible revision flag '%#x'") %
1382 1387 (flags & ~REVIDX_KNOWN_FLAGS))
1383 1388 validatehash = True
1384 1389 # Depending on the operation (read or write), the order might be
1385 1390 # reversed due to non-commutative transforms.
1386 1391 orderedflags = REVIDX_FLAGS_ORDER
1387 1392 if operation == 'write':
1388 1393 orderedflags = reversed(orderedflags)
1389 1394
1390 1395 for flag in orderedflags:
1391 1396 # If a flagprocessor has been registered for a known flag, apply the
1392 1397 # related operation transform and update result tuple.
1393 1398 if flag & flags:
1394 1399 vhash = True
1395 1400
1396 1401 if flag not in _flagprocessors:
1397 1402 message = _("missing processor for flag '%#x'") % (flag)
1398 1403 raise RevlogError(message)
1399 1404
1400 1405 processor = _flagprocessors[flag]
1401 1406 if processor is not None:
1402 1407 readtransform, writetransform, rawtransform = processor
1403 1408
1404 1409 if raw:
1405 1410 vhash = rawtransform(self, text)
1406 1411 elif operation == 'read':
1407 1412 text, vhash = readtransform(self, text)
1408 1413 else: # write operation
1409 1414 text, vhash = writetransform(self, text)
1410 1415 validatehash = validatehash and vhash
1411 1416
1412 1417 return text, validatehash
1413 1418
1414 1419 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1415 1420 """Check node hash integrity.
1416 1421
1417 1422 Available as a function so that subclasses can extend hash mismatch
1418 1423 behaviors as needed.
1419 1424 """
1420 1425 if p1 is None and p2 is None:
1421 1426 p1, p2 = self.parents(node)
1422 1427 if node != self.hash(text, p1, p2):
1423 1428 revornode = rev
1424 1429 if revornode is None:
1425 1430 revornode = templatefilters.short(hex(node))
1426 1431 raise RevlogError(_("integrity check failed on %s:%s")
1427 1432 % (self.indexfile, revornode))
1428 1433
1429 1434 def checkinlinesize(self, tr, fp=None):
1430 1435 """Check if the revlog is too big for inline and convert if so.
1431 1436
1432 1437 This should be called after revisions are added to the revlog. If the
1433 1438 revlog has grown too large to be an inline revlog, it will convert it
1434 1439 to use multiple index and data files.
1435 1440 """
1436 1441 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1437 1442 return
1438 1443
1439 1444 trinfo = tr.find(self.indexfile)
1440 1445 if trinfo is None:
1441 1446 raise RevlogError(_("%s not found in the transaction")
1442 1447 % self.indexfile)
1443 1448
1444 1449 trindex = trinfo[2]
1445 1450 if trindex is not None:
1446 1451 dataoff = self.start(trindex)
1447 1452 else:
1448 1453 # revlog was stripped at start of transaction, use all leftover data
1449 1454 trindex = len(self) - 1
1450 1455 dataoff = self.end(-2)
1451 1456
1452 1457 tr.add(self.datafile, dataoff)
1453 1458
1454 1459 if fp:
1455 1460 fp.flush()
1456 1461 fp.close()
1457 1462
1458 1463 df = self.opener(self.datafile, 'w')
1459 1464 try:
1460 1465 for r in self:
1461 1466 df.write(self._getsegmentforrevs(r, r)[1])
1462 1467 finally:
1463 1468 df.close()
1464 1469
1465 1470 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1466 1471 checkambig=self._checkambig)
1467 1472 self.version &= ~FLAG_INLINE_DATA
1468 1473 self._inline = False
1469 1474 for i in self:
1470 1475 e = self._io.packentry(self.index[i], self.node, self.version, i)
1471 1476 fp.write(e)
1472 1477
1473 1478 # if we don't call close, the temp file will never replace the
1474 1479 # real index
1475 1480 fp.close()
1476 1481
1477 1482 tr.replace(self.indexfile, trindex * self._io.size)
1478 1483 self._chunkclear()
1479 1484
1480 1485 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1481 1486 node=None, flags=REVIDX_DEFAULT_FLAGS):
1482 1487 """add a revision to the log
1483 1488
1484 1489 text - the revision data to add
1485 1490 transaction - the transaction object used for rollback
1486 1491 link - the linkrev data to add
1487 1492 p1, p2 - the parent nodeids of the revision
1488 1493 cachedelta - an optional precomputed delta
1489 1494 node - nodeid of revision; typically node is not specified, and it is
1490 1495 computed by default as hash(text, p1, p2), however subclasses might
1491 1496 use different hashing method (and override checkhash() in such case)
1492 1497 flags - the known flags to set on the revision
1493 1498 """
1494 1499 if link == nullrev:
1495 1500 raise RevlogError(_("attempted to add linkrev -1 to %s")
1496 1501 % self.indexfile)
1497 1502
1498 1503 if flags:
1499 1504 node = node or self.hash(text, p1, p2)
1500 1505
1501 1506 rawtext, validatehash = self._processflags(text, flags, 'write')
1502 1507
1503 1508 # If the flag processor modifies the revision data, ignore any provided
1504 1509 # cachedelta.
1505 1510 if rawtext != text:
1506 1511 cachedelta = None
1507 1512
1508 1513 if len(rawtext) > _maxentrysize:
1509 1514 raise RevlogError(
1510 1515 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1511 1516 % (self.indexfile, len(rawtext)))
1512 1517
1513 1518 node = node or self.hash(rawtext, p1, p2)
1514 1519 if node in self.nodemap:
1515 1520 return node
1516 1521
1517 1522 if validatehash:
1518 1523 self.checkhash(rawtext, node, p1=p1, p2=p2)
1519 1524
1520 1525 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1521 1526 flags, cachedelta=cachedelta)
1522 1527
1523 1528 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1524 1529 cachedelta=None):
1525 1530 """add a raw revision with known flags, node and parents
1526 1531 useful when reusing a revision not stored in this revlog (ex: received
1527 1532 over wire, or read from an external bundle).
1528 1533 """
1529 1534 dfh = None
1530 1535 if not self._inline:
1531 1536 dfh = self.opener(self.datafile, "a+")
1532 1537 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1533 1538 try:
1534 1539 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1535 1540 flags, cachedelta, ifh, dfh)
1536 1541 finally:
1537 1542 if dfh:
1538 1543 dfh.close()
1539 1544 ifh.close()
1540 1545
1541 1546 def compress(self, data):
1542 1547 """Generate a possibly-compressed representation of data."""
1543 1548 if not data:
1544 1549 return '', data
1545 1550
1546 1551 compressed = self._compressor.compress(data)
1547 1552
1548 1553 if compressed:
1549 1554 # The revlog compressor added the header in the returned data.
1550 1555 return '', compressed
1551 1556
1552 1557 if data[0:1] == '\0':
1553 1558 return '', data
1554 1559 return 'u', data
1555 1560
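# Header convention sketch (not part of the diff): the first byte written
# by compress() is what decompress() below routes on:
#   rl.decompress('x' + ...)   # zlib stream ('x' is zlib's own header)
#   rl.decompress('\0' + ...)  # chunk stored verbatim, returned as-is
#   rl.decompress('u' + rest)  # explicit uncompressed marker -> rest
# 'u' is emitted only when the engine declines to compress and the data
# does not already begin with '\0'.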
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

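    # Illustrative sketch (comments only): the one-byte header drives the
    # dispatch above. For example:
    #
    #   rl.decompress('\0raw bytes')   # -> '\0raw bytes' (stored verbatim)
    #   rl.decompress('uraw bytes')    # -> buffer over 'raw bytes'
    #   rl.decompress('x\x9c...')      # -> zlib-decompressed payload
    #
    # Any other header byte is resolved through util.compengines (e.g. the
    # zstd engine when available), and the resolved compressor is cached in
    # self._decompressors for subsequent chunks.
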
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

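    # Worked example (illustrative numbers): for a 1000-byte fulltext, a
    # candidate delta is rejected if reading its chain would span more than
    # 4000 bytes on disk (dist > 4 * textlen), if the delta itself is larger
    # than the fulltext (l > textlen), or if the deltas to apply sum to more
    # than 2000 bytes (compresseddeltalen > 2 * textlen). A 200-byte delta
    # with dist=1500, compresseddeltalen=800 and a chain shorter than
    # self._maxchainlen would be accepted.
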
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache.
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude any base already tested lazily above
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

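        # Each index entry is an 8-tuple; the fields below are, in order
        # (descriptive note added for clarity): packed offset+flags,
        # compressed length, fulltext length, delta base rev, linkrev,
        # p1 rev, p2 rev, node.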
        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

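    # Illustrative sketch (comments only; 'repo', 'rl', 'cg' and 'tr' are
    # assumptions for the example): a changegroup consumer typically maps
    # each changeset node in the stream to its local changelog revision to
    # obtain the linkrev:
    #
    #   cl = repo.changelog
    #   def linkmapper(csnode):
    #       return cl.rev(csnode)
    #   added = rl.addgroup(cg, linkmapper, tr)
    #
    # ``added`` is the list of nodes seen in the group, including ones that
    # already existed locally.
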
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

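    # Worked example (illustrative): in a linear revlog where revs 0..4 have
    # linkrevs 0..4, getstrippoint(3) walks back from the head until no
    # remaining head has a linkrev >= 3 and returns (3, set()): rev 3 is the
    # first rev to truncate, and no surviving rev ends up with a broken
    # linkrev. The walk relies on linkrevs never decreasing from parent to
    # child.
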
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

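    # Note (added for clarity): checksize() returns (dd, di) -- the
    # byte-count discrepancies of the data file and the index file against
    # the sizes implied by the index entries. (0, 0) means both files are
    # consistent with the index.
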
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the destination
            revlog would not select the same revisions for the delta. This is
            the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the same
            revisions for the delta. This mode strikes a balance between speed
            and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of execution.
            This mode can be used to recompute deltas (e.g. if the diff/delta
            algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If None, the destination revlog's current setting is used.
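
        Usage sketch (illustrative; assumes ``src`` and ``dest`` are opened
        revlogs and ``tr`` is an active transaction)::

            src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)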
2094 2099 """
2095 2100 if deltareuse not in self.DELTAREUSEALL:
2096 2101 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2097 2102
2098 2103 if len(destrevlog):
2099 2104 raise ValueError(_('destination revlog is not empty'))
2100 2105
2101 2106 if getattr(self, 'filteredrevs', None):
2102 2107 raise ValueError(_('source revlog has filtered revisions'))
2103 2108 if getattr(destrevlog, 'filteredrevs', None):
2104 2109 raise ValueError(_('destination revlog has filtered revisions'))
2105 2110
2106 2111 # lazydeltabase controls whether to reuse a cached delta, if possible.
2107 2112 oldlazydeltabase = destrevlog._lazydeltabase
2108 2113 oldamd = destrevlog._aggressivemergedeltas
2109 2114
2110 2115 try:
2111 2116 if deltareuse == self.DELTAREUSEALWAYS:
2112 2117 destrevlog._lazydeltabase = True
2113 2118 elif deltareuse == self.DELTAREUSESAMEREVS:
2114 2119 destrevlog._lazydeltabase = False
2115 2120
2116 2121 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2117 2122
2118 2123 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2119 2124 self.DELTAREUSESAMEREVS)
2120 2125
2121 2126 index = self.index
2122 2127 for rev in self:
2123 2128 entry = index[rev]
2124 2129
2125 2130 # Some classes override linkrev to take filtered revs into
2126 2131 # account. Use raw entry from index.
2127 2132 flags = entry[0] & 0xffff
2128 2133 linkrev = entry[4]
2129 2134 p1 = index[entry[5]][7]
2130 2135 p2 = index[entry[6]][7]
2131 2136 node = entry[7]
2132 2137
2133 2138 # (Possibly) reuse the delta from the revlog if allowed and
2134 2139 # the revlog chunk is a delta.
2135 2140 cachedelta = None
2136 2141 rawtext = None
2137 2142 if populatecachedelta:
2138 2143 dp = self.deltaparent(rev)
2139 2144 if dp != nullrev:
2140 2145 cachedelta = (dp, str(self._chunk(rev)))
2141 2146
2142 2147 if not cachedelta:
2143 2148 rawtext = self.revision(rev, raw=True)
2144 2149
2145 2150 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2146 2151 checkambig=False)
2147 2152 dfh = None
2148 2153 if not destrevlog._inline:
2149 2154 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2150 2155 try:
2151 2156 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2152 2157 flags, cachedelta, ifh, dfh)
2153 2158 finally:
2154 2159 if dfh:
2155 2160 dfh.close()
2156 2161 ifh.close()
2157 2162
2158 2163 if addrevisioncb:
2159 2164 addrevisioncb(self, rev, node)
2160 2165 finally:
2161 2166 destrevlog._lazydeltabase = oldlazydeltabase
2162 2167 destrevlog._aggressivemergedeltas = oldamd