py3: catch binascii.Error raised from binascii.unhexlify...
Pulkit Goyal
r32969:30d0cb27 default
@@ -1,2192 +1,2193 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 import binascii
16 17 import collections
17 18 import errno
18 19 import hashlib
19 20 import os
20 21 import struct
21 22 import zlib
22 23
23 24 # import stuff from node for others to import from revlog
24 25 from .node import (
25 26 bin,
26 27 hex,
27 28 nullid,
28 29 nullrev,
29 30 wdirhex,
30 31 wdirid,
31 32 wdirrev,
32 33 )
33 34 from .i18n import _
34 35 from . import (
35 36 ancestor,
36 37 error,
37 38 mdiff,
38 39 policy,
39 40 pycompat,
40 41 templatefilters,
41 42 util,
42 43 )
43 44
44 45 parsers = policy.importmod(r'parsers')
45 46
46 47 _pack = struct.pack
47 48 _unpack = struct.unpack
48 49 # Aliased for performance.
49 50 _zlibdecompress = zlib.decompress
50 51
51 52 # revlog header flags
52 53 REVLOGV0 = 0
53 54 REVLOGV1 = 1
54 55 # Dummy value until file format is finalized.
55 56 # Reminder: change the bounds check in revlog.__init__ when this is changed.
56 57 REVLOGV2 = 0xDEAD
57 58 FLAG_INLINE_DATA = (1 << 16)
58 59 FLAG_GENERALDELTA = (1 << 17)
59 60 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
60 61 REVLOG_DEFAULT_FORMAT = REVLOGV1
61 62 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
62 63 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
63 64 REVLOGV2_FLAGS = REVLOGV1_FLAGS
64 65
65 66 # revlog index flags
66 67 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
67 68 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
68 69 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
69 70 REVIDX_DEFAULT_FLAGS = 0
70 71 # stable order in which flags need to be processed and their processors applied
71 72 REVIDX_FLAGS_ORDER = [
72 73 REVIDX_ISCENSORED,
73 74 REVIDX_ELLIPSIS,
74 75 REVIDX_EXTSTORED,
75 76 ]
76 77 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
77 78
78 79 # max size of revlog with inline data
79 80 _maxinline = 131072
80 81 _chunksize = 1048576
81 82
82 83 RevlogError = error.RevlogError
83 84 LookupError = error.LookupError
84 85 CensoredNodeError = error.CensoredNodeError
85 86 ProgrammingError = error.ProgrammingError
86 87
87 88 # Store flag processors (cf. 'addflagprocessor()' to register)
88 89 _flagprocessors = {
89 90 REVIDX_ISCENSORED: None,
90 91 }
91 92
92 93 def addflagprocessor(flag, processor):
93 94 """Register a flag processor on a revision data flag.
94 95
95 96 Invariant:
96 97 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
97 98 - Only one flag processor can be registered on a specific flag.
98 99 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
99 100 following signatures:
100 101 - (read) f(self, rawtext) -> text, bool
101 102 - (write) f(self, text) -> rawtext, bool
102 103 - (raw) f(self, rawtext) -> bool
103 104 "text" is presented to the user. "rawtext" is stored in revlog data, not
104 105 directly visible to the user.
105 106 The boolean returned by these transforms is used to determine whether
106 107 the returned text can be used for hash integrity checking. For example,
107 108 if "write" returns False, then "text" is used to generate hash. If
108 109 "write" returns True, that basically means "rawtext" returned by "write"
109 110 should be used to generate hash. Usually, "write" and "read" return
110 111 different booleans. And "raw" returns the same boolean as "write".
111 112
112 113 Note: The 'raw' transform is used for changegroup generation and in some
113 114 debug commands. In this case the transform only indicates whether the
114 115 contents can be used for hash integrity checks.
115 116 """
116 117 if not flag & REVIDX_KNOWN_FLAGS:
117 118 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
118 119 raise ProgrammingError(msg)
119 120 if flag not in REVIDX_FLAGS_ORDER:
120 121 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
121 122 raise ProgrammingError(msg)
122 123 if flag in _flagprocessors:
123 124 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
124 125 raise error.Abort(msg)
125 126 _flagprocessors[flag] = processor
126 127
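# A minimal usage sketch (illustrative, not part of the original source):
# registering a processor on REVIDX_EXTSTORED. The helper names below are
# hypothetical; only the (read, write, raw) signatures come from the
# docstring above.
#
#   >>> def readext(rl, rawtext):
#   ...     return resolveexternal(rawtext), False  # hypothetical resolver
#   >>> def writeext(rl, text):
#   ...     return storeexternal(text), False       # hypothetical store
#   >>> def rawext(rl, rawtext):
#   ...     return False     # rawtext alone cannot be hash-checked
#   >>> addflagprocessor(REVIDX_EXTSTORED, (readext, writeext, rawext))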
127 128 def getoffset(q):
128 129 return int(q >> 16)
129 130
130 131 def gettype(q):
131 132 return int(q & 0xFFFF)
132 133
133 134 def offset_type(offset, type):
134 135 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
135 136 raise ValueError('unknown revlog index flags')
136 137 return int(int(offset) << 16 | type)
137 138
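# Worked example (illustrative): the first field of an index entry packs a
# 48-bit data offset with 16 bits of flags, so getoffset/gettype invert
# offset_type.
#
#   >>> v = offset_type(1024, REVIDX_ISCENSORED)
#   >>> getoffset(v), gettype(v) == REVIDX_ISCENSORED
#   (1024, True)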
138 139 _nullhash = hashlib.sha1(nullid)
139 140
140 141 def hash(text, p1, p2):
141 142 """generate a hash from the given text and its parent hashes
142 143
143 144 This hash combines both the current file contents and its history
144 145 in a manner that makes it easy to distinguish nodes with the same
145 146 content in the revision graph.
146 147 """
147 148 # As of now, if one of the parent nodes is null, p2 is null
148 149 if p2 == nullid:
149 150 # deep copy of a hash is faster than creating one
150 151 s = _nullhash.copy()
151 152 s.update(p1)
152 153 else:
153 154 # none of the parent nodes are nullid
154 155 l = [p1, p2]
155 156 l.sort()
156 157 s = hashlib.sha1(l[0])
157 158 s.update(l[1])
158 159 s.update(text)
159 160 return s.digest()
160 161
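# Worked example (illustrative): with two null parents the computation takes
# the _nullhash fast path, which is equivalent to hashing both parents and
# then the text.
#
#   >>> hash(b'', nullid, nullid) == hashlib.sha1(nullid * 2).digest()
#   True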
161 162 # index v0:
162 163 # 4 bytes: offset
163 164 # 4 bytes: compressed length
164 165 # 4 bytes: base rev
165 166 # 4 bytes: link rev
166 167 # 20 bytes: parent 1 nodeid
167 168 # 20 bytes: parent 2 nodeid
168 169 # 20 bytes: nodeid
169 170 indexformatv0 = ">4l20s20s20s"
170 171
171 172 class revlogoldio(object):
172 173 def __init__(self):
173 174 self.size = struct.calcsize(indexformatv0)
174 175
175 176 def parseindex(self, data, inline):
176 177 s = self.size
177 178 index = []
178 179 nodemap = {nullid: nullrev}
179 180 n = off = 0
180 181 l = len(data)
181 182 while off + s <= l:
182 183 cur = data[off:off + s]
183 184 off += s
184 185 e = _unpack(indexformatv0, cur)
185 186 # transform to revlogv1 format
186 187 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
187 188 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
188 189 index.append(e2)
189 190 nodemap[e[6]] = n
190 191 n += 1
191 192
192 193 # add the magic null revision at -1
193 194 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
194 195
195 196 return index, nodemap, None
196 197
197 198 def packentry(self, entry, node, version, rev):
198 199 if gettype(entry[0]):
199 200 raise RevlogError(_('index entry flags need revlog version 1'))
200 201 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
201 202 node(entry[5]), node(entry[6]), entry[7])
202 203 return _pack(indexformatv0, *e2)
203 204
204 205 # index ng:
205 206 # 6 bytes: offset
206 207 # 2 bytes: flags
207 208 # 4 bytes: compressed length
208 209 # 4 bytes: uncompressed length
209 210 # 4 bytes: base rev
210 211 # 4 bytes: link rev
211 212 # 4 bytes: parent 1 rev
212 213 # 4 bytes: parent 2 rev
213 214 # 32 bytes: nodeid
214 215 indexformatng = ">Qiiiiii20s12x"
215 216 versionformat = ">I"
216 217
217 218 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
218 219 # signed integer)
219 220 _maxentrysize = 0x7fffffff
220 221
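# Sanity sketch (illustrative): the v1 ("ng") record layout documented above
# adds up to a fixed 64-byte entry, which is what revlogio.size relies on.
#
#   >>> import struct
#   >>> struct.calcsize(indexformatng)
#   64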
221 222 class revlogio(object):
222 223 def __init__(self):
223 224 self.size = struct.calcsize(indexformatng)
224 225
225 226 def parseindex(self, data, inline):
226 227 # call the C implementation to parse the index data
227 228 index, cache = parsers.parse_index2(data, inline)
228 229 return index, getattr(index, 'nodemap', None), cache
229 230
230 231 def packentry(self, entry, node, version, rev):
231 232 p = _pack(indexformatng, *entry)
232 233 if rev == 0:
233 234 p = _pack(versionformat, version) + p[4:]
234 235 return p
235 236
236 237 class revlog(object):
237 238 """
238 239 the underlying revision storage object
239 240
240 241 A revlog consists of two parts, an index and the revision data.
241 242
242 243 The index is a file with a fixed record size containing
243 244 information on each revision, including its nodeid (hash), the
244 245 nodeids of its parents, the position and offset of its data within
245 246 the data file, and the revision it's based on. Finally, each entry
246 247 contains a linkrev entry that can serve as a pointer to external
247 248 data.
248 249
249 250 The revision data itself is a linear collection of data chunks.
250 251 Each chunk represents a revision and is usually represented as a
251 252 delta against the previous chunk. To bound lookup time, runs of
252 253 deltas are limited to about 2 times the length of the original
253 254 version data. This makes retrieval of a version proportional to
254 255 its size, or O(1) relative to the number of revisions.
255 256
256 257 Both pieces of the revlog are written to in an append-only
257 258 fashion, which means we never need to rewrite a file to insert or
258 259 remove data, and can use some simple techniques to avoid the need
259 260 for locking while reading.
260 261
261 262 If checkambig, indexfile is opened with checkambig=True at
262 263 writing, to avoid file stat ambiguity.
263 264 """
264 265 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
265 266 """
266 267 create a revlog object
267 268
268 269 opener is a function that abstracts the file opening operation
269 270 and can be used to implement COW semantics or the like.
270 271 """
271 272 self.indexfile = indexfile
272 273 self.datafile = datafile or (indexfile[:-2] + ".d")
273 274 self.opener = opener
274 275 # When True, indexfile is opened with checkambig=True at writing, to
275 276 # avoid file stat ambiguity.
276 277 self._checkambig = checkambig
277 278 # 3-tuple of (node, rev, text) for a raw revision.
278 279 self._cache = None
279 280 # Maps rev to chain base rev.
280 281 self._chainbasecache = util.lrucachedict(100)
281 282 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
282 283 self._chunkcache = (0, '')
283 284 # How much data to read and cache into the raw revlog data cache.
284 285 self._chunkcachesize = 65536
285 286 self._maxchainlen = None
286 287 self._aggressivemergedeltas = False
287 288 self.index = []
288 289 # Mapping of partial identifiers to full nodes.
289 290 self._pcache = {}
290 291 # Mapping of full nodeid to revision integer.
291 292 self._nodecache = {nullid: nullrev}
292 293 self._nodepos = None
293 294 self._compengine = 'zlib'
294 295
295 296 v = REVLOG_DEFAULT_VERSION
296 297 opts = getattr(opener, 'options', None)
297 298 if opts is not None:
298 299 if 'revlogv2' in opts:
299 300 # version 2 revlogs always use generaldelta.
300 301 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
301 302 elif 'revlogv1' in opts:
302 303 if 'generaldelta' in opts:
303 304 v |= FLAG_GENERALDELTA
304 305 else:
305 306 v = 0
306 307 if 'chunkcachesize' in opts:
307 308 self._chunkcachesize = opts['chunkcachesize']
308 309 if 'maxchainlen' in opts:
309 310 self._maxchainlen = opts['maxchainlen']
310 311 if 'aggressivemergedeltas' in opts:
311 312 self._aggressivemergedeltas = opts['aggressivemergedeltas']
312 313 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
313 314 if 'compengine' in opts:
314 315 self._compengine = opts['compengine']
315 316
316 317 if self._chunkcachesize <= 0:
317 318 raise RevlogError(_('revlog chunk cache size %r is not greater '
318 319 'than 0') % self._chunkcachesize)
319 320 elif self._chunkcachesize & (self._chunkcachesize - 1):
320 321 raise RevlogError(_('revlog chunk cache size %r is not a power '
321 322 'of 2') % self._chunkcachesize)
322 323
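# Note on the check above (illustrative): for x > 0, ``x & (x - 1)`` clears
# the lowest set bit, so it is zero exactly for powers of two.
#
#   >>> [x for x in (1, 2, 3, 4, 6, 8) if not (x & (x - 1))]
#   [1, 2, 4, 8]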
323 324 indexdata = ''
324 325 self._initempty = True
325 326 try:
326 327 f = self.opener(self.indexfile)
327 328 indexdata = f.read()
328 329 f.close()
329 330 if len(indexdata) > 0:
330 331 v = struct.unpack(versionformat, indexdata[:4])[0]
331 332 self._initempty = False
332 333 except IOError as inst:
333 334 if inst.errno != errno.ENOENT:
334 335 raise
335 336
336 337 self.version = v
337 338 self._inline = v & FLAG_INLINE_DATA
338 339 self._generaldelta = v & FLAG_GENERALDELTA
339 340 flags = v & ~0xFFFF
340 341 fmt = v & 0xFFFF
341 342 if fmt == REVLOGV0:
342 343 if flags:
343 344 raise RevlogError(_('unknown flags (%#04x) in version %d '
344 345 'revlog %s') %
345 346 (flags >> 16, fmt, self.indexfile))
346 347 elif fmt == REVLOGV1:
347 348 if flags & ~REVLOGV1_FLAGS:
348 349 raise RevlogError(_('unknown flags (%#04x) in version %d '
349 350 'revlog %s') %
350 351 (flags >> 16, fmt, self.indexfile))
351 352 elif fmt == REVLOGV2:
352 353 if flags & ~REVLOGV2_FLAGS:
353 354 raise RevlogError(_('unknown flags (%#04x) in version %d '
354 355 'revlog %s') %
355 356 (flags >> 16, fmt, self.indexfile))
356 357 else:
357 358 raise RevlogError(_('unknown version (%d) in revlog %s') %
358 359 (fmt, self.indexfile))
359 360
360 361 self.storedeltachains = True
361 362
362 363 self._io = revlogio()
363 364 if self.version == REVLOGV0:
364 365 self._io = revlogoldio()
365 366 try:
366 367 d = self._io.parseindex(indexdata, self._inline)
367 368 except (ValueError, IndexError):
368 369 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
369 370 self.index, nodemap, self._chunkcache = d
370 371 if nodemap is not None:
371 372 self.nodemap = self._nodecache = nodemap
372 373 if not self._chunkcache:
373 374 self._chunkclear()
374 375 # revnum -> (chain-length, sum-delta-length)
375 376 self._chaininfocache = {}
376 377 # revlog header -> revlog compressor
377 378 self._decompressors = {}
378 379
379 380 @util.propertycache
380 381 def _compressor(self):
381 382 return util.compengines[self._compengine].revlogcompressor()
382 383
383 384 def tip(self):
384 385 return self.node(len(self.index) - 2)
385 386 def __contains__(self, rev):
386 387 return 0 <= rev < len(self)
387 388 def __len__(self):
388 389 return len(self.index) - 1
389 390 def __iter__(self):
390 391 return iter(xrange(len(self)))
391 392 def revs(self, start=0, stop=None):
392 393 """iterate over all rev in this revlog (from start to stop)"""
393 394 step = 1
394 395 if stop is not None:
395 396 if start > stop:
396 397 step = -1
397 398 stop += step
398 399 else:
399 400 stop = len(self)
400 401 return xrange(start, stop, step)
401 402
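# Usage sketch (illustrative; ``rl`` is a hypothetical revlog): ``revs``
# includes ``stop`` when given and iterates backwards when start > stop.
#
#   >>> list(rl.revs(start=2, stop=0))
#   [2, 1, 0]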
402 403 @util.propertycache
403 404 def nodemap(self):
404 405 self.rev(self.node(0))
405 406 return self._nodecache
406 407
407 408 def hasnode(self, node):
408 409 try:
409 410 self.rev(node)
410 411 return True
411 412 except KeyError:
412 413 return False
413 414
414 415 def clearcaches(self):
415 416 self._cache = None
416 417 self._chainbasecache.clear()
417 418 self._chunkcache = (0, '')
418 419 self._pcache = {}
419 420
420 421 try:
421 422 self._nodecache.clearcaches()
422 423 except AttributeError:
423 424 self._nodecache = {nullid: nullrev}
424 425 self._nodepos = None
425 426
426 427 def rev(self, node):
427 428 try:
428 429 return self._nodecache[node]
429 430 except TypeError:
430 431 raise
431 432 except RevlogError:
432 433 # parsers.c radix tree lookup failed
433 434 if node == wdirid:
434 435 raise error.WdirUnsupported
435 436 raise LookupError(node, self.indexfile, _('no node'))
436 437 except KeyError:
437 438 # pure python cache lookup failed
438 439 n = self._nodecache
439 440 i = self.index
440 441 p = self._nodepos
441 442 if p is None:
442 443 p = len(i) - 2
443 444 for r in xrange(p, -1, -1):
444 445 v = i[r][7]
445 446 n[v] = r
446 447 if v == node:
447 448 self._nodepos = r - 1
448 449 return r
449 450 if node == wdirid:
450 451 raise error.WdirUnsupported
451 452 raise LookupError(node, self.indexfile, _('no node'))
452 453
453 454 # Accessors for index entries.
454 455
455 456 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
456 457 # are flags.
457 458 def start(self, rev):
458 459 return int(self.index[rev][0] >> 16)
459 460
460 461 def flags(self, rev):
461 462 return self.index[rev][0] & 0xFFFF
462 463
463 464 def length(self, rev):
464 465 return self.index[rev][1]
465 466
466 467 def rawsize(self, rev):
467 468 """return the length of the uncompressed text for a given revision"""
468 469 l = self.index[rev][2]
469 470 if l >= 0:
470 471 return l
471 472
472 473 t = self.revision(rev, raw=True)
473 474 return len(t)
474 475
475 476 def size(self, rev):
476 477 """length of non-raw text (processed by a "read" flag processor)"""
477 478 # fast path: if no "read" flag processor could change the content,
478 479 # size is rawsize. note: ELLIPSIS is known to not change the content.
479 480 flags = self.flags(rev)
480 481 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
481 482 return self.rawsize(rev)
482 483
483 484 return len(self.revision(rev, raw=False))
484 485
485 486 def chainbase(self, rev):
486 487 base = self._chainbasecache.get(rev)
487 488 if base is not None:
488 489 return base
489 490
490 491 index = self.index
491 492 base = index[rev][3]
492 493 while base != rev:
493 494 rev = base
494 495 base = index[rev][3]
495 496
496 497 self._chainbasecache[rev] = base
497 498 return base
498 499
499 500 def linkrev(self, rev):
500 501 return self.index[rev][4]
501 502
502 503 def parentrevs(self, rev):
503 504 try:
504 505 return self.index[rev][5:7]
505 506 except IndexError:
506 507 if rev == wdirrev:
507 508 raise error.WdirUnsupported
508 509 raise
509 510
510 511 def node(self, rev):
511 512 try:
512 513 return self.index[rev][7]
513 514 except IndexError:
514 515 if rev == wdirrev:
515 516 raise error.WdirUnsupported
516 517 raise
517 518
518 519 # Derived from index values.
519 520
520 521 def end(self, rev):
521 522 return self.start(rev) + self.length(rev)
522 523
523 524 def parents(self, node):
524 525 i = self.index
525 526 d = i[self.rev(node)]
526 527 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
527 528
528 529 def chainlen(self, rev):
529 530 return self._chaininfo(rev)[0]
530 531
531 532 def _chaininfo(self, rev):
532 533 chaininfocache = self._chaininfocache
533 534 if rev in chaininfocache:
534 535 return chaininfocache[rev]
535 536 index = self.index
536 537 generaldelta = self._generaldelta
537 538 iterrev = rev
538 539 e = index[iterrev]
539 540 clen = 0
540 541 compresseddeltalen = 0
541 542 while iterrev != e[3]:
542 543 clen += 1
543 544 compresseddeltalen += e[1]
544 545 if generaldelta:
545 546 iterrev = e[3]
546 547 else:
547 548 iterrev -= 1
548 549 if iterrev in chaininfocache:
549 550 t = chaininfocache[iterrev]
550 551 clen += t[0]
551 552 compresseddeltalen += t[1]
552 553 break
553 554 e = index[iterrev]
554 555 else:
555 556 # Add text length of base since decompressing that also takes
556 557 # work. For cache hits the length is already included.
557 558 compresseddeltalen += e[1]
558 559 r = (clen, compresseddeltalen)
559 560 chaininfocache[rev] = r
560 561 return r
561 562
562 563 def _deltachain(self, rev, stoprev=None):
563 564 """Obtain the delta chain for a revision.
564 565
565 566 ``stoprev`` specifies a revision to stop at. If not specified, we
566 567 stop at the base of the chain.
567 568
568 569 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
569 570 revs in ascending order and ``stopped`` is a bool indicating whether
570 571 ``stoprev`` was hit.
571 572 """
572 573 chain = []
573 574
574 575 # Alias to prevent attribute lookup in tight loop.
575 576 index = self.index
576 577 generaldelta = self._generaldelta
577 578
578 579 iterrev = rev
579 580 e = index[iterrev]
580 581 while iterrev != e[3] and iterrev != stoprev:
581 582 chain.append(iterrev)
582 583 if generaldelta:
583 584 iterrev = e[3]
584 585 else:
585 586 iterrev -= 1
586 587 e = index[iterrev]
587 588
588 589 if iterrev == stoprev:
589 590 stopped = True
590 591 else:
591 592 chain.append(iterrev)
592 593 stopped = False
593 594
594 595 chain.reverse()
595 596 return chain, stopped
596 597
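# Usage sketch (illustrative; ``rl`` and rev 5 are hypothetical): without a
# stoprev the chain runs from the full-text base up to the requested rev, so
# its first element matches chainbase().
#
#   >>> chain, stopped = rl._deltachain(5)
#   >>> stopped, chain[0] == rl.chainbase(5), chain[-1]
#   (False, True, 5)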
597 598 def ancestors(self, revs, stoprev=0, inclusive=False):
598 599 """Generate the ancestors of 'revs' in reverse topological order.
599 600 Does not generate revs lower than stoprev.
600 601
601 602 See the documentation for ancestor.lazyancestors for more details."""
602 603
603 604 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
604 605 inclusive=inclusive)
605 606
606 607 def descendants(self, revs):
607 608 """Generate the descendants of 'revs' in revision order.
608 609
609 610 Yield a sequence of revision numbers starting with a child of
610 611 some rev in revs, i.e., each revision is *not* considered a
611 612 descendant of itself. Results are ordered by revision number (a
612 613 topological sort)."""
613 614 first = min(revs)
614 615 if first == nullrev:
615 616 for i in self:
616 617 yield i
617 618 return
618 619
619 620 seen = set(revs)
620 621 for i in self.revs(start=first + 1):
621 622 for x in self.parentrevs(i):
622 623 if x != nullrev and x in seen:
623 624 seen.add(i)
624 625 yield i
625 626 break
626 627
627 628 def findcommonmissing(self, common=None, heads=None):
628 629 """Return a tuple of the ancestors of common and the ancestors of heads
629 630 that are not ancestors of common. In revset terminology, we return the
630 631 tuple:
631 632
632 633 ::common, (::heads) - (::common)
633 634
634 635 The list is sorted by revision number, meaning it is
635 636 topologically sorted.
636 637
637 638 'heads' and 'common' are both lists of node IDs. If heads is
638 639 not supplied, uses all of the revlog's heads. If common is not
639 640 supplied, uses nullid."""
640 641 if common is None:
641 642 common = [nullid]
642 643 if heads is None:
643 644 heads = self.heads()
644 645
645 646 common = [self.rev(n) for n in common]
646 647 heads = [self.rev(n) for n in heads]
647 648
648 649 # we want the ancestors, but inclusive
649 650 class lazyset(object):
650 651 def __init__(self, lazyvalues):
651 652 self.addedvalues = set()
652 653 self.lazyvalues = lazyvalues
653 654
654 655 def __contains__(self, value):
655 656 return value in self.addedvalues or value in self.lazyvalues
656 657
657 658 def __iter__(self):
658 659 added = self.addedvalues
659 660 for r in added:
660 661 yield r
661 662 for r in self.lazyvalues:
662 663 if r not in added:
663 664 yield r
664 665
665 666 def add(self, value):
666 667 self.addedvalues.add(value)
667 668
668 669 def update(self, values):
669 670 self.addedvalues.update(values)
670 671
671 672 has = lazyset(self.ancestors(common))
672 673 has.add(nullrev)
673 674 has.update(common)
674 675
675 676 # take all ancestors from heads that aren't in has
676 677 missing = set()
677 678 visit = collections.deque(r for r in heads if r not in has)
678 679 while visit:
679 680 r = visit.popleft()
680 681 if r in missing:
681 682 continue
682 683 else:
683 684 missing.add(r)
684 685 for p in self.parentrevs(r):
685 686 if p not in has:
686 687 visit.append(p)
687 688 missing = list(missing)
688 689 missing.sort()
689 690 return has, [self.node(miss) for miss in missing]
690 691
691 692 def incrementalmissingrevs(self, common=None):
692 693 """Return an object that can be used to incrementally compute the
693 694 revision numbers of the ancestors of arbitrary sets that are not
694 695 ancestors of common. This is an ancestor.incrementalmissingancestors
695 696 object.
696 697
697 698 'common' is a list of revision numbers. If common is not supplied, uses
698 699 nullrev.
699 700 """
700 701 if common is None:
701 702 common = [nullrev]
702 703
703 704 return ancestor.incrementalmissingancestors(self.parentrevs, common)
704 705
705 706 def findmissingrevs(self, common=None, heads=None):
706 707 """Return the revision numbers of the ancestors of heads that
707 708 are not ancestors of common.
708 709
709 710 More specifically, return a list of revision numbers corresponding to
710 711 nodes N such that every N satisfies the following constraints:
711 712
712 713 1. N is an ancestor of some node in 'heads'
713 714 2. N is not an ancestor of any node in 'common'
714 715
715 716 The list is sorted by revision number, meaning it is
716 717 topologically sorted.
717 718
718 719 'heads' and 'common' are both lists of revision numbers. If heads is
719 720 not supplied, uses all of the revlog's heads. If common is not
720 721 supplied, uses nullid."""
721 722 if common is None:
722 723 common = [nullrev]
723 724 if heads is None:
724 725 heads = self.headrevs()
725 726
726 727 inc = self.incrementalmissingrevs(common=common)
727 728 return inc.missingancestors(heads)
728 729
729 730 def findmissing(self, common=None, heads=None):
730 731 """Return the ancestors of heads that are not ancestors of common.
731 732
732 733 More specifically, return a list of nodes N such that every N
733 734 satisfies the following constraints:
734 735
735 736 1. N is an ancestor of some node in 'heads'
736 737 2. N is not an ancestor of any node in 'common'
737 738
738 739 The list is sorted by revision number, meaning it is
739 740 topologically sorted.
740 741
741 742 'heads' and 'common' are both lists of node IDs. If heads is
742 743 not supplied, uses all of the revlog's heads. If common is not
743 744 supplied, uses nullid."""
744 745 if common is None:
745 746 common = [nullid]
746 747 if heads is None:
747 748 heads = self.heads()
748 749
749 750 common = [self.rev(n) for n in common]
750 751 heads = [self.rev(n) for n in heads]
751 752
752 753 inc = self.incrementalmissingrevs(common=common)
753 754 return [self.node(r) for r in inc.missingancestors(heads)]
754 755
755 756 def nodesbetween(self, roots=None, heads=None):
756 757 """Return a topological path from 'roots' to 'heads'.
757 758
758 759 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
759 760 topologically sorted list of all nodes N that satisfy both of
760 761 these constraints:
761 762
762 763 1. N is a descendant of some node in 'roots'
763 764 2. N is an ancestor of some node in 'heads'
764 765
765 766 Every node is considered to be both a descendant and an ancestor
766 767 of itself, so every reachable node in 'roots' and 'heads' will be
767 768 included in 'nodes'.
768 769
769 770 'outroots' is the list of reachable nodes in 'roots', i.e., the
770 771 subset of 'roots' that is returned in 'nodes'. Likewise,
771 772 'outheads' is the subset of 'heads' that is also in 'nodes'.
772 773
773 774 'roots' and 'heads' are both lists of node IDs. If 'roots' is
774 775 unspecified, uses nullid as the only root. If 'heads' is
775 776 unspecified, uses list of all of the revlog's heads."""
776 777 nonodes = ([], [], [])
777 778 if roots is not None:
778 779 roots = list(roots)
779 780 if not roots:
780 781 return nonodes
781 782 lowestrev = min([self.rev(n) for n in roots])
782 783 else:
783 784 roots = [nullid] # Everybody's a descendant of nullid
784 785 lowestrev = nullrev
785 786 if (lowestrev == nullrev) and (heads is None):
786 787 # We want _all_ the nodes!
787 788 return ([self.node(r) for r in self], [nullid], list(self.heads()))
788 789 if heads is None:
789 790 # All nodes are ancestors, so the latest ancestor is the last
790 791 # node.
791 792 highestrev = len(self) - 1
792 793 # Set ancestors to None to signal that every node is an ancestor.
793 794 ancestors = None
794 795 # Set heads to an empty dictionary for later discovery of heads
795 796 heads = {}
796 797 else:
797 798 heads = list(heads)
798 799 if not heads:
799 800 return nonodes
800 801 ancestors = set()
801 802 # Turn heads into a dictionary so we can remove 'fake' heads.
802 803 # Also, later we will be using it to filter out the heads we can't
803 804 # find from roots.
804 805 heads = dict.fromkeys(heads, False)
805 806 # Start at the top and keep marking parents until we're done.
806 807 nodestotag = set(heads)
807 808 # Remember where the top was so we can use it as a limit later.
808 809 highestrev = max([self.rev(n) for n in nodestotag])
809 810 while nodestotag:
810 811 # grab a node to tag
811 812 n = nodestotag.pop()
812 813 # Never tag nullid
813 814 if n == nullid:
814 815 continue
815 816 # A node's revision number represents its place in a
816 817 # topologically sorted list of nodes.
817 818 r = self.rev(n)
818 819 if r >= lowestrev:
819 820 if n not in ancestors:
820 821 # If we are possibly a descendant of one of the roots
821 822 # and we haven't already been marked as an ancestor
822 823 ancestors.add(n) # Mark as ancestor
823 824 # Add non-nullid parents to list of nodes to tag.
824 825 nodestotag.update([p for p in self.parents(n) if
825 826 p != nullid])
826 827 elif n in heads: # We've seen it before, is it a fake head?
827 828 # So it is, real heads should not be the ancestors of
828 829 # any other heads.
829 830 heads.pop(n)
830 831 if not ancestors:
831 832 return nonodes
832 833 # Now that we have our set of ancestors, we want to remove any
833 834 # roots that are not ancestors.
834 835
835 836 # If one of the roots was nullid, everything is included anyway.
836 837 if lowestrev > nullrev:
837 838 # But, since we weren't, let's recompute the lowest rev to not
838 839 # include roots that aren't ancestors.
839 840
840 841 # Filter out roots that aren't ancestors of heads
841 842 roots = [root for root in roots if root in ancestors]
842 843 # Recompute the lowest revision
843 844 if roots:
844 845 lowestrev = min([self.rev(root) for root in roots])
845 846 else:
846 847 # No more roots? Return empty list
847 848 return nonodes
848 849 else:
849 850 # We are descending from nullid, and don't need to care about
850 851 # any other roots.
851 852 lowestrev = nullrev
852 853 roots = [nullid]
853 854 # Transform our roots list into a set.
854 855 descendants = set(roots)
855 856 # Also, keep the original roots so we can filter out roots that aren't
856 857 # 'real' roots (i.e. are descended from other roots).
857 858 roots = descendants.copy()
858 859 # Our topologically sorted list of output nodes.
859 860 orderedout = []
860 861 # Don't start at nullid since we don't want nullid in our output list,
861 862 # and if nullid shows up in descendants, empty parents will look like
862 863 # they're descendants.
863 864 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
864 865 n = self.node(r)
865 866 isdescendant = False
866 867 if lowestrev == nullrev: # Everybody is a descendant of nullid
867 868 isdescendant = True
868 869 elif n in descendants:
869 870 # n is already a descendant
870 871 isdescendant = True
871 872 # This check only needs to be done here because all the roots
872 873 # will start being marked as descendants before the loop.
873 874 if n in roots:
874 875 # If n was a root, check if it's a 'real' root.
875 876 p = tuple(self.parents(n))
876 877 # If any of its parents are descendants, it's not a root.
877 878 if (p[0] in descendants) or (p[1] in descendants):
878 879 roots.remove(n)
879 880 else:
880 881 p = tuple(self.parents(n))
881 882 # A node is a descendant if either of its parents are
882 883 # descendants. (We seeded the descendants set with the roots
883 884 # up there, remember?)
884 885 if (p[0] in descendants) or (p[1] in descendants):
885 886 descendants.add(n)
886 887 isdescendant = True
887 888 if isdescendant and ((ancestors is None) or (n in ancestors)):
888 889 # Only include nodes that are both descendants and ancestors.
889 890 orderedout.append(n)
890 891 if (ancestors is not None) and (n in heads):
891 892 # We're trying to figure out which heads are reachable
892 893 # from roots.
893 894 # Mark this head as having been reached
894 895 heads[n] = True
895 896 elif ancestors is None:
896 897 # Otherwise, we're trying to discover the heads.
897 898 # Assume this is a head because if it isn't, the next step
898 899 # will eventually remove it.
899 900 heads[n] = True
900 901 # But, obviously its parents aren't.
901 902 for p in self.parents(n):
902 903 heads.pop(p, None)
903 904 heads = [head for head, flag in heads.iteritems() if flag]
904 905 roots = list(roots)
905 906 assert orderedout
906 907 assert roots
907 908 assert heads
908 909 return (orderedout, roots, heads)
909 910
910 911 def headrevs(self):
911 912 try:
912 913 return self.index.headrevs()
913 914 except AttributeError:
914 915 return self._headrevs()
915 916
916 917 def computephases(self, roots):
917 918 return self.index.computephasesmapsets(roots)
918 919
919 920 def _headrevs(self):
920 921 count = len(self)
921 922 if not count:
922 923 return [nullrev]
923 924 # we won't iter over filtered revs so nobody is a head at start
924 925 ishead = [0] * (count + 1)
925 926 index = self.index
926 927 for r in self:
927 928 ishead[r] = 1 # I may be a head
928 929 e = index[r]
929 930 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
930 931 return [r for r, val in enumerate(ishead) if val]
931 932
932 933 def heads(self, start=None, stop=None):
933 934 """return the list of all nodes that have no children
934 935
935 936 if start is specified, only heads that are descendants of
936 937 start will be returned
937 938 if stop is specified, it will consider all the revs from stop
938 939 as if they had no children
939 940 """
940 941 if start is None and stop is None:
941 942 if not len(self):
942 943 return [nullid]
943 944 return [self.node(r) for r in self.headrevs()]
944 945
945 946 if start is None:
946 947 start = nullid
947 948 if stop is None:
948 949 stop = []
949 950 stoprevs = set([self.rev(n) for n in stop])
950 951 startrev = self.rev(start)
951 952 reachable = {startrev}
952 953 heads = {startrev}
953 954
954 955 parentrevs = self.parentrevs
955 956 for r in self.revs(start=startrev + 1):
956 957 for p in parentrevs(r):
957 958 if p in reachable:
958 959 if r not in stoprevs:
959 960 reachable.add(r)
960 961 heads.add(r)
961 962 if p in heads and p not in stoprevs:
962 963 heads.remove(p)
963 964
964 965 return [self.node(r) for r in heads]
965 966
966 967 def children(self, node):
967 968 """find the children of a given node"""
968 969 c = []
969 970 p = self.rev(node)
970 971 for r in self.revs(start=p + 1):
971 972 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
972 973 if prevs:
973 974 for pr in prevs:
974 975 if pr == p:
975 976 c.append(self.node(r))
976 977 elif p == nullrev:
977 978 c.append(self.node(r))
978 979 return c
979 980
980 981 def descendant(self, start, end):
981 982 if start == nullrev:
982 983 return True
983 984 for i in self.descendants([start]):
984 985 if i == end:
985 986 return True
986 987 elif i > end:
987 988 break
988 989 return False
989 990
990 991 def commonancestorsheads(self, a, b):
991 992 """calculate all the heads of the common ancestors of nodes a and b"""
992 993 a, b = self.rev(a), self.rev(b)
993 994 try:
994 995 ancs = self.index.commonancestorsheads(a, b)
995 996 except (AttributeError, OverflowError): # C implementation failed
996 997 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
997 998 return pycompat.maplist(self.node, ancs)
998 999
999 1000 def isancestor(self, a, b):
1000 1001 """return True if node a is an ancestor of node b
1001 1002
1002 1003 The implementation of this is trivial but the use of
1003 1004 commonancestorsheads is not."""
1004 1005 return a in self.commonancestorsheads(a, b)
1005 1006
1006 1007 def ancestor(self, a, b):
1007 1008 """calculate the "best" common ancestor of nodes a and b"""
1008 1009
1009 1010 a, b = self.rev(a), self.rev(b)
1010 1011 try:
1011 1012 ancs = self.index.ancestors(a, b)
1012 1013 except (AttributeError, OverflowError):
1013 1014 ancs = ancestor.ancestors(self.parentrevs, a, b)
1014 1015 if ancs:
1015 1016 # choose a consistent winner when there's a tie
1016 1017 return min(map(self.node, ancs))
1017 1018 return nullid
1018 1019
1019 1020 def _match(self, id):
1020 1021 if isinstance(id, int):
1021 1022 # rev
1022 1023 return self.node(id)
1023 1024 if len(id) == 20:
1024 1025 # possibly a binary node
1025 1026 # odds of a binary node being all hex in ASCII are 1 in 10**25
1026 1027 try:
1027 1028 node = id
1028 1029 self.rev(node) # quick search the index
1029 1030 return node
1030 1031 except LookupError:
1031 1032 pass # may be partial hex id
1032 1033 try:
1033 1034 # str(rev)
1034 1035 rev = int(id)
1035 1036 if str(rev) != id:
1036 1037 raise ValueError
1037 1038 if rev < 0:
1038 1039 rev = len(self) + rev
1039 1040 if rev < 0 or rev >= len(self):
1040 1041 raise ValueError
1041 1042 return self.node(rev)
1042 1043 except (ValueError, OverflowError):
1043 1044 pass
1044 1045 if len(id) == 40:
1045 1046 try:
1046 1047 # a full hex nodeid?
1047 1048 node = bin(id)
1048 1049 self.rev(node)
1049 1050 return node
1050 1051 except (TypeError, LookupError):
1051 1052 pass
1052 1053
1053 1054 def _partialmatch(self, id):
1054 1055 maybewdir = wdirhex.startswith(id)
1055 1056 try:
1056 1057 partial = self.index.partialmatch(id)
1057 1058 if partial and self.hasnode(partial):
1058 1059 if maybewdir:
1059 1060 # single 'ff...' match in radix tree, ambiguous with wdir
1060 1061 raise RevlogError
1061 1062 return partial
1062 1063 if maybewdir:
1063 1064 # no 'ff...' match in radix tree, wdir identified
1064 1065 raise error.WdirUnsupported
1065 1066 return None
1066 1067 except RevlogError:
1067 1068 # parsers.c radix tree lookup gave multiple matches
1068 1069 # fast path: for unfiltered changelog, radix tree is accurate
1069 1070 if not getattr(self, 'filteredrevs', None):
1070 1071 raise LookupError(id, self.indexfile,
1071 1072 _('ambiguous identifier'))
1072 1073 # fall through to slow path that filters hidden revisions
1073 1074 except (AttributeError, ValueError):
1074 1075 # we are pure python, or key was too short to search radix tree
1075 1076 pass
1076 1077
1077 1078 if id in self._pcache:
1078 1079 return self._pcache[id]
1079 1080
1080 1081 if len(id) < 40:
1081 1082 try:
1082 1083 # hex(node)[:...]
1083 1084 l = len(id) // 2 # grab an even number of digits
1084 1085 prefix = bin(id[:l * 2])
1085 1086 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1086 1087 nl = [n for n in nl if hex(n).startswith(id) and
1087 1088 self.hasnode(n)]
1088 1089 if len(nl) > 0:
1089 1090 if len(nl) == 1 and not maybewdir:
1090 1091 self._pcache[id] = nl[0]
1091 1092 return nl[0]
1092 1093 raise LookupError(id, self.indexfile,
1093 1094 _('ambiguous identifier'))
1094 1095 if maybewdir:
1095 1096 raise error.WdirUnsupported
1096 1097 return None
1097 except TypeError:
1098 except (TypeError, binascii.Error):
1098 1099 pass
1099 1100
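# Context for the except clause above (the point of this changeset): Python
# 2's binascii.unhexlify (behind ``bin``) raises TypeError on non-hex input,
# while Python 3 raises binascii.Error, so both must be caught when probing
# an arbitrary prefix.
#
#   >>> import binascii
#   >>> binascii.unhexlify('zz')    # Python 3
#   Traceback (most recent call last):
#     ...
#   binascii.Error: Non-hexadecimal digit found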
1100 1101 def lookup(self, id):
1101 1102 """locate a node based on:
1102 1103 - revision number or str(revision number)
1103 1104 - nodeid or subset of hex nodeid
1104 1105 """
1105 1106 n = self._match(id)
1106 1107 if n is not None:
1107 1108 return n
1108 1109 n = self._partialmatch(id)
1109 1110 if n:
1110 1111 return n
1111 1112
1112 1113 raise LookupError(id, self.indexfile, _('no match found'))
1113 1114
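# Usage sketch (illustrative; ``rl`` is a hypothetical, non-empty revlog):
# lookup accepts a rev number, str(rev), a binary nodeid, or a unique hex
# prefix.
#
#   >>> rl.lookup('0') == rl.node(0)
#   True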
1114 1115 def cmp(self, node, text):
1115 1116 """compare text with a given file revision
1116 1117
1117 1118 returns True if text is different than what is stored.
1118 1119 """
1119 1120 p1, p2 = self.parents(node)
1120 1121 return hash(text, p1, p2) != node
1121 1122
1122 1123 def _cachesegment(self, offset, data):
1123 1124 """Add a segment to the revlog cache.
1124 1125
1125 1126 Accepts an absolute offset and the data that is at that location.
1126 1127 """
1127 1128 o, d = self._chunkcache
1128 1129 # try to add to existing cache
1129 1130 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1130 1131 self._chunkcache = o, d + data
1131 1132 else:
1132 1133 self._chunkcache = offset, data
1133 1134
1134 1135 def _readsegment(self, offset, length, df=None):
1135 1136 """Load a segment of raw data from the revlog.
1136 1137
1137 1138 Accepts an absolute offset, length to read, and an optional existing
1138 1139 file handle to read from.
1139 1140
1140 1141 If an existing file handle is passed, it will be seeked and the
1141 1142 original seek position will NOT be restored.
1142 1143
1143 1144 Returns a str or buffer of raw byte data.
1144 1145 """
1145 1146 if df is not None:
1146 1147 closehandle = False
1147 1148 else:
1148 1149 if self._inline:
1149 1150 df = self.opener(self.indexfile)
1150 1151 else:
1151 1152 df = self.opener(self.datafile)
1152 1153 closehandle = True
1153 1154
1154 1155 # Cache data both forward and backward around the requested
1155 1156 # data, in a fixed size window. This helps speed up operations
1156 1157 # involving reading the revlog backwards.
1157 1158 cachesize = self._chunkcachesize
1158 1159 realoffset = offset & ~(cachesize - 1)
1159 1160 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1160 1161 - realoffset)
1161 1162 df.seek(realoffset)
1162 1163 d = df.read(reallength)
1163 1164 if closehandle:
1164 1165 df.close()
1165 1166 self._cachesegment(realoffset, d)
1166 1167 if offset != realoffset or reallength != length:
1167 1168 return util.buffer(d, offset - realoffset, length)
1168 1169 return d
1169 1170
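# Worked example (illustrative): with the default 64KiB cache size, a short
# read at offset 70000 is widened to the aligned window computed above.
#
#   >>> cachesize, offset, length = 65536, 70000, 100
#   >>> realoffset = offset & ~(cachesize - 1)
#   >>> reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
#   >>> realoffset, reallength
#   (65536, 65536)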
1170 1171 def _getsegment(self, offset, length, df=None):
1171 1172 """Obtain a segment of raw data from the revlog.
1172 1173
1173 1174 Accepts an absolute offset, length of bytes to obtain, and an
1174 1175 optional file handle to the already-opened revlog. If the file
1175 1176 handle is used, its original seek position will not be preserved.
1176 1177
1177 1178 Requests for data may be returned from a cache.
1178 1179
1179 1180 Returns a str or a buffer instance of raw byte data.
1180 1181 """
1181 1182 o, d = self._chunkcache
1182 1183 l = len(d)
1183 1184
1184 1185 # is it in the cache?
1185 1186 cachestart = offset - o
1186 1187 cacheend = cachestart + length
1187 1188 if cachestart >= 0 and cacheend <= l:
1188 1189 if cachestart == 0 and cacheend == l:
1189 1190 return d # avoid a copy
1190 1191 return util.buffer(d, cachestart, cacheend - cachestart)
1191 1192
1192 1193 return self._readsegment(offset, length, df=df)
1193 1194
1194 1195 def _getsegmentforrevs(self, startrev, endrev, df=None):
1195 1196 """Obtain a segment of raw data corresponding to a range of revisions.
1196 1197
1197 1198 Accepts the start and end revisions and an optional already-open
1198 1199 file handle to be used for reading. If the file handle is read, its
1199 1200 seek position will not be preserved.
1200 1201
1201 1202 Requests for data may be satisfied by a cache.
1202 1203
1203 1204 Returns a 2-tuple of (offset, data) for the requested range of
1204 1205 revisions. Offset is the integer offset from the beginning of the
1205 1206 revlog and data is a str or buffer of the raw byte data.
1206 1207
1207 1208 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1208 1209 to determine where each revision's data begins and ends.
1209 1210 """
1210 1211 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1211 1212 # (functions are expensive).
1212 1213 index = self.index
1213 1214 istart = index[startrev]
1214 1215 start = int(istart[0] >> 16)
1215 1216 if startrev == endrev:
1216 1217 end = start + istart[1]
1217 1218 else:
1218 1219 iend = index[endrev]
1219 1220 end = int(iend[0] >> 16) + iend[1]
1220 1221
1221 1222 if self._inline:
1222 1223 start += (startrev + 1) * self._io.size
1223 1224 end += (endrev + 1) * self._io.size
1224 1225 length = end - start
1225 1226
1226 1227 return start, self._getsegment(start, length, df=df)
1227 1228
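# Note on the inline adjustment above (illustrative): in an inline revlog the
# data chunks are interleaved with 64-byte index records, so rev r's data
# starts (r + 1) * iosize bytes past its nominal offset.
#
#   >>> iosize, start, rev = 64, 0, 0
#   >>> start + (rev + 1) * iosize   # data of rev 0 follows its index entry
#   64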
1228 1229 def _chunk(self, rev, df=None):
1229 1230 """Obtain a single decompressed chunk for a revision.
1230 1231
1231 1232 Accepts an integer revision and an optional already-open file handle
1232 1233 to be used for reading. If used, the seek position of the file will not
1233 1234 be preserved.
1234 1235
1235 1236 Returns a str holding uncompressed data for the requested revision.
1236 1237 """
1237 1238 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1238 1239
1239 1240 def _chunks(self, revs, df=None):
1240 1241 """Obtain decompressed chunks for the specified revisions.
1241 1242
1242 1243 Accepts an iterable of numeric revisions that are assumed to be in
1243 1244 ascending order. Also accepts an optional already-open file handle
1244 1245 to be used for reading. If used, the seek position of the file will
1245 1246 not be preserved.
1246 1247
1247 1248 This function is similar to calling ``self._chunk()`` multiple times,
1248 1249 but is faster.
1249 1250
1250 1251 Returns a list with decompressed data for each requested revision.
1251 1252 """
1252 1253 if not revs:
1253 1254 return []
1254 1255 start = self.start
1255 1256 length = self.length
1256 1257 inline = self._inline
1257 1258 iosize = self._io.size
1258 1259 buffer = util.buffer
1259 1260
1260 1261 l = []
1261 1262 ladd = l.append
1262 1263
1263 1264 try:
1264 1265 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1265 1266 except OverflowError:
1266 1267 # issue4215 - we can't cache a run of chunks greater than
1267 1268 # 2G on Windows
1268 1269 return [self._chunk(rev, df=df) for rev in revs]
1269 1270
1270 1271 decomp = self.decompress
1271 1272 for rev in revs:
1272 1273 chunkstart = start(rev)
1273 1274 if inline:
1274 1275 chunkstart += (rev + 1) * iosize
1275 1276 chunklength = length(rev)
1276 1277 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1277 1278
1278 1279 return l
1279 1280
1280 1281 def _chunkclear(self):
1281 1282 """Clear the raw chunk cache."""
1282 1283 self._chunkcache = (0, '')
1283 1284
1284 1285 def deltaparent(self, rev):
1285 1286 """return deltaparent of the given revision"""
1286 1287 base = self.index[rev][3]
1287 1288 if base == rev:
1288 1289 return nullrev
1289 1290 elif self._generaldelta:
1290 1291 return base
1291 1292 else:
1292 1293 return rev - 1
1293 1294
1294 1295 def revdiff(self, rev1, rev2):
1295 1296 """return or calculate a delta between two revisions
1296 1297
1297 1298 The delta calculated is in binary form and is intended to be written to
1298 1299 revlog data directly. So this function needs raw revision data.
1299 1300 """
1300 1301 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1301 1302 return bytes(self._chunk(rev2))
1302 1303
1303 1304 return mdiff.textdiff(self.revision(rev1, raw=True),
1304 1305 self.revision(rev2, raw=True))
1305 1306
1306 1307 def revision(self, nodeorrev, _df=None, raw=False):
1307 1308 """return an uncompressed revision of a given node or revision
1308 1309 number.
1309 1310
1310 1311 _df - an existing file handle to read from. (internal-only)
1311 1312 raw - an optional argument specifying if the revision data is to be
1312 1313 treated as raw data when applying flag transforms. 'raw' should be set
1313 1314 to True when generating changegroups or in debug commands.
1314 1315 """
1315 1316 if isinstance(nodeorrev, int):
1316 1317 rev = nodeorrev
1317 1318 node = self.node(rev)
1318 1319 else:
1319 1320 node = nodeorrev
1320 1321 rev = None
1321 1322
1322 1323 cachedrev = None
1323 1324 flags = None
1324 1325 rawtext = None
1325 1326 if node == nullid:
1326 1327 return ""
1327 1328 if self._cache:
1328 1329 if self._cache[0] == node:
1329 1330 # _cache only stores rawtext
1330 1331 if raw:
1331 1332 return self._cache[2]
1332 1333 # duplicated, but good for perf
1333 1334 if rev is None:
1334 1335 rev = self.rev(node)
1335 1336 if flags is None:
1336 1337 flags = self.flags(rev)
1337 1338 # no extra flags set, no flag processor runs, text = rawtext
1338 1339 if flags == REVIDX_DEFAULT_FLAGS:
1339 1340 return self._cache[2]
1340 1341 # rawtext is reusable. need to run flag processor
1341 1342 rawtext = self._cache[2]
1342 1343
1343 1344 cachedrev = self._cache[1]
1344 1345
1345 1346 # look up what we need to read
1346 1347 if rawtext is None:
1347 1348 if rev is None:
1348 1349 rev = self.rev(node)
1349 1350
1350 1351 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1351 1352 if stopped:
1352 1353 rawtext = self._cache[2]
1353 1354
1354 1355 # drop cache to save memory
1355 1356 self._cache = None
1356 1357
1357 1358 bins = self._chunks(chain, df=_df)
1358 1359 if rawtext is None:
1359 1360 rawtext = bytes(bins[0])
1360 1361 bins = bins[1:]
1361 1362
1362 1363 rawtext = mdiff.patches(rawtext, bins)
1363 1364 self._cache = (node, rev, rawtext)
1364 1365
1365 1366 if flags is None:
1366 1367 if rev is None:
1367 1368 rev = self.rev(node)
1368 1369 flags = self.flags(rev)
1369 1370
1370 1371 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1371 1372 if validatehash:
1372 1373 self.checkhash(text, node, rev=rev)
1373 1374
1374 1375 return text
1375 1376
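# Usage sketch (illustrative; ``rl`` is a hypothetical revlog): a node and
# its rev number resolve to the same data, and raw=True returns the stored
# rawtext with flag transforms skipped, as changegroup generation and debug
# commands require.
#
#   >>> rl.revision(0, raw=True) == rl.revision(rl.node(0), raw=True)
#   True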
1376 1377 def hash(self, text, p1, p2):
1377 1378 """Compute a node hash.
1378 1379
1379 1380 Available as a function so that subclasses can replace the hash
1380 1381 as needed.
1381 1382 """
1382 1383 return hash(text, p1, p2)
1383 1384
1384 1385 def _processflags(self, text, flags, operation, raw=False):
1385 1386 """Inspect revision data flags and applies transforms defined by
1386 1387 registered flag processors.
1387 1388
1388 1389 ``text`` - the revision data to process
1389 1390 ``flags`` - the revision flags
1390 1391 ``operation`` - the operation being performed (read or write)
1391 1392 ``raw`` - an optional argument describing if the raw transform should be
1392 1393 applied.
1393 1394
1394 1395 This method processes the flags in the order (or reverse order if
1395 1396 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1396 1397 flag processors registered for present flags. The order of flags defined
1397 1398 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1398 1399
1399 1400 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1400 1401 processed text and ``validatehash`` is a bool indicating whether the
1401 1402 returned text should be checked for hash integrity.
1402 1403
1403 1404 Note: If the ``raw`` argument is set, it has precedence over the
1404 1405 operation and will only update the value of ``validatehash``.
1405 1406 """
1406 1407 # fast path: no flag processors will run
1407 1408 if flags == 0:
1408 1409 return text, True
1409 1410 if operation not in ('read', 'write'):
1410 1411 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1411 1412 # Check all flags are known.
1412 1413 if flags & ~REVIDX_KNOWN_FLAGS:
1413 1414 raise RevlogError(_("incompatible revision flag '%#x'") %
1414 1415 (flags & ~REVIDX_KNOWN_FLAGS))
1415 1416 validatehash = True
1416 1417 # Depending on the operation (read or write), the order might be
1417 1418 # reversed due to non-commutative transforms.
1418 1419 orderedflags = REVIDX_FLAGS_ORDER
1419 1420 if operation == 'write':
1420 1421 orderedflags = reversed(orderedflags)
1421 1422
1422 1423 for flag in orderedflags:
1423 1424 # If a flagprocessor has been registered for a known flag, apply the
1424 1425 # related operation transform and update result tuple.
1425 1426 if flag & flags:
1426 1427 vhash = True
1427 1428
1428 1429 if flag not in _flagprocessors:
1429 1430 message = _("missing processor for flag '%#x'") % (flag)
1430 1431 raise RevlogError(message)
1431 1432
1432 1433 processor = _flagprocessors[flag]
1433 1434 if processor is not None:
1434 1435 readtransform, writetransform, rawtransform = processor
1435 1436
1436 1437 if raw:
1437 1438 vhash = rawtransform(self, text)
1438 1439 elif operation == 'read':
1439 1440 text, vhash = readtransform(self, text)
1440 1441 else: # write operation
1441 1442 text, vhash = writetransform(self, text)
1442 1443 validatehash = validatehash and vhash
1443 1444
1444 1445 return text, validatehash
1445 1446
1446 1447 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1447 1448 """Check node hash integrity.
1448 1449
1449 1450 Available as a function so that subclasses can extend hash mismatch
1450 1451 behaviors as needed.
1451 1452 """
1452 1453 if p1 is None and p2 is None:
1453 1454 p1, p2 = self.parents(node)
1454 1455 if node != self.hash(text, p1, p2):
1455 1456 revornode = rev
1456 1457 if revornode is None:
1457 1458 revornode = templatefilters.short(hex(node))
1458 1459 raise RevlogError(_("integrity check failed on %s:%s")
1459 1460 % (self.indexfile, revornode))
1460 1461
1461 1462 def checkinlinesize(self, tr, fp=None):
1462 1463 """Check if the revlog is too big for inline and convert if so.
1463 1464
1464 1465 This should be called after revisions are added to the revlog. If the
1465 1466 revlog has grown too large to be an inline revlog, it will convert it
1466 1467 to use multiple index and data files.
1467 1468 """
1468 1469 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1469 1470 return
1470 1471
1471 1472 trinfo = tr.find(self.indexfile)
1472 1473 if trinfo is None:
1473 1474 raise RevlogError(_("%s not found in the transaction")
1474 1475 % self.indexfile)
1475 1476
1476 1477 trindex = trinfo[2]
1477 1478 if trindex is not None:
1478 1479 dataoff = self.start(trindex)
1479 1480 else:
1480 1481 # revlog was stripped at start of transaction, use all leftover data
1481 1482 trindex = len(self) - 1
1482 1483 dataoff = self.end(-2)
1483 1484
1484 1485 tr.add(self.datafile, dataoff)
1485 1486
1486 1487 if fp:
1487 1488 fp.flush()
1488 1489 fp.close()
1489 1490
1490 1491 df = self.opener(self.datafile, 'w')
1491 1492 try:
1492 1493 for r in self:
1493 1494 df.write(self._getsegmentforrevs(r, r)[1])
1494 1495 finally:
1495 1496 df.close()
1496 1497
1497 1498 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1498 1499 checkambig=self._checkambig)
1499 1500 self.version &= ~FLAG_INLINE_DATA
1500 1501 self._inline = False
1501 1502 for i in self:
1502 1503 e = self._io.packentry(self.index[i], self.node, self.version, i)
1503 1504 fp.write(e)
1504 1505
1505 1506 # if we don't call close, the temp file will never replace the
1506 1507 # real index
1507 1508 fp.close()
1508 1509
1509 1510 tr.replace(self.indexfile, trindex * self._io.size)
1510 1511 self._chunkclear()
1511 1512
1512 1513 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1513 1514 node=None, flags=REVIDX_DEFAULT_FLAGS):
1514 1515 """add a revision to the log
1515 1516
1516 1517 text - the revision data to add
1517 1518 transaction - the transaction object used for rollback
1518 1519 link - the linkrev data to add
1519 1520 p1, p2 - the parent nodeids of the revision
1520 1521 cachedelta - an optional precomputed delta
1521 1522 node - nodeid of revision; typically node is not specified, and it is
1522 1523 computed by default as hash(text, p1, p2), however subclasses might
1523 1524 use different hashing method (and override checkhash() in such case)
1524 1525 flags - the known flags to set on the revision
1525 1526 """
1526 1527 if link == nullrev:
1527 1528 raise RevlogError(_("attempted to add linkrev -1 to %s")
1528 1529 % self.indexfile)
1529 1530
1530 1531 if flags:
1531 1532 node = node or self.hash(text, p1, p2)
1532 1533
1533 1534 rawtext, validatehash = self._processflags(text, flags, 'write')
1534 1535
1535 1536 # If the flag processor modifies the revision data, ignore any provided
1536 1537 # cachedelta.
1537 1538 if rawtext != text:
1538 1539 cachedelta = None
1539 1540
1540 1541 if len(rawtext) > _maxentrysize:
1541 1542 raise RevlogError(
1542 1543 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1543 1544 % (self.indexfile, len(rawtext)))
1544 1545
1545 1546 node = node or self.hash(rawtext, p1, p2)
1546 1547 if node in self.nodemap:
1547 1548 return node
1548 1549
1549 1550 if validatehash:
1550 1551 self.checkhash(rawtext, node, p1=p1, p2=p2)
1551 1552
1552 1553 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1553 1554 flags, cachedelta=cachedelta)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data
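
    # Editor's note: an illustrative summary of the chunk header convention
    # shared by compress() and decompress() (not part of the original source):
    #
    #   'u' + text  -> incompressible text, stored raw behind a 1-byte header
    #   '\0'...     -> text that itself begins with a NUL byte, stored verbatim
    #   'x'...      -> a zlib stream (zlib output begins with 0x78, i.e. 'x')
    #   other byte  -> header of a registered revlog compression engine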

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
                (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True
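
    # Editor's note: a worked example of the bounds above (illustrative only).
    # For a fulltext of 1000 bytes, a candidate delta is rejected when the
    # span of data that must be read to reconstruct it exceeds 4000 bytes
    # (dist), when the delta itself exceeds 1000 bytes (l), when the chain's
    # deltas total more than 2000 bytes compressed, or when the chain would
    # grow beyond the configured maximum chain length.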

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude the already lazily tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr
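
        # Editor's note (descriptive comment, not in the original): the index
        # entry is an 8-tuple of (packed offset+flags, compressed chunk
        # length, fulltext length, delta-base rev, linkrev, p1 rev, p2 rev,
        # nodeid), inserted just before the sentinel entry that terminates
        # the in-memory index.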
        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)
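
    # Editor's note (descriptive comment, not in the original): for an inline
    # revlog the index entry and its data chunk are interleaved in the .i
    # file, so the write position registered with the transaction above is
    # the data offset plus ``curr`` index entries' worth of bytes
    # (curr * self._io.size).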

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional;
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes
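
    # Editor's note: a hedged usage sketch (not part of the original source).
    # ``cg`` is a changegroup-style source of delta chunks and ``linkmapper``
    # translates the changeset node of each chunk into the linkrev to record,
    # e.g. when applying a bundle to a filelog:
    #
    #   added = fl.addgroup(cg, cl.rev, tr)
    #
    # where ``cl.rev`` maps changelog nodes to revision numbers and ``added``
    # lists every node seen, including ones that already existed.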

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
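
    # Editor's note: a worked example (illustrative only). Take a revlog with
    # revs 0, 1, 2 where revs 1 and 2 both have parent 0 and the linkrevs are
    # [0, 5, 3]. getstrippoint(4) must strip rev 1 (linkrev 5 >= 4), and since
    # truncation can only happen at the end of the file, the strip point is
    # rev 1; rev 2 (linkrev 3 < 4) is reported in the broken set for the
    # caller to save and re-add.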

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
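
    # Editor's note (illustrative arithmetic, assuming the 64-byte version-1
    # index entry): stripping at rev ``r`` truncates a split revlog's index at
    # r * 64 bytes and its data file at start(r); for an inline revlog, where
    # entries and data share one file, the single truncation point is
    # start(r) + r * 64.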

    def checksize(self):
        """Check the sizes of the index and data files.

        Returns a (dd, di) tuple: ``dd`` is the difference between the data
        file's actual size and the size the index expects, and ``di`` is the
        corresponding discrepancy for the index file itself.
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the destination
            revlog would not select the same revisions for the delta. This is
            the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the same
            revisions for the delta. This mode strikes a balance between speed
            and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of execution.
            This mode can be used to recompute deltas (e.g. if the diff/delta
            algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If ``None``, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
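
    # Editor's note: a hedged usage sketch (not part of the original source).
    # Recomputing every delta while copying into an empty destination revlog
    # might look like this, with ``src``/``dst`` revlog instances and ``repo``
    # the owning repository:
    #
    #   tr = repo.transaction('clone-revlog')
    #   try:
    #       src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #       tr.close()
    #   finally:
    #       tr.release()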