revlog: split `rawtext` retrieval out of _revisiondata...
marmoute
r43060:e91411fc default
@@ -1,2680 +1,2676 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 57 )
58 58 from .thirdparty import (
59 59 attr,
60 60 )
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 repository,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .revlogutils import (
73 73 deltas as deltautil,
74 74 flagutil,
75 75 )
76 76 from .utils import (
77 77 interfaceutil,
78 78 storageutil,
79 79 stringutil,
80 80 )
81 81
82 82 # blanked usage of all the names to prevent pyflakes complaints
83 83 # We need these names available in the module for extensions.
84 84 REVLOGV0
85 85 REVLOGV1
86 86 REVLOGV2
87 87 FLAG_INLINE_DATA
88 88 FLAG_GENERALDELTA
89 89 REVLOG_DEFAULT_FLAGS
90 90 REVLOG_DEFAULT_FORMAT
91 91 REVLOG_DEFAULT_VERSION
92 92 REVLOGV1_FLAGS
93 93 REVLOGV2_FLAGS
94 94 REVIDX_ISCENSORED
95 95 REVIDX_ELLIPSIS
96 96 REVIDX_EXTSTORED
97 97 REVIDX_DEFAULT_FLAGS
98 98 REVIDX_FLAGS_ORDER
99 99 REVIDX_RAWTEXT_CHANGING_FLAGS
100 100
101 101 parsers = policy.importmod(r'parsers')
102 102 rustancestor = policy.importrust(r'ancestor')
103 103 rustdagop = policy.importrust(r'dagop')
104 104
105 105 # Aliased for performance.
106 106 _zlibdecompress = zlib.decompress
107 107
108 108 # max size of revlog with inline data
109 109 _maxinline = 131072
110 110 _chunksize = 1048576
111 111
112 112 # Flag processors for REVIDX_ELLIPSIS.
113 113 def ellipsisreadprocessor(rl, text):
114 114 return text, False
115 115
116 116 def ellipsiswriteprocessor(rl, text):
117 117 return text, False
118 118
119 119 def ellipsisrawprocessor(rl, text):
120 120 return False
121 121
122 122 ellipsisprocessor = (
123 123 ellipsisreadprocessor,
124 124 ellipsiswriteprocessor,
125 125 ellipsisrawprocessor,
126 126 )
127 127
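# Illustration (not part of the original file): a flag-processor entry is a
# (read, write, raw) triple. ``read`` and ``write`` transforms take the
# revlog and a text and return ``(newtext, validatehash)``; the ``raw``
# variant only reports whether the stored rawtext can be hash-checked.
# A hypothetical processor that stores text reversed could look like:
#
#     def reversereadprocessor(rl, text):
#         return text[::-1], True
#
#     def reversewriteprocessor(rl, text):
#         return text[::-1], True
#
#     def reverserawprocessor(rl, text):
#         return False  # stored rawtext hash differs from the revision hash
#
#     reverseprocessor = (reversereadprocessor,
#                         reversewriteprocessor,
#                         reverserawprocessor)
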
128 128 def getoffset(q):
129 129 return int(q >> 16)
130 130
131 131 def gettype(q):
132 132 return int(q & 0xFFFF)
133 133
134 134 def offset_type(offset, type):
135 135 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
136 136 raise ValueError('unknown revlog index flags')
137 137 return int(int(offset) << 16 | type)
138 138
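# Worked example (illustration): the index packs a 48-bit offset and a
# 16-bit flag field into one integer, so for an offset of 1024 and no flags:
#
#     q = offset_type(1024, 0)    # 1024 << 16 == 67108864
#     getoffset(q)                # 67108864 >> 16 == 1024
#     gettype(q)                  # 67108864 & 0xFFFF == 0
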
139 139 @attr.s(slots=True, frozen=True)
140 140 class _revisioninfo(object):
141 141 """Information about a revision that allows building its fulltext
142 142 node: expected hash of the revision
143 143 p1, p2: parent revs of the revision
144 144 btext: built text cache consisting of a one-element list
145 145 cachedelta: (baserev, uncompressed_delta) or None
146 146 flags: flags associated with the revision storage
147 147
148 148 One of btext[0] or cachedelta must be set.
149 149 """
150 150 node = attr.ib()
151 151 p1 = attr.ib()
152 152 p2 = attr.ib()
153 153 btext = attr.ib()
154 154 textlen = attr.ib()
155 155 cachedelta = attr.ib()
156 156 flags = attr.ib()
157 157
158 158 @interfaceutil.implementer(repository.irevisiondelta)
159 159 @attr.s(slots=True)
160 160 class revlogrevisiondelta(object):
161 161 node = attr.ib()
162 162 p1node = attr.ib()
163 163 p2node = attr.ib()
164 164 basenode = attr.ib()
165 165 flags = attr.ib()
166 166 baserevisionsize = attr.ib()
167 167 revision = attr.ib()
168 168 delta = attr.ib()
169 169 linknode = attr.ib(default=None)
170 170
171 171 @interfaceutil.implementer(repository.iverifyproblem)
172 172 @attr.s(frozen=True)
173 173 class revlogproblem(object):
174 174 warning = attr.ib(default=None)
175 175 error = attr.ib(default=None)
176 176 node = attr.ib(default=None)
177 177
178 178 # index v0:
179 179 # 4 bytes: offset
180 180 # 4 bytes: compressed length
181 181 # 4 bytes: base rev
182 182 # 4 bytes: link rev
183 183 # 20 bytes: parent 1 nodeid
184 184 # 20 bytes: parent 2 nodeid
185 185 # 20 bytes: nodeid
186 186 indexformatv0 = struct.Struct(">4l20s20s20s")
187 187 indexformatv0_pack = indexformatv0.pack
188 188 indexformatv0_unpack = indexformatv0.unpack
189 189
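# Illustration (not part of the original file): a v0 entry is a fixed
# 4 * 4 + 3 * 20 = 76 byte record, so entry ``n`` starts at byte
# ``n * indexformatv0.size``. Decoding a single entry could look like:
#
#     s = indexformatv0.size    # 76
#     offset, clen, base, link, p1, p2, node = \
#         indexformatv0_unpack(data[n * s:(n + 1) * s])
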
190 190 class revlogoldindex(list):
191 191 def __getitem__(self, i):
192 192 if i == -1:
193 193 return (0, 0, 0, -1, -1, -1, -1, nullid)
194 194 return list.__getitem__(self, i)
195 195
196 196 class revlogoldio(object):
197 197 def __init__(self):
198 198 self.size = indexformatv0.size
199 199
200 200 def parseindex(self, data, inline):
201 201 s = self.size
202 202 index = []
203 203 nodemap = {nullid: nullrev}
204 204 n = off = 0
205 205 l = len(data)
206 206 while off + s <= l:
207 207 cur = data[off:off + s]
208 208 off += s
209 209 e = indexformatv0_unpack(cur)
210 210 # transform to revlogv1 format
211 211 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
212 212 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
213 213 index.append(e2)
214 214 nodemap[e[6]] = n
215 215 n += 1
216 216
217 217 return revlogoldindex(index), nodemap, None
218 218
219 219 def packentry(self, entry, node, version, rev):
220 220 if gettype(entry[0]):
221 221 raise error.RevlogError(_('index entry flags need revlog '
222 222 'version 1'))
223 223 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
224 224 node(entry[5]), node(entry[6]), entry[7])
225 225 return indexformatv0_pack(*e2)
226 226
227 227 # index ng:
228 228 # 6 bytes: offset
229 229 # 2 bytes: flags
230 230 # 4 bytes: compressed length
231 231 # 4 bytes: uncompressed length
232 232 # 4 bytes: base rev
233 233 # 4 bytes: link rev
234 234 # 4 bytes: parent 1 rev
235 235 # 4 bytes: parent 2 rev
236 236 # 32 bytes: nodeid
237 237 indexformatng = struct.Struct(">Qiiiiii20s12x")
238 238 indexformatng_pack = indexformatng.pack
239 239 versionformat = struct.Struct(">I")
240 240 versionformat_pack = versionformat.pack
241 241 versionformat_unpack = versionformat.unpack
242 242
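# Illustration (not part of the original file): a v1 ("ng") entry is
# 8 + 6 * 4 + 20 + 12 = 64 bytes; the leading quadword is the offset/flags
# value built by offset_type() and the 20-byte nodeid is zero-padded to 32.
# For a non-inline index, decoding entry ``n`` could look like:
#
#     (offset_flags, clen, ulen, base, link,
#      p1, p2, node) = indexformatng.unpack(data[n * 64:(n + 1) * 64])
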
243 243 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
244 244 # signed integer)
245 245 _maxentrysize = 0x7fffffff
246 246
247 247 class revlogio(object):
248 248 def __init__(self):
249 249 self.size = indexformatng.size
250 250
251 251 def parseindex(self, data, inline):
252 252 # call the C implementation to parse the index data
253 253 index, cache = parsers.parse_index2(data, inline)
254 254 return index, getattr(index, 'nodemap', None), cache
255 255
256 256 def packentry(self, entry, node, version, rev):
257 257 p = indexformatng_pack(*entry)
258 258 if rev == 0:
259 259 p = versionformat_pack(version) + p[4:]
260 260 return p
261 261
262 262 class revlog(object):
263 263 """
264 264 the underlying revision storage object
265 265
266 266 A revlog consists of two parts, an index and the revision data.
267 267
268 268 The index is a file with a fixed record size containing
269 269 information on each revision, including its nodeid (hash), the
270 270 nodeids of its parents, the position and offset of its data within
271 271 the data file, and the revision it's based on. Finally, each entry
272 272 contains a linkrev entry that can serve as a pointer to external
273 273 data.
274 274
275 275 The revision data itself is a linear collection of data chunks.
276 276 Each chunk represents a revision and is usually represented as a
277 277 delta against the previous chunk. To bound lookup time, runs of
278 278 deltas are limited to about 2 times the length of the original
279 279 version data. This makes retrieval of a version proportional to
280 280 its size, or O(1) relative to the number of revisions.
281 281
282 282 Both pieces of the revlog are written to in an append-only
283 283 fashion, which means we never need to rewrite a file to insert or
284 284 remove data, and can use some simple techniques to avoid the need
285 285 for locking while reading.
286 286
287 287 If checkambig, indexfile is opened with checkambig=True at
288 288 writing, to avoid file stat ambiguity.
289 289
290 290 If mmaplargeindex is True, and an mmapindexthreshold is set, the
291 291 index will be mmapped rather than read if it is larger than the
292 292 configured threshold.
293 293
294 294 If censorable is True, the revlog can have censored revisions.
295 295
296 296 If `upperboundcomp` is not None, this is the expected maximal gain from
297 297 compression for the data content.
298 298 """
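    # Usage sketch (illustration, assuming a vfs-style ``opener``):
    #
    #     rl = revlog(opener, indexfile='00changelog.i')
    #     node = rl.node(0)          # nodeid of the first revision
    #     text = rl.revision(node)   # fulltext after "read" flag processing
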
299 299 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
300 300 mmaplargeindex=False, censorable=False,
301 301 upperboundcomp=None):
302 302 """
303 303 create a revlog object
304 304
305 305 opener is a function that abstracts the file opening operation
306 306 and can be used to implement COW semantics or the like.
307 307
308 308 """
309 309 self.upperboundcomp = upperboundcomp
310 310 self.indexfile = indexfile
311 311 self.datafile = datafile or (indexfile[:-2] + ".d")
312 312 self.opener = opener
313 313 # When True, indexfile is opened with checkambig=True at writing, to
314 314 # avoid file stat ambiguity.
315 315 self._checkambig = checkambig
316 316 self._mmaplargeindex = mmaplargeindex
317 317 self._censorable = censorable
318 318 # 3-tuple of (node, rev, text) for a raw revision.
319 319 self._revisioncache = None
320 320 # Maps rev to chain base rev.
321 321 self._chainbasecache = util.lrucachedict(100)
322 322 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
323 323 self._chunkcache = (0, '')
324 324 # How much data to read and cache into the raw revlog data cache.
325 325 self._chunkcachesize = 65536
326 326 self._maxchainlen = None
327 327 self._deltabothparents = True
328 328 self.index = []
329 329 # Mapping of partial identifiers to full nodes.
330 330 self._pcache = {}
331 331 # Mapping of revision integer to full node.
332 332 self._nodecache = {nullid: nullrev}
333 333 self._nodepos = None
334 334 self._compengine = 'zlib'
335 335 self._compengineopts = {}
336 336 self._maxdeltachainspan = -1
337 337 self._withsparseread = False
338 338 self._sparserevlog = False
339 339 self._srdensitythreshold = 0.50
340 340 self._srmingapsize = 262144
341 341
342 342 # Make copy of flag processors so each revlog instance can support
343 343 # custom flags.
344 344 self._flagprocessors = dict(flagutil.flagprocessors)
345 345
346 346 # 2-tuple of file handles being used for active writing.
347 347 self._writinghandles = None
348 348
349 349 self._loadindex()
350 350
351 351 def _loadindex(self):
352 352 mmapindexthreshold = None
353 353 opts = getattr(self.opener, 'options', {}) or {}
354 354
355 355 if 'revlogv2' in opts:
356 356 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
357 357 elif 'revlogv1' in opts:
358 358 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
359 359 if 'generaldelta' in opts:
360 360 newversionflags |= FLAG_GENERALDELTA
361 361 elif getattr(self.opener, 'options', None) is not None:
362 362 # If options provided but no 'revlog*' found, the repository
363 363 # would have no 'requires' file in it, which means we have to
364 364 # stick to the old format.
365 365 newversionflags = REVLOGV0
366 366 else:
367 367 newversionflags = REVLOG_DEFAULT_VERSION
368 368
369 369 if 'chunkcachesize' in opts:
370 370 self._chunkcachesize = opts['chunkcachesize']
371 371 if 'maxchainlen' in opts:
372 372 self._maxchainlen = opts['maxchainlen']
373 373 if 'deltabothparents' in opts:
374 374 self._deltabothparents = opts['deltabothparents']
375 375 self._lazydelta = bool(opts.get('lazydelta', True))
376 376 self._lazydeltabase = False
377 377 if self._lazydelta:
378 378 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
379 379 if 'compengine' in opts:
380 380 self._compengine = opts['compengine']
381 381 if 'zlib.level' in opts:
382 382 self._compengineopts['zlib.level'] = opts['zlib.level']
383 383 if 'zstd.level' in opts:
384 384 self._compengineopts['zstd.level'] = opts['zstd.level']
385 385 if 'maxdeltachainspan' in opts:
386 386 self._maxdeltachainspan = opts['maxdeltachainspan']
387 387 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
388 388 mmapindexthreshold = opts['mmapindexthreshold']
389 389 self._sparserevlog = bool(opts.get('sparse-revlog', False))
390 390 withsparseread = bool(opts.get('with-sparse-read', False))
391 391 # sparse-revlog forces sparse-read
392 392 self._withsparseread = self._sparserevlog or withsparseread
393 393 if 'sparse-read-density-threshold' in opts:
394 394 self._srdensitythreshold = opts['sparse-read-density-threshold']
395 395 if 'sparse-read-min-gap-size' in opts:
396 396 self._srmingapsize = opts['sparse-read-min-gap-size']
397 397 if opts.get('enableellipsis'):
398 398 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
399 399
400 400 # revlog v0 doesn't have flag processors
401 401 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
402 402 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
403 403
404 404 if self._chunkcachesize <= 0:
405 405 raise error.RevlogError(_('revlog chunk cache size %r is not '
406 406 'greater than 0') % self._chunkcachesize)
407 407 elif self._chunkcachesize & (self._chunkcachesize - 1):
408 408 raise error.RevlogError(_('revlog chunk cache size %r is not a '
409 409 'power of 2') % self._chunkcachesize)
410 410
411 411 indexdata = ''
412 412 self._initempty = True
413 413 try:
414 414 with self._indexfp() as f:
415 415 if (mmapindexthreshold is not None and
416 416 self.opener.fstat(f).st_size >= mmapindexthreshold):
417 417 # TODO: should .close() to release resources without
418 418 # relying on Python GC
419 419 indexdata = util.buffer(util.mmapread(f))
420 420 else:
421 421 indexdata = f.read()
422 422 if len(indexdata) > 0:
423 423 versionflags = versionformat_unpack(indexdata[:4])[0]
424 424 self._initempty = False
425 425 else:
426 426 versionflags = newversionflags
427 427 except IOError as inst:
428 428 if inst.errno != errno.ENOENT:
429 429 raise
430 430
431 431 versionflags = newversionflags
432 432
433 433 self.version = versionflags
434 434
435 435 flags = versionflags & ~0xFFFF
436 436 fmt = versionflags & 0xFFFF
437 437
438 438 if fmt == REVLOGV0:
439 439 if flags:
440 440 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
441 441 'revlog %s') %
442 442 (flags >> 16, fmt, self.indexfile))
443 443
444 444 self._inline = False
445 445 self._generaldelta = False
446 446
447 447 elif fmt == REVLOGV1:
448 448 if flags & ~REVLOGV1_FLAGS:
449 449 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
450 450 'revlog %s') %
451 451 (flags >> 16, fmt, self.indexfile))
452 452
453 453 self._inline = versionflags & FLAG_INLINE_DATA
454 454 self._generaldelta = versionflags & FLAG_GENERALDELTA
455 455
456 456 elif fmt == REVLOGV2:
457 457 if flags & ~REVLOGV2_FLAGS:
458 458 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
459 459 'revlog %s') %
460 460 (flags >> 16, fmt, self.indexfile))
461 461
462 462 self._inline = versionflags & FLAG_INLINE_DATA
463 463 # generaldelta implied by version 2 revlogs.
464 464 self._generaldelta = True
465 465
466 466 else:
467 467 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
468 468 (fmt, self.indexfile))
469 469 # sparse-revlog can't be on without general-delta (issue6056)
470 470 if not self._generaldelta:
471 471 self._sparserevlog = False
472 472
473 473 self._storedeltachains = True
474 474
475 475 self._io = revlogio()
476 476 if self.version == REVLOGV0:
477 477 self._io = revlogoldio()
478 478 try:
479 479 d = self._io.parseindex(indexdata, self._inline)
480 480 except (ValueError, IndexError):
481 481 raise error.RevlogError(_("index %s is corrupted") %
482 482 self.indexfile)
483 483 self.index, nodemap, self._chunkcache = d
484 484 if nodemap is not None:
485 485 self.nodemap = self._nodecache = nodemap
486 486 if not self._chunkcache:
487 487 self._chunkclear()
488 488 # revnum -> (chain-length, sum-delta-length)
489 489 self._chaininfocache = {}
490 490 # revlog header -> revlog compressor
491 491 self._decompressors = {}
492 492
493 493 @util.propertycache
494 494 def _compressor(self):
495 495 engine = util.compengines[self._compengine]
496 496 return engine.revlogcompressor(self._compengineopts)
497 497
498 498 def _indexfp(self, mode='r'):
499 499 """file object for the revlog's index file"""
500 500 args = {r'mode': mode}
501 501 if mode != 'r':
502 502 args[r'checkambig'] = self._checkambig
503 503 if mode == 'w':
504 504 args[r'atomictemp'] = True
505 505 return self.opener(self.indexfile, **args)
506 506
507 507 def _datafp(self, mode='r'):
508 508 """file object for the revlog's data file"""
509 509 return self.opener(self.datafile, mode=mode)
510 510
511 511 @contextlib.contextmanager
512 512 def _datareadfp(self, existingfp=None):
513 513 """file object suitable to read data"""
514 514 # Use explicit file handle, if given.
515 515 if existingfp is not None:
516 516 yield existingfp
517 517
518 518 # Use a file handle being actively used for writes, if available.
519 519 # There is some danger to doing this because reads will seek the
520 520 # file. However, _writeentry() performs a SEEK_END before all writes,
521 521 # so we should be safe.
522 522 elif self._writinghandles:
523 523 if self._inline:
524 524 yield self._writinghandles[0]
525 525 else:
526 526 yield self._writinghandles[1]
527 527
528 528 # Otherwise open a new file handle.
529 529 else:
530 530 if self._inline:
531 531 func = self._indexfp
532 532 else:
533 533 func = self._datafp
534 534 with func() as fp:
535 535 yield fp
536 536
537 537 def tip(self):
538 538 return self.node(len(self.index) - 1)
539 539 def __contains__(self, rev):
540 540 return 0 <= rev < len(self)
541 541 def __len__(self):
542 542 return len(self.index)
543 543 def __iter__(self):
544 544 return iter(pycompat.xrange(len(self)))
545 545 def revs(self, start=0, stop=None):
546 546 """iterate over all rev in this revlog (from start to stop)"""
547 547 return storageutil.iterrevs(len(self), start=start, stop=stop)
548 548
549 549 @util.propertycache
550 550 def nodemap(self):
551 551 if self.index:
552 552 # populate mapping down to the initial node
553 553 node0 = self.index[0][7] # get around changelog filtering
554 554 self.rev(node0)
555 555 return self._nodecache
556 556
557 557 def hasnode(self, node):
558 558 try:
559 559 self.rev(node)
560 560 return True
561 561 except KeyError:
562 562 return False
563 563
564 564 def candelta(self, baserev, rev):
565 565 """whether two revisions (baserev, rev) can be delta-ed or not"""
566 566 # Disable delta if either rev requires a content-changing flag
567 567 # processor (ex. LFS). This is because such a flag processor can alter
568 568 # the rawtext content that the delta will be based on, and two clients
569 569 # could have the same revlog node with different flags (i.e. different
570 570 # rawtext contents) and the delta could be incompatible.
571 571 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
572 572 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
573 573 return False
574 574 return True
575 575
576 576 def clearcaches(self):
577 577 self._revisioncache = None
578 578 self._chainbasecache.clear()
579 579 self._chunkcache = (0, '')
580 580 self._pcache = {}
581 581
582 582 try:
583 583 # If we are using the native C version, we are in a fun case
584 584 # where self.index, self.nodemap and self._nodecache are the same
585 585 # object.
586 586 self._nodecache.clearcaches()
587 587 except AttributeError:
588 588 self._nodecache = {nullid: nullrev}
589 589 self._nodepos = None
590 590
591 591 def rev(self, node):
592 592 try:
593 593 return self._nodecache[node]
594 594 except TypeError:
595 595 raise
596 596 except error.RevlogError:
597 597 # parsers.c radix tree lookup failed
598 598 if node == wdirid or node in wdirfilenodeids:
599 599 raise error.WdirUnsupported
600 600 raise error.LookupError(node, self.indexfile, _('no node'))
601 601 except KeyError:
602 602 # pure python cache lookup failed
603 603 n = self._nodecache
604 604 i = self.index
605 605 p = self._nodepos
606 606 if p is None:
607 607 p = len(i) - 1
608 608 else:
609 609 assert p < len(i)
610 610 for r in pycompat.xrange(p, -1, -1):
611 611 v = i[r][7]
612 612 n[v] = r
613 613 if v == node:
614 614 self._nodepos = r - 1
615 615 return r
616 616 if node == wdirid or node in wdirfilenodeids:
617 617 raise error.WdirUnsupported
618 618 raise error.LookupError(node, self.indexfile, _('no node'))
619 619
620 620 # Accessors for index entries.
621 621
622 622 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
623 623 # are flags.
624 624 def start(self, rev):
625 625 return int(self.index[rev][0] >> 16)
626 626
627 627 def flags(self, rev):
628 628 return self.index[rev][0] & 0xFFFF
629 629
630 630 def length(self, rev):
631 631 return self.index[rev][1]
632 632
633 633 def rawsize(self, rev):
634 634 """return the length of the uncompressed text for a given revision"""
635 635 l = self.index[rev][2]
636 636 if l >= 0:
637 637 return l
638 638
639 639 t = self.rawdata(rev)
640 640 return len(t)
641 641
642 642 def size(self, rev):
643 643 """length of non-raw text (processed by a "read" flag processor)"""
644 644 # fast path: if no "read" flag processor could change the content,
645 645 # size is rawsize. note: ELLIPSIS is known to not change the content.
646 646 flags = self.flags(rev)
647 647 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
648 648 return self.rawsize(rev)
649 649
650 650 return len(self.revision(rev, raw=False))
651 651
652 652 def chainbase(self, rev):
653 653 base = self._chainbasecache.get(rev)
654 654 if base is not None:
655 655 return base
656 656
657 657 index = self.index
658 658 iterrev = rev
659 659 base = index[iterrev][3]
660 660 while base != iterrev:
661 661 iterrev = base
662 662 base = index[iterrev][3]
663 663
664 664 self._chainbasecache[rev] = base
665 665 return base
666 666
667 667 def linkrev(self, rev):
668 668 return self.index[rev][4]
669 669
670 670 def parentrevs(self, rev):
671 671 try:
672 672 entry = self.index[rev]
673 673 except IndexError:
674 674 if rev == wdirrev:
675 675 raise error.WdirUnsupported
676 676 raise
677 677
678 678 return entry[5], entry[6]
679 679
680 680 # fast parentrevs(rev) where rev isn't filtered
681 681 _uncheckedparentrevs = parentrevs
682 682
683 683 def node(self, rev):
684 684 try:
685 685 return self.index[rev][7]
686 686 except IndexError:
687 687 if rev == wdirrev:
688 688 raise error.WdirUnsupported
689 689 raise
690 690
691 691 # Derived from index values.
692 692
693 693 def end(self, rev):
694 694 return self.start(rev) + self.length(rev)
695 695
696 696 def parents(self, node):
697 697 i = self.index
698 698 d = i[self.rev(node)]
699 699 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
700 700
701 701 def chainlen(self, rev):
702 702 return self._chaininfo(rev)[0]
703 703
704 704 def _chaininfo(self, rev):
705 705 chaininfocache = self._chaininfocache
706 706 if rev in chaininfocache:
707 707 return chaininfocache[rev]
708 708 index = self.index
709 709 generaldelta = self._generaldelta
710 710 iterrev = rev
711 711 e = index[iterrev]
712 712 clen = 0
713 713 compresseddeltalen = 0
714 714 while iterrev != e[3]:
715 715 clen += 1
716 716 compresseddeltalen += e[1]
717 717 if generaldelta:
718 718 iterrev = e[3]
719 719 else:
720 720 iterrev -= 1
721 721 if iterrev in chaininfocache:
722 722 t = chaininfocache[iterrev]
723 723 clen += t[0]
724 724 compresseddeltalen += t[1]
725 725 break
726 726 e = index[iterrev]
727 727 else:
728 728 # Add text length of base since decompressing that also takes
729 729 # work. For cache hits the length is already included.
730 730 compresseddeltalen += e[1]
731 731 r = (clen, compresseddeltalen)
732 732 chaininfocache[rev] = r
733 733 return r
734 734
735 735 def _deltachain(self, rev, stoprev=None):
736 736 """Obtain the delta chain for a revision.
737 737
738 738 ``stoprev`` specifies a revision to stop at. If not specified, we
739 739 stop at the base of the chain.
740 740
741 741 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
742 742 revs in ascending order and ``stopped`` is a bool indicating whether
743 743 ``stoprev`` was hit.
744 744 """
745 745 # Try C implementation.
746 746 try:
747 747 return self.index.deltachain(rev, stoprev, self._generaldelta)
748 748 except AttributeError:
749 749 pass
750 750
751 751 chain = []
752 752
753 753 # Alias to prevent attribute lookup in tight loop.
754 754 index = self.index
755 755 generaldelta = self._generaldelta
756 756
757 757 iterrev = rev
758 758 e = index[iterrev]
759 759 while iterrev != e[3] and iterrev != stoprev:
760 760 chain.append(iterrev)
761 761 if generaldelta:
762 762 iterrev = e[3]
763 763 else:
764 764 iterrev -= 1
765 765 e = index[iterrev]
766 766
767 767 if iterrev == stoprev:
768 768 stopped = True
769 769 else:
770 770 chain.append(iterrev)
771 771 stopped = False
772 772
773 773 chain.reverse()
774 774 return chain, stopped
775 775
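    # Usage sketch (illustration): a fulltext is rebuilt by walking the chain
    # from its base forward, e.g.:
    #
    #     chain, stopped = self._deltachain(rev)
    #     # chain[0] is the chain base (a snapshot unless ``stopped``); every
    #     # later rev in ``chain`` contributes one more delta on top of it.
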
776 776 def ancestors(self, revs, stoprev=0, inclusive=False):
777 777 """Generate the ancestors of 'revs' in reverse revision order.
778 778 Does not generate revs lower than stoprev.
779 779
780 780 See the documentation for ancestor.lazyancestors for more details."""
781 781
782 782 # first, make sure start revisions aren't filtered
783 783 revs = list(revs)
784 784 checkrev = self.node
785 785 for r in revs:
786 786 checkrev(r)
787 787 # and we're sure ancestors aren't filtered as well
788 788
789 789 if rustancestor is not None:
790 790 lazyancestors = rustancestor.LazyAncestors
791 791 arg = self.index
792 792 elif util.safehasattr(parsers, 'rustlazyancestors'):
793 793 lazyancestors = ancestor.rustlazyancestors
794 794 arg = self.index
795 795 else:
796 796 lazyancestors = ancestor.lazyancestors
797 797 arg = self._uncheckedparentrevs
798 798 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
799 799
800 800 def descendants(self, revs):
801 801 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
802 802
803 803 def findcommonmissing(self, common=None, heads=None):
804 804 """Return a tuple of the ancestors of common and the ancestors of heads
805 805 that are not ancestors of common. In revset terminology, we return the
806 806 tuple:
807 807
808 808 ::common, (::heads) - (::common)
809 809
810 810 The list is sorted by revision number, meaning it is
811 811 topologically sorted.
812 812
813 813 'heads' and 'common' are both lists of node IDs. If heads is
814 814 not supplied, uses all of the revlog's heads. If common is not
815 815 supplied, uses nullid."""
816 816 if common is None:
817 817 common = [nullid]
818 818 if heads is None:
819 819 heads = self.heads()
820 820
821 821 common = [self.rev(n) for n in common]
822 822 heads = [self.rev(n) for n in heads]
823 823
824 824 # we want the ancestors, but inclusive
825 825 class lazyset(object):
826 826 def __init__(self, lazyvalues):
827 827 self.addedvalues = set()
828 828 self.lazyvalues = lazyvalues
829 829
830 830 def __contains__(self, value):
831 831 return value in self.addedvalues or value in self.lazyvalues
832 832
833 833 def __iter__(self):
834 834 added = self.addedvalues
835 835 for r in added:
836 836 yield r
837 837 for r in self.lazyvalues:
838 838 if r not in added:
839 839 yield r
840 840
841 841 def add(self, value):
842 842 self.addedvalues.add(value)
843 843
844 844 def update(self, values):
845 845 self.addedvalues.update(values)
846 846
847 847 has = lazyset(self.ancestors(common))
848 848 has.add(nullrev)
849 849 has.update(common)
850 850
851 851 # take all ancestors from heads that aren't in has
852 852 missing = set()
853 853 visit = collections.deque(r for r in heads if r not in has)
854 854 while visit:
855 855 r = visit.popleft()
856 856 if r in missing:
857 857 continue
858 858 else:
859 859 missing.add(r)
860 860 for p in self.parentrevs(r):
861 861 if p not in has:
862 862 visit.append(p)
863 863 missing = list(missing)
864 864 missing.sort()
865 865 return has, [self.node(miss) for miss in missing]
866 866
867 867 def incrementalmissingrevs(self, common=None):
868 868 """Return an object that can be used to incrementally compute the
869 869 revision numbers of the ancestors of arbitrary sets that are not
870 870 ancestors of common. This is an ancestor.incrementalmissingancestors
871 871 object.
872 872
873 873 'common' is a list of revision numbers. If common is not supplied, uses
874 874 nullrev.
875 875 """
876 876 if common is None:
877 877 common = [nullrev]
878 878
879 879 if rustancestor is not None:
880 880 return rustancestor.MissingAncestors(self.index, common)
881 881 return ancestor.incrementalmissingancestors(self.parentrevs, common)
882 882
883 883 def findmissingrevs(self, common=None, heads=None):
884 884 """Return the revision numbers of the ancestors of heads that
885 885 are not ancestors of common.
886 886
887 887 More specifically, return a list of revision numbers corresponding to
888 888 nodes N such that every N satisfies the following constraints:
889 889
890 890 1. N is an ancestor of some node in 'heads'
891 891 2. N is not an ancestor of any node in 'common'
892 892
893 893 The list is sorted by revision number, meaning it is
894 894 topologically sorted.
895 895
896 896 'heads' and 'common' are both lists of revision numbers. If heads is
897 897 not supplied, uses all of the revlog's heads. If common is not
898 898 supplied, uses nullid."""
899 899 if common is None:
900 900 common = [nullrev]
901 901 if heads is None:
902 902 heads = self.headrevs()
903 903
904 904 inc = self.incrementalmissingrevs(common=common)
905 905 return inc.missingancestors(heads)
906 906
907 907 def findmissing(self, common=None, heads=None):
908 908 """Return the ancestors of heads that are not ancestors of common.
909 909
910 910 More specifically, return a list of nodes N such that every N
911 911 satisfies the following constraints:
912 912
913 913 1. N is an ancestor of some node in 'heads'
914 914 2. N is not an ancestor of any node in 'common'
915 915
916 916 The list is sorted by revision number, meaning it is
917 917 topologically sorted.
918 918
919 919 'heads' and 'common' are both lists of node IDs. If heads is
920 920 not supplied, uses all of the revlog's heads. If common is not
921 921 supplied, uses nullid."""
922 922 if common is None:
923 923 common = [nullid]
924 924 if heads is None:
925 925 heads = self.heads()
926 926
927 927 common = [self.rev(n) for n in common]
928 928 heads = [self.rev(n) for n in heads]
929 929
930 930 inc = self.incrementalmissingrevs(common=common)
931 931 return [self.node(r) for r in inc.missingancestors(heads)]
932 932
933 933 def nodesbetween(self, roots=None, heads=None):
934 934 """Return a topological path from 'roots' to 'heads'.
935 935
936 936 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
937 937 topologically sorted list of all nodes N that satisfy both of
938 938 these constraints:
939 939
940 940 1. N is a descendant of some node in 'roots'
941 941 2. N is an ancestor of some node in 'heads'
942 942
943 943 Every node is considered to be both a descendant and an ancestor
944 944 of itself, so every reachable node in 'roots' and 'heads' will be
945 945 included in 'nodes'.
946 946
947 947 'outroots' is the list of reachable nodes in 'roots', i.e., the
948 948 subset of 'roots' that is returned in 'nodes'. Likewise,
949 949 'outheads' is the subset of 'heads' that is also in 'nodes'.
950 950
951 951 'roots' and 'heads' are both lists of node IDs. If 'roots' is
952 952 unspecified, uses nullid as the only root. If 'heads' is
953 953 unspecified, uses list of all of the revlog's heads."""
954 954 nonodes = ([], [], [])
955 955 if roots is not None:
956 956 roots = list(roots)
957 957 if not roots:
958 958 return nonodes
959 959 lowestrev = min([self.rev(n) for n in roots])
960 960 else:
961 961 roots = [nullid] # Everybody's a descendant of nullid
962 962 lowestrev = nullrev
963 963 if (lowestrev == nullrev) and (heads is None):
964 964 # We want _all_ the nodes!
965 965 return ([self.node(r) for r in self], [nullid], list(self.heads()))
966 966 if heads is None:
967 967 # All nodes are ancestors, so the latest ancestor is the last
968 968 # node.
969 969 highestrev = len(self) - 1
970 970 # Set ancestors to None to signal that every node is an ancestor.
971 971 ancestors = None
972 972 # Set heads to an empty dictionary for later discovery of heads
973 973 heads = {}
974 974 else:
975 975 heads = list(heads)
976 976 if not heads:
977 977 return nonodes
978 978 ancestors = set()
979 979 # Turn heads into a dictionary so we can remove 'fake' heads.
980 980 # Also, later we will be using it to filter out the heads we can't
981 981 # find from roots.
982 982 heads = dict.fromkeys(heads, False)
983 983 # Start at the top and keep marking parents until we're done.
984 984 nodestotag = set(heads)
985 985 # Remember where the top was so we can use it as a limit later.
986 986 highestrev = max([self.rev(n) for n in nodestotag])
987 987 while nodestotag:
988 988 # grab a node to tag
989 989 n = nodestotag.pop()
990 990 # Never tag nullid
991 991 if n == nullid:
992 992 continue
993 993 # A node's revision number represents its place in a
994 994 # topologically sorted list of nodes.
995 995 r = self.rev(n)
996 996 if r >= lowestrev:
997 997 if n not in ancestors:
998 998 # If we are possibly a descendant of one of the roots
999 999 # and we haven't already been marked as an ancestor
1000 1000 ancestors.add(n) # Mark as ancestor
1001 1001 # Add non-nullid parents to list of nodes to tag.
1002 1002 nodestotag.update([p for p in self.parents(n) if
1003 1003 p != nullid])
1004 1004 elif n in heads: # We've seen it before, is it a fake head?
1005 1005 # So it is, real heads should not be the ancestors of
1006 1006 # any other heads.
1007 1007 heads.pop(n)
1008 1008 if not ancestors:
1009 1009 return nonodes
1010 1010 # Now that we have our set of ancestors, we want to remove any
1011 1011 # roots that are not ancestors.
1012 1012
1013 1013 # If one of the roots was nullid, everything is included anyway.
1014 1014 if lowestrev > nullrev:
1015 1015 # But, since we weren't, let's recompute the lowest rev to not
1016 1016 # include roots that aren't ancestors.
1017 1017
1018 1018 # Filter out roots that aren't ancestors of heads
1019 1019 roots = [root for root in roots if root in ancestors]
1020 1020 # Recompute the lowest revision
1021 1021 if roots:
1022 1022 lowestrev = min([self.rev(root) for root in roots])
1023 1023 else:
1024 1024 # No more roots? Return empty list
1025 1025 return nonodes
1026 1026 else:
1027 1027 # We are descending from nullid, and don't need to care about
1028 1028 # any other roots.
1029 1029 lowestrev = nullrev
1030 1030 roots = [nullid]
1031 1031 # Transform our roots list into a set.
1032 1032 descendants = set(roots)
1033 1033 # Also, keep the original roots so we can filter out roots that aren't
1034 1034 # 'real' roots (i.e. are descended from other roots).
1035 1035 roots = descendants.copy()
1036 1036 # Our topologically sorted list of output nodes.
1037 1037 orderedout = []
1038 1038 # Don't start at nullid since we don't want nullid in our output list,
1039 1039 # and if nullid shows up in descendants, empty parents will look like
1040 1040 # they're descendants.
1041 1041 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1042 1042 n = self.node(r)
1043 1043 isdescendant = False
1044 1044 if lowestrev == nullrev: # Everybody is a descendant of nullid
1045 1045 isdescendant = True
1046 1046 elif n in descendants:
1047 1047 # n is already a descendant
1048 1048 isdescendant = True
1049 1049 # This check only needs to be done here because all the roots
1050 1050 # will start being marked as descendants before the loop.
1051 1051 if n in roots:
1052 1052 # If n was a root, check if it's a 'real' root.
1053 1053 p = tuple(self.parents(n))
1054 1054 # If any of its parents are descendants, it's not a root.
1055 1055 if (p[0] in descendants) or (p[1] in descendants):
1056 1056 roots.remove(n)
1057 1057 else:
1058 1058 p = tuple(self.parents(n))
1059 1059 # A node is a descendant if either of its parents are
1060 1060 # descendants. (We seeded the descendants set with the roots
1061 1061 # up there, remember?)
1062 1062 if (p[0] in descendants) or (p[1] in descendants):
1063 1063 descendants.add(n)
1064 1064 isdescendant = True
1065 1065 if isdescendant and ((ancestors is None) or (n in ancestors)):
1066 1066 # Only include nodes that are both descendants and ancestors.
1067 1067 orderedout.append(n)
1068 1068 if (ancestors is not None) and (n in heads):
1069 1069 # We're trying to figure out which heads are reachable
1070 1070 # from roots.
1071 1071 # Mark this head as having been reached
1072 1072 heads[n] = True
1073 1073 elif ancestors is None:
1074 1074 # Otherwise, we're trying to discover the heads.
1075 1075 # Assume this is a head because if it isn't, the next step
1076 1076 # will eventually remove it.
1077 1077 heads[n] = True
1078 1078 # But, obviously its parents aren't.
1079 1079 for p in self.parents(n):
1080 1080 heads.pop(p, None)
1081 1081 heads = [head for head, flag in heads.iteritems() if flag]
1082 1082 roots = list(roots)
1083 1083 assert orderedout
1084 1084 assert roots
1085 1085 assert heads
1086 1086 return (orderedout, roots, heads)
1087 1087
1088 1088 def headrevs(self, revs=None):
1089 1089 if revs is None:
1090 1090 try:
1091 1091 return self.index.headrevs()
1092 1092 except AttributeError:
1093 1093 return self._headrevs()
1094 1094 if rustdagop is not None:
1095 1095 return rustdagop.headrevs(self.index, revs)
1096 1096 return dagop.headrevs(revs, self._uncheckedparentrevs)
1097 1097
1098 1098 def computephases(self, roots):
1099 1099 return self.index.computephasesmapsets(roots)
1100 1100
1101 1101 def _headrevs(self):
1102 1102 count = len(self)
1103 1103 if not count:
1104 1104 return [nullrev]
1105 1105 # we won't iterate over filtered revs so nobody is a head at start
1106 1106 ishead = [0] * (count + 1)
1107 1107 index = self.index
1108 1108 for r in self:
1109 1109 ishead[r] = 1 # I may be a head
1110 1110 e = index[r]
1111 1111 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1112 1112 return [r for r, val in enumerate(ishead) if val]
1113 1113
1114 1114 def heads(self, start=None, stop=None):
1115 1115 """return the list of all nodes that have no children
1116 1116
1117 1117 if start is specified, only heads that are descendants of
1118 1118 start will be returned
1119 1119 if stop is specified, it will consider all the revs from stop
1120 1120 as if they had no children
1121 1121 """
1122 1122 if start is None and stop is None:
1123 1123 if not len(self):
1124 1124 return [nullid]
1125 1125 return [self.node(r) for r in self.headrevs()]
1126 1126
1127 1127 if start is None:
1128 1128 start = nullrev
1129 1129 else:
1130 1130 start = self.rev(start)
1131 1131
1132 1132 stoprevs = set(self.rev(n) for n in stop or [])
1133 1133
1134 1134 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1135 1135 stoprevs=stoprevs)
1136 1136
1137 1137 return [self.node(rev) for rev in revs]
1138 1138
1139 1139 def children(self, node):
1140 1140 """find the children of a given node"""
1141 1141 c = []
1142 1142 p = self.rev(node)
1143 1143 for r in self.revs(start=p + 1):
1144 1144 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1145 1145 if prevs:
1146 1146 for pr in prevs:
1147 1147 if pr == p:
1148 1148 c.append(self.node(r))
1149 1149 elif p == nullrev:
1150 1150 c.append(self.node(r))
1151 1151 return c
1152 1152
1153 1153 def commonancestorsheads(self, a, b):
1154 1154 """calculate all the heads of the common ancestors of nodes a and b"""
1155 1155 a, b = self.rev(a), self.rev(b)
1156 1156 ancs = self._commonancestorsheads(a, b)
1157 1157 return pycompat.maplist(self.node, ancs)
1158 1158
1159 1159 def _commonancestorsheads(self, *revs):
1160 1160 """calculate all the heads of the common ancestors of revs"""
1161 1161 try:
1162 1162 ancs = self.index.commonancestorsheads(*revs)
1163 1163 except (AttributeError, OverflowError): # C implementation failed
1164 1164 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1165 1165 return ancs
1166 1166
1167 1167 def isancestor(self, a, b):
1168 1168 """return True if node a is an ancestor of node b
1169 1169
1170 1170 A revision is considered an ancestor of itself."""
1171 1171 a, b = self.rev(a), self.rev(b)
1172 1172 return self.isancestorrev(a, b)
1173 1173
1174 1174 def isancestorrev(self, a, b):
1175 1175 """return True if revision a is an ancestor of revision b
1176 1176
1177 1177 A revision is considered an ancestor of itself.
1178 1178
1179 1179 The implementation of this is trivial but the use of
1180 1180 reachableroots is not."""
1181 1181 if a == nullrev:
1182 1182 return True
1183 1183 elif a == b:
1184 1184 return True
1185 1185 elif a > b:
1186 1186 return False
1187 1187 return bool(self.reachableroots(a, [b], [a], includepath=False))
1188 1188
1189 1189 def reachableroots(self, minroot, heads, roots, includepath=False):
1190 1190 """return (heads(::<roots> and <roots>::<heads>))
1191 1191
1192 1192 If includepath is True, return (<roots>::<heads>)."""
1193 1193 try:
1194 1194 return self.index.reachableroots2(minroot, heads, roots,
1195 1195 includepath)
1196 1196 except AttributeError:
1197 1197 return dagop._reachablerootspure(self.parentrevs,
1198 1198 minroot, roots, heads, includepath)
1199 1199
1200 1200 def ancestor(self, a, b):
1201 1201 """calculate the "best" common ancestor of nodes a and b"""
1202 1202
1203 1203 a, b = self.rev(a), self.rev(b)
1204 1204 try:
1205 1205 ancs = self.index.ancestors(a, b)
1206 1206 except (AttributeError, OverflowError):
1207 1207 ancs = ancestor.ancestors(self.parentrevs, a, b)
1208 1208 if ancs:
1209 1209 # choose a consistent winner when there's a tie
1210 1210 return min(map(self.node, ancs))
1211 1211 return nullid
1212 1212
1213 1213 def _match(self, id):
1214 1214 if isinstance(id, int):
1215 1215 # rev
1216 1216 return self.node(id)
1217 1217 if len(id) == 20:
1218 1218 # possibly a binary node
1219 1219 # odds of a binary node being all hex in ASCII are 1 in 10**25
1220 1220 try:
1221 1221 node = id
1222 1222 self.rev(node) # quick search the index
1223 1223 return node
1224 1224 except error.LookupError:
1225 1225 pass # may be partial hex id
1226 1226 try:
1227 1227 # str(rev)
1228 1228 rev = int(id)
1229 1229 if "%d" % rev != id:
1230 1230 raise ValueError
1231 1231 if rev < 0:
1232 1232 rev = len(self) + rev
1233 1233 if rev < 0 or rev >= len(self):
1234 1234 raise ValueError
1235 1235 return self.node(rev)
1236 1236 except (ValueError, OverflowError):
1237 1237 pass
1238 1238 if len(id) == 40:
1239 1239 try:
1240 1240 # a full hex nodeid?
1241 1241 node = bin(id)
1242 1242 self.rev(node)
1243 1243 return node
1244 1244 except (TypeError, error.LookupError):
1245 1245 pass
1246 1246
1247 1247 def _partialmatch(self, id):
1248 1248 # we don't care about wdirfilenodeids as they should always be full hashes
1249 1249 maybewdir = wdirhex.startswith(id)
1250 1250 try:
1251 1251 partial = self.index.partialmatch(id)
1252 1252 if partial and self.hasnode(partial):
1253 1253 if maybewdir:
1254 1254 # single 'ff...' match in radix tree, ambiguous with wdir
1255 1255 raise error.RevlogError
1256 1256 return partial
1257 1257 if maybewdir:
1258 1258 # no 'ff...' match in radix tree, wdir identified
1259 1259 raise error.WdirUnsupported
1260 1260 return None
1261 1261 except error.RevlogError:
1262 1262 # parsers.c radix tree lookup gave multiple matches
1263 1263 # fast path: for unfiltered changelog, radix tree is accurate
1264 1264 if not getattr(self, 'filteredrevs', None):
1265 1265 raise error.AmbiguousPrefixLookupError(
1266 1266 id, self.indexfile, _('ambiguous identifier'))
1267 1267 # fall through to slow path that filters hidden revisions
1268 1268 except (AttributeError, ValueError):
1269 1269 # we are pure python, or key was too short to search radix tree
1270 1270 pass
1271 1271
1272 1272 if id in self._pcache:
1273 1273 return self._pcache[id]
1274 1274
1275 1275 if len(id) <= 40:
1276 1276 try:
1277 1277 # hex(node)[:...]
1278 1278 l = len(id) // 2 # grab an even number of digits
1279 1279 prefix = bin(id[:l * 2])
1280 1280 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1281 1281 nl = [n for n in nl if hex(n).startswith(id) and
1282 1282 self.hasnode(n)]
1283 1283 if nullhex.startswith(id):
1284 1284 nl.append(nullid)
1285 1285 if len(nl) > 0:
1286 1286 if len(nl) == 1 and not maybewdir:
1287 1287 self._pcache[id] = nl[0]
1288 1288 return nl[0]
1289 1289 raise error.AmbiguousPrefixLookupError(
1290 1290 id, self.indexfile, _('ambiguous identifier'))
1291 1291 if maybewdir:
1292 1292 raise error.WdirUnsupported
1293 1293 return None
1294 1294 except TypeError:
1295 1295 pass
1296 1296
1297 1297 def lookup(self, id):
1298 1298 """locate a node based on:
1299 1299 - revision number or str(revision number)
1300 1300 - nodeid or subset of hex nodeid
1301 1301 """
1302 1302 n = self._match(id)
1303 1303 if n is not None:
1304 1304 return n
1305 1305 n = self._partialmatch(id)
1306 1306 if n:
1307 1307 return n
1308 1308
1309 1309 raise error.LookupError(id, self.indexfile, _('no match found'))
1310 1310
1311 1311 def shortest(self, node, minlength=1):
1312 1312 """Find the shortest unambiguous prefix that matches node."""
1313 1313 def isvalid(prefix):
1314 1314 try:
1315 1315 matchednode = self._partialmatch(prefix)
1316 1316 except error.AmbiguousPrefixLookupError:
1317 1317 return False
1318 1318 except error.WdirUnsupported:
1319 1319 # single 'ff...' match
1320 1320 return True
1321 1321 if matchednode is None:
1322 1322 raise error.LookupError(node, self.indexfile, _('no node'))
1323 1323 return True
1324 1324
1325 1325 def maybewdir(prefix):
1326 1326 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1327 1327
1328 1328 hexnode = hex(node)
1329 1329
1330 1330 def disambiguate(hexnode, minlength):
1331 1331 """Disambiguate against wdirid."""
1332 1332 for length in range(minlength, 41):
1333 1333 prefix = hexnode[:length]
1334 1334 if not maybewdir(prefix):
1335 1335 return prefix
1336 1336
1337 1337 if not getattr(self, 'filteredrevs', None):
1338 1338 try:
1339 1339 length = max(self.index.shortest(node), minlength)
1340 1340 return disambiguate(hexnode, length)
1341 1341 except error.RevlogError:
1342 1342 if node != wdirid:
1343 1343 raise error.LookupError(node, self.indexfile, _('no node'))
1344 1344 except AttributeError:
1345 1345 # Fall through to pure code
1346 1346 pass
1347 1347
1348 1348 if node == wdirid:
1349 1349 for length in range(minlength, 41):
1350 1350 prefix = hexnode[:length]
1351 1351 if isvalid(prefix):
1352 1352 return prefix
1353 1353
1354 1354 for length in range(minlength, 41):
1355 1355 prefix = hexnode[:length]
1356 1356 if isvalid(prefix):
1357 1357 return disambiguate(hexnode, length)
1358 1358
1359 1359 def cmp(self, node, text):
1360 1360 """compare text with a given file revision
1361 1361
1362 1362 returns True if text is different from what is stored.
1363 1363 """
1364 1364 p1, p2 = self.parents(node)
1365 1365 return storageutil.hashrevisionsha1(text, p1, p2) != node
1366 1366
1367 1367 def _cachesegment(self, offset, data):
1368 1368 """Add a segment to the revlog cache.
1369 1369
1370 1370 Accepts an absolute offset and the data that is at that location.
1371 1371 """
1372 1372 o, d = self._chunkcache
1373 1373 # try to add to existing cache
1374 1374 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1375 1375 self._chunkcache = o, d + data
1376 1376 else:
1377 1377 self._chunkcache = offset, data
1378 1378
1379 1379 def _readsegment(self, offset, length, df=None):
1380 1380 """Load a segment of raw data from the revlog.
1381 1381
1382 1382 Accepts an absolute offset, length to read, and an optional existing
1383 1383 file handle to read from.
1384 1384
1385 1385 If an existing file handle is passed, it will be seeked and the
1386 1386 original seek position will NOT be restored.
1387 1387
1388 1388 Returns a str or buffer of raw byte data.
1389 1389
1390 1390 Raises if the requested number of bytes could not be read.
1391 1391 """
1392 1392 # Cache data both forward and backward around the requested
1393 1393 # data, in a fixed size window. This helps speed up operations
1394 1394 # involving reading the revlog backwards.
1395 1395 cachesize = self._chunkcachesize
1396 1396 realoffset = offset & ~(cachesize - 1)
1397 1397 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1398 1398 - realoffset)
1399 1399 with self._datareadfp(df) as df:
1400 1400 df.seek(realoffset)
1401 1401 d = df.read(reallength)
1402 1402
1403 1403 self._cachesegment(realoffset, d)
1404 1404 if offset != realoffset or reallength != length:
1405 1405 startoffset = offset - realoffset
1406 1406 if len(d) - startoffset < length:
1407 1407 raise error.RevlogError(
1408 1408 _('partial read of revlog %s; expected %d bytes from '
1409 1409 'offset %d, got %d') %
1410 1410 (self.indexfile if self._inline else self.datafile,
1411 1411 length, realoffset, len(d) - startoffset))
1412 1412
1413 1413 return util.buffer(d, startoffset, length)
1414 1414
1415 1415 if len(d) < length:
1416 1416 raise error.RevlogError(
1417 1417 _('partial read of revlog %s; expected %d bytes from offset '
1418 1418 '%d, got %d') %
1419 1419 (self.indexfile if self._inline else self.datafile,
1420 1420 length, offset, len(d)))
1421 1421
1422 1422 return d
1423 1423
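    # Worked example (illustration): with the default 64 KiB window
    # (cachesize == 65536), a 100-byte read at offset 70000 is widened to
    #
    #     realoffset = 70000 & ~65535                            # == 65536
    #     reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # == 65536
    #
    # so one full window is read and cached, and the requested 100 bytes are
    # returned as a buffer into it.
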
1424 1424 def _getsegment(self, offset, length, df=None):
1425 1425 """Obtain a segment of raw data from the revlog.
1426 1426
1427 1427 Accepts an absolute offset, length of bytes to obtain, and an
1428 1428 optional file handle to the already-opened revlog. If the file
1429 1429 handle is used, its original seek position will not be preserved.
1430 1430
1431 1431 Requests for data may be returned from a cache.
1432 1432
1433 1433 Returns a str or a buffer instance of raw byte data.
1434 1434 """
1435 1435 o, d = self._chunkcache
1436 1436 l = len(d)
1437 1437
1438 1438 # is it in the cache?
1439 1439 cachestart = offset - o
1440 1440 cacheend = cachestart + length
1441 1441 if cachestart >= 0 and cacheend <= l:
1442 1442 if cachestart == 0 and cacheend == l:
1443 1443 return d # avoid a copy
1444 1444 return util.buffer(d, cachestart, cacheend - cachestart)
1445 1445
1446 1446 return self._readsegment(offset, length, df=df)
1447 1447
1448 1448 def _getsegmentforrevs(self, startrev, endrev, df=None):
1449 1449 """Obtain a segment of raw data corresponding to a range of revisions.
1450 1450
1451 1451 Accepts the start and end revisions and an optional already-open
1452 1452 file handle to be used for reading. If the file handle is read, its
1453 1453 seek position will not be preserved.
1454 1454
1455 1455 Requests for data may be satisfied by a cache.
1456 1456
1457 1457 Returns a 2-tuple of (offset, data) for the requested range of
1458 1458 revisions. Offset is the integer offset from the beginning of the
1459 1459 revlog and data is a str or buffer of the raw byte data.
1460 1460
1461 1461 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1462 1462 to determine where each revision's data begins and ends.
1463 1463 """
1464 1464 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1465 1465 # (functions are expensive).
1466 1466 index = self.index
1467 1467 istart = index[startrev]
1468 1468 start = int(istart[0] >> 16)
1469 1469 if startrev == endrev:
1470 1470 end = start + istart[1]
1471 1471 else:
1472 1472 iend = index[endrev]
1473 1473 end = int(iend[0] >> 16) + iend[1]
1474 1474
1475 1475 if self._inline:
1476 1476 start += (startrev + 1) * self._io.size
1477 1477 end += (endrev + 1) * self._io.size
1478 1478 length = end - start
1479 1479
1480 1480 return start, self._getsegment(start, length, df=df)
1481 1481
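    # Usage sketch (illustration): callers slice per-revision chunks out of
    # the returned segment using index offsets, as _chunks() below does
    # (inline revlogs additionally skip the interleaved index entries):
    #
    #     offset, data = self._getsegmentforrevs(firstrev, lastrev)
    #     chunk = util.buffer(data, self.start(rev) - offset,
    #                         self.length(rev))
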
1482 1482 def _chunk(self, rev, df=None):
1483 1483 """Obtain a single decompressed chunk for a revision.
1484 1484
1485 1485 Accepts an integer revision and an optional already-open file handle
1486 1486 to be used for reading. If used, the seek position of the file will not
1487 1487 be preserved.
1488 1488
1489 1489 Returns a str holding uncompressed data for the requested revision.
1490 1490 """
1491 1491 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1492 1492
1493 1493 def _chunks(self, revs, df=None, targetsize=None):
1494 1494 """Obtain decompressed chunks for the specified revisions.
1495 1495
1496 1496 Accepts an iterable of numeric revisions that are assumed to be in
1497 1497 ascending order. Also accepts an optional already-open file handle
1498 1498 to be used for reading. If used, the seek position of the file will
1499 1499 not be preserved.
1500 1500
1501 1501 This function is similar to calling ``self._chunk()`` multiple times,
1502 1502 but is faster.
1503 1503
1504 1504 Returns a list with decompressed data for each requested revision.
1505 1505 """
1506 1506 if not revs:
1507 1507 return []
1508 1508 start = self.start
1509 1509 length = self.length
1510 1510 inline = self._inline
1511 1511 iosize = self._io.size
1512 1512 buffer = util.buffer
1513 1513
1514 1514 l = []
1515 1515 ladd = l.append
1516 1516
1517 1517 if not self._withsparseread:
1518 1518 slicedchunks = (revs,)
1519 1519 else:
1520 1520 slicedchunks = deltautil.slicechunk(self, revs,
1521 1521 targetsize=targetsize)
1522 1522
1523 1523 for revschunk in slicedchunks:
1524 1524 firstrev = revschunk[0]
1525 1525 # Skip trailing revisions with empty diff
1526 1526 for lastrev in revschunk[::-1]:
1527 1527 if length(lastrev) != 0:
1528 1528 break
1529 1529
1530 1530 try:
1531 1531 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1532 1532 except OverflowError:
1533 1533 # issue4215 - we can't cache a run of chunks greater than
1534 1534 # 2G on Windows
1535 1535 return [self._chunk(rev, df=df) for rev in revschunk]
1536 1536
1537 1537 decomp = self.decompress
1538 1538 for rev in revschunk:
1539 1539 chunkstart = start(rev)
1540 1540 if inline:
1541 1541 chunkstart += (rev + 1) * iosize
1542 1542 chunklength = length(rev)
1543 1543 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1544 1544
1545 1545 return l
1546 1546
1547 1547 def _chunkclear(self):
1548 1548 """Clear the raw chunk cache."""
1549 1549 self._chunkcache = (0, '')
1550 1550
1551 1551 def deltaparent(self, rev):
1552 1552 """return deltaparent of the given revision"""
1553 1553 base = self.index[rev][3]
1554 1554 if base == rev:
1555 1555 return nullrev
1556 1556 elif self._generaldelta:
1557 1557 return base
1558 1558 else:
1559 1559 return rev - 1
1560 1560
1561 1561 def issnapshot(self, rev):
1562 1562 """tells whether rev is a snapshot
1563 1563 """
1564 1564 if not self._sparserevlog:
1565 1565 return self.deltaparent(rev) == nullrev
1566 1566 elif util.safehasattr(self.index, 'issnapshot'):
1567 1567 # directly assign the method to cache the testing and access
1568 1568 self.issnapshot = self.index.issnapshot
1569 1569 return self.issnapshot(rev)
1570 1570 if rev == nullrev:
1571 1571 return True
1572 1572 entry = self.index[rev]
1573 1573 base = entry[3]
1574 1574 if base == rev:
1575 1575 return True
1576 1576 if base == nullrev:
1577 1577 return True
1578 1578 p1 = entry[5]
1579 1579 p2 = entry[6]
1580 1580 if base == p1 or base == p2:
1581 1581 return False
1582 1582 return self.issnapshot(base)
1583 1583
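    # Illustration (not part of the original file): with sparse-revlog,
    # snapshots can nest. In a chain ``full-snapshot -> intermediate-snapshot
    # -> delta``, the intermediate snapshot has a non-parent delta base that
    # is itself a snapshot, and snapshotdepth() below reports 1 for it.
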
1584 1584 def snapshotdepth(self, rev):
1585 1585 """number of snapshot in the chain before this one"""
1586 1586 if not self.issnapshot(rev):
1587 1587 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1588 1588 return len(self._deltachain(rev)[0]) - 1
1589 1589
1590 1590 def revdiff(self, rev1, rev2):
1591 1591 """return or calculate a delta between two revisions
1592 1592
1593 1593 The delta calculated is in binary form and is intended to be written to
1594 1594 revlog data directly. So this function needs raw revision data.
1595 1595 """
1596 1596 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1597 1597 return bytes(self._chunk(rev2))
1598 1598
1599 1599 return mdiff.textdiff(self.rawdata(rev1),
1600 1600 self.rawdata(rev2))
1601 1601
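# Illustrative usage (assuming a revlog instance ``rl``): the returned
# binary delta patches one rawtext into the other, which is why the stored
# chunk can be reused when ``rev2`` deltas directly against ``rev1``:
#
#   delta = rl.revdiff(rev1, rev2)
#   assert mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)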
1602 1602 def revision(self, nodeorrev, _df=None, raw=False):
1603 1603 """return an uncompressed revision of a given node or revision
1604 1604 number.
1605 1605
1606 1606 _df - an existing file handle to read from. (internal-only)
1607 1607 raw - an optional argument specifying if the revision data is to be
1608 1608 treated as raw data when applying flag transforms. 'raw' should be set
1609 1609 to True when generating changegroups or in debug commands.
1610 1610 """
1611 1611 return self._revisiondata(nodeorrev, _df, raw=raw)
1612 1612
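# Illustrative usage (assuming a revlog instance ``rl``):
#
#   text = rl.revision(node)            # flag processors applied
#   raw = rl.revision(node, raw=True)   # stored rawtext, e.g. for
#                                       # changegroups or debug commands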
1613 1613 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1614 1614 # deal with <nodeorrev> argument type
1615 1615 if isinstance(nodeorrev, int):
1616 1616 rev = nodeorrev
1617 1617 node = self.node(rev)
1618 1618 else:
1619 1619 node = nodeorrev
1620 1620 rev = None
1621 1621
1622 1622 # fast path the special `nullid` rev
1623 1623 if node == nullid:
1624 1624 return ""
1625 1625
1626 # revision in the cache (could be useful to apply delta)
1627 cachedrev = None
1628 # the revlog's flag for this revision
1629 # (usually alter its state or content)
1630 flags = None
1631 1626 # The text as stored inside the revlog. Might be the revision or might
1632 1627 # need to be processed to retrieve the revision.
1633 1628 rawtext = None
1629
1630 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1631
1632 if raw and validated:
1633 # if we don't want to process the raw text and the raw
1634 # text is already cached, we can exit early.
1635 return rawtext
1636 if rev is None:
1637 rev = self.rev(node)
1638 # the revlog's flag for this revision
1639 # (usually alter its state or content)
1640 flags = self.flags(rev)
1641
1642 if validated and flags == REVIDX_DEFAULT_FLAGS:
1643 # no extra flags set, no flag processor runs, text = rawtext
1644 return rawtext
1645
1646 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1647 if validatehash:
1648 self.checkhash(text, node, rev=rev)
1649 if not validated:
1650 self._revisioncache = (node, rev, rawtext)
1651
1652 return text
1653
1654 def _rawtext(self, node, rev, _df=None):
1655 """return the possibly unvalidated rawtext for a revision
1656
1657 returns (rev, rawtext, validated)
1658 """
1659
1660 # revision in the cache (could be useful to apply delta)
1661 cachedrev = None
1634 1662 # An intermediate text to apply deltas to
1635 1663 basetext = None
1636 # Do we need to update the rawtext cache once it is validated ?
1637 needcaching = True
1638 1664
1639 1665 # Check if we have the entry in cache
1640 1666 # The cache entry looks like (node, rev, rawtext)
1641 1667 if self._revisioncache:
1642 1668 if self._revisioncache[0] == node:
1643 needcaching = False
1644 # _cache only stores rawtext
1645 # rawtext is reusable. but we might need to run flag processors
1646 rawtext = self._revisioncache[2]
1647 if raw:
1648 # if we don't want to process the raw text and that raw
1649 # text is cached, we can exit early.
1650 return rawtext
1651 # duplicated, but good for perf
1652 if rev is None:
1653 rev = self.rev(node)
1654 if flags is None:
1655 flags = self.flags(rev)
1656 # no extra flags set, no flag processor runs, text = rawtext
1657 if flags == REVIDX_DEFAULT_FLAGS:
1658 return rawtext
1659
1669 return (rev, self._revisioncache[2], True)
1660 1670 cachedrev = self._revisioncache[1]
1661 1671
1662 # look up what we need to read
1663 if rawtext is None:
1664 if rev is None:
1665 rev = self.rev(node)
1666
1667 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1668 if stopped:
1669 basetext = self._revisioncache[2]
1670
1671 # drop cache to save memory
1672 self._revisioncache = None
1673
1674 targetsize = None
1675 rawsize = self.index[rev][2]
1676 if 0 <= rawsize:
1677 targetsize = 4 * rawsize
1678
1679 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1680 if basetext is None:
1681 basetext = bytes(bins[0])
1682 bins = bins[1:]
1683
1684 rawtext = mdiff.patches(basetext, bins)
1685 del basetext # let us have a chance to free memory early
1686
1687 if flags is None:
1688 if rev is None:
1689 rev = self.rev(node)
1690 flags = self.flags(rev)
1691
1692 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1693 if validatehash:
1694 self.checkhash(text, node, rev=rev)
1695
1696 if needcaching:
1697 self._revisioncache = (node, rev, rawtext)
1698
1699 return text
1672 if rev is None:
1673 rev = self.rev(node)
1674
1675 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1676 if stopped:
1677 basetext = self._revisioncache[2]
1678
1679 # drop the cache to save memory; the caller is expected to
1680 # update self._revisioncache after validating the text
1681 self._revisioncache = None
1682
1683 targetsize = None
1684 rawsize = self.index[rev][2]
1685 if 0 <= rawsize:
1686 targetsize = 4 * rawsize
1687
1688 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1689 if basetext is None:
1690 basetext = bytes(bins[0])
1691 bins = bins[1:]
1692
1693 rawtext = mdiff.patches(basetext, bins)
1694 del basetext # let us have a chance to free memory early
1695 return (rev, rawtext, False)
1700 1696
1701 1697 def rawdata(self, nodeorrev, _df=None):
1702 1698 """return an uncompressed raw data of a given node or revision number.
1703 1699
1704 1700 _df - an existing file handle to read from. (internal-only)
1705 1701 """
1706 1702 return self._revisiondata(nodeorrev, _df, raw=True)
1707 1703
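# Illustrative sketch (assuming a revlog instance ``rl``): ``rawdata`` is
# the ``raw=True`` spelling of ``_revisiondata``, and the ``_rawtext``
# helper it relies on reports whether its result was already validated:
#
#   rev, rawtext, validated = rl._rawtext(node, None)
#   # validated is True only on a cache hit; otherwise the caller is
#   # expected to hash-check before caching (node, rev, rawtext)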
1708 1704 def hash(self, text, p1, p2):
1709 1705 """Compute a node hash.
1710 1706
1711 1707 Available as a function so that subclasses can replace the hash
1712 1708 as needed.
1713 1709 """
1714 1710 return storageutil.hashrevisionsha1(text, p1, p2)
1715 1711
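# Illustrative sketch of the default behavior (assumption:
# storageutil.hashrevisionsha1 combines the parents in sorted order, making
# the node hash independent of parent order):
#
#   import hashlib
#   a, b = sorted([p1, p2])
#   node = hashlib.sha1(a + b + text).digest()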
1716 1712 def _processflags(self, text, flags, operation, raw=False):
1717 1713 """Inspect revision data flags and applies transforms defined by
1718 1714 registered flag processors.
1719 1715
1720 1716 ``text`` - the revision data to process
1721 1717 ``flags`` - the revision flags
1722 1718 ``operation`` - the operation being performed (read or write)
1723 1719 ``raw`` - an optional argument describing if the raw transform should be
1724 1720 applied.
1725 1721
1726 1722 This method processes the flags in the order (or reverse order if
1727 1723 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1728 1724 flag processors registered for present flags. The order of flags defined
1729 1725 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1730 1726
1731 1727 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1732 1728 processed text and ``validatehash`` is a bool indicating whether the
1733 1729 returned text should be checked for hash integrity.
1734 1730
1735 1731 Note: If the ``raw`` argument is set, it has precedence over the
1736 1732 operation and will only update the value of ``validatehash``.
1737 1733 """
1738 1734 # fast path: no flag processors will run
1739 1735 if flags == 0:
1740 1736 return text, True
1741 1737 if operation not in ('read', 'write'):
1742 1738 raise error.ProgrammingError(_("invalid '%s' operation") %
1743 1739 operation)
1744 1740 # Check all flags are known.
1745 1741 if flags & ~flagutil.REVIDX_KNOWN_FLAGS:
1746 1742 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1747 1743 (flags & ~flagutil.REVIDX_KNOWN_FLAGS))
1748 1744 validatehash = True
1749 1745 # Depending on the operation (read or write), the order might be
1750 1746 # reversed due to non-commutative transforms.
1751 1747 orderedflags = REVIDX_FLAGS_ORDER
1752 1748 if operation == 'write':
1753 1749 orderedflags = reversed(orderedflags)
1754 1750
1755 1751 for flag in orderedflags:
1756 1752 # If a flagprocessor has been registered for a known flag, apply the
1757 1753 # related operation transform and update result tuple.
1758 1754 if flag & flags:
1759 1755 vhash = True
1760 1756
1761 1757 if flag not in self._flagprocessors:
1762 1758 message = _("missing processor for flag '%#x'") % (flag)
1763 1759 raise error.RevlogError(message)
1764 1760
1765 1761 processor = self._flagprocessors[flag]
1766 1762 if processor is not None:
1767 1763 readtransform, writetransform, rawtransform = processor
1768 1764
1769 1765 if raw:
1770 1766 vhash = rawtransform(self, text)
1771 1767 elif operation == 'read':
1772 1768 text, vhash = readtransform(self, text)
1773 1769 else: # write operation
1774 1770 text, vhash = writetransform(self, text)
1775 1771 validatehash = validatehash and vhash
1776 1772
1777 1773 return text, validatehash
1778 1774
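# Illustrative usage (assuming a revlog instance ``rl``): a 'write'
# transform followed by a 'read' transform is expected to round-trip, and
# ``validatehash`` tells the caller whether checkhash() must still run:
#
#   rawtext, vhash = rl._processflags(text, flags, 'write')
#   text2, vhash2 = rl._processflags(rawtext, flags, 'read')
#   assert text2 == text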
1779 1775 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1780 1776 """Check node hash integrity.
1781 1777
1782 1778 Available as a function so that subclasses can extend hash mismatch
1783 1779 behaviors as needed.
1784 1780 """
1785 1781 try:
1786 1782 if p1 is None and p2 is None:
1787 1783 p1, p2 = self.parents(node)
1788 1784 if node != self.hash(text, p1, p2):
1789 1785 # Clear the revision cache on hash failure. The revision cache
1790 1786 # only stores the raw revision and clearing the cache does have
1791 1787 # the side-effect that we won't have a cache hit when the raw
1792 1788 # revision data is accessed. But this case should be rare and
1793 1789 # it is extra work to teach the cache about the hash
1794 1790 # verification state.
1795 1791 if self._revisioncache and self._revisioncache[0] == node:
1796 1792 self._revisioncache = None
1797 1793
1798 1794 revornode = rev
1799 1795 if revornode is None:
1800 1796 revornode = templatefilters.short(hex(node))
1801 1797 raise error.RevlogError(_("integrity check failed on %s:%s")
1802 1798 % (self.indexfile, pycompat.bytestr(revornode)))
1803 1799 except error.RevlogError:
1804 1800 if self._censorable and storageutil.iscensoredtext(text):
1805 1801 raise error.CensoredNodeError(self.indexfile, node, text)
1806 1802 raise
1807 1803
1808 1804 def _enforceinlinesize(self, tr, fp=None):
1809 1805 """Check if the revlog is too big for inline and convert if so.
1810 1806
1811 1807 This should be called after revisions are added to the revlog. If the
1812 1808 revlog has grown too large to be an inline revlog, it will convert it
1813 1809 to use multiple index and data files.
1814 1810 """
1815 1811 tiprev = len(self) - 1
1816 1812 if (not self._inline or
1817 1813 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1818 1814 return
1819 1815
1820 1816 trinfo = tr.find(self.indexfile)
1821 1817 if trinfo is None:
1822 1818 raise error.RevlogError(_("%s not found in the transaction")
1823 1819 % self.indexfile)
1824 1820
1825 1821 trindex = trinfo[2]
1826 1822 if trindex is not None:
1827 1823 dataoff = self.start(trindex)
1828 1824 else:
1829 1825 # revlog was stripped at start of transaction, use all leftover data
1830 1826 trindex = len(self) - 1
1831 1827 dataoff = self.end(tiprev)
1832 1828
1833 1829 tr.add(self.datafile, dataoff)
1834 1830
1835 1831 if fp:
1836 1832 fp.flush()
1837 1833 fp.close()
1838 1834 # We can't use the cached file handle after close(). So prevent
1839 1835 # its usage.
1840 1836 self._writinghandles = None
1841 1837
1842 1838 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1843 1839 for r in self:
1844 1840 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1845 1841
1846 1842 with self._indexfp('w') as fp:
1847 1843 self.version &= ~FLAG_INLINE_DATA
1848 1844 self._inline = False
1849 1845 io = self._io
1850 1846 for i in self:
1851 1847 e = io.packentry(self.index[i], self.node, self.version, i)
1852 1848 fp.write(e)
1853 1849
1854 1850 # the temp file replaces the real index when we exit the context
1855 1851 # manager
1856 1852
1857 1853 tr.replace(self.indexfile, trindex * self._io.size)
1858 1854 self._chunkclear()
1859 1855
1860 1856 def _nodeduplicatecallback(self, transaction, node):
1861 1857 """called when trying to add a node already stored.
1862 1858 """
1863 1859
1864 1860 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1865 1861 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1866 1862 """add a revision to the log
1867 1863
1868 1864 text - the revision data to add
1869 1865 transaction - the transaction object used for rollback
1870 1866 link - the linkrev data to add
1871 1867 p1, p2 - the parent nodeids of the revision
1872 1868 cachedelta - an optional precomputed delta
1873 1869 node - nodeid of revision; typically node is not specified, and it is
1874 1870 computed by default as hash(text, p1, p2); however, subclasses might
1875 1871 use a different hashing method (and override checkhash() in that case)
1876 1872 flags - the known flags to set on the revision
1877 1873 deltacomputer - an optional deltacomputer instance shared between
1878 1874 multiple calls
1879 1875 """
1880 1876 if link == nullrev:
1881 1877 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1882 1878 % self.indexfile)
1883 1879
1884 1880 if flags:
1885 1881 node = node or self.hash(text, p1, p2)
1886 1882
1887 1883 rawtext, validatehash = self._processflags(text, flags, 'write')
1888 1884
1889 1885 # If the flag processor modifies the revision data, ignore any provided
1890 1886 # cachedelta.
1891 1887 if rawtext != text:
1892 1888 cachedelta = None
1893 1889
1894 1890 if len(rawtext) > _maxentrysize:
1895 1891 raise error.RevlogError(
1896 1892 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1897 1893 % (self.indexfile, len(rawtext)))
1898 1894
1899 1895 node = node or self.hash(rawtext, p1, p2)
1900 1896 if node in self.nodemap:
1901 1897 return node
1902 1898
1903 1899 if validatehash:
1904 1900 self.checkhash(rawtext, node, p1=p1, p2=p2)
1905 1901
1906 1902 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1907 1903 flags, cachedelta=cachedelta,
1908 1904 deltacomputer=deltacomputer)
1909 1905
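# Illustrative usage (assuming names ``rl``, ``tr`` and ``linkrev``):
# callers normally pass plain text and let the default hashing derive the
# node, as the docstring above describes:
#
#   node = rl.addrevision(text, tr, linkrev, p1node, p2node)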
1910 1906 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1911 1907 cachedelta=None, deltacomputer=None):
1912 1908 """add a raw revision with known flags, node and parents
1913 1909 useful when reusing a revision not stored in this revlog (ex: received
1914 1910 over wire, or read from an external bundle).
1915 1911 """
1916 1912 dfh = None
1917 1913 if not self._inline:
1918 1914 dfh = self._datafp("a+")
1919 1915 ifh = self._indexfp("a+")
1920 1916 try:
1921 1917 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1922 1918 flags, cachedelta, ifh, dfh,
1923 1919 deltacomputer=deltacomputer)
1924 1920 finally:
1925 1921 if dfh:
1926 1922 dfh.close()
1927 1923 ifh.close()
1928 1924
1929 1925 def compress(self, data):
1930 1926 """Generate a possibly-compressed representation of data."""
1931 1927 if not data:
1932 1928 return '', data
1933 1929
1934 1930 compressed = self._compressor.compress(data)
1935 1931
1936 1932 if compressed:
1937 1933 # The revlog compressor added the header in the returned data.
1938 1934 return '', compressed
1939 1935
1940 1936 if data[0:1] == '\0':
1941 1937 return '', data
1942 1938 return 'u', data
1943 1939
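# Illustrative sketch (assuming a revlog instance ``rl``): the returned
# 2-tuple is (header, data); an empty header means the compressed payload
# already embeds its own header, while 'u' marks literal text that did not
# compress:
#
#   header, data = rl.compress(rawtext)
#   chunk = header + data               # what is written to the revlog
#   assert bytes(rl.decompress(chunk)) == rawtext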
1944 1940 def decompress(self, data):
1945 1941 """Decompress a revlog chunk.
1946 1942
1947 1943 The chunk is expected to begin with a header identifying the
1948 1944 format type so it can be routed to an appropriate decompressor.
1949 1945 """
1950 1946 if not data:
1951 1947 return data
1952 1948
1953 1949 # Revlogs are read much more frequently than they are written and many
1954 1950 # chunks only take microseconds to decompress, so performance is
1955 1951 # important here.
1956 1952 #
1957 1953 # We can make a few assumptions about revlogs:
1958 1954 #
1959 1955 # 1) the majority of chunks will be compressed (as opposed to inline
1960 1956 # raw data).
1961 1957 # 2) decompressing *any* data will likely be at least 10x slower than
1962 1958 # returning raw inline data.
1963 1959 # 3) we want to prioritize common and officially supported compression
1964 1960 # engines
1965 1961 #
1966 1962 # It follows that we want to optimize for "decompress compressed data
1967 1963 # when encoded with common and officially supported compression engines"
1968 1964 # case over "raw data" and "data encoded by less common or non-official
1969 1965 # compression engines." That is why we have the inline lookup first
1970 1966 # followed by the compengines lookup.
1971 1967 #
1972 1968 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1973 1969 # compressed chunks. And this matters for changelog and manifest reads.
1974 1970 t = data[0:1]
1975 1971
1976 1972 if t == 'x':
1977 1973 try:
1978 1974 return _zlibdecompress(data)
1979 1975 except zlib.error as e:
1980 1976 raise error.RevlogError(_('revlog decompress error: %s') %
1981 1977 stringutil.forcebytestr(e))
1982 1978 # '\0' is more common than 'u' so it goes first.
1983 1979 elif t == '\0':
1984 1980 return data
1985 1981 elif t == 'u':
1986 1982 return util.buffer(data, 1)
1987 1983
1988 1984 try:
1989 1985 compressor = self._decompressors[t]
1990 1986 except KeyError:
1991 1987 try:
1992 1988 engine = util.compengines.forrevlogheader(t)
1993 1989 compressor = engine.revlogcompressor(self._compengineopts)
1994 1990 self._decompressors[t] = compressor
1995 1991 except KeyError:
1996 1992 raise error.RevlogError(_('unknown compression type %r') % t)
1997 1993
1998 1994 return compressor.decompress(data)
1999 1995
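# Illustrative routing summary (first byte of a stored chunk, as handled
# above):
#
#   '\0' -> raw data, returned verbatim
#   'u'  -> uncompressed data behind a one-byte marker
#   'x'  -> zlib stream (the common case)
#   other -> resolved via util.compengines.forrevlogheader()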
2000 1996 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2001 1997 cachedelta, ifh, dfh, alwayscache=False,
2002 1998 deltacomputer=None):
2003 1999 """internal function to add revisions to the log
2004 2000
2005 2001 see addrevision for argument descriptions.
2006 2002
2007 2003 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2008 2004
2009 2005 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2010 2006 be used.
2011 2007
2012 2008 invariants:
2013 2009 - rawtext is optional (can be None); if not set, cachedelta must be set.
2014 2010 if both are set, they must correspond to each other.
2015 2011 """
2016 2012 if node == nullid:
2017 2013 raise error.RevlogError(_("%s: attempt to add null revision") %
2018 2014 self.indexfile)
2019 2015 if node == wdirid or node in wdirfilenodeids:
2020 2016 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2021 2017 self.indexfile)
2022 2018
2023 2019 if self._inline:
2024 2020 fh = ifh
2025 2021 else:
2026 2022 fh = dfh
2027 2023
2028 2024 btext = [rawtext]
2029 2025
2030 2026 curr = len(self)
2031 2027 prev = curr - 1
2032 2028 offset = self.end(prev)
2033 2029 p1r, p2r = self.rev(p1), self.rev(p2)
2034 2030
2035 2031 # full versions are inserted when the needed deltas
2036 2032 # become comparable to the uncompressed text
2037 2033 if rawtext is None:
2038 2034 # we need the rawtext size before it was changed by flag processors,
2039 2035 # i.e. the non-raw size. use revlog explicitly to avoid filelog's extra
2040 2036 # logic that might remove metadata size.
2041 2037 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2042 2038 cachedelta[1])
2043 2039 else:
2044 2040 textlen = len(rawtext)
2045 2041
2046 2042 if deltacomputer is None:
2047 2043 deltacomputer = deltautil.deltacomputer(self)
2048 2044
2049 2045 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2050 2046
2051 2047 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2052 2048
2053 2049 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2054 2050 deltainfo.base, link, p1r, p2r, node)
2055 2051 self.index.append(e)
2056 2052 self.nodemap[node] = curr
2057 2053
2058 2054 # Reset the pure node cache start lookup offset to account for new
2059 2055 # revision.
2060 2056 if self._nodepos is not None:
2061 2057 self._nodepos = curr
2062 2058
2063 2059 entry = self._io.packentry(e, self.node, self.version, curr)
2064 2060 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2065 2061 link, offset)
2066 2062
2067 2063 rawtext = btext[0]
2068 2064
2069 2065 if alwayscache and rawtext is None:
2070 2066 rawtext = deltacomputer.buildtext(revinfo, fh)
2071 2067
2072 2068 if type(rawtext) == bytes: # only accept immutable objects
2073 2069 self._revisioncache = (node, curr, rawtext)
2074 2070 self._chainbasecache[curr] = deltainfo.chainbase
2075 2071 return node
2076 2072
2077 2073 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2078 2074 # Files opened in a+ mode have inconsistent behavior on various
2079 2075 # platforms. Windows requires that a file positioning call be made
2080 2076 # when the file handle transitions between reads and writes. See
2081 2077 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2082 2078 # platforms, Python or the platform itself can be buggy. Some versions
2083 2079 # of Solaris have been observed to not append at the end of the file
2084 2080 # if the file was seeked to before the end. See issue4943 for more.
2085 2081 #
2086 2082 # We work around this issue by inserting a seek() before writing.
2087 2083 # Note: This is likely not necessary on Python 3. However, because
2088 2084 # the file handle is reused for reads and may be seeked there, we need
2089 2085 # to be careful before changing this.
2090 2086 ifh.seek(0, os.SEEK_END)
2091 2087 if dfh:
2092 2088 dfh.seek(0, os.SEEK_END)
2093 2089
2094 2090 curr = len(self) - 1
2095 2091 if not self._inline:
2096 2092 transaction.add(self.datafile, offset)
2097 2093 transaction.add(self.indexfile, curr * len(entry))
2098 2094 if data[0]:
2099 2095 dfh.write(data[0])
2100 2096 dfh.write(data[1])
2101 2097 ifh.write(entry)
2102 2098 else:
2103 2099 offset += curr * self._io.size
2104 2100 transaction.add(self.indexfile, offset, curr)
2105 2101 ifh.write(entry)
2106 2102 ifh.write(data[0])
2107 2103 ifh.write(data[1])
2108 2104 self._enforceinlinesize(transaction, ifh)
2109 2105
2110 2106 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2111 2107 """
2112 2108 add a delta group
2113 2109
2114 2110 given a set of deltas, add them to the revision log. the
2115 2111 first delta is against its parent, which should be in our
2116 2112 log, the rest are against the previous delta.
2117 2113
2118 2114 If ``addrevisioncb`` is defined, it will be called with arguments of
2119 2115 this revlog and the node that was added.
2120 2116 """
2121 2117
2122 2118 if self._writinghandles:
2123 2119 raise error.ProgrammingError('cannot nest addgroup() calls')
2124 2120
2125 2121 nodes = []
2126 2122
2127 2123 r = len(self)
2128 2124 end = 0
2129 2125 if r:
2130 2126 end = self.end(r - 1)
2131 2127 ifh = self._indexfp("a+")
2132 2128 isize = r * self._io.size
2133 2129 if self._inline:
2134 2130 transaction.add(self.indexfile, end + isize, r)
2135 2131 dfh = None
2136 2132 else:
2137 2133 transaction.add(self.indexfile, isize, r)
2138 2134 transaction.add(self.datafile, end)
2139 2135 dfh = self._datafp("a+")
2140 2136 def flush():
2141 2137 if dfh:
2142 2138 dfh.flush()
2143 2139 ifh.flush()
2144 2140
2145 2141 self._writinghandles = (ifh, dfh)
2146 2142
2147 2143 try:
2148 2144 deltacomputer = deltautil.deltacomputer(self)
2149 2145 # loop through our set of deltas
2150 2146 for data in deltas:
2151 2147 node, p1, p2, linknode, deltabase, delta, flags = data
2152 2148 link = linkmapper(linknode)
2153 2149 flags = flags or REVIDX_DEFAULT_FLAGS
2154 2150
2155 2151 nodes.append(node)
2156 2152
2157 2153 if node in self.nodemap:
2158 2154 self._nodeduplicatecallback(transaction, node)
2159 2155 # this can happen if two branches make the same change
2160 2156 continue
2161 2157
2162 2158 for p in (p1, p2):
2163 2159 if p not in self.nodemap:
2164 2160 raise error.LookupError(p, self.indexfile,
2165 2161 _('unknown parent'))
2166 2162
2167 2163 if deltabase not in self.nodemap:
2168 2164 raise error.LookupError(deltabase, self.indexfile,
2169 2165 _('unknown delta base'))
2170 2166
2171 2167 baserev = self.rev(deltabase)
2172 2168
2173 2169 if baserev != nullrev and self.iscensored(baserev):
2174 2170 # if base is censored, delta must be full replacement in a
2175 2171 # single patch operation
2176 2172 hlen = struct.calcsize(">lll")
2177 2173 oldlen = self.rawsize(baserev)
2178 2174 newlen = len(delta) - hlen
2179 2175 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2180 2176 raise error.CensoredBaseError(self.indexfile,
2181 2177 self.node(baserev))
2182 2178
2183 2179 if not flags and self._peek_iscensored(baserev, delta, flush):
2184 2180 flags |= REVIDX_ISCENSORED
2185 2181
2186 2182 # We assume consumers of addrevisioncb will want to retrieve
2187 2183 # the added revision, which will require a call to
2188 2184 # revision(). revision() will fast path if there is a cache
2189 2185 # hit. So, we tell _addrevision() to always cache in this case.
2190 2186 # We're only using addgroup() in the context of changegroup
2191 2187 # generation so the revision data can always be handled as raw
2192 2188 # by the flagprocessor.
2193 2189 self._addrevision(node, None, transaction, link,
2194 2190 p1, p2, flags, (baserev, delta),
2195 2191 ifh, dfh,
2196 2192 alwayscache=bool(addrevisioncb),
2197 2193 deltacomputer=deltacomputer)
2198 2194
2199 2195 if addrevisioncb:
2200 2196 addrevisioncb(self, node)
2201 2197
2202 2198 if not dfh and not self._inline:
2203 2199 # addrevision switched from inline to conventional
2204 2200 # reopen the index
2205 2201 ifh.close()
2206 2202 dfh = self._datafp("a+")
2207 2203 ifh = self._indexfp("a+")
2208 2204 self._writinghandles = (ifh, dfh)
2209 2205 finally:
2210 2206 self._writinghandles = None
2211 2207
2212 2208 if dfh:
2213 2209 dfh.close()
2214 2210 ifh.close()
2215 2211
2216 2212 return nodes
2217 2213
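# Illustrative sketch of one ``deltas`` item, matching the unpacking in the
# loop above (``linkmapper`` translates linknodes into linkrevs; ``rl`` and
# ``tr`` are assumed names):
#
#   data = (node, p1, p2, linknode, deltabase, delta, flags)
#   nodes = rl.addgroup([data], linkmapper, tr)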
2218 2214 def iscensored(self, rev):
2219 2215 """Check if a file revision is censored."""
2220 2216 if not self._censorable:
2221 2217 return False
2222 2218
2223 2219 return self.flags(rev) & REVIDX_ISCENSORED
2224 2220
2225 2221 def _peek_iscensored(self, baserev, delta, flush):
2226 2222 """Quickly check if a delta produces a censored revision."""
2227 2223 if not self._censorable:
2228 2224 return False
2229 2225
2230 2226 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2231 2227
2232 2228 def getstrippoint(self, minlink):
2233 2229 """find the minimum rev that must be stripped to strip the linkrev
2234 2230
2235 2231 Returns a tuple containing the minimum rev and a set of all revs that
2236 2232 have linkrevs that will be broken by this strip.
2237 2233 """
2238 2234 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2239 2235 self.headrevs(),
2240 2236 self.linkrev, self.parentrevs)
2241 2237
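# Illustrative usage (assuming names ``rl``, ``tr`` and ``minlink``): find
# the truncation point first, then strip once the broken linkrevs have been
# saved by the caller:
#
#   rev, broken = rl.getstrippoint(minlink)
#   if rev < len(rl):
#       rl.strip(minlink, tr)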
2242 2238 def strip(self, minlink, transaction):
2243 2239 """truncate the revlog on the first revision with a linkrev >= minlink
2244 2240
2245 2241 This function is called when we're stripping revision minlink and
2246 2242 its descendants from the repository.
2247 2243
2248 2244 We have to remove all revisions with linkrev >= minlink, because
2249 2245 the equivalent changelog revisions will be renumbered after the
2250 2246 strip.
2251 2247
2252 2248 So we truncate the revlog on the first of these revisions, and
2253 2249 trust that the caller has saved the revisions that shouldn't be
2254 2250 removed and that it'll re-add them after this truncation.
2255 2251 """
2256 2252 if len(self) == 0:
2257 2253 return
2258 2254
2259 2255 rev, _ = self.getstrippoint(minlink)
2260 2256 if rev == len(self):
2261 2257 return
2262 2258
2263 2259 # first truncate the files on disk
2264 2260 end = self.start(rev)
2265 2261 if not self._inline:
2266 2262 transaction.add(self.datafile, end)
2267 2263 end = rev * self._io.size
2268 2264 else:
2269 2265 end += rev * self._io.size
2270 2266
2271 2267 transaction.add(self.indexfile, end)
2272 2268
2273 2269 # then reset internal state in memory to forget those revisions
2274 2270 self._revisioncache = None
2275 2271 self._chaininfocache = {}
2276 2272 self._chunkclear()
2277 2273 for x in pycompat.xrange(rev, len(self)):
2278 2274 del self.nodemap[self.node(x)]
2279 2275
2280 2276 del self.index[rev:-1]
2281 2277 self._nodepos = None
2282 2278
2283 2279 def checksize(self):
2284 2280 """Check size of index and data files
2285 2281
2286 2282 return a (dd, di) tuple.
2287 2283 - dd: extra bytes for the "data" file
2288 2284 - di: extra bytes for the "index" file
2289 2285
2290 2286 A healthy revlog will return (0, 0).
2291 2287 """
2292 2288 expected = 0
2293 2289 if len(self):
2294 2290 expected = max(0, self.end(len(self) - 1))
2295 2291
2296 2292 try:
2297 2293 with self._datafp() as f:
2298 2294 f.seek(0, io.SEEK_END)
2299 2295 actual = f.tell()
2300 2296 dd = actual - expected
2301 2297 except IOError as inst:
2302 2298 if inst.errno != errno.ENOENT:
2303 2299 raise
2304 2300 dd = 0
2305 2301
2306 2302 try:
2307 2303 f = self.opener(self.indexfile)
2308 2304 f.seek(0, io.SEEK_END)
2309 2305 actual = f.tell()
2310 2306 f.close()
2311 2307 s = self._io.size
2312 2308 i = max(0, actual // s)
2313 2309 di = actual - (i * s)
2314 2310 if self._inline:
2315 2311 databytes = 0
2316 2312 for r in self:
2317 2313 databytes += max(0, self.length(r))
2318 2314 dd = 0
2319 2315 di = actual - len(self) * s - databytes
2320 2316 except IOError as inst:
2321 2317 if inst.errno != errno.ENOENT:
2322 2318 raise
2323 2319 di = 0
2324 2320
2325 2321 return (dd, di)
2326 2322
2327 2323 def files(self):
2328 2324 res = [self.indexfile]
2329 2325 if not self._inline:
2330 2326 res.append(self.datafile)
2331 2327 return res
2332 2328
2333 2329 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2334 2330 assumehaveparentrevisions=False,
2335 2331 deltamode=repository.CG_DELTAMODE_STD):
2336 2332 if nodesorder not in ('nodes', 'storage', 'linear', None):
2337 2333 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2338 2334 nodesorder)
2339 2335
2340 2336 if nodesorder is None and not self._generaldelta:
2341 2337 nodesorder = 'storage'
2342 2338
2343 2339 if (not self._storedeltachains and
2344 2340 deltamode != repository.CG_DELTAMODE_PREV):
2345 2341 deltamode = repository.CG_DELTAMODE_FULL
2346 2342
2347 2343 return storageutil.emitrevisions(
2348 2344 self, nodes, nodesorder, revlogrevisiondelta,
2349 2345 deltaparentfn=self.deltaparent,
2350 2346 candeltafn=self.candelta,
2351 2347 rawsizefn=self.rawsize,
2352 2348 revdifffn=self.revdiff,
2353 2349 flagsfn=self.flags,
2354 2350 deltamode=deltamode,
2355 2351 revisiondata=revisiondata,
2356 2352 assumehaveparentrevisions=assumehaveparentrevisions)
2357 2353
2358 2354 DELTAREUSEALWAYS = 'always'
2359 2355 DELTAREUSESAMEREVS = 'samerevs'
2360 2356 DELTAREUSENEVER = 'never'
2361 2357
2362 2358 DELTAREUSEFULLADD = 'fulladd'
2363 2359
2364 2360 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2365 2361
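# Illustrative usage of the policies above (assuming source/destination
# revlogs ``src`` and ``dst`` and a transaction ``tr``): force every delta
# to be recomputed during a format upgrade:
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)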
2366 2362 def clone(self, tr, destrevlog, addrevisioncb=None,
2367 2363 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2368 2364 """Copy this revlog to another, possibly with format changes.
2369 2365
2370 2366 The destination revlog will contain the same revisions and nodes.
2371 2367 However, it may not be bit-for-bit identical due to e.g. delta encoding
2372 2368 differences.
2373 2369
2374 2370 The ``deltareuse`` argument controls how deltas from the existing revlog
2375 2371 are preserved in the destination revlog. The argument can have the
2376 2372 following values:
2377 2373
2378 2374 DELTAREUSEALWAYS
2379 2375 Deltas will always be reused (if possible), even if the destination
2380 2376 revlog would not select the same revisions for the delta. This is the
2381 2377 fastest mode of operation.
2382 2378 DELTAREUSESAMEREVS
2383 2379 Deltas will be reused if the destination revlog would pick the same
2384 2380 revisions for the delta. This mode strikes a balance between speed
2385 2381 and optimization.
2386 2382 DELTAREUSENEVER
2387 2383 Deltas will never be reused. This is the slowest mode of execution.
2388 2384 This mode can be used to recompute deltas (e.g. if the diff/delta
2389 2385 algorithm changes).
2390 2386
2391 2387 Delta computation can be slow, so the choice of delta reuse policy can
2392 2388 significantly affect run time.
2393 2389
2394 2390 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2395 2391 two extremes. Deltas will be reused if they are appropriate. But if the
2396 2392 delta could choose a better revision, it will do so. This means if you
2397 2393 are converting a non-generaldelta revlog to a generaldelta revlog,
2398 2394 deltas will be recomputed if the delta's parent isn't a parent of the
2399 2395 revision.
2400 2396
2401 2397 In addition to the delta policy, the ``forcedeltabothparents``
2402 2398 argument controls whether to force computing deltas against both parents
2403 2399 for merges. By default, the destination revlog's current setting is used.
2404 2400 """
2405 2401 if deltareuse not in self.DELTAREUSEALL:
2406 2402 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2407 2403
2408 2404 if len(destrevlog):
2409 2405 raise ValueError(_('destination revlog is not empty'))
2410 2406
2411 2407 if getattr(self, 'filteredrevs', None):
2412 2408 raise ValueError(_('source revlog has filtered revisions'))
2413 2409 if getattr(destrevlog, 'filteredrevs', None):
2414 2410 raise ValueError(_('destination revlog has filtered revisions'))
2415 2411
2416 2412 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2417 2413 # if possible.
2418 2414 oldlazydelta = destrevlog._lazydelta
2419 2415 oldlazydeltabase = destrevlog._lazydeltabase
2420 2416 oldamd = destrevlog._deltabothparents
2421 2417
2422 2418 try:
2423 2419 if deltareuse == self.DELTAREUSEALWAYS:
2424 2420 destrevlog._lazydeltabase = True
2425 2421 destrevlog._lazydelta = True
2426 2422 elif deltareuse == self.DELTAREUSESAMEREVS:
2427 2423 destrevlog._lazydeltabase = False
2428 2424 destrevlog._lazydelta = True
2429 2425 elif deltareuse == self.DELTAREUSENEVER:
2430 2426 destrevlog._lazydeltabase = False
2431 2427 destrevlog._lazydelta = False
2432 2428
2433 2429 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2434 2430
2435 2431 deltacomputer = deltautil.deltacomputer(destrevlog)
2436 2432 index = self.index
2437 2433 for rev in self:
2438 2434 entry = index[rev]
2439 2435
2440 2436 # Some classes override linkrev to take filtered revs into
2441 2437 # account. Use raw entry from index.
2442 2438 flags = entry[0] & 0xffff
2443 2439 linkrev = entry[4]
2444 2440 p1 = index[entry[5]][7]
2445 2441 p2 = index[entry[6]][7]
2446 2442 node = entry[7]
2447 2443
2448 2444 # (Possibly) reuse the delta from the revlog if allowed and
2449 2445 # the revlog chunk is a delta.
2450 2446 cachedelta = None
2451 2447 rawtext = None
2452 2448 if (deltareuse != self.DELTAREUSEFULLADD
2453 2449 and destrevlog._lazydelta):
2454 2450 dp = self.deltaparent(rev)
2455 2451 if dp != nullrev:
2456 2452 cachedelta = (dp, bytes(self._chunk(rev)))
2457 2453
2458 2454 if not cachedelta:
2459 2455 rawtext = self.rawdata(rev)
2460 2456
2461 2457
2462 2458 if deltareuse == self.DELTAREUSEFULLADD:
2463 2459 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2464 2460 cachedelta=cachedelta,
2465 2461 node=node, flags=flags,
2466 2462 deltacomputer=deltacomputer)
2467 2463 else:
2468 2464 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2469 2465 checkambig=False)
2470 2466 dfh = None
2471 2467 if not destrevlog._inline:
2472 2468 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2473 2469 try:
2474 2470 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2475 2471 p2, flags, cachedelta, ifh, dfh,
2476 2472 deltacomputer=deltacomputer)
2477 2473 finally:
2478 2474 if dfh:
2479 2475 dfh.close()
2480 2476 ifh.close()
2481 2477
2482 2478 if addrevisioncb:
2483 2479 addrevisioncb(self, rev, node)
2484 2480 finally:
2485 2481 destrevlog._lazydelta = oldlazydelta
2486 2482 destrevlog._lazydeltabase = oldlazydeltabase
2487 2483 destrevlog._deltabothparents = oldamd
2488 2484
2489 2485 def censorrevision(self, tr, censornode, tombstone=b''):
2490 2486 if (self.version & 0xFFFF) == REVLOGV0:
2491 2487 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2492 2488 self.version)
2493 2489
2494 2490 censorrev = self.rev(censornode)
2495 2491 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2496 2492
2497 2493 if len(tombstone) > self.rawsize(censorrev):
2498 2494 raise error.Abort(_('censor tombstone must be no longer than '
2499 2495 'censored data'))
2500 2496
2501 2497 # Rewriting the revlog in place is hard. Our strategy for censoring is
2502 2498 # to create a new revlog, copy all revisions to it, then replace the
2503 2499 # revlogs on transaction close.
2504 2500
2505 2501 newindexfile = self.indexfile + b'.tmpcensored'
2506 2502 newdatafile = self.datafile + b'.tmpcensored'
2507 2503
2508 2504 # This is a bit dangerous. We could easily have a mismatch of state.
2509 2505 newrl = revlog(self.opener, newindexfile, newdatafile,
2510 2506 censorable=True)
2511 2507 newrl.version = self.version
2512 2508 newrl._generaldelta = self._generaldelta
2513 2509 newrl._io = self._io
2514 2510
2515 2511 for rev in self.revs():
2516 2512 node = self.node(rev)
2517 2513 p1, p2 = self.parents(node)
2518 2514
2519 2515 if rev == censorrev:
2520 2516 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2521 2517 p1, p2, censornode, REVIDX_ISCENSORED)
2522 2518
2523 2519 if newrl.deltaparent(rev) != nullrev:
2524 2520 raise error.Abort(_('censored revision stored as delta; '
2525 2521 'cannot censor'),
2526 2522 hint=_('censoring of revlogs is not '
2527 2523 'fully implemented; please report '
2528 2524 'this bug'))
2529 2525 continue
2530 2526
2531 2527 if self.iscensored(rev):
2532 2528 if self.deltaparent(rev) != nullrev:
2533 2529 raise error.Abort(_('cannot censor due to censored '
2534 2530 'revision having delta stored'))
2535 2531 rawtext = self._chunk(rev)
2536 2532 else:
2537 2533 rawtext = self.rawdata(rev)
2538 2534
2539 2535 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2540 2536 self.flags(rev))
2541 2537
2542 2538 tr.addbackup(self.indexfile, location='store')
2543 2539 if not self._inline:
2544 2540 tr.addbackup(self.datafile, location='store')
2545 2541
2546 2542 self.opener.rename(newrl.indexfile, self.indexfile)
2547 2543 if not self._inline:
2548 2544 self.opener.rename(newrl.datafile, self.datafile)
2549 2545
2550 2546 self.clearcaches()
2551 2547 self._loadindex()
2552 2548
2553 2549 def verifyintegrity(self, state):
2554 2550 """Verifies the integrity of the revlog.
2555 2551
2556 2552 Yields ``revlogproblem`` instances describing problems that are
2557 2553 found.
2558 2554 """
2559 2555 dd, di = self.checksize()
2560 2556 if dd:
2561 2557 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2562 2558 if di:
2563 2559 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2564 2560
2565 2561 version = self.version & 0xFFFF
2566 2562
2567 2563 # The verifier tells us what version revlog we should be.
2568 2564 if version != state['expectedversion']:
2569 2565 yield revlogproblem(
2570 2566 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2571 2567 (self.indexfile, version, state['expectedversion']))
2572 2568
2573 2569 state['skipread'] = set()
2574 2570
2575 2571 for rev in self:
2576 2572 node = self.node(rev)
2577 2573
2578 2574 # Verify contents. 4 cases to care about:
2579 2575 #
2580 2576 # common: the most common case
2581 2577 # rename: with a rename
2582 2578 # meta: file content starts with b'\1\n', the metadata
2583 2579 # header defined in filelog.py, but without a rename
2584 2580 # ext: content stored externally
2585 2581 #
2586 2582 # More formally, their differences are shown below:
2587 2583 #
2588 2584 # | common | rename | meta | ext
2589 2585 # -------------------------------------------------------
2590 2586 # flags() | 0 | 0 | 0 | not 0
2591 2587 # renamed() | False | True | False | ?
2592 2588 # rawtext[0:2]=='\1\n'| False | True | True | ?
2593 2589 #
2594 2590 # "rawtext" means the raw text stored in revlog data, which
2595 2591 # could be retrieved by "rawdata(rev)". "text"
2596 2592 # mentioned below is "revision(rev)".
2597 2593 #
2598 2594 # There are 3 different lengths stored physically:
2599 2595 # 1. L1: rawsize, stored in revlog index
2600 2596 # 2. L2: len(rawtext), stored in revlog data
2601 2597 # 3. L3: len(text), stored in revlog data if flags==0, or
2602 2598 # possibly somewhere else if flags!=0
2603 2599 #
2604 2600 # L1 should be equal to L2. L3 could be different from them.
2605 2601 # "text" may or may not affect commit hash depending on flag
2606 2602 # processors (see flagutil.addflagprocessor).
2607 2603 #
2608 2604 # | common | rename | meta | ext
2609 2605 # -------------------------------------------------
2610 2606 # rawsize() | L1 | L1 | L1 | L1
2611 2607 # size() | L1 | L2-LM | L1(*) | L1 (?)
2612 2608 # len(rawtext) | L2 | L2 | L2 | L2
2613 2609 # len(text) | L2 | L2 | L2 | L3
2614 2610 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2615 2611 #
2616 2612 # LM: length of metadata, depending on rawtext
2617 2613 # (*): not ideal, see comment in filelog.size
2618 2614 # (?): could be "- len(meta)" if the resolved content has
2619 2615 # rename metadata
2620 2616 #
2621 2617 # Checks needed to be done:
2622 2618 # 1. length check: L1 == L2, in all cases.
2623 2619 # 2. hash check: depending on flag processor, we may need to
2624 2620 # use either "text" (external), or "rawtext" (in revlog).
2625 2621
2626 2622 try:
2627 2623 skipflags = state.get('skipflags', 0)
2628 2624 if skipflags:
2629 2625 skipflags &= self.flags(rev)
2630 2626
2631 2627 if skipflags:
2632 2628 state['skipread'].add(node)
2633 2629 else:
2634 2630 # Side-effect: read content and verify hash.
2635 2631 self.revision(node)
2636 2632
2637 2633 l1 = self.rawsize(rev)
2638 2634 l2 = len(self.rawdata(node))
2639 2635
2640 2636 if l1 != l2:
2641 2637 yield revlogproblem(
2642 2638 error=_('unpacked size is %d, %d expected') % (l2, l1),
2643 2639 node=node)
2644 2640
2645 2641 except error.CensoredNodeError:
2646 2642 if state['erroroncensored']:
2647 2643 yield revlogproblem(error=_('censored file data'),
2648 2644 node=node)
2649 2645 state['skipread'].add(node)
2650 2646 except Exception as e:
2651 2647 yield revlogproblem(
2652 2648 error=_('unpacking %s: %s') % (short(node),
2653 2649 stringutil.forcebytestr(e)),
2654 2650 node=node)
2655 2651 state['skipread'].add(node)
2656 2652
2657 2653 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2658 2654 revisionscount=False, trackedsize=False,
2659 2655 storedsize=False):
2660 2656 d = {}
2661 2657
2662 2658 if exclusivefiles:
2663 2659 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2664 2660 if not self._inline:
2665 2661 d['exclusivefiles'].append((self.opener, self.datafile))
2666 2662
2667 2663 if sharedfiles:
2668 2664 d['sharedfiles'] = []
2669 2665
2670 2666 if revisionscount:
2671 2667 d['revisionscount'] = len(self)
2672 2668
2673 2669 if trackedsize:
2674 2670 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2675 2671
2676 2672 if storedsize:
2677 2673 d['storedsize'] = sum(self.opener.stat(path).st_size
2678 2674 for path in self.files())
2679 2675
2680 2676 return d