rawdata: introduce a `rawdata` method on revlog...
marmoute
r42945:415e4136 default
@@ -1,2695 +1,2702 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVIDX_DEFAULT_FLAGS,
42 42 REVIDX_ELLIPSIS,
43 43 REVIDX_EXTSTORED,
44 44 REVIDX_FLAGS_ORDER,
45 45 REVIDX_ISCENSORED,
46 46 REVIDX_KNOWN_FLAGS,
47 47 REVIDX_RAWTEXT_CHANGING_FLAGS,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 )
57 57 from .thirdparty import (
58 58 attr,
59 59 )
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 repository,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .revlogutils import (
72 72 deltas as deltautil,
73 73 )
74 74 from .utils import (
75 75 interfaceutil,
76 76 storageutil,
77 77 stringutil,
78 78 )
79 79
80 80 # blanked usage of all the names to prevent pyflakes complaints
81 81 # We need these names available in the module for extensions.
82 82 REVLOGV0
83 83 REVLOGV1
84 84 REVLOGV2
85 85 FLAG_INLINE_DATA
86 86 FLAG_GENERALDELTA
87 87 REVLOG_DEFAULT_FLAGS
88 88 REVLOG_DEFAULT_FORMAT
89 89 REVLOG_DEFAULT_VERSION
90 90 REVLOGV1_FLAGS
91 91 REVLOGV2_FLAGS
92 92 REVIDX_ISCENSORED
93 93 REVIDX_ELLIPSIS
94 94 REVIDX_EXTSTORED
95 95 REVIDX_DEFAULT_FLAGS
96 96 REVIDX_FLAGS_ORDER
97 97 REVIDX_KNOWN_FLAGS
98 98 REVIDX_RAWTEXT_CHANGING_FLAGS
99 99
100 100 parsers = policy.importmod(r'parsers')
101 101 rustancestor = policy.importrust(r'ancestor')
102 102 rustdagop = policy.importrust(r'dagop')
103 103
104 104 # Aliased for performance.
105 105 _zlibdecompress = zlib.decompress
106 106
107 107 # max size of revlog with inline data
108 108 _maxinline = 131072
109 109 _chunksize = 1048576
110 110
111 111 # Store flag processors (cf. 'addflagprocessor()' to register)
112 112 _flagprocessors = {
113 113 REVIDX_ISCENSORED: None,
114 114 }
115 115
116 116 # Flag processors for REVIDX_ELLIPSIS.
117 117 def ellipsisreadprocessor(rl, text):
118 118 return text, False
119 119
120 120 def ellipsiswriteprocessor(rl, text):
121 121 return text, False
122 122
123 123 def ellipsisrawprocessor(rl, text):
124 124 return False
125 125
126 126 ellipsisprocessor = (
127 127 ellipsisreadprocessor,
128 128 ellipsiswriteprocessor,
129 129 ellipsisrawprocessor,
130 130 )
131 131
132 132 def addflagprocessor(flag, processor):
133 133 """Register a flag processor on a revision data flag.
134 134
135 135 Invariant:
136 136 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
137 137 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
138 138 - Only one flag processor can be registered on a specific flag.
139 139 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
140 140 following signatures:
141 141 - (read) f(self, rawtext) -> text, bool
142 142 - (write) f(self, text) -> rawtext, bool
143 143 - (raw) f(self, rawtext) -> bool
144 144 "text" is presented to the user. "rawtext" is stored in revlog data, not
145 145 directly visible to the user.
146 146 The boolean returned by these transforms is used to determine whether
147 147 the returned text can be used for hash integrity checking. For example,
148 148 if "write" returns False, then "text" is used to generate hash. If
149 149 "write" returns True, that basically means "rawtext" returned by "write"
150 150 should be used to generate hash. Usually, "write" and "read" return
151 151 different booleans. And "raw" returns a same boolean as "write".
152 152
153 153 Note: The 'raw' transform is used for changegroup generation and in some
154 154 debug commands. In this case the transform only indicates whether the
155 155 contents can be used for hash integrity checks.
156 156 """
157 157 _insertflagprocessor(flag, processor, _flagprocessors)
158 158
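# Illustration only: a minimal sketch of the (read, write, raw) tuple shape
# that addflagprocessor() expects. The transforms below are hypothetical, and
# the flag in the commented call is chosen purely for illustration; a real
# processor implements a meaningful encoding for a flag it owns.
def _sketchread(rl, rawtext):
    # stored form -> user-visible text; False: result is not hash-checkable
    return rawtext[::-1], False

def _sketchwrite(rl, text):
    # user-visible text -> stored rawtext; False: hash the user text instead
    return text[::-1], False

def _sketchraw(rl, rawtext):
    # rawtext alone cannot be used for hash integrity checks
    return False

# addflagprocessor(REVIDX_EXTSTORED,
#                  (_sketchread, _sketchwrite, _sketchraw))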
159 159 def _insertflagprocessor(flag, processor, flagprocessors):
160 160 if not flag & REVIDX_KNOWN_FLAGS:
161 161 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
162 162 raise error.ProgrammingError(msg)
163 163 if flag not in REVIDX_FLAGS_ORDER:
164 164 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
165 165 raise error.ProgrammingError(msg)
166 166 if flag in flagprocessors:
167 167 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
168 168 raise error.Abort(msg)
169 169 flagprocessors[flag] = processor
170 170
171 171 def getoffset(q):
172 172 return int(q >> 16)
173 173
174 174 def gettype(q):
175 175 return int(q & 0xFFFF)
176 176
177 177 def offset_type(offset, type):
178 178 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
179 179 raise ValueError('unknown revlog index flags')
180 180 return int(int(offset) << 16 | type)
181 181
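# Illustration only: the packing above keeps the byte offset in the high
# bits and the 16 flag bits in the low bits. REVIDX_ISCENSORED is used here
# purely as a known flag value.
_example = offset_type(1048576, REVIDX_ISCENSORED)
assert getoffset(_example) == 1048576
assert gettype(_example) == REVIDX_ISCENSORED
del _example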
182 182 @attr.s(slots=True, frozen=True)
183 183 class _revisioninfo(object):
184 184 """Information about a revision that allows building its fulltext
185 185 node: expected hash of the revision
186 186 p1, p2: parent revs of the revision
187 187 btext: built text cache consisting of a one-element list
188 188 cachedelta: (baserev, uncompressed_delta) or None
189 189 flags: flags associated to the revision storage
190 190
191 191 One of btext[0] or cachedelta must be set.
192 192 """
193 193 node = attr.ib()
194 194 p1 = attr.ib()
195 195 p2 = attr.ib()
196 196 btext = attr.ib()
197 197 textlen = attr.ib()
198 198 cachedelta = attr.ib()
199 199 flags = attr.ib()
200 200
201 201 @interfaceutil.implementer(repository.irevisiondelta)
202 202 @attr.s(slots=True)
203 203 class revlogrevisiondelta(object):
204 204 node = attr.ib()
205 205 p1node = attr.ib()
206 206 p2node = attr.ib()
207 207 basenode = attr.ib()
208 208 flags = attr.ib()
209 209 baserevisionsize = attr.ib()
210 210 revision = attr.ib()
211 211 delta = attr.ib()
212 212 linknode = attr.ib(default=None)
213 213
214 214 @interfaceutil.implementer(repository.iverifyproblem)
215 215 @attr.s(frozen=True)
216 216 class revlogproblem(object):
217 217 warning = attr.ib(default=None)
218 218 error = attr.ib(default=None)
219 219 node = attr.ib(default=None)
220 220
221 221 # index v0:
222 222 # 4 bytes: offset
223 223 # 4 bytes: compressed length
224 224 # 4 bytes: base rev
225 225 # 4 bytes: link rev
226 226 # 20 bytes: parent 1 nodeid
227 227 # 20 bytes: parent 2 nodeid
228 228 # 20 bytes: nodeid
229 229 indexformatv0 = struct.Struct(">4l20s20s20s")
230 230 indexformatv0_pack = indexformatv0.pack
231 231 indexformatv0_unpack = indexformatv0.unpack
232 232
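# Illustration only: build one synthetic v0 entry with the struct above and
# read it back (all field values are made up).
_entry = indexformatv0_pack(0, 11, 0, 0, b'\x00' * 20, b'\x00' * 20,
                            b'\xaa' * 20)
assert len(_entry) == indexformatv0.size
assert indexformatv0_unpack(_entry)[1] == 11  # compressed length round-trips
del _entry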
233 233 class revlogoldindex(list):
234 234 def __getitem__(self, i):
235 235 if i == -1:
236 236 return (0, 0, 0, -1, -1, -1, -1, nullid)
237 237 return list.__getitem__(self, i)
238 238
239 239 class revlogoldio(object):
240 240 def __init__(self):
241 241 self.size = indexformatv0.size
242 242
243 243 def parseindex(self, data, inline):
244 244 s = self.size
245 245 index = []
246 246 nodemap = {nullid: nullrev}
247 247 n = off = 0
248 248 l = len(data)
249 249 while off + s <= l:
250 250 cur = data[off:off + s]
251 251 off += s
252 252 e = indexformatv0_unpack(cur)
253 253 # transform to revlogv1 format
254 254 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
255 255 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
256 256 index.append(e2)
257 257 nodemap[e[6]] = n
258 258 n += 1
259 259
260 260 return revlogoldindex(index), nodemap, None
261 261
262 262 def packentry(self, entry, node, version, rev):
263 263 if gettype(entry[0]):
264 264 raise error.RevlogError(_('index entry flags need revlog '
265 265 'version 1'))
266 266 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
267 267 node(entry[5]), node(entry[6]), entry[7])
268 268 return indexformatv0_pack(*e2)
269 269
270 270 # index ng:
271 271 # 6 bytes: offset
272 272 # 2 bytes: flags
273 273 # 4 bytes: compressed length
274 274 # 4 bytes: uncompressed length
275 275 # 4 bytes: base rev
276 276 # 4 bytes: link rev
277 277 # 4 bytes: parent 1 rev
278 278 # 4 bytes: parent 2 rev
279 279 # 32 bytes: nodeid
280 280 indexformatng = struct.Struct(">Qiiiiii20s12x")
281 281 indexformatng_pack = indexformatng.pack
282 282 versionformat = struct.Struct(">I")
283 283 versionformat_pack = versionformat.pack
284 284 versionformat_unpack = versionformat.unpack
285 285
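# Illustration only: rev 0's index entry begins with this 4-byte version
# header (see revlogio.packentry below), e.g. for an inline v1 revlog:
_header = versionformat_pack(REVLOGV1 | FLAG_INLINE_DATA)
assert versionformat_unpack(_header)[0] & 0xFFFF == REVLOGV1
del _header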
286 286 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
287 287 # signed integer)
288 288 _maxentrysize = 0x7fffffff
289 289
290 290 class revlogio(object):
291 291 def __init__(self):
292 292 self.size = indexformatng.size
293 293
294 294 def parseindex(self, data, inline):
295 295 # call the C implementation to parse the index data
296 296 index, cache = parsers.parse_index2(data, inline)
297 297 return index, getattr(index, 'nodemap', None), cache
298 298
299 299 def packentry(self, entry, node, version, rev):
300 300 p = indexformatng_pack(*entry)
301 301 if rev == 0:
302 302 p = versionformat_pack(version) + p[4:]
303 303 return p
304 304
305 305 class revlog(object):
306 306 """
307 307 the underlying revision storage object
308 308
309 309 A revlog consists of two parts, an index and the revision data.
310 310
311 311 The index is a file with a fixed record size containing
312 312 information on each revision, including its nodeid (hash), the
313 313 nodeids of its parents, the position and offset of its data within
314 314 the data file, and the revision it's based on. Finally, each entry
315 315 contains a linkrev entry that can serve as a pointer to external
316 316 data.
317 317
318 318 The revision data itself is a linear collection of data chunks.
319 319 Each chunk represents a revision and is usually represented as a
320 320 delta against the previous chunk. To bound lookup time, runs of
321 321 deltas are limited to about 2 times the length of the original
322 322 version data. This makes retrieval of a version proportional to
323 323 its size, or O(1) relative to the number of revisions.
324 324
325 325 Both pieces of the revlog are written to in an append-only
326 326 fashion, which means we never need to rewrite a file to insert or
327 327 remove data, and can use some simple techniques to avoid the need
328 328 for locking while reading.
329 329
330 330 If checkambig, indexfile is opened with checkambig=True at
331 331 writing, to avoid file stat ambiguity.
332 332
333 333 If mmaplargeindex is True, and an mmapindexthreshold is set, the
334 334 index will be mmapped rather than read if it is larger than the
335 335 configured threshold.
336 336
337 337 If censorable is True, the revlog can have censored revisions.
338 338
339 339 If `upperboundcomp` is not None, this is the expected maximal gain from
340 340 compression for the data content.
341 341 """
342 342 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
343 343 mmaplargeindex=False, censorable=False,
344 344 upperboundcomp=None):
345 345 """
346 346 create a revlog object
347 347
348 348 opener is a function that abstracts the file opening operation
349 349 and can be used to implement COW semantics or the like.
350 350
351 351 """
352 352 self.upperboundcomp = upperboundcomp
353 353 self.indexfile = indexfile
354 354 self.datafile = datafile or (indexfile[:-2] + ".d")
355 355 self.opener = opener
356 356 # When True, indexfile is opened with checkambig=True at writing, to
357 357 # avoid file stat ambiguity.
358 358 self._checkambig = checkambig
359 359 self._mmaplargeindex = mmaplargeindex
360 360 self._censorable = censorable
361 361 # 3-tuple of (node, rev, text) for a raw revision.
362 362 self._revisioncache = None
363 363 # Maps rev to chain base rev.
364 364 self._chainbasecache = util.lrucachedict(100)
365 365 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
366 366 self._chunkcache = (0, '')
367 367 # How much data to read and cache into the raw revlog data cache.
368 368 self._chunkcachesize = 65536
369 369 self._maxchainlen = None
370 370 self._deltabothparents = True
371 371 self.index = []
372 372 # Mapping of partial identifiers to full nodes.
373 373 self._pcache = {}
374 374 # Mapping of revision integer to full node.
375 375 self._nodecache = {nullid: nullrev}
376 376 self._nodepos = None
377 377 self._compengine = 'zlib'
378 378 self._compengineopts = {}
379 379 self._maxdeltachainspan = -1
380 380 self._withsparseread = False
381 381 self._sparserevlog = False
382 382 self._srdensitythreshold = 0.50
383 383 self._srmingapsize = 262144
384 384
385 385 # Make copy of flag processors so each revlog instance can support
386 386 # custom flags.
387 387 self._flagprocessors = dict(_flagprocessors)
388 388
389 389 # 2-tuple of file handles being used for active writing.
390 390 self._writinghandles = None
391 391
392 392 self._loadindex()
393 393
394 394 def _loadindex(self):
395 395 mmapindexthreshold = None
396 396 opts = getattr(self.opener, 'options', {}) or {}
397 397
398 398 if 'revlogv2' in opts:
399 399 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
400 400 elif 'revlogv1' in opts:
401 401 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
402 402 if 'generaldelta' in opts:
403 403 newversionflags |= FLAG_GENERALDELTA
404 404 elif getattr(self.opener, 'options', None) is not None:
405 405 # If options provided but no 'revlog*' found, the repository
406 406 # would have no 'requires' file in it, which means we have to
407 407 # stick to the old format.
408 408 newversionflags = REVLOGV0
409 409 else:
410 410 newversionflags = REVLOG_DEFAULT_VERSION
411 411
412 412 if 'chunkcachesize' in opts:
413 413 self._chunkcachesize = opts['chunkcachesize']
414 414 if 'maxchainlen' in opts:
415 415 self._maxchainlen = opts['maxchainlen']
416 416 if 'deltabothparents' in opts:
417 417 self._deltabothparents = opts['deltabothparents']
418 418 self._lazydelta = bool(opts.get('lazydelta', True))
419 419 self._lazydeltabase = False
420 420 if self._lazydelta:
421 421 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
422 422 if 'compengine' in opts:
423 423 self._compengine = opts['compengine']
424 424 if 'zlib.level' in opts:
425 425 self._compengineopts['zlib.level'] = opts['zlib.level']
426 426 if 'zstd.level' in opts:
427 427 self._compengineopts['zstd.level'] = opts['zstd.level']
428 428 if 'maxdeltachainspan' in opts:
429 429 self._maxdeltachainspan = opts['maxdeltachainspan']
430 430 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
431 431 mmapindexthreshold = opts['mmapindexthreshold']
432 432 self._sparserevlog = bool(opts.get('sparse-revlog', False))
433 433 withsparseread = bool(opts.get('with-sparse-read', False))
434 434 # sparse-revlog forces sparse-read
435 435 self._withsparseread = self._sparserevlog or withsparseread
436 436 if 'sparse-read-density-threshold' in opts:
437 437 self._srdensitythreshold = opts['sparse-read-density-threshold']
438 438 if 'sparse-read-min-gap-size' in opts:
439 439 self._srmingapsize = opts['sparse-read-min-gap-size']
440 440 if opts.get('enableellipsis'):
441 441 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
442 442
443 443 # revlog v0 doesn't have flag processors
444 444 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
445 445 _insertflagprocessor(flag, processor, self._flagprocessors)
446 446
447 447 if self._chunkcachesize <= 0:
448 448 raise error.RevlogError(_('revlog chunk cache size %r is not '
449 449 'greater than 0') % self._chunkcachesize)
450 450 elif self._chunkcachesize & (self._chunkcachesize - 1):
451 451 raise error.RevlogError(_('revlog chunk cache size %r is not a '
452 452 'power of 2') % self._chunkcachesize)
453 453
454 454 indexdata = ''
455 455 self._initempty = True
456 456 try:
457 457 with self._indexfp() as f:
458 458 if (mmapindexthreshold is not None and
459 459 self.opener.fstat(f).st_size >= mmapindexthreshold):
460 460 # TODO: should .close() to release resources without
461 461 # relying on Python GC
462 462 indexdata = util.buffer(util.mmapread(f))
463 463 else:
464 464 indexdata = f.read()
465 465 if len(indexdata) > 0:
466 466 versionflags = versionformat_unpack(indexdata[:4])[0]
467 467 self._initempty = False
468 468 else:
469 469 versionflags = newversionflags
470 470 except IOError as inst:
471 471 if inst.errno != errno.ENOENT:
472 472 raise
473 473
474 474 versionflags = newversionflags
475 475
476 476 self.version = versionflags
477 477
478 478 flags = versionflags & ~0xFFFF
479 479 fmt = versionflags & 0xFFFF
480 480
481 481 if fmt == REVLOGV0:
482 482 if flags:
483 483 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
484 484 'revlog %s') %
485 485 (flags >> 16, fmt, self.indexfile))
486 486
487 487 self._inline = False
488 488 self._generaldelta = False
489 489
490 490 elif fmt == REVLOGV1:
491 491 if flags & ~REVLOGV1_FLAGS:
492 492 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
493 493 'revlog %s') %
494 494 (flags >> 16, fmt, self.indexfile))
495 495
496 496 self._inline = versionflags & FLAG_INLINE_DATA
497 497 self._generaldelta = versionflags & FLAG_GENERALDELTA
498 498
499 499 elif fmt == REVLOGV2:
500 500 if flags & ~REVLOGV2_FLAGS:
501 501 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
502 502 'revlog %s') %
503 503 (flags >> 16, fmt, self.indexfile))
504 504
505 505 self._inline = versionflags & FLAG_INLINE_DATA
506 506 # generaldelta implied by version 2 revlogs.
507 507 self._generaldelta = True
508 508
509 509 else:
510 510 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
511 511 (fmt, self.indexfile))
512 512 # sparse-revlog can't be on without general-delta (issue6056)
513 513 if not self._generaldelta:
514 514 self._sparserevlog = False
515 515
516 516 self._storedeltachains = True
517 517
518 518 self._io = revlogio()
519 519 if self.version == REVLOGV0:
520 520 self._io = revlogoldio()
521 521 try:
522 522 d = self._io.parseindex(indexdata, self._inline)
523 523 except (ValueError, IndexError):
524 524 raise error.RevlogError(_("index %s is corrupted") %
525 525 self.indexfile)
526 526 self.index, nodemap, self._chunkcache = d
527 527 if nodemap is not None:
528 528 self.nodemap = self._nodecache = nodemap
529 529 if not self._chunkcache:
530 530 self._chunkclear()
531 531 # revnum -> (chain-length, sum-delta-length)
532 532 self._chaininfocache = {}
533 533 # revlog header -> revlog compressor
534 534 self._decompressors = {}
535 535
536 536 @util.propertycache
537 537 def _compressor(self):
538 538 engine = util.compengines[self._compengine]
539 539 return engine.revlogcompressor(self._compengineopts)
540 540
541 541 def _indexfp(self, mode='r'):
542 542 """file object for the revlog's index file"""
543 543 args = {r'mode': mode}
544 544 if mode != 'r':
545 545 args[r'checkambig'] = self._checkambig
546 546 if mode == 'w':
547 547 args[r'atomictemp'] = True
548 548 return self.opener(self.indexfile, **args)
549 549
550 550 def _datafp(self, mode='r'):
551 551 """file object for the revlog's data file"""
552 552 return self.opener(self.datafile, mode=mode)
553 553
554 554 @contextlib.contextmanager
555 555 def _datareadfp(self, existingfp=None):
556 556 """file object suitable to read data"""
557 557 # Use explicit file handle, if given.
558 558 if existingfp is not None:
559 559 yield existingfp
560 560
561 561 # Use a file handle being actively used for writes, if available.
562 562 # There is some danger to doing this because reads will seek the
563 563 # file. However, _writeentry() performs a SEEK_END before all writes,
564 564 # so we should be safe.
565 565 elif self._writinghandles:
566 566 if self._inline:
567 567 yield self._writinghandles[0]
568 568 else:
569 569 yield self._writinghandles[1]
570 570
571 571 # Otherwise open a new file handle.
572 572 else:
573 573 if self._inline:
574 574 func = self._indexfp
575 575 else:
576 576 func = self._datafp
577 577 with func() as fp:
578 578 yield fp
579 579
580 580 def tip(self):
581 581 return self.node(len(self.index) - 1)
582 582 def __contains__(self, rev):
583 583 return 0 <= rev < len(self)
584 584 def __len__(self):
585 585 return len(self.index)
586 586 def __iter__(self):
587 587 return iter(pycompat.xrange(len(self)))
588 588 def revs(self, start=0, stop=None):
589 589 """iterate over all rev in this revlog (from start to stop)"""
590 590 return storageutil.iterrevs(len(self), start=start, stop=stop)
591 591
592 592 @util.propertycache
593 593 def nodemap(self):
594 594 if self.index:
595 595 # populate mapping down to the initial node
596 596 node0 = self.index[0][7] # get around changelog filtering
597 597 self.rev(node0)
598 598 return self._nodecache
599 599
600 600 def hasnode(self, node):
601 601 try:
602 602 self.rev(node)
603 603 return True
604 604 except KeyError:
605 605 return False
606 606
607 607 def candelta(self, baserev, rev):
608 608 """whether two revisions (baserev, rev) can be delta-ed or not"""
609 609 # Disable delta if either rev requires a content-changing flag
610 610 # processor (ex. LFS). This is because such flag processor can alter
611 611 # the rawtext content that the delta will be based on, and two clients
612 612 # could have the same revlog node with different flags (i.e. different
613 613 # rawtext contents) and the delta could be incompatible.
614 614 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
615 615 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
616 616 return False
617 617 return True
618 618
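# Illustration only: the test above in isolation. A delta is refused when
# either revision carries a rawtext-changing flag, since the delta base
# bytes could then differ between clients:
#
#     rawchanging = REVIDX_RAWTEXT_CHANGING_FLAGS
#     ok = not ((self.flags(baserev) | self.flags(rev)) & rawchanging)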
619 619 def clearcaches(self):
620 620 self._revisioncache = None
621 621 self._chainbasecache.clear()
622 622 self._chunkcache = (0, '')
623 623 self._pcache = {}
624 624
625 625 try:
626 626 # If we are using the native C version, you are in a fun case
627 627 # where self.index, self.nodemap and self._nodecache are the same
628 628 # object.
629 629 self._nodecache.clearcaches()
630 630 except AttributeError:
631 631 self._nodecache = {nullid: nullrev}
632 632 self._nodepos = None
633 633
634 634 def rev(self, node):
635 635 try:
636 636 return self._nodecache[node]
637 637 except TypeError:
638 638 raise
639 639 except error.RevlogError:
640 640 # parsers.c radix tree lookup failed
641 641 if node == wdirid or node in wdirfilenodeids:
642 642 raise error.WdirUnsupported
643 643 raise error.LookupError(node, self.indexfile, _('no node'))
644 644 except KeyError:
645 645 # pure python cache lookup failed
646 646 n = self._nodecache
647 647 i = self.index
648 648 p = self._nodepos
649 649 if p is None:
650 650 p = len(i) - 1
651 651 else:
652 652 assert p < len(i)
653 653 for r in pycompat.xrange(p, -1, -1):
654 654 v = i[r][7]
655 655 n[v] = r
656 656 if v == node:
657 657 self._nodepos = r - 1
658 658 return r
659 659 if node == wdirid or node in wdirfilenodeids:
660 660 raise error.WdirUnsupported
661 661 raise error.LookupError(node, self.indexfile, _('no node'))
662 662
663 663 # Accessors for index entries.
664 664
665 665 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
666 666 # are flags.
667 667 def start(self, rev):
668 668 return int(self.index[rev][0] >> 16)
669 669
670 670 def flags(self, rev):
671 671 return self.index[rev][0] & 0xFFFF
672 672
673 673 def length(self, rev):
674 674 return self.index[rev][1]
675 675
676 676 def rawsize(self, rev):
677 677 """return the length of the uncompressed text for a given revision"""
678 678 l = self.index[rev][2]
679 679 if l >= 0:
680 680 return l
681 681
682 682 t = self.revision(rev, raw=True)
683 683 return len(t)
684 684
685 685 def size(self, rev):
686 686 """length of non-raw text (processed by a "read" flag processor)"""
687 687 # fast path: if no "read" flag processor could change the content,
688 688 # size is rawsize. note: ELLIPSIS is known to not change the content.
689 689 flags = self.flags(rev)
690 690 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
691 691 return self.rawsize(rev)
692 692
693 693 return len(self.revision(rev, raw=False))
694 694
695 695 def chainbase(self, rev):
696 696 base = self._chainbasecache.get(rev)
697 697 if base is not None:
698 698 return base
699 699
700 700 index = self.index
701 701 iterrev = rev
702 702 base = index[iterrev][3]
703 703 while base != iterrev:
704 704 iterrev = base
705 705 base = index[iterrev][3]
706 706
707 707 self._chainbasecache[rev] = base
708 708 return base
709 709
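# Illustration only: the same walk over a toy base-pointer table, where
# bases[rev] == rev marks a chain base (data is made up):
#
#     bases = [0, 0, 1, 3]
#     def toychainbase(rev):
#         while bases[rev] != rev:
#             rev = bases[rev]
#         return rev
#     toychainbase(2) == 0; toychainbase(3) == 3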
710 710 def linkrev(self, rev):
711 711 return self.index[rev][4]
712 712
713 713 def parentrevs(self, rev):
714 714 try:
715 715 entry = self.index[rev]
716 716 except IndexError:
717 717 if rev == wdirrev:
718 718 raise error.WdirUnsupported
719 719 raise
720 720
721 721 return entry[5], entry[6]
722 722
723 723 # fast parentrevs(rev) where rev isn't filtered
724 724 _uncheckedparentrevs = parentrevs
725 725
726 726 def node(self, rev):
727 727 try:
728 728 return self.index[rev][7]
729 729 except IndexError:
730 730 if rev == wdirrev:
731 731 raise error.WdirUnsupported
732 732 raise
733 733
734 734 # Derived from index values.
735 735
736 736 def end(self, rev):
737 737 return self.start(rev) + self.length(rev)
738 738
739 739 def parents(self, node):
740 740 i = self.index
741 741 d = i[self.rev(node)]
742 742 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
743 743
744 744 def chainlen(self, rev):
745 745 return self._chaininfo(rev)[0]
746 746
747 747 def _chaininfo(self, rev):
748 748 chaininfocache = self._chaininfocache
749 749 if rev in chaininfocache:
750 750 return chaininfocache[rev]
751 751 index = self.index
752 752 generaldelta = self._generaldelta
753 753 iterrev = rev
754 754 e = index[iterrev]
755 755 clen = 0
756 756 compresseddeltalen = 0
757 757 while iterrev != e[3]:
758 758 clen += 1
759 759 compresseddeltalen += e[1]
760 760 if generaldelta:
761 761 iterrev = e[3]
762 762 else:
763 763 iterrev -= 1
764 764 if iterrev in chaininfocache:
765 765 t = chaininfocache[iterrev]
766 766 clen += t[0]
767 767 compresseddeltalen += t[1]
768 768 break
769 769 e = index[iterrev]
770 770 else:
771 771 # Add text length of base since decompressing that also takes
772 772 # work. For cache hits the length is already included.
773 773 compresseddeltalen += e[1]
774 774 r = (clen, compresseddeltalen)
775 775 chaininfocache[rev] = r
776 776 return r
777 777
778 778 def _deltachain(self, rev, stoprev=None):
779 779 """Obtain the delta chain for a revision.
780 780
781 781 ``stoprev`` specifies a revision to stop at. If not specified, we
782 782 stop at the base of the chain.
783 783
784 784 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
785 785 revs in ascending order and ``stopped`` is a bool indicating whether
786 786 ``stoprev`` was hit.
787 787 """
788 788 # Try C implementation.
789 789 try:
790 790 return self.index.deltachain(rev, stoprev, self._generaldelta)
791 791 except AttributeError:
792 792 pass
793 793
794 794 chain = []
795 795
796 796 # Alias to prevent attribute lookup in tight loop.
797 797 index = self.index
798 798 generaldelta = self._generaldelta
799 799
800 800 iterrev = rev
801 801 e = index[iterrev]
802 802 while iterrev != e[3] and iterrev != stoprev:
803 803 chain.append(iterrev)
804 804 if generaldelta:
805 805 iterrev = e[3]
806 806 else:
807 807 iterrev -= 1
808 808 e = index[iterrev]
809 809
810 810 if iterrev == stoprev:
811 811 stopped = True
812 812 else:
813 813 chain.append(iterrev)
814 814 stopped = False
815 815
816 816 chain.reverse()
817 817 return chain, stopped
818 818
819 819 def ancestors(self, revs, stoprev=0, inclusive=False):
820 820 """Generate the ancestors of 'revs' in reverse revision order.
821 821 Does not generate revs lower than stoprev.
822 822
823 823 See the documentation for ancestor.lazyancestors for more details."""
824 824
825 825 # first, make sure start revisions aren't filtered
826 826 revs = list(revs)
827 827 checkrev = self.node
828 828 for r in revs:
829 829 checkrev(r)
830 830 # and we're sure ancestors aren't filtered as well
831 831
832 832 if rustancestor is not None:
833 833 lazyancestors = rustancestor.LazyAncestors
834 834 arg = self.index
835 835 elif util.safehasattr(parsers, 'rustlazyancestors'):
836 836 lazyancestors = ancestor.rustlazyancestors
837 837 arg = self.index
838 838 else:
839 839 lazyancestors = ancestor.lazyancestors
840 840 arg = self._uncheckedparentrevs
841 841 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
842 842
843 843 def descendants(self, revs):
844 844 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
845 845
846 846 def findcommonmissing(self, common=None, heads=None):
847 847 """Return a tuple of the ancestors of common and the ancestors of heads
848 848 that are not ancestors of common. In revset terminology, we return the
849 849 tuple:
850 850
851 851 ::common, (::heads) - (::common)
852 852
853 853 The list is sorted by revision number, meaning it is
854 854 topologically sorted.
855 855
856 856 'heads' and 'common' are both lists of node IDs. If heads is
857 857 not supplied, uses all of the revlog's heads. If common is not
858 858 supplied, uses nullid."""
859 859 if common is None:
860 860 common = [nullid]
861 861 if heads is None:
862 862 heads = self.heads()
863 863
864 864 common = [self.rev(n) for n in common]
865 865 heads = [self.rev(n) for n in heads]
866 866
867 867 # we want the ancestors, but inclusive
868 868 class lazyset(object):
869 869 def __init__(self, lazyvalues):
870 870 self.addedvalues = set()
871 871 self.lazyvalues = lazyvalues
872 872
873 873 def __contains__(self, value):
874 874 return value in self.addedvalues or value in self.lazyvalues
875 875
876 876 def __iter__(self):
877 877 added = self.addedvalues
878 878 for r in added:
879 879 yield r
880 880 for r in self.lazyvalues:
881 881 if r not in added:
882 882 yield r
883 883
884 884 def add(self, value):
885 885 self.addedvalues.add(value)
886 886
887 887 def update(self, values):
888 888 self.addedvalues.update(values)
889 889
890 890 has = lazyset(self.ancestors(common))
891 891 has.add(nullrev)
892 892 has.update(common)
893 893
894 894 # take all ancestors from heads that aren't in has
895 895 missing = set()
896 896 visit = collections.deque(r for r in heads if r not in has)
897 897 while visit:
898 898 r = visit.popleft()
899 899 if r in missing:
900 900 continue
901 901 else:
902 902 missing.add(r)
903 903 for p in self.parentrevs(r):
904 904 if p not in has:
905 905 visit.append(p)
906 906 missing = list(missing)
907 907 missing.sort()
908 908 return has, [self.node(miss) for miss in missing]
909 909
910 910 def incrementalmissingrevs(self, common=None):
911 911 """Return an object that can be used to incrementally compute the
912 912 revision numbers of the ancestors of arbitrary sets that are not
913 913 ancestors of common. This is an ancestor.incrementalmissingancestors
914 914 object.
915 915
916 916 'common' is a list of revision numbers. If common is not supplied, uses
917 917 nullrev.
918 918 """
919 919 if common is None:
920 920 common = [nullrev]
921 921
922 922 if rustancestor is not None:
923 923 return rustancestor.MissingAncestors(self.index, common)
924 924 return ancestor.incrementalmissingancestors(self.parentrevs, common)
925 925
926 926 def findmissingrevs(self, common=None, heads=None):
927 927 """Return the revision numbers of the ancestors of heads that
928 928 are not ancestors of common.
929 929
930 930 More specifically, return a list of revision numbers corresponding to
931 931 nodes N such that every N satisfies the following constraints:
932 932
933 933 1. N is an ancestor of some node in 'heads'
934 934 2. N is not an ancestor of any node in 'common'
935 935
936 936 The list is sorted by revision number, meaning it is
937 937 topologically sorted.
938 938
939 939 'heads' and 'common' are both lists of revision numbers. If heads is
940 940 not supplied, uses all of the revlog's heads. If common is not
941 941 supplied, uses nullrev."""
942 942 if common is None:
943 943 common = [nullrev]
944 944 if heads is None:
945 945 heads = self.headrevs()
946 946
947 947 inc = self.incrementalmissingrevs(common=common)
948 948 return inc.missingancestors(heads)
949 949
950 950 def findmissing(self, common=None, heads=None):
951 951 """Return the ancestors of heads that are not ancestors of common.
952 952
953 953 More specifically, return a list of nodes N such that every N
954 954 satisfies the following constraints:
955 955
956 956 1. N is an ancestor of some node in 'heads'
957 957 2. N is not an ancestor of any node in 'common'
958 958
959 959 The list is sorted by revision number, meaning it is
960 960 topologically sorted.
961 961
962 962 'heads' and 'common' are both lists of node IDs. If heads is
963 963 not supplied, uses all of the revlog's heads. If common is not
964 964 supplied, uses nullid."""
965 965 if common is None:
966 966 common = [nullid]
967 967 if heads is None:
968 968 heads = self.heads()
969 969
970 970 common = [self.rev(n) for n in common]
971 971 heads = [self.rev(n) for n in heads]
972 972
973 973 inc = self.incrementalmissingrevs(common=common)
974 974 return [self.node(r) for r in inc.missingancestors(heads)]
975 975
976 976 def nodesbetween(self, roots=None, heads=None):
977 977 """Return a topological path from 'roots' to 'heads'.
978 978
979 979 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
980 980 topologically sorted list of all nodes N that satisfy both of
981 981 these constraints:
982 982
983 983 1. N is a descendant of some node in 'roots'
984 984 2. N is an ancestor of some node in 'heads'
985 985
986 986 Every node is considered to be both a descendant and an ancestor
987 987 of itself, so every reachable node in 'roots' and 'heads' will be
988 988 included in 'nodes'.
989 989
990 990 'outroots' is the list of reachable nodes in 'roots', i.e., the
991 991 subset of 'roots' that is returned in 'nodes'. Likewise,
992 992 'outheads' is the subset of 'heads' that is also in 'nodes'.
993 993
994 994 'roots' and 'heads' are both lists of node IDs. If 'roots' is
995 995 unspecified, uses nullid as the only root. If 'heads' is
996 996 unspecified, uses list of all of the revlog's heads."""
997 997 nonodes = ([], [], [])
998 998 if roots is not None:
999 999 roots = list(roots)
1000 1000 if not roots:
1001 1001 return nonodes
1002 1002 lowestrev = min([self.rev(n) for n in roots])
1003 1003 else:
1004 1004 roots = [nullid] # Everybody's a descendant of nullid
1005 1005 lowestrev = nullrev
1006 1006 if (lowestrev == nullrev) and (heads is None):
1007 1007 # We want _all_ the nodes!
1008 1008 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1009 1009 if heads is None:
1010 1010 # All nodes are ancestors, so the latest ancestor is the last
1011 1011 # node.
1012 1012 highestrev = len(self) - 1
1013 1013 # Set ancestors to None to signal that every node is an ancestor.
1014 1014 ancestors = None
1015 1015 # Set heads to an empty dictionary for later discovery of heads
1016 1016 heads = {}
1017 1017 else:
1018 1018 heads = list(heads)
1019 1019 if not heads:
1020 1020 return nonodes
1021 1021 ancestors = set()
1022 1022 # Turn heads into a dictionary so we can remove 'fake' heads.
1023 1023 # Also, later we will be using it to filter out the heads we can't
1024 1024 # find from roots.
1025 1025 heads = dict.fromkeys(heads, False)
1026 1026 # Start at the top and keep marking parents until we're done.
1027 1027 nodestotag = set(heads)
1028 1028 # Remember where the top was so we can use it as a limit later.
1029 1029 highestrev = max([self.rev(n) for n in nodestotag])
1030 1030 while nodestotag:
1031 1031 # grab a node to tag
1032 1032 n = nodestotag.pop()
1033 1033 # Never tag nullid
1034 1034 if n == nullid:
1035 1035 continue
1036 1036 # A node's revision number represents its place in a
1037 1037 # topologically sorted list of nodes.
1038 1038 r = self.rev(n)
1039 1039 if r >= lowestrev:
1040 1040 if n not in ancestors:
1041 1041 # If we are possibly a descendant of one of the roots
1042 1042 # and we haven't already been marked as an ancestor
1043 1043 ancestors.add(n) # Mark as ancestor
1044 1044 # Add non-nullid parents to list of nodes to tag.
1045 1045 nodestotag.update([p for p in self.parents(n) if
1046 1046 p != nullid])
1047 1047 elif n in heads: # We've seen it before, is it a fake head?
1048 1048 # So it is, real heads should not be the ancestors of
1049 1049 # any other heads.
1050 1050 heads.pop(n)
1051 1051 if not ancestors:
1052 1052 return nonodes
1053 1053 # Now that we have our set of ancestors, we want to remove any
1054 1054 # roots that are not ancestors.
1055 1055
1056 1056 # If one of the roots was nullid, everything is included anyway.
1057 1057 if lowestrev > nullrev:
1058 1058 # But, since we weren't, let's recompute the lowest rev to not
1059 1059 # include roots that aren't ancestors.
1060 1060
1061 1061 # Filter out roots that aren't ancestors of heads
1062 1062 roots = [root for root in roots if root in ancestors]
1063 1063 # Recompute the lowest revision
1064 1064 if roots:
1065 1065 lowestrev = min([self.rev(root) for root in roots])
1066 1066 else:
1067 1067 # No more roots? Return empty list
1068 1068 return nonodes
1069 1069 else:
1070 1070 # We are descending from nullid, and don't need to care about
1071 1071 # any other roots.
1072 1072 lowestrev = nullrev
1073 1073 roots = [nullid]
1074 1074 # Transform our roots list into a set.
1075 1075 descendants = set(roots)
1076 1076 # Also, keep the original roots so we can filter out roots that aren't
1077 1077 # 'real' roots (i.e. are descended from other roots).
1078 1078 roots = descendants.copy()
1079 1079 # Our topologically sorted list of output nodes.
1080 1080 orderedout = []
1081 1081 # Don't start at nullid since we don't want nullid in our output list,
1082 1082 # and if nullid shows up in descendants, empty parents will look like
1083 1083 # they're descendants.
1084 1084 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1085 1085 n = self.node(r)
1086 1086 isdescendant = False
1087 1087 if lowestrev == nullrev: # Everybody is a descendant of nullid
1088 1088 isdescendant = True
1089 1089 elif n in descendants:
1090 1090 # n is already a descendant
1091 1091 isdescendant = True
1092 1092 # This check only needs to be done here because all the roots
1093 1093 # will start being marked as descendants before the loop.
1094 1094 if n in roots:
1095 1095 # If n was a root, check if it's a 'real' root.
1096 1096 p = tuple(self.parents(n))
1097 1097 # If any of its parents are descendants, it's not a root.
1098 1098 if (p[0] in descendants) or (p[1] in descendants):
1099 1099 roots.remove(n)
1100 1100 else:
1101 1101 p = tuple(self.parents(n))
1102 1102 # A node is a descendant if either of its parents are
1103 1103 # descendants. (We seeded the descendants set with the roots
1104 1104 # up there, remember?)
1105 1105 if (p[0] in descendants) or (p[1] in descendants):
1106 1106 descendants.add(n)
1107 1107 isdescendant = True
1108 1108 if isdescendant and ((ancestors is None) or (n in ancestors)):
1109 1109 # Only include nodes that are both descendants and ancestors.
1110 1110 orderedout.append(n)
1111 1111 if (ancestors is not None) and (n in heads):
1112 1112 # We're trying to figure out which heads are reachable
1113 1113 # from roots.
1114 1114 # Mark this head as having been reached
1115 1115 heads[n] = True
1116 1116 elif ancestors is None:
1117 1117 # Otherwise, we're trying to discover the heads.
1118 1118 # Assume this is a head because if it isn't, the next step
1119 1119 # will eventually remove it.
1120 1120 heads[n] = True
1121 1121 # But, obviously its parents aren't.
1122 1122 for p in self.parents(n):
1123 1123 heads.pop(p, None)
1124 1124 heads = [head for head, flag in heads.iteritems() if flag]
1125 1125 roots = list(roots)
1126 1126 assert orderedout
1127 1127 assert roots
1128 1128 assert heads
1129 1129 return (orderedout, roots, heads)
1130 1130
1131 1131 def headrevs(self, revs=None):
1132 1132 if revs is None:
1133 1133 try:
1134 1134 return self.index.headrevs()
1135 1135 except AttributeError:
1136 1136 return self._headrevs()
1137 1137 if rustdagop is not None:
1138 1138 return rustdagop.headrevs(self.index, revs)
1139 1139 return dagop.headrevs(revs, self._uncheckedparentrevs)
1140 1140
1141 1141 def computephases(self, roots):
1142 1142 return self.index.computephasesmapsets(roots)
1143 1143
1144 1144 def _headrevs(self):
1145 1145 count = len(self)
1146 1146 if not count:
1147 1147 return [nullrev]
1148 1148 # we won't iter over filtered revs so nobody is a head at start
1149 1149 ishead = [0] * (count + 1)
1150 1150 index = self.index
1151 1151 for r in self:
1152 1152 ishead[r] = 1 # I may be a head
1153 1153 e = index[r]
1154 1154 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1155 1155 return [r for r, val in enumerate(ishead) if val]
1156 1156
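# Illustration only: the marking pass above on a toy parent table
# (made-up data; -1 stands for nullrev):
#
#     parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, -1)}
#     ishead = [1] * len(parents)
#     for r, ps in parents.items():
#         for p in ps:
#             if p >= 0:
#                 ishead[p] = 0
#     [r for r, v in enumerate(ishead) if v] == [2, 3]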
1157 1157 def heads(self, start=None, stop=None):
1158 1158 """return the list of all nodes that have no children
1159 1159
1160 1160 if start is specified, only heads that are descendants of
1161 1161 start will be returned
1162 1162 if stop is specified, it will consider all the revs from stop
1163 1163 as if they had no children
1164 1164 """
1165 1165 if start is None and stop is None:
1166 1166 if not len(self):
1167 1167 return [nullid]
1168 1168 return [self.node(r) for r in self.headrevs()]
1169 1169
1170 1170 if start is None:
1171 1171 start = nullrev
1172 1172 else:
1173 1173 start = self.rev(start)
1174 1174
1175 1175 stoprevs = set(self.rev(n) for n in stop or [])
1176 1176
1177 1177 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1178 1178 stoprevs=stoprevs)
1179 1179
1180 1180 return [self.node(rev) for rev in revs]
1181 1181
1182 1182 def children(self, node):
1183 1183 """find the children of a given node"""
1184 1184 c = []
1185 1185 p = self.rev(node)
1186 1186 for r in self.revs(start=p + 1):
1187 1187 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1188 1188 if prevs:
1189 1189 for pr in prevs:
1190 1190 if pr == p:
1191 1191 c.append(self.node(r))
1192 1192 elif p == nullrev:
1193 1193 c.append(self.node(r))
1194 1194 return c
1195 1195
1196 1196 def commonancestorsheads(self, a, b):
1197 1197 """calculate all the heads of the common ancestors of nodes a and b"""
1198 1198 a, b = self.rev(a), self.rev(b)
1199 1199 ancs = self._commonancestorsheads(a, b)
1200 1200 return pycompat.maplist(self.node, ancs)
1201 1201
1202 1202 def _commonancestorsheads(self, *revs):
1203 1203 """calculate all the heads of the common ancestors of revs"""
1204 1204 try:
1205 1205 ancs = self.index.commonancestorsheads(*revs)
1206 1206 except (AttributeError, OverflowError): # C implementation failed
1207 1207 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1208 1208 return ancs
1209 1209
1210 1210 def isancestor(self, a, b):
1211 1211 """return True if node a is an ancestor of node b
1212 1212
1213 1213 A revision is considered an ancestor of itself."""
1214 1214 a, b = self.rev(a), self.rev(b)
1215 1215 return self.isancestorrev(a, b)
1216 1216
1217 1217 def isancestorrev(self, a, b):
1218 1218 """return True if revision a is an ancestor of revision b
1219 1219
1220 1220 A revision is considered an ancestor of itself.
1221 1221
1222 1222 The implementation of this is trivial but the use of
1223 1223 reachableroots is not."""
1224 1224 if a == nullrev:
1225 1225 return True
1226 1226 elif a == b:
1227 1227 return True
1228 1228 elif a > b:
1229 1229 return False
1230 1230 return bool(self.reachableroots(a, [b], [a], includepath=False))
1231 1231
1232 1232 def reachableroots(self, minroot, heads, roots, includepath=False):
1233 1233 """return (heads(::<roots> and <roots>::<heads>))
1234 1234
1235 1235 If includepath is True, return (<roots>::<heads>)."""
1236 1236 try:
1237 1237 return self.index.reachableroots2(minroot, heads, roots,
1238 1238 includepath)
1239 1239 except AttributeError:
1240 1240 return dagop._reachablerootspure(self.parentrevs,
1241 1241 minroot, roots, heads, includepath)
1242 1242
1243 1243 def ancestor(self, a, b):
1244 1244 """calculate the "best" common ancestor of nodes a and b"""
1245 1245
1246 1246 a, b = self.rev(a), self.rev(b)
1247 1247 try:
1248 1248 ancs = self.index.ancestors(a, b)
1249 1249 except (AttributeError, OverflowError):
1250 1250 ancs = ancestor.ancestors(self.parentrevs, a, b)
1251 1251 if ancs:
1252 1252 # choose a consistent winner when there's a tie
1253 1253 return min(map(self.node, ancs))
1254 1254 return nullid
1255 1255
1256 1256 def _match(self, id):
1257 1257 if isinstance(id, int):
1258 1258 # rev
1259 1259 return self.node(id)
1260 1260 if len(id) == 20:
1261 1261 # possibly a binary node
1262 1262 # odds of a binary node being all hex in ASCII are 1 in 10**25
1263 1263 try:
1264 1264 node = id
1265 1265 self.rev(node) # quick search the index
1266 1266 return node
1267 1267 except error.LookupError:
1268 1268 pass # may be partial hex id
1269 1269 try:
1270 1270 # str(rev)
1271 1271 rev = int(id)
1272 1272 if "%d" % rev != id:
1273 1273 raise ValueError
1274 1274 if rev < 0:
1275 1275 rev = len(self) + rev
1276 1276 if rev < 0 or rev >= len(self):
1277 1277 raise ValueError
1278 1278 return self.node(rev)
1279 1279 except (ValueError, OverflowError):
1280 1280 pass
1281 1281 if len(id) == 40:
1282 1282 try:
1283 1283 # a full hex nodeid?
1284 1284 node = bin(id)
1285 1285 self.rev(node)
1286 1286 return node
1287 1287 except (TypeError, error.LookupError):
1288 1288 pass
1289 1289
1290 1290 def _partialmatch(self, id):
1291 1291 # we don't care about wdirfilenodeids as they should always be full hashes
1292 1292 maybewdir = wdirhex.startswith(id)
1293 1293 try:
1294 1294 partial = self.index.partialmatch(id)
1295 1295 if partial and self.hasnode(partial):
1296 1296 if maybewdir:
1297 1297 # single 'ff...' match in radix tree, ambiguous with wdir
1298 1298 raise error.RevlogError
1299 1299 return partial
1300 1300 if maybewdir:
1301 1301 # no 'ff...' match in radix tree, wdir identified
1302 1302 raise error.WdirUnsupported
1303 1303 return None
1304 1304 except error.RevlogError:
1305 1305 # parsers.c radix tree lookup gave multiple matches
1306 1306 # fast path: for unfiltered changelog, radix tree is accurate
1307 1307 if not getattr(self, 'filteredrevs', None):
1308 1308 raise error.AmbiguousPrefixLookupError(
1309 1309 id, self.indexfile, _('ambiguous identifier'))
1310 1310 # fall through to slow path that filters hidden revisions
1311 1311 except (AttributeError, ValueError):
1312 1312 # we are pure python, or key was too short to search radix tree
1313 1313 pass
1314 1314
1315 1315 if id in self._pcache:
1316 1316 return self._pcache[id]
1317 1317
1318 1318 if len(id) <= 40:
1319 1319 try:
1320 1320 # hex(node)[:...]
1321 1321 l = len(id) // 2 # grab an even number of digits
1322 1322 prefix = bin(id[:l * 2])
1323 1323 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1324 1324 nl = [n for n in nl if hex(n).startswith(id) and
1325 1325 self.hasnode(n)]
1326 1326 if nullhex.startswith(id):
1327 1327 nl.append(nullid)
1328 1328 if len(nl) > 0:
1329 1329 if len(nl) == 1 and not maybewdir:
1330 1330 self._pcache[id] = nl[0]
1331 1331 return nl[0]
1332 1332 raise error.AmbiguousPrefixLookupError(
1333 1333 id, self.indexfile, _('ambiguous identifier'))
1334 1334 if maybewdir:
1335 1335 raise error.WdirUnsupported
1336 1336 return None
1337 1337 except TypeError:
1338 1338 pass
1339 1339
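# Illustration only: the prefix handling above truncates an odd-length hex
# prefix to an even number of digits before the bin() conversion, then
# re-checks the candidates against the full prefix:
#
#     id = b'a1b2c'
#     l = len(id) // 2           # 2
#     prefix = bin(id[:l * 2])   # the two raw bytes for b'a1b2'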
1340 1340 def lookup(self, id):
1341 1341 """locate a node based on:
1342 1342 - revision number or str(revision number)
1343 1343 - nodeid or subset of hex nodeid
1344 1344 """
1345 1345 n = self._match(id)
1346 1346 if n is not None:
1347 1347 return n
1348 1348 n = self._partialmatch(id)
1349 1349 if n:
1350 1350 return n
1351 1351
1352 1352 raise error.LookupError(id, self.indexfile, _('no match found'))
1353 1353
1354 1354 def shortest(self, node, minlength=1):
1355 1355 """Find the shortest unambiguous prefix that matches node."""
1356 1356 def isvalid(prefix):
1357 1357 try:
1358 1358 matchednode = self._partialmatch(prefix)
1359 1359 except error.AmbiguousPrefixLookupError:
1360 1360 return False
1361 1361 except error.WdirUnsupported:
1362 1362 # single 'ff...' match
1363 1363 return True
1364 1364 if matchednode is None:
1365 1365 raise error.LookupError(node, self.indexfile, _('no node'))
1366 1366 return True
1367 1367
1368 1368 def maybewdir(prefix):
1369 1369 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1370 1370
1371 1371 hexnode = hex(node)
1372 1372
1373 1373 def disambiguate(hexnode, minlength):
1374 1374 """Disambiguate against wdirid."""
1375 1375 for length in range(minlength, 41):
1376 1376 prefix = hexnode[:length]
1377 1377 if not maybewdir(prefix):
1378 1378 return prefix
1379 1379
1380 1380 if not getattr(self, 'filteredrevs', None):
1381 1381 try:
1382 1382 length = max(self.index.shortest(node), minlength)
1383 1383 return disambiguate(hexnode, length)
1384 1384 except error.RevlogError:
1385 1385 if node != wdirid:
1386 1386 raise error.LookupError(node, self.indexfile, _('no node'))
1387 1387 except AttributeError:
1388 1388 # Fall through to pure code
1389 1389 pass
1390 1390
1391 1391 if node == wdirid:
1392 1392 for length in range(minlength, 41):
1393 1393 prefix = hexnode[:length]
1394 1394 if isvalid(prefix):
1395 1395 return prefix
1396 1396
1397 1397 for length in range(minlength, 41):
1398 1398 prefix = hexnode[:length]
1399 1399 if isvalid(prefix):
1400 1400 return disambiguate(hexnode, length)
1401 1401
1402 1402 def cmp(self, node, text):
1403 1403 """compare text with a given file revision
1404 1404
1405 1405 returns True if text is different from what is stored.
1406 1406 """
1407 1407 p1, p2 = self.parents(node)
1408 1408 return storageutil.hashrevisionsha1(text, p1, p2) != node
1409 1409
1410 1410 def _cachesegment(self, offset, data):
1411 1411 """Add a segment to the revlog cache.
1412 1412
1413 1413 Accepts an absolute offset and the data that is at that location.
1414 1414 """
1415 1415 o, d = self._chunkcache
1416 1416 # try to add to existing cache
1417 1417 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1418 1418 self._chunkcache = o, d + data
1419 1419 else:
1420 1420 self._chunkcache = offset, data
1421 1421
1422 1422 def _readsegment(self, offset, length, df=None):
1423 1423 """Load a segment of raw data from the revlog.
1424 1424
1425 1425 Accepts an absolute offset, length to read, and an optional existing
1426 1426 file handle to read from.
1427 1427
1428 1428 If an existing file handle is passed, it will be seeked and the
1429 1429 original seek position will NOT be restored.
1430 1430
1431 1431 Returns a str or buffer of raw byte data.
1432 1432
1433 1433 Raises if the requested number of bytes could not be read.
1434 1434 """
1435 1435 # Cache data both forward and backward around the requested
1436 1436 # data, in a fixed size window. This helps speed up operations
1437 1437 # involving reading the revlog backwards.
1438 1438 cachesize = self._chunkcachesize
1439 1439 realoffset = offset & ~(cachesize - 1)
1440 1440 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1441 1441 - realoffset)
1442 1442 with self._datareadfp(df) as df:
1443 1443 df.seek(realoffset)
1444 1444 d = df.read(reallength)
1445 1445
1446 1446 self._cachesegment(realoffset, d)
1447 1447 if offset != realoffset or reallength != length:
1448 1448 startoffset = offset - realoffset
1449 1449 if len(d) - startoffset < length:
1450 1450 raise error.RevlogError(
1451 1451 _('partial read of revlog %s; expected %d bytes from '
1452 1452 'offset %d, got %d') %
1453 1453 (self.indexfile if self._inline else self.datafile,
1454 1454 length, realoffset, len(d) - startoffset))
1455 1455
1456 1456 return util.buffer(d, startoffset, length)
1457 1457
1458 1458 if len(d) < length:
1459 1459 raise error.RevlogError(
1460 1460 _('partial read of revlog %s; expected %d bytes from offset '
1461 1461 '%d, got %d') %
1462 1462 (self.indexfile if self._inline else self.datafile,
1463 1463 length, offset, len(d)))
1464 1464
1465 1465 return d
1466 1466
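# Illustration only: the window alignment above with toy numbers and a
# 64 KiB cache size (the size must be a power of two):
#
#     offset, length, cachesize = 70000, 100, 65536
#     realoffset = offset & ~(cachesize - 1)                  # 65536
#     reallength = (((offset + length + cachesize)
#                    & ~(cachesize - 1)) - realoffset)        # 65536
#     # the aligned window [65536, 131072) covers [70000, 70100)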
1467 1467 def _getsegment(self, offset, length, df=None):
1468 1468 """Obtain a segment of raw data from the revlog.
1469 1469
1470 1470 Accepts an absolute offset, length of bytes to obtain, and an
1471 1471 optional file handle to the already-opened revlog. If the file
1472 1472 handle is used, its original seek position will not be preserved.
1473 1473
1474 1474 Requests for data may be returned from a cache.
1475 1475
1476 1476 Returns a str or a buffer instance of raw byte data.
1477 1477 """
1478 1478 o, d = self._chunkcache
1479 1479 l = len(d)
1480 1480
1481 1481 # is it in the cache?
1482 1482 cachestart = offset - o
1483 1483 cacheend = cachestart + length
1484 1484 if cachestart >= 0 and cacheend <= l:
1485 1485 if cachestart == 0 and cacheend == l:
1486 1486 return d # avoid a copy
1487 1487 return util.buffer(d, cachestart, cacheend - cachestart)
1488 1488
1489 1489 return self._readsegment(offset, length, df=df)
1490 1490
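# Illustration only: the cache-hit test above with toy numbers. With a
# cached window of 65536 bytes starting at o == 65536:
#
#     cachestart = 70000 - 65536       # 4464
#     cacheend = cachestart + 100      # 4564
#     0 <= cachestart and cacheend <= 65536   # served from the cache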
1491 1491 def _getsegmentforrevs(self, startrev, endrev, df=None):
1492 1492 """Obtain a segment of raw data corresponding to a range of revisions.
1493 1493
1494 1494 Accepts the start and end revisions and an optional already-open
1495 1495 file handle to be used for reading. If the file handle is used, its
1496 1496 seek position will not be preserved.
1497 1497
1498 1498 Requests for data may be satisfied by a cache.
1499 1499
1500 1500 Returns a 2-tuple of (offset, data) for the requested range of
1501 1501 revisions. Offset is the integer offset from the beginning of the
1502 1502 revlog and data is a str or buffer of the raw byte data.
1503 1503
1504 1504 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1505 1505 to determine where each revision's data begins and ends.
1506 1506 """
1507 1507 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1508 1508 # (functions are expensive).
1509 1509 index = self.index
1510 1510 istart = index[startrev]
1511 1511 start = int(istart[0] >> 16)
1512 1512 if startrev == endrev:
1513 1513 end = start + istart[1]
1514 1514 else:
1515 1515 iend = index[endrev]
1516 1516 end = int(iend[0] >> 16) + iend[1]
1517 1517
1518 1518 if self._inline:
1519 1519 start += (startrev + 1) * self._io.size
1520 1520 end += (endrev + 1) * self._io.size
1521 1521 length = end - start
1522 1522
1523 1523 return start, self._getsegment(start, length, df=df)
1524 1524
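# Illustration only: the inline adjustment above with toy numbers. If
# rev 2's data starts at raw offset 100 and each index entry is 64 bytes,
# three entries (revs 0..2) precede that data on disk:
#
#     start = 100 + (2 + 1) * 64       # 292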
1525 1525 def _chunk(self, rev, df=None):
1526 1526 """Obtain a single decompressed chunk for a revision.
1527 1527
1528 1528 Accepts an integer revision and an optional already-open file handle
1529 1529 to be used for reading. If used, the seek position of the file will not
1530 1530 be preserved.
1531 1531
1532 1532 Returns a str holding uncompressed data for the requested revision.
1533 1533 """
1534 1534 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1535 1535
1536 1536 def _chunks(self, revs, df=None, targetsize=None):
1537 1537 """Obtain decompressed chunks for the specified revisions.
1538 1538
1539 1539 Accepts an iterable of numeric revisions that are assumed to be in
1540 1540 ascending order. Also accepts an optional already-open file handle
1541 1541 to be used for reading. If used, the seek position of the file will
1542 1542 not be preserved.
1543 1543
1544 1544 This function is similar to calling ``self._chunk()`` multiple times,
1545 1545 but is faster.
1546 1546
1547 1547 Returns a list with decompressed data for each requested revision.
1548 1548 """
1549 1549 if not revs:
1550 1550 return []
1551 1551 start = self.start
1552 1552 length = self.length
1553 1553 inline = self._inline
1554 1554 iosize = self._io.size
1555 1555 buffer = util.buffer
1556 1556
1557 1557 l = []
1558 1558 ladd = l.append
1559 1559
1560 1560 if not self._withsparseread:
1561 1561 slicedchunks = (revs,)
1562 1562 else:
1563 1563 slicedchunks = deltautil.slicechunk(self, revs,
1564 1564 targetsize=targetsize)
1565 1565
1566 1566 for revschunk in slicedchunks:
1567 1567 firstrev = revschunk[0]
1568 1568 # Skip trailing revisions with empty diff
1569 1569 for lastrev in revschunk[::-1]:
1570 1570 if length(lastrev) != 0:
1571 1571 break
1572 1572
1573 1573 try:
1574 1574 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1575 1575 except OverflowError:
1576 1576 # issue4215 - we can't cache a run of chunks greater than
1577 1577 # 2G on Windows
1578 1578 return [self._chunk(rev, df=df) for rev in revschunk]
1579 1579
1580 1580 decomp = self.decompress
1581 1581 for rev in revschunk:
1582 1582 chunkstart = start(rev)
1583 1583 if inline:
1584 1584 chunkstart += (rev + 1) * iosize
1585 1585 chunklength = length(rev)
1586 1586 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1587 1587
1588 1588 return l
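
# Slicing sketch for the loop above (hypothetical numbers): one segment
# read covers several revisions, and each revision's compressed chunk
# is then a zero-copy slice of that shared buffer.
#
#   offset, data = self._getsegmentforrevs(10, 12)   # e.g. offset 4096
#   chunkstart = start(11)            # absolute offset, e.g. 5120
#   chunklength = length(11)          # e.g. 300
#   chunk = buffer(data, chunkstart - offset, chunklength)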
1589 1589
1590 1590 def _chunkclear(self):
1591 1591 """Clear the raw chunk cache."""
1592 1592 self._chunkcache = (0, '')
1593 1593
1594 1594 def deltaparent(self, rev):
1595 1595 """return deltaparent of the given revision"""
1596 1596 base = self.index[rev][3]
1597 1597 if base == rev:
1598 1598 return nullrev
1599 1599 elif self._generaldelta:
1600 1600 return base
1601 1601 else:
1602 1602 return rev - 1
1603 1603
1604 1604 def issnapshot(self, rev):
1605 1605 """tells whether rev is a snapshot
1606 1606 """
1607 1607 if not self._sparserevlog:
1608 1608 return self.deltaparent(rev) == nullrev
1609 1609 elif util.safehasattr(self.index, 'issnapshot'):
1610 1610 # assign the method directly to cache both the attribute test and the access
1611 1611 self.issnapshot = self.index.issnapshot
1612 1612 return self.issnapshot(rev)
1613 1613 if rev == nullrev:
1614 1614 return True
1615 1615 entry = self.index[rev]
1616 1616 base = entry[3]
1617 1617 if base == rev:
1618 1618 return True
1619 1619 if base == nullrev:
1620 1620 return True
1621 1621 p1 = entry[5]
1622 1622 p2 = entry[6]
1623 1623 if base == p1 or base == p2:
1624 1624 return False
1625 1625 return self.issnapshot(base)
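
# Sketch of the recursion above on a hypothetical sparse-revlog chain:
# full snapshots delta against nullrev, intermediate snapshots delta
# against another snapshot that is not one of their parents, and
# everything else is a plain delta.
#
#   rev 0: base == nullrev             -> snapshot (full text)
#   rev 5: base == 0, 0 not a parent   -> intermediate snapshot
#   rev 6: base == p1 == 5             -> plain delta, not a snapshot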
1626 1626
1627 1627 def snapshotdepth(self, rev):
1628 1628 """number of snapshot in the chain before this one"""
1629 1629 if not self.issnapshot(rev):
1630 1630 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1631 1631 return len(self._deltachain(rev)[0]) - 1
1632 1632
1633 1633 def revdiff(self, rev1, rev2):
1634 1634 """return or calculate a delta between two revisions
1635 1635
1636 1636 The delta calculated is in binary form and is intended to be written to
1637 1637 revlog data directly. So this function needs raw revision data.
1638 1638 """
1639 1639 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1640 1640 return bytes(self._chunk(rev2))
1641 1641
1642 1642 return mdiff.textdiff(self.revision(rev1, raw=True),
1643 1643 self.revision(rev2, raw=True))
1644 1644
1645 1645 def revision(self, nodeorrev, _df=None, raw=False):
1646 1646 """return an uncompressed revision of a given node or revision
1647 1647 number.
1648 1648
1649 1649 _df - an existing file handle to read from. (internal-only)
1650 1650 raw - an optional argument specifying if the revision data is to be
1651 1651 treated as raw data when applying flag transforms. 'raw' should be set
1652 1652 to True when generating changegroups or in debug commands.
1653 1653 """
1654 1654 return self._revisiondata(nodeorrev, _df, raw=raw)
1655 1655
1656 1656 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1657 1657 if isinstance(nodeorrev, int):
1658 1658 rev = nodeorrev
1659 1659 node = self.node(rev)
1660 1660 else:
1661 1661 node = nodeorrev
1662 1662 rev = None
1663 1663
1664 1664 cachedrev = None
1665 1665 flags = None
1666 1666 rawtext = None
1667 1667 if node == nullid:
1668 1668 return ""
1669 1669 if self._revisioncache:
1670 1670 if self._revisioncache[0] == node:
1671 1671 # _cache only stores rawtext
1672 1672 if raw:
1673 1673 return self._revisioncache[2]
1674 1674 # duplicated, but good for perf
1675 1675 if rev is None:
1676 1676 rev = self.rev(node)
1677 1677 if flags is None:
1678 1678 flags = self.flags(rev)
1679 1679 # no extra flags set, no flag processor runs, text = rawtext
1680 1680 if flags == REVIDX_DEFAULT_FLAGS:
1681 1681 return self._revisioncache[2]
1682 1682 # rawtext is reusable. need to run flag processor
1683 1683 rawtext = self._revisioncache[2]
1684 1684
1685 1685 cachedrev = self._revisioncache[1]
1686 1686
1687 1687 # look up what we need to read
1688 1688 if rawtext is None:
1689 1689 if rev is None:
1690 1690 rev = self.rev(node)
1691 1691
1692 1692 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1693 1693 if stopped:
1694 1694 rawtext = self._revisioncache[2]
1695 1695
1696 1696 # drop cache to save memory
1697 1697 self._revisioncache = None
1698 1698
1699 1699 targetsize = None
1700 1700 rawsize = self.index[rev][2]
1701 1701 if 0 <= rawsize:
1702 1702 targetsize = 4 * rawsize
1703 1703
1704 1704 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1705 1705 if rawtext is None:
1706 1706 rawtext = bytes(bins[0])
1707 1707 bins = bins[1:]
1708 1708
1709 1709 rawtext = mdiff.patches(rawtext, bins)
1710 1710 self._revisioncache = (node, rev, rawtext)
1711 1711
1712 1712 if flags is None:
1713 1713 if rev is None:
1714 1714 rev = self.rev(node)
1715 1715 flags = self.flags(rev)
1716 1716
1717 1717 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1718 1718 if validatehash:
1719 1719 self.checkhash(text, node, rev=rev)
1720 1720
1721 1721 return text
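
# A condensed sketch of the read pipeline above: rawtext is
# materialized by patching the chunks of the delta chain together, then
# flag processors (if any) turn rawtext into text.
#
#   chain, stopped = self._deltachain(rev)   # base .. rev
#   bins = self._chunks(chain)               # decompressed chunks
#   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
#   text, vhash = self._processflags(rawtext, flags, 'read', raw=raw)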
1722 1722
1723 def rawdata(self, nodeorrev, _df=None, raw=False):
1724 """return an uncompressed raw data of a given node or revision number.
1725
1726 _df - an existing file handle to read from. (internal-only)
1727 """
1728 return self._revisiondata(nodeorrev, _df, raw=True)
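
# Usage sketch (``rl`` being any revlog instance): ``rawdata()`` is a
# convenience wrapper strictly equivalent to calling ``revision()``
# with ``raw=True``; the ``raw`` parameter it accepts is ignored.
#
#   assert rl.rawdata(rev) == rl.revision(rev, raw=True)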
1729
1723 1730 def hash(self, text, p1, p2):
1724 1731 """Compute a node hash.
1725 1732
1726 1733 Available as a function so that subclasses can replace the hash
1727 1734 as needed.
1728 1735 """
1729 1736 return storageutil.hashrevisionsha1(text, p1, p2)
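
# A minimal sketch of the default node hash, assuming the standard
# SHA-1 definition implemented by ``storageutil.hashrevisionsha1``:
# the digest of the two parent nodes in sorted order, then the text.
#
#   import hashlib
#   def sketchhash(text, p1, p2):
#       s = hashlib.sha1(min(p1, p2))
#       s.update(max(p1, p2))
#       s.update(text)
#       return s.digest()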
1730 1737
1731 1738 def _processflags(self, text, flags, operation, raw=False):
1732 1739 """Inspect revision data flags and applies transforms defined by
1733 1740 registered flag processors.
1734 1741
1735 1742 ``text`` - the revision data to process
1736 1743 ``flags`` - the revision flags
1737 1744 ``operation`` - the operation being performed (read or write)
1738 1745 ``raw`` - an optional argument indicating whether the raw transform should be
1739 1746 applied.
1740 1747
1741 1748 This method processes the flags in the order (or reverse order if
1742 1749 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1743 1750 flag processors registered for present flags. The order of flags defined
1744 1751 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1745 1752
1746 1753 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1747 1754 processed text and ``validatehash`` is a bool indicating whether the
1748 1755 returned text should be checked for hash integrity.
1749 1756
1750 1757 Note: If the ``raw`` argument is set, it has precedence over the
1751 1758 operation and will only update the value of ``validatehash``.
1752 1759 """
1753 1760 # fast path: no flag processors will run
1754 1761 if flags == 0:
1755 1762 return text, True
1756 1763 if operation not in ('read', 'write'):
1757 1764 raise error.ProgrammingError(_("invalid '%s' operation") %
1758 1765 operation)
1759 1766 # Check all flags are known.
1760 1767 if flags & ~REVIDX_KNOWN_FLAGS:
1761 1768 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1762 1769 (flags & ~REVIDX_KNOWN_FLAGS))
1763 1770 validatehash = True
1764 1771 # Depending on the operation (read or write), the order might be
1765 1772 # reversed due to non-commutative transforms.
1766 1773 orderedflags = REVIDX_FLAGS_ORDER
1767 1774 if operation == 'write':
1768 1775 orderedflags = reversed(orderedflags)
1769 1776
1770 1777 for flag in orderedflags:
1771 1778 # If a flagprocessor has been registered for a known flag, apply the
1772 1779 # related operation transform and update result tuple.
1773 1780 if flag & flags:
1774 1781 vhash = True
1775 1782
1776 1783 if flag not in self._flagprocessors:
1777 1784 message = _("missing processor for flag '%#x'") % (flag)
1778 1785 raise error.RevlogError(message)
1779 1786
1780 1787 processor = self._flagprocessors[flag]
1781 1788 if processor is not None:
1782 1789 readtransform, writetransform, rawtransform = processor
1783 1790
1784 1791 if raw:
1785 1792 vhash = rawtransform(self, text)
1786 1793 elif operation == 'read':
1787 1794 text, vhash = readtransform(self, text)
1788 1795 else: # write operation
1789 1796 text, vhash = writetransform(self, text)
1790 1797 validatehash = validatehash and vhash
1791 1798
1792 1799 return text, validatehash
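
# Registration sketch (hypothetical transforms): a processor is a
# (read, write, raw) triple. Read/write transforms return
# ``(newtext, validatehash)``; the raw transform only reports whether
# the stored rawtext can be hash-checked. On 'write' the processors run
# in reverse REVIDX_FLAGS_ORDER so they undo the 'read' order.
#
#   def _read(rl, text):
#       return decodefn(text), True    # decodefn is hypothetical
#   def _write(rl, text):
#       return encodefn(text), True    # encodefn is hypothetical
#   def _raw(rl, text):
#       return False                   # rawtext hash is not checkable
#   addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))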
1793 1800
1794 1801 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1795 1802 """Check node hash integrity.
1796 1803
1797 1804 Available as a function so that subclasses can extend hash mismatch
1798 1805 behaviors as needed.
1799 1806 """
1800 1807 try:
1801 1808 if p1 is None and p2 is None:
1802 1809 p1, p2 = self.parents(node)
1803 1810 if node != self.hash(text, p1, p2):
1804 1811 # Clear the revision cache on hash failure. The revision cache
1805 1812 # only stores the raw revision and clearing the cache does have
1806 1813 # the side-effect that we won't have a cache hit when the raw
1807 1814 # revision data is accessed. But this case should be rare and
1808 1815 # it is extra work to teach the cache about the hash
1809 1816 # verification state.
1810 1817 if self._revisioncache and self._revisioncache[0] == node:
1811 1818 self._revisioncache = None
1812 1819
1813 1820 revornode = rev
1814 1821 if revornode is None:
1815 1822 revornode = templatefilters.short(hex(node))
1816 1823 raise error.RevlogError(_("integrity check failed on %s:%s")
1817 1824 % (self.indexfile, pycompat.bytestr(revornode)))
1818 1825 except error.RevlogError:
1819 1826 if self._censorable and storageutil.iscensoredtext(text):
1820 1827 raise error.CensoredNodeError(self.indexfile, node, text)
1821 1828 raise
1822 1829
1823 1830 def _enforceinlinesize(self, tr, fp=None):
1824 1831 """Check if the revlog is too big for inline and convert if so.
1825 1832
1826 1833 This should be called after revisions are added to the revlog. If the
1827 1834 revlog has grown too large to be an inline revlog, it will convert it
1828 1835 to use multiple index and data files.
1829 1836 """
1830 1837 tiprev = len(self) - 1
1831 1838 if (not self._inline or
1832 1839 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1833 1840 return
1834 1841
1835 1842 trinfo = tr.find(self.indexfile)
1836 1843 if trinfo is None:
1837 1844 raise error.RevlogError(_("%s not found in the transaction")
1838 1845 % self.indexfile)
1839 1846
1840 1847 trindex = trinfo[2]
1841 1848 if trindex is not None:
1842 1849 dataoff = self.start(trindex)
1843 1850 else:
1844 1851 # revlog was stripped at start of transaction, use all leftover data
1845 1852 trindex = len(self) - 1
1846 1853 dataoff = self.end(tiprev)
1847 1854
1848 1855 tr.add(self.datafile, dataoff)
1849 1856
1850 1857 if fp:
1851 1858 fp.flush()
1852 1859 fp.close()
1853 1860 # We can't use the cached file handle after close(). So prevent
1854 1861 # its usage.
1855 1862 self._writinghandles = None
1856 1863
1857 1864 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1858 1865 for r in self:
1859 1866 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1860 1867
1861 1868 with self._indexfp('w') as fp:
1862 1869 self.version &= ~FLAG_INLINE_DATA
1863 1870 self._inline = False
1864 1871 io = self._io
1865 1872 for i in self:
1866 1873 e = io.packentry(self.index[i], self.node, self.version, i)
1867 1874 fp.write(e)
1868 1875
1869 1876 # the temp file replaces the real index when we exit the context
1870 1877 # manager
1871 1878
1872 1879 tr.replace(self.indexfile, trindex * self._io.size)
1873 1880 self._chunkclear()
1874 1881
1875 1882 def _nodeduplicatecallback(self, transaction, node):
1876 1883 """called when trying to add a node already stored.
1877 1884 """
1878 1885
1879 1886 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1880 1887 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1881 1888 """add a revision to the log
1882 1889
1883 1890 text - the revision data to add
1884 1891 transaction - the transaction object used for rollback
1885 1892 link - the linkrev data to add
1886 1893 p1, p2 - the parent nodeids of the revision
1887 1894 cachedelta - an optional precomputed delta
1888 1895 node - nodeid of revision; typically node is not specified, and it is
1889 1896 computed by default as hash(text, p1, p2), however subclasses might
1890 1897 use a different hashing method (and override checkhash() in that case)
1891 1898 flags - the known flags to set on the revision
1892 1899 deltacomputer - an optional deltacomputer instance shared between
1893 1900 multiple calls
1894 1901 """
1895 1902 if link == nullrev:
1896 1903 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1897 1904 % self.indexfile)
1898 1905
1899 1906 if flags:
1900 1907 node = node or self.hash(text, p1, p2)
1901 1908
1902 1909 rawtext, validatehash = self._processflags(text, flags, 'write')
1903 1910
1904 1911 # If the flag processor modifies the revision data, ignore any provided
1905 1912 # cachedelta.
1906 1913 if rawtext != text:
1907 1914 cachedelta = None
1908 1915
1909 1916 if len(rawtext) > _maxentrysize:
1910 1917 raise error.RevlogError(
1911 1918 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1912 1919 % (self.indexfile, len(rawtext)))
1913 1920
1914 1921 node = node or self.hash(rawtext, p1, p2)
1915 1922 if node in self.nodemap:
1916 1923 return node
1917 1924
1918 1925 if validatehash:
1919 1926 self.checkhash(rawtext, node, p1=p1, p2=p2)
1920 1927
1921 1928 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1922 1929 flags, cachedelta=cachedelta,
1923 1930 deltacomputer=deltacomputer)
1924 1931
1925 1932 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1926 1933 cachedelta=None, deltacomputer=None):
1927 1934 """add a raw revision with known flags, node and parents
1928 1935 useful when reusing a revision not stored in this revlog (ex: received
1929 1936 over the wire, or read from an external bundle).
1930 1937 """
1931 1938 dfh = None
1932 1939 if not self._inline:
1933 1940 dfh = self._datafp("a+")
1934 1941 ifh = self._indexfp("a+")
1935 1942 try:
1936 1943 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1937 1944 flags, cachedelta, ifh, dfh,
1938 1945 deltacomputer=deltacomputer)
1939 1946 finally:
1940 1947 if dfh:
1941 1948 dfh.close()
1942 1949 ifh.close()
1943 1950
1944 1951 def compress(self, data):
1945 1952 """Generate a possibly-compressed representation of data."""
1946 1953 if not data:
1947 1954 return '', data
1948 1955
1949 1956 compressed = self._compressor.compress(data)
1950 1957
1951 1958 if compressed:
1952 1959 # The revlog compressor added the header in the returned data.
1953 1960 return '', compressed
1954 1961
1955 1962 if data[0:1] == '\0':
1956 1963 return '', data
1957 1964 return 'u', data
1958 1965
1959 1966 def decompress(self, data):
1960 1967 """Decompress a revlog chunk.
1961 1968
1962 1969 The chunk is expected to begin with a header identifying the
1963 1970 format type so it can be routed to an appropriate decompressor.
1964 1971 """
1965 1972 if not data:
1966 1973 return data
1967 1974
1968 1975 # Revlogs are read much more frequently than they are written and many
1969 1976 # chunks only take microseconds to decompress, so performance is
1970 1977 # important here.
1971 1978 #
1972 1979 # We can make a few assumptions about revlogs:
1973 1980 #
1974 1981 # 1) the majority of chunks will be compressed (as opposed to inline
1975 1982 # raw data).
1976 1983 # 2) decompressing *any* data will likely be at least 10x slower than
1977 1984 # returning raw inline data.
1978 1985 # 3) we want to prioritize common and officially supported compression
1979 1986 # engines
1980 1987 #
1981 1988 # It follows that we want to optimize for "decompress compressed data
1982 1989 # when encoded with common and officially supported compression engines"
1983 1990 # case over "raw data" and "data encoded by less common or non-official
1984 1991 # compression engines." That is why we have the inline lookup first
1985 1992 # followed by the compengines lookup.
1986 1993 #
1987 1994 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1988 1995 # compressed chunks. And this matters for changelog and manifest reads.
1989 1996 t = data[0:1]
1990 1997
1991 1998 if t == 'x':
1992 1999 try:
1993 2000 return _zlibdecompress(data)
1994 2001 except zlib.error as e:
1995 2002 raise error.RevlogError(_('revlog decompress error: %s') %
1996 2003 stringutil.forcebytestr(e))
1997 2004 # '\0' is more common than 'u' so it goes first.
1998 2005 elif t == '\0':
1999 2006 return data
2000 2007 elif t == 'u':
2001 2008 return util.buffer(data, 1)
2002 2009
2003 2010 try:
2004 2011 compressor = self._decompressors[t]
2005 2012 except KeyError:
2006 2013 try:
2007 2014 engine = util.compengines.forrevlogheader(t)
2008 2015 compressor = engine.revlogcompressor(self._compengineopts)
2009 2016 self._decompressors[t] = compressor
2010 2017 except KeyError:
2011 2018 raise error.RevlogError(_('unknown compression type %r') % t)
2012 2019
2013 2020 return compressor.decompress(data)
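
# Round-trip sketch of the one-byte header scheme handled above
# (illustrative inputs; zlib is the default engine):
#
#   compress(b'')          -> ('', b'')        # empty stays empty
#   compress(b'a' * 4096)  -> ('', b'x...')    # zlib wins, 'x' header
#   compress(b'\0binary')  -> ('', b'\0...')   # NUL lead byte: stored raw
#   compress(b'short')     -> ('u', b'short')  # not worth compressing
#
#   decompress() then routes on the first byte: 'x' inflates with zlib,
#   '\0' returns the data as-is, 'u' strips the marker, and any other
#   header is looked up in the registered compression engines.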
2014 2021
2015 2022 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2016 2023 cachedelta, ifh, dfh, alwayscache=False,
2017 2024 deltacomputer=None):
2018 2025 """internal function to add revisions to the log
2019 2026
2020 2027 see addrevision for argument descriptions.
2021 2028
2022 2029 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2023 2030
2024 2031 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2025 2032 be used.
2026 2033
2027 2034 invariants:
2028 2035 - rawtext is optional (can be None); if not set, cachedelta must be set.
2029 2036 if both are set, they must correspond to each other.
2030 2037 """
2031 2038 if node == nullid:
2032 2039 raise error.RevlogError(_("%s: attempt to add null revision") %
2033 2040 self.indexfile)
2034 2041 if node == wdirid or node in wdirfilenodeids:
2035 2042 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2036 2043 self.indexfile)
2037 2044
2038 2045 if self._inline:
2039 2046 fh = ifh
2040 2047 else:
2041 2048 fh = dfh
2042 2049
2043 2050 btext = [rawtext]
2044 2051
2045 2052 curr = len(self)
2046 2053 prev = curr - 1
2047 2054 offset = self.end(prev)
2048 2055 p1r, p2r = self.rev(p1), self.rev(p2)
2049 2056
2050 2057 # full versions are inserted when the needed deltas
2051 2058 # become comparable to the uncompressed text
2052 2059 if rawtext is None:
2053 2060 # need rawtext size, before changed by flag processors, which is
2054 2061 # the non-raw size. use revlog explicitly to avoid filelog's extra
2055 2062 # logic that might remove metadata size.
2056 2063 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2057 2064 cachedelta[1])
2058 2065 else:
2059 2066 textlen = len(rawtext)
2060 2067
2061 2068 if deltacomputer is None:
2062 2069 deltacomputer = deltautil.deltacomputer(self)
2063 2070
2064 2071 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2065 2072
2066 2073 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2067 2074
2068 2075 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2069 2076 deltainfo.base, link, p1r, p2r, node)
2070 2077 self.index.append(e)
2071 2078 self.nodemap[node] = curr
2072 2079
2073 2080 # Reset the pure node cache start lookup offset to account for new
2074 2081 # revision.
2075 2082 if self._nodepos is not None:
2076 2083 self._nodepos = curr
2077 2084
2078 2085 entry = self._io.packentry(e, self.node, self.version, curr)
2079 2086 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2080 2087 link, offset)
2081 2088
2082 2089 rawtext = btext[0]
2083 2090
2084 2091 if alwayscache and rawtext is None:
2085 2092 rawtext = deltacomputer.buildtext(revinfo, fh)
2086 2093
2087 2094 if type(rawtext) == bytes: # only accept immutable objects
2088 2095 self._revisioncache = (node, curr, rawtext)
2089 2096 self._chainbasecache[curr] = deltainfo.chainbase
2090 2097 return node
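
# Field layout of the index entry ``e`` built above (v1 index):
#
#   e = (offset_type(offset, flags),  # (offset << 16) | flags
#        deltainfo.deltalen,          # length of the stored chunk
#        textlen,                     # uncompressed rawtext length
#        deltainfo.base,              # delta base revision
#        link,                        # linkrev
#        p1r, p2r,                    # parent revisions
#        node)                        # binary node id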
2091 2098
2092 2099 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2093 2100 # Files opened in a+ mode have inconsistent behavior on various
2094 2101 # platforms. Windows requires that a file positioning call be made
2095 2102 # when the file handle transitions between reads and writes. See
2096 2103 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2097 2104 # platforms, Python or the platform itself can be buggy. Some versions
2098 2105 # of Solaris have been observed to not append at the end of the file
2099 2106 # if the file was seeked to before the end. See issue4943 for more.
2100 2107 #
2101 2108 # We work around this issue by inserting a seek() before writing.
2102 2109 # Note: This is likely not necessary on Python 3. However, because
2103 2110 # the file handle is reused for reads and may be seeked there, we need
2104 2111 # to be careful before changing this.
2105 2112 ifh.seek(0, os.SEEK_END)
2106 2113 if dfh:
2107 2114 dfh.seek(0, os.SEEK_END)
2108 2115
2109 2116 curr = len(self) - 1
2110 2117 if not self._inline:
2111 2118 transaction.add(self.datafile, offset)
2112 2119 transaction.add(self.indexfile, curr * len(entry))
2113 2120 if data[0]:
2114 2121 dfh.write(data[0])
2115 2122 dfh.write(data[1])
2116 2123 ifh.write(entry)
2117 2124 else:
2118 2125 offset += curr * self._io.size
2119 2126 transaction.add(self.indexfile, offset, curr)
2120 2127 ifh.write(entry)
2121 2128 ifh.write(data[0])
2122 2129 ifh.write(data[1])
2123 2130 self._enforceinlinesize(transaction, ifh)
2124 2131
2125 2132 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2126 2133 """
2127 2134 add a delta group
2128 2135
2129 2136 given a set of deltas, add them to the revision log. the
2130 2137 first delta is against its parent, which should be in our
2131 2138 log, the rest are against the previous delta.
2132 2139
2133 2140 If ``addrevisioncb`` is defined, it will be called with arguments of
2134 2141 this revlog and the node that was added.
2135 2142 """
2136 2143
2137 2144 if self._writinghandles:
2138 2145 raise error.ProgrammingError('cannot nest addgroup() calls')
2139 2146
2140 2147 nodes = []
2141 2148
2142 2149 r = len(self)
2143 2150 end = 0
2144 2151 if r:
2145 2152 end = self.end(r - 1)
2146 2153 ifh = self._indexfp("a+")
2147 2154 isize = r * self._io.size
2148 2155 if self._inline:
2149 2156 transaction.add(self.indexfile, end + isize, r)
2150 2157 dfh = None
2151 2158 else:
2152 2159 transaction.add(self.indexfile, isize, r)
2153 2160 transaction.add(self.datafile, end)
2154 2161 dfh = self._datafp("a+")
2155 2162 def flush():
2156 2163 if dfh:
2157 2164 dfh.flush()
2158 2165 ifh.flush()
2159 2166
2160 2167 self._writinghandles = (ifh, dfh)
2161 2168
2162 2169 try:
2163 2170 deltacomputer = deltautil.deltacomputer(self)
2164 2171 # loop through our set of deltas
2165 2172 for data in deltas:
2166 2173 node, p1, p2, linknode, deltabase, delta, flags = data
2167 2174 link = linkmapper(linknode)
2168 2175 flags = flags or REVIDX_DEFAULT_FLAGS
2169 2176
2170 2177 nodes.append(node)
2171 2178
2172 2179 if node in self.nodemap:
2173 2180 self._nodeduplicatecallback(transaction, node)
2174 2181 # this can happen if two branches make the same change
2175 2182 continue
2176 2183
2177 2184 for p in (p1, p2):
2178 2185 if p not in self.nodemap:
2179 2186 raise error.LookupError(p, self.indexfile,
2180 2187 _('unknown parent'))
2181 2188
2182 2189 if deltabase not in self.nodemap:
2183 2190 raise error.LookupError(deltabase, self.indexfile,
2184 2191 _('unknown delta base'))
2185 2192
2186 2193 baserev = self.rev(deltabase)
2187 2194
2188 2195 if baserev != nullrev and self.iscensored(baserev):
2189 2196 # if base is censored, delta must be full replacement in a
2190 2197 # single patch operation
2191 2198 hlen = struct.calcsize(">lll")
2192 2199 oldlen = self.rawsize(baserev)
2193 2200 newlen = len(delta) - hlen
2194 2201 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2195 2202 raise error.CensoredBaseError(self.indexfile,
2196 2203 self.node(baserev))
2197 2204
2198 2205 if not flags and self._peek_iscensored(baserev, delta, flush):
2199 2206 flags |= REVIDX_ISCENSORED
2200 2207
2201 2208 # We assume consumers of addrevisioncb will want to retrieve
2202 2209 # the added revision, which will require a call to
2203 2210 # revision(). revision() will fast path if there is a cache
2204 2211 # hit. So, we tell _addrevision() to always cache in this case.
2205 2212 # We're only using addgroup() in the context of changegroup
2206 2213 # generation so the revision data can always be handled as raw
2207 2214 # by the flagprocessor.
2208 2215 self._addrevision(node, None, transaction, link,
2209 2216 p1, p2, flags, (baserev, delta),
2210 2217 ifh, dfh,
2211 2218 alwayscache=bool(addrevisioncb),
2212 2219 deltacomputer=deltacomputer)
2213 2220
2214 2221 if addrevisioncb:
2215 2222 addrevisioncb(self, node)
2216 2223
2217 2224 if not dfh and not self._inline:
2218 2225 # addrevision switched from inline to conventional
2219 2226 # reopen the index
2220 2227 ifh.close()
2221 2228 dfh = self._datafp("a+")
2222 2229 ifh = self._indexfp("a+")
2223 2230 self._writinghandles = (ifh, dfh)
2224 2231 finally:
2225 2232 self._writinghandles = None
2226 2233
2227 2234 if dfh:
2228 2235 dfh.close()
2229 2236 ifh.close()
2230 2237
2231 2238 return nodes
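
# Shape of one ``deltas`` entry, as unpacked at the top of the loop:
#
#   node, p1, p2, linknode, deltabase, delta, flags = data
#   # node      - node id of the revision being added
#   # p1, p2    - parent node ids (must already be present)
#   # linknode  - node mapped to a linkrev via ``linkmapper``
#   # deltabase - node the delta applies against (must be present)
#   # delta     - binary patch producing the revision's rawtext
#   # flags     - revision flags, or REVIDX_DEFAULT_FLAGS if falsy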
2232 2239
2233 2240 def iscensored(self, rev):
2234 2241 """Check if a file revision is censored."""
2235 2242 if not self._censorable:
2236 2243 return False
2237 2244
2238 2245 return self.flags(rev) & REVIDX_ISCENSORED
2239 2246
2240 2247 def _peek_iscensored(self, baserev, delta, flush):
2241 2248 """Quickly check if a delta produces a censored revision."""
2242 2249 if not self._censorable:
2243 2250 return False
2244 2251
2245 2252 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2246 2253
2247 2254 def getstrippoint(self, minlink):
2248 2255 """find the minimum rev that must be stripped to strip the linkrev
2249 2256
2250 2257 Returns a tuple containing the minimum rev and a set of all revs that
2251 2258 have linkrevs that will be broken by this strip.
2252 2259 """
2253 2260 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2254 2261 self.headrevs(),
2255 2262 self.linkrev, self.parentrevs)
2256 2263
2257 2264 def strip(self, minlink, transaction):
2258 2265 """truncate the revlog on the first revision with a linkrev >= minlink
2259 2266
2260 2267 This function is called when we're stripping revision minlink and
2261 2268 its descendants from the repository.
2262 2269
2263 2270 We have to remove all revisions with linkrev >= minlink, because
2264 2271 the equivalent changelog revisions will be renumbered after the
2265 2272 strip.
2266 2273
2267 2274 So we truncate the revlog on the first of these revisions, and
2268 2275 trust that the caller has saved the revisions that shouldn't be
2269 2276 removed and that it'll re-add them after this truncation.
2270 2277 """
2271 2278 if len(self) == 0:
2272 2279 return
2273 2280
2274 2281 rev, _ = self.getstrippoint(minlink)
2275 2282 if rev == len(self):
2276 2283 return
2277 2284
2278 2285 # first truncate the files on disk
2279 2286 end = self.start(rev)
2280 2287 if not self._inline:
2281 2288 transaction.add(self.datafile, end)
2282 2289 end = rev * self._io.size
2283 2290 else:
2284 2291 end += rev * self._io.size
2285 2292
2286 2293 transaction.add(self.indexfile, end)
2287 2294
2288 2295 # then reset internal state in memory to forget those revisions
2289 2296 self._revisioncache = None
2290 2297 self._chaininfocache = {}
2291 2298 self._chunkclear()
2292 2299 for x in pycompat.xrange(rev, len(self)):
2293 2300 del self.nodemap[self.node(x)]
2294 2301
2295 2302 del self.index[rev:-1]
2296 2303 self._nodepos = None
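
# Truncation offsets used above, sketched for a hypothetical ``rev``:
#
#   end = self.start(rev)             # first data byte being stripped
#   # split revlog: truncate the data file at ``end`` and the index
#   # file at ``rev`` fixed-size entries
#   # inline revlog: both streams share one file, so truncate at
#   # ``end + rev * self._io.size``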
2297 2304
2298 2305 def checksize(self):
2299 2306 """Check size of index and data files
2300 2307
2301 2308 return a (dd, di) tuple.
2302 2309 - dd: extra bytes for the "data" file
2303 2310 - di: extra bytes for the "index" file
2304 2311
2305 2312 A healthy revlog will return (0, 0).
2306 2313 """
2307 2314 expected = 0
2308 2315 if len(self):
2309 2316 expected = max(0, self.end(len(self) - 1))
2310 2317
2311 2318 try:
2312 2319 with self._datafp() as f:
2313 2320 f.seek(0, io.SEEK_END)
2314 2321 actual = f.tell()
2315 2322 dd = actual - expected
2316 2323 except IOError as inst:
2317 2324 if inst.errno != errno.ENOENT:
2318 2325 raise
2319 2326 dd = 0
2320 2327
2321 2328 try:
2322 2329 f = self.opener(self.indexfile)
2323 2330 f.seek(0, io.SEEK_END)
2324 2331 actual = f.tell()
2325 2332 f.close()
2326 2333 s = self._io.size
2327 2334 i = max(0, actual // s)
2328 2335 di = actual - (i * s)
2329 2336 if self._inline:
2330 2337 databytes = 0
2331 2338 for r in self:
2332 2339 databytes += max(0, self.length(r))
2333 2340 dd = 0
2334 2341 di = actual - len(self) * s - databytes
2335 2342 except IOError as inst:
2336 2343 if inst.errno != errno.ENOENT:
2337 2344 raise
2338 2345 di = 0
2339 2346
2340 2347 return (dd, di)
2341 2348
2342 2349 def files(self):
2343 2350 res = [self.indexfile]
2344 2351 if not self._inline:
2345 2352 res.append(self.datafile)
2346 2353 return res
2347 2354
2348 2355 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2349 2356 assumehaveparentrevisions=False,
2350 2357 deltamode=repository.CG_DELTAMODE_STD):
2351 2358 if nodesorder not in ('nodes', 'storage', 'linear', None):
2352 2359 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2353 2360 nodesorder)
2354 2361
2355 2362 if nodesorder is None and not self._generaldelta:
2356 2363 nodesorder = 'storage'
2357 2364
2358 2365 if (not self._storedeltachains and
2359 2366 deltamode != repository.CG_DELTAMODE_PREV):
2360 2367 deltamode = repository.CG_DELTAMODE_FULL
2361 2368
2362 2369 return storageutil.emitrevisions(
2363 2370 self, nodes, nodesorder, revlogrevisiondelta,
2364 2371 deltaparentfn=self.deltaparent,
2365 2372 candeltafn=self.candelta,
2366 2373 rawsizefn=self.rawsize,
2367 2374 revdifffn=self.revdiff,
2368 2375 flagsfn=self.flags,
2369 2376 deltamode=deltamode,
2370 2377 revisiondata=revisiondata,
2371 2378 assumehaveparentrevisions=assumehaveparentrevisions)
2372 2379
2373 2380 DELTAREUSEALWAYS = 'always'
2374 2381 DELTAREUSESAMEREVS = 'samerevs'
2375 2382 DELTAREUSENEVER = 'never'
2376 2383
2377 2384 DELTAREUSEFULLADD = 'fulladd'
2378 2385
2379 2386 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2380 2387
2381 2388 def clone(self, tr, destrevlog, addrevisioncb=None,
2382 2389 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2383 2390 """Copy this revlog to another, possibly with format changes.
2384 2391
2385 2392 The destination revlog will contain the same revisions and nodes.
2386 2393 However, it may not be bit-for-bit identical due to e.g. delta encoding
2387 2394 differences.
2388 2395
2389 2396 The ``deltareuse`` argument controls how deltas from the existing revlog
2390 2397 are preserved in the destination revlog. The argument can have the
2391 2398 following values:
2392 2399
2393 2400 DELTAREUSEALWAYS
2394 2401 Deltas will always be reused (if possible), even if the destination
2395 2402 revlog would not select the same revisions for the delta. This is the
2396 2403 fastest mode of operation.
2397 2404 DELTAREUSESAMEREVS
2398 2405 Deltas will be reused if the destination revlog would pick the same
2399 2406 revisions for the delta. This mode strikes a balance between speed
2400 2407 and optimization.
2401 2408 DELTAREUSENEVER
2402 2409 Deltas will never be reused. This is the slowest mode of execution.
2403 2410 This mode can be used to recompute deltas (e.g. if the diff/delta
2404 2411 algorithm changes).
2405 2412
2406 2413 Delta computation can be slow, so the choice of delta reuse policy can
2407 2414 significantly affect run time.
2408 2415
2409 2416 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2410 2417 two extremes. Deltas will be reused if they are appropriate. But if the
2411 2418 delta could choose a better revision, it will do so. This means if you
2412 2419 are converting a non-generaldelta revlog to a generaldelta revlog,
2413 2420 deltas will be recomputed if the delta's parent isn't a parent of the
2414 2421 revision.
2415 2422
2416 2423 In addition to the delta policy, the ``forcedeltabothparents``
2417 2424 argument controls whether to force computing deltas against both
2418 2425 parents for merges. When unset, the destination's setting is kept.
2419 2426 """
2420 2427 if deltareuse not in self.DELTAREUSEALL:
2421 2428 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2422 2429
2423 2430 if len(destrevlog):
2424 2431 raise ValueError(_('destination revlog is not empty'))
2425 2432
2426 2433 if getattr(self, 'filteredrevs', None):
2427 2434 raise ValueError(_('source revlog has filtered revisions'))
2428 2435 if getattr(destrevlog, 'filteredrevs', None):
2429 2436 raise ValueError(_('destination revlog has filtered revisions'))
2430 2437
2431 2438 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2432 2439 # if possible.
2433 2440 oldlazydelta = destrevlog._lazydelta
2434 2441 oldlazydeltabase = destrevlog._lazydeltabase
2435 2442 oldamd = destrevlog._deltabothparents
2436 2443
2437 2444 try:
2438 2445 if deltareuse == self.DELTAREUSEALWAYS:
2439 2446 destrevlog._lazydeltabase = True
2440 2447 destrevlog._lazydelta = True
2441 2448 elif deltareuse == self.DELTAREUSESAMEREVS:
2442 2449 destrevlog._lazydeltabase = False
2443 2450 destrevlog._lazydelta = True
2444 2451 elif deltareuse == self.DELTAREUSENEVER:
2445 2452 destrevlog._lazydeltabase = False
2446 2453 destrevlog._lazydelta = False
2447 2454
2448 2455 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2449 2456
2450 2457 deltacomputer = deltautil.deltacomputer(destrevlog)
2451 2458 index = self.index
2452 2459 for rev in self:
2453 2460 entry = index[rev]
2454 2461
2455 2462 # Some classes override linkrev to take filtered revs into
2456 2463 # account. Use raw entry from index.
2457 2464 flags = entry[0] & 0xffff
2458 2465 linkrev = entry[4]
2459 2466 p1 = index[entry[5]][7]
2460 2467 p2 = index[entry[6]][7]
2461 2468 node = entry[7]
2462 2469
2463 2470 # (Possibly) reuse the delta from the revlog if allowed and
2464 2471 # the revlog chunk is a delta.
2465 2472 cachedelta = None
2466 2473 rawtext = None
2467 2474 if (deltareuse != self.DELTAREUSEFULLADD
2468 2475 and destrevlog._lazydelta):
2469 2476 dp = self.deltaparent(rev)
2470 2477 if dp != nullrev:
2471 2478 cachedelta = (dp, bytes(self._chunk(rev)))
2472 2479
2473 2480 if not cachedelta:
2474 2481 rawtext = self.revision(rev, raw=True)
2475 2482
2476 2483
2477 2484 if deltareuse == self.DELTAREUSEFULLADD:
2478 2485 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2479 2486 cachedelta=cachedelta,
2480 2487 node=node, flags=flags,
2481 2488 deltacomputer=deltacomputer)
2482 2489 else:
2483 2490 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2484 2491 checkambig=False)
2485 2492 dfh = None
2486 2493 if not destrevlog._inline:
2487 2494 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2488 2495 try:
2489 2496 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2490 2497 p2, flags, cachedelta, ifh, dfh,
2491 2498 deltacomputer=deltacomputer)
2492 2499 finally:
2493 2500 if dfh:
2494 2501 dfh.close()
2495 2502 ifh.close()
2496 2503
2497 2504 if addrevisioncb:
2498 2505 addrevisioncb(self, rev, node)
2499 2506 finally:
2500 2507 destrevlog._lazydelta = oldlazydelta
2501 2508 destrevlog._lazydeltabase = oldlazydeltabase
2502 2509 destrevlog._deltabothparents = oldamd
2503 2510
2504 2511 def censorrevision(self, tr, censornode, tombstone=b''):
2505 2512 if (self.version & 0xFFFF) == REVLOGV0:
2506 2513 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2507 2514 self.version)
2508 2515
2509 2516 censorrev = self.rev(censornode)
2510 2517 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2511 2518
2512 2519 if len(tombstone) > self.rawsize(censorrev):
2513 2520 raise error.Abort(_('censor tombstone must be no longer than '
2514 2521 'censored data'))
2515 2522
2516 2523 # Rewriting the revlog in place is hard. Our strategy for censoring is
2517 2524 # to create a new revlog, copy all revisions to it, then replace the
2518 2525 # revlogs on transaction close.
2519 2526
2520 2527 newindexfile = self.indexfile + b'.tmpcensored'
2521 2528 newdatafile = self.datafile + b'.tmpcensored'
2522 2529
2523 2530 # This is a bit dangerous. We could easily have a mismatch of state.
2524 2531 newrl = revlog(self.opener, newindexfile, newdatafile,
2525 2532 censorable=True)
2526 2533 newrl.version = self.version
2527 2534 newrl._generaldelta = self._generaldelta
2528 2535 newrl._io = self._io
2529 2536
2530 2537 for rev in self.revs():
2531 2538 node = self.node(rev)
2532 2539 p1, p2 = self.parents(node)
2533 2540
2534 2541 if rev == censorrev:
2535 2542 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2536 2543 p1, p2, censornode, REVIDX_ISCENSORED)
2537 2544
2538 2545 if newrl.deltaparent(rev) != nullrev:
2539 2546 raise error.Abort(_('censored revision stored as delta; '
2540 2547 'cannot censor'),
2541 2548 hint=_('censoring of revlogs is not '
2542 2549 'fully implemented; please report '
2543 2550 'this bug'))
2544 2551 continue
2545 2552
2546 2553 if self.iscensored(rev):
2547 2554 if self.deltaparent(rev) != nullrev:
2548 2555 raise error.Abort(_('cannot censor due to censored '
2549 2556 'revision having delta stored'))
2550 2557 rawtext = self._chunk(rev)
2551 2558 else:
2552 2559 rawtext = self.revision(rev, raw=True)
2553 2560
2554 2561 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2555 2562 self.flags(rev))
2556 2563
2557 2564 tr.addbackup(self.indexfile, location='store')
2558 2565 if not self._inline:
2559 2566 tr.addbackup(self.datafile, location='store')
2560 2567
2561 2568 self.opener.rename(newrl.indexfile, self.indexfile)
2562 2569 if not self._inline:
2563 2570 self.opener.rename(newrl.datafile, self.datafile)
2564 2571
2565 2572 self.clearcaches()
2566 2573 self._loadindex()
2567 2574
2568 2575 def verifyintegrity(self, state):
2569 2576 """Verifies the integrity of the revlog.
2570 2577
2571 2578 Yields ``revlogproblem`` instances describing problems that are
2572 2579 found.
2573 2580 """
2574 2581 dd, di = self.checksize()
2575 2582 if dd:
2576 2583 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2577 2584 if di:
2578 2585 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2579 2586
2580 2587 version = self.version & 0xFFFF
2581 2588
2582 2589 # The verifier tells us what version revlog we should be.
2583 2590 if version != state['expectedversion']:
2584 2591 yield revlogproblem(
2585 2592 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2586 2593 (self.indexfile, version, state['expectedversion']))
2587 2594
2588 2595 state['skipread'] = set()
2589 2596
2590 2597 for rev in self:
2591 2598 node = self.node(rev)
2592 2599
2593 2600 # Verify contents. 4 cases to care about:
2594 2601 #
2595 2602 # common: the most common case
2596 2603 # rename: with a rename
2597 2604 # meta: file content starts with b'\1\n', the metadata
2598 2605 # header defined in filelog.py, but without a rename
2599 2606 # ext: content stored externally
2600 2607 #
2601 2608 # More formally, their differences are shown below:
2602 2609 #
2603 2610 # | common | rename | meta | ext
2604 2611 # -------------------------------------------------------
2605 2612 # flags() | 0 | 0 | 0 | not 0
2606 2613 # renamed() | False | True | False | ?
2607 2614 # rawtext[0:2]=='\1\n'| False | True | True | ?
2608 2615 #
2609 2616 # "rawtext" means the raw text stored in revlog data, which
2610 2617 # could be retrieved by "revision(rev, raw=True)". "text"
2611 2618 # mentioned below is "revision(rev, raw=False)".
2612 2619 #
2613 2620 # There are 3 different lengths stored physically:
2614 2621 # 1. L1: rawsize, stored in revlog index
2615 2622 # 2. L2: len(rawtext), stored in revlog data
2616 2623 # 3. L3: len(text), stored in revlog data if flags==0, or
2617 2624 # possibly somewhere else if flags!=0
2618 2625 #
2619 2626 # L1 should be equal to L2. L3 could be different from them.
2620 2627 # "text" may or may not affect commit hash depending on flag
2621 2628 # processors (see revlog.addflagprocessor).
2622 2629 #
2623 2630 # | common | rename | meta | ext
2624 2631 # -------------------------------------------------
2625 2632 # rawsize() | L1 | L1 | L1 | L1
2626 2633 # size() | L1 | L2-LM | L1(*) | L1 (?)
2627 2634 # len(rawtext) | L2 | L2 | L2 | L2
2628 2635 # len(text) | L2 | L2 | L2 | L3
2629 2636 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2630 2637 #
2631 2638 # LM: length of metadata, depending on rawtext
2632 2639 # (*): not ideal, see comment in filelog.size
2633 2640 # (?): could be "- len(meta)" if the resolved content has
2634 2641 # rename metadata
2635 2642 #
2636 2643 # Checks needed to be done:
2637 2644 # 1. length check: L1 == L2, in all cases.
2638 2645 # 2. hash check: depending on flag processor, we may need to
2639 2646 # use either "text" (external), or "rawtext" (in revlog).
2640 2647
2641 2648 try:
2642 2649 skipflags = state.get('skipflags', 0)
2643 2650 if skipflags:
2644 2651 skipflags &= self.flags(rev)
2645 2652
2646 2653 if skipflags:
2647 2654 state['skipread'].add(node)
2648 2655 else:
2649 2656 # Side-effect: read content and verify hash.
2650 2657 self.revision(node)
2651 2658
2652 2659 l1 = self.rawsize(rev)
2653 2660 l2 = len(self.revision(node, raw=True))
2654 2661
2655 2662 if l1 != l2:
2656 2663 yield revlogproblem(
2657 2664 error=_('unpacked size is %d, %d expected') % (l2, l1),
2658 2665 node=node)
2659 2666
2660 2667 except error.CensoredNodeError:
2661 2668 if state['erroroncensored']:
2662 2669 yield revlogproblem(error=_('censored file data'),
2663 2670 node=node)
2664 2671 state['skipread'].add(node)
2665 2672 except Exception as e:
2666 2673 yield revlogproblem(
2667 2674 error=_('unpacking %s: %s') % (short(node),
2668 2675 stringutil.forcebytestr(e)),
2669 2676 node=node)
2670 2677 state['skipread'].add(node)
2671 2678
2672 2679 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2673 2680 revisionscount=False, trackedsize=False,
2674 2681 storedsize=False):
2675 2682 d = {}
2676 2683
2677 2684 if exclusivefiles:
2678 2685 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2679 2686 if not self._inline:
2680 2687 d['exclusivefiles'].append((self.opener, self.datafile))
2681 2688
2682 2689 if sharedfiles:
2683 2690 d['sharedfiles'] = []
2684 2691
2685 2692 if revisionscount:
2686 2693 d['revisionscount'] = len(self)
2687 2694
2688 2695 if trackedsize:
2689 2696 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2690 2697
2691 2698 if storedsize:
2692 2699 d['storedsize'] = sum(self.opener.stat(path).st_size
2693 2700 for path in self.files())
2694 2701
2695 2702 return d