sidedata: make sure we don't use the flag if there are not sidedata...
marmoute
r43307:7902001a default
@@ -1,2678 +1,2679 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 57 REVIDX_SIDEDATA,
58 58 )
59 59 from .thirdparty import (
60 60 attr,
61 61 )
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # bare references to all the names above, so pyflakes does not flag them
87 87 # as unused imports. We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod(r'parsers')
107 107 rustancestor = policy.importrust(r'ancestor')
108 108 rustdagop = policy.importrust(r'dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121 def ellipsiswriteprocessor(rl, text, sidedata):
122 122 return text, False
123 123
124 124 def ellipsisrawprocessor(rl, text):
125 125 return False
126 126
127 127 ellipsisprocessor = (
128 128 ellipsisreadprocessor,
129 129 ellipsiswriteprocessor,
130 130 ellipsisrawprocessor,
131 131 )
132 132
133 133 def getoffset(q):
134 134 return int(q >> 16)
135 135
136 136 def gettype(q):
137 137 return int(q & 0xFFFF)
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError('unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
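# Illustrative sketch (editor's addition): the first index field packs a
# 48-bit data offset and 16 bits of flags into a single integer, so the
# helpers above are plain shifts and masks:
#
#   >>> v = offset_type(1048576, REVIDX_ISCENSORED)
#   >>> getoffset(v)
#   1048576
#   >>> gettype(v) == REVIDX_ISCENSORED
#   True
#
# offset_type() rejects any bit outside flagutil.REVIDX_KNOWN_FLAGS with a
# ValueError, keeping unknown flags from entering the index.
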
144 144 @attr.s(slots=True, frozen=True)
145 145 class _revisioninfo(object):
146 146 """Information about a revision that allows building its fulltext
147 147 node: expected hash of the revision
148 148 p1, p2: parent revs of the revision
149 149 btext: built text cache consisting of a one-element list
150 150 cachedelta: (baserev, uncompressed_delta) or None
151 151 flags: flags associated with the revision storage
152 152
153 153 One of btext[0] or cachedelta must be set.
154 154 """
155 155 node = attr.ib()
156 156 p1 = attr.ib()
157 157 p2 = attr.ib()
158 158 btext = attr.ib()
159 159 textlen = attr.ib()
160 160 cachedelta = attr.ib()
161 161 flags = attr.ib()
162 162
163 163 @interfaceutil.implementer(repository.irevisiondelta)
164 164 @attr.s(slots=True)
165 165 class revlogrevisiondelta(object):
166 166 node = attr.ib()
167 167 p1node = attr.ib()
168 168 p2node = attr.ib()
169 169 basenode = attr.ib()
170 170 flags = attr.ib()
171 171 baserevisionsize = attr.ib()
172 172 revision = attr.ib()
173 173 delta = attr.ib()
174 174 linknode = attr.ib(default=None)
175 175
176 176 @interfaceutil.implementer(repository.iverifyproblem)
177 177 @attr.s(frozen=True)
178 178 class revlogproblem(object):
179 179 warning = attr.ib(default=None)
180 180 error = attr.ib(default=None)
181 181 node = attr.ib(default=None)
182 182
183 183 # index v0:
184 184 # 4 bytes: offset
185 185 # 4 bytes: compressed length
186 186 # 4 bytes: base rev
187 187 # 4 bytes: link rev
188 188 # 20 bytes: parent 1 nodeid
189 189 # 20 bytes: parent 2 nodeid
190 190 # 20 bytes: nodeid
191 191 indexformatv0 = struct.Struct(">4l20s20s20s")
192 192 indexformatv0_pack = indexformatv0.pack
193 193 indexformatv0_unpack = indexformatv0.unpack
194 194
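# Illustrative sketch (editor's addition): a v0 index entry is a fixed
# 76-byte record, so each one can be decoded with a single unpack:
#
#   >>> indexformatv0.size
#   76
#   >>> offset, clen, base, link, p1, p2, nid = indexformatv0_unpack(entry)
#
# where `entry` is a hypothetical 76-byte slice of the index file; p1, p2
# and nid are raw 20-byte binary nodeids (v0 stores no uncompressed-length
# field).
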
195 195 class revlogoldindex(list):
196 196 def __getitem__(self, i):
197 197 if i == -1:
198 198 return (0, 0, 0, -1, -1, -1, -1, nullid)
199 199 return list.__getitem__(self, i)
200 200
201 201 class revlogoldio(object):
202 202 def __init__(self):
203 203 self.size = indexformatv0.size
204 204
205 205 def parseindex(self, data, inline):
206 206 s = self.size
207 207 index = []
208 208 nodemap = {nullid: nullrev}
209 209 n = off = 0
210 210 l = len(data)
211 211 while off + s <= l:
212 212 cur = data[off:off + s]
213 213 off += s
214 214 e = indexformatv0_unpack(cur)
215 215 # transform to revlogv1 format
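            # v1 tuple layout produced below: (offset_type(offset, 0),
            # compressed length, uncompressed length (unknown in v0, hence
            # -1), base rev, link rev, p1 rev, p2 rev, binary nodeid);
            # parent nodeids are mapped to revs through the nodemap built
            # so far.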
216 216 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
217 217 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
218 218 index.append(e2)
219 219 nodemap[e[6]] = n
220 220 n += 1
221 221
222 222 return revlogoldindex(index), nodemap, None
223 223
224 224 def packentry(self, entry, node, version, rev):
225 225 if gettype(entry[0]):
226 226 raise error.RevlogError(_('index entry flags need revlog '
227 227 'version 1'))
228 228 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
229 229 node(entry[5]), node(entry[6]), entry[7])
230 230 return indexformatv0_pack(*e2)
231 231
232 232 # index ng:
233 233 # 6 bytes: offset
234 234 # 2 bytes: flags
235 235 # 4 bytes: compressed length
236 236 # 4 bytes: uncompressed length
237 237 # 4 bytes: base rev
238 238 # 4 bytes: link rev
239 239 # 4 bytes: parent 1 rev
240 240 # 4 bytes: parent 2 rev
241 241 # 32 bytes: nodeid
242 242 indexformatng = struct.Struct(">Qiiiiii20s12x")
243 243 indexformatng_pack = indexformatng.pack
244 244 versionformat = struct.Struct(">I")
245 245 versionformat_pack = versionformat.pack
246 246 versionformat_unpack = versionformat.unpack
247 247
248 248 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
249 249 # signed integer)
250 250 _maxentrysize = 0x7fffffff
251 251
252 252 class revlogio(object):
253 253 def __init__(self):
254 254 self.size = indexformatng.size
255 255
256 256 def parseindex(self, data, inline):
257 257 # call the C implementation to parse the index data
258 258 index, cache = parsers.parse_index2(data, inline)
259 259 return index, getattr(index, 'nodemap', None), cache
260 260
261 261 def packentry(self, entry, node, version, rev):
262 262 p = indexformatng_pack(*entry)
263 263 if rev == 0:
264 264 p = versionformat_pack(version) + p[4:]
265 265 return p
266 266
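# Illustrative sketch (editor's addition): v1/v2 revlogs carry no separate
# header; packentry() above smuggles the version+flags word into the first
# 4 bytes of entry 0, which for revision 0 (offset 0, 16-bit flags) are
# always zero anyway:
#
#   >>> p = indexformatng_pack(*entry0)   # entry0: hypothetical index tuple
#   >>> p = versionformat_pack(version) + p[4:]
#   >>> versionformat_unpack(p[:4])[0] == version
#   True
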
267 267 class revlog(object):
268 268 """
269 269 the underlying revision storage object
270 270
271 271 A revlog consists of two parts, an index and the revision data.
272 272
273 273 The index is a file with a fixed record size containing
274 274 information on each revision, including its nodeid (hash), the
275 275 nodeids of its parents, the position and offset of its data within
276 276 the data file, and the revision it's based on. Finally, each entry
277 277 contains a linkrev entry that can serve as a pointer to external
278 278 data.
279 279
280 280 The revision data itself is a linear collection of data chunks.
281 281 Each chunk represents a revision and is usually represented as a
282 282 delta against the previous chunk. To bound lookup time, runs of
283 283 deltas are limited to about 2 times the length of the original
284 284 version data. This makes retrieval of a version proportional to
285 285 its size, or O(1) relative to the number of revisions.
286 286
287 287 Both pieces of the revlog are written to in an append-only
288 288 fashion, which means we never need to rewrite a file to insert or
289 289 remove data, and can use some simple techniques to avoid the need
290 290 for locking while reading.
291 291
292 292 If checkambig, indexfile is opened with checkambig=True at
293 293 writing, to avoid file stat ambiguity.
294 294
295 295 If mmaplargeindex is True, and an mmapindexthreshold is set, the
296 296 index will be mmapped rather than read if it is larger than the
297 297 configured threshold.
298 298
299 299 If censorable is True, the revlog can have censored revisions.
300 300
301 301 If `upperboundcomp` is not None, this is the expected maximal gain from
302 302 compression for the data content.
303 303 """
304 304
305 305 _flagserrorclass = error.RevlogError
306 306
307 307 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
308 308 mmaplargeindex=False, censorable=False,
309 309 upperboundcomp=None):
310 310 """
311 311 create a revlog object
312 312
313 313 opener is a function that abstracts the file opening operation
314 314 and can be used to implement COW semantics or the like.
315 315
316 316 """
317 317 self.upperboundcomp = upperboundcomp
318 318 self.indexfile = indexfile
319 319 self.datafile = datafile or (indexfile[:-2] + ".d")
320 320 self.opener = opener
321 321 # When True, indexfile is opened with checkambig=True at writing, to
322 322 # avoid file stat ambiguity.
323 323 self._checkambig = checkambig
324 324 self._mmaplargeindex = mmaplargeindex
325 325 self._censorable = censorable
326 326 # 3-tuple of (node, rev, text) for a raw revision.
327 327 self._revisioncache = None
328 328 # Maps rev to chain base rev.
329 329 self._chainbasecache = util.lrucachedict(100)
330 330 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
331 331 self._chunkcache = (0, '')
332 332 # How much data to read and cache into the raw revlog data cache.
333 333 self._chunkcachesize = 65536
334 334 self._maxchainlen = None
335 335 self._deltabothparents = True
336 336 self.index = []
337 337 # Mapping of partial identifiers to full nodes.
338 338 self._pcache = {}
339 339 # Mapping of revision integer to full node.
340 340 self._nodecache = {nullid: nullrev}
341 341 self._nodepos = None
342 342 self._compengine = 'zlib'
343 343 self._compengineopts = {}
344 344 self._maxdeltachainspan = -1
345 345 self._withsparseread = False
346 346 self._sparserevlog = False
347 347 self._srdensitythreshold = 0.50
348 348 self._srmingapsize = 262144
349 349
350 350 # Make copy of flag processors so each revlog instance can support
351 351 # custom flags.
352 352 self._flagprocessors = dict(flagutil.flagprocessors)
353 353
354 354 # 2-tuple of file handles being used for active writing.
355 355 self._writinghandles = None
356 356
357 357 self._loadindex()
358 358
359 359 def _loadindex(self):
360 360 mmapindexthreshold = None
361 361 opts = self.opener.options
362 362
363 363 if 'revlogv2' in opts:
364 364 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
365 365 elif 'revlogv1' in opts:
366 366 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
367 367 if 'generaldelta' in opts:
368 368 newversionflags |= FLAG_GENERALDELTA
369 369 elif 'revlogv0' in self.opener.options:
370 370 newversionflags = REVLOGV0
371 371 else:
372 372 newversionflags = REVLOG_DEFAULT_VERSION
373 373
374 374 if 'chunkcachesize' in opts:
375 375 self._chunkcachesize = opts['chunkcachesize']
376 376 if 'maxchainlen' in opts:
377 377 self._maxchainlen = opts['maxchainlen']
378 378 if 'deltabothparents' in opts:
379 379 self._deltabothparents = opts['deltabothparents']
380 380 self._lazydelta = bool(opts.get('lazydelta', True))
381 381 self._lazydeltabase = False
382 382 if self._lazydelta:
383 383 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
384 384 if 'compengine' in opts:
385 385 self._compengine = opts['compengine']
386 386 if 'zlib.level' in opts:
387 387 self._compengineopts['zlib.level'] = opts['zlib.level']
388 388 if 'zstd.level' in opts:
389 389 self._compengineopts['zstd.level'] = opts['zstd.level']
390 390 if 'maxdeltachainspan' in opts:
391 391 self._maxdeltachainspan = opts['maxdeltachainspan']
392 392 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
393 393 mmapindexthreshold = opts['mmapindexthreshold']
394 394 self.hassidedata = bool(opts.get('side-data', False))
395 395 if self.hassidedata:
396 396 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
397 397 self._sparserevlog = bool(opts.get('sparse-revlog', False))
398 398 withsparseread = bool(opts.get('with-sparse-read', False))
399 399 # sparse-revlog forces sparse-read
400 400 self._withsparseread = self._sparserevlog or withsparseread
401 401 if 'sparse-read-density-threshold' in opts:
402 402 self._srdensitythreshold = opts['sparse-read-density-threshold']
403 403 if 'sparse-read-min-gap-size' in opts:
404 404 self._srmingapsize = opts['sparse-read-min-gap-size']
405 405 if opts.get('enableellipsis'):
406 406 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
407 407
408 408 # revlog v0 doesn't have flag processors
409 409 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
410 410 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
411 411
412 412 if self._chunkcachesize <= 0:
413 413 raise error.RevlogError(_('revlog chunk cache size %r is not '
414 414 'greater than 0') % self._chunkcachesize)
415 415 elif self._chunkcachesize & (self._chunkcachesize - 1):
416 416 raise error.RevlogError(_('revlog chunk cache size %r is not a '
417 417 'power of 2') % self._chunkcachesize)
418 418
419 419 indexdata = ''
420 420 self._initempty = True
421 421 try:
422 422 with self._indexfp() as f:
423 423 if (mmapindexthreshold is not None and
424 424 self.opener.fstat(f).st_size >= mmapindexthreshold):
425 425 # TODO: should .close() to release resources without
426 426 # relying on Python GC
427 427 indexdata = util.buffer(util.mmapread(f))
428 428 else:
429 429 indexdata = f.read()
430 430 if len(indexdata) > 0:
431 431 versionflags = versionformat_unpack(indexdata[:4])[0]
432 432 self._initempty = False
433 433 else:
434 434 versionflags = newversionflags
435 435 except IOError as inst:
436 436 if inst.errno != errno.ENOENT:
437 437 raise
438 438
439 439 versionflags = newversionflags
440 440
441 441 self.version = versionflags
442 442
443 443 flags = versionflags & ~0xFFFF
444 444 fmt = versionflags & 0xFFFF
445 445
446 446 if fmt == REVLOGV0:
447 447 if flags:
448 448 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
449 449 'revlog %s') %
450 450 (flags >> 16, fmt, self.indexfile))
451 451
452 452 self._inline = False
453 453 self._generaldelta = False
454 454
455 455 elif fmt == REVLOGV1:
456 456 if flags & ~REVLOGV1_FLAGS:
457 457 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
458 458 'revlog %s') %
459 459 (flags >> 16, fmt, self.indexfile))
460 460
461 461 self._inline = versionflags & FLAG_INLINE_DATA
462 462 self._generaldelta = versionflags & FLAG_GENERALDELTA
463 463
464 464 elif fmt == REVLOGV2:
465 465 if flags & ~REVLOGV2_FLAGS:
466 466 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
467 467 'revlog %s') %
468 468 (flags >> 16, fmt, self.indexfile))
469 469
470 470 self._inline = versionflags & FLAG_INLINE_DATA
471 471 # generaldelta is implied by version 2 revlogs.
472 472 self._generaldelta = True
473 473
474 474 else:
475 475 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
476 476 (fmt, self.indexfile))
477 477 # sparse-revlog can't be on without general-delta (issue6056)
478 478 if not self._generaldelta:
479 479 self._sparserevlog = False
480 480
481 481 self._storedeltachains = True
482 482
483 483 self._io = revlogio()
484 484 if self.version == REVLOGV0:
485 485 self._io = revlogoldio()
486 486 try:
487 487 d = self._io.parseindex(indexdata, self._inline)
488 488 except (ValueError, IndexError):
489 489 raise error.RevlogError(_("index %s is corrupted") %
490 490 self.indexfile)
491 491 self.index, nodemap, self._chunkcache = d
492 492 if nodemap is not None:
493 493 self.nodemap = self._nodecache = nodemap
494 494 if not self._chunkcache:
495 495 self._chunkclear()
496 496 # revnum -> (chain-length, sum-delta-length)
497 497 self._chaininfocache = {}
498 498 # revlog header -> revlog compressor
499 499 self._decompressors = {}
500 500
501 501 @util.propertycache
502 502 def _compressor(self):
503 503 engine = util.compengines[self._compengine]
504 504 return engine.revlogcompressor(self._compengineopts)
505 505
506 506 def _indexfp(self, mode='r'):
507 507 """file object for the revlog's index file"""
508 508 args = {r'mode': mode}
509 509 if mode != 'r':
510 510 args[r'checkambig'] = self._checkambig
511 511 if mode == 'w':
512 512 args[r'atomictemp'] = True
513 513 return self.opener(self.indexfile, **args)
514 514
515 515 def _datafp(self, mode='r'):
516 516 """file object for the revlog's data file"""
517 517 return self.opener(self.datafile, mode=mode)
518 518
519 519 @contextlib.contextmanager
520 520 def _datareadfp(self, existingfp=None):
521 521 """file object suitable to read data"""
522 522 # Use explicit file handle, if given.
523 523 if existingfp is not None:
524 524 yield existingfp
525 525
526 526 # Use a file handle being actively used for writes, if available.
527 527 # There is some danger to doing this because reads will seek the
528 528 # file. However, _writeentry() performs a SEEK_END before all writes,
529 529 # so we should be safe.
530 530 elif self._writinghandles:
531 531 if self._inline:
532 532 yield self._writinghandles[0]
533 533 else:
534 534 yield self._writinghandles[1]
535 535
536 536 # Otherwise open a new file handle.
537 537 else:
538 538 if self._inline:
539 539 func = self._indexfp
540 540 else:
541 541 func = self._datafp
542 542 with func() as fp:
543 543 yield fp
544 544
545 545 def tip(self):
546 546 return self.node(len(self.index) - 1)
547 547 def __contains__(self, rev):
548 548 return 0 <= rev < len(self)
549 549 def __len__(self):
550 550 return len(self.index)
551 551 def __iter__(self):
552 552 return iter(pycompat.xrange(len(self)))
553 553 def revs(self, start=0, stop=None):
554 554 """iterate over all rev in this revlog (from start to stop)"""
555 555 return storageutil.iterrevs(len(self), start=start, stop=stop)
556 556
557 557 @util.propertycache
558 558 def nodemap(self):
559 559 if self.index:
560 560 # populate mapping down to the initial node
561 561 node0 = self.index[0][7] # get around changelog filtering
562 562 self.rev(node0)
563 563 return self._nodecache
564 564
565 565 def hasnode(self, node):
566 566 try:
567 567 self.rev(node)
568 568 return True
569 569 except KeyError:
570 570 return False
571 571
572 572 def candelta(self, baserev, rev):
573 573 """whether two revisions (baserev, rev) can be delta-ed or not"""
574 574 # Disable delta if either rev requires a content-changing flag
575 575 # processor (ex. LFS). This is because such a flag processor can alter
576 576 # the rawtext content that the delta will be based on, and two clients
577 577 # could have the same revlog node with different flags (i.e. different
578 578 # rawtext contents) and the delta could be incompatible.
579 579 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
580 580 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
581 581 return False
582 582 return True
583 583
584 584 def clearcaches(self):
585 585 self._revisioncache = None
586 586 self._chainbasecache.clear()
587 587 self._chunkcache = (0, '')
588 588 self._pcache = {}
589 589
590 590 try:
591 591 # If we are using the native C version, we are in a fun case
592 592 # where self.index, self.nodemap and self._nodecache are the same
593 593 # object.
594 594 self._nodecache.clearcaches()
595 595 except AttributeError:
596 596 self._nodecache = {nullid: nullrev}
597 597 self._nodepos = None
598 598
599 599 def rev(self, node):
600 600 try:
601 601 return self._nodecache[node]
602 602 except TypeError:
603 603 raise
604 604 except error.RevlogError:
605 605 # parsers.c radix tree lookup failed
606 606 if node == wdirid or node in wdirfilenodeids:
607 607 raise error.WdirUnsupported
608 608 raise error.LookupError(node, self.indexfile, _('no node'))
609 609 except KeyError:
610 610 # pure python cache lookup failed
611 611 n = self._nodecache
612 612 i = self.index
613 613 p = self._nodepos
614 614 if p is None:
615 615 p = len(i) - 1
616 616 else:
617 617 assert p < len(i)
618 618 for r in pycompat.xrange(p, -1, -1):
619 619 v = i[r][7]
620 620 n[v] = r
621 621 if v == node:
622 622 self._nodepos = r - 1
623 623 return r
624 624 if node == wdirid or node in wdirfilenodeids:
625 625 raise error.WdirUnsupported
626 626 raise error.LookupError(node, self.indexfile, _('no node'))
627 627
628 628 # Accessors for index entries.
629 629
630 630 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
631 631 # are flags.
632 632 def start(self, rev):
633 633 return int(self.index[rev][0] >> 16)
634 634
635 635 def flags(self, rev):
636 636 return self.index[rev][0] & 0xFFFF
637 637
638 638 def length(self, rev):
639 639 return self.index[rev][1]
640 640
641 641 def rawsize(self, rev):
642 642 """return the length of the uncompressed text for a given revision"""
643 643 l = self.index[rev][2]
644 644 if l >= 0:
645 645 return l
646 646
647 647 t = self.rawdata(rev)
648 648 return len(t)
649 649
650 650 def size(self, rev):
651 651 """length of non-raw text (processed by a "read" flag processor)"""
652 652 # fast path: if no "read" flag processor could change the content,
653 653 # size is rawsize. note: ELLIPSIS is known to not change the content.
654 654 flags = self.flags(rev)
655 655 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
656 656 return self.rawsize(rev)
657 657
658 658 return len(self.revision(rev, raw=False))
659 659
660 660 def chainbase(self, rev):
661 661 base = self._chainbasecache.get(rev)
662 662 if base is not None:
663 663 return base
664 664
665 665 index = self.index
666 666 iterrev = rev
667 667 base = index[iterrev][3]
668 668 while base != iterrev:
669 669 iterrev = base
670 670 base = index[iterrev][3]
671 671
672 672 self._chainbasecache[rev] = base
673 673 return base
674 674
675 675 def linkrev(self, rev):
676 676 return self.index[rev][4]
677 677
678 678 def parentrevs(self, rev):
679 679 try:
680 680 entry = self.index[rev]
681 681 except IndexError:
682 682 if rev == wdirrev:
683 683 raise error.WdirUnsupported
684 684 raise
685 685
686 686 return entry[5], entry[6]
687 687
688 688 # fast parentrevs(rev) where rev isn't filtered
689 689 _uncheckedparentrevs = parentrevs
690 690
691 691 def node(self, rev):
692 692 try:
693 693 return self.index[rev][7]
694 694 except IndexError:
695 695 if rev == wdirrev:
696 696 raise error.WdirUnsupported
697 697 raise
698 698
699 699 # Derived from index values.
700 700
701 701 def end(self, rev):
702 702 return self.start(rev) + self.length(rev)
703 703
704 704 def parents(self, node):
705 705 i = self.index
706 706 d = i[self.rev(node)]
707 707 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
708 708
709 709 def chainlen(self, rev):
710 710 return self._chaininfo(rev)[0]
711 711
712 712 def _chaininfo(self, rev):
713 713 chaininfocache = self._chaininfocache
714 714 if rev in chaininfocache:
715 715 return chaininfocache[rev]
716 716 index = self.index
717 717 generaldelta = self._generaldelta
718 718 iterrev = rev
719 719 e = index[iterrev]
720 720 clen = 0
721 721 compresseddeltalen = 0
722 722 while iterrev != e[3]:
723 723 clen += 1
724 724 compresseddeltalen += e[1]
725 725 if generaldelta:
726 726 iterrev = e[3]
727 727 else:
728 728 iterrev -= 1
729 729 if iterrev in chaininfocache:
730 730 t = chaininfocache[iterrev]
731 731 clen += t[0]
732 732 compresseddeltalen += t[1]
733 733 break
734 734 e = index[iterrev]
735 735 else:
736 736 # Add text length of base since decompressing that also takes
737 737 # work. For cache hits the length is already included.
738 738 compresseddeltalen += e[1]
739 739 r = (clen, compresseddeltalen)
740 740 chaininfocache[rev] = r
741 741 return r
742 742
743 743 def _deltachain(self, rev, stoprev=None):
744 744 """Obtain the delta chain for a revision.
745 745
746 746 ``stoprev`` specifies a revision to stop at. If not specified, we
747 747 stop at the base of the chain.
748 748
749 749 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
750 750 revs in ascending order and ``stopped`` is a bool indicating whether
751 751 ``stoprev`` was hit.
752 752 """
753 753 # Try C implementation.
754 754 try:
755 755 return self.index.deltachain(rev, stoprev, self._generaldelta)
756 756 except AttributeError:
757 757 pass
758 758
759 759 chain = []
760 760
761 761 # Alias to prevent attribute lookup in tight loop.
762 762 index = self.index
763 763 generaldelta = self._generaldelta
764 764
765 765 iterrev = rev
766 766 e = index[iterrev]
767 767 while iterrev != e[3] and iterrev != stoprev:
768 768 chain.append(iterrev)
769 769 if generaldelta:
770 770 iterrev = e[3]
771 771 else:
772 772 iterrev -= 1
773 773 e = index[iterrev]
774 774
775 775 if iterrev == stoprev:
776 776 stopped = True
777 777 else:
778 778 chain.append(iterrev)
779 779 stopped = False
780 780
781 781 chain.reverse()
782 782 return chain, stopped
783 783
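    # Illustrative sketch (editor's addition): for a generaldelta revlog
    # where rev 5 deltas against rev 3, rev 3 against rev 1, and rev 1 is
    # a full snapshot (its base is itself), _deltachain(5) walks the base
    # pointers and returns ([1, 3, 5], False); _deltachain(5, stoprev=3)
    # returns ([5], True) because the walk stops before emitting the stop
    # revision itself.
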
784 784 def ancestors(self, revs, stoprev=0, inclusive=False):
785 785 """Generate the ancestors of 'revs' in reverse revision order.
786 786 Does not generate revs lower than stoprev.
787 787
788 788 See the documentation for ancestor.lazyancestors for more details."""
789 789
790 790 # first, make sure start revisions aren't filtered
791 791 revs = list(revs)
792 792 checkrev = self.node
793 793 for r in revs:
794 794 checkrev(r)
795 795 # and we're sure ancestors aren't filtered as well
796 796
797 797 if rustancestor is not None:
798 798 lazyancestors = rustancestor.LazyAncestors
799 799 arg = self.index
800 800 elif util.safehasattr(parsers, 'rustlazyancestors'):
801 801 lazyancestors = ancestor.rustlazyancestors
802 802 arg = self.index
803 803 else:
804 804 lazyancestors = ancestor.lazyancestors
805 805 arg = self._uncheckedparentrevs
806 806 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
807 807
808 808 def descendants(self, revs):
809 809 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
810 810
811 811 def findcommonmissing(self, common=None, heads=None):
812 812 """Return a tuple of the ancestors of common and the ancestors of heads
813 813 that are not ancestors of common. In revset terminology, we return the
814 814 tuple:
815 815
816 816 ::common, (::heads) - (::common)
817 817
818 818 The list is sorted by revision number, meaning it is
819 819 topologically sorted.
820 820
821 821 'heads' and 'common' are both lists of node IDs. If heads is
822 822 not supplied, uses all of the revlog's heads. If common is not
823 823 supplied, uses nullid."""
824 824 if common is None:
825 825 common = [nullid]
826 826 if heads is None:
827 827 heads = self.heads()
828 828
829 829 common = [self.rev(n) for n in common]
830 830 heads = [self.rev(n) for n in heads]
831 831
832 832 # we want the ancestors, but inclusive
833 833 class lazyset(object):
834 834 def __init__(self, lazyvalues):
835 835 self.addedvalues = set()
836 836 self.lazyvalues = lazyvalues
837 837
838 838 def __contains__(self, value):
839 839 return value in self.addedvalues or value in self.lazyvalues
840 840
841 841 def __iter__(self):
842 842 added = self.addedvalues
843 843 for r in added:
844 844 yield r
845 845 for r in self.lazyvalues:
846 846 if not r in added:
847 847 yield r
848 848
849 849 def add(self, value):
850 850 self.addedvalues.add(value)
851 851
852 852 def update(self, values):
853 853 self.addedvalues.update(values)
854 854
855 855 has = lazyset(self.ancestors(common))
856 856 has.add(nullrev)
857 857 has.update(common)
858 858
859 859 # take all ancestors from heads that aren't in has
860 860 missing = set()
861 861 visit = collections.deque(r for r in heads if r not in has)
862 862 while visit:
863 863 r = visit.popleft()
864 864 if r in missing:
865 865 continue
866 866 else:
867 867 missing.add(r)
868 868 for p in self.parentrevs(r):
869 869 if p not in has:
870 870 visit.append(p)
871 871 missing = list(missing)
872 872 missing.sort()
873 873 return has, [self.node(miss) for miss in missing]
874 874
875 875 def incrementalmissingrevs(self, common=None):
876 876 """Return an object that can be used to incrementally compute the
877 877 revision numbers of the ancestors of arbitrary sets that are not
878 878 ancestors of common. This is an ancestor.incrementalmissingancestors
879 879 object.
880 880
881 881 'common' is a list of revision numbers. If common is not supplied, uses
882 882 nullrev.
883 883 """
884 884 if common is None:
885 885 common = [nullrev]
886 886
887 887 if rustancestor is not None:
888 888 return rustancestor.MissingAncestors(self.index, common)
889 889 return ancestor.incrementalmissingancestors(self.parentrevs, common)
890 890
891 891 def findmissingrevs(self, common=None, heads=None):
892 892 """Return the revision numbers of the ancestors of heads that
893 893 are not ancestors of common.
894 894
895 895 More specifically, return a list of revision numbers corresponding to
896 896 nodes N such that every N satisfies the following constraints:
897 897
898 898 1. N is an ancestor of some node in 'heads'
899 899 2. N is not an ancestor of any node in 'common'
900 900
901 901 The list is sorted by revision number, meaning it is
902 902 topologically sorted.
903 903
904 904 'heads' and 'common' are both lists of revision numbers. If heads is
905 905 not supplied, uses all of the revlog's heads. If common is not
906 906 supplied, uses nullrev."""
907 907 if common is None:
908 908 common = [nullrev]
909 909 if heads is None:
910 910 heads = self.headrevs()
911 911
912 912 inc = self.incrementalmissingrevs(common=common)
913 913 return inc.missingancestors(heads)
914 914
915 915 def findmissing(self, common=None, heads=None):
916 916 """Return the ancestors of heads that are not ancestors of common.
917 917
918 918 More specifically, return a list of nodes N such that every N
919 919 satisfies the following constraints:
920 920
921 921 1. N is an ancestor of some node in 'heads'
922 922 2. N is not an ancestor of any node in 'common'
923 923
924 924 The list is sorted by revision number, meaning it is
925 925 topologically sorted.
926 926
927 927 'heads' and 'common' are both lists of node IDs. If heads is
928 928 not supplied, uses all of the revlog's heads. If common is not
929 929 supplied, uses nullid."""
930 930 if common is None:
931 931 common = [nullid]
932 932 if heads is None:
933 933 heads = self.heads()
934 934
935 935 common = [self.rev(n) for n in common]
936 936 heads = [self.rev(n) for n in heads]
937 937
938 938 inc = self.incrementalmissingrevs(common=common)
939 939 return [self.node(r) for r in inc.missingancestors(heads)]
940 940
941 941 def nodesbetween(self, roots=None, heads=None):
942 942 """Return a topological path from 'roots' to 'heads'.
943 943
944 944 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
945 945 topologically sorted list of all nodes N that satisfy both of
946 946 these constraints:
947 947
948 948 1. N is a descendant of some node in 'roots'
949 949 2. N is an ancestor of some node in 'heads'
950 950
951 951 Every node is considered to be both a descendant and an ancestor
952 952 of itself, so every reachable node in 'roots' and 'heads' will be
953 953 included in 'nodes'.
954 954
955 955 'outroots' is the list of reachable nodes in 'roots', i.e., the
956 956 subset of 'roots' that is returned in 'nodes'. Likewise,
957 957 'outheads' is the subset of 'heads' that is also in 'nodes'.
958 958
959 959 'roots' and 'heads' are both lists of node IDs. If 'roots' is
960 960 unspecified, uses nullid as the only root. If 'heads' is
961 961 unspecified, uses list of all of the revlog's heads."""
962 962 nonodes = ([], [], [])
963 963 if roots is not None:
964 964 roots = list(roots)
965 965 if not roots:
966 966 return nonodes
967 967 lowestrev = min([self.rev(n) for n in roots])
968 968 else:
969 969 roots = [nullid] # Everybody's a descendant of nullid
970 970 lowestrev = nullrev
971 971 if (lowestrev == nullrev) and (heads is None):
972 972 # We want _all_ the nodes!
973 973 return ([self.node(r) for r in self], [nullid], list(self.heads()))
974 974 if heads is None:
975 975 # All nodes are ancestors, so the latest ancestor is the last
976 976 # node.
977 977 highestrev = len(self) - 1
978 978 # Set ancestors to None to signal that every node is an ancestor.
979 979 ancestors = None
980 980 # Set heads to an empty dictionary for later discovery of heads
981 981 heads = {}
982 982 else:
983 983 heads = list(heads)
984 984 if not heads:
985 985 return nonodes
986 986 ancestors = set()
987 987 # Turn heads into a dictionary so we can remove 'fake' heads.
988 988 # Also, later we will be using it to filter out the heads we can't
989 989 # find from roots.
990 990 heads = dict.fromkeys(heads, False)
991 991 # Start at the top and keep marking parents until we're done.
992 992 nodestotag = set(heads)
993 993 # Remember where the top was so we can use it as a limit later.
994 994 highestrev = max([self.rev(n) for n in nodestotag])
995 995 while nodestotag:
996 996 # grab a node to tag
997 997 n = nodestotag.pop()
998 998 # Never tag nullid
999 999 if n == nullid:
1000 1000 continue
1001 1001 # A node's revision number represents its place in a
1002 1002 # topologically sorted list of nodes.
1003 1003 r = self.rev(n)
1004 1004 if r >= lowestrev:
1005 1005 if n not in ancestors:
1006 1006 # If we are possibly a descendant of one of the roots
1007 1007 # and we haven't already been marked as an ancestor
1008 1008 ancestors.add(n) # Mark as ancestor
1009 1009 # Add non-nullid parents to list of nodes to tag.
1010 1010 nodestotag.update([p for p in self.parents(n) if
1011 1011 p != nullid])
1012 1012 elif n in heads: # We've seen it before, is it a fake head?
1013 1013 # So it is, real heads should not be the ancestors of
1014 1014 # any other heads.
1015 1015 heads.pop(n)
1016 1016 if not ancestors:
1017 1017 return nonodes
1018 1018 # Now that we have our set of ancestors, we want to remove any
1019 1019 # roots that are not ancestors.
1020 1020
1021 1021 # If one of the roots was nullid, everything is included anyway.
1022 1022 if lowestrev > nullrev:
1023 1023 # But, since we weren't, let's recompute the lowest rev to not
1024 1024 # include roots that aren't ancestors.
1025 1025
1026 1026 # Filter out roots that aren't ancestors of heads
1027 1027 roots = [root for root in roots if root in ancestors]
1028 1028 # Recompute the lowest revision
1029 1029 if roots:
1030 1030 lowestrev = min([self.rev(root) for root in roots])
1031 1031 else:
1032 1032 # No more roots? Return empty list
1033 1033 return nonodes
1034 1034 else:
1035 1035 # We are descending from nullid, and don't need to care about
1036 1036 # any other roots.
1037 1037 lowestrev = nullrev
1038 1038 roots = [nullid]
1039 1039 # Transform our roots list into a set.
1040 1040 descendants = set(roots)
1041 1041 # Also, keep the original roots so we can filter out roots that aren't
1042 1042 # 'real' roots (i.e. are descended from other roots).
1043 1043 roots = descendants.copy()
1044 1044 # Our topologically sorted list of output nodes.
1045 1045 orderedout = []
1046 1046 # Don't start at nullid since we don't want nullid in our output list,
1047 1047 # and if nullid shows up in descendants, empty parents will look like
1048 1048 # they're descendants.
1049 1049 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1050 1050 n = self.node(r)
1051 1051 isdescendant = False
1052 1052 if lowestrev == nullrev: # Everybody is a descendant of nullid
1053 1053 isdescendant = True
1054 1054 elif n in descendants:
1055 1055 # n is already a descendant
1056 1056 isdescendant = True
1057 1057 # This check only needs to be done here because all the roots
1058 1058 # were already marked as descendants before the loop.
1059 1059 if n in roots:
1060 1060 # If n was a root, check if it's a 'real' root.
1061 1061 p = tuple(self.parents(n))
1062 1062 # If any of its parents are descendants, it's not a root.
1063 1063 if (p[0] in descendants) or (p[1] in descendants):
1064 1064 roots.remove(n)
1065 1065 else:
1066 1066 p = tuple(self.parents(n))
1067 1067 # A node is a descendant if either of its parents are
1068 1068 # descendants. (We seeded the descendants set with the roots
1069 1069 # up there, remember?)
1070 1070 if (p[0] in descendants) or (p[1] in descendants):
1071 1071 descendants.add(n)
1072 1072 isdescendant = True
1073 1073 if isdescendant and ((ancestors is None) or (n in ancestors)):
1074 1074 # Only include nodes that are both descendants and ancestors.
1075 1075 orderedout.append(n)
1076 1076 if (ancestors is not None) and (n in heads):
1077 1077 # We're trying to figure out which heads are reachable
1078 1078 # from roots.
1079 1079 # Mark this head as having been reached
1080 1080 heads[n] = True
1081 1081 elif ancestors is None:
1082 1082 # Otherwise, we're trying to discover the heads.
1083 1083 # Assume this is a head because if it isn't, the next step
1084 1084 # will eventually remove it.
1085 1085 heads[n] = True
1086 1086 # But, obviously its parents aren't.
1087 1087 for p in self.parents(n):
1088 1088 heads.pop(p, None)
1089 1089 heads = [head for head, flag in heads.iteritems() if flag]
1090 1090 roots = list(roots)
1091 1091 assert orderedout
1092 1092 assert roots
1093 1093 assert heads
1094 1094 return (orderedout, roots, heads)
1095 1095
1096 1096 def headrevs(self, revs=None):
1097 1097 if revs is None:
1098 1098 try:
1099 1099 return self.index.headrevs()
1100 1100 except AttributeError:
1101 1101 return self._headrevs()
1102 1102 if rustdagop is not None:
1103 1103 return rustdagop.headrevs(self.index, revs)
1104 1104 return dagop.headrevs(revs, self._uncheckedparentrevs)
1105 1105
1106 1106 def computephases(self, roots):
1107 1107 return self.index.computephasesmapsets(roots)
1108 1108
1109 1109 def _headrevs(self):
1110 1110 count = len(self)
1111 1111 if not count:
1112 1112 return [nullrev]
1113 1113 # we won't iterate over filtered revs, so nobody is a head at start
1114 1114 ishead = [0] * (count + 1)
1115 1115 index = self.index
1116 1116 for r in self:
1117 1117 ishead[r] = 1 # I may be a head
1118 1118 e = index[r]
1119 1119 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1120 1120 return [r for r, val in enumerate(ishead) if val]
1121 1121
1122 1122 def heads(self, start=None, stop=None):
1123 1123 """return the list of all nodes that have no children
1124 1124
1125 1125 if start is specified, only heads that are descendants of
1126 1126 start will be returned
1127 1127 if stop is specified, it will consider all the revs from stop
1128 1128 as if they had no children
1129 1129 """
1130 1130 if start is None and stop is None:
1131 1131 if not len(self):
1132 1132 return [nullid]
1133 1133 return [self.node(r) for r in self.headrevs()]
1134 1134
1135 1135 if start is None:
1136 1136 start = nullrev
1137 1137 else:
1138 1138 start = self.rev(start)
1139 1139
1140 1140 stoprevs = set(self.rev(n) for n in stop or [])
1141 1141
1142 1142 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1143 1143 stoprevs=stoprevs)
1144 1144
1145 1145 return [self.node(rev) for rev in revs]
1146 1146
1147 1147 def children(self, node):
1148 1148 """find the children of a given node"""
1149 1149 c = []
1150 1150 p = self.rev(node)
1151 1151 for r in self.revs(start=p + 1):
1152 1152 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1153 1153 if prevs:
1154 1154 for pr in prevs:
1155 1155 if pr == p:
1156 1156 c.append(self.node(r))
1157 1157 elif p == nullrev:
1158 1158 c.append(self.node(r))
1159 1159 return c
1160 1160
1161 1161 def commonancestorsheads(self, a, b):
1162 1162 """calculate all the heads of the common ancestors of nodes a and b"""
1163 1163 a, b = self.rev(a), self.rev(b)
1164 1164 ancs = self._commonancestorsheads(a, b)
1165 1165 return pycompat.maplist(self.node, ancs)
1166 1166
1167 1167 def _commonancestorsheads(self, *revs):
1168 1168 """calculate all the heads of the common ancestors of revs"""
1169 1169 try:
1170 1170 ancs = self.index.commonancestorsheads(*revs)
1171 1171 except (AttributeError, OverflowError): # C implementation failed
1172 1172 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1173 1173 return ancs
1174 1174
1175 1175 def isancestor(self, a, b):
1176 1176 """return True if node a is an ancestor of node b
1177 1177
1178 1178 A revision is considered an ancestor of itself."""
1179 1179 a, b = self.rev(a), self.rev(b)
1180 1180 return self.isancestorrev(a, b)
1181 1181
1182 1182 def isancestorrev(self, a, b):
1183 1183 """return True if revision a is an ancestor of revision b
1184 1184
1185 1185 A revision is considered an ancestor of itself.
1186 1186
1187 1187 The implementation of this is trivial but the use of
1188 1188 reachableroots is not."""
1189 1189 if a == nullrev:
1190 1190 return True
1191 1191 elif a == b:
1192 1192 return True
1193 1193 elif a > b:
1194 1194 return False
1195 1195 return bool(self.reachableroots(a, [b], [a], includepath=False))
1196 1196
1197 1197 def reachableroots(self, minroot, heads, roots, includepath=False):
1198 1198 """return (heads(::<roots> and <roots>::<heads>))
1199 1199
1200 1200 If includepath is True, return (<roots>::<heads>)."""
1201 1201 try:
1202 1202 return self.index.reachableroots2(minroot, heads, roots,
1203 1203 includepath)
1204 1204 except AttributeError:
1205 1205 return dagop._reachablerootspure(self.parentrevs,
1206 1206 minroot, roots, heads, includepath)
1207 1207
1208 1208 def ancestor(self, a, b):
1209 1209 """calculate the "best" common ancestor of nodes a and b"""
1210 1210
1211 1211 a, b = self.rev(a), self.rev(b)
1212 1212 try:
1213 1213 ancs = self.index.ancestors(a, b)
1214 1214 except (AttributeError, OverflowError):
1215 1215 ancs = ancestor.ancestors(self.parentrevs, a, b)
1216 1216 if ancs:
1217 1217 # choose a consistent winner when there's a tie
1218 1218 return min(map(self.node, ancs))
1219 1219 return nullid
1220 1220
1221 1221 def _match(self, id):
1222 1222 if isinstance(id, int):
1223 1223 # rev
1224 1224 return self.node(id)
1225 1225 if len(id) == 20:
1226 1226 # possibly a binary node
1227 1227 # odds of a binary node being all hex in ASCII are 1 in 10**25
1228 1228 try:
1229 1229 node = id
1230 1230 self.rev(node) # quick search the index
1231 1231 return node
1232 1232 except error.LookupError:
1233 1233 pass # may be partial hex id
1234 1234 try:
1235 1235 # str(rev)
1236 1236 rev = int(id)
1237 1237 if "%d" % rev != id:
1238 1238 raise ValueError
1239 1239 if rev < 0:
1240 1240 rev = len(self) + rev
1241 1241 if rev < 0 or rev >= len(self):
1242 1242 raise ValueError
1243 1243 return self.node(rev)
1244 1244 except (ValueError, OverflowError):
1245 1245 pass
1246 1246 if len(id) == 40:
1247 1247 try:
1248 1248 # a full hex nodeid?
1249 1249 node = bin(id)
1250 1250 self.rev(node)
1251 1251 return node
1252 1252 except (TypeError, error.LookupError):
1253 1253 pass
1254 1254
1255 1255 def _partialmatch(self, id):
1256 1256 # we don't care about wdirfilenodeids as they should always be full hashes
1257 1257 maybewdir = wdirhex.startswith(id)
1258 1258 try:
1259 1259 partial = self.index.partialmatch(id)
1260 1260 if partial and self.hasnode(partial):
1261 1261 if maybewdir:
1262 1262 # single 'ff...' match in radix tree, ambiguous with wdir
1263 1263 raise error.RevlogError
1264 1264 return partial
1265 1265 if maybewdir:
1266 1266 # no 'ff...' match in radix tree, wdir identified
1267 1267 raise error.WdirUnsupported
1268 1268 return None
1269 1269 except error.RevlogError:
1270 1270 # parsers.c radix tree lookup gave multiple matches
1271 1271 # fast path: for unfiltered changelog, radix tree is accurate
1272 1272 if not getattr(self, 'filteredrevs', None):
1273 1273 raise error.AmbiguousPrefixLookupError(
1274 1274 id, self.indexfile, _('ambiguous identifier'))
1275 1275 # fall through to slow path that filters hidden revisions
1276 1276 except (AttributeError, ValueError):
1277 1277 # we are pure python, or key was too short to search radix tree
1278 1278 pass
1279 1279
1280 1280 if id in self._pcache:
1281 1281 return self._pcache[id]
1282 1282
1283 1283 if len(id) <= 40:
1284 1284 try:
1285 1285 # hex(node)[:...]
1286 1286 l = len(id) // 2 # grab an even number of digits
1287 1287 prefix = bin(id[:l * 2])
1288 1288 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1289 1289 nl = [n for n in nl if hex(n).startswith(id) and
1290 1290 self.hasnode(n)]
1291 1291 if nullhex.startswith(id):
1292 1292 nl.append(nullid)
1293 1293 if len(nl) > 0:
1294 1294 if len(nl) == 1 and not maybewdir:
1295 1295 self._pcache[id] = nl[0]
1296 1296 return nl[0]
1297 1297 raise error.AmbiguousPrefixLookupError(
1298 1298 id, self.indexfile, _('ambiguous identifier'))
1299 1299 if maybewdir:
1300 1300 raise error.WdirUnsupported
1301 1301 return None
1302 1302 except TypeError:
1303 1303 pass
1304 1304
1305 1305 def lookup(self, id):
1306 1306 """locate a node based on:
1307 1307 - revision number or str(revision number)
1308 1308 - nodeid or subset of hex nodeid
1309 1309 """
1310 1310 n = self._match(id)
1311 1311 if n is not None:
1312 1312 return n
1313 1313 n = self._partialmatch(id)
1314 1314 if n:
1315 1315 return n
1316 1316
1317 1317 raise error.LookupError(id, self.indexfile, _('no match found'))
1318 1318
1319 1319 def shortest(self, node, minlength=1):
1320 1320 """Find the shortest unambiguous prefix that matches node."""
1321 1321 def isvalid(prefix):
1322 1322 try:
1323 1323 matchednode = self._partialmatch(prefix)
1324 1324 except error.AmbiguousPrefixLookupError:
1325 1325 return False
1326 1326 except error.WdirUnsupported:
1327 1327 # single 'ff...' match
1328 1328 return True
1329 1329 if matchednode is None:
1330 1330 raise error.LookupError(node, self.indexfile, _('no node'))
1331 1331 return True
1332 1332
1333 1333 def maybewdir(prefix):
1334 1334 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1335 1335
1336 1336 hexnode = hex(node)
1337 1337
1338 1338 def disambiguate(hexnode, minlength):
1339 1339 """Disambiguate against wdirid."""
1340 1340 for length in range(minlength, 41):
1341 1341 prefix = hexnode[:length]
1342 1342 if not maybewdir(prefix):
1343 1343 return prefix
1344 1344
1345 1345 if not getattr(self, 'filteredrevs', None):
1346 1346 try:
1347 1347 length = max(self.index.shortest(node), minlength)
1348 1348 return disambiguate(hexnode, length)
1349 1349 except error.RevlogError:
1350 1350 if node != wdirid:
1351 1351 raise error.LookupError(node, self.indexfile, _('no node'))
1352 1352 except AttributeError:
1353 1353 # Fall through to pure code
1354 1354 pass
1355 1355
1356 1356 if node == wdirid:
1357 1357 for length in range(minlength, 41):
1358 1358 prefix = hexnode[:length]
1359 1359 if isvalid(prefix):
1360 1360 return prefix
1361 1361
1362 1362 for length in range(minlength, 41):
1363 1363 prefix = hexnode[:length]
1364 1364 if isvalid(prefix):
1365 1365 return disambiguate(hexnode, length)
1366 1366
1367 1367 def cmp(self, node, text):
1368 1368 """compare text with a given file revision
1369 1369
1370 1370 returns True if text is different from what is stored.
1371 1371 """
1372 1372 p1, p2 = self.parents(node)
1373 1373 return storageutil.hashrevisionsha1(text, p1, p2) != node
1374 1374
1375 1375 def _cachesegment(self, offset, data):
1376 1376 """Add a segment to the revlog cache.
1377 1377
1378 1378 Accepts an absolute offset and the data that is at that location.
1379 1379 """
1380 1380 o, d = self._chunkcache
1381 1381 # try to add to existing cache
1382 1382 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1383 1383 self._chunkcache = o, d + data
1384 1384 else:
1385 1385 self._chunkcache = offset, data
1386 1386
1387 1387 def _readsegment(self, offset, length, df=None):
1388 1388 """Load a segment of raw data from the revlog.
1389 1389
1390 1390 Accepts an absolute offset, length to read, and an optional existing
1391 1391 file handle to read from.
1392 1392
1393 1393 If an existing file handle is passed, it will be seeked and the
1394 1394 original seek position will NOT be restored.
1395 1395
1396 1396 Returns a str or buffer of raw byte data.
1397 1397
1398 1398 Raises if the requested number of bytes could not be read.
1399 1399 """
1400 1400 # Cache data both forward and backward around the requested
1401 1401 # data, in a fixed size window. This helps speed up operations
1402 1402 # involving reading the revlog backwards.
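        # Worked example (editor's addition), assuming the default 64KiB
        # cache size: offset=70000, length=100 gives
        #   realoffset = 70000 & ~65535 = 65536
        #   reallength = ((70100 + 65536) & ~65535) - 65536 = 65536
        # i.e. one cache-aligned 64KiB window containing the requested
        # bytes is read and cached.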
1403 1403 cachesize = self._chunkcachesize
1404 1404 realoffset = offset & ~(cachesize - 1)
1405 1405 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1406 1406 - realoffset)
1407 1407 with self._datareadfp(df) as df:
1408 1408 df.seek(realoffset)
1409 1409 d = df.read(reallength)
1410 1410
1411 1411 self._cachesegment(realoffset, d)
1412 1412 if offset != realoffset or reallength != length:
1413 1413 startoffset = offset - realoffset
1414 1414 if len(d) - startoffset < length:
1415 1415 raise error.RevlogError(
1416 1416 _('partial read of revlog %s; expected %d bytes from '
1417 1417 'offset %d, got %d') %
1418 1418 (self.indexfile if self._inline else self.datafile,
1419 1419 length, realoffset, len(d) - startoffset))
1420 1420
1421 1421 return util.buffer(d, startoffset, length)
1422 1422
1423 1423 if len(d) < length:
1424 1424 raise error.RevlogError(
1425 1425 _('partial read of revlog %s; expected %d bytes from offset '
1426 1426 '%d, got %d') %
1427 1427 (self.indexfile if self._inline else self.datafile,
1428 1428 length, offset, len(d)))
1429 1429
1430 1430 return d
1431 1431
1432 1432 def _getsegment(self, offset, length, df=None):
1433 1433 """Obtain a segment of raw data from the revlog.
1434 1434
1435 1435 Accepts an absolute offset, length of bytes to obtain, and an
1436 1436 optional file handle to the already-opened revlog. If the file
1437 1437 handle is used, its original seek position will not be preserved.
1438 1438
1439 1439 Requests for data may be returned from a cache.
1440 1440
1441 1441 Returns a str or a buffer instance of raw byte data.
1442 1442 """
1443 1443 o, d = self._chunkcache
1444 1444 l = len(d)
1445 1445
1446 1446 # is it in the cache?
1447 1447 cachestart = offset - o
1448 1448 cacheend = cachestart + length
1449 1449 if cachestart >= 0 and cacheend <= l:
1450 1450 if cachestart == 0 and cacheend == l:
1451 1451 return d # avoid a copy
1452 1452 return util.buffer(d, cachestart, cacheend - cachestart)
1453 1453
1454 1454 return self._readsegment(offset, length, df=df)
1455 1455
1456 1456 def _getsegmentforrevs(self, startrev, endrev, df=None):
1457 1457 """Obtain a segment of raw data corresponding to a range of revisions.
1458 1458
1459 1459 Accepts the start and end revisions and an optional already-open
1460 1460 file handle to be used for reading. If the file handle is read, its
1461 1461 seek position will not be preserved.
1462 1462
1463 1463 Requests for data may be satisfied by a cache.
1464 1464
1465 1465 Returns a 2-tuple of (offset, data) for the requested range of
1466 1466 revisions. Offset is the integer offset from the beginning of the
1467 1467 revlog and data is a str or buffer of the raw byte data.
1468 1468
1469 1469 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1470 1470 to determine where each revision's data begins and ends.
1471 1471 """
1472 1472 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1473 1473 # (functions are expensive).
1474 1474 index = self.index
1475 1475 istart = index[startrev]
1476 1476 start = int(istart[0] >> 16)
1477 1477 if startrev == endrev:
1478 1478 end = start + istart[1]
1479 1479 else:
1480 1480 iend = index[endrev]
1481 1481 end = int(iend[0] >> 16) + iend[1]
1482 1482
1483 1483 if self._inline:
1484 1484 start += (startrev + 1) * self._io.size
1485 1485 end += (endrev + 1) * self._io.size
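            # Editor's note: inline revlogs interleave each index entry
            # with its revision data in one file, so a revision's data is
            # preceded by (rev + 1) fixed-size index records; the terms
            # above convert data offsets into physical file offsets (e.g.
            # with 64-byte v1 entries, rev 2's data starts 3 * 64 bytes
            # past its nominal data offset).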
1486 1486 length = end - start
1487 1487
1488 1488 return start, self._getsegment(start, length, df=df)
1489 1489
1490 1490 def _chunk(self, rev, df=None):
1491 1491 """Obtain a single decompressed chunk for a revision.
1492 1492
1493 1493 Accepts an integer revision and an optional already-open file handle
1494 1494 to be used for reading. If used, the seek position of the file will not
1495 1495 be preserved.
1496 1496
1497 1497 Returns a str holding uncompressed data for the requested revision.
1498 1498 """
1499 1499 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1500 1500
1501 1501 def _chunks(self, revs, df=None, targetsize=None):
1502 1502 """Obtain decompressed chunks for the specified revisions.
1503 1503
1504 1504 Accepts an iterable of numeric revisions that are assumed to be in
1505 1505 ascending order. Also accepts an optional already-open file handle
1506 1506 to be used for reading. If used, the seek position of the file will
1507 1507 not be preserved.
1508 1508
1509 1509 This function is similar to calling ``self._chunk()`` multiple times,
1510 1510 but is faster.
1511 1511
1512 1512 Returns a list with decompressed data for each requested revision.
1513 1513 """
1514 1514 if not revs:
1515 1515 return []
1516 1516 start = self.start
1517 1517 length = self.length
1518 1518 inline = self._inline
1519 1519 iosize = self._io.size
1520 1520 buffer = util.buffer
1521 1521
1522 1522 l = []
1523 1523 ladd = l.append
1524 1524
1525 1525 if not self._withsparseread:
1526 1526 slicedchunks = (revs,)
1527 1527 else:
1528 1528 slicedchunks = deltautil.slicechunk(self, revs,
1529 1529 targetsize=targetsize)
1530 1530
1531 1531 for revschunk in slicedchunks:
1532 1532 firstrev = revschunk[0]
1533 1533 # Skip trailing revisions with empty diff
1534 1534 for lastrev in revschunk[::-1]:
1535 1535 if length(lastrev) != 0:
1536 1536 break
1537 1537
1538 1538 try:
1539 1539 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1540 1540 except OverflowError:
1541 1541 # issue4215 - we can't cache a run of chunks greater than
1542 1542 # 2G on Windows
1543 1543 return [self._chunk(rev, df=df) for rev in revschunk]
1544 1544
1545 1545 decomp = self.decompress
1546 1546 for rev in revschunk:
1547 1547 chunkstart = start(rev)
1548 1548 if inline:
1549 1549 chunkstart += (rev + 1) * iosize
1550 1550 chunklength = length(rev)
1551 1551 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1552 1552
1553 1553 return l
1554 1554
1555 1555 def _chunkclear(self):
1556 1556 """Clear the raw chunk cache."""
1557 1557 self._chunkcache = (0, '')
1558 1558
1559 1559 def deltaparent(self, rev):
1560 1560 """return deltaparent of the given revision"""
1561 1561 base = self.index[rev][3]
1562 1562 if base == rev:
1563 1563 return nullrev
1564 1564 elif self._generaldelta:
1565 1565 return base
1566 1566 else:
1567 1567 return rev - 1
1568 1568
1569 1569 def issnapshot(self, rev):
1570 1570 """tells whether rev is a snapshot
1571 1571 """
1572 1572 if not self._sparserevlog:
1573 1573 return self.deltaparent(rev) == nullrev
1574 1574 elif util.safehasattr(self.index, 'issnapshot'):
1575 1575 # directly assign the method to cache the testing and access
1576 1576 self.issnapshot = self.index.issnapshot
1577 1577 return self.issnapshot(rev)
1578 1578 if rev == nullrev:
1579 1579 return True
1580 1580 entry = self.index[rev]
1581 1581 base = entry[3]
1582 1582 if base == rev:
1583 1583 return True
1584 1584 if base == nullrev:
1585 1585 return True
1586 1586 p1 = entry[5]
1587 1587 p2 = entry[6]
1588 1588 if base == p1 or base == p2:
1589 1589 return False
1590 1590 return self.issnapshot(base)
1591 1591
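# Editor's note: a hedged standalone restatement of the sparse-revlog rule
# implemented by issnapshot() above, over a simplified index of
# (base, p1, p2) tuples: a revision is a snapshot when its delta chain does
# not run through one of its parents. The function and index shape are
# illustrative assumptions, not the real index format.
def is_snapshot(index, rev, nullrev=-1):
    if rev == nullrev:
        return True
    base, p1, p2 = index[rev]
    if base == rev or base == nullrev:
        return True                      # full snapshot
    if base == p1 or base == p2:
        return False                     # plain delta against a parent
    return is_snapshot(index, base)      # intermediate snapshot candidate
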
1592 1592 def snapshotdepth(self, rev):
1593 1593         """number of snapshots in the chain before this one"""
1594 1594         if not self.issnapshot(rev):
1595 1595             raise error.ProgrammingError('revision %d not a snapshot' % rev)
1596 1596 return len(self._deltachain(rev)[0]) - 1
1597 1597
1598 1598 def revdiff(self, rev1, rev2):
1599 1599 """return or calculate a delta between two revisions
1600 1600
1601 1601 The delta calculated is in binary form and is intended to be written to
1602 1602 revlog data directly. So this function needs raw revision data.
1603 1603 """
1604 1604 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1605 1605 return bytes(self._chunk(rev2))
1606 1606
1607 1607 return mdiff.textdiff(self.rawdata(rev1),
1608 1608 self.rawdata(rev2))
1609 1609
1610 1610 def _processflags(self, text, flags, operation, raw=False):
1611 1611 """deprecated entry point to access flag processors"""
1612 1612         msg = ('_processflags(...) is deprecated, use the specialized variant')
1613 1613 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1614 1614 if raw:
1615 1615 return text, flagutil.processflagsraw(self, text, flags)
1616 1616 elif operation == 'read':
1617 1617 return flagutil.processflagsread(self, text, flags)
1618 1618 else: # write operation
1619 1619 return flagutil.processflagswrite(self, text, flags)
1620 1620
1621 1621 def revision(self, nodeorrev, _df=None, raw=False):
1622 1622 """return an uncompressed revision of a given node or revision
1623 1623 number.
1624 1624
1625 1625 _df - an existing file handle to read from. (internal-only)
1626 1626 raw - an optional argument specifying if the revision data is to be
1627 1627 treated as raw data when applying flag transforms. 'raw' should be set
1628 1628 to True when generating changegroups or in debug commands.
1629 1629 """
1630 1630 if raw:
1631 1631 msg = ('revlog.revision(..., raw=True) is deprecated, '
1632 1632 'use revlog.rawdata(...)')
1633 1633 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1634 1634 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1635 1635
1636 1636 def sidedata(self, nodeorrev, _df=None):
1637 1637 """a map of extra data related to the changeset but not part of the hash
1638 1638
1639 1639         This function currently returns a dictionary. However, a more
1640 1640         advanced mapping object will likely be used in the future for more
1641 1641         efficient/lazy code.
1642 1642 """
1643 1643 return self._revisiondata(nodeorrev, _df)[1]
1644 1644
1645 1645 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1646 1646 # deal with <nodeorrev> argument type
1647 1647 if isinstance(nodeorrev, int):
1648 1648 rev = nodeorrev
1649 1649 node = self.node(rev)
1650 1650 else:
1651 1651 node = nodeorrev
1652 1652 rev = None
1653 1653
1654 1654 # fast path the special `nullid` rev
1655 1655 if node == nullid:
1656 1656 return "", {}
1657 1657
1658 1658 # The text as stored inside the revlog. Might be the revision or might
1659 1659 # need to be processed to retrieve the revision.
1660 1660 rawtext = None
1661 1661
1662 1662 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1663 1663
1664 1664 if raw and validated:
1665 1665 # if we don't want to process the raw text and that raw
1666 1666 # text is cached, we can exit early.
1667 1667 return rawtext, {}
1668 1668 if rev is None:
1669 1669 rev = self.rev(node)
1670 1670         # the revlog's flags for this revision
1671 1671         # (they usually alter its state or content)
1672 1672 flags = self.flags(rev)
1673 1673
1674 1674 if validated and flags == REVIDX_DEFAULT_FLAGS:
1675 1675 # no extra flags set, no flag processor runs, text = rawtext
1676 1676 return rawtext, {}
1677 1677
1678 1678 sidedata = {}
1679 1679 if raw:
1680 1680 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1681 1681 text = rawtext
1682 1682 else:
1683 1683 try:
1684 1684 r = flagutil.processflagsread(self, rawtext, flags)
1685 1685 except error.SidedataHashError as exc:
1686 1686 msg = _("integrity check failed on %s:%s sidedata key %d")
1687 1687 msg %= (self.indexfile, pycompat.bytestr(rev),
1688 1688 exc.sidedatakey)
1689 1689 raise error.RevlogError(msg)
1690 1690 text, validatehash, sidedata = r
1691 1691 if validatehash:
1692 1692 self.checkhash(text, node, rev=rev)
1693 1693 if not validated:
1694 1694 self._revisioncache = (node, rev, rawtext)
1695 1695
1696 1696 return text, sidedata
1697 1697
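# Editor's note: hedged usage sketch of the public wrappers around
# _revisiondata(); `rl` stands for an open revlog instance and `node` for a
# stored node. All three helpers are defined in this file.
#
#     text = rl.revision(node)   # processed revision text
#     raw = rl.rawdata(node)     # raw stored text, flag transforms skipped
#     extra = rl.sidedata(node)  # sidedata mapping, empty if none stored
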
1698 1698 def _rawtext(self, node, rev, _df=None):
1699 1699 """return the possibly unvalidated rawtext for a revision
1700 1700
1701 1701 returns (rev, rawtext, validated)
1702 1702 """
1703 1703
1704 1704 # revision in the cache (could be useful to apply delta)
1705 1705 cachedrev = None
1706 1706 # An intermediate text to apply deltas to
1707 1707 basetext = None
1708 1708
1709 1709 # Check if we have the entry in cache
1710 1710 # The cache entry looks like (node, rev, rawtext)
1711 1711 if self._revisioncache:
1712 1712 if self._revisioncache[0] == node:
1713 1713 return (rev, self._revisioncache[2], True)
1714 1714 cachedrev = self._revisioncache[1]
1715 1715
1716 1716 if rev is None:
1717 1717 rev = self.rev(node)
1718 1718
1719 1719 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1720 1720 if stopped:
1721 1721 basetext = self._revisioncache[2]
1722 1722
1723 1723 # drop cache to save memory, the caller is expected to
1724 1724 # update self._revisioncache after validating the text
1725 1725 self._revisioncache = None
1726 1726
1727 1727 targetsize = None
1728 1728 rawsize = self.index[rev][2]
1729 1729 if 0 <= rawsize:
1730 1730 targetsize = 4 * rawsize
1731 1731
1732 1732 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1733 1733 if basetext is None:
1734 1734 basetext = bytes(bins[0])
1735 1735 bins = bins[1:]
1736 1736
1737 1737 rawtext = mdiff.patches(basetext, bins)
1738 1738 del basetext # let us have a chance to free memory early
1739 1739 return (rev, rawtext, False)
1740 1740
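# Editor's note: a minimal standalone sketch of the chain reconstruction
# performed by _rawtext() above: start from the full text at the base of the
# delta chain and fold every stored delta over it. `applydelta` is a
# stand-in for mdiff patching; the real code hands the whole list to
# mdiff.patches() in one call.
def reconstruct(basetext, deltas, applydelta):
    # deltas are binary patches ordered from the chain base to the target
    text = basetext
    for delta in deltas:
        text = applydelta(text, delta)
    return text
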
1741 1741 def rawdata(self, nodeorrev, _df=None):
1742 1742         """return the uncompressed raw data of a given node or revision number.
1743 1743
1744 1744 _df - an existing file handle to read from. (internal-only)
1745 1745 """
1746 1746 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1747 1747
1748 1748 def hash(self, text, p1, p2):
1749 1749 """Compute a node hash.
1750 1750
1751 1751 Available as a function so that subclasses can replace the hash
1752 1752 as needed.
1753 1753 """
1754 1754 return storageutil.hashrevisionsha1(text, p1, p2)
1755 1755
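# Editor's note: a standalone sketch of the default node hash computed by
# storageutil.hashrevisionsha1(): SHA-1 over the two parent nodes in sorted
# order followed by the text, so swapping the parents never changes the
# node. A sketch for illustration, not the canonical implementation.
import hashlib

def node_hash(text, p1, p2):
    if p2 < p1:
        p1, p2 = p2, p1         # parents are hashed in sorted order
    s = hashlib.sha1(p1)
    s.update(p2)
    s.update(text)
    return s.digest()
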
1756 1756 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1757 1757 """Check node hash integrity.
1758 1758
1759 1759 Available as a function so that subclasses can extend hash mismatch
1760 1760 behaviors as needed.
1761 1761 """
1762 1762 try:
1763 1763 if p1 is None and p2 is None:
1764 1764 p1, p2 = self.parents(node)
1765 1765 if node != self.hash(text, p1, p2):
1766 1766 # Clear the revision cache on hash failure. The revision cache
1767 1767 # only stores the raw revision and clearing the cache does have
1768 1768 # the side-effect that we won't have a cache hit when the raw
1769 1769 # revision data is accessed. But this case should be rare and
1770 1770 # it is extra work to teach the cache about the hash
1771 1771 # verification state.
1772 1772 if self._revisioncache and self._revisioncache[0] == node:
1773 1773 self._revisioncache = None
1774 1774
1775 1775 revornode = rev
1776 1776 if revornode is None:
1777 1777 revornode = templatefilters.short(hex(node))
1778 1778 raise error.RevlogError(_("integrity check failed on %s:%s")
1779 1779 % (self.indexfile, pycompat.bytestr(revornode)))
1780 1780 except error.RevlogError:
1781 1781 if self._censorable and storageutil.iscensoredtext(text):
1782 1782 raise error.CensoredNodeError(self.indexfile, node, text)
1783 1783 raise
1784 1784
1785 1785 def _enforceinlinesize(self, tr, fp=None):
1786 1786 """Check if the revlog is too big for inline and convert if so.
1787 1787
1788 1788 This should be called after revisions are added to the revlog. If the
1789 1789 revlog has grown too large to be an inline revlog, it will convert it
1790 1790 to use multiple index and data files.
1791 1791 """
1792 1792 tiprev = len(self) - 1
1793 1793 if (not self._inline or
1794 1794 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1795 1795 return
1796 1796
1797 1797 trinfo = tr.find(self.indexfile)
1798 1798 if trinfo is None:
1799 1799 raise error.RevlogError(_("%s not found in the transaction")
1800 1800 % self.indexfile)
1801 1801
1802 1802 trindex = trinfo[2]
1803 1803 if trindex is not None:
1804 1804 dataoff = self.start(trindex)
1805 1805 else:
1806 1806 # revlog was stripped at start of transaction, use all leftover data
1807 1807 trindex = len(self) - 1
1808 1808 dataoff = self.end(tiprev)
1809 1809
1810 1810 tr.add(self.datafile, dataoff)
1811 1811
1812 1812 if fp:
1813 1813 fp.flush()
1814 1814 fp.close()
1815 1815 # We can't use the cached file handle after close(). So prevent
1816 1816 # its usage.
1817 1817 self._writinghandles = None
1818 1818
1819 1819 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1820 1820 for r in self:
1821 1821 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1822 1822
1823 1823 with self._indexfp('w') as fp:
1824 1824 self.version &= ~FLAG_INLINE_DATA
1825 1825 self._inline = False
1826 1826 io = self._io
1827 1827 for i in self:
1828 1828 e = io.packentry(self.index[i], self.node, self.version, i)
1829 1829 fp.write(e)
1830 1830
1831 1831         # the temp file replaces the real index when we exit the context
1832 1832         # manager
1833 1833
1834 1834 tr.replace(self.indexfile, trindex * self._io.size)
1835 1835 self._chunkclear()
1836 1836
1837 1837 def _nodeduplicatecallback(self, transaction, node):
1838 1838 """called when trying to add a node already stored.
1839 1839 """
1840 1840
1841 1841 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1842 1842 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1843 1843 sidedata=None):
1844 1844 """add a revision to the log
1845 1845
1846 1846 text - the revision data to add
1847 1847 transaction - the transaction object used for rollback
1848 1848 link - the linkrev data to add
1849 1849 p1, p2 - the parent nodeids of the revision
1850 1850 cachedelta - an optional precomputed delta
1851 1851         node - nodeid of revision; typically node is not specified, and it is
1852 1852             computed by default as hash(text, p1, p2); however, subclasses might
1853 1853             use a different hashing method (and override checkhash() in that case)
1854 1854 flags - the known flags to set on the revision
1855 1855 deltacomputer - an optional deltacomputer instance shared between
1856 1856             multiple calls
        sidedata - optional mapping of extra data to store with the revision;
            requires a revlog with sidedata support (see self.hassidedata)
1857 1857 """
1858 1858 if link == nullrev:
1859 1859 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1860 1860 % self.indexfile)
1861 1861
1862 1862 if sidedata is None:
1863 1863 sidedata = {}
1864 flags = flags & ~REVIDX_SIDEDATA
1864 1865 elif not self.hassidedata:
1865 1866 raise error.ProgrammingError(
1866 1867                 _("trying to add sidedata to a revlog that does not support them")
1867 1868 )
1868 1869 else:
1869 1870 flags |= REVIDX_SIDEDATA
1870 1871
1871 1872 if flags:
1872 1873 node = node or self.hash(text, p1, p2)
1873 1874
1874 1875 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1875 1876 sidedata=sidedata)
1876 1877
1877 1878 # If the flag processor modifies the revision data, ignore any provided
1878 1879 # cachedelta.
1879 1880 if rawtext != text:
1880 1881 cachedelta = None
1881 1882
1882 1883 if len(rawtext) > _maxentrysize:
1883 1884 raise error.RevlogError(
1884 1885 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1885 1886 % (self.indexfile, len(rawtext)))
1886 1887
1887 1888 node = node or self.hash(rawtext, p1, p2)
1888 1889 if node in self.nodemap:
1889 1890 return node
1890 1891
1891 1892 if validatehash:
1892 1893 self.checkhash(rawtext, node, p1=p1, p2=p2)
1893 1894
1894 1895 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1895 1896 flags, cachedelta=cachedelta,
1896 1897 deltacomputer=deltacomputer)
1897 1898
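# Editor's note: a hedged restatement of the sidedata flag rule enforced by
# addrevision() above (the rule this changeset fixes): REVIDX_SIDEDATA must
# mirror whether sidedata is actually present, and revlogs without sidedata
# support must reject it. REVIDX_SIDEDATA is imported at the top of this
# module; the helper itself is illustrative.
def normalized_sidedata_flags(flags, sidedata, hassidedata):
    if sidedata is None:
        return flags & ~REVIDX_SIDEDATA   # never advertise absent sidedata
    if not hassidedata:
        raise ValueError('revlog does not support sidedata')
    return flags | REVIDX_SIDEDATA
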
1898 1899 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1899 1900 cachedelta=None, deltacomputer=None):
1900 1901 """add a raw revision with known flags, node and parents
1901 1902         useful when reusing a revision not stored in this revlog (e.g. received
1902 1903         over the wire, or read from an external bundle).
1903 1904 """
1904 1905 dfh = None
1905 1906 if not self._inline:
1906 1907 dfh = self._datafp("a+")
1907 1908 ifh = self._indexfp("a+")
1908 1909 try:
1909 1910 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1910 1911 flags, cachedelta, ifh, dfh,
1911 1912 deltacomputer=deltacomputer)
1912 1913 finally:
1913 1914 if dfh:
1914 1915 dfh.close()
1915 1916 ifh.close()
1916 1917
1917 1918 def compress(self, data):
1918 1919 """Generate a possibly-compressed representation of data."""
1919 1920 if not data:
1920 1921 return '', data
1921 1922
1922 1923 compressed = self._compressor.compress(data)
1923 1924
1924 1925 if compressed:
1925 1926 # The revlog compressor added the header in the returned data.
1926 1927 return '', compressed
1927 1928
1928 1929 if data[0:1] == '\0':
1929 1930 return '', data
1930 1931 return 'u', data
1931 1932
1932 1933 def decompress(self, data):
1933 1934 """Decompress a revlog chunk.
1934 1935
1935 1936 The chunk is expected to begin with a header identifying the
1936 1937 format type so it can be routed to an appropriate decompressor.
1937 1938 """
1938 1939 if not data:
1939 1940 return data
1940 1941
1941 1942 # Revlogs are read much more frequently than they are written and many
1942 1943 # chunks only take microseconds to decompress, so performance is
1943 1944 # important here.
1944 1945 #
1945 1946 # We can make a few assumptions about revlogs:
1946 1947 #
1947 1948 # 1) the majority of chunks will be compressed (as opposed to inline
1948 1949 # raw data).
1949 1950         # 2) decompressing *any* data will likely be at least 10x slower than
1950 1951 # returning raw inline data.
1951 1952 # 3) we want to prioritize common and officially supported compression
1952 1953 # engines
1953 1954 #
1954 1955         # It follows that we want to optimize for the "decompress compressed data
1955 1956 # when encoded with common and officially supported compression engines"
1956 1957 # case over "raw data" and "data encoded by less common or non-official
1957 1958 # compression engines." That is why we have the inline lookup first
1958 1959 # followed by the compengines lookup.
1959 1960 #
1960 1961 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1961 1962 # compressed chunks. And this matters for changelog and manifest reads.
1962 1963 t = data[0:1]
1963 1964
1964 1965 if t == 'x':
1965 1966 try:
1966 1967 return _zlibdecompress(data)
1967 1968 except zlib.error as e:
1968 1969 raise error.RevlogError(_('revlog decompress error: %s') %
1969 1970 stringutil.forcebytestr(e))
1970 1971 # '\0' is more common than 'u' so it goes first.
1971 1972 elif t == '\0':
1972 1973 return data
1973 1974 elif t == 'u':
1974 1975 return util.buffer(data, 1)
1975 1976
1976 1977 try:
1977 1978 compressor = self._decompressors[t]
1978 1979 except KeyError:
1979 1980 try:
1980 1981 engine = util.compengines.forrevlogheader(t)
1981 1982 compressor = engine.revlogcompressor(self._compengineopts)
1982 1983 self._decompressors[t] = compressor
1983 1984 except KeyError:
1984 1985 raise error.RevlogError(_('unknown compression type %r') % t)
1985 1986
1986 1987 return compressor.decompress(data)
1987 1988
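# Editor's note: a standalone sketch of the chunk header convention handled
# by compress()/decompress() above: 'x' is zlib data (the byte is zlib's own
# header, so the whole chunk feeds zlib directly), 'u' prefixes explicitly
# uncompressed data, and a leading '\0' marks raw data stored as-is. Other
# header bytes are routed to pluggable compression engines in the real code,
# which this sketch leaves out.
import zlib

def decompress_chunk(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0':
        return data
    if t == b'u':
        return memoryview(data)[1:]     # zero-copy, like util.buffer
    raise ValueError('unknown compression type %r' % t)
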
1988 1989 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1989 1990 cachedelta, ifh, dfh, alwayscache=False,
1990 1991 deltacomputer=None):
1991 1992 """internal function to add revisions to the log
1992 1993
1993 1994 see addrevision for argument descriptions.
1994 1995
1995 1996 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1996 1997
1997 1998 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1998 1999 be used.
1999 2000
2000 2001 invariants:
2001 2002 - rawtext is optional (can be None); if not set, cachedelta must be set.
2002 2003 if both are set, they must correspond to each other.
2003 2004 """
2004 2005 if node == nullid:
2005 2006 raise error.RevlogError(_("%s: attempt to add null revision") %
2006 2007 self.indexfile)
2007 2008 if node == wdirid or node in wdirfilenodeids:
2008 2009 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2009 2010 self.indexfile)
2010 2011
2011 2012 if self._inline:
2012 2013 fh = ifh
2013 2014 else:
2014 2015 fh = dfh
2015 2016
2016 2017 btext = [rawtext]
2017 2018
2018 2019 curr = len(self)
2019 2020 prev = curr - 1
2020 2021 offset = self.end(prev)
2021 2022 p1r, p2r = self.rev(p1), self.rev(p2)
2022 2023
2023 2024 # full versions are inserted when the needed deltas
2024 2025 # become comparable to the uncompressed text
2025 2026 if rawtext is None:
2026 2027             # need the rawtext size before it was changed by flag processors,
2027 2028             # which is the non-raw size. use revlog explicitly to avoid
2028 2029             # filelog's extra logic that might remove metadata size.
2029 2030 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2030 2031 cachedelta[1])
2031 2032 else:
2032 2033 textlen = len(rawtext)
2033 2034
2034 2035 if deltacomputer is None:
2035 2036 deltacomputer = deltautil.deltacomputer(self)
2036 2037
2037 2038 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2038 2039
2039 2040 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2040 2041
2041 2042 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2042 2043 deltainfo.base, link, p1r, p2r, node)
2043 2044 self.index.append(e)
2044 2045 self.nodemap[node] = curr
2045 2046
2046 2047 # Reset the pure node cache start lookup offset to account for new
2047 2048 # revision.
2048 2049 if self._nodepos is not None:
2049 2050 self._nodepos = curr
2050 2051
2051 2052 entry = self._io.packentry(e, self.node, self.version, curr)
2052 2053 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2053 2054 link, offset)
2054 2055
2055 2056 rawtext = btext[0]
2056 2057
2057 2058 if alwayscache and rawtext is None:
2058 2059 rawtext = deltacomputer.buildtext(revinfo, fh)
2059 2060
2060 2061 if type(rawtext) == bytes: # only accept immutable objects
2061 2062 self._revisioncache = (node, curr, rawtext)
2062 2063 self._chainbasecache[curr] = deltainfo.chainbase
2063 2064 return node
2064 2065
2065 2066 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2066 2067 # Files opened in a+ mode have inconsistent behavior on various
2067 2068 # platforms. Windows requires that a file positioning call be made
2068 2069 # when the file handle transitions between reads and writes. See
2069 2070 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2070 2071 # platforms, Python or the platform itself can be buggy. Some versions
2071 2072 # of Solaris have been observed to not append at the end of the file
2072 2073 # if the file was seeked to before the end. See issue4943 for more.
2073 2074 #
2074 2075 # We work around this issue by inserting a seek() before writing.
2075 2076 # Note: This is likely not necessary on Python 3. However, because
2076 2077 # the file handle is reused for reads and may be seeked there, we need
2077 2078 # to be careful before changing this.
2078 2079 ifh.seek(0, os.SEEK_END)
2079 2080 if dfh:
2080 2081 dfh.seek(0, os.SEEK_END)
2081 2082
2082 2083 curr = len(self) - 1
2083 2084 if not self._inline:
2084 2085 transaction.add(self.datafile, offset)
2085 2086 transaction.add(self.indexfile, curr * len(entry))
2086 2087 if data[0]:
2087 2088 dfh.write(data[0])
2088 2089 dfh.write(data[1])
2089 2090 ifh.write(entry)
2090 2091 else:
2091 2092 offset += curr * self._io.size
2092 2093 transaction.add(self.indexfile, offset, curr)
2093 2094 ifh.write(entry)
2094 2095 ifh.write(data[0])
2095 2096 ifh.write(data[1])
2096 2097 self._enforceinlinesize(transaction, ifh)
2097 2098
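# Editor's note: a toy sketch of the journaling discipline _writeentry()
# relies on: the pre-write length of each file is recorded in the
# transaction before any byte is appended, so an aborted transaction can
# truncate both files back to their previous sizes. The class and the
# `opener` callable are illustrative, not the real transaction API.
class ToyTransaction(object):
    def __init__(self):
        self._entries = []              # (path, offset before the write)

    def add(self, path, offset):
        self._entries.append((path, offset))

    def abort(self, opener):
        for path, offset in reversed(self._entries):
            with opener(path, 'r+b') as fh:
                fh.truncate(offset)
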
2098 2099 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2099 2100 """
2100 2101 add a delta group
2101 2102
2102 2103         Given a set of deltas, add them to the revision log. The
2103 2104         first delta is against its parent, which should be in our
2104 2105         log; the rest are against the previous delta.
2105 2106
2106 2107 If ``addrevisioncb`` is defined, it will be called with arguments of
2107 2108 this revlog and the node that was added.
2108 2109 """
2109 2110
2110 2111 if self._writinghandles:
2111 2112 raise error.ProgrammingError('cannot nest addgroup() calls')
2112 2113
2113 2114 nodes = []
2114 2115
2115 2116 r = len(self)
2116 2117 end = 0
2117 2118 if r:
2118 2119 end = self.end(r - 1)
2119 2120 ifh = self._indexfp("a+")
2120 2121 isize = r * self._io.size
2121 2122 if self._inline:
2122 2123 transaction.add(self.indexfile, end + isize, r)
2123 2124 dfh = None
2124 2125 else:
2125 2126 transaction.add(self.indexfile, isize, r)
2126 2127 transaction.add(self.datafile, end)
2127 2128 dfh = self._datafp("a+")
2128 2129 def flush():
2129 2130 if dfh:
2130 2131 dfh.flush()
2131 2132 ifh.flush()
2132 2133
2133 2134 self._writinghandles = (ifh, dfh)
2134 2135
2135 2136 try:
2136 2137 deltacomputer = deltautil.deltacomputer(self)
2137 2138 # loop through our set of deltas
2138 2139 for data in deltas:
2139 2140 node, p1, p2, linknode, deltabase, delta, flags = data
2140 2141 link = linkmapper(linknode)
2141 2142 flags = flags or REVIDX_DEFAULT_FLAGS
2142 2143
2143 2144 nodes.append(node)
2144 2145
2145 2146 if node in self.nodemap:
2146 2147 self._nodeduplicatecallback(transaction, node)
2147 2148 # this can happen if two branches make the same change
2148 2149 continue
2149 2150
2150 2151 for p in (p1, p2):
2151 2152 if p not in self.nodemap:
2152 2153 raise error.LookupError(p, self.indexfile,
2153 2154 _('unknown parent'))
2154 2155
2155 2156 if deltabase not in self.nodemap:
2156 2157 raise error.LookupError(deltabase, self.indexfile,
2157 2158 _('unknown delta base'))
2158 2159
2159 2160 baserev = self.rev(deltabase)
2160 2161
2161 2162 if baserev != nullrev and self.iscensored(baserev):
2162 2163 # if base is censored, delta must be full replacement in a
2163 2164 # single patch operation
2164 2165 hlen = struct.calcsize(">lll")
2165 2166 oldlen = self.rawsize(baserev)
2166 2167 newlen = len(delta) - hlen
2167 2168 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2168 2169 raise error.CensoredBaseError(self.indexfile,
2169 2170 self.node(baserev))
2170 2171
2171 2172 if not flags and self._peek_iscensored(baserev, delta, flush):
2172 2173 flags |= REVIDX_ISCENSORED
2173 2174
2174 2175 # We assume consumers of addrevisioncb will want to retrieve
2175 2176 # the added revision, which will require a call to
2176 2177 # revision(). revision() will fast path if there is a cache
2177 2178 # hit. So, we tell _addrevision() to always cache in this case.
2178 2179 # We're only using addgroup() in the context of changegroup
2179 2180 # generation so the revision data can always be handled as raw
2180 2181 # by the flagprocessor.
2181 2182 self._addrevision(node, None, transaction, link,
2182 2183 p1, p2, flags, (baserev, delta),
2183 2184 ifh, dfh,
2184 2185 alwayscache=bool(addrevisioncb),
2185 2186 deltacomputer=deltacomputer)
2186 2187
2187 2188 if addrevisioncb:
2188 2189 addrevisioncb(self, node)
2189 2190
2190 2191 if not dfh and not self._inline:
2191 2192 # addrevision switched from inline to conventional
2192 2193 # reopen the index
2193 2194 ifh.close()
2194 2195 dfh = self._datafp("a+")
2195 2196 ifh = self._indexfp("a+")
2196 2197 self._writinghandles = (ifh, dfh)
2197 2198 finally:
2198 2199 self._writinghandles = None
2199 2200
2200 2201 if dfh:
2201 2202 dfh.close()
2202 2203 ifh.close()
2203 2204
2204 2205 return nodes
2205 2206
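# Editor's note: the record shape consumed by addgroup() above, exactly as
# unpacked in its main loop: each delta names its parents, the node its
# linkrev is derived from, and the revision the delta applies against,
# which must already be present in this revlog.
#
#     (node, p1, p2, linknode, deltabase, delta, flags)
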
2206 2207 def iscensored(self, rev):
2207 2208 """Check if a file revision is censored."""
2208 2209 if not self._censorable:
2209 2210 return False
2210 2211
2211 2212 return self.flags(rev) & REVIDX_ISCENSORED
2212 2213
2213 2214 def _peek_iscensored(self, baserev, delta, flush):
2214 2215 """Quickly check if a delta produces a censored revision."""
2215 2216 if not self._censorable:
2216 2217 return False
2217 2218
2218 2219 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2219 2220
2220 2221 def getstrippoint(self, minlink):
2221 2222 """find the minimum rev that must be stripped to strip the linkrev
2222 2223
2223 2224 Returns a tuple containing the minimum rev and a set of all revs that
2224 2225 have linkrevs that will be broken by this strip.
2225 2226 """
2226 2227 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2227 2228 self.headrevs(),
2228 2229 self.linkrev, self.parentrevs)
2229 2230
2230 2231 def strip(self, minlink, transaction):
2231 2232 """truncate the revlog on the first revision with a linkrev >= minlink
2232 2233
2233 2234 This function is called when we're stripping revision minlink and
2234 2235 its descendants from the repository.
2235 2236
2236 2237 We have to remove all revisions with linkrev >= minlink, because
2237 2238 the equivalent changelog revisions will be renumbered after the
2238 2239 strip.
2239 2240
2240 2241 So we truncate the revlog on the first of these revisions, and
2241 2242 trust that the caller has saved the revisions that shouldn't be
2242 2243 removed and that it'll re-add them after this truncation.
2243 2244 """
2244 2245 if len(self) == 0:
2245 2246 return
2246 2247
2247 2248 rev, _ = self.getstrippoint(minlink)
2248 2249 if rev == len(self):
2249 2250 return
2250 2251
2251 2252 # first truncate the files on disk
2252 2253 end = self.start(rev)
2253 2254 if not self._inline:
2254 2255 transaction.add(self.datafile, end)
2255 2256 end = rev * self._io.size
2256 2257 else:
2257 2258 end += rev * self._io.size
2258 2259
2259 2260 transaction.add(self.indexfile, end)
2260 2261
2261 2262 # then reset internal state in memory to forget those revisions
2262 2263 self._revisioncache = None
2263 2264 self._chaininfocache = {}
2264 2265 self._chunkclear()
2265 2266 for x in pycompat.xrange(rev, len(self)):
2266 2267 del self.nodemap[self.node(x)]
2267 2268
2268 2269 del self.index[rev:-1]
2269 2270 self._nodepos = None
2270 2271
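# Editor's note: a standalone sketch of the truncation offsets computed in
# strip() above. In a split revlog the data file is cut at the first
# stripped revision's data start and the index at rev * iosize; an inline
# revlog stores both streams in one file, so the two lengths add up. The
# helper and its return shape are illustrative only.
def strip_offsets(datastart, rev, iosize, inline):
    if inline:
        return None, datastart + rev * iosize    # (data, index) cut points
    return datastart, rev * iosize
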
2271 2272 def checksize(self):
2272 2273 """Check size of index and data files
2273 2274
2274 2275 return a (dd, di) tuple.
2275 2276 - dd: extra bytes for the "data" file
2276 2277 - di: extra bytes for the "index" file
2277 2278
2278 2279 A healthy revlog will return (0, 0).
2279 2280 """
2280 2281 expected = 0
2281 2282 if len(self):
2282 2283 expected = max(0, self.end(len(self) - 1))
2283 2284
2284 2285 try:
2285 2286 with self._datafp() as f:
2286 2287 f.seek(0, io.SEEK_END)
2287 2288 actual = f.tell()
2288 2289 dd = actual - expected
2289 2290 except IOError as inst:
2290 2291 if inst.errno != errno.ENOENT:
2291 2292 raise
2292 2293 dd = 0
2293 2294
2294 2295 try:
2295 2296 f = self.opener(self.indexfile)
2296 2297 f.seek(0, io.SEEK_END)
2297 2298 actual = f.tell()
2298 2299 f.close()
2299 2300 s = self._io.size
2300 2301 i = max(0, actual // s)
2301 2302 di = actual - (i * s)
2302 2303 if self._inline:
2303 2304 databytes = 0
2304 2305 for r in self:
2305 2306 databytes += max(0, self.length(r))
2306 2307 dd = 0
2307 2308 di = actual - len(self) * s - databytes
2308 2309 except IOError as inst:
2309 2310 if inst.errno != errno.ENOENT:
2310 2311 raise
2311 2312 di = 0
2312 2313
2313 2314 return (dd, di)
2314 2315
2315 2316 def files(self):
2316 2317 res = [self.indexfile]
2317 2318 if not self._inline:
2318 2319 res.append(self.datafile)
2319 2320 return res
2320 2321
2321 2322 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2322 2323 assumehaveparentrevisions=False,
2323 2324 deltamode=repository.CG_DELTAMODE_STD):
2324 2325 if nodesorder not in ('nodes', 'storage', 'linear', None):
2325 2326 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2326 2327 nodesorder)
2327 2328
2328 2329 if nodesorder is None and not self._generaldelta:
2329 2330 nodesorder = 'storage'
2330 2331
2331 2332 if (not self._storedeltachains and
2332 2333 deltamode != repository.CG_DELTAMODE_PREV):
2333 2334 deltamode = repository.CG_DELTAMODE_FULL
2334 2335
2335 2336 return storageutil.emitrevisions(
2336 2337 self, nodes, nodesorder, revlogrevisiondelta,
2337 2338 deltaparentfn=self.deltaparent,
2338 2339 candeltafn=self.candelta,
2339 2340 rawsizefn=self.rawsize,
2340 2341 revdifffn=self.revdiff,
2341 2342 flagsfn=self.flags,
2342 2343 deltamode=deltamode,
2343 2344 revisiondata=revisiondata,
2344 2345 assumehaveparentrevisions=assumehaveparentrevisions)
2345 2346
2346 2347 DELTAREUSEALWAYS = 'always'
2347 2348 DELTAREUSESAMEREVS = 'samerevs'
2348 2349 DELTAREUSENEVER = 'never'
2349 2350
2350 2351 DELTAREUSEFULLADD = 'fulladd'
2351 2352
2352 2353 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2353 2354
2354 2355 def clone(self, tr, destrevlog, addrevisioncb=None,
2355 2356 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2356 2357 """Copy this revlog to another, possibly with format changes.
2357 2358
2358 2359 The destination revlog will contain the same revisions and nodes.
2359 2360 However, it may not be bit-for-bit identical due to e.g. delta encoding
2360 2361 differences.
2361 2362
2362 2363         The ``deltareuse`` argument controls how deltas from the existing revlog
2363 2364 are preserved in the destination revlog. The argument can have the
2364 2365 following values:
2365 2366
2366 2367 DELTAREUSEALWAYS
2367 2368 Deltas will always be reused (if possible), even if the destination
2368 2369 revlog would not select the same revisions for the delta. This is the
2369 2370 fastest mode of operation.
2370 2371 DELTAREUSESAMEREVS
2371 2372 Deltas will be reused if the destination revlog would pick the same
2372 2373 revisions for the delta. This mode strikes a balance between speed
2373 2374 and optimization.
2374 2375 DELTAREUSENEVER
2375 2376 Deltas will never be reused. This is the slowest mode of execution.
2376 2377 This mode can be used to recompute deltas (e.g. if the diff/delta
2377 2378 algorithm changes).
2378 2379 DELTAREUSEFULLADD
2379 2380             Revisions will be re-added as if they were new content. This is
2380 2381             slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2381 2382             e.g. large file detection and handling.
2382 2383
2383 2384 Delta computation can be slow, so the choice of delta reuse policy can
2384 2385 significantly affect run time.
2385 2386
2386 2387 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2387 2388 two extremes. Deltas will be reused if they are appropriate. But if the
2388 2389 delta could choose a better revision, it will do so. This means if you
2389 2390 are converting a non-generaldelta revlog to a generaldelta revlog,
2390 2391 deltas will be recomputed if the delta's parent isn't a parent of the
2391 2392 revision.
2392 2393
2393 2394         In addition to the delta policy, the ``forcedeltabothparents``
2394 2395         argument controls whether to force computing deltas against both
2395 2396         parents for merges. When unset, the destination's current setting is kept.
2396 2397 """
2397 2398 if deltareuse not in self.DELTAREUSEALL:
2398 2399 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2399 2400
2400 2401 if len(destrevlog):
2401 2402 raise ValueError(_('destination revlog is not empty'))
2402 2403
2403 2404 if getattr(self, 'filteredrevs', None):
2404 2405 raise ValueError(_('source revlog has filtered revisions'))
2405 2406 if getattr(destrevlog, 'filteredrevs', None):
2406 2407 raise ValueError(_('destination revlog has filtered revisions'))
2407 2408
2408 2409 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2409 2410 # if possible.
2410 2411 oldlazydelta = destrevlog._lazydelta
2411 2412 oldlazydeltabase = destrevlog._lazydeltabase
2412 2413 oldamd = destrevlog._deltabothparents
2413 2414
2414 2415 try:
2415 2416 if deltareuse == self.DELTAREUSEALWAYS:
2416 2417 destrevlog._lazydeltabase = True
2417 2418 destrevlog._lazydelta = True
2418 2419 elif deltareuse == self.DELTAREUSESAMEREVS:
2419 2420 destrevlog._lazydeltabase = False
2420 2421 destrevlog._lazydelta = True
2421 2422 elif deltareuse == self.DELTAREUSENEVER:
2422 2423 destrevlog._lazydeltabase = False
2423 2424 destrevlog._lazydelta = False
2424 2425
2425 2426 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2426 2427
2427 2428 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2428 2429 forcedeltabothparents)
2429 2430
2430 2431 finally:
2431 2432 destrevlog._lazydelta = oldlazydelta
2432 2433 destrevlog._lazydeltabase = oldlazydeltabase
2433 2434 destrevlog._deltabothparents = oldamd
2434 2435
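# Editor's note: hedged usage sketch; `src`, `dest` and `tr` stand for an
# existing revlog, an empty destination revlog and an open transaction. A
# clone that recomputes every delta (e.g. after a delta algorithm change)
# would look like:
#
#     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
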
2435 2436 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2436 2437 forcedeltabothparents):
2437 2438 """perform the core duty of `revlog.clone` after parameter processing"""
2438 2439 deltacomputer = deltautil.deltacomputer(destrevlog)
2439 2440 index = self.index
2440 2441 for rev in self:
2441 2442 entry = index[rev]
2442 2443
2443 2444 # Some classes override linkrev to take filtered revs into
2444 2445 # account. Use raw entry from index.
2445 2446 flags = entry[0] & 0xffff
2446 2447 linkrev = entry[4]
2447 2448 p1 = index[entry[5]][7]
2448 2449 p2 = index[entry[6]][7]
2449 2450 node = entry[7]
2450 2451
2451 2452 # (Possibly) reuse the delta from the revlog if allowed and
2452 2453 # the revlog chunk is a delta.
2453 2454 cachedelta = None
2454 2455 rawtext = None
2455 2456 if deltareuse == self.DELTAREUSEFULLADD:
2456 2457 text = self.revision(rev)
2457 2458 destrevlog.addrevision(text, tr, linkrev, p1, p2,
2458 2459 cachedelta=cachedelta,
2459 2460 node=node, flags=flags,
2460 2461 deltacomputer=deltacomputer)
2461 2462 else:
2462 2463 if destrevlog._lazydelta:
2463 2464 dp = self.deltaparent(rev)
2464 2465 if dp != nullrev:
2465 2466 cachedelta = (dp, bytes(self._chunk(rev)))
2466 2467
2467 2468 if not cachedelta:
2468 2469 rawtext = self.rawdata(rev)
2469 2470
2470 2471 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2471 2472 checkambig=False)
2472 2473 dfh = None
2473 2474 if not destrevlog._inline:
2474 2475 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2475 2476 try:
2476 2477 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2477 2478 p2, flags, cachedelta, ifh, dfh,
2478 2479 deltacomputer=deltacomputer)
2479 2480 finally:
2480 2481 if dfh:
2481 2482 dfh.close()
2482 2483 ifh.close()
2483 2484
2484 2485 if addrevisioncb:
2485 2486 addrevisioncb(self, rev, node)
2486 2487
2487 2488 def censorrevision(self, tr, censornode, tombstone=b''):
2488 2489 if (self.version & 0xFFFF) == REVLOGV0:
2489 2490 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2490 2491 self.version)
2491 2492
2492 2493 censorrev = self.rev(censornode)
2493 2494 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2494 2495
2495 2496 if len(tombstone) > self.rawsize(censorrev):
2496 2497 raise error.Abort(_('censor tombstone must be no longer than '
2497 2498 'censored data'))
2498 2499
2499 2500 # Rewriting the revlog in place is hard. Our strategy for censoring is
2500 2501 # to create a new revlog, copy all revisions to it, then replace the
2501 2502 # revlogs on transaction close.
2502 2503
2503 2504 newindexfile = self.indexfile + b'.tmpcensored'
2504 2505 newdatafile = self.datafile + b'.tmpcensored'
2505 2506
2506 2507 # This is a bit dangerous. We could easily have a mismatch of state.
2507 2508 newrl = revlog(self.opener, newindexfile, newdatafile,
2508 2509 censorable=True)
2509 2510 newrl.version = self.version
2510 2511 newrl._generaldelta = self._generaldelta
2511 2512 newrl._io = self._io
2512 2513
2513 2514 for rev in self.revs():
2514 2515 node = self.node(rev)
2515 2516 p1, p2 = self.parents(node)
2516 2517
2517 2518 if rev == censorrev:
2518 2519 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2519 2520 p1, p2, censornode, REVIDX_ISCENSORED)
2520 2521
2521 2522 if newrl.deltaparent(rev) != nullrev:
2522 2523 raise error.Abort(_('censored revision stored as delta; '
2523 2524 'cannot censor'),
2524 2525 hint=_('censoring of revlogs is not '
2525 2526 'fully implemented; please report '
2526 2527 'this bug'))
2527 2528 continue
2528 2529
2529 2530 if self.iscensored(rev):
2530 2531 if self.deltaparent(rev) != nullrev:
2531 2532 raise error.Abort(_('cannot censor due to censored '
2532 2533 'revision having delta stored'))
2533 2534 rawtext = self._chunk(rev)
2534 2535 else:
2535 2536 rawtext = self.rawdata(rev)
2536 2537
2537 2538 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2538 2539 self.flags(rev))
2539 2540
2540 2541 tr.addbackup(self.indexfile, location='store')
2541 2542 if not self._inline:
2542 2543 tr.addbackup(self.datafile, location='store')
2543 2544
2544 2545 self.opener.rename(newrl.indexfile, self.indexfile)
2545 2546 if not self._inline:
2546 2547 self.opener.rename(newrl.datafile, self.datafile)
2547 2548
2548 2549 self.clearcaches()
2549 2550 self._loadindex()
2550 2551
2551 2552 def verifyintegrity(self, state):
2552 2553 """Verifies the integrity of the revlog.
2553 2554
2554 2555 Yields ``revlogproblem`` instances describing problems that are
2555 2556 found.
2556 2557 """
2557 2558 dd, di = self.checksize()
2558 2559 if dd:
2559 2560 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2560 2561 if di:
2561 2562 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2562 2563
2563 2564 version = self.version & 0xFFFF
2564 2565
2565 2566 # The verifier tells us what version revlog we should be.
2566 2567 if version != state['expectedversion']:
2567 2568 yield revlogproblem(
2568 2569 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2569 2570 (self.indexfile, version, state['expectedversion']))
2570 2571
2571 2572 state['skipread'] = set()
2572 2573
2573 2574 for rev in self:
2574 2575 node = self.node(rev)
2575 2576
2576 2577 # Verify contents. 4 cases to care about:
2577 2578 #
2578 2579 # common: the most common case
2579 2580 # rename: with a rename
2580 2581 # meta: file content starts with b'\1\n', the metadata
2581 2582 # header defined in filelog.py, but without a rename
2582 2583 # ext: content stored externally
2583 2584 #
2584 2585 # More formally, their differences are shown below:
2585 2586 #
2586 2587 # | common | rename | meta | ext
2587 2588 # -------------------------------------------------------
2588 2589 # flags() | 0 | 0 | 0 | not 0
2589 2590 # renamed() | False | True | False | ?
2590 2591 # rawtext[0:2]=='\1\n'| False | True | True | ?
2591 2592 #
2592 2593 # "rawtext" means the raw text stored in revlog data, which
2593 2594 # could be retrieved by "rawdata(rev)". "text"
2594 2595 # mentioned below is "revision(rev)".
2595 2596 #
2596 2597 # There are 3 different lengths stored physically:
2597 2598 # 1. L1: rawsize, stored in revlog index
2598 2599 # 2. L2: len(rawtext), stored in revlog data
2599 2600 # 3. L3: len(text), stored in revlog data if flags==0, or
2600 2601 # possibly somewhere else if flags!=0
2601 2602 #
2602 2603 # L1 should be equal to L2. L3 could be different from them.
2603 2604 # "text" may or may not affect commit hash depending on flag
2604 2605 # processors (see flagutil.addflagprocessor).
2605 2606 #
2606 2607 # | common | rename | meta | ext
2607 2608 # -------------------------------------------------
2608 2609 # rawsize() | L1 | L1 | L1 | L1
2609 2610 # size() | L1 | L2-LM | L1(*) | L1 (?)
2610 2611 # len(rawtext) | L2 | L2 | L2 | L2
2611 2612 # len(text) | L2 | L2 | L2 | L3
2612 2613 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2613 2614 #
2614 2615 # LM: length of metadata, depending on rawtext
2615 2616 # (*): not ideal, see comment in filelog.size
2616 2617 # (?): could be "- len(meta)" if the resolved content has
2617 2618 # rename metadata
2618 2619 #
2619 2620 # Checks needed to be done:
2620 2621 # 1. length check: L1 == L2, in all cases.
2621 2622 # 2. hash check: depending on flag processor, we may need to
2622 2623 # use either "text" (external), or "rawtext" (in revlog).
2623 2624
2624 2625 try:
2625 2626 skipflags = state.get('skipflags', 0)
2626 2627 if skipflags:
2627 2628 skipflags &= self.flags(rev)
2628 2629
2629 2630 if skipflags:
2630 2631 state['skipread'].add(node)
2631 2632 else:
2632 2633 # Side-effect: read content and verify hash.
2633 2634 self.revision(node)
2634 2635
2635 2636 l1 = self.rawsize(rev)
2636 2637 l2 = len(self.rawdata(node))
2637 2638
2638 2639 if l1 != l2:
2639 2640 yield revlogproblem(
2640 2641 error=_('unpacked size is %d, %d expected') % (l2, l1),
2641 2642 node=node)
2642 2643
2643 2644 except error.CensoredNodeError:
2644 2645 if state['erroroncensored']:
2645 2646 yield revlogproblem(error=_('censored file data'),
2646 2647 node=node)
2647 2648 state['skipread'].add(node)
2648 2649 except Exception as e:
2649 2650 yield revlogproblem(
2650 2651 error=_('unpacking %s: %s') % (short(node),
2651 2652 stringutil.forcebytestr(e)),
2652 2653 node=node)
2653 2654 state['skipread'].add(node)
2654 2655
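# Editor's note: hedged usage sketch of the state mapping consumed above;
# every key shown is read or written by verifyintegrity() in this file, and
# `rl` stands for an open revlog.
#
#     state = {'expectedversion': 1, 'erroroncensored': True, 'skipflags': 0}
#     for problem in rl.verifyintegrity(state):
#         ...  # each problem carries warning/error text and possibly a node
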
2655 2656 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2656 2657 revisionscount=False, trackedsize=False,
2657 2658 storedsize=False):
2658 2659 d = {}
2659 2660
2660 2661 if exclusivefiles:
2661 2662 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2662 2663 if not self._inline:
2663 2664 d['exclusivefiles'].append((self.opener, self.datafile))
2664 2665
2665 2666 if sharedfiles:
2666 2667 d['sharedfiles'] = []
2667 2668
2668 2669 if revisionscount:
2669 2670 d['revisionscount'] = len(self)
2670 2671
2671 2672 if trackedsize:
2672 2673 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2673 2674
2674 2675 if storedsize:
2675 2676 d['storedsize'] = sum(self.opener.stat(path).st_size
2676 2677 for path in self.files())
2677 2678
2678 2679 return d