upgrade: move most of revlog.clone method into a _clone method...
marmoute
r43266:4f2315cc default
@@ -1,2653 +1,2659 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 57 )
58 58 from .thirdparty import (
59 59 attr,
60 60 )
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 )
79 79 from .utils import (
80 80 storageutil,
81 81 stringutil,
82 82 )
83 83
84 84 # blanket usage of all the names to prevent pyflakes complaints
85 85 # We need these names available in the module for extensions.
86 86 REVLOGV0
87 87 REVLOGV1
88 88 REVLOGV2
89 89 FLAG_INLINE_DATA
90 90 FLAG_GENERALDELTA
91 91 REVLOG_DEFAULT_FLAGS
92 92 REVLOG_DEFAULT_FORMAT
93 93 REVLOG_DEFAULT_VERSION
94 94 REVLOGV1_FLAGS
95 95 REVLOGV2_FLAGS
96 96 REVIDX_ISCENSORED
97 97 REVIDX_ELLIPSIS
98 98 REVIDX_EXTSTORED
99 99 REVIDX_DEFAULT_FLAGS
100 100 REVIDX_FLAGS_ORDER
101 101 REVIDX_RAWTEXT_CHANGING_FLAGS
102 102
103 103 parsers = policy.importmod(r'parsers')
104 104 rustancestor = policy.importrust(r'ancestor')
105 105 rustdagop = policy.importrust(r'dagop')
106 106
107 107 # Aliased for performance.
108 108 _zlibdecompress = zlib.decompress
109 109
110 110 # max size of revlog with inline data
111 111 _maxinline = 131072
112 112 _chunksize = 1048576
113 113
114 114 # Flag processors for REVIDX_ELLIPSIS.
115 115 def ellipsisreadprocessor(rl, text):
116 116 return text, False, {}
117 117
118 118 def ellipsiswriteprocessor(rl, text, sidedata):
119 119 return text, False
120 120
121 121 def ellipsisrawprocessor(rl, text):
122 122 return False
123 123
124 124 ellipsisprocessor = (
125 125 ellipsisreadprocessor,
126 126 ellipsiswriteprocessor,
127 127 ellipsisrawprocessor,
128 128 )
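# A sketch of the processor contract suggested by the trio above: a
# "read" processor returns (text, validatehash, sidedata), a "write"
# processor returns (text, validatehash), and a "raw" processor returns
# whether the stored rawtext can be hash-checked directly (False here,
# since an ellipsis rawtext differs from the full text it stands for).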
129 129
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
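# Illustrative round-trip for the helpers above (values hypothetical):
# the offset lives in the high bits and the flags in the low 16 bits.
#
#     packed = offset_type(4096, 0)
#     assert getoffset(packed) == 4096
#     assert gettype(packed) == 0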
140 140
141 141 @attr.s(slots=True, frozen=True)
142 142 class _revisioninfo(object):
143 143 """Information about a revision that allows building its fulltext
144 144 node: expected hash of the revision
145 145 p1, p2: parent revs of the revision
146 146 btext: built text cache consisting of a one-element list
147 147 cachedelta: (baserev, uncompressed_delta) or None
148 148 flags: flags associated with the revision storage
149 149
150 150 One of btext[0] or cachedelta must be set.
151 151 """
152 152 node = attr.ib()
153 153 p1 = attr.ib()
154 154 p2 = attr.ib()
155 155 btext = attr.ib()
156 156 textlen = attr.ib()
157 157 cachedelta = attr.ib()
158 158 flags = attr.ib()
159 159
160 160 @interfaceutil.implementer(repository.irevisiondelta)
161 161 @attr.s(slots=True)
162 162 class revlogrevisiondelta(object):
163 163 node = attr.ib()
164 164 p1node = attr.ib()
165 165 p2node = attr.ib()
166 166 basenode = attr.ib()
167 167 flags = attr.ib()
168 168 baserevisionsize = attr.ib()
169 169 revision = attr.ib()
170 170 delta = attr.ib()
171 171 linknode = attr.ib(default=None)
172 172
173 173 @interfaceutil.implementer(repository.iverifyproblem)
174 174 @attr.s(frozen=True)
175 175 class revlogproblem(object):
176 176 warning = attr.ib(default=None)
177 177 error = attr.ib(default=None)
178 178 node = attr.ib(default=None)
179 179
180 180 # index v0:
181 181 # 4 bytes: offset
182 182 # 4 bytes: compressed length
183 183 # 4 bytes: base rev
184 184 # 4 bytes: link rev
185 185 # 20 bytes: parent 1 nodeid
186 186 # 20 bytes: parent 2 nodeid
187 187 # 20 bytes: nodeid
188 188 indexformatv0 = struct.Struct(">4l20s20s20s")
189 189 indexformatv0_pack = indexformatv0.pack
190 190 indexformatv0_unpack = indexformatv0.unpack
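# The v0 record is fixed-size, so parseindex below can walk the file in
# constant strides; per the layout above that is 4 * 4 + 3 * 20 = 76
# bytes per entry:
#
#     assert indexformatv0.size == 76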
191 191
192 192 class revlogoldindex(list):
193 193 def __getitem__(self, i):
194 194 if i == -1:
195 195 return (0, 0, 0, -1, -1, -1, -1, nullid)
196 196 return list.__getitem__(self, i)
197 197
198 198 class revlogoldio(object):
199 199 def __init__(self):
200 200 self.size = indexformatv0.size
201 201
202 202 def parseindex(self, data, inline):
203 203 s = self.size
204 204 index = []
205 205 nodemap = {nullid: nullrev}
206 206 n = off = 0
207 207 l = len(data)
208 208 while off + s <= l:
209 209 cur = data[off:off + s]
210 210 off += s
211 211 e = indexformatv0_unpack(cur)
212 212 # transform to revlogv1 format
213 213 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
214 214 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
215 215 index.append(e2)
216 216 nodemap[e[6]] = n
217 217 n += 1
218 218
219 219 return revlogoldindex(index), nodemap, None
220 220
221 221 def packentry(self, entry, node, version, rev):
222 222 if gettype(entry[0]):
223 223 raise error.RevlogError(_('index entry flags need revlog '
224 224 'version 1'))
225 225 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
226 226 node(entry[5]), node(entry[6]), entry[7])
227 227 return indexformatv0_pack(*e2)
228 228
229 229 # index ng:
230 230 # 6 bytes: offset
231 231 # 2 bytes: flags
232 232 # 4 bytes: compressed length
233 233 # 4 bytes: uncompressed length
234 234 # 4 bytes: base rev
235 235 # 4 bytes: link rev
236 236 # 4 bytes: parent 1 rev
237 237 # 4 bytes: parent 2 rev
238 238 # 32 bytes: nodeid
239 239 indexformatng = struct.Struct(">Qiiiiii20s12x")
240 240 indexformatng_pack = indexformatng.pack
241 241 versionformat = struct.Struct(">I")
242 242 versionformat_pack = versionformat.pack
243 243 versionformat_unpack = versionformat.unpack
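# Per the layout above, each entry packs into 8 + 6 * 4 + 20 + 12 = 64
# bytes; the "32 bytes: nodeid" field is a 20-byte hash followed by 12
# bytes of zero padding (the 12x in the format string):
#
#     assert indexformatng.size == 64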
244 244
245 245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 246 # signed integer)
247 247 _maxentrysize = 0x7fffffff
248 248
249 249 class revlogio(object):
250 250 def __init__(self):
251 251 self.size = indexformatng.size
252 252
253 253 def parseindex(self, data, inline):
254 254 # call the C implementation to parse the index data
255 255 index, cache = parsers.parse_index2(data, inline)
256 256 return index, getattr(index, 'nodemap', None), cache
257 257
258 258 def packentry(self, entry, node, version, rev):
259 259 p = indexformatng_pack(*entry)
260 260 if rev == 0:
261 261 p = versionformat_pack(version) + p[4:]
262 262 return p
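# Note on the rev == 0 special case above: the first revision always
# starts at offset 0, so the high 4 bytes of its offset field are
# guaranteed to be zero. Overwriting them with the 4-byte version
# header therefore loses no data, and lets one file serve as both
# header and index.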
263 263
264 264 class revlog(object):
265 265 """
266 266 the underlying revision storage object
267 267
268 268 A revlog consists of two parts, an index and the revision data.
269 269
270 270 The index is a file with a fixed record size containing
271 271 information on each revision, including its nodeid (hash), the
272 272 nodeids of its parents, the position and offset of its data within
273 273 the data file, and the revision it's based on. Finally, each entry
274 274 contains a linkrev entry that can serve as a pointer to external
275 275 data.
276 276
277 277 The revision data itself is a linear collection of data chunks.
278 278 Each chunk represents a revision and is usually represented as a
279 279 delta against the previous chunk. To bound lookup time, runs of
280 280 deltas are limited to about 2 times the length of the original
281 281 version data. This makes retrieval of a version proportional to
282 282 its size, or O(1) relative to the number of revisions.
283 283
284 284 Both pieces of the revlog are written to in an append-only
285 285 fashion, which means we never need to rewrite a file to insert or
286 286 remove data, and can use some simple techniques to avoid the need
287 287 for locking while reading.
288 288
289 289 If checkambig, indexfile is opened with checkambig=True at
290 290 writing, to avoid file stat ambiguity.
291 291
292 292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 293 index will be mmapped rather than read if it is larger than the
294 294 configured threshold.
295 295
296 296 If censorable is True, the revlog can have censored revisions.
297 297
298 298 If `upperboundcomp` is not None, this is the expected maximal gain from
299 299 compression for the data content.
300 300 """
301 301
302 302 _flagserrorclass = error.RevlogError
303 303
304 304 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
305 305 mmaplargeindex=False, censorable=False,
306 306 upperboundcomp=None):
307 307 """
308 308 create a revlog object
309 309
310 310 opener is a function that abstracts the file opening operation
311 311 and can be used to implement COW semantics or the like.
312 312
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile or (indexfile[:-2] + ".d")
317 317 self.opener = opener
318 318 # When True, indexfile is opened with checkambig=True at writing, to
319 319 # avoid file stat ambiguity.
320 320 self._checkambig = checkambig
321 321 self._mmaplargeindex = mmaplargeindex
322 322 self._censorable = censorable
323 323 # 3-tuple of (node, rev, text) for a raw revision.
324 324 self._revisioncache = None
325 325 # Maps rev to chain base rev.
326 326 self._chainbasecache = util.lrucachedict(100)
327 327 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
328 328 self._chunkcache = (0, '')
329 329 # How much data to read and cache into the raw revlog data cache.
330 330 self._chunkcachesize = 65536
331 331 self._maxchainlen = None
332 332 self._deltabothparents = True
333 333 self.index = []
334 334 # Mapping of partial identifiers to full nodes.
335 335 self._pcache = {}
336 336 # Mapping of revision integer to full node.
337 337 self._nodecache = {nullid: nullrev}
338 338 self._nodepos = None
339 339 self._compengine = 'zlib'
340 340 self._compengineopts = {}
341 341 self._maxdeltachainspan = -1
342 342 self._withsparseread = False
343 343 self._sparserevlog = False
344 344 self._srdensitythreshold = 0.50
345 345 self._srmingapsize = 262144
346 346
347 347 # Make copy of flag processors so each revlog instance can support
348 348 # custom flags.
349 349 self._flagprocessors = dict(flagutil.flagprocessors)
350 350
351 351 # 2-tuple of file handles being used for active writing.
352 352 self._writinghandles = None
353 353
354 354 self._loadindex()
355 355
356 356 def _loadindex(self):
357 357 mmapindexthreshold = None
358 358 opts = getattr(self.opener, 'options', {}) or {}
359 359
360 360 if 'revlogv2' in opts:
361 361 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
362 362 elif 'revlogv1' in opts:
363 363 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
364 364 if 'generaldelta' in opts:
365 365 newversionflags |= FLAG_GENERALDELTA
366 366 elif getattr(self.opener, 'options', None) is not None:
367 367 # If options provided but no 'revlog*' found, the repository
368 368 # would have no 'requires' file in it, which means we have to
369 369 # stick to the old format.
370 370 newversionflags = REVLOGV0
371 371 else:
372 372 newversionflags = REVLOG_DEFAULT_VERSION
373 373
374 374 if 'chunkcachesize' in opts:
375 375 self._chunkcachesize = opts['chunkcachesize']
376 376 if 'maxchainlen' in opts:
377 377 self._maxchainlen = opts['maxchainlen']
378 378 if 'deltabothparents' in opts:
379 379 self._deltabothparents = opts['deltabothparents']
380 380 self._lazydelta = bool(opts.get('lazydelta', True))
381 381 self._lazydeltabase = False
382 382 if self._lazydelta:
383 383 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
384 384 if 'compengine' in opts:
385 385 self._compengine = opts['compengine']
386 386 if 'zlib.level' in opts:
387 387 self._compengineopts['zlib.level'] = opts['zlib.level']
388 388 if 'zstd.level' in opts:
389 389 self._compengineopts['zstd.level'] = opts['zstd.level']
390 390 if 'maxdeltachainspan' in opts:
391 391 self._maxdeltachainspan = opts['maxdeltachainspan']
392 392 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
393 393 mmapindexthreshold = opts['mmapindexthreshold']
394 394 self._sparserevlog = bool(opts.get('sparse-revlog', False))
395 395 withsparseread = bool(opts.get('with-sparse-read', False))
396 396 # sparse-revlog forces sparse-read
397 397 self._withsparseread = self._sparserevlog or withsparseread
398 398 if 'sparse-read-density-threshold' in opts:
399 399 self._srdensitythreshold = opts['sparse-read-density-threshold']
400 400 if 'sparse-read-min-gap-size' in opts:
401 401 self._srmingapsize = opts['sparse-read-min-gap-size']
402 402 if opts.get('enableellipsis'):
403 403 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
404 404
405 405 # revlog v0 doesn't have flag processors
406 406 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
407 407 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
408 408
409 409 if self._chunkcachesize <= 0:
410 410 raise error.RevlogError(_('revlog chunk cache size %r is not '
411 411 'greater than 0') % self._chunkcachesize)
412 412 elif self._chunkcachesize & (self._chunkcachesize - 1):
413 413 raise error.RevlogError(_('revlog chunk cache size %r is not a '
414 414 'power of 2') % self._chunkcachesize)
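# The check above relies on a standard bit trick: x & (x - 1) clears
# the lowest set bit, so it is zero exactly when x is a power of two.
# For example, 65536 & 65535 == 0 while 65537 & 65536 != 0.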
415 415
416 416 indexdata = ''
417 417 self._initempty = True
418 418 try:
419 419 with self._indexfp() as f:
420 420 if (mmapindexthreshold is not None and
421 421 self.opener.fstat(f).st_size >= mmapindexthreshold):
422 422 # TODO: should .close() to release resources without
423 423 # relying on Python GC
424 424 indexdata = util.buffer(util.mmapread(f))
425 425 else:
426 426 indexdata = f.read()
427 427 if len(indexdata) > 0:
428 428 versionflags = versionformat_unpack(indexdata[:4])[0]
429 429 self._initempty = False
430 430 else:
431 431 versionflags = newversionflags
432 432 except IOError as inst:
433 433 if inst.errno != errno.ENOENT:
434 434 raise
435 435
436 436 versionflags = newversionflags
437 437
438 438 self.version = versionflags
439 439
440 440 flags = versionflags & ~0xFFFF
441 441 fmt = versionflags & 0xFFFF
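# For example, a version 1 inline revlog stores 0x00010001 here:
# fmt == REVLOGV1 (1) in the low 16 bits, and the remaining bits carry
# flags == FLAG_INLINE_DATA (0x10000).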
442 442
443 443 if fmt == REVLOGV0:
444 444 if flags:
445 445 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
446 446 'revlog %s') %
447 447 (flags >> 16, fmt, self.indexfile))
448 448
449 449 self._inline = False
450 450 self._generaldelta = False
451 451
452 452 elif fmt == REVLOGV1:
453 453 if flags & ~REVLOGV1_FLAGS:
454 454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
455 455 'revlog %s') %
456 456 (flags >> 16, fmt, self.indexfile))
457 457
458 458 self._inline = versionflags & FLAG_INLINE_DATA
459 459 self._generaldelta = versionflags & FLAG_GENERALDELTA
460 460
461 461 elif fmt == REVLOGV2:
462 462 if flags & ~REVLOGV2_FLAGS:
463 463 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
464 464 'revlog %s') %
465 465 (flags >> 16, fmt, self.indexfile))
466 466
467 467 self._inline = versionflags & FLAG_INLINE_DATA
468 468 # generaldelta implied by version 2 revlogs.
469 469 self._generaldelta = True
470 470
471 471 else:
472 472 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
473 473 (fmt, self.indexfile))
474 474 # sparse-revlog can't be on without general-delta (issue6056)
475 475 if not self._generaldelta:
476 476 self._sparserevlog = False
477 477
478 478 self._storedeltachains = True
479 479
480 480 self._io = revlogio()
481 481 if self.version == REVLOGV0:
482 482 self._io = revlogoldio()
483 483 try:
484 484 d = self._io.parseindex(indexdata, self._inline)
485 485 except (ValueError, IndexError):
486 486 raise error.RevlogError(_("index %s is corrupted") %
487 487 self.indexfile)
488 488 self.index, nodemap, self._chunkcache = d
489 489 if nodemap is not None:
490 490 self.nodemap = self._nodecache = nodemap
491 491 if not self._chunkcache:
492 492 self._chunkclear()
493 493 # revnum -> (chain-length, sum-delta-length)
494 494 self._chaininfocache = {}
495 495 # revlog header -> revlog compressor
496 496 self._decompressors = {}
497 497
498 498 @util.propertycache
499 499 def _compressor(self):
500 500 engine = util.compengines[self._compengine]
501 501 return engine.revlogcompressor(self._compengineopts)
502 502
503 503 def _indexfp(self, mode='r'):
504 504 """file object for the revlog's index file"""
505 505 args = {r'mode': mode}
506 506 if mode != 'r':
507 507 args[r'checkambig'] = self._checkambig
508 508 if mode == 'w':
509 509 args[r'atomictemp'] = True
510 510 return self.opener(self.indexfile, **args)
511 511
512 512 def _datafp(self, mode='r'):
513 513 """file object for the revlog's data file"""
514 514 return self.opener(self.datafile, mode=mode)
515 515
516 516 @contextlib.contextmanager
517 517 def _datareadfp(self, existingfp=None):
518 518 """file object suitable to read data"""
519 519 # Use explicit file handle, if given.
520 520 if existingfp is not None:
521 521 yield existingfp
522 522
523 523 # Use a file handle being actively used for writes, if available.
524 524 # There is some danger to doing this because reads will seek the
525 525 # file. However, _writeentry() performs a SEEK_END before all writes,
526 526 # so we should be safe.
527 527 elif self._writinghandles:
528 528 if self._inline:
529 529 yield self._writinghandles[0]
530 530 else:
531 531 yield self._writinghandles[1]
532 532
533 533 # Otherwise open a new file handle.
534 534 else:
535 535 if self._inline:
536 536 func = self._indexfp
537 537 else:
538 538 func = self._datafp
539 539 with func() as fp:
540 540 yield fp
541 541
542 542 def tip(self):
543 543 return self.node(len(self.index) - 1)
544 544 def __contains__(self, rev):
545 545 return 0 <= rev < len(self)
546 546 def __len__(self):
547 547 return len(self.index)
548 548 def __iter__(self):
549 549 return iter(pycompat.xrange(len(self)))
550 550 def revs(self, start=0, stop=None):
551 551 """iterate over all rev in this revlog (from start to stop)"""
552 552 return storageutil.iterrevs(len(self), start=start, stop=stop)
553 553
554 554 @util.propertycache
555 555 def nodemap(self):
556 556 if self.index:
557 557 # populate mapping down to the initial node
558 558 node0 = self.index[0][7] # get around changelog filtering
559 559 self.rev(node0)
560 560 return self._nodecache
561 561
562 562 def hasnode(self, node):
563 563 try:
564 564 self.rev(node)
565 565 return True
566 566 except KeyError:
567 567 return False
568 568
569 569 def candelta(self, baserev, rev):
570 570 """whether two revisions (baserev, rev) can be delta-ed or not"""
571 571 # Disable delta if either rev requires a content-changing flag
572 572 # processor (ex. LFS). This is because such a flag processor can alter
573 573 # the rawtext content that the delta will be based on, and two clients
574 574 # could have the same revlog node with different flags (i.e. different
575 575 # rawtext contents) and the delta could be incompatible.
576 576 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
577 577 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
578 578 return False
579 579 return True
580 580
581 581 def clearcaches(self):
582 582 self._revisioncache = None
583 583 self._chainbasecache.clear()
584 584 self._chunkcache = (0, '')
585 585 self._pcache = {}
586 586
587 587 try:
588 588 # If we are using the native C version, you are in a fun case
589 589 # where self.index, self.nodemap and self._nodecache are the same
590 590 # object.
591 591 self._nodecache.clearcaches()
592 592 except AttributeError:
593 593 self._nodecache = {nullid: nullrev}
594 594 self._nodepos = None
595 595
596 596 def rev(self, node):
597 597 try:
598 598 return self._nodecache[node]
599 599 except TypeError:
600 600 raise
601 601 except error.RevlogError:
602 602 # parsers.c radix tree lookup failed
603 603 if node == wdirid or node in wdirfilenodeids:
604 604 raise error.WdirUnsupported
605 605 raise error.LookupError(node, self.indexfile, _('no node'))
606 606 except KeyError:
607 607 # pure python cache lookup failed
608 608 n = self._nodecache
609 609 i = self.index
610 610 p = self._nodepos
611 611 if p is None:
612 612 p = len(i) - 1
613 613 else:
614 614 assert p < len(i)
615 615 for r in pycompat.xrange(p, -1, -1):
616 616 v = i[r][7]
617 617 n[v] = r
618 618 if v == node:
619 619 self._nodepos = r - 1
620 620 return r
621 621 if node == wdirid or node in wdirfilenodeids:
622 622 raise error.WdirUnsupported
623 623 raise error.LookupError(node, self.indexfile, _('no node'))
624 624
625 625 # Accessors for index entries.
626 626
627 627 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
628 628 # are flags.
629 629 def start(self, rev):
630 630 return int(self.index[rev][0] >> 16)
631 631
632 632 def flags(self, rev):
633 633 return self.index[rev][0] & 0xFFFF
634 634
635 635 def length(self, rev):
636 636 return self.index[rev][1]
637 637
638 638 def rawsize(self, rev):
639 639 """return the length of the uncompressed text for a given revision"""
640 640 l = self.index[rev][2]
641 641 if l >= 0:
642 642 return l
643 643
644 644 t = self.rawdata(rev)
645 645 return len(t)
646 646
647 647 def size(self, rev):
648 648 """length of non-raw text (processed by a "read" flag processor)"""
649 649 # fast path: if no "read" flag processor could change the content,
650 650 # size is rawsize. note: ELLIPSIS is known to not change the content.
651 651 flags = self.flags(rev)
652 652 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
653 653 return self.rawsize(rev)
654 654
655 655 return len(self.revision(rev, raw=False))
656 656
657 657 def chainbase(self, rev):
658 658 base = self._chainbasecache.get(rev)
659 659 if base is not None:
660 660 return base
661 661
662 662 index = self.index
663 663 iterrev = rev
664 664 base = index[iterrev][3]
665 665 while base != iterrev:
666 666 iterrev = base
667 667 base = index[iterrev][3]
668 668
669 669 self._chainbasecache[rev] = base
670 670 return base
671 671
672 672 def linkrev(self, rev):
673 673 return self.index[rev][4]
674 674
675 675 def parentrevs(self, rev):
676 676 try:
677 677 entry = self.index[rev]
678 678 except IndexError:
679 679 if rev == wdirrev:
680 680 raise error.WdirUnsupported
681 681 raise
682 682
683 683 return entry[5], entry[6]
684 684
685 685 # fast parentrevs(rev) where rev isn't filtered
686 686 _uncheckedparentrevs = parentrevs
687 687
688 688 def node(self, rev):
689 689 try:
690 690 return self.index[rev][7]
691 691 except IndexError:
692 692 if rev == wdirrev:
693 693 raise error.WdirUnsupported
694 694 raise
695 695
696 696 # Derived from index values.
697 697
698 698 def end(self, rev):
699 699 return self.start(rev) + self.length(rev)
700 700
701 701 def parents(self, node):
702 702 i = self.index
703 703 d = i[self.rev(node)]
704 704 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
705 705
706 706 def chainlen(self, rev):
707 707 return self._chaininfo(rev)[0]
708 708
709 709 def _chaininfo(self, rev):
710 710 chaininfocache = self._chaininfocache
711 711 if rev in chaininfocache:
712 712 return chaininfocache[rev]
713 713 index = self.index
714 714 generaldelta = self._generaldelta
715 715 iterrev = rev
716 716 e = index[iterrev]
717 717 clen = 0
718 718 compresseddeltalen = 0
719 719 while iterrev != e[3]:
720 720 clen += 1
721 721 compresseddeltalen += e[1]
722 722 if generaldelta:
723 723 iterrev = e[3]
724 724 else:
725 725 iterrev -= 1
726 726 if iterrev in chaininfocache:
727 727 t = chaininfocache[iterrev]
728 728 clen += t[0]
729 729 compresseddeltalen += t[1]
730 730 break
731 731 e = index[iterrev]
732 732 else:
733 733 # Add text length of base since decompressing that also takes
734 734 # work. For cache hits the length is already included.
735 735 compresseddeltalen += e[1]
736 736 r = (clen, compresseddeltalen)
737 737 chaininfocache[rev] = r
738 738 return r
739 739
740 740 def _deltachain(self, rev, stoprev=None):
741 741 """Obtain the delta chain for a revision.
742 742
743 743 ``stoprev`` specifies a revision to stop at. If not specified, we
744 744 stop at the base of the chain.
745 745
746 746 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
747 747 revs in ascending order and ``stopped`` is a bool indicating whether
748 748 ``stoprev`` was hit.
749 749 """
750 750 # Try C implementation.
751 751 try:
752 752 return self.index.deltachain(rev, stoprev, self._generaldelta)
753 753 except AttributeError:
754 754 pass
755 755
756 756 chain = []
757 757
758 758 # Alias to prevent attribute lookup in tight loop.
759 759 index = self.index
760 760 generaldelta = self._generaldelta
761 761
762 762 iterrev = rev
763 763 e = index[iterrev]
764 764 while iterrev != e[3] and iterrev != stoprev:
765 765 chain.append(iterrev)
766 766 if generaldelta:
767 767 iterrev = e[3]
768 768 else:
769 769 iterrev -= 1
770 770 e = index[iterrev]
771 771
772 772 if iterrev == stoprev:
773 773 stopped = True
774 774 else:
775 775 chain.append(iterrev)
776 776 stopped = False
777 777
778 778 chain.reverse()
779 779 return chain, stopped
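# Usage sketch (revs hypothetical): for a non-generaldelta revlog the
# chain is a contiguous run ending at the chain base, e.g.
#
#     chain, stopped = rl._deltachain(5)
#     # chain == [2, 3, 4, 5] if rev 2 is a full snapshot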
780 780
781 781 def ancestors(self, revs, stoprev=0, inclusive=False):
782 782 """Generate the ancestors of 'revs' in reverse revision order.
783 783 Does not generate revs lower than stoprev.
784 784
785 785 See the documentation for ancestor.lazyancestors for more details."""
786 786
787 787 # first, make sure start revisions aren't filtered
788 788 revs = list(revs)
789 789 checkrev = self.node
790 790 for r in revs:
791 791 checkrev(r)
792 792 # and we're sure ancestors aren't filtered as well
793 793
794 794 if rustancestor is not None:
795 795 lazyancestors = rustancestor.LazyAncestors
796 796 arg = self.index
797 797 elif util.safehasattr(parsers, 'rustlazyancestors'):
798 798 lazyancestors = ancestor.rustlazyancestors
799 799 arg = self.index
800 800 else:
801 801 lazyancestors = ancestor.lazyancestors
802 802 arg = self._uncheckedparentrevs
803 803 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
804 804
805 805 def descendants(self, revs):
806 806 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
807 807
808 808 def findcommonmissing(self, common=None, heads=None):
809 809 """Return a tuple of the ancestors of common and the ancestors of heads
810 810 that are not ancestors of common. In revset terminology, we return the
811 811 tuple:
812 812
813 813 ::common, (::heads) - (::common)
814 814
815 815 The list is sorted by revision number, meaning it is
816 816 topologically sorted.
817 817
818 818 'heads' and 'common' are both lists of node IDs. If heads is
819 819 not supplied, uses all of the revlog's heads. If common is not
820 820 supplied, uses nullid."""
821 821 if common is None:
822 822 common = [nullid]
823 823 if heads is None:
824 824 heads = self.heads()
825 825
826 826 common = [self.rev(n) for n in common]
827 827 heads = [self.rev(n) for n in heads]
828 828
829 829 # we want the ancestors, but inclusive
830 830 class lazyset(object):
831 831 def __init__(self, lazyvalues):
832 832 self.addedvalues = set()
833 833 self.lazyvalues = lazyvalues
834 834
835 835 def __contains__(self, value):
836 836 return value in self.addedvalues or value in self.lazyvalues
837 837
838 838 def __iter__(self):
839 839 added = self.addedvalues
840 840 for r in added:
841 841 yield r
842 842 for r in self.lazyvalues:
843 843 if not r in added:
844 844 yield r
845 845
846 846 def add(self, value):
847 847 self.addedvalues.add(value)
848 848
849 849 def update(self, values):
850 850 self.addedvalues.update(values)
851 851
852 852 has = lazyset(self.ancestors(common))
853 853 has.add(nullrev)
854 854 has.update(common)
855 855
856 856 # take all ancestors from heads that aren't in has
857 857 missing = set()
858 858 visit = collections.deque(r for r in heads if r not in has)
859 859 while visit:
860 860 r = visit.popleft()
861 861 if r in missing:
862 862 continue
863 863 else:
864 864 missing.add(r)
865 865 for p in self.parentrevs(r):
866 866 if p not in has:
867 867 visit.append(p)
868 868 missing = list(missing)
869 869 missing.sort()
870 870 return has, [self.node(miss) for miss in missing]
871 871
872 872 def incrementalmissingrevs(self, common=None):
873 873 """Return an object that can be used to incrementally compute the
874 874 revision numbers of the ancestors of arbitrary sets that are not
875 875 ancestors of common. This is an ancestor.incrementalmissingancestors
876 876 object.
877 877
878 878 'common' is a list of revision numbers. If common is not supplied, uses
879 879 nullrev.
880 880 """
881 881 if common is None:
882 882 common = [nullrev]
883 883
884 884 if rustancestor is not None:
885 885 return rustancestor.MissingAncestors(self.index, common)
886 886 return ancestor.incrementalmissingancestors(self.parentrevs, common)
887 887
888 888 def findmissingrevs(self, common=None, heads=None):
889 889 """Return the revision numbers of the ancestors of heads that
890 890 are not ancestors of common.
891 891
892 892 More specifically, return a list of revision numbers corresponding to
893 893 nodes N such that every N satisfies the following constraints:
894 894
895 895 1. N is an ancestor of some node in 'heads'
896 896 2. N is not an ancestor of any node in 'common'
897 897
898 898 The list is sorted by revision number, meaning it is
899 899 topologically sorted.
900 900
901 901 'heads' and 'common' are both lists of revision numbers. If heads is
902 902 not supplied, uses all of the revlog's heads. If common is not
903 903 supplied, uses nullrev."""
904 904 if common is None:
905 905 common = [nullrev]
906 906 if heads is None:
907 907 heads = self.headrevs()
908 908
909 909 inc = self.incrementalmissingrevs(common=common)
910 910 return inc.missingancestors(heads)
911 911
912 912 def findmissing(self, common=None, heads=None):
913 913 """Return the ancestors of heads that are not ancestors of common.
914 914
915 915 More specifically, return a list of nodes N such that every N
916 916 satisfies the following constraints:
917 917
918 918 1. N is an ancestor of some node in 'heads'
919 919 2. N is not an ancestor of any node in 'common'
920 920
921 921 The list is sorted by revision number, meaning it is
922 922 topologically sorted.
923 923
924 924 'heads' and 'common' are both lists of node IDs. If heads is
925 925 not supplied, uses all of the revlog's heads. If common is not
926 926 supplied, uses nullid."""
927 927 if common is None:
928 928 common = [nullid]
929 929 if heads is None:
930 930 heads = self.heads()
931 931
932 932 common = [self.rev(n) for n in common]
933 933 heads = [self.rev(n) for n in heads]
934 934
935 935 inc = self.incrementalmissingrevs(common=common)
936 936 return [self.node(r) for r in inc.missingancestors(heads)]
937 937
938 938 def nodesbetween(self, roots=None, heads=None):
939 939 """Return a topological path from 'roots' to 'heads'.
940 940
941 941 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
942 942 topologically sorted list of all nodes N that satisfy both of
943 943 these constraints:
944 944
945 945 1. N is a descendant of some node in 'roots'
946 946 2. N is an ancestor of some node in 'heads'
947 947
948 948 Every node is considered to be both a descendant and an ancestor
949 949 of itself, so every reachable node in 'roots' and 'heads' will be
950 950 included in 'nodes'.
951 951
952 952 'outroots' is the list of reachable nodes in 'roots', i.e., the
953 953 subset of 'roots' that is returned in 'nodes'. Likewise,
954 954 'outheads' is the subset of 'heads' that is also in 'nodes'.
955 955
956 956 'roots' and 'heads' are both lists of node IDs. If 'roots' is
957 957 unspecified, uses nullid as the only root. If 'heads' is
958 958 unspecified, uses list of all of the revlog's heads."""
959 959 nonodes = ([], [], [])
960 960 if roots is not None:
961 961 roots = list(roots)
962 962 if not roots:
963 963 return nonodes
964 964 lowestrev = min([self.rev(n) for n in roots])
965 965 else:
966 966 roots = [nullid] # Everybody's a descendant of nullid
967 967 lowestrev = nullrev
968 968 if (lowestrev == nullrev) and (heads is None):
969 969 # We want _all_ the nodes!
970 970 return ([self.node(r) for r in self], [nullid], list(self.heads()))
971 971 if heads is None:
972 972 # All nodes are ancestors, so the latest ancestor is the last
973 973 # node.
974 974 highestrev = len(self) - 1
975 975 # Set ancestors to None to signal that every node is an ancestor.
976 976 ancestors = None
977 977 # Set heads to an empty dictionary for later discovery of heads
978 978 heads = {}
979 979 else:
980 980 heads = list(heads)
981 981 if not heads:
982 982 return nonodes
983 983 ancestors = set()
984 984 # Turn heads into a dictionary so we can remove 'fake' heads.
985 985 # Also, later we will be using it to filter out the heads we can't
986 986 # find from roots.
987 987 heads = dict.fromkeys(heads, False)
988 988 # Start at the top and keep marking parents until we're done.
989 989 nodestotag = set(heads)
990 990 # Remember where the top was so we can use it as a limit later.
991 991 highestrev = max([self.rev(n) for n in nodestotag])
992 992 while nodestotag:
993 993 # grab a node to tag
994 994 n = nodestotag.pop()
995 995 # Never tag nullid
996 996 if n == nullid:
997 997 continue
998 998 # A node's revision number represents its place in a
999 999 # topologically sorted list of nodes.
1000 1000 r = self.rev(n)
1001 1001 if r >= lowestrev:
1002 1002 if n not in ancestors:
1003 1003 # If we are possibly a descendant of one of the roots
1004 1004 # and we haven't already been marked as an ancestor
1005 1005 ancestors.add(n) # Mark as ancestor
1006 1006 # Add non-nullid parents to list of nodes to tag.
1007 1007 nodestotag.update([p for p in self.parents(n) if
1008 1008 p != nullid])
1009 1009 elif n in heads: # We've seen it before, is it a fake head?
1010 1010 # So it is, real heads should not be the ancestors of
1011 1011 # any other heads.
1012 1012 heads.pop(n)
1013 1013 if not ancestors:
1014 1014 return nonodes
1015 1015 # Now that we have our set of ancestors, we want to remove any
1016 1016 # roots that are not ancestors.
1017 1017
1018 1018 # If one of the roots was nullid, everything is included anyway.
1019 1019 if lowestrev > nullrev:
1020 1020 # But, since we weren't, let's recompute the lowest rev to not
1021 1021 # include roots that aren't ancestors.
1022 1022
1023 1023 # Filter out roots that aren't ancestors of heads
1024 1024 roots = [root for root in roots if root in ancestors]
1025 1025 # Recompute the lowest revision
1026 1026 if roots:
1027 1027 lowestrev = min([self.rev(root) for root in roots])
1028 1028 else:
1029 1029 # No more roots? Return empty list
1030 1030 return nonodes
1031 1031 else:
1032 1032 # We are descending from nullid, and don't need to care about
1033 1033 # any other roots.
1034 1034 lowestrev = nullrev
1035 1035 roots = [nullid]
1036 1036 # Transform our roots list into a set.
1037 1037 descendants = set(roots)
1038 1038 # Also, keep the original roots so we can filter out roots that aren't
1039 1039 # 'real' roots (i.e. are descended from other roots).
1040 1040 roots = descendants.copy()
1041 1041 # Our topologically sorted list of output nodes.
1042 1042 orderedout = []
1043 1043 # Don't start at nullid since we don't want nullid in our output list,
1044 1044 # and if nullid shows up in descendants, empty parents will look like
1045 1045 # they're descendants.
1046 1046 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1047 1047 n = self.node(r)
1048 1048 isdescendant = False
1049 1049 if lowestrev == nullrev: # Everybody is a descendant of nullid
1050 1050 isdescendant = True
1051 1051 elif n in descendants:
1052 1052 # n is already a descendant
1053 1053 isdescendant = True
1054 1054 # This check only needs to be done here because all the roots
1055 1055 # will start being marked as descendants before the loop.
1056 1056 if n in roots:
1057 1057 # If n was a root, check if it's a 'real' root.
1058 1058 p = tuple(self.parents(n))
1059 1059 # If any of its parents are descendants, it's not a root.
1060 1060 if (p[0] in descendants) or (p[1] in descendants):
1061 1061 roots.remove(n)
1062 1062 else:
1063 1063 p = tuple(self.parents(n))
1064 1064 # A node is a descendant if either of its parents is a
1065 1065 # descendant. (We seeded the descendants set with the roots
1066 1066 # up there, remember?)
1067 1067 if (p[0] in descendants) or (p[1] in descendants):
1068 1068 descendants.add(n)
1069 1069 isdescendant = True
1070 1070 if isdescendant and ((ancestors is None) or (n in ancestors)):
1071 1071 # Only include nodes that are both descendants and ancestors.
1072 1072 orderedout.append(n)
1073 1073 if (ancestors is not None) and (n in heads):
1074 1074 # We're trying to figure out which heads are reachable
1075 1075 # from roots.
1076 1076 # Mark this head as having been reached
1077 1077 heads[n] = True
1078 1078 elif ancestors is None:
1079 1079 # Otherwise, we're trying to discover the heads.
1080 1080 # Assume this is a head because if it isn't, the next step
1081 1081 # will eventually remove it.
1082 1082 heads[n] = True
1083 1083 # But, obviously its parents aren't.
1084 1084 for p in self.parents(n):
1085 1085 heads.pop(p, None)
1086 1086 heads = [head for head, flag in heads.iteritems() if flag]
1087 1087 roots = list(roots)
1088 1088 assert orderedout
1089 1089 assert roots
1090 1090 assert heads
1091 1091 return (orderedout, roots, heads)
1092 1092
1093 1093 def headrevs(self, revs=None):
1094 1094 if revs is None:
1095 1095 try:
1096 1096 return self.index.headrevs()
1097 1097 except AttributeError:
1098 1098 return self._headrevs()
1099 1099 if rustdagop is not None:
1100 1100 return rustdagop.headrevs(self.index, revs)
1101 1101 return dagop.headrevs(revs, self._uncheckedparentrevs)
1102 1102
1103 1103 def computephases(self, roots):
1104 1104 return self.index.computephasesmapsets(roots)
1105 1105
1106 1106 def _headrevs(self):
1107 1107 count = len(self)
1108 1108 if not count:
1109 1109 return [nullrev]
1110 1110 # we won't iterate over filtered revs so nobody is a head at start
1111 1111 ishead = [0] * (count + 1)
1112 1112 index = self.index
1113 1113 for r in self:
1114 1114 ishead[r] = 1 # I may be a head
1115 1115 e = index[r]
1116 1116 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1117 1117 return [r for r, val in enumerate(ishead) if val]
1118 1118
1119 1119 def heads(self, start=None, stop=None):
1120 1120 """return the list of all nodes that have no children
1121 1121
1122 1122 if start is specified, only heads that are descendants of
1123 1123 start will be returned
1124 1124 if stop is specified, it will consider all the revs from stop
1125 1125 as if they had no children
1126 1126 """
1127 1127 if start is None and stop is None:
1128 1128 if not len(self):
1129 1129 return [nullid]
1130 1130 return [self.node(r) for r in self.headrevs()]
1131 1131
1132 1132 if start is None:
1133 1133 start = nullrev
1134 1134 else:
1135 1135 start = self.rev(start)
1136 1136
1137 1137 stoprevs = set(self.rev(n) for n in stop or [])
1138 1138
1139 1139 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1140 1140 stoprevs=stoprevs)
1141 1141
1142 1142 return [self.node(rev) for rev in revs]
1143 1143
1144 1144 def children(self, node):
1145 1145 """find the children of a given node"""
1146 1146 c = []
1147 1147 p = self.rev(node)
1148 1148 for r in self.revs(start=p + 1):
1149 1149 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1150 1150 if prevs:
1151 1151 for pr in prevs:
1152 1152 if pr == p:
1153 1153 c.append(self.node(r))
1154 1154 elif p == nullrev:
1155 1155 c.append(self.node(r))
1156 1156 return c
1157 1157
1158 1158 def commonancestorsheads(self, a, b):
1159 1159 """calculate all the heads of the common ancestors of nodes a and b"""
1160 1160 a, b = self.rev(a), self.rev(b)
1161 1161 ancs = self._commonancestorsheads(a, b)
1162 1162 return pycompat.maplist(self.node, ancs)
1163 1163
1164 1164 def _commonancestorsheads(self, *revs):
1165 1165 """calculate all the heads of the common ancestors of revs"""
1166 1166 try:
1167 1167 ancs = self.index.commonancestorsheads(*revs)
1168 1168 except (AttributeError, OverflowError): # C implementation failed
1169 1169 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1170 1170 return ancs
1171 1171
1172 1172 def isancestor(self, a, b):
1173 1173 """return True if node a is an ancestor of node b
1174 1174
1175 1175 A revision is considered an ancestor of itself."""
1176 1176 a, b = self.rev(a), self.rev(b)
1177 1177 return self.isancestorrev(a, b)
1178 1178
1179 1179 def isancestorrev(self, a, b):
1180 1180 """return True if revision a is an ancestor of revision b
1181 1181
1182 1182 A revision is considered an ancestor of itself.
1183 1183
1184 1184 The implementation of this is trivial but the use of
1185 1185 reachableroots is not."""
1186 1186 if a == nullrev:
1187 1187 return True
1188 1188 elif a == b:
1189 1189 return True
1190 1190 elif a > b:
1191 1191 return False
1192 1192 return bool(self.reachableroots(a, [b], [a], includepath=False))
1193 1193
1194 1194 def reachableroots(self, minroot, heads, roots, includepath=False):
1195 1195 """return (heads(::<roots> and <roots>::<heads>))
1196 1196
1197 1197 If includepath is True, return (<roots>::<heads>)."""
1198 1198 try:
1199 1199 return self.index.reachableroots2(minroot, heads, roots,
1200 1200 includepath)
1201 1201 except AttributeError:
1202 1202 return dagop._reachablerootspure(self.parentrevs,
1203 1203 minroot, roots, heads, includepath)
1204 1204
1205 1205 def ancestor(self, a, b):
1206 1206 """calculate the "best" common ancestor of nodes a and b"""
1207 1207
1208 1208 a, b = self.rev(a), self.rev(b)
1209 1209 try:
1210 1210 ancs = self.index.ancestors(a, b)
1211 1211 except (AttributeError, OverflowError):
1212 1212 ancs = ancestor.ancestors(self.parentrevs, a, b)
1213 1213 if ancs:
1214 1214 # choose a consistent winner when there's a tie
1215 1215 return min(map(self.node, ancs))
1216 1216 return nullid
1217 1217
1218 1218 def _match(self, id):
1219 1219 if isinstance(id, int):
1220 1220 # rev
1221 1221 return self.node(id)
1222 1222 if len(id) == 20:
1223 1223 # possibly a binary node
1224 1224 # odds of a binary node being all hex in ASCII are 1 in 10**25
1225 1225 try:
1226 1226 node = id
1227 1227 self.rev(node) # quick search the index
1228 1228 return node
1229 1229 except error.LookupError:
1230 1230 pass # may be partial hex id
1231 1231 try:
1232 1232 # str(rev)
1233 1233 rev = int(id)
1234 1234 if "%d" % rev != id:
1235 1235 raise ValueError
1236 1236 if rev < 0:
1237 1237 rev = len(self) + rev
1238 1238 if rev < 0 or rev >= len(self):
1239 1239 raise ValueError
1240 1240 return self.node(rev)
1241 1241 except (ValueError, OverflowError):
1242 1242 pass
1243 1243 if len(id) == 40:
1244 1244 try:
1245 1245 # a full hex nodeid?
1246 1246 node = bin(id)
1247 1247 self.rev(node)
1248 1248 return node
1249 1249 except (TypeError, error.LookupError):
1250 1250 pass
1251 1251
1252 1252 def _partialmatch(self, id):
1253 1253 # we don't care about wdirfilenodeids as they should always be full hashes
1254 1254 maybewdir = wdirhex.startswith(id)
1255 1255 try:
1256 1256 partial = self.index.partialmatch(id)
1257 1257 if partial and self.hasnode(partial):
1258 1258 if maybewdir:
1259 1259 # single 'ff...' match in radix tree, ambiguous with wdir
1260 1260 raise error.RevlogError
1261 1261 return partial
1262 1262 if maybewdir:
1263 1263 # no 'ff...' match in radix tree, wdir identified
1264 1264 raise error.WdirUnsupported
1265 1265 return None
1266 1266 except error.RevlogError:
1267 1267 # parsers.c radix tree lookup gave multiple matches
1268 1268 # fast path: for unfiltered changelog, radix tree is accurate
1269 1269 if not getattr(self, 'filteredrevs', None):
1270 1270 raise error.AmbiguousPrefixLookupError(
1271 1271 id, self.indexfile, _('ambiguous identifier'))
1272 1272 # fall through to slow path that filters hidden revisions
1273 1273 except (AttributeError, ValueError):
1274 1274 # we are pure python, or key was too short to search radix tree
1275 1275 pass
1276 1276
1277 1277 if id in self._pcache:
1278 1278 return self._pcache[id]
1279 1279
1280 1280 if len(id) <= 40:
1281 1281 try:
1282 1282 # hex(node)[:...]
1283 1283 l = len(id) // 2 # grab an even number of digits
1284 1284 prefix = bin(id[:l * 2])
1285 1285 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1286 1286 nl = [n for n in nl if hex(n).startswith(id) and
1287 1287 self.hasnode(n)]
1288 1288 if nullhex.startswith(id):
1289 1289 nl.append(nullid)
1290 1290 if len(nl) > 0:
1291 1291 if len(nl) == 1 and not maybewdir:
1292 1292 self._pcache[id] = nl[0]
1293 1293 return nl[0]
1294 1294 raise error.AmbiguousPrefixLookupError(
1295 1295 id, self.indexfile, _('ambiguous identifier'))
1296 1296 if maybewdir:
1297 1297 raise error.WdirUnsupported
1298 1298 return None
1299 1299 except TypeError:
1300 1300 pass
1301 1301
1302 1302 def lookup(self, id):
1303 1303 """locate a node based on:
1304 1304 - revision number or str(revision number)
1305 1305 - nodeid or subset of hex nodeid
1306 1306 """
1307 1307 n = self._match(id)
1308 1308 if n is not None:
1309 1309 return n
1310 1310 n = self._partialmatch(id)
1311 1311 if n:
1312 1312 return n
1313 1313
1314 1314 raise error.LookupError(id, self.indexfile, _('no match found'))
1315 1315
1316 1316 def shortest(self, node, minlength=1):
1317 1317 """Find the shortest unambiguous prefix that matches node."""
1318 1318 def isvalid(prefix):
1319 1319 try:
1320 1320 matchednode = self._partialmatch(prefix)
1321 1321 except error.AmbiguousPrefixLookupError:
1322 1322 return False
1323 1323 except error.WdirUnsupported:
1324 1324 # single 'ff...' match
1325 1325 return True
1326 1326 if matchednode is None:
1327 1327 raise error.LookupError(node, self.indexfile, _('no node'))
1328 1328 return True
1329 1329
1330 1330 def maybewdir(prefix):
1331 1331 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1332 1332
1333 1333 hexnode = hex(node)
1334 1334
1335 1335 def disambiguate(hexnode, minlength):
1336 1336 """Disambiguate against wdirid."""
1337 1337 for length in range(minlength, 41):
1338 1338 prefix = hexnode[:length]
1339 1339 if not maybewdir(prefix):
1340 1340 return prefix
1341 1341
1342 1342 if not getattr(self, 'filteredrevs', None):
1343 1343 try:
1344 1344 length = max(self.index.shortest(node), minlength)
1345 1345 return disambiguate(hexnode, length)
1346 1346 except error.RevlogError:
1347 1347 if node != wdirid:
1348 1348 raise error.LookupError(node, self.indexfile, _('no node'))
1349 1349 except AttributeError:
1350 1350 # Fall through to pure code
1351 1351 pass
1352 1352
1353 1353 if node == wdirid:
1354 1354 for length in range(minlength, 41):
1355 1355 prefix = hexnode[:length]
1356 1356 if isvalid(prefix):
1357 1357 return prefix
1358 1358
1359 1359 for length in range(minlength, 41):
1360 1360 prefix = hexnode[:length]
1361 1361 if isvalid(prefix):
1362 1362 return disambiguate(hexnode, length)
1363 1363
1364 1364 def cmp(self, node, text):
1365 1365 """compare text with a given file revision
1366 1366
1367 1367 returns True if text is different than what is stored.
1368 1368 """
1369 1369 p1, p2 = self.parents(node)
1370 1370 return storageutil.hashrevisionsha1(text, p1, p2) != node
1371 1371
1372 1372 def _cachesegment(self, offset, data):
1373 1373 """Add a segment to the revlog cache.
1374 1374
1375 1375 Accepts an absolute offset and the data that is at that location.
1376 1376 """
1377 1377 o, d = self._chunkcache
1378 1378 # try to add to existing cache
1379 1379 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1380 1380 self._chunkcache = o, d + data
1381 1381 else:
1382 1382 self._chunkcache = offset, data
1383 1383
1384 1384 def _readsegment(self, offset, length, df=None):
1385 1385 """Load a segment of raw data from the revlog.
1386 1386
1387 1387 Accepts an absolute offset, length to read, and an optional existing
1388 1388 file handle to read from.
1389 1389
1390 1390 If an existing file handle is passed, it will be seeked and the
1391 1391 original seek position will NOT be restored.
1392 1392
1393 1393 Returns a str or buffer of raw byte data.
1394 1394
1395 1395 Raises if the requested number of bytes could not be read.
1396 1396 """
1397 1397 # Cache data both forward and backward around the requested
1398 1398 # data, in a fixed size window. This helps speed up operations
1399 1399 # involving reading the revlog backwards.
1400 1400 cachesize = self._chunkcachesize
1401 1401 realoffset = offset & ~(cachesize - 1)
1402 1402 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1403 1403 - realoffset)
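# Worked example of the rounding above (with the default 64 KiB cache
# size): a request for offset=70000, length=100 widens to
# realoffset=65536 and reallength=65536, so a single aligned read
# covers the requested bytes and their neighborhood.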
1404 1404 with self._datareadfp(df) as df:
1405 1405 df.seek(realoffset)
1406 1406 d = df.read(reallength)
1407 1407
1408 1408 self._cachesegment(realoffset, d)
1409 1409 if offset != realoffset or reallength != length:
1410 1410 startoffset = offset - realoffset
1411 1411 if len(d) - startoffset < length:
1412 1412 raise error.RevlogError(
1413 1413 _('partial read of revlog %s; expected %d bytes from '
1414 1414 'offset %d, got %d') %
1415 1415 (self.indexfile if self._inline else self.datafile,
1416 1416 length, realoffset, len(d) - startoffset))
1417 1417
1418 1418 return util.buffer(d, startoffset, length)
1419 1419
1420 1420 if len(d) < length:
1421 1421 raise error.RevlogError(
1422 1422 _('partial read of revlog %s; expected %d bytes from offset '
1423 1423 '%d, got %d') %
1424 1424 (self.indexfile if self._inline else self.datafile,
1425 1425 length, offset, len(d)))
1426 1426
1427 1427 return d
1428 1428
1429 1429 def _getsegment(self, offset, length, df=None):
1430 1430 """Obtain a segment of raw data from the revlog.
1431 1431
1432 1432 Accepts an absolute offset, length of bytes to obtain, and an
1433 1433 optional file handle to the already-opened revlog. If the file
1434 1434 handle is used, its original seek position will not be preserved.
1435 1435
1436 1436 Requests for data may be returned from a cache.
1437 1437
1438 1438 Returns a str or a buffer instance of raw byte data.
1439 1439 """
1440 1440 o, d = self._chunkcache
1441 1441 l = len(d)
1442 1442
1443 1443 # is it in the cache?
1444 1444 cachestart = offset - o
1445 1445 cacheend = cachestart + length
1446 1446 if cachestart >= 0 and cacheend <= l:
1447 1447 if cachestart == 0 and cacheend == l:
1448 1448 return d # avoid a copy
1449 1449 return util.buffer(d, cachestart, cacheend - cachestart)
1450 1450
1451 1451 return self._readsegment(offset, length, df=df)
1452 1452
1453 1453 def _getsegmentforrevs(self, startrev, endrev, df=None):
1454 1454 """Obtain a segment of raw data corresponding to a range of revisions.
1455 1455
1456 1456 Accepts the start and end revisions and an optional already-open
1457 1457 file handle to be used for reading. If the file handle is read, its
1458 1458 seek position will not be preserved.
1459 1459
1460 1460 Requests for data may be satisfied by a cache.
1461 1461
1462 1462 Returns a 2-tuple of (offset, data) for the requested range of
1463 1463 revisions. Offset is the integer offset from the beginning of the
1464 1464 revlog and data is a str or buffer of the raw byte data.
1465 1465
1466 1466 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1467 1467 to determine where each revision's data begins and ends.
1468 1468 """
1469 1469 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1470 1470 # (functions are expensive).
1471 1471 index = self.index
1472 1472 istart = index[startrev]
1473 1473 start = int(istart[0] >> 16)
1474 1474 if startrev == endrev:
1475 1475 end = start + istart[1]
1476 1476 else:
1477 1477 iend = index[endrev]
1478 1478 end = int(iend[0] >> 16) + iend[1]
1479 1479
1480 1480 if self._inline:
1481 1481 start += (startrev + 1) * self._io.size
1482 1482 end += (endrev + 1) * self._io.size
1483 1483 length = end - start
1484 1484
1485 1485 return start, self._getsegment(start, length, df=df)
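# The inline adjustment above reflects the layout of an inline revlog:
# each revision's data chunk is preceded by its fixed-size index entry,
# so rev r's data starts (r + 1) index entries beyond its nominal
# offset (entries 0 through r all sit before it in the file).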
1486 1486
1487 1487 def _chunk(self, rev, df=None):
1488 1488 """Obtain a single decompressed chunk for a revision.
1489 1489
1490 1490 Accepts an integer revision and an optional already-open file handle
1491 1491 to be used for reading. If used, the seek position of the file will not
1492 1492 be preserved.
1493 1493
1494 1494 Returns a str holding uncompressed data for the requested revision.
1495 1495 """
1496 1496 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1497 1497
1498 1498 def _chunks(self, revs, df=None, targetsize=None):
1499 1499 """Obtain decompressed chunks for the specified revisions.
1500 1500
1501 1501 Accepts an iterable of numeric revisions that are assumed to be in
1502 1502 ascending order. Also accepts an optional already-open file handle
1503 1503 to be used for reading. If used, the seek position of the file will
1504 1504 not be preserved.
1505 1505
1506 1506 This function is similar to calling ``self._chunk()`` multiple times,
1507 1507 but is faster.
1508 1508
1509 1509 Returns a list with decompressed data for each requested revision.
1510 1510 """
1511 1511 if not revs:
1512 1512 return []
1513 1513 start = self.start
1514 1514 length = self.length
1515 1515 inline = self._inline
1516 1516 iosize = self._io.size
1517 1517 buffer = util.buffer
1518 1518
1519 1519 l = []
1520 1520 ladd = l.append
1521 1521
1522 1522 if not self._withsparseread:
1523 1523 slicedchunks = (revs,)
1524 1524 else:
1525 1525 slicedchunks = deltautil.slicechunk(self, revs,
1526 1526 targetsize=targetsize)
1527 1527
1528 1528 for revschunk in slicedchunks:
1529 1529 firstrev = revschunk[0]
1530 1530 # Skip trailing revisions with empty diff
1531 1531 for lastrev in revschunk[::-1]:
1532 1532 if length(lastrev) != 0:
1533 1533 break
1534 1534
1535 1535 try:
1536 1536 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1537 1537 except OverflowError:
1538 1538 # issue4215 - we can't cache a run of chunks greater than
1539 1539 # 2G on Windows
1540 1540 return [self._chunk(rev, df=df) for rev in revschunk]
1541 1541
1542 1542 decomp = self.decompress
1543 1543 for rev in revschunk:
1544 1544 chunkstart = start(rev)
1545 1545 if inline:
1546 1546 chunkstart += (rev + 1) * iosize
1547 1547 chunklength = length(rev)
1548 1548 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1549 1549
1550 1550 return l
1551 1551
1552 1552 def _chunkclear(self):
1553 1553 """Clear the raw chunk cache."""
1554 1554 self._chunkcache = (0, '')
1555 1555
1556 1556 def deltaparent(self, rev):
1557 1557 """return deltaparent of the given revision"""
1558 1558 base = self.index[rev][3]
1559 1559 if base == rev:
1560 1560 return nullrev
1561 1561 elif self._generaldelta:
1562 1562 return base
1563 1563 else:
1564 1564 return rev - 1
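# Sketch of the three cases above (revs hypothetical): a full snapshot
# reports nullrev; with generaldelta the stored base is the delta
# parent; otherwise deltas always chain against the previous revision,
# so deltaparent(7) == 6 whenever rev 7 is not itself a snapshot.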
1565 1565
1566 1566 def issnapshot(self, rev):
1567 1567 """tells whether rev is a snapshot
1568 1568 """
1569 1569 if not self._sparserevlog:
1570 1570 return self.deltaparent(rev) == nullrev
1571 1571 elif util.safehasattr(self.index, 'issnapshot'):
1572 1572 # directly assign the method to cache both the attribute test and the access
1573 1573 self.issnapshot = self.index.issnapshot
1574 1574 return self.issnapshot(rev)
1575 1575 if rev == nullrev:
1576 1576 return True
1577 1577 entry = self.index[rev]
1578 1578 base = entry[3]
1579 1579 if base == rev:
1580 1580 return True
1581 1581 if base == nullrev:
1582 1582 return True
1583 1583 p1 = entry[5]
1584 1584 p2 = entry[6]
1585 1585 if base == p1 or base == p2:
1586 1586 return False
1587 1587 return self.issnapshot(base)
1588 1588
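# Toy model of the sparse-revlog snapshot test above: an entry is a
# snapshot when its delta base is nullrev or itself, or when it deltas
# against another snapshot instead of one of its parents. The index
# below is hypothetical and maps rev -> (base, p1, p2).
NULLREV = -1
_toyindex = {0: (NULLREV, NULLREV, NULLREV),  # full snapshot
             1: (0, 0, NULLREV),              # delta against its parent
             2: (0, 1, NULLREV)}              # delta against non-parent rev 0

def _toy_issnapshot(rev):
    if rev == NULLREV:
        return True
    base, p1, p2 = _toyindex[rev]
    if base == rev or base == NULLREV:
        return True
    if base == p1 or base == p2:
        return False                          # plain delta, not a snapshot
    return _toy_issnapshot(base)              # intermediate snapshot chain

assert [_toy_issnapshot(r) for r in (0, 1, 2)] == [True, False, True]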
1589 1589 def snapshotdepth(self, rev):
1590 1590 """number of snapshots in the chain before this one"""
1591 1591 if not self.issnapshot(rev):
1592 1592 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1593 1593 return len(self._deltachain(rev)[0]) - 1
1594 1594
1595 1595 def revdiff(self, rev1, rev2):
1596 1596 """return or calculate a delta between two revisions
1597 1597
1598 1598 The delta calculated is in binary form and is intended to be written to
1599 1599 revlog data directly. So this function needs raw revision data.
1600 1600 """
1601 1601 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1602 1602 return bytes(self._chunk(rev2))
1603 1603
1604 1604 return mdiff.textdiff(self.rawdata(rev1),
1605 1605 self.rawdata(rev2))
1606 1606
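# Hedged usage sketch: revdiff() yields a binary patch in revlog data
# format. Outside the cached-delta fast path it is equivalent to
# calling mdiff.textdiff on the two raw texts, e.g.:
#
#     from mercurial import mdiff
#     delta = mdiff.textdiff(rl.rawdata(rev1), rl.rawdata(rev2))
#
# where `rl` stands for an open revlog instance (an assumption of this
# sketch, not a module-level name).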
1607 1607 def _processflags(self, text, flags, operation, raw=False):
1608 1608 """deprecated entry point to access flag processors"""
1609 1609 msg = ('_processflags(...) is deprecated, use the specialized variants')
1610 1610 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1611 1611 if raw:
1612 1612 return text, flagutil.processflagsraw(self, text, flags)
1613 1613 elif operation == 'read':
1614 1614 return flagutil.processflagsread(self, text, flags)
1615 1615 else: # write operation
1616 1616 return flagutil.processflagswrite(self, text, flags)
1617 1617
1618 1618 def revision(self, nodeorrev, _df=None, raw=False):
1619 1619 """return an uncompressed revision of a given node or revision
1620 1620 number.
1621 1621
1622 1622 _df - an existing file handle to read from. (internal-only)
1623 1623 raw - an optional argument specifying if the revision data is to be
1624 1624 treated as raw data when applying flag transforms. 'raw' should be set
1625 1625 to True when generating changegroups or in debug commands.
1626 1626 """
1627 1627 if raw:
1628 1628 msg = ('revlog.revision(..., raw=True) is deprecated, '
1629 1629 'use revlog.rawdata(...)')
1630 1630 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1631 1631 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1632 1632
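# Migration sketch for the deprecation above; `rl` is an assumed open
# revlog instance and `node` one of its nodes:
#
#     text = rl.revision(node)    # flag processors applied on read
#     raw = rl.rawdata(node)      # stored bytes, replaces revision(..., raw=True)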
1633 1633 def sidedata(self, nodeorrev, _df=None):
1634 1634 """a map of extra data related to the changeset but not part of the hash
1635 1635
1636 1636 This function currently returns a dictionary. However, a more advanced
1637 1637 mapping object will likely be used in the future for more
1638 1638 efficient/lazy code.
1639 1639 """
1640 1640 return self._revisiondata(nodeorrev, _df)[1]
1641 1641
1642 1642 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1643 1643 # deal with <nodeorrev> argument type
1644 1644 if isinstance(nodeorrev, int):
1645 1645 rev = nodeorrev
1646 1646 node = self.node(rev)
1647 1647 else:
1648 1648 node = nodeorrev
1649 1649 rev = None
1650 1650
1651 1651 # fast path the special `nullid` rev
1652 1652 if node == nullid:
1653 1653 return "", {}
1654 1654
1655 1655 # The text as stored inside the revlog. Might be the revision or might
1656 1656 # need to be processed to retrieve the revision.
1657 1657 rawtext = None
1658 1658
1659 1659 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1660 1660
1661 1661 if raw and validated:
1662 1662 # if we don't want to process the raw text and that raw
1663 1663 # text is cached, we can exit early.
1664 1664 return rawtext, {}
1665 1665 if rev is None:
1666 1666 rev = self.rev(node)
1667 1667 # the revlog's flags for this revision
1668 1668 # (usually altering its state or content)
1669 1669 flags = self.flags(rev)
1670 1670
1671 1671 if validated and flags == REVIDX_DEFAULT_FLAGS:
1672 1672 # no extra flags set, no flag processor runs, text = rawtext
1673 1673 return rawtext, {}
1674 1674
1675 1675 sidedata = {}
1676 1676 if raw:
1677 1677 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1678 1678 text = rawtext
1679 1679 else:
1680 1680 r = flagutil.processflagsread(self, rawtext, flags)
1681 1681 text, validatehash, sidedata = r
1682 1682 if validatehash:
1683 1683 self.checkhash(text, node, rev=rev)
1684 1684 if not validated:
1685 1685 self._revisioncache = (node, rev, rawtext)
1686 1686
1687 1687 return text, sidedata
1688 1688
1689 1689 def _rawtext(self, node, rev, _df=None):
1690 1690 """return the possibly unvalidated rawtext for a revision
1691 1691
1692 1692 returns (rev, rawtext, validated)
1693 1693 """
1694 1694
1695 1695 # revision in the cache (could be useful to apply delta)
1696 1696 cachedrev = None
1697 1697 # An intermediate text to apply deltas to
1698 1698 basetext = None
1699 1699
1700 1700 # Check if we have the entry in cache
1701 1701 # The cache entry looks like (node, rev, rawtext)
1702 1702 if self._revisioncache:
1703 1703 if self._revisioncache[0] == node:
1704 1704 return (rev, self._revisioncache[2], True)
1705 1705 cachedrev = self._revisioncache[1]
1706 1706
1707 1707 if rev is None:
1708 1708 rev = self.rev(node)
1709 1709
1710 1710 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1711 1711 if stopped:
1712 1712 basetext = self._revisioncache[2]
1713 1713
1714 1714 # drop cache to save memory, the caller is expected to
1715 1715 # update self._revisioncache after validating the text
1716 1716 self._revisioncache = None
1717 1717
1718 1718 targetsize = None
1719 1719 rawsize = self.index[rev][2]
1720 1720 if 0 <= rawsize:
1721 1721 targetsize = 4 * rawsize
1722 1722
1723 1723 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1724 1724 if basetext is None:
1725 1725 basetext = bytes(bins[0])
1726 1726 bins = bins[1:]
1727 1727
1728 1728 rawtext = mdiff.patches(basetext, bins)
1729 1729 del basetext # let us have a chance to free memory early
1730 1730 return (rev, rawtext, False)
1731 1731
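# Conceptual sketch of the reconstruction above: the first chunk of the
# delta chain is a full text and each following chunk patches the
# accumulated result (mdiff.patches does this in a single call). Patches
# are modeled as plain callables here to stay self-contained.
def _apply_chain_demo(chunks):
    text = chunks[0]                 # base text (a snapshot)
    for patch in chunks[1:]:         # deltas, oldest first
        text = patch(text)
    return text

_chain = [b'abc', lambda t: t + b'd', lambda t: t.replace(b'a', b'z')]
assert _apply_chain_demo(_chain) == b'zbcd'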
1732 1732 def rawdata(self, nodeorrev, _df=None):
1733 1733 """return the uncompressed raw data of a given node or revision number.
1734 1734
1735 1735 _df - an existing file handle to read from. (internal-only)
1736 1736 """
1737 1737 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1738 1738
1739 1739 def hash(self, text, p1, p2):
1740 1740 """Compute a node hash.
1741 1741
1742 1742 Available as a function so that subclasses can replace the hash
1743 1743 as needed.
1744 1744 """
1745 1745 return storageutil.hashrevisionsha1(text, p1, p2)
1746 1746
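# Sketch of the default node computation (what
# storageutil.hashrevisionsha1 implements): the parent nodes are hashed
# in sorted order before the text, so swapping p1 and p2 does not
# change the resulting node.
import hashlib

def _node_sha1_demo(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

_p1, _p2 = b'\x01' * 20, b'\x02' * 20
assert _node_sha1_demo(b'data', _p1, _p2) == _node_sha1_demo(b'data', _p2, _p1)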
1747 1747 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1748 1748 """Check node hash integrity.
1749 1749
1750 1750 Available as a function so that subclasses can extend hash mismatch
1751 1751 behaviors as needed.
1752 1752 """
1753 1753 try:
1754 1754 if p1 is None and p2 is None:
1755 1755 p1, p2 = self.parents(node)
1756 1756 if node != self.hash(text, p1, p2):
1757 1757 # Clear the revision cache on hash failure. The revision cache
1758 1758 # only stores the raw revision and clearing the cache does have
1759 1759 # the side-effect that we won't have a cache hit when the raw
1760 1760 # revision data is accessed. But this case should be rare and
1761 1761 # it is extra work to teach the cache about the hash
1762 1762 # verification state.
1763 1763 if self._revisioncache and self._revisioncache[0] == node:
1764 1764 self._revisioncache = None
1765 1765
1766 1766 revornode = rev
1767 1767 if revornode is None:
1768 1768 revornode = templatefilters.short(hex(node))
1769 1769 raise error.RevlogError(_("integrity check failed on %s:%s")
1770 1770 % (self.indexfile, pycompat.bytestr(revornode)))
1771 1771 except error.RevlogError:
1772 1772 if self._censorable and storageutil.iscensoredtext(text):
1773 1773 raise error.CensoredNodeError(self.indexfile, node, text)
1774 1774 raise
1775 1775
1776 1776 def _enforceinlinesize(self, tr, fp=None):
1777 1777 """Check if the revlog is too big for inline and convert if so.
1778 1778
1779 1779 This should be called after revisions are added to the revlog. If the
1780 1780 revlog has grown too large to be an inline revlog, this method converts
1781 1781 it to use separate index and data files.
1782 1782 """
1783 1783 tiprev = len(self) - 1
1784 1784 if (not self._inline or
1785 1785 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1786 1786 return
1787 1787
1788 1788 trinfo = tr.find(self.indexfile)
1789 1789 if trinfo is None:
1790 1790 raise error.RevlogError(_("%s not found in the transaction")
1791 1791 % self.indexfile)
1792 1792
1793 1793 trindex = trinfo[2]
1794 1794 if trindex is not None:
1795 1795 dataoff = self.start(trindex)
1796 1796 else:
1797 1797 # revlog was stripped at start of transaction, use all leftover data
1798 1798 trindex = len(self) - 1
1799 1799 dataoff = self.end(tiprev)
1800 1800
1801 1801 tr.add(self.datafile, dataoff)
1802 1802
1803 1803 if fp:
1804 1804 fp.flush()
1805 1805 fp.close()
1806 1806 # We can't use the cached file handle after close(). So prevent
1807 1807 # its usage.
1808 1808 self._writinghandles = None
1809 1809
1810 1810 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1811 1811 for r in self:
1812 1812 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1813 1813
1814 1814 with self._indexfp('w') as fp:
1815 1815 self.version &= ~FLAG_INLINE_DATA
1816 1816 self._inline = False
1817 1817 io = self._io
1818 1818 for i in self:
1819 1819 e = io.packentry(self.index[i], self.node, self.version, i)
1820 1820 fp.write(e)
1821 1821
1822 1822 # the temp file replaces the real index when we exit the context
1823 1823 # manager
1824 1824
1825 1825 tr.replace(self.indexfile, trindex * self._io.size)
1826 1826 self._chunkclear()
1827 1827
1828 1828 def _nodeduplicatecallback(self, transaction, node):
1829 1829 """called when trying to add a node already stored.
1830 1830 """
1831 1831
1832 1832 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1833 1833 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1834 1834 sidedata=None):
1835 1835 """add a revision to the log
1836 1836
1837 1837 text - the revision data to add
1838 1838 transaction - the transaction object used for rollback
1839 1839 link - the linkrev data to add
1840 1840 p1, p2 - the parent nodeids of the revision
1841 1841 cachedelta - an optional precomputed delta
1842 1842 node - nodeid of revision; typically node is not specified, and it is
1843 1843 computed by default as hash(text, p1, p2); however, subclasses might
1844 1844 use a different hashing method (and override checkhash() in that case)
1845 1845 flags - the known flags to set on the revision
1846 1846 deltacomputer - an optional deltacomputer instance shared between
1847 1847 multiple calls
1848 1848 """
1849 1849 if link == nullrev:
1850 1850 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1851 1851 % self.indexfile)
1852 1852
1853 1853 if sidedata is None:
1854 1854 sidedata = {}
1855 1855
1856 1856 if flags:
1857 1857 node = node or self.hash(text, p1, p2)
1858 1858
1859 1859 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1860 1860 sidedata=sidedata)
1861 1861
1862 1862 # If the flag processor modifies the revision data, ignore any provided
1863 1863 # cachedelta.
1864 1864 if rawtext != text:
1865 1865 cachedelta = None
1866 1866
1867 1867 if len(rawtext) > _maxentrysize:
1868 1868 raise error.RevlogError(
1869 1869 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1870 1870 % (self.indexfile, len(rawtext)))
1871 1871
1872 1872 node = node or self.hash(rawtext, p1, p2)
1873 1873 if node in self.nodemap:
1874 1874 return node
1875 1875
1876 1876 if validatehash:
1877 1877 self.checkhash(rawtext, node, p1=p1, p2=p2)
1878 1878
1879 1879 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1880 1880 flags, cachedelta=cachedelta,
1881 1881 deltacomputer=deltacomputer)
1882 1882
1883 1883 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1884 1884 cachedelta=None, deltacomputer=None):
1885 1885 """add a raw revision with known flags, node and parents
1886 1886 useful when reusing a revision not stored in this revlog (e.g. received
1887 1887 over the wire, or read from an external bundle).
1888 1888 """
1889 1889 dfh = None
1890 1890 if not self._inline:
1891 1891 dfh = self._datafp("a+")
1892 1892 ifh = self._indexfp("a+")
1893 1893 try:
1894 1894 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1895 1895 flags, cachedelta, ifh, dfh,
1896 1896 deltacomputer=deltacomputer)
1897 1897 finally:
1898 1898 if dfh:
1899 1899 dfh.close()
1900 1900 ifh.close()
1901 1901
1902 1902 def compress(self, data):
1903 1903 """Generate a possibly-compressed representation of data."""
1904 1904 if not data:
1905 1905 return '', data
1906 1906
1907 1907 compressed = self._compressor.compress(data)
1908 1908
1909 1909 if compressed:
1910 1910 # The revlog compressor added the header in the returned data.
1911 1911 return '', compressed
1912 1912
1913 1913 if data[0:1] == '\0':
1914 1914 return '', data
1915 1915 return 'u', data
1916 1916
1917 1917 def decompress(self, data):
1918 1918 """Decompress a revlog chunk.
1919 1919
1920 1920 The chunk is expected to begin with a header identifying the
1921 1921 format type so it can be routed to an appropriate decompressor.
1922 1922 """
1923 1923 if not data:
1924 1924 return data
1925 1925
1926 1926 # Revlogs are read much more frequently than they are written and many
1927 1927 # chunks only take microseconds to decompress, so performance is
1928 1928 # important here.
1929 1929 #
1930 1930 # We can make a few assumptions about revlogs:
1931 1931 #
1932 1932 # 1) the majority of chunks will be compressed (as opposed to inline
1933 1933 # raw data).
1934 1934 # 2) decompressing *any* data will likely be at least 10x slower than
1935 1935 # returning raw inline data.
1936 1936 # 3) we want to prioritize common and officially supported compression
1937 1937 # engines
1938 1938 #
1939 1939 # It follows that we want to optimize for "decompress compressed data
1940 1940 # when encoded with common and officially supported compression engines"
1941 1941 # case over "raw data" and "data encoded by less common or non-official
1942 1942 # compression engines." That is why we have the inline lookup first
1943 1943 # followed by the compengines lookup.
1944 1944 #
1945 1945 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1946 1946 # compressed chunks. And this matters for changelog and manifest reads.
1947 1947 t = data[0:1]
1948 1948
1949 1949 if t == 'x':
1950 1950 try:
1951 1951 return _zlibdecompress(data)
1952 1952 except zlib.error as e:
1953 1953 raise error.RevlogError(_('revlog decompress error: %s') %
1954 1954 stringutil.forcebytestr(e))
1955 1955 # '\0' is more common than 'u' so it goes first.
1956 1956 elif t == '\0':
1957 1957 return data
1958 1958 elif t == 'u':
1959 1959 return util.buffer(data, 1)
1960 1960
1961 1961 try:
1962 1962 compressor = self._decompressors[t]
1963 1963 except KeyError:
1964 1964 try:
1965 1965 engine = util.compengines.forrevlogheader(t)
1966 1966 compressor = engine.revlogcompressor(self._compengineopts)
1967 1967 self._decompressors[t] = compressor
1968 1968 except KeyError:
1969 1969 raise error.RevlogError(_('unknown compression type %r') % t)
1970 1970
1971 1971 return compressor.decompress(data)
1972 1972
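# Self-contained sketch of the header routing above: chunks are tagged
# by their first byte. zlib output starts with 'x' (0x78), literal
# chunks carry a 'u' prefix, and a leading NUL marks data returned
# as-is.
import zlib

def _toy_decompress(data):
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0' or not data:
        return data
    if t == b'u':
        return data[1:]
    raise ValueError('unknown compression type %r' % t)

assert _toy_decompress(zlib.compress(b'hello')) == b'hello'
assert _toy_decompress(b'uhello') == b'hello'
assert _toy_decompress(b'\0hello') == b'\0hello'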
1973 1973 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1974 1974 cachedelta, ifh, dfh, alwayscache=False,
1975 1975 deltacomputer=None):
1976 1976 """internal function to add revisions to the log
1977 1977
1978 1978 see addrevision for argument descriptions.
1979 1979
1980 1980 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1981 1981
1982 1982 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1983 1983 be used.
1984 1984
1985 1985 invariants:
1986 1986 - rawtext is optional (can be None); if not set, cachedelta must be set.
1987 1987 if both are set, they must correspond to each other.
1988 1988 """
1989 1989 if node == nullid:
1990 1990 raise error.RevlogError(_("%s: attempt to add null revision") %
1991 1991 self.indexfile)
1992 1992 if node == wdirid or node in wdirfilenodeids:
1993 1993 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1994 1994 self.indexfile)
1995 1995
1996 1996 if self._inline:
1997 1997 fh = ifh
1998 1998 else:
1999 1999 fh = dfh
2000 2000
2001 2001 btext = [rawtext]
2002 2002
2003 2003 curr = len(self)
2004 2004 prev = curr - 1
2005 2005 offset = self.end(prev)
2006 2006 p1r, p2r = self.rev(p1), self.rev(p2)
2007 2007
2008 2008 # full versions are inserted when the needed deltas
2009 2009 # become comparable to the uncompressed text
2010 2010 if rawtext is None:
2011 2011 # we need the rawtext size before it is changed by flag processors,
2012 2012 # which is the non-raw size. use revlog explicitly to avoid filelog's
2013 2013 # extra logic that might subtract the metadata size.
2014 2014 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2015 2015 cachedelta[1])
2016 2016 else:
2017 2017 textlen = len(rawtext)
2018 2018
2019 2019 if deltacomputer is None:
2020 2020 deltacomputer = deltautil.deltacomputer(self)
2021 2021
2022 2022 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2023 2023
2024 2024 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2025 2025
2026 2026 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2027 2027 deltainfo.base, link, p1r, p2r, node)
2028 2028 self.index.append(e)
2029 2029 self.nodemap[node] = curr
2030 2030
2031 2031 # Reset the pure node cache start lookup offset to account for new
2032 2032 # revision.
2033 2033 if self._nodepos is not None:
2034 2034 self._nodepos = curr
2035 2035
2036 2036 entry = self._io.packentry(e, self.node, self.version, curr)
2037 2037 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2038 2038 link, offset)
2039 2039
2040 2040 rawtext = btext[0]
2041 2041
2042 2042 if alwayscache and rawtext is None:
2043 2043 rawtext = deltacomputer.buildtext(revinfo, fh)
2044 2044
2045 2045 if type(rawtext) == bytes: # only accept immutable objects
2046 2046 self._revisioncache = (node, curr, rawtext)
2047 2047 self._chainbasecache[curr] = deltainfo.chainbase
2048 2048 return node
2049 2049
2050 2050 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2051 2051 # Files opened in a+ mode have inconsistent behavior on various
2052 2052 # platforms. Windows requires that a file positioning call be made
2053 2053 # when the file handle transitions between reads and writes. See
2054 2054 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2055 2055 # platforms, Python or the platform itself can be buggy. Some versions
2056 2056 # of Solaris have been observed to not append at the end of the file
2057 2057 # if the file was seeked to before the end. See issue4943 for more.
2058 2058 #
2059 2059 # We work around this issue by inserting a seek() before writing.
2060 2060 # Note: This is likely not necessary on Python 3. However, because
2061 2061 # the file handle is reused for reads and may be seeked there, we need
2062 2062 # to be careful before changing this.
2063 2063 ifh.seek(0, os.SEEK_END)
2064 2064 if dfh:
2065 2065 dfh.seek(0, os.SEEK_END)
2066 2066
2067 2067 curr = len(self) - 1
2068 2068 if not self._inline:
2069 2069 transaction.add(self.datafile, offset)
2070 2070 transaction.add(self.indexfile, curr * len(entry))
2071 2071 if data[0]:
2072 2072 dfh.write(data[0])
2073 2073 dfh.write(data[1])
2074 2074 ifh.write(entry)
2075 2075 else:
2076 2076 offset += curr * self._io.size
2077 2077 transaction.add(self.indexfile, offset, curr)
2078 2078 ifh.write(entry)
2079 2079 ifh.write(data[0])
2080 2080 ifh.write(data[1])
2081 2081 self._enforceinlinesize(transaction, ifh)
2082 2082
2083 2083 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2084 2084 """
2085 2085 add a delta group
2086 2086
2087 2087 given a set of deltas, add them to the revision log. the
2088 2088 first delta is against its parent, which should be in our
2089 2089 log; the rest are against the previous delta.
2090 2090
2091 2091 If ``addrevisioncb`` is defined, it will be called with arguments of
2092 2092 this revlog and the node that was added.
2093 2093 """
2094 2094
2095 2095 if self._writinghandles:
2096 2096 raise error.ProgrammingError('cannot nest addgroup() calls')
2097 2097
2098 2098 nodes = []
2099 2099
2100 2100 r = len(self)
2101 2101 end = 0
2102 2102 if r:
2103 2103 end = self.end(r - 1)
2104 2104 ifh = self._indexfp("a+")
2105 2105 isize = r * self._io.size
2106 2106 if self._inline:
2107 2107 transaction.add(self.indexfile, end + isize, r)
2108 2108 dfh = None
2109 2109 else:
2110 2110 transaction.add(self.indexfile, isize, r)
2111 2111 transaction.add(self.datafile, end)
2112 2112 dfh = self._datafp("a+")
2113 2113 def flush():
2114 2114 if dfh:
2115 2115 dfh.flush()
2116 2116 ifh.flush()
2117 2117
2118 2118 self._writinghandles = (ifh, dfh)
2119 2119
2120 2120 try:
2121 2121 deltacomputer = deltautil.deltacomputer(self)
2122 2122 # loop through our set of deltas
2123 2123 for data in deltas:
2124 2124 node, p1, p2, linknode, deltabase, delta, flags = data
2125 2125 link = linkmapper(linknode)
2126 2126 flags = flags or REVIDX_DEFAULT_FLAGS
2127 2127
2128 2128 nodes.append(node)
2129 2129
2130 2130 if node in self.nodemap:
2131 2131 self._nodeduplicatecallback(transaction, node)
2132 2132 # this can happen if two branches make the same change
2133 2133 continue
2134 2134
2135 2135 for p in (p1, p2):
2136 2136 if p not in self.nodemap:
2137 2137 raise error.LookupError(p, self.indexfile,
2138 2138 _('unknown parent'))
2139 2139
2140 2140 if deltabase not in self.nodemap:
2141 2141 raise error.LookupError(deltabase, self.indexfile,
2142 2142 _('unknown delta base'))
2143 2143
2144 2144 baserev = self.rev(deltabase)
2145 2145
2146 2146 if baserev != nullrev and self.iscensored(baserev):
2147 2147 # if base is censored, delta must be full replacement in a
2148 2148 # single patch operation
2149 2149 hlen = struct.calcsize(">lll")
2150 2150 oldlen = self.rawsize(baserev)
2151 2151 newlen = len(delta) - hlen
2152 2152 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2153 2153 raise error.CensoredBaseError(self.indexfile,
2154 2154 self.node(baserev))
2155 2155
2156 2156 if not flags and self._peek_iscensored(baserev, delta, flush):
2157 2157 flags |= REVIDX_ISCENSORED
2158 2158
2159 2159 # We assume consumers of addrevisioncb will want to retrieve
2160 2160 # the added revision, which will require a call to
2161 2161 # revision(). revision() will fast path if there is a cache
2162 2162 # hit. So, we tell _addrevision() to always cache in this case.
2163 2163 # We're only using addgroup() in the context of changegroup
2164 2164 # generation so the revision data can always be handled as raw
2165 2165 # by the flagprocessor.
2166 2166 self._addrevision(node, None, transaction, link,
2167 2167 p1, p2, flags, (baserev, delta),
2168 2168 ifh, dfh,
2169 2169 alwayscache=bool(addrevisioncb),
2170 2170 deltacomputer=deltacomputer)
2171 2171
2172 2172 if addrevisioncb:
2173 2173 addrevisioncb(self, node)
2174 2174
2175 2175 if not dfh and not self._inline:
2176 2176 # addrevision switched from inline to conventional
2177 2177 # reopen the index
2178 2178 ifh.close()
2179 2179 dfh = self._datafp("a+")
2180 2180 ifh = self._indexfp("a+")
2181 2181 self._writinghandles = (ifh, dfh)
2182 2182 finally:
2183 2183 self._writinghandles = None
2184 2184
2185 2185 if dfh:
2186 2186 dfh.close()
2187 2187 ifh.close()
2188 2188
2189 2189 return nodes
2190 2190
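# Hedged sketch of one element of ``deltas``, matching the unpacking in
# the loop above (names are illustrative, not a public structure):
#
#     (node, p1, p2, linknode, deltabase, delta, flags)
#
# and for a filelog-style revlog a minimal linkmapper could simply be
#
#     linkmapper = cl.rev    # changelog node -> changelog rev
#
# assuming `cl` is the repository changelog.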
2191 2191 def iscensored(self, rev):
2192 2192 """Check if a file revision is censored."""
2193 2193 if not self._censorable:
2194 2194 return False
2195 2195
2196 2196 return self.flags(rev) & REVIDX_ISCENSORED
2197 2197
2198 2198 def _peek_iscensored(self, baserev, delta, flush):
2199 2199 """Quickly check if a delta produces a censored revision."""
2200 2200 if not self._censorable:
2201 2201 return False
2202 2202
2203 2203 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2204 2204
2205 2205 def getstrippoint(self, minlink):
2206 2206 """find the minimum rev that must be stripped to strip the linkrev
2207 2207
2208 2208 Returns a tuple containing the minimum rev and a set of all revs that
2209 2209 have linkrevs that will be broken by this strip.
2210 2210 """
2211 2211 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2212 2212 self.headrevs(),
2213 2213 self.linkrev, self.parentrevs)
2214 2214
2215 2215 def strip(self, minlink, transaction):
2216 2216 """truncate the revlog on the first revision with a linkrev >= minlink
2217 2217
2218 2218 This function is called when we're stripping revision minlink and
2219 2219 its descendants from the repository.
2220 2220
2221 2221 We have to remove all revisions with linkrev >= minlink, because
2222 2222 the equivalent changelog revisions will be renumbered after the
2223 2223 strip.
2224 2224
2225 2225 So we truncate the revlog on the first of these revisions, and
2226 2226 trust that the caller has saved the revisions that shouldn't be
2227 2227 removed and that it'll re-add them after this truncation.
2228 2228 """
2229 2229 if len(self) == 0:
2230 2230 return
2231 2231
2232 2232 rev, _ = self.getstrippoint(minlink)
2233 2233 if rev == len(self):
2234 2234 return
2235 2235
2236 2236 # first truncate the files on disk
2237 2237 end = self.start(rev)
2238 2238 if not self._inline:
2239 2239 transaction.add(self.datafile, end)
2240 2240 end = rev * self._io.size
2241 2241 else:
2242 2242 end += rev * self._io.size
2243 2243
2244 2244 transaction.add(self.indexfile, end)
2245 2245
2246 2246 # then reset internal state in memory to forget those revisions
2247 2247 self._revisioncache = None
2248 2248 self._chaininfocache = {}
2249 2249 self._chunkclear()
2250 2250 for x in pycompat.xrange(rev, len(self)):
2251 2251 del self.nodemap[self.node(x)]
2252 2252
2253 2253 del self.index[rev:-1]
2254 2254 self._nodepos = None
2255 2255
2256 2256 def checksize(self):
2257 2257 """Check size of index and data files
2258 2258
2259 2259 return a (dd, di) tuple.
2260 2260 - dd: extra bytes for the "data" file
2261 2261 - di: extra bytes for the "index" file
2262 2262
2263 2263 A healthy revlog will return (0, 0).
2264 2264 """
2265 2265 expected = 0
2266 2266 if len(self):
2267 2267 expected = max(0, self.end(len(self) - 1))
2268 2268
2269 2269 try:
2270 2270 with self._datafp() as f:
2271 2271 f.seek(0, io.SEEK_END)
2272 2272 actual = f.tell()
2273 2273 dd = actual - expected
2274 2274 except IOError as inst:
2275 2275 if inst.errno != errno.ENOENT:
2276 2276 raise
2277 2277 dd = 0
2278 2278
2279 2279 try:
2280 2280 f = self.opener(self.indexfile)
2281 2281 f.seek(0, io.SEEK_END)
2282 2282 actual = f.tell()
2283 2283 f.close()
2284 2284 s = self._io.size
2285 2285 i = max(0, actual // s)
2286 2286 di = actual - (i * s)
2287 2287 if self._inline:
2288 2288 databytes = 0
2289 2289 for r in self:
2290 2290 databytes += max(0, self.length(r))
2291 2291 dd = 0
2292 2292 di = actual - len(self) * s - databytes
2293 2293 except IOError as inst:
2294 2294 if inst.errno != errno.ENOENT:
2295 2295 raise
2296 2296 di = 0
2297 2297
2298 2298 return (dd, di)
2299 2299
2300 2300 def files(self):
2301 2301 res = [self.indexfile]
2302 2302 if not self._inline:
2303 2303 res.append(self.datafile)
2304 2304 return res
2305 2305
2306 2306 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2307 2307 assumehaveparentrevisions=False,
2308 2308 deltamode=repository.CG_DELTAMODE_STD):
2309 2309 if nodesorder not in ('nodes', 'storage', 'linear', None):
2310 2310 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2311 2311 nodesorder)
2312 2312
2313 2313 if nodesorder is None and not self._generaldelta:
2314 2314 nodesorder = 'storage'
2315 2315
2316 2316 if (not self._storedeltachains and
2317 2317 deltamode != repository.CG_DELTAMODE_PREV):
2318 2318 deltamode = repository.CG_DELTAMODE_FULL
2319 2319
2320 2320 return storageutil.emitrevisions(
2321 2321 self, nodes, nodesorder, revlogrevisiondelta,
2322 2322 deltaparentfn=self.deltaparent,
2323 2323 candeltafn=self.candelta,
2324 2324 rawsizefn=self.rawsize,
2325 2325 revdifffn=self.revdiff,
2326 2326 flagsfn=self.flags,
2327 2327 deltamode=deltamode,
2328 2328 revisiondata=revisiondata,
2329 2329 assumehaveparentrevisions=assumehaveparentrevisions)
2330 2330
2331 2331 DELTAREUSEALWAYS = 'always'
2332 2332 DELTAREUSESAMEREVS = 'samerevs'
2333 2333 DELTAREUSENEVER = 'never'
2334 2334
2335 2335 DELTAREUSEFULLADD = 'fulladd'
2336 2336
2337 2337 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2338 2338
2339 2339 def clone(self, tr, destrevlog, addrevisioncb=None,
2340 2340 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2341 2341 """Copy this revlog to another, possibly with format changes.
2342 2342
2343 2343 The destination revlog will contain the same revisions and nodes.
2344 2344 However, it may not be bit-for-bit identical due to e.g. delta encoding
2345 2345 differences.
2346 2346
2347 2347 The ``deltareuse`` argument controls how deltas from the existing revlog
2348 2348 are preserved in the destination revlog. The argument can have the
2349 2349 following values:
2350 2350
2351 2351 DELTAREUSEALWAYS
2352 2352 Deltas will always be reused (if possible), even if the destination
2353 2353 revlog would not select the same revisions for the delta. This is the
2354 2354 fastest mode of operation.
2355 2355 DELTAREUSESAMEREVS
2356 2356 Deltas will be reused if the destination revlog would pick the same
2357 2357 revisions for the delta. This mode strikes a balance between speed
2358 2358 and optimization.
2359 2359 DELTAREUSENEVER
2360 2360 Deltas will never be reused. This is the slowest mode of execution.
2361 2361 This mode can be used to recompute deltas (e.g. if the diff/delta
2362 2362 algorithm changes).
2363 2363
2364 2364 Delta computation can be slow, so the choice of delta reuse policy can
2365 2365 significantly affect run time.
2366 2366
2367 2367 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2368 2368 two extremes. Deltas will be reused when appropriate. But if the
2369 2369 destination would pick a better delta base, the delta is recomputed. This means if you
2370 2370 are converting a non-generaldelta revlog to a generaldelta revlog,
2371 2371 deltas will be recomputed if the delta's parent isn't a parent of the
2372 2372 revision.
2373 2373
2374 2374 In addition to the delta policy, the ``forcedeltabothparents``
2375 2375 argument controls whether to force computing deltas against both parents
2376 2376 for merges. If None, the destination revlog's current setting is used.
2377 2377 """
2378 2378 if deltareuse not in self.DELTAREUSEALL:
2379 2379 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2380 2380
2381 2381 if len(destrevlog):
2382 2382 raise ValueError(_('destination revlog is not empty'))
2383 2383
2384 2384 if getattr(self, 'filteredrevs', None):
2385 2385 raise ValueError(_('source revlog has filtered revisions'))
2386 2386 if getattr(destrevlog, 'filteredrevs', None):
2387 2387 raise ValueError(_('destination revlog has filtered revisions'))
2388 2388
2389 2389 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2390 2390 # if possible.
2391 2391 oldlazydelta = destrevlog._lazydelta
2392 2392 oldlazydeltabase = destrevlog._lazydeltabase
2393 2393 oldamd = destrevlog._deltabothparents
2394 2394
2395 2395 try:
2396 2396 if deltareuse == self.DELTAREUSEALWAYS:
2397 2397 destrevlog._lazydeltabase = True
2398 2398 destrevlog._lazydelta = True
2399 2399 elif deltareuse == self.DELTAREUSESAMEREVS:
2400 2400 destrevlog._lazydeltabase = False
2401 2401 destrevlog._lazydelta = True
2402 2402 elif deltareuse == self.DELTAREUSENEVER:
2403 2403 destrevlog._lazydeltabase = False
2404 2404 destrevlog._lazydelta = False
2405 2405
2406 2406 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2407 2407
2408 deltacomputer = deltautil.deltacomputer(destrevlog)
2409 index = self.index
2410 for rev in self:
2411 entry = index[rev]
2412
2413 # Some classes override linkrev to take filtered revs into
2414 # account. Use raw entry from index.
2415 flags = entry[0] & 0xffff
2416 linkrev = entry[4]
2417 p1 = index[entry[5]][7]
2418 p2 = index[entry[6]][7]
2419 node = entry[7]
2420
2421 # (Possibly) reuse the delta from the revlog if allowed and
2422 # the revlog chunk is a delta.
2423 cachedelta = None
2424 rawtext = None
2425 if (deltareuse != self.DELTAREUSEFULLADD
2426 and destrevlog._lazydelta):
2427 dp = self.deltaparent(rev)
2428 if dp != nullrev:
2429 cachedelta = (dp, bytes(self._chunk(rev)))
2430
2431 if not cachedelta:
2432 rawtext = self.rawdata(rev)
2433
2434
2435 if deltareuse == self.DELTAREUSEFULLADD:
2436 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2437 cachedelta=cachedelta,
2438 node=node, flags=flags,
2439 deltacomputer=deltacomputer)
2440 else:
2441 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2442 checkambig=False)
2443 dfh = None
2444 if not destrevlog._inline:
2445 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2446 try:
2447 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2448 p2, flags, cachedelta, ifh, dfh,
2449 deltacomputer=deltacomputer)
2450 finally:
2451 if dfh:
2452 dfh.close()
2453 ifh.close()
2454
2455 if addrevisioncb:
2456 addrevisioncb(self, rev, node)
2408 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2409 forcedeltabothparents)
2410
2457 2411 finally:
2458 2412 destrevlog._lazydelta = oldlazydelta
2459 2413 destrevlog._lazydeltabase = oldlazydeltabase
2460 2414 destrevlog._deltabothparents = oldamd
2461 2415
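# Hedged usage sketch for clone(); `srcrl`, `dstrl` and `tr` (an open
# transaction) are assumed to exist, and the policy constant is one of
# the class attributes defined above:
#
#     srcrl.clone(tr, dstrl,
#                 deltareuse=srcrl.DELTAREUSESAMEREVS,
#                 forcedeltabothparents=None)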
2416 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2417 forcedeltabothparents):
2418 """perform the core duty of `revlog.clone` after parameter processing"""
2419 deltacomputer = deltautil.deltacomputer(destrevlog)
2420 index = self.index
2421 for rev in self:
2422 entry = index[rev]
2423
2424 # Some classes override linkrev to take filtered revs into
2425 # account. Use raw entry from index.
2426 flags = entry[0] & 0xffff
2427 linkrev = entry[4]
2428 p1 = index[entry[5]][7]
2429 p2 = index[entry[6]][7]
2430 node = entry[7]
2431
2432 # (Possibly) reuse the delta from the revlog if allowed and
2433 # the revlog chunk is a delta.
2434 cachedelta = None
2435 rawtext = None
2436 if (deltareuse != self.DELTAREUSEFULLADD and destrevlog._lazydelta):
2437 dp = self.deltaparent(rev)
2438 if dp != nullrev:
2439 cachedelta = (dp, bytes(self._chunk(rev)))
2440
2441 if not cachedelta:
2442 rawtext = self.rawdata(rev)
2443
2444
2445 if deltareuse == self.DELTAREUSEFULLADD:
2446 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2447 cachedelta=cachedelta,
2448 node=node, flags=flags,
2449 deltacomputer=deltacomputer)
2450 else:
2451 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2452 checkambig=False)
2453 dfh = None
2454 if not destrevlog._inline:
2455 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2456 try:
2457 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2458 p2, flags, cachedelta, ifh, dfh,
2459 deltacomputer=deltacomputer)
2460 finally:
2461 if dfh:
2462 dfh.close()
2463 ifh.close()
2464
2465 if addrevisioncb:
2466 addrevisioncb(self, rev, node)
2467
2462 2468 def censorrevision(self, tr, censornode, tombstone=b''):
2463 2469 if (self.version & 0xFFFF) == REVLOGV0:
2464 2470 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2465 2471 self.version)
2466 2472
2467 2473 censorrev = self.rev(censornode)
2468 2474 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2469 2475
2470 2476 if len(tombstone) > self.rawsize(censorrev):
2471 2477 raise error.Abort(_('censor tombstone must be no longer than '
2472 2478 'censored data'))
2473 2479
2474 2480 # Rewriting the revlog in place is hard. Our strategy for censoring is
2475 2481 # to create a new revlog, copy all revisions to it, then replace the
2476 2482 # revlogs on transaction close.
2477 2483
2478 2484 newindexfile = self.indexfile + b'.tmpcensored'
2479 2485 newdatafile = self.datafile + b'.tmpcensored'
2480 2486
2481 2487 # This is a bit dangerous. We could easily have a mismatch of state.
2482 2488 newrl = revlog(self.opener, newindexfile, newdatafile,
2483 2489 censorable=True)
2484 2490 newrl.version = self.version
2485 2491 newrl._generaldelta = self._generaldelta
2486 2492 newrl._io = self._io
2487 2493
2488 2494 for rev in self.revs():
2489 2495 node = self.node(rev)
2490 2496 p1, p2 = self.parents(node)
2491 2497
2492 2498 if rev == censorrev:
2493 2499 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2494 2500 p1, p2, censornode, REVIDX_ISCENSORED)
2495 2501
2496 2502 if newrl.deltaparent(rev) != nullrev:
2497 2503 raise error.Abort(_('censored revision stored as delta; '
2498 2504 'cannot censor'),
2499 2505 hint=_('censoring of revlogs is not '
2500 2506 'fully implemented; please report '
2501 2507 'this bug'))
2502 2508 continue
2503 2509
2504 2510 if self.iscensored(rev):
2505 2511 if self.deltaparent(rev) != nullrev:
2506 2512 raise error.Abort(_('cannot censor due to censored '
2507 2513 'revision having delta stored'))
2508 2514 rawtext = self._chunk(rev)
2509 2515 else:
2510 2516 rawtext = self.rawdata(rev)
2511 2517
2512 2518 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2513 2519 self.flags(rev))
2514 2520
2515 2521 tr.addbackup(self.indexfile, location='store')
2516 2522 if not self._inline:
2517 2523 tr.addbackup(self.datafile, location='store')
2518 2524
2519 2525 self.opener.rename(newrl.indexfile, self.indexfile)
2520 2526 if not self._inline:
2521 2527 self.opener.rename(newrl.datafile, self.datafile)
2522 2528
2523 2529 self.clearcaches()
2524 2530 self._loadindex()
2525 2531
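# Self-contained sketch of the tombstone framing used above:
# storageutil.packmeta stores metadata between two \x01\n markers ahead
# of the text, which is also how censored revisions are recognized
# later. The helper below only mimics that layout.
def _pack_meta_demo(meta, text):
    keys = b''.join(b'%s: %s\n' % (k, v) for k, v in sorted(meta.items()))
    return b'\x01\n' + keys + b'\x01\n' + text

_blob = _pack_meta_demo({b'censored': b'legal reasons'}, b'')
assert _blob.startswith(b'\x01\n') and b'censored: legal reasons\n' in _blob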
2526 2532 def verifyintegrity(self, state):
2527 2533 """Verifies the integrity of the revlog.
2528 2534
2529 2535 Yields ``revlogproblem`` instances describing problems that are
2530 2536 found.
2531 2537 """
2532 2538 dd, di = self.checksize()
2533 2539 if dd:
2534 2540 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2535 2541 if di:
2536 2542 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2537 2543
2538 2544 version = self.version & 0xFFFF
2539 2545
2540 2546 # The verifier tells us what version revlog we should be.
2541 2547 if version != state['expectedversion']:
2542 2548 yield revlogproblem(
2543 2549 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2544 2550 (self.indexfile, version, state['expectedversion']))
2545 2551
2546 2552 state['skipread'] = set()
2547 2553
2548 2554 for rev in self:
2549 2555 node = self.node(rev)
2550 2556
2551 2557 # Verify contents. 4 cases to care about:
2552 2558 #
2553 2559 # common: the most common case
2554 2560 # rename: with a rename
2555 2561 # meta: file content starts with b'\1\n', the metadata
2556 2562 # header defined in filelog.py, but without a rename
2557 2563 # ext: content stored externally
2558 2564 #
2559 2565 # More formally, their differences are shown below:
2560 2566 #
2561 2567 # | common | rename | meta | ext
2562 2568 # -------------------------------------------------------
2563 2569 # flags() | 0 | 0 | 0 | not 0
2564 2570 # renamed() | False | True | False | ?
2565 2571 # rawtext[0:2]=='\1\n'| False | True | True | ?
2566 2572 #
2567 2573 # "rawtext" means the raw text stored in revlog data, which
2568 2574 # could be retrieved by "rawdata(rev)". "text"
2569 2575 # mentioned below is "revision(rev)".
2570 2576 #
2571 2577 # There are 3 different lengths stored physically:
2572 2578 # 1. L1: rawsize, stored in revlog index
2573 2579 # 2. L2: len(rawtext), stored in revlog data
2574 2580 # 3. L3: len(text), stored in revlog data if flags==0, or
2575 2581 # possibly somewhere else if flags!=0
2576 2582 #
2577 2583 # L1 should be equal to L2. L3 could be different from them.
2578 2584 # "text" may or may not affect commit hash depending on flag
2579 2585 # processors (see flagutil.addflagprocessor).
2580 2586 #
2581 2587 # | common | rename | meta | ext
2582 2588 # -------------------------------------------------
2583 2589 # rawsize() | L1 | L1 | L1 | L1
2584 2590 # size() | L1 | L2-LM | L1(*) | L1 (?)
2585 2591 # len(rawtext) | L2 | L2 | L2 | L2
2586 2592 # len(text) | L2 | L2 | L2 | L3
2587 2593 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2588 2594 #
2589 2595 # LM: length of metadata, depending on rawtext
2590 2596 # (*): not ideal, see comment in filelog.size
2591 2597 # (?): could be "- len(meta)" if the resolved content has
2592 2598 # rename metadata
2593 2599 #
2594 2600 # Checks needed to be done:
2595 2601 # 1. length check: L1 == L2, in all cases.
2596 2602 # 2. hash check: depending on flag processor, we may need to
2597 2603 # use either "text" (external), or "rawtext" (in revlog).
2598 2604
2599 2605 try:
2600 2606 skipflags = state.get('skipflags', 0)
2601 2607 if skipflags:
2602 2608 skipflags &= self.flags(rev)
2603 2609
2604 2610 if skipflags:
2605 2611 state['skipread'].add(node)
2606 2612 else:
2607 2613 # Side-effect: read content and verify hash.
2608 2614 self.revision(node)
2609 2615
2610 2616 l1 = self.rawsize(rev)
2611 2617 l2 = len(self.rawdata(node))
2612 2618
2613 2619 if l1 != l2:
2614 2620 yield revlogproblem(
2615 2621 error=_('unpacked size is %d, %d expected') % (l2, l1),
2616 2622 node=node)
2617 2623
2618 2624 except error.CensoredNodeError:
2619 2625 if state['erroroncensored']:
2620 2626 yield revlogproblem(error=_('censored file data'),
2621 2627 node=node)
2622 2628 state['skipread'].add(node)
2623 2629 except Exception as e:
2624 2630 yield revlogproblem(
2625 2631 error=_('unpacking %s: %s') % (short(node),
2626 2632 stringutil.forcebytestr(e)),
2627 2633 node=node)
2628 2634 state['skipread'].add(node)
2629 2635
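# Hedged driver sketch for verifyintegrity(); the ``state`` dict comes
# from the verifier and the keys below are the ones this method reads
# (values here are illustrative):
#
#     state = {'expectedversion': 1, 'erroroncensored': True,
#              'skipflags': 0}
#     for problem in rl.verifyintegrity(state):
#         ui.warn(problem.warning or problem.error)
#
# `rl` is an assumed revlog instance; ``revlogproblem`` carries
# ``warning``/``error``/``node`` fields per the yields above.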
2630 2636 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2631 2637 revisionscount=False, trackedsize=False,
2632 2638 storedsize=False):
2633 2639 d = {}
2634 2640
2635 2641 if exclusivefiles:
2636 2642 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2637 2643 if not self._inline:
2638 2644 d['exclusivefiles'].append((self.opener, self.datafile))
2639 2645
2640 2646 if sharedfiles:
2641 2647 d['sharedfiles'] = []
2642 2648
2643 2649 if revisionscount:
2644 2650 d['revisionscount'] = len(self)
2645 2651
2646 2652 if trackedsize:
2647 2653 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2648 2654
2649 2655 if storedsize:
2650 2656 d['storedsize'] = sum(self.opener.stat(path).st_size
2651 2657 for path in self.files())
2652 2658
2653 2659 return d