revlog: detect incomplete revlog reads...
Gregory Szorc
r40660:87a87255 default
@@ -1,2548 +1,2567 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to prevent pyflakes complaints
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100
101 101 # Aliased for performance.
102 102 _zlibdecompress = zlib.decompress
103 103
104 104 # max size of revlog with inline data
105 105 _maxinline = 131072
106 106 _chunksize = 1048576
107 107
108 108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 109 _flagprocessors = {
110 110 REVIDX_ISCENSORED: None,
111 111 }
112 112
113 113 # Flag processors for REVIDX_ELLIPSIS.
114 114 def ellipsisreadprocessor(rl, text):
115 115 return text, False
116 116
117 117 def ellipsiswriteprocessor(rl, text):
118 118 return text, False
119 119
120 120 def ellipsisrawprocessor(rl, text):
121 121 return False
122 122
123 123 ellipsisprocessor = (
124 124 ellipsisreadprocessor,
125 125 ellipsiswriteprocessor,
126 126 ellipsisrawprocessor,
127 127 )
128 128
129 129 def addflagprocessor(flag, processor):
130 130 """Register a flag processor on a revision data flag.
131 131
132 132 Invariant:
133 133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 135 - Only one flag processor can be registered on a specific flag.
136 136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 137 following signatures:
138 138 - (read) f(self, rawtext) -> text, bool
139 139 - (write) f(self, text) -> rawtext, bool
140 140 - (raw) f(self, rawtext) -> bool
141 141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 142 directly visible to the user.
143 143 The boolean returned by these transforms is used to determine whether
144 144 the returned text can be used for hash integrity checking. For example,
145 145 if "write" returns False, then "text" is used to generate the hash. If
146 146 "write" returns True, the "rawtext" returned by "write" should be used
147 147 to generate the hash instead. Usually, "write" and "read" return
148 148 different booleans, and "raw" returns the same boolean as "write".
149 149
150 150 Note: The 'raw' transform is used for changegroup generation and in some
151 151 debug commands. In this case the transform only indicates whether the
152 152 contents can be used for hash integrity checks.
153 153 """
154 154 _insertflagprocessor(flag, processor, _flagprocessors)
155 155
156 156 def _insertflagprocessor(flag, processor, flagprocessors):
157 157 if not flag & REVIDX_KNOWN_FLAGS:
158 158 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
159 159 raise error.ProgrammingError(msg)
160 160 if flag not in REVIDX_FLAGS_ORDER:
161 161 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
162 162 raise error.ProgrammingError(msg)
163 163 if flag in flagprocessors:
164 164 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
165 165 raise error.Abort(msg)
166 166 flagprocessors[flag] = processor
167 167
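A minimal sketch of the (read, write, raw) contract described above, using hypothetical no-op callables (real processors are registered by extensions, e.g. LFS on REVIDX_EXTSTORED):

    def noopread(rl, rawtext):
        return rawtext, True          # (text, validatehash)

    def noopwrite(rl, text):
        return text, True             # (rawtext, validatehash)

    def noopraw(rl, rawtext):
        return True                   # rawtext usable for hash checking

    # A second registration on the same flag raises error.Abort.
    addflagprocessor(REVIDX_EXTSTORED, (noopread, noopwrite, noopraw))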
168 168 def getoffset(q):
169 169 return int(q >> 16)
170 170
171 171 def gettype(q):
172 172 return int(q & 0xFFFF)
173 173
174 174 def offset_type(offset, type):
175 175 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
176 176 raise ValueError('unknown revlog index flags')
177 177 return int(int(offset) << 16 | type)
178 178
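The first field of an index entry packs the data offset into the high bits and the 16-bit flag field into the low bits; a small sketch of the round trip through the helpers above (hypothetical values):

    entry = offset_type(4096, 0)      # offset 4096, no flags
    assert getoffset(entry) == 4096   # high bits: byte offset
    assert gettype(entry) == 0        # low 16 bits: flags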
179 179 @attr.s(slots=True, frozen=True)
180 180 class _revisioninfo(object):
181 181 """Information about a revision that allows building its fulltext
182 182 node: expected hash of the revision
183 183 p1, p2: parent revs of the revision
184 184 btext: built text cache consisting of a one-element list
185 185 cachedelta: (baserev, uncompressed_delta) or None
186 186 flags: flags associated with the revision storage
187 187
188 188 One of btext[0] or cachedelta must be set.
189 189 """
190 190 node = attr.ib()
191 191 p1 = attr.ib()
192 192 p2 = attr.ib()
193 193 btext = attr.ib()
194 194 textlen = attr.ib()
195 195 cachedelta = attr.ib()
196 196 flags = attr.ib()
197 197
198 198 @interfaceutil.implementer(repository.irevisiondelta)
199 199 @attr.s(slots=True)
200 200 class revlogrevisiondelta(object):
201 201 node = attr.ib()
202 202 p1node = attr.ib()
203 203 p2node = attr.ib()
204 204 basenode = attr.ib()
205 205 flags = attr.ib()
206 206 baserevisionsize = attr.ib()
207 207 revision = attr.ib()
208 208 delta = attr.ib()
209 209 linknode = attr.ib(default=None)
210 210
211 211 @interfaceutil.implementer(repository.iverifyproblem)
212 212 @attr.s(frozen=True)
213 213 class revlogproblem(object):
214 214 warning = attr.ib(default=None)
215 215 error = attr.ib(default=None)
216 216 node = attr.ib(default=None)
217 217
218 218 # index v0:
219 219 # 4 bytes: offset
220 220 # 4 bytes: compressed length
221 221 # 4 bytes: base rev
222 222 # 4 bytes: link rev
223 223 # 20 bytes: parent 1 nodeid
224 224 # 20 bytes: parent 2 nodeid
225 225 # 20 bytes: nodeid
226 226 indexformatv0 = struct.Struct(">4l20s20s20s")
227 227 indexformatv0_pack = indexformatv0.pack
228 228 indexformatv0_unpack = indexformatv0.unpack
229 229
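Each v0 record is therefore indexformatv0.size == 4*4 + 3*20 = 76 bytes. A sketch of decoding a single record (an all-zero buffer stands in for real index bytes):

    record = b'\x00' * indexformatv0.size
    offset, clen, base, link, p1, p2, node = indexformatv0_unpack(record)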
230 230 class revlogoldindex(list):
231 231 def __getitem__(self, i):
232 232 if i == -1:
233 233 return (0, 0, 0, -1, -1, -1, -1, nullid)
234 234 return list.__getitem__(self, i)
235 235
236 236 class revlogoldio(object):
237 237 def __init__(self):
238 238 self.size = indexformatv0.size
239 239
240 240 def parseindex(self, data, inline):
241 241 s = self.size
242 242 index = []
243 243 nodemap = {nullid: nullrev}
244 244 n = off = 0
245 245 l = len(data)
246 246 while off + s <= l:
247 247 cur = data[off:off + s]
248 248 off += s
249 249 e = indexformatv0_unpack(cur)
250 250 # transform to revlogv1 format
251 251 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
252 252 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
253 253 index.append(e2)
254 254 nodemap[e[6]] = n
255 255 n += 1
256 256
257 257 return revlogoldindex(index), nodemap, None
258 258
259 259 def packentry(self, entry, node, version, rev):
260 260 if gettype(entry[0]):
261 261 raise error.RevlogError(_('index entry flags need revlog '
262 262 'version 1'))
263 263 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
264 264 node(entry[5]), node(entry[6]), entry[7])
265 265 return indexformatv0_pack(*e2)
266 266
267 267 # index ng:
268 268 # 6 bytes: offset
269 269 # 2 bytes: flags
270 270 # 4 bytes: compressed length
271 271 # 4 bytes: uncompressed length
272 272 # 4 bytes: base rev
273 273 # 4 bytes: link rev
274 274 # 4 bytes: parent 1 rev
275 275 # 4 bytes: parent 2 rev
276 276 # 32 bytes: nodeid
277 277 indexformatng = struct.Struct(">Qiiiiii20s12x")
278 278 indexformatng_pack = indexformatng.pack
279 279 versionformat = struct.Struct(">I")
280 280 versionformat_pack = versionformat.pack
281 281 versionformat_unpack = versionformat.unpack
282 282
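The offset field of revision 0 doubles as the revlog header: packentry() below splices the version word into the first 4 bytes, and _loadindex() reads it back with versionformat_unpack. A sketch of the encoding (low 16 bits select the format, high bits carry feature flags):

    header = versionformat_pack(REVLOGV1 | FLAG_INLINE_DATA)
    v = versionformat_unpack(header)[0]
    assert v & 0xFFFF == REVLOGV1     # format number
    assert v & FLAG_INLINE_DATA       # feature flag bit is set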
283 283 # maximum uncompressed length: the corresponding indexformatng field is
284 284 # a 4-byte signed integer (2 GiB)
285 285 _maxentrysize = 0x7fffffff
286 286
287 287 class revlogio(object):
288 288 def __init__(self):
289 289 self.size = indexformatng.size
290 290
291 291 def parseindex(self, data, inline):
292 292 # call the C implementation to parse the index data
293 293 index, cache = parsers.parse_index2(data, inline)
294 294 return index, getattr(index, 'nodemap', None), cache
295 295
296 296 def packentry(self, entry, node, version, rev):
297 297 p = indexformatng_pack(*entry)
298 298 if rev == 0:
299 299 p = versionformat_pack(version) + p[4:]
300 300 return p
301 301
302 302 class revlog(object):
303 303 """
304 304 the underlying revision storage object
305 305
306 306 A revlog consists of two parts, an index and the revision data.
307 307
308 308 The index is a file with a fixed record size containing
309 309 information on each revision, including its nodeid (hash), the
310 310 nodeids of its parents, the position and offset of its data within
311 311 the data file, and the revision it's based on. Finally, each entry
312 312 contains a linkrev entry that can serve as a pointer to external
313 313 data.
314 314
315 315 The revision data itself is a linear collection of data chunks.
316 316 Each chunk represents a revision and is usually represented as a
317 317 delta against the previous chunk. To bound lookup time, runs of
318 318 deltas are limited to about 2 times the length of the original
319 319 version data. This makes retrieval of a version proportional to
320 320 its size, or O(1) relative to the number of revisions.
321 321
322 322 Both pieces of the revlog are written to in an append-only
323 323 fashion, which means we never need to rewrite a file to insert or
324 324 remove data, and can use some simple techniques to avoid the need
325 325 for locking while reading.
326 326
327 327 If checkambig, indexfile is opened with checkambig=True at
328 328 writing, to avoid file stat ambiguity.
329 329
330 330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
331 331 index will be mmapped rather than read if it is larger than the
332 332 configured threshold.
333 333
334 334 If censorable is True, the revlog can have censored revisions.
335 335 """
336 336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
337 337 mmaplargeindex=False, censorable=False):
338 338 """
339 339 create a revlog object
340 340
341 341 opener is a function that abstracts the file opening operation
342 342 and can be used to implement COW semantics or the like.
343 343 """
344 344 self.indexfile = indexfile
345 345 self.datafile = datafile or (indexfile[:-2] + ".d")
346 346 self.opener = opener
347 347 # When True, indexfile is opened with checkambig=True at writing, to
348 348 # avoid file stat ambiguity.
349 349 self._checkambig = checkambig
350 350 self._censorable = censorable
351 351 # 3-tuple of (node, rev, text) for a raw revision.
352 352 self._revisioncache = None
353 353 # Maps rev to chain base rev.
354 354 self._chainbasecache = util.lrucachedict(100)
355 355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
356 356 self._chunkcache = (0, '')
357 357 # How much data to read and cache into the raw revlog data cache.
358 358 self._chunkcachesize = 65536
359 359 self._maxchainlen = None
360 360 self._deltabothparents = True
361 361 self.index = []
362 362 # Mapping of partial identifiers to full nodes.
363 363 self._pcache = {}
364 364 # Mapping of revision integer to full node.
365 365 self._nodecache = {nullid: nullrev}
366 366 self._nodepos = None
367 367 self._compengine = 'zlib'
368 368 self._maxdeltachainspan = -1
369 369 self._withsparseread = False
370 370 self._sparserevlog = False
371 371 self._srdensitythreshold = 0.50
372 372 self._srmingapsize = 262144
373 373
374 374 # Make copy of flag processors so each revlog instance can support
375 375 # custom flags.
376 376 self._flagprocessors = dict(_flagprocessors)
377 377
378 378 mmapindexthreshold = None
379 379 v = REVLOG_DEFAULT_VERSION
380 380 opts = getattr(opener, 'options', None)
381 381 if opts is not None:
382 382 if 'revlogv2' in opts:
383 383 # version 2 revlogs always use generaldelta.
384 384 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
385 385 elif 'revlogv1' in opts:
386 386 if 'generaldelta' in opts:
387 387 v |= FLAG_GENERALDELTA
388 388 else:
389 389 v = 0
390 390 if 'chunkcachesize' in opts:
391 391 self._chunkcachesize = opts['chunkcachesize']
392 392 if 'maxchainlen' in opts:
393 393 self._maxchainlen = opts['maxchainlen']
394 394 if 'deltabothparents' in opts:
395 395 self._deltabothparents = opts['deltabothparents']
396 396 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
397 397 if 'compengine' in opts:
398 398 self._compengine = opts['compengine']
399 399 if 'maxdeltachainspan' in opts:
400 400 self._maxdeltachainspan = opts['maxdeltachainspan']
401 401 if mmaplargeindex and 'mmapindexthreshold' in opts:
402 402 mmapindexthreshold = opts['mmapindexthreshold']
403 403 self._sparserevlog = bool(opts.get('sparse-revlog', False))
404 404 withsparseread = bool(opts.get('with-sparse-read', False))
405 405 # sparse-revlog forces sparse-read
406 406 self._withsparseread = self._sparserevlog or withsparseread
407 407 if 'sparse-read-density-threshold' in opts:
408 408 self._srdensitythreshold = opts['sparse-read-density-threshold']
409 409 if 'sparse-read-min-gap-size' in opts:
410 410 self._srmingapsize = opts['sparse-read-min-gap-size']
411 411 if opts.get('enableellipsis'):
412 412 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
413 413
414 414 # revlog v0 doesn't have flag processors
415 415 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
416 416 _insertflagprocessor(flag, processor, self._flagprocessors)
417 417
418 418 if self._chunkcachesize <= 0:
419 419 raise error.RevlogError(_('revlog chunk cache size %r is not '
420 420 'greater than 0') % self._chunkcachesize)
421 421 elif self._chunkcachesize & (self._chunkcachesize - 1):
422 422 raise error.RevlogError(_('revlog chunk cache size %r is not a '
423 423 'power of 2') % self._chunkcachesize)
424 424
425 425 self._loadindex(v, mmapindexthreshold)
426 426
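The chunk cache size validation above relies on the standard bit trick: for x > 0, x & (x - 1) clears the lowest set bit, so the result is zero exactly when x is a power of two. For example:

    for x in (1, 4096, 65536):
        assert x & (x - 1) == 0       # powers of two pass
    for x in (3, 6, 65537):
        assert x & (x - 1) != 0       # everything else is rejected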
427 427 def _loadindex(self, v, mmapindexthreshold):
428 428 indexdata = ''
429 429 self._initempty = True
430 430 try:
431 431 with self._indexfp() as f:
432 432 if (mmapindexthreshold is not None and
433 433 self.opener.fstat(f).st_size >= mmapindexthreshold):
434 434 indexdata = util.buffer(util.mmapread(f))
435 435 else:
436 436 indexdata = f.read()
437 437 if len(indexdata) > 0:
438 438 v = versionformat_unpack(indexdata[:4])[0]
439 439 self._initempty = False
440 440 except IOError as inst:
441 441 if inst.errno != errno.ENOENT:
442 442 raise
443 443
444 444 self.version = v
445 445 self._inline = v & FLAG_INLINE_DATA
446 446 self._generaldelta = v & FLAG_GENERALDELTA
447 447 flags = v & ~0xFFFF
448 448 fmt = v & 0xFFFF
449 449 if fmt == REVLOGV0:
450 450 if flags:
451 451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 452 'revlog %s') %
453 453 (flags >> 16, fmt, self.indexfile))
454 454 elif fmt == REVLOGV1:
455 455 if flags & ~REVLOGV1_FLAGS:
456 456 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
457 457 'revlog %s') %
458 458 (flags >> 16, fmt, self.indexfile))
459 459 elif fmt == REVLOGV2:
460 460 if flags & ~REVLOGV2_FLAGS:
461 461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
462 462 'revlog %s') %
463 463 (flags >> 16, fmt, self.indexfile))
464 464 else:
465 465 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
466 466 (fmt, self.indexfile))
467 467
468 468 self._storedeltachains = True
469 469
470 470 self._io = revlogio()
471 471 if self.version == REVLOGV0:
472 472 self._io = revlogoldio()
473 473 try:
474 474 d = self._io.parseindex(indexdata, self._inline)
475 475 except (ValueError, IndexError):
476 476 raise error.RevlogError(_("index %s is corrupted") %
477 477 self.indexfile)
478 478 self.index, nodemap, self._chunkcache = d
479 479 if nodemap is not None:
480 480 self.nodemap = self._nodecache = nodemap
481 481 if not self._chunkcache:
482 482 self._chunkclear()
483 483 # revnum -> (chain-length, sum-delta-length)
484 484 self._chaininfocache = {}
485 485 # revlog header -> revlog compressor
486 486 self._decompressors = {}
487 487
488 488 @util.propertycache
489 489 def _compressor(self):
490 490 return util.compengines[self._compengine].revlogcompressor()
491 491
492 492 def _indexfp(self, mode='r'):
493 493 """file object for the revlog's index file"""
494 494 args = {r'mode': mode}
495 495 if mode != 'r':
496 496 args[r'checkambig'] = self._checkambig
497 497 if mode == 'w':
498 498 args[r'atomictemp'] = True
499 499 return self.opener(self.indexfile, **args)
500 500
501 501 def _datafp(self, mode='r'):
502 502 """file object for the revlog's data file"""
503 503 return self.opener(self.datafile, mode=mode)
504 504
505 505 @contextlib.contextmanager
506 506 def _datareadfp(self, existingfp=None):
507 507 """file object suitable to read data"""
508 508 if existingfp is not None:
509 509 yield existingfp
510 510 else:
511 511 if self._inline:
512 512 func = self._indexfp
513 513 else:
514 514 func = self._datafp
515 515 with func() as fp:
516 516 yield fp
517 517
518 518 def tip(self):
519 519 return self.node(len(self.index) - 1)
520 520 def __contains__(self, rev):
521 521 return 0 <= rev < len(self)
522 522 def __len__(self):
523 523 return len(self.index)
524 524 def __iter__(self):
525 525 return iter(pycompat.xrange(len(self)))
526 526 def revs(self, start=0, stop=None):
527 527 """iterate over all rev in this revlog (from start to stop)"""
528 528 return storageutil.iterrevs(len(self), start=start, stop=stop)
529 529
530 530 @util.propertycache
531 531 def nodemap(self):
532 532 if self.index:
533 533 # populate mapping down to the initial node
534 534 node0 = self.index[0][7] # get around changelog filtering
535 535 self.rev(node0)
536 536 return self._nodecache
537 537
538 538 def hasnode(self, node):
539 539 try:
540 540 self.rev(node)
541 541 return True
542 542 except KeyError:
543 543 return False
544 544
545 545 def candelta(self, baserev, rev):
546 546 """whether two revisions (baserev, rev) can be delta-ed or not"""
547 547 # Disable delta if either rev requires a content-changing flag
548 548 # processor (ex. LFS). This is because such a flag processor can alter
549 549 # the rawtext content that the delta will be based on, and two clients
550 550 # could have the same revlog node with different flags (i.e. different
551 551 # rawtext contents) and the delta could be incompatible.
552 552 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
553 553 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
554 554 return False
555 555 return True
556 556
557 557 def clearcaches(self):
558 558 self._revisioncache = None
559 559 self._chainbasecache.clear()
560 560 self._chunkcache = (0, '')
561 561 self._pcache = {}
562 562
563 563 try:
564 564 self._nodecache.clearcaches()
565 565 except AttributeError:
566 566 self._nodecache = {nullid: nullrev}
567 567 self._nodepos = None
568 568
569 569 def rev(self, node):
570 570 try:
571 571 return self._nodecache[node]
572 572 except TypeError:
573 573 raise
574 574 except error.RevlogError:
575 575 # parsers.c radix tree lookup failed
576 576 if node == wdirid or node in wdirfilenodeids:
577 577 raise error.WdirUnsupported
578 578 raise error.LookupError(node, self.indexfile, _('no node'))
579 579 except KeyError:
580 580 # pure python cache lookup failed
581 581 n = self._nodecache
582 582 i = self.index
583 583 p = self._nodepos
584 584 if p is None:
585 585 p = len(i) - 1
586 586 else:
587 587 assert p < len(i)
588 588 for r in pycompat.xrange(p, -1, -1):
589 589 v = i[r][7]
590 590 n[v] = r
591 591 if v == node:
592 592 self._nodepos = r - 1
593 593 return r
594 594 if node == wdirid or node in wdirfilenodeids:
595 595 raise error.WdirUnsupported
596 596 raise error.LookupError(node, self.indexfile, _('no node'))
597 597
598 598 # Accessors for index entries.
599 599
600 600 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
601 601 # are flags.
602 602 def start(self, rev):
603 603 return int(self.index[rev][0] >> 16)
604 604
605 605 def flags(self, rev):
606 606 return self.index[rev][0] & 0xFFFF
607 607
608 608 def length(self, rev):
609 609 return self.index[rev][1]
610 610
611 611 def rawsize(self, rev):
612 612 """return the length of the uncompressed text for a given revision"""
613 613 l = self.index[rev][2]
614 614 if l >= 0:
615 615 return l
616 616
617 617 t = self.revision(rev, raw=True)
618 618 return len(t)
619 619
620 620 def size(self, rev):
621 621 """length of non-raw text (processed by a "read" flag processor)"""
622 622 # fast path: if no "read" flag processor could change the content,
623 623 # size is rawsize. note: ELLIPSIS is known to not change the content.
624 624 flags = self.flags(rev)
625 625 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
626 626 return self.rawsize(rev)
627 627
628 628 return len(self.revision(rev, raw=False))
629 629
630 630 def chainbase(self, rev):
631 631 base = self._chainbasecache.get(rev)
632 632 if base is not None:
633 633 return base
634 634
635 635 index = self.index
636 636 iterrev = rev
637 637 base = index[iterrev][3]
638 638 while base != iterrev:
639 639 iterrev = base
640 640 base = index[iterrev][3]
641 641
642 642 self._chainbasecache[rev] = base
643 643 return base
644 644
645 645 def linkrev(self, rev):
646 646 return self.index[rev][4]
647 647
648 648 def parentrevs(self, rev):
649 649 try:
650 650 entry = self.index[rev]
651 651 except IndexError:
652 652 if rev == wdirrev:
653 653 raise error.WdirUnsupported
654 654 raise
655 655
656 656 return entry[5], entry[6]
657 657
658 658 # fast parentrevs(rev) where rev isn't filtered
659 659 _uncheckedparentrevs = parentrevs
660 660
661 661 def node(self, rev):
662 662 try:
663 663 return self.index[rev][7]
664 664 except IndexError:
665 665 if rev == wdirrev:
666 666 raise error.WdirUnsupported
667 667 raise
668 668
669 669 # Derived from index values.
670 670
671 671 def end(self, rev):
672 672 return self.start(rev) + self.length(rev)
673 673
674 674 def parents(self, node):
675 675 i = self.index
676 676 d = i[self.rev(node)]
677 677 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
678 678
679 679 def chainlen(self, rev):
680 680 return self._chaininfo(rev)[0]
681 681
682 682 def _chaininfo(self, rev):
683 683 chaininfocache = self._chaininfocache
684 684 if rev in chaininfocache:
685 685 return chaininfocache[rev]
686 686 index = self.index
687 687 generaldelta = self._generaldelta
688 688 iterrev = rev
689 689 e = index[iterrev]
690 690 clen = 0
691 691 compresseddeltalen = 0
692 692 while iterrev != e[3]:
693 693 clen += 1
694 694 compresseddeltalen += e[1]
695 695 if generaldelta:
696 696 iterrev = e[3]
697 697 else:
698 698 iterrev -= 1
699 699 if iterrev in chaininfocache:
700 700 t = chaininfocache[iterrev]
701 701 clen += t[0]
702 702 compresseddeltalen += t[1]
703 703 break
704 704 e = index[iterrev]
705 705 else:
706 706 # Add text length of base since decompressing that also takes
707 707 # work. For cache hits the length is already included.
708 708 compresseddeltalen += e[1]
709 709 r = (clen, compresseddeltalen)
710 710 chaininfocache[rev] = r
711 711 return r
712 712
713 713 def _deltachain(self, rev, stoprev=None):
714 714 """Obtain the delta chain for a revision.
715 715
716 716 ``stoprev`` specifies a revision to stop at. If not specified, we
717 717 stop at the base of the chain.
718 718
719 719 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
720 720 revs in ascending order and ``stopped`` is a bool indicating whether
721 721 ``stoprev`` was hit.
722 722 """
723 723 # Try C implementation.
724 724 try:
725 725 return self.index.deltachain(rev, stoprev, self._generaldelta)
726 726 except AttributeError:
727 727 pass
728 728
729 729 chain = []
730 730
731 731 # Alias to prevent attribute lookup in tight loop.
732 732 index = self.index
733 733 generaldelta = self._generaldelta
734 734
735 735 iterrev = rev
736 736 e = index[iterrev]
737 737 while iterrev != e[3] and iterrev != stoprev:
738 738 chain.append(iterrev)
739 739 if generaldelta:
740 740 iterrev = e[3]
741 741 else:
742 742 iterrev -= 1
743 743 e = index[iterrev]
744 744
745 745 if iterrev == stoprev:
746 746 stopped = True
747 747 else:
748 748 chain.append(iterrev)
749 749 stopped = False
750 750
751 751 chain.reverse()
752 752 return chain, stopped
753 753
754 754 def ancestors(self, revs, stoprev=0, inclusive=False):
755 755 """Generate the ancestors of 'revs' in reverse topological order.
756 756 Does not generate revs lower than stoprev.
757 757
758 758 See the documentation for ancestor.lazyancestors for more details."""
759 759
760 760 # first, make sure start revisions aren't filtered
761 761 revs = list(revs)
762 762 checkrev = self.node
763 763 for r in revs:
764 764 checkrev(r)
765 765 # and we're sure ancestors aren't filtered as well
766 766 if util.safehasattr(parsers, 'rustlazyancestors'):
767 767 return ancestor.rustlazyancestors(
768 768 self.index, revs,
769 769 stoprev=stoprev, inclusive=inclusive)
770 770 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
771 771 stoprev=stoprev, inclusive=inclusive)
772 772
773 773 def descendants(self, revs):
774 774 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
775 775
776 776 def findcommonmissing(self, common=None, heads=None):
777 777 """Return a tuple of the ancestors of common and the ancestors of heads
778 778 that are not ancestors of common. In revset terminology, we return the
779 779 tuple:
780 780
781 781 ::common, (::heads) - (::common)
782 782
783 783 The list is sorted by revision number, meaning it is
784 784 topologically sorted.
785 785
786 786 'heads' and 'common' are both lists of node IDs. If heads is
787 787 not supplied, uses all of the revlog's heads. If common is not
788 788 supplied, uses nullid."""
789 789 if common is None:
790 790 common = [nullid]
791 791 if heads is None:
792 792 heads = self.heads()
793 793
794 794 common = [self.rev(n) for n in common]
795 795 heads = [self.rev(n) for n in heads]
796 796
797 797 # we want the ancestors, but inclusive
798 798 class lazyset(object):
799 799 def __init__(self, lazyvalues):
800 800 self.addedvalues = set()
801 801 self.lazyvalues = lazyvalues
802 802
803 803 def __contains__(self, value):
804 804 return value in self.addedvalues or value in self.lazyvalues
805 805
806 806 def __iter__(self):
807 807 added = self.addedvalues
808 808 for r in added:
809 809 yield r
810 810 for r in self.lazyvalues:
811 811 if r not in added:
812 812 yield r
813 813
814 814 def add(self, value):
815 815 self.addedvalues.add(value)
816 816
817 817 def update(self, values):
818 818 self.addedvalues.update(values)
819 819
820 820 has = lazyset(self.ancestors(common))
821 821 has.add(nullrev)
822 822 has.update(common)
823 823
824 824 # take all ancestors from heads that aren't in has
825 825 missing = set()
826 826 visit = collections.deque(r for r in heads if r not in has)
827 827 while visit:
828 828 r = visit.popleft()
829 829 if r in missing:
830 830 continue
831 831 else:
832 832 missing.add(r)
833 833 for p in self.parentrevs(r):
834 834 if p not in has:
835 835 visit.append(p)
836 836 missing = list(missing)
837 837 missing.sort()
838 838 return has, [self.node(miss) for miss in missing]
839 839
840 840 def incrementalmissingrevs(self, common=None):
841 841 """Return an object that can be used to incrementally compute the
842 842 revision numbers of the ancestors of arbitrary sets that are not
843 843 ancestors of common. This is an ancestor.incrementalmissingancestors
844 844 object.
845 845
846 846 'common' is a list of revision numbers. If common is not supplied, uses
847 847 nullrev.
848 848 """
849 849 if common is None:
850 850 common = [nullrev]
851 851
852 852 return ancestor.incrementalmissingancestors(self.parentrevs, common)
853 853
854 854 def findmissingrevs(self, common=None, heads=None):
855 855 """Return the revision numbers of the ancestors of heads that
856 856 are not ancestors of common.
857 857
858 858 More specifically, return a list of revision numbers corresponding to
859 859 nodes N such that every N satisfies the following constraints:
860 860
861 861 1. N is an ancestor of some node in 'heads'
862 862 2. N is not an ancestor of any node in 'common'
863 863
864 864 The list is sorted by revision number, meaning it is
865 865 topologically sorted.
866 866
867 867 'heads' and 'common' are both lists of revision numbers. If heads is
868 868 not supplied, uses all of the revlog's heads. If common is not
869 869 supplied, uses nullrev.
870 870 if common is None:
871 871 common = [nullrev]
872 872 if heads is None:
873 873 heads = self.headrevs()
874 874
875 875 inc = self.incrementalmissingrevs(common=common)
876 876 return inc.missingancestors(heads)
877 877
878 878 def findmissing(self, common=None, heads=None):
879 879 """Return the ancestors of heads that are not ancestors of common.
880 880
881 881 More specifically, return a list of nodes N such that every N
882 882 satisfies the following constraints:
883 883
884 884 1. N is an ancestor of some node in 'heads'
885 885 2. N is not an ancestor of any node in 'common'
886 886
887 887 The list is sorted by revision number, meaning it is
888 888 topologically sorted.
889 889
890 890 'heads' and 'common' are both lists of node IDs. If heads is
891 891 not supplied, uses all of the revlog's heads. If common is not
892 892 supplied, uses nullid."""
893 893 if common is None:
894 894 common = [nullid]
895 895 if heads is None:
896 896 heads = self.heads()
897 897
898 898 common = [self.rev(n) for n in common]
899 899 heads = [self.rev(n) for n in heads]
900 900
901 901 inc = self.incrementalmissingrevs(common=common)
902 902 return [self.node(r) for r in inc.missingancestors(heads)]
903 903
904 904 def nodesbetween(self, roots=None, heads=None):
905 905 """Return a topological path from 'roots' to 'heads'.
906 906
907 907 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
908 908 topologically sorted list of all nodes N that satisfy both of
909 909 these constraints:
910 910
911 911 1. N is a descendant of some node in 'roots'
912 912 2. N is an ancestor of some node in 'heads'
913 913
914 914 Every node is considered to be both a descendant and an ancestor
915 915 of itself, so every reachable node in 'roots' and 'heads' will be
916 916 included in 'nodes'.
917 917
918 918 'outroots' is the list of reachable nodes in 'roots', i.e., the
919 919 subset of 'roots' that is returned in 'nodes'. Likewise,
920 920 'outheads' is the subset of 'heads' that is also in 'nodes'.
921 921
922 922 'roots' and 'heads' are both lists of node IDs. If 'roots' is
923 923 unspecified, uses nullid as the only root. If 'heads' is
924 924 unspecified, uses the list of all of the revlog's heads.
925 925 nonodes = ([], [], [])
926 926 if roots is not None:
927 927 roots = list(roots)
928 928 if not roots:
929 929 return nonodes
930 930 lowestrev = min([self.rev(n) for n in roots])
931 931 else:
932 932 roots = [nullid] # Everybody's a descendant of nullid
933 933 lowestrev = nullrev
934 934 if (lowestrev == nullrev) and (heads is None):
935 935 # We want _all_ the nodes!
936 936 return ([self.node(r) for r in self], [nullid], list(self.heads()))
937 937 if heads is None:
938 938 # All nodes are ancestors, so the latest ancestor is the last
939 939 # node.
940 940 highestrev = len(self) - 1
941 941 # Set ancestors to None to signal that every node is an ancestor.
942 942 ancestors = None
943 943 # Set heads to an empty dictionary for later discovery of heads
944 944 heads = {}
945 945 else:
946 946 heads = list(heads)
947 947 if not heads:
948 948 return nonodes
949 949 ancestors = set()
950 950 # Turn heads into a dictionary so we can remove 'fake' heads.
951 951 # Also, later we will be using it to filter out the heads we can't
952 952 # find from roots.
953 953 heads = dict.fromkeys(heads, False)
954 954 # Start at the top and keep marking parents until we're done.
955 955 nodestotag = set(heads)
956 956 # Remember where the top was so we can use it as a limit later.
957 957 highestrev = max([self.rev(n) for n in nodestotag])
958 958 while nodestotag:
959 959 # grab a node to tag
960 960 n = nodestotag.pop()
961 961 # Never tag nullid
962 962 if n == nullid:
963 963 continue
964 964 # A node's revision number represents its place in a
965 965 # topologically sorted list of nodes.
966 966 r = self.rev(n)
967 967 if r >= lowestrev:
968 968 if n not in ancestors:
969 969 # If we are possibly a descendant of one of the roots
970 970 # and we haven't already been marked as an ancestor
971 971 ancestors.add(n) # Mark as ancestor
972 972 # Add non-nullid parents to list of nodes to tag.
973 973 nodestotag.update([p for p in self.parents(n) if
974 974 p != nullid])
975 975 elif n in heads: # We've seen it before, is it a fake head?
976 976 # So it is, real heads should not be the ancestors of
977 977 # any other heads.
978 978 heads.pop(n)
979 979 if not ancestors:
980 980 return nonodes
981 981 # Now that we have our set of ancestors, we want to remove any
982 982 # roots that are not ancestors.
983 983
984 984 # If one of the roots was nullid, everything is included anyway.
985 985 if lowestrev > nullrev:
986 986 # But, since we weren't, let's recompute the lowest rev to not
987 987 # include roots that aren't ancestors.
988 988
989 989 # Filter out roots that aren't ancestors of heads
990 990 roots = [root for root in roots if root in ancestors]
991 991 # Recompute the lowest revision
992 992 if roots:
993 993 lowestrev = min([self.rev(root) for root in roots])
994 994 else:
995 995 # No more roots? Return empty list
996 996 return nonodes
997 997 else:
998 998 # We are descending from nullid, and don't need to care about
999 999 # any other roots.
1000 1000 lowestrev = nullrev
1001 1001 roots = [nullid]
1002 1002 # Transform our roots list into a set.
1003 1003 descendants = set(roots)
1004 1004 # Also, keep the original roots so we can filter out roots that aren't
1005 1005 # 'real' roots (i.e. are descended from other roots).
1006 1006 roots = descendants.copy()
1007 1007 # Our topologically sorted list of output nodes.
1008 1008 orderedout = []
1009 1009 # Don't start at nullid since we don't want nullid in our output list,
1010 1010 # and if nullid shows up in descendants, empty parents will look like
1011 1011 # they're descendants.
1012 1012 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1013 1013 n = self.node(r)
1014 1014 isdescendant = False
1015 1015 if lowestrev == nullrev: # Everybody is a descendant of nullid
1016 1016 isdescendant = True
1017 1017 elif n in descendants:
1018 1018 # n is already a descendant
1019 1019 isdescendant = True
1020 1020 # This check only needs to be done here because all the roots
1021 1021 # will start being marked as descendants before the loop.
1022 1022 if n in roots:
1023 1023 # If n was a root, check if it's a 'real' root.
1024 1024 p = tuple(self.parents(n))
1025 1025 # If any of its parents are descendants, it's not a root.
1026 1026 if (p[0] in descendants) or (p[1] in descendants):
1027 1027 roots.remove(n)
1028 1028 else:
1029 1029 p = tuple(self.parents(n))
1030 1030 # A node is a descendant if either of its parents is a
1031 1031 # descendant. (We seeded the descendants set with the roots
1032 1032 # up there, remember?)
1033 1033 if (p[0] in descendants) or (p[1] in descendants):
1034 1034 descendants.add(n)
1035 1035 isdescendant = True
1036 1036 if isdescendant and ((ancestors is None) or (n in ancestors)):
1037 1037 # Only include nodes that are both descendants and ancestors.
1038 1038 orderedout.append(n)
1039 1039 if (ancestors is not None) and (n in heads):
1040 1040 # We're trying to figure out which heads are reachable
1041 1041 # from roots.
1042 1042 # Mark this head as having been reached
1043 1043 heads[n] = True
1044 1044 elif ancestors is None:
1045 1045 # Otherwise, we're trying to discover the heads.
1046 1046 # Assume this is a head because if it isn't, the next step
1047 1047 # will eventually remove it.
1048 1048 heads[n] = True
1049 1049 # But, obviously its parents aren't.
1050 1050 for p in self.parents(n):
1051 1051 heads.pop(p, None)
1052 1052 heads = [head for head, flag in heads.iteritems() if flag]
1053 1053 roots = list(roots)
1054 1054 assert orderedout
1055 1055 assert roots
1056 1056 assert heads
1057 1057 return (orderedout, roots, heads)
1058 1058
1059 1059 def headrevs(self):
1060 1060 try:
1061 1061 return self.index.headrevs()
1062 1062 except AttributeError:
1063 1063 return self._headrevs()
1064 1064
1065 1065 def computephases(self, roots):
1066 1066 return self.index.computephasesmapsets(roots)
1067 1067
1068 1068 def _headrevs(self):
1069 1069 count = len(self)
1070 1070 if not count:
1071 1071 return [nullrev]
1072 1072 # we won't iterate over filtered revs, so nobody is a head at start
1073 1073 ishead = [0] * (count + 1)
1074 1074 index = self.index
1075 1075 for r in self:
1076 1076 ishead[r] = 1 # I may be a head
1077 1077 e = index[r]
1078 1078 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1079 1079 return [r for r, val in enumerate(ishead) if val]
1080 1080
1081 1081 def heads(self, start=None, stop=None):
1082 1082 """return the list of all nodes that have no children
1083 1083
1084 1084 if start is specified, only heads that are descendants of
1085 1085 start will be returned
1086 1086 if stop is specified, it will consider all the revs from stop
1087 1087 as if they had no children
1088 1088 """
1089 1089 if start is None and stop is None:
1090 1090 if not len(self):
1091 1091 return [nullid]
1092 1092 return [self.node(r) for r in self.headrevs()]
1093 1093
1094 1094 if start is None:
1095 1095 start = nullrev
1096 1096 else:
1097 1097 start = self.rev(start)
1098 1098
1099 1099 stoprevs = set(self.rev(n) for n in stop or [])
1100 1100
1101 1101 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1102 1102 stoprevs=stoprevs)
1103 1103
1104 1104 return [self.node(rev) for rev in revs]
1105 1105
1106 1106 def children(self, node):
1107 1107 """find the children of a given node"""
1108 1108 c = []
1109 1109 p = self.rev(node)
1110 1110 for r in self.revs(start=p + 1):
1111 1111 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1112 1112 if prevs:
1113 1113 for pr in prevs:
1114 1114 if pr == p:
1115 1115 c.append(self.node(r))
1116 1116 elif p == nullrev:
1117 1117 c.append(self.node(r))
1118 1118 return c
1119 1119
1120 1120 def commonancestorsheads(self, a, b):
1121 1121 """calculate all the heads of the common ancestors of nodes a and b"""
1122 1122 a, b = self.rev(a), self.rev(b)
1123 1123 ancs = self._commonancestorsheads(a, b)
1124 1124 return pycompat.maplist(self.node, ancs)
1125 1125
1126 1126 def _commonancestorsheads(self, *revs):
1127 1127 """calculate all the heads of the common ancestors of revs"""
1128 1128 try:
1129 1129 ancs = self.index.commonancestorsheads(*revs)
1130 1130 except (AttributeError, OverflowError): # C implementation failed
1131 1131 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1132 1132 return ancs
1133 1133
1134 1134 def isancestor(self, a, b):
1135 1135 """return True if node a is an ancestor of node b
1136 1136
1137 1137 A revision is considered an ancestor of itself."""
1138 1138 a, b = self.rev(a), self.rev(b)
1139 1139 return self.isancestorrev(a, b)
1140 1140
1141 1141 def isancestorrev(self, a, b):
1142 1142 """return True if revision a is an ancestor of revision b
1143 1143
1144 1144 A revision is considered an ancestor of itself.
1145 1145
1146 1146 The implementation of this is trivial but the use of
1147 1147 commonancestorsheads is not."""
1148 1148 if a == nullrev:
1149 1149 return True
1150 1150 elif a == b:
1151 1151 return True
1152 1152 elif a > b:
1153 1153 return False
1154 1154 return a in self._commonancestorsheads(a, b)
1155 1155
1156 1156 def ancestor(self, a, b):
1157 1157 """calculate the "best" common ancestor of nodes a and b"""
1158 1158
1159 1159 a, b = self.rev(a), self.rev(b)
1160 1160 try:
1161 1161 ancs = self.index.ancestors(a, b)
1162 1162 except (AttributeError, OverflowError):
1163 1163 ancs = ancestor.ancestors(self.parentrevs, a, b)
1164 1164 if ancs:
1165 1165 # choose a consistent winner when there's a tie
1166 1166 return min(map(self.node, ancs))
1167 1167 return nullid
1168 1168
1169 1169 def _match(self, id):
1170 1170 if isinstance(id, int):
1171 1171 # rev
1172 1172 return self.node(id)
1173 1173 if len(id) == 20:
1174 1174 # possibly a binary node
1175 1175 # odds of a binary node being all hex in ASCII are 1 in 10**25
1176 1176 try:
1177 1177 node = id
1178 1178 self.rev(node) # quick search the index
1179 1179 return node
1180 1180 except error.LookupError:
1181 1181 pass # may be partial hex id
1182 1182 try:
1183 1183 # str(rev)
1184 1184 rev = int(id)
1185 1185 if "%d" % rev != id:
1186 1186 raise ValueError
1187 1187 if rev < 0:
1188 1188 rev = len(self) + rev
1189 1189 if rev < 0 or rev >= len(self):
1190 1190 raise ValueError
1191 1191 return self.node(rev)
1192 1192 except (ValueError, OverflowError):
1193 1193 pass
1194 1194 if len(id) == 40:
1195 1195 try:
1196 1196 # a full hex nodeid?
1197 1197 node = bin(id)
1198 1198 self.rev(node)
1199 1199 return node
1200 1200 except (TypeError, error.LookupError):
1201 1201 pass
1202 1202
1203 1203 def _partialmatch(self, id):
1204 1204 # we don't care about wdirfilenodeids as they should always be full hashes
1205 1205 maybewdir = wdirhex.startswith(id)
1206 1206 try:
1207 1207 partial = self.index.partialmatch(id)
1208 1208 if partial and self.hasnode(partial):
1209 1209 if maybewdir:
1210 1210 # single 'ff...' match in radix tree, ambiguous with wdir
1211 1211 raise error.RevlogError
1212 1212 return partial
1213 1213 if maybewdir:
1214 1214 # no 'ff...' match in radix tree, wdir identified
1215 1215 raise error.WdirUnsupported
1216 1216 return None
1217 1217 except error.RevlogError:
1218 1218 # parsers.c radix tree lookup gave multiple matches
1219 1219 # fast path: for unfiltered changelog, radix tree is accurate
1220 1220 if not getattr(self, 'filteredrevs', None):
1221 1221 raise error.AmbiguousPrefixLookupError(
1222 1222 id, self.indexfile, _('ambiguous identifier'))
1223 1223 # fall through to slow path that filters hidden revisions
1224 1224 except (AttributeError, ValueError):
1225 1225 # we are pure python, or key was too short to search radix tree
1226 1226 pass
1227 1227
1228 1228 if id in self._pcache:
1229 1229 return self._pcache[id]
1230 1230
1231 1231 if len(id) <= 40:
1232 1232 try:
1233 1233 # hex(node)[:...]
1234 1234 l = len(id) // 2 # grab an even number of digits
1235 1235 prefix = bin(id[:l * 2])
1236 1236 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1237 1237 nl = [n for n in nl if hex(n).startswith(id) and
1238 1238 self.hasnode(n)]
1239 1239 if nullhex.startswith(id):
1240 1240 nl.append(nullid)
1241 1241 if len(nl) > 0:
1242 1242 if len(nl) == 1 and not maybewdir:
1243 1243 self._pcache[id] = nl[0]
1244 1244 return nl[0]
1245 1245 raise error.AmbiguousPrefixLookupError(
1246 1246 id, self.indexfile, _('ambiguous identifier'))
1247 1247 if maybewdir:
1248 1248 raise error.WdirUnsupported
1249 1249 return None
1250 1250 except TypeError:
1251 1251 pass
1252 1252
1253 1253 def lookup(self, id):
1254 1254 """locate a node based on:
1255 1255 - revision number or str(revision number)
1256 1256 - nodeid or subset of hex nodeid
1257 1257 """
1258 1258 n = self._match(id)
1259 1259 if n is not None:
1260 1260 return n
1261 1261 n = self._partialmatch(id)
1262 1262 if n:
1263 1263 return n
1264 1264
1265 1265 raise error.LookupError(id, self.indexfile, _('no match found'))
1266 1266
1267 1267 def shortest(self, node, minlength=1):
1268 1268 """Find the shortest unambiguous prefix that matches node."""
1269 1269 def isvalid(prefix):
1270 1270 try:
1271 1271 node = self._partialmatch(prefix)
1272 1272 except error.AmbiguousPrefixLookupError:
1273 1273 return False
1274 1274 except error.WdirUnsupported:
1275 1275 # single 'ff...' match
1276 1276 return True
1277 1277 if node is None:
1278 1278 raise error.LookupError(node, self.indexfile, _('no node'))
1279 1279 return True
1280 1280
1281 1281 def maybewdir(prefix):
1282 1282 return all(c == 'f' for c in prefix)
1283 1283
1284 1284 hexnode = hex(node)
1285 1285
1286 1286 def disambiguate(hexnode, minlength):
1287 1287 """Disambiguate against wdirid."""
1288 1288 for length in range(minlength, 41):
1289 1289 prefix = hexnode[:length]
1290 1290 if not maybewdir(prefix):
1291 1291 return prefix
1292 1292
1293 1293 if not getattr(self, 'filteredrevs', None):
1294 1294 try:
1295 1295 length = max(self.index.shortest(node), minlength)
1296 1296 return disambiguate(hexnode, length)
1297 1297 except error.RevlogError:
1298 1298 if node != wdirid:
1299 1299 raise error.LookupError(node, self.indexfile, _('no node'))
1300 1300 except AttributeError:
1301 1301 # Fall through to pure code
1302 1302 pass
1303 1303
1304 1304 if node == wdirid:
1305 1305 for length in range(minlength, 41):
1306 1306 prefix = hexnode[:length]
1307 1307 if isvalid(prefix):
1308 1308 return prefix
1309 1309
1310 1310 for length in range(minlength, 41):
1311 1311 prefix = hexnode[:length]
1312 1312 if isvalid(prefix):
1313 1313 return disambiguate(hexnode, length)
1314 1314
1315 1315 def cmp(self, node, text):
1316 1316 """compare text with a given file revision
1317 1317
1318 1318 returns True if text is different from what is stored.
1319 1319 """
1320 1320 p1, p2 = self.parents(node)
1321 1321 return storageutil.hashrevisionsha1(text, p1, p2) != node
1322 1322
1323 1323 def _cachesegment(self, offset, data):
1324 1324 """Add a segment to the revlog cache.
1325 1325
1326 1326 Accepts an absolute offset and the data that is at that location.
1327 1327 """
1328 1328 o, d = self._chunkcache
1329 1329 # try to add to existing cache
1330 1330 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1331 1331 self._chunkcache = o, d + data
1332 1332 else:
1333 1333 self._chunkcache = offset, data
1334 1334
1335 1335 def _readsegment(self, offset, length, df=None):
1336 1336 """Load a segment of raw data from the revlog.
1337 1337
1338 1338 Accepts an absolute offset, length to read, and an optional existing
1339 1339 file handle to read from.
1340 1340
1341 1341 If an existing file handle is passed, it will be seeked and the
1342 1342 original seek position will NOT be restored.
1343 1343
1344 1344 Returns a str or buffer of raw byte data.
1345
1346 Raises if the requested number of bytes could not be read.
1345 1347 """
1346 1348 # Cache data both forward and backward around the requested
1347 1349 # data, in a fixed size window. This helps speed up operations
1348 1350 # involving reading the revlog backwards.
1349 1351 cachesize = self._chunkcachesize
1350 1352 realoffset = offset & ~(cachesize - 1)
1351 1353 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1352 1354 - realoffset)
1353 1355 with self._datareadfp(df) as df:
1354 1356 df.seek(realoffset)
1355 1357 d = df.read(reallength)
1358
1356 1359 self._cachesegment(realoffset, d)
1357 1360 if offset != realoffset or reallength != length:
1358 return util.buffer(d, offset - realoffset, length)
1361 startoffset = offset - realoffset
1362 if len(d) - startoffset < length:
1363 raise error.RevlogError(
1364 _('partial read of revlog %s; expected %d bytes from '
1365 'offset %d, got %d') %
1366 (self.indexfile if self._inline else self.datafile,
1367 length, realoffset, len(d) - startoffset))
1368
1369 return util.buffer(d, startoffset, length)
1370
1371 if len(d) < length:
1372 raise error.RevlogError(
1373 _('partial read of revlog %s; expected %d bytes from offset '
1374 '%d, got %d') %
1375 (self.indexfile if self._inline else self.datafile,
1376 length, offset, len(d)))
1377
1359 1378 return d
1360 1379
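Because _chunkcachesize is validated to be a power of two, offset & ~(cachesize - 1) rounds down to a window boundary and the reallength expression rounds the end up, so the read window always covers the requested [offset, offset + length) range. A worked sketch with the default 64 KiB window (hypothetical numbers):

    cachesize = 65536
    offset, length = 70000, 1000
    realoffset = offset & ~(cachesize - 1)            # 65536
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)  # 65536
    assert realoffset <= offset
    assert realoffset + reallength >= offset + length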
1361 1380 def _getsegment(self, offset, length, df=None):
1362 1381 """Obtain a segment of raw data from the revlog.
1363 1382
1364 1383 Accepts an absolute offset, length of bytes to obtain, and an
1365 1384 optional file handle to the already-opened revlog. If the file
1366 1385 handle is used, its original seek position will not be preserved.
1367 1386
1368 1387 Requests for data may be returned from a cache.
1369 1388
1370 1389 Returns a str or a buffer instance of raw byte data.
1371 1390 """
1372 1391 o, d = self._chunkcache
1373 1392 l = len(d)
1374 1393
1375 1394 # is it in the cache?
1376 1395 cachestart = offset - o
1377 1396 cacheend = cachestart + length
1378 1397 if cachestart >= 0 and cacheend <= l:
1379 1398 if cachestart == 0 and cacheend == l:
1380 1399 return d # avoid a copy
1381 1400 return util.buffer(d, cachestart, cacheend - cachestart)
1382 1401
1383 1402 return self._readsegment(offset, length, df=df)
1384 1403
1385 1404 def _getsegmentforrevs(self, startrev, endrev, df=None):
1386 1405 """Obtain a segment of raw data corresponding to a range of revisions.
1387 1406
1388 1407 Accepts the start and end revisions and an optional already-open
1389 1408 file handle to be used for reading. If the file handle is used, its
1390 1409 seek position will not be preserved.
1391 1410
1392 1411 Requests for data may be satisfied by a cache.
1393 1412
1394 1413 Returns a 2-tuple of (offset, data) for the requested range of
1395 1414 revisions. Offset is the integer offset from the beginning of the
1396 1415 revlog and data is a str or buffer of the raw byte data.
1397 1416
1398 1417 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1399 1418 to determine where each revision's data begins and ends.
1400 1419 """
1401 1420 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1402 1421 # (functions are expensive).
1403 1422 index = self.index
1404 1423 istart = index[startrev]
1405 1424 start = int(istart[0] >> 16)
1406 1425 if startrev == endrev:
1407 1426 end = start + istart[1]
1408 1427 else:
1409 1428 iend = index[endrev]
1410 1429 end = int(iend[0] >> 16) + iend[1]
1411 1430
1412 1431 if self._inline:
1413 1432 start += (startrev + 1) * self._io.size
1414 1433 end += (endrev + 1) * self._io.size
1415 1434 length = end - start
1416 1435
1417 1436 return start, self._getsegment(start, length, df=df)
1418 1437
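In an inline revlog the index and data are interleaved in a single file, so a revision's data is displaced by one fixed-size index entry per revision up to and including itself; that is the (rev + 1) * self._io.size adjustment above. A sketch with a hypothetical helper:

    def inlineoffset(rev, dataoffset, iosize):
        # physical position = logical data offset plus the index
        # entries for revisions 0..rev that precede the data
        return dataoffset + (rev + 1) * iosize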
1419 1438 def _chunk(self, rev, df=None):
1420 1439 """Obtain a single decompressed chunk for a revision.
1421 1440
1422 1441 Accepts an integer revision and an optional already-open file handle
1423 1442 to be used for reading. If used, the seek position of the file will not
1424 1443 be preserved.
1425 1444
1426 1445 Returns a str holding uncompressed data for the requested revision.
1427 1446 """
1428 1447 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1429 1448
1430 1449 def _chunks(self, revs, df=None, targetsize=None):
1431 1450 """Obtain decompressed chunks for the specified revisions.
1432 1451
1433 1452 Accepts an iterable of numeric revisions that are assumed to be in
1434 1453 ascending order. Also accepts an optional already-open file handle
1435 1454 to be used for reading. If used, the seek position of the file will
1436 1455 not be preserved.
1437 1456
1438 1457 This function is similar to calling ``self._chunk()`` multiple times,
1439 1458 but is faster.
1440 1459
1441 1460 Returns a list with decompressed data for each requested revision.
1442 1461 """
1443 1462 if not revs:
1444 1463 return []
1445 1464 start = self.start
1446 1465 length = self.length
1447 1466 inline = self._inline
1448 1467 iosize = self._io.size
1449 1468 buffer = util.buffer
1450 1469
1451 1470 l = []
1452 1471 ladd = l.append
1453 1472
1454 1473 if not self._withsparseread:
1455 1474 slicedchunks = (revs,)
1456 1475 else:
1457 1476 slicedchunks = deltautil.slicechunk(self, revs,
1458 1477 targetsize=targetsize)
1459 1478
1460 1479 for revschunk in slicedchunks:
1461 1480 firstrev = revschunk[0]
1462 1481 # Skip trailing revisions with empty diff
1463 1482 for lastrev in revschunk[::-1]:
1464 1483 if length(lastrev) != 0:
1465 1484 break
1466 1485
1467 1486 try:
1468 1487 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1469 1488 except OverflowError:
1470 1489 # issue4215 - we can't cache a run of chunks greater than
1471 1490 # 2G on Windows
1472 1491 return [self._chunk(rev, df=df) for rev in revschunk]
1473 1492
1474 1493 decomp = self.decompress
1475 1494 for rev in revschunk:
1476 1495 chunkstart = start(rev)
1477 1496 if inline:
1478 1497 chunkstart += (rev + 1) * iosize
1479 1498 chunklength = length(rev)
1480 1499 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1481 1500
1482 1501 return l
1483 1502
1484 1503 def _chunkclear(self):
1485 1504 """Clear the raw chunk cache."""
1486 1505 self._chunkcache = (0, '')
1487 1506
1488 1507 def deltaparent(self, rev):
1489 1508 """return deltaparent of the given revision"""
1490 1509 base = self.index[rev][3]
1491 1510 if base == rev:
1492 1511 return nullrev
1493 1512 elif self._generaldelta:
1494 1513 return base
1495 1514 else:
1496 1515 return rev - 1
1497 1516
1498 1517 def issnapshot(self, rev):
1499 1518 """tells whether rev is a snapshot
1500 1519 """
1501 1520 if rev == nullrev:
1502 1521 return True
1503 1522 deltap = self.deltaparent(rev)
1504 1523 if deltap == nullrev:
1505 1524 return True
1506 1525 p1, p2 = self.parentrevs(rev)
1507 1526 if deltap in (p1, p2):
1508 1527 return False
1509 1528 return self.issnapshot(deltap)
1510 1529
1511 1530 def snapshotdepth(self, rev):
1512 1531 """number of snapshot in the chain before this one"""
1513 1532 if not self.issnapshot(rev):
1514 1533 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1515 1534 return len(self._deltachain(rev)[0]) - 1
1516 1535
1517 1536 def revdiff(self, rev1, rev2):
1518 1537 """return or calculate a delta between two revisions
1519 1538
1520 1539 The delta calculated is in binary form and is intended to be written to
1521 1540 revlog data directly. So this function needs raw revision data.
1522 1541 """
1523 1542 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1524 1543 return bytes(self._chunk(rev2))
1525 1544
1526 1545 return mdiff.textdiff(self.revision(rev1, raw=True),
1527 1546 self.revision(rev2, raw=True))
1528 1547
1529 1548 def revision(self, nodeorrev, _df=None, raw=False):
1530 1549 """return an uncompressed revision of a given node or revision
1531 1550 number.
1532 1551
1533 1552 _df - an existing file handle to read from. (internal-only)
1534 1553 raw - an optional argument specifying if the revision data is to be
1535 1554 treated as raw data when applying flag transforms. 'raw' should be set
1536 1555 to True when generating changegroups or in debug commands.
1537 1556 """
1538 1557 if isinstance(nodeorrev, int):
1539 1558 rev = nodeorrev
1540 1559 node = self.node(rev)
1541 1560 else:
1542 1561 node = nodeorrev
1543 1562 rev = None
1544 1563
1545 1564 cachedrev = None
1546 1565 flags = None
1547 1566 rawtext = None
1548 1567 if node == nullid:
1549 1568 return ""
1550 1569 if self._revisioncache:
1551 1570 if self._revisioncache[0] == node:
1552 1571 # _cache only stores rawtext
1553 1572 if raw:
1554 1573 return self._revisioncache[2]
1555 1574 # duplicated, but good for perf
1556 1575 if rev is None:
1557 1576 rev = self.rev(node)
1558 1577 if flags is None:
1559 1578 flags = self.flags(rev)
1560 1579 # no extra flags set, no flag processor runs, text = rawtext
1561 1580 if flags == REVIDX_DEFAULT_FLAGS:
1562 1581 return self._revisioncache[2]
1563 1582 # rawtext is reusable. need to run flag processor
1564 1583 rawtext = self._revisioncache[2]
1565 1584
1566 1585 cachedrev = self._revisioncache[1]
1567 1586
1568 1587 # look up what we need to read
1569 1588 if rawtext is None:
1570 1589 if rev is None:
1571 1590 rev = self.rev(node)
1572 1591
1573 1592 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1574 1593 if stopped:
1575 1594 rawtext = self._revisioncache[2]
1576 1595
1577 1596 # drop cache to save memory
1578 1597 self._revisioncache = None
1579 1598
1580 1599 targetsize = None
1581 1600 rawsize = self.index[rev][2]
1582 1601 if 0 <= rawsize:
1583 1602 targetsize = 4 * rawsize
1584 1603
1585 1604 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1586 1605 if rawtext is None:
1587 1606 rawtext = bytes(bins[0])
1588 1607 bins = bins[1:]
1589 1608
1590 1609 rawtext = mdiff.patches(rawtext, bins)
1591 1610 self._revisioncache = (node, rev, rawtext)
1592 1611
1593 1612 if flags is None:
1594 1613 if rev is None:
1595 1614 rev = self.rev(node)
1596 1615 flags = self.flags(rev)
1597 1616
1598 1617 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1599 1618 if validatehash:
1600 1619 self.checkhash(text, node, rev=rev)
1601 1620
1602 1621 return text
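# Read-path summary (editorial sketch, assuming a revlog instance
# ``rl``): on a cache miss the body above essentially performs
#
#   rev = rl.rev(node)
#   chain, stopped = rl._deltachain(rev)     # base-first list of revs
#   bins = rl._chunks(chain)                 # decompressed delta chunks
#   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
#
# after which flag processors derive ``text`` from ``rawtext`` and
# checkhash() validates it against ``node`` when required.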
1603 1622
1604 1623 def hash(self, text, p1, p2):
1605 1624 """Compute a node hash.
1606 1625
1607 1626 Available as a function so that subclasses can replace the hash
1608 1627 as needed.
1609 1628 """
1610 1629 return storageutil.hashrevisionsha1(text, p1, p2)
1611 1630
1612 1631 def _processflags(self, text, flags, operation, raw=False):
1613 1632 """Inspect revision data flags and apply transforms defined by
1614 1633 registered flag processors.
1615 1634
1616 1635 ``text`` - the revision data to process
1617 1636 ``flags`` - the revision flags
1618 1637 ``operation`` - the operation being performed (read or write)
1619 1638 ``raw`` - an optional argument describing if the raw transform should be
1620 1639 applied.
1621 1640
1622 1641 This method processes the flags in the order (or reverse order if
1623 1642 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1624 1643 flag processors registered for present flags. The order of flags defined
1625 1644 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1626 1645
1627 1646 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1628 1647 processed text and ``validatehash`` is a bool indicating whether the
1629 1648 returned text should be checked for hash integrity.
1630 1649
1631 1650 Note: If the ``raw`` argument is set, it has precedence over the
1632 1651 operation and will only update the value of ``validatehash``.
1633 1652 """
1634 1653 # fast path: no flag processors will run
1635 1654 if flags == 0:
1636 1655 return text, True
1637 1656 if operation not in ('read', 'write'):
1638 1657 raise error.ProgrammingError(_("invalid '%s' operation") %
1639 1658 operation)
1640 1659 # Check all flags are known.
1641 1660 if flags & ~REVIDX_KNOWN_FLAGS:
1642 1661 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1643 1662 (flags & ~REVIDX_KNOWN_FLAGS))
1644 1663 validatehash = True
1645 1664 # Depending on the operation (read or write), the order might be
1646 1665 # reversed due to non-commutative transforms.
1647 1666 orderedflags = REVIDX_FLAGS_ORDER
1648 1667 if operation == 'write':
1649 1668 orderedflags = reversed(orderedflags)
1650 1669
1651 1670 for flag in orderedflags:
1652 1671 # If a flagprocessor has been registered for a known flag, apply the
1653 1672 # related operation transform and update result tuple.
1654 1673 if flag & flags:
1655 1674 vhash = True
1656 1675
1657 1676 if flag not in self._flagprocessors:
1658 1677 message = _("missing processor for flag '%#x'") % flag
1659 1678 raise error.RevlogError(message)
1660 1679
1661 1680 processor = self._flagprocessors[flag]
1662 1681 if processor is not None:
1663 1682 readtransform, writetransform, rawtransform = processor
1664 1683
1665 1684 if raw:
1666 1685 vhash = rawtransform(self, text)
1667 1686 elif operation == 'read':
1668 1687 text, vhash = readtransform(self, text)
1669 1688 else: # write operation
1670 1689 text, vhash = writetransform(self, text)
1671 1690 validatehash = validatehash and vhash
1672 1691
1673 1692 return text, validatehash
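# Hedged example of the processor tuple unpacked above; the flag name
# and transform functions are hypothetical placeholders, not real API
# names:
#
#   def _myread(rl, text):
#       return mytransform(text), True    # (newtext, validatehash)
#   def _mywrite(rl, text):
#       return myuntransform(text), True
#   def _myraw(rl, text):
#       return False                      # rawtext hash not checkable
#
#   addflagprocessor(REVIDX_MYFLAG, (_myread, _mywrite, _myraw))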
1674 1693
1675 1694 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1676 1695 """Check node hash integrity.
1677 1696
1678 1697 Available as a function so that subclasses can extend hash mismatch
1679 1698 behaviors as needed.
1680 1699 """
1681 1700 try:
1682 1701 if p1 is None and p2 is None:
1683 1702 p1, p2 = self.parents(node)
1684 1703 if node != self.hash(text, p1, p2):
1685 1704 # Clear the revision cache on hash failure. The revision cache
1686 1705 # only stores the raw revision and clearing the cache does have
1687 1706 # the side-effect that we won't have a cache hit when the raw
1688 1707 # revision data is accessed. But this case should be rare and
1689 1708 # it is extra work to teach the cache about the hash
1690 1709 # verification state.
1691 1710 if self._revisioncache and self._revisioncache[0] == node:
1692 1711 self._revisioncache = None
1693 1712
1694 1713 revornode = rev
1695 1714 if revornode is None:
1696 1715 revornode = templatefilters.short(hex(node))
1697 1716 raise error.RevlogError(_("integrity check failed on %s:%s")
1698 1717 % (self.indexfile, pycompat.bytestr(revornode)))
1699 1718 except error.RevlogError:
1700 1719 if self._censorable and storageutil.iscensoredtext(text):
1701 1720 raise error.CensoredNodeError(self.indexfile, node, text)
1702 1721 raise
1703 1722
1704 1723 def _enforceinlinesize(self, tr, fp=None):
1705 1724 """Check if the revlog is too big for inline and convert if so.
1706 1725
1707 1726 This should be called after revisions are added to the revlog. If the
1708 1727 revlog has grown too large to be an inline revlog, it will convert it
1709 1728 to use multiple index and data files.
1710 1729 """
1711 1730 tiprev = len(self) - 1
1712 1731 if (not self._inline or
1713 1732 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1714 1733 return
1715 1734
1716 1735 trinfo = tr.find(self.indexfile)
1717 1736 if trinfo is None:
1718 1737 raise error.RevlogError(_("%s not found in the transaction")
1719 1738 % self.indexfile)
1720 1739
1721 1740 trindex = trinfo[2]
1722 1741 if trindex is not None:
1723 1742 dataoff = self.start(trindex)
1724 1743 else:
1725 1744 # revlog was stripped at start of transaction, use all leftover data
1726 1745 trindex = len(self) - 1
1727 1746 dataoff = self.end(tiprev)
1728 1747
1729 1748 tr.add(self.datafile, dataoff)
1730 1749
1731 1750 if fp:
1732 1751 fp.flush()
1733 1752 fp.close()
1734 1753
1735 1754 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1736 1755 for r in self:
1737 1756 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1738 1757
1739 1758 with self._indexfp('w') as fp:
1740 1759 self.version &= ~FLAG_INLINE_DATA
1741 1760 self._inline = False
1742 1761 io = self._io
1743 1762 for i in self:
1744 1763 e = io.packentry(self.index[i], self.node, self.version, i)
1745 1764 fp.write(e)
1746 1765
1747 1766 # the temp file replaces the real index when we exit the context
1748 1767 # manager
1749 1768
1750 1769 tr.replace(self.indexfile, trindex * self._io.size)
1751 1770 self._chunkclear()
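# Editorial note: the conversion above triggers once the inline file
# reaches _maxinline (131072 bytes, i.e. 128 KiB). A sketch of the
# layout change, using the conventional revlog file names for
# illustration:
#
#   before: <name>.i holds index entries interleaved with data chunks
#   after:  <name>.i holds only the index, <name>.d holds the data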
1752 1771
1753 1772 def _nodeduplicatecallback(self, transaction, node):
1754 1773 """called when trying to add a node already stored.
1755 1774 """
1756 1775
1757 1776 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1758 1777 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1759 1778 """add a revision to the log
1760 1779
1761 1780 text - the revision data to add
1762 1781 transaction - the transaction object used for rollback
1763 1782 link - the linkrev data to add
1764 1783 p1, p2 - the parent nodeids of the revision
1765 1784 cachedelta - an optional precomputed delta
1766 1785 node - nodeid of revision; typically node is not specified, and it is
1767 1786 computed by default as hash(text, p1, p2), however subclasses might
1768 1787 use different hashing method (and override checkhash() in such case)
1769 1788 flags - the known flags to set on the revision
1770 1789 deltacomputer - an optional deltacomputer instance shared between
1771 1790 multiple calls
1772 1791 """
1773 1792 if link == nullrev:
1774 1793 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1775 1794 % self.indexfile)
1776 1795
1777 1796 if flags:
1778 1797 node = node or self.hash(text, p1, p2)
1779 1798
1780 1799 rawtext, validatehash = self._processflags(text, flags, 'write')
1781 1800
1782 1801 # If the flag processor modifies the revision data, ignore any provided
1783 1802 # cachedelta.
1784 1803 if rawtext != text:
1785 1804 cachedelta = None
1786 1805
1787 1806 if len(rawtext) > _maxentrysize:
1788 1807 raise error.RevlogError(
1789 1808 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1790 1809 % (self.indexfile, len(rawtext)))
1791 1810
1792 1811 node = node or self.hash(rawtext, p1, p2)
1793 1812 if node in self.nodemap:
1794 1813 return node
1795 1814
1796 1815 if validatehash:
1797 1816 self.checkhash(rawtext, node, p1=p1, p2=p2)
1798 1817
1799 1818 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1800 1819 flags, cachedelta=cachedelta,
1801 1820 deltacomputer=deltacomputer)
1802 1821
1803 1822 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1804 1823 cachedelta=None, deltacomputer=None):
1805 1824 """add a raw revision with known flags, node and parents.
1806 1825 Useful when reusing a revision not stored in this revlog (e.g. received
1807 1826 over the wire, or read from an external bundle).
1808 1827 """
1809 1828 dfh = None
1810 1829 if not self._inline:
1811 1830 dfh = self._datafp("a+")
1812 1831 ifh = self._indexfp("a+")
1813 1832 try:
1814 1833 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1815 1834 flags, cachedelta, ifh, dfh,
1816 1835 deltacomputer=deltacomputer)
1817 1836 finally:
1818 1837 if dfh:
1819 1838 dfh.close()
1820 1839 ifh.close()
1821 1840
1822 1841 def compress(self, data):
1823 1842 """Generate a possibly-compressed representation of data."""
1824 1843 if not data:
1825 1844 return '', data
1826 1845
1827 1846 compressed = self._compressor.compress(data)
1828 1847
1829 1848 if compressed:
1830 1849 # The revlog compressor added the header in the returned data.
1831 1850 return '', compressed
1832 1851
1833 1852 if data[0:1] == '\0':
1834 1853 return '', data
1835 1854 return 'u', data
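# Illustrative round trip (assumes a zlib-backed revlog ``rl``):
#
#   header, packed = rl.compress(b'x' * 1024)
#   # header == '' because the zlib chunk carries its own 'x' marker;
#   # incompressible input would come back as ('u', data) instead
#   assert rl.decompress(header + packed) == b'x' * 1024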
1836 1855
1837 1856 def decompress(self, data):
1838 1857 """Decompress a revlog chunk.
1839 1858
1840 1859 The chunk is expected to begin with a header identifying the
1841 1860 format type so it can be routed to an appropriate decompressor.
1842 1861 """
1843 1862 if not data:
1844 1863 return data
1845 1864
1846 1865 # Revlogs are read much more frequently than they are written and many
1847 1866 # chunks only take microseconds to decompress, so performance is
1848 1867 # important here.
1849 1868 #
1850 1869 # We can make a few assumptions about revlogs:
1851 1870 #
1852 1871 # 1) the majority of chunks will be compressed (as opposed to inline
1853 1872 # raw data).
1854 1873 # 2) decompressing *any* data will likely be at least 10x slower than
1855 1874 # returning raw inline data.
1856 1875 # 3) we want to prioritize common and officially supported compression
1857 1876 # engines
1858 1877 #
1859 1878 # It follows that we want to optimize for "decompress compressed data
1860 1879 # when encoded with common and officially supported compression engines"
1861 1880 # case over "raw data" and "data encoded by less common or non-official
1862 1881 # compression engines." That is why we have the inline lookup first
1863 1882 # followed by the compengines lookup.
1864 1883 #
1865 1884 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1866 1885 # compressed chunks. And this matters for changelog and manifest reads.
1867 1886 t = data[0:1]
1868 1887
1869 1888 if t == 'x':
1870 1889 try:
1871 1890 return _zlibdecompress(data)
1872 1891 except zlib.error as e:
1873 1892 raise error.RevlogError(_('revlog decompress error: %s') %
1874 1893 stringutil.forcebytestr(e))
1875 1894 # '\0' is more common than 'u' so it goes first.
1876 1895 elif t == '\0':
1877 1896 return data
1878 1897 elif t == 'u':
1879 1898 return util.buffer(data, 1)
1880 1899
1881 1900 try:
1882 1901 compressor = self._decompressors[t]
1883 1902 except KeyError:
1884 1903 try:
1885 1904 engine = util.compengines.forrevlogheader(t)
1886 1905 compressor = engine.revlogcompressor()
1887 1906 self._decompressors[t] = compressor
1888 1907 except KeyError:
1889 1908 raise error.RevlogError(_('unknown compression type %r') % t)
1890 1909
1891 1910 return compressor.decompress(data)
1892 1911
1893 1912 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1894 1913 cachedelta, ifh, dfh, alwayscache=False,
1895 1914 deltacomputer=None):
1896 1915 """internal function to add revisions to the log
1897 1916
1898 1917 see addrevision for argument descriptions.
1899 1918
1900 1919 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1901 1920
1902 1921 if "deltacomputer" is not provided or None, a default deltacomputer will
1903 1922 be used.
1904 1923
1905 1924 invariants:
1906 1925 - rawtext is optional (can be None); if not set, cachedelta must be set.
1907 1926 If both are set, they must correspond to each other.
1908 1927 """
1909 1928 if node == nullid:
1910 1929 raise error.RevlogError(_("%s: attempt to add null revision") %
1911 1930 self.indexfile)
1912 1931 if node == wdirid or node in wdirfilenodeids:
1913 1932 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1914 1933 self.indexfile)
1915 1934
1916 1935 if self._inline:
1917 1936 fh = ifh
1918 1937 else:
1919 1938 fh = dfh
1920 1939
1921 1940 btext = [rawtext]
1922 1941
1923 1942 curr = len(self)
1924 1943 prev = curr - 1
1925 1944 offset = self.end(prev)
1926 1945 p1r, p2r = self.rev(p1), self.rev(p2)
1927 1946
1928 1947 # full versions are inserted when the needed deltas
1929 1948 # become comparable to the uncompressed text
1930 1949 if rawtext is None:
1931 1950 # need the rawtext size before it is changed by flag processors,
1932 1951 # which is the non-raw size. use revlog explicitly to avoid filelog's
1933 1952 # extra logic that might remove metadata size.
1934 1953 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1935 1954 cachedelta[1])
1936 1955 else:
1937 1956 textlen = len(rawtext)
1938 1957
1939 1958 if deltacomputer is None:
1940 1959 deltacomputer = deltautil.deltacomputer(self)
1941 1960
1942 1961 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1943 1962
1944 1963 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1945 1964
1946 1965 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1947 1966 deltainfo.base, link, p1r, p2r, node)
1948 1967 self.index.append(e)
1949 1968 self.nodemap[node] = curr
1950 1969
1951 1970 # Reset the pure node cache start lookup offset to account for new
1952 1971 # revision.
1953 1972 if self._nodepos is not None:
1954 1973 self._nodepos = curr
1955 1974
1956 1975 entry = self._io.packentry(e, self.node, self.version, curr)
1957 1976 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1958 1977 link, offset)
1959 1978
1960 1979 rawtext = btext[0]
1961 1980
1962 1981 if alwayscache and rawtext is None:
1963 1982 rawtext = deltacomputer.buildtext(revinfo, fh)
1964 1983
1965 1984 if type(rawtext) == bytes: # only accept immutable objects
1966 1985 self._revisioncache = (node, curr, rawtext)
1967 1986 self._chainbasecache[curr] = deltainfo.chainbase
1968 1987 return node
1969 1988
1970 1989 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1971 1990 # Files opened in a+ mode have inconsistent behavior on various
1972 1991 # platforms. Windows requires that a file positioning call be made
1973 1992 # when the file handle transitions between reads and writes. See
1974 1993 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1975 1994 # platforms, Python or the platform itself can be buggy. Some versions
1976 1995 # of Solaris have been observed to not append at the end of the file
1977 1996 # if the file was seeked to before the end. See issue4943 for more.
1978 1997 #
1979 1998 # We work around this issue by inserting a seek() before writing.
1980 1999 # Note: This is likely not necessary on Python 3.
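#
# Editorial sketch of the failure mode being worked around
# (illustrative, not from the original source):
#
#   fh = open(path, 'a+b')
#   fh.read(1)            # file position is now 1, not EOF
#   fh.write(b'entry')    # some platforms append, others write at 1
#
# The explicit seek() below makes the append position unambiguous.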
1981 2000 ifh.seek(0, os.SEEK_END)
1982 2001 if dfh:
1983 2002 dfh.seek(0, os.SEEK_END)
1984 2003
1985 2004 curr = len(self) - 1
1986 2005 if not self._inline:
1987 2006 transaction.add(self.datafile, offset)
1988 2007 transaction.add(self.indexfile, curr * len(entry))
1989 2008 if data[0]:
1990 2009 dfh.write(data[0])
1991 2010 dfh.write(data[1])
1992 2011 ifh.write(entry)
1993 2012 else:
1994 2013 offset += curr * self._io.size
1995 2014 transaction.add(self.indexfile, offset, curr)
1996 2015 ifh.write(entry)
1997 2016 ifh.write(data[0])
1998 2017 ifh.write(data[1])
1999 2018 self._enforceinlinesize(transaction, ifh)
2000 2019
2001 2020 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2002 2021 """
2003 2022 add a delta group
2004 2023
2005 2024 Given a set of deltas, add them to the revision log. The
2006 2025 first delta is against its parent, which should be in our
2007 2026 log; the rest are against the previous delta.
2008 2027
2009 2028 If ``addrevisioncb`` is defined, it will be called with arguments of
2010 2029 this revlog and the node that was added.
2011 2030 """
2012 2031
2013 2032 nodes = []
2014 2033
2015 2034 r = len(self)
2016 2035 end = 0
2017 2036 if r:
2018 2037 end = self.end(r - 1)
2019 2038 ifh = self._indexfp("a+")
2020 2039 isize = r * self._io.size
2021 2040 if self._inline:
2022 2041 transaction.add(self.indexfile, end + isize, r)
2023 2042 dfh = None
2024 2043 else:
2025 2044 transaction.add(self.indexfile, isize, r)
2026 2045 transaction.add(self.datafile, end)
2027 2046 dfh = self._datafp("a+")
2028 2047 def flush():
2029 2048 if dfh:
2030 2049 dfh.flush()
2031 2050 ifh.flush()
2032 2051 try:
2033 2052 deltacomputer = deltautil.deltacomputer(self)
2034 2053 # loop through our set of deltas
2035 2054 for data in deltas:
2036 2055 node, p1, p2, linknode, deltabase, delta, flags = data
2037 2056 link = linkmapper(linknode)
2038 2057 flags = flags or REVIDX_DEFAULT_FLAGS
2039 2058
2040 2059 nodes.append(node)
2041 2060
2042 2061 if node in self.nodemap:
2043 2062 self._nodeduplicatecallback(transaction, node)
2044 2063 # this can happen if two branches make the same change
2045 2064 continue
2046 2065
2047 2066 for p in (p1, p2):
2048 2067 if p not in self.nodemap:
2049 2068 raise error.LookupError(p, self.indexfile,
2050 2069 _('unknown parent'))
2051 2070
2052 2071 if deltabase not in self.nodemap:
2053 2072 raise error.LookupError(deltabase, self.indexfile,
2054 2073 _('unknown delta base'))
2055 2074
2056 2075 baserev = self.rev(deltabase)
2057 2076
2058 2077 if baserev != nullrev and self.iscensored(baserev):
2059 2078 # if base is censored, delta must be a full replacement in a
2060 2079 # single patch operation
2061 2080 hlen = struct.calcsize(">lll")
2062 2081 oldlen = self.rawsize(baserev)
2063 2082 newlen = len(delta) - hlen
2064 2083 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2065 2084 raise error.CensoredBaseError(self.indexfile,
2066 2085 self.node(baserev))
2067 2086
2068 2087 if not flags and self._peek_iscensored(baserev, delta, flush):
2069 2088 flags |= REVIDX_ISCENSORED
2070 2089
2071 2090 # We assume consumers of addrevisioncb will want to retrieve
2072 2091 # the added revision, which will require a call to
2073 2092 # revision(). revision() will fast path if there is a cache
2074 2093 # hit. So, we tell _addrevision() to always cache in this case.
2075 2094 # We're only using addgroup() in the context of changegroup
2076 2095 # generation so the revision data can always be handled as raw
2077 2096 # by the flagprocessor.
2078 2097 self._addrevision(node, None, transaction, link,
2079 2098 p1, p2, flags, (baserev, delta),
2080 2099 ifh, dfh,
2081 2100 alwayscache=bool(addrevisioncb),
2082 2101 deltacomputer=deltacomputer)
2083 2102
2084 2103 if addrevisioncb:
2085 2104 addrevisioncb(self, node)
2086 2105
2087 2106 if not dfh and not self._inline:
2088 2107 # addrevision switched from inline to conventional
2089 2108 # reopen the index
2090 2109 ifh.close()
2091 2110 dfh = self._datafp("a+")
2092 2111 ifh = self._indexfp("a+")
2093 2112 finally:
2094 2113 if dfh:
2095 2114 dfh.close()
2096 2115 ifh.close()
2097 2116
2098 2117 return nodes
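# Shape of the incoming ``deltas`` iterable (editorial note): each item
# is the 7-tuple unpacked in the loop above,
#
#   (node, p1, p2, linknode, deltabase, delta, flags)
#
# where ``delta`` is a binary patch against ``deltabase``, which must
# already exist in this revlog or appear earlier in the same group.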
2099 2118
2100 2119 def iscensored(self, rev):
2101 2120 """Check if a file revision is censored."""
2102 2121 if not self._censorable:
2103 2122 return False
2104 2123
2105 2124 return self.flags(rev) & REVIDX_ISCENSORED
2106 2125
2107 2126 def _peek_iscensored(self, baserev, delta, flush):
2108 2127 """Quickly check if a delta produces a censored revision."""
2109 2128 if not self._censorable:
2110 2129 return False
2111 2130
2112 2131 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2113 2132
2114 2133 def getstrippoint(self, minlink):
2115 2134 """find the minimum rev that must be stripped to strip the linkrev
2116 2135
2117 2136 Returns a tuple containing the minimum rev and a set of all revs that
2118 2137 have linkrevs that will be broken by this strip.
2119 2138 """
2120 2139 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2121 2140 self.headrevs(),
2122 2141 self.linkrev, self.parentrevs)
2123 2142
2124 2143 def strip(self, minlink, transaction):
2125 2144 """truncate the revlog on the first revision with a linkrev >= minlink
2126 2145
2127 2146 This function is called when we're stripping revision minlink and
2128 2147 its descendants from the repository.
2129 2148
2130 2149 We have to remove all revisions with linkrev >= minlink, because
2131 2150 the equivalent changelog revisions will be renumbered after the
2132 2151 strip.
2133 2152
2134 2153 So we truncate the revlog on the first of these revisions, and
2135 2154 trust that the caller has saved the revisions that shouldn't be
2136 2155 removed and that it'll re-add them after this truncation.
2137 2156 """
2138 2157 if len(self) == 0:
2139 2158 return
2140 2159
2141 2160 rev, _ = self.getstrippoint(minlink)
2142 2161 if rev == len(self):
2143 2162 return
2144 2163
2145 2164 # first truncate the files on disk
2146 2165 end = self.start(rev)
2147 2166 if not self._inline:
2148 2167 transaction.add(self.datafile, end)
2149 2168 end = rev * self._io.size
2150 2169 else:
2151 2170 end += rev * self._io.size
2152 2171
2153 2172 transaction.add(self.indexfile, end)
2154 2173
2155 2174 # then reset internal state in memory to forget those revisions
2156 2175 self._revisioncache = None
2157 2176 self._chaininfocache = {}
2158 2177 self._chunkclear()
2159 2178 for x in pycompat.xrange(rev, len(self)):
2160 2179 del self.nodemap[self.node(x)]
2161 2180
2162 2181 del self.index[rev:-1]
2163 2182 self._nodepos = None
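# Editorial note on the truncation offsets above: for a split revlog
# the data file is cut at start(rev) and the index at
# rev * entry size; for an inline revlog both live in one file, so the
# single cut point is start(rev) + rev * entry size.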
2164 2183
2165 2184 def checksize(self):
2166 2185 expected = 0
2167 2186 if len(self):
2168 2187 expected = max(0, self.end(len(self) - 1))
2169 2188
2170 2189 try:
2171 2190 with self._datafp() as f:
2172 2191 f.seek(0, 2)
2173 2192 actual = f.tell()
2174 2193 dd = actual - expected
2175 2194 except IOError as inst:
2176 2195 if inst.errno != errno.ENOENT:
2177 2196 raise
2178 2197 dd = 0
2179 2198
2180 2199 try:
2181 2200 f = self.opener(self.indexfile)
2182 2201 f.seek(0, 2)
2183 2202 actual = f.tell()
2184 2203 f.close()
2185 2204 s = self._io.size
2186 2205 i = max(0, actual // s)
2187 2206 di = actual - (i * s)
2188 2207 if self._inline:
2189 2208 databytes = 0
2190 2209 for r in self:
2191 2210 databytes += max(0, self.length(r))
2192 2211 dd = 0
2193 2212 di = actual - len(self) * s - databytes
2194 2213 except IOError as inst:
2195 2214 if inst.errno != errno.ENOENT:
2196 2215 raise
2197 2216 di = 0
2198 2217
2199 2218 return (dd, di)
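# Interpretation aid (editorial): ``dd`` is the number of unexpected
# bytes in the data file and ``di`` the number of extra bytes in the
# index; verifyintegrity() below reports nonzero values as problems.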
2200 2219
2201 2220 def files(self):
2202 2221 res = [self.indexfile]
2203 2222 if not self._inline:
2204 2223 res.append(self.datafile)
2205 2224 return res
2206 2225
2207 2226 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2208 2227 assumehaveparentrevisions=False,
2209 2228 deltamode=repository.CG_DELTAMODE_STD):
2210 2229 if nodesorder not in ('nodes', 'storage', 'linear', None):
2211 2230 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2212 2231 nodesorder)
2213 2232
2214 2233 if nodesorder is None and not self._generaldelta:
2215 2234 nodesorder = 'storage'
2216 2235
2217 2236 if (not self._storedeltachains and
2218 2237 deltamode != repository.CG_DELTAMODE_PREV):
2219 2238 deltamode = repository.CG_DELTAMODE_FULL
2220 2239
2221 2240 return storageutil.emitrevisions(
2222 2241 self, nodes, nodesorder, revlogrevisiondelta,
2223 2242 deltaparentfn=self.deltaparent,
2224 2243 candeltafn=self.candelta,
2225 2244 rawsizefn=self.rawsize,
2226 2245 revdifffn=self.revdiff,
2227 2246 flagsfn=self.flags,
2228 2247 deltamode=deltamode,
2229 2248 revisiondata=revisiondata,
2230 2249 assumehaveparentrevisions=assumehaveparentrevisions)
2231 2250
2232 2251 DELTAREUSEALWAYS = 'always'
2233 2252 DELTAREUSESAMEREVS = 'samerevs'
2234 2253 DELTAREUSENEVER = 'never'
2235 2254
2236 2255 DELTAREUSEFULLADD = 'fulladd'
2237 2256
2238 2257 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2239 2258
2240 2259 def clone(self, tr, destrevlog, addrevisioncb=None,
2241 2260 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2242 2261 """Copy this revlog to another, possibly with format changes.
2243 2262
2244 2263 The destination revlog will contain the same revisions and nodes.
2245 2264 However, it may not be bit-for-bit identical due to e.g. delta encoding
2246 2265 differences.
2247 2266
2248 2267 The ``deltareuse`` argument controls how deltas from the existing revlog
2249 2268 are preserved in the destination revlog. The argument can have the
2250 2269 following values:
2251 2270
2252 2271 DELTAREUSEALWAYS
2253 2272 Deltas will always be reused (if possible), even if the destination
2254 2273 revlog would not select the same revisions for the delta. This is the
2255 2274 fastest mode of operation.
2256 2275 DELTAREUSESAMEREVS
2257 2276 Deltas will be reused if the destination revlog would pick the same
2258 2277 revisions for the delta. This mode strikes a balance between speed
2259 2278 and optimization.
2260 2279 DELTAREUSENEVER
2261 2280 Deltas will never be reused. This is the slowest mode of execution.
2262 2281 This mode can be used to recompute deltas (e.g. if the diff/delta
2263 2282 algorithm changes).
2264 2283
2265 2284 Delta computation can be slow, so the choice of delta reuse policy can
2266 2285 significantly affect run time.
2267 2286
2268 2287 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2269 2288 two extremes. Deltas will be reused if they are appropriate. But if the
2270 2289 delta could choose a better revision, it will do so. This means if you
2271 2290 are converting a non-generaldelta revlog to a generaldelta revlog,
2272 2291 deltas will be recomputed if the delta's parent isn't a parent of the
2273 2292 revision.
2274 2293
2275 2294 In addition to the delta policy, the ``deltabothparents`` argument
2276 2295 controls whether to compute deltas against both parents for merges.
2277 2296 If None, the destination revlog's current setting is kept.
2278 2297 """
2279 2298 if deltareuse not in self.DELTAREUSEALL:
2280 2299 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2281 2300
2282 2301 if len(destrevlog):
2283 2302 raise ValueError(_('destination revlog is not empty'))
2284 2303
2285 2304 if getattr(self, 'filteredrevs', None):
2286 2305 raise ValueError(_('source revlog has filtered revisions'))
2287 2306 if getattr(destrevlog, 'filteredrevs', None):
2288 2307 raise ValueError(_('destination revlog has filtered revisions'))
2289 2308
2290 2309 # lazydeltabase controls whether to reuse a cached delta, if possible.
2291 2310 oldlazydeltabase = destrevlog._lazydeltabase
2292 2311 oldamd = destrevlog._deltabothparents
2293 2312
2294 2313 try:
2295 2314 if deltareuse == self.DELTAREUSEALWAYS:
2296 2315 destrevlog._lazydeltabase = True
2297 2316 elif deltareuse == self.DELTAREUSESAMEREVS:
2298 2317 destrevlog._lazydeltabase = False
2299 2318
2300 2319 destrevlog._deltabothparents = deltabothparents or oldamd
2301 2320
2302 2321 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2303 2322 self.DELTAREUSESAMEREVS)
2304 2323
2305 2324 deltacomputer = deltautil.deltacomputer(destrevlog)
2306 2325 index = self.index
2307 2326 for rev in self:
2308 2327 entry = index[rev]
2309 2328
2310 2329 # Some classes override linkrev to take filtered revs into
2311 2330 # account. Use raw entry from index.
2312 2331 flags = entry[0] & 0xffff
2313 2332 linkrev = entry[4]
2314 2333 p1 = index[entry[5]][7]
2315 2334 p2 = index[entry[6]][7]
2316 2335 node = entry[7]
2317 2336
2318 2337 # (Possibly) reuse the delta from the revlog if allowed and
2319 2338 # the revlog chunk is a delta.
2320 2339 cachedelta = None
2321 2340 rawtext = None
2322 2341 if populatecachedelta:
2323 2342 dp = self.deltaparent(rev)
2324 2343 if dp != nullrev:
2325 2344 cachedelta = (dp, bytes(self._chunk(rev)))
2326 2345
2327 2346 if not cachedelta:
2328 2347 rawtext = self.revision(rev, raw=True)
2329 2348
2330 2349
2331 2350 if deltareuse == self.DELTAREUSEFULLADD:
2332 2351 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2333 2352 cachedelta=cachedelta,
2334 2353 node=node, flags=flags,
2335 2354 deltacomputer=deltacomputer)
2336 2355 else:
2337 2356 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2338 2357 checkambig=False)
2339 2358 dfh = None
2340 2359 if not destrevlog._inline:
2341 2360 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2342 2361 try:
2343 2362 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2344 2363 p2, flags, cachedelta, ifh, dfh,
2345 2364 deltacomputer=deltacomputer)
2346 2365 finally:
2347 2366 if dfh:
2348 2367 dfh.close()
2349 2368 ifh.close()
2350 2369
2351 2370 if addrevisioncb:
2352 2371 addrevisioncb(self, rev, node)
2353 2372 finally:
2354 2373 destrevlog._lazydeltabase = oldlazydeltabase
2355 2374 destrevlog._deltabothparents = oldamd
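# Hedged usage sketch: an upgrade that recomputes every delta might run
#
#   srcrevlog.clone(tr, destrevlog, deltareuse=srcrevlog.DELTAREUSENEVER)
#
# while a plain format conversion would keep DELTAREUSESAMEREVS, the
# default documented above.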
2356 2375
2357 2376 def censorrevision(self, tr, censornode, tombstone=b''):
2358 2377 if (self.version & 0xFFFF) == REVLOGV0:
2359 2378 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2360 2379 self.version)
2361 2380
2362 2381 censorrev = self.rev(censornode)
2363 2382 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2364 2383
2365 2384 if len(tombstone) > self.rawsize(censorrev):
2366 2385 raise error.Abort(_('censor tombstone must be no longer than '
2367 2386 'censored data'))
2368 2387
2369 2388 # Rewriting the revlog in place is hard. Our strategy for censoring is
2370 2389 # to create a new revlog, copy all revisions to it, then replace the
2371 2390 # revlogs on transaction close.
2372 2391
2373 2392 newindexfile = self.indexfile + b'.tmpcensored'
2374 2393 newdatafile = self.datafile + b'.tmpcensored'
2375 2394
2376 2395 # This is a bit dangerous. We could easily have a mismatch of state.
2377 2396 newrl = revlog(self.opener, newindexfile, newdatafile,
2378 2397 censorable=True)
2379 2398 newrl.version = self.version
2380 2399 newrl._generaldelta = self._generaldelta
2381 2400 newrl._io = self._io
2382 2401
2383 2402 for rev in self.revs():
2384 2403 node = self.node(rev)
2385 2404 p1, p2 = self.parents(node)
2386 2405
2387 2406 if rev == censorrev:
2388 2407 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2389 2408 p1, p2, censornode, REVIDX_ISCENSORED)
2390 2409
2391 2410 if newrl.deltaparent(rev) != nullrev:
2392 2411 raise error.Abort(_('censored revision stored as delta; '
2393 2412 'cannot censor'),
2394 2413 hint=_('censoring of revlogs is not '
2395 2414 'fully implemented; please report '
2396 2415 'this bug'))
2397 2416 continue
2398 2417
2399 2418 if self.iscensored(rev):
2400 2419 if self.deltaparent(rev) != nullrev:
2401 2420 raise error.Abort(_('cannot censor due to censored '
2402 2421 'revision having delta stored'))
2403 2422 rawtext = self._chunk(rev)
2404 2423 else:
2405 2424 rawtext = self.revision(rev, raw=True)
2406 2425
2407 2426 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2408 2427 self.flags(rev))
2409 2428
2410 2429 tr.addbackup(self.indexfile, location='store')
2411 2430 if not self._inline:
2412 2431 tr.addbackup(self.datafile, location='store')
2413 2432
2414 2433 self.opener.rename(newrl.indexfile, self.indexfile)
2415 2434 if not self._inline:
2416 2435 self.opener.rename(newrl.datafile, self.datafile)
2417 2436
2418 2437 self.clearcaches()
2419 2438 self._loadindex(self.version, None)
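# Strategy recap (editorial): censoring rewrites the whole revlog into
# the .tmpcensored index and data files, substituting the tombstone for
# the censored revision, then swaps the new files into place (with
# transaction backups for rollback) and reloads the index.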
2420 2439
2421 2440 def verifyintegrity(self, state):
2422 2441 """Verifies the integrity of the revlog.
2423 2442
2424 2443 Yields ``revlogproblem`` instances describing problems that are
2425 2444 found.
2426 2445 """
2427 2446 dd, di = self.checksize()
2428 2447 if dd:
2429 2448 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2430 2449 if di:
2431 2450 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2432 2451
2433 2452 version = self.version & 0xFFFF
2434 2453
2435 2454 # The verifier tells us what version revlog we should be.
2436 2455 if version != state['expectedversion']:
2437 2456 yield revlogproblem(
2438 2457 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2439 2458 (self.indexfile, version, state['expectedversion']))
2440 2459
2441 2460 state['skipread'] = set()
2442 2461
2443 2462 for rev in self:
2444 2463 node = self.node(rev)
2445 2464
2446 2465 # Verify contents. 4 cases to care about:
2447 2466 #
2448 2467 # common: the most common case
2449 2468 # rename: with a rename
2450 2469 # meta: file content starts with b'\1\n', the metadata
2451 2470 # header defined in filelog.py, but without a rename
2452 2471 # ext: content stored externally
2453 2472 #
2454 2473 # More formally, their differences are shown below:
2455 2474 #
2456 2475 # | common | rename | meta | ext
2457 2476 # -------------------------------------------------------
2458 2477 # flags() | 0 | 0 | 0 | not 0
2459 2478 # renamed() | False | True | False | ?
2460 2479 # rawtext[0:2]=='\1\n'| False | True | True | ?
2461 2480 #
2462 2481 # "rawtext" means the raw text stored in revlog data, which
2463 2482 # could be retrieved by "revision(rev, raw=True)". "text"
2464 2483 # mentioned below is "revision(rev, raw=False)".
2465 2484 #
2466 2485 # There are 3 different lengths stored physically:
2467 2486 # 1. L1: rawsize, stored in revlog index
2468 2487 # 2. L2: len(rawtext), stored in revlog data
2469 2488 # 3. L3: len(text), stored in revlog data if flags==0, or
2470 2489 # possibly somewhere else if flags!=0
2471 2490 #
2472 2491 # L1 should be equal to L2. L3 could be different from them.
2473 2492 # "text" may or may not affect commit hash depending on flag
2474 2493 # processors (see revlog.addflagprocessor).
2475 2494 #
2476 2495 # | common | rename | meta | ext
2477 2496 # -------------------------------------------------
2478 2497 # rawsize() | L1 | L1 | L1 | L1
2479 2498 # size() | L1 | L2-LM | L1(*) | L1 (?)
2480 2499 # len(rawtext) | L2 | L2 | L2 | L2
2481 2500 # len(text) | L2 | L2 | L2 | L3
2482 2501 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2483 2502 #
2484 2503 # LM: length of metadata, depending on rawtext
2485 2504 # (*): not ideal, see comment in filelog.size
2486 2505 # (?): could be "- len(meta)" if the resolved content has
2487 2506 # rename metadata
2488 2507 #
2489 2508 # Checks needed to be done:
2490 2509 # 1. length check: L1 == L2, in all cases.
2491 2510 # 2. hash check: depending on flag processor, we may need to
2492 2511 # use either "text" (external), or "rawtext" (in revlog).
2493 2512
2494 2513 try:
2495 2514 skipflags = state.get('skipflags', 0)
2496 2515 if skipflags:
2497 2516 skipflags &= self.flags(rev)
2498 2517
2499 2518 if skipflags:
2500 2519 state['skipread'].add(node)
2501 2520 else:
2502 2521 # Side-effect: read content and verify hash.
2503 2522 self.revision(node)
2504 2523
2505 2524 l1 = self.rawsize(rev)
2506 2525 l2 = len(self.revision(node, raw=True))
2507 2526
2508 2527 if l1 != l2:
2509 2528 yield revlogproblem(
2510 2529 error=_('unpacked size is %d, %d expected') % (l2, l1),
2511 2530 node=node)
2512 2531
2513 2532 except error.CensoredNodeError:
2514 2533 if state['erroroncensored']:
2515 2534 yield revlogproblem(error=_('censored file data'),
2516 2535 node=node)
2517 2536 state['skipread'].add(node)
2518 2537 except Exception as e:
2519 2538 yield revlogproblem(
2520 2539 error=_('unpacking %s: %s') % (short(node),
2521 2540 stringutil.forcebytestr(e)),
2522 2541 node=node)
2523 2542 state['skipread'].add(node)
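# Consumption sketch (illustrative): a verifier might drive this with
#
#   for problem in rl.verifyintegrity(state):
#       handle(problem.warning or problem.error)
#
# where ``state`` supplies 'expectedversion' plus optional 'skipflags'
# and 'erroroncensored', and receives the 'skipread' node set; ``rl``
# and ``handle`` are assumed names.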
2524 2543
2525 2544 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2526 2545 revisionscount=False, trackedsize=False,
2527 2546 storedsize=False):
2528 2547 d = {}
2529 2548
2530 2549 if exclusivefiles:
2531 2550 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2532 2551 if not self._inline:
2533 2552 d['exclusivefiles'].append((self.opener, self.datafile))
2534 2553
2535 2554 if sharedfiles:
2536 2555 d['sharedfiles'] = []
2537 2556
2538 2557 if revisionscount:
2539 2558 d['revisionscount'] = len(self)
2540 2559
2541 2560 if trackedsize:
2542 2561 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2543 2562
2544 2563 if storedsize:
2545 2564 d['storedsize'] = sum(self.opener.stat(path).st_size
2546 2565 for path in self.files())
2547 2566
2548 2567 return d