##// END OF EJS Templates
revlog: store mmaplargeindex as an instance attribute...
Gregory Szorc -
r41239:d0de4fdd default
parent child Browse files
Show More
@@ -1,2630 +1,2631 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanked usage of all the name to prevent pyflakes constraints
80 80 # We need these name available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns a same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
183 183
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
288 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 289 # signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 self._mmaplargeindex = mmaplargeindex
355 356 self._censorable = censorable
356 357 # 3-tuple of (node, rev, text) for a raw revision.
357 358 self._revisioncache = None
358 359 # Maps rev to chain base rev.
359 360 self._chainbasecache = util.lrucachedict(100)
360 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 362 self._chunkcache = (0, '')
362 363 # How much data to read and cache into the raw revlog data cache.
363 364 self._chunkcachesize = 65536
364 365 self._maxchainlen = None
365 366 self._deltabothparents = True
366 367 self.index = []
367 368 # Mapping of partial identifiers to full nodes.
368 369 self._pcache = {}
369 370 # Mapping of revision integer to full node.
370 371 self._nodecache = {nullid: nullrev}
371 372 self._nodepos = None
372 373 self._compengine = 'zlib'
373 374 self._maxdeltachainspan = -1
374 375 self._withsparseread = False
375 376 self._sparserevlog = False
376 377 self._srdensitythreshold = 0.50
377 378 self._srmingapsize = 262144
378 379
379 380 # Make copy of flag processors so each revlog instance can support
380 381 # custom flags.
381 382 self._flagprocessors = dict(_flagprocessors)
382 383
383 384 # 2-tuple of file handles being used for active writing.
384 385 self._writinghandles = None
385 386
386 387 mmapindexthreshold = None
387 388 opts = getattr(opener, 'options', {}) or {}
388 389
389 390 if 'revlogv2' in opts:
390 391 versionflags = REVLOGV2 | FLAG_INLINE_DATA
391 392 elif 'revlogv1' in opts:
392 393 versionflags = REVLOGV1 | FLAG_INLINE_DATA
393 394 if 'generaldelta' in opts:
394 395 versionflags |= FLAG_GENERALDELTA
395 396 else:
396 397 versionflags = REVLOG_DEFAULT_VERSION
397 398
398 399 if 'chunkcachesize' in opts:
399 400 self._chunkcachesize = opts['chunkcachesize']
400 401 if 'maxchainlen' in opts:
401 402 self._maxchainlen = opts['maxchainlen']
402 403 if 'deltabothparents' in opts:
403 404 self._deltabothparents = opts['deltabothparents']
404 405 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
405 406 if 'compengine' in opts:
406 407 self._compengine = opts['compengine']
407 408 if 'maxdeltachainspan' in opts:
408 409 self._maxdeltachainspan = opts['maxdeltachainspan']
409 if mmaplargeindex and 'mmapindexthreshold' in opts:
410 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
410 411 mmapindexthreshold = opts['mmapindexthreshold']
411 412 self._sparserevlog = bool(opts.get('sparse-revlog', False))
412 413 withsparseread = bool(opts.get('with-sparse-read', False))
413 414 # sparse-revlog forces sparse-read
414 415 self._withsparseread = self._sparserevlog or withsparseread
415 416 if 'sparse-read-density-threshold' in opts:
416 417 self._srdensitythreshold = opts['sparse-read-density-threshold']
417 418 if 'sparse-read-min-gap-size' in opts:
418 419 self._srmingapsize = opts['sparse-read-min-gap-size']
419 420 if opts.get('enableellipsis'):
420 421 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
421 422
422 423 # revlog v0 doesn't have flag processors
423 424 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
424 425 _insertflagprocessor(flag, processor, self._flagprocessors)
425 426
426 427 if self._chunkcachesize <= 0:
427 428 raise error.RevlogError(_('revlog chunk cache size %r is not '
428 429 'greater than 0') % self._chunkcachesize)
429 430 elif self._chunkcachesize & (self._chunkcachesize - 1):
430 431 raise error.RevlogError(_('revlog chunk cache size %r is not a '
431 432 'power of 2') % self._chunkcachesize)
432 433
433 434 self._loadindex(versionflags, mmapindexthreshold)
434 435
435 436 def _loadindex(self, versionflags, mmapindexthreshold):
436 437 indexdata = ''
437 438 self._initempty = True
438 439 try:
439 440 with self._indexfp() as f:
440 441 if (mmapindexthreshold is not None and
441 442 self.opener.fstat(f).st_size >= mmapindexthreshold):
442 443 indexdata = util.buffer(util.mmapread(f))
443 444 else:
444 445 indexdata = f.read()
445 446 if len(indexdata) > 0:
446 447 versionflags = versionformat_unpack(indexdata[:4])[0]
447 448 self._initempty = False
448 449 except IOError as inst:
449 450 if inst.errno != errno.ENOENT:
450 451 raise
451 452
452 453 self.version = versionflags
453 454
454 455 flags = versionflags & ~0xFFFF
455 456 fmt = versionflags & 0xFFFF
456 457
457 458 if fmt == REVLOGV0:
458 459 if flags:
459 460 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
460 461 'revlog %s') %
461 462 (flags >> 16, fmt, self.indexfile))
462 463
463 464 self._inline = False
464 465 self._generaldelta = False
465 466
466 467 elif fmt == REVLOGV1:
467 468 if flags & ~REVLOGV1_FLAGS:
468 469 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
469 470 'revlog %s') %
470 471 (flags >> 16, fmt, self.indexfile))
471 472
472 473 self._inline = versionflags & FLAG_INLINE_DATA
473 474 self._generaldelta = versionflags & FLAG_GENERALDELTA
474 475
475 476 elif fmt == REVLOGV2:
476 477 if flags & ~REVLOGV2_FLAGS:
477 478 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
478 479 'revlog %s') %
479 480 (flags >> 16, fmt, self.indexfile))
480 481
481 482 self._inline = versionflags & FLAG_INLINE_DATA
482 483 # generaldelta implied by version 2 revlogs.
483 484 self._generaldelta = True
484 485
485 486 else:
486 487 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
487 488 (fmt, self.indexfile))
488 489
489 490 self._storedeltachains = True
490 491
491 492 self._io = revlogio()
492 493 if self.version == REVLOGV0:
493 494 self._io = revlogoldio()
494 495 try:
495 496 d = self._io.parseindex(indexdata, self._inline)
496 497 except (ValueError, IndexError):
497 498 raise error.RevlogError(_("index %s is corrupted") %
498 499 self.indexfile)
499 500 self.index, nodemap, self._chunkcache = d
500 501 if nodemap is not None:
501 502 self.nodemap = self._nodecache = nodemap
502 503 if not self._chunkcache:
503 504 self._chunkclear()
504 505 # revnum -> (chain-length, sum-delta-length)
505 506 self._chaininfocache = {}
506 507 # revlog header -> revlog compressor
507 508 self._decompressors = {}
508 509
509 510 @util.propertycache
510 511 def _compressor(self):
511 512 return util.compengines[self._compengine].revlogcompressor()
512 513
513 514 def _indexfp(self, mode='r'):
514 515 """file object for the revlog's index file"""
515 516 args = {r'mode': mode}
516 517 if mode != 'r':
517 518 args[r'checkambig'] = self._checkambig
518 519 if mode == 'w':
519 520 args[r'atomictemp'] = True
520 521 return self.opener(self.indexfile, **args)
521 522
522 523 def _datafp(self, mode='r'):
523 524 """file object for the revlog's data file"""
524 525 return self.opener(self.datafile, mode=mode)
525 526
526 527 @contextlib.contextmanager
527 528 def _datareadfp(self, existingfp=None):
528 529 """file object suitable to read data"""
529 530 # Use explicit file handle, if given.
530 531 if existingfp is not None:
531 532 yield existingfp
532 533
533 534 # Use a file handle being actively used for writes, if available.
534 535 # There is some danger to doing this because reads will seek the
535 536 # file. However, _writeentry() performs a SEEK_END before all writes,
536 537 # so we should be safe.
537 538 elif self._writinghandles:
538 539 if self._inline:
539 540 yield self._writinghandles[0]
540 541 else:
541 542 yield self._writinghandles[1]
542 543
543 544 # Otherwise open a new file handle.
544 545 else:
545 546 if self._inline:
546 547 func = self._indexfp
547 548 else:
548 549 func = self._datafp
549 550 with func() as fp:
550 551 yield fp
551 552
552 553 def tip(self):
553 554 return self.node(len(self.index) - 1)
554 555 def __contains__(self, rev):
555 556 return 0 <= rev < len(self)
556 557 def __len__(self):
557 558 return len(self.index)
558 559 def __iter__(self):
559 560 return iter(pycompat.xrange(len(self)))
560 561 def revs(self, start=0, stop=None):
561 562 """iterate over all rev in this revlog (from start to stop)"""
562 563 return storageutil.iterrevs(len(self), start=start, stop=stop)
563 564
564 565 @util.propertycache
565 566 def nodemap(self):
566 567 if self.index:
567 568 # populate mapping down to the initial node
568 569 node0 = self.index[0][7] # get around changelog filtering
569 570 self.rev(node0)
570 571 return self._nodecache
571 572
572 573 def hasnode(self, node):
573 574 try:
574 575 self.rev(node)
575 576 return True
576 577 except KeyError:
577 578 return False
578 579
579 580 def candelta(self, baserev, rev):
580 581 """whether two revisions (baserev, rev) can be delta-ed or not"""
581 582 # Disable delta if either rev requires a content-changing flag
582 583 # processor (ex. LFS). This is because such flag processor can alter
583 584 # the rawtext content that the delta will be based on, and two clients
584 585 # could have a same revlog node with different flags (i.e. different
585 586 # rawtext contents) and the delta could be incompatible.
586 587 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
587 588 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
588 589 return False
589 590 return True
590 591
591 592 def clearcaches(self):
592 593 self._revisioncache = None
593 594 self._chainbasecache.clear()
594 595 self._chunkcache = (0, '')
595 596 self._pcache = {}
596 597
597 598 try:
598 599 self._nodecache.clearcaches()
599 600 except AttributeError:
600 601 self._nodecache = {nullid: nullrev}
601 602 self._nodepos = None
602 603
603 604 def rev(self, node):
604 605 try:
605 606 return self._nodecache[node]
606 607 except TypeError:
607 608 raise
608 609 except error.RevlogError:
609 610 # parsers.c radix tree lookup failed
610 611 if node == wdirid or node in wdirfilenodeids:
611 612 raise error.WdirUnsupported
612 613 raise error.LookupError(node, self.indexfile, _('no node'))
613 614 except KeyError:
614 615 # pure python cache lookup failed
615 616 n = self._nodecache
616 617 i = self.index
617 618 p = self._nodepos
618 619 if p is None:
619 620 p = len(i) - 1
620 621 else:
621 622 assert p < len(i)
622 623 for r in pycompat.xrange(p, -1, -1):
623 624 v = i[r][7]
624 625 n[v] = r
625 626 if v == node:
626 627 self._nodepos = r - 1
627 628 return r
628 629 if node == wdirid or node in wdirfilenodeids:
629 630 raise error.WdirUnsupported
630 631 raise error.LookupError(node, self.indexfile, _('no node'))
631 632
632 633 # Accessors for index entries.
633 634
634 635 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
635 636 # are flags.
636 637 def start(self, rev):
637 638 return int(self.index[rev][0] >> 16)
638 639
639 640 def flags(self, rev):
640 641 return self.index[rev][0] & 0xFFFF
641 642
642 643 def length(self, rev):
643 644 return self.index[rev][1]
644 645
645 646 def rawsize(self, rev):
646 647 """return the length of the uncompressed text for a given revision"""
647 648 l = self.index[rev][2]
648 649 if l >= 0:
649 650 return l
650 651
651 652 t = self.revision(rev, raw=True)
652 653 return len(t)
653 654
654 655 def size(self, rev):
655 656 """length of non-raw text (processed by a "read" flag processor)"""
656 657 # fast path: if no "read" flag processor could change the content,
657 658 # size is rawsize. note: ELLIPSIS is known to not change the content.
658 659 flags = self.flags(rev)
659 660 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
660 661 return self.rawsize(rev)
661 662
662 663 return len(self.revision(rev, raw=False))
663 664
664 665 def chainbase(self, rev):
665 666 base = self._chainbasecache.get(rev)
666 667 if base is not None:
667 668 return base
668 669
669 670 index = self.index
670 671 iterrev = rev
671 672 base = index[iterrev][3]
672 673 while base != iterrev:
673 674 iterrev = base
674 675 base = index[iterrev][3]
675 676
676 677 self._chainbasecache[rev] = base
677 678 return base
678 679
679 680 def linkrev(self, rev):
680 681 return self.index[rev][4]
681 682
682 683 def parentrevs(self, rev):
683 684 try:
684 685 entry = self.index[rev]
685 686 except IndexError:
686 687 if rev == wdirrev:
687 688 raise error.WdirUnsupported
688 689 raise
689 690
690 691 return entry[5], entry[6]
691 692
692 693 # fast parentrevs(rev) where rev isn't filtered
693 694 _uncheckedparentrevs = parentrevs
694 695
695 696 def node(self, rev):
696 697 try:
697 698 return self.index[rev][7]
698 699 except IndexError:
699 700 if rev == wdirrev:
700 701 raise error.WdirUnsupported
701 702 raise
702 703
703 704 # Derived from index values.
704 705
705 706 def end(self, rev):
706 707 return self.start(rev) + self.length(rev)
707 708
708 709 def parents(self, node):
709 710 i = self.index
710 711 d = i[self.rev(node)]
711 712 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
712 713
713 714 def chainlen(self, rev):
714 715 return self._chaininfo(rev)[0]
715 716
716 717 def _chaininfo(self, rev):
717 718 chaininfocache = self._chaininfocache
718 719 if rev in chaininfocache:
719 720 return chaininfocache[rev]
720 721 index = self.index
721 722 generaldelta = self._generaldelta
722 723 iterrev = rev
723 724 e = index[iterrev]
724 725 clen = 0
725 726 compresseddeltalen = 0
726 727 while iterrev != e[3]:
727 728 clen += 1
728 729 compresseddeltalen += e[1]
729 730 if generaldelta:
730 731 iterrev = e[3]
731 732 else:
732 733 iterrev -= 1
733 734 if iterrev in chaininfocache:
734 735 t = chaininfocache[iterrev]
735 736 clen += t[0]
736 737 compresseddeltalen += t[1]
737 738 break
738 739 e = index[iterrev]
739 740 else:
740 741 # Add text length of base since decompressing that also takes
741 742 # work. For cache hits the length is already included.
742 743 compresseddeltalen += e[1]
743 744 r = (clen, compresseddeltalen)
744 745 chaininfocache[rev] = r
745 746 return r
746 747
747 748 def _deltachain(self, rev, stoprev=None):
748 749 """Obtain the delta chain for a revision.
749 750
750 751 ``stoprev`` specifies a revision to stop at. If not specified, we
751 752 stop at the base of the chain.
752 753
753 754 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
754 755 revs in ascending order and ``stopped`` is a bool indicating whether
755 756 ``stoprev`` was hit.
756 757 """
757 758 # Try C implementation.
758 759 try:
759 760 return self.index.deltachain(rev, stoprev, self._generaldelta)
760 761 except AttributeError:
761 762 pass
762 763
763 764 chain = []
764 765
765 766 # Alias to prevent attribute lookup in tight loop.
766 767 index = self.index
767 768 generaldelta = self._generaldelta
768 769
769 770 iterrev = rev
770 771 e = index[iterrev]
771 772 while iterrev != e[3] and iterrev != stoprev:
772 773 chain.append(iterrev)
773 774 if generaldelta:
774 775 iterrev = e[3]
775 776 else:
776 777 iterrev -= 1
777 778 e = index[iterrev]
778 779
779 780 if iterrev == stoprev:
780 781 stopped = True
781 782 else:
782 783 chain.append(iterrev)
783 784 stopped = False
784 785
785 786 chain.reverse()
786 787 return chain, stopped
787 788
788 789 def ancestors(self, revs, stoprev=0, inclusive=False):
789 790 """Generate the ancestors of 'revs' in reverse revision order.
790 791 Does not generate revs lower than stoprev.
791 792
792 793 See the documentation for ancestor.lazyancestors for more details."""
793 794
794 795 # first, make sure start revisions aren't filtered
795 796 revs = list(revs)
796 797 checkrev = self.node
797 798 for r in revs:
798 799 checkrev(r)
799 800 # and we're sure ancestors aren't filtered as well
800 801
801 802 if rustext is not None:
802 803 lazyancestors = rustext.ancestor.LazyAncestors
803 804 arg = self.index
804 805 elif util.safehasattr(parsers, 'rustlazyancestors'):
805 806 lazyancestors = ancestor.rustlazyancestors
806 807 arg = self.index
807 808 else:
808 809 lazyancestors = ancestor.lazyancestors
809 810 arg = self._uncheckedparentrevs
810 811 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
811 812
812 813 def descendants(self, revs):
813 814 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
814 815
815 816 def findcommonmissing(self, common=None, heads=None):
816 817 """Return a tuple of the ancestors of common and the ancestors of heads
817 818 that are not ancestors of common. In revset terminology, we return the
818 819 tuple:
819 820
820 821 ::common, (::heads) - (::common)
821 822
822 823 The list is sorted by revision number, meaning it is
823 824 topologically sorted.
824 825
825 826 'heads' and 'common' are both lists of node IDs. If heads is
826 827 not supplied, uses all of the revlog's heads. If common is not
827 828 supplied, uses nullid."""
828 829 if common is None:
829 830 common = [nullid]
830 831 if heads is None:
831 832 heads = self.heads()
832 833
833 834 common = [self.rev(n) for n in common]
834 835 heads = [self.rev(n) for n in heads]
835 836
836 837 # we want the ancestors, but inclusive
837 838 class lazyset(object):
838 839 def __init__(self, lazyvalues):
839 840 self.addedvalues = set()
840 841 self.lazyvalues = lazyvalues
841 842
842 843 def __contains__(self, value):
843 844 return value in self.addedvalues or value in self.lazyvalues
844 845
845 846 def __iter__(self):
846 847 added = self.addedvalues
847 848 for r in added:
848 849 yield r
849 850 for r in self.lazyvalues:
850 851 if not r in added:
851 852 yield r
852 853
853 854 def add(self, value):
854 855 self.addedvalues.add(value)
855 856
856 857 def update(self, values):
857 858 self.addedvalues.update(values)
858 859
859 860 has = lazyset(self.ancestors(common))
860 861 has.add(nullrev)
861 862 has.update(common)
862 863
863 864 # take all ancestors from heads that aren't in has
864 865 missing = set()
865 866 visit = collections.deque(r for r in heads if r not in has)
866 867 while visit:
867 868 r = visit.popleft()
868 869 if r in missing:
869 870 continue
870 871 else:
871 872 missing.add(r)
872 873 for p in self.parentrevs(r):
873 874 if p not in has:
874 875 visit.append(p)
875 876 missing = list(missing)
876 877 missing.sort()
877 878 return has, [self.node(miss) for miss in missing]
878 879
879 880 def incrementalmissingrevs(self, common=None):
880 881 """Return an object that can be used to incrementally compute the
881 882 revision numbers of the ancestors of arbitrary sets that are not
882 883 ancestors of common. This is an ancestor.incrementalmissingancestors
883 884 object.
884 885
885 886 'common' is a list of revision numbers. If common is not supplied, uses
886 887 nullrev.
887 888 """
888 889 if common is None:
889 890 common = [nullrev]
890 891
891 892 return ancestor.incrementalmissingancestors(self.parentrevs, common)
892 893
893 894 def findmissingrevs(self, common=None, heads=None):
894 895 """Return the revision numbers of the ancestors of heads that
895 896 are not ancestors of common.
896 897
897 898 More specifically, return a list of revision numbers corresponding to
898 899 nodes N such that every N satisfies the following constraints:
899 900
900 901 1. N is an ancestor of some node in 'heads'
901 902 2. N is not an ancestor of any node in 'common'
902 903
903 904 The list is sorted by revision number, meaning it is
904 905 topologically sorted.
905 906
906 907 'heads' and 'common' are both lists of revision numbers. If heads is
907 908 not supplied, uses all of the revlog's heads. If common is not
908 909 supplied, uses nullid."""
909 910 if common is None:
910 911 common = [nullrev]
911 912 if heads is None:
912 913 heads = self.headrevs()
913 914
914 915 inc = self.incrementalmissingrevs(common=common)
915 916 return inc.missingancestors(heads)
916 917
917 918 def findmissing(self, common=None, heads=None):
918 919 """Return the ancestors of heads that are not ancestors of common.
919 920
920 921 More specifically, return a list of nodes N such that every N
921 922 satisfies the following constraints:
922 923
923 924 1. N is an ancestor of some node in 'heads'
924 925 2. N is not an ancestor of any node in 'common'
925 926
926 927 The list is sorted by revision number, meaning it is
927 928 topologically sorted.
928 929
929 930 'heads' and 'common' are both lists of node IDs. If heads is
930 931 not supplied, uses all of the revlog's heads. If common is not
931 932 supplied, uses nullid."""
932 933 if common is None:
933 934 common = [nullid]
934 935 if heads is None:
935 936 heads = self.heads()
936 937
937 938 common = [self.rev(n) for n in common]
938 939 heads = [self.rev(n) for n in heads]
939 940
940 941 inc = self.incrementalmissingrevs(common=common)
941 942 return [self.node(r) for r in inc.missingancestors(heads)]
942 943
943 944 def nodesbetween(self, roots=None, heads=None):
944 945 """Return a topological path from 'roots' to 'heads'.
945 946
946 947 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
947 948 topologically sorted list of all nodes N that satisfy both of
948 949 these constraints:
949 950
950 951 1. N is a descendant of some node in 'roots'
951 952 2. N is an ancestor of some node in 'heads'
952 953
953 954 Every node is considered to be both a descendant and an ancestor
954 955 of itself, so every reachable node in 'roots' and 'heads' will be
955 956 included in 'nodes'.
956 957
957 958 'outroots' is the list of reachable nodes in 'roots', i.e., the
958 959 subset of 'roots' that is returned in 'nodes'. Likewise,
959 960 'outheads' is the subset of 'heads' that is also in 'nodes'.
960 961
961 962 'roots' and 'heads' are both lists of node IDs. If 'roots' is
962 963 unspecified, uses nullid as the only root. If 'heads' is
963 964 unspecified, uses list of all of the revlog's heads."""
964 965 nonodes = ([], [], [])
965 966 if roots is not None:
966 967 roots = list(roots)
967 968 if not roots:
968 969 return nonodes
969 970 lowestrev = min([self.rev(n) for n in roots])
970 971 else:
971 972 roots = [nullid] # Everybody's a descendant of nullid
972 973 lowestrev = nullrev
973 974 if (lowestrev == nullrev) and (heads is None):
974 975 # We want _all_ the nodes!
975 976 return ([self.node(r) for r in self], [nullid], list(self.heads()))
976 977 if heads is None:
977 978 # All nodes are ancestors, so the latest ancestor is the last
978 979 # node.
979 980 highestrev = len(self) - 1
980 981 # Set ancestors to None to signal that every node is an ancestor.
981 982 ancestors = None
982 983 # Set heads to an empty dictionary for later discovery of heads
983 984 heads = {}
984 985 else:
985 986 heads = list(heads)
986 987 if not heads:
987 988 return nonodes
988 989 ancestors = set()
989 990 # Turn heads into a dictionary so we can remove 'fake' heads.
990 991 # Also, later we will be using it to filter out the heads we can't
991 992 # find from roots.
992 993 heads = dict.fromkeys(heads, False)
993 994 # Start at the top and keep marking parents until we're done.
994 995 nodestotag = set(heads)
995 996 # Remember where the top was so we can use it as a limit later.
996 997 highestrev = max([self.rev(n) for n in nodestotag])
997 998 while nodestotag:
998 999 # grab a node to tag
999 1000 n = nodestotag.pop()
1000 1001 # Never tag nullid
1001 1002 if n == nullid:
1002 1003 continue
1003 1004 # A node's revision number represents its place in a
1004 1005 # topologically sorted list of nodes.
1005 1006 r = self.rev(n)
1006 1007 if r >= lowestrev:
1007 1008 if n not in ancestors:
1008 1009 # If we are possibly a descendant of one of the roots
1009 1010 # and we haven't already been marked as an ancestor
1010 1011 ancestors.add(n) # Mark as ancestor
1011 1012 # Add non-nullid parents to list of nodes to tag.
1012 1013 nodestotag.update([p for p in self.parents(n) if
1013 1014 p != nullid])
1014 1015 elif n in heads: # We've seen it before, is it a fake head?
1015 1016 # So it is, real heads should not be the ancestors of
1016 1017 # any other heads.
1017 1018 heads.pop(n)
1018 1019 if not ancestors:
1019 1020 return nonodes
1020 1021 # Now that we have our set of ancestors, we want to remove any
1021 1022 # roots that are not ancestors.
1022 1023
1023 1024 # If one of the roots was nullid, everything is included anyway.
1024 1025 if lowestrev > nullrev:
1025 1026 # But, since we weren't, let's recompute the lowest rev to not
1026 1027 # include roots that aren't ancestors.
1027 1028
1028 1029 # Filter out roots that aren't ancestors of heads
1029 1030 roots = [root for root in roots if root in ancestors]
1030 1031 # Recompute the lowest revision
1031 1032 if roots:
1032 1033 lowestrev = min([self.rev(root) for root in roots])
1033 1034 else:
1034 1035 # No more roots? Return empty list
1035 1036 return nonodes
1036 1037 else:
1037 1038 # We are descending from nullid, and don't need to care about
1038 1039 # any other roots.
1039 1040 lowestrev = nullrev
1040 1041 roots = [nullid]
1041 1042 # Transform our roots list into a set.
1042 1043 descendants = set(roots)
1043 1044 # Also, keep the original roots so we can filter out roots that aren't
1044 1045 # 'real' roots (i.e. are descended from other roots).
1045 1046 roots = descendants.copy()
1046 1047 # Our topologically sorted list of output nodes.
1047 1048 orderedout = []
1048 1049 # Don't start at nullid since we don't want nullid in our output list,
1049 1050 # and if nullid shows up in descendants, empty parents will look like
1050 1051 # they're descendants.
1051 1052 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1052 1053 n = self.node(r)
1053 1054 isdescendant = False
1054 1055 if lowestrev == nullrev: # Everybody is a descendant of nullid
1055 1056 isdescendant = True
1056 1057 elif n in descendants:
1057 1058 # n is already a descendant
1058 1059 isdescendant = True
1059 1060 # This check only needs to be done here because all the roots
1060 1061 # will start being marked is descendants before the loop.
1061 1062 if n in roots:
1062 1063 # If n was a root, check if it's a 'real' root.
1063 1064 p = tuple(self.parents(n))
1064 1065 # If any of its parents are descendants, it's not a root.
1065 1066 if (p[0] in descendants) or (p[1] in descendants):
1066 1067 roots.remove(n)
1067 1068 else:
1068 1069 p = tuple(self.parents(n))
1069 1070 # A node is a descendant if either of its parents are
1070 1071 # descendants. (We seeded the dependents list with the roots
1071 1072 # up there, remember?)
1072 1073 if (p[0] in descendants) or (p[1] in descendants):
1073 1074 descendants.add(n)
1074 1075 isdescendant = True
1075 1076 if isdescendant and ((ancestors is None) or (n in ancestors)):
1076 1077 # Only include nodes that are both descendants and ancestors.
1077 1078 orderedout.append(n)
1078 1079 if (ancestors is not None) and (n in heads):
1079 1080 # We're trying to figure out which heads are reachable
1080 1081 # from roots.
1081 1082 # Mark this head as having been reached
1082 1083 heads[n] = True
1083 1084 elif ancestors is None:
1084 1085 # Otherwise, we're trying to discover the heads.
1085 1086 # Assume this is a head because if it isn't, the next step
1086 1087 # will eventually remove it.
1087 1088 heads[n] = True
1088 1089 # But, obviously its parents aren't.
1089 1090 for p in self.parents(n):
1090 1091 heads.pop(p, None)
1091 1092 heads = [head for head, flag in heads.iteritems() if flag]
1092 1093 roots = list(roots)
1093 1094 assert orderedout
1094 1095 assert roots
1095 1096 assert heads
1096 1097 return (orderedout, roots, heads)
1097 1098
1098 1099 def headrevs(self):
1099 1100 try:
1100 1101 return self.index.headrevs()
1101 1102 except AttributeError:
1102 1103 return self._headrevs()
1103 1104
1104 1105 def computephases(self, roots):
1105 1106 return self.index.computephasesmapsets(roots)
1106 1107
1107 1108 def _headrevs(self):
1108 1109 count = len(self)
1109 1110 if not count:
1110 1111 return [nullrev]
1111 1112 # we won't iter over filtered rev so nobody is a head at start
1112 1113 ishead = [0] * (count + 1)
1113 1114 index = self.index
1114 1115 for r in self:
1115 1116 ishead[r] = 1 # I may be an head
1116 1117 e = index[r]
1117 1118 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1118 1119 return [r for r, val in enumerate(ishead) if val]
1119 1120
1120 1121 def heads(self, start=None, stop=None):
1121 1122 """return the list of all nodes that have no children
1122 1123
1123 1124 if start is specified, only heads that are descendants of
1124 1125 start will be returned
1125 1126 if stop is specified, it will consider all the revs from stop
1126 1127 as if they had no children
1127 1128 """
1128 1129 if start is None and stop is None:
1129 1130 if not len(self):
1130 1131 return [nullid]
1131 1132 return [self.node(r) for r in self.headrevs()]
1132 1133
1133 1134 if start is None:
1134 1135 start = nullrev
1135 1136 else:
1136 1137 start = self.rev(start)
1137 1138
1138 1139 stoprevs = set(self.rev(n) for n in stop or [])
1139 1140
1140 1141 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1141 1142 stoprevs=stoprevs)
1142 1143
1143 1144 return [self.node(rev) for rev in revs]
1144 1145
1145 1146 def children(self, node):
1146 1147 """find the children of a given node"""
1147 1148 c = []
1148 1149 p = self.rev(node)
1149 1150 for r in self.revs(start=p + 1):
1150 1151 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1151 1152 if prevs:
1152 1153 for pr in prevs:
1153 1154 if pr == p:
1154 1155 c.append(self.node(r))
1155 1156 elif p == nullrev:
1156 1157 c.append(self.node(r))
1157 1158 return c
1158 1159
1159 1160 def commonancestorsheads(self, a, b):
1160 1161 """calculate all the heads of the common ancestors of nodes a and b"""
1161 1162 a, b = self.rev(a), self.rev(b)
1162 1163 ancs = self._commonancestorsheads(a, b)
1163 1164 return pycompat.maplist(self.node, ancs)
1164 1165
1165 1166 def _commonancestorsheads(self, *revs):
1166 1167 """calculate all the heads of the common ancestors of revs"""
1167 1168 try:
1168 1169 ancs = self.index.commonancestorsheads(*revs)
1169 1170 except (AttributeError, OverflowError): # C implementation failed
1170 1171 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1171 1172 return ancs
1172 1173
1173 1174 def isancestor(self, a, b):
1174 1175 """return True if node a is an ancestor of node b
1175 1176
1176 1177 A revision is considered an ancestor of itself."""
1177 1178 a, b = self.rev(a), self.rev(b)
1178 1179 return self.isancestorrev(a, b)
1179 1180
1180 1181 def isancestorrev(self, a, b):
1181 1182 """return True if revision a is an ancestor of revision b
1182 1183
1183 1184 A revision is considered an ancestor of itself.
1184 1185
1185 1186 The implementation of this is trivial but the use of
1186 1187 commonancestorsheads is not."""
1187 1188 if a == nullrev:
1188 1189 return True
1189 1190 elif a == b:
1190 1191 return True
1191 1192 elif a > b:
1192 1193 return False
1193 1194 return a in self._commonancestorsheads(a, b)
1194 1195
1195 1196 def ancestor(self, a, b):
1196 1197 """calculate the "best" common ancestor of nodes a and b"""
1197 1198
1198 1199 a, b = self.rev(a), self.rev(b)
1199 1200 try:
1200 1201 ancs = self.index.ancestors(a, b)
1201 1202 except (AttributeError, OverflowError):
1202 1203 ancs = ancestor.ancestors(self.parentrevs, a, b)
1203 1204 if ancs:
1204 1205 # choose a consistent winner when there's a tie
1205 1206 return min(map(self.node, ancs))
1206 1207 return nullid
1207 1208
1208 1209 def _match(self, id):
1209 1210 if isinstance(id, int):
1210 1211 # rev
1211 1212 return self.node(id)
1212 1213 if len(id) == 20:
1213 1214 # possibly a binary node
1214 1215 # odds of a binary node being all hex in ASCII are 1 in 10**25
1215 1216 try:
1216 1217 node = id
1217 1218 self.rev(node) # quick search the index
1218 1219 return node
1219 1220 except error.LookupError:
1220 1221 pass # may be partial hex id
1221 1222 try:
1222 1223 # str(rev)
1223 1224 rev = int(id)
1224 1225 if "%d" % rev != id:
1225 1226 raise ValueError
1226 1227 if rev < 0:
1227 1228 rev = len(self) + rev
1228 1229 if rev < 0 or rev >= len(self):
1229 1230 raise ValueError
1230 1231 return self.node(rev)
1231 1232 except (ValueError, OverflowError):
1232 1233 pass
1233 1234 if len(id) == 40:
1234 1235 try:
1235 1236 # a full hex nodeid?
1236 1237 node = bin(id)
1237 1238 self.rev(node)
1238 1239 return node
1239 1240 except (TypeError, error.LookupError):
1240 1241 pass
1241 1242
1242 1243 def _partialmatch(self, id):
1243 1244 # we don't care wdirfilenodeids as they should be always full hash
1244 1245 maybewdir = wdirhex.startswith(id)
1245 1246 try:
1246 1247 partial = self.index.partialmatch(id)
1247 1248 if partial and self.hasnode(partial):
1248 1249 if maybewdir:
1249 1250 # single 'ff...' match in radix tree, ambiguous with wdir
1250 1251 raise error.RevlogError
1251 1252 return partial
1252 1253 if maybewdir:
1253 1254 # no 'ff...' match in radix tree, wdir identified
1254 1255 raise error.WdirUnsupported
1255 1256 return None
1256 1257 except error.RevlogError:
1257 1258 # parsers.c radix tree lookup gave multiple matches
1258 1259 # fast path: for unfiltered changelog, radix tree is accurate
1259 1260 if not getattr(self, 'filteredrevs', None):
1260 1261 raise error.AmbiguousPrefixLookupError(
1261 1262 id, self.indexfile, _('ambiguous identifier'))
1262 1263 # fall through to slow path that filters hidden revisions
1263 1264 except (AttributeError, ValueError):
1264 1265 # we are pure python, or key was too short to search radix tree
1265 1266 pass
1266 1267
1267 1268 if id in self._pcache:
1268 1269 return self._pcache[id]
1269 1270
1270 1271 if len(id) <= 40:
1271 1272 try:
1272 1273 # hex(node)[:...]
1273 1274 l = len(id) // 2 # grab an even number of digits
1274 1275 prefix = bin(id[:l * 2])
1275 1276 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1276 1277 nl = [n for n in nl if hex(n).startswith(id) and
1277 1278 self.hasnode(n)]
1278 1279 if nullhex.startswith(id):
1279 1280 nl.append(nullid)
1280 1281 if len(nl) > 0:
1281 1282 if len(nl) == 1 and not maybewdir:
1282 1283 self._pcache[id] = nl[0]
1283 1284 return nl[0]
1284 1285 raise error.AmbiguousPrefixLookupError(
1285 1286 id, self.indexfile, _('ambiguous identifier'))
1286 1287 if maybewdir:
1287 1288 raise error.WdirUnsupported
1288 1289 return None
1289 1290 except TypeError:
1290 1291 pass
1291 1292
1292 1293 def lookup(self, id):
1293 1294 """locate a node based on:
1294 1295 - revision number or str(revision number)
1295 1296 - nodeid or subset of hex nodeid
1296 1297 """
1297 1298 n = self._match(id)
1298 1299 if n is not None:
1299 1300 return n
1300 1301 n = self._partialmatch(id)
1301 1302 if n:
1302 1303 return n
1303 1304
1304 1305 raise error.LookupError(id, self.indexfile, _('no match found'))
1305 1306
1306 1307 def shortest(self, node, minlength=1):
1307 1308 """Find the shortest unambiguous prefix that matches node."""
1308 1309 def isvalid(prefix):
1309 1310 try:
1310 1311 node = self._partialmatch(prefix)
1311 1312 except error.AmbiguousPrefixLookupError:
1312 1313 return False
1313 1314 except error.WdirUnsupported:
1314 1315 # single 'ff...' match
1315 1316 return True
1316 1317 if node is None:
1317 1318 raise error.LookupError(node, self.indexfile, _('no node'))
1318 1319 return True
1319 1320
1320 1321 def maybewdir(prefix):
1321 1322 return all(c == 'f' for c in prefix)
1322 1323
1323 1324 hexnode = hex(node)
1324 1325
1325 1326 def disambiguate(hexnode, minlength):
1326 1327 """Disambiguate against wdirid."""
1327 1328 for length in range(minlength, 41):
1328 1329 prefix = hexnode[:length]
1329 1330 if not maybewdir(prefix):
1330 1331 return prefix
1331 1332
1332 1333 if not getattr(self, 'filteredrevs', None):
1333 1334 try:
1334 1335 length = max(self.index.shortest(node), minlength)
1335 1336 return disambiguate(hexnode, length)
1336 1337 except error.RevlogError:
1337 1338 if node != wdirid:
1338 1339 raise error.LookupError(node, self.indexfile, _('no node'))
1339 1340 except AttributeError:
1340 1341 # Fall through to pure code
1341 1342 pass
1342 1343
1343 1344 if node == wdirid:
1344 1345 for length in range(minlength, 41):
1345 1346 prefix = hexnode[:length]
1346 1347 if isvalid(prefix):
1347 1348 return prefix
1348 1349
1349 1350 for length in range(minlength, 41):
1350 1351 prefix = hexnode[:length]
1351 1352 if isvalid(prefix):
1352 1353 return disambiguate(hexnode, length)
1353 1354
1354 1355 def cmp(self, node, text):
1355 1356 """compare text with a given file revision
1356 1357
1357 1358 returns True if text is different than what is stored.
1358 1359 """
1359 1360 p1, p2 = self.parents(node)
1360 1361 return storageutil.hashrevisionsha1(text, p1, p2) != node
1361 1362
1362 1363 def _cachesegment(self, offset, data):
1363 1364 """Add a segment to the revlog cache.
1364 1365
1365 1366 Accepts an absolute offset and the data that is at that location.
1366 1367 """
1367 1368 o, d = self._chunkcache
1368 1369 # try to add to existing cache
1369 1370 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1370 1371 self._chunkcache = o, d + data
1371 1372 else:
1372 1373 self._chunkcache = offset, data
1373 1374
1374 1375 def _readsegment(self, offset, length, df=None):
1375 1376 """Load a segment of raw data from the revlog.
1376 1377
1377 1378 Accepts an absolute offset, length to read, and an optional existing
1378 1379 file handle to read from.
1379 1380
1380 1381 If an existing file handle is passed, it will be seeked and the
1381 1382 original seek position will NOT be restored.
1382 1383
1383 1384 Returns a str or buffer of raw byte data.
1384 1385
1385 1386 Raises if the requested number of bytes could not be read.
1386 1387 """
1387 1388 # Cache data both forward and backward around the requested
1388 1389 # data, in a fixed size window. This helps speed up operations
1389 1390 # involving reading the revlog backwards.
1390 1391 cachesize = self._chunkcachesize
1391 1392 realoffset = offset & ~(cachesize - 1)
1392 1393 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1393 1394 - realoffset)
1394 1395 with self._datareadfp(df) as df:
1395 1396 df.seek(realoffset)
1396 1397 d = df.read(reallength)
1397 1398
1398 1399 self._cachesegment(realoffset, d)
1399 1400 if offset != realoffset or reallength != length:
1400 1401 startoffset = offset - realoffset
1401 1402 if len(d) - startoffset < length:
1402 1403 raise error.RevlogError(
1403 1404 _('partial read of revlog %s; expected %d bytes from '
1404 1405 'offset %d, got %d') %
1405 1406 (self.indexfile if self._inline else self.datafile,
1406 1407 length, realoffset, len(d) - startoffset))
1407 1408
1408 1409 return util.buffer(d, startoffset, length)
1409 1410
1410 1411 if len(d) < length:
1411 1412 raise error.RevlogError(
1412 1413 _('partial read of revlog %s; expected %d bytes from offset '
1413 1414 '%d, got %d') %
1414 1415 (self.indexfile if self._inline else self.datafile,
1415 1416 length, offset, len(d)))
1416 1417
1417 1418 return d
1418 1419
1419 1420 def _getsegment(self, offset, length, df=None):
1420 1421 """Obtain a segment of raw data from the revlog.
1421 1422
1422 1423 Accepts an absolute offset, length of bytes to obtain, and an
1423 1424 optional file handle to the already-opened revlog. If the file
1424 1425 handle is used, it's original seek position will not be preserved.
1425 1426
1426 1427 Requests for data may be returned from a cache.
1427 1428
1428 1429 Returns a str or a buffer instance of raw byte data.
1429 1430 """
1430 1431 o, d = self._chunkcache
1431 1432 l = len(d)
1432 1433
1433 1434 # is it in the cache?
1434 1435 cachestart = offset - o
1435 1436 cacheend = cachestart + length
1436 1437 if cachestart >= 0 and cacheend <= l:
1437 1438 if cachestart == 0 and cacheend == l:
1438 1439 return d # avoid a copy
1439 1440 return util.buffer(d, cachestart, cacheend - cachestart)
1440 1441
1441 1442 return self._readsegment(offset, length, df=df)
1442 1443
1443 1444 def _getsegmentforrevs(self, startrev, endrev, df=None):
1444 1445 """Obtain a segment of raw data corresponding to a range of revisions.
1445 1446
1446 1447 Accepts the start and end revisions and an optional already-open
1447 1448 file handle to be used for reading. If the file handle is read, its
1448 1449 seek position will not be preserved.
1449 1450
1450 1451 Requests for data may be satisfied by a cache.
1451 1452
1452 1453 Returns a 2-tuple of (offset, data) for the requested range of
1453 1454 revisions. Offset is the integer offset from the beginning of the
1454 1455 revlog and data is a str or buffer of the raw byte data.
1455 1456
1456 1457 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1457 1458 to determine where each revision's data begins and ends.
1458 1459 """
1459 1460 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1460 1461 # (functions are expensive).
1461 1462 index = self.index
1462 1463 istart = index[startrev]
1463 1464 start = int(istart[0] >> 16)
1464 1465 if startrev == endrev:
1465 1466 end = start + istart[1]
1466 1467 else:
1467 1468 iend = index[endrev]
1468 1469 end = int(iend[0] >> 16) + iend[1]
1469 1470
1470 1471 if self._inline:
1471 1472 start += (startrev + 1) * self._io.size
1472 1473 end += (endrev + 1) * self._io.size
1473 1474 length = end - start
1474 1475
1475 1476 return start, self._getsegment(start, length, df=df)
1476 1477
1477 1478 def _chunk(self, rev, df=None):
1478 1479 """Obtain a single decompressed chunk for a revision.
1479 1480
1480 1481 Accepts an integer revision and an optional already-open file handle
1481 1482 to be used for reading. If used, the seek position of the file will not
1482 1483 be preserved.
1483 1484
1484 1485 Returns a str holding uncompressed data for the requested revision.
1485 1486 """
1486 1487 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1487 1488
1488 1489 def _chunks(self, revs, df=None, targetsize=None):
1489 1490 """Obtain decompressed chunks for the specified revisions.
1490 1491
1491 1492 Accepts an iterable of numeric revisions that are assumed to be in
1492 1493 ascending order. Also accepts an optional already-open file handle
1493 1494 to be used for reading. If used, the seek position of the file will
1494 1495 not be preserved.
1495 1496
1496 1497 This function is similar to calling ``self._chunk()`` multiple times,
1497 1498 but is faster.
1498 1499
1499 1500 Returns a list with decompressed data for each requested revision.
1500 1501 """
1501 1502 if not revs:
1502 1503 return []
1503 1504 start = self.start
1504 1505 length = self.length
1505 1506 inline = self._inline
1506 1507 iosize = self._io.size
1507 1508 buffer = util.buffer
1508 1509
1509 1510 l = []
1510 1511 ladd = l.append
1511 1512
1512 1513 if not self._withsparseread:
1513 1514 slicedchunks = (revs,)
1514 1515 else:
1515 1516 slicedchunks = deltautil.slicechunk(self, revs,
1516 1517 targetsize=targetsize)
1517 1518
1518 1519 for revschunk in slicedchunks:
1519 1520 firstrev = revschunk[0]
1520 1521 # Skip trailing revisions with empty diff
1521 1522 for lastrev in revschunk[::-1]:
1522 1523 if length(lastrev) != 0:
1523 1524 break
1524 1525
1525 1526 try:
1526 1527 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1527 1528 except OverflowError:
1528 1529 # issue4215 - we can't cache a run of chunks greater than
1529 1530 # 2G on Windows
1530 1531 return [self._chunk(rev, df=df) for rev in revschunk]
1531 1532
1532 1533 decomp = self.decompress
1533 1534 for rev in revschunk:
1534 1535 chunkstart = start(rev)
1535 1536 if inline:
1536 1537 chunkstart += (rev + 1) * iosize
1537 1538 chunklength = length(rev)
1538 1539 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1539 1540
1540 1541 return l
1541 1542
1542 1543 def _chunkclear(self):
1543 1544 """Clear the raw chunk cache."""
1544 1545 self._chunkcache = (0, '')
1545 1546
1546 1547 def deltaparent(self, rev):
1547 1548 """return deltaparent of the given revision"""
1548 1549 base = self.index[rev][3]
1549 1550 if base == rev:
1550 1551 return nullrev
1551 1552 elif self._generaldelta:
1552 1553 return base
1553 1554 else:
1554 1555 return rev - 1
1555 1556
1556 1557 def issnapshot(self, rev):
1557 1558 """tells whether rev is a snapshot
1558 1559 """
1559 1560 if not self._sparserevlog:
1560 1561 return self.deltaparent(rev) == nullrev
1561 1562 elif util.safehasattr(self.index, 'issnapshot'):
1562 1563 # directly assign the method to cache the testing and access
1563 1564 self.issnapshot = self.index.issnapshot
1564 1565 return self.issnapshot(rev)
1565 1566 if rev == nullrev:
1566 1567 return True
1567 1568 entry = self.index[rev]
1568 1569 base = entry[3]
1569 1570 if base == rev:
1570 1571 return True
1571 1572 if base == nullrev:
1572 1573 return True
1573 1574 p1 = entry[5]
1574 1575 p2 = entry[6]
1575 1576 if base == p1 or base == p2:
1576 1577 return False
1577 1578 return self.issnapshot(base)
1578 1579
1579 1580 def snapshotdepth(self, rev):
1580 1581 """number of snapshot in the chain before this one"""
1581 1582 if not self.issnapshot(rev):
1582 1583 raise error.ProgrammingError('revision %d not a snapshot')
1583 1584 return len(self._deltachain(rev)[0]) - 1
1584 1585
1585 1586 def revdiff(self, rev1, rev2):
1586 1587 """return or calculate a delta between two revisions
1587 1588
1588 1589 The delta calculated is in binary form and is intended to be written to
1589 1590 revlog data directly. So this function needs raw revision data.
1590 1591 """
1591 1592 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1592 1593 return bytes(self._chunk(rev2))
1593 1594
1594 1595 return mdiff.textdiff(self.revision(rev1, raw=True),
1595 1596 self.revision(rev2, raw=True))
1596 1597
1597 1598 def revision(self, nodeorrev, _df=None, raw=False):
1598 1599 """return an uncompressed revision of a given node or revision
1599 1600 number.
1600 1601
1601 1602 _df - an existing file handle to read from. (internal-only)
1602 1603 raw - an optional argument specifying if the revision data is to be
1603 1604 treated as raw data when applying flag transforms. 'raw' should be set
1604 1605 to True when generating changegroups or in debug commands.
1605 1606 """
1606 1607 if isinstance(nodeorrev, int):
1607 1608 rev = nodeorrev
1608 1609 node = self.node(rev)
1609 1610 else:
1610 1611 node = nodeorrev
1611 1612 rev = None
1612 1613
1613 1614 cachedrev = None
1614 1615 flags = None
1615 1616 rawtext = None
1616 1617 if node == nullid:
1617 1618 return ""
1618 1619 if self._revisioncache:
1619 1620 if self._revisioncache[0] == node:
1620 1621 # _cache only stores rawtext
1621 1622 if raw:
1622 1623 return self._revisioncache[2]
1623 1624 # duplicated, but good for perf
1624 1625 if rev is None:
1625 1626 rev = self.rev(node)
1626 1627 if flags is None:
1627 1628 flags = self.flags(rev)
1628 1629 # no extra flags set, no flag processor runs, text = rawtext
1629 1630 if flags == REVIDX_DEFAULT_FLAGS:
1630 1631 return self._revisioncache[2]
1631 1632 # rawtext is reusable. need to run flag processor
1632 1633 rawtext = self._revisioncache[2]
1633 1634
1634 1635 cachedrev = self._revisioncache[1]
1635 1636
1636 1637 # look up what we need to read
1637 1638 if rawtext is None:
1638 1639 if rev is None:
1639 1640 rev = self.rev(node)
1640 1641
1641 1642 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1642 1643 if stopped:
1643 1644 rawtext = self._revisioncache[2]
1644 1645
1645 1646 # drop cache to save memory
1646 1647 self._revisioncache = None
1647 1648
1648 1649 targetsize = None
1649 1650 rawsize = self.index[rev][2]
1650 1651 if 0 <= rawsize:
1651 1652 targetsize = 4 * rawsize
1652 1653
1653 1654 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1654 1655 if rawtext is None:
1655 1656 rawtext = bytes(bins[0])
1656 1657 bins = bins[1:]
1657 1658
1658 1659 rawtext = mdiff.patches(rawtext, bins)
1659 1660 self._revisioncache = (node, rev, rawtext)
1660 1661
1661 1662 if flags is None:
1662 1663 if rev is None:
1663 1664 rev = self.rev(node)
1664 1665 flags = self.flags(rev)
1665 1666
1666 1667 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1667 1668 if validatehash:
1668 1669 self.checkhash(text, node, rev=rev)
1669 1670
1670 1671 return text
1671 1672
1672 1673 def hash(self, text, p1, p2):
1673 1674 """Compute a node hash.
1674 1675
1675 1676 Available as a function so that subclasses can replace the hash
1676 1677 as needed.
1677 1678 """
1678 1679 return storageutil.hashrevisionsha1(text, p1, p2)
1679 1680
1680 1681 def _processflags(self, text, flags, operation, raw=False):
1681 1682 """Inspect revision data flags and applies transforms defined by
1682 1683 registered flag processors.
1683 1684
1684 1685 ``text`` - the revision data to process
1685 1686 ``flags`` - the revision flags
1686 1687 ``operation`` - the operation being performed (read or write)
1687 1688 ``raw`` - an optional argument describing if the raw transform should be
1688 1689 applied.
1689 1690
1690 1691 This method processes the flags in the order (or reverse order if
1691 1692 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1692 1693 flag processors registered for present flags. The order of flags defined
1693 1694 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1694 1695
1695 1696 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1696 1697 processed text and ``validatehash`` is a bool indicating whether the
1697 1698 returned text should be checked for hash integrity.
1698 1699
1699 1700 Note: If the ``raw`` argument is set, it has precedence over the
1700 1701 operation and will only update the value of ``validatehash``.
1701 1702 """
1702 1703 # fast path: no flag processors will run
1703 1704 if flags == 0:
1704 1705 return text, True
1705 1706 if not operation in ('read', 'write'):
1706 1707 raise error.ProgrammingError(_("invalid '%s' operation") %
1707 1708 operation)
1708 1709 # Check all flags are known.
1709 1710 if flags & ~REVIDX_KNOWN_FLAGS:
1710 1711 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1711 1712 (flags & ~REVIDX_KNOWN_FLAGS))
1712 1713 validatehash = True
1713 1714 # Depending on the operation (read or write), the order might be
1714 1715 # reversed due to non-commutative transforms.
1715 1716 orderedflags = REVIDX_FLAGS_ORDER
1716 1717 if operation == 'write':
1717 1718 orderedflags = reversed(orderedflags)
1718 1719
1719 1720 for flag in orderedflags:
1720 1721 # If a flagprocessor has been registered for a known flag, apply the
1721 1722 # related operation transform and update result tuple.
1722 1723 if flag & flags:
1723 1724 vhash = True
1724 1725
1725 1726 if flag not in self._flagprocessors:
1726 1727 message = _("missing processor for flag '%#x'") % (flag)
1727 1728 raise error.RevlogError(message)
1728 1729
1729 1730 processor = self._flagprocessors[flag]
1730 1731 if processor is not None:
1731 1732 readtransform, writetransform, rawtransform = processor
1732 1733
1733 1734 if raw:
1734 1735 vhash = rawtransform(self, text)
1735 1736 elif operation == 'read':
1736 1737 text, vhash = readtransform(self, text)
1737 1738 else: # write operation
1738 1739 text, vhash = writetransform(self, text)
1739 1740 validatehash = validatehash and vhash
1740 1741
1741 1742 return text, validatehash
1742 1743
1743 1744 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1744 1745 """Check node hash integrity.
1745 1746
1746 1747 Available as a function so that subclasses can extend hash mismatch
1747 1748 behaviors as needed.
1748 1749 """
1749 1750 try:
1750 1751 if p1 is None and p2 is None:
1751 1752 p1, p2 = self.parents(node)
1752 1753 if node != self.hash(text, p1, p2):
1753 1754 # Clear the revision cache on hash failure. The revision cache
1754 1755 # only stores the raw revision and clearing the cache does have
1755 1756 # the side-effect that we won't have a cache hit when the raw
1756 1757 # revision data is accessed. But this case should be rare and
1757 1758 # it is extra work to teach the cache about the hash
1758 1759 # verification state.
1759 1760 if self._revisioncache and self._revisioncache[0] == node:
1760 1761 self._revisioncache = None
1761 1762
1762 1763 revornode = rev
1763 1764 if revornode is None:
1764 1765 revornode = templatefilters.short(hex(node))
1765 1766 raise error.RevlogError(_("integrity check failed on %s:%s")
1766 1767 % (self.indexfile, pycompat.bytestr(revornode)))
1767 1768 except error.RevlogError:
1768 1769 if self._censorable and storageutil.iscensoredtext(text):
1769 1770 raise error.CensoredNodeError(self.indexfile, node, text)
1770 1771 raise
1771 1772
1772 1773 def _enforceinlinesize(self, tr, fp=None):
1773 1774 """Check if the revlog is too big for inline and convert if so.
1774 1775
1775 1776 This should be called after revisions are added to the revlog. If the
1776 1777 revlog has grown too large to be an inline revlog, it will convert it
1777 1778 to use multiple index and data files.
1778 1779 """
1779 1780 tiprev = len(self) - 1
1780 1781 if (not self._inline or
1781 1782 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1782 1783 return
1783 1784
1784 1785 trinfo = tr.find(self.indexfile)
1785 1786 if trinfo is None:
1786 1787 raise error.RevlogError(_("%s not found in the transaction")
1787 1788 % self.indexfile)
1788 1789
1789 1790 trindex = trinfo[2]
1790 1791 if trindex is not None:
1791 1792 dataoff = self.start(trindex)
1792 1793 else:
1793 1794 # revlog was stripped at start of transaction, use all leftover data
1794 1795 trindex = len(self) - 1
1795 1796 dataoff = self.end(tiprev)
1796 1797
1797 1798 tr.add(self.datafile, dataoff)
1798 1799
1799 1800 if fp:
1800 1801 fp.flush()
1801 1802 fp.close()
1802 1803 # We can't use the cached file handle after close(). So prevent
1803 1804 # its usage.
1804 1805 self._writinghandles = None
1805 1806
1806 1807 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1807 1808 for r in self:
1808 1809 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1809 1810
1810 1811 with self._indexfp('w') as fp:
1811 1812 self.version &= ~FLAG_INLINE_DATA
1812 1813 self._inline = False
1813 1814 io = self._io
1814 1815 for i in self:
1815 1816 e = io.packentry(self.index[i], self.node, self.version, i)
1816 1817 fp.write(e)
1817 1818
1818 1819 # the temp file replace the real index when we exit the context
1819 1820 # manager
1820 1821
1821 1822 tr.replace(self.indexfile, trindex * self._io.size)
1822 1823 self._chunkclear()
1823 1824
1824 1825 def _nodeduplicatecallback(self, transaction, node):
1825 1826 """called when trying to add a node already stored.
1826 1827 """
1827 1828
1828 1829 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1829 1830 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1830 1831 """add a revision to the log
1831 1832
1832 1833 text - the revision data to add
1833 1834 transaction - the transaction object used for rollback
1834 1835 link - the linkrev data to add
1835 1836 p1, p2 - the parent nodeids of the revision
1836 1837 cachedelta - an optional precomputed delta
1837 1838 node - nodeid of revision; typically node is not specified, and it is
1838 1839 computed by default as hash(text, p1, p2), however subclasses might
1839 1840 use different hashing method (and override checkhash() in such case)
1840 1841 flags - the known flags to set on the revision
1841 1842 deltacomputer - an optional deltacomputer instance shared between
1842 1843 multiple calls
1843 1844 """
1844 1845 if link == nullrev:
1845 1846 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1846 1847 % self.indexfile)
1847 1848
1848 1849 if flags:
1849 1850 node = node or self.hash(text, p1, p2)
1850 1851
1851 1852 rawtext, validatehash = self._processflags(text, flags, 'write')
1852 1853
1853 1854 # If the flag processor modifies the revision data, ignore any provided
1854 1855 # cachedelta.
1855 1856 if rawtext != text:
1856 1857 cachedelta = None
1857 1858
1858 1859 if len(rawtext) > _maxentrysize:
1859 1860 raise error.RevlogError(
1860 1861 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1861 1862 % (self.indexfile, len(rawtext)))
1862 1863
1863 1864 node = node or self.hash(rawtext, p1, p2)
1864 1865 if node in self.nodemap:
1865 1866 return node
1866 1867
1867 1868 if validatehash:
1868 1869 self.checkhash(rawtext, node, p1=p1, p2=p2)
1869 1870
1870 1871 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1871 1872 flags, cachedelta=cachedelta,
1872 1873 deltacomputer=deltacomputer)
1873 1874
1874 1875 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1875 1876 cachedelta=None, deltacomputer=None):
1876 1877 """add a raw revision with known flags, node and parents
1877 1878 useful when reusing a revision not stored in this revlog (ex: received
1878 1879 over wire, or read from an external bundle).
1879 1880 """
1880 1881 dfh = None
1881 1882 if not self._inline:
1882 1883 dfh = self._datafp("a+")
1883 1884 ifh = self._indexfp("a+")
1884 1885 try:
1885 1886 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1886 1887 flags, cachedelta, ifh, dfh,
1887 1888 deltacomputer=deltacomputer)
1888 1889 finally:
1889 1890 if dfh:
1890 1891 dfh.close()
1891 1892 ifh.close()
1892 1893
1893 1894 def compress(self, data):
1894 1895 """Generate a possibly-compressed representation of data."""
1895 1896 if not data:
1896 1897 return '', data
1897 1898
1898 1899 compressed = self._compressor.compress(data)
1899 1900
1900 1901 if compressed:
1901 1902 # The revlog compressor added the header in the returned data.
1902 1903 return '', compressed
1903 1904
1904 1905 if data[0:1] == '\0':
1905 1906 return '', data
1906 1907 return 'u', data
1907 1908
1908 1909 def decompress(self, data):
1909 1910 """Decompress a revlog chunk.
1910 1911
1911 1912 The chunk is expected to begin with a header identifying the
1912 1913 format type so it can be routed to an appropriate decompressor.
1913 1914 """
1914 1915 if not data:
1915 1916 return data
1916 1917
1917 1918 # Revlogs are read much more frequently than they are written and many
1918 1919 # chunks only take microseconds to decompress, so performance is
1919 1920 # important here.
1920 1921 #
1921 1922 # We can make a few assumptions about revlogs:
1922 1923 #
1923 1924 # 1) the majority of chunks will be compressed (as opposed to inline
1924 1925 # raw data).
1925 1926 # 2) decompressing *any* data will likely by at least 10x slower than
1926 1927 # returning raw inline data.
1927 1928 # 3) we want to prioritize common and officially supported compression
1928 1929 # engines
1929 1930 #
1930 1931 # It follows that we want to optimize for "decompress compressed data
1931 1932 # when encoded with common and officially supported compression engines"
1932 1933 # case over "raw data" and "data encoded by less common or non-official
1933 1934 # compression engines." That is why we have the inline lookup first
1934 1935 # followed by the compengines lookup.
1935 1936 #
1936 1937 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1937 1938 # compressed chunks. And this matters for changelog and manifest reads.
1938 1939 t = data[0:1]
1939 1940
1940 1941 if t == 'x':
1941 1942 try:
1942 1943 return _zlibdecompress(data)
1943 1944 except zlib.error as e:
1944 1945 raise error.RevlogError(_('revlog decompress error: %s') %
1945 1946 stringutil.forcebytestr(e))
1946 1947 # '\0' is more common than 'u' so it goes first.
1947 1948 elif t == '\0':
1948 1949 return data
1949 1950 elif t == 'u':
1950 1951 return util.buffer(data, 1)
1951 1952
1952 1953 try:
1953 1954 compressor = self._decompressors[t]
1954 1955 except KeyError:
1955 1956 try:
1956 1957 engine = util.compengines.forrevlogheader(t)
1957 1958 compressor = engine.revlogcompressor()
1958 1959 self._decompressors[t] = compressor
1959 1960 except KeyError:
1960 1961 raise error.RevlogError(_('unknown compression type %r') % t)
1961 1962
1962 1963 return compressor.decompress(data)
1963 1964
1964 1965 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1965 1966 cachedelta, ifh, dfh, alwayscache=False,
1966 1967 deltacomputer=None):
1967 1968 """internal function to add revisions to the log
1968 1969
1969 1970 see addrevision for argument descriptions.
1970 1971
1971 1972 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1972 1973
1973 1974 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1974 1975 be used.
1975 1976
1976 1977 invariants:
1977 1978 - rawtext is optional (can be None); if not set, cachedelta must be set.
1978 1979 if both are set, they must correspond to each other.
1979 1980 """
1980 1981 if node == nullid:
1981 1982 raise error.RevlogError(_("%s: attempt to add null revision") %
1982 1983 self.indexfile)
1983 1984 if node == wdirid or node in wdirfilenodeids:
1984 1985 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1985 1986 self.indexfile)
1986 1987
1987 1988 if self._inline:
1988 1989 fh = ifh
1989 1990 else:
1990 1991 fh = dfh
1991 1992
1992 1993 btext = [rawtext]
1993 1994
1994 1995 curr = len(self)
1995 1996 prev = curr - 1
1996 1997 offset = self.end(prev)
1997 1998 p1r, p2r = self.rev(p1), self.rev(p2)
1998 1999
1999 2000 # full versions are inserted when the needed deltas
2000 2001 # become comparable to the uncompressed text
2001 2002 if rawtext is None:
2002 2003 # need rawtext size, before changed by flag processors, which is
2003 2004 # the non-raw size. use revlog explicitly to avoid filelog's extra
2004 2005 # logic that might remove metadata size.
2005 2006 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2006 2007 cachedelta[1])
2007 2008 else:
2008 2009 textlen = len(rawtext)
2009 2010
2010 2011 if deltacomputer is None:
2011 2012 deltacomputer = deltautil.deltacomputer(self)
2012 2013
2013 2014 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2014 2015
2015 2016 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2016 2017
2017 2018 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2018 2019 deltainfo.base, link, p1r, p2r, node)
2019 2020 self.index.append(e)
2020 2021 self.nodemap[node] = curr
2021 2022
2022 2023 # Reset the pure node cache start lookup offset to account for new
2023 2024 # revision.
2024 2025 if self._nodepos is not None:
2025 2026 self._nodepos = curr
2026 2027
2027 2028 entry = self._io.packentry(e, self.node, self.version, curr)
2028 2029 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2029 2030 link, offset)
2030 2031
2031 2032 rawtext = btext[0]
2032 2033
2033 2034 if alwayscache and rawtext is None:
2034 2035 rawtext = deltacomputer.buildtext(revinfo, fh)
2035 2036
2036 2037 if type(rawtext) == bytes: # only accept immutable objects
2037 2038 self._revisioncache = (node, curr, rawtext)
2038 2039 self._chainbasecache[curr] = deltainfo.chainbase
2039 2040 return node
2040 2041
2041 2042 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2042 2043 # Files opened in a+ mode have inconsistent behavior on various
2043 2044 # platforms. Windows requires that a file positioning call be made
2044 2045 # when the file handle transitions between reads and writes. See
2045 2046 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2046 2047 # platforms, Python or the platform itself can be buggy. Some versions
2047 2048 # of Solaris have been observed to not append at the end of the file
2048 2049 # if the file was seeked to before the end. See issue4943 for more.
2049 2050 #
2050 2051 # We work around this issue by inserting a seek() before writing.
2051 2052 # Note: This is likely not necessary on Python 3. However, because
2052 2053 # the file handle is reused for reads and may be seeked there, we need
2053 2054 # to be careful before changing this.
2054 2055 ifh.seek(0, os.SEEK_END)
2055 2056 if dfh:
2056 2057 dfh.seek(0, os.SEEK_END)
2057 2058
2058 2059 curr = len(self) - 1
2059 2060 if not self._inline:
2060 2061 transaction.add(self.datafile, offset)
2061 2062 transaction.add(self.indexfile, curr * len(entry))
2062 2063 if data[0]:
2063 2064 dfh.write(data[0])
2064 2065 dfh.write(data[1])
2065 2066 ifh.write(entry)
2066 2067 else:
2067 2068 offset += curr * self._io.size
2068 2069 transaction.add(self.indexfile, offset, curr)
2069 2070 ifh.write(entry)
2070 2071 ifh.write(data[0])
2071 2072 ifh.write(data[1])
2072 2073 self._enforceinlinesize(transaction, ifh)
2073 2074
2074 2075 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2075 2076 """
2076 2077 add a delta group
2077 2078
2078 2079 given a set of deltas, add them to the revision log. the
2079 2080 first delta is against its parent, which should be in our
2080 2081 log, the rest are against the previous delta.
2081 2082
2082 2083 If ``addrevisioncb`` is defined, it will be called with arguments of
2083 2084 this revlog and the node that was added.
2084 2085 """
2085 2086
2086 2087 if self._writinghandles:
2087 2088 raise error.ProgrammingError('cannot nest addgroup() calls')
2088 2089
2089 2090 nodes = []
2090 2091
2091 2092 r = len(self)
2092 2093 end = 0
2093 2094 if r:
2094 2095 end = self.end(r - 1)
2095 2096 ifh = self._indexfp("a+")
2096 2097 isize = r * self._io.size
2097 2098 if self._inline:
2098 2099 transaction.add(self.indexfile, end + isize, r)
2099 2100 dfh = None
2100 2101 else:
2101 2102 transaction.add(self.indexfile, isize, r)
2102 2103 transaction.add(self.datafile, end)
2103 2104 dfh = self._datafp("a+")
2104 2105 def flush():
2105 2106 if dfh:
2106 2107 dfh.flush()
2107 2108 ifh.flush()
2108 2109
2109 2110 self._writinghandles = (ifh, dfh)
2110 2111
2111 2112 try:
2112 2113 deltacomputer = deltautil.deltacomputer(self)
2113 2114 # loop through our set of deltas
2114 2115 for data in deltas:
2115 2116 node, p1, p2, linknode, deltabase, delta, flags = data
2116 2117 link = linkmapper(linknode)
2117 2118 flags = flags or REVIDX_DEFAULT_FLAGS
2118 2119
2119 2120 nodes.append(node)
2120 2121
2121 2122 if node in self.nodemap:
2122 2123 self._nodeduplicatecallback(transaction, node)
2123 2124 # this can happen if two branches make the same change
2124 2125 continue
2125 2126
2126 2127 for p in (p1, p2):
2127 2128 if p not in self.nodemap:
2128 2129 raise error.LookupError(p, self.indexfile,
2129 2130 _('unknown parent'))
2130 2131
2131 2132 if deltabase not in self.nodemap:
2132 2133 raise error.LookupError(deltabase, self.indexfile,
2133 2134 _('unknown delta base'))
2134 2135
2135 2136 baserev = self.rev(deltabase)
2136 2137
2137 2138 if baserev != nullrev and self.iscensored(baserev):
2138 2139 # if base is censored, delta must be full replacement in a
2139 2140 # single patch operation
2140 2141 hlen = struct.calcsize(">lll")
2141 2142 oldlen = self.rawsize(baserev)
2142 2143 newlen = len(delta) - hlen
2143 2144 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2144 2145 raise error.CensoredBaseError(self.indexfile,
2145 2146 self.node(baserev))
2146 2147
2147 2148 if not flags and self._peek_iscensored(baserev, delta, flush):
2148 2149 flags |= REVIDX_ISCENSORED
2149 2150
2150 2151 # We assume consumers of addrevisioncb will want to retrieve
2151 2152 # the added revision, which will require a call to
2152 2153 # revision(). revision() will fast path if there is a cache
2153 2154 # hit. So, we tell _addrevision() to always cache in this case.
2154 2155 # We're only using addgroup() in the context of changegroup
2155 2156 # generation so the revision data can always be handled as raw
2156 2157 # by the flagprocessor.
2157 2158 self._addrevision(node, None, transaction, link,
2158 2159 p1, p2, flags, (baserev, delta),
2159 2160 ifh, dfh,
2160 2161 alwayscache=bool(addrevisioncb),
2161 2162 deltacomputer=deltacomputer)
2162 2163
2163 2164 if addrevisioncb:
2164 2165 addrevisioncb(self, node)
2165 2166
2166 2167 if not dfh and not self._inline:
2167 2168 # addrevision switched from inline to conventional
2168 2169 # reopen the index
2169 2170 ifh.close()
2170 2171 dfh = self._datafp("a+")
2171 2172 ifh = self._indexfp("a+")
2172 2173 self._writinghandles = (ifh, dfh)
2173 2174 finally:
2174 2175 self._writinghandles = None
2175 2176
2176 2177 if dfh:
2177 2178 dfh.close()
2178 2179 ifh.close()
2179 2180
2180 2181 return nodes
2181 2182
2182 2183 def iscensored(self, rev):
2183 2184 """Check if a file revision is censored."""
2184 2185 if not self._censorable:
2185 2186 return False
2186 2187
2187 2188 return self.flags(rev) & REVIDX_ISCENSORED
2188 2189
2189 2190 def _peek_iscensored(self, baserev, delta, flush):
2190 2191 """Quickly check if a delta produces a censored revision."""
2191 2192 if not self._censorable:
2192 2193 return False
2193 2194
2194 2195 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2195 2196
2196 2197 def getstrippoint(self, minlink):
2197 2198 """find the minimum rev that must be stripped to strip the linkrev
2198 2199
2199 2200 Returns a tuple containing the minimum rev and a set of all revs that
2200 2201 have linkrevs that will be broken by this strip.
2201 2202 """
2202 2203 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2203 2204 self.headrevs(),
2204 2205 self.linkrev, self.parentrevs)
2205 2206
2206 2207 def strip(self, minlink, transaction):
2207 2208 """truncate the revlog on the first revision with a linkrev >= minlink
2208 2209
2209 2210 This function is called when we're stripping revision minlink and
2210 2211 its descendants from the repository.
2211 2212
2212 2213 We have to remove all revisions with linkrev >= minlink, because
2213 2214 the equivalent changelog revisions will be renumbered after the
2214 2215 strip.
2215 2216
2216 2217 So we truncate the revlog on the first of these revisions, and
2217 2218 trust that the caller has saved the revisions that shouldn't be
2218 2219 removed and that it'll re-add them after this truncation.
2219 2220 """
2220 2221 if len(self) == 0:
2221 2222 return
2222 2223
2223 2224 rev, _ = self.getstrippoint(minlink)
2224 2225 if rev == len(self):
2225 2226 return
2226 2227
2227 2228 # first truncate the files on disk
2228 2229 end = self.start(rev)
2229 2230 if not self._inline:
2230 2231 transaction.add(self.datafile, end)
2231 2232 end = rev * self._io.size
2232 2233 else:
2233 2234 end += rev * self._io.size
2234 2235
2235 2236 transaction.add(self.indexfile, end)
2236 2237
2237 2238 # then reset internal state in memory to forget those revisions
2238 2239 self._revisioncache = None
2239 2240 self._chaininfocache = {}
2240 2241 self._chunkclear()
2241 2242 for x in pycompat.xrange(rev, len(self)):
2242 2243 del self.nodemap[self.node(x)]
2243 2244
2244 2245 del self.index[rev:-1]
2245 2246 self._nodepos = None
2246 2247
2247 2248 def checksize(self):
2248 2249 expected = 0
2249 2250 if len(self):
2250 2251 expected = max(0, self.end(len(self) - 1))
2251 2252
2252 2253 try:
2253 2254 with self._datafp() as f:
2254 2255 f.seek(0, 2)
2255 2256 actual = f.tell()
2256 2257 dd = actual - expected
2257 2258 except IOError as inst:
2258 2259 if inst.errno != errno.ENOENT:
2259 2260 raise
2260 2261 dd = 0
2261 2262
2262 2263 try:
2263 2264 f = self.opener(self.indexfile)
2264 2265 f.seek(0, 2)
2265 2266 actual = f.tell()
2266 2267 f.close()
2267 2268 s = self._io.size
2268 2269 i = max(0, actual // s)
2269 2270 di = actual - (i * s)
2270 2271 if self._inline:
2271 2272 databytes = 0
2272 2273 for r in self:
2273 2274 databytes += max(0, self.length(r))
2274 2275 dd = 0
2275 2276 di = actual - len(self) * s - databytes
2276 2277 except IOError as inst:
2277 2278 if inst.errno != errno.ENOENT:
2278 2279 raise
2279 2280 di = 0
2280 2281
2281 2282 return (dd, di)
2282 2283
2283 2284 def files(self):
2284 2285 res = [self.indexfile]
2285 2286 if not self._inline:
2286 2287 res.append(self.datafile)
2287 2288 return res
2288 2289
2289 2290 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2290 2291 assumehaveparentrevisions=False,
2291 2292 deltamode=repository.CG_DELTAMODE_STD):
2292 2293 if nodesorder not in ('nodes', 'storage', 'linear', None):
2293 2294 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2294 2295 nodesorder)
2295 2296
2296 2297 if nodesorder is None and not self._generaldelta:
2297 2298 nodesorder = 'storage'
2298 2299
2299 2300 if (not self._storedeltachains and
2300 2301 deltamode != repository.CG_DELTAMODE_PREV):
2301 2302 deltamode = repository.CG_DELTAMODE_FULL
2302 2303
2303 2304 return storageutil.emitrevisions(
2304 2305 self, nodes, nodesorder, revlogrevisiondelta,
2305 2306 deltaparentfn=self.deltaparent,
2306 2307 candeltafn=self.candelta,
2307 2308 rawsizefn=self.rawsize,
2308 2309 revdifffn=self.revdiff,
2309 2310 flagsfn=self.flags,
2310 2311 deltamode=deltamode,
2311 2312 revisiondata=revisiondata,
2312 2313 assumehaveparentrevisions=assumehaveparentrevisions)
2313 2314
2314 2315 DELTAREUSEALWAYS = 'always'
2315 2316 DELTAREUSESAMEREVS = 'samerevs'
2316 2317 DELTAREUSENEVER = 'never'
2317 2318
2318 2319 DELTAREUSEFULLADD = 'fulladd'
2319 2320
2320 2321 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2321 2322
2322 2323 def clone(self, tr, destrevlog, addrevisioncb=None,
2323 2324 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2324 2325 """Copy this revlog to another, possibly with format changes.
2325 2326
2326 2327 The destination revlog will contain the same revisions and nodes.
2327 2328 However, it may not be bit-for-bit identical due to e.g. delta encoding
2328 2329 differences.
2329 2330
2330 2331 The ``deltareuse`` argument control how deltas from the existing revlog
2331 2332 are preserved in the destination revlog. The argument can have the
2332 2333 following values:
2333 2334
2334 2335 DELTAREUSEALWAYS
2335 2336 Deltas will always be reused (if possible), even if the destination
2336 2337 revlog would not select the same revisions for the delta. This is the
2337 2338 fastest mode of operation.
2338 2339 DELTAREUSESAMEREVS
2339 2340 Deltas will be reused if the destination revlog would pick the same
2340 2341 revisions for the delta. This mode strikes a balance between speed
2341 2342 and optimization.
2342 2343 DELTAREUSENEVER
2343 2344 Deltas will never be reused. This is the slowest mode of execution.
2344 2345 This mode can be used to recompute deltas (e.g. if the diff/delta
2345 2346 algorithm changes).
2346 2347
2347 2348 Delta computation can be slow, so the choice of delta reuse policy can
2348 2349 significantly affect run time.
2349 2350
2350 2351 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2351 2352 two extremes. Deltas will be reused if they are appropriate. But if the
2352 2353 delta could choose a better revision, it will do so. This means if you
2353 2354 are converting a non-generaldelta revlog to a generaldelta revlog,
2354 2355 deltas will be recomputed if the delta's parent isn't a parent of the
2355 2356 revision.
2356 2357
2357 2358 In addition to the delta policy, the ``forcedeltabothparents``
2358 2359 argument controls whether to force compute deltas against both parents
2359 2360 for merges. By default, the current default is used.
2360 2361 """
2361 2362 if deltareuse not in self.DELTAREUSEALL:
2362 2363 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2363 2364
2364 2365 if len(destrevlog):
2365 2366 raise ValueError(_('destination revlog is not empty'))
2366 2367
2367 2368 if getattr(self, 'filteredrevs', None):
2368 2369 raise ValueError(_('source revlog has filtered revisions'))
2369 2370 if getattr(destrevlog, 'filteredrevs', None):
2370 2371 raise ValueError(_('destination revlog has filtered revisions'))
2371 2372
2372 2373 # lazydeltabase controls whether to reuse a cached delta, if possible.
2373 2374 oldlazydeltabase = destrevlog._lazydeltabase
2374 2375 oldamd = destrevlog._deltabothparents
2375 2376
2376 2377 try:
2377 2378 if deltareuse == self.DELTAREUSEALWAYS:
2378 2379 destrevlog._lazydeltabase = True
2379 2380 elif deltareuse == self.DELTAREUSESAMEREVS:
2380 2381 destrevlog._lazydeltabase = False
2381 2382
2382 2383 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2383 2384
2384 2385 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2385 2386 self.DELTAREUSESAMEREVS)
2386 2387
2387 2388 deltacomputer = deltautil.deltacomputer(destrevlog)
2388 2389 index = self.index
2389 2390 for rev in self:
2390 2391 entry = index[rev]
2391 2392
2392 2393 # Some classes override linkrev to take filtered revs into
2393 2394 # account. Use raw entry from index.
2394 2395 flags = entry[0] & 0xffff
2395 2396 linkrev = entry[4]
2396 2397 p1 = index[entry[5]][7]
2397 2398 p2 = index[entry[6]][7]
2398 2399 node = entry[7]
2399 2400
2400 2401 # (Possibly) reuse the delta from the revlog if allowed and
2401 2402 # the revlog chunk is a delta.
2402 2403 cachedelta = None
2403 2404 rawtext = None
2404 2405 if populatecachedelta:
2405 2406 dp = self.deltaparent(rev)
2406 2407 if dp != nullrev:
2407 2408 cachedelta = (dp, bytes(self._chunk(rev)))
2408 2409
2409 2410 if not cachedelta:
2410 2411 rawtext = self.revision(rev, raw=True)
2411 2412
2412 2413
2413 2414 if deltareuse == self.DELTAREUSEFULLADD:
2414 2415 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2415 2416 cachedelta=cachedelta,
2416 2417 node=node, flags=flags,
2417 2418 deltacomputer=deltacomputer)
2418 2419 else:
2419 2420 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2420 2421 checkambig=False)
2421 2422 dfh = None
2422 2423 if not destrevlog._inline:
2423 2424 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2424 2425 try:
2425 2426 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2426 2427 p2, flags, cachedelta, ifh, dfh,
2427 2428 deltacomputer=deltacomputer)
2428 2429 finally:
2429 2430 if dfh:
2430 2431 dfh.close()
2431 2432 ifh.close()
2432 2433
2433 2434 if addrevisioncb:
2434 2435 addrevisioncb(self, rev, node)
2435 2436 finally:
2436 2437 destrevlog._lazydeltabase = oldlazydeltabase
2437 2438 destrevlog._deltabothparents = oldamd
2438 2439
2439 2440 def censorrevision(self, tr, censornode, tombstone=b''):
2440 2441 if (self.version & 0xFFFF) == REVLOGV0:
2441 2442 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2442 2443 self.version)
2443 2444
2444 2445 censorrev = self.rev(censornode)
2445 2446 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2446 2447
2447 2448 if len(tombstone) > self.rawsize(censorrev):
2448 2449 raise error.Abort(_('censor tombstone must be no longer than '
2449 2450 'censored data'))
2450 2451
2451 2452 # Rewriting the revlog in place is hard. Our strategy for censoring is
2452 2453 # to create a new revlog, copy all revisions to it, then replace the
2453 2454 # revlogs on transaction close.
2454 2455
2455 2456 newindexfile = self.indexfile + b'.tmpcensored'
2456 2457 newdatafile = self.datafile + b'.tmpcensored'
2457 2458
2458 2459 # This is a bit dangerous. We could easily have a mismatch of state.
2459 2460 newrl = revlog(self.opener, newindexfile, newdatafile,
2460 2461 censorable=True)
2461 2462 newrl.version = self.version
2462 2463 newrl._generaldelta = self._generaldelta
2463 2464 newrl._io = self._io
2464 2465
2465 2466 for rev in self.revs():
2466 2467 node = self.node(rev)
2467 2468 p1, p2 = self.parents(node)
2468 2469
2469 2470 if rev == censorrev:
2470 2471 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2471 2472 p1, p2, censornode, REVIDX_ISCENSORED)
2472 2473
2473 2474 if newrl.deltaparent(rev) != nullrev:
2474 2475 raise error.Abort(_('censored revision stored as delta; '
2475 2476 'cannot censor'),
2476 2477 hint=_('censoring of revlogs is not '
2477 2478 'fully implemented; please report '
2478 2479 'this bug'))
2479 2480 continue
2480 2481
2481 2482 if self.iscensored(rev):
2482 2483 if self.deltaparent(rev) != nullrev:
2483 2484 raise error.Abort(_('cannot censor due to censored '
2484 2485 'revision having delta stored'))
2485 2486 rawtext = self._chunk(rev)
2486 2487 else:
2487 2488 rawtext = self.revision(rev, raw=True)
2488 2489
2489 2490 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2490 2491 self.flags(rev))
2491 2492
2492 2493 tr.addbackup(self.indexfile, location='store')
2493 2494 if not self._inline:
2494 2495 tr.addbackup(self.datafile, location='store')
2495 2496
2496 2497 self.opener.rename(newrl.indexfile, self.indexfile)
2497 2498 if not self._inline:
2498 2499 self.opener.rename(newrl.datafile, self.datafile)
2499 2500
2500 2501 self.clearcaches()
2501 2502 self._loadindex(self.version, None)
2502 2503
2503 2504 def verifyintegrity(self, state):
2504 2505 """Verifies the integrity of the revlog.
2505 2506
2506 2507 Yields ``revlogproblem`` instances describing problems that are
2507 2508 found.
2508 2509 """
2509 2510 dd, di = self.checksize()
2510 2511 if dd:
2511 2512 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2512 2513 if di:
2513 2514 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2514 2515
2515 2516 version = self.version & 0xFFFF
2516 2517
2517 2518 # The verifier tells us what version revlog we should be.
2518 2519 if version != state['expectedversion']:
2519 2520 yield revlogproblem(
2520 2521 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2521 2522 (self.indexfile, version, state['expectedversion']))
2522 2523
2523 2524 state['skipread'] = set()
2524 2525
2525 2526 for rev in self:
2526 2527 node = self.node(rev)
2527 2528
2528 2529 # Verify contents. 4 cases to care about:
2529 2530 #
2530 2531 # common: the most common case
2531 2532 # rename: with a rename
2532 2533 # meta: file content starts with b'\1\n', the metadata
2533 2534 # header defined in filelog.py, but without a rename
2534 2535 # ext: content stored externally
2535 2536 #
2536 2537 # More formally, their differences are shown below:
2537 2538 #
2538 2539 # | common | rename | meta | ext
2539 2540 # -------------------------------------------------------
2540 2541 # flags() | 0 | 0 | 0 | not 0
2541 2542 # renamed() | False | True | False | ?
2542 2543 # rawtext[0:2]=='\1\n'| False | True | True | ?
2543 2544 #
2544 2545 # "rawtext" means the raw text stored in revlog data, which
2545 2546 # could be retrieved by "revision(rev, raw=True)". "text"
2546 2547 # mentioned below is "revision(rev, raw=False)".
2547 2548 #
2548 2549 # There are 3 different lengths stored physically:
2549 2550 # 1. L1: rawsize, stored in revlog index
2550 2551 # 2. L2: len(rawtext), stored in revlog data
2551 2552 # 3. L3: len(text), stored in revlog data if flags==0, or
2552 2553 # possibly somewhere else if flags!=0
2553 2554 #
2554 2555 # L1 should be equal to L2. L3 could be different from them.
2555 2556 # "text" may or may not affect commit hash depending on flag
2556 2557 # processors (see revlog.addflagprocessor).
2557 2558 #
2558 2559 # | common | rename | meta | ext
2559 2560 # -------------------------------------------------
2560 2561 # rawsize() | L1 | L1 | L1 | L1
2561 2562 # size() | L1 | L2-LM | L1(*) | L1 (?)
2562 2563 # len(rawtext) | L2 | L2 | L2 | L2
2563 2564 # len(text) | L2 | L2 | L2 | L3
2564 2565 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2565 2566 #
2566 2567 # LM: length of metadata, depending on rawtext
2567 2568 # (*): not ideal, see comment in filelog.size
2568 2569 # (?): could be "- len(meta)" if the resolved content has
2569 2570 # rename metadata
2570 2571 #
2571 2572 # Checks needed to be done:
2572 2573 # 1. length check: L1 == L2, in all cases.
2573 2574 # 2. hash check: depending on flag processor, we may need to
2574 2575 # use either "text" (external), or "rawtext" (in revlog).
2575 2576
2576 2577 try:
2577 2578 skipflags = state.get('skipflags', 0)
2578 2579 if skipflags:
2579 2580 skipflags &= self.flags(rev)
2580 2581
2581 2582 if skipflags:
2582 2583 state['skipread'].add(node)
2583 2584 else:
2584 2585 # Side-effect: read content and verify hash.
2585 2586 self.revision(node)
2586 2587
2587 2588 l1 = self.rawsize(rev)
2588 2589 l2 = len(self.revision(node, raw=True))
2589 2590
2590 2591 if l1 != l2:
2591 2592 yield revlogproblem(
2592 2593 error=_('unpacked size is %d, %d expected') % (l2, l1),
2593 2594 node=node)
2594 2595
2595 2596 except error.CensoredNodeError:
2596 2597 if state['erroroncensored']:
2597 2598 yield revlogproblem(error=_('censored file data'),
2598 2599 node=node)
2599 2600 state['skipread'].add(node)
2600 2601 except Exception as e:
2601 2602 yield revlogproblem(
2602 2603 error=_('unpacking %s: %s') % (short(node),
2603 2604 stringutil.forcebytestr(e)),
2604 2605 node=node)
2605 2606 state['skipread'].add(node)
2606 2607
2607 2608 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2608 2609 revisionscount=False, trackedsize=False,
2609 2610 storedsize=False):
2610 2611 d = {}
2611 2612
2612 2613 if exclusivefiles:
2613 2614 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2614 2615 if not self._inline:
2615 2616 d['exclusivefiles'].append((self.opener, self.datafile))
2616 2617
2617 2618 if sharedfiles:
2618 2619 d['sharedfiles'] = []
2619 2620
2620 2621 if revisionscount:
2621 2622 d['revisionscount'] = len(self)
2622 2623
2623 2624 if trackedsize:
2624 2625 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2625 2626
2626 2627 if storedsize:
2627 2628 d['storedsize'] = sum(self.opener.stat(path).st_size
2628 2629 for path in self.files())
2629 2630
2630 2631 return d
General Comments 0
You need to be logged in to leave comments. Login now