revlog: use separate variables to track version flags...
Gregory Szorc
r41241:3f807237 default
@@ -1,2631 +1,2635 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to silence pyflakes 'unused import' warnings
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns a same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
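As a concrete illustration of the (read, write, raw) contract described above, here is a minimal sketch of a processor 3-tuple. The functions are hypothetical, and reusing REVIDX_EXTSTORED here is purely for illustration, not how that flag is actually used:

    def _nulreadprocessor(self, rawtext):
        # stored form -> user-visible form; False: hash over "text"
        return rawtext[:-1], False

    def _nulwriteprocessor(self, text):
        # user-visible form -> stored form (append a NUL byte)
        return text + b'\0', False

    def _nulrawprocessor(self, rawtext):
        # rawtext alone cannot be used for hash verification for this flag
        return False

    # addflagprocessor(REVIDX_EXTSTORED,
    #                  (_nulreadprocessor, _nulwriteprocessor,
    #                   _nulrawprocessor))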
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
183 183
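A quick sanity check of the 48/16-bit packing implemented by the three helpers above:

    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED
    # bits outside REVIDX_KNOWN_FLAGS are rejected, e.g. offset_type(0, 1)
    # raises ValueError (assuming bit 0 is not a registered flag).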
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
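For illustration, one v0 record can be round-tripped through the struct above (the field values here are made up):

    nodeid = b'\x01' * 20
    record = indexformatv0_pack(0, 11, 0, 0, nullid, nullid, nodeid)
    assert indexformatv0.size == len(record) == 4 * 4 + 3 * 20
    assert indexformatv0_unpack(record)[6] == nodeid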
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
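Illustration: a v1 ("ng") record packs the 48-bit offset and 16-bit flags into its leading Q via offset_type(); for rev 0, packentry() below overwrites those first 4 bytes with the version header. The values here are made up:

    entry = (offset_type(0, 0), 11, 12, 0, 0, -1, -1, b'\x01' * 20)
    record = indexformatng_pack(*entry)
    assert indexformatng.size == len(record) == 64  # fixed-size index records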
288 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 289 # signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, text) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of revision integer to full node.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._maxdeltachainspan = -1
375 375 self._withsparseread = False
376 376 self._sparserevlog = False
377 377 self._srdensitythreshold = 0.50
378 378 self._srmingapsize = 262144
379 379
380 380 # Make copy of flag processors so each revlog instance can support
381 381 # custom flags.
382 382 self._flagprocessors = dict(_flagprocessors)
383 383
384 384 # 2-tuple of file handles being used for active writing.
385 385 self._writinghandles = None
386 386
387 387 self._loadindex()
388 388
389 389 def _loadindex(self):
390 390 mmapindexthreshold = None
391 391 opts = getattr(self.opener, 'options', {}) or {}
392 392
393 393 if 'revlogv2' in opts:
394 versionflags = REVLOGV2 | FLAG_INLINE_DATA
394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 395 elif 'revlogv1' in opts:
396 versionflags = REVLOGV1 | FLAG_INLINE_DATA
396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 397 if 'generaldelta' in opts:
398 versionflags |= FLAG_GENERALDELTA
398 newversionflags |= FLAG_GENERALDELTA
399 399 else:
400 versionflags = REVLOG_DEFAULT_VERSION
400 newversionflags = REVLOG_DEFAULT_VERSION
401 401
402 402 if 'chunkcachesize' in opts:
403 403 self._chunkcachesize = opts['chunkcachesize']
404 404 if 'maxchainlen' in opts:
405 405 self._maxchainlen = opts['maxchainlen']
406 406 if 'deltabothparents' in opts:
407 407 self._deltabothparents = opts['deltabothparents']
408 408 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
409 409 if 'compengine' in opts:
410 410 self._compengine = opts['compengine']
411 411 if 'maxdeltachainspan' in opts:
412 412 self._maxdeltachainspan = opts['maxdeltachainspan']
413 413 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
414 414 mmapindexthreshold = opts['mmapindexthreshold']
415 415 self._sparserevlog = bool(opts.get('sparse-revlog', False))
416 416 withsparseread = bool(opts.get('with-sparse-read', False))
417 417 # sparse-revlog forces sparse-read
418 418 self._withsparseread = self._sparserevlog or withsparseread
419 419 if 'sparse-read-density-threshold' in opts:
420 420 self._srdensitythreshold = opts['sparse-read-density-threshold']
421 421 if 'sparse-read-min-gap-size' in opts:
422 422 self._srmingapsize = opts['sparse-read-min-gap-size']
423 423 if opts.get('enableellipsis'):
424 424 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
425 425
426 426 # revlog v0 doesn't have flag processors
427 427 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
428 428 _insertflagprocessor(flag, processor, self._flagprocessors)
429 429
430 430 if self._chunkcachesize <= 0:
431 431 raise error.RevlogError(_('revlog chunk cache size %r is not '
432 432 'greater than 0') % self._chunkcachesize)
433 433 elif self._chunkcachesize & (self._chunkcachesize - 1):
434 434 raise error.RevlogError(_('revlog chunk cache size %r is not a '
435 435 'power of 2') % self._chunkcachesize)
436 436
437 437 indexdata = ''
438 438 self._initempty = True
439 439 try:
440 440 with self._indexfp() as f:
441 441 if (mmapindexthreshold is not None and
442 442 self.opener.fstat(f).st_size >= mmapindexthreshold):
443 443 indexdata = util.buffer(util.mmapread(f))
444 444 else:
445 445 indexdata = f.read()
446 446 if len(indexdata) > 0:
447 447 versionflags = versionformat_unpack(indexdata[:4])[0]
448 448 self._initempty = False
449 else:
450 versionflags = newversionflags
449 451 except IOError as inst:
450 452 if inst.errno != errno.ENOENT:
451 453 raise
452 454
455 versionflags = newversionflags
456
453 457 self.version = versionflags
454 458
455 459 flags = versionflags & ~0xFFFF
456 460 fmt = versionflags & 0xFFFF
457 461
458 462 if fmt == REVLOGV0:
459 463 if flags:
460 464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
461 465 'revlog %s') %
462 466 (flags >> 16, fmt, self.indexfile))
463 467
464 468 self._inline = False
465 469 self._generaldelta = False
466 470
467 471 elif fmt == REVLOGV1:
468 472 if flags & ~REVLOGV1_FLAGS:
469 473 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
470 474 'revlog %s') %
471 475 (flags >> 16, fmt, self.indexfile))
472 476
473 477 self._inline = versionflags & FLAG_INLINE_DATA
474 478 self._generaldelta = versionflags & FLAG_GENERALDELTA
475 479
476 480 elif fmt == REVLOGV2:
477 481 if flags & ~REVLOGV2_FLAGS:
478 482 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
479 483 'revlog %s') %
480 484 (flags >> 16, fmt, self.indexfile))
481 485
482 486 self._inline = versionflags & FLAG_INLINE_DATA
483 487 # generaldelta implied by version 2 revlogs.
484 488 self._generaldelta = True
485 489
486 490 else:
487 491 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
488 492 (fmt, self.indexfile))
489 493
490 494 self._storedeltachains = True
491 495
492 496 self._io = revlogio()
493 497 if self.version == REVLOGV0:
494 498 self._io = revlogoldio()
495 499 try:
496 500 d = self._io.parseindex(indexdata, self._inline)
497 501 except (ValueError, IndexError):
498 502 raise error.RevlogError(_("index %s is corrupted") %
499 503 self.indexfile)
500 504 self.index, nodemap, self._chunkcache = d
501 505 if nodemap is not None:
502 506 self.nodemap = self._nodecache = nodemap
503 507 if not self._chunkcache:
504 508 self._chunkclear()
505 509 # revnum -> (chain-length, sum-delta-length)
506 510 self._chaininfocache = {}
507 511 # revlog header -> revlog compressor
508 512 self._decompressors = {}
509 513
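To make the header split in _loadindex() concrete: the low 16 bits of the 4-byte header carry the format version and the high bits carry per-revlog feature flags; newversionflags is only consulted when no existing index could be read. A hedged sketch of decoding a v1 inline header:

    header = versionformat_unpack(b'\x00\x01\x00\x01')[0]
    assert header & 0xFFFF == REVLOGV1
    assert header & ~0xFFFF == FLAG_INLINE_DATA  # FLAG_INLINE_DATA is 1 << 16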
510 514 @util.propertycache
511 515 def _compressor(self):
512 516 return util.compengines[self._compengine].revlogcompressor()
513 517
514 518 def _indexfp(self, mode='r'):
515 519 """file object for the revlog's index file"""
516 520 args = {r'mode': mode}
517 521 if mode != 'r':
518 522 args[r'checkambig'] = self._checkambig
519 523 if mode == 'w':
520 524 args[r'atomictemp'] = True
521 525 return self.opener(self.indexfile, **args)
522 526
523 527 def _datafp(self, mode='r'):
524 528 """file object for the revlog's data file"""
525 529 return self.opener(self.datafile, mode=mode)
526 530
527 531 @contextlib.contextmanager
528 532 def _datareadfp(self, existingfp=None):
529 533 """file object suitable to read data"""
530 534 # Use explicit file handle, if given.
531 535 if existingfp is not None:
532 536 yield existingfp
533 537
534 538 # Use a file handle being actively used for writes, if available.
535 539 # There is some danger in doing this because reads will seek the
536 540 # file. However, _writeentry() performs a SEEK_END before all writes,
537 541 # so we should be safe.
538 542 elif self._writinghandles:
539 543 if self._inline:
540 544 yield self._writinghandles[0]
541 545 else:
542 546 yield self._writinghandles[1]
543 547
544 548 # Otherwise open a new file handle.
545 549 else:
546 550 if self._inline:
547 551 func = self._indexfp
548 552 else:
549 553 func = self._datafp
550 554 with func() as fp:
551 555 yield fp
552 556
553 557 def tip(self):
554 558 return self.node(len(self.index) - 1)
555 559 def __contains__(self, rev):
556 560 return 0 <= rev < len(self)
557 561 def __len__(self):
558 562 return len(self.index)
559 563 def __iter__(self):
560 564 return iter(pycompat.xrange(len(self)))
561 565 def revs(self, start=0, stop=None):
562 566 """iterate over all rev in this revlog (from start to stop)"""
563 567 return storageutil.iterrevs(len(self), start=start, stop=stop)
564 568
565 569 @util.propertycache
566 570 def nodemap(self):
567 571 if self.index:
568 572 # populate mapping down to the initial node
569 573 node0 = self.index[0][7] # get around changelog filtering
570 574 self.rev(node0)
571 575 return self._nodecache
572 576
573 577 def hasnode(self, node):
574 578 try:
575 579 self.rev(node)
576 580 return True
577 581 except KeyError:
578 582 return False
579 583
580 584 def candelta(self, baserev, rev):
581 585 """whether two revisions (baserev, rev) can be delta-ed or not"""
582 586 # Disable delta if either rev requires a content-changing flag
583 587 # processor (ex. LFS). This is because such a flag processor can alter
584 588 # the rawtext content that the delta will be based on, and two clients
585 589 # could have the same revlog node with different flags (i.e. different
586 590 # rawtext contents) and the delta could be incompatible.
587 591 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
588 592 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
589 593 return False
590 594 return True
591 595
592 596 def clearcaches(self):
593 597 self._revisioncache = None
594 598 self._chainbasecache.clear()
595 599 self._chunkcache = (0, '')
596 600 self._pcache = {}
597 601
598 602 try:
599 603 self._nodecache.clearcaches()
600 604 except AttributeError:
601 605 self._nodecache = {nullid: nullrev}
602 606 self._nodepos = None
603 607
604 608 def rev(self, node):
605 609 try:
606 610 return self._nodecache[node]
607 611 except TypeError:
608 612 raise
609 613 except error.RevlogError:
610 614 # parsers.c radix tree lookup failed
611 615 if node == wdirid or node in wdirfilenodeids:
612 616 raise error.WdirUnsupported
613 617 raise error.LookupError(node, self.indexfile, _('no node'))
614 618 except KeyError:
615 619 # pure python cache lookup failed
616 620 n = self._nodecache
617 621 i = self.index
618 622 p = self._nodepos
619 623 if p is None:
620 624 p = len(i) - 1
621 625 else:
622 626 assert p < len(i)
623 627 for r in pycompat.xrange(p, -1, -1):
624 628 v = i[r][7]
625 629 n[v] = r
626 630 if v == node:
627 631 self._nodepos = r - 1
628 632 return r
629 633 if node == wdirid or node in wdirfilenodeids:
630 634 raise error.WdirUnsupported
631 635 raise error.LookupError(node, self.indexfile, _('no node'))
632 636
633 637 # Accessors for index entries.
634 638
635 639 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
636 640 # are flags.
637 641 def start(self, rev):
638 642 return int(self.index[rev][0] >> 16)
639 643
640 644 def flags(self, rev):
641 645 return self.index[rev][0] & 0xFFFF
642 646
643 647 def length(self, rev):
644 648 return self.index[rev][1]
645 649
646 650 def rawsize(self, rev):
647 651 """return the length of the uncompressed text for a given revision"""
648 652 l = self.index[rev][2]
649 653 if l >= 0:
650 654 return l
651 655
652 656 t = self.revision(rev, raw=True)
653 657 return len(t)
654 658
655 659 def size(self, rev):
656 660 """length of non-raw text (processed by a "read" flag processor)"""
657 661 # fast path: if no "read" flag processor could change the content,
658 662 # size is rawsize. note: ELLIPSIS is known to not change the content.
659 663 flags = self.flags(rev)
660 664 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
661 665 return self.rawsize(rev)
662 666
663 667 return len(self.revision(rev, raw=False))
664 668
665 669 def chainbase(self, rev):
666 670 base = self._chainbasecache.get(rev)
667 671 if base is not None:
668 672 return base
669 673
670 674 index = self.index
671 675 iterrev = rev
672 676 base = index[iterrev][3]
673 677 while base != iterrev:
674 678 iterrev = base
675 679 base = index[iterrev][3]
676 680
677 681 self._chainbasecache[rev] = base
678 682 return base
679 683
680 684 def linkrev(self, rev):
681 685 return self.index[rev][4]
682 686
683 687 def parentrevs(self, rev):
684 688 try:
685 689 entry = self.index[rev]
686 690 except IndexError:
687 691 if rev == wdirrev:
688 692 raise error.WdirUnsupported
689 693 raise
690 694
691 695 return entry[5], entry[6]
692 696
693 697 # fast parentrevs(rev) where rev isn't filtered
694 698 _uncheckedparentrevs = parentrevs
695 699
696 700 def node(self, rev):
697 701 try:
698 702 return self.index[rev][7]
699 703 except IndexError:
700 704 if rev == wdirrev:
701 705 raise error.WdirUnsupported
702 706 raise
703 707
704 708 # Derived from index values.
705 709
706 710 def end(self, rev):
707 711 return self.start(rev) + self.length(rev)
708 712
709 713 def parents(self, node):
710 714 i = self.index
711 715 d = i[self.rev(node)]
712 716 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
713 717
714 718 def chainlen(self, rev):
715 719 return self._chaininfo(rev)[0]
716 720
717 721 def _chaininfo(self, rev):
718 722 chaininfocache = self._chaininfocache
719 723 if rev in chaininfocache:
720 724 return chaininfocache[rev]
721 725 index = self.index
722 726 generaldelta = self._generaldelta
723 727 iterrev = rev
724 728 e = index[iterrev]
725 729 clen = 0
726 730 compresseddeltalen = 0
727 731 while iterrev != e[3]:
728 732 clen += 1
729 733 compresseddeltalen += e[1]
730 734 if generaldelta:
731 735 iterrev = e[3]
732 736 else:
733 737 iterrev -= 1
734 738 if iterrev in chaininfocache:
735 739 t = chaininfocache[iterrev]
736 740 clen += t[0]
737 741 compresseddeltalen += t[1]
738 742 break
739 743 e = index[iterrev]
740 744 else:
741 745 # Add text length of base since decompressing that also takes
742 746 # work. For cache hits the length is already included.
743 747 compresseddeltalen += e[1]
744 748 r = (clen, compresseddeltalen)
745 749 chaininfocache[rev] = r
746 750 return r
747 751
748 752 def _deltachain(self, rev, stoprev=None):
749 753 """Obtain the delta chain for a revision.
750 754
751 755 ``stoprev`` specifies a revision to stop at. If not specified, we
752 756 stop at the base of the chain.
753 757
754 758 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
755 759 revs in ascending order and ``stopped`` is a bool indicating whether
756 760 ``stoprev`` was hit.
757 761 """
758 762 # Try C implementation.
759 763 try:
760 764 return self.index.deltachain(rev, stoprev, self._generaldelta)
761 765 except AttributeError:
762 766 pass
763 767
764 768 chain = []
765 769
766 770 # Alias to prevent attribute lookup in tight loop.
767 771 index = self.index
768 772 generaldelta = self._generaldelta
769 773
770 774 iterrev = rev
771 775 e = index[iterrev]
772 776 while iterrev != e[3] and iterrev != stoprev:
773 777 chain.append(iterrev)
774 778 if generaldelta:
775 779 iterrev = e[3]
776 780 else:
777 781 iterrev -= 1
778 782 e = index[iterrev]
779 783
780 784 if iterrev == stoprev:
781 785 stopped = True
782 786 else:
783 787 chain.append(iterrev)
784 788 stopped = False
785 789
786 790 chain.reverse()
787 791 return chain, stopped
788 792
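A hedged sketch of how a chain returned by _deltachain() turns back into a fulltext; this is essentially what revision() below does, and rl and rev are assumptions standing in for a revlog instance and a revision number:

    chain, stopped = rl._deltachain(rev)
    bins = rl._chunks(chain)  # decompressed chunks, base text first
    fulltext = mdiff.patches(bytes(bins[0]), bins[1:])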
789 793 def ancestors(self, revs, stoprev=0, inclusive=False):
790 794 """Generate the ancestors of 'revs' in reverse revision order.
791 795 Does not generate revs lower than stoprev.
792 796
793 797 See the documentation for ancestor.lazyancestors for more details."""
794 798
795 799 # first, make sure start revisions aren't filtered
796 800 revs = list(revs)
797 801 checkrev = self.node
798 802 for r in revs:
799 803 checkrev(r)
800 804 # and we're sure ancestors aren't filtered as well
801 805
802 806 if rustext is not None:
803 807 lazyancestors = rustext.ancestor.LazyAncestors
804 808 arg = self.index
805 809 elif util.safehasattr(parsers, 'rustlazyancestors'):
806 810 lazyancestors = ancestor.rustlazyancestors
807 811 arg = self.index
808 812 else:
809 813 lazyancestors = ancestor.lazyancestors
810 814 arg = self._uncheckedparentrevs
811 815 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
812 816
813 817 def descendants(self, revs):
814 818 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
815 819
816 820 def findcommonmissing(self, common=None, heads=None):
817 821 """Return a tuple of the ancestors of common and the ancestors of heads
818 822 that are not ancestors of common. In revset terminology, we return the
819 823 tuple:
820 824
821 825 ::common, (::heads) - (::common)
822 826
823 827 The list is sorted by revision number, meaning it is
824 828 topologically sorted.
825 829
826 830 'heads' and 'common' are both lists of node IDs. If heads is
827 831 not supplied, uses all of the revlog's heads. If common is not
828 832 supplied, uses nullid."""
829 833 if common is None:
830 834 common = [nullid]
831 835 if heads is None:
832 836 heads = self.heads()
833 837
834 838 common = [self.rev(n) for n in common]
835 839 heads = [self.rev(n) for n in heads]
836 840
837 841 # we want the ancestors, but inclusive
838 842 class lazyset(object):
839 843 def __init__(self, lazyvalues):
840 844 self.addedvalues = set()
841 845 self.lazyvalues = lazyvalues
842 846
843 847 def __contains__(self, value):
844 848 return value in self.addedvalues or value in self.lazyvalues
845 849
846 850 def __iter__(self):
847 851 added = self.addedvalues
848 852 for r in added:
849 853 yield r
850 854 for r in self.lazyvalues:
851 855 if r not in added:
852 856 yield r
853 857
854 858 def add(self, value):
855 859 self.addedvalues.add(value)
856 860
857 861 def update(self, values):
858 862 self.addedvalues.update(values)
859 863
860 864 has = lazyset(self.ancestors(common))
861 865 has.add(nullrev)
862 866 has.update(common)
863 867
864 868 # take all ancestors from heads that aren't in has
865 869 missing = set()
866 870 visit = collections.deque(r for r in heads if r not in has)
867 871 while visit:
868 872 r = visit.popleft()
869 873 if r in missing:
870 874 continue
871 875 else:
872 876 missing.add(r)
873 877 for p in self.parentrevs(r):
874 878 if p not in has:
875 879 visit.append(p)
876 880 missing = list(missing)
877 881 missing.sort()
878 882 return has, [self.node(miss) for miss in missing]
879 883
880 884 def incrementalmissingrevs(self, common=None):
881 885 """Return an object that can be used to incrementally compute the
882 886 revision numbers of the ancestors of arbitrary sets that are not
883 887 ancestors of common. This is an ancestor.incrementalmissingancestors
884 888 object.
885 889
886 890 'common' is a list of revision numbers. If common is not supplied, uses
887 891 nullrev.
888 892 """
889 893 if common is None:
890 894 common = [nullrev]
891 895
892 896 return ancestor.incrementalmissingancestors(self.parentrevs, common)
893 897
894 898 def findmissingrevs(self, common=None, heads=None):
895 899 """Return the revision numbers of the ancestors of heads that
896 900 are not ancestors of common.
897 901
898 902 More specifically, return a list of revision numbers corresponding to
899 903 nodes N such that every N satisfies the following constraints:
900 904
901 905 1. N is an ancestor of some node in 'heads'
902 906 2. N is not an ancestor of any node in 'common'
903 907
904 908 The list is sorted by revision number, meaning it is
905 909 topologically sorted.
906 910
907 911 'heads' and 'common' are both lists of revision numbers. If heads is
908 912 not supplied, uses all of the revlog's heads. If common is not
909 913 supplied, uses nullid."""
910 914 if common is None:
911 915 common = [nullrev]
912 916 if heads is None:
913 917 heads = self.headrevs()
914 918
915 919 inc = self.incrementalmissingrevs(common=common)
916 920 return inc.missingancestors(heads)
917 921
918 922 def findmissing(self, common=None, heads=None):
919 923 """Return the ancestors of heads that are not ancestors of common.
920 924
921 925 More specifically, return a list of nodes N such that every N
922 926 satisfies the following constraints:
923 927
924 928 1. N is an ancestor of some node in 'heads'
925 929 2. N is not an ancestor of any node in 'common'
926 930
927 931 The list is sorted by revision number, meaning it is
928 932 topologically sorted.
929 933
930 934 'heads' and 'common' are both lists of node IDs. If heads is
931 935 not supplied, uses all of the revlog's heads. If common is not
932 936 supplied, uses nullid."""
933 937 if common is None:
934 938 common = [nullid]
935 939 if heads is None:
936 940 heads = self.heads()
937 941
938 942 common = [self.rev(n) for n in common]
939 943 heads = [self.rev(n) for n in heads]
940 944
941 945 inc = self.incrementalmissingrevs(common=common)
942 946 return [self.node(r) for r in inc.missingancestors(heads)]
943 947
944 948 def nodesbetween(self, roots=None, heads=None):
945 949 """Return a topological path from 'roots' to 'heads'.
946 950
947 951 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
948 952 topologically sorted list of all nodes N that satisfy both of
949 953 these constraints:
950 954
951 955 1. N is a descendant of some node in 'roots'
952 956 2. N is an ancestor of some node in 'heads'
953 957
954 958 Every node is considered to be both a descendant and an ancestor
955 959 of itself, so every reachable node in 'roots' and 'heads' will be
956 960 included in 'nodes'.
957 961
958 962 'outroots' is the list of reachable nodes in 'roots', i.e., the
959 963 subset of 'roots' that is returned in 'nodes'. Likewise,
960 964 'outheads' is the subset of 'heads' that is also in 'nodes'.
961 965
962 966 'roots' and 'heads' are both lists of node IDs. If 'roots' is
963 967 unspecified, uses nullid as the only root. If 'heads' is
964 968 unspecified, uses list of all of the revlog's heads."""
965 969 nonodes = ([], [], [])
966 970 if roots is not None:
967 971 roots = list(roots)
968 972 if not roots:
969 973 return nonodes
970 974 lowestrev = min([self.rev(n) for n in roots])
971 975 else:
972 976 roots = [nullid] # Everybody's a descendant of nullid
973 977 lowestrev = nullrev
974 978 if (lowestrev == nullrev) and (heads is None):
975 979 # We want _all_ the nodes!
976 980 return ([self.node(r) for r in self], [nullid], list(self.heads()))
977 981 if heads is None:
978 982 # All nodes are ancestors, so the latest ancestor is the last
979 983 # node.
980 984 highestrev = len(self) - 1
981 985 # Set ancestors to None to signal that every node is an ancestor.
982 986 ancestors = None
983 987 # Set heads to an empty dictionary for later discovery of heads
984 988 heads = {}
985 989 else:
986 990 heads = list(heads)
987 991 if not heads:
988 992 return nonodes
989 993 ancestors = set()
990 994 # Turn heads into a dictionary so we can remove 'fake' heads.
991 995 # Also, later we will be using it to filter out the heads we can't
992 996 # find from roots.
993 997 heads = dict.fromkeys(heads, False)
994 998 # Start at the top and keep marking parents until we're done.
995 999 nodestotag = set(heads)
996 1000 # Remember where the top was so we can use it as a limit later.
997 1001 highestrev = max([self.rev(n) for n in nodestotag])
998 1002 while nodestotag:
999 1003 # grab a node to tag
1000 1004 n = nodestotag.pop()
1001 1005 # Never tag nullid
1002 1006 if n == nullid:
1003 1007 continue
1004 1008 # A node's revision number represents its place in a
1005 1009 # topologically sorted list of nodes.
1006 1010 r = self.rev(n)
1007 1011 if r >= lowestrev:
1008 1012 if n not in ancestors:
1009 1013 # If we are possibly a descendant of one of the roots
1010 1014 # and we haven't already been marked as an ancestor
1011 1015 ancestors.add(n) # Mark as ancestor
1012 1016 # Add non-nullid parents to list of nodes to tag.
1013 1017 nodestotag.update([p for p in self.parents(n) if
1014 1018 p != nullid])
1015 1019 elif n in heads: # We've seen it before, is it a fake head?
1016 1020 # So it is, real heads should not be the ancestors of
1017 1021 # any other heads.
1018 1022 heads.pop(n)
1019 1023 if not ancestors:
1020 1024 return nonodes
1021 1025 # Now that we have our set of ancestors, we want to remove any
1022 1026 # roots that are not ancestors.
1023 1027
1024 1028 # If one of the roots was nullid, everything is included anyway.
1025 1029 if lowestrev > nullrev:
1026 1030 # But, since we weren't, let's recompute the lowest rev to not
1027 1031 # include roots that aren't ancestors.
1028 1032
1029 1033 # Filter out roots that aren't ancestors of heads
1030 1034 roots = [root for root in roots if root in ancestors]
1031 1035 # Recompute the lowest revision
1032 1036 if roots:
1033 1037 lowestrev = min([self.rev(root) for root in roots])
1034 1038 else:
1035 1039 # No more roots? Return empty list
1036 1040 return nonodes
1037 1041 else:
1038 1042 # We are descending from nullid, and don't need to care about
1039 1043 # any other roots.
1040 1044 lowestrev = nullrev
1041 1045 roots = [nullid]
1042 1046 # Transform our roots list into a set.
1043 1047 descendants = set(roots)
1044 1048 # Also, keep the original roots so we can filter out roots that aren't
1045 1049 # 'real' roots (i.e. are descended from other roots).
1046 1050 roots = descendants.copy()
1047 1051 # Our topologically sorted list of output nodes.
1048 1052 orderedout = []
1049 1053 # Don't start at nullid since we don't want nullid in our output list,
1050 1054 # and if nullid shows up in descendants, empty parents will look like
1051 1055 # they're descendants.
1052 1056 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1053 1057 n = self.node(r)
1054 1058 isdescendant = False
1055 1059 if lowestrev == nullrev: # Everybody is a descendant of nullid
1056 1060 isdescendant = True
1057 1061 elif n in descendants:
1058 1062 # n is already a descendant
1059 1063 isdescendant = True
1060 1064 # This check only needs to be done here because all the roots
1061 1065 # will start being marked as descendants before the loop.
1062 1066 if n in roots:
1063 1067 # If n was a root, check if it's a 'real' root.
1064 1068 p = tuple(self.parents(n))
1065 1069 # If any of its parents are descendants, it's not a root.
1066 1070 if (p[0] in descendants) or (p[1] in descendants):
1067 1071 roots.remove(n)
1068 1072 else:
1069 1073 p = tuple(self.parents(n))
1070 1074 # A node is a descendant if either of its parents are
1071 1075 # descendants. (We seeded the descendants set with the roots
1072 1076 # up there, remember?)
1073 1077 if (p[0] in descendants) or (p[1] in descendants):
1074 1078 descendants.add(n)
1075 1079 isdescendant = True
1076 1080 if isdescendant and ((ancestors is None) or (n in ancestors)):
1077 1081 # Only include nodes that are both descendants and ancestors.
1078 1082 orderedout.append(n)
1079 1083 if (ancestors is not None) and (n in heads):
1080 1084 # We're trying to figure out which heads are reachable
1081 1085 # from roots.
1082 1086 # Mark this head as having been reached
1083 1087 heads[n] = True
1084 1088 elif ancestors is None:
1085 1089 # Otherwise, we're trying to discover the heads.
1086 1090 # Assume this is a head because if it isn't, the next step
1087 1091 # will eventually remove it.
1088 1092 heads[n] = True
1089 1093 # But, obviously its parents aren't.
1090 1094 for p in self.parents(n):
1091 1095 heads.pop(p, None)
1092 1096 heads = [head for head, flag in heads.iteritems() if flag]
1093 1097 roots = list(roots)
1094 1098 assert orderedout
1095 1099 assert roots
1096 1100 assert heads
1097 1101 return (orderedout, roots, heads)
1098 1102
1099 1103 def headrevs(self):
1100 1104 try:
1101 1105 return self.index.headrevs()
1102 1106 except AttributeError:
1103 1107 return self._headrevs()
1104 1108
1105 1109 def computephases(self, roots):
1106 1110 return self.index.computephasesmapsets(roots)
1107 1111
1108 1112 def _headrevs(self):
1109 1113 count = len(self)
1110 1114 if not count:
1111 1115 return [nullrev]
1112 1116 # we won't iterate over filtered revs, so nobody is a head at the start
1113 1117 ishead = [0] * (count + 1)
1114 1118 index = self.index
1115 1119 for r in self:
1116 1120 ishead[r] = 1 # I may be a head
1117 1121 e = index[r]
1118 1122 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1119 1123 return [r for r, val in enumerate(ishead) if val]
1120 1124
1121 1125 def heads(self, start=None, stop=None):
1122 1126 """return the list of all nodes that have no children
1123 1127
1124 1128 if start is specified, only heads that are descendants of
1125 1129 start will be returned
1126 1130 if stop is specified, it will consider all the revs from stop
1127 1131 as if they had no children
1128 1132 """
1129 1133 if start is None and stop is None:
1130 1134 if not len(self):
1131 1135 return [nullid]
1132 1136 return [self.node(r) for r in self.headrevs()]
1133 1137
1134 1138 if start is None:
1135 1139 start = nullrev
1136 1140 else:
1137 1141 start = self.rev(start)
1138 1142
1139 1143 stoprevs = set(self.rev(n) for n in stop or [])
1140 1144
1141 1145 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1142 1146 stoprevs=stoprevs)
1143 1147
1144 1148 return [self.node(rev) for rev in revs]
1145 1149
1146 1150 def children(self, node):
1147 1151 """find the children of a given node"""
1148 1152 c = []
1149 1153 p = self.rev(node)
1150 1154 for r in self.revs(start=p + 1):
1151 1155 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1152 1156 if prevs:
1153 1157 for pr in prevs:
1154 1158 if pr == p:
1155 1159 c.append(self.node(r))
1156 1160 elif p == nullrev:
1157 1161 c.append(self.node(r))
1158 1162 return c
1159 1163
1160 1164 def commonancestorsheads(self, a, b):
1161 1165 """calculate all the heads of the common ancestors of nodes a and b"""
1162 1166 a, b = self.rev(a), self.rev(b)
1163 1167 ancs = self._commonancestorsheads(a, b)
1164 1168 return pycompat.maplist(self.node, ancs)
1165 1169
1166 1170 def _commonancestorsheads(self, *revs):
1167 1171 """calculate all the heads of the common ancestors of revs"""
1168 1172 try:
1169 1173 ancs = self.index.commonancestorsheads(*revs)
1170 1174 except (AttributeError, OverflowError): # C implementation failed
1171 1175 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1172 1176 return ancs
1173 1177
1174 1178 def isancestor(self, a, b):
1175 1179 """return True if node a is an ancestor of node b
1176 1180
1177 1181 A revision is considered an ancestor of itself."""
1178 1182 a, b = self.rev(a), self.rev(b)
1179 1183 return self.isancestorrev(a, b)
1180 1184
1181 1185 def isancestorrev(self, a, b):
1182 1186 """return True if revision a is an ancestor of revision b
1183 1187
1184 1188 A revision is considered an ancestor of itself.
1185 1189
1186 1190 The implementation of this is trivial but the use of
1187 1191 commonancestorsheads is not."""
1188 1192 if a == nullrev:
1189 1193 return True
1190 1194 elif a == b:
1191 1195 return True
1192 1196 elif a > b:
1193 1197 return False
1194 1198 return a in self._commonancestorsheads(a, b)
1195 1199
1196 1200 def ancestor(self, a, b):
1197 1201 """calculate the "best" common ancestor of nodes a and b"""
1198 1202
1199 1203 a, b = self.rev(a), self.rev(b)
1200 1204 try:
1201 1205 ancs = self.index.ancestors(a, b)
1202 1206 except (AttributeError, OverflowError):
1203 1207 ancs = ancestor.ancestors(self.parentrevs, a, b)
1204 1208 if ancs:
1205 1209 # choose a consistent winner when there's a tie
1206 1210 return min(map(self.node, ancs))
1207 1211 return nullid
1208 1212
1209 1213 def _match(self, id):
1210 1214 if isinstance(id, int):
1211 1215 # rev
1212 1216 return self.node(id)
1213 1217 if len(id) == 20:
1214 1218 # possibly a binary node
1215 1219 # odds of a binary node being all hex in ASCII are 1 in 10**25
1216 1220 try:
1217 1221 node = id
1218 1222 self.rev(node) # quick search the index
1219 1223 return node
1220 1224 except error.LookupError:
1221 1225 pass # may be partial hex id
1222 1226 try:
1223 1227 # str(rev)
1224 1228 rev = int(id)
1225 1229 if "%d" % rev != id:
1226 1230 raise ValueError
1227 1231 if rev < 0:
1228 1232 rev = len(self) + rev
1229 1233 if rev < 0 or rev >= len(self):
1230 1234 raise ValueError
1231 1235 return self.node(rev)
1232 1236 except (ValueError, OverflowError):
1233 1237 pass
1234 1238 if len(id) == 40:
1235 1239 try:
1236 1240 # a full hex nodeid?
1237 1241 node = bin(id)
1238 1242 self.rev(node)
1239 1243 return node
1240 1244 except (TypeError, error.LookupError):
1241 1245 pass
1242 1246
1243 1247 def _partialmatch(self, id):
1244 1248 # we don't care about wdirfilenodeids as they should always be full hashes
1245 1249 maybewdir = wdirhex.startswith(id)
1246 1250 try:
1247 1251 partial = self.index.partialmatch(id)
1248 1252 if partial and self.hasnode(partial):
1249 1253 if maybewdir:
1250 1254 # single 'ff...' match in radix tree, ambiguous with wdir
1251 1255 raise error.RevlogError
1252 1256 return partial
1253 1257 if maybewdir:
1254 1258 # no 'ff...' match in radix tree, wdir identified
1255 1259 raise error.WdirUnsupported
1256 1260 return None
1257 1261 except error.RevlogError:
1258 1262 # parsers.c radix tree lookup gave multiple matches
1259 1263 # fast path: for unfiltered changelog, radix tree is accurate
1260 1264 if not getattr(self, 'filteredrevs', None):
1261 1265 raise error.AmbiguousPrefixLookupError(
1262 1266 id, self.indexfile, _('ambiguous identifier'))
1263 1267 # fall through to slow path that filters hidden revisions
1264 1268 except (AttributeError, ValueError):
1265 1269 # we are pure python, or key was too short to search radix tree
1266 1270 pass
1267 1271
1268 1272 if id in self._pcache:
1269 1273 return self._pcache[id]
1270 1274
1271 1275 if len(id) <= 40:
1272 1276 try:
1273 1277 # hex(node)[:...]
1274 1278 l = len(id) // 2 # grab an even number of digits
1275 1279 prefix = bin(id[:l * 2])
1276 1280 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1277 1281 nl = [n for n in nl if hex(n).startswith(id) and
1278 1282 self.hasnode(n)]
1279 1283 if nullhex.startswith(id):
1280 1284 nl.append(nullid)
1281 1285 if len(nl) > 0:
1282 1286 if len(nl) == 1 and not maybewdir:
1283 1287 self._pcache[id] = nl[0]
1284 1288 return nl[0]
1285 1289 raise error.AmbiguousPrefixLookupError(
1286 1290 id, self.indexfile, _('ambiguous identifier'))
1287 1291 if maybewdir:
1288 1292 raise error.WdirUnsupported
1289 1293 return None
1290 1294 except TypeError:
1291 1295 pass
1292 1296
1293 1297 def lookup(self, id):
1294 1298 """locate a node based on:
1295 1299 - revision number or str(revision number)
1296 1300 - nodeid or subset of hex nodeid
1297 1301 """
1298 1302 n = self._match(id)
1299 1303 if n is not None:
1300 1304 return n
1301 1305 n = self._partialmatch(id)
1302 1306 if n:
1303 1307 return n
1304 1308
1305 1309 raise error.LookupError(id, self.indexfile, _('no match found'))
1306 1310
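Illustrative forms that lookup() resolves, per _match() and _partialmatch() above (rl and the example ids are assumptions):

    # rl.lookup(b'0')       -> node of revision 0 (str(rev) form)
    # rl.lookup(b'-1')      -> tip node (negative revs count from the end)
    # rl.lookup(node)       -> the node itself, when the 20-byte id is present
    # rl.lookup(b'1f0c4e')  -> unique hex-prefix match, else an
    #                          AmbiguousPrefixLookupError or LookupError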
1307 1311 def shortest(self, node, minlength=1):
1308 1312 """Find the shortest unambiguous prefix that matches node."""
1309 1313 def isvalid(prefix):
1310 1314 try:
1311 1315 node = self._partialmatch(prefix)
1312 1316 except error.AmbiguousPrefixLookupError:
1313 1317 return False
1314 1318 except error.WdirUnsupported:
1315 1319 # single 'ff...' match
1316 1320 return True
1317 1321 if node is None:
1318 1322 raise error.LookupError(node, self.indexfile, _('no node'))
1319 1323 return True
1320 1324
1321 1325 def maybewdir(prefix):
1322 1326 return all(c == 'f' for c in prefix)
1323 1327
1324 1328 hexnode = hex(node)
1325 1329
1326 1330 def disambiguate(hexnode, minlength):
1327 1331 """Disambiguate against wdirid."""
1328 1332 for length in range(minlength, 41):
1329 1333 prefix = hexnode[:length]
1330 1334 if not maybewdir(prefix):
1331 1335 return prefix
1332 1336
1333 1337 if not getattr(self, 'filteredrevs', None):
1334 1338 try:
1335 1339 length = max(self.index.shortest(node), minlength)
1336 1340 return disambiguate(hexnode, length)
1337 1341 except error.RevlogError:
1338 1342 if node != wdirid:
1339 1343 raise error.LookupError(node, self.indexfile, _('no node'))
1340 1344 except AttributeError:
1341 1345 # Fall through to pure code
1342 1346 pass
1343 1347
1344 1348 if node == wdirid:
1345 1349 for length in range(minlength, 41):
1346 1350 prefix = hexnode[:length]
1347 1351 if isvalid(prefix):
1348 1352 return prefix
1349 1353
1350 1354 for length in range(minlength, 41):
1351 1355 prefix = hexnode[:length]
1352 1356 if isvalid(prefix):
1353 1357 return disambiguate(hexnode, length)
1354 1358
1355 1359 def cmp(self, node, text):
1356 1360 """compare text with a given file revision
1357 1361
1358 1362 returns True if text is different than what is stored.
1359 1363 """
1360 1364 p1, p2 = self.parents(node)
1361 1365 return storageutil.hashrevisionsha1(text, p1, p2) != node
1362 1366
1363 1367 def _cachesegment(self, offset, data):
1364 1368 """Add a segment to the revlog cache.
1365 1369
1366 1370 Accepts an absolute offset and the data that is at that location.
1367 1371 """
1368 1372 o, d = self._chunkcache
1369 1373 # try to add to existing cache
1370 1374 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1371 1375 self._chunkcache = o, d + data
1372 1376 else:
1373 1377 self._chunkcache = offset, data
1374 1378
1375 1379 def _readsegment(self, offset, length, df=None):
1376 1380 """Load a segment of raw data from the revlog.
1377 1381
1378 1382 Accepts an absolute offset, length to read, and an optional existing
1379 1383 file handle to read from.
1380 1384
1381 1385 If an existing file handle is passed, it will be seeked and the
1382 1386 original seek position will NOT be restored.
1383 1387
1384 1388 Returns a str or buffer of raw byte data.
1385 1389
1386 1390 Raises if the requested number of bytes could not be read.
1387 1391 """
1388 1392 # Cache data both forward and backward around the requested
1389 1393 # data, in a fixed size window. This helps speed up operations
1390 1394 # involving reading the revlog backwards.
1391 1395 cachesize = self._chunkcachesize
1392 1396 realoffset = offset & ~(cachesize - 1)
1393 1397 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1394 1398 - realoffset)
1395 1399 with self._datareadfp(df) as df:
1396 1400 df.seek(realoffset)
1397 1401 d = df.read(reallength)
1398 1402
1399 1403 self._cachesegment(realoffset, d)
1400 1404 if offset != realoffset or reallength != length:
1401 1405 startoffset = offset - realoffset
1402 1406 if len(d) - startoffset < length:
1403 1407 raise error.RevlogError(
1404 1408 _('partial read of revlog %s; expected %d bytes from '
1405 1409 'offset %d, got %d') %
1406 1410 (self.indexfile if self._inline else self.datafile,
1407 1411 length, realoffset, len(d) - startoffset))
1408 1412
1409 1413 return util.buffer(d, startoffset, length)
1410 1414
1411 1415 if len(d) < length:
1412 1416 raise error.RevlogError(
1413 1417 _('partial read of revlog %s; expected %d bytes from offset '
1414 1418 '%d, got %d') %
1415 1419 (self.indexfile if self._inline else self.datafile,
1416 1420 length, offset, len(d)))
1417 1421
1418 1422 return d
1419 1423
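The window arithmetic above aligns reads to multiples of the chunk cache size; a worked example with the default 64k cache:

    cachesize = 65536
    offset, length = 65000, 2000  # the request spans a chunk boundary
    realoffset = offset & ~(cachesize - 1)  # 0
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)             # 131072, i.e. two chunks
    assert (realoffset, reallength) == (0, 131072)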
1420 1424 def _getsegment(self, offset, length, df=None):
1421 1425 """Obtain a segment of raw data from the revlog.
1422 1426
1423 1427 Accepts an absolute offset, length of bytes to obtain, and an
1424 1428 optional file handle to the already-opened revlog. If the file
1425 1429 handle is used, its original seek position will not be preserved.
1426 1430
1427 1431 Requests for data may be returned from a cache.
1428 1432
1429 1433 Returns a str or a buffer instance of raw byte data.
1430 1434 """
1431 1435 o, d = self._chunkcache
1432 1436 l = len(d)
1433 1437
1434 1438 # is it in the cache?
1435 1439 cachestart = offset - o
1436 1440 cacheend = cachestart + length
1437 1441 if cachestart >= 0 and cacheend <= l:
1438 1442 if cachestart == 0 and cacheend == l:
1439 1443 return d # avoid a copy
1440 1444 return util.buffer(d, cachestart, cacheend - cachestart)
1441 1445
1442 1446 return self._readsegment(offset, length, df=df)
1443 1447
1444 1448 def _getsegmentforrevs(self, startrev, endrev, df=None):
1445 1449 """Obtain a segment of raw data corresponding to a range of revisions.
1446 1450
1447 1451 Accepts the start and end revisions and an optional already-open
1448 1452 file handle to be used for reading. If the file handle is used, its
1449 1453 seek position will not be preserved.
1450 1454
1451 1455 Requests for data may be satisfied by a cache.
1452 1456
1453 1457 Returns a 2-tuple of (offset, data) for the requested range of
1454 1458 revisions. Offset is the integer offset from the beginning of the
1455 1459 revlog and data is a str or buffer of the raw byte data.
1456 1460
1457 1461 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1458 1462 to determine where each revision's data begins and ends.
1459 1463 """
1460 1464 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1461 1465 # (functions are expensive).
1462 1466 index = self.index
1463 1467 istart = index[startrev]
1464 1468 start = int(istart[0] >> 16)
1465 1469 if startrev == endrev:
1466 1470 end = start + istart[1]
1467 1471 else:
1468 1472 iend = index[endrev]
1469 1473 end = int(iend[0] >> 16) + iend[1]
1470 1474
1471 1475 if self._inline:
1472 1476 start += (startrev + 1) * self._io.size
1473 1477 end += (endrev + 1) * self._io.size
1474 1478 length = end - start
1475 1479
1476 1480 return start, self._getsegment(start, length, df=df)
1477 1481
1478 1482 def _chunk(self, rev, df=None):
1479 1483 """Obtain a single decompressed chunk for a revision.
1480 1484
1481 1485 Accepts an integer revision and an optional already-open file handle
1482 1486 to be used for reading. If used, the seek position of the file will not
1483 1487 be preserved.
1484 1488
1485 1489 Returns a str holding uncompressed data for the requested revision.
1486 1490 """
1487 1491 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1488 1492
1489 1493 def _chunks(self, revs, df=None, targetsize=None):
1490 1494 """Obtain decompressed chunks for the specified revisions.
1491 1495
1492 1496 Accepts an iterable of numeric revisions that are assumed to be in
1493 1497 ascending order. Also accepts an optional already-open file handle
1494 1498 to be used for reading. If used, the seek position of the file will
1495 1499 not be preserved.
1496 1500
1497 1501 This function is similar to calling ``self._chunk()`` multiple times,
1498 1502 but is faster.
1499 1503
1500 1504 Returns a list with decompressed data for each requested revision.
1501 1505 """
1502 1506 if not revs:
1503 1507 return []
1504 1508 start = self.start
1505 1509 length = self.length
1506 1510 inline = self._inline
1507 1511 iosize = self._io.size
1508 1512 buffer = util.buffer
1509 1513
1510 1514 l = []
1511 1515 ladd = l.append
1512 1516
1513 1517 if not self._withsparseread:
1514 1518 slicedchunks = (revs,)
1515 1519 else:
1516 1520 slicedchunks = deltautil.slicechunk(self, revs,
1517 1521 targetsize=targetsize)
1518 1522
1519 1523 for revschunk in slicedchunks:
1520 1524 firstrev = revschunk[0]
1521 1525 # Skip trailing revisions with empty diff
1522 1526 for lastrev in revschunk[::-1]:
1523 1527 if length(lastrev) != 0:
1524 1528 break
1525 1529
1526 1530 try:
1527 1531 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1528 1532 except OverflowError:
1529 1533 # issue4215 - we can't cache a run of chunks greater than
1530 1534 # 2G on Windows
1531 1535 return [self._chunk(rev, df=df) for rev in revschunk]
1532 1536
1533 1537 decomp = self.decompress
1534 1538 for rev in revschunk:
1535 1539 chunkstart = start(rev)
1536 1540 if inline:
1537 1541 chunkstart += (rev + 1) * iosize
1538 1542 chunklength = length(rev)
1539 1543 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1540 1544
1541 1545 return l
1542 1546
1543 1547 def _chunkclear(self):
1544 1548 """Clear the raw chunk cache."""
1545 1549 self._chunkcache = (0, '')
1546 1550
1547 1551 def deltaparent(self, rev):
1548 1552 """return deltaparent of the given revision"""
1549 1553 base = self.index[rev][3]
1550 1554 if base == rev:
1551 1555 return nullrev
1552 1556 elif self._generaldelta:
1553 1557 return base
1554 1558 else:
1555 1559 return rev - 1
1556 1560
1557 1561 def issnapshot(self, rev):
1558 1562 """tells whether rev is a snapshot
1559 1563 """
1560 1564 if not self._sparserevlog:
1561 1565 return self.deltaparent(rev) == nullrev
1562 1566 elif util.safehasattr(self.index, 'issnapshot'):
1563 1567 # directly assign the method to cache the testing and access
1564 1568 self.issnapshot = self.index.issnapshot
1565 1569 return self.issnapshot(rev)
1566 1570 if rev == nullrev:
1567 1571 return True
1568 1572 entry = self.index[rev]
1569 1573 base = entry[3]
1570 1574 if base == rev:
1571 1575 return True
1572 1576 if base == nullrev:
1573 1577 return True
1574 1578 p1 = entry[5]
1575 1579 p2 = entry[6]
1576 1580 if base == p1 or base == p2:
1577 1581 return False
1578 1582 return self.issnapshot(base)
1579 1583
1580 1584 def snapshotdepth(self, rev):
1581 1585         """number of snapshots in the chain before this one"""
1582 1586         if not self.issnapshot(rev):
1583 1587             raise error.ProgrammingError('revision %d not a snapshot' % rev)
1584 1588 return len(self._deltachain(rev)[0]) - 1
1585 1589
1586 1590 def revdiff(self, rev1, rev2):
1587 1591 """return or calculate a delta between two revisions
1588 1592
1589 1593 The delta calculated is in binary form and is intended to be written to
1590 1594 revlog data directly. So this function needs raw revision data.
1591 1595 """
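        # Fast path: if rev2's stored delta is already against rev1, the
        # stored chunk can be returned as-is; otherwise both raw texts are
        # rebuilt and diffed, e.g. ``rl.revdiff(rev - 1, rev)``.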
1592 1596 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1593 1597 return bytes(self._chunk(rev2))
1594 1598
1595 1599 return mdiff.textdiff(self.revision(rev1, raw=True),
1596 1600 self.revision(rev2, raw=True))
1597 1601
1598 1602 def revision(self, nodeorrev, _df=None, raw=False):
1599 1603 """return an uncompressed revision of a given node or revision
1600 1604 number.
1601 1605
1602 1606 _df - an existing file handle to read from. (internal-only)
1603 1607 raw - an optional argument specifying if the revision data is to be
1604 1608 treated as raw data when applying flag transforms. 'raw' should be set
1605 1609 to True when generating changegroups or in debug commands.
1606 1610 """
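        # Typical usage (assuming ``rl`` is a revlog and ``n`` a node in
        # it): ``rl.revision(n)`` returns the restored text with flag
        # transforms applied, while ``rl.revision(n, raw=True)`` returns
        # the bytes as stored, suitable for changegroup generation.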
1607 1611 if isinstance(nodeorrev, int):
1608 1612 rev = nodeorrev
1609 1613 node = self.node(rev)
1610 1614 else:
1611 1615 node = nodeorrev
1612 1616 rev = None
1613 1617
1614 1618 cachedrev = None
1615 1619 flags = None
1616 1620 rawtext = None
1617 1621 if node == nullid:
1618 1622 return ""
1619 1623 if self._revisioncache:
1620 1624 if self._revisioncache[0] == node:
1621 1625 # _cache only stores rawtext
1622 1626 if raw:
1623 1627 return self._revisioncache[2]
1624 1628 # duplicated, but good for perf
1625 1629 if rev is None:
1626 1630 rev = self.rev(node)
1627 1631 if flags is None:
1628 1632 flags = self.flags(rev)
1629 1633 # no extra flags set, no flag processor runs, text = rawtext
1630 1634 if flags == REVIDX_DEFAULT_FLAGS:
1631 1635 return self._revisioncache[2]
1632 1636 # rawtext is reusable. need to run flag processor
1633 1637 rawtext = self._revisioncache[2]
1634 1638
1635 1639 cachedrev = self._revisioncache[1]
1636 1640
1637 1641 # look up what we need to read
1638 1642 if rawtext is None:
1639 1643 if rev is None:
1640 1644 rev = self.rev(node)
1641 1645
1642 1646 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1643 1647 if stopped:
1644 1648 rawtext = self._revisioncache[2]
1645 1649
1646 1650 # drop cache to save memory
1647 1651 self._revisioncache = None
1648 1652
1649 1653 targetsize = None
1650 1654 rawsize = self.index[rev][2]
1651 1655 if 0 <= rawsize:
1652 1656 targetsize = 4 * rawsize
1653 1657
1654 1658 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1655 1659 if rawtext is None:
1656 1660 rawtext = bytes(bins[0])
1657 1661 bins = bins[1:]
1658 1662
1659 1663 rawtext = mdiff.patches(rawtext, bins)
1660 1664 self._revisioncache = (node, rev, rawtext)
1661 1665
1662 1666 if flags is None:
1663 1667 if rev is None:
1664 1668 rev = self.rev(node)
1665 1669 flags = self.flags(rev)
1666 1670
1667 1671 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1668 1672 if validatehash:
1669 1673 self.checkhash(text, node, rev=rev)
1670 1674
1671 1675 return text
1672 1676
1673 1677 def hash(self, text, p1, p2):
1674 1678 """Compute a node hash.
1675 1679
1676 1680 Available as a function so that subclasses can replace the hash
1677 1681 as needed.
1678 1682 """
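        # The default is the historical SHA-1 over the two parent nodes
        # (sorted) followed by the text, via storageutil.hashrevisionsha1().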
1679 1683 return storageutil.hashrevisionsha1(text, p1, p2)
1680 1684
1681 1685 def _processflags(self, text, flags, operation, raw=False):
1682 1686 """Inspect revision data flags and applies transforms defined by
1683 1687 registered flag processors.
1684 1688
1685 1689 ``text`` - the revision data to process
1686 1690 ``flags`` - the revision flags
1687 1691 ``operation`` - the operation being performed (read or write)
1688 1692 ``raw`` - an optional argument describing if the raw transform should be
1689 1693 applied.
1690 1694
1691 1695 This method processes the flags in the order (or reverse order if
1692 1696 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1693 1697         flag processors registered for present flags. The order of flags defined
1694 1698         in REVIDX_FLAGS_ORDER must be stable because the transforms do not commute.
1695 1699
1696 1700 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1697 1701 processed text and ``validatehash`` is a bool indicating whether the
1698 1702 returned text should be checked for hash integrity.
1699 1703
1700 1704 Note: If the ``raw`` argument is set, it has precedence over the
1701 1705 operation and will only update the value of ``validatehash``.
1702 1706 """
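        # Example: a revision carrying an ellipsis or external-storage flag
        # has the matching read transform applied here, and that transform
        # may disable hash validation of the returned text.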
1703 1707 # fast path: no flag processors will run
1704 1708 if flags == 0:
1705 1709 return text, True
1706 1710         if operation not in ('read', 'write'):
1707 1711 raise error.ProgrammingError(_("invalid '%s' operation") %
1708 1712 operation)
1709 1713 # Check all flags are known.
1710 1714 if flags & ~REVIDX_KNOWN_FLAGS:
1711 1715 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1712 1716 (flags & ~REVIDX_KNOWN_FLAGS))
1713 1717 validatehash = True
1714 1718 # Depending on the operation (read or write), the order might be
1715 1719 # reversed due to non-commutative transforms.
1716 1720 orderedflags = REVIDX_FLAGS_ORDER
1717 1721 if operation == 'write':
1718 1722 orderedflags = reversed(orderedflags)
1719 1723
1720 1724 for flag in orderedflags:
1721 1725 # If a flagprocessor has been registered for a known flag, apply the
1722 1726 # related operation transform and update result tuple.
1723 1727 if flag & flags:
1724 1728 vhash = True
1725 1729
1726 1730 if flag not in self._flagprocessors:
1727 1731 message = _("missing processor for flag '%#x'") % (flag)
1728 1732 raise error.RevlogError(message)
1729 1733
1730 1734 processor = self._flagprocessors[flag]
1731 1735 if processor is not None:
1732 1736 readtransform, writetransform, rawtransform = processor
1733 1737
1734 1738 if raw:
1735 1739 vhash = rawtransform(self, text)
1736 1740 elif operation == 'read':
1737 1741 text, vhash = readtransform(self, text)
1738 1742 else: # write operation
1739 1743 text, vhash = writetransform(self, text)
1740 1744 validatehash = validatehash and vhash
1741 1745
1742 1746 return text, validatehash
1743 1747
1744 1748 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1745 1749 """Check node hash integrity.
1746 1750
1747 1751 Available as a function so that subclasses can extend hash mismatch
1748 1752 behaviors as needed.
1749 1753 """
1750 1754 try:
1751 1755 if p1 is None and p2 is None:
1752 1756 p1, p2 = self.parents(node)
1753 1757 if node != self.hash(text, p1, p2):
1754 1758 # Clear the revision cache on hash failure. The revision cache
1755 1759 # only stores the raw revision and clearing the cache does have
1756 1760 # the side-effect that we won't have a cache hit when the raw
1757 1761 # revision data is accessed. But this case should be rare and
1758 1762 # it is extra work to teach the cache about the hash
1759 1763 # verification state.
1760 1764 if self._revisioncache and self._revisioncache[0] == node:
1761 1765 self._revisioncache = None
1762 1766
1763 1767 revornode = rev
1764 1768 if revornode is None:
1765 1769 revornode = templatefilters.short(hex(node))
1766 1770 raise error.RevlogError(_("integrity check failed on %s:%s")
1767 1771 % (self.indexfile, pycompat.bytestr(revornode)))
1768 1772 except error.RevlogError:
1769 1773 if self._censorable and storageutil.iscensoredtext(text):
1770 1774 raise error.CensoredNodeError(self.indexfile, node, text)
1771 1775 raise
1772 1776
1773 1777 def _enforceinlinesize(self, tr, fp=None):
1774 1778 """Check if the revlog is too big for inline and convert if so.
1775 1779
1776 1780 This should be called after revisions are added to the revlog. If the
1777 1781 revlog has grown too large to be an inline revlog, it will convert it
1778 1782 to use multiple index and data files.
1779 1783 """
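        # An inline revlog interleaves revision data with the index in a
        # single '.i' file; once it grows past _maxinline, the data is
        # copied out to a separate '.d' file and the inline flag cleared.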
1780 1784 tiprev = len(self) - 1
1781 1785 if (not self._inline or
1782 1786 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1783 1787 return
1784 1788
1785 1789 trinfo = tr.find(self.indexfile)
1786 1790 if trinfo is None:
1787 1791 raise error.RevlogError(_("%s not found in the transaction")
1788 1792 % self.indexfile)
1789 1793
1790 1794 trindex = trinfo[2]
1791 1795 if trindex is not None:
1792 1796 dataoff = self.start(trindex)
1793 1797 else:
1794 1798 # revlog was stripped at start of transaction, use all leftover data
1795 1799 trindex = len(self) - 1
1796 1800 dataoff = self.end(tiprev)
1797 1801
1798 1802 tr.add(self.datafile, dataoff)
1799 1803
1800 1804 if fp:
1801 1805 fp.flush()
1802 1806 fp.close()
1803 1807 # We can't use the cached file handle after close(). So prevent
1804 1808 # its usage.
1805 1809 self._writinghandles = None
1806 1810
1807 1811 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1808 1812 for r in self:
1809 1813 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1810 1814
1811 1815 with self._indexfp('w') as fp:
1812 1816 self.version &= ~FLAG_INLINE_DATA
1813 1817 self._inline = False
1814 1818 io = self._io
1815 1819 for i in self:
1816 1820 e = io.packentry(self.index[i], self.node, self.version, i)
1817 1821 fp.write(e)
1818 1822
1819 1823         # the temp file replaces the real index when we exit the context
1820 1824         # manager
1821 1825
1822 1826 tr.replace(self.indexfile, trindex * self._io.size)
1823 1827 self._chunkclear()
1824 1828
1825 1829 def _nodeduplicatecallback(self, transaction, node):
1826 1830 """called when trying to add a node already stored.
1827 1831 """
1828 1832
1829 1833 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1830 1834 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1831 1835 """add a revision to the log
1832 1836
1833 1837 text - the revision data to add
1834 1838 transaction - the transaction object used for rollback
1835 1839 link - the linkrev data to add
1836 1840 p1, p2 - the parent nodeids of the revision
1837 1841 cachedelta - an optional precomputed delta
1838 1842 node - nodeid of revision; typically node is not specified, and it is
1839 1843 computed by default as hash(text, p1, p2), however subclasses might
1840 1844 use different hashing method (and override checkhash() in such case)
1841 1845 flags - the known flags to set on the revision
1842 1846 deltacomputer - an optional deltacomputer instance shared between
1843 1847 multiple calls
1844 1848 """
1845 1849 if link == nullrev:
1846 1850 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1847 1851 % self.indexfile)
1848 1852
1849 1853 if flags:
1850 1854 node = node or self.hash(text, p1, p2)
1851 1855
1852 1856 rawtext, validatehash = self._processflags(text, flags, 'write')
1853 1857
1854 1858 # If the flag processor modifies the revision data, ignore any provided
1855 1859 # cachedelta.
1856 1860 if rawtext != text:
1857 1861 cachedelta = None
1858 1862
1859 1863 if len(rawtext) > _maxentrysize:
1860 1864 raise error.RevlogError(
1861 1865 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1862 1866 % (self.indexfile, len(rawtext)))
1863 1867
1864 1868 node = node or self.hash(rawtext, p1, p2)
1865 1869 if node in self.nodemap:
1866 1870 return node
1867 1871
1868 1872 if validatehash:
1869 1873 self.checkhash(rawtext, node, p1=p1, p2=p2)
1870 1874
1871 1875 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1872 1876 flags, cachedelta=cachedelta,
1873 1877 deltacomputer=deltacomputer)
1874 1878
1875 1879 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1876 1880 cachedelta=None, deltacomputer=None):
1877 1881 """add a raw revision with known flags, node and parents
1878 1882         useful when reusing a revision not stored in this revlog (e.g. received
1879 1883         over the wire, or read from an external bundle).
1880 1884 """
1881 1885 dfh = None
1882 1886 if not self._inline:
1883 1887 dfh = self._datafp("a+")
1884 1888 ifh = self._indexfp("a+")
1885 1889 try:
1886 1890 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1887 1891 flags, cachedelta, ifh, dfh,
1888 1892 deltacomputer=deltacomputer)
1889 1893 finally:
1890 1894 if dfh:
1891 1895 dfh.close()
1892 1896 ifh.close()
1893 1897
1894 1898 def compress(self, data):
1895 1899 """Generate a possibly-compressed representation of data."""
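        # Returns a (header, data) pair: an empty header when the compressed
        # output carries its own header (e.g. zlib's 'x'), 'u' for data
        # stored uncompressed, and no marker when the data already starts
        # with '\0'.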
1896 1900 if not data:
1897 1901 return '', data
1898 1902
1899 1903 compressed = self._compressor.compress(data)
1900 1904
1901 1905 if compressed:
1902 1906 # The revlog compressor added the header in the returned data.
1903 1907 return '', compressed
1904 1908
1905 1909 if data[0:1] == '\0':
1906 1910 return '', data
1907 1911 return 'u', data
1908 1912
1909 1913 def decompress(self, data):
1910 1914 """Decompress a revlog chunk.
1911 1915
1912 1916 The chunk is expected to begin with a header identifying the
1913 1917 format type so it can be routed to an appropriate decompressor.
1914 1918 """
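        # Known one-byte headers: 'x' for zlib, '\0' for raw data stored
        # verbatim, and 'u' for an uncompressed chunk whose marker must be
        # stripped; anything else goes through the compression engine
        # registry.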
1915 1919 if not data:
1916 1920 return data
1917 1921
1918 1922 # Revlogs are read much more frequently than they are written and many
1919 1923 # chunks only take microseconds to decompress, so performance is
1920 1924 # important here.
1921 1925 #
1922 1926 # We can make a few assumptions about revlogs:
1923 1927 #
1924 1928 # 1) the majority of chunks will be compressed (as opposed to inline
1925 1929 # raw data).
1926 1930         # 2) decompressing *any* data will likely be at least 10x slower than
1927 1931 # returning raw inline data.
1928 1932 # 3) we want to prioritize common and officially supported compression
1929 1933 # engines
1930 1934 #
1931 1935 # It follows that we want to optimize for "decompress compressed data
1932 1936 # when encoded with common and officially supported compression engines"
1933 1937 # case over "raw data" and "data encoded by less common or non-official
1934 1938 # compression engines." That is why we have the inline lookup first
1935 1939 # followed by the compengines lookup.
1936 1940 #
1937 1941 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1938 1942 # compressed chunks. And this matters for changelog and manifest reads.
1939 1943 t = data[0:1]
1940 1944
1941 1945 if t == 'x':
1942 1946 try:
1943 1947 return _zlibdecompress(data)
1944 1948 except zlib.error as e:
1945 1949 raise error.RevlogError(_('revlog decompress error: %s') %
1946 1950 stringutil.forcebytestr(e))
1947 1951 # '\0' is more common than 'u' so it goes first.
1948 1952 elif t == '\0':
1949 1953 return data
1950 1954 elif t == 'u':
1951 1955 return util.buffer(data, 1)
1952 1956
1953 1957 try:
1954 1958 compressor = self._decompressors[t]
1955 1959 except KeyError:
1956 1960 try:
1957 1961 engine = util.compengines.forrevlogheader(t)
1958 1962 compressor = engine.revlogcompressor()
1959 1963 self._decompressors[t] = compressor
1960 1964 except KeyError:
1961 1965 raise error.RevlogError(_('unknown compression type %r') % t)
1962 1966
1963 1967 return compressor.decompress(data)
1964 1968
1965 1969 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1966 1970 cachedelta, ifh, dfh, alwayscache=False,
1967 1971 deltacomputer=None):
1968 1972 """internal function to add revisions to the log
1969 1973
1970 1974 see addrevision for argument descriptions.
1971 1975
1972 1976 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1973 1977
1974 1978 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1975 1979 be used.
1976 1980
1977 1981 invariants:
1978 1982 - rawtext is optional (can be None); if not set, cachedelta must be set.
1979 1983 if both are set, they must correspond to each other.
1980 1984 """
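        # The delta selection itself lives in revlogutils.deltas; this
        # method packs the chosen delta into an index entry and hands the
        # bytes to _writeentry().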
1981 1985 if node == nullid:
1982 1986 raise error.RevlogError(_("%s: attempt to add null revision") %
1983 1987 self.indexfile)
1984 1988 if node == wdirid or node in wdirfilenodeids:
1985 1989 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1986 1990 self.indexfile)
1987 1991
1988 1992 if self._inline:
1989 1993 fh = ifh
1990 1994 else:
1991 1995 fh = dfh
1992 1996
1993 1997 btext = [rawtext]
1994 1998
1995 1999 curr = len(self)
1996 2000 prev = curr - 1
1997 2001 offset = self.end(prev)
1998 2002 p1r, p2r = self.rev(p1), self.rev(p2)
1999 2003
2000 2004 # full versions are inserted when the needed deltas
2001 2005 # become comparable to the uncompressed text
2002 2006 if rawtext is None:
2003 2007             # need the rawtext size before it is changed by flag processors,
2004 2008             # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2005 2009             # extra logic that might remove metadata size.
2006 2010 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2007 2011 cachedelta[1])
2008 2012 else:
2009 2013 textlen = len(rawtext)
2010 2014
2011 2015 if deltacomputer is None:
2012 2016 deltacomputer = deltautil.deltacomputer(self)
2013 2017
2014 2018 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2015 2019
2016 2020 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2017 2021
2018 2022 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2019 2023 deltainfo.base, link, p1r, p2r, node)
2020 2024 self.index.append(e)
2021 2025 self.nodemap[node] = curr
2022 2026
2023 2027 # Reset the pure node cache start lookup offset to account for new
2024 2028 # revision.
2025 2029 if self._nodepos is not None:
2026 2030 self._nodepos = curr
2027 2031
2028 2032 entry = self._io.packentry(e, self.node, self.version, curr)
2029 2033 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2030 2034 link, offset)
2031 2035
2032 2036 rawtext = btext[0]
2033 2037
2034 2038 if alwayscache and rawtext is None:
2035 2039 rawtext = deltacomputer.buildtext(revinfo, fh)
2036 2040
2037 2041 if type(rawtext) == bytes: # only accept immutable objects
2038 2042 self._revisioncache = (node, curr, rawtext)
2039 2043 self._chainbasecache[curr] = deltainfo.chainbase
2040 2044 return node
2041 2045
2042 2046 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2043 2047 # Files opened in a+ mode have inconsistent behavior on various
2044 2048 # platforms. Windows requires that a file positioning call be made
2045 2049 # when the file handle transitions between reads and writes. See
2046 2050 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2047 2051 # platforms, Python or the platform itself can be buggy. Some versions
2048 2052 # of Solaris have been observed to not append at the end of the file
2049 2053 # if the file was seeked to before the end. See issue4943 for more.
2050 2054 #
2051 2055 # We work around this issue by inserting a seek() before writing.
2052 2056 # Note: This is likely not necessary on Python 3. However, because
2053 2057 # the file handle is reused for reads and may be seeked there, we need
2054 2058 # to be careful before changing this.
2055 2059 ifh.seek(0, os.SEEK_END)
2056 2060 if dfh:
2057 2061 dfh.seek(0, os.SEEK_END)
2058 2062
2059 2063 curr = len(self) - 1
2060 2064 if not self._inline:
2061 2065 transaction.add(self.datafile, offset)
2062 2066 transaction.add(self.indexfile, curr * len(entry))
2063 2067 if data[0]:
2064 2068 dfh.write(data[0])
2065 2069 dfh.write(data[1])
2066 2070 ifh.write(entry)
2067 2071 else:
2068 2072 offset += curr * self._io.size
2069 2073 transaction.add(self.indexfile, offset, curr)
2070 2074 ifh.write(entry)
2071 2075 ifh.write(data[0])
2072 2076 ifh.write(data[1])
2073 2077 self._enforceinlinesize(transaction, ifh)
2074 2078
2075 2079 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2076 2080 """
2077 2081 add a delta group
2078 2082
2079 2083         given a set of deltas, add them to the revision log. each delta is
2080 2084         against its delta base, which must already be in the log or appear
2081 2085         earlier in the group.
2082 2086
2083 2087 If ``addrevisioncb`` is defined, it will be called with arguments of
2084 2088 this revlog and the node that was added.
2085 2089 """
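        # Each entry of ``deltas`` is expected to unpack as
        # (node, p1, p2, linknode, deltabase, delta, flags), matching the
        # loop below; parents and delta bases must resolve in this revlog.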
2086 2090
2087 2091 if self._writinghandles:
2088 2092 raise error.ProgrammingError('cannot nest addgroup() calls')
2089 2093
2090 2094 nodes = []
2091 2095
2092 2096 r = len(self)
2093 2097 end = 0
2094 2098 if r:
2095 2099 end = self.end(r - 1)
2096 2100 ifh = self._indexfp("a+")
2097 2101 isize = r * self._io.size
2098 2102 if self._inline:
2099 2103 transaction.add(self.indexfile, end + isize, r)
2100 2104 dfh = None
2101 2105 else:
2102 2106 transaction.add(self.indexfile, isize, r)
2103 2107 transaction.add(self.datafile, end)
2104 2108 dfh = self._datafp("a+")
2105 2109 def flush():
2106 2110 if dfh:
2107 2111 dfh.flush()
2108 2112 ifh.flush()
2109 2113
2110 2114 self._writinghandles = (ifh, dfh)
2111 2115
2112 2116 try:
2113 2117 deltacomputer = deltautil.deltacomputer(self)
2114 2118 # loop through our set of deltas
2115 2119 for data in deltas:
2116 2120 node, p1, p2, linknode, deltabase, delta, flags = data
2117 2121 link = linkmapper(linknode)
2118 2122 flags = flags or REVIDX_DEFAULT_FLAGS
2119 2123
2120 2124 nodes.append(node)
2121 2125
2122 2126 if node in self.nodemap:
2123 2127 self._nodeduplicatecallback(transaction, node)
2124 2128 # this can happen if two branches make the same change
2125 2129 continue
2126 2130
2127 2131 for p in (p1, p2):
2128 2132 if p not in self.nodemap:
2129 2133 raise error.LookupError(p, self.indexfile,
2130 2134 _('unknown parent'))
2131 2135
2132 2136 if deltabase not in self.nodemap:
2133 2137 raise error.LookupError(deltabase, self.indexfile,
2134 2138 _('unknown delta base'))
2135 2139
2136 2140 baserev = self.rev(deltabase)
2137 2141
2138 2142 if baserev != nullrev and self.iscensored(baserev):
2139 2143 # if base is censored, delta must be full replacement in a
2140 2144 # single patch operation
2141 2145 hlen = struct.calcsize(">lll")
2142 2146 oldlen = self.rawsize(baserev)
2143 2147 newlen = len(delta) - hlen
2144 2148 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2145 2149 raise error.CensoredBaseError(self.indexfile,
2146 2150 self.node(baserev))
2147 2151
2148 2152 if not flags and self._peek_iscensored(baserev, delta, flush):
2149 2153 flags |= REVIDX_ISCENSORED
2150 2154
2151 2155 # We assume consumers of addrevisioncb will want to retrieve
2152 2156 # the added revision, which will require a call to
2153 2157 # revision(). revision() will fast path if there is a cache
2154 2158 # hit. So, we tell _addrevision() to always cache in this case.
2155 2159 # We're only using addgroup() in the context of changegroup
2156 2160 # generation so the revision data can always be handled as raw
2157 2161 # by the flagprocessor.
2158 2162 self._addrevision(node, None, transaction, link,
2159 2163 p1, p2, flags, (baserev, delta),
2160 2164 ifh, dfh,
2161 2165 alwayscache=bool(addrevisioncb),
2162 2166 deltacomputer=deltacomputer)
2163 2167
2164 2168 if addrevisioncb:
2165 2169 addrevisioncb(self, node)
2166 2170
2167 2171 if not dfh and not self._inline:
2168 2172 # addrevision switched from inline to conventional
2169 2173 # reopen the index
2170 2174 ifh.close()
2171 2175 dfh = self._datafp("a+")
2172 2176 ifh = self._indexfp("a+")
2173 2177 self._writinghandles = (ifh, dfh)
2174 2178 finally:
2175 2179 self._writinghandles = None
2176 2180
2177 2181 if dfh:
2178 2182 dfh.close()
2179 2183 ifh.close()
2180 2184
2181 2185 return nodes
2182 2186
2183 2187 def iscensored(self, rev):
2184 2188 """Check if a file revision is censored."""
2185 2189 if not self._censorable:
2186 2190 return False
2187 2191
2188 2192 return self.flags(rev) & REVIDX_ISCENSORED
2189 2193
2190 2194 def _peek_iscensored(self, baserev, delta, flush):
2191 2195 """Quickly check if a delta produces a censored revision."""
2192 2196 if not self._censorable:
2193 2197 return False
2194 2198
2195 2199 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2196 2200
2197 2201 def getstrippoint(self, minlink):
2198 2202 """find the minimum rev that must be stripped to strip the linkrev
2199 2203
2200 2204 Returns a tuple containing the minimum rev and a set of all revs that
2201 2205 have linkrevs that will be broken by this strip.
2202 2206 """
2203 2207 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2204 2208 self.headrevs(),
2205 2209 self.linkrev, self.parentrevs)
2206 2210
2207 2211 def strip(self, minlink, transaction):
2208 2212 """truncate the revlog on the first revision with a linkrev >= minlink
2209 2213
2210 2214 This function is called when we're stripping revision minlink and
2211 2215 its descendants from the repository.
2212 2216
2213 2217 We have to remove all revisions with linkrev >= minlink, because
2214 2218 the equivalent changelog revisions will be renumbered after the
2215 2219 strip.
2216 2220
2217 2221 So we truncate the revlog on the first of these revisions, and
2218 2222 trust that the caller has saved the revisions that shouldn't be
2219 2223 removed and that it'll re-add them after this truncation.
2220 2224 """
2221 2225 if len(self) == 0:
2222 2226 return
2223 2227
2224 2228 rev, _ = self.getstrippoint(minlink)
2225 2229 if rev == len(self):
2226 2230 return
2227 2231
2228 2232 # first truncate the files on disk
2229 2233 end = self.start(rev)
2230 2234 if not self._inline:
2231 2235 transaction.add(self.datafile, end)
2232 2236 end = rev * self._io.size
2233 2237 else:
2234 2238 end += rev * self._io.size
2235 2239
2236 2240 transaction.add(self.indexfile, end)
2237 2241
2238 2242 # then reset internal state in memory to forget those revisions
2239 2243 self._revisioncache = None
2240 2244 self._chaininfocache = {}
2241 2245 self._chunkclear()
2242 2246 for x in pycompat.xrange(rev, len(self)):
2243 2247 del self.nodemap[self.node(x)]
2244 2248
2245 2249 del self.index[rev:-1]
2246 2250 self._nodepos = None
2247 2251
2248 2252 def checksize(self):
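        # Returns (dd, di): the number of bytes by which the data file and
        # the index file differ from the sizes implied by the index. A
        # non-zero value indicates truncation or trailing garbage and is
        # reported by verifyintegrity() below.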
2249 2253 expected = 0
2250 2254 if len(self):
2251 2255 expected = max(0, self.end(len(self) - 1))
2252 2256
2253 2257 try:
2254 2258 with self._datafp() as f:
2255 2259 f.seek(0, 2)
2256 2260 actual = f.tell()
2257 2261 dd = actual - expected
2258 2262 except IOError as inst:
2259 2263 if inst.errno != errno.ENOENT:
2260 2264 raise
2261 2265 dd = 0
2262 2266
2263 2267 try:
2264 2268 f = self.opener(self.indexfile)
2265 2269 f.seek(0, 2)
2266 2270 actual = f.tell()
2267 2271 f.close()
2268 2272 s = self._io.size
2269 2273 i = max(0, actual // s)
2270 2274 di = actual - (i * s)
2271 2275 if self._inline:
2272 2276 databytes = 0
2273 2277 for r in self:
2274 2278 databytes += max(0, self.length(r))
2275 2279 dd = 0
2276 2280 di = actual - len(self) * s - databytes
2277 2281 except IOError as inst:
2278 2282 if inst.errno != errno.ENOENT:
2279 2283 raise
2280 2284 di = 0
2281 2285
2282 2286 return (dd, di)
2283 2287
2284 2288 def files(self):
2285 2289 res = [self.indexfile]
2286 2290 if not self._inline:
2287 2291 res.append(self.datafile)
2288 2292 return res
2289 2293
2290 2294 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2291 2295 assumehaveparentrevisions=False,
2292 2296 deltamode=repository.CG_DELTAMODE_STD):
2293 2297 if nodesorder not in ('nodes', 'storage', 'linear', None):
2294 2298 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2295 2299 nodesorder)
2296 2300
2297 2301 if nodesorder is None and not self._generaldelta:
2298 2302 nodesorder = 'storage'
2299 2303
2300 2304 if (not self._storedeltachains and
2301 2305 deltamode != repository.CG_DELTAMODE_PREV):
2302 2306 deltamode = repository.CG_DELTAMODE_FULL
2303 2307
2304 2308 return storageutil.emitrevisions(
2305 2309 self, nodes, nodesorder, revlogrevisiondelta,
2306 2310 deltaparentfn=self.deltaparent,
2307 2311 candeltafn=self.candelta,
2308 2312 rawsizefn=self.rawsize,
2309 2313 revdifffn=self.revdiff,
2310 2314 flagsfn=self.flags,
2311 2315 deltamode=deltamode,
2312 2316 revisiondata=revisiondata,
2313 2317 assumehaveparentrevisions=assumehaveparentrevisions)
2314 2318
2315 2319 DELTAREUSEALWAYS = 'always'
2316 2320 DELTAREUSESAMEREVS = 'samerevs'
2317 2321 DELTAREUSENEVER = 'never'
2318 2322
2319 2323 DELTAREUSEFULLADD = 'fulladd'
2320 2324
2321 2325 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2322 2326
2323 2327 def clone(self, tr, destrevlog, addrevisioncb=None,
2324 2328 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2325 2329 """Copy this revlog to another, possibly with format changes.
2326 2330
2327 2331 The destination revlog will contain the same revisions and nodes.
2328 2332 However, it may not be bit-for-bit identical due to e.g. delta encoding
2329 2333 differences.
2330 2334
2331 2335         The ``deltareuse`` argument controls how deltas from the existing revlog
2332 2336 are preserved in the destination revlog. The argument can have the
2333 2337 following values:
2334 2338
2335 2339 DELTAREUSEALWAYS
2336 2340 Deltas will always be reused (if possible), even if the destination
2337 2341 revlog would not select the same revisions for the delta. This is the
2338 2342 fastest mode of operation.
2339 2343 DELTAREUSESAMEREVS
2340 2344 Deltas will be reused if the destination revlog would pick the same
2341 2345 revisions for the delta. This mode strikes a balance between speed
2342 2346 and optimization.
2343 2347 DELTAREUSENEVER
2344 2348 Deltas will never be reused. This is the slowest mode of execution.
2345 2349 This mode can be used to recompute deltas (e.g. if the diff/delta
2346 2350 algorithm changes).
2347 2351
2348 2352 Delta computation can be slow, so the choice of delta reuse policy can
2349 2353 significantly affect run time.
2350 2354
2351 2355         The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2352 2356         the two extremes. Deltas are reused when they are still appropriate,
2353 2357         but recomputed when the destination revlog would pick a better base.
2354 2358         For example, when converting a non-generaldelta revlog to a
2355 2359         generaldelta revlog, deltas are recomputed whenever the delta's base
2356 2360         isn't a parent of the revision.
2357 2361
2358 2362         In addition to the delta policy, the ``forcedeltabothparents`` argument
2359 2363         controls whether to force computing deltas against both parents for
2360 2364         merges. If None, the destination revlog's current setting is kept.
2361 2365 """
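        # A minimal sketch (hypothetical ``src``/``dst`` revlogs, open
        # transaction ``tr``): ``src.clone(tr, dst,
        # deltareuse=src.DELTAREUSENEVER)`` recomputes every delta, which
        # is how on-disk formats can be upgraded.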
2362 2366 if deltareuse not in self.DELTAREUSEALL:
2363 2367 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2364 2368
2365 2369 if len(destrevlog):
2366 2370 raise ValueError(_('destination revlog is not empty'))
2367 2371
2368 2372 if getattr(self, 'filteredrevs', None):
2369 2373 raise ValueError(_('source revlog has filtered revisions'))
2370 2374 if getattr(destrevlog, 'filteredrevs', None):
2371 2375 raise ValueError(_('destination revlog has filtered revisions'))
2372 2376
2373 2377 # lazydeltabase controls whether to reuse a cached delta, if possible.
2374 2378 oldlazydeltabase = destrevlog._lazydeltabase
2375 2379 oldamd = destrevlog._deltabothparents
2376 2380
2377 2381 try:
2378 2382 if deltareuse == self.DELTAREUSEALWAYS:
2379 2383 destrevlog._lazydeltabase = True
2380 2384 elif deltareuse == self.DELTAREUSESAMEREVS:
2381 2385 destrevlog._lazydeltabase = False
2382 2386
2383 2387 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2384 2388
2385 2389 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2386 2390 self.DELTAREUSESAMEREVS)
2387 2391
2388 2392 deltacomputer = deltautil.deltacomputer(destrevlog)
2389 2393 index = self.index
2390 2394 for rev in self:
2391 2395 entry = index[rev]
2392 2396
2393 2397 # Some classes override linkrev to take filtered revs into
2394 2398 # account. Use raw entry from index.
2395 2399 flags = entry[0] & 0xffff
2396 2400 linkrev = entry[4]
2397 2401 p1 = index[entry[5]][7]
2398 2402 p2 = index[entry[6]][7]
2399 2403 node = entry[7]
2400 2404
2401 2405 # (Possibly) reuse the delta from the revlog if allowed and
2402 2406 # the revlog chunk is a delta.
2403 2407 cachedelta = None
2404 2408 rawtext = None
2405 2409 if populatecachedelta:
2406 2410 dp = self.deltaparent(rev)
2407 2411 if dp != nullrev:
2408 2412 cachedelta = (dp, bytes(self._chunk(rev)))
2409 2413
2410 2414 if not cachedelta:
2411 2415 rawtext = self.revision(rev, raw=True)
2412 2416
2413 2417
2414 2418 if deltareuse == self.DELTAREUSEFULLADD:
2415 2419 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2416 2420 cachedelta=cachedelta,
2417 2421 node=node, flags=flags,
2418 2422 deltacomputer=deltacomputer)
2419 2423 else:
2420 2424 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2421 2425 checkambig=False)
2422 2426 dfh = None
2423 2427 if not destrevlog._inline:
2424 2428 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2425 2429 try:
2426 2430 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2427 2431 p2, flags, cachedelta, ifh, dfh,
2428 2432 deltacomputer=deltacomputer)
2429 2433 finally:
2430 2434 if dfh:
2431 2435 dfh.close()
2432 2436 ifh.close()
2433 2437
2434 2438 if addrevisioncb:
2435 2439 addrevisioncb(self, rev, node)
2436 2440 finally:
2437 2441 destrevlog._lazydeltabase = oldlazydeltabase
2438 2442 destrevlog._deltabothparents = oldamd
2439 2443
2440 2444 def censorrevision(self, tr, censornode, tombstone=b''):
2441 2445 if (self.version & 0xFFFF) == REVLOGV0:
2442 2446 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2443 2447 self.version)
2444 2448
2445 2449 censorrev = self.rev(censornode)
2446 2450 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2447 2451
2448 2452 if len(tombstone) > self.rawsize(censorrev):
2449 2453 raise error.Abort(_('censor tombstone must be no longer than '
2450 2454 'censored data'))
2451 2455
2452 2456 # Rewriting the revlog in place is hard. Our strategy for censoring is
2453 2457 # to create a new revlog, copy all revisions to it, then replace the
2454 2458 # revlogs on transaction close.
2455 2459
2456 2460 newindexfile = self.indexfile + b'.tmpcensored'
2457 2461 newdatafile = self.datafile + b'.tmpcensored'
2458 2462
2459 2463 # This is a bit dangerous. We could easily have a mismatch of state.
2460 2464 newrl = revlog(self.opener, newindexfile, newdatafile,
2461 2465 censorable=True)
2462 2466 newrl.version = self.version
2463 2467 newrl._generaldelta = self._generaldelta
2464 2468 newrl._io = self._io
2465 2469
2466 2470 for rev in self.revs():
2467 2471 node = self.node(rev)
2468 2472 p1, p2 = self.parents(node)
2469 2473
2470 2474 if rev == censorrev:
2471 2475 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2472 2476 p1, p2, censornode, REVIDX_ISCENSORED)
2473 2477
2474 2478 if newrl.deltaparent(rev) != nullrev:
2475 2479 raise error.Abort(_('censored revision stored as delta; '
2476 2480 'cannot censor'),
2477 2481 hint=_('censoring of revlogs is not '
2478 2482 'fully implemented; please report '
2479 2483 'this bug'))
2480 2484 continue
2481 2485
2482 2486 if self.iscensored(rev):
2483 2487 if self.deltaparent(rev) != nullrev:
2484 2488 raise error.Abort(_('cannot censor due to censored '
2485 2489 'revision having delta stored'))
2486 2490 rawtext = self._chunk(rev)
2487 2491 else:
2488 2492 rawtext = self.revision(rev, raw=True)
2489 2493
2490 2494 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2491 2495 self.flags(rev))
2492 2496
2493 2497 tr.addbackup(self.indexfile, location='store')
2494 2498 if not self._inline:
2495 2499 tr.addbackup(self.datafile, location='store')
2496 2500
2497 2501 self.opener.rename(newrl.indexfile, self.indexfile)
2498 2502 if not self._inline:
2499 2503 self.opener.rename(newrl.datafile, self.datafile)
2500 2504
2501 2505 self.clearcaches()
2502 2506 self._loadindex()
2503 2507
2504 2508 def verifyintegrity(self, state):
2505 2509 """Verifies the integrity of the revlog.
2506 2510
2507 2511 Yields ``revlogproblem`` instances describing problems that are
2508 2512 found.
2509 2513 """
2510 2514 dd, di = self.checksize()
2511 2515 if dd:
2512 2516 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2513 2517 if di:
2514 2518 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2515 2519
2516 2520 version = self.version & 0xFFFF
2517 2521
2518 2522 # The verifier tells us what version revlog we should be.
2519 2523 if version != state['expectedversion']:
2520 2524 yield revlogproblem(
2521 2525 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2522 2526 (self.indexfile, version, state['expectedversion']))
2523 2527
2524 2528 state['skipread'] = set()
2525 2529
2526 2530 for rev in self:
2527 2531 node = self.node(rev)
2528 2532
2529 2533 # Verify contents. 4 cases to care about:
2530 2534 #
2531 2535 # common: the most common case
2532 2536 # rename: with a rename
2533 2537 # meta: file content starts with b'\1\n', the metadata
2534 2538 # header defined in filelog.py, but without a rename
2535 2539 # ext: content stored externally
2536 2540 #
2537 2541 # More formally, their differences are shown below:
2538 2542 #
2539 2543 # | common | rename | meta | ext
2540 2544 # -------------------------------------------------------
2541 2545 # flags() | 0 | 0 | 0 | not 0
2542 2546 # renamed() | False | True | False | ?
2543 2547 # rawtext[0:2]=='\1\n'| False | True | True | ?
2544 2548 #
2545 2549 # "rawtext" means the raw text stored in revlog data, which
2546 2550 # could be retrieved by "revision(rev, raw=True)". "text"
2547 2551 # mentioned below is "revision(rev, raw=False)".
2548 2552 #
2549 2553 # There are 3 different lengths stored physically:
2550 2554 # 1. L1: rawsize, stored in revlog index
2551 2555 # 2. L2: len(rawtext), stored in revlog data
2552 2556 # 3. L3: len(text), stored in revlog data if flags==0, or
2553 2557 # possibly somewhere else if flags!=0
2554 2558 #
2555 2559 # L1 should be equal to L2. L3 could be different from them.
2556 2560 # "text" may or may not affect commit hash depending on flag
2557 2561 # processors (see revlog.addflagprocessor).
2558 2562 #
2559 2563 # | common | rename | meta | ext
2560 2564 # -------------------------------------------------
2561 2565 # rawsize() | L1 | L1 | L1 | L1
2562 2566 # size() | L1 | L2-LM | L1(*) | L1 (?)
2563 2567 # len(rawtext) | L2 | L2 | L2 | L2
2564 2568 # len(text) | L2 | L2 | L2 | L3
2565 2569 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2566 2570 #
2567 2571 # LM: length of metadata, depending on rawtext
2568 2572 # (*): not ideal, see comment in filelog.size
2569 2573 # (?): could be "- len(meta)" if the resolved content has
2570 2574 # rename metadata
2571 2575 #
2572 2576 # Checks needed to be done:
2573 2577 # 1. length check: L1 == L2, in all cases.
2574 2578 # 2. hash check: depending on flag processor, we may need to
2575 2579 # use either "text" (external), or "rawtext" (in revlog).
2576 2580
2577 2581 try:
2578 2582 skipflags = state.get('skipflags', 0)
2579 2583 if skipflags:
2580 2584 skipflags &= self.flags(rev)
2581 2585
2582 2586 if skipflags:
2583 2587 state['skipread'].add(node)
2584 2588 else:
2585 2589 # Side-effect: read content and verify hash.
2586 2590 self.revision(node)
2587 2591
2588 2592 l1 = self.rawsize(rev)
2589 2593 l2 = len(self.revision(node, raw=True))
2590 2594
2591 2595 if l1 != l2:
2592 2596 yield revlogproblem(
2593 2597 error=_('unpacked size is %d, %d expected') % (l2, l1),
2594 2598 node=node)
2595 2599
2596 2600 except error.CensoredNodeError:
2597 2601 if state['erroroncensored']:
2598 2602 yield revlogproblem(error=_('censored file data'),
2599 2603 node=node)
2600 2604 state['skipread'].add(node)
2601 2605 except Exception as e:
2602 2606 yield revlogproblem(
2603 2607 error=_('unpacking %s: %s') % (short(node),
2604 2608 stringutil.forcebytestr(e)),
2605 2609 node=node)
2606 2610 state['skipread'].add(node)
2607 2611
2608 2612 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2609 2613 revisionscount=False, trackedsize=False,
2610 2614 storedsize=False):
2611 2615 d = {}
2612 2616
2613 2617 if exclusivefiles:
2614 2618 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2615 2619 if not self._inline:
2616 2620 d['exclusivefiles'].append((self.opener, self.datafile))
2617 2621
2618 2622 if sharedfiles:
2619 2623 d['sharedfiles'] = []
2620 2624
2621 2625 if revisionscount:
2622 2626 d['revisionscount'] = len(self)
2623 2627
2624 2628 if trackedsize:
2625 2629 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2626 2630
2627 2631 if storedsize:
2628 2632 d['storedsize'] = sum(self.opener.stat(path).st_size
2629 2633 for path in self.files())
2630 2634
2631 2635 return d