rust-cpython: using MissingAncestors from Python code...
Georges Racinet
r41283:a89b20a4 default
@@ -1,2635 +1,2637 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to prevent pyflakes "unused" warnings.
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns a same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
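A registration sketch against the API documented in addflagprocessor() above; the three callables are hypothetical no-op processors wired to a real flag:

    def _sketchread(rl, rawtext):
        # no transformation on read; the result is usable for hash checks
        return rawtext, True

    def _sketchwrite(rl, text):
        # no transformation on write either
        return text, True

    def _sketchraw(rl, rawtext):
        # rawtext is directly usable for hash integrity checking
        return True

    # registering twice on the same flag raises error.Abort (see above)
    addflagprocessor(REVIDX_EXTSTORED, (_sketchread, _sketchwrite, _sketchraw))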
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
183 183
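A worked example of the packing implemented by the three helpers above, using an arbitrary offset and a known flag:

    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert packed == (4096 << 16) | REVIDX_ISCENSORED
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED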
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
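A round-trip sketch for a single v0 entry through the struct above; the field values are made up, and node IDs are 20-byte binary strings:

    e = (0, 12, 0, 0, b'\x11' * 20, nullid, b'\x22' * 20)
    assert indexformatv0_unpack(indexformatv0_pack(*e)) == e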
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
288 288 # maximum value of the uncompressed-length field in indexformatng
289 289 # (2 GiB, 4-byte signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
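For comparison, a sketch of one v1 ("ng") entry: the first field is the offset_type() packing shown earlier, and the trailing 12x pads the 20-byte node to 32 bytes:

    e = (offset_type(0, 0), 11, 11, 0, 0, nullrev, nullrev, b'\xaa' * 20)
    assert len(indexformatng_pack(*e)) == indexformatng.size == 64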
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, text) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of revision integer to full node.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._maxdeltachainspan = -1
375 375 self._withsparseread = False
376 376 self._sparserevlog = False
377 377 self._srdensitythreshold = 0.50
378 378 self._srmingapsize = 262144
379 379
380 380 # Make copy of flag processors so each revlog instance can support
381 381 # custom flags.
382 382 self._flagprocessors = dict(_flagprocessors)
383 383
384 384 # 2-tuple of file handles being used for active writing.
385 385 self._writinghandles = None
386 386
387 387 self._loadindex()
388 388
389 389 def _loadindex(self):
390 390 mmapindexthreshold = None
391 391 opts = getattr(self.opener, 'options', {}) or {}
392 392
393 393 if 'revlogv2' in opts:
394 394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 395 elif 'revlogv1' in opts:
396 396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 397 if 'generaldelta' in opts:
398 398 newversionflags |= FLAG_GENERALDELTA
399 399 else:
400 400 newversionflags = REVLOG_DEFAULT_VERSION
401 401
402 402 if 'chunkcachesize' in opts:
403 403 self._chunkcachesize = opts['chunkcachesize']
404 404 if 'maxchainlen' in opts:
405 405 self._maxchainlen = opts['maxchainlen']
406 406 if 'deltabothparents' in opts:
407 407 self._deltabothparents = opts['deltabothparents']
408 408 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
409 409 if 'compengine' in opts:
410 410 self._compengine = opts['compengine']
411 411 if 'maxdeltachainspan' in opts:
412 412 self._maxdeltachainspan = opts['maxdeltachainspan']
413 413 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
414 414 mmapindexthreshold = opts['mmapindexthreshold']
415 415 self._sparserevlog = bool(opts.get('sparse-revlog', False))
416 416 withsparseread = bool(opts.get('with-sparse-read', False))
417 417 # sparse-revlog forces sparse-read
418 418 self._withsparseread = self._sparserevlog or withsparseread
419 419 if 'sparse-read-density-threshold' in opts:
420 420 self._srdensitythreshold = opts['sparse-read-density-threshold']
421 421 if 'sparse-read-min-gap-size' in opts:
422 422 self._srmingapsize = opts['sparse-read-min-gap-size']
423 423 if opts.get('enableellipsis'):
424 424 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
425 425
426 426 # revlog v0 doesn't have flag processors
427 427 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
428 428 _insertflagprocessor(flag, processor, self._flagprocessors)
429 429
430 430 if self._chunkcachesize <= 0:
431 431 raise error.RevlogError(_('revlog chunk cache size %r is not '
432 432 'greater than 0') % self._chunkcachesize)
433 433 elif self._chunkcachesize & (self._chunkcachesize - 1):
434 434 raise error.RevlogError(_('revlog chunk cache size %r is not a '
435 435 'power of 2') % self._chunkcachesize)
436 436
437 437 indexdata = ''
438 438 self._initempty = True
439 439 try:
440 440 with self._indexfp() as f:
441 441 if (mmapindexthreshold is not None and
442 442 self.opener.fstat(f).st_size >= mmapindexthreshold):
443 443 indexdata = util.buffer(util.mmapread(f))
444 444 else:
445 445 indexdata = f.read()
446 446 if len(indexdata) > 0:
447 447 versionflags = versionformat_unpack(indexdata[:4])[0]
448 448 self._initempty = False
449 449 else:
450 450 versionflags = newversionflags
451 451 except IOError as inst:
452 452 if inst.errno != errno.ENOENT:
453 453 raise
454 454
455 455 versionflags = newversionflags
456 456
457 457 self.version = versionflags
458 458
459 459 flags = versionflags & ~0xFFFF
460 460 fmt = versionflags & 0xFFFF
461 461
462 462 if fmt == REVLOGV0:
463 463 if flags:
464 464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 465 'revlog %s') %
466 466 (flags >> 16, fmt, self.indexfile))
467 467
468 468 self._inline = False
469 469 self._generaldelta = False
470 470
471 471 elif fmt == REVLOGV1:
472 472 if flags & ~REVLOGV1_FLAGS:
473 473 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
474 474 'revlog %s') %
475 475 (flags >> 16, fmt, self.indexfile))
476 476
477 477 self._inline = versionflags & FLAG_INLINE_DATA
478 478 self._generaldelta = versionflags & FLAG_GENERALDELTA
479 479
480 480 elif fmt == REVLOGV2:
481 481 if flags & ~REVLOGV2_FLAGS:
482 482 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
483 483 'revlog %s') %
484 484 (flags >> 16, fmt, self.indexfile))
485 485
486 486 self._inline = versionflags & FLAG_INLINE_DATA
487 487 # generaldelta implied by version 2 revlogs.
488 488 self._generaldelta = True
489 489
490 490 else:
491 491 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
492 492 (fmt, self.indexfile))
493 493
494 494 self._storedeltachains = True
495 495
496 496 self._io = revlogio()
497 497 if self.version == REVLOGV0:
498 498 self._io = revlogoldio()
499 499 try:
500 500 d = self._io.parseindex(indexdata, self._inline)
501 501 except (ValueError, IndexError):
502 502 raise error.RevlogError(_("index %s is corrupted") %
503 503 self.indexfile)
504 504 self.index, nodemap, self._chunkcache = d
505 505 if nodemap is not None:
506 506 self.nodemap = self._nodecache = nodemap
507 507 if not self._chunkcache:
508 508 self._chunkclear()
509 509 # revnum -> (chain-length, sum-delta-length)
510 510 self._chaininfocache = {}
511 511 # revlog header -> revlog compressor
512 512 self._decompressors = {}
513 513
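A worked example of the header split performed above, for a v1 inline revlog whose first four bytes decode to 0x00010001:

    versionflags = REVLOGV1 | FLAG_INLINE_DATA
    assert versionflags & 0xFFFF == REVLOGV1           # format: 1
    assert versionflags & ~0xFFFF == FLAG_INLINE_DATA  # flags: inline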
514 514 @util.propertycache
515 515 def _compressor(self):
516 516 return util.compengines[self._compengine].revlogcompressor()
517 517
518 518 def _indexfp(self, mode='r'):
519 519 """file object for the revlog's index file"""
520 520 args = {r'mode': mode}
521 521 if mode != 'r':
522 522 args[r'checkambig'] = self._checkambig
523 523 if mode == 'w':
524 524 args[r'atomictemp'] = True
525 525 return self.opener(self.indexfile, **args)
526 526
527 527 def _datafp(self, mode='r'):
528 528 """file object for the revlog's data file"""
529 529 return self.opener(self.datafile, mode=mode)
530 530
531 531 @contextlib.contextmanager
532 532 def _datareadfp(self, existingfp=None):
533 533 """file object suitable to read data"""
534 534 # Use explicit file handle, if given.
535 535 if existingfp is not None:
536 536 yield existingfp
537 537
538 538 # Use a file handle being actively used for writes, if available.
539 539 # There is some danger to doing this because reads will seek the
540 540 # file. However, _writeentry() performs a SEEK_END before all writes,
541 541 # so we should be safe.
542 542 elif self._writinghandles:
543 543 if self._inline:
544 544 yield self._writinghandles[0]
545 545 else:
546 546 yield self._writinghandles[1]
547 547
548 548 # Otherwise open a new file handle.
549 549 else:
550 550 if self._inline:
551 551 func = self._indexfp
552 552 else:
553 553 func = self._datafp
554 554 with func() as fp:
555 555 yield fp
556 556
557 557 def tip(self):
558 558 return self.node(len(self.index) - 1)
559 559 def __contains__(self, rev):
560 560 return 0 <= rev < len(self)
561 561 def __len__(self):
562 562 return len(self.index)
563 563 def __iter__(self):
564 564 return iter(pycompat.xrange(len(self)))
565 565 def revs(self, start=0, stop=None):
566 566 """iterate over all rev in this revlog (from start to stop)"""
567 567 return storageutil.iterrevs(len(self), start=start, stop=stop)
568 568
569 569 @util.propertycache
570 570 def nodemap(self):
571 571 if self.index:
572 572 # populate mapping down to the initial node
573 573 node0 = self.index[0][7] # get around changelog filtering
574 574 self.rev(node0)
575 575 return self._nodecache
576 576
577 577 def hasnode(self, node):
578 578 try:
579 579 self.rev(node)
580 580 return True
581 581 except KeyError:
582 582 return False
583 583
584 584 def candelta(self, baserev, rev):
585 585 """whether two revisions (baserev, rev) can be delta-ed or not"""
586 586 # Disable delta if either rev requires a content-changing flag
587 587 # processor (ex. LFS). This is because such a flag processor can alter
588 588 # the rawtext content that the delta will be based on, and two clients
589 589 # could have the same revlog node with different flags (i.e. different
590 590 # rawtext contents) and the delta could be incompatible.
591 591 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
592 592 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
593 593 return False
594 594 return True
595 595
596 596 def clearcaches(self):
597 597 self._revisioncache = None
598 598 self._chainbasecache.clear()
599 599 self._chunkcache = (0, '')
600 600 self._pcache = {}
601 601
602 602 try:
603 603 self._nodecache.clearcaches()
604 604 except AttributeError:
605 605 self._nodecache = {nullid: nullrev}
606 606 self._nodepos = None
607 607
608 608 def rev(self, node):
609 609 try:
610 610 return self._nodecache[node]
611 611 except TypeError:
612 612 raise
613 613 except error.RevlogError:
614 614 # parsers.c radix tree lookup failed
615 615 if node == wdirid or node in wdirfilenodeids:
616 616 raise error.WdirUnsupported
617 617 raise error.LookupError(node, self.indexfile, _('no node'))
618 618 except KeyError:
619 619 # pure python cache lookup failed
620 620 n = self._nodecache
621 621 i = self.index
622 622 p = self._nodepos
623 623 if p is None:
624 624 p = len(i) - 1
625 625 else:
626 626 assert p < len(i)
627 627 for r in pycompat.xrange(p, -1, -1):
628 628 v = i[r][7]
629 629 n[v] = r
630 630 if v == node:
631 631 self._nodepos = r - 1
632 632 return r
633 633 if node == wdirid or node in wdirfilenodeids:
634 634 raise error.WdirUnsupported
635 635 raise error.LookupError(node, self.indexfile, _('no node'))
636 636
637 637 # Accessors for index entries.
638 638
639 639 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
640 640 # are flags.
641 641 def start(self, rev):
642 642 return int(self.index[rev][0] >> 16)
643 643
644 644 def flags(self, rev):
645 645 return self.index[rev][0] & 0xFFFF
646 646
647 647 def length(self, rev):
648 648 return self.index[rev][1]
649 649
650 650 def rawsize(self, rev):
651 651 """return the length of the uncompressed text for a given revision"""
652 652 l = self.index[rev][2]
653 653 if l >= 0:
654 654 return l
655 655
656 656 t = self.revision(rev, raw=True)
657 657 return len(t)
658 658
659 659 def size(self, rev):
660 660 """length of non-raw text (processed by a "read" flag processor)"""
661 661 # fast path: if no "read" flag processor could change the content,
662 662 # size is rawsize. note: ELLIPSIS is known to not change the content.
663 663 flags = self.flags(rev)
664 664 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
665 665 return self.rawsize(rev)
666 666
667 667 return len(self.revision(rev, raw=False))
668 668
669 669 def chainbase(self, rev):
670 670 base = self._chainbasecache.get(rev)
671 671 if base is not None:
672 672 return base
673 673
674 674 index = self.index
675 675 iterrev = rev
676 676 base = index[iterrev][3]
677 677 while base != iterrev:
678 678 iterrev = base
679 679 base = index[iterrev][3]
680 680
681 681 self._chainbasecache[rev] = base
682 682 return base
683 683
684 684 def linkrev(self, rev):
685 685 return self.index[rev][4]
686 686
687 687 def parentrevs(self, rev):
688 688 try:
689 689 entry = self.index[rev]
690 690 except IndexError:
691 691 if rev == wdirrev:
692 692 raise error.WdirUnsupported
693 693 raise
694 694
695 695 return entry[5], entry[6]
696 696
697 697 # fast parentrevs(rev) where rev isn't filtered
698 698 _uncheckedparentrevs = parentrevs
699 699
700 700 def node(self, rev):
701 701 try:
702 702 return self.index[rev][7]
703 703 except IndexError:
704 704 if rev == wdirrev:
705 705 raise error.WdirUnsupported
706 706 raise
707 707
708 708 # Derived from index values.
709 709
710 710 def end(self, rev):
711 711 return self.start(rev) + self.length(rev)
712 712
713 713 def parents(self, node):
714 714 i = self.index
715 715 d = i[self.rev(node)]
716 716 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
717 717
718 718 def chainlen(self, rev):
719 719 return self._chaininfo(rev)[0]
720 720
721 721 def _chaininfo(self, rev):
722 722 chaininfocache = self._chaininfocache
723 723 if rev in chaininfocache:
724 724 return chaininfocache[rev]
725 725 index = self.index
726 726 generaldelta = self._generaldelta
727 727 iterrev = rev
728 728 e = index[iterrev]
729 729 clen = 0
730 730 compresseddeltalen = 0
731 731 while iterrev != e[3]:
732 732 clen += 1
733 733 compresseddeltalen += e[1]
734 734 if generaldelta:
735 735 iterrev = e[3]
736 736 else:
737 737 iterrev -= 1
738 738 if iterrev in chaininfocache:
739 739 t = chaininfocache[iterrev]
740 740 clen += t[0]
741 741 compresseddeltalen += t[1]
742 742 break
743 743 e = index[iterrev]
744 744 else:
745 745 # Add text length of base since decompressing that also takes
746 746 # work. For cache hits the length is already included.
747 747 compresseddeltalen += e[1]
748 748 r = (clen, compresseddeltalen)
749 749 chaininfocache[rev] = r
750 750 return r
751 751
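A usage sketch, assuming `rl` is a revlog instance: chainlen() is simply the first element of the cached (chain-length, compressed-delta-size) pair:

    rev = len(rl) - 1  # the tip, for instance
    clen, deltasize = rl._chaininfo(rev)
    assert clen == rl.chainlen(rev)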
752 752 def _deltachain(self, rev, stoprev=None):
753 753 """Obtain the delta chain for a revision.
754 754
755 755 ``stoprev`` specifies a revision to stop at. If not specified, we
756 756 stop at the base of the chain.
757 757
758 758 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
759 759 revs in ascending order and ``stopped`` is a bool indicating whether
760 760 ``stoprev`` was hit.
761 761 """
762 762 # Try C implementation.
763 763 try:
764 764 return self.index.deltachain(rev, stoprev, self._generaldelta)
765 765 except AttributeError:
766 766 pass
767 767
768 768 chain = []
769 769
770 770 # Alias to prevent attribute lookup in tight loop.
771 771 index = self.index
772 772 generaldelta = self._generaldelta
773 773
774 774 iterrev = rev
775 775 e = index[iterrev]
776 776 while iterrev != e[3] and iterrev != stoprev:
777 777 chain.append(iterrev)
778 778 if generaldelta:
779 779 iterrev = e[3]
780 780 else:
781 781 iterrev -= 1
782 782 e = index[iterrev]
783 783
784 784 if iterrev == stoprev:
785 785 stopped = True
786 786 else:
787 787 chain.append(iterrev)
788 788 stopped = False
789 789
790 790 chain.reverse()
791 791 return chain, stopped
792 792
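A usage sketch, assuming `rl` is a revlog instance and `cachedrev` a hypothetical revision whose text the caller already holds:

    cachedrev = 0  # hypothetical cached revision
    chain, stopped = rl._deltachain(len(rl) - 1, stoprev=cachedrev)
    # `chain` lists the revs whose chunks must be fetched, in ascending
    # order; if `stopped`, the walk ended at cachedrev, which is excluded.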
793 793 def ancestors(self, revs, stoprev=0, inclusive=False):
794 794 """Generate the ancestors of 'revs' in reverse revision order.
795 795 Does not generate revs lower than stoprev.
796 796
797 797 See the documentation for ancestor.lazyancestors for more details."""
798 798
799 799 # first, make sure start revisions aren't filtered
800 800 revs = list(revs)
801 801 checkrev = self.node
802 802 for r in revs:
803 803 checkrev(r)
804 804 # and we're sure ancestors aren't filtered as well
805 805
806 806 if rustext is not None:
807 807 lazyancestors = rustext.ancestor.LazyAncestors
808 808 arg = self.index
809 809 elif util.safehasattr(parsers, 'rustlazyancestors'):
810 810 lazyancestors = ancestor.rustlazyancestors
811 811 arg = self.index
812 812 else:
813 813 lazyancestors = ancestor.lazyancestors
814 814 arg = self._uncheckedparentrevs
815 815 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
816 816
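Whichever implementation gets selected (Rust, C, or pure Python), the returned object is iterable and supports membership tests. A sketch, assuming `rl` has at least eight revisions:

    ancs = rl.ancestors([5, 7], stoprev=2, inclusive=True)
    assert 5 in ancs  # inclusive=True counts the start revs themselves
    for r in ancs:    # reverse revision order, nothing below rev 2
        pass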
817 817 def descendants(self, revs):
818 818 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
819 819
820 820 def findcommonmissing(self, common=None, heads=None):
821 821 """Return a tuple of the ancestors of common and the ancestors of heads
822 822 that are not ancestors of common. In revset terminology, we return the
823 823 tuple:
824 824
825 825 ::common, (::heads) - (::common)
826 826
827 827 The list is sorted by revision number, meaning it is
828 828 topologically sorted.
829 829
830 830 'heads' and 'common' are both lists of node IDs. If heads is
831 831 not supplied, uses all of the revlog's heads. If common is not
832 832 supplied, uses nullid."""
833 833 if common is None:
834 834 common = [nullid]
835 835 if heads is None:
836 836 heads = self.heads()
837 837
838 838 common = [self.rev(n) for n in common]
839 839 heads = [self.rev(n) for n in heads]
840 840
841 841 # we want the ancestors, but inclusive
842 842 class lazyset(object):
843 843 def __init__(self, lazyvalues):
844 844 self.addedvalues = set()
845 845 self.lazyvalues = lazyvalues
846 846
847 847 def __contains__(self, value):
848 848 return value in self.addedvalues or value in self.lazyvalues
849 849
850 850 def __iter__(self):
851 851 added = self.addedvalues
852 852 for r in added:
853 853 yield r
854 854 for r in self.lazyvalues:
855 855 if not r in added:
856 856 yield r
857 857
858 858 def add(self, value):
859 859 self.addedvalues.add(value)
860 860
861 861 def update(self, values):
862 862 self.addedvalues.update(values)
863 863
864 864 has = lazyset(self.ancestors(common))
865 865 has.add(nullrev)
866 866 has.update(common)
867 867
868 868 # take all ancestors from heads that aren't in has
869 869 missing = set()
870 870 visit = collections.deque(r for r in heads if r not in has)
871 871 while visit:
872 872 r = visit.popleft()
873 873 if r in missing:
874 874 continue
875 875 else:
876 876 missing.add(r)
877 877 for p in self.parentrevs(r):
878 878 if p not in has:
879 879 visit.append(p)
880 880 missing = list(missing)
881 881 missing.sort()
882 882 return has, [self.node(miss) for miss in missing]
883 883
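A usage sketch, assuming `rl` is a revlog and `cnode`/`hnode` are known node IDs: `has` behaves like an inclusive lazy set of ::common, and `missing` is a topologically sorted list of nodes:

    has, missing = rl.findcommonmissing(common=[cnode], heads=[hnode])
    assert nullrev in has  # nullrev is always added to the common set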
884 884 def incrementalmissingrevs(self, common=None):
885 885 """Return an object that can be used to incrementally compute the
886 886 revision numbers of the ancestors of arbitrary sets that are not
887 887 ancestors of common. This is an ancestor.incrementalmissingancestors
888 888 object.
889 889
890 890 'common' is a list of revision numbers. If common is not supplied, uses
891 891 nullrev.
892 892 """
893 893 if common is None:
894 894 common = [nullrev]
895 895
896 if rustext is not None:
897 return rustext.ancestor.MissingAncestors(self.index, common)
896 898 return ancestor.incrementalmissingancestors(self.parentrevs, common)
897 899
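This is the call site the two added lines above target: when the `rustext` module is importable, callers get a rustext.ancestor.MissingAncestors built directly on the index; otherwise they get the pure-Python incremental object. Both expose the same incremental protocol, sketched here assuming `rl` is a revlog:

    inc = rl.incrementalmissingrevs(common=[0])
    missing = inc.missingancestors(rl.headrevs())
    inc.addbases([5])        # grow the set of common revs incrementally
    candidates = {8, 9}      # hypothetical mutable set of revs
    inc.removeancestorsfrom(candidates)  # prunes the set in place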
898 900 def findmissingrevs(self, common=None, heads=None):
899 901 """Return the revision numbers of the ancestors of heads that
900 902 are not ancestors of common.
901 903
902 904 More specifically, return a list of revision numbers corresponding to
903 905 nodes N such that every N satisfies the following constraints:
904 906
905 907 1. N is an ancestor of some node in 'heads'
906 908 2. N is not an ancestor of any node in 'common'
907 909
908 910 The list is sorted by revision number, meaning it is
909 911 topologically sorted.
910 912
911 913 'heads' and 'common' are both lists of revision numbers. If heads is
912 914 not supplied, uses all of the revlog's heads. If common is not
913 915 supplied, uses nullid."""
914 916 if common is None:
915 917 common = [nullrev]
916 918 if heads is None:
917 919 heads = self.headrevs()
918 920
919 921 inc = self.incrementalmissingrevs(common=common)
920 922 return inc.missingancestors(heads)
921 923
922 924 def findmissing(self, common=None, heads=None):
923 925 """Return the ancestors of heads that are not ancestors of common.
924 926
925 927 More specifically, return a list of nodes N such that every N
926 928 satisfies the following constraints:
927 929
928 930 1. N is an ancestor of some node in 'heads'
929 931 2. N is not an ancestor of any node in 'common'
930 932
931 933 The list is sorted by revision number, meaning it is
932 934 topologically sorted.
933 935
934 936 'heads' and 'common' are both lists of node IDs. If heads is
935 937 not supplied, uses all of the revlog's heads. If common is not
936 938 supplied, uses nullid."""
937 939 if common is None:
938 940 common = [nullid]
939 941 if heads is None:
940 942 heads = self.heads()
941 943
942 944 common = [self.rev(n) for n in common]
943 945 heads = [self.rev(n) for n in heads]
944 946
945 947 inc = self.incrementalmissingrevs(common=common)
946 948 return [self.node(r) for r in inc.missingancestors(heads)]
947 949
948 950 def nodesbetween(self, roots=None, heads=None):
949 951 """Return a topological path from 'roots' to 'heads'.
950 952
951 953 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
952 954 topologically sorted list of all nodes N that satisfy both of
953 955 these constraints:
954 956
955 957 1. N is a descendant of some node in 'roots'
956 958 2. N is an ancestor of some node in 'heads'
957 959
958 960 Every node is considered to be both a descendant and an ancestor
959 961 of itself, so every reachable node in 'roots' and 'heads' will be
960 962 included in 'nodes'.
961 963
962 964 'outroots' is the list of reachable nodes in 'roots', i.e., the
963 965 subset of 'roots' that is returned in 'nodes'. Likewise,
964 966 'outheads' is the subset of 'heads' that is also in 'nodes'.
965 967
966 968 'roots' and 'heads' are both lists of node IDs. If 'roots' is
967 969 unspecified, uses nullid as the only root. If 'heads' is
968 970 unspecified, uses list of all of the revlog's heads."""
969 971 nonodes = ([], [], [])
970 972 if roots is not None:
971 973 roots = list(roots)
972 974 if not roots:
973 975 return nonodes
974 976 lowestrev = min([self.rev(n) for n in roots])
975 977 else:
976 978 roots = [nullid] # Everybody's a descendant of nullid
977 979 lowestrev = nullrev
978 980 if (lowestrev == nullrev) and (heads is None):
979 981 # We want _all_ the nodes!
980 982 return ([self.node(r) for r in self], [nullid], list(self.heads()))
981 983 if heads is None:
982 984 # All nodes are ancestors, so the latest ancestor is the last
983 985 # node.
984 986 highestrev = len(self) - 1
985 987 # Set ancestors to None to signal that every node is an ancestor.
986 988 ancestors = None
987 989 # Set heads to an empty dictionary for later discovery of heads
988 990 heads = {}
989 991 else:
990 992 heads = list(heads)
991 993 if not heads:
992 994 return nonodes
993 995 ancestors = set()
994 996 # Turn heads into a dictionary so we can remove 'fake' heads.
995 997 # Also, later we will be using it to filter out the heads we can't
996 998 # find from roots.
997 999 heads = dict.fromkeys(heads, False)
998 1000 # Start at the top and keep marking parents until we're done.
999 1001 nodestotag = set(heads)
1000 1002 # Remember where the top was so we can use it as a limit later.
1001 1003 highestrev = max([self.rev(n) for n in nodestotag])
1002 1004 while nodestotag:
1003 1005 # grab a node to tag
1004 1006 n = nodestotag.pop()
1005 1007 # Never tag nullid
1006 1008 if n == nullid:
1007 1009 continue
1008 1010 # A node's revision number represents its place in a
1009 1011 # topologically sorted list of nodes.
1010 1012 r = self.rev(n)
1011 1013 if r >= lowestrev:
1012 1014 if n not in ancestors:
1013 1015 # If we are possibly a descendant of one of the roots
1014 1016 # and we haven't already been marked as an ancestor
1015 1017 ancestors.add(n) # Mark as ancestor
1016 1018 # Add non-nullid parents to list of nodes to tag.
1017 1019 nodestotag.update([p for p in self.parents(n) if
1018 1020 p != nullid])
1019 1021 elif n in heads: # We've seen it before, is it a fake head?
1020 1022 # So it is, real heads should not be the ancestors of
1021 1023 # any other heads.
1022 1024 heads.pop(n)
1023 1025 if not ancestors:
1024 1026 return nonodes
1025 1027 # Now that we have our set of ancestors, we want to remove any
1026 1028 # roots that are not ancestors.
1027 1029
1028 1030 # If one of the roots was nullid, everything is included anyway.
1029 1031 if lowestrev > nullrev:
1030 1032 # But, since we weren't, let's recompute the lowest rev to not
1031 1033 # include roots that aren't ancestors.
1032 1034
1033 1035 # Filter out roots that aren't ancestors of heads
1034 1036 roots = [root for root in roots if root in ancestors]
1035 1037 # Recompute the lowest revision
1036 1038 if roots:
1037 1039 lowestrev = min([self.rev(root) for root in roots])
1038 1040 else:
1039 1041 # No more roots? Return empty list
1040 1042 return nonodes
1041 1043 else:
1042 1044 # We are descending from nullid, and don't need to care about
1043 1045 # any other roots.
1044 1046 lowestrev = nullrev
1045 1047 roots = [nullid]
1046 1048 # Transform our roots list into a set.
1047 1049 descendants = set(roots)
1048 1050 # Also, keep the original roots so we can filter out roots that aren't
1049 1051 # 'real' roots (i.e. are descended from other roots).
1050 1052 roots = descendants.copy()
1051 1053 # Our topologically sorted list of output nodes.
1052 1054 orderedout = []
1053 1055 # Don't start at nullid since we don't want nullid in our output list,
1054 1056 # and if nullid shows up in descendants, empty parents will look like
1055 1057 # they're descendants.
1056 1058 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1057 1059 n = self.node(r)
1058 1060 isdescendant = False
1059 1061 if lowestrev == nullrev: # Everybody is a descendant of nullid
1060 1062 isdescendant = True
1061 1063 elif n in descendants:
1062 1064 # n is already a descendant
1063 1065 isdescendant = True
1064 1066 # This check only needs to be done here because all the roots
1065 1067 # will start being marked as descendants before the loop.
1066 1068 if n in roots:
1067 1069 # If n was a root, check if it's a 'real' root.
1068 1070 p = tuple(self.parents(n))
1069 1071 # If any of its parents are descendants, it's not a root.
1070 1072 if (p[0] in descendants) or (p[1] in descendants):
1071 1073 roots.remove(n)
1072 1074 else:
1073 1075 p = tuple(self.parents(n))
1074 1076 # A node is a descendant if either of its parents are
1075 1077 # descendants. (We seeded the descendants set with the roots
1076 1078 # up there, remember?)
1077 1079 if (p[0] in descendants) or (p[1] in descendants):
1078 1080 descendants.add(n)
1079 1081 isdescendant = True
1080 1082 if isdescendant and ((ancestors is None) or (n in ancestors)):
1081 1083 # Only include nodes that are both descendants and ancestors.
1082 1084 orderedout.append(n)
1083 1085 if (ancestors is not None) and (n in heads):
1084 1086 # We're trying to figure out which heads are reachable
1085 1087 # from roots.
1086 1088 # Mark this head as having been reached
1087 1089 heads[n] = True
1088 1090 elif ancestors is None:
1089 1091 # Otherwise, we're trying to discover the heads.
1090 1092 # Assume this is a head because if it isn't, the next step
1091 1093 # will eventually remove it.
1092 1094 heads[n] = True
1093 1095 # But, obviously its parents aren't.
1094 1096 for p in self.parents(n):
1095 1097 heads.pop(p, None)
1096 1098 heads = [head for head, flag in heads.iteritems() if flag]
1097 1099 roots = list(roots)
1098 1100 assert orderedout
1099 1101 assert roots
1100 1102 assert heads
1101 1103 return (orderedout, roots, heads)
1102 1104
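A usage sketch, assuming `rl` is a revlog and `rootnode` a known node ID; every returned node lies on a path from the roots to the heads:

    nodes, outroots, outheads = rl.nodesbetween(roots=[rootnode])
    assert set(outroots) <= set(nodes)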
1103 1105 def headrevs(self):
1104 1106 try:
1105 1107 return self.index.headrevs()
1106 1108 except AttributeError:
1107 1109 return self._headrevs()
1108 1110
1109 1111 def computephases(self, roots):
1110 1112 return self.index.computephasesmapsets(roots)
1111 1113
1112 1114 def _headrevs(self):
1113 1115 count = len(self)
1114 1116 if not count:
1115 1117 return [nullrev]
1116 1118 # we won't iter over filtered revs so nobody is a head at start
1117 1119 ishead = [0] * (count + 1)
1118 1120 index = self.index
1119 1121 for r in self:
1120 1122 ishead[r] = 1 # I may be a head
1121 1123 e = index[r]
1122 1124 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1123 1125 return [r for r, val in enumerate(ishead) if val]
1124 1126
1125 1127 def heads(self, start=None, stop=None):
1126 1128 """return the list of all nodes that have no children
1127 1129
1128 1130 if start is specified, only heads that are descendants of
1129 1131 start will be returned
1130 1132 if stop is specified, it will consider all the revs from stop
1131 1133 as if they had no children
1132 1134 """
1133 1135 if start is None and stop is None:
1134 1136 if not len(self):
1135 1137 return [nullid]
1136 1138 return [self.node(r) for r in self.headrevs()]
1137 1139
1138 1140 if start is None:
1139 1141 start = nullrev
1140 1142 else:
1141 1143 start = self.rev(start)
1142 1144
1143 1145 stoprevs = set(self.rev(n) for n in stop or [])
1144 1146
1145 1147 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1146 1148 stoprevs=stoprevs)
1147 1149
1148 1150 return [self.node(rev) for rev in revs]
1149 1151
1150 1152 def children(self, node):
1151 1153 """find the children of a given node"""
1152 1154 c = []
1153 1155 p = self.rev(node)
1154 1156 for r in self.revs(start=p + 1):
1155 1157 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1156 1158 if prevs:
1157 1159 for pr in prevs:
1158 1160 if pr == p:
1159 1161 c.append(self.node(r))
1160 1162 elif p == nullrev:
1161 1163 c.append(self.node(r))
1162 1164 return c
1163 1165
1164 1166 def commonancestorsheads(self, a, b):
1165 1167 """calculate all the heads of the common ancestors of nodes a and b"""
1166 1168 a, b = self.rev(a), self.rev(b)
1167 1169 ancs = self._commonancestorsheads(a, b)
1168 1170 return pycompat.maplist(self.node, ancs)
1169 1171
1170 1172 def _commonancestorsheads(self, *revs):
1171 1173 """calculate all the heads of the common ancestors of revs"""
1172 1174 try:
1173 1175 ancs = self.index.commonancestorsheads(*revs)
1174 1176 except (AttributeError, OverflowError): # C implementation failed
1175 1177 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1176 1178 return ancs
1177 1179
1178 1180 def isancestor(self, a, b):
1179 1181 """return True if node a is an ancestor of node b
1180 1182
1181 1183 A revision is considered an ancestor of itself."""
1182 1184 a, b = self.rev(a), self.rev(b)
1183 1185 return self.isancestorrev(a, b)
1184 1186
1185 1187 def isancestorrev(self, a, b):
1186 1188 """return True if revision a is an ancestor of revision b
1187 1189
1188 1190 A revision is considered an ancestor of itself.
1189 1191
1190 1192 The implementation of this is trivial but the use of
1191 1193 commonancestorsheads is not."""
1192 1194 if a == nullrev:
1193 1195 return True
1194 1196 elif a == b:
1195 1197 return True
1196 1198 elif a > b:
1197 1199 return False
1198 1200 return a in self._commonancestorsheads(a, b)
1199 1201
1200 1202 def ancestor(self, a, b):
1201 1203 """calculate the "best" common ancestor of nodes a and b"""
1202 1204
1203 1205 a, b = self.rev(a), self.rev(b)
1204 1206 try:
1205 1207 ancs = self.index.ancestors(a, b)
1206 1208 except (AttributeError, OverflowError):
1207 1209 ancs = ancestor.ancestors(self.parentrevs, a, b)
1208 1210 if ancs:
1209 1211 # choose a consistent winner when there's a tie
1210 1212 return min(map(self.node, ancs))
1211 1213 return nullid
1212 1214
1213 1215 def _match(self, id):
1214 1216 if isinstance(id, int):
1215 1217 # rev
1216 1218 return self.node(id)
1217 1219 if len(id) == 20:
1218 1220 # possibly a binary node
1219 1221 # odds of a binary node being all hex in ASCII are 1 in 10**25
1220 1222 try:
1221 1223 node = id
1222 1224 self.rev(node) # quick search the index
1223 1225 return node
1224 1226 except error.LookupError:
1225 1227 pass # may be partial hex id
1226 1228 try:
1227 1229 # str(rev)
1228 1230 rev = int(id)
1229 1231 if "%d" % rev != id:
1230 1232 raise ValueError
1231 1233 if rev < 0:
1232 1234 rev = len(self) + rev
1233 1235 if rev < 0 or rev >= len(self):
1234 1236 raise ValueError
1235 1237 return self.node(rev)
1236 1238 except (ValueError, OverflowError):
1237 1239 pass
1238 1240 if len(id) == 40:
1239 1241 try:
1240 1242 # a full hex nodeid?
1241 1243 node = bin(id)
1242 1244 self.rev(node)
1243 1245 return node
1244 1246 except (TypeError, error.LookupError):
1245 1247 pass
1246 1248
1247 1249 def _partialmatch(self, id):
1248 1250 # we don't care about wdirfilenodeids as they should always be full hashes
1249 1251 maybewdir = wdirhex.startswith(id)
1250 1252 try:
1251 1253 partial = self.index.partialmatch(id)
1252 1254 if partial and self.hasnode(partial):
1253 1255 if maybewdir:
1254 1256 # single 'ff...' match in radix tree, ambiguous with wdir
1255 1257 raise error.RevlogError
1256 1258 return partial
1257 1259 if maybewdir:
1258 1260 # no 'ff...' match in radix tree, wdir identified
1259 1261 raise error.WdirUnsupported
1260 1262 return None
1261 1263 except error.RevlogError:
1262 1264 # parsers.c radix tree lookup gave multiple matches
1263 1265 # fast path: for unfiltered changelog, radix tree is accurate
1264 1266 if not getattr(self, 'filteredrevs', None):
1265 1267 raise error.AmbiguousPrefixLookupError(
1266 1268 id, self.indexfile, _('ambiguous identifier'))
1267 1269 # fall through to slow path that filters hidden revisions
1268 1270 except (AttributeError, ValueError):
1269 1271 # we are pure python, or key was too short to search radix tree
1270 1272 pass
1271 1273
1272 1274 if id in self._pcache:
1273 1275 return self._pcache[id]
1274 1276
1275 1277 if len(id) <= 40:
1276 1278 try:
1277 1279 # hex(node)[:...]
1278 1280 l = len(id) // 2 # grab an even number of digits
1279 1281 prefix = bin(id[:l * 2])
1280 1282 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1281 1283 nl = [n for n in nl if hex(n).startswith(id) and
1282 1284 self.hasnode(n)]
1283 1285 if nullhex.startswith(id):
1284 1286 nl.append(nullid)
1285 1287 if len(nl) > 0:
1286 1288 if len(nl) == 1 and not maybewdir:
1287 1289 self._pcache[id] = nl[0]
1288 1290 return nl[0]
1289 1291 raise error.AmbiguousPrefixLookupError(
1290 1292 id, self.indexfile, _('ambiguous identifier'))
1291 1293 if maybewdir:
1292 1294 raise error.WdirUnsupported
1293 1295 return None
1294 1296 except TypeError:
1295 1297 pass
1296 1298
1297 1299 def lookup(self, id):
1298 1300 """locate a node based on:
1299 1301 - revision number or str(revision number)
1300 1302 - nodeid or subset of hex nodeid
1301 1303 """
1302 1304 n = self._match(id)
1303 1305 if n is not None:
1304 1306 return n
1305 1307 n = self._partialmatch(id)
1306 1308 if n:
1307 1309 return n
1308 1310
1309 1311 raise error.LookupError(id, self.indexfile, _('no match found'))
1310 1312
1311 1313 def shortest(self, node, minlength=1):
1312 1314 """Find the shortest unambiguous prefix that matches node."""
1313 1315 def isvalid(prefix):
1314 1316 try:
1315 1317 node = self._partialmatch(prefix)
1316 1318 except error.AmbiguousPrefixLookupError:
1317 1319 return False
1318 1320 except error.WdirUnsupported:
1319 1321 # single 'ff...' match
1320 1322 return True
1321 1323 if node is None:
1322 1324 raise error.LookupError(node, self.indexfile, _('no node'))
1323 1325 return True
1324 1326
1325 1327 def maybewdir(prefix):
1326 1328 return all(c == 'f' for c in prefix)
1327 1329
1328 1330 hexnode = hex(node)
1329 1331
1330 1332 def disambiguate(hexnode, minlength):
1331 1333 """Disambiguate against wdirid."""
1332 1334 for length in range(minlength, 41):
1333 1335 prefix = hexnode[:length]
1334 1336 if not maybewdir(prefix):
1335 1337 return prefix
1336 1338
1337 1339 if not getattr(self, 'filteredrevs', None):
1338 1340 try:
1339 1341 length = max(self.index.shortest(node), minlength)
1340 1342 return disambiguate(hexnode, length)
1341 1343 except error.RevlogError:
1342 1344 if node != wdirid:
1343 1345 raise error.LookupError(node, self.indexfile, _('no node'))
1344 1346 except AttributeError:
1345 1347 # Fall through to pure code
1346 1348 pass
1347 1349
1348 1350 if node == wdirid:
1349 1351 for length in range(minlength, 41):
1350 1352 prefix = hexnode[:length]
1351 1353 if isvalid(prefix):
1352 1354 return prefix
1353 1355
1354 1356 for length in range(minlength, 41):
1355 1357 prefix = hexnode[:length]
1356 1358 if isvalid(prefix):
1357 1359 return disambiguate(hexnode, length)
1358 1360
1359 1361 def cmp(self, node, text):
1360 1362 """compare text with a given file revision
1361 1363
1362 1364 returns True if text is different from what is stored.
1363 1365 """
1364 1366 p1, p2 = self.parents(node)
1365 1367 return storageutil.hashrevisionsha1(text, p1, p2) != node
1366 1368
1367 1369 def _cachesegment(self, offset, data):
1368 1370 """Add a segment to the revlog cache.
1369 1371
1370 1372 Accepts an absolute offset and the data that is at that location.
1371 1373 """
1372 1374 o, d = self._chunkcache
1373 1375 # try to add to existing cache
1374 1376 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1375 1377 self._chunkcache = o, d + data
1376 1378 else:
1377 1379 self._chunkcache = offset, data
1378 1380
1379 1381 def _readsegment(self, offset, length, df=None):
1380 1382 """Load a segment of raw data from the revlog.
1381 1383
1382 1384 Accepts an absolute offset, length to read, and an optional existing
1383 1385 file handle to read from.
1384 1386
1385 1387 If an existing file handle is passed, it will be seeked and the
1386 1388 original seek position will NOT be restored.
1387 1389
1388 1390 Returns a str or buffer of raw byte data.
1389 1391
1390 1392 Raises if the requested number of bytes could not be read.
1391 1393 """
1392 1394 # Cache data both forward and backward around the requested
1393 1395 # data, in a fixed size window. This helps speed up operations
1394 1396 # involving reading the revlog backwards.
1395 1397 cachesize = self._chunkcachesize
1396 1398 realoffset = offset & ~(cachesize - 1)
1397 1399 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1398 1400 - realoffset)
1399 1401 with self._datareadfp(df) as df:
1400 1402 df.seek(realoffset)
1401 1403 d = df.read(reallength)
1402 1404
1403 1405 self._cachesegment(realoffset, d)
1404 1406 if offset != realoffset or reallength != length:
1405 1407 startoffset = offset - realoffset
1406 1408 if len(d) - startoffset < length:
1407 1409 raise error.RevlogError(
1408 1410 _('partial read of revlog %s; expected %d bytes from '
1409 1411 'offset %d, got %d') %
1410 1412 (self.indexfile if self._inline else self.datafile,
1411 1413 length, realoffset, len(d) - startoffset))
1412 1414
1413 1415 return util.buffer(d, startoffset, length)
1414 1416
1415 1417 if len(d) < length:
1416 1418 raise error.RevlogError(
1417 1419 _('partial read of revlog %s; expected %d bytes from offset '
1418 1420 '%d, got %d') %
1419 1421 (self.indexfile if self._inline else self.datafile,
1420 1422 length, offset, len(d)))
1421 1423
1422 1424 return d
1423 1425
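A worked example of the window alignment above, with the default 64 KiB chunk cache: a 100-byte request at offset 70000 reads (and caches) the aligned window [65536, 131072):

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert (realoffset, reallength) == (65536, 65536)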
1424 1426 def _getsegment(self, offset, length, df=None):
1425 1427 """Obtain a segment of raw data from the revlog.
1426 1428
1427 1429 Accepts an absolute offset, length of bytes to obtain, and an
1428 1430 optional file handle to the already-opened revlog. If the file
1429 1431 handle is used, its original seek position will not be preserved.
1430 1432
1431 1433 Requests for data may be returned from a cache.
1432 1434
1433 1435 Returns a str or a buffer instance of raw byte data.
1434 1436 """
1435 1437 o, d = self._chunkcache
1436 1438 l = len(d)
1437 1439
1438 1440 # is it in the cache?
1439 1441 cachestart = offset - o
1440 1442 cacheend = cachestart + length
1441 1443 if cachestart >= 0 and cacheend <= l:
1442 1444 if cachestart == 0 and cacheend == l:
1443 1445 return d # avoid a copy
1444 1446 return util.buffer(d, cachestart, cacheend - cachestart)
1445 1447
1446 1448 return self._readsegment(offset, length, df=df)
1447 1449
1448 1450 def _getsegmentforrevs(self, startrev, endrev, df=None):
1449 1451 """Obtain a segment of raw data corresponding to a range of revisions.
1450 1452
1451 1453 Accepts the start and end revisions and an optional already-open
1452 1454 file handle to be used for reading. If the file handle is used, its
1453 1455 seek position will not be preserved.
1454 1456
1455 1457 Requests for data may be satisfied by a cache.
1456 1458
1457 1459 Returns a 2-tuple of (offset, data) for the requested range of
1458 1460 revisions. Offset is the integer offset from the beginning of the
1459 1461 revlog and data is a str or buffer of the raw byte data.
1460 1462
1461 1463 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1462 1464 to determine where each revision's data begins and ends.
1463 1465 """
1464 1466 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1465 1467 # (functions are expensive).
1466 1468 index = self.index
1467 1469 istart = index[startrev]
1468 1470 start = int(istart[0] >> 16)
1469 1471 if startrev == endrev:
1470 1472 end = start + istart[1]
1471 1473 else:
1472 1474 iend = index[endrev]
1473 1475 end = int(iend[0] >> 16) + iend[1]
1474 1476
1475 1477 if self._inline:
1476 1478 start += (startrev + 1) * self._io.size
1477 1479 end += (endrev + 1) * self._io.size
1478 1480 length = end - start
1479 1481
1480 1482 return start, self._getsegment(start, length, df=df)
1481 1483
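How a caller slices the returned segment into per-revision chunks (ignoring the inline adjustment shown above; cf. _chunks below), assuming `rl` is a revlog:

    firstrev, lastrev = 0, 3  # an arbitrary ascending range
    offset, data = rl._getsegmentforrevs(firstrev, lastrev)
    for rev in range(firstrev, lastrev + 1):
        chunk = util.buffer(data, rl.start(rev) - offset, rl.length(rev))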
1482 1484 def _chunk(self, rev, df=None):
1483 1485 """Obtain a single decompressed chunk for a revision.
1484 1486
1485 1487 Accepts an integer revision and an optional already-open file handle
1486 1488 to be used for reading. If used, the seek position of the file will not
1487 1489 be preserved.
1488 1490
1489 1491 Returns a str holding uncompressed data for the requested revision.
1490 1492 """
1491 1493 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1492 1494
1493 1495 def _chunks(self, revs, df=None, targetsize=None):
1494 1496 """Obtain decompressed chunks for the specified revisions.
1495 1497
1496 1498 Accepts an iterable of numeric revisions that are assumed to be in
1497 1499 ascending order. Also accepts an optional already-open file handle
1498 1500 to be used for reading. If used, the seek position of the file will
1499 1501 not be preserved.
1500 1502
1501 1503 This function is similar to calling ``self._chunk()`` multiple times,
1502 1504 but is faster.
1503 1505
1504 1506 Returns a list with decompressed data for each requested revision.
1505 1507 """
1506 1508 if not revs:
1507 1509 return []
1508 1510 start = self.start
1509 1511 length = self.length
1510 1512 inline = self._inline
1511 1513 iosize = self._io.size
1512 1514 buffer = util.buffer
1513 1515
1514 1516 l = []
1515 1517 ladd = l.append
1516 1518
1517 1519 if not self._withsparseread:
1518 1520 slicedchunks = (revs,)
1519 1521 else:
1520 1522 slicedchunks = deltautil.slicechunk(self, revs,
1521 1523 targetsize=targetsize)
1522 1524
1523 1525 for revschunk in slicedchunks:
1524 1526 firstrev = revschunk[0]
1525 1527 # Skip trailing revisions with empty diff
1526 1528 for lastrev in revschunk[::-1]:
1527 1529 if length(lastrev) != 0:
1528 1530 break
1529 1531
1530 1532 try:
1531 1533 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1532 1534 except OverflowError:
1533 1535 # issue4215 - we can't cache a run of chunks greater than
1534 1536 # 2G on Windows
1535 1537 return [self._chunk(rev, df=df) for rev in revschunk]
1536 1538
1537 1539 decomp = self.decompress
1538 1540 for rev in revschunk:
1539 1541 chunkstart = start(rev)
1540 1542 if inline:
1541 1543 chunkstart += (rev + 1) * iosize
1542 1544 chunklength = length(rev)
1543 1545 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1544 1546
1545 1547 return l
1546 1548
1547 1549 def _chunkclear(self):
1548 1550 """Clear the raw chunk cache."""
1549 1551 self._chunkcache = (0, '')
1550 1552
1551 1553 def deltaparent(self, rev):
1552 1554 """return deltaparent of the given revision"""
1553 1555 base = self.index[rev][3]
1554 1556 if base == rev:
1555 1557 return nullrev
1556 1558 elif self._generaldelta:
1557 1559 return base
1558 1560 else:
1559 1561 return rev - 1
1560 1562
1561 1563 def issnapshot(self, rev):
1562 1564 """tells whether rev is a snapshot
1563 1565 """
1564 1566 if not self._sparserevlog:
1565 1567 return self.deltaparent(rev) == nullrev
1566 1568 elif util.safehasattr(self.index, 'issnapshot'):
1567 1569 # directly assign the method to cache the testing and access
1568 1570 self.issnapshot = self.index.issnapshot
1569 1571 return self.issnapshot(rev)
1570 1572 if rev == nullrev:
1571 1573 return True
1572 1574 entry = self.index[rev]
1573 1575 base = entry[3]
1574 1576 if base == rev:
1575 1577 return True
1576 1578 if base == nullrev:
1577 1579 return True
1578 1580 p1 = entry[5]
1579 1581 p2 = entry[6]
1580 1582 if base == p1 or base == p2:
1581 1583 return False
1582 1584 return self.issnapshot(base)
1583 1585
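# Hedged summary (not part of the original file) of the recursion above, for
# the sparse-revlog case:
#
#   base == rev or base == nullrev  -> snapshot (full text or level-0)
#   base in (p1, p2)                -> a regular delta, not a snapshot
#   otherwise                       -> a snapshot iff issnapshot(base)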
1584 1586 def snapshotdepth(self, rev):
1585 1587 """number of snapshot in the chain before this one"""
1586 1588 if not self.issnapshot(rev):
1587 1589 raise error.ProgrammingError('revision %d not a snapshot')
1588 1590 return len(self._deltachain(rev)[0]) - 1
1589 1591
1590 1592 def revdiff(self, rev1, rev2):
1591 1593 """return or calculate a delta between two revisions
1592 1594
1593 1595 The delta calculated is in binary form and is intended to be written to
1594 1596 revlog data directly. So this function needs raw revision data.
1595 1597 """
1596 1598 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1597 1599 return bytes(self._chunk(rev2))
1598 1600
1599 1601 return mdiff.textdiff(self.revision(rev1, raw=True),
1600 1602 self.revision(rev2, raw=True))
1601 1603
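# Usage sketch (an illustration, not from the original file): the delta
# returned by revdiff() patches the raw text of rev1 into the raw text of
# rev2, assuming ``rl`` is a revlog and using mdiff to apply it:
#
#   delta = rl.revdiff(rev1, rev2)
#   assert mdiff.patch(rl.revision(rev1, raw=True), delta) == \
#       rl.revision(rev2, raw=True)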
1602 1604 def revision(self, nodeorrev, _df=None, raw=False):
1603 1605 """return an uncompressed revision of a given node or revision
1604 1606 number.
1605 1607
1606 1608 _df - an existing file handle to read from. (internal-only)
1607 1609 raw - an optional argument specifying if the revision data is to be
1608 1610 treated as raw data when applying flag transforms. 'raw' should be set
1609 1611 to True when generating changegroups or in debug commands.
1610 1612 """
1611 1613 if isinstance(nodeorrev, int):
1612 1614 rev = nodeorrev
1613 1615 node = self.node(rev)
1614 1616 else:
1615 1617 node = nodeorrev
1616 1618 rev = None
1617 1619
1618 1620 cachedrev = None
1619 1621 flags = None
1620 1622 rawtext = None
1621 1623 if node == nullid:
1622 1624 return ""
1623 1625 if self._revisioncache:
1624 1626 if self._revisioncache[0] == node:
1625 1627 # _cache only stores rawtext
1626 1628 if raw:
1627 1629 return self._revisioncache[2]
1628 1630 # duplicated, but good for perf
1629 1631 if rev is None:
1630 1632 rev = self.rev(node)
1631 1633 if flags is None:
1632 1634 flags = self.flags(rev)
1633 1635 # no extra flags set, no flag processor runs, text = rawtext
1634 1636 if flags == REVIDX_DEFAULT_FLAGS:
1635 1637 return self._revisioncache[2]
1636 1638 # rawtext is reusable. need to run flag processor
1637 1639 rawtext = self._revisioncache[2]
1638 1640
1639 1641 cachedrev = self._revisioncache[1]
1640 1642
1641 1643 # look up what we need to read
1642 1644 if rawtext is None:
1643 1645 if rev is None:
1644 1646 rev = self.rev(node)
1645 1647
1646 1648 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1647 1649 if stopped:
1648 1650 rawtext = self._revisioncache[2]
1649 1651
1650 1652 # drop cache to save memory
1651 1653 self._revisioncache = None
1652 1654
1653 1655 targetsize = None
1654 1656 rawsize = self.index[rev][2]
1655 1657 if 0 <= rawsize:
1656 1658 targetsize = 4 * rawsize
1657 1659
1658 1660 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1659 1661 if rawtext is None:
1660 1662 rawtext = bytes(bins[0])
1661 1663 bins = bins[1:]
1662 1664
1663 1665 rawtext = mdiff.patches(rawtext, bins)
1664 1666 self._revisioncache = (node, rev, rawtext)
1665 1667
1666 1668 if flags is None:
1667 1669 if rev is None:
1668 1670 rev = self.rev(node)
1669 1671 flags = self.flags(rev)
1670 1672
1671 1673 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1672 1674 if validatehash:
1673 1675 self.checkhash(text, node, rev=rev)
1674 1676
1675 1677 return text
1676 1678
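# Usage sketch (not part of the original file): reading a revision by node or
# by revision number, assuming ``rl`` is an open revlog. ``raw=True`` skips
# the read-side flag transforms and returns the stored rawtext:
#
#   text = rl.revision(rl.node(0))       # processed text of rev 0
#   rawtext = rl.revision(0, raw=True)   # stored rawtext of rev 0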
1677 1679 def hash(self, text, p1, p2):
1678 1680 """Compute a node hash.
1679 1681
1680 1682 Available as a function so that subclasses can replace the hash
1681 1683 as needed.
1682 1684 """
1683 1685 return storageutil.hashrevisionsha1(text, p1, p2)
1684 1686
1685 1687 def _processflags(self, text, flags, operation, raw=False):
1686 1688 """Inspect revision data flags and applies transforms defined by
1687 1689 registered flag processors.
1688 1690
1689 1691 ``text`` - the revision data to process
1690 1692 ``flags`` - the revision flags
1691 1693 ``operation`` - the operation being performed (read or write)
1692 1694 ``raw`` - an optional argument describing if the raw transform should be
1693 1695 applied.
1694 1696
1695 1697 This method processes the flags in the order (or reverse order if
1696 1698 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1697 1699 flag processors registered for present flags. The order of flags defined
1698 1700 in REVIDX_FLAGS_ORDER needs to be stable because the transforms do not commute.
1699 1701
1700 1702 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1701 1703 processed text and ``validatehash`` is a bool indicating whether the
1702 1704 returned text should be checked for hash integrity.
1703 1705
1704 1706 Note: If the ``raw`` argument is set, it has precedence over the
1705 1707 operation and will only update the value of ``validatehash``.
1706 1708 """
1707 1709 # fast path: no flag processors will run
1708 1710 if flags == 0:
1709 1711 return text, True
1710 1712 if operation not in ('read', 'write'):
1711 1713 raise error.ProgrammingError(_("invalid '%s' operation") %
1712 1714 operation)
1713 1715 # Check all flags are known.
1714 1716 if flags & ~REVIDX_KNOWN_FLAGS:
1715 1717 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1716 1718 (flags & ~REVIDX_KNOWN_FLAGS))
1717 1719 validatehash = True
1718 1720 # Depending on the operation (read or write), the order might be
1719 1721 # reversed due to non-commutative transforms.
1720 1722 orderedflags = REVIDX_FLAGS_ORDER
1721 1723 if operation == 'write':
1722 1724 orderedflags = reversed(orderedflags)
1723 1725
1724 1726 for flag in orderedflags:
1725 1727 # If a flagprocessor has been registered for a known flag, apply the
1726 1728 # related operation transform and update result tuple.
1727 1729 if flag & flags:
1728 1730 vhash = True
1729 1731
1730 1732 if flag not in self._flagprocessors:
1731 1733 message = _("missing processor for flag '%#x'") % (flag)
1732 1734 raise error.RevlogError(message)
1733 1735
1734 1736 processor = self._flagprocessors[flag]
1735 1737 if processor is not None:
1736 1738 readtransform, writetransform, rawtransform = processor
1737 1739
1738 1740 if raw:
1739 1741 vhash = rawtransform(self, text)
1740 1742 elif operation == 'read':
1741 1743 text, vhash = readtransform(self, text)
1742 1744 else: # write operation
1743 1745 text, vhash = writetransform(self, text)
1744 1746 validatehash = validatehash and vhash
1745 1747
1746 1748 return text, validatehash
1747 1749
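# Hedged sketch (not from the original file): a flag processor is the
# (readtransform, writetransform, rawtransform) triple unpacked above. The
# names below are hypothetical -- ``decode``/``encode`` stand in for real
# transforms, and REVIDX_EXTSTORED is used purely as an example flag:
#
#   def exampleread(rl, text):
#       return decode(text), True    # returns (text, validatehash)
#
#   def examplewrite(rl, text):
#       return encode(text), True
#
#   def exampleraw(rl, text):
#       return False                 # rawtext hash cannot be verified
#
#   addflagprocessor(REVIDX_EXTSTORED,
#                    (exampleread, examplewrite, exampleraw))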
1748 1750 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1749 1751 """Check node hash integrity.
1750 1752
1751 1753 Available as a function so that subclasses can extend hash mismatch
1752 1754 behaviors as needed.
1753 1755 """
1754 1756 try:
1755 1757 if p1 is None and p2 is None:
1756 1758 p1, p2 = self.parents(node)
1757 1759 if node != self.hash(text, p1, p2):
1758 1760 # Clear the revision cache on hash failure. The revision cache
1759 1761 # only stores the raw revision and clearing the cache does have
1760 1762 # the side-effect that we won't have a cache hit when the raw
1761 1763 # revision data is accessed. But this case should be rare and
1762 1764 # it is extra work to teach the cache about the hash
1763 1765 # verification state.
1764 1766 if self._revisioncache and self._revisioncache[0] == node:
1765 1767 self._revisioncache = None
1766 1768
1767 1769 revornode = rev
1768 1770 if revornode is None:
1769 1771 revornode = templatefilters.short(hex(node))
1770 1772 raise error.RevlogError(_("integrity check failed on %s:%s")
1771 1773 % (self.indexfile, pycompat.bytestr(revornode)))
1772 1774 except error.RevlogError:
1773 1775 if self._censorable and storageutil.iscensoredtext(text):
1774 1776 raise error.CensoredNodeError(self.indexfile, node, text)
1775 1777 raise
1776 1778
1777 1779 def _enforceinlinesize(self, tr, fp=None):
1778 1780 """Check if the revlog is too big for inline and convert if so.
1779 1781
1780 1782 This should be called after revisions are added to the revlog. If the
1781 1783 revlog has grown too large to be an inline revlog, it will convert it
1782 1784 to use multiple index and data files.
1783 1785 """
1784 1786 tiprev = len(self) - 1
1785 1787 if (not self._inline or
1786 1788 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1787 1789 return
1788 1790
1789 1791 trinfo = tr.find(self.indexfile)
1790 1792 if trinfo is None:
1791 1793 raise error.RevlogError(_("%s not found in the transaction")
1792 1794 % self.indexfile)
1793 1795
1794 1796 trindex = trinfo[2]
1795 1797 if trindex is not None:
1796 1798 dataoff = self.start(trindex)
1797 1799 else:
1798 1800 # revlog was stripped at start of transaction, use all leftover data
1799 1801 trindex = len(self) - 1
1800 1802 dataoff = self.end(tiprev)
1801 1803
1802 1804 tr.add(self.datafile, dataoff)
1803 1805
1804 1806 if fp:
1805 1807 fp.flush()
1806 1808 fp.close()
1807 1809 # We can't use the cached file handle after close(). So prevent
1808 1810 # its usage.
1809 1811 self._writinghandles = None
1810 1812
1811 1813 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1812 1814 for r in self:
1813 1815 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1814 1816
1815 1817 with self._indexfp('w') as fp:
1816 1818 self.version &= ~FLAG_INLINE_DATA
1817 1819 self._inline = False
1818 1820 io = self._io
1819 1821 for i in self:
1820 1822 e = io.packentry(self.index[i], self.node, self.version, i)
1821 1823 fp.write(e)
1822 1824
1823 1825 # the temp file replaces the real index when we exit the context
1824 1826 # manager
1825 1827
1826 1828 tr.replace(self.indexfile, trindex * self._io.size)
1827 1829 self._chunkclear()
1828 1830
1829 1831 def _nodeduplicatecallback(self, transaction, node):
1830 1832 """called when trying to add a node already stored.
1831 1833 """
1832 1834
1833 1835 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1834 1836 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1835 1837 """add a revision to the log
1836 1838
1837 1839 text - the revision data to add
1838 1840 transaction - the transaction object used for rollback
1839 1841 link - the linkrev data to add
1840 1842 p1, p2 - the parent nodeids of the revision
1841 1843 cachedelta - an optional precomputed delta
1842 1844 node - nodeid of revision; typically node is not specified, and it is
1843 1845 computed by default as hash(text, p1, p2), however subclasses might
1844 1846 use different hashing method (and override checkhash() in such case)
1845 1847 flags - the known flags to set on the revision
1846 1848 deltacomputer - an optional deltacomputer instance shared between
1847 1849 multiple calls
1848 1850 """
1849 1851 if link == nullrev:
1850 1852 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1851 1853 % self.indexfile)
1852 1854
1853 1855 if flags:
1854 1856 node = node or self.hash(text, p1, p2)
1855 1857
1856 1858 rawtext, validatehash = self._processflags(text, flags, 'write')
1857 1859
1858 1860 # If the flag processor modifies the revision data, ignore any provided
1859 1861 # cachedelta.
1860 1862 if rawtext != text:
1861 1863 cachedelta = None
1862 1864
1863 1865 if len(rawtext) > _maxentrysize:
1864 1866 raise error.RevlogError(
1865 1867 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1866 1868 % (self.indexfile, len(rawtext)))
1867 1869
1868 1870 node = node or self.hash(rawtext, p1, p2)
1869 1871 if node in self.nodemap:
1870 1872 return node
1871 1873
1872 1874 if validatehash:
1873 1875 self.checkhash(rawtext, node, p1=p1, p2=p2)
1874 1876
1875 1877 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1876 1878 flags, cachedelta=cachedelta,
1877 1879 deltacomputer=deltacomputer)
1878 1880
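# Usage sketch (an illustration, not from the original file): adding a new
# revision inside an open transaction ``tr``, assuming ``rl`` is a revlog and
# ``p1``/``p2`` are parent nodeids already known to it (nullid when there is
# no parent):
#
#   node = rl.addrevision(b'file content', tr, linkrev, p1, p2)
#   assert node in rl.nodemap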
1879 1881 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1880 1882 cachedelta=None, deltacomputer=None):
1881 1883 """add a raw revision with known flags, node and parents
1882 1884 useful when reusing a revision not stored in this revlog (ex: received
1883 1885 over wire, or read from an external bundle).
1884 1886 """
1885 1887 dfh = None
1886 1888 if not self._inline:
1887 1889 dfh = self._datafp("a+")
1888 1890 ifh = self._indexfp("a+")
1889 1891 try:
1890 1892 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1891 1893 flags, cachedelta, ifh, dfh,
1892 1894 deltacomputer=deltacomputer)
1893 1895 finally:
1894 1896 if dfh:
1895 1897 dfh.close()
1896 1898 ifh.close()
1897 1899
1898 1900 def compress(self, data):
1899 1901 """Generate a possibly-compressed representation of data."""
1900 1902 if not data:
1901 1903 return '', data
1902 1904
1903 1905 compressed = self._compressor.compress(data)
1904 1906
1905 1907 if compressed:
1906 1908 # The revlog compressor added the header in the returned data.
1907 1909 return '', compressed
1908 1910
1909 1911 if data[0:1] == '\0':
1910 1912 return '', data
1911 1913 return 'u', data
1912 1914
1913 1915 def decompress(self, data):
1914 1916 """Decompress a revlog chunk.
1915 1917
1916 1918 The chunk is expected to begin with a header identifying the
1917 1919 format type so it can be routed to an appropriate decompressor.
1918 1920 """
1919 1921 if not data:
1920 1922 return data
1921 1923
1922 1924 # Revlogs are read much more frequently than they are written and many
1923 1925 # chunks only take microseconds to decompress, so performance is
1924 1926 # important here.
1925 1927 #
1926 1928 # We can make a few assumptions about revlogs:
1927 1929 #
1928 1930 # 1) the majority of chunks will be compressed (as opposed to inline
1929 1931 # raw data).
1930 1932 # 2) decompressing *any* data will likely be at least 10x slower than
1931 1933 # returning raw inline data.
1932 1934 # 3) we want to prioritize common and officially supported compression
1933 1935 # engines
1934 1936 #
1935 1937 # It follows that we want to optimize for the "decompress compressed data
1936 1938 # when encoded with common and officially supported compression engines"
1937 1939 # case over "raw data" and "data encoded by less common or non-official
1938 1940 # compression engines." That is why we have the inline lookup first
1939 1941 # followed by the compengines lookup.
1940 1942 #
1941 1943 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1942 1944 # compressed chunks. And this matters for changelog and manifest reads.
1943 1945 t = data[0:1]
1944 1946
1945 1947 if t == 'x':
1946 1948 try:
1947 1949 return _zlibdecompress(data)
1948 1950 except zlib.error as e:
1949 1951 raise error.RevlogError(_('revlog decompress error: %s') %
1950 1952 stringutil.forcebytestr(e))
1951 1953 # '\0' is more common than 'u' so it goes first.
1952 1954 elif t == '\0':
1953 1955 return data
1954 1956 elif t == 'u':
1955 1957 return util.buffer(data, 1)
1956 1958
1957 1959 try:
1958 1960 compressor = self._decompressors[t]
1959 1961 except KeyError:
1960 1962 try:
1961 1963 engine = util.compengines.forrevlogheader(t)
1962 1964 compressor = engine.revlogcompressor()
1963 1965 self._decompressors[t] = compressor
1964 1966 except KeyError:
1965 1967 raise error.RevlogError(_('unknown compression type %r') % t)
1966 1968
1967 1969 return compressor.decompress(data)
1968 1970
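# Hedged illustration (not part of the original file) of the chunk headers
# routed above: 'u' marks data stored uncompressed, '\0' marks raw data
# returned as-is, 'x' is the zlib header, and any other byte is looked up in
# the registered compression engines:
#
#   assert bytes(rl.decompress(b'u' + b'payload')) == b'payload'
#   assert rl.decompress(zlib.compress(b'payload')) == b'payload'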
1969 1971 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1970 1972 cachedelta, ifh, dfh, alwayscache=False,
1971 1973 deltacomputer=None):
1972 1974 """internal function to add revisions to the log
1973 1975
1974 1976 see addrevision for argument descriptions.
1975 1977
1976 1978 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1977 1979
1978 1980 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1979 1981 be used.
1980 1982
1981 1983 invariants:
1982 1984 - rawtext is optional (can be None); if not set, cachedelta must be set.
1983 1985 if both are set, they must correspond to each other.
1984 1986 """
1985 1987 if node == nullid:
1986 1988 raise error.RevlogError(_("%s: attempt to add null revision") %
1987 1989 self.indexfile)
1988 1990 if node == wdirid or node in wdirfilenodeids:
1989 1991 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1990 1992 self.indexfile)
1991 1993
1992 1994 if self._inline:
1993 1995 fh = ifh
1994 1996 else:
1995 1997 fh = dfh
1996 1998
1997 1999 btext = [rawtext]
1998 2000
1999 2001 curr = len(self)
2000 2002 prev = curr - 1
2001 2003 offset = self.end(prev)
2002 2004 p1r, p2r = self.rev(p1), self.rev(p2)
2003 2005
2004 2006 # full versions are inserted when the needed deltas
2005 2007 # become comparable to the uncompressed text
2006 2008 if rawtext is None:
2007 2009 # need the rawtext size before it is changed by flag processors, i.e.
2008 2010 # the non-raw size. use revlog explicitly to avoid filelog's extra
2009 2011 # logic that might remove metadata size.
2010 2012 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2011 2013 cachedelta[1])
2012 2014 else:
2013 2015 textlen = len(rawtext)
2014 2016
2015 2017 if deltacomputer is None:
2016 2018 deltacomputer = deltautil.deltacomputer(self)
2017 2019
2018 2020 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2019 2021
2020 2022 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2021 2023
2022 2024 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2023 2025 deltainfo.base, link, p1r, p2r, node)
2024 2026 self.index.append(e)
2025 2027 self.nodemap[node] = curr
2026 2028
2027 2029 # Reset the pure node cache start lookup offset to account for new
2028 2030 # revision.
2029 2031 if self._nodepos is not None:
2030 2032 self._nodepos = curr
2031 2033
2032 2034 entry = self._io.packentry(e, self.node, self.version, curr)
2033 2035 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2034 2036 link, offset)
2035 2037
2036 2038 rawtext = btext[0]
2037 2039
2038 2040 if alwayscache and rawtext is None:
2039 2041 rawtext = deltacomputer.buildtext(revinfo, fh)
2040 2042
2041 2043 if type(rawtext) == bytes: # only accept immutable objects
2042 2044 self._revisioncache = (node, curr, rawtext)
2043 2045 self._chainbasecache[curr] = deltainfo.chainbase
2044 2046 return node
2045 2047
2046 2048 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2047 2049 # Files opened in a+ mode have inconsistent behavior on various
2048 2050 # platforms. Windows requires that a file positioning call be made
2049 2051 # when the file handle transitions between reads and writes. See
2050 2052 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2051 2053 # platforms, Python or the platform itself can be buggy. Some versions
2052 2054 # of Solaris have been observed to not append at the end of the file
2053 2055 # if the file was seeked to before the end. See issue4943 for more.
2054 2056 #
2055 2057 # We work around this issue by inserting a seek() before writing.
2056 2058 # Note: This is likely not necessary on Python 3. However, because
2057 2059 # the file handle is reused for reads and may be seeked there, we need
2058 2060 # to be careful before changing this.
2059 2061 ifh.seek(0, os.SEEK_END)
2060 2062 if dfh:
2061 2063 dfh.seek(0, os.SEEK_END)
2062 2064
2063 2065 curr = len(self) - 1
2064 2066 if not self._inline:
2065 2067 transaction.add(self.datafile, offset)
2066 2068 transaction.add(self.indexfile, curr * len(entry))
2067 2069 if data[0]:
2068 2070 dfh.write(data[0])
2069 2071 dfh.write(data[1])
2070 2072 ifh.write(entry)
2071 2073 else:
2072 2074 offset += curr * self._io.size
2073 2075 transaction.add(self.indexfile, offset, curr)
2074 2076 ifh.write(entry)
2075 2077 ifh.write(data[0])
2076 2078 ifh.write(data[1])
2077 2079 self._enforceinlinesize(transaction, ifh)
2078 2080
2079 2081 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2080 2082 """
2081 2083 add a delta group
2082 2084
2083 2085 given a set of deltas, add them to the revision log. the
2084 2086 first delta is against its parent, which should be in our
2085 2087 log, the rest are against the previous delta.
2086 2088
2087 2089 If ``addrevisioncb`` is defined, it will be called with arguments of
2088 2090 this revlog and the node that was added.
2089 2091 """
2090 2092
2091 2093 if self._writinghandles:
2092 2094 raise error.ProgrammingError('cannot nest addgroup() calls')
2093 2095
2094 2096 nodes = []
2095 2097
2096 2098 r = len(self)
2097 2099 end = 0
2098 2100 if r:
2099 2101 end = self.end(r - 1)
2100 2102 ifh = self._indexfp("a+")
2101 2103 isize = r * self._io.size
2102 2104 if self._inline:
2103 2105 transaction.add(self.indexfile, end + isize, r)
2104 2106 dfh = None
2105 2107 else:
2106 2108 transaction.add(self.indexfile, isize, r)
2107 2109 transaction.add(self.datafile, end)
2108 2110 dfh = self._datafp("a+")
2109 2111 def flush():
2110 2112 if dfh:
2111 2113 dfh.flush()
2112 2114 ifh.flush()
2113 2115
2114 2116 self._writinghandles = (ifh, dfh)
2115 2117
2116 2118 try:
2117 2119 deltacomputer = deltautil.deltacomputer(self)
2118 2120 # loop through our set of deltas
2119 2121 for data in deltas:
2120 2122 node, p1, p2, linknode, deltabase, delta, flags = data
2121 2123 link = linkmapper(linknode)
2122 2124 flags = flags or REVIDX_DEFAULT_FLAGS
2123 2125
2124 2126 nodes.append(node)
2125 2127
2126 2128 if node in self.nodemap:
2127 2129 self._nodeduplicatecallback(transaction, node)
2128 2130 # this can happen if two branches make the same change
2129 2131 continue
2130 2132
2131 2133 for p in (p1, p2):
2132 2134 if p not in self.nodemap:
2133 2135 raise error.LookupError(p, self.indexfile,
2134 2136 _('unknown parent'))
2135 2137
2136 2138 if deltabase not in self.nodemap:
2137 2139 raise error.LookupError(deltabase, self.indexfile,
2138 2140 _('unknown delta base'))
2139 2141
2140 2142 baserev = self.rev(deltabase)
2141 2143
2142 2144 if baserev != nullrev and self.iscensored(baserev):
2143 2145 # if base is censored, delta must be full replacement in a
2144 2146 # single patch operation
2145 2147 hlen = struct.calcsize(">lll")
2146 2148 oldlen = self.rawsize(baserev)
2147 2149 newlen = len(delta) - hlen
2148 2150 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2149 2151 raise error.CensoredBaseError(self.indexfile,
2150 2152 self.node(baserev))
2151 2153
2152 2154 if not flags and self._peek_iscensored(baserev, delta, flush):
2153 2155 flags |= REVIDX_ISCENSORED
2154 2156
2155 2157 # We assume consumers of addrevisioncb will want to retrieve
2156 2158 # the added revision, which will require a call to
2157 2159 # revision(). revision() will fast path if there is a cache
2158 2160 # hit. So, we tell _addrevision() to always cache in this case.
2159 2161 # We're only using addgroup() in the context of changegroup
2160 2162 # generation so the revision data can always be handled as raw
2161 2163 # by the flagprocessor.
2162 2164 self._addrevision(node, None, transaction, link,
2163 2165 p1, p2, flags, (baserev, delta),
2164 2166 ifh, dfh,
2165 2167 alwayscache=bool(addrevisioncb),
2166 2168 deltacomputer=deltacomputer)
2167 2169
2168 2170 if addrevisioncb:
2169 2171 addrevisioncb(self, node)
2170 2172
2171 2173 if not dfh and not self._inline:
2172 2174 # addrevision switched from inline to conventional
2173 2175 # reopen the index
2174 2176 ifh.close()
2175 2177 dfh = self._datafp("a+")
2176 2178 ifh = self._indexfp("a+")
2177 2179 self._writinghandles = (ifh, dfh)
2178 2180 finally:
2179 2181 self._writinghandles = None
2180 2182
2181 2183 if dfh:
2182 2184 dfh.close()
2183 2185 ifh.close()
2184 2186
2185 2187 return nodes
2186 2188
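# Hedged sketch (not from the original file): each item consumed from
# ``deltas`` above is the 7-tuple unpacked in the loop:
#
#   (node, p1, p2, linknode, deltabase, delta, flags)
#
# so a caller with pre-built deltas could write:
#
#   rl.addgroup([(node, p1, p2, linknode, deltabase, delta, 0)],
#               linkmapper, tr)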
2187 2189 def iscensored(self, rev):
2188 2190 """Check if a file revision is censored."""
2189 2191 if not self._censorable:
2190 2192 return False
2191 2193
2192 2194 return self.flags(rev) & REVIDX_ISCENSORED
2193 2195
2194 2196 def _peek_iscensored(self, baserev, delta, flush):
2195 2197 """Quickly check if a delta produces a censored revision."""
2196 2198 if not self._censorable:
2197 2199 return False
2198 2200
2199 2201 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2200 2202
2201 2203 def getstrippoint(self, minlink):
2202 2204 """find the minimum rev that must be stripped to strip the linkrev
2203 2205
2204 2206 Returns a tuple containing the minimum rev and a set of all revs that
2205 2207 have linkrevs that will be broken by this strip.
2206 2208 """
2207 2209 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2208 2210 self.headrevs(),
2209 2211 self.linkrev, self.parentrevs)
2210 2212
2211 2213 def strip(self, minlink, transaction):
2212 2214 """truncate the revlog on the first revision with a linkrev >= minlink
2213 2215
2214 2216 This function is called when we're stripping revision minlink and
2215 2217 its descendants from the repository.
2216 2218
2217 2219 We have to remove all revisions with linkrev >= minlink, because
2218 2220 the equivalent changelog revisions will be renumbered after the
2219 2221 strip.
2220 2222
2221 2223 So we truncate the revlog on the first of these revisions, and
2222 2224 trust that the caller has saved the revisions that shouldn't be
2223 2225 removed and that it'll re-add them after this truncation.
2224 2226 """
2225 2227 if len(self) == 0:
2226 2228 return
2227 2229
2228 2230 rev, _ = self.getstrippoint(minlink)
2229 2231 if rev == len(self):
2230 2232 return
2231 2233
2232 2234 # first truncate the files on disk
2233 2235 end = self.start(rev)
2234 2236 if not self._inline:
2235 2237 transaction.add(self.datafile, end)
2236 2238 end = rev * self._io.size
2237 2239 else:
2238 2240 end += rev * self._io.size
2239 2241
2240 2242 transaction.add(self.indexfile, end)
2241 2243
2242 2244 # then reset internal state in memory to forget those revisions
2243 2245 self._revisioncache = None
2244 2246 self._chaininfocache = {}
2245 2247 self._chunkclear()
2246 2248 for x in pycompat.xrange(rev, len(self)):
2247 2249 del self.nodemap[self.node(x)]
2248 2250
2249 2251 del self.index[rev:-1]
2250 2252 self._nodepos = None
2251 2253
2252 2254 def checksize(self):
2253 2255 expected = 0
2254 2256 if len(self):
2255 2257 expected = max(0, self.end(len(self) - 1))
2256 2258
2257 2259 try:
2258 2260 with self._datafp() as f:
2259 2261 f.seek(0, 2)
2260 2262 actual = f.tell()
2261 2263 dd = actual - expected
2262 2264 except IOError as inst:
2263 2265 if inst.errno != errno.ENOENT:
2264 2266 raise
2265 2267 dd = 0
2266 2268
2267 2269 try:
2268 2270 f = self.opener(self.indexfile)
2269 2271 f.seek(0, 2)
2270 2272 actual = f.tell()
2271 2273 f.close()
2272 2274 s = self._io.size
2273 2275 i = max(0, actual // s)
2274 2276 di = actual - (i * s)
2275 2277 if self._inline:
2276 2278 databytes = 0
2277 2279 for r in self:
2278 2280 databytes += max(0, self.length(r))
2279 2281 dd = 0
2280 2282 di = actual - len(self) * s - databytes
2281 2283 except IOError as inst:
2282 2284 if inst.errno != errno.ENOENT:
2283 2285 raise
2284 2286 di = 0
2285 2287
2286 2288 return (dd, di)
2287 2289
2288 2290 def files(self):
2289 2291 res = [self.indexfile]
2290 2292 if not self._inline:
2291 2293 res.append(self.datafile)
2292 2294 return res
2293 2295
2294 2296 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2295 2297 assumehaveparentrevisions=False,
2296 2298 deltamode=repository.CG_DELTAMODE_STD):
2297 2299 if nodesorder not in ('nodes', 'storage', 'linear', None):
2298 2300 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2299 2301 nodesorder)
2300 2302
2301 2303 if nodesorder is None and not self._generaldelta:
2302 2304 nodesorder = 'storage'
2303 2305
2304 2306 if (not self._storedeltachains and
2305 2307 deltamode != repository.CG_DELTAMODE_PREV):
2306 2308 deltamode = repository.CG_DELTAMODE_FULL
2307 2309
2308 2310 return storageutil.emitrevisions(
2309 2311 self, nodes, nodesorder, revlogrevisiondelta,
2310 2312 deltaparentfn=self.deltaparent,
2311 2313 candeltafn=self.candelta,
2312 2314 rawsizefn=self.rawsize,
2313 2315 revdifffn=self.revdiff,
2314 2316 flagsfn=self.flags,
2315 2317 deltamode=deltamode,
2316 2318 revisiondata=revisiondata,
2317 2319 assumehaveparentrevisions=assumehaveparentrevisions)
2318 2320
2319 2321 DELTAREUSEALWAYS = 'always'
2320 2322 DELTAREUSESAMEREVS = 'samerevs'
2321 2323 DELTAREUSENEVER = 'never'
2322 2324
2323 2325 DELTAREUSEFULLADD = 'fulladd'
2324 2326
2325 2327 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2326 2328
2327 2329 def clone(self, tr, destrevlog, addrevisioncb=None,
2328 2330 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2329 2331 """Copy this revlog to another, possibly with format changes.
2330 2332
2331 2333 The destination revlog will contain the same revisions and nodes.
2332 2334 However, it may not be bit-for-bit identical due to e.g. delta encoding
2333 2335 differences.
2334 2336
2335 2337 The ``deltareuse`` argument controls how deltas from the existing revlog
2336 2338 are preserved in the destination revlog. The argument can have the
2337 2339 following values:
2338 2340
2339 2341 DELTAREUSEALWAYS
2340 2342 Deltas will always be reused (if possible), even if the destination
2341 2343 revlog would not select the same revisions for the delta. This is the
2342 2344 fastest mode of operation.
2343 2345 DELTAREUSESAMEREVS
2344 2346 Deltas will be reused if the destination revlog would pick the same
2345 2347 revisions for the delta. This mode strikes a balance between speed
2346 2348 and optimization.
2347 2349 DELTAREUSENEVER
2348 2350 Deltas will never be reused. This is the slowest mode of execution.
2349 2351 This mode can be used to recompute deltas (e.g. if the diff/delta
2350 2352 algorithm changes).
2351 2353
2352 2354 Delta computation can be slow, so the choice of delta reuse policy can
2353 2355 significantly affect run time.
2354 2356
2355 2357 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2356 2358 two extremes. Deltas will be reused if they are appropriate. But if a
2357 2359 better delta base is available, it will be chosen. This means if you
2358 2360 are converting a non-generaldelta revlog to a generaldelta revlog,
2359 2361 deltas will be recomputed if the delta's parent isn't a parent of the
2360 2362 revision.
2361 2363
2362 2364 In addition to the delta policy, the ``forcedeltabothparents``
2363 2365 argument controls whether to force computing deltas against both parents
2364 2366 for merges. If unset, the destination revlog's current setting is used.
2365 2367 """
2366 2368 if deltareuse not in self.DELTAREUSEALL:
2367 2369 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2368 2370
2369 2371 if len(destrevlog):
2370 2372 raise ValueError(_('destination revlog is not empty'))
2371 2373
2372 2374 if getattr(self, 'filteredrevs', None):
2373 2375 raise ValueError(_('source revlog has filtered revisions'))
2374 2376 if getattr(destrevlog, 'filteredrevs', None):
2375 2377 raise ValueError(_('destination revlog has filtered revisions'))
2376 2378
2377 2379 # lazydeltabase controls whether to reuse a cached delta, if possible.
2378 2380 oldlazydeltabase = destrevlog._lazydeltabase
2379 2381 oldamd = destrevlog._deltabothparents
2380 2382
2381 2383 try:
2382 2384 if deltareuse == self.DELTAREUSEALWAYS:
2383 2385 destrevlog._lazydeltabase = True
2384 2386 elif deltareuse == self.DELTAREUSESAMEREVS:
2385 2387 destrevlog._lazydeltabase = False
2386 2388
2387 2389 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2388 2390
2389 2391 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2390 2392 self.DELTAREUSESAMEREVS)
2391 2393
2392 2394 deltacomputer = deltautil.deltacomputer(destrevlog)
2393 2395 index = self.index
2394 2396 for rev in self:
2395 2397 entry = index[rev]
2396 2398
2397 2399 # Some classes override linkrev to take filtered revs into
2398 2400 # account. Use raw entry from index.
2399 2401 flags = entry[0] & 0xffff
2400 2402 linkrev = entry[4]
2401 2403 p1 = index[entry[5]][7]
2402 2404 p2 = index[entry[6]][7]
2403 2405 node = entry[7]
2404 2406
2405 2407 # (Possibly) reuse the delta from the revlog if allowed and
2406 2408 # the revlog chunk is a delta.
2407 2409 cachedelta = None
2408 2410 rawtext = None
2409 2411 if populatecachedelta:
2410 2412 dp = self.deltaparent(rev)
2411 2413 if dp != nullrev:
2412 2414 cachedelta = (dp, bytes(self._chunk(rev)))
2413 2415
2414 2416 if not cachedelta:
2415 2417 rawtext = self.revision(rev, raw=True)
2416 2418
2417 2419
2418 2420 if deltareuse == self.DELTAREUSEFULLADD:
2419 2421 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2420 2422 cachedelta=cachedelta,
2421 2423 node=node, flags=flags,
2422 2424 deltacomputer=deltacomputer)
2423 2425 else:
2424 2426 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2425 2427 checkambig=False)
2426 2428 dfh = None
2427 2429 if not destrevlog._inline:
2428 2430 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2429 2431 try:
2430 2432 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2431 2433 p2, flags, cachedelta, ifh, dfh,
2432 2434 deltacomputer=deltacomputer)
2433 2435 finally:
2434 2436 if dfh:
2435 2437 dfh.close()
2436 2438 ifh.close()
2437 2439
2438 2440 if addrevisioncb:
2439 2441 addrevisioncb(self, rev, node)
2440 2442 finally:
2441 2443 destrevlog._lazydeltabase = oldlazydeltabase
2442 2444 destrevlog._deltabothparents = oldamd
2443 2445
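# Usage sketch (an illustration, not from the original file): recomputing
# every delta while copying into an empty destination revlog, e.g. after a
# delta algorithm change. ``srcrl`` and the destination file names are
# hypothetical:
#
#   destrl = revlog(srcrl.opener, b'dst.i', b'dst.d')
#   srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)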
2444 2446 def censorrevision(self, tr, censornode, tombstone=b''):
2445 2447 if (self.version & 0xFFFF) == REVLOGV0:
2446 2448 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2447 2449 self.version)
2448 2450
2449 2451 censorrev = self.rev(censornode)
2450 2452 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2451 2453
2452 2454 if len(tombstone) > self.rawsize(censorrev):
2453 2455 raise error.Abort(_('censor tombstone must be no longer than '
2454 2456 'censored data'))
2455 2457
2456 2458 # Rewriting the revlog in place is hard. Our strategy for censoring is
2457 2459 # to create a new revlog, copy all revisions to it, then replace the
2458 2460 # revlogs on transaction close.
2459 2461
2460 2462 newindexfile = self.indexfile + b'.tmpcensored'
2461 2463 newdatafile = self.datafile + b'.tmpcensored'
2462 2464
2463 2465 # This is a bit dangerous. We could easily have a mismatch of state.
2464 2466 newrl = revlog(self.opener, newindexfile, newdatafile,
2465 2467 censorable=True)
2466 2468 newrl.version = self.version
2467 2469 newrl._generaldelta = self._generaldelta
2468 2470 newrl._io = self._io
2469 2471
2470 2472 for rev in self.revs():
2471 2473 node = self.node(rev)
2472 2474 p1, p2 = self.parents(node)
2473 2475
2474 2476 if rev == censorrev:
2475 2477 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2476 2478 p1, p2, censornode, REVIDX_ISCENSORED)
2477 2479
2478 2480 if newrl.deltaparent(rev) != nullrev:
2479 2481 raise error.Abort(_('censored revision stored as delta; '
2480 2482 'cannot censor'),
2481 2483 hint=_('censoring of revlogs is not '
2482 2484 'fully implemented; please report '
2483 2485 'this bug'))
2484 2486 continue
2485 2487
2486 2488 if self.iscensored(rev):
2487 2489 if self.deltaparent(rev) != nullrev:
2488 2490 raise error.Abort(_('cannot censor due to censored '
2489 2491 'revision having delta stored'))
2490 2492 rawtext = self._chunk(rev)
2491 2493 else:
2492 2494 rawtext = self.revision(rev, raw=True)
2493 2495
2494 2496 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2495 2497 self.flags(rev))
2496 2498
2497 2499 tr.addbackup(self.indexfile, location='store')
2498 2500 if not self._inline:
2499 2501 tr.addbackup(self.datafile, location='store')
2500 2502
2501 2503 self.opener.rename(newrl.indexfile, self.indexfile)
2502 2504 if not self._inline:
2503 2505 self.opener.rename(newrl.datafile, self.datafile)
2504 2506
2505 2507 self.clearcaches()
2506 2508 self._loadindex()
2507 2509
2508 2510 def verifyintegrity(self, state):
2509 2511 """Verifies the integrity of the revlog.
2510 2512
2511 2513 Yields ``revlogproblem`` instances describing problems that are
2512 2514 found.
2513 2515 """
2514 2516 dd, di = self.checksize()
2515 2517 if dd:
2516 2518 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2517 2519 if di:
2518 2520 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2519 2521
2520 2522 version = self.version & 0xFFFF
2521 2523
2522 2524 # The verifier tells us what version revlog we should be.
2523 2525 if version != state['expectedversion']:
2524 2526 yield revlogproblem(
2525 2527 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2526 2528 (self.indexfile, version, state['expectedversion']))
2527 2529
2528 2530 state['skipread'] = set()
2529 2531
2530 2532 for rev in self:
2531 2533 node = self.node(rev)
2532 2534
2533 2535 # Verify contents. 4 cases to care about:
2534 2536 #
2535 2537 # common: the most common case
2536 2538 # rename: with a rename
2537 2539 # meta: file content starts with b'\1\n', the metadata
2538 2540 # header defined in filelog.py, but without a rename
2539 2541 # ext: content stored externally
2540 2542 #
2541 2543 # More formally, their differences are shown below:
2542 2544 #
2543 2545 # | common | rename | meta | ext
2544 2546 # -------------------------------------------------------
2545 2547 # flags() | 0 | 0 | 0 | not 0
2546 2548 # renamed() | False | True | False | ?
2547 2549 # rawtext[0:2]=='\1\n'| False | True | True | ?
2548 2550 #
2549 2551 # "rawtext" means the raw text stored in revlog data, which
2550 2552 # could be retrieved by "revision(rev, raw=True)". "text"
2551 2553 # mentioned below is "revision(rev, raw=False)".
2552 2554 #
2553 2555 # There are 3 different lengths stored physically:
2554 2556 # 1. L1: rawsize, stored in revlog index
2555 2557 # 2. L2: len(rawtext), stored in revlog data
2556 2558 # 3. L3: len(text), stored in revlog data if flags==0, or
2557 2559 # possibly somewhere else if flags!=0
2558 2560 #
2559 2561 # L1 should be equal to L2. L3 could be different from them.
2560 2562 # "text" may or may not affect commit hash depending on flag
2561 2563 # processors (see revlog.addflagprocessor).
2562 2564 #
2563 2565 # | common | rename | meta | ext
2564 2566 # -------------------------------------------------
2565 2567 # rawsize() | L1 | L1 | L1 | L1
2566 2568 # size() | L1 | L2-LM | L1(*) | L1 (?)
2567 2569 # len(rawtext) | L2 | L2 | L2 | L2
2568 2570 # len(text) | L2 | L2 | L2 | L3
2569 2571 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2570 2572 #
2571 2573 # LM: length of metadata, depending on rawtext
2572 2574 # (*): not ideal, see comment in filelog.size
2573 2575 # (?): could be "- len(meta)" if the resolved content has
2574 2576 # rename metadata
2575 2577 #
2576 2578 # Checks needed to be done:
2577 2579 # 1. length check: L1 == L2, in all cases.
2578 2580 # 2. hash check: depending on flag processor, we may need to
2579 2581 # use either "text" (external), or "rawtext" (in revlog).
2580 2582
2581 2583 try:
2582 2584 skipflags = state.get('skipflags', 0)
2583 2585 if skipflags:
2584 2586 skipflags &= self.flags(rev)
2585 2587
2586 2588 if skipflags:
2587 2589 state['skipread'].add(node)
2588 2590 else:
2589 2591 # Side-effect: read content and verify hash.
2590 2592 self.revision(node)
2591 2593
2592 2594 l1 = self.rawsize(rev)
2593 2595 l2 = len(self.revision(node, raw=True))
2594 2596
2595 2597 if l1 != l2:
2596 2598 yield revlogproblem(
2597 2599 error=_('unpacked size is %d, %d expected') % (l2, l1),
2598 2600 node=node)
2599 2601
2600 2602 except error.CensoredNodeError:
2601 2603 if state['erroroncensored']:
2602 2604 yield revlogproblem(error=_('censored file data'),
2603 2605 node=node)
2604 2606 state['skipread'].add(node)
2605 2607 except Exception as e:
2606 2608 yield revlogproblem(
2607 2609 error=_('unpacking %s: %s') % (short(node),
2608 2610 stringutil.forcebytestr(e)),
2609 2611 node=node)
2610 2612 state['skipread'].add(node)
2611 2613
2612 2614 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2613 2615 revisionscount=False, trackedsize=False,
2614 2616 storedsize=False):
2615 2617 d = {}
2616 2618
2617 2619 if exclusivefiles:
2618 2620 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2619 2621 if not self._inline:
2620 2622 d['exclusivefiles'].append((self.opener, self.datafile))
2621 2623
2622 2624 if sharedfiles:
2623 2625 d['sharedfiles'] = []
2624 2626
2625 2627 if revisionscount:
2626 2628 d['revisionscount'] = len(self)
2627 2629
2628 2630 if trackedsize:
2629 2631 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2630 2632
2631 2633 if storedsize:
2632 2634 d['storedsize'] = sum(self.opener.stat(path).st_size
2633 2635 for path in self.files())
2634 2636
2635 2637 return d