rust: add comment about lack of wdirrev handling...
Yuya Nishihara
r41337:13c23396 stable
@@ -1,2641 +1,2643
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to silence pyflakes warnings
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns the same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
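# Illustrative sketch (editorial, not part of revlog.py): a hypothetical
# processor that stores rawtext base64-encoded while handing decoded text
# to callers. The flag choice and helper names are made up; real
# processors (e.g. the LFS extension's) carry far more logic. Hashes here
# cover "text", so "read" returns True while "write" and "raw" return
# False, per the contract documented above.
#
#   import base64
#
#   def b64read(rl, rawtext):
#       return base64.b64decode(rawtext), True
#
#   def b64write(rl, text):
#       return base64.b64encode(text), False
#
#   def b64raw(rl, rawtext):
#       return False
#
#   addflagprocessor(REVIDX_EXTSTORED, (b64read, b64write, b64raw))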
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
183 183
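# Worked example (illustrative): the first index field fuses a 48-bit
# byte offset with 16 flag bits, so packing and unpacking round-trips:
#
#   packed = offset_type(0x1234, REVIDX_ISCENSORED)
#   assert getoffset(packed) == 0x1234
#   assert gettype(packed) == REVIDX_ISCENSORED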
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
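# Editorial note: four 4-byte integers plus three 20-byte nodeids make
# each v0 record a fixed 76 bytes (indexformatv0.size == 76).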
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
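# Editorial note: each packed record is indexformatng.size == 64 bytes
# (8 + 6 * 4 + 20 + 12 bytes of padding). The parsed in-memory entry is
# an 8-tuple mirroring this layout:
#   (offset_type(offset, flags), compressed length, uncompressed length,
#    base rev, link rev, p1 rev, p2 rev, node)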
288 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 289 # signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
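# rev 0's data offset is always zero, so its first four bytes can
# be repurposed to carry the revlog version header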
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, text) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of revision integer to full node.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._maxdeltachainspan = -1
375 375 self._withsparseread = False
376 376 self._sparserevlog = False
377 377 self._srdensitythreshold = 0.50
378 378 self._srmingapsize = 262144
379 379
380 380 # Make copy of flag processors so each revlog instance can support
381 381 # custom flags.
382 382 self._flagprocessors = dict(_flagprocessors)
383 383
384 384 # 2-tuple of file handles being used for active writing.
385 385 self._writinghandles = None
386 386
387 387 self._loadindex()
388 388
389 389 def _loadindex(self):
390 390 mmapindexthreshold = None
391 391 opts = getattr(self.opener, 'options', {}) or {}
392 392
393 393 if 'revlogv2' in opts:
394 394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 395 elif 'revlogv1' in opts:
396 396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 397 if 'generaldelta' in opts:
398 398 newversionflags |= FLAG_GENERALDELTA
399 399 else:
400 400 newversionflags = REVLOG_DEFAULT_VERSION
401 401
402 402 if 'chunkcachesize' in opts:
403 403 self._chunkcachesize = opts['chunkcachesize']
404 404 if 'maxchainlen' in opts:
405 405 self._maxchainlen = opts['maxchainlen']
406 406 if 'deltabothparents' in opts:
407 407 self._deltabothparents = opts['deltabothparents']
408 408 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
409 409 if 'compengine' in opts:
410 410 self._compengine = opts['compengine']
411 411 if 'maxdeltachainspan' in opts:
412 412 self._maxdeltachainspan = opts['maxdeltachainspan']
413 413 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
414 414 mmapindexthreshold = opts['mmapindexthreshold']
415 415 self._sparserevlog = bool(opts.get('sparse-revlog', False))
416 416 withsparseread = bool(opts.get('with-sparse-read', False))
417 417 # sparse-revlog forces sparse-read
418 418 self._withsparseread = self._sparserevlog or withsparseread
419 419 if 'sparse-read-density-threshold' in opts:
420 420 self._srdensitythreshold = opts['sparse-read-density-threshold']
421 421 if 'sparse-read-min-gap-size' in opts:
422 422 self._srmingapsize = opts['sparse-read-min-gap-size']
423 423 if opts.get('enableellipsis'):
424 424 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
425 425
426 426 # revlog v0 doesn't have flag processors
427 427 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
428 428 _insertflagprocessor(flag, processor, self._flagprocessors)
429 429
430 430 if self._chunkcachesize <= 0:
431 431 raise error.RevlogError(_('revlog chunk cache size %r is not '
432 432 'greater than 0') % self._chunkcachesize)
433 433 elif self._chunkcachesize & (self._chunkcachesize - 1):
434 434 raise error.RevlogError(_('revlog chunk cache size %r is not a '
435 435 'power of 2') % self._chunkcachesize)
436 436
437 437 indexdata = ''
438 438 self._initempty = True
439 439 try:
440 440 with self._indexfp() as f:
441 441 if (mmapindexthreshold is not None and
442 442 self.opener.fstat(f).st_size >= mmapindexthreshold):
443 443 # TODO: should .close() to release resources without
444 444 # relying on Python GC
445 445 indexdata = util.buffer(util.mmapread(f))
446 446 else:
447 447 indexdata = f.read()
448 448 if len(indexdata) > 0:
449 449 versionflags = versionformat_unpack(indexdata[:4])[0]
450 450 self._initempty = False
451 451 else:
452 452 versionflags = newversionflags
453 453 except IOError as inst:
454 454 if inst.errno != errno.ENOENT:
455 455 raise
456 456
457 457 versionflags = newversionflags
458 458
459 459 self.version = versionflags
460 460
461 461 flags = versionflags & ~0xFFFF
462 462 fmt = versionflags & 0xFFFF
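# e.g. the default header REVLOGV1 | FLAG_INLINE_DATA (0x00010001)
# splits into flags == FLAG_INLINE_DATA and fmt == REVLOGV1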
463 463
464 464 if fmt == REVLOGV0:
465 465 if flags:
466 466 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
467 467 'revlog %s') %
468 468 (flags >> 16, fmt, self.indexfile))
469 469
470 470 self._inline = False
471 471 self._generaldelta = False
472 472
473 473 elif fmt == REVLOGV1:
474 474 if flags & ~REVLOGV1_FLAGS:
475 475 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
476 476 'revlog %s') %
477 477 (flags >> 16, fmt, self.indexfile))
478 478
479 479 self._inline = versionflags & FLAG_INLINE_DATA
480 480 self._generaldelta = versionflags & FLAG_GENERALDELTA
481 481
482 482 elif fmt == REVLOGV2:
483 483 if flags & ~REVLOGV2_FLAGS:
484 484 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
485 485 'revlog %s') %
486 486 (flags >> 16, fmt, self.indexfile))
487 487
488 488 self._inline = versionflags & FLAG_INLINE_DATA
489 489 # generaldelta implied by version 2 revlogs.
490 490 self._generaldelta = True
491 491
492 492 else:
493 493 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
494 494 (fmt, self.indexfile))
495 495
496 496 self._storedeltachains = True
497 497
498 498 self._io = revlogio()
499 499 if self.version == REVLOGV0:
500 500 self._io = revlogoldio()
501 501 try:
502 502 d = self._io.parseindex(indexdata, self._inline)
503 503 except (ValueError, IndexError):
504 504 raise error.RevlogError(_("index %s is corrupted") %
505 505 self.indexfile)
506 506 self.index, nodemap, self._chunkcache = d
507 507 if nodemap is not None:
508 508 self.nodemap = self._nodecache = nodemap
509 509 if not self._chunkcache:
510 510 self._chunkclear()
511 511 # revnum -> (chain-length, sum-delta-length)
512 512 self._chaininfocache = {}
513 513 # revlog header -> revlog compressor
514 514 self._decompressors = {}
515 515
516 516 @util.propertycache
517 517 def _compressor(self):
518 518 return util.compengines[self._compengine].revlogcompressor()
519 519
520 520 def _indexfp(self, mode='r'):
521 521 """file object for the revlog's index file"""
522 522 args = {r'mode': mode}
523 523 if mode != 'r':
524 524 args[r'checkambig'] = self._checkambig
525 525 if mode == 'w':
526 526 args[r'atomictemp'] = True
527 527 return self.opener(self.indexfile, **args)
528 528
529 529 def _datafp(self, mode='r'):
530 530 """file object for the revlog's data file"""
531 531 return self.opener(self.datafile, mode=mode)
532 532
533 533 @contextlib.contextmanager
534 534 def _datareadfp(self, existingfp=None):
535 535 """file object suitable to read data"""
536 536 # Use explicit file handle, if given.
537 537 if existingfp is not None:
538 538 yield existingfp
539 539
540 540 # Use a file handle being actively used for writes, if available.
541 541 # There is some danger to doing this because reads will seek the
542 542 # file. However, _writeentry() performs a SEEK_END before all writes,
543 543 # so we should be safe.
544 544 elif self._writinghandles:
545 545 if self._inline:
546 546 yield self._writinghandles[0]
547 547 else:
548 548 yield self._writinghandles[1]
549 549
550 550 # Otherwise open a new file handle.
551 551 else:
552 552 if self._inline:
553 553 func = self._indexfp
554 554 else:
555 555 func = self._datafp
556 556 with func() as fp:
557 557 yield fp
558 558
559 559 def tip(self):
560 560 return self.node(len(self.index) - 1)
561 561 def __contains__(self, rev):
562 562 return 0 <= rev < len(self)
563 563 def __len__(self):
564 564 return len(self.index)
565 565 def __iter__(self):
566 566 return iter(pycompat.xrange(len(self)))
567 567 def revs(self, start=0, stop=None):
568 568 """iterate over all rev in this revlog (from start to stop)"""
569 569 return storageutil.iterrevs(len(self), start=start, stop=stop)
570 570
571 571 @util.propertycache
572 572 def nodemap(self):
573 573 if self.index:
574 574 # populate mapping down to the initial node
575 575 node0 = self.index[0][7] # get around changelog filtering
576 576 self.rev(node0)
577 577 return self._nodecache
578 578
579 579 def hasnode(self, node):
580 580 try:
581 581 self.rev(node)
582 582 return True
583 583 except KeyError:
584 584 return False
585 585
586 586 def candelta(self, baserev, rev):
587 587 """whether two revisions (baserev, rev) can be delta-ed or not"""
588 588 # Disable delta if either rev requires a content-changing flag
589 589 # processor (ex. LFS). This is because such flag processor can alter
590 590 # the rawtext content that the delta will be based on, and two clients
591 591 # could have a same revlog node with different flags (i.e. different
592 592 # rawtext contents) and the delta could be incompatible.
593 593 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
594 594 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
595 595 return False
596 596 return True
597 597
598 598 def clearcaches(self):
599 599 self._revisioncache = None
600 600 self._chainbasecache.clear()
601 601 self._chunkcache = (0, '')
602 602 self._pcache = {}
603 603
604 604 try:
605 605 self._nodecache.clearcaches()
606 606 except AttributeError:
607 607 self._nodecache = {nullid: nullrev}
608 608 self._nodepos = None
609 609
610 610 def rev(self, node):
611 611 try:
612 612 return self._nodecache[node]
613 613 except TypeError:
614 614 raise
615 615 except error.RevlogError:
616 616 # parsers.c radix tree lookup failed
617 617 if node == wdirid or node in wdirfilenodeids:
618 618 raise error.WdirUnsupported
619 619 raise error.LookupError(node, self.indexfile, _('no node'))
620 620 except KeyError:
621 621 # pure python cache lookup failed
622 622 n = self._nodecache
623 623 i = self.index
624 624 p = self._nodepos
625 625 if p is None:
626 626 p = len(i) - 1
627 627 else:
628 628 assert p < len(i)
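# walk the index backwards from the last known position,
# memoizing every node seen so later lookups get cheaper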
629 629 for r in pycompat.xrange(p, -1, -1):
630 630 v = i[r][7]
631 631 n[v] = r
632 632 if v == node:
633 633 self._nodepos = r - 1
634 634 return r
635 635 if node == wdirid or node in wdirfilenodeids:
636 636 raise error.WdirUnsupported
637 637 raise error.LookupError(node, self.indexfile, _('no node'))
638 638
639 639 # Accessors for index entries.
640 640
641 641 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
642 642 # are flags.
643 643 def start(self, rev):
644 644 return int(self.index[rev][0] >> 16)
645 645
646 646 def flags(self, rev):
647 647 return self.index[rev][0] & 0xFFFF
648 648
649 649 def length(self, rev):
650 650 return self.index[rev][1]
651 651
652 652 def rawsize(self, rev):
653 653 """return the length of the uncompressed text for a given revision"""
654 654 l = self.index[rev][2]
655 655 if l >= 0:
656 656 return l
657 657
658 658 t = self.revision(rev, raw=True)
659 659 return len(t)
660 660
661 661 def size(self, rev):
662 662 """length of non-raw text (processed by a "read" flag processor)"""
663 663 # fast path: if no "read" flag processor could change the content,
664 664 # size is rawsize. note: ELLIPSIS is known to not change the content.
665 665 flags = self.flags(rev)
666 666 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
667 667 return self.rawsize(rev)
668 668
669 669 return len(self.revision(rev, raw=False))
670 670
671 671 def chainbase(self, rev):
672 672 base = self._chainbasecache.get(rev)
673 673 if base is not None:
674 674 return base
675 675
676 676 index = self.index
677 677 iterrev = rev
678 678 base = index[iterrev][3]
679 679 while base != iterrev:
680 680 iterrev = base
681 681 base = index[iterrev][3]
682 682
683 683 self._chainbasecache[rev] = base
684 684 return base
685 685
686 686 def linkrev(self, rev):
687 687 return self.index[rev][4]
688 688
689 689 def parentrevs(self, rev):
690 690 try:
691 691 entry = self.index[rev]
692 692 except IndexError:
693 693 if rev == wdirrev:
694 694 raise error.WdirUnsupported
695 695 raise
696 696
697 697 return entry[5], entry[6]
698 698
699 699 # fast parentrevs(rev) where rev isn't filtered
700 700 _uncheckedparentrevs = parentrevs
701 701
702 702 def node(self, rev):
703 703 try:
704 704 return self.index[rev][7]
705 705 except IndexError:
706 706 if rev == wdirrev:
707 707 raise error.WdirUnsupported
708 708 raise
709 709
710 710 # Derived from index values.
711 711
712 712 def end(self, rev):
713 713 return self.start(rev) + self.length(rev)
714 714
715 715 def parents(self, node):
716 716 i = self.index
717 717 d = i[self.rev(node)]
718 718 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
719 719
720 720 def chainlen(self, rev):
721 721 return self._chaininfo(rev)[0]
722 722
723 723 def _chaininfo(self, rev):
724 724 chaininfocache = self._chaininfocache
725 725 if rev in chaininfocache:
726 726 return chaininfocache[rev]
727 727 index = self.index
728 728 generaldelta = self._generaldelta
729 729 iterrev = rev
730 730 e = index[iterrev]
731 731 clen = 0
732 732 compresseddeltalen = 0
733 733 while iterrev != e[3]:
734 734 clen += 1
735 735 compresseddeltalen += e[1]
736 736 if generaldelta:
737 737 iterrev = e[3]
738 738 else:
739 739 iterrev -= 1
740 740 if iterrev in chaininfocache:
741 741 t = chaininfocache[iterrev]
742 742 clen += t[0]
743 743 compresseddeltalen += t[1]
744 744 break
745 745 e = index[iterrev]
746 746 else:
747 747 # Add text length of base since decompressing that also takes
748 748 # work. For cache hits the length is already included.
749 749 compresseddeltalen += e[1]
750 750 r = (clen, compresseddeltalen)
751 751 chaininfocache[rev] = r
752 752 return r
753 753
754 754 def _deltachain(self, rev, stoprev=None):
755 755 """Obtain the delta chain for a revision.
756 756
757 757 ``stoprev`` specifies a revision to stop at. If not specified, we
758 758 stop at the base of the chain.
759 759
760 760 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
761 761 revs in ascending order and ``stopped`` is a bool indicating whether
762 762 ``stoprev`` was hit.
763 763 """
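# Illustrative example: in a non-generaldelta revlog where rev 5
# deltas against rev 4 and rev 4 against the full text at rev 3,
# _deltachain(5) returns ([3, 4, 5], False) while
# _deltachain(5, stoprev=4) returns ([5], True).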
764 764 # Try C implementation.
765 765 try:
766 766 return self.index.deltachain(rev, stoprev, self._generaldelta)
767 767 except AttributeError:
768 768 pass
769 769
770 770 chain = []
771 771
772 772 # Alias to prevent attribute lookup in tight loop.
773 773 index = self.index
774 774 generaldelta = self._generaldelta
775 775
776 776 iterrev = rev
777 777 e = index[iterrev]
778 778 while iterrev != e[3] and iterrev != stoprev:
779 779 chain.append(iterrev)
780 780 if generaldelta:
781 781 iterrev = e[3]
782 782 else:
783 783 iterrev -= 1
784 784 e = index[iterrev]
785 785
786 786 if iterrev == stoprev:
787 787 stopped = True
788 788 else:
789 789 chain.append(iterrev)
790 790 stopped = False
791 791
792 792 chain.reverse()
793 793 return chain, stopped
794 794
795 795 def ancestors(self, revs, stoprev=0, inclusive=False):
796 796 """Generate the ancestors of 'revs' in reverse revision order.
797 797 Does not generate revs lower than stoprev.
798 798
799 799 See the documentation for ancestor.lazyancestors for more details."""
800 800
801 801 # first, make sure start revisions aren't filtered
802 802 revs = list(revs)
803 803 checkrev = self.node
804 804 for r in revs:
805 805 checkrev(r)
806 806 # and we're sure ancestors aren't filtered as well
807 807
808 808 if rustext is not None:
809 809 lazyancestors = rustext.ancestor.LazyAncestors
810 810 arg = self.index
811 811 elif util.safehasattr(parsers, 'rustlazyancestors'):
812 812 lazyancestors = ancestor.rustlazyancestors
813 813 arg = self.index
814 814 else:
815 815 lazyancestors = ancestor.lazyancestors
816 816 arg = self._uncheckedparentrevs
817 817 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
818 818
819 819 def descendants(self, revs):
820 820 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
821 821
822 822 def findcommonmissing(self, common=None, heads=None):
823 823 """Return a tuple of the ancestors of common and the ancestors of heads
824 824 that are not ancestors of common. In revset terminology, we return the
825 825 tuple:
826 826
827 827 ::common, (::heads) - (::common)
828 828
829 829 The list is sorted by revision number, meaning it is
830 830 topologically sorted.
831 831
832 832 'heads' and 'common' are both lists of node IDs. If heads is
833 833 not supplied, uses all of the revlog's heads. If common is not
834 834 supplied, uses nullid."""
835 835 if common is None:
836 836 common = [nullid]
837 837 if heads is None:
838 838 heads = self.heads()
839 839
840 840 common = [self.rev(n) for n in common]
841 841 heads = [self.rev(n) for n in heads]
842 842
843 843 # we want the ancestors, but inclusive
844 844 class lazyset(object):
845 845 def __init__(self, lazyvalues):
846 846 self.addedvalues = set()
847 847 self.lazyvalues = lazyvalues
848 848
849 849 def __contains__(self, value):
850 850 return value in self.addedvalues or value in self.lazyvalues
851 851
852 852 def __iter__(self):
853 853 added = self.addedvalues
854 854 for r in added:
855 855 yield r
856 856 for r in self.lazyvalues:
857 857 if r not in added:
858 858 yield r
859 859
860 860 def add(self, value):
861 861 self.addedvalues.add(value)
862 862
863 863 def update(self, values):
864 864 self.addedvalues.update(values)
865 865
866 866 has = lazyset(self.ancestors(common))
867 867 has.add(nullrev)
868 868 has.update(common)
869 869
870 870 # take all ancestors from heads that aren't in has
871 871 missing = set()
872 872 visit = collections.deque(r for r in heads if r not in has)
873 873 while visit:
874 874 r = visit.popleft()
875 875 if r in missing:
876 876 continue
877 877 else:
878 878 missing.add(r)
879 879 for p in self.parentrevs(r):
880 880 if p not in has:
881 881 visit.append(p)
882 882 missing = list(missing)
883 883 missing.sort()
884 884 return has, [self.node(miss) for miss in missing]
885 885
886 886 def incrementalmissingrevs(self, common=None):
887 887 """Return an object that can be used to incrementally compute the
888 888 revision numbers of the ancestors of arbitrary sets that are not
889 889 ancestors of common. This is an ancestor.incrementalmissingancestors
890 890 object.
891 891
892 892 'common' is a list of revision numbers. If common is not supplied, uses
893 893 nullrev.
894 894 """
895 895 if common is None:
896 896 common = [nullrev]
897 897
898 898 if rustext is not None:
899 # TODO: WdirUnsupported should be raised instead of GraphError
900 # if common includes wdirrev
899 901 return rustext.ancestor.MissingAncestors(self.index, common)
900 902 return ancestor.incrementalmissingancestors(self.parentrevs, common)
901 903
902 904 def findmissingrevs(self, common=None, heads=None):
903 905 """Return the revision numbers of the ancestors of heads that
904 906 are not ancestors of common.
905 907
906 908 More specifically, return a list of revision numbers corresponding to
907 909 nodes N such that every N satisfies the following constraints:
908 910
909 911 1. N is an ancestor of some node in 'heads'
910 912 2. N is not an ancestor of any node in 'common'
911 913
912 914 The list is sorted by revision number, meaning it is
913 915 topologically sorted.
914 916
915 917 'heads' and 'common' are both lists of revision numbers. If heads is
916 918 not supplied, uses all of the revlog's heads. If common is not
917 919 supplied, uses nullid."""
918 920 if common is None:
919 921 common = [nullrev]
920 922 if heads is None:
921 923 heads = self.headrevs()
922 924
923 925 inc = self.incrementalmissingrevs(common=common)
924 926 return inc.missingancestors(heads)
925 927
926 928 def findmissing(self, common=None, heads=None):
927 929 """Return the ancestors of heads that are not ancestors of common.
928 930
929 931 More specifically, return a list of nodes N such that every N
930 932 satisfies the following constraints:
931 933
932 934 1. N is an ancestor of some node in 'heads'
933 935 2. N is not an ancestor of any node in 'common'
934 936
935 937 The list is sorted by revision number, meaning it is
936 938 topologically sorted.
937 939
938 940 'heads' and 'common' are both lists of node IDs. If heads is
939 941 not supplied, uses all of the revlog's heads. If common is not
940 942 supplied, uses nullid."""
941 943 if common is None:
942 944 common = [nullid]
943 945 if heads is None:
944 946 heads = self.heads()
945 947
946 948 common = [self.rev(n) for n in common]
947 949 heads = [self.rev(n) for n in heads]
948 950
949 951 inc = self.incrementalmissingrevs(common=common)
950 952 return [self.node(r) for r in inc.missingancestors(heads)]
951 953
952 954 def nodesbetween(self, roots=None, heads=None):
953 955 """Return a topological path from 'roots' to 'heads'.
954 956
955 957 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
956 958 topologically sorted list of all nodes N that satisfy both of
957 959 these constraints:
958 960
959 961 1. N is a descendant of some node in 'roots'
960 962 2. N is an ancestor of some node in 'heads'
961 963
962 964 Every node is considered to be both a descendant and an ancestor
963 965 of itself, so every reachable node in 'roots' and 'heads' will be
964 966 included in 'nodes'.
965 967
966 968 'outroots' is the list of reachable nodes in 'roots', i.e., the
967 969 subset of 'roots' that is returned in 'nodes'. Likewise,
968 970 'outheads' is the subset of 'heads' that is also in 'nodes'.
969 971
970 972 'roots' and 'heads' are both lists of node IDs. If 'roots' is
971 973 unspecified, uses nullid as the only root. If 'heads' is
972 974 unspecified, uses list of all of the revlog's heads."""
973 975 nonodes = ([], [], [])
974 976 if roots is not None:
975 977 roots = list(roots)
976 978 if not roots:
977 979 return nonodes
978 980 lowestrev = min([self.rev(n) for n in roots])
979 981 else:
980 982 roots = [nullid] # Everybody's a descendant of nullid
981 983 lowestrev = nullrev
982 984 if (lowestrev == nullrev) and (heads is None):
983 985 # We want _all_ the nodes!
984 986 return ([self.node(r) for r in self], [nullid], list(self.heads()))
985 987 if heads is None:
986 988 # All nodes are ancestors, so the latest ancestor is the last
987 989 # node.
988 990 highestrev = len(self) - 1
989 991 # Set ancestors to None to signal that every node is an ancestor.
990 992 ancestors = None
991 993 # Set heads to an empty dictionary for later discovery of heads
992 994 heads = {}
993 995 else:
994 996 heads = list(heads)
995 997 if not heads:
996 998 return nonodes
997 999 ancestors = set()
998 1000 # Turn heads into a dictionary so we can remove 'fake' heads.
999 1001 # Also, later we will be using it to filter out the heads we can't
1000 1002 # find from roots.
1001 1003 heads = dict.fromkeys(heads, False)
1002 1004 # Start at the top and keep marking parents until we're done.
1003 1005 nodestotag = set(heads)
1004 1006 # Remember where the top was so we can use it as a limit later.
1005 1007 highestrev = max([self.rev(n) for n in nodestotag])
1006 1008 while nodestotag:
1007 1009 # grab a node to tag
1008 1010 n = nodestotag.pop()
1009 1011 # Never tag nullid
1010 1012 if n == nullid:
1011 1013 continue
1012 1014 # A node's revision number represents its place in a
1013 1015 # topologically sorted list of nodes.
1014 1016 r = self.rev(n)
1015 1017 if r >= lowestrev:
1016 1018 if n not in ancestors:
1017 1019 # If we are possibly a descendant of one of the roots
1018 1020 # and we haven't already been marked as an ancestor
1019 1021 ancestors.add(n) # Mark as ancestor
1020 1022 # Add non-nullid parents to list of nodes to tag.
1021 1023 nodestotag.update([p for p in self.parents(n) if
1022 1024 p != nullid])
1023 1025 elif n in heads: # We've seen it before, is it a fake head?
1024 1026 # So it is, real heads should not be the ancestors of
1025 1027 # any other heads.
1026 1028 heads.pop(n)
1027 1029 if not ancestors:
1028 1030 return nonodes
1029 1031 # Now that we have our set of ancestors, we want to remove any
1030 1032 # roots that are not ancestors.
1031 1033
1032 1034 # If one of the roots was nullid, everything is included anyway.
1033 1035 if lowestrev > nullrev:
1034 1036 # But, since we weren't, let's recompute the lowest rev to not
1035 1037 # include roots that aren't ancestors.
1036 1038
1037 1039 # Filter out roots that aren't ancestors of heads
1038 1040 roots = [root for root in roots if root in ancestors]
1039 1041 # Recompute the lowest revision
1040 1042 if roots:
1041 1043 lowestrev = min([self.rev(root) for root in roots])
1042 1044 else:
1043 1045 # No more roots? Return empty list
1044 1046 return nonodes
1045 1047 else:
1046 1048 # We are descending from nullid, and don't need to care about
1047 1049 # any other roots.
1048 1050 lowestrev = nullrev
1049 1051 roots = [nullid]
1050 1052 # Transform our roots list into a set.
1051 1053 descendants = set(roots)
1052 1054 # Also, keep the original roots so we can filter out roots that aren't
1053 1055 # 'real' roots (i.e. are descended from other roots).
1054 1056 roots = descendants.copy()
1055 1057 # Our topologically sorted list of output nodes.
1056 1058 orderedout = []
1057 1059 # Don't start at nullid since we don't want nullid in our output list,
1058 1060 # and if nullid shows up in descendants, empty parents will look like
1059 1061 # they're descendants.
1060 1062 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1061 1063 n = self.node(r)
1062 1064 isdescendant = False
1063 1065 if lowestrev == nullrev: # Everybody is a descendant of nullid
1064 1066 isdescendant = True
1065 1067 elif n in descendants:
1066 1068 # n is already a descendant
1067 1069 isdescendant = True
1068 1070 # This check only needs to be done here because all the roots
1069 1071 # will start being marked as descendants before the loop.
1070 1072 if n in roots:
1071 1073 # If n was a root, check if it's a 'real' root.
1072 1074 p = tuple(self.parents(n))
1073 1075 # If any of its parents are descendants, it's not a root.
1074 1076 if (p[0] in descendants) or (p[1] in descendants):
1075 1077 roots.remove(n)
1076 1078 else:
1077 1079 p = tuple(self.parents(n))
1078 1080 # A node is a descendant if either of its parents are
1079 1081 # descendants. (We seeded the descendants set with the roots
1080 1082 # up there, remember?)
1081 1083 if (p[0] in descendants) or (p[1] in descendants):
1082 1084 descendants.add(n)
1083 1085 isdescendant = True
1084 1086 if isdescendant and ((ancestors is None) or (n in ancestors)):
1085 1087 # Only include nodes that are both descendants and ancestors.
1086 1088 orderedout.append(n)
1087 1089 if (ancestors is not None) and (n in heads):
1088 1090 # We're trying to figure out which heads are reachable
1089 1091 # from roots.
1090 1092 # Mark this head as having been reached
1091 1093 heads[n] = True
1092 1094 elif ancestors is None:
1093 1095 # Otherwise, we're trying to discover the heads.
1094 1096 # Assume this is a head because if it isn't, the next step
1095 1097 # will eventually remove it.
1096 1098 heads[n] = True
1097 1099 # But, obviously its parents aren't.
1098 1100 for p in self.parents(n):
1099 1101 heads.pop(p, None)
1100 1102 heads = [head for head, flag in heads.iteritems() if flag]
1101 1103 roots = list(roots)
1102 1104 assert orderedout
1103 1105 assert roots
1104 1106 assert heads
1105 1107 return (orderedout, roots, heads)
1106 1108
1107 1109 def headrevs(self, revs=None):
1108 1110 if revs is None:
1109 1111 try:
1110 1112 return self.index.headrevs()
1111 1113 except AttributeError:
1112 1114 return self._headrevs()
1113 1115 return dagop.headrevs(revs, self.parentrevs)
1114 1116
1115 1117 def computephases(self, roots):
1116 1118 return self.index.computephasesmapsets(roots)
1117 1119
1118 1120 def _headrevs(self):
1119 1121 count = len(self)
1120 1122 if not count:
1121 1123 return [nullrev]
1122 1124 # we won't iter over filtered rev so nobody is a head at start
1123 1125 ishead = [0] * (count + 1)
1124 1126 index = self.index
1125 1127 for r in self:
1126 1128 ishead[r] = 1 # I may be a head
1127 1129 e = index[r]
1128 1130 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1129 1131 return [r for r, val in enumerate(ishead) if val]
1130 1132
1131 1133 def heads(self, start=None, stop=None):
1132 1134 """return the list of all nodes that have no children
1133 1135
1134 1136 if start is specified, only heads that are descendants of
1135 1137 start will be returned
1136 1138 if stop is specified, it will consider all the revs from stop
1137 1139 as if they had no children
1138 1140 """
1139 1141 if start is None and stop is None:
1140 1142 if not len(self):
1141 1143 return [nullid]
1142 1144 return [self.node(r) for r in self.headrevs()]
1143 1145
1144 1146 if start is None:
1145 1147 start = nullrev
1146 1148 else:
1147 1149 start = self.rev(start)
1148 1150
1149 1151 stoprevs = set(self.rev(n) for n in stop or [])
1150 1152
1151 1153 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1152 1154 stoprevs=stoprevs)
1153 1155
1154 1156 return [self.node(rev) for rev in revs]
1155 1157
1156 1158 def children(self, node):
1157 1159 """find the children of a given node"""
1158 1160 c = []
1159 1161 p = self.rev(node)
1160 1162 for r in self.revs(start=p + 1):
1161 1163 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1162 1164 if prevs:
1163 1165 for pr in prevs:
1164 1166 if pr == p:
1165 1167 c.append(self.node(r))
1166 1168 elif p == nullrev:
1167 1169 c.append(self.node(r))
1168 1170 return c
1169 1171
1170 1172 def commonancestorsheads(self, a, b):
1171 1173 """calculate all the heads of the common ancestors of nodes a and b"""
1172 1174 a, b = self.rev(a), self.rev(b)
1173 1175 ancs = self._commonancestorsheads(a, b)
1174 1176 return pycompat.maplist(self.node, ancs)
1175 1177
1176 1178 def _commonancestorsheads(self, *revs):
1177 1179 """calculate all the heads of the common ancestors of revs"""
1178 1180 try:
1179 1181 ancs = self.index.commonancestorsheads(*revs)
1180 1182 except (AttributeError, OverflowError): # C implementation failed
1181 1183 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1182 1184 return ancs
1183 1185
1184 1186 def isancestor(self, a, b):
1185 1187 """return True if node a is an ancestor of node b
1186 1188
1187 1189 A revision is considered an ancestor of itself."""
1188 1190 a, b = self.rev(a), self.rev(b)
1189 1191 return self.isancestorrev(a, b)
1190 1192
1191 1193 def isancestorrev(self, a, b):
1192 1194 """return True if revision a is an ancestor of revision b
1193 1195
1194 1196 A revision is considered an ancestor of itself.
1195 1197
1196 1198 The implementation of this is trivial but the use of
1197 1199 commonancestorsheads is not."""
1198 1200 if a == nullrev:
1199 1201 return True
1200 1202 elif a == b:
1201 1203 return True
1202 1204 elif a > b:
1203 1205 return False
1204 1206 return a in self._commonancestorsheads(a, b)
1205 1207
1206 1208 def ancestor(self, a, b):
1207 1209 """calculate the "best" common ancestor of nodes a and b"""
1208 1210
1209 1211 a, b = self.rev(a), self.rev(b)
1210 1212 try:
1211 1213 ancs = self.index.ancestors(a, b)
1212 1214 except (AttributeError, OverflowError):
1213 1215 ancs = ancestor.ancestors(self.parentrevs, a, b)
1214 1216 if ancs:
1215 1217 # choose a consistent winner when there's a tie
1216 1218 return min(map(self.node, ancs))
1217 1219 return nullid
1218 1220
1219 1221 def _match(self, id):
1220 1222 if isinstance(id, int):
1221 1223 # rev
1222 1224 return self.node(id)
1223 1225 if len(id) == 20:
1224 1226 # possibly a binary node
1225 1227 # odds of a binary node being all hex in ASCII are 1 in 10**25
1226 1228 try:
1227 1229 node = id
1228 1230 self.rev(node) # quick search the index
1229 1231 return node
1230 1232 except error.LookupError:
1231 1233 pass # may be partial hex id
1232 1234 try:
1233 1235 # str(rev)
1234 1236 rev = int(id)
1235 1237 if "%d" % rev != id:
1236 1238 raise ValueError
1237 1239 if rev < 0:
1238 1240 rev = len(self) + rev
1239 1241 if rev < 0 or rev >= len(self):
1240 1242 raise ValueError
1241 1243 return self.node(rev)
1242 1244 except (ValueError, OverflowError):
1243 1245 pass
1244 1246 if len(id) == 40:
1245 1247 try:
1246 1248 # a full hex nodeid?
1247 1249 node = bin(id)
1248 1250 self.rev(node)
1249 1251 return node
1250 1252 except (TypeError, error.LookupError):
1251 1253 pass
1252 1254
1253 1255 def _partialmatch(self, id):
1254 1256 # we don't care about wdirfilenodeids as they should always be full hashes
1255 1257 maybewdir = wdirhex.startswith(id)
1256 1258 try:
1257 1259 partial = self.index.partialmatch(id)
1258 1260 if partial and self.hasnode(partial):
1259 1261 if maybewdir:
1260 1262 # single 'ff...' match in radix tree, ambiguous with wdir
1261 1263 raise error.RevlogError
1262 1264 return partial
1263 1265 if maybewdir:
1264 1266 # no 'ff...' match in radix tree, wdir identified
1265 1267 raise error.WdirUnsupported
1266 1268 return None
1267 1269 except error.RevlogError:
1268 1270 # parsers.c radix tree lookup gave multiple matches
1269 1271 # fast path: for unfiltered changelog, radix tree is accurate
1270 1272 if not getattr(self, 'filteredrevs', None):
1271 1273 raise error.AmbiguousPrefixLookupError(
1272 1274 id, self.indexfile, _('ambiguous identifier'))
1273 1275 # fall through to slow path that filters hidden revisions
1274 1276 except (AttributeError, ValueError):
1275 1277 # we are pure python, or key was too short to search radix tree
1276 1278 pass
1277 1279
1278 1280 if id in self._pcache:
1279 1281 return self._pcache[id]
1280 1282
1281 1283 if len(id) <= 40:
1282 1284 try:
1283 1285 # hex(node)[:...]
1284 1286 l = len(id) // 2 # grab an even number of digits
1285 1287 prefix = bin(id[:l * 2])
1286 1288 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1287 1289 nl = [n for n in nl if hex(n).startswith(id) and
1288 1290 self.hasnode(n)]
1289 1291 if nullhex.startswith(id):
1290 1292 nl.append(nullid)
1291 1293 if len(nl) > 0:
1292 1294 if len(nl) == 1 and not maybewdir:
1293 1295 self._pcache[id] = nl[0]
1294 1296 return nl[0]
1295 1297 raise error.AmbiguousPrefixLookupError(
1296 1298 id, self.indexfile, _('ambiguous identifier'))
1297 1299 if maybewdir:
1298 1300 raise error.WdirUnsupported
1299 1301 return None
1300 1302 except TypeError:
1301 1303 pass
1302 1304
1303 1305 def lookup(self, id):
1304 1306 """locate a node based on:
1305 1307 - revision number or str(revision number)
1306 1308 - nodeid or subset of hex nodeid
1307 1309 """
1308 1310 n = self._match(id)
1309 1311 if n is not None:
1310 1312 return n
1311 1313 n = self._partialmatch(id)
1312 1314 if n:
1313 1315 return n
1314 1316
1315 1317 raise error.LookupError(id, self.indexfile, _('no match found'))
1316 1318
1317 1319 def shortest(self, node, minlength=1):
1318 1320 """Find the shortest unambiguous prefix that matches node."""
1319 1321 def isvalid(prefix):
1320 1322 try:
1321 1323 node = self._partialmatch(prefix)
1322 1324 except error.AmbiguousPrefixLookupError:
1323 1325 return False
1324 1326 except error.WdirUnsupported:
1325 1327 # single 'ff...' match
1326 1328 return True
1327 1329 if node is None:
1328 1330 raise error.LookupError(node, self.indexfile, _('no node'))
1329 1331 return True
1330 1332
1331 1333 def maybewdir(prefix):
1332 1334 return all(c == 'f' for c in prefix)
1333 1335
1334 1336 hexnode = hex(node)
1335 1337
1336 1338 def disambiguate(hexnode, minlength):
1337 1339 """Disambiguate against wdirid."""
1338 1340 for length in range(minlength, 41):
1339 1341 prefix = hexnode[:length]
1340 1342 if not maybewdir(prefix):
1341 1343 return prefix
1342 1344
1343 1345 if not getattr(self, 'filteredrevs', None):
1344 1346 try:
1345 1347 length = max(self.index.shortest(node), minlength)
1346 1348 return disambiguate(hexnode, length)
1347 1349 except error.RevlogError:
1348 1350 if node != wdirid:
1349 1351 raise error.LookupError(node, self.indexfile, _('no node'))
1350 1352 except AttributeError:
1351 1353 # Fall through to pure code
1352 1354 pass
1353 1355
1354 1356 if node == wdirid:
1355 1357 for length in range(minlength, 41):
1356 1358 prefix = hexnode[:length]
1357 1359 if isvalid(prefix):
1358 1360 return prefix
1359 1361
1360 1362 for length in range(minlength, 41):
1361 1363 prefix = hexnode[:length]
1362 1364 if isvalid(prefix):
1363 1365 return disambiguate(hexnode, length)
1364 1366
1365 1367 def cmp(self, node, text):
1366 1368 """compare text with a given file revision
1367 1369
1368 1370 returns True if text is different than what is stored.
1369 1371 """
1370 1372 p1, p2 = self.parents(node)
1371 1373 return storageutil.hashrevisionsha1(text, p1, p2) != node
1372 1374
1373 1375 def _cachesegment(self, offset, data):
1374 1376 """Add a segment to the revlog cache.
1375 1377
1376 1378 Accepts an absolute offset and the data that is at that location.
1377 1379 """
1378 1380 o, d = self._chunkcache
1379 1381 # try to add to existing cache
1380 1382 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1381 1383 self._chunkcache = o, d + data
1382 1384 else:
1383 1385 self._chunkcache = offset, data
1384 1386
1385 1387 def _readsegment(self, offset, length, df=None):
1386 1388 """Load a segment of raw data from the revlog.
1387 1389
1388 1390 Accepts an absolute offset, length to read, and an optional existing
1389 1391 file handle to read from.
1390 1392
1391 1393 If an existing file handle is passed, it will be seeked and the
1392 1394 original seek position will NOT be restored.
1393 1395
1394 1396 Returns a str or buffer of raw byte data.
1395 1397
1396 1398 Raises if the requested number of bytes could not be read.
1397 1399 """
1398 1400 # Cache data both forward and backward around the requested
1399 1401 # data, in a fixed size window. This helps speed up operations
1400 1402 # involving reading the revlog backwards.
1401 1403 cachesize = self._chunkcachesize
1402 1404 realoffset = offset & ~(cachesize - 1)
1403 1405 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1404 1406 - realoffset)
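# e.g. with the default 64k cache size, a 100-byte read at
# offset 70000 expands to the aligned window [65536, 131072)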
1405 1407 with self._datareadfp(df) as df:
1406 1408 df.seek(realoffset)
1407 1409 d = df.read(reallength)
1408 1410
1409 1411 self._cachesegment(realoffset, d)
1410 1412 if offset != realoffset or reallength != length:
1411 1413 startoffset = offset - realoffset
1412 1414 if len(d) - startoffset < length:
1413 1415 raise error.RevlogError(
1414 1416 _('partial read of revlog %s; expected %d bytes from '
1415 1417 'offset %d, got %d') %
1416 1418 (self.indexfile if self._inline else self.datafile,
1417 1419 length, realoffset, len(d) - startoffset))
1418 1420
1419 1421 return util.buffer(d, startoffset, length)
1420 1422
1421 1423 if len(d) < length:
1422 1424 raise error.RevlogError(
1423 1425 _('partial read of revlog %s; expected %d bytes from offset '
1424 1426 '%d, got %d') %
1425 1427 (self.indexfile if self._inline else self.datafile,
1426 1428 length, offset, len(d)))
1427 1429
1428 1430 return d
1429 1431
1430 1432 def _getsegment(self, offset, length, df=None):
1431 1433 """Obtain a segment of raw data from the revlog.
1432 1434
1433 1435 Accepts an absolute offset, length of bytes to obtain, and an
1434 1436 optional file handle to the already-opened revlog. If the file
1435 1437 handle is used, its original seek position will not be preserved.
1436 1438
1437 1439 Requests for data may be returned from a cache.
1438 1440
1439 1441 Returns a str or a buffer instance of raw byte data.
1440 1442 """
1441 1443 o, d = self._chunkcache
1442 1444 l = len(d)
1443 1445
1444 1446 # is it in the cache?
1445 1447 cachestart = offset - o
1446 1448 cacheend = cachestart + length
1447 1449 if cachestart >= 0 and cacheend <= l:
1448 1450 if cachestart == 0 and cacheend == l:
1449 1451 return d # avoid a copy
1450 1452 return util.buffer(d, cachestart, cacheend - cachestart)
1451 1453
1452 1454 return self._readsegment(offset, length, df=df)
1453 1455
1454 1456 def _getsegmentforrevs(self, startrev, endrev, df=None):
1455 1457 """Obtain a segment of raw data corresponding to a range of revisions.
1456 1458
1457 1459 Accepts the start and end revisions and an optional already-open
1458 1460 file handle to be used for reading. If the file handle is read, its
1459 1461 seek position will not be preserved.
1460 1462
1461 1463 Requests for data may be satisfied by a cache.
1462 1464
1463 1465 Returns a 2-tuple of (offset, data) for the requested range of
1464 1466 revisions. Offset is the integer offset from the beginning of the
1465 1467 revlog and data is a str or buffer of the raw byte data.
1466 1468
1467 1469 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1468 1470 to determine where each revision's data begins and ends.
1469 1471 """
1470 1472 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1471 1473 # (functions are expensive).
1472 1474 index = self.index
1473 1475 istart = index[startrev]
1474 1476 start = int(istart[0] >> 16)
1475 1477 if startrev == endrev:
1476 1478 end = start + istart[1]
1477 1479 else:
1478 1480 iend = index[endrev]
1479 1481 end = int(iend[0] >> 16) + iend[1]
1480 1482
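# Inline revlogs interleave each index record with its revision data,
# so skip the (startrev + 1) index entries preceding the first
# requested chunk (and likewise for the end offset).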
1481 1483 if self._inline:
1482 1484 start += (startrev + 1) * self._io.size
1483 1485 end += (endrev + 1) * self._io.size
1484 1486 length = end - start
1485 1487
1486 1488 return start, self._getsegment(start, length, df=df)
1487 1489
1488 1490 def _chunk(self, rev, df=None):
1489 1491 """Obtain a single decompressed chunk for a revision.
1490 1492
1491 1493 Accepts an integer revision and an optional already-open file handle
1492 1494 to be used for reading. If used, the seek position of the file will not
1493 1495 be preserved.
1494 1496
1495 1497 Returns a str holding uncompressed data for the requested revision.
1496 1498 """
1497 1499 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1498 1500
1499 1501 def _chunks(self, revs, df=None, targetsize=None):
1500 1502 """Obtain decompressed chunks for the specified revisions.
1501 1503
1502 1504 Accepts an iterable of numeric revisions that are assumed to be in
1503 1505 ascending order. Also accepts an optional already-open file handle
1504 1506 to be used for reading. If used, the seek position of the file will
1505 1507 not be preserved.
1506 1508
1507 1509 This function is similar to calling ``self._chunk()`` multiple times,
1508 1510 but is faster.
1509 1511
1510 1512 Returns a list with decompressed data for each requested revision.
1511 1513 """
1512 1514 if not revs:
1513 1515 return []
1514 1516 start = self.start
1515 1517 length = self.length
1516 1518 inline = self._inline
1517 1519 iosize = self._io.size
1518 1520 buffer = util.buffer
1519 1521
1520 1522 l = []
1521 1523 ladd = l.append
1522 1524
1523 1525 if not self._withsparseread:
1524 1526 slicedchunks = (revs,)
1525 1527 else:
1526 1528 slicedchunks = deltautil.slicechunk(self, revs,
1527 1529 targetsize=targetsize)
1528 1530
1529 1531 for revschunk in slicedchunks:
1530 1532 firstrev = revschunk[0]
1531 1533 # Skip trailing revisions with empty diff
1532 1534 for lastrev in revschunk[::-1]:
1533 1535 if length(lastrev) != 0:
1534 1536 break
1535 1537
1536 1538 try:
1537 1539 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1538 1540 except OverflowError:
1539 1541 # issue4215 - we can't cache a run of chunks greater than
1540 1542 # 2G on Windows
1541 1543 return [self._chunk(rev, df=df) for rev in revschunk]
1542 1544
1543 1545 decomp = self.decompress
1544 1546 for rev in revschunk:
1545 1547 chunkstart = start(rev)
1546 1548 if inline:
1547 1549 chunkstart += (rev + 1) * iosize
1548 1550 chunklength = length(rev)
1549 1551 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1550 1552
1551 1553 return l
1552 1554
1553 1555 def _chunkclear(self):
1554 1556 """Clear the raw chunk cache."""
1555 1557 self._chunkcache = (0, '')
1556 1558
1557 1559 def deltaparent(self, rev):
1558 1560 """return deltaparent of the given revision"""
1559 1561 base = self.index[rev][3]
1560 1562 if base == rev:
1561 1563 return nullrev
1562 1564 elif self._generaldelta:
1563 1565 return base
1564 1566 else:
1565 1567 return rev - 1
1566 1568
1567 1569 def issnapshot(self, rev):
1568 1570 """tells whether rev is a snapshot
1569 1571 """
1570 1572 if not self._sparserevlog:
1571 1573 return self.deltaparent(rev) == nullrev
1572 1574 elif util.safehasattr(self.index, 'issnapshot'):
1573 1575 # bind the method directly to cache both the attribute test and lookup
1574 1576 self.issnapshot = self.index.issnapshot
1575 1577 return self.issnapshot(rev)
1576 1578 if rev == nullrev:
1577 1579 return True
1578 1580 entry = self.index[rev]
1579 1581 base = entry[3]
1580 1582 if base == rev:
1581 1583 return True
1582 1584 if base == nullrev:
1583 1585 return True
1584 1586 p1 = entry[5]
1585 1587 p2 = entry[6]
1586 1588 if base == p1 or base == p2:
1587 1589 return False
1588 1590 return self.issnapshot(base)
1589 1591
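# with sparse-revlog a delta chain may, for instance, look like
#
#   full snapshot -> intermediate snapshot -> delta -> delta
#
# issnapshot() above encodes that: a revision whose delta base is itself
# or nullrev is a snapshot, a revision whose base is one of its parents
# is a regular delta, and anything else inherits snapshot-ness from its
# base via the recursion.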
1590 1592 def snapshotdepth(self, rev):
1591 1593 """number of snapshot in the chain before this one"""
1592 1594 if not self.issnapshot(rev):
1593 1595 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1594 1596 return len(self._deltachain(rev)[0]) - 1
1595 1597
1596 1598 def revdiff(self, rev1, rev2):
1597 1599 """return or calculate a delta between two revisions
1598 1600
1599 1601 The delta calculated is in binary form and is intended to be written to
1600 1602 revlog data directly. So this function needs raw revision data.
1601 1603 """
1602 1604 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1603 1605 return bytes(self._chunk(rev2))
1604 1606
1605 1607 return mdiff.textdiff(self.revision(rev1, raw=True),
1606 1608 self.revision(rev2, raw=True))
1607 1609
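# usage sketch (illustrative; 'rl' is a hypothetical revlog instance):
#
#   delta = rl.revdiff(rev - 1, rev)
#
# when rev's stored delta parent is exactly rev - 1, the stored chunk is
# returned as-is with no diff computation; otherwise mdiff.textdiff() is
# run over the two raw texts.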
1608 1610 def revision(self, nodeorrev, _df=None, raw=False):
1609 1611 """return an uncompressed revision of a given node or revision
1610 1612 number.
1611 1613
1612 1614 _df - an existing file handle to read from. (internal-only)
1613 1615 raw - an optional argument specifying if the revision data is to be
1614 1616 treated as raw data when applying flag transforms. 'raw' should be set
1615 1617 to True when generating changegroups or in debug commands.
1616 1618 """
1617 1619 if isinstance(nodeorrev, int):
1618 1620 rev = nodeorrev
1619 1621 node = self.node(rev)
1620 1622 else:
1621 1623 node = nodeorrev
1622 1624 rev = None
1623 1625
1624 1626 cachedrev = None
1625 1627 flags = None
1626 1628 rawtext = None
1627 1629 if node == nullid:
1628 1630 return ""
1629 1631 if self._revisioncache:
1630 1632 if self._revisioncache[0] == node:
1631 1633 # _revisioncache only stores rawtext
1632 1634 if raw:
1633 1635 return self._revisioncache[2]
1634 1636 # duplicated, but good for perf
1635 1637 if rev is None:
1636 1638 rev = self.rev(node)
1637 1639 if flags is None:
1638 1640 flags = self.flags(rev)
1639 1641 # no extra flags set, no flag processor runs, text = rawtext
1640 1642 if flags == REVIDX_DEFAULT_FLAGS:
1641 1643 return self._revisioncache[2]
1642 1644 # rawtext is reusable. need to run flag processor
1643 1645 rawtext = self._revisioncache[2]
1644 1646
1645 1647 cachedrev = self._revisioncache[1]
1646 1648
1647 1649 # look up what we need to read
1648 1650 if rawtext is None:
1649 1651 if rev is None:
1650 1652 rev = self.rev(node)
1651 1653
1652 1654 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1653 1655 if stopped:
1654 1656 rawtext = self._revisioncache[2]
1655 1657
1656 1658 # drop cache to save memory
1657 1659 self._revisioncache = None
1658 1660
1659 1661 targetsize = None
1660 1662 rawsize = self.index[rev][2]
1661 1663 if 0 <= rawsize:
1662 1664 targetsize = 4 * rawsize
1663 1665
1664 1666 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1665 1667 if rawtext is None:
1666 1668 rawtext = bytes(bins[0])
1667 1669 bins = bins[1:]
1668 1670
1669 1671 rawtext = mdiff.patches(rawtext, bins)
1670 1672 self._revisioncache = (node, rev, rawtext)
1671 1673
1672 1674 if flags is None:
1673 1675 if rev is None:
1674 1676 rev = self.rev(node)
1675 1677 flags = self.flags(rev)
1676 1678
1677 1679 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1678 1680 if validatehash:
1679 1681 self.checkhash(text, node, rev=rev)
1680 1682
1681 1683 return text
1682 1684
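# call patterns (illustrative; 'rl' is a hypothetical revlog instance):
#
#   text = rl.revision(node)              # flag processors applied
#   rawtext = rl.revision(node, raw=True) # stored bytes, e.g. for
#                                         # changegroups or debugging
#
# for revisions with no extra flags the two results are identical, and
# the rawtext cache above makes repeated reads cheap.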
1683 1685 def hash(self, text, p1, p2):
1684 1686 """Compute a node hash.
1685 1687
1686 1688 Available as a function so that subclasses can replace the hash
1687 1689 as needed.
1688 1690 """
1689 1691 return storageutil.hashrevisionsha1(text, p1, p2)
1690 1692
1691 1693 def _processflags(self, text, flags, operation, raw=False):
1692 1694 """Inspect revision data flags and applies transforms defined by
1693 1695 registered flag processors.
1694 1696
1695 1697 ``text`` - the revision data to process
1696 1698 ``flags`` - the revision flags
1697 1699 ``operation`` - the operation being performed (read or write)
1698 1700 ``raw`` - an optional argument describing if the raw transform should be
1699 1701 applied.
1700 1702
1701 1703 This method processes the flags in the order (or reverse order if
1702 1704 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1703 1705 flag processors registered for the flags that are set. The order in
1704 1706 REVIDX_FLAGS_ORDER must be stable because the transforms do not commute.
1705 1707
1706 1708 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1707 1709 processed text and ``validatehash`` is a bool indicating whether the
1708 1710 returned text should be checked for hash integrity.
1709 1711
1710 1712 Note: If the ``raw`` argument is set, it takes precedence over the
1711 1713 operation and only the value of ``validatehash`` will be updated.
1712 1714 """
1713 1715 # fast path: no flag processors will run
1714 1716 if flags == 0:
1715 1717 return text, True
1716 1718 if operation not in ('read', 'write'):
1717 1719 raise error.ProgrammingError(_("invalid '%s' operation") %
1718 1720 operation)
1719 1721 # Check all flags are known.
1720 1722 if flags & ~REVIDX_KNOWN_FLAGS:
1721 1723 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1722 1724 (flags & ~REVIDX_KNOWN_FLAGS))
1723 1725 validatehash = True
1724 1726 # Depending on the operation (read or write), the order might be
1725 1727 # reversed due to non-commutative transforms.
1726 1728 orderedflags = REVIDX_FLAGS_ORDER
1727 1729 if operation == 'write':
1728 1730 orderedflags = reversed(orderedflags)
1729 1731
1730 1732 for flag in orderedflags:
1731 1733 # If a flagprocessor has been registered for a known flag, apply the
1732 1734 # related operation transform and update result tuple.
1733 1735 if flag & flags:
1734 1736 vhash = True
1735 1737
1736 1738 if flag not in self._flagprocessors:
1737 1739 message = _("missing processor for flag '%#x'") % flag
1738 1740 raise error.RevlogError(message)
1739 1741
1740 1742 processor = self._flagprocessors[flag]
1741 1743 if processor is not None:
1742 1744 readtransform, writetransform, rawtransform = processor
1743 1745
1744 1746 if raw:
1745 1747 vhash = rawtransform(self, text)
1746 1748 elif operation == 'read':
1747 1749 text, vhash = readtransform(self, text)
1748 1750 else: # write operation
1749 1751 text, vhash = writetransform(self, text)
1750 1752 validatehash = validatehash and vhash
1751 1753
1752 1754 return text, validatehash
1753 1755
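# worked example: with two set flags A and B, where A comes before B in
# REVIDX_FLAGS_ORDER, a 'read' applies A's transform and then B's, while
# a 'write' applies B's and then A's; this makes a write followed by a
# read round-trip even though the transforms do not commute.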
1754 1756 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1755 1757 """Check node hash integrity.
1756 1758
1757 1759 Available as a function so that subclasses can extend hash mismatch
1758 1760 behaviors as needed.
1759 1761 """
1760 1762 try:
1761 1763 if p1 is None and p2 is None:
1762 1764 p1, p2 = self.parents(node)
1763 1765 if node != self.hash(text, p1, p2):
1764 1766 # Clear the revision cache on hash failure. The revision cache
1765 1767 # only stores the raw revision and clearing the cache does have
1766 1768 # the side-effect that we won't have a cache hit when the raw
1767 1769 # revision data is accessed. But this case should be rare and
1768 1770 # it is extra work to teach the cache about the hash
1769 1771 # verification state.
1770 1772 if self._revisioncache and self._revisioncache[0] == node:
1771 1773 self._revisioncache = None
1772 1774
1773 1775 revornode = rev
1774 1776 if revornode is None:
1775 1777 revornode = templatefilters.short(hex(node))
1776 1778 raise error.RevlogError(_("integrity check failed on %s:%s")
1777 1779 % (self.indexfile, pycompat.bytestr(revornode)))
1778 1780 except error.RevlogError:
1779 1781 if self._censorable and storageutil.iscensoredtext(text):
1780 1782 raise error.CensoredNodeError(self.indexfile, node, text)
1781 1783 raise
1782 1784
1783 1785 def _enforceinlinesize(self, tr, fp=None):
1784 1786 """Check if the revlog is too big for inline and convert if so.
1785 1787
1786 1788 This should be called after revisions are added to the revlog. If the
1787 1789 revlog has grown too large to be an inline revlog, it will convert it
1788 1790 to use multiple index and data files.
1789 1791 """
1790 1792 tiprev = len(self) - 1
1791 1793 if (not self._inline or
1792 1794 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1793 1795 return
1794 1796
1795 1797 trinfo = tr.find(self.indexfile)
1796 1798 if trinfo is None:
1797 1799 raise error.RevlogError(_("%s not found in the transaction")
1798 1800 % self.indexfile)
1799 1801
1800 1802 trindex = trinfo[2]
1801 1803 if trindex is not None:
1802 1804 dataoff = self.start(trindex)
1803 1805 else:
1804 1806 # revlog was stripped at start of transaction, use all leftover data
1805 1807 trindex = len(self) - 1
1806 1808 dataoff = self.end(tiprev)
1807 1809
1808 1810 tr.add(self.datafile, dataoff)
1809 1811
1810 1812 if fp:
1811 1813 fp.flush()
1812 1814 fp.close()
1813 1815 # We can't use the cached file handle after close(). So prevent
1814 1816 # its usage.
1815 1817 self._writinghandles = None
1816 1818
1817 1819 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1818 1820 for r in self:
1819 1821 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1820 1822
1821 1823 with self._indexfp('w') as fp:
1822 1824 self.version &= ~FLAG_INLINE_DATA
1823 1825 self._inline = False
1824 1826 io = self._io
1825 1827 for i in self:
1826 1828 e = io.packentry(self.index[i], self.node, self.version, i)
1827 1829 fp.write(e)
1828 1830
1829 1831 # the temp file replaces the real index when we exit the context
1830 1832 # manager
1831 1833
1832 1834 tr.replace(self.indexfile, trindex * self._io.size)
1833 1835 self._chunkclear()
1834 1836
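# layout note: an inline revlog stores '<entry0><data0><entry1><data1>...'
# in the .i file; the conversion above rewrites that as a plain
# '<entry0><entry1>...' index plus a separate .d file holding only the
# data chunks.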
1835 1837 def _nodeduplicatecallback(self, transaction, node):
1836 1838 """called when trying to add a node already stored.
1837 1839 """
1838 1840
1839 1841 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1840 1842 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1841 1843 """add a revision to the log
1842 1844
1843 1845 text - the revision data to add
1844 1846 transaction - the transaction object used for rollback
1845 1847 link - the linkrev data to add
1846 1848 p1, p2 - the parent nodeids of the revision
1847 1849 cachedelta - an optional precomputed delta
1848 1850 node - nodeid of revision; typically node is not specified, and it is
1849 1851 computed by default as hash(text, p1, p2); however, subclasses might
1850 1852 use a different hashing method (and override checkhash() in that case)
1851 1853 flags - the known flags to set on the revision
1852 1854 deltacomputer - an optional deltacomputer instance shared between
1853 1855 multiple calls
1854 1856 """
1855 1857 if link == nullrev:
1856 1858 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1857 1859 % self.indexfile)
1858 1860
1859 1861 if flags:
1860 1862 node = node or self.hash(text, p1, p2)
1861 1863
1862 1864 rawtext, validatehash = self._processflags(text, flags, 'write')
1863 1865
1864 1866 # If the flag processor modifies the revision data, ignore any provided
1865 1867 # cachedelta.
1866 1868 if rawtext != text:
1867 1869 cachedelta = None
1868 1870
1869 1871 if len(rawtext) > _maxentrysize:
1870 1872 raise error.RevlogError(
1871 1873 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1872 1874 % (self.indexfile, len(rawtext)))
1873 1875
1874 1876 node = node or self.hash(rawtext, p1, p2)
1875 1877 if node in self.nodemap:
1876 1878 return node
1877 1879
1878 1880 if validatehash:
1879 1881 self.checkhash(rawtext, node, p1=p1, p2=p2)
1880 1882
1881 1883 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1882 1884 flags, cachedelta=cachedelta,
1883 1885 deltacomputer=deltacomputer)
1884 1886
1885 1887 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1886 1888 cachedelta=None, deltacomputer=None):
1887 1889 """add a raw revision with known flags, node and parents
1888 1890 useful when reusing a revision not stored in this revlog (e.g. received
1889 1891 over the wire, or read from an external bundle).
1890 1892 """
1891 1893 dfh = None
1892 1894 if not self._inline:
1893 1895 dfh = self._datafp("a+")
1894 1896 ifh = self._indexfp("a+")
1895 1897 try:
1896 1898 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1897 1899 flags, cachedelta, ifh, dfh,
1898 1900 deltacomputer=deltacomputer)
1899 1901 finally:
1900 1902 if dfh:
1901 1903 dfh.close()
1902 1904 ifh.close()
1903 1905
1904 1906 def compress(self, data):
1905 1907 """Generate a possibly-compressed representation of data."""
1906 1908 if not data:
1907 1909 return '', data
1908 1910
1909 1911 compressed = self._compressor.compress(data)
1910 1912
1911 1913 if compressed:
1912 1914 # The revlog compressor added the header in the returned data.
1913 1915 return '', compressed
1914 1916
1915 1917 if data[0:1] == '\0':
1916 1918 return '', data
1917 1919 return 'u', data
1918 1920
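# worked example of the header convention above:
#
#   ('', '')        empty input, stored as-is
#   ('', 'x...')    the zlib compressor emits its own 'x' header
#   ('', '\0...')   incompressible data already starting with '\0'
#   ('u', 'text')   incompressible data that needs an explicit marker
#
# decompress() below routes on that first byte.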
1919 1921 def decompress(self, data):
1920 1922 """Decompress a revlog chunk.
1921 1923
1922 1924 The chunk is expected to begin with a header identifying the
1923 1925 format type so it can be routed to an appropriate decompressor.
1924 1926 """
1925 1927 if not data:
1926 1928 return data
1927 1929
1928 1930 # Revlogs are read much more frequently than they are written and many
1929 1931 # chunks only take microseconds to decompress, so performance is
1930 1932 # important here.
1931 1933 #
1932 1934 # We can make a few assumptions about revlogs:
1933 1935 #
1934 1936 # 1) the majority of chunks will be compressed (as opposed to inline
1935 1937 # raw data).
1936 1938 # 2) decompressing *any* data will likely be at least 10x slower than
1937 1939 # returning raw inline data.
1938 1940 # 3) we want to prioritize common and officially supported compression
1939 1941 # engines
1940 1942 #
1941 1943 # It follows that we want to optimize for "decompress compressed data
1942 1944 # when encoded with common and officially supported compression engines"
1943 1945 # case over "raw data" and "data encoded by less common or non-official
1944 1946 # compression engines." That is why we have the inline lookup first
1945 1947 # followed by the compengines lookup.
1946 1948 #
1947 1949 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1948 1950 # compressed chunks. And this matters for changelog and manifest reads.
1949 1951 t = data[0:1]
1950 1952
1951 1953 if t == 'x':
1952 1954 try:
1953 1955 return _zlibdecompress(data)
1954 1956 except zlib.error as e:
1955 1957 raise error.RevlogError(_('revlog decompress error: %s') %
1956 1958 stringutil.forcebytestr(e))
1957 1959 # '\0' is more common than 'u' so it goes first.
1958 1960 elif t == '\0':
1959 1961 return data
1960 1962 elif t == 'u':
1961 1963 return util.buffer(data, 1)
1962 1964
1963 1965 try:
1964 1966 compressor = self._decompressors[t]
1965 1967 except KeyError:
1966 1968 try:
1967 1969 engine = util.compengines.forrevlogheader(t)
1968 1970 compressor = engine.revlogcompressor()
1969 1971 self._decompressors[t] = compressor
1970 1972 except KeyError:
1971 1973 raise error.RevlogError(_('unknown compression type %r') % t)
1972 1974
1973 1975 return compressor.decompress(data)
1974 1976
1975 1977 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1976 1978 cachedelta, ifh, dfh, alwayscache=False,
1977 1979 deltacomputer=None):
1978 1980 """internal function to add revisions to the log
1979 1981
1980 1982 see addrevision for argument descriptions.
1981 1983
1982 1984 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1983 1985
1984 1986 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1985 1987 be used.
1986 1988
1987 1989 invariants:
1988 1990 - rawtext is optional (can be None); if not set, cachedelta must be set.
1989 1991 if both are set, they must correspond to each other.
1990 1992 """
1991 1993 if node == nullid:
1992 1994 raise error.RevlogError(_("%s: attempt to add null revision") %
1993 1995 self.indexfile)
1994 1996 if node == wdirid or node in wdirfilenodeids:
1995 1997 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1996 1998 self.indexfile)
1997 1999
1998 2000 if self._inline:
1999 2001 fh = ifh
2000 2002 else:
2001 2003 fh = dfh
2002 2004
2003 2005 btext = [rawtext]
2004 2006
2005 2007 curr = len(self)
2006 2008 prev = curr - 1
2007 2009 offset = self.end(prev)
2008 2010 p1r, p2r = self.rev(p1), self.rev(p2)
2009 2011
2010 2012 # full versions are inserted when the needed deltas
2011 2013 # become comparable to the uncompressed text
2012 2014 if rawtext is None:
2013 2015 # we need the rawtext size before it is changed by flag processors,
2014 2016 # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2015 2017 # extra logic that might remove metadata size.
2016 2018 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2017 2019 cachedelta[1])
2018 2020 else:
2019 2021 textlen = len(rawtext)
2020 2022
2021 2023 if deltacomputer is None:
2022 2024 deltacomputer = deltautil.deltacomputer(self)
2023 2025
2024 2026 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2025 2027
2026 2028 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2027 2029
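# the index entry built below is (offset|flags, compressed length, raw
# length, delta base rev, linkrev, p1 rev, p2 rev, node); other methods
# read it positionally, e.g. entry[3] in deltaparent() and issnapshot(),
# entry[5]/entry[6]/entry[7] in clone().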
2028 2030 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2029 2031 deltainfo.base, link, p1r, p2r, node)
2030 2032 self.index.append(e)
2031 2033 self.nodemap[node] = curr
2032 2034
2033 2035 # Reset the pure node cache start lookup offset to account for new
2034 2036 # revision.
2035 2037 if self._nodepos is not None:
2036 2038 self._nodepos = curr
2037 2039
2038 2040 entry = self._io.packentry(e, self.node, self.version, curr)
2039 2041 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2040 2042 link, offset)
2041 2043
2042 2044 rawtext = btext[0]
2043 2045
2044 2046 if alwayscache and rawtext is None:
2045 2047 rawtext = deltacomputer.buildtext(revinfo, fh)
2046 2048
2047 2049 if type(rawtext) == bytes: # only accept immutable objects
2048 2050 self._revisioncache = (node, curr, rawtext)
2049 2051 self._chainbasecache[curr] = deltainfo.chainbase
2050 2052 return node
2051 2053
2052 2054 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2053 2055 # Files opened in a+ mode have inconsistent behavior on various
2054 2056 # platforms. Windows requires that a file positioning call be made
2055 2057 # when the file handle transitions between reads and writes. See
2056 2058 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2057 2059 # platforms, Python or the platform itself can be buggy. Some versions
2058 2060 # of Solaris have been observed to not append at the end of the file
2059 2061 # if the file was seeked to before the end. See issue4943 for more.
2060 2062 #
2061 2063 # We work around this issue by inserting a seek() before writing.
2062 2064 # Note: This is likely not necessary on Python 3. However, because
2063 2065 # the file handle is reused for reads and may be seeked there, we need
2064 2066 # to be careful before changing this.
2065 2067 ifh.seek(0, os.SEEK_END)
2066 2068 if dfh:
2067 2069 dfh.seek(0, os.SEEK_END)
2068 2070
2069 2071 curr = len(self) - 1
2070 2072 if not self._inline:
2071 2073 transaction.add(self.datafile, offset)
2072 2074 transaction.add(self.indexfile, curr * len(entry))
2073 2075 if data[0]:
2074 2076 dfh.write(data[0])
2075 2077 dfh.write(data[1])
2076 2078 ifh.write(entry)
2077 2079 else:
2078 2080 offset += curr * self._io.size
2079 2081 transaction.add(self.indexfile, offset, curr)
2080 2082 ifh.write(entry)
2081 2083 ifh.write(data[0])
2082 2084 ifh.write(data[1])
2083 2085 self._enforceinlinesize(transaction, ifh)
2084 2086
2085 2087 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2086 2088 """
2087 2089 add a delta group
2088 2090
2089 2091 given a set of deltas, add them to the revision log. each delta
2090 2092 carries its own delta base, which must already be present in the
2091 2093 log (see the ``deltabase`` handling below).
2092 2094
2093 2095 If ``addrevisioncb`` is defined, it will be called with arguments of
2094 2096 this revlog and the node that was added.
2095 2097 """
2096 2098
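# each item of ``deltas`` unpacks below as
# (node, p1, p2, linknode, deltabase, delta, flags); both parents and
# deltabase must already be known to this revlog or a LookupError is
# raised.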
2097 2099 if self._writinghandles:
2098 2100 raise error.ProgrammingError('cannot nest addgroup() calls')
2099 2101
2100 2102 nodes = []
2101 2103
2102 2104 r = len(self)
2103 2105 end = 0
2104 2106 if r:
2105 2107 end = self.end(r - 1)
2106 2108 ifh = self._indexfp("a+")
2107 2109 isize = r * self._io.size
2108 2110 if self._inline:
2109 2111 transaction.add(self.indexfile, end + isize, r)
2110 2112 dfh = None
2111 2113 else:
2112 2114 transaction.add(self.indexfile, isize, r)
2113 2115 transaction.add(self.datafile, end)
2114 2116 dfh = self._datafp("a+")
2115 2117 def flush():
2116 2118 if dfh:
2117 2119 dfh.flush()
2118 2120 ifh.flush()
2119 2121
2120 2122 self._writinghandles = (ifh, dfh)
2121 2123
2122 2124 try:
2123 2125 deltacomputer = deltautil.deltacomputer(self)
2124 2126 # loop through our set of deltas
2125 2127 for data in deltas:
2126 2128 node, p1, p2, linknode, deltabase, delta, flags = data
2127 2129 link = linkmapper(linknode)
2128 2130 flags = flags or REVIDX_DEFAULT_FLAGS
2129 2131
2130 2132 nodes.append(node)
2131 2133
2132 2134 if node in self.nodemap:
2133 2135 self._nodeduplicatecallback(transaction, node)
2134 2136 # this can happen if two branches make the same change
2135 2137 continue
2136 2138
2137 2139 for p in (p1, p2):
2138 2140 if p not in self.nodemap:
2139 2141 raise error.LookupError(p, self.indexfile,
2140 2142 _('unknown parent'))
2141 2143
2142 2144 if deltabase not in self.nodemap:
2143 2145 raise error.LookupError(deltabase, self.indexfile,
2144 2146 _('unknown delta base'))
2145 2147
2146 2148 baserev = self.rev(deltabase)
2147 2149
2148 2150 if baserev != nullrev and self.iscensored(baserev):
2149 2151 # if base is censored, the delta must be a full replacement done
2150 2152 # in a single patch operation
2151 2153 hlen = struct.calcsize(">lll")
2152 2154 oldlen = self.rawsize(baserev)
2153 2155 newlen = len(delta) - hlen
2154 2156 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2155 2157 raise error.CensoredBaseError(self.indexfile,
2156 2158 self.node(baserev))
2157 2159
2158 2160 if not flags and self._peek_iscensored(baserev, delta, flush):
2159 2161 flags |= REVIDX_ISCENSORED
2160 2162
2161 2163 # We assume consumers of addrevisioncb will want to retrieve
2162 2164 # the added revision, which will require a call to
2163 2165 # revision(). revision() will fast path if there is a cache
2164 2166 # hit. So, we tell _addrevision() to always cache in this case.
2165 2167 # We're only using addgroup() in the context of changegroup
2166 2168 # generation so the revision data can always be handled as raw
2167 2169 # by the flagprocessor.
2168 2170 self._addrevision(node, None, transaction, link,
2169 2171 p1, p2, flags, (baserev, delta),
2170 2172 ifh, dfh,
2171 2173 alwayscache=bool(addrevisioncb),
2172 2174 deltacomputer=deltacomputer)
2173 2175
2174 2176 if addrevisioncb:
2175 2177 addrevisioncb(self, node)
2176 2178
2177 2179 if not dfh and not self._inline:
2178 2180 # addrevision switched from inline to conventional
2179 2181 # reopen the index
2180 2182 ifh.close()
2181 2183 dfh = self._datafp("a+")
2182 2184 ifh = self._indexfp("a+")
2183 2185 self._writinghandles = (ifh, dfh)
2184 2186 finally:
2185 2187 self._writinghandles = None
2186 2188
2187 2189 if dfh:
2188 2190 dfh.close()
2189 2191 ifh.close()
2190 2192
2191 2193 return nodes
2192 2194
2193 2195 def iscensored(self, rev):
2194 2196 """Check if a file revision is censored."""
2195 2197 if not self._censorable:
2196 2198 return False
2197 2199
2198 2200 return self.flags(rev) & REVIDX_ISCENSORED
2199 2201
2200 2202 def _peek_iscensored(self, baserev, delta, flush):
2201 2203 """Quickly check if a delta produces a censored revision."""
2202 2204 if not self._censorable:
2203 2205 return False
2204 2206
2205 2207 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2206 2208
2207 2209 def getstrippoint(self, minlink):
2208 2210 """find the minimum rev that must be stripped to strip the linkrev
2209 2211
2210 2212 Returns a tuple containing the minimum rev and a set of all revs that
2211 2213 have linkrevs that will be broken by this strip.
2212 2214 """
2213 2215 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2214 2216 self.headrevs(),
2215 2217 self.linkrev, self.parentrevs)
2216 2218
2217 2219 def strip(self, minlink, transaction):
2218 2220 """truncate the revlog on the first revision with a linkrev >= minlink
2219 2221
2220 2222 This function is called when we're stripping revision minlink and
2221 2223 its descendants from the repository.
2222 2224
2223 2225 We have to remove all revisions with linkrev >= minlink, because
2224 2226 the equivalent changelog revisions will be renumbered after the
2225 2227 strip.
2226 2228
2227 2229 So we truncate the revlog on the first of these revisions, and
2228 2230 trust that the caller has saved the revisions that shouldn't be
2229 2231 removed and that it'll re-add them after this truncation.
2230 2232 """
2231 2233 if len(self) == 0:
2232 2234 return
2233 2235
2234 2236 rev, _ = self.getstrippoint(minlink)
2235 2237 if rev == len(self):
2236 2238 return
2237 2239
2238 2240 # first truncate the files on disk
2239 2241 end = self.start(rev)
2240 2242 if not self._inline:
2241 2243 transaction.add(self.datafile, end)
2242 2244 end = rev * self._io.size
2243 2245 else:
2244 2246 end += rev * self._io.size
2245 2247
2246 2248 transaction.add(self.indexfile, end)
2247 2249
2248 2250 # then reset internal state in memory to forget those revisions
2249 2251 self._revisioncache = None
2250 2252 self._chaininfocache = {}
2251 2253 self._chunkclear()
2252 2254 for x in pycompat.xrange(rev, len(self)):
2253 2255 del self.nodemap[self.node(x)]
2254 2256
2255 2257 del self.index[rev:-1]
2256 2258 self._nodepos = None
2257 2259
2258 2260 def checksize(self):
"""check the sizes of the data and index files against the index

Returns a (dd, di) pair where dd is the number of bytes by which the
data file is off from the expected length and di is the number of
extra bytes in the index file.
"""
2259 2261 expected = 0
2260 2262 if len(self):
2261 2263 expected = max(0, self.end(len(self) - 1))
2262 2264
2263 2265 try:
2264 2266 with self._datafp() as f:
2265 2267 f.seek(0, 2)
2266 2268 actual = f.tell()
2267 2269 dd = actual - expected
2268 2270 except IOError as inst:
2269 2271 if inst.errno != errno.ENOENT:
2270 2272 raise
2271 2273 dd = 0
2272 2274
2273 2275 try:
2274 2276 f = self.opener(self.indexfile)
2275 2277 f.seek(0, 2)
2276 2278 actual = f.tell()
2277 2279 f.close()
2278 2280 s = self._io.size
2279 2281 i = max(0, actual // s)
2280 2282 di = actual - (i * s)
2281 2283 if self._inline:
2282 2284 databytes = 0
2283 2285 for r in self:
2284 2286 databytes += max(0, self.length(r))
2285 2287 dd = 0
2286 2288 di = actual - len(self) * s - databytes
2287 2289 except IOError as inst:
2288 2290 if inst.errno != errno.ENOENT:
2289 2291 raise
2290 2292 di = 0
2291 2293
2292 2294 return (dd, di)
2293 2295
2294 2296 def files(self):
2295 2297 res = [self.indexfile]
2296 2298 if not self._inline:
2297 2299 res.append(self.datafile)
2298 2300 return res
2299 2301
2300 2302 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2301 2303 assumehaveparentrevisions=False,
2302 2304 deltamode=repository.CG_DELTAMODE_STD):
2303 2305 if nodesorder not in ('nodes', 'storage', 'linear', None):
2304 2306 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2305 2307 nodesorder)
2306 2308
2307 2309 if nodesorder is None and not self._generaldelta:
2308 2310 nodesorder = 'storage'
2309 2311
2310 2312 if (not self._storedeltachains and
2311 2313 deltamode != repository.CG_DELTAMODE_PREV):
2312 2314 deltamode = repository.CG_DELTAMODE_FULL
2313 2315
2314 2316 return storageutil.emitrevisions(
2315 2317 self, nodes, nodesorder, revlogrevisiondelta,
2316 2318 deltaparentfn=self.deltaparent,
2317 2319 candeltafn=self.candelta,
2318 2320 rawsizefn=self.rawsize,
2319 2321 revdifffn=self.revdiff,
2320 2322 flagsfn=self.flags,
2321 2323 deltamode=deltamode,
2322 2324 revisiondata=revisiondata,
2323 2325 assumehaveparentrevisions=assumehaveparentrevisions)
2324 2326
2325 2327 DELTAREUSEALWAYS = 'always'
2326 2328 DELTAREUSESAMEREVS = 'samerevs'
2327 2329 DELTAREUSENEVER = 'never'
2328 2330
2329 2331 DELTAREUSEFULLADD = 'fulladd'
2330 2332
2331 2333 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2332 2334
2333 2335 def clone(self, tr, destrevlog, addrevisioncb=None,
2334 2336 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2335 2337 """Copy this revlog to another, possibly with format changes.
2336 2338
2337 2339 The destination revlog will contain the same revisions and nodes.
2338 2340 However, it may not be bit-for-bit identical due to e.g. delta encoding
2339 2341 differences.
2340 2342
2341 2343 The ``deltareuse`` argument controls how deltas from the existing revlog
2342 2344 are preserved in the destination revlog. The argument can have the
2343 2345 following values:
2344 2346
2345 2347 DELTAREUSEALWAYS
2346 2348 Deltas will always be reused (if possible), even if the destination
2347 2349 revlog would not select the same revisions for the delta. This is the
2348 2350 fastest mode of operation.
2349 2351 DELTAREUSESAMEREVS
2350 2352 Deltas will be reused if the destination revlog would pick the same
2351 2353 revisions for the delta. This mode strikes a balance between speed
2352 2354 and optimization.
2353 2355 DELTAREUSENEVER
2354 2356 Deltas will never be reused. This is the slowest mode of execution.
2355 2357 This mode can be used to recompute deltas (e.g. if the diff/delta
2356 2358 algorithm changes).
2357 2359
2358 2360 Delta computation can be slow, so the choice of delta reuse policy can
2359 2361 significantly affect run time.
2360 2362
2361 2363 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2362 2364 two extremes. Deltas will be reused when they are appropriate. But if a
2363 2365 better delta base is available, the delta is recomputed. This means if you
2364 2366 are converting a non-generaldelta revlog to a generaldelta revlog,
2365 2367 deltas will be recomputed if the delta's parent isn't a parent of the
2366 2368 revision.
2367 2369
2368 2370 In addition to the delta policy, the ``forcedeltabothparents``
2369 2371 argument controls whether to force computing deltas against both parents
2370 2372 for merges. If unset, the destination revlog's current setting is used.
2371 2373 """
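# usage sketch (illustrative; the names are hypothetical):
#
#   with repo.transaction(b'upgrade') as tr:
#       oldrl.clone(tr, newrl, deltareuse=oldrl.DELTAREUSENEVER)
#
# e.g. to recompute every delta while copying, per the policy notes
# above.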
2372 2374 if deltareuse not in self.DELTAREUSEALL:
2373 2375 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2374 2376
2375 2377 if len(destrevlog):
2376 2378 raise ValueError(_('destination revlog is not empty'))
2377 2379
2378 2380 if getattr(self, 'filteredrevs', None):
2379 2381 raise ValueError(_('source revlog has filtered revisions'))
2380 2382 if getattr(destrevlog, 'filteredrevs', None):
2381 2383 raise ValueError(_('destination revlog has filtered revisions'))
2382 2384
2383 2385 # lazydeltabase controls whether to reuse a cached delta, if possible.
2384 2386 oldlazydeltabase = destrevlog._lazydeltabase
2385 2387 oldamd = destrevlog._deltabothparents
2386 2388
2387 2389 try:
2388 2390 if deltareuse == self.DELTAREUSEALWAYS:
2389 2391 destrevlog._lazydeltabase = True
2390 2392 elif deltareuse == self.DELTAREUSESAMEREVS:
2391 2393 destrevlog._lazydeltabase = False
2392 2394
2393 2395 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2394 2396
2395 2397 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2396 2398 self.DELTAREUSESAMEREVS)
2397 2399
2398 2400 deltacomputer = deltautil.deltacomputer(destrevlog)
2399 2401 index = self.index
2400 2402 for rev in self:
2401 2403 entry = index[rev]
2402 2404
2403 2405 # Some classes override linkrev to take filtered revs into
2404 2406 # account. Use raw entry from index.
2405 2407 flags = entry[0] & 0xffff
2406 2408 linkrev = entry[4]
2407 2409 p1 = index[entry[5]][7]
2408 2410 p2 = index[entry[6]][7]
2409 2411 node = entry[7]
2410 2412
2411 2413 # (Possibly) reuse the delta from the revlog if allowed and
2412 2414 # the revlog chunk is a delta.
2413 2415 cachedelta = None
2414 2416 rawtext = None
2415 2417 if populatecachedelta:
2416 2418 dp = self.deltaparent(rev)
2417 2419 if dp != nullrev:
2418 2420 cachedelta = (dp, bytes(self._chunk(rev)))
2419 2421
2420 2422 if not cachedelta:
2421 2423 rawtext = self.revision(rev, raw=True)
2422 2424
2424 2426 if deltareuse == self.DELTAREUSEFULLADD:
2425 2427 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2426 2428 cachedelta=cachedelta,
2427 2429 node=node, flags=flags,
2428 2430 deltacomputer=deltacomputer)
2429 2431 else:
2430 2432 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2431 2433 checkambig=False)
2432 2434 dfh = None
2433 2435 if not destrevlog._inline:
2434 2436 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2435 2437 try:
2436 2438 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2437 2439 p2, flags, cachedelta, ifh, dfh,
2438 2440 deltacomputer=deltacomputer)
2439 2441 finally:
2440 2442 if dfh:
2441 2443 dfh.close()
2442 2444 ifh.close()
2443 2445
2444 2446 if addrevisioncb:
2445 2447 addrevisioncb(self, rev, node)
2446 2448 finally:
2447 2449 destrevlog._lazydeltabase = oldlazydeltabase
2448 2450 destrevlog._deltabothparents = oldamd
2449 2451
2450 2452 def censorrevision(self, tr, censornode, tombstone=b''):
2451 2453 if (self.version & 0xFFFF) == REVLOGV0:
2452 2454 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2453 2455 self.version)
2454 2456
2455 2457 censorrev = self.rev(censornode)
2456 2458 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2457 2459
2458 2460 if len(tombstone) > self.rawsize(censorrev):
2459 2461 raise error.Abort(_('censor tombstone must be no longer than '
2460 2462 'censored data'))
2461 2463
2462 2464 # Rewriting the revlog in place is hard. Our strategy for censoring is
2463 2465 # to create a new revlog, copy all revisions to it, then replace the
2464 2466 # revlogs on transaction close.
2465 2467
2466 2468 newindexfile = self.indexfile + b'.tmpcensored'
2467 2469 newdatafile = self.datafile + b'.tmpcensored'
2468 2470
2469 2471 # This is a bit dangerous. We could easily have a mismatch of state.
2470 2472 newrl = revlog(self.opener, newindexfile, newdatafile,
2471 2473 censorable=True)
2472 2474 newrl.version = self.version
2473 2475 newrl._generaldelta = self._generaldelta
2474 2476 newrl._io = self._io
2475 2477
2476 2478 for rev in self.revs():
2477 2479 node = self.node(rev)
2478 2480 p1, p2 = self.parents(node)
2479 2481
2480 2482 if rev == censorrev:
2481 2483 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2482 2484 p1, p2, censornode, REVIDX_ISCENSORED)
2483 2485
2484 2486 if newrl.deltaparent(rev) != nullrev:
2485 2487 raise error.Abort(_('censored revision stored as delta; '
2486 2488 'cannot censor'),
2487 2489 hint=_('censoring of revlogs is not '
2488 2490 'fully implemented; please report '
2489 2491 'this bug'))
2490 2492 continue
2491 2493
2492 2494 if self.iscensored(rev):
2493 2495 if self.deltaparent(rev) != nullrev:
2494 2496 raise error.Abort(_('cannot censor due to censored '
2495 2497 'revision having delta stored'))
2496 2498 rawtext = self._chunk(rev)
2497 2499 else:
2498 2500 rawtext = self.revision(rev, raw=True)
2499 2501
2500 2502 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2501 2503 self.flags(rev))
2502 2504
2503 2505 tr.addbackup(self.indexfile, location='store')
2504 2506 if not self._inline:
2505 2507 tr.addbackup(self.datafile, location='store')
2506 2508
2507 2509 self.opener.rename(newrl.indexfile, self.indexfile)
2508 2510 if not self._inline:
2509 2511 self.opener.rename(newrl.datafile, self.datafile)
2510 2512
2511 2513 self.clearcaches()
2512 2514 self._loadindex()
2513 2515
2514 2516 def verifyintegrity(self, state):
2515 2517 """Verifies the integrity of the revlog.
2516 2518
2517 2519 Yields ``revlogproblem`` instances describing problems that are
2518 2520 found.
2519 2521 """
2520 2522 dd, di = self.checksize()
2521 2523 if dd:
2522 2524 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2523 2525 if di:
2524 2526 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2525 2527
2526 2528 version = self.version & 0xFFFF
2527 2529
2528 2530 # The verifier tells us what version revlog we should be.
2529 2531 if version != state['expectedversion']:
2530 2532 yield revlogproblem(
2531 2533 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2532 2534 (self.indexfile, version, state['expectedversion']))
2533 2535
2534 2536 state['skipread'] = set()
2535 2537
2536 2538 for rev in self:
2537 2539 node = self.node(rev)
2538 2540
2539 2541 # Verify contents. 4 cases to care about:
2540 2542 #
2541 2543 # common: the most common case
2542 2544 # rename: with a rename
2543 2545 # meta: file content starts with b'\1\n', the metadata
2544 2546 # header defined in filelog.py, but without a rename
2545 2547 # ext: content stored externally
2546 2548 #
2547 2549 # More formally, their differences are shown below:
2548 2550 #
2549 2551 # | common | rename | meta | ext
2550 2552 # -------------------------------------------------------
2551 2553 # flags() | 0 | 0 | 0 | not 0
2552 2554 # renamed() | False | True | False | ?
2553 2555 # rawtext[0:2]=='\1\n'| False | True | True | ?
2554 2556 #
2555 2557 # "rawtext" means the raw text stored in revlog data, which
2556 2558 # could be retrieved by "revision(rev, raw=True)". "text"
2557 2559 # mentioned below is "revision(rev, raw=False)".
2558 2560 #
2559 2561 # There are 3 different lengths stored physically:
2560 2562 # 1. L1: rawsize, stored in revlog index
2561 2563 # 2. L2: len(rawtext), stored in revlog data
2562 2564 # 3. L3: len(text), stored in revlog data if flags==0, or
2563 2565 # possibly somewhere else if flags!=0
2564 2566 #
2565 2567 # L1 should be equal to L2. L3 could be different from them.
2566 2568 # "text" may or may not affect commit hash depending on flag
2567 2569 # processors (see revlog.addflagprocessor).
2568 2570 #
2569 2571 # | common | rename | meta | ext
2570 2572 # -------------------------------------------------
2571 2573 # rawsize() | L1 | L1 | L1 | L1
2572 2574 # size() | L1 | L2-LM | L1(*) | L1 (?)
2573 2575 # len(rawtext) | L2 | L2 | L2 | L2
2574 2576 # len(text) | L2 | L2 | L2 | L3
2575 2577 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2576 2578 #
2577 2579 # LM: length of metadata, depending on rawtext
2578 2580 # (*): not ideal, see comment in filelog.size
2579 2581 # (?): could be "- len(meta)" if the resolved content has
2580 2582 # rename metadata
2581 2583 #
2582 2584 # Checks needed to be done:
2583 2585 # 1. length check: L1 == L2, in all cases.
2584 2586 # 2. hash check: depending on flag processor, we may need to
2585 2587 # use either "text" (external), or "rawtext" (in revlog).
2586 2588
2587 2589 try:
2588 2590 skipflags = state.get('skipflags', 0)
2589 2591 if skipflags:
2590 2592 skipflags &= self.flags(rev)
2591 2593
2592 2594 if skipflags:
2593 2595 state['skipread'].add(node)
2594 2596 else:
2595 2597 # Side-effect: read content and verify hash.
2596 2598 self.revision(node)
2597 2599
2598 2600 l1 = self.rawsize(rev)
2599 2601 l2 = len(self.revision(node, raw=True))
2600 2602
2601 2603 if l1 != l2:
2602 2604 yield revlogproblem(
2603 2605 error=_('unpacked size is %d, %d expected') % (l2, l1),
2604 2606 node=node)
2605 2607
2606 2608 except error.CensoredNodeError:
2607 2609 if state['erroroncensored']:
2608 2610 yield revlogproblem(error=_('censored file data'),
2609 2611 node=node)
2610 2612 state['skipread'].add(node)
2611 2613 except Exception as e:
2612 2614 yield revlogproblem(
2613 2615 error=_('unpacking %s: %s') % (short(node),
2614 2616 stringutil.forcebytestr(e)),
2615 2617 node=node)
2616 2618 state['skipread'].add(node)
2617 2619
2618 2620 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2619 2621 revisionscount=False, trackedsize=False,
2620 2622 storedsize=False):
2621 2623 d = {}
2622 2624
2623 2625 if exclusivefiles:
2624 2626 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2625 2627 if not self._inline:
2626 2628 d['exclusivefiles'].append((self.opener, self.datafile))
2627 2629
2628 2630 if sharedfiles:
2629 2631 d['sharedfiles'] = []
2630 2632
2631 2633 if revisionscount:
2632 2634 d['revisionscount'] = len(self)
2633 2635
2634 2636 if trackedsize:
2635 2637 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2636 2638
2637 2639 if storedsize:
2638 2640 d['storedsize'] = sum(self.opener.stat(path).st_size
2639 2641 for path in self.files())
2640 2642
2641 2643 return d