revlog: inline opener options logic into _loadindex()...
Gregory Szorc
r41240:e3cfe070 default
@@ -1,2631 +1,2631 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # Blanket usage of all the names to prevent pyflakes warnings.
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns a same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
183 183
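# Editor's note: the first index field packs a 48-bit byte offset with
# 16 flag bits, so offset_type(off, flags) == (off << 16) | flags and
# getoffset()/gettype() recover the two halves, e.g.
# getoffset(offset_type(1048576, REVIDX_ISCENSORED)) == 1048576.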
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated with the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
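# Editor's note: each v0 record is indexformatv0.size == 4 * 4 + 3 * 20
# == 76 bytes. The parser below walks the file in fixed 76-byte strides
# and rebuilds the nodeid -> rev map as it goes, because v0 stores
# parents as nodeids rather than as revision numbers.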
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
288 288 # maximum uncompressed length representable by indexformatng (2 GiB,
289 289 # 4-byte signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
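# Editor's note: each v1/v2 record is indexformatng.size == 8 + 6 * 4 +
# 20 + 12 == 64 bytes. Rev 0's 48-bit offset is necessarily zero, so
# packentry() above reuses the first 4 bytes of its entry for the
# version header (versionformat_pack(version) + p[4:]); the index file
# has no separate header record.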
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, text) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of revision integer to full node.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._maxdeltachainspan = -1
375 375 self._withsparseread = False
376 376 self._sparserevlog = False
377 377 self._srdensitythreshold = 0.50
378 378 self._srmingapsize = 262144
379 379
380 380 # Make copy of flag processors so each revlog instance can support
381 381 # custom flags.
382 382 self._flagprocessors = dict(_flagprocessors)
383 383
384 384 # 2-tuple of file handles being used for active writing.
385 385 self._writinghandles = None
386 386
387 self._loadindex()
388
389 def _loadindex(self):
387 390 mmapindexthreshold = None
388 opts = getattr(opener, 'options', {}) or {}
391 opts = getattr(self.opener, 'options', {}) or {}
389 392
390 393 if 'revlogv2' in opts:
391 394 versionflags = REVLOGV2 | FLAG_INLINE_DATA
392 395 elif 'revlogv1' in opts:
393 396 versionflags = REVLOGV1 | FLAG_INLINE_DATA
394 397 if 'generaldelta' in opts:
395 398 versionflags |= FLAG_GENERALDELTA
396 399 else:
397 400 versionflags = REVLOG_DEFAULT_VERSION
398 401
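# Editor's note (hypothetical options dict): an opener carrying
# options = {'revlogv1': 1, 'generaldelta': 1} yields versionflags ==
# REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA, while an opener with
# no relevant options falls back to REVLOG_DEFAULT_VERSION. Either way
# the value is only a default; it is overridden below by the version
# header read from an existing index file.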
399 402 if 'chunkcachesize' in opts:
400 403 self._chunkcachesize = opts['chunkcachesize']
401 404 if 'maxchainlen' in opts:
402 405 self._maxchainlen = opts['maxchainlen']
403 406 if 'deltabothparents' in opts:
404 407 self._deltabothparents = opts['deltabothparents']
405 408 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
406 409 if 'compengine' in opts:
407 410 self._compengine = opts['compengine']
408 411 if 'maxdeltachainspan' in opts:
409 412 self._maxdeltachainspan = opts['maxdeltachainspan']
410 413 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
411 414 mmapindexthreshold = opts['mmapindexthreshold']
412 415 self._sparserevlog = bool(opts.get('sparse-revlog', False))
413 416 withsparseread = bool(opts.get('with-sparse-read', False))
414 417 # sparse-revlog forces sparse-read
415 418 self._withsparseread = self._sparserevlog or withsparseread
416 419 if 'sparse-read-density-threshold' in opts:
417 420 self._srdensitythreshold = opts['sparse-read-density-threshold']
418 421 if 'sparse-read-min-gap-size' in opts:
419 422 self._srmingapsize = opts['sparse-read-min-gap-size']
420 423 if opts.get('enableellipsis'):
421 424 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
422 425
423 426 # revlog v0 doesn't have flag processors
424 427 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
425 428 _insertflagprocessor(flag, processor, self._flagprocessors)
426 429
427 430 if self._chunkcachesize <= 0:
428 431 raise error.RevlogError(_('revlog chunk cache size %r is not '
429 432 'greater than 0') % self._chunkcachesize)
430 433 elif self._chunkcachesize & (self._chunkcachesize - 1):
431 434 raise error.RevlogError(_('revlog chunk cache size %r is not a '
432 435 'power of 2') % self._chunkcachesize)
433 436
434 self._loadindex(versionflags, mmapindexthreshold)
435
436 def _loadindex(self, versionflags, mmapindexthreshold):
437 437 indexdata = ''
438 438 self._initempty = True
439 439 try:
440 440 with self._indexfp() as f:
441 441 if (mmapindexthreshold is not None and
442 442 self.opener.fstat(f).st_size >= mmapindexthreshold):
443 443 indexdata = util.buffer(util.mmapread(f))
444 444 else:
445 445 indexdata = f.read()
446 446 if len(indexdata) > 0:
447 447 versionflags = versionformat_unpack(indexdata[:4])[0]
448 448 self._initempty = False
449 449 except IOError as inst:
450 450 if inst.errno != errno.ENOENT:
451 451 raise
452 452
453 453 self.version = versionflags
454 454
455 455 flags = versionflags & ~0xFFFF
456 456 fmt = versionflags & 0xFFFF
457 457
458 458 if fmt == REVLOGV0:
459 459 if flags:
460 460 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
461 461 'revlog %s') %
462 462 (flags >> 16, fmt, self.indexfile))
463 463
464 464 self._inline = False
465 465 self._generaldelta = False
466 466
467 467 elif fmt == REVLOGV1:
468 468 if flags & ~REVLOGV1_FLAGS:
469 469 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
470 470 'revlog %s') %
471 471 (flags >> 16, fmt, self.indexfile))
472 472
473 473 self._inline = versionflags & FLAG_INLINE_DATA
474 474 self._generaldelta = versionflags & FLAG_GENERALDELTA
475 475
476 476 elif fmt == REVLOGV2:
477 477 if flags & ~REVLOGV2_FLAGS:
478 478 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
479 479 'revlog %s') %
480 480 (flags >> 16, fmt, self.indexfile))
481 481
482 482 self._inline = versionflags & FLAG_INLINE_DATA
483 483 # generaldelta implied by version 2 revlogs.
484 484 self._generaldelta = True
485 485
486 486 else:
487 487 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
488 488 (fmt, self.indexfile))
489 489
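# Editor's worked example: versionflags splits into fmt (low 16 bits)
# and flags (high 16 bits). A stock inline v1 revlog stores
# 0x00010001 == REVLOGV1 | FLAG_INLINE_DATA in its first 4 bytes, so
# fmt == REVLOGV1 and flags == FLAG_INLINE_DATA.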
490 490 self._storedeltachains = True
491 491
492 492 self._io = revlogio()
493 493 if self.version == REVLOGV0:
494 494 self._io = revlogoldio()
495 495 try:
496 496 d = self._io.parseindex(indexdata, self._inline)
497 497 except (ValueError, IndexError):
498 498 raise error.RevlogError(_("index %s is corrupted") %
499 499 self.indexfile)
500 500 self.index, nodemap, self._chunkcache = d
501 501 if nodemap is not None:
502 502 self.nodemap = self._nodecache = nodemap
503 503 if not self._chunkcache:
504 504 self._chunkclear()
505 505 # revnum -> (chain-length, sum-delta-length)
506 506 self._chaininfocache = {}
507 507 # revlog header -> revlog compressor
508 508 self._decompressors = {}
509 509
510 510 @util.propertycache
511 511 def _compressor(self):
512 512 return util.compengines[self._compengine].revlogcompressor()
513 513
514 514 def _indexfp(self, mode='r'):
515 515 """file object for the revlog's index file"""
516 516 args = {r'mode': mode}
517 517 if mode != 'r':
518 518 args[r'checkambig'] = self._checkambig
519 519 if mode == 'w':
520 520 args[r'atomictemp'] = True
521 521 return self.opener(self.indexfile, **args)
522 522
523 523 def _datafp(self, mode='r'):
524 524 """file object for the revlog's data file"""
525 525 return self.opener(self.datafile, mode=mode)
526 526
527 527 @contextlib.contextmanager
528 528 def _datareadfp(self, existingfp=None):
529 529 """file object suitable to read data"""
530 530 # Use explicit file handle, if given.
531 531 if existingfp is not None:
532 532 yield existingfp
533 533
534 534 # Use a file handle being actively used for writes, if available.
535 535 # There is some danger to doing this because reads will seek the
536 536 # file. However, _writeentry() performs a SEEK_END before all writes,
537 537 # so we should be safe.
538 538 elif self._writinghandles:
539 539 if self._inline:
540 540 yield self._writinghandles[0]
541 541 else:
542 542 yield self._writinghandles[1]
543 543
544 544 # Otherwise open a new file handle.
545 545 else:
546 546 if self._inline:
547 547 func = self._indexfp
548 548 else:
549 549 func = self._datafp
550 550 with func() as fp:
551 551 yield fp
552 552
553 553 def tip(self):
554 554 return self.node(len(self.index) - 1)
555 555 def __contains__(self, rev):
556 556 return 0 <= rev < len(self)
557 557 def __len__(self):
558 558 return len(self.index)
559 559 def __iter__(self):
560 560 return iter(pycompat.xrange(len(self)))
561 561 def revs(self, start=0, stop=None):
562 562 """iterate over all rev in this revlog (from start to stop)"""
563 563 return storageutil.iterrevs(len(self), start=start, stop=stop)
564 564
565 565 @util.propertycache
566 566 def nodemap(self):
567 567 if self.index:
568 568 # populate mapping down to the initial node
569 569 node0 = self.index[0][7] # get around changelog filtering
570 570 self.rev(node0)
571 571 return self._nodecache
572 572
573 573 def hasnode(self, node):
574 574 try:
575 575 self.rev(node)
576 576 return True
577 577 except KeyError:
578 578 return False
579 579
580 580 def candelta(self, baserev, rev):
581 581 """whether two revisions (baserev, rev) can be delta-ed or not"""
582 582 # Disable delta if either rev requires a content-changing flag
583 583 # processor (ex. LFS). This is because such flag processor can alter
584 584 # the rawtext content that the delta will be based on, and two clients
585 585 # could have the same revlog node with different flags (i.e. different
586 586 # rawtext contents) and the delta could be incompatible.
587 587 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
588 588 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
589 589 return False
590 590 return True
591 591
592 592 def clearcaches(self):
593 593 self._revisioncache = None
594 594 self._chainbasecache.clear()
595 595 self._chunkcache = (0, '')
596 596 self._pcache = {}
597 597
598 598 try:
599 599 self._nodecache.clearcaches()
600 600 except AttributeError:
601 601 self._nodecache = {nullid: nullrev}
602 602 self._nodepos = None
603 603
604 604 def rev(self, node):
605 605 try:
606 606 return self._nodecache[node]
607 607 except TypeError:
608 608 raise
609 609 except error.RevlogError:
610 610 # parsers.c radix tree lookup failed
611 611 if node == wdirid or node in wdirfilenodeids:
612 612 raise error.WdirUnsupported
613 613 raise error.LookupError(node, self.indexfile, _('no node'))
614 614 except KeyError:
615 615 # pure python cache lookup failed
616 616 n = self._nodecache
617 617 i = self.index
618 618 p = self._nodepos
619 619 if p is None:
620 620 p = len(i) - 1
621 621 else:
622 622 assert p < len(i)
623 623 for r in pycompat.xrange(p, -1, -1):
624 624 v = i[r][7]
625 625 n[v] = r
626 626 if v == node:
627 627 self._nodepos = r - 1
628 628 return r
629 629 if node == wdirid or node in wdirfilenodeids:
630 630 raise error.WdirUnsupported
631 631 raise error.LookupError(node, self.indexfile, _('no node'))
632 632
633 633 # Accessors for index entries.
634 634
635 635 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
636 636 # are flags.
637 637 def start(self, rev):
638 638 return int(self.index[rev][0] >> 16)
639 639
640 640 def flags(self, rev):
641 641 return self.index[rev][0] & 0xFFFF
642 642
643 643 def length(self, rev):
644 644 return self.index[rev][1]
645 645
646 646 def rawsize(self, rev):
647 647 """return the length of the uncompressed text for a given revision"""
648 648 l = self.index[rev][2]
649 649 if l >= 0:
650 650 return l
651 651
652 652 t = self.revision(rev, raw=True)
653 653 return len(t)
654 654
655 655 def size(self, rev):
656 656 """length of non-raw text (processed by a "read" flag processor)"""
657 657 # fast path: if no "read" flag processor could change the content,
658 658 # size is rawsize. note: ELLIPSIS is known to not change the content.
659 659 flags = self.flags(rev)
660 660 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
661 661 return self.rawsize(rev)
662 662
663 663 return len(self.revision(rev, raw=False))
664 664
665 665 def chainbase(self, rev):
666 666 base = self._chainbasecache.get(rev)
667 667 if base is not None:
668 668 return base
669 669
670 670 index = self.index
671 671 iterrev = rev
672 672 base = index[iterrev][3]
673 673 while base != iterrev:
674 674 iterrev = base
675 675 base = index[iterrev][3]
676 676
677 677 self._chainbasecache[rev] = base
678 678 return base
679 679
680 680 def linkrev(self, rev):
681 681 return self.index[rev][4]
682 682
683 683 def parentrevs(self, rev):
684 684 try:
685 685 entry = self.index[rev]
686 686 except IndexError:
687 687 if rev == wdirrev:
688 688 raise error.WdirUnsupported
689 689 raise
690 690
691 691 return entry[5], entry[6]
692 692
693 693 # fast parentrevs(rev) where rev isn't filtered
694 694 _uncheckedparentrevs = parentrevs
695 695
696 696 def node(self, rev):
697 697 try:
698 698 return self.index[rev][7]
699 699 except IndexError:
700 700 if rev == wdirrev:
701 701 raise error.WdirUnsupported
702 702 raise
703 703
704 704 # Derived from index values.
705 705
706 706 def end(self, rev):
707 707 return self.start(rev) + self.length(rev)
708 708
709 709 def parents(self, node):
710 710 i = self.index
711 711 d = i[self.rev(node)]
712 712 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
713 713
714 714 def chainlen(self, rev):
715 715 return self._chaininfo(rev)[0]
716 716
717 717 def _chaininfo(self, rev):
718 718 chaininfocache = self._chaininfocache
719 719 if rev in chaininfocache:
720 720 return chaininfocache[rev]
721 721 index = self.index
722 722 generaldelta = self._generaldelta
723 723 iterrev = rev
724 724 e = index[iterrev]
725 725 clen = 0
726 726 compresseddeltalen = 0
727 727 while iterrev != e[3]:
728 728 clen += 1
729 729 compresseddeltalen += e[1]
730 730 if generaldelta:
731 731 iterrev = e[3]
732 732 else:
733 733 iterrev -= 1
734 734 if iterrev in chaininfocache:
735 735 t = chaininfocache[iterrev]
736 736 clen += t[0]
737 737 compresseddeltalen += t[1]
738 738 break
739 739 e = index[iterrev]
740 740 else:
741 741 # Add text length of base since decompressing that also takes
742 742 # work. For cache hits the length is already included.
743 743 compresseddeltalen += e[1]
744 744 r = (clen, compresseddeltalen)
745 745 chaininfocache[rev] = r
746 746 return r
747 747
748 748 def _deltachain(self, rev, stoprev=None):
749 749 """Obtain the delta chain for a revision.
750 750
751 751 ``stoprev`` specifies a revision to stop at. If not specified, we
752 752 stop at the base of the chain.
753 753
754 754 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
755 755 revs in ascending order and ``stopped`` is a bool indicating whether
756 756 ``stoprev`` was hit.
757 757 """
758 758 # Try C implementation.
759 759 try:
760 760 return self.index.deltachain(rev, stoprev, self._generaldelta)
761 761 except AttributeError:
762 762 pass
763 763
764 764 chain = []
765 765
766 766 # Alias to prevent attribute lookup in tight loop.
767 767 index = self.index
768 768 generaldelta = self._generaldelta
769 769
770 770 iterrev = rev
771 771 e = index[iterrev]
772 772 while iterrev != e[3] and iterrev != stoprev:
773 773 chain.append(iterrev)
774 774 if generaldelta:
775 775 iterrev = e[3]
776 776 else:
777 777 iterrev -= 1
778 778 e = index[iterrev]
779 779
780 780 if iterrev == stoprev:
781 781 stopped = True
782 782 else:
783 783 chain.append(iterrev)
784 784 stopped = False
785 785
786 786 chain.reverse()
787 787 return chain, stopped
788 788
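# Editor's sketch (hypothetical index contents): with generaldelta each
# entry's base field (e[3]) names its delta parent, so the walk can hop,
# e.g. bases 7 -> 4 -> 2 -> 2 yield chain == [2, 4, 7]; without
# generaldelta the walk is always rev, rev - 1, ... down to the base.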
789 789 def ancestors(self, revs, stoprev=0, inclusive=False):
790 790 """Generate the ancestors of 'revs' in reverse revision order.
791 791 Does not generate revs lower than stoprev.
792 792
793 793 See the documentation for ancestor.lazyancestors for more details."""
794 794
795 795 # first, make sure start revisions aren't filtered
796 796 revs = list(revs)
797 797 checkrev = self.node
798 798 for r in revs:
799 799 checkrev(r)
800 800 # and we're sure ancestors aren't filtered as well
801 801
802 802 if rustext is not None:
803 803 lazyancestors = rustext.ancestor.LazyAncestors
804 804 arg = self.index
805 805 elif util.safehasattr(parsers, 'rustlazyancestors'):
806 806 lazyancestors = ancestor.rustlazyancestors
807 807 arg = self.index
808 808 else:
809 809 lazyancestors = ancestor.lazyancestors
810 810 arg = self._uncheckedparentrevs
811 811 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
812 812
813 813 def descendants(self, revs):
814 814 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
815 815
816 816 def findcommonmissing(self, common=None, heads=None):
817 817 """Return a tuple of the ancestors of common and the ancestors of heads
818 818 that are not ancestors of common. In revset terminology, we return the
819 819 tuple:
820 820
821 821 ::common, (::heads) - (::common)
822 822
823 823 The list is sorted by revision number, meaning it is
824 824 topologically sorted.
825 825
826 826 'heads' and 'common' are both lists of node IDs. If heads is
827 827 not supplied, uses all of the revlog's heads. If common is not
828 828 supplied, uses nullid."""
829 829 if common is None:
830 830 common = [nullid]
831 831 if heads is None:
832 832 heads = self.heads()
833 833
834 834 common = [self.rev(n) for n in common]
835 835 heads = [self.rev(n) for n in heads]
836 836
837 837 # we want the ancestors, but inclusive
838 838 class lazyset(object):
839 839 def __init__(self, lazyvalues):
840 840 self.addedvalues = set()
841 841 self.lazyvalues = lazyvalues
842 842
843 843 def __contains__(self, value):
844 844 return value in self.addedvalues or value in self.lazyvalues
845 845
846 846 def __iter__(self):
847 847 added = self.addedvalues
848 848 for r in added:
849 849 yield r
850 850 for r in self.lazyvalues:
851 851 if r not in added:
852 852 yield r
853 853
854 854 def add(self, value):
855 855 self.addedvalues.add(value)
856 856
857 857 def update(self, values):
858 858 self.addedvalues.update(values)
859 859
860 860 has = lazyset(self.ancestors(common))
861 861 has.add(nullrev)
862 862 has.update(common)
863 863
864 864 # take all ancestors from heads that aren't in has
865 865 missing = set()
866 866 visit = collections.deque(r for r in heads if r not in has)
867 867 while visit:
868 868 r = visit.popleft()
869 869 if r in missing:
870 870 continue
871 871 else:
872 872 missing.add(r)
873 873 for p in self.parentrevs(r):
874 874 if p not in has:
875 875 visit.append(p)
876 876 missing = list(missing)
877 877 missing.sort()
878 878 return has, [self.node(miss) for miss in missing]
879 879
880 880 def incrementalmissingrevs(self, common=None):
881 881 """Return an object that can be used to incrementally compute the
882 882 revision numbers of the ancestors of arbitrary sets that are not
883 883 ancestors of common. This is an ancestor.incrementalmissingancestors
884 884 object.
885 885
886 886 'common' is a list of revision numbers. If common is not supplied, uses
887 887 nullrev.
888 888 """
889 889 if common is None:
890 890 common = [nullrev]
891 891
892 892 return ancestor.incrementalmissingancestors(self.parentrevs, common)
893 893
894 894 def findmissingrevs(self, common=None, heads=None):
895 895 """Return the revision numbers of the ancestors of heads that
896 896 are not ancestors of common.
897 897
898 898 More specifically, return a list of revision numbers corresponding to
899 899 nodes N such that every N satisfies the following constraints:
900 900
901 901 1. N is an ancestor of some node in 'heads'
902 902 2. N is not an ancestor of any node in 'common'
903 903
904 904 The list is sorted by revision number, meaning it is
905 905 topologically sorted.
906 906
907 907 'heads' and 'common' are both lists of revision numbers. If heads is
908 908 not supplied, uses all of the revlog's heads. If common is not
909 909 supplied, uses nullrev."""
910 910 if common is None:
911 911 common = [nullrev]
912 912 if heads is None:
913 913 heads = self.headrevs()
914 914
915 915 inc = self.incrementalmissingrevs(common=common)
916 916 return inc.missingancestors(heads)
917 917
918 918 def findmissing(self, common=None, heads=None):
919 919 """Return the ancestors of heads that are not ancestors of common.
920 920
921 921 More specifically, return a list of nodes N such that every N
922 922 satisfies the following constraints:
923 923
924 924 1. N is an ancestor of some node in 'heads'
925 925 2. N is not an ancestor of any node in 'common'
926 926
927 927 The list is sorted by revision number, meaning it is
928 928 topologically sorted.
929 929
930 930 'heads' and 'common' are both lists of node IDs. If heads is
931 931 not supplied, uses all of the revlog's heads. If common is not
932 932 supplied, uses nullid."""
933 933 if common is None:
934 934 common = [nullid]
935 935 if heads is None:
936 936 heads = self.heads()
937 937
938 938 common = [self.rev(n) for n in common]
939 939 heads = [self.rev(n) for n in heads]
940 940
941 941 inc = self.incrementalmissingrevs(common=common)
942 942 return [self.node(r) for r in inc.missingancestors(heads)]
943 943
944 944 def nodesbetween(self, roots=None, heads=None):
945 945 """Return a topological path from 'roots' to 'heads'.
946 946
947 947 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
948 948 topologically sorted list of all nodes N that satisfy both of
949 949 these constraints:
950 950
951 951 1. N is a descendant of some node in 'roots'
952 952 2. N is an ancestor of some node in 'heads'
953 953
954 954 Every node is considered to be both a descendant and an ancestor
955 955 of itself, so every reachable node in 'roots' and 'heads' will be
956 956 included in 'nodes'.
957 957
958 958 'outroots' is the list of reachable nodes in 'roots', i.e., the
959 959 subset of 'roots' that is returned in 'nodes'. Likewise,
960 960 'outheads' is the subset of 'heads' that is also in 'nodes'.
961 961
962 962 'roots' and 'heads' are both lists of node IDs. If 'roots' is
963 963 unspecified, uses nullid as the only root. If 'heads' is
964 964 unspecified, uses list of all of the revlog's heads."""
965 965 nonodes = ([], [], [])
966 966 if roots is not None:
967 967 roots = list(roots)
968 968 if not roots:
969 969 return nonodes
970 970 lowestrev = min([self.rev(n) for n in roots])
971 971 else:
972 972 roots = [nullid] # Everybody's a descendant of nullid
973 973 lowestrev = nullrev
974 974 if (lowestrev == nullrev) and (heads is None):
975 975 # We want _all_ the nodes!
976 976 return ([self.node(r) for r in self], [nullid], list(self.heads()))
977 977 if heads is None:
978 978 # All nodes are ancestors, so the latest ancestor is the last
979 979 # node.
980 980 highestrev = len(self) - 1
981 981 # Set ancestors to None to signal that every node is an ancestor.
982 982 ancestors = None
983 983 # Set heads to an empty dictionary for later discovery of heads
984 984 heads = {}
985 985 else:
986 986 heads = list(heads)
987 987 if not heads:
988 988 return nonodes
989 989 ancestors = set()
990 990 # Turn heads into a dictionary so we can remove 'fake' heads.
991 991 # Also, later we will be using it to filter out the heads we can't
992 992 # find from roots.
993 993 heads = dict.fromkeys(heads, False)
994 994 # Start at the top and keep marking parents until we're done.
995 995 nodestotag = set(heads)
996 996 # Remember where the top was so we can use it as a limit later.
997 997 highestrev = max([self.rev(n) for n in nodestotag])
998 998 while nodestotag:
999 999 # grab a node to tag
1000 1000 n = nodestotag.pop()
1001 1001 # Never tag nullid
1002 1002 if n == nullid:
1003 1003 continue
1004 1004 # A node's revision number represents its place in a
1005 1005 # topologically sorted list of nodes.
1006 1006 r = self.rev(n)
1007 1007 if r >= lowestrev:
1008 1008 if n not in ancestors:
1009 1009 # If we are possibly a descendant of one of the roots
1010 1010 # and we haven't already been marked as an ancestor
1011 1011 ancestors.add(n) # Mark as ancestor
1012 1012 # Add non-nullid parents to list of nodes to tag.
1013 1013 nodestotag.update([p for p in self.parents(n) if
1014 1014 p != nullid])
1015 1015 elif n in heads: # We've seen it before, is it a fake head?
1016 1016 # So it is, real heads should not be the ancestors of
1017 1017 # any other heads.
1018 1018 heads.pop(n)
1019 1019 if not ancestors:
1020 1020 return nonodes
1021 1021 # Now that we have our set of ancestors, we want to remove any
1022 1022 # roots that are not ancestors.
1023 1023
1024 1024 # If one of the roots was nullid, everything is included anyway.
1025 1025 if lowestrev > nullrev:
1026 1026 # But, since we weren't, let's recompute the lowest rev to not
1027 1027 # include roots that aren't ancestors.
1028 1028
1029 1029 # Filter out roots that aren't ancestors of heads
1030 1030 roots = [root for root in roots if root in ancestors]
1031 1031 # Recompute the lowest revision
1032 1032 if roots:
1033 1033 lowestrev = min([self.rev(root) for root in roots])
1034 1034 else:
1035 1035 # No more roots? Return empty list
1036 1036 return nonodes
1037 1037 else:
1038 1038 # We are descending from nullid, and don't need to care about
1039 1039 # any other roots.
1040 1040 lowestrev = nullrev
1041 1041 roots = [nullid]
1042 1042 # Transform our roots list into a set.
1043 1043 descendants = set(roots)
1044 1044 # Also, keep the original roots so we can filter out roots that aren't
1045 1045 # 'real' roots (i.e. are descended from other roots).
1046 1046 roots = descendants.copy()
1047 1047 # Our topologically sorted list of output nodes.
1048 1048 orderedout = []
1049 1049 # Don't start at nullid since we don't want nullid in our output list,
1050 1050 # and if nullid shows up in descendants, empty parents will look like
1051 1051 # they're descendants.
1052 1052 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1053 1053 n = self.node(r)
1054 1054 isdescendant = False
1055 1055 if lowestrev == nullrev: # Everybody is a descendant of nullid
1056 1056 isdescendant = True
1057 1057 elif n in descendants:
1058 1058 # n is already a descendant
1059 1059 isdescendant = True
1060 1060 # This check only needs to be done here because all the roots
1061 1061 # will start being marked as descendants before the loop.
1062 1062 if n in roots:
1063 1063 # If n was a root, check if it's a 'real' root.
1064 1064 p = tuple(self.parents(n))
1065 1065 # If any of its parents are descendants, it's not a root.
1066 1066 if (p[0] in descendants) or (p[1] in descendants):
1067 1067 roots.remove(n)
1068 1068 else:
1069 1069 p = tuple(self.parents(n))
1070 1070 # A node is a descendant if either of its parents are
1071 1071 # descendants. (We seeded the descendants set with the roots
1072 1072 # up there, remember?)
1073 1073 if (p[0] in descendants) or (p[1] in descendants):
1074 1074 descendants.add(n)
1075 1075 isdescendant = True
1076 1076 if isdescendant and ((ancestors is None) or (n in ancestors)):
1077 1077 # Only include nodes that are both descendants and ancestors.
1078 1078 orderedout.append(n)
1079 1079 if (ancestors is not None) and (n in heads):
1080 1080 # We're trying to figure out which heads are reachable
1081 1081 # from roots.
1082 1082 # Mark this head as having been reached
1083 1083 heads[n] = True
1084 1084 elif ancestors is None:
1085 1085 # Otherwise, we're trying to discover the heads.
1086 1086 # Assume this is a head because if it isn't, the next step
1087 1087 # will eventually remove it.
1088 1088 heads[n] = True
1089 1089 # But, obviously its parents aren't.
1090 1090 for p in self.parents(n):
1091 1091 heads.pop(p, None)
1092 1092 heads = [head for head, flag in heads.iteritems() if flag]
1093 1093 roots = list(roots)
1094 1094 assert orderedout
1095 1095 assert roots
1096 1096 assert heads
1097 1097 return (orderedout, roots, heads)
1098 1098
1099 1099 def headrevs(self):
1100 1100 try:
1101 1101 return self.index.headrevs()
1102 1102 except AttributeError:
1103 1103 return self._headrevs()
1104 1104
1105 1105 def computephases(self, roots):
1106 1106 return self.index.computephasesmapsets(roots)
1107 1107
1108 1108 def _headrevs(self):
1109 1109 count = len(self)
1110 1110 if not count:
1111 1111 return [nullrev]
1112 1112 # we won't iterate over filtered revs, so nobody is a head at start
1113 1113 ishead = [0] * (count + 1)
1114 1114 index = self.index
1115 1115 for r in self:
1116 1116 ishead[r] = 1 # I may be a head
1117 1117 e = index[r]
1118 1118 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1119 1119 return [r for r, val in enumerate(ishead) if val]
1120 1120
1121 1121 def heads(self, start=None, stop=None):
1122 1122 """return the list of all nodes that have no children
1123 1123
1124 1124 if start is specified, only heads that are descendants of
1125 1125 start will be returned
1126 1126 if stop is specified, it will consider all the revs from stop
1127 1127 as if they had no children
1128 1128 """
1129 1129 if start is None and stop is None:
1130 1130 if not len(self):
1131 1131 return [nullid]
1132 1132 return [self.node(r) for r in self.headrevs()]
1133 1133
1134 1134 if start is None:
1135 1135 start = nullrev
1136 1136 else:
1137 1137 start = self.rev(start)
1138 1138
1139 1139 stoprevs = set(self.rev(n) for n in stop or [])
1140 1140
1141 1141 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1142 1142 stoprevs=stoprevs)
1143 1143
1144 1144 return [self.node(rev) for rev in revs]
1145 1145
1146 1146 def children(self, node):
1147 1147 """find the children of a given node"""
1148 1148 c = []
1149 1149 p = self.rev(node)
1150 1150 for r in self.revs(start=p + 1):
1151 1151 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1152 1152 if prevs:
1153 1153 for pr in prevs:
1154 1154 if pr == p:
1155 1155 c.append(self.node(r))
1156 1156 elif p == nullrev:
1157 1157 c.append(self.node(r))
1158 1158 return c
1159 1159
1160 1160 def commonancestorsheads(self, a, b):
1161 1161 """calculate all the heads of the common ancestors of nodes a and b"""
1162 1162 a, b = self.rev(a), self.rev(b)
1163 1163 ancs = self._commonancestorsheads(a, b)
1164 1164 return pycompat.maplist(self.node, ancs)
1165 1165
1166 1166 def _commonancestorsheads(self, *revs):
1167 1167 """calculate all the heads of the common ancestors of revs"""
1168 1168 try:
1169 1169 ancs = self.index.commonancestorsheads(*revs)
1170 1170 except (AttributeError, OverflowError): # C implementation failed
1171 1171 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1172 1172 return ancs
1173 1173
1174 1174 def isancestor(self, a, b):
1175 1175 """return True if node a is an ancestor of node b
1176 1176
1177 1177 A revision is considered an ancestor of itself."""
1178 1178 a, b = self.rev(a), self.rev(b)
1179 1179 return self.isancestorrev(a, b)
1180 1180
1181 1181 def isancestorrev(self, a, b):
1182 1182 """return True if revision a is an ancestor of revision b
1183 1183
1184 1184 A revision is considered an ancestor of itself.
1185 1185
1186 1186 The implementation of this is trivial but the use of
1187 1187 commonancestorsheads is not."""
1188 1188 if a == nullrev:
1189 1189 return True
1190 1190 elif a == b:
1191 1191 return True
1192 1192 elif a > b:
1193 1193 return False
1194 1194 return a in self._commonancestorsheads(a, b)
1195 1195
1196 1196 def ancestor(self, a, b):
1197 1197 """calculate the "best" common ancestor of nodes a and b"""
1198 1198
1199 1199 a, b = self.rev(a), self.rev(b)
1200 1200 try:
1201 1201 ancs = self.index.ancestors(a, b)
1202 1202 except (AttributeError, OverflowError):
1203 1203 ancs = ancestor.ancestors(self.parentrevs, a, b)
1204 1204 if ancs:
1205 1205 # choose a consistent winner when there's a tie
1206 1206 return min(map(self.node, ancs))
1207 1207 return nullid
1208 1208
1209 1209 def _match(self, id):
1210 1210 if isinstance(id, int):
1211 1211 # rev
1212 1212 return self.node(id)
1213 1213 if len(id) == 20:
1214 1214 # possibly a binary node
1215 1215 # odds of a binary node being all hex in ASCII are 1 in 10**25
1216 1216 try:
1217 1217 node = id
1218 1218 self.rev(node) # quick search the index
1219 1219 return node
1220 1220 except error.LookupError:
1221 1221 pass # may be partial hex id
1222 1222 try:
1223 1223 # str(rev)
1224 1224 rev = int(id)
1225 1225 if "%d" % rev != id:
1226 1226 raise ValueError
1227 1227 if rev < 0:
1228 1228 rev = len(self) + rev
1229 1229 if rev < 0 or rev >= len(self):
1230 1230 raise ValueError
1231 1231 return self.node(rev)
1232 1232 except (ValueError, OverflowError):
1233 1233 pass
1234 1234 if len(id) == 40:
1235 1235 try:
1236 1236 # a full hex nodeid?
1237 1237 node = bin(id)
1238 1238 self.rev(node)
1239 1239 return node
1240 1240 except (TypeError, error.LookupError):
1241 1241 pass
1242 1242
1243 1243 def _partialmatch(self, id):
1244 1244 # we don't care about wdirfilenodeids as they should always be full hashes
1245 1245 maybewdir = wdirhex.startswith(id)
1246 1246 try:
1247 1247 partial = self.index.partialmatch(id)
1248 1248 if partial and self.hasnode(partial):
1249 1249 if maybewdir:
1250 1250 # single 'ff...' match in radix tree, ambiguous with wdir
1251 1251 raise error.RevlogError
1252 1252 return partial
1253 1253 if maybewdir:
1254 1254 # no 'ff...' match in radix tree, wdir identified
1255 1255 raise error.WdirUnsupported
1256 1256 return None
1257 1257 except error.RevlogError:
1258 1258 # parsers.c radix tree lookup gave multiple matches
1259 1259 # fast path: for unfiltered changelog, radix tree is accurate
1260 1260 if not getattr(self, 'filteredrevs', None):
1261 1261 raise error.AmbiguousPrefixLookupError(
1262 1262 id, self.indexfile, _('ambiguous identifier'))
1263 1263 # fall through to slow path that filters hidden revisions
1264 1264 except (AttributeError, ValueError):
1265 1265 # we are pure python, or key was too short to search radix tree
1266 1266 pass
1267 1267
1268 1268 if id in self._pcache:
1269 1269 return self._pcache[id]
1270 1270
1271 1271 if len(id) <= 40:
1272 1272 try:
1273 1273 # hex(node)[:...]
1274 1274 l = len(id) // 2 # grab an even number of digits
1275 1275 prefix = bin(id[:l * 2])
1276 1276 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1277 1277 nl = [n for n in nl if hex(n).startswith(id) and
1278 1278 self.hasnode(n)]
1279 1279 if nullhex.startswith(id):
1280 1280 nl.append(nullid)
1281 1281 if len(nl) > 0:
1282 1282 if len(nl) == 1 and not maybewdir:
1283 1283 self._pcache[id] = nl[0]
1284 1284 return nl[0]
1285 1285 raise error.AmbiguousPrefixLookupError(
1286 1286 id, self.indexfile, _('ambiguous identifier'))
1287 1287 if maybewdir:
1288 1288 raise error.WdirUnsupported
1289 1289 return None
1290 1290 except TypeError:
1291 1291 pass
1292 1292
1293 1293 def lookup(self, id):
1294 1294 """locate a node based on:
1295 1295 - revision number or str(revision number)
1296 1296 - nodeid or subset of hex nodeid
1297 1297 """
1298 1298 n = self._match(id)
1299 1299 if n is not None:
1300 1300 return n
1301 1301 n = self._partialmatch(id)
1302 1302 if n:
1303 1303 return n
1304 1304
1305 1305 raise error.LookupError(id, self.indexfile, _('no match found'))
1306 1306
1307 1307 def shortest(self, node, minlength=1):
1308 1308 """Find the shortest unambiguous prefix that matches node."""
1309 1309 def isvalid(prefix):
1310 1310 try:
1311 1311 node = self._partialmatch(prefix)
1312 1312 except error.AmbiguousPrefixLookupError:
1313 1313 return False
1314 1314 except error.WdirUnsupported:
1315 1315 # single 'ff...' match
1316 1316 return True
1317 1317 if node is None:
1318 1318 raise error.LookupError(node, self.indexfile, _('no node'))
1319 1319 return True
1320 1320
1321 1321 def maybewdir(prefix):
1322 1322 return all(c == 'f' for c in prefix)
1323 1323
1324 1324 hexnode = hex(node)
1325 1325
1326 1326 def disambiguate(hexnode, minlength):
1327 1327 """Disambiguate against wdirid."""
1328 1328 for length in range(minlength, 41):
1329 1329 prefix = hexnode[:length]
1330 1330 if not maybewdir(prefix):
1331 1331 return prefix
1332 1332
1333 1333 if not getattr(self, 'filteredrevs', None):
1334 1334 try:
1335 1335 length = max(self.index.shortest(node), minlength)
1336 1336 return disambiguate(hexnode, length)
1337 1337 except error.RevlogError:
1338 1338 if node != wdirid:
1339 1339 raise error.LookupError(node, self.indexfile, _('no node'))
1340 1340 except AttributeError:
1341 1341 # Fall through to pure code
1342 1342 pass
1343 1343
1344 1344 if node == wdirid:
1345 1345 for length in range(minlength, 41):
1346 1346 prefix = hexnode[:length]
1347 1347 if isvalid(prefix):
1348 1348 return prefix
1349 1349
1350 1350 for length in range(minlength, 41):
1351 1351 prefix = hexnode[:length]
1352 1352 if isvalid(prefix):
1353 1353 return disambiguate(hexnode, length)
1354 1354
1355 1355 def cmp(self, node, text):
1356 1356 """compare text with a given file revision
1357 1357
1358 1358 returns True if text is different from what is stored.
1359 1359 """
1360 1360 p1, p2 = self.parents(node)
1361 1361 return storageutil.hashrevisionsha1(text, p1, p2) != node
1362 1362
1363 1363 def _cachesegment(self, offset, data):
1364 1364 """Add a segment to the revlog cache.
1365 1365
1366 1366 Accepts an absolute offset and the data that is at that location.
1367 1367 """
1368 1368 o, d = self._chunkcache
1369 1369 # try to add to existing cache
1370 1370 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1371 1371 self._chunkcache = o, d + data
1372 1372 else:
1373 1373 self._chunkcache = offset, data
1374 1374
1375 1375 def _readsegment(self, offset, length, df=None):
1376 1376 """Load a segment of raw data from the revlog.
1377 1377
1378 1378 Accepts an absolute offset, length to read, and an optional existing
1379 1379 file handle to read from.
1380 1380
1381 1381 If an existing file handle is passed, it will be seeked and the
1382 1382 original seek position will NOT be restored.
1383 1383
1384 1384 Returns a str or buffer of raw byte data.
1385 1385
1386 1386 Raises if the requested number of bytes could not be read.
1387 1387 """
1388 1388 # Cache data both forward and backward around the requested
1389 1389 # data, in a fixed size window. This helps speed up operations
1390 1390 # involving reading the revlog backwards.
1391 1391 cachesize = self._chunkcachesize
1392 1392 realoffset = offset & ~(cachesize - 1)
1393 1393 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1394 1394 - realoffset)
1395 1395 with self._datareadfp(df) as df:
1396 1396 df.seek(realoffset)
1397 1397 d = df.read(reallength)
1398 1398
1399 1399 self._cachesegment(realoffset, d)
1400 1400 if offset != realoffset or reallength != length:
1401 1401 startoffset = offset - realoffset
1402 1402 if len(d) - startoffset < length:
1403 1403 raise error.RevlogError(
1404 1404 _('partial read of revlog %s; expected %d bytes from '
1405 1405 'offset %d, got %d') %
1406 1406 (self.indexfile if self._inline else self.datafile,
1407 1407 length, realoffset, len(d) - startoffset))
1408 1408
1409 1409 return util.buffer(d, startoffset, length)
1410 1410
1411 1411 if len(d) < length:
1412 1412 raise error.RevlogError(
1413 1413 _('partial read of revlog %s; expected %d bytes from offset '
1414 1414 '%d, got %d') %
1415 1415 (self.indexfile if self._inline else self.datafile,
1416 1416 length, offset, len(d)))
1417 1417
1418 1418 return d
1419 1419
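# Editor's worked example (hypothetical numbers): with the default
# cachesize of 65536, a request for offset=70000, length=100 widens to
# realoffset = 70000 & ~65535 = 65536 and reallength =
# ((70000 + 100 + 65536) & ~65535) - 65536 = 65536, so one aligned
# 64 KiB window is read and cached, and the 100 requested bytes come
# back as a buffer starting 4464 bytes into it.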
1420 1420 def _getsegment(self, offset, length, df=None):
1421 1421 """Obtain a segment of raw data from the revlog.
1422 1422
1423 1423 Accepts an absolute offset, length of bytes to obtain, and an
1424 1424 optional file handle to the already-opened revlog. If the file
1425 1425 handle is used, its original seek position will not be preserved.
1426 1426
1427 1427 Requests for data may be returned from a cache.
1428 1428
1429 1429 Returns a str or a buffer instance of raw byte data.
1430 1430 """
1431 1431 o, d = self._chunkcache
1432 1432 l = len(d)
1433 1433
1434 1434 # is it in the cache?
1435 1435 cachestart = offset - o
1436 1436 cacheend = cachestart + length
1437 1437 if cachestart >= 0 and cacheend <= l:
1438 1438 if cachestart == 0 and cacheend == l:
1439 1439 return d # avoid a copy
1440 1440 return util.buffer(d, cachestart, cacheend - cachestart)
1441 1441
1442 1442 return self._readsegment(offset, length, df=df)
1443 1443
1444 1444 def _getsegmentforrevs(self, startrev, endrev, df=None):
1445 1445 """Obtain a segment of raw data corresponding to a range of revisions.
1446 1446
1447 1447 Accepts the start and end revisions and an optional already-open
1448 1448 file handle to be used for reading. If the file handle is used, its
1449 1449 seek position will not be preserved.
1450 1450
1451 1451 Requests for data may be satisfied by a cache.
1452 1452
1453 1453 Returns a 2-tuple of (offset, data) for the requested range of
1454 1454 revisions. Offset is the integer offset from the beginning of the
1455 1455 revlog and data is a str or buffer of the raw byte data.
1456 1456
1457 1457 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1458 1458 to determine where each revision's data begins and ends.
1459 1459 """
1460 1460 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1461 1461 # (functions are expensive).
1462 1462 index = self.index
1463 1463 istart = index[startrev]
1464 1464 start = int(istart[0] >> 16)
1465 1465 if startrev == endrev:
1466 1466 end = start + istart[1]
1467 1467 else:
1468 1468 iend = index[endrev]
1469 1469 end = int(iend[0] >> 16) + iend[1]
1470 1470
1471 1471 if self._inline:
1472 1472 start += (startrev + 1) * self._io.size
1473 1473 end += (endrev + 1) * self._io.size
1474 1474 length = end - start
1475 1475
1476 1476 return start, self._getsegment(start, length, df=df)
1477 1477
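# Editor's worked example (hypothetical inline revlog): index entries
# are 64 bytes and interleaved with the data in the .i file, so a chunk
# whose logical start is 500 for startrev == 3 physically begins at
# 500 + (3 + 1) * 64 == 756 bytes into the file.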
1478 1478 def _chunk(self, rev, df=None):
1479 1479 """Obtain a single decompressed chunk for a revision.
1480 1480
1481 1481 Accepts an integer revision and an optional already-open file handle
1482 1482 to be used for reading. If used, the seek position of the file will not
1483 1483 be preserved.
1484 1484
1485 1485 Returns a str holding uncompressed data for the requested revision.
1486 1486 """
1487 1487 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1488 1488
1489 1489 def _chunks(self, revs, df=None, targetsize=None):
1490 1490 """Obtain decompressed chunks for the specified revisions.
1491 1491
1492 1492 Accepts an iterable of numeric revisions that are assumed to be in
1493 1493 ascending order. Also accepts an optional already-open file handle
1494 1494 to be used for reading. If used, the seek position of the file will
1495 1495 not be preserved.
1496 1496
1497 1497 This function is similar to calling ``self._chunk()`` multiple times,
1498 1498 but is faster.
1499 1499
1500 1500 Returns a list with decompressed data for each requested revision.
1501 1501 """
1502 1502 if not revs:
1503 1503 return []
1504 1504 start = self.start
1505 1505 length = self.length
1506 1506 inline = self._inline
1507 1507 iosize = self._io.size
1508 1508 buffer = util.buffer
1509 1509
1510 1510 l = []
1511 1511 ladd = l.append
1512 1512
1513 1513 if not self._withsparseread:
1514 1514 slicedchunks = (revs,)
1515 1515 else:
1516 1516 slicedchunks = deltautil.slicechunk(self, revs,
1517 1517 targetsize=targetsize)
1518 1518
1519 1519 for revschunk in slicedchunks:
1520 1520 firstrev = revschunk[0]
1521 1521 # Skip trailing revisions with empty diff
1522 1522 for lastrev in revschunk[::-1]:
1523 1523 if length(lastrev) != 0:
1524 1524 break
1525 1525
1526 1526 try:
1527 1527 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1528 1528 except OverflowError:
1529 1529 # issue4215 - we can't cache a run of chunks greater than
1530 1530 # 2G on Windows
1531 1531 return [self._chunk(rev, df=df) for rev in revschunk]
1532 1532
1533 1533 decomp = self.decompress
1534 1534 for rev in revschunk:
1535 1535 chunkstart = start(rev)
1536 1536 if inline:
1537 1537 chunkstart += (rev + 1) * iosize
1538 1538 chunklength = length(rev)
1539 1539 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1540 1540
1541 1541 return l
1542 1542
1543 1543 def _chunkclear(self):
1544 1544 """Clear the raw chunk cache."""
1545 1545 self._chunkcache = (0, '')
1546 1546
1547 1547 def deltaparent(self, rev):
1548 1548 """return deltaparent of the given revision"""
1549 1549 base = self.index[rev][3]
1550 1550 if base == rev:
1551 1551 return nullrev
1552 1552 elif self._generaldelta:
1553 1553 return base
1554 1554 else:
1555 1555 return rev - 1
1556 1556
1557 1557 def issnapshot(self, rev):
1558 1558 """tells whether rev is a snapshot
1559 1559 """
1560 1560 if not self._sparserevlog:
1561 1561 return self.deltaparent(rev) == nullrev
1562 1562 elif util.safehasattr(self.index, 'issnapshot'):
1563 1563 # directly assign the method to cache the testing and access
1564 1564 self.issnapshot = self.index.issnapshot
1565 1565 return self.issnapshot(rev)
1566 1566 if rev == nullrev:
1567 1567 return True
1568 1568 entry = self.index[rev]
1569 1569 base = entry[3]
1570 1570 if base == rev:
1571 1571 return True
1572 1572 if base == nullrev:
1573 1573 return True
1574 1574 p1 = entry[5]
1575 1575 p2 = entry[6]
1576 1576 if base == p1 or base == p2:
1577 1577 return False
1578 1578 return self.issnapshot(base)
1579 1579
1580 1580 def snapshotdepth(self, rev):
1581 1581 """number of snapshots in the chain before this one"""
1582 1582 if not self.issnapshot(rev):
1583 1583 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1584 1584 return len(self._deltachain(rev)[0]) - 1
1585 1585
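# Illustrative sketch (not part of revlog.py): walking a delta chain by
# hand. deltaparent() names the revision each delta is stored against,
# and the walk stops at a snapshot, i.e. a revision stored as a full
# text (or, with sparse-revlog, an intermediate snapshot).
def _example_deltachainrevs(rl, rev):
    chain = [rev]
    while not rl.issnapshot(rev):
        rev = rl.deltaparent(rev)
        chain.append(rev)
    return chain
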
1586 1586 def revdiff(self, rev1, rev2):
1587 1587 """return or calculate a delta between two revisions
1588 1588
1589 1589 The delta calculated is in binary form and is intended to be written to
1590 1590 revlog data directly. So this function needs raw revision data.
1591 1591 """
1592 1592 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1593 1593 return bytes(self._chunk(rev2))
1594 1594
1595 1595 return mdiff.textdiff(self.revision(rev1, raw=True),
1596 1596 self.revision(rev2, raw=True))
1597 1597
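# Illustrative sketch (not part of revlog.py): revdiff() produces a
# binary delta that mdiff can re-apply to the raw text of rev1 to
# recover the raw text of rev2, mirroring what revision() does when
# patching a delta chain.
def _example_applyrevdiff(rl, rev1, rev2):
    delta = rl.revdiff(rev1, rev2)
    base = rl.revision(rev1, raw=True)
    assert mdiff.patches(base, [delta]) == rl.revision(rev2, raw=True)
    return delta
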
1598 1598 def revision(self, nodeorrev, _df=None, raw=False):
1599 1599 """return an uncompressed revision of a given node or revision
1600 1600 number.
1601 1601
1602 1602 _df - an existing file handle to read from. (internal-only)
1603 1603 raw - an optional argument specifying whether the revision data is to be
1604 1604 treated as raw data when applying flag transforms. 'raw' should be set
1605 1605 to True when generating changegroups or in debug commands.
1606 1606 """
1607 1607 if isinstance(nodeorrev, int):
1608 1608 rev = nodeorrev
1609 1609 node = self.node(rev)
1610 1610 else:
1611 1611 node = nodeorrev
1612 1612 rev = None
1613 1613
1614 1614 cachedrev = None
1615 1615 flags = None
1616 1616 rawtext = None
1617 1617 if node == nullid:
1618 1618 return ""
1619 1619 if self._revisioncache:
1620 1620 if self._revisioncache[0] == node:
1621 1621 # _cache only stores rawtext
1622 1622 if raw:
1623 1623 return self._revisioncache[2]
1624 1624 # duplicated, but good for perf
1625 1625 if rev is None:
1626 1626 rev = self.rev(node)
1627 1627 if flags is None:
1628 1628 flags = self.flags(rev)
1629 1629 # no extra flags set, no flag processor runs, text = rawtext
1630 1630 if flags == REVIDX_DEFAULT_FLAGS:
1631 1631 return self._revisioncache[2]
1632 1632 # rawtext is reusable. need to run flag processor
1633 1633 rawtext = self._revisioncache[2]
1634 1634
1635 1635 cachedrev = self._revisioncache[1]
1636 1636
1637 1637 # look up what we need to read
1638 1638 if rawtext is None:
1639 1639 if rev is None:
1640 1640 rev = self.rev(node)
1641 1641
1642 1642 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1643 1643 if stopped:
1644 1644 rawtext = self._revisioncache[2]
1645 1645
1646 1646 # drop cache to save memory
1647 1647 self._revisioncache = None
1648 1648
1649 1649 targetsize = None
1650 1650 rawsize = self.index[rev][2]
1651 1651 if 0 <= rawsize:
1652 1652 targetsize = 4 * rawsize
1653 1653
1654 1654 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1655 1655 if rawtext is None:
1656 1656 rawtext = bytes(bins[0])
1657 1657 bins = bins[1:]
1658 1658
1659 1659 rawtext = mdiff.patches(rawtext, bins)
1660 1660 self._revisioncache = (node, rev, rawtext)
1661 1661
1662 1662 if flags is None:
1663 1663 if rev is None:
1664 1664 rev = self.rev(node)
1665 1665 flags = self.flags(rev)
1666 1666
1667 1667 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1668 1668 if validatehash:
1669 1669 self.checkhash(text, node, rev=rev)
1670 1670
1671 1671 return text
1672 1672
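# Illustrative sketch (not part of revlog.py): the two flavors of
# revision(). raw=True returns the stored rawtext without running flag
# processors; raw=False runs them and validates the hash. With no
# flags set, the two results are identical.
def _example_readboth(rl, node):
    rawtext = rl.revision(node, raw=True)
    text = rl.revision(node)
    if rl.flags(rl.rev(node)) == REVIDX_DEFAULT_FLAGS:
        assert text == rawtext
    return text
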
1673 1673 def hash(self, text, p1, p2):
1674 1674 """Compute a node hash.
1675 1675
1676 1676 Available as a function so that subclasses can replace the hash
1677 1677 as needed.
1678 1678 """
1679 1679 return storageutil.hashrevisionsha1(text, p1, p2)
1680 1680
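# Illustrative sketch (not part of revlog.py): for a revision with no
# flags set, the stored node id is simply hash(rawtext, p1, p2), which
# is the comparison checkhash() performs below.
def _example_recomputenode(rl, rev):
    node = rl.node(rev)
    p1, p2 = rl.parents(node)
    return rl.hash(rl.revision(rev, raw=True), p1, p2) == node
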
1681 1681 def _processflags(self, text, flags, operation, raw=False):
1682 1682 """Inspect revision data flags and apply transforms defined by
1683 1683 registered flag processors.
1684 1684
1685 1685 ``text`` - the revision data to process
1686 1686 ``flags`` - the revision flags
1687 1687 ``operation`` - the operation being performed (read or write)
1688 1688 ``raw`` - an optional argument indicating whether the raw transform should be
1689 1689 applied.
1690 1690
1691 1691 This method processes the flags in the order (or reverse order if
1692 1692 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1693 1693 flag processors registered for present flags. The order of flags defined
1694 1694 in REVIDX_FLAGS_ORDER must be stable because the transforms do not commute.
1695 1695
1696 1696 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1697 1697 processed text and ``validatehash`` is a bool indicating whether the
1698 1698 returned text should be checked for hash integrity.
1699 1699
1700 1700 Note: If the ``raw`` argument is set, it has precedence over the
1701 1701 operation and will only update the value of ``validatehash``.
1702 1702 """
1703 1703 # fast path: no flag processors will run
1704 1704 if flags == 0:
1705 1705 return text, True
1706 1706 if operation not in ('read', 'write'):
1707 1707 raise error.ProgrammingError(_("invalid '%s' operation") %
1708 1708 operation)
1709 1709 # Check all flags are known.
1710 1710 if flags & ~REVIDX_KNOWN_FLAGS:
1711 1711 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1712 1712 (flags & ~REVIDX_KNOWN_FLAGS))
1713 1713 validatehash = True
1714 1714 # Depending on the operation (read or write), the order might be
1715 1715 # reversed due to non-commutative transforms.
1716 1716 orderedflags = REVIDX_FLAGS_ORDER
1717 1717 if operation == 'write':
1718 1718 orderedflags = reversed(orderedflags)
1719 1719
1720 1720 for flag in orderedflags:
1721 1721 # If a flagprocessor has been registered for a known flag, apply the
1722 1722 # related operation transform and update result tuple.
1723 1723 if flag & flags:
1724 1724 vhash = True
1725 1725
1726 1726 if flag not in self._flagprocessors:
1727 1727 message = _("missing processor for flag '%#x'") % (flag)
1728 1728 raise error.RevlogError(message)
1729 1729
1730 1730 processor = self._flagprocessors[flag]
1731 1731 if processor is not None:
1732 1732 readtransform, writetransform, rawtransform = processor
1733 1733
1734 1734 if raw:
1735 1735 vhash = rawtransform(self, text)
1736 1736 elif operation == 'read':
1737 1737 text, vhash = readtransform(self, text)
1738 1738 else: # write operation
1739 1739 text, vhash = writetransform(self, text)
1740 1740 validatehash = validatehash and vhash
1741 1741
1742 1742 return text, validatehash
1743 1743
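# Illustrative sketch (not part of revlog.py): the shape of a flag
# processor as consumed above. A processor is a 3-tuple of transforms;
# the read and write transforms return (newtext, validatehash) while
# the raw transform only reports whether the rawtext hash is valid.
def _examplereadtransform(rl, text):
    return text, True   # no-op read transform

def _examplewritetransform(rl, text):
    return text, True   # no-op write transform

def _examplerawtransform(rl, text):
    return True         # rawtext can be hash-checked as-is

_exampleflagprocessor = (_examplereadtransform,
                         _examplewritetransform,
                         _examplerawtransform)
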
1744 1744 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1745 1745 """Check node hash integrity.
1746 1746
1747 1747 Available as a function so that subclasses can extend hash mismatch
1748 1748 behaviors as needed.
1749 1749 """
1750 1750 try:
1751 1751 if p1 is None and p2 is None:
1752 1752 p1, p2 = self.parents(node)
1753 1753 if node != self.hash(text, p1, p2):
1754 1754 # Clear the revision cache on hash failure. The revision cache
1755 1755 # only stores the raw revision and clearing the cache does have
1756 1756 # the side-effect that we won't have a cache hit when the raw
1757 1757 # revision data is accessed. But this case should be rare and
1758 1758 # it is extra work to teach the cache about the hash
1759 1759 # verification state.
1760 1760 if self._revisioncache and self._revisioncache[0] == node:
1761 1761 self._revisioncache = None
1762 1762
1763 1763 revornode = rev
1764 1764 if revornode is None:
1765 1765 revornode = templatefilters.short(hex(node))
1766 1766 raise error.RevlogError(_("integrity check failed on %s:%s")
1767 1767 % (self.indexfile, pycompat.bytestr(revornode)))
1768 1768 except error.RevlogError:
1769 1769 if self._censorable and storageutil.iscensoredtext(text):
1770 1770 raise error.CensoredNodeError(self.indexfile, node, text)
1771 1771 raise
1772 1772
1773 1773 def _enforceinlinesize(self, tr, fp=None):
1774 1774 """Check if the revlog is too big for inline and convert if so.
1775 1775
1776 1776 This should be called after revisions are added to the revlog. If the
1777 1777 revlog has grown too large to be an inline revlog, it will convert it
1778 1778 to use multiple index and data files.
1779 1779 """
1780 1780 tiprev = len(self) - 1
1781 1781 if (not self._inline or
1782 1782 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1783 1783 return
1784 1784
1785 1785 trinfo = tr.find(self.indexfile)
1786 1786 if trinfo is None:
1787 1787 raise error.RevlogError(_("%s not found in the transaction")
1788 1788 % self.indexfile)
1789 1789
1790 1790 trindex = trinfo[2]
1791 1791 if trindex is not None:
1792 1792 dataoff = self.start(trindex)
1793 1793 else:
1794 1794 # revlog was stripped at start of transaction, use all leftover data
1795 1795 trindex = len(self) - 1
1796 1796 dataoff = self.end(tiprev)
1797 1797
1798 1798 tr.add(self.datafile, dataoff)
1799 1799
1800 1800 if fp:
1801 1801 fp.flush()
1802 1802 fp.close()
1803 1803 # We can't use the cached file handle after close(). So prevent
1804 1804 # its usage.
1805 1805 self._writinghandles = None
1806 1806
1807 1807 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1808 1808 for r in self:
1809 1809 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1810 1810
1811 1811 with self._indexfp('w') as fp:
1812 1812 self.version &= ~FLAG_INLINE_DATA
1813 1813 self._inline = False
1814 1814 io = self._io
1815 1815 for i in self:
1816 1816 e = io.packentry(self.index[i], self.node, self.version, i)
1817 1817 fp.write(e)
1818 1818
1819 1819 # the temp file replaces the real index when we exit the context
1820 1820 # manager
1821 1821
1822 1822 tr.replace(self.indexfile, trindex * self._io.size)
1823 1823 self._chunkclear()
1824 1824
1825 1825 def _nodeduplicatecallback(self, transaction, node):
1826 1826 """called when trying to add a node already stored.
1827 1827 """
1828 1828
1829 1829 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1830 1830 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1831 1831 """add a revision to the log
1832 1832
1833 1833 text - the revision data to add
1834 1834 transaction - the transaction object used for rollback
1835 1835 link - the linkrev data to add
1836 1836 p1, p2 - the parent nodeids of the revision
1837 1837 cachedelta - an optional precomputed delta
1838 1838 node - nodeid of revision; typically node is not specified, and it is
1839 1839 computed by default as hash(text, p1, p2), however subclasses might
1840 1840 use a different hashing method (and override checkhash() in that case)
1841 1841 flags - the known flags to set on the revision
1842 1842 deltacomputer - an optional deltacomputer instance shared between
1843 1843 multiple calls
1844 1844 """
1845 1845 if link == nullrev:
1846 1846 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1847 1847 % self.indexfile)
1848 1848
1849 1849 if flags:
1850 1850 node = node or self.hash(text, p1, p2)
1851 1851
1852 1852 rawtext, validatehash = self._processflags(text, flags, 'write')
1853 1853
1854 1854 # If the flag processor modifies the revision data, ignore any provided
1855 1855 # cachedelta.
1856 1856 if rawtext != text:
1857 1857 cachedelta = None
1858 1858
1859 1859 if len(rawtext) > _maxentrysize:
1860 1860 raise error.RevlogError(
1861 1861 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1862 1862 % (self.indexfile, len(rawtext)))
1863 1863
1864 1864 node = node or self.hash(rawtext, p1, p2)
1865 1865 if node in self.nodemap:
1866 1866 return node
1867 1867
1868 1868 if validatehash:
1869 1869 self.checkhash(rawtext, node, p1=p1, p2=p2)
1870 1870
1871 1871 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1872 1872 flags, cachedelta=cachedelta,
1873 1873 deltacomputer=deltacomputer)
1874 1874
1875 1875 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1876 1876 cachedelta=None, deltacomputer=None):
1877 1877 """add a raw revision with known flags, node and parents
1878 1878 useful when reusing a revision not stored in this revlog (ex: received
1879 1879 over wire, or read from an external bundle).
1880 1880 """
1881 1881 dfh = None
1882 1882 if not self._inline:
1883 1883 dfh = self._datafp("a+")
1884 1884 ifh = self._indexfp("a+")
1885 1885 try:
1886 1886 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1887 1887 flags, cachedelta, ifh, dfh,
1888 1888 deltacomputer=deltacomputer)
1889 1889 finally:
1890 1890 if dfh:
1891 1891 dfh.close()
1892 1892 ifh.close()
1893 1893
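# Illustrative sketch (not part of revlog.py): appending one revision.
# ``tr`` stands for a hypothetical open transaction; the node defaults
# to hash(text, p1, p2), and re-adding a stored node is a no-op that
# returns the existing node.
def _example_append(rl, tr, text, p1, p2, linkrev):
    node = rl.addrevision(text, tr, linkrev, p1, p2)
    assert node in rl.nodemap
    return node
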
1894 1894 def compress(self, data):
1895 1895 """Generate a possibly-compressed representation of data."""
1896 1896 if not data:
1897 1897 return '', data
1898 1898
1899 1899 compressed = self._compressor.compress(data)
1900 1900
1901 1901 if compressed:
1902 1902 # The revlog compressor added the header in the returned data.
1903 1903 return '', compressed
1904 1904
1905 1905 if data[0:1] == '\0':
1906 1906 return '', data
1907 1907 return 'u', data
1908 1908
1909 1909 def decompress(self, data):
1910 1910 """Decompress a revlog chunk.
1911 1911
1912 1912 The chunk is expected to begin with a header identifying the
1913 1913 format type so it can be routed to an appropriate decompressor.
1914 1914 """
1915 1915 if not data:
1916 1916 return data
1917 1917
1918 1918 # Revlogs are read much more frequently than they are written and many
1919 1919 # chunks only take microseconds to decompress, so performance is
1920 1920 # important here.
1921 1921 #
1922 1922 # We can make a few assumptions about revlogs:
1923 1923 #
1924 1924 # 1) the majority of chunks will be compressed (as opposed to inline
1925 1925 # raw data).
1926 1926 # 2) decompressing *any* data will likely be at least 10x slower than
1927 1927 # returning raw inline data.
1928 1928 # 3) we want to prioritize common and officially supported compression
1929 1929 # engines
1930 1930 #
1931 1931 # It follows that we want to optimize for "decompress compressed data
1932 1932 # when encoded with common and officially supported compression engines"
1933 1933 # case over "raw data" and "data encoded by less common or non-official
1934 1934 # compression engines." That is why we have the inline lookup first
1935 1935 # followed by the compengines lookup.
1936 1936 #
1937 1937 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1938 1938 # compressed chunks. And this matters for changelog and manifest reads.
1939 1939 t = data[0:1]
1940 1940
1941 1941 if t == 'x':
1942 1942 try:
1943 1943 return _zlibdecompress(data)
1944 1944 except zlib.error as e:
1945 1945 raise error.RevlogError(_('revlog decompress error: %s') %
1946 1946 stringutil.forcebytestr(e))
1947 1947 # '\0' is more common than 'u' so it goes first.
1948 1948 elif t == '\0':
1949 1949 return data
1950 1950 elif t == 'u':
1951 1951 return util.buffer(data, 1)
1952 1952
1953 1953 try:
1954 1954 compressor = self._decompressors[t]
1955 1955 except KeyError:
1956 1956 try:
1957 1957 engine = util.compengines.forrevlogheader(t)
1958 1958 compressor = engine.revlogcompressor()
1959 1959 self._decompressors[t] = compressor
1960 1960 except KeyError:
1961 1961 raise error.RevlogError(_('unknown compression type %r') % t)
1962 1962
1963 1963 return compressor.decompress(data)
1964 1964
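# Illustrative sketch (not part of revlog.py): the compress()/
# decompress() contract. compress() returns a (header, data) pair whose
# concatenation forms a valid chunk: 'u' marks data stored verbatim,
# while an empty header means the data either starts with '\0' or
# already carries an engine header such as 'x' for zlib.
def _example_chunkroundtrip(rl, data):
    header, packed = rl.compress(data)
    assert bytes(rl.decompress(header + packed)) == data
    return header
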
1965 1965 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1966 1966 cachedelta, ifh, dfh, alwayscache=False,
1967 1967 deltacomputer=None):
1968 1968 """internal function to add revisions to the log
1969 1969
1970 1970 see addrevision for argument descriptions.
1971 1971
1972 1972 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1973 1973
1974 1974 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1975 1975 be used.
1976 1976
1977 1977 invariants:
1978 1978 - rawtext is optional (can be None); if not set, cachedelta must be set.
1979 1979 if both are set, they must correspond to each other.
1980 1980 """
1981 1981 if node == nullid:
1982 1982 raise error.RevlogError(_("%s: attempt to add null revision") %
1983 1983 self.indexfile)
1984 1984 if node == wdirid or node in wdirfilenodeids:
1985 1985 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1986 1986 self.indexfile)
1987 1987
1988 1988 if self._inline:
1989 1989 fh = ifh
1990 1990 else:
1991 1991 fh = dfh
1992 1992
1993 1993 btext = [rawtext]
1994 1994
1995 1995 curr = len(self)
1996 1996 prev = curr - 1
1997 1997 offset = self.end(prev)
1998 1998 p1r, p2r = self.rev(p1), self.rev(p2)
1999 1999
2000 2000 # full versions are inserted when the needed deltas
2001 2001 # become comparable to the uncompressed text
2002 2002 if rawtext is None:
2003 2003 # need rawtext size, before changed by flag processors, which is
2004 2004 # the non-raw size. use revlog explicitly to avoid filelog's extra
2005 2005 # logic that might remove metadata size.
2006 2006 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2007 2007 cachedelta[1])
2008 2008 else:
2009 2009 textlen = len(rawtext)
2010 2010
2011 2011 if deltacomputer is None:
2012 2012 deltacomputer = deltautil.deltacomputer(self)
2013 2013
2014 2014 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2015 2015
2016 2016 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2017 2017
2018 2018 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2019 2019 deltainfo.base, link, p1r, p2r, node)
2020 2020 self.index.append(e)
2021 2021 self.nodemap[node] = curr
2022 2022
2023 2023 # Reset the pure node cache start lookup offset to account for new
2024 2024 # revision.
2025 2025 if self._nodepos is not None:
2026 2026 self._nodepos = curr
2027 2027
2028 2028 entry = self._io.packentry(e, self.node, self.version, curr)
2029 2029 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2030 2030 link, offset)
2031 2031
2032 2032 rawtext = btext[0]
2033 2033
2034 2034 if alwayscache and rawtext is None:
2035 2035 rawtext = deltacomputer.buildtext(revinfo, fh)
2036 2036
2037 2037 if type(rawtext) == bytes: # only accept immutable objects
2038 2038 self._revisioncache = (node, curr, rawtext)
2039 2039 self._chainbasecache[curr] = deltainfo.chainbase
2040 2040 return node
2041 2041
2042 2042 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2043 2043 # Files opened in a+ mode have inconsistent behavior on various
2044 2044 # platforms. Windows requires that a file positioning call be made
2045 2045 # when the file handle transitions between reads and writes. See
2046 2046 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2047 2047 # platforms, Python or the platform itself can be buggy. Some versions
2048 2048 # of Solaris have been observed to not append at the end of the file
2049 2049 # if the file was seeked to before the end. See issue4943 for more.
2050 2050 #
2051 2051 # We work around this issue by inserting a seek() before writing.
2052 2052 # Note: This is likely not necessary on Python 3. However, because
2053 2053 # the file handle is reused for reads and may be seeked there, we need
2054 2054 # to be careful before changing this.
2055 2055 ifh.seek(0, os.SEEK_END)
2056 2056 if dfh:
2057 2057 dfh.seek(0, os.SEEK_END)
2058 2058
2059 2059 curr = len(self) - 1
2060 2060 if not self._inline:
2061 2061 transaction.add(self.datafile, offset)
2062 2062 transaction.add(self.indexfile, curr * len(entry))
2063 2063 if data[0]:
2064 2064 dfh.write(data[0])
2065 2065 dfh.write(data[1])
2066 2066 ifh.write(entry)
2067 2067 else:
2068 2068 offset += curr * self._io.size
2069 2069 transaction.add(self.indexfile, offset, curr)
2070 2070 ifh.write(entry)
2071 2071 ifh.write(data[0])
2072 2072 ifh.write(data[1])
2073 2073 self._enforceinlinesize(transaction, ifh)
2074 2074
2075 2075 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2076 2076 """
2077 2077 add a delta group
2078 2078
2079 2079 given a set of deltas, add them to the revision log. the
2080 2080 first delta is against its parent, which should be in our
2081 2081 log, the rest are against the previous delta.
2082 2082
2083 2083 If ``addrevisioncb`` is defined, it will be called with arguments of
2084 2084 this revlog and the node that was added.
2085 2085 """
2086 2086
2087 2087 if self._writinghandles:
2088 2088 raise error.ProgrammingError('cannot nest addgroup() calls')
2089 2089
2090 2090 nodes = []
2091 2091
2092 2092 r = len(self)
2093 2093 end = 0
2094 2094 if r:
2095 2095 end = self.end(r - 1)
2096 2096 ifh = self._indexfp("a+")
2097 2097 isize = r * self._io.size
2098 2098 if self._inline:
2099 2099 transaction.add(self.indexfile, end + isize, r)
2100 2100 dfh = None
2101 2101 else:
2102 2102 transaction.add(self.indexfile, isize, r)
2103 2103 transaction.add(self.datafile, end)
2104 2104 dfh = self._datafp("a+")
2105 2105 def flush():
2106 2106 if dfh:
2107 2107 dfh.flush()
2108 2108 ifh.flush()
2109 2109
2110 2110 self._writinghandles = (ifh, dfh)
2111 2111
2112 2112 try:
2113 2113 deltacomputer = deltautil.deltacomputer(self)
2114 2114 # loop through our set of deltas
2115 2115 for data in deltas:
2116 2116 node, p1, p2, linknode, deltabase, delta, flags = data
2117 2117 link = linkmapper(linknode)
2118 2118 flags = flags or REVIDX_DEFAULT_FLAGS
2119 2119
2120 2120 nodes.append(node)
2121 2121
2122 2122 if node in self.nodemap:
2123 2123 self._nodeduplicatecallback(transaction, node)
2124 2124 # this can happen if two branches make the same change
2125 2125 continue
2126 2126
2127 2127 for p in (p1, p2):
2128 2128 if p not in self.nodemap:
2129 2129 raise error.LookupError(p, self.indexfile,
2130 2130 _('unknown parent'))
2131 2131
2132 2132 if deltabase not in self.nodemap:
2133 2133 raise error.LookupError(deltabase, self.indexfile,
2134 2134 _('unknown delta base'))
2135 2135
2136 2136 baserev = self.rev(deltabase)
2137 2137
2138 2138 if baserev != nullrev and self.iscensored(baserev):
2139 2139 # if base is censored, delta must be full replacement in a
2140 2140 # single patch operation
2141 2141 hlen = struct.calcsize(">lll")
2142 2142 oldlen = self.rawsize(baserev)
2143 2143 newlen = len(delta) - hlen
2144 2144 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2145 2145 raise error.CensoredBaseError(self.indexfile,
2146 2146 self.node(baserev))
2147 2147
2148 2148 if not flags and self._peek_iscensored(baserev, delta, flush):
2149 2149 flags |= REVIDX_ISCENSORED
2150 2150
2151 2151 # We assume consumers of addrevisioncb will want to retrieve
2152 2152 # the added revision, which will require a call to
2153 2153 # revision(). revision() will fast path if there is a cache
2154 2154 # hit. So, we tell _addrevision() to always cache in this case.
2155 2155 # We're only using addgroup() in the context of changegroup
2156 2156 # generation so the revision data can always be handled as raw
2157 2157 # by the flagprocessor.
2158 2158 self._addrevision(node, None, transaction, link,
2159 2159 p1, p2, flags, (baserev, delta),
2160 2160 ifh, dfh,
2161 2161 alwayscache=bool(addrevisioncb),
2162 2162 deltacomputer=deltacomputer)
2163 2163
2164 2164 if addrevisioncb:
2165 2165 addrevisioncb(self, node)
2166 2166
2167 2167 if not dfh and not self._inline:
2168 2168 # addrevision switched from inline to conventional
2169 2169 # reopen the index
2170 2170 ifh.close()
2171 2171 dfh = self._datafp("a+")
2172 2172 ifh = self._indexfp("a+")
2173 2173 self._writinghandles = (ifh, dfh)
2174 2174 finally:
2175 2175 self._writinghandles = None
2176 2176
2177 2177 if dfh:
2178 2178 dfh.close()
2179 2179 ifh.close()
2180 2180
2181 2181 return nodes
2182 2182
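# Illustrative sketch (not part of revlog.py): the shape of one entry
# in the ``deltas`` iterable consumed by addgroup(), matching the
# unpacking above. All identifiers are nodes except ``delta`` (a binary
# delta against ``deltabase``) and ``flags`` (revision flag bits).
def _example_deltagroupentry(node, p1, p2, linknode, deltabase, delta):
    flags = REVIDX_DEFAULT_FLAGS
    return (node, p1, p2, linknode, deltabase, delta, flags)
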
2183 2183 def iscensored(self, rev):
2184 2184 """Check if a file revision is censored."""
2185 2185 if not self._censorable:
2186 2186 return False
2187 2187
2188 2188 return self.flags(rev) & REVIDX_ISCENSORED
2189 2189
2190 2190 def _peek_iscensored(self, baserev, delta, flush):
2191 2191 """Quickly check if a delta produces a censored revision."""
2192 2192 if not self._censorable:
2193 2193 return False
2194 2194
2195 2195 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2196 2196
2197 2197 def getstrippoint(self, minlink):
2198 2198 """find the minimum rev that must be stripped to strip the linkrev
2199 2199
2200 2200 Returns a tuple containing the minimum rev and a set of all revs that
2201 2201 have linkrevs that will be broken by this strip.
2202 2202 """
2203 2203 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2204 2204 self.headrevs(),
2205 2205 self.linkrev, self.parentrevs)
2206 2206
2207 2207 def strip(self, minlink, transaction):
2208 2208 """truncate the revlog on the first revision with a linkrev >= minlink
2209 2209
2210 2210 This function is called when we're stripping revision minlink and
2211 2211 its descendants from the repository.
2212 2212
2213 2213 We have to remove all revisions with linkrev >= minlink, because
2214 2214 the equivalent changelog revisions will be renumbered after the
2215 2215 strip.
2216 2216
2217 2217 So we truncate the revlog on the first of these revisions, and
2218 2218 trust that the caller has saved the revisions that shouldn't be
2219 2219 removed and that it'll re-add them after this truncation.
2220 2220 """
2221 2221 if len(self) == 0:
2222 2222 return
2223 2223
2224 2224 rev, _ = self.getstrippoint(minlink)
2225 2225 if rev == len(self):
2226 2226 return
2227 2227
2228 2228 # first truncate the files on disk
2229 2229 end = self.start(rev)
2230 2230 if not self._inline:
2231 2231 transaction.add(self.datafile, end)
2232 2232 end = rev * self._io.size
2233 2233 else:
2234 2234 end += rev * self._io.size
2235 2235
2236 2236 transaction.add(self.indexfile, end)
2237 2237
2238 2238 # then reset internal state in memory to forget those revisions
2239 2239 self._revisioncache = None
2240 2240 self._chaininfocache = {}
2241 2241 self._chunkclear()
2242 2242 for x in pycompat.xrange(rev, len(self)):
2243 2243 del self.nodemap[self.node(x)]
2244 2244
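# the index ends with a sentinel null entry; [rev:-1] keeps it in place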
2245 2245 del self.index[rev:-1]
2246 2246 self._nodepos = None
2247 2247
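# Illustrative sketch (not part of revlog.py): previewing a strip.
# getstrippoint() reports what strip() would truncate without touching
# the revlog, so callers can inspect the broken linkrevs first.
def _example_previewstrip(rl, minlink):
    striprev, brokenrevs = rl.getstrippoint(minlink)
    return striprev, sorted(brokenrevs)
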
2248 2248 def checksize(self):
2249 2249 expected = 0
2250 2250 if len(self):
2251 2251 expected = max(0, self.end(len(self) - 1))
2252 2252
2253 2253 try:
2254 2254 with self._datafp() as f:
2255 2255 f.seek(0, 2)
2256 2256 actual = f.tell()
2257 2257 dd = actual - expected
2258 2258 except IOError as inst:
2259 2259 if inst.errno != errno.ENOENT:
2260 2260 raise
2261 2261 dd = 0
2262 2262
2263 2263 try:
2264 2264 f = self.opener(self.indexfile)
2265 2265 f.seek(0, 2)
2266 2266 actual = f.tell()
2267 2267 f.close()
2268 2268 s = self._io.size
2269 2269 i = max(0, actual // s)
2270 2270 di = actual - (i * s)
2271 2271 if self._inline:
2272 2272 databytes = 0
2273 2273 for r in self:
2274 2274 databytes += max(0, self.length(r))
2275 2275 dd = 0
2276 2276 di = actual - len(self) * s - databytes
2277 2277 except IOError as inst:
2278 2278 if inst.errno != errno.ENOENT:
2279 2279 raise
2280 2280 di = 0
2281 2281
2282 2282 return (dd, di)
2283 2283
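# Illustrative sketch (not part of revlog.py): interpreting checksize().
# ``dd`` is how many bytes the data file is off by and ``di`` how many
# extra bytes trail the index; (0, 0) means both files are consistent,
# which is the first thing verifyintegrity() checks.
def _example_issizeconsistent(rl):
    dd, di = rl.checksize()
    return dd == 0 and di == 0
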
2284 2284 def files(self):
2285 2285 res = [self.indexfile]
2286 2286 if not self._inline:
2287 2287 res.append(self.datafile)
2288 2288 return res
2289 2289
2290 2290 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2291 2291 assumehaveparentrevisions=False,
2292 2292 deltamode=repository.CG_DELTAMODE_STD):
2293 2293 if nodesorder not in ('nodes', 'storage', 'linear', None):
2294 2294 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2295 2295 nodesorder)
2296 2296
2297 2297 if nodesorder is None and not self._generaldelta:
2298 2298 nodesorder = 'storage'
2299 2299
2300 2300 if (not self._storedeltachains and
2301 2301 deltamode != repository.CG_DELTAMODE_PREV):
2302 2302 deltamode = repository.CG_DELTAMODE_FULL
2303 2303
2304 2304 return storageutil.emitrevisions(
2305 2305 self, nodes, nodesorder, revlogrevisiondelta,
2306 2306 deltaparentfn=self.deltaparent,
2307 2307 candeltafn=self.candelta,
2308 2308 rawsizefn=self.rawsize,
2309 2309 revdifffn=self.revdiff,
2310 2310 flagsfn=self.flags,
2311 2311 deltamode=deltamode,
2312 2312 revisiondata=revisiondata,
2313 2313 assumehaveparentrevisions=assumehaveparentrevisions)
2314 2314
2315 2315 DELTAREUSEALWAYS = 'always'
2316 2316 DELTAREUSESAMEREVS = 'samerevs'
2317 2317 DELTAREUSENEVER = 'never'
2318 2318
2319 2319 DELTAREUSEFULLADD = 'fulladd'
2320 2320
2321 2321 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2322 2322
2323 2323 def clone(self, tr, destrevlog, addrevisioncb=None,
2324 2324 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2325 2325 """Copy this revlog to another, possibly with format changes.
2326 2326
2327 2327 The destination revlog will contain the same revisions and nodes.
2328 2328 However, it may not be bit-for-bit identical due to e.g. delta encoding
2329 2329 differences.
2330 2330
2331 2331 The ``deltareuse`` argument controls how deltas from the existing revlog
2332 2332 are preserved in the destination revlog. The argument can have the
2333 2333 following values:
2334 2334
2335 2335 DELTAREUSEALWAYS
2336 2336 Deltas will always be reused (if possible), even if the destination
2337 2337 revlog would not select the same revisions for the delta. This is the
2338 2338 fastest mode of operation.
2339 2339 DELTAREUSESAMEREVS
2340 2340 Deltas will be reused if the destination revlog would pick the same
2341 2341 revisions for the delta. This mode strikes a balance between speed
2342 2342 and optimization.
2343 2343 DELTAREUSENEVER
2344 2344 Deltas will never be reused. This is the slowest mode of execution.
2345 2345 This mode can be used to recompute deltas (e.g. if the diff/delta
2346 2346 algorithm changes).
2347 2347
2348 2348 Delta computation can be slow, so the choice of delta reuse policy can
2349 2349 significantly affect run time.
2350 2350
2351 2351 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2352 2352 two extremes. Deltas will be reused if they are appropriate. But if a
2353 2353 better delta base is available, it will be chosen. This means if you
2354 2354 are converting a non-generaldelta revlog to a generaldelta revlog,
2355 2355 deltas will be recomputed if the delta's parent isn't a parent of the
2356 2356 revision.
2357 2357
2358 2358 In addition to the delta policy, the ``forcedeltabothparents``
2359 2359 argument controls whether to force computing deltas against both parents
2360 2360 for merges. When it is not set, the destination revlog's current setting is kept.
2361 2361 """
2362 2362 if deltareuse not in self.DELTAREUSEALL:
2363 2363 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2364 2364
2365 2365 if len(destrevlog):
2366 2366 raise ValueError(_('destination revlog is not empty'))
2367 2367
2368 2368 if getattr(self, 'filteredrevs', None):
2369 2369 raise ValueError(_('source revlog has filtered revisions'))
2370 2370 if getattr(destrevlog, 'filteredrevs', None):
2371 2371 raise ValueError(_('destination revlog has filtered revisions'))
2372 2372
2373 2373 # lazydeltabase controls whether to reuse a cached delta, if possible.
2374 2374 oldlazydeltabase = destrevlog._lazydeltabase
2375 2375 oldamd = destrevlog._deltabothparents
2376 2376
2377 2377 try:
2378 2378 if deltareuse == self.DELTAREUSEALWAYS:
2379 2379 destrevlog._lazydeltabase = True
2380 2380 elif deltareuse == self.DELTAREUSESAMEREVS:
2381 2381 destrevlog._lazydeltabase = False
2382 2382
2383 2383 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2384 2384
2385 2385 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2386 2386 self.DELTAREUSESAMEREVS)
2387 2387
2388 2388 deltacomputer = deltautil.deltacomputer(destrevlog)
2389 2389 index = self.index
2390 2390 for rev in self:
2391 2391 entry = index[rev]
2392 2392
2393 2393 # Some classes override linkrev to take filtered revs into
2394 2394 # account. Use raw entry from index.
2395 2395 flags = entry[0] & 0xffff
2396 2396 linkrev = entry[4]
2397 2397 p1 = index[entry[5]][7]
2398 2398 p2 = index[entry[6]][7]
2399 2399 node = entry[7]
2400 2400
2401 2401 # (Possibly) reuse the delta from the revlog if allowed and
2402 2402 # the revlog chunk is a delta.
2403 2403 cachedelta = None
2404 2404 rawtext = None
2405 2405 if populatecachedelta:
2406 2406 dp = self.deltaparent(rev)
2407 2407 if dp != nullrev:
2408 2408 cachedelta = (dp, bytes(self._chunk(rev)))
2409 2409
2410 2410 if not cachedelta:
2411 2411 rawtext = self.revision(rev, raw=True)
2412 2412
2413 2413
2414 2414 if deltareuse == self.DELTAREUSEFULLADD:
2415 2415 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2416 2416 cachedelta=cachedelta,
2417 2417 node=node, flags=flags,
2418 2418 deltacomputer=deltacomputer)
2419 2419 else:
2420 2420 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2421 2421 checkambig=False)
2422 2422 dfh = None
2423 2423 if not destrevlog._inline:
2424 2424 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2425 2425 try:
2426 2426 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2427 2427 p2, flags, cachedelta, ifh, dfh,
2428 2428 deltacomputer=deltacomputer)
2429 2429 finally:
2430 2430 if dfh:
2431 2431 dfh.close()
2432 2432 ifh.close()
2433 2433
2434 2434 if addrevisioncb:
2435 2435 addrevisioncb(self, rev, node)
2436 2436 finally:
2437 2437 destrevlog._lazydeltabase = oldlazydeltabase
2438 2438 destrevlog._deltabothparents = oldamd
2439 2439
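# Illustrative sketch (not part of revlog.py): forcing every delta to
# be recomputed while copying, e.g. after a delta algorithm change.
# ``tr`` is a hypothetical open transaction and ``dest`` an empty
# destination revlog prepared by the caller.
def _example_recomputeclone(src, dest, tr):
    src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
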
2440 2440 def censorrevision(self, tr, censornode, tombstone=b''):
2441 2441 if (self.version & 0xFFFF) == REVLOGV0:
2442 2442 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2443 2443 self.version)
2444 2444
2445 2445 censorrev = self.rev(censornode)
2446 2446 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2447 2447
2448 2448 if len(tombstone) > self.rawsize(censorrev):
2449 2449 raise error.Abort(_('censor tombstone must be no longer than '
2450 2450 'censored data'))
2451 2451
2452 2452 # Rewriting the revlog in place is hard. Our strategy for censoring is
2453 2453 # to create a new revlog, copy all revisions to it, then replace the
2454 2454 # revlogs on transaction close.
2455 2455
2456 2456 newindexfile = self.indexfile + b'.tmpcensored'
2457 2457 newdatafile = self.datafile + b'.tmpcensored'
2458 2458
2459 2459 # This is a bit dangerous. We could easily have a mismatch of state.
2460 2460 newrl = revlog(self.opener, newindexfile, newdatafile,
2461 2461 censorable=True)
2462 2462 newrl.version = self.version
2463 2463 newrl._generaldelta = self._generaldelta
2464 2464 newrl._io = self._io
2465 2465
2466 2466 for rev in self.revs():
2467 2467 node = self.node(rev)
2468 2468 p1, p2 = self.parents(node)
2469 2469
2470 2470 if rev == censorrev:
2471 2471 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2472 2472 p1, p2, censornode, REVIDX_ISCENSORED)
2473 2473
2474 2474 if newrl.deltaparent(rev) != nullrev:
2475 2475 raise error.Abort(_('censored revision stored as delta; '
2476 2476 'cannot censor'),
2477 2477 hint=_('censoring of revlogs is not '
2478 2478 'fully implemented; please report '
2479 2479 'this bug'))
2480 2480 continue
2481 2481
2482 2482 if self.iscensored(rev):
2483 2483 if self.deltaparent(rev) != nullrev:
2484 2484 raise error.Abort(_('cannot censor due to censored '
2485 2485 'revision having delta stored'))
2486 2486 rawtext = self._chunk(rev)
2487 2487 else:
2488 2488 rawtext = self.revision(rev, raw=True)
2489 2489
2490 2490 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2491 2491 self.flags(rev))
2492 2492
2493 2493 tr.addbackup(self.indexfile, location='store')
2494 2494 if not self._inline:
2495 2495 tr.addbackup(self.datafile, location='store')
2496 2496
2497 2497 self.opener.rename(newrl.indexfile, self.indexfile)
2498 2498 if not self._inline:
2499 2499 self.opener.rename(newrl.datafile, self.datafile)
2500 2500
2501 2501 self.clearcaches()
2502 self._loadindex(self.version, None)
2502 self._loadindex()
2503 2503
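# Illustrative sketch (not part of revlog.py): censoring one file
# revision. ``tr`` is a hypothetical open transaction; the tombstone
# must be no longer than the data it replaces.
def _example_censor(rl, tr, node):
    rl.censorrevision(tr, node, tombstone=b'erased by administrator')
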
2504 2504 def verifyintegrity(self, state):
2505 2505 """Verifies the integrity of the revlog.
2506 2506
2507 2507 Yields ``revlogproblem`` instances describing problems that are
2508 2508 found.
2509 2509 """
2510 2510 dd, di = self.checksize()
2511 2511 if dd:
2512 2512 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2513 2513 if di:
2514 2514 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2515 2515
2516 2516 version = self.version & 0xFFFF
2517 2517
2518 2518 # The verifier tells us what version revlog we should be.
2519 2519 if version != state['expectedversion']:
2520 2520 yield revlogproblem(
2521 2521 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2522 2522 (self.indexfile, version, state['expectedversion']))
2523 2523
2524 2524 state['skipread'] = set()
2525 2525
2526 2526 for rev in self:
2527 2527 node = self.node(rev)
2528 2528
2529 2529 # Verify contents. 4 cases to care about:
2530 2530 #
2531 2531 # common: the most common case
2532 2532 # rename: with a rename
2533 2533 # meta: file content starts with b'\1\n', the metadata
2534 2534 # header defined in filelog.py, but without a rename
2535 2535 # ext: content stored externally
2536 2536 #
2537 2537 # More formally, their differences are shown below:
2538 2538 #
2539 2539 # | common | rename | meta | ext
2540 2540 # -------------------------------------------------------
2541 2541 # flags() | 0 | 0 | 0 | not 0
2542 2542 # renamed() | False | True | False | ?
2543 2543 # rawtext[0:2]=='\1\n'| False | True | True | ?
2544 2544 #
2545 2545 # "rawtext" means the raw text stored in revlog data, which
2546 2546 # could be retrieved by "revision(rev, raw=True)". "text"
2547 2547 # mentioned below is "revision(rev, raw=False)".
2548 2548 #
2549 2549 # There are 3 different lengths stored physically:
2550 2550 # 1. L1: rawsize, stored in revlog index
2551 2551 # 2. L2: len(rawtext), stored in revlog data
2552 2552 # 3. L3: len(text), stored in revlog data if flags==0, or
2553 2553 # possibly somewhere else if flags!=0
2554 2554 #
2555 2555 # L1 should be equal to L2. L3 could be different from them.
2556 2556 # "text" may or may not affect commit hash depending on flag
2557 2557 # processors (see revlog.addflagprocessor).
2558 2558 #
2559 2559 # | common | rename | meta | ext
2560 2560 # -------------------------------------------------
2561 2561 # rawsize() | L1 | L1 | L1 | L1
2562 2562 # size() | L1 | L2-LM | L1(*) | L1 (?)
2563 2563 # len(rawtext) | L2 | L2 | L2 | L2
2564 2564 # len(text) | L2 | L2 | L2 | L3
2565 2565 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2566 2566 #
2567 2567 # LM: length of metadata, depending on rawtext
2568 2568 # (*): not ideal, see comment in filelog.size
2569 2569 # (?): could be "- len(meta)" if the resolved content has
2570 2570 # rename metadata
2571 2571 #
2572 2572 # Checks needed to be done:
2573 2573 # 1. length check: L1 == L2, in all cases.
2574 2574 # 2. hash check: depending on flag processor, we may need to
2575 2575 # use either "text" (external), or "rawtext" (in revlog).
2576 2576
2577 2577 try:
2578 2578 skipflags = state.get('skipflags', 0)
2579 2579 if skipflags:
2580 2580 skipflags &= self.flags(rev)
2581 2581
2582 2582 if skipflags:
2583 2583 state['skipread'].add(node)
2584 2584 else:
2585 2585 # Side-effect: read content and verify hash.
2586 2586 self.revision(node)
2587 2587
2588 2588 l1 = self.rawsize(rev)
2589 2589 l2 = len(self.revision(node, raw=True))
2590 2590
2591 2591 if l1 != l2:
2592 2592 yield revlogproblem(
2593 2593 error=_('unpacked size is %d, %d expected') % (l2, l1),
2594 2594 node=node)
2595 2595
2596 2596 except error.CensoredNodeError:
2597 2597 if state['erroroncensored']:
2598 2598 yield revlogproblem(error=_('censored file data'),
2599 2599 node=node)
2600 2600 state['skipread'].add(node)
2601 2601 except Exception as e:
2602 2602 yield revlogproblem(
2603 2603 error=_('unpacking %s: %s') % (short(node),
2604 2604 stringutil.forcebytestr(e)),
2605 2605 node=node)
2606 2606 state['skipread'].add(node)
2607 2607
2608 2608 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2609 2609 revisionscount=False, trackedsize=False,
2610 2610 storedsize=False):
2611 2611 d = {}
2612 2612
2613 2613 if exclusivefiles:
2614 2614 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2615 2615 if not self._inline:
2616 2616 d['exclusivefiles'].append((self.opener, self.datafile))
2617 2617
2618 2618 if sharedfiles:
2619 2619 d['sharedfiles'] = []
2620 2620
2621 2621 if revisionscount:
2622 2622 d['revisionscount'] = len(self)
2623 2623
2624 2624 if trackedsize:
2625 2625 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2626 2626
2627 2627 if storedsize:
2628 2628 d['storedsize'] = sum(self.opener.stat(path).st_size
2629 2629 for path in self.files())
2630 2630
2631 2631 return d
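
# Illustrative sketch (not part of revlog.py): querying on-disk usage
# through the storageinfo() interface.
def _example_storedsize(rl):
    return rl.storageinfo(storedsize=True)['storedsize']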