revlog: optimize ancestors() to not check filtered revisions for each...
Yuya Nishihara
r40188:adbf8ca2 default
@@ -1,2535 +1,2544 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanked usage of all the names to prevent pyflakes constraints
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100
101 101 # Aliased for performance.
102 102 _zlibdecompress = zlib.decompress
103 103
104 104 # max size of revlog with inline data
105 105 _maxinline = 131072
106 106 _chunksize = 1048576
107 107
108 108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 109 _flagprocessors = {
110 110 REVIDX_ISCENSORED: None,
111 111 }
112 112
113 113 # Flag processors for REVIDX_ELLIPSIS.
114 114 def ellipsisreadprocessor(rl, text):
115 115 return text, False
116 116
117 117 def ellipsiswriteprocessor(rl, text):
118 118 return text, False
119 119
120 120 def ellipsisrawprocessor(rl, text):
121 121 return False
122 122
123 123 ellipsisprocessor = (
124 124 ellipsisreadprocessor,
125 125 ellipsiswriteprocessor,
126 126 ellipsisrawprocessor,
127 127 )
128 128
129 129 def addflagprocessor(flag, processor):
130 130 """Register a flag processor on a revision data flag.
131 131
132 132 Invariant:
133 133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 135 - Only one flag processor can be registered on a specific flag.
136 136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 137 following signatures:
138 138 - (read) f(self, rawtext) -> text, bool
139 139 - (write) f(self, text) -> rawtext, bool
140 140 - (raw) f(self, rawtext) -> bool
141 141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 142 directly visible to the user.
143 143 The boolean returned by these transforms is used to determine whether
144 144 the returned text can be used for hash integrity checking. For example,
145 145 if "write" returns False, then "text" is used to generate hash. If
146 146 "write" returns True, that basically means "rawtext" returned by "write"
147 147 should be used to generate hash. Usually, "write" and "read" return
148 148 different booleans. And "raw" returns a same boolean as "write".
149 149
150 150 Note: The 'raw' transform is used for changegroup generation and in some
151 151 debug commands. In this case the transform only indicates whether the
152 152 contents can be used for hash integrity checks.
153 153 """
154 154 if not flag & REVIDX_KNOWN_FLAGS:
155 155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
156 156 raise error.ProgrammingError(msg)
157 157 if flag not in REVIDX_FLAGS_ORDER:
158 158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
159 159 raise error.ProgrammingError(msg)
160 160 if flag in _flagprocessors:
161 161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
162 162 raise error.Abort(msg)
163 163 _flagprocessors[flag] = processor
164 164
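
A minimal registration sketch for the contract documented above. The three no-op transforms and the commented-out call are hypothetical, for illustration only; REVIDX_EXTSTORED is a real flag, but registering on it here is not part of this module:

def _noopread(rl, rawtext):   # (read)  f(self, rawtext) -> text, bool
    return rawtext, True
def _noopwrite(rl, text):     # (write) f(self, text) -> rawtext, bool
    return text, True
def _noopraw(rl, rawtext):    # (raw)   f(self, rawtext) -> bool
    return True

# hypothetical: addflagprocessor(REVIDX_EXTSTORED,
#                                (_noopread, _noopwrite, _noopraw))
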
165 165 def getoffset(q):
166 166 return int(q >> 16)
167 167
168 168 def gettype(q):
169 169 return int(q & 0xFFFF)
170 170
171 171 def offset_type(offset, type):
172 172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
173 173 raise ValueError('unknown revlog index flags')
174 174 return int(int(offset) << 16 | type)
175 175
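
A standalone round-trip of the packing above: the data offset lives in the high 48 bits of the word and the 16 flag bits in the low ones.

packed = (1024 << 16) | 0x0001    # what offset_type(1024, 0x0001) builds
assert packed >> 16 == 1024       # getoffset()
assert packed & 0xFFFF == 0x0001  # gettype()
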
176 176 @attr.s(slots=True, frozen=True)
177 177 class _revisioninfo(object):
178 178 """Information about a revision that allows building its fulltext
179 179 node: expected hash of the revision
180 180 p1, p2: parent revs of the revision
181 181 btext: built text cache consisting of a one-element list
182 182 cachedelta: (baserev, uncompressed_delta) or None
183 183 flags: flags associated to the revision storage
184 184
185 185 One of btext[0] or cachedelta must be set.
186 186 """
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208 @interfaceutil.implementer(repository.iverifyproblem)
209 209 @attr.s(frozen=True)
210 210 class revlogproblem(object):
211 211 warning = attr.ib(default=None)
212 212 error = attr.ib(default=None)
213 213 node = attr.ib(default=None)
214 214
215 215 # index v0:
216 216 # 4 bytes: offset
217 217 # 4 bytes: compressed length
218 218 # 4 bytes: base rev
219 219 # 4 bytes: link rev
220 220 # 20 bytes: parent 1 nodeid
221 221 # 20 bytes: parent 2 nodeid
222 222 # 20 bytes: nodeid
223 223 indexformatv0 = struct.Struct(">4l20s20s20s")
224 224 indexformatv0_pack = indexformatv0.pack
225 225 indexformatv0_unpack = indexformatv0.unpack
226 226
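
A self-contained round-trip over the v0 layout above; the record is 76 bytes (four 4-byte big-endian ints plus three 20-byte nodeids), with illustrative values:

import struct
fmt = struct.Struct(">4l20s20s20s")
assert fmt.size == 76
entry = (0, 12, 0, 0, b'\x00' * 20, b'\x00' * 20, b'\x01' * 20)
assert fmt.unpack(fmt.pack(*entry)) == entry
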
227 227 class revlogoldindex(list):
228 228 def __getitem__(self, i):
229 229 if i == -1:
230 230 return (0, 0, 0, -1, -1, -1, -1, nullid)
231 231 return list.__getitem__(self, i)
232 232
233 233 class revlogoldio(object):
234 234 def __init__(self):
235 235 self.size = indexformatv0.size
236 236
237 237 def parseindex(self, data, inline):
238 238 s = self.size
239 239 index = []
240 240 nodemap = {nullid: nullrev}
241 241 n = off = 0
242 242 l = len(data)
243 243 while off + s <= l:
244 244 cur = data[off:off + s]
245 245 off += s
246 246 e = indexformatv0_unpack(cur)
247 247 # transform to revlogv1 format
248 248 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
249 249 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
250 250 index.append(e2)
251 251 nodemap[e[6]] = n
252 252 n += 1
253 253
254 254 return revlogoldindex(index), nodemap, None
255 255
256 256 def packentry(self, entry, node, version, rev):
257 257 if gettype(entry[0]):
258 258 raise error.RevlogError(_('index entry flags need revlog '
259 259 'version 1'))
260 260 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
261 261 node(entry[5]), node(entry[6]), entry[7])
262 262 return indexformatv0_pack(*e2)
263 263
264 264 # index ng:
265 265 # 6 bytes: offset
266 266 # 2 bytes: flags
267 267 # 4 bytes: compressed length
268 268 # 4 bytes: uncompressed length
269 269 # 4 bytes: base rev
270 270 # 4 bytes: link rev
271 271 # 4 bytes: parent 1 rev
272 272 # 4 bytes: parent 2 rev
273 273 # 32 bytes: nodeid
274 274 indexformatng = struct.Struct(">Qiiiiii20s12x")
275 275 indexformatng_pack = indexformatng.pack
276 276 versionformat = struct.Struct(">I")
277 277 versionformat_pack = versionformat.pack
278 278 versionformat_unpack = versionformat.unpack
279 279
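
The v1 ("ng") record is 64 bytes; its first 8 bytes hold the offset/flags word built by offset_type(), and in the very first record those leading bytes are overwritten by the version header (see revlogio.packentry below). A standalone check with illustrative values:

import struct
fmt = struct.Struct(">Qiiiiii20s12x")
assert fmt.size == 64  # 8 + 6 * 4 + 20 + 12 bytes of padding
entry = ((1024 << 16) | 0, 12, 34, 0, 0, -1, -1, b'\x02' * 20)
assert fmt.unpack(fmt.pack(*entry))[0] >> 16 == 1024
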
280 280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
281 281 # signed integer)
282 282 _maxentrysize = 0x7fffffff
283 283
284 284 class revlogio(object):
285 285 def __init__(self):
286 286 self.size = indexformatng.size
287 287
288 288 def parseindex(self, data, inline):
289 289 # call the C implementation to parse the index data
290 290 index, cache = parsers.parse_index2(data, inline)
291 291 return index, getattr(index, 'nodemap', None), cache
292 292
293 293 def packentry(self, entry, node, version, rev):
294 294 p = indexformatng_pack(*entry)
295 295 if rev == 0:
296 296 p = versionformat_pack(version) + p[4:]
297 297 return p
298 298
299 299 class revlog(object):
300 300 """
301 301 the underlying revision storage object
302 302
303 303 A revlog consists of two parts, an index and the revision data.
304 304
305 305 The index is a file with a fixed record size containing
306 306 information on each revision, including its nodeid (hash), the
307 307 nodeids of its parents, the position and offset of its data within
308 308 the data file, and the revision it's based on. Finally, each entry
309 309 contains a linkrev entry that can serve as a pointer to external
310 310 data.
311 311
312 312 The revision data itself is a linear collection of data chunks.
313 313 Each chunk represents a revision and is usually represented as a
314 314 delta against the previous chunk. To bound lookup time, runs of
315 315 deltas are limited to about 2 times the length of the original
316 316 version data. This makes retrieval of a version proportional to
317 317 its size, or O(1) relative to the number of revisions.
318 318
319 319 Both pieces of the revlog are written to in an append-only
320 320 fashion, which means we never need to rewrite a file to insert or
321 321 remove data, and can use some simple techniques to avoid the need
322 322 for locking while reading.
323 323
324 324 If checkambig, indexfile is opened with checkambig=True at
325 325 writing, to avoid file stat ambiguity.
326 326
327 327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
328 328 index will be mmapped rather than read if it is larger than the
329 329 configured threshold.
330 330
331 331 If censorable is True, the revlog can have censored revisions.
332 332 """
333 333 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
334 334 mmaplargeindex=False, censorable=False):
335 335 """
336 336 create a revlog object
337 337
338 338 opener is a function that abstracts the file opening operation
339 339 and can be used to implement COW semantics or the like.
340 340 """
341 341 self.indexfile = indexfile
342 342 self.datafile = datafile or (indexfile[:-2] + ".d")
343 343 self.opener = opener
344 344 # When True, indexfile is opened with checkambig=True at writing, to
345 345 # avoid file stat ambiguity.
346 346 self._checkambig = checkambig
347 347 self._censorable = censorable
348 348 # 3-tuple of (node, rev, text) for a raw revision.
349 349 self._revisioncache = None
350 350 # Maps rev to chain base rev.
351 351 self._chainbasecache = util.lrucachedict(100)
352 352 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
353 353 self._chunkcache = (0, '')
354 354 # How much data to read and cache into the raw revlog data cache.
355 355 self._chunkcachesize = 65536
356 356 self._maxchainlen = None
357 357 self._deltabothparents = True
358 358 self.index = []
359 359 # Mapping of partial identifiers to full nodes.
360 360 self._pcache = {}
361 361 # Mapping of revision integer to full node.
362 362 self._nodecache = {nullid: nullrev}
363 363 self._nodepos = None
364 364 self._compengine = 'zlib'
365 365 self._maxdeltachainspan = -1
366 366 self._withsparseread = False
367 367 self._sparserevlog = False
368 368 self._srdensitythreshold = 0.50
369 369 self._srmingapsize = 262144
370 370
371 371 # Make copy of flag processors so each revlog instance can support
372 372 # custom flags.
373 373 self._flagprocessors = dict(_flagprocessors)
374 374
375 375 mmapindexthreshold = None
376 376 v = REVLOG_DEFAULT_VERSION
377 377 opts = getattr(opener, 'options', None)
378 378 if opts is not None:
379 379 if 'revlogv2' in opts:
380 380 # version 2 revlogs always use generaldelta.
381 381 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
382 382 elif 'revlogv1' in opts:
383 383 if 'generaldelta' in opts:
384 384 v |= FLAG_GENERALDELTA
385 385 else:
386 386 v = 0
387 387 if 'chunkcachesize' in opts:
388 388 self._chunkcachesize = opts['chunkcachesize']
389 389 if 'maxchainlen' in opts:
390 390 self._maxchainlen = opts['maxchainlen']
391 391 if 'deltabothparents' in opts:
392 392 self._deltabothparents = opts['deltabothparents']
393 393 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
394 394 if 'compengine' in opts:
395 395 self._compengine = opts['compengine']
396 396 if 'maxdeltachainspan' in opts:
397 397 self._maxdeltachainspan = opts['maxdeltachainspan']
398 398 if mmaplargeindex and 'mmapindexthreshold' in opts:
399 399 mmapindexthreshold = opts['mmapindexthreshold']
400 400 self._sparserevlog = bool(opts.get('sparse-revlog', False))
401 401 withsparseread = bool(opts.get('with-sparse-read', False))
402 402 # sparse-revlog forces sparse-read
403 403 self._withsparseread = self._sparserevlog or withsparseread
404 404 if 'sparse-read-density-threshold' in opts:
405 405 self._srdensitythreshold = opts['sparse-read-density-threshold']
406 406 if 'sparse-read-min-gap-size' in opts:
407 407 self._srmingapsize = opts['sparse-read-min-gap-size']
408 408 if opts.get('enableellipsis'):
409 409 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
410 410
411 411 if self._chunkcachesize <= 0:
412 412 raise error.RevlogError(_('revlog chunk cache size %r is not '
413 413 'greater than 0') % self._chunkcachesize)
414 414 elif self._chunkcachesize & (self._chunkcachesize - 1):
415 415 raise error.RevlogError(_('revlog chunk cache size %r is not a '
416 416 'power of 2') % self._chunkcachesize)
417 417
418 418 self._loadindex(v, mmapindexthreshold)
419 419
420 420 def _loadindex(self, v, mmapindexthreshold):
421 421 indexdata = ''
422 422 self._initempty = True
423 423 try:
424 424 with self._indexfp() as f:
425 425 if (mmapindexthreshold is not None and
426 426 self.opener.fstat(f).st_size >= mmapindexthreshold):
427 427 indexdata = util.buffer(util.mmapread(f))
428 428 else:
429 429 indexdata = f.read()
430 430 if len(indexdata) > 0:
431 431 v = versionformat_unpack(indexdata[:4])[0]
432 432 self._initempty = False
433 433 except IOError as inst:
434 434 if inst.errno != errno.ENOENT:
435 435 raise
436 436
437 437 self.version = v
438 438 self._inline = v & FLAG_INLINE_DATA
439 439 self._generaldelta = v & FLAG_GENERALDELTA
440 440 flags = v & ~0xFFFF
441 441 fmt = v & 0xFFFF
442 442 if fmt == REVLOGV0:
443 443 if flags:
444 444 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
445 445 'revlog %s') %
446 446 (flags >> 16, fmt, self.indexfile))
447 447 elif fmt == REVLOGV1:
448 448 if flags & ~REVLOGV1_FLAGS:
449 449 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
450 450 'revlog %s') %
451 451 (flags >> 16, fmt, self.indexfile))
452 452 elif fmt == REVLOGV2:
453 453 if flags & ~REVLOGV2_FLAGS:
454 454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
455 455 'revlog %s') %
456 456 (flags >> 16, fmt, self.indexfile))
457 457 else:
458 458 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
459 459 (fmt, self.indexfile))
460 460
461 461 self._storedeltachains = True
462 462
463 463 self._io = revlogio()
464 464 if self.version == REVLOGV0:
465 465 self._io = revlogoldio()
466 466 try:
467 467 d = self._io.parseindex(indexdata, self._inline)
468 468 except (ValueError, IndexError):
469 469 raise error.RevlogError(_("index %s is corrupted") %
470 470 self.indexfile)
471 471 self.index, nodemap, self._chunkcache = d
472 472 if nodemap is not None:
473 473 self.nodemap = self._nodecache = nodemap
474 474 if not self._chunkcache:
475 475 self._chunkclear()
476 476 # revnum -> (chain-length, sum-delta-length)
477 477 self._chaininfocache = {}
478 478 # revlog header -> revlog compressor
479 479 self._decompressors = {}
480 480
481 481 @util.propertycache
482 482 def _compressor(self):
483 483 return util.compengines[self._compengine].revlogcompressor()
484 484
485 485 def _indexfp(self, mode='r'):
486 486 """file object for the revlog's index file"""
487 487 args = {r'mode': mode}
488 488 if mode != 'r':
489 489 args[r'checkambig'] = self._checkambig
490 490 if mode == 'w':
491 491 args[r'atomictemp'] = True
492 492 return self.opener(self.indexfile, **args)
493 493
494 494 def _datafp(self, mode='r'):
495 495 """file object for the revlog's data file"""
496 496 return self.opener(self.datafile, mode=mode)
497 497
498 498 @contextlib.contextmanager
499 499 def _datareadfp(self, existingfp=None):
500 500 """file object suitable to read data"""
501 501 if existingfp is not None:
502 502 yield existingfp
503 503 else:
504 504 if self._inline:
505 505 func = self._indexfp
506 506 else:
507 507 func = self._datafp
508 508 with func() as fp:
509 509 yield fp
510 510
511 511 def tip(self):
512 512 return self.node(len(self.index) - 1)
513 513 def __contains__(self, rev):
514 514 return 0 <= rev < len(self)
515 515 def __len__(self):
516 516 return len(self.index)
517 517 def __iter__(self):
518 518 return iter(pycompat.xrange(len(self)))
519 519 def revs(self, start=0, stop=None):
520 520 """iterate over all rev in this revlog (from start to stop)"""
521 521 return storageutil.iterrevs(len(self), start=start, stop=stop)
522 522
523 523 @util.propertycache
524 524 def nodemap(self):
525 525 if self.index:
526 526 # populate mapping down to the initial node
527 527 node0 = self.index[0][7] # get around changelog filtering
528 528 self.rev(node0)
529 529 return self._nodecache
530 530
531 531 def hasnode(self, node):
532 532 try:
533 533 self.rev(node)
534 534 return True
535 535 except KeyError:
536 536 return False
537 537
538 538 def candelta(self, baserev, rev):
539 539 """whether two revisions (baserev, rev) can be delta-ed or not"""
540 540 # Disable delta if either rev requires a content-changing flag
541 541 # processor (ex. LFS). This is because such flag processor can alter
542 542 # the rawtext content that the delta will be based on, and two clients
543 543 # could have the same revlog node with different flags (i.e. different
544 544 # rawtext contents) and the delta could be incompatible.
545 545 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
546 546 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
547 547 return False
548 548 return True
549 549
550 550 def clearcaches(self):
551 551 self._revisioncache = None
552 552 self._chainbasecache.clear()
553 553 self._chunkcache = (0, '')
554 554 self._pcache = {}
555 555
556 556 try:
557 557 self._nodecache.clearcaches()
558 558 except AttributeError:
559 559 self._nodecache = {nullid: nullrev}
560 560 self._nodepos = None
561 561
562 562 def rev(self, node):
563 563 try:
564 564 return self._nodecache[node]
565 565 except TypeError:
566 566 raise
567 567 except error.RevlogError:
568 568 # parsers.c radix tree lookup failed
569 569 if node == wdirid or node in wdirfilenodeids:
570 570 raise error.WdirUnsupported
571 571 raise error.LookupError(node, self.indexfile, _('no node'))
572 572 except KeyError:
573 573 # pure python cache lookup failed
574 574 n = self._nodecache
575 575 i = self.index
576 576 p = self._nodepos
577 577 if p is None:
578 578 p = len(i) - 1
579 579 else:
580 580 assert p < len(i)
581 581 for r in pycompat.xrange(p, -1, -1):
582 582 v = i[r][7]
583 583 n[v] = r
584 584 if v == node:
585 585 self._nodepos = r - 1
586 586 return r
587 587 if node == wdirid or node in wdirfilenodeids:
588 588 raise error.WdirUnsupported
589 589 raise error.LookupError(node, self.indexfile, _('no node'))
590 590
591 591 # Accessors for index entries.
592 592
593 593 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
594 594 # are flags.
595 595 def start(self, rev):
596 596 return int(self.index[rev][0] >> 16)
597 597
598 598 def flags(self, rev):
599 599 return self.index[rev][0] & 0xFFFF
600 600
601 601 def length(self, rev):
602 602 return self.index[rev][1]
603 603
604 604 def rawsize(self, rev):
605 605 """return the length of the uncompressed text for a given revision"""
606 606 l = self.index[rev][2]
607 607 if l >= 0:
608 608 return l
609 609
610 610 t = self.revision(rev, raw=True)
611 611 return len(t)
612 612
613 613 def size(self, rev):
614 614 """length of non-raw text (processed by a "read" flag processor)"""
615 615 # fast path: if no "read" flag processor could change the content,
616 616 # size is rawsize. note: ELLIPSIS is known to not change the content.
617 617 flags = self.flags(rev)
618 618 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
619 619 return self.rawsize(rev)
620 620
621 621 return len(self.revision(rev, raw=False))
622 622
623 623 def chainbase(self, rev):
624 624 base = self._chainbasecache.get(rev)
625 625 if base is not None:
626 626 return base
627 627
628 628 index = self.index
629 629 iterrev = rev
630 630 base = index[iterrev][3]
631 631 while base != iterrev:
632 632 iterrev = base
633 633 base = index[iterrev][3]
634 634
635 635 self._chainbasecache[rev] = base
636 636 return base
637 637
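
A toy version of the base-pointer walk above; slot 3 of each index entry names the delta base, and a rev whose base is itself opens the chain with a full snapshot:

toyindex = [(0, 0, 0, 0),   # rev 0: base == rev, a snapshot
            (0, 0, 0, 0),   # rev 1: delta against rev 0
            (0, 0, 0, 1)]   # rev 2: delta against rev 1

def toychainbase(rev):
    base = toyindex[rev][3]
    while base != rev:
        rev = base
        base = toyindex[rev][3]
    return base

assert toychainbase(2) == 0
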
638 638 def linkrev(self, rev):
639 639 return self.index[rev][4]
640 640
641 641 def parentrevs(self, rev):
642 642 try:
643 643 entry = self.index[rev]
644 644 except IndexError:
645 645 if rev == wdirrev:
646 646 raise error.WdirUnsupported
647 647 raise
648 648
649 649 return entry[5], entry[6]
650 650
651 # fast parentrevs(rev) where rev isn't filtered
652 _uncheckedparentrevs = parentrevs
653
651 654 def node(self, rev):
652 655 try:
653 656 return self.index[rev][7]
654 657 except IndexError:
655 658 if rev == wdirrev:
656 659 raise error.WdirUnsupported
657 660 raise
658 661
659 662 # Derived from index values.
660 663
661 664 def end(self, rev):
662 665 return self.start(rev) + self.length(rev)
663 666
664 667 def parents(self, node):
665 668 i = self.index
666 669 d = i[self.rev(node)]
667 670 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
668 671
669 672 def chainlen(self, rev):
670 673 return self._chaininfo(rev)[0]
671 674
672 675 def _chaininfo(self, rev):
673 676 chaininfocache = self._chaininfocache
674 677 if rev in chaininfocache:
675 678 return chaininfocache[rev]
676 679 index = self.index
677 680 generaldelta = self._generaldelta
678 681 iterrev = rev
679 682 e = index[iterrev]
680 683 clen = 0
681 684 compresseddeltalen = 0
682 685 while iterrev != e[3]:
683 686 clen += 1
684 687 compresseddeltalen += e[1]
685 688 if generaldelta:
686 689 iterrev = e[3]
687 690 else:
688 691 iterrev -= 1
689 692 if iterrev in chaininfocache:
690 693 t = chaininfocache[iterrev]
691 694 clen += t[0]
692 695 compresseddeltalen += t[1]
693 696 break
694 697 e = index[iterrev]
695 698 else:
696 699 # Add text length of base since decompressing that also takes
697 700 # work. For cache hits the length is already included.
698 701 compresseddeltalen += e[1]
699 702 r = (clen, compresseddeltalen)
700 703 chaininfocache[rev] = r
701 704 return r
702 705
703 706 def _deltachain(self, rev, stoprev=None):
704 707 """Obtain the delta chain for a revision.
705 708
706 709 ``stoprev`` specifies a revision to stop at. If not specified, we
707 710 stop at the base of the chain.
708 711
709 712 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
710 713 revs in ascending order and ``stopped`` is a bool indicating whether
711 714 ``stoprev`` was hit.
712 715 """
713 716 # Try C implementation.
714 717 try:
715 718 return self.index.deltachain(rev, stoprev, self._generaldelta)
716 719 except AttributeError:
717 720 pass
718 721
719 722 chain = []
720 723
721 724 # Alias to prevent attribute lookup in tight loop.
722 725 index = self.index
723 726 generaldelta = self._generaldelta
724 727
725 728 iterrev = rev
726 729 e = index[iterrev]
727 730 while iterrev != e[3] and iterrev != stoprev:
728 731 chain.append(iterrev)
729 732 if generaldelta:
730 733 iterrev = e[3]
731 734 else:
732 735 iterrev -= 1
733 736 e = index[iterrev]
734 737
735 738 if iterrev == stoprev:
736 739 stopped = True
737 740 else:
738 741 chain.append(iterrev)
739 742 stopped = False
740 743
741 744 chain.reverse()
742 745 return chain, stopped
743 746
744 747 def ancestors(self, revs, stoprev=0, inclusive=False):
745 748 """Generate the ancestors of 'revs' in reverse topological order.
746 749 Does not generate revs lower than stoprev.
747 750
748 751 See the documentation for ancestor.lazyancestors for more details."""
749 752
750 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
751 inclusive=inclusive)
753 # first, make sure start revisions aren't filtered
754 revs = list(revs)
755 checkrev = self.node
756 for r in revs:
757 checkrev(r)
758 # and then we know their ancestors aren't filtered either
759 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
760 stoprev=stoprev, inclusive=inclusive)
752 761
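
The point of the change above: every ancestor of an unfiltered revision is itself unfiltered, so validating only the start revisions (checkrev raises for filtered ones) lets the walk use the unchecked parent accessor. The generator below is a self-contained toy with the same yield order as ancestor.lazyancestors (reverse topological, nothing below stoprev); it is an illustration, not Mercurial's implementation:

import heapq

def toyancestors(pfunc, revs, stoprev=0, inclusive=False):
    seen, heap = set(), []
    def schedule(candidates):
        for r in candidates:
            if r >= stoprev and r not in seen:
                seen.add(r)
                heapq.heappush(heap, -r)  # max-heap via negation
    schedule(revs if inclusive else
             [p for r in revs for p in pfunc(r)])
    while heap:
        r = -heapq.heappop(heap)
        yield r
        schedule(pfunc(r))

# toy DAG: 0 <- 1 <- 2 and 0 <- 3
toyparents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
assert list(toyancestors(toyparents.__getitem__, [2, 3])) == [1, 0]
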
753 762 def descendants(self, revs):
754 763 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
755 764
756 765 def findcommonmissing(self, common=None, heads=None):
757 766 """Return a tuple of the ancestors of common and the ancestors of heads
758 767 that are not ancestors of common. In revset terminology, we return the
759 768 tuple:
760 769
761 770 ::common, (::heads) - (::common)
762 771
763 772 The list is sorted by revision number, meaning it is
764 773 topologically sorted.
765 774
766 775 'heads' and 'common' are both lists of node IDs. If heads is
767 776 not supplied, uses all of the revlog's heads. If common is not
768 777 supplied, uses nullid."""
769 778 if common is None:
770 779 common = [nullid]
771 780 if heads is None:
772 781 heads = self.heads()
773 782
774 783 common = [self.rev(n) for n in common]
775 784 heads = [self.rev(n) for n in heads]
776 785
777 786 # we want the ancestors, but inclusive
778 787 class lazyset(object):
779 788 def __init__(self, lazyvalues):
780 789 self.addedvalues = set()
781 790 self.lazyvalues = lazyvalues
782 791
783 792 def __contains__(self, value):
784 793 return value in self.addedvalues or value in self.lazyvalues
785 794
786 795 def __iter__(self):
787 796 added = self.addedvalues
788 797 for r in added:
789 798 yield r
790 799 for r in self.lazyvalues:
791 800 if r not in added:
792 801 yield r
793 802
794 803 def add(self, value):
795 804 self.addedvalues.add(value)
796 805
797 806 def update(self, values):
798 807 self.addedvalues.update(values)
799 808
800 809 has = lazyset(self.ancestors(common))
801 810 has.add(nullrev)
802 811 has.update(common)
803 812
804 813 # take all ancestors from heads that aren't in has
805 814 missing = set()
806 815 visit = collections.deque(r for r in heads if r not in has)
807 816 while visit:
808 817 r = visit.popleft()
809 818 if r in missing:
810 819 continue
811 820 else:
812 821 missing.add(r)
813 822 for p in self.parentrevs(r):
814 823 if p not in has:
815 824 visit.append(p)
816 825 missing = list(missing)
817 826 missing.sort()
818 827 return has, [self.node(miss) for miss in missing]
819 828
820 829 def incrementalmissingrevs(self, common=None):
821 830 """Return an object that can be used to incrementally compute the
822 831 revision numbers of the ancestors of arbitrary sets that are not
823 832 ancestors of common. This is an ancestor.incrementalmissingancestors
824 833 object.
825 834
826 835 'common' is a list of revision numbers. If common is not supplied, uses
827 836 nullrev.
828 837 """
829 838 if common is None:
830 839 common = [nullrev]
831 840
832 841 return ancestor.incrementalmissingancestors(self.parentrevs, common)
833 842
834 843 def findmissingrevs(self, common=None, heads=None):
835 844 """Return the revision numbers of the ancestors of heads that
836 845 are not ancestors of common.
837 846
838 847 More specifically, return a list of revision numbers corresponding to
839 848 nodes N such that every N satisfies the following constraints:
840 849
841 850 1. N is an ancestor of some node in 'heads'
842 851 2. N is not an ancestor of any node in 'common'
843 852
844 853 The list is sorted by revision number, meaning it is
845 854 topologically sorted.
846 855
847 856 'heads' and 'common' are both lists of revision numbers. If heads is
848 857 not supplied, uses all of the revlog's heads. If common is not
849 858 supplied, uses nullid."""
850 859 if common is None:
851 860 common = [nullrev]
852 861 if heads is None:
853 862 heads = self.headrevs()
854 863
855 864 inc = self.incrementalmissingrevs(common=common)
856 865 return inc.missingancestors(heads)
857 866
858 867 def findmissing(self, common=None, heads=None):
859 868 """Return the ancestors of heads that are not ancestors of common.
860 869
861 870 More specifically, return a list of nodes N such that every N
862 871 satisfies the following constraints:
863 872
864 873 1. N is an ancestor of some node in 'heads'
865 874 2. N is not an ancestor of any node in 'common'
866 875
867 876 The list is sorted by revision number, meaning it is
868 877 topologically sorted.
869 878
870 879 'heads' and 'common' are both lists of node IDs. If heads is
871 880 not supplied, uses all of the revlog's heads. If common is not
872 881 supplied, uses nullid."""
873 882 if common is None:
874 883 common = [nullid]
875 884 if heads is None:
876 885 heads = self.heads()
877 886
878 887 common = [self.rev(n) for n in common]
879 888 heads = [self.rev(n) for n in heads]
880 889
881 890 inc = self.incrementalmissingrevs(common=common)
882 891 return [self.node(r) for r in inc.missingancestors(heads)]
883 892
884 893 def nodesbetween(self, roots=None, heads=None):
885 894 """Return a topological path from 'roots' to 'heads'.
886 895
887 896 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
888 897 topologically sorted list of all nodes N that satisfy both of
889 898 these constraints:
890 899
891 900 1. N is a descendant of some node in 'roots'
892 901 2. N is an ancestor of some node in 'heads'
893 902
894 903 Every node is considered to be both a descendant and an ancestor
895 904 of itself, so every reachable node in 'roots' and 'heads' will be
896 905 included in 'nodes'.
897 906
898 907 'outroots' is the list of reachable nodes in 'roots', i.e., the
899 908 subset of 'roots' that is returned in 'nodes'. Likewise,
900 909 'outheads' is the subset of 'heads' that is also in 'nodes'.
901 910
902 911 'roots' and 'heads' are both lists of node IDs. If 'roots' is
903 912 unspecified, uses nullid as the only root. If 'heads' is
904 913 unspecified, uses list of all of the revlog's heads."""
905 914 nonodes = ([], [], [])
906 915 if roots is not None:
907 916 roots = list(roots)
908 917 if not roots:
909 918 return nonodes
910 919 lowestrev = min([self.rev(n) for n in roots])
911 920 else:
912 921 roots = [nullid] # Everybody's a descendant of nullid
913 922 lowestrev = nullrev
914 923 if (lowestrev == nullrev) and (heads is None):
915 924 # We want _all_ the nodes!
916 925 return ([self.node(r) for r in self], [nullid], list(self.heads()))
917 926 if heads is None:
918 927 # All nodes are ancestors, so the latest ancestor is the last
919 928 # node.
920 929 highestrev = len(self) - 1
921 930 # Set ancestors to None to signal that every node is an ancestor.
922 931 ancestors = None
923 932 # Set heads to an empty dictionary for later discovery of heads
924 933 heads = {}
925 934 else:
926 935 heads = list(heads)
927 936 if not heads:
928 937 return nonodes
929 938 ancestors = set()
930 939 # Turn heads into a dictionary so we can remove 'fake' heads.
931 940 # Also, later we will be using it to filter out the heads we can't
932 941 # find from roots.
933 942 heads = dict.fromkeys(heads, False)
934 943 # Start at the top and keep marking parents until we're done.
935 944 nodestotag = set(heads)
936 945 # Remember where the top was so we can use it as a limit later.
937 946 highestrev = max([self.rev(n) for n in nodestotag])
938 947 while nodestotag:
939 948 # grab a node to tag
940 949 n = nodestotag.pop()
941 950 # Never tag nullid
942 951 if n == nullid:
943 952 continue
944 953 # A node's revision number represents its place in a
945 954 # topologically sorted list of nodes.
946 955 r = self.rev(n)
947 956 if r >= lowestrev:
948 957 if n not in ancestors:
949 958 # If we are possibly a descendant of one of the roots
950 959 # and we haven't already been marked as an ancestor
951 960 ancestors.add(n) # Mark as ancestor
952 961 # Add non-nullid parents to list of nodes to tag.
953 962 nodestotag.update([p for p in self.parents(n) if
954 963 p != nullid])
955 964 elif n in heads: # We've seen it before, is it a fake head?
956 965 # So it is, real heads should not be the ancestors of
957 966 # any other heads.
958 967 heads.pop(n)
959 968 if not ancestors:
960 969 return nonodes
961 970 # Now that we have our set of ancestors, we want to remove any
962 971 # roots that are not ancestors.
963 972
964 973 # If one of the roots was nullid, everything is included anyway.
965 974 if lowestrev > nullrev:
966 975 # But, since we weren't, let's recompute the lowest rev to not
967 976 # include roots that aren't ancestors.
968 977
969 978 # Filter out roots that aren't ancestors of heads
970 979 roots = [root for root in roots if root in ancestors]
971 980 # Recompute the lowest revision
972 981 if roots:
973 982 lowestrev = min([self.rev(root) for root in roots])
974 983 else:
975 984 # No more roots? Return empty list
976 985 return nonodes
977 986 else:
978 987 # We are descending from nullid, and don't need to care about
979 988 # any other roots.
980 989 lowestrev = nullrev
981 990 roots = [nullid]
982 991 # Transform our roots list into a set.
983 992 descendants = set(roots)
984 993 # Also, keep the original roots so we can filter out roots that aren't
985 994 # 'real' roots (i.e. are descended from other roots).
986 995 roots = descendants.copy()
987 996 # Our topologically sorted list of output nodes.
988 997 orderedout = []
989 998 # Don't start at nullid since we don't want nullid in our output list,
990 999 # and if nullid shows up in descendants, empty parents will look like
991 1000 # they're descendants.
992 1001 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
993 1002 n = self.node(r)
994 1003 isdescendant = False
995 1004 if lowestrev == nullrev: # Everybody is a descendant of nullid
996 1005 isdescendant = True
997 1006 elif n in descendants:
998 1007 # n is already a descendant
999 1008 isdescendant = True
1000 1009 # This check only needs to be done here because all the roots
1001 1010 # will start being marked as descendants before the loop.
1002 1011 if n in roots:
1003 1012 # If n was a root, check if it's a 'real' root.
1004 1013 p = tuple(self.parents(n))
1005 1014 # If any of its parents are descendants, it's not a root.
1006 1015 if (p[0] in descendants) or (p[1] in descendants):
1007 1016 roots.remove(n)
1008 1017 else:
1009 1018 p = tuple(self.parents(n))
1010 1019 # A node is a descendant if either of its parents are
1011 1020 # descendants. (We seeded the descendants set with the roots
1012 1021 # up there, remember?)
1013 1022 if (p[0] in descendants) or (p[1] in descendants):
1014 1023 descendants.add(n)
1015 1024 isdescendant = True
1016 1025 if isdescendant and ((ancestors is None) or (n in ancestors)):
1017 1026 # Only include nodes that are both descendants and ancestors.
1018 1027 orderedout.append(n)
1019 1028 if (ancestors is not None) and (n in heads):
1020 1029 # We're trying to figure out which heads are reachable
1021 1030 # from roots.
1022 1031 # Mark this head as having been reached
1023 1032 heads[n] = True
1024 1033 elif ancestors is None:
1025 1034 # Otherwise, we're trying to discover the heads.
1026 1035 # Assume this is a head because if it isn't, the next step
1027 1036 # will eventually remove it.
1028 1037 heads[n] = True
1029 1038 # But, obviously its parents aren't.
1030 1039 for p in self.parents(n):
1031 1040 heads.pop(p, None)
1032 1041 heads = [head for head, flag in heads.iteritems() if flag]
1033 1042 roots = list(roots)
1034 1043 assert orderedout
1035 1044 assert roots
1036 1045 assert heads
1037 1046 return (orderedout, roots, heads)
1038 1047
1039 1048 def headrevs(self):
1040 1049 try:
1041 1050 return self.index.headrevs()
1042 1051 except AttributeError:
1043 1052 return self._headrevs()
1044 1053
1045 1054 def computephases(self, roots):
1046 1055 return self.index.computephasesmapsets(roots)
1047 1056
1048 1057 def _headrevs(self):
1049 1058 count = len(self)
1050 1059 if not count:
1051 1060 return [nullrev]
1052 1061 # we won't iter over filtered revs so nobody is a head at start
1053 1062 ishead = [0] * (count + 1)
1054 1063 index = self.index
1055 1064 for r in self:
1056 1065 ishead[r] = 1 # I may be a head
1057 1066 e = index[r]
1058 1067 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1059 1068 return [r for r, val in enumerate(ishead) if val]
1060 1069
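
A standalone run of the marking pass above. Note the list carries one extra slot so that writes through nullrev (-1) land harmlessly in the last cell:

toyparents = [(-1, -1), (0, -1), (0, -1)]  # revs 1 and 2 both branch off rev 0
count = len(toyparents)
ishead = [0] * (count + 1)
for r in range(count):
    ishead[r] = 1                    # r may be a head
    p1, p2 = toyparents[r]
    ishead[p1] = ishead[p2] = 0      # its parents are not
assert [r for r, v in enumerate(ishead) if v] == [1, 2]
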
1061 1070 def heads(self, start=None, stop=None):
1062 1071 """return the list of all nodes that have no children
1063 1072
1064 1073 if start is specified, only heads that are descendants of
1065 1074 start will be returned
1066 1075 if stop is specified, it will consider all the revs from stop
1067 1076 as if they had no children
1068 1077 """
1069 1078 if start is None and stop is None:
1070 1079 if not len(self):
1071 1080 return [nullid]
1072 1081 return [self.node(r) for r in self.headrevs()]
1073 1082
1074 1083 if start is None:
1075 1084 start = nullrev
1076 1085 else:
1077 1086 start = self.rev(start)
1078 1087
1079 1088 stoprevs = set(self.rev(n) for n in stop or [])
1080 1089
1081 1090 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1082 1091 stoprevs=stoprevs)
1083 1092
1084 1093 return [self.node(rev) for rev in revs]
1085 1094
1086 1095 def children(self, node):
1087 1096 """find the children of a given node"""
1088 1097 c = []
1089 1098 p = self.rev(node)
1090 1099 for r in self.revs(start=p + 1):
1091 1100 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1092 1101 if prevs:
1093 1102 for pr in prevs:
1094 1103 if pr == p:
1095 1104 c.append(self.node(r))
1096 1105 elif p == nullrev:
1097 1106 c.append(self.node(r))
1098 1107 return c
1099 1108
1100 1109 def commonancestorsheads(self, a, b):
1101 1110 """calculate all the heads of the common ancestors of nodes a and b"""
1102 1111 a, b = self.rev(a), self.rev(b)
1103 1112 ancs = self._commonancestorsheads(a, b)
1104 1113 return pycompat.maplist(self.node, ancs)
1105 1114
1106 1115 def _commonancestorsheads(self, *revs):
1107 1116 """calculate all the heads of the common ancestors of revs"""
1108 1117 try:
1109 1118 ancs = self.index.commonancestorsheads(*revs)
1110 1119 except (AttributeError, OverflowError): # C implementation failed
1111 1120 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1112 1121 return ancs
1113 1122
1114 1123 def isancestor(self, a, b):
1115 1124 """return True if node a is an ancestor of node b
1116 1125
1117 1126 A revision is considered an ancestor of itself."""
1118 1127 a, b = self.rev(a), self.rev(b)
1119 1128 return self.isancestorrev(a, b)
1120 1129
1121 1130 def isancestorrev(self, a, b):
1122 1131 """return True if revision a is an ancestor of revision b
1123 1132
1124 1133 A revision is considered an ancestor of itself.
1125 1134
1126 1135 The implementation of this is trivial but the use of
1127 1136 commonancestorsheads is not."""
1128 1137 if a == nullrev:
1129 1138 return True
1130 1139 elif a == b:
1131 1140 return True
1132 1141 elif a > b:
1133 1142 return False
1134 1143 return a in self._commonancestorsheads(a, b)
1135 1144
1136 1145 def ancestor(self, a, b):
1137 1146 """calculate the "best" common ancestor of nodes a and b"""
1138 1147
1139 1148 a, b = self.rev(a), self.rev(b)
1140 1149 try:
1141 1150 ancs = self.index.ancestors(a, b)
1142 1151 except (AttributeError, OverflowError):
1143 1152 ancs = ancestor.ancestors(self.parentrevs, a, b)
1144 1153 if ancs:
1145 1154 # choose a consistent winner when there's a tie
1146 1155 return min(map(self.node, ancs))
1147 1156 return nullid
1148 1157
1149 1158 def _match(self, id):
1150 1159 if isinstance(id, int):
1151 1160 # rev
1152 1161 return self.node(id)
1153 1162 if len(id) == 20:
1154 1163 # possibly a binary node
1155 1164 # odds of a binary node being all hex in ASCII are 1 in 10**25
1156 1165 try:
1157 1166 node = id
1158 1167 self.rev(node) # quick search the index
1159 1168 return node
1160 1169 except error.LookupError:
1161 1170 pass # may be partial hex id
1162 1171 try:
1163 1172 # str(rev)
1164 1173 rev = int(id)
1165 1174 if "%d" % rev != id:
1166 1175 raise ValueError
1167 1176 if rev < 0:
1168 1177 rev = len(self) + rev
1169 1178 if rev < 0 or rev >= len(self):
1170 1179 raise ValueError
1171 1180 return self.node(rev)
1172 1181 except (ValueError, OverflowError):
1173 1182 pass
1174 1183 if len(id) == 40:
1175 1184 try:
1176 1185 # a full hex nodeid?
1177 1186 node = bin(id)
1178 1187 self.rev(node)
1179 1188 return node
1180 1189 except (TypeError, error.LookupError):
1181 1190 pass
1182 1191
1183 1192 def _partialmatch(self, id):
1184 1193 # we don't care about wdirfilenodeids as they should always be full hashes
1185 1194 maybewdir = wdirhex.startswith(id)
1186 1195 try:
1187 1196 partial = self.index.partialmatch(id)
1188 1197 if partial and self.hasnode(partial):
1189 1198 if maybewdir:
1190 1199 # single 'ff...' match in radix tree, ambiguous with wdir
1191 1200 raise error.RevlogError
1192 1201 return partial
1193 1202 if maybewdir:
1194 1203 # no 'ff...' match in radix tree, wdir identified
1195 1204 raise error.WdirUnsupported
1196 1205 return None
1197 1206 except error.RevlogError:
1198 1207 # parsers.c radix tree lookup gave multiple matches
1199 1208 # fast path: for unfiltered changelog, radix tree is accurate
1200 1209 if not getattr(self, 'filteredrevs', None):
1201 1210 raise error.AmbiguousPrefixLookupError(
1202 1211 id, self.indexfile, _('ambiguous identifier'))
1203 1212 # fall through to slow path that filters hidden revisions
1204 1213 except (AttributeError, ValueError):
1205 1214 # we are pure python, or key was too short to search radix tree
1206 1215 pass
1207 1216
1208 1217 if id in self._pcache:
1209 1218 return self._pcache[id]
1210 1219
1211 1220 if len(id) <= 40:
1212 1221 try:
1213 1222 # hex(node)[:...]
1214 1223 l = len(id) // 2 # grab an even number of digits
1215 1224 prefix = bin(id[:l * 2])
1216 1225 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1217 1226 nl = [n for n in nl if hex(n).startswith(id) and
1218 1227 self.hasnode(n)]
1219 1228 if nullhex.startswith(id):
1220 1229 nl.append(nullid)
1221 1230 if len(nl) > 0:
1222 1231 if len(nl) == 1 and not maybewdir:
1223 1232 self._pcache[id] = nl[0]
1224 1233 return nl[0]
1225 1234 raise error.AmbiguousPrefixLookupError(
1226 1235 id, self.indexfile, _('ambiguous identifier'))
1227 1236 if maybewdir:
1228 1237 raise error.WdirUnsupported
1229 1238 return None
1230 1239 except TypeError:
1231 1240 pass
1232 1241
1233 1242 def lookup(self, id):
1234 1243 """locate a node based on:
1235 1244 - revision number or str(revision number)
1236 1245 - nodeid or subset of hex nodeid
1237 1246 """
1238 1247 n = self._match(id)
1239 1248 if n is not None:
1240 1249 return n
1241 1250 n = self._partialmatch(id)
1242 1251 if n:
1243 1252 return n
1244 1253
1245 1254 raise error.LookupError(id, self.indexfile, _('no match found'))
1246 1255
1247 1256 def shortest(self, node, minlength=1):
1248 1257 """Find the shortest unambiguous prefix that matches node."""
1249 1258 def isvalid(prefix):
1250 1259 try:
1251 1260 node = self._partialmatch(prefix)
1252 1261 except error.AmbiguousPrefixLookupError:
1253 1262 return False
1254 1263 except error.WdirUnsupported:
1255 1264 # single 'ff...' match
1256 1265 return True
1257 1266 if node is None:
1258 1267 raise error.LookupError(node, self.indexfile, _('no node'))
1259 1268 return True
1260 1269
1261 1270 def maybewdir(prefix):
1262 1271 return all(c == 'f' for c in prefix)
1263 1272
1264 1273 hexnode = hex(node)
1265 1274
1266 1275 def disambiguate(hexnode, minlength):
1267 1276 """Disambiguate against wdirid."""
1268 1277 for length in range(minlength, 41):
1269 1278 prefix = hexnode[:length]
1270 1279 if not maybewdir(prefix):
1271 1280 return prefix
1272 1281
1273 1282 if not getattr(self, 'filteredrevs', None):
1274 1283 try:
1275 1284 length = max(self.index.shortest(node), minlength)
1276 1285 return disambiguate(hexnode, length)
1277 1286 except error.RevlogError:
1278 1287 if node != wdirid:
1279 1288 raise error.LookupError(node, self.indexfile, _('no node'))
1280 1289 except AttributeError:
1281 1290 # Fall through to pure code
1282 1291 pass
1283 1292
1284 1293 if node == wdirid:
1285 1294 for length in range(minlength, 41):
1286 1295 prefix = hexnode[:length]
1287 1296 if isvalid(prefix):
1288 1297 return prefix
1289 1298
1290 1299 for length in range(minlength, 41):
1291 1300 prefix = hexnode[:length]
1292 1301 if isvalid(prefix):
1293 1302 return disambiguate(hexnode, length)
1294 1303
1295 1304 def cmp(self, node, text):
1296 1305 """compare text with a given file revision
1297 1306
1298 1307 returns True if text is different than what is stored.
1299 1308 """
1300 1309 p1, p2 = self.parents(node)
1301 1310 return storageutil.hashrevisionsha1(text, p1, p2) != node
1302 1311
1303 1312 def _cachesegment(self, offset, data):
1304 1313 """Add a segment to the revlog cache.
1305 1314
1306 1315 Accepts an absolute offset and the data that is at that location.
1307 1316 """
1308 1317 o, d = self._chunkcache
1309 1318 # try to add to existing cache
1310 1319 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1311 1320 self._chunkcache = o, d + data
1312 1321 else:
1313 1322 self._chunkcache = offset, data
1314 1323
1315 1324 def _readsegment(self, offset, length, df=None):
1316 1325 """Load a segment of raw data from the revlog.
1317 1326
1318 1327 Accepts an absolute offset, length to read, and an optional existing
1319 1328 file handle to read from.
1320 1329
1321 1330 If an existing file handle is passed, it will be seeked and the
1322 1331 original seek position will NOT be restored.
1323 1332
1324 1333 Returns a str or buffer of raw byte data.
1325 1334 """
1326 1335 # Cache data both forward and backward around the requested
1327 1336 # data, in a fixed size window. This helps speed up operations
1328 1337 # involving reading the revlog backwards.
1329 1338 cachesize = self._chunkcachesize
1330 1339 realoffset = offset & ~(cachesize - 1)
1331 1340 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1332 1341 - realoffset)
1333 1342 with self._datareadfp(df) as df:
1334 1343 df.seek(realoffset)
1335 1344 d = df.read(reallength)
1336 1345 self._cachesegment(realoffset, d)
1337 1346 if offset != realoffset or reallength != length:
1338 1347 return util.buffer(d, offset - realoffset, length)
1339 1348 return d
1340 1349
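
The window arithmetic above rounds the requested range out to cachesize-aligned boundaries; because cachesize is a power of two, & ~(cachesize - 1) floors to a multiple of it. A standalone check that the window covers the request:

cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = (((offset + length + cachesize) & ~(cachesize - 1))
              - realoffset)
assert (realoffset, reallength) == (65536, 65536)
assert realoffset <= offset
assert realoffset + reallength >= offset + length
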
1341 1350 def _getsegment(self, offset, length, df=None):
1342 1351 """Obtain a segment of raw data from the revlog.
1343 1352
1344 1353 Accepts an absolute offset, length of bytes to obtain, and an
1345 1354 optional file handle to the already-opened revlog. If the file
1346 1355 handle is used, its original seek position will not be preserved.
1347 1356
1348 1357 Requests for data may be returned from a cache.
1349 1358
1350 1359 Returns a str or a buffer instance of raw byte data.
1351 1360 """
1352 1361 o, d = self._chunkcache
1353 1362 l = len(d)
1354 1363
1355 1364 # is it in the cache?
1356 1365 cachestart = offset - o
1357 1366 cacheend = cachestart + length
1358 1367 if cachestart >= 0 and cacheend <= l:
1359 1368 if cachestart == 0 and cacheend == l:
1360 1369 return d # avoid a copy
1361 1370 return util.buffer(d, cachestart, cacheend - cachestart)
1362 1371
1363 1372 return self._readsegment(offset, length, df=df)
1364 1373
1365 1374 def _getsegmentforrevs(self, startrev, endrev, df=None):
1366 1375 """Obtain a segment of raw data corresponding to a range of revisions.
1367 1376
1368 1377 Accepts the start and end revisions and an optional already-open
1369 1378 file handle to be used for reading. If the file handle is read, its
1370 1379 seek position will not be preserved.
1371 1380
1372 1381 Requests for data may be satisfied by a cache.
1373 1382
1374 1383 Returns a 2-tuple of (offset, data) for the requested range of
1375 1384 revisions. Offset is the integer offset from the beginning of the
1376 1385 revlog and data is a str or buffer of the raw byte data.
1377 1386
1378 1387 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1379 1388 to determine where each revision's data begins and ends.
1380 1389 """
1381 1390 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1382 1391 # (functions are expensive).
1383 1392 index = self.index
1384 1393 istart = index[startrev]
1385 1394 start = int(istart[0] >> 16)
1386 1395 if startrev == endrev:
1387 1396 end = start + istart[1]
1388 1397 else:
1389 1398 iend = index[endrev]
1390 1399 end = int(iend[0] >> 16) + iend[1]
1391 1400
1392 1401 if self._inline:
1393 1402 start += (startrev + 1) * self._io.size
1394 1403 end += (endrev + 1) * self._io.size
1395 1404 length = end - start
1396 1405
1397 1406 return start, self._getsegment(start, length, df=df)
1398 1407
1399 1408 def _chunk(self, rev, df=None):
1400 1409 """Obtain a single decompressed chunk for a revision.
1401 1410
1402 1411 Accepts an integer revision and an optional already-open file handle
1403 1412 to be used for reading. If used, the seek position of the file will not
1404 1413 be preserved.
1405 1414
1406 1415 Returns a str holding uncompressed data for the requested revision.
1407 1416 """
1408 1417 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1409 1418
1410 1419 def _chunks(self, revs, df=None, targetsize=None):
1411 1420 """Obtain decompressed chunks for the specified revisions.
1412 1421
1413 1422 Accepts an iterable of numeric revisions that are assumed to be in
1414 1423 ascending order. Also accepts an optional already-open file handle
1415 1424 to be used for reading. If used, the seek position of the file will
1416 1425 not be preserved.
1417 1426
1418 1427 This function is similar to calling ``self._chunk()`` multiple times,
1419 1428 but is faster.
1420 1429
1421 1430 Returns a list with decompressed data for each requested revision.
1422 1431 """
1423 1432 if not revs:
1424 1433 return []
1425 1434 start = self.start
1426 1435 length = self.length
1427 1436 inline = self._inline
1428 1437 iosize = self._io.size
1429 1438 buffer = util.buffer
1430 1439
1431 1440 l = []
1432 1441 ladd = l.append
1433 1442
1434 1443 if not self._withsparseread:
1435 1444 slicedchunks = (revs,)
1436 1445 else:
1437 1446 slicedchunks = deltautil.slicechunk(self, revs,
1438 1447 targetsize=targetsize)
1439 1448
1440 1449 for revschunk in slicedchunks:
1441 1450 firstrev = revschunk[0]
1442 1451 # Skip trailing revisions with empty diff
1443 1452 for lastrev in revschunk[::-1]:
1444 1453 if length(lastrev) != 0:
1445 1454 break
1446 1455
1447 1456 try:
1448 1457 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1449 1458 except OverflowError:
1450 1459 # issue4215 - we can't cache a run of chunks greater than
1451 1460 # 2G on Windows
1452 1461 return [self._chunk(rev, df=df) for rev in revschunk]
1453 1462
1454 1463 decomp = self.decompress
1455 1464 for rev in revschunk:
1456 1465 chunkstart = start(rev)
1457 1466 if inline:
1458 1467 chunkstart += (rev + 1) * iosize
1459 1468 chunklength = length(rev)
1460 1469 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1461 1470
1462 1471 return l
1463 1472
1464 1473 def _chunkclear(self):
1465 1474 """Clear the raw chunk cache."""
1466 1475 self._chunkcache = (0, '')
1467 1476
1468 1477 def deltaparent(self, rev):
1469 1478 """return deltaparent of the given revision"""
1470 1479 base = self.index[rev][3]
1471 1480 if base == rev:
1472 1481 return nullrev
1473 1482 elif self._generaldelta:
1474 1483 return base
1475 1484 else:
1476 1485 return rev - 1
1477 1486
1478 1487 def issnapshot(self, rev):
1479 1488 """tells whether rev is a snapshot
1480 1489 """
1481 1490 if rev == nullrev:
1482 1491 return True
1483 1492 deltap = self.deltaparent(rev)
1484 1493 if deltap == nullrev:
1485 1494 return True
1486 1495 p1, p2 = self.parentrevs(rev)
1487 1496 if deltap in (p1, p2):
1488 1497 return False
1489 1498 return self.issnapshot(deltap)
1490 1499
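
The recursion above as a toy: a rev is a snapshot when following delta parents reaches nullrev without ever passing through one of the rev's own DAG parents. Here rev 2 deltas against rev 1, which is a snapshot but not one of its parents, so rev 2 is an intermediate snapshot:

toydeltaparent = {0: -1, 1: -1, 2: 1}
toyparents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}

def toyissnapshot(rev):
    if rev == -1 or toydeltaparent[rev] == -1:
        return True
    if toydeltaparent[rev] in toyparents[rev]:
        return False
    return toyissnapshot(toydeltaparent[rev])

assert toyissnapshot(2)
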
1491 1500 def snapshotdepth(self, rev):
1492 1501 """number of snapshot in the chain before this one"""
1493 1502 if not self.issnapshot(rev):
1494 1503 raise error.ProgrammingError('revision %d not a snapshot')
1495 1504 return len(self._deltachain(rev)[0]) - 1
1496 1505
1497 1506 def revdiff(self, rev1, rev2):
1498 1507 """return or calculate a delta between two revisions
1499 1508
1500 1509 The delta calculated is in binary form and is intended to be written to
1501 1510 revlog data directly. So this function needs raw revision data.
1502 1511 """
1503 1512 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1504 1513 return bytes(self._chunk(rev2))
1505 1514
1506 1515 return mdiff.textdiff(self.revision(rev1, raw=True),
1507 1516 self.revision(rev2, raw=True))
1508 1517
1509 1518 def revision(self, nodeorrev, _df=None, raw=False):
1510 1519 """return an uncompressed revision of a given node or revision
1511 1520 number.
1512 1521
1513 1522 _df - an existing file handle to read from. (internal-only)
1514 1523 raw - an optional argument specifying if the revision data is to be
1515 1524 treated as raw data when applying flag transforms. 'raw' should be set
1516 1525 to True when generating changegroups or in debug commands.
1517 1526 """
1518 1527 if isinstance(nodeorrev, int):
1519 1528 rev = nodeorrev
1520 1529 node = self.node(rev)
1521 1530 else:
1522 1531 node = nodeorrev
1523 1532 rev = None
1524 1533
1525 1534 cachedrev = None
1526 1535 flags = None
1527 1536 rawtext = None
1528 1537 if node == nullid:
1529 1538 return ""
1530 1539 if self._revisioncache:
1531 1540 if self._revisioncache[0] == node:
1532 1541 # _cache only stores rawtext
1533 1542 if raw:
1534 1543 return self._revisioncache[2]
1535 1544 # duplicated, but good for perf
1536 1545 if rev is None:
1537 1546 rev = self.rev(node)
1538 1547 if flags is None:
1539 1548 flags = self.flags(rev)
1540 1549 # no extra flags set, no flag processor runs, text = rawtext
1541 1550 if flags == REVIDX_DEFAULT_FLAGS:
1542 1551 return self._revisioncache[2]
1543 1552 # rawtext is reusable. need to run flag processor
1544 1553 rawtext = self._revisioncache[2]
1545 1554
1546 1555 cachedrev = self._revisioncache[1]
1547 1556
1548 1557 # look up what we need to read
1549 1558 if rawtext is None:
1550 1559 if rev is None:
1551 1560 rev = self.rev(node)
1552 1561
1553 1562 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1554 1563 if stopped:
1555 1564 rawtext = self._revisioncache[2]
1556 1565
1557 1566 # drop cache to save memory
1558 1567 self._revisioncache = None
1559 1568
1560 1569 targetsize = None
1561 1570 rawsize = self.index[rev][2]
1562 1571 if 0 <= rawsize:
1563 1572 targetsize = 4 * rawsize
1564 1573
1565 1574 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1566 1575 if rawtext is None:
1567 1576 rawtext = bytes(bins[0])
1568 1577 bins = bins[1:]
1569 1578
1570 1579 rawtext = mdiff.patches(rawtext, bins)
1571 1580 self._revisioncache = (node, rev, rawtext)
1572 1581
1573 1582 if flags is None:
1574 1583 if rev is None:
1575 1584 rev = self.rev(node)
1576 1585 flags = self.flags(rev)
1577 1586
1578 1587 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1579 1588 if validatehash:
1580 1589 self.checkhash(text, node, rev=rev)
1581 1590
1582 1591 return text
1583 1592
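# A schematic of the reconstruction path in revision() above, with
# hypothetical ``chunks(chain)`` and ``patches(base, deltas)`` callables
# standing in for self._chunks and mdiff.patches: the first chunk of a
# delta chain is a full snapshot and the rest are deltas folded on top.
def buildrawtext_sketch(chunks, patches, chain):
    bins = chunks(chain)
    rawtext = bytes(bins[0])           # base snapshot of the chain
    return patches(rawtext, bins[1:])  # apply each delta in order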
1584 1593 def hash(self, text, p1, p2):
1585 1594 """Compute a node hash.
1586 1595
1587 1596 Available as a function so that subclasses can replace the hash
1588 1597 as needed.
1589 1598 """
1590 1599 return storageutil.hashrevisionsha1(text, p1, p2)
1591 1600
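# A minimal sketch of the node scheme behind hash() as implemented by
# storageutil.hashrevisionsha1: the parent nodes are fed to SHA-1 in
# sorted order before the text, so swapping p1 and p2 yields the same
# node (a sketch only; subclasses may substitute a different scheme).
import hashlib

def hashrevision_sketch(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()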
1592 1601 def _processflags(self, text, flags, operation, raw=False):
1593 1602 """Inspect revision data flags and applies transforms defined by
1594 1603 registered flag processors.
1595 1604
1596 1605 ``text`` - the revision data to process
1597 1606 ``flags`` - the revision flags
1598 1607 ``operation`` - the operation being performed (read or write)
1599 1608 ``raw`` - an optional argument describing if the raw transform should be
1600 1609 applied.
1601 1610
1602 1611 This method processes the flags in the order (or reverse order if
1603 1612 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1604 1613 flag processors registered for present flags. The order of flags defined
1605 1614 in REVIDX_FLAGS_ORDER must be stable because the transforms do not commute.
1606 1615
1607 1616 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1608 1617 processed text and ``validatehash`` is a bool indicating whether the
1609 1618 returned text should be checked for hash integrity.
1610 1619
1611 1620 Note: If the ``raw`` argument is set, it has precedence over the
1612 1621 operation and will only update the value of ``validatehash``.
1613 1622 """
1614 1623 # fast path: no flag processors will run
1615 1624 if flags == 0:
1616 1625 return text, True
1617 1626 if operation not in ('read', 'write'):
1618 1627 raise error.ProgrammingError(_("invalid '%s' operation") %
1619 1628 operation)
1620 1629 # Check all flags are known.
1621 1630 if flags & ~REVIDX_KNOWN_FLAGS:
1622 1631 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1623 1632 (flags & ~REVIDX_KNOWN_FLAGS))
1624 1633 validatehash = True
1625 1634 # Depending on the operation (read or write), the order might be
1626 1635 # reversed due to non-commutative transforms.
1627 1636 orderedflags = REVIDX_FLAGS_ORDER
1628 1637 if operation == 'write':
1629 1638 orderedflags = reversed(orderedflags)
1630 1639
1631 1640 for flag in orderedflags:
1632 1641 # If a flagprocessor has been registered for a known flag, apply the
1633 1642 # related operation transform and update result tuple.
1634 1643 if flag & flags:
1635 1644 vhash = True
1636 1645
1637 1646 if flag not in self._flagprocessors:
1638 1647 message = _("missing processor for flag '%#x'") % (flag)
1639 1648 raise error.RevlogError(message)
1640 1649
1641 1650 processor = self._flagprocessors[flag]
1642 1651 if processor is not None:
1643 1652 readtransform, writetransform, rawtransform = processor
1644 1653
1645 1654 if raw:
1646 1655 vhash = rawtransform(self, text)
1647 1656 elif operation == 'read':
1648 1657 text, vhash = readtransform(self, text)
1649 1658 else: # write operation
1650 1659 text, vhash = writetransform(self, text)
1651 1660 validatehash = validatehash and vhash
1652 1661
1653 1662 return text, validatehash
1654 1663
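# A condensed sketch of the ordering rule documented above, assuming a
# hypothetical ``processors`` mapping of flag -> (read, write, raw)
# callables and an ``orderedflags`` sequence: reads walk the order
# forward, writes walk it in reverse, because transforms do not commute.
def processflags_sketch(processors, orderedflags, text, flags, operation):
    validatehash = True
    order = reversed(orderedflags) if operation == 'write' else orderedflags
    for flag in order:
        if flag & flags:
            readtf, writetf, rawtf = processors[flag]
            transform = readtf if operation == 'read' else writetf
            text, vhash = transform(text)
            validatehash = validatehash and vhash
    return text, validatehash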
1655 1664 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1656 1665 """Check node hash integrity.
1657 1666
1658 1667 Available as a function so that subclasses can extend hash mismatch
1659 1668 behaviors as needed.
1660 1669 """
1661 1670 try:
1662 1671 if p1 is None and p2 is None:
1663 1672 p1, p2 = self.parents(node)
1664 1673 if node != self.hash(text, p1, p2):
1665 1674 # Clear the revision cache on hash failure. The revision cache
1666 1675 # only stores the raw revision and clearing the cache does have
1667 1676 # the side-effect that we won't have a cache hit when the raw
1668 1677 # revision data is accessed. But this case should be rare and
1669 1678 # it is extra work to teach the cache about the hash
1670 1679 # verification state.
1671 1680 if self._revisioncache and self._revisioncache[0] == node:
1672 1681 self._revisioncache = None
1673 1682
1674 1683 revornode = rev
1675 1684 if revornode is None:
1676 1685 revornode = templatefilters.short(hex(node))
1677 1686 raise error.RevlogError(_("integrity check failed on %s:%s")
1678 1687 % (self.indexfile, pycompat.bytestr(revornode)))
1679 1688 except error.RevlogError:
1680 1689 if self._censorable and storageutil.iscensoredtext(text):
1681 1690 raise error.CensoredNodeError(self.indexfile, node, text)
1682 1691 raise
1683 1692
1684 1693 def _enforceinlinesize(self, tr, fp=None):
1685 1694 """Check if the revlog is too big for inline and convert if so.
1686 1695
1687 1696 This should be called after revisions are added to the revlog. If the
1688 1697 revlog has grown too large to be an inline revlog, it will convert it
1689 1698 to use multiple index and data files.
1690 1699 """
1691 1700 tiprev = len(self) - 1
1692 1701 if (not self._inline or
1693 1702 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1694 1703 return
1695 1704
1696 1705 trinfo = tr.find(self.indexfile)
1697 1706 if trinfo is None:
1698 1707 raise error.RevlogError(_("%s not found in the transaction")
1699 1708 % self.indexfile)
1700 1709
1701 1710 trindex = trinfo[2]
1702 1711 if trindex is not None:
1703 1712 dataoff = self.start(trindex)
1704 1713 else:
1705 1714 # revlog was stripped at start of transaction, use all leftover data
1706 1715 trindex = len(self) - 1
1707 1716 dataoff = self.end(tiprev)
1708 1717
1709 1718 tr.add(self.datafile, dataoff)
1710 1719
1711 1720 if fp:
1712 1721 fp.flush()
1713 1722 fp.close()
1714 1723
1715 1724 with self._datafp('w') as df:
1716 1725 for r in self:
1717 1726 df.write(self._getsegmentforrevs(r, r)[1])
1718 1727
1719 1728 with self._indexfp('w') as fp:
1720 1729 self.version &= ~FLAG_INLINE_DATA
1721 1730 self._inline = False
1722 1731 io = self._io
1723 1732 for i in self:
1724 1733 e = io.packentry(self.index[i], self.node, self.version, i)
1725 1734 fp.write(e)
1726 1735
1727 1736 # the temp file replaces the real index when we exit the context
1728 1737 # manager
1729 1738
1730 1739 tr.replace(self.indexfile, trindex * self._io.size)
1731 1740 self._chunkclear()
1732 1741
1733 1742 def _nodeduplicatecallback(self, transaction, node):
1734 1743 """called when trying to add a node already stored.
1735 1744 """
1736 1745
1737 1746 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1738 1747 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1739 1748 """add a revision to the log
1740 1749
1741 1750 text - the revision data to add
1742 1751 transaction - the transaction object used for rollback
1743 1752 link - the linkrev data to add
1744 1753 p1, p2 - the parent nodeids of the revision
1745 1754 cachedelta - an optional precomputed delta
1746 1755 node - nodeid of revision; typically node is not specified, and it is
1747 1756 computed by default as hash(text, p1, p2), however subclasses might
1748 1757 use different hashing method (and override checkhash() in such case)
1749 1758 flags - the known flags to set on the revision
1750 1759 deltacomputer - an optional deltacomputer instance shared between
1751 1760 multiple calls
1752 1761 """
1753 1762 if link == nullrev:
1754 1763 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1755 1764 % self.indexfile)
1756 1765
1757 1766 if flags:
1758 1767 node = node or self.hash(text, p1, p2)
1759 1768
1760 1769 rawtext, validatehash = self._processflags(text, flags, 'write')
1761 1770
1762 1771 # If the flag processor modifies the revision data, ignore any provided
1763 1772 # cachedelta.
1764 1773 if rawtext != text:
1765 1774 cachedelta = None
1766 1775
1767 1776 if len(rawtext) > _maxentrysize:
1768 1777 raise error.RevlogError(
1769 1778 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1770 1779 % (self.indexfile, len(rawtext)))
1771 1780
1772 1781 node = node or self.hash(rawtext, p1, p2)
1773 1782 if node in self.nodemap:
1774 1783 return node
1775 1784
1776 1785 if validatehash:
1777 1786 self.checkhash(rawtext, node, p1=p1, p2=p2)
1778 1787
1779 1788 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1780 1789 flags, cachedelta=cachedelta,
1781 1790 deltacomputer=deltacomputer)
1782 1791
1783 1792 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1784 1793 cachedelta=None, deltacomputer=None):
1785 1794 """add a raw revision with known flags, node and parents
1786 1795 useful when reusing a revision not stored in this revlog (ex: received
1787 1796 over wire, or read from an external bundle).
1788 1797 """
1789 1798 dfh = None
1790 1799 if not self._inline:
1791 1800 dfh = self._datafp("a+")
1792 1801 ifh = self._indexfp("a+")
1793 1802 try:
1794 1803 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1795 1804 flags, cachedelta, ifh, dfh,
1796 1805 deltacomputer=deltacomputer)
1797 1806 finally:
1798 1807 if dfh:
1799 1808 dfh.close()
1800 1809 ifh.close()
1801 1810
1802 1811 def compress(self, data):
1803 1812 """Generate a possibly-compressed representation of data."""
1804 1813 if not data:
1805 1814 return '', data
1806 1815
1807 1816 compressed = self._compressor.compress(data)
1808 1817
1809 1818 if compressed:
1810 1819 # The revlog compressor added the header in the returned data.
1811 1820 return '', compressed
1812 1821
1813 1822 if data[0:1] == '\0':
1814 1823 return '', data
1815 1824 return 'u', data
1816 1825
1817 1826 def decompress(self, data):
1818 1827 """Decompress a revlog chunk.
1819 1828
1820 1829 The chunk is expected to begin with a header identifying the
1821 1830 format type so it can be routed to an appropriate decompressor.
1822 1831 """
1823 1832 if not data:
1824 1833 return data
1825 1834
1826 1835 # Revlogs are read much more frequently than they are written and many
1827 1836 # chunks only take microseconds to decompress, so performance is
1828 1837 # important here.
1829 1838 #
1830 1839 # We can make a few assumptions about revlogs:
1831 1840 #
1832 1841 # 1) the majority of chunks will be compressed (as opposed to inline
1833 1842 # raw data).
1834 1843 # 2) decompressing *any* data will likely be at least 10x slower than
1835 1844 # returning raw inline data.
1836 1845 # 3) we want to prioritize common and officially supported compression
1837 1846 # engines
1838 1847 #
1839 1848 # It follows that we want to optimize for "decompress compressed data
1840 1849 # when encoded with common and officially supported compression engines"
1841 1850 # case over "raw data" and "data encoded by less common or non-official
1842 1851 # compression engines." That is why we have the inline lookup first
1843 1852 # followed by the compengines lookup.
1844 1853 #
1845 1854 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1846 1855 # compressed chunks. And this matters for changelog and manifest reads.
1847 1856 t = data[0:1]
1848 1857
1849 1858 if t == 'x':
1850 1859 try:
1851 1860 return _zlibdecompress(data)
1852 1861 except zlib.error as e:
1853 1862 raise error.RevlogError(_('revlog decompress error: %s') %
1854 1863 stringutil.forcebytestr(e))
1855 1864 # '\0' is more common than 'u' so it goes first.
1856 1865 elif t == '\0':
1857 1866 return data
1858 1867 elif t == 'u':
1859 1868 return util.buffer(data, 1)
1860 1869
1861 1870 try:
1862 1871 compressor = self._decompressors[t]
1863 1872 except KeyError:
1864 1873 try:
1865 1874 engine = util.compengines.forrevlogheader(t)
1866 1875 compressor = engine.revlogcompressor()
1867 1876 self._decompressors[t] = compressor
1868 1877 except KeyError:
1869 1878 raise error.RevlogError(_('unknown compression type %r') % t)
1870 1879
1871 1880 return compressor.decompress(data)
1872 1881
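# A toy round-trip of the one-byte header convention used by compress()
# and decompress() above, assuming plain zlib: zlib output begins with
# 0x78 ('x') and thus self-identifies, '\0' marks raw data that needs no
# marker, and 'u' flags data stored uncompressed.
import zlib

def compress_sketch(data):
    if not data:
        return data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        return compressed        # already starts with 'x'
    if data[0:1] == b'\0':
        return data              # raw data that self-identifies
    return b'u' + data           # explicit "uncompressed" marker

def decompress_sketch(data):
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'u':
        return data[1:]
    return data                  # '\0' prefix or empty chunk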
1873 1882 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1874 1883 cachedelta, ifh, dfh, alwayscache=False,
1875 1884 deltacomputer=None):
1876 1885 """internal function to add revisions to the log
1877 1886
1878 1887 see addrevision for argument descriptions.
1879 1888
1880 1889 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1881 1890
1882 1891 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1883 1892 be used.
1884 1893
1885 1894 invariants:
1886 1895 - rawtext is optional (can be None); if not set, cachedelta must be set.
1887 1896 if both are set, they must correspond to each other.
1888 1897 """
1889 1898 if node == nullid:
1890 1899 raise error.RevlogError(_("%s: attempt to add null revision") %
1891 1900 self.indexfile)
1892 1901 if node == wdirid or node in wdirfilenodeids:
1893 1902 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1894 1903 self.indexfile)
1895 1904
1896 1905 if self._inline:
1897 1906 fh = ifh
1898 1907 else:
1899 1908 fh = dfh
1900 1909
1901 1910 btext = [rawtext]
1902 1911
1903 1912 curr = len(self)
1904 1913 prev = curr - 1
1905 1914 offset = self.end(prev)
1906 1915 p1r, p2r = self.rev(p1), self.rev(p2)
1907 1916
1908 1917 # full versions are inserted when the needed deltas
1909 1918 # become comparable to the uncompressed text
1910 1919 if rawtext is None:
1911 1920 # need rawtext size, before changed by flag processors, which is
1912 1921 # the non-raw size. use revlog explicitly to avoid filelog's extra
1913 1922 # logic that might remove metadata size.
1914 1923 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1915 1924 cachedelta[1])
1916 1925 else:
1917 1926 textlen = len(rawtext)
1918 1927
1919 1928 if deltacomputer is None:
1920 1929 deltacomputer = deltautil.deltacomputer(self)
1921 1930
1922 1931 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1923 1932
1924 1933 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1925 1934
1926 1935 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1927 1936 deltainfo.base, link, p1r, p2r, node)
1928 1937 self.index.append(e)
1929 1938 self.nodemap[node] = curr
1930 1939
1931 1940 entry = self._io.packentry(e, self.node, self.version, curr)
1932 1941 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1933 1942 link, offset)
1934 1943
1935 1944 rawtext = btext[0]
1936 1945
1937 1946 if alwayscache and rawtext is None:
1938 1947 rawtext = deltacomputer.buildtext(revinfo, fh)
1939 1948
1940 1949 if type(rawtext) == bytes: # only accept immutable objects
1941 1950 self._revisioncache = (node, curr, rawtext)
1942 1951 self._chainbasecache[curr] = deltainfo.chainbase
1943 1952 return node
1944 1953
1945 1954 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1946 1955 # Files opened in a+ mode have inconsistent behavior on various
1947 1956 # platforms. Windows requires that a file positioning call be made
1948 1957 # when the file handle transitions between reads and writes. See
1949 1958 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1950 1959 # platforms, Python or the platform itself can be buggy. Some versions
1951 1960 # of Solaris have been observed to not append at the end of the file
1952 1961 # if the file was seeked to before the end. See issue4943 for more.
1953 1962 #
1954 1963 # We work around this issue by inserting a seek() before writing.
1955 1964 # Note: This is likely not necessary on Python 3.
1956 1965 ifh.seek(0, os.SEEK_END)
1957 1966 if dfh:
1958 1967 dfh.seek(0, os.SEEK_END)
1959 1968
1960 1969 curr = len(self) - 1
1961 1970 if not self._inline:
1962 1971 transaction.add(self.datafile, offset)
1963 1972 transaction.add(self.indexfile, curr * len(entry))
1964 1973 if data[0]:
1965 1974 dfh.write(data[0])
1966 1975 dfh.write(data[1])
1967 1976 ifh.write(entry)
1968 1977 else:
1969 1978 offset += curr * self._io.size
1970 1979 transaction.add(self.indexfile, offset, curr)
1971 1980 ifh.write(entry)
1972 1981 ifh.write(data[0])
1973 1982 ifh.write(data[1])
1974 1983 self._enforceinlinesize(transaction, ifh)
1975 1984
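# A tiny illustration of the a+ mode workaround in _writeentry() above:
# explicitly seeking to EOF before each write sidesteps platforms where
# an append-mode handle that interleaves reads and writes does not
# reliably append (see the issue4943 reference above).
import os

def append_safely(fh, payload):
    fh.seek(0, os.SEEK_END)  # reposition even though the mode is a+
    fh.write(payload)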
1976 1985 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1977 1986 """
1978 1987 add a delta group
1979 1988
1980 1989 given a set of deltas, add them to the revision log. the
1981 1990 first delta is against its parent, which should be in our
1982 1991 log, the rest are against the previous delta.
1983 1992
1984 1993 If ``addrevisioncb`` is defined, it will be called with arguments of
1985 1994 this revlog and the node that was added.
1986 1995 """
1987 1996
1988 1997 nodes = []
1989 1998
1990 1999 r = len(self)
1991 2000 end = 0
1992 2001 if r:
1993 2002 end = self.end(r - 1)
1994 2003 ifh = self._indexfp("a+")
1995 2004 isize = r * self._io.size
1996 2005 if self._inline:
1997 2006 transaction.add(self.indexfile, end + isize, r)
1998 2007 dfh = None
1999 2008 else:
2000 2009 transaction.add(self.indexfile, isize, r)
2001 2010 transaction.add(self.datafile, end)
2002 2011 dfh = self._datafp("a+")
2003 2012 def flush():
2004 2013 if dfh:
2005 2014 dfh.flush()
2006 2015 ifh.flush()
2007 2016 try:
2008 2017 deltacomputer = deltautil.deltacomputer(self)
2009 2018 # loop through our set of deltas
2010 2019 for data in deltas:
2011 2020 node, p1, p2, linknode, deltabase, delta, flags = data
2012 2021 link = linkmapper(linknode)
2013 2022 flags = flags or REVIDX_DEFAULT_FLAGS
2014 2023
2015 2024 nodes.append(node)
2016 2025
2017 2026 if node in self.nodemap:
2018 2027 self._nodeduplicatecallback(transaction, node)
2019 2028 # this can happen if two branches make the same change
2020 2029 continue
2021 2030
2022 2031 for p in (p1, p2):
2023 2032 if p not in self.nodemap:
2024 2033 raise error.LookupError(p, self.indexfile,
2025 2034 _('unknown parent'))
2026 2035
2027 2036 if deltabase not in self.nodemap:
2028 2037 raise error.LookupError(deltabase, self.indexfile,
2029 2038 _('unknown delta base'))
2030 2039
2031 2040 baserev = self.rev(deltabase)
2032 2041
2033 2042 if baserev != nullrev and self.iscensored(baserev):
2034 2043 # if base is censored, delta must be full replacement in a
2035 2044 # single patch operation
2036 2045 hlen = struct.calcsize(">lll")
2037 2046 oldlen = self.rawsize(baserev)
2038 2047 newlen = len(delta) - hlen
2039 2048 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2040 2049 raise error.CensoredBaseError(self.indexfile,
2041 2050 self.node(baserev))
2042 2051
2043 2052 if not flags and self._peek_iscensored(baserev, delta, flush):
2044 2053 flags |= REVIDX_ISCENSORED
2045 2054
2046 2055 # We assume consumers of addrevisioncb will want to retrieve
2047 2056 # the added revision, which will require a call to
2048 2057 # revision(). revision() will fast path if there is a cache
2049 2058 # hit. So, we tell _addrevision() to always cache in this case.
2050 2059 # We're only using addgroup() in the context of changegroup
2051 2060 # generation so the revision data can always be handled as raw
2052 2061 # by the flagprocessor.
2053 2062 self._addrevision(node, None, transaction, link,
2054 2063 p1, p2, flags, (baserev, delta),
2055 2064 ifh, dfh,
2056 2065 alwayscache=bool(addrevisioncb),
2057 2066 deltacomputer=deltacomputer)
2058 2067
2059 2068 if addrevisioncb:
2060 2069 addrevisioncb(self, node)
2061 2070
2062 2071 if not dfh and not self._inline:
2063 2072 # addrevision switched from inline to conventional
2064 2073 # reopen the index
2065 2074 ifh.close()
2066 2075 dfh = self._datafp("a+")
2067 2076 ifh = self._indexfp("a+")
2068 2077 finally:
2069 2078 if dfh:
2070 2079 dfh.close()
2071 2080 ifh.close()
2072 2081
2073 2082 return nodes
2074 2083
2075 2084 def iscensored(self, rev):
2076 2085 """Check if a file revision is censored."""
2077 2086 if not self._censorable:
2078 2087 return False
2079 2088
2080 2089 return self.flags(rev) & REVIDX_ISCENSORED
2081 2090
2082 2091 def _peek_iscensored(self, baserev, delta, flush):
2083 2092 """Quickly check if a delta produces a censored revision."""
2084 2093 if not self._censorable:
2085 2094 return False
2086 2095
2087 2096 # Fragile heuristic: unless new file meta keys are added alphabetically
2088 2097 # preceding "censored", all censored revisions are prefixed by
2089 2098 # "\1\ncensored:". A delta producing such a censored revision must be a
2090 2099 # full-replacement delta, so we inspect the first and only patch in the
2091 2100 # delta for this prefix.
2092 2101 hlen = struct.calcsize(">lll")
2093 2102 if len(delta) <= hlen:
2094 2103 return False
2095 2104
2096 2105 oldlen = self.rawsize(baserev)
2097 2106 newlen = len(delta) - hlen
2098 2107 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2099 2108 return False
2100 2109
2101 2110 add = "\1\ncensored:"
2102 2111 addlen = len(add)
2103 2112 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2104 2113
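# A minimal sketch of the full-replacement check used above: a delta
# that rewrites the entire text is a single (start, end, newlen) patch
# header covering [0, oldlen) followed by the replacement bytes.
import struct

def isfullreplacement_sketch(delta, oldlen):
    hlen = struct.calcsize(">lll")
    if len(delta) <= hlen:
        return False
    start, end, newlen = struct.unpack(">lll", delta[:hlen])
    return start == 0 and end == oldlen and newlen == len(delta) - hlen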
2105 2114 def getstrippoint(self, minlink):
2106 2115 """find the minimum rev that must be stripped to strip the linkrev
2107 2116
2108 2117 Returns a tuple containing the minimum rev and a set of all revs that
2109 2118 have linkrevs that will be broken by this strip.
2110 2119 """
2111 2120 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2112 2121 self.headrevs(),
2113 2122 self.linkrev, self.parentrevs)
2114 2123
2115 2124 def strip(self, minlink, transaction):
2116 2125 """truncate the revlog on the first revision with a linkrev >= minlink
2117 2126
2118 2127 This function is called when we're stripping revision minlink and
2119 2128 its descendants from the repository.
2120 2129
2121 2130 We have to remove all revisions with linkrev >= minlink, because
2122 2131 the equivalent changelog revisions will be renumbered after the
2123 2132 strip.
2124 2133
2125 2134 So we truncate the revlog on the first of these revisions, and
2126 2135 trust that the caller has saved the revisions that shouldn't be
2127 2136 removed and that it'll re-add them after this truncation.
2128 2137 """
2129 2138 if len(self) == 0:
2130 2139 return
2131 2140
2132 2141 rev, _ = self.getstrippoint(minlink)
2133 2142 if rev == len(self):
2134 2143 return
2135 2144
2136 2145 # first truncate the files on disk
2137 2146 end = self.start(rev)
2138 2147 if not self._inline:
2139 2148 transaction.add(self.datafile, end)
2140 2149 end = rev * self._io.size
2141 2150 else:
2142 2151 end += rev * self._io.size
2143 2152
2144 2153 transaction.add(self.indexfile, end)
2145 2154
2146 2155 # then reset internal state in memory to forget those revisions
2147 2156 self._revisioncache = None
2148 2157 self._chaininfocache = {}
2149 2158 self._chunkclear()
2150 2159 for x in pycompat.xrange(rev, len(self)):
2151 2160 del self.nodemap[self.node(x)]
2152 2161
2153 2162 del self.index[rev:-1]
2154 2163 self._nodepos = None
2155 2164
2156 2165 def checksize(self):
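"""check the index and data file sizes against the index contents

Returns a (dd, di) pair of byte counts: ``dd`` is the drift of the
data file from the size the index implies, and ``di`` is the drift
of the index file (which also covers interleaved data for inline
revlogs). Both are 0 when the revlog is consistent on disk.
"""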
2157 2166 expected = 0
2158 2167 if len(self):
2159 2168 expected = max(0, self.end(len(self) - 1))
2160 2169
2161 2170 try:
2162 2171 with self._datafp() as f:
2163 2172 f.seek(0, 2)
2164 2173 actual = f.tell()
2165 2174 dd = actual - expected
2166 2175 except IOError as inst:
2167 2176 if inst.errno != errno.ENOENT:
2168 2177 raise
2169 2178 dd = 0
2170 2179
2171 2180 try:
2172 2181 f = self.opener(self.indexfile)
2173 2182 f.seek(0, 2)
2174 2183 actual = f.tell()
2175 2184 f.close()
2176 2185 s = self._io.size
2177 2186 i = max(0, actual // s)
2178 2187 di = actual - (i * s)
2179 2188 if self._inline:
2180 2189 databytes = 0
2181 2190 for r in self:
2182 2191 databytes += max(0, self.length(r))
2183 2192 dd = 0
2184 2193 di = actual - len(self) * s - databytes
2185 2194 except IOError as inst:
2186 2195 if inst.errno != errno.ENOENT:
2187 2196 raise
2188 2197 di = 0
2189 2198
2190 2199 return (dd, di)
2191 2200
2192 2201 def files(self):
2193 2202 res = [self.indexfile]
2194 2203 if not self._inline:
2195 2204 res.append(self.datafile)
2196 2205 return res
2197 2206
2198 2207 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2199 2208 assumehaveparentrevisions=False, deltaprevious=False):
2200 2209 if nodesorder not in ('nodes', 'storage', None):
2201 2210 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2202 2211 nodesorder)
2203 2212
2204 2213 if nodesorder is None and not self._generaldelta:
2205 2214 nodesorder = 'storage'
2206 2215
2207 2216 return storageutil.emitrevisions(
2208 2217 self, nodes, nodesorder, revlogrevisiondelta,
2209 2218 deltaparentfn=self.deltaparent,
2210 2219 candeltafn=self.candelta,
2211 2220 rawsizefn=self.rawsize,
2212 2221 revdifffn=self.revdiff,
2213 2222 flagsfn=self.flags,
2214 2223 sendfulltext=not self._storedeltachains,
2215 2224 revisiondata=revisiondata,
2216 2225 assumehaveparentrevisions=assumehaveparentrevisions,
2217 2226 deltaprevious=deltaprevious)
2218 2227
2219 2228 DELTAREUSEALWAYS = 'always'
2220 2229 DELTAREUSESAMEREVS = 'samerevs'
2221 2230 DELTAREUSENEVER = 'never'
2222 2231
2223 2232 DELTAREUSEFULLADD = 'fulladd'
2224 2233
2225 2234 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2226 2235
2227 2236 def clone(self, tr, destrevlog, addrevisioncb=None,
2228 2237 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2229 2238 """Copy this revlog to another, possibly with format changes.
2230 2239
2231 2240 The destination revlog will contain the same revisions and nodes.
2232 2241 However, it may not be bit-for-bit identical due to e.g. delta encoding
2233 2242 differences.
2234 2243
2235 2244 The ``deltareuse`` argument controls how deltas from the existing revlog
2236 2245 are preserved in the destination revlog. The argument can have the
2237 2246 following values:
2238 2247
2239 2248 DELTAREUSEALWAYS
2240 2249 Deltas will always be reused (if possible), even if the destination
2241 2250 revlog would not select the same revisions for the delta. This is the
2242 2251 fastest mode of operation.
2243 2252 DELTAREUSESAMEREVS
2244 2253 Deltas will be reused if the destination revlog would pick the same
2245 2254 revisions for the delta. This mode strikes a balance between speed
2246 2255 and optimization.
2247 2256 DELTAREUSENEVER
2248 2257 Deltas will never be reused. This is the slowest mode of execution.
2249 2258 This mode can be used to recompute deltas (e.g. if the diff/delta
2250 2259 algorithm changes).
2251 2260
2252 2261 Delta computation can be slow, so the choice of delta reuse policy can
2253 2262 significantly affect run time.
2254 2263
2255 2264 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2256 2265 two extremes. Deltas will be reused if they are appropriate. But if a
2257 2266 better delta base is available, it will be chosen. This means if you
2258 2267 are converting a non-generaldelta revlog to a generaldelta revlog,
2259 2268 deltas will be recomputed if the delta's parent isn't a parent of the
2260 2269 revision.
2261 2270
2262 2271 In addition to the delta policy, the ``deltabothparents`` argument
2263 2272 controls whether to compute deltas against both parents for merges.
2264 2273 When no value is passed, the destination revlog's current setting is kept.
2265 2274 """
2266 2275 if deltareuse not in self.DELTAREUSEALL:
2267 2276 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2268 2277
2269 2278 if len(destrevlog):
2270 2279 raise ValueError(_('destination revlog is not empty'))
2271 2280
2272 2281 if getattr(self, 'filteredrevs', None):
2273 2282 raise ValueError(_('source revlog has filtered revisions'))
2274 2283 if getattr(destrevlog, 'filteredrevs', None):
2275 2284 raise ValueError(_('destination revlog has filtered revisions'))
2276 2285
2277 2286 # lazydeltabase controls whether to reuse a cached delta, if possible.
2278 2287 oldlazydeltabase = destrevlog._lazydeltabase
2279 2288 oldamd = destrevlog._deltabothparents
2280 2289
2281 2290 try:
2282 2291 if deltareuse == self.DELTAREUSEALWAYS:
2283 2292 destrevlog._lazydeltabase = True
2284 2293 elif deltareuse == self.DELTAREUSESAMEREVS:
2285 2294 destrevlog._lazydeltabase = False
2286 2295
2287 2296 destrevlog._deltabothparents = deltabothparents or oldamd
2288 2297
2289 2298 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2290 2299 self.DELTAREUSESAMEREVS)
2291 2300
2292 2301 deltacomputer = deltautil.deltacomputer(destrevlog)
2293 2302 index = self.index
2294 2303 for rev in self:
2295 2304 entry = index[rev]
2296 2305
2297 2306 # Some classes override linkrev to take filtered revs into
2298 2307 # account. Use raw entry from index.
2299 2308 flags = entry[0] & 0xffff
2300 2309 linkrev = entry[4]
2301 2310 p1 = index[entry[5]][7]
2302 2311 p2 = index[entry[6]][7]
2303 2312 node = entry[7]
2304 2313
2305 2314 # (Possibly) reuse the delta from the revlog if allowed and
2306 2315 # the revlog chunk is a delta.
2307 2316 cachedelta = None
2308 2317 rawtext = None
2309 2318 if populatecachedelta:
2310 2319 dp = self.deltaparent(rev)
2311 2320 if dp != nullrev:
2312 2321 cachedelta = (dp, bytes(self._chunk(rev)))
2313 2322
2314 2323 if not cachedelta:
2315 2324 rawtext = self.revision(rev, raw=True)
2316 2325
2317 2326
2318 2327 if deltareuse == self.DELTAREUSEFULLADD:
2319 2328 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2320 2329 cachedelta=cachedelta,
2321 2330 node=node, flags=flags,
2322 2331 deltacomputer=deltacomputer)
2323 2332 else:
2324 2333 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2325 2334 checkambig=False)
2326 2335 dfh = None
2327 2336 if not destrevlog._inline:
2328 2337 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2329 2338 try:
2330 2339 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2331 2340 p2, flags, cachedelta, ifh, dfh,
2332 2341 deltacomputer=deltacomputer)
2333 2342 finally:
2334 2343 if dfh:
2335 2344 dfh.close()
2336 2345 ifh.close()
2337 2346
2338 2347 if addrevisioncb:
2339 2348 addrevisioncb(self, rev, node)
2340 2349 finally:
2341 2350 destrevlog._lazydeltabase = oldlazydeltabase
2342 2351 destrevlog._deltabothparents = oldamd
2343 2352
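# A hedged usage sketch of clone(): assuming ``src`` and ``dst`` are
# revlog instances and ``tr`` is an open transaction, this recomputes
# every delta under the destination's own policy, the slowest but most
# thorough of the reuse modes documented above.
def recomputedeltas_sketch(src, dst, tr):
    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)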
2344 2353 def censorrevision(self, tr, censornode, tombstone=b''):
2345 2354 if (self.version & 0xFFFF) == REVLOGV0:
2346 2355 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2347 2356 self.version)
2348 2357
2349 2358 censorrev = self.rev(censornode)
2350 2359 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2351 2360
2352 2361 if len(tombstone) > self.rawsize(censorrev):
2353 2362 raise error.Abort(_('censor tombstone must be no longer than '
2354 2363 'censored data'))
2355 2364
2356 2365 # Rewriting the revlog in place is hard. Our strategy for censoring is
2357 2366 # to create a new revlog, copy all revisions to it, then replace the
2358 2367 # revlogs on transaction close.
2359 2368
2360 2369 newindexfile = self.indexfile + b'.tmpcensored'
2361 2370 newdatafile = self.datafile + b'.tmpcensored'
2362 2371
2363 2372 # This is a bit dangerous. We could easily have a mismatch of state.
2364 2373 newrl = revlog(self.opener, newindexfile, newdatafile,
2365 2374 censorable=True)
2366 2375 newrl.version = self.version
2367 2376 newrl._generaldelta = self._generaldelta
2368 2377 newrl._io = self._io
2369 2378
2370 2379 for rev in self.revs():
2371 2380 node = self.node(rev)
2372 2381 p1, p2 = self.parents(node)
2373 2382
2374 2383 if rev == censorrev:
2375 2384 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2376 2385 p1, p2, censornode, REVIDX_ISCENSORED)
2377 2386
2378 2387 if newrl.deltaparent(rev) != nullrev:
2379 2388 raise error.Abort(_('censored revision stored as delta; '
2380 2389 'cannot censor'),
2381 2390 hint=_('censoring of revlogs is not '
2382 2391 'fully implemented; please report '
2383 2392 'this bug'))
2384 2393 continue
2385 2394
2386 2395 if self.iscensored(rev):
2387 2396 if self.deltaparent(rev) != nullrev:
2388 2397 raise error.Abort(_('cannot censor due to censored '
2389 2398 'revision having delta stored'))
2390 2399 rawtext = self._chunk(rev)
2391 2400 else:
2392 2401 rawtext = self.revision(rev, raw=True)
2393 2402
2394 2403 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2395 2404 self.flags(rev))
2396 2405
2397 2406 tr.addbackup(self.indexfile, location='store')
2398 2407 if not self._inline:
2399 2408 tr.addbackup(self.datafile, location='store')
2400 2409
2401 2410 self.opener.rename(newrl.indexfile, self.indexfile)
2402 2411 if not self._inline:
2403 2412 self.opener.rename(newrl.datafile, self.datafile)
2404 2413
2405 2414 self.clearcaches()
2406 2415 self._loadindex(self.version, None)
2407 2416
2408 2417 def verifyintegrity(self, state):
2409 2418 """Verifies the integrity of the revlog.
2410 2419
2411 2420 Yields ``revlogproblem`` instances describing problems that are
2412 2421 found.
2413 2422 """
2414 2423 dd, di = self.checksize()
2415 2424 if dd:
2416 2425 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2417 2426 if di:
2418 2427 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2419 2428
2420 2429 version = self.version & 0xFFFF
2421 2430
2422 2431 # The verifier tells us what version revlog we should be.
2423 2432 if version != state['expectedversion']:
2424 2433 yield revlogproblem(
2425 2434 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2426 2435 (self.indexfile, version, state['expectedversion']))
2427 2436
2428 2437 state['skipread'] = set()
2429 2438
2430 2439 for rev in self:
2431 2440 node = self.node(rev)
2432 2441
2433 2442 # Verify contents. 4 cases to care about:
2434 2443 #
2435 2444 # common: the most common case
2436 2445 # rename: with a rename
2437 2446 # meta: file content starts with b'\1\n', the metadata
2438 2447 # header defined in filelog.py, but without a rename
2439 2448 # ext: content stored externally
2440 2449 #
2441 2450 # More formally, their differences are shown below:
2442 2451 #
2443 2452 # | common | rename | meta | ext
2444 2453 # -------------------------------------------------------
2445 2454 # flags() | 0 | 0 | 0 | not 0
2446 2455 # renamed() | False | True | False | ?
2447 2456 # rawtext[0:2]=='\1\n'| False | True | True | ?
2448 2457 #
2449 2458 # "rawtext" means the raw text stored in revlog data, which
2450 2459 # could be retrieved by "revision(rev, raw=True)". "text"
2451 2460 # mentioned below is "revision(rev, raw=False)".
2452 2461 #
2453 2462 # There are 3 different lengths stored physically:
2454 2463 # 1. L1: rawsize, stored in revlog index
2455 2464 # 2. L2: len(rawtext), stored in revlog data
2456 2465 # 3. L3: len(text), stored in revlog data if flags==0, or
2457 2466 # possibly somewhere else if flags!=0
2458 2467 #
2459 2468 # L1 should be equal to L2. L3 could be different from them.
2460 2469 # "text" may or may not affect commit hash depending on flag
2461 2470 # processors (see revlog.addflagprocessor).
2462 2471 #
2463 2472 # | common | rename | meta | ext
2464 2473 # -------------------------------------------------
2465 2474 # rawsize() | L1 | L1 | L1 | L1
2466 2475 # size() | L1 | L2-LM | L1(*) | L1 (?)
2467 2476 # len(rawtext) | L2 | L2 | L2 | L2
2468 2477 # len(text) | L2 | L2 | L2 | L3
2469 2478 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2470 2479 #
2471 2480 # LM: length of metadata, depending on rawtext
2472 2481 # (*): not ideal, see comment in filelog.size
2473 2482 # (?): could be "- len(meta)" if the resolved content has
2474 2483 # rename metadata
2475 2484 #
2476 2485 # Checks needed to be done:
2477 2486 # 1. length check: L1 == L2, in all cases.
2478 2487 # 2. hash check: depending on flag processor, we may need to
2479 2488 # use either "text" (external), or "rawtext" (in revlog).
2480 2489
2481 2490 try:
2482 2491 skipflags = state.get('skipflags', 0)
2483 2492 if skipflags:
2484 2493 skipflags &= self.flags(rev)
2485 2494
2486 2495 if skipflags:
2487 2496 state['skipread'].add(node)
2488 2497 else:
2489 2498 # Side-effect: read content and verify hash.
2490 2499 self.revision(node)
2491 2500
2492 2501 l1 = self.rawsize(rev)
2493 2502 l2 = len(self.revision(node, raw=True))
2494 2503
2495 2504 if l1 != l2:
2496 2505 yield revlogproblem(
2497 2506 error=_('unpacked size is %d, %d expected') % (l2, l1),
2498 2507 node=node)
2499 2508
2500 2509 except error.CensoredNodeError:
2501 2510 if state['erroroncensored']:
2502 2511 yield revlogproblem(error=_('censored file data'),
2503 2512 node=node)
2504 2513 state['skipread'].add(node)
2505 2514 except Exception as e:
2506 2515 yield revlogproblem(
2507 2516 error=_('unpacking %s: %s') % (short(node),
2508 2517 stringutil.forcebytestr(e)),
2509 2518 node=node)
2510 2519 state['skipread'].add(node)
2511 2520
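# A schematic of the length check enumerated above, with hypothetical
# ``rawsize(rev)`` and ``rawtext(rev)`` accessors: the stored length L1
# must equal the reconstructed length L2 for every revision.
def verifylengths_sketch(rawsize, rawtext, revs):
    problems = []
    for rev in revs:
        l1, l2 = rawsize(rev), len(rawtext(rev))
        if l1 != l2:
            problems.append((rev, 'unpacked size is %d, %d expected' % (l2, l1)))
    return problems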
2512 2521 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2513 2522 revisionscount=False, trackedsize=False,
2514 2523 storedsize=False):
2515 2524 d = {}
2516 2525
2517 2526 if exclusivefiles:
2518 2527 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2519 2528 if not self._inline:
2520 2529 d['exclusivefiles'].append((self.opener, self.datafile))
2521 2530
2522 2531 if sharedfiles:
2523 2532 d['sharedfiles'] = []
2524 2533
2525 2534 if revisionscount:
2526 2535 d['revisionscount'] = len(self)
2527 2536
2528 2537 if trackedsize:
2529 2538 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2530 2539
2531 2540 if storedsize:
2532 2541 d['storedsize'] = sum(self.opener.stat(path).st_size
2533 2542 for path in self.files())
2534 2543
2535 2544 return d