revlog: move loading of index data into own method...
Gregory Szorc
r40091:0a4625ff default
@@ -1,2557 +1,2560 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # Blanket mention of all the names to silence pyflakes "unused import"
80 80 # warnings. We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100
101 101 # Aliased for performance.
102 102 _zlibdecompress = zlib.decompress
103 103
104 104 # max size of revlog with inline data
105 105 _maxinline = 131072
106 106 _chunksize = 1048576
107 107
108 108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 109 _flagprocessors = {
110 110 REVIDX_ISCENSORED: None,
111 111 }
112 112
113 113 # Flag processors for REVIDX_ELLIPSIS.
114 114 def ellipsisreadprocessor(rl, text):
115 115 return text, False
116 116
117 117 def ellipsiswriteprocessor(rl, text):
118 118 return text, False
119 119
120 120 def ellipsisrawprocessor(rl, text):
121 121 return False
122 122
123 123 ellipsisprocessor = (
124 124 ellipsisreadprocessor,
125 125 ellipsiswriteprocessor,
126 126 ellipsisrawprocessor,
127 127 )
128 128
129 129 def addflagprocessor(flag, processor):
130 130 """Register a flag processor on a revision data flag.
131 131
132 132 Invariant:
133 133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 135 - Only one flag processor can be registered on a specific flag.
136 136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 137 following signatures:
138 138 - (read) f(self, rawtext) -> text, bool
139 139 - (write) f(self, text) -> rawtext, bool
140 140 - (raw) f(self, rawtext) -> bool
141 141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 142 directly visible to the user.
143 143 The boolean returned by these transforms is used to determine whether
144 144 the returned text can be used for hash integrity checking. For example,
145 145 if "write" returns False, then "text" is used to generate the hash. If
146 146 "write" returns True, the "rawtext" returned by "write" should be used
147 147 to generate the hash instead. Usually, "write" and "read" return
148 148 different booleans, and "raw" returns the same boolean as "write".
149 149
150 150 Note: The 'raw' transform is used for changegroup generation and in some
151 151 debug commands. In this case the transform only indicates whether the
152 152 contents can be used for hash integrity checks.
153 153 """
154 154 if not flag & REVIDX_KNOWN_FLAGS:
155 155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
156 156 raise error.ProgrammingError(msg)
157 157 if flag not in REVIDX_FLAGS_ORDER:
158 158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
159 159 raise error.ProgrammingError(msg)
160 160 if flag in _flagprocessors:
161 161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
162 162 raise error.Abort(msg)
163 163 _flagprocessors[flag] = processor
164 164
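
For illustration, a minimal sketch of the (read, write, raw) contract described above, modeled on the base64 processor used in Mercurial's test suite; registering it on REVIDX_EXTSTORED is an assumption made for this example, and the transform names are invented:

    import base64

    def _b64read(rl, rawtext):
        # stored base64 -> logical text; the node hash covers the logical
        # text, so the returned text can be used for integrity checking
        return base64.b64decode(rawtext), True

    def _b64write(rl, text):
        # logical text -> stored base64; returning False means the hash
        # is computed over "text", not over the encoded rawtext
        return base64.b64encode(text), False

    def _b64raw(rl, rawtext):
        # the stored rawtext does not match the node hash as-is
        return False

    addflagprocessor(REVIDX_EXTSTORED, (_b64read, _b64write, _b64raw))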
165 165 def getoffset(q):
166 166 return int(q >> 16)
167 167
168 168 def gettype(q):
169 169 return int(q & 0xFFFF)
170 170
171 171 def offset_type(offset, type):
172 172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
173 173 raise ValueError('unknown revlog index flags')
174 174 return int(int(offset) << 16 | type)
175 175
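
getoffset/gettype/offset_type above pack a byte offset into the high bits of a single integer and 16 bits of flags into the low bits. A quick round-trip sanity check (illustrative only):

    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED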
176 176 @attr.s(slots=True, frozen=True)
177 177 class _revisioninfo(object):
178 178 """Information about a revision that allows building its fulltext
179 179 node: expected hash of the revision
180 180 p1, p2: parent revs of the revision
181 181 btext: built text cache consisting of a one-element list
182 182 cachedelta: (baserev, uncompressed_delta) or None
183 183 flags: flags associated with the revision storage
184 184
185 185 One of btext[0] or cachedelta must be set.
186 186 """
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208 @interfaceutil.implementer(repository.iverifyproblem)
209 209 @attr.s(frozen=True)
210 210 class revlogproblem(object):
211 211 warning = attr.ib(default=None)
212 212 error = attr.ib(default=None)
213 213 node = attr.ib(default=None)
214 214
215 215 # index v0:
216 216 # 4 bytes: offset
217 217 # 4 bytes: compressed length
218 218 # 4 bytes: base rev
219 219 # 4 bytes: link rev
220 220 # 20 bytes: parent 1 nodeid
221 221 # 20 bytes: parent 2 nodeid
222 222 # 20 bytes: nodeid
223 223 indexformatv0 = struct.Struct(">4l20s20s20s")
224 224 indexformatv0_pack = indexformatv0.pack
225 225 indexformatv0_unpack = indexformatv0.unpack
226 226
227 227 class revlogoldindex(list):
228 228 def __getitem__(self, i):
229 229 if i == -1:
230 230 return (0, 0, 0, -1, -1, -1, -1, nullid)
231 231 return list.__getitem__(self, i)
232 232
233 233 class revlogoldio(object):
234 234 def __init__(self):
235 235 self.size = indexformatv0.size
236 236
237 237 def parseindex(self, data, inline):
238 238 s = self.size
239 239 index = []
240 240 nodemap = {nullid: nullrev}
241 241 n = off = 0
242 242 l = len(data)
243 243 while off + s <= l:
244 244 cur = data[off:off + s]
245 245 off += s
246 246 e = indexformatv0_unpack(cur)
247 247 # transform to revlogv1 format
248 248 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
249 249 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
250 250 index.append(e2)
251 251 nodemap[e[6]] = n
252 252 n += 1
253 253
254 254 return revlogoldindex(index), nodemap, None
255 255
256 256 def packentry(self, entry, node, version, rev):
257 257 if gettype(entry[0]):
258 258 raise error.RevlogError(_('index entry flags need revlog '
259 259 'version 1'))
260 260 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
261 261 node(entry[5]), node(entry[6]), entry[7])
262 262 return indexformatv0_pack(*e2)
263 263
264 264 # index ng:
265 265 # 6 bytes: offset
266 266 # 2 bytes: flags
267 267 # 4 bytes: compressed length
268 268 # 4 bytes: uncompressed length
269 269 # 4 bytes: base rev
270 270 # 4 bytes: link rev
271 271 # 4 bytes: parent 1 rev
272 272 # 4 bytes: parent 2 rev
273 273 # 32 bytes: nodeid
274 274 indexformatng = struct.Struct(">Qiiiiii20s12x")
275 275 indexformatng_pack = indexformatng.pack
276 276 versionformat = struct.Struct(">I")
277 277 versionformat_pack = versionformat.pack
278 278 versionformat_unpack = versionformat.unpack
279 279
280 280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
281 281 # signed integer)
282 282 _maxentrysize = 0x7fffffff
283 283
284 284 class revlogio(object):
285 285 def __init__(self):
286 286 self.size = indexformatng.size
287 287
288 288 def parseindex(self, data, inline):
289 289 # call the C implementation to parse the index data
290 290 index, cache = parsers.parse_index2(data, inline)
291 291 return index, getattr(index, 'nodemap', None), cache
292 292
293 293 def packentry(self, entry, node, version, rev):
294 294 p = indexformatng_pack(*entry)
295 295 if rev == 0:
296 296 p = versionformat_pack(version) + p[4:]
297 297 return p
298 298
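
Note how packentry() above splices the version word into the first four bytes of revision 0's entry: rev 0's offset is always zero, so the high bytes of its first field are free to carry the header. A standalone sketch of the same arithmetic (the entry values are made up):

    entry = (offset_type(0, 0), 100, 200, 0, 0, -1, -1, b'\x00' * 20)
    packed = indexformatng_pack(*entry)
    version = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    rev0 = versionformat_pack(version) + packed[4:]  # as packentry(rev=0)
    assert versionformat_unpack(rev0[:4])[0] & 0xFFFF == REVLOGV1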
299 299 class revlog(object):
300 300 """
301 301 the underlying revision storage object
302 302
303 303 A revlog consists of two parts, an index and the revision data.
304 304
305 305 The index is a file with a fixed record size containing
306 306 information on each revision, including its nodeid (hash), the
307 307 nodeids of its parents, the position and offset of its data within
308 308 the data file, and the revision it's based on. Finally, each entry
309 309 contains a linkrev entry that can serve as a pointer to external
310 310 data.
311 311
312 312 The revision data itself is a linear collection of data chunks.
313 313 Each chunk represents a revision and is usually represented as a
314 314 delta against the previous chunk. To bound lookup time, runs of
315 315 deltas are limited to about 2 times the length of the original
316 316 version data. This makes retrieval of a version proportional to
317 317 its size, or O(1) relative to the number of revisions.
318 318
319 319 Both pieces of the revlog are written to in an append-only
320 320 fashion, which means we never need to rewrite a file to insert or
321 321 remove data, and can use some simple techniques to avoid the need
322 322 for locking while reading.
323 323
324 324 If checkambig, indexfile is opened with checkambig=True at
325 325 writing, to avoid file stat ambiguity.
326 326
327 327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
328 328 index will be mmapped rather than read if it is larger than the
329 329 configured threshold.
330 330
331 331 If censorable is True, the revlog can have censored revisions.
332 332 """
333 333 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
334 334 mmaplargeindex=False, censorable=False):
335 335 """
336 336 create a revlog object
337 337
338 338 opener is a function that abstracts the file opening operation
339 339 and can be used to implement COW semantics or the like.
340 340 """
341 341 self.indexfile = indexfile
342 342 self.datafile = datafile or (indexfile[:-2] + ".d")
343 343 self.opener = opener
344 344 # When True, indexfile is opened with checkambig=True at writing, to
345 345 # avoid file stat ambiguity.
346 346 self._checkambig = checkambig
347 347 self._censorable = censorable
348 348 # 3-tuple of (node, rev, text) for a raw revision.
349 349 self._revisioncache = None
350 350 # Maps rev to chain base rev.
351 351 self._chainbasecache = util.lrucachedict(100)
352 352 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
353 353 self._chunkcache = (0, '')
354 354 # How much data to read and cache into the raw revlog data cache.
355 355 self._chunkcachesize = 65536
356 356 self._maxchainlen = None
357 357 self._deltabothparents = True
358 358 self.index = []
359 359 # Mapping of partial identifiers to full nodes.
360 360 self._pcache = {}
361 361 # Mapping of revision integer to full node.
362 362 self._nodecache = {nullid: nullrev}
363 363 self._nodepos = None
364 364 self._compengine = 'zlib'
365 365 self._maxdeltachainspan = -1
366 366 self._withsparseread = False
367 367 self._sparserevlog = False
368 368 self._srdensitythreshold = 0.50
369 369 self._srmingapsize = 262144
370 370
371 371 # Make copy of flag processors so each revlog instance can support
372 372 # custom flags.
373 373 self._flagprocessors = dict(_flagprocessors)
374 374
375 375 mmapindexthreshold = None
376 376 v = REVLOG_DEFAULT_VERSION
377 377 opts = getattr(opener, 'options', None)
378 378 if opts is not None:
379 379 if 'revlogv2' in opts:
380 380 # version 2 revlogs always use generaldelta.
381 381 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
382 382 elif 'revlogv1' in opts:
383 383 if 'generaldelta' in opts:
384 384 v |= FLAG_GENERALDELTA
385 385 else:
386 386 v = 0
387 387 if 'chunkcachesize' in opts:
388 388 self._chunkcachesize = opts['chunkcachesize']
389 389 if 'maxchainlen' in opts:
390 390 self._maxchainlen = opts['maxchainlen']
391 391 if 'deltabothparents' in opts:
392 392 self._deltabothparents = opts['deltabothparents']
393 393 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
394 394 if 'compengine' in opts:
395 395 self._compengine = opts['compengine']
396 396 if 'maxdeltachainspan' in opts:
397 397 self._maxdeltachainspan = opts['maxdeltachainspan']
398 398 if mmaplargeindex and 'mmapindexthreshold' in opts:
399 399 mmapindexthreshold = opts['mmapindexthreshold']
400 400 self._sparserevlog = bool(opts.get('sparse-revlog', False))
401 401 withsparseread = bool(opts.get('with-sparse-read', False))
402 402 # sparse-revlog forces sparse-read
403 403 self._withsparseread = self._sparserevlog or withsparseread
404 404 if 'sparse-read-density-threshold' in opts:
405 405 self._srdensitythreshold = opts['sparse-read-density-threshold']
406 406 if 'sparse-read-min-gap-size' in opts:
407 407 self._srmingapsize = opts['sparse-read-min-gap-size']
408 408 if opts.get('enableellipsis'):
409 409 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
410 410
411 411 if self._chunkcachesize <= 0:
412 412 raise error.RevlogError(_('revlog chunk cache size %r is not '
413 413 'greater than 0') % self._chunkcachesize)
414 414 elif self._chunkcachesize & (self._chunkcachesize - 1):
415 415 raise error.RevlogError(_('revlog chunk cache size %r is not a '
416 416 'power of 2') % self._chunkcachesize)
417 417
418 self._loadindex(v, mmapindexthreshold)
419
420 def _loadindex(self, v, mmapindexthreshold):
418 421 indexdata = ''
419 422 self._initempty = True
420 423 try:
421 424 with self._indexfp() as f:
422 425 if (mmapindexthreshold is not None and
423 426 self.opener.fstat(f).st_size >= mmapindexthreshold):
424 427 indexdata = util.buffer(util.mmapread(f))
425 428 else:
426 429 indexdata = f.read()
427 430 if len(indexdata) > 0:
428 431 v = versionformat_unpack(indexdata[:4])[0]
429 432 self._initempty = False
430 433 except IOError as inst:
431 434 if inst.errno != errno.ENOENT:
432 435 raise
433 436
434 437 self.version = v
435 438 self._inline = v & FLAG_INLINE_DATA
436 439 self._generaldelta = v & FLAG_GENERALDELTA
437 440 flags = v & ~0xFFFF
438 441 fmt = v & 0xFFFF
439 442 if fmt == REVLOGV0:
440 443 if flags:
441 444 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
442 445 'revlog %s') %
443 446 (flags >> 16, fmt, self.indexfile))
444 447 elif fmt == REVLOGV1:
445 448 if flags & ~REVLOGV1_FLAGS:
446 449 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
447 450 'revlog %s') %
448 451 (flags >> 16, fmt, self.indexfile))
449 452 elif fmt == REVLOGV2:
450 453 if flags & ~REVLOGV2_FLAGS:
451 454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 455 'revlog %s') %
453 456 (flags >> 16, fmt, self.indexfile))
454 457 else:
455 458 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
456 459 (fmt, self.indexfile))
457 460
458 461 self._storedeltachains = True
459 462
460 463 self._io = revlogio()
461 464 if self.version == REVLOGV0:
462 465 self._io = revlogoldio()
463 466 try:
464 467 d = self._io.parseindex(indexdata, self._inline)
465 468 except (ValueError, IndexError):
466 469 raise error.RevlogError(_("index %s is corrupted") %
467 470 self.indexfile)
468 471 self.index, nodemap, self._chunkcache = d
469 472 if nodemap is not None:
470 473 self.nodemap = self._nodecache = nodemap
471 474 if not self._chunkcache:
472 475 self._chunkclear()
473 476 # revnum -> (chain-length, sum-delta-length)
474 477 self._chaininfocache = {}
475 478 # revlog header -> revlog compressor
476 479 self._decompressors = {}
477 480
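
Moving this logic out of __init__ means the index can be (re)loaded without re-running option processing. A hypothetical use, assuming an existing revlog instance rl whose index file changed on disk:

    # re-read index data; REVLOG_DEFAULT_VERSION is only the fallback
    # used when the index file is missing or empty
    rl._loadindex(REVLOG_DEFAULT_VERSION, None)
    assert len(rl) == len(rl.index)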
478 481 @util.propertycache
479 482 def _compressor(self):
480 483 return util.compengines[self._compengine].revlogcompressor()
481 484
482 485 def _indexfp(self, mode='r'):
483 486 """file object for the revlog's index file"""
484 487 args = {r'mode': mode}
485 488 if mode != 'r':
486 489 args[r'checkambig'] = self._checkambig
487 490 if mode == 'w':
488 491 args[r'atomictemp'] = True
489 492 return self.opener(self.indexfile, **args)
490 493
491 494 def _datafp(self, mode='r'):
492 495 """file object for the revlog's data file"""
493 496 return self.opener(self.datafile, mode=mode)
494 497
495 498 @contextlib.contextmanager
496 499 def _datareadfp(self, existingfp=None):
497 500 """file object suitable to read data"""
498 501 if existingfp is not None:
499 502 yield existingfp
500 503 else:
501 504 if self._inline:
502 505 func = self._indexfp
503 506 else:
504 507 func = self._datafp
505 508 with func() as fp:
506 509 yield fp
507 510
508 511 def tip(self):
509 512 return self.node(len(self.index) - 1)
510 513 def __contains__(self, rev):
511 514 return 0 <= rev < len(self)
512 515 def __len__(self):
513 516 return len(self.index)
514 517 def __iter__(self):
515 518 return iter(pycompat.xrange(len(self)))
516 519 def revs(self, start=0, stop=None):
517 520 """iterate over all revs in this revlog (from start to stop)"""
518 521 return storageutil.iterrevs(len(self), start=start, stop=stop)
519 522
520 523 @util.propertycache
521 524 def nodemap(self):
522 525 if self.index:
523 526 # populate mapping down to the initial node
524 527 node0 = self.index[0][7] # get around changelog filtering
525 528 self.rev(node0)
526 529 return self._nodecache
527 530
528 531 def hasnode(self, node):
529 532 try:
530 533 self.rev(node)
531 534 return True
532 535 except KeyError:
533 536 return False
534 537
535 538 def candelta(self, baserev, rev):
536 539 """whether two revisions (baserev, rev) can be delta-ed or not"""
537 540 # Disable delta if either rev requires a content-changing flag
538 541 # processor (ex. LFS). This is because such a flag processor can alter
539 542 # the rawtext content that the delta will be based on, and two clients
540 543 # could have the same revlog node with different flags (i.e. different
541 544 # rawtext contents) and the delta could be incompatible.
542 545 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
543 546 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
544 547 return False
545 548 return True
546 549
547 550 def clearcaches(self):
548 551 self._revisioncache = None
549 552 self._chainbasecache.clear()
550 553 self._chunkcache = (0, '')
551 554 self._pcache = {}
552 555
553 556 try:
554 557 self._nodecache.clearcaches()
555 558 except AttributeError:
556 559 self._nodecache = {nullid: nullrev}
557 560 self._nodepos = None
558 561
559 562 def rev(self, node):
560 563 try:
561 564 return self._nodecache[node]
562 565 except TypeError:
563 566 raise
564 567 except error.RevlogError:
565 568 # parsers.c radix tree lookup failed
566 569 if node == wdirid or node in wdirfilenodeids:
567 570 raise error.WdirUnsupported
568 571 raise error.LookupError(node, self.indexfile, _('no node'))
569 572 except KeyError:
570 573 # pure python cache lookup failed
571 574 n = self._nodecache
572 575 i = self.index
573 576 p = self._nodepos
574 577 if p is None:
575 578 p = len(i) - 1
576 579 else:
577 580 assert p < len(i)
578 581 for r in pycompat.xrange(p, -1, -1):
579 582 v = i[r][7]
580 583 n[v] = r
581 584 if v == node:
582 585 self._nodepos = r - 1
583 586 return r
584 587 if node == wdirid or node in wdirfilenodeids:
585 588 raise error.WdirUnsupported
586 589 raise error.LookupError(node, self.indexfile, _('no node'))
587 590
588 591 # Accessors for index entries.
589 592
590 593 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
591 594 # are flags.
592 595 def start(self, rev):
593 596 return int(self.index[rev][0] >> 16)
594 597
595 598 def flags(self, rev):
596 599 return self.index[rev][0] & 0xFFFF
597 600
598 601 def length(self, rev):
599 602 return self.index[rev][1]
600 603
601 604 def rawsize(self, rev):
602 605 """return the length of the uncompressed text for a given revision"""
603 606 l = self.index[rev][2]
604 607 if l >= 0:
605 608 return l
606 609
607 610 t = self.revision(rev, raw=True)
608 611 return len(t)
609 612
610 613 def size(self, rev):
611 614 """length of non-raw text (processed by a "read" flag processor)"""
612 615 # fast path: if no "read" flag processor could change the content,
613 616 # size is rawsize. note: ELLIPSIS is known not to change the content.
614 617 flags = self.flags(rev)
615 618 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
616 619 return self.rawsize(rev)
617 620
618 621 return len(self.revision(rev, raw=False))
619 622
620 623 def chainbase(self, rev):
621 624 base = self._chainbasecache.get(rev)
622 625 if base is not None:
623 626 return base
624 627
625 628 index = self.index
626 629 iterrev = rev
627 630 base = index[iterrev][3]
628 631 while base != iterrev:
629 632 iterrev = base
630 633 base = index[iterrev][3]
631 634
632 635 self._chainbasecache[rev] = base
633 636 return base
634 637
635 638 def linkrev(self, rev):
636 639 return self.index[rev][4]
637 640
638 641 def parentrevs(self, rev):
639 642 try:
640 643 entry = self.index[rev]
641 644 except IndexError:
642 645 if rev == wdirrev:
643 646 raise error.WdirUnsupported
644 647 raise
645 648
646 649 return entry[5], entry[6]
647 650
648 651 def node(self, rev):
649 652 try:
650 653 return self.index[rev][7]
651 654 except IndexError:
652 655 if rev == wdirrev:
653 656 raise error.WdirUnsupported
654 657 raise
655 658
656 659 # Derived from index values.
657 660
658 661 def end(self, rev):
659 662 return self.start(rev) + self.length(rev)
660 663
661 664 def parents(self, node):
662 665 i = self.index
663 666 d = i[self.rev(node)]
664 667 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
665 668
666 669 def chainlen(self, rev):
667 670 return self._chaininfo(rev)[0]
668 671
669 672 def _chaininfo(self, rev):
670 673 chaininfocache = self._chaininfocache
671 674 if rev in chaininfocache:
672 675 return chaininfocache[rev]
673 676 index = self.index
674 677 generaldelta = self._generaldelta
675 678 iterrev = rev
676 679 e = index[iterrev]
677 680 clen = 0
678 681 compresseddeltalen = 0
679 682 while iterrev != e[3]:
680 683 clen += 1
681 684 compresseddeltalen += e[1]
682 685 if generaldelta:
683 686 iterrev = e[3]
684 687 else:
685 688 iterrev -= 1
686 689 if iterrev in chaininfocache:
687 690 t = chaininfocache[iterrev]
688 691 clen += t[0]
689 692 compresseddeltalen += t[1]
690 693 break
691 694 e = index[iterrev]
692 695 else:
693 696 # Add text length of base since decompressing that also takes
694 697 # work. For cache hits the length is already included.
695 698 compresseddeltalen += e[1]
696 699 r = (clen, compresseddeltalen)
697 700 chaininfocache[rev] = r
698 701 return r
699 702
700 703 def _deltachain(self, rev, stoprev=None):
701 704 """Obtain the delta chain for a revision.
702 705
703 706 ``stoprev`` specifies a revision to stop at. If not specified, we
704 707 stop at the base of the chain.
705 708
706 709 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
707 710 revs in ascending order and ``stopped`` is a bool indicating whether
708 711 ``stoprev`` was hit.
709 712 """
710 713 # Try C implementation.
711 714 try:
712 715 return self.index.deltachain(rev, stoprev, self._generaldelta)
713 716 except AttributeError:
714 717 pass
715 718
716 719 chain = []
717 720
718 721 # Alias to prevent attribute lookup in tight loop.
719 722 index = self.index
720 723 generaldelta = self._generaldelta
721 724
722 725 iterrev = rev
723 726 e = index[iterrev]
724 727 while iterrev != e[3] and iterrev != stoprev:
725 728 chain.append(iterrev)
726 729 if generaldelta:
727 730 iterrev = e[3]
728 731 else:
729 732 iterrev -= 1
730 733 e = index[iterrev]
731 734
732 735 if iterrev == stoprev:
733 736 stopped = True
734 737 else:
735 738 chain.append(iterrev)
736 739 stopped = False
737 740
738 741 chain.reverse()
739 742 return chain, stopped
740 743
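
In miniature, this is how revision() (further below) consumes the chain for a revision number rev: fetch the decompressed chunks for the chain, then fold the deltas over the base text. A sketch, assuming an existing revlog instance rl:

    chain, stopped = rl._deltachain(rev)   # stopped is False: no stoprev
    bins = rl._chunks(chain)               # decompressed chunk per rev
    text = mdiff.patches(bytes(bins[0]), bins[1:])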
741 744 def ancestors(self, revs, stoprev=0, inclusive=False):
742 745 """Generate the ancestors of 'revs' in reverse topological order.
743 746 Does not generate revs lower than stoprev.
744 747
745 748 See the documentation for ancestor.lazyancestors for more details."""
746 749
747 750 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
748 751 inclusive=inclusive)
749 752
750 753 def descendants(self, revs):
751 754 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
752 755
753 756 def findcommonmissing(self, common=None, heads=None):
754 757 """Return a tuple of the ancestors of common and the ancestors of heads
755 758 that are not ancestors of common. In revset terminology, we return the
756 759 tuple:
757 760
758 761 ::common, (::heads) - (::common)
759 762
760 763 The list is sorted by revision number, meaning it is
761 764 topologically sorted.
762 765
763 766 'heads' and 'common' are both lists of node IDs. If heads is
764 767 not supplied, uses all of the revlog's heads. If common is not
765 768 supplied, uses nullid."""
766 769 if common is None:
767 770 common = [nullid]
768 771 if heads is None:
769 772 heads = self.heads()
770 773
771 774 common = [self.rev(n) for n in common]
772 775 heads = [self.rev(n) for n in heads]
773 776
774 777 # we want the ancestors, but inclusive
775 778 class lazyset(object):
776 779 def __init__(self, lazyvalues):
777 780 self.addedvalues = set()
778 781 self.lazyvalues = lazyvalues
779 782
780 783 def __contains__(self, value):
781 784 return value in self.addedvalues or value in self.lazyvalues
782 785
783 786 def __iter__(self):
784 787 added = self.addedvalues
785 788 for r in added:
786 789 yield r
787 790 for r in self.lazyvalues:
788 791 if r not in added:
789 792 yield r
790 793
791 794 def add(self, value):
792 795 self.addedvalues.add(value)
793 796
794 797 def update(self, values):
795 798 self.addedvalues.update(values)
796 799
797 800 has = lazyset(self.ancestors(common))
798 801 has.add(nullrev)
799 802 has.update(common)
800 803
801 804 # take all ancestors from heads that aren't in has
802 805 missing = set()
803 806 visit = collections.deque(r for r in heads if r not in has)
804 807 while visit:
805 808 r = visit.popleft()
806 809 if r in missing:
807 810 continue
808 811 else:
809 812 missing.add(r)
810 813 for p in self.parentrevs(r):
811 814 if p not in has:
812 815 visit.append(p)
813 816 missing = list(missing)
814 817 missing.sort()
815 818 return has, [self.node(miss) for miss in missing]
816 819
817 820 def incrementalmissingrevs(self, common=None):
818 821 """Return an object that can be used to incrementally compute the
819 822 revision numbers of the ancestors of arbitrary sets that are not
820 823 ancestors of common. This is an ancestor.incrementalmissingancestors
821 824 object.
822 825
823 826 'common' is a list of revision numbers. If common is not supplied, uses
824 827 nullrev.
825 828 """
826 829 if common is None:
827 830 common = [nullrev]
828 831
829 832 return ancestor.incrementalmissingancestors(self.parentrevs, common)
830 833
831 834 def findmissingrevs(self, common=None, heads=None):
832 835 """Return the revision numbers of the ancestors of heads that
833 836 are not ancestors of common.
834 837
835 838 More specifically, return a list of revision numbers corresponding to
836 839 nodes N such that every N satisfies the following constraints:
837 840
838 841 1. N is an ancestor of some node in 'heads'
839 842 2. N is not an ancestor of any node in 'common'
840 843
841 844 The list is sorted by revision number, meaning it is
842 845 topologically sorted.
843 846
844 847 'heads' and 'common' are both lists of revision numbers. If heads is
845 848 not supplied, uses all of the revlog's heads. If common is not
846 849 supplied, uses nullrev."""
847 850 if common is None:
848 851 common = [nullrev]
849 852 if heads is None:
850 853 heads = self.headrevs()
851 854
852 855 inc = self.incrementalmissingrevs(common=common)
853 856 return inc.missingancestors(heads)
854 857
855 858 def findmissing(self, common=None, heads=None):
856 859 """Return the ancestors of heads that are not ancestors of common.
857 860
858 861 More specifically, return a list of nodes N such that every N
859 862 satisfies the following constraints:
860 863
861 864 1. N is an ancestor of some node in 'heads'
862 865 2. N is not an ancestor of any node in 'common'
863 866
864 867 The list is sorted by revision number, meaning it is
865 868 topologically sorted.
866 869
867 870 'heads' and 'common' are both lists of node IDs. If heads is
868 871 not supplied, uses all of the revlog's heads. If common is not
869 872 supplied, uses nullid."""
870 873 if common is None:
871 874 common = [nullid]
872 875 if heads is None:
873 876 heads = self.heads()
874 877
875 878 common = [self.rev(n) for n in common]
876 879 heads = [self.rev(n) for n in heads]
877 880
878 881 inc = self.incrementalmissingrevs(common=common)
879 882 return [self.node(r) for r in inc.missingancestors(heads)]
880 883
881 884 def nodesbetween(self, roots=None, heads=None):
882 885 """Return a topological path from 'roots' to 'heads'.
883 886
884 887 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
885 888 topologically sorted list of all nodes N that satisfy both of
886 889 these constraints:
887 890
888 891 1. N is a descendant of some node in 'roots'
889 892 2. N is an ancestor of some node in 'heads'
890 893
891 894 Every node is considered to be both a descendant and an ancestor
892 895 of itself, so every reachable node in 'roots' and 'heads' will be
893 896 included in 'nodes'.
894 897
895 898 'outroots' is the list of reachable nodes in 'roots', i.e., the
896 899 subset of 'roots' that is returned in 'nodes'. Likewise,
897 900 'outheads' is the subset of 'heads' that is also in 'nodes'.
898 901
899 902 'roots' and 'heads' are both lists of node IDs. If 'roots' is
900 903 unspecified, uses nullid as the only root. If 'heads' is
901 904 unspecified, uses list of all of the revlog's heads."""
902 905 nonodes = ([], [], [])
903 906 if roots is not None:
904 907 roots = list(roots)
905 908 if not roots:
906 909 return nonodes
907 910 lowestrev = min([self.rev(n) for n in roots])
908 911 else:
909 912 roots = [nullid] # Everybody's a descendant of nullid
910 913 lowestrev = nullrev
911 914 if (lowestrev == nullrev) and (heads is None):
912 915 # We want _all_ the nodes!
913 916 return ([self.node(r) for r in self], [nullid], list(self.heads()))
914 917 if heads is None:
915 918 # All nodes are ancestors, so the latest ancestor is the last
916 919 # node.
917 920 highestrev = len(self) - 1
918 921 # Set ancestors to None to signal that every node is an ancestor.
919 922 ancestors = None
920 923 # Set heads to an empty dictionary for later discovery of heads
921 924 heads = {}
922 925 else:
923 926 heads = list(heads)
924 927 if not heads:
925 928 return nonodes
926 929 ancestors = set()
927 930 # Turn heads into a dictionary so we can remove 'fake' heads.
928 931 # Also, later we will be using it to filter out the heads we can't
929 932 # find from roots.
930 933 heads = dict.fromkeys(heads, False)
931 934 # Start at the top and keep marking parents until we're done.
932 935 nodestotag = set(heads)
933 936 # Remember where the top was so we can use it as a limit later.
934 937 highestrev = max([self.rev(n) for n in nodestotag])
935 938 while nodestotag:
936 939 # grab a node to tag
937 940 n = nodestotag.pop()
938 941 # Never tag nullid
939 942 if n == nullid:
940 943 continue
941 944 # A node's revision number represents its place in a
942 945 # topologically sorted list of nodes.
943 946 r = self.rev(n)
944 947 if r >= lowestrev:
945 948 if n not in ancestors:
946 949 # If we are possibly a descendant of one of the roots
947 950 # and we haven't already been marked as an ancestor
948 951 ancestors.add(n) # Mark as ancestor
949 952 # Add non-nullid parents to list of nodes to tag.
950 953 nodestotag.update([p for p in self.parents(n) if
951 954 p != nullid])
952 955 elif n in heads: # We've seen it before, is it a fake head?
953 956 # So it is, real heads should not be the ancestors of
954 957 # any other heads.
955 958 heads.pop(n)
956 959 if not ancestors:
957 960 return nonodes
958 961 # Now that we have our set of ancestors, we want to remove any
959 962 # roots that are not ancestors.
960 963
961 964 # If one of the roots was nullid, everything is included anyway.
962 965 if lowestrev > nullrev:
963 966 # But, since we weren't, let's recompute the lowest rev to not
964 967 # include roots that aren't ancestors.
965 968
966 969 # Filter out roots that aren't ancestors of heads
967 970 roots = [root for root in roots if root in ancestors]
968 971 # Recompute the lowest revision
969 972 if roots:
970 973 lowestrev = min([self.rev(root) for root in roots])
971 974 else:
972 975 # No more roots? Return empty list
973 976 return nonodes
974 977 else:
975 978 # We are descending from nullid, and don't need to care about
976 979 # any other roots.
977 980 lowestrev = nullrev
978 981 roots = [nullid]
979 982 # Transform our roots list into a set.
980 983 descendants = set(roots)
981 984 # Also, keep the original roots so we can filter out roots that aren't
982 985 # 'real' roots (i.e. are descended from other roots).
983 986 roots = descendants.copy()
984 987 # Our topologically sorted list of output nodes.
985 988 orderedout = []
986 989 # Don't start at nullid since we don't want nullid in our output list,
987 990 # and if nullid shows up in descendants, empty parents will look like
988 991 # they're descendants.
989 992 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
990 993 n = self.node(r)
991 994 isdescendant = False
992 995 if lowestrev == nullrev: # Everybody is a descendant of nullid
993 996 isdescendant = True
994 997 elif n in descendants:
995 998 # n is already a descendant
996 999 isdescendant = True
997 1000 # This check only needs to be done here because all the roots
998 1001 # will start being marked as descendants before the loop.
999 1002 if n in roots:
1000 1003 # If n was a root, check if it's a 'real' root.
1001 1004 p = tuple(self.parents(n))
1002 1005 # If any of its parents are descendants, it's not a root.
1003 1006 if (p[0] in descendants) or (p[1] in descendants):
1004 1007 roots.remove(n)
1005 1008 else:
1006 1009 p = tuple(self.parents(n))
1007 1010 # A node is a descendant if either of its parents is a
1008 1011 # descendant. (We seeded the descendants set with the roots
1009 1012 # up there, remember?)
1010 1013 if (p[0] in descendants) or (p[1] in descendants):
1011 1014 descendants.add(n)
1012 1015 isdescendant = True
1013 1016 if isdescendant and ((ancestors is None) or (n in ancestors)):
1014 1017 # Only include nodes that are both descendants and ancestors.
1015 1018 orderedout.append(n)
1016 1019 if (ancestors is not None) and (n in heads):
1017 1020 # We're trying to figure out which heads are reachable
1018 1021 # from roots.
1019 1022 # Mark this head as having been reached
1020 1023 heads[n] = True
1021 1024 elif ancestors is None:
1022 1025 # Otherwise, we're trying to discover the heads.
1023 1026 # Assume this is a head because if it isn't, the next step
1024 1027 # will eventually remove it.
1025 1028 heads[n] = True
1026 1029 # But, obviously its parents aren't.
1027 1030 for p in self.parents(n):
1028 1031 heads.pop(p, None)
1029 1032 heads = [head for head, flag in heads.iteritems() if flag]
1030 1033 roots = list(roots)
1031 1034 assert orderedout
1032 1035 assert roots
1033 1036 assert heads
1034 1037 return (orderedout, roots, heads)
1035 1038
1036 1039 def headrevs(self):
1037 1040 try:
1038 1041 return self.index.headrevs()
1039 1042 except AttributeError:
1040 1043 return self._headrevs()
1041 1044
1042 1045 def computephases(self, roots):
1043 1046 return self.index.computephasesmapsets(roots)
1044 1047
1045 1048 def _headrevs(self):
1046 1049 count = len(self)
1047 1050 if not count:
1048 1051 return [nullrev]
1049 1052 # we won't iterate over filtered revs, so nobody is a head at start
1050 1053 ishead = [0] * (count + 1)
1051 1054 index = self.index
1052 1055 for r in self:
1053 1056 ishead[r] = 1 # I may be a head
1054 1057 e = index[r]
1055 1058 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1056 1059 return [r for r, val in enumerate(ishead) if val]
1057 1060
1058 1061 def heads(self, start=None, stop=None):
1059 1062 """return the list of all nodes that have no children
1060 1063
1061 1064 if start is specified, only heads that are descendants of
1062 1065 start will be returned
1063 1066 if stop is specified, it will consider all the revs from stop
1064 1067 as if they had no children
1065 1068 """
1066 1069 if start is None and stop is None:
1067 1070 if not len(self):
1068 1071 return [nullid]
1069 1072 return [self.node(r) for r in self.headrevs()]
1070 1073
1071 1074 if start is None:
1072 1075 start = nullrev
1073 1076 else:
1074 1077 start = self.rev(start)
1075 1078
1076 1079 stoprevs = set(self.rev(n) for n in stop or [])
1077 1080
1078 1081 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1079 1082 stoprevs=stoprevs)
1080 1083
1081 1084 return [self.node(rev) for rev in revs]
1082 1085
1083 1086 def children(self, node):
1084 1087 """find the children of a given node"""
1085 1088 c = []
1086 1089 p = self.rev(node)
1087 1090 for r in self.revs(start=p + 1):
1088 1091 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1089 1092 if prevs:
1090 1093 for pr in prevs:
1091 1094 if pr == p:
1092 1095 c.append(self.node(r))
1093 1096 elif p == nullrev:
1094 1097 c.append(self.node(r))
1095 1098 return c
1096 1099
1097 1100 def commonancestorsheads(self, a, b):
1098 1101 """calculate all the heads of the common ancestors of nodes a and b"""
1099 1102 a, b = self.rev(a), self.rev(b)
1100 1103 ancs = self._commonancestorsheads(a, b)
1101 1104 return pycompat.maplist(self.node, ancs)
1102 1105
1103 1106 def _commonancestorsheads(self, *revs):
1104 1107 """calculate all the heads of the common ancestors of revs"""
1105 1108 try:
1106 1109 ancs = self.index.commonancestorsheads(*revs)
1107 1110 except (AttributeError, OverflowError): # C implementation failed
1108 1111 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1109 1112 return ancs
1110 1113
1111 1114 def isancestor(self, a, b):
1112 1115 """return True if node a is an ancestor of node b
1113 1116
1114 1117 A revision is considered an ancestor of itself."""
1115 1118 a, b = self.rev(a), self.rev(b)
1116 1119 return self.isancestorrev(a, b)
1117 1120
1118 1121 def isancestorrev(self, a, b):
1119 1122 """return True if revision a is an ancestor of revision b
1120 1123
1121 1124 A revision is considered an ancestor of itself.
1122 1125
1123 1126 The implementation of this is trivial but the use of
1124 1127 commonancestorsheads is not."""
1125 1128 if a == nullrev:
1126 1129 return True
1127 1130 elif a == b:
1128 1131 return True
1129 1132 elif a > b:
1130 1133 return False
1131 1134 return a in self._commonancestorsheads(a, b)
1132 1135
1133 1136 def ancestor(self, a, b):
1134 1137 """calculate the "best" common ancestor of nodes a and b"""
1135 1138
1136 1139 a, b = self.rev(a), self.rev(b)
1137 1140 try:
1138 1141 ancs = self.index.ancestors(a, b)
1139 1142 except (AttributeError, OverflowError):
1140 1143 ancs = ancestor.ancestors(self.parentrevs, a, b)
1141 1144 if ancs:
1142 1145 # choose a consistent winner when there's a tie
1143 1146 return min(map(self.node, ancs))
1144 1147 return nullid
1145 1148
1146 1149 def _match(self, id):
1147 1150 if isinstance(id, int):
1148 1151 # rev
1149 1152 return self.node(id)
1150 1153 if len(id) == 20:
1151 1154 # possibly a binary node
1152 1155 # odds of a binary node being all hex in ASCII are 1 in 10**25
1153 1156 try:
1154 1157 node = id
1155 1158 self.rev(node) # quick search the index
1156 1159 return node
1157 1160 except error.LookupError:
1158 1161 pass # may be partial hex id
1159 1162 try:
1160 1163 # str(rev)
1161 1164 rev = int(id)
1162 1165 if "%d" % rev != id:
1163 1166 raise ValueError
1164 1167 if rev < 0:
1165 1168 rev = len(self) + rev
1166 1169 if rev < 0 or rev >= len(self):
1167 1170 raise ValueError
1168 1171 return self.node(rev)
1169 1172 except (ValueError, OverflowError):
1170 1173 pass
1171 1174 if len(id) == 40:
1172 1175 try:
1173 1176 # a full hex nodeid?
1174 1177 node = bin(id)
1175 1178 self.rev(node)
1176 1179 return node
1177 1180 except (TypeError, error.LookupError):
1178 1181 pass
1179 1182
1180 1183 def _partialmatch(self, id):
1182 1185 # we don't care about wdirfilenodeids as they should always be full hashes
1182 1185 maybewdir = wdirhex.startswith(id)
1183 1186 try:
1184 1187 partial = self.index.partialmatch(id)
1185 1188 if partial and self.hasnode(partial):
1186 1189 if maybewdir:
1187 1190 # single 'ff...' match in radix tree, ambiguous with wdir
1188 1191 raise error.RevlogError
1189 1192 return partial
1190 1193 if maybewdir:
1191 1194 # no 'ff...' match in radix tree, wdir identified
1192 1195 raise error.WdirUnsupported
1193 1196 return None
1194 1197 except error.RevlogError:
1195 1198 # parsers.c radix tree lookup gave multiple matches
1196 1199 # fast path: for unfiltered changelog, radix tree is accurate
1197 1200 if not getattr(self, 'filteredrevs', None):
1198 1201 raise error.AmbiguousPrefixLookupError(
1199 1202 id, self.indexfile, _('ambiguous identifier'))
1200 1203 # fall through to slow path that filters hidden revisions
1201 1204 except (AttributeError, ValueError):
1202 1205 # we are pure python, or key was too short to search radix tree
1203 1206 pass
1204 1207
1205 1208 if id in self._pcache:
1206 1209 return self._pcache[id]
1207 1210
1208 1211 if len(id) <= 40:
1209 1212 try:
1210 1213 # hex(node)[:...]
1211 1214 l = len(id) // 2 # grab an even number of digits
1212 1215 prefix = bin(id[:l * 2])
1213 1216 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1214 1217 nl = [n for n in nl if hex(n).startswith(id) and
1215 1218 self.hasnode(n)]
1216 1219 if nullhex.startswith(id):
1217 1220 nl.append(nullid)
1218 1221 if len(nl) > 0:
1219 1222 if len(nl) == 1 and not maybewdir:
1220 1223 self._pcache[id] = nl[0]
1221 1224 return nl[0]
1222 1225 raise error.AmbiguousPrefixLookupError(
1223 1226 id, self.indexfile, _('ambiguous identifier'))
1224 1227 if maybewdir:
1225 1228 raise error.WdirUnsupported
1226 1229 return None
1227 1230 except TypeError:
1228 1231 pass
1229 1232
1230 1233 def lookup(self, id):
1231 1234 """locate a node based on:
1232 1235 - revision number or str(revision number)
1233 1236 - nodeid or subset of hex nodeid
1234 1237 """
1235 1238 n = self._match(id)
1236 1239 if n is not None:
1237 1240 return n
1238 1241 n = self._partialmatch(id)
1239 1242 if n:
1240 1243 return n
1241 1244
1242 1245 raise error.LookupError(id, self.indexfile, _('no match found'))
1243 1246
1244 1247 def shortest(self, node, minlength=1):
1245 1248 """Find the shortest unambiguous prefix that matches node."""
1246 1249 def isvalid(prefix):
1247 1250 try:
1248 1251 node = self._partialmatch(prefix)
1249 1252 except error.AmbiguousPrefixLookupError:
1250 1253 return False
1251 1254 except error.WdirUnsupported:
1252 1255 # single 'ff...' match
1253 1256 return True
1254 1257 if node is None:
1255 1258 raise error.LookupError(node, self.indexfile, _('no node'))
1256 1259 return True
1257 1260
1258 1261 def maybewdir(prefix):
1259 1262 return all(c == 'f' for c in prefix)
1260 1263
1261 1264 hexnode = hex(node)
1262 1265
1263 1266 def disambiguate(hexnode, minlength):
1264 1267 """Disambiguate against wdirid."""
1265 1268 for length in range(minlength, 41):
1266 1269 prefix = hexnode[:length]
1267 1270 if not maybewdir(prefix):
1268 1271 return prefix
1269 1272
1270 1273 if not getattr(self, 'filteredrevs', None):
1271 1274 try:
1272 1275 length = max(self.index.shortest(node), minlength)
1273 1276 return disambiguate(hexnode, length)
1274 1277 except error.RevlogError:
1275 1278 if node != wdirid:
1276 1279 raise error.LookupError(node, self.indexfile, _('no node'))
1277 1280 except AttributeError:
1278 1281 # Fall through to pure code
1279 1282 pass
1280 1283
1281 1284 if node == wdirid:
1282 1285 for length in range(minlength, 41):
1283 1286 prefix = hexnode[:length]
1284 1287 if isvalid(prefix):
1285 1288 return prefix
1286 1289
1287 1290 for length in range(minlength, 41):
1288 1291 prefix = hexnode[:length]
1289 1292 if isvalid(prefix):
1290 1293 return disambiguate(hexnode, length)
1291 1294
1292 1295 def cmp(self, node, text):
1293 1296 """compare text with a given file revision
1294 1297
1295 1298 returns True if text is different from what is stored.
1296 1299 """
1297 1300 p1, p2 = self.parents(node)
1298 1301 return storageutil.hashrevisionsha1(text, p1, p2) != node
1299 1302
1300 1303 def _cachesegment(self, offset, data):
1301 1304 """Add a segment to the revlog cache.
1302 1305
1303 1306 Accepts an absolute offset and the data that is at that location.
1304 1307 """
1305 1308 o, d = self._chunkcache
1306 1309 # try to add to existing cache
1307 1310 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1308 1311 self._chunkcache = o, d + data
1309 1312 else:
1310 1313 self._chunkcache = offset, data
1311 1314
1312 1315 def _readsegment(self, offset, length, df=None):
1313 1316 """Load a segment of raw data from the revlog.
1314 1317
1315 1318 Accepts an absolute offset, length to read, and an optional existing
1316 1319 file handle to read from.
1317 1320
1318 1321 If an existing file handle is passed, its seek position will be
1319 1322 moved and will NOT be restored afterwards.
1320 1323
1321 1324 Returns a str or buffer of raw byte data.
1322 1325 """
1323 1326 # Cache data both forward and backward around the requested
1324 1327 # data, in a fixed size window. This helps speed up operations
1325 1328 # involving reading the revlog backwards.
1326 1329 cachesize = self._chunkcachesize
1327 1330 realoffset = offset & ~(cachesize - 1)
1328 1331 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1329 1332 - realoffset)
1330 1333 with self._datareadfp(df) as df:
1331 1334 df.seek(realoffset)
1332 1335 d = df.read(reallength)
1333 1336 self._cachesegment(realoffset, d)
1334 1337 if offset != realoffset or reallength != length:
1335 1338 return util.buffer(d, offset - realoffset, length)
1336 1339 return d
1337 1340
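
A worked example of the rounding above, assuming the default 64 KiB chunk cache size (the size is required to be a power of two, see __init__):

    cachesize = 65536
    offset, length = 70000, 1000
    realoffset = offset & ~(cachesize - 1)            # 65536: rounded down
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)  # 65536: end rounded up
    assert (realoffset, reallength) == (65536, 65536)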
1338 1341 def _getsegment(self, offset, length, df=None):
1339 1342 """Obtain a segment of raw data from the revlog.
1340 1343
1341 1344 Accepts an absolute offset, length of bytes to obtain, and an
1342 1345 optional file handle to the already-opened revlog. If the file
1343 1346 handle is used, its original seek position will not be preserved.
1344 1347
1345 1348 Requests for data may be returned from a cache.
1346 1349
1347 1350 Returns a str or a buffer instance of raw byte data.
1348 1351 """
1349 1352 o, d = self._chunkcache
1350 1353 l = len(d)
1351 1354
1352 1355 # is it in the cache?
1353 1356 cachestart = offset - o
1354 1357 cacheend = cachestart + length
1355 1358 if cachestart >= 0 and cacheend <= l:
1356 1359 if cachestart == 0 and cacheend == l:
1357 1360 return d # avoid a copy
1358 1361 return util.buffer(d, cachestart, cacheend - cachestart)
1359 1362
1360 1363 return self._readsegment(offset, length, df=df)
1361 1364
1362 1365 def _getsegmentforrevs(self, startrev, endrev, df=None):
1363 1366 """Obtain a segment of raw data corresponding to a range of revisions.
1364 1367
1365 1368 Accepts the start and end revisions and an optional already-open
1366 1369 file handle to be used for reading. If the file handle is used, its
1367 1370 seek position will not be preserved.
1368 1371
1369 1372 Requests for data may be satisfied by a cache.
1370 1373
1371 1374 Returns a 2-tuple of (offset, data) for the requested range of
1372 1375 revisions. Offset is the integer offset from the beginning of the
1373 1376 revlog and data is a str or buffer of the raw byte data.
1374 1377
1375 1378 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1376 1379 to determine where each revision's data begins and ends.
1377 1380 """
1378 1381 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1379 1382 # (functions are expensive).
1380 1383 index = self.index
1381 1384 istart = index[startrev]
1382 1385 start = int(istart[0] >> 16)
1383 1386 if startrev == endrev:
1384 1387 end = start + istart[1]
1385 1388 else:
1386 1389 iend = index[endrev]
1387 1390 end = int(iend[0] >> 16) + iend[1]
1388 1391
1389 1392 if self._inline:
1390 1393 start += (startrev + 1) * self._io.size
1391 1394 end += (endrev + 1) * self._io.size
1392 1395 length = end - start
1393 1396
1394 1397 return start, self._getsegment(start, length, df=df)
1395 1398
1396 1399 def _chunk(self, rev, df=None):
1397 1400 """Obtain a single decompressed chunk for a revision.
1398 1401
1399 1402 Accepts an integer revision and an optional already-open file handle
1400 1403 to be used for reading. If used, the seek position of the file will not
1401 1404 be preserved.
1402 1405
1403 1406 Returns a str holding uncompressed data for the requested revision.
1404 1407 """
1405 1408 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1406 1409
1407 1410 def _chunks(self, revs, df=None, targetsize=None):
1408 1411 """Obtain decompressed chunks for the specified revisions.
1409 1412
1410 1413 Accepts an iterable of numeric revisions that are assumed to be in
1411 1414 ascending order. Also accepts an optional already-open file handle
1412 1415 to be used for reading. If used, the seek position of the file will
1413 1416 not be preserved.
1414 1417
1415 1418 This function is similar to calling ``self._chunk()`` multiple times,
1416 1419 but is faster.
1417 1420
1418 1421 Returns a list with decompressed data for each requested revision.
1419 1422 """
1420 1423 if not revs:
1421 1424 return []
1422 1425 start = self.start
1423 1426 length = self.length
1424 1427 inline = self._inline
1425 1428 iosize = self._io.size
1426 1429 buffer = util.buffer
1427 1430
1428 1431 l = []
1429 1432 ladd = l.append
1430 1433
1431 1434 if not self._withsparseread:
1432 1435 slicedchunks = (revs,)
1433 1436 else:
1434 1437 slicedchunks = deltautil.slicechunk(self, revs,
1435 1438 targetsize=targetsize)
1436 1439
1437 1440 for revschunk in slicedchunks:
1438 1441 firstrev = revschunk[0]
1439 1442 # Skip trailing revisions with empty diff
1440 1443 for lastrev in revschunk[::-1]:
1441 1444 if length(lastrev) != 0:
1442 1445 break
1443 1446
1444 1447 try:
1445 1448 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1446 1449 except OverflowError:
1447 1450 # issue4215 - we can't cache a run of chunks greater than
1448 1451 # 2G on Windows
1449 1452 return [self._chunk(rev, df=df) for rev in revschunk]
1450 1453
1451 1454 decomp = self.decompress
1452 1455 for rev in revschunk:
1453 1456 chunkstart = start(rev)
1454 1457 if inline:
1455 1458 chunkstart += (rev + 1) * iosize
1456 1459 chunklength = length(rev)
1457 1460 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1458 1461
1459 1462 return l
1460 1463
1461 1464 def _chunkclear(self):
1462 1465 """Clear the raw chunk cache."""
1463 1466 self._chunkcache = (0, '')
1464 1467
1465 1468 def deltaparent(self, rev):
1466 1469 """return deltaparent of the given revision"""
1467 1470 base = self.index[rev][3]
1468 1471 if base == rev:
1469 1472 return nullrev
1470 1473 elif self._generaldelta:
1471 1474 return base
1472 1475 else:
1473 1476 return rev - 1
1474 1477
1475 1478 def issnapshot(self, rev):
1476 1479 """tells whether rev is a snapshot
1477 1480 """
1478 1481 if rev == nullrev:
1479 1482 return True
1480 1483 deltap = self.deltaparent(rev)
1481 1484 if deltap == nullrev:
1482 1485 return True
1483 1486 p1, p2 = self.parentrevs(rev)
1484 1487 if deltap in (p1, p2):
1485 1488 return False
1486 1489 return self.issnapshot(deltap)
1487 1490
1488 1491 def snapshotdepth(self, rev):
1489 1492 """number of snapshots in the chain before this one"""
1490 1493 if not self.issnapshot(rev):
1491 1494 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1492 1495 return len(self._deltachain(rev)[0]) - 1
1493 1496
1494 1497 def revdiff(self, rev1, rev2):
1495 1498 """return or calculate a delta between two revisions
1496 1499
1497 1500 The delta calculated is in binary form and is intended to be written to
1498 1501 revlog data directly. So this function needs raw revision data.
1499 1502 """
1500 1503 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1501 1504 return bytes(self._chunk(rev2))
1502 1505
1503 1506 return mdiff.textdiff(self.revision(rev1, raw=True),
1504 1507 self.revision(rev2, raw=True))
1505 1508
1506 1509 def revision(self, nodeorrev, _df=None, raw=False):
1507 1510 """return an uncompressed revision of a given node or revision
1508 1511 number.
1509 1512
1510 1513 _df - an existing file handle to read from. (internal-only)
1511 1514 raw - an optional argument specifying if the revision data is to be
1512 1515 treated as raw data when applying flag transforms. 'raw' should be set
1513 1516 to True when generating changegroups or in debug commands.
1514 1517 """
1515 1518 if isinstance(nodeorrev, int):
1516 1519 rev = nodeorrev
1517 1520 node = self.node(rev)
1518 1521 else:
1519 1522 node = nodeorrev
1520 1523 rev = None
1521 1524
1522 1525 cachedrev = None
1523 1526 flags = None
1524 1527 rawtext = None
1525 1528 if node == nullid:
1526 1529 return ""
1527 1530 if self._revisioncache:
1528 1531 if self._revisioncache[0] == node:
1529 1532 # _revisioncache only stores rawtext
1530 1533 if raw:
1531 1534 return self._revisioncache[2]
1532 1535 # duplicated, but good for perf
1533 1536 if rev is None:
1534 1537 rev = self.rev(node)
1535 1538 if flags is None:
1536 1539 flags = self.flags(rev)
1537 1540 # no extra flags set, no flag processor runs, text = rawtext
1538 1541 if flags == REVIDX_DEFAULT_FLAGS:
1539 1542 return self._revisioncache[2]
1540 1543 # rawtext is reusable. need to run flag processor
1541 1544 rawtext = self._revisioncache[2]
1542 1545
1543 1546 cachedrev = self._revisioncache[1]
1544 1547
1545 1548 # look up what we need to read
1546 1549 if rawtext is None:
1547 1550 if rev is None:
1548 1551 rev = self.rev(node)
1549 1552
1550 1553 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1551 1554 if stopped:
1552 1555 rawtext = self._revisioncache[2]
1553 1556
1554 1557 # drop cache to save memory
1555 1558 self._revisioncache = None
1556 1559
1557 1560 targetsize = None
1558 1561 rawsize = self.index[rev][2]
1559 1562 if 0 <= rawsize:
1560 1563 targetsize = 4 * rawsize
1561 1564
1562 1565 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1563 1566 if rawtext is None:
1564 1567 rawtext = bytes(bins[0])
1565 1568 bins = bins[1:]
1566 1569
1567 1570 rawtext = mdiff.patches(rawtext, bins)
1568 1571 self._revisioncache = (node, rev, rawtext)
1569 1572
1570 1573 if flags is None:
1571 1574 if rev is None:
1572 1575 rev = self.rev(node)
1573 1576 flags = self.flags(rev)
1574 1577
1575 1578 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1576 1579 if validatehash:
1577 1580 self.checkhash(text, node, rev=rev)
1578 1581
1579 1582 return text
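# Usage sketch (illustrative only): ``raw=True`` returns the stored rawtext
# with no flag processors applied, while ``raw=False`` returns the fulltext
# after the read transforms have run.
#
#     rawtext = rl.revision(node, raw=True)
#     text = rl.revision(node, raw=False)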
1580 1583
1581 1584 def hash(self, text, p1, p2):
1582 1585 """Compute a node hash.
1583 1586
1584 1587 Available as a function so that subclasses can replace the hash
1585 1588 as needed.
1586 1589 """
1587 1590 return storageutil.hashrevisionsha1(text, p1, p2)
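# Usage sketch (illustrative only): the node is the SHA-1 of the two parent
# nodes (in sorted order) followed by the revision text.
#
#     node = rl.hash(text, p1, p2)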
1588 1591
1589 1592 def _processflags(self, text, flags, operation, raw=False):
1590 1593 """Inspect revision data flags and apply transforms defined by
1591 1594 registered flag processors.
1592 1595
1593 1596 ``text`` - the revision data to process
1594 1597 ``flags`` - the revision flags
1595 1598 ``operation`` - the operation being performed (read or write)
1596 1599 ``raw`` - an optional argument describing if the raw transform should be
1597 1600 applied.
1598 1601
1599 1602 This method processes the flags in the order (or reverse order if
1600 1603 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1601 1604 flag processors registered for present flags. The order of flags defined
1602 1605 in REVIDX_FLAGS_ORDER must be stable because the transforms are not commutative.
1603 1606
1604 1607 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1605 1608 processed text and ``validatehash`` is a bool indicating whether the
1606 1609 returned text should be checked for hash integrity.
1607 1610
1608 1611 Note: If the ``raw`` argument is set, it has precedence over the
1609 1612 operation and will only update the value of ``validatehash``.
1610 1613 """
1611 1614 # fast path: no flag processors will run
1612 1615 if flags == 0:
1613 1616 return text, True
1614 1617 if operation not in ('read', 'write'):
1615 1618 raise error.ProgrammingError(_("invalid '%s' operation") %
1616 1619 operation)
1617 1620 # Check all flags are known.
1618 1621 if flags & ~REVIDX_KNOWN_FLAGS:
1619 1622 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1620 1623 (flags & ~REVIDX_KNOWN_FLAGS))
1621 1624 validatehash = True
1622 1625 # Depending on the operation (read or write), the order might be
1623 1626 # reversed due to non-commutative transforms.
1624 1627 orderedflags = REVIDX_FLAGS_ORDER
1625 1628 if operation == 'write':
1626 1629 orderedflags = reversed(orderedflags)
1627 1630
1628 1631 for flag in orderedflags:
1629 1632 # If a flagprocessor has been registered for a known flag, apply the
1630 1633 # related operation transform and update result tuple.
1631 1634 if flag & flags:
1632 1635 vhash = True
1633 1636
1634 1637 if flag not in self._flagprocessors:
1635 1638 message = _("missing processor for flag '%#x'") % flag
1636 1639 raise error.RevlogError(message)
1637 1640
1638 1641 processor = self._flagprocessors[flag]
1639 1642 if processor is not None:
1640 1643 readtransform, writetransform, rawtransform = processor
1641 1644
1642 1645 if raw:
1643 1646 vhash = rawtransform(self, text)
1644 1647 elif operation == 'read':
1645 1648 text, vhash = readtransform(self, text)
1646 1649 else: # write operation
1647 1650 text, vhash = writetransform(self, text)
1648 1651 validatehash = validatehash and vhash
1649 1652
1650 1653 return text, validatehash
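# Hypothetical registration sketch: a flag processor is a triple of
# (read, write, raw) callables, as unpacked above. The names ``decode`` and
# ``encode`` are placeholders, not part of this module.
#
#     def readxform(rl, text):
#         return decode(text), True    # (new text, validatehash)
#     def writexform(rl, text):
#         return encode(text), True
#     def rawxform(rl, text):
#         return False                 # rawtext hash cannot be checked
#     addflagprocessor(REVIDX_EXTSTORED,
#                      (readxform, writexform, rawxform))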
1651 1654
1652 1655 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1653 1656 """Check node hash integrity.
1654 1657
1655 1658 Available as a function so that subclasses can extend hash mismatch
1656 1659 behaviors as needed.
1657 1660 """
1658 1661 try:
1659 1662 if p1 is None and p2 is None:
1660 1663 p1, p2 = self.parents(node)
1661 1664 if node != self.hash(text, p1, p2):
1662 1665 # Clear the revision cache on hash failure. The revision cache
1663 1666 # only stores the raw revision and clearing the cache does have
1664 1667 # the side-effect that we won't have a cache hit when the raw
1665 1668 # revision data is accessed. But this case should be rare and
1666 1669 # it is extra work to teach the cache about the hash
1667 1670 # verification state.
1668 1671 if self._revisioncache and self._revisioncache[0] == node:
1669 1672 self._revisioncache = None
1670 1673
1671 1674 revornode = rev
1672 1675 if revornode is None:
1673 1676 revornode = templatefilters.short(hex(node))
1674 1677 raise error.RevlogError(_("integrity check failed on %s:%s")
1675 1678 % (self.indexfile, pycompat.bytestr(revornode)))
1676 1679 except error.RevlogError:
1677 1680 if self._censorable and storageutil.iscensoredtext(text):
1678 1681 raise error.CensoredNodeError(self.indexfile, node, text)
1679 1682 raise
1680 1683
1681 1684 def _enforceinlinesize(self, tr, fp=None):
1682 1685 """Check if the revlog is too big for inline and convert if so.
1683 1686
1684 1687 This should be called after revisions are added to the revlog. If the
1685 1688 revlog has grown too large to be an inline revlog, this method will
1686 1689 convert it to use separate index and data files.
1687 1690 """
1688 1691 tiprev = len(self) - 1
1689 1692 if (not self._inline or
1690 1693 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1691 1694 return
1692 1695
1693 1696 trinfo = tr.find(self.indexfile)
1694 1697 if trinfo is None:
1695 1698 raise error.RevlogError(_("%s not found in the transaction")
1696 1699 % self.indexfile)
1697 1700
1698 1701 trindex = trinfo[2]
1699 1702 if trindex is not None:
1700 1703 dataoff = self.start(trindex)
1701 1704 else:
1702 1705 # revlog was stripped at start of transaction, use all leftover data
1703 1706 trindex = len(self) - 1
1704 1707 dataoff = self.end(tiprev)
1705 1708
1706 1709 tr.add(self.datafile, dataoff)
1707 1710
1708 1711 if fp:
1709 1712 fp.flush()
1710 1713 fp.close()
1711 1714
1712 1715 with self._datafp('w') as df:
1713 1716 for r in self:
1714 1717 df.write(self._getsegmentforrevs(r, r)[1])
1715 1718
1716 1719 with self._indexfp('w') as fp:
1717 1720 self.version &= ~FLAG_INLINE_DATA
1718 1721 self._inline = False
1719 1722 io = self._io
1720 1723 for i in self:
1721 1724 e = io.packentry(self.index[i], self.node, self.version, i)
1722 1725 fp.write(e)
1723 1726
1724 1727 # the temp file replaces the real index when we exit the context
1725 1728 # manager
1726 1729
1727 1730 tr.replace(self.indexfile, trindex * self._io.size)
1728 1731 self._chunkclear()
1729 1732
1730 1733 def _nodeduplicatecallback(self, transaction, node):
1731 1734 """called when trying to add a node already stored.
1732 1735 """
1733 1736
1734 1737 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1735 1738 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1736 1739 """add a revision to the log
1737 1740
1738 1741 text - the revision data to add
1739 1742 transaction - the transaction object used for rollback
1740 1743 link - the linkrev data to add
1741 1744 p1, p2 - the parent nodeids of the revision
1742 1745 cachedelta - an optional precomputed delta
1743 1746 node - nodeid of revision; typically node is not specified, and it is
1744 1747 computed by default as hash(text, p1, p2), however subclasses might
1745 1748 use a different hashing method (and override checkhash() in that case)
1746 1749 flags - the known flags to set on the revision
1747 1750 deltacomputer - an optional deltacomputer instance shared between
1748 1751 multiple calls
1749 1752 """
1750 1753 if link == nullrev:
1751 1754 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1752 1755 % self.indexfile)
1753 1756
1754 1757 if flags:
1755 1758 node = node or self.hash(text, p1, p2)
1756 1759
1757 1760 rawtext, validatehash = self._processflags(text, flags, 'write')
1758 1761
1759 1762 # If the flag processor modifies the revision data, ignore any provided
1760 1763 # cachedelta.
1761 1764 if rawtext != text:
1762 1765 cachedelta = None
1763 1766
1764 1767 if len(rawtext) > _maxentrysize:
1765 1768 raise error.RevlogError(
1766 1769 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1767 1770 % (self.indexfile, len(rawtext)))
1768 1771
1769 1772 node = node or self.hash(rawtext, p1, p2)
1770 1773 if node in self.nodemap:
1771 1774 return node
1772 1775
1773 1776 if validatehash:
1774 1777 self.checkhash(rawtext, node, p1=p1, p2=p2)
1775 1778
1776 1779 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1777 1780 flags, cachedelta=cachedelta,
1778 1781 deltacomputer=deltacomputer)
1779 1782
1780 1783 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1781 1784 cachedelta=None, deltacomputer=None):
1782 1785 """add a raw revision with known flags, node and parents
1783 1786 useful when reusing a revision not stored in this revlog (e.g. received
1784 1787 over the wire, or read from an external bundle).
1785 1788 """
1786 1789 dfh = None
1787 1790 if not self._inline:
1788 1791 dfh = self._datafp("a+")
1789 1792 ifh = self._indexfp("a+")
1790 1793 try:
1791 1794 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1792 1795 flags, cachedelta, ifh, dfh,
1793 1796 deltacomputer=deltacomputer)
1794 1797 finally:
1795 1798 if dfh:
1796 1799 dfh.close()
1797 1800 ifh.close()
1798 1801
1799 1802 def compress(self, data):
1800 1803 """Generate a possibly-compressed representation of data."""
1801 1804 if not data:
1802 1805 return '', data
1803 1806
1804 1807 compressed = self._compressor.compress(data)
1805 1808
1806 1809 if compressed:
1807 1810 # The revlog compressor added the header in the returned data.
1808 1811 return '', compressed
1809 1812
1810 1813 if data[0:1] == '\0':
1811 1814 return '', data
1812 1815 return 'u', data
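# Usage sketch (illustrative only): compress() returns a (header, data)
# pair whose concatenation is what decompress() expects.
#
#     header, data = rl.compress(rawtext)
#     assert bytes(rl.decompress(header + data)) == rawtext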
1813 1816
1814 1817 def decompress(self, data):
1815 1818 """Decompress a revlog chunk.
1816 1819
1817 1820 The chunk is expected to begin with a header identifying the
1818 1821 format type so it can be routed to an appropriate decompressor.
1819 1822 """
1820 1823 if not data:
1821 1824 return data
1822 1825
1823 1826 # Revlogs are read much more frequently than they are written and many
1824 1827 # chunks only take microseconds to decompress, so performance is
1825 1828 # important here.
1826 1829 #
1827 1830 # We can make a few assumptions about revlogs:
1828 1831 #
1829 1832 # 1) the majority of chunks will be compressed (as opposed to inline
1830 1833 # raw data).
1831 1834 # 2) decompressing *any* data will likely be at least 10x slower than
1832 1835 # returning raw inline data.
1833 1836 # 3) we want to prioritize common and officially supported compression
1834 1837 # engines
1835 1838 #
1836 1839 # It follows that we want to optimize for "decompress compressed data
1837 1840 # when encoded with common and officially supported compression engines"
1838 1841 # case over "raw data" and "data encoded by less common or non-official
1839 1842 # compression engines." That is why we have the inline lookup first
1840 1843 # followed by the compengines lookup.
1841 1844 #
1842 1845 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1843 1846 # compressed chunks. And this matters for changelog and manifest reads.
1844 1847 t = data[0:1]
1845 1848
1846 1849 if t == 'x':
1847 1850 try:
1848 1851 return _zlibdecompress(data)
1849 1852 except zlib.error as e:
1850 1853 raise error.RevlogError(_('revlog decompress error: %s') %
1851 1854 stringutil.forcebytestr(e))
1852 1855 # '\0' is more common than 'u' so it goes first.
1853 1856 elif t == '\0':
1854 1857 return data
1855 1858 elif t == 'u':
1856 1859 return util.buffer(data, 1)
1857 1860
1858 1861 try:
1859 1862 compressor = self._decompressors[t]
1860 1863 except KeyError:
1861 1864 try:
1862 1865 engine = util.compengines.forrevlogheader(t)
1863 1866 compressor = engine.revlogcompressor()
1864 1867 self._decompressors[t] = compressor
1865 1868 except KeyError:
1866 1869 raise error.RevlogError(_('unknown compression type %r') % t)
1867 1870
1868 1871 return compressor.decompress(data)
1869 1872
1870 1873 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1871 1874 cachedelta, ifh, dfh, alwayscache=False,
1872 1875 deltacomputer=None):
1873 1876 """internal function to add revisions to the log
1874 1877
1875 1878 see addrevision for argument descriptions.
1876 1879
1877 1880 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1878 1881
1879 1882 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1880 1883 be used.
1881 1884
1882 1885 invariants:
1883 1886 - rawtext is optional (can be None); if not set, cachedelta must be set.
1884 1887 if both are set, they must correspond to each other.
1885 1888 """
1886 1889 if node == nullid:
1887 1890 raise error.RevlogError(_("%s: attempt to add null revision") %
1888 1891 self.indexfile)
1889 1892 if node == wdirid or node in wdirfilenodeids:
1890 1893 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1891 1894 self.indexfile)
1892 1895
1893 1896 if self._inline:
1894 1897 fh = ifh
1895 1898 else:
1896 1899 fh = dfh
1897 1900
1898 1901 btext = [rawtext]
1899 1902
1900 1903 curr = len(self)
1901 1904 prev = curr - 1
1902 1905 offset = self.end(prev)
1903 1906 p1r, p2r = self.rev(p1), self.rev(p2)
1904 1907
1905 1908 # full versions are inserted when the needed deltas
1906 1909 # become comparable to the uncompressed text
1907 1910 if rawtext is None:
1908 1911 # need the rawtext size before flag processors changed it, i.e.
1909 1912 # the non-raw size. use the revlog class explicitly to bypass
1910 1913 # filelog's extra logic that might subtract the metadata size.
1911 1914 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1912 1915 cachedelta[1])
1913 1916 else:
1914 1917 textlen = len(rawtext)
1915 1918
1916 1919 if deltacomputer is None:
1917 1920 deltacomputer = deltautil.deltacomputer(self)
1918 1921
1919 1922 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1920 1923
1921 1924 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1922 1925
1923 1926 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1924 1927 deltainfo.base, link, p1r, p2r, node)
1925 1928 self.index.append(e)
1926 1929 self.nodemap[node] = curr
1927 1930
1928 1931 entry = self._io.packentry(e, self.node, self.version, curr)
1929 1932 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1930 1933 link, offset)
1931 1934
1932 1935 rawtext = btext[0]
1933 1936
1934 1937 if alwayscache and rawtext is None:
1935 1938 rawtext = deltacomputer.buildtext(revinfo, fh)
1936 1939
1937 1940 if type(rawtext) == bytes: # only accept immutable objects
1938 1941 self._revisioncache = (node, curr, rawtext)
1939 1942 self._chainbasecache[curr] = deltainfo.chainbase
1940 1943 return node
1941 1944
1942 1945 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1943 1946 # Files opened in a+ mode have inconsistent behavior on various
1944 1947 # platforms. Windows requires that a file positioning call be made
1945 1948 # when the file handle transitions between reads and writes. See
1946 1949 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1947 1950 # platforms, Python or the platform itself can be buggy. Some versions
1948 1951 # of Solaris have been observed to not append at the end of the file
1949 1952 # if the file was seeked to before the end. See issue4943 for more.
1950 1953 #
1951 1954 # We work around this issue by inserting a seek() before writing.
1952 1955 # Note: This is likely not necessary on Python 3.
1953 1956 ifh.seek(0, os.SEEK_END)
1954 1957 if dfh:
1955 1958 dfh.seek(0, os.SEEK_END)
1956 1959
1957 1960 curr = len(self) - 1
1958 1961 if not self._inline:
1959 1962 transaction.add(self.datafile, offset)
1960 1963 transaction.add(self.indexfile, curr * len(entry))
1961 1964 if data[0]:
1962 1965 dfh.write(data[0])
1963 1966 dfh.write(data[1])
1964 1967 ifh.write(entry)
1965 1968 else:
1966 1969 offset += curr * self._io.size
1967 1970 transaction.add(self.indexfile, offset, curr)
1968 1971 ifh.write(entry)
1969 1972 ifh.write(data[0])
1970 1973 ifh.write(data[1])
1971 1974 self._enforceinlinesize(transaction, ifh)
1972 1975
1973 1976 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1974 1977 """
1975 1978 add a delta group
1976 1979
1977 1980 given a set of deltas, add them to the revision log. the
1978 1981 first delta is against its parent, which should be in our
1979 1982 log, the rest are against the previous delta.
1980 1983
1981 1984 If ``addrevisioncb`` is defined, it will be called with arguments of
1982 1985 this revlog and the node that was added.
1983 1986 """
1984 1987
1985 1988 nodes = []
1986 1989
1987 1990 r = len(self)
1988 1991 end = 0
1989 1992 if r:
1990 1993 end = self.end(r - 1)
1991 1994 ifh = self._indexfp("a+")
1992 1995 isize = r * self._io.size
1993 1996 if self._inline:
1994 1997 transaction.add(self.indexfile, end + isize, r)
1995 1998 dfh = None
1996 1999 else:
1997 2000 transaction.add(self.indexfile, isize, r)
1998 2001 transaction.add(self.datafile, end)
1999 2002 dfh = self._datafp("a+")
2000 2003 def flush():
2001 2004 if dfh:
2002 2005 dfh.flush()
2003 2006 ifh.flush()
2004 2007 try:
2005 2008 deltacomputer = deltautil.deltacomputer(self)
2006 2009 # loop through our set of deltas
2007 2010 for data in deltas:
2008 2011 node, p1, p2, linknode, deltabase, delta, flags = data
2009 2012 link = linkmapper(linknode)
2010 2013 flags = flags or REVIDX_DEFAULT_FLAGS
2011 2014
2012 2015 nodes.append(node)
2013 2016
2014 2017 if node in self.nodemap:
2015 2018 self._nodeduplicatecallback(transaction, node)
2016 2019 # this can happen if two branches make the same change
2017 2020 continue
2018 2021
2019 2022 for p in (p1, p2):
2020 2023 if p not in self.nodemap:
2021 2024 raise error.LookupError(p, self.indexfile,
2022 2025 _('unknown parent'))
2023 2026
2024 2027 if deltabase not in self.nodemap:
2025 2028 raise error.LookupError(deltabase, self.indexfile,
2026 2029 _('unknown delta base'))
2027 2030
2028 2031 baserev = self.rev(deltabase)
2029 2032
2030 2033 if baserev != nullrev and self.iscensored(baserev):
2031 2034 # if base is censored, delta must be full replacement in a
2032 2035 # single patch operation
2033 2036 hlen = struct.calcsize(">lll")
2034 2037 oldlen = self.rawsize(baserev)
2035 2038 newlen = len(delta) - hlen
2036 2039 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2037 2040 raise error.CensoredBaseError(self.indexfile,
2038 2041 self.node(baserev))
2039 2042
2040 2043 if not flags and self._peek_iscensored(baserev, delta, flush):
2041 2044 flags |= REVIDX_ISCENSORED
2042 2045
2043 2046 # We assume consumers of addrevisioncb will want to retrieve
2044 2047 # the added revision, which will require a call to
2045 2048 # revision(). revision() will fast path if there is a cache
2046 2049 # hit. So, we tell _addrevision() to always cache in this case.
2047 2050 # We're only using addgroup() in the context of changegroup
2048 2051 # generation so the revision data can always be handled as raw
2049 2052 # by the flagprocessor.
2050 2053 self._addrevision(node, None, transaction, link,
2051 2054 p1, p2, flags, (baserev, delta),
2052 2055 ifh, dfh,
2053 2056 alwayscache=bool(addrevisioncb),
2054 2057 deltacomputer=deltacomputer)
2055 2058
2056 2059 if addrevisioncb:
2057 2060 addrevisioncb(self, node)
2058 2061
2059 2062 if not dfh and not self._inline:
2060 2063 # addrevision switched from inline to conventional
2061 2064 # reopen the index
2062 2065 ifh.close()
2063 2066 dfh = self._datafp("a+")
2064 2067 ifh = self._indexfp("a+")
2065 2068 finally:
2066 2069 if dfh:
2067 2070 dfh.close()
2068 2071 ifh.close()
2069 2072
2070 2073 return nodes
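# Usage sketch (illustrative only): each entry handed to addgroup() is a
# 7-tuple, mirroring the unpacking at the top of the loop above.
#
#     deltas = [(node, p1, p2, linknode, deltabase, delta, flags)]
#     added = rl.addgroup(deltas, linkmapper, transaction)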
2071 2074
2072 2075 def iscensored(self, rev):
2073 2076 """Check if a file revision is censored."""
2074 2077 if not self._censorable:
2075 2078 return False
2076 2079
2077 2080 return self.flags(rev) & REVIDX_ISCENSORED
2078 2081
2079 2082 def _peek_iscensored(self, baserev, delta, flush):
2080 2083 """Quickly check if a delta produces a censored revision."""
2081 2084 if not self._censorable:
2082 2085 return False
2083 2086
2084 2087 # Fragile heuristic: unless new file meta keys are added alphabetically
2085 2088 # preceding "censored", all censored revisions are prefixed by
2086 2089 # "\1\ncensored:". A delta producing such a censored revision must be a
2087 2090 # full-replacement delta, so we inspect the first and only patch in the
2088 2091 # delta for this prefix.
2089 2092 hlen = struct.calcsize(">lll")
2090 2093 if len(delta) <= hlen:
2091 2094 return False
2092 2095
2093 2096 oldlen = self.rawsize(baserev)
2094 2097 newlen = len(delta) - hlen
2095 2098 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2096 2099 return False
2097 2100
2098 2101 add = "\1\ncensored:"
2099 2102 addlen = len(add)
2100 2103 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2101 2104
2102 2105 def getstrippoint(self, minlink):
2103 2106 """find the minimum rev that must be stripped to strip the linkrev
2104 2107
2105 2108 Returns a tuple containing the minimum rev and a set of all revs that
2106 2109 have linkrevs that will be broken by this strip.
2107 2110 """
2108 2111 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2109 2112 self.headrevs(),
2110 2113 self.linkrev, self.parentrevs)
2111 2114
2112 2115 def strip(self, minlink, transaction):
2113 2116 """truncate the revlog on the first revision with a linkrev >= minlink
2114 2117
2115 2118 This function is called when we're stripping revision minlink and
2116 2119 its descendants from the repository.
2117 2120
2118 2121 We have to remove all revisions with linkrev >= minlink, because
2119 2122 the equivalent changelog revisions will be renumbered after the
2120 2123 strip.
2121 2124
2122 2125 So we truncate the revlog on the first of these revisions, and
2123 2126 trust that the caller has saved the revisions that shouldn't be
2124 2127 removed and that it'll re-add them after this truncation.
2125 2128 """
2126 2129 if len(self) == 0:
2127 2130 return
2128 2131
2129 2132 rev, _ = self.getstrippoint(minlink)
2130 2133 if rev == len(self):
2131 2134 return
2132 2135
2133 2136 # first truncate the files on disk
2134 2137 end = self.start(rev)
2135 2138 if not self._inline:
2136 2139 transaction.add(self.datafile, end)
2137 2140 end = rev * self._io.size
2138 2141 else:
2139 2142 end += rev * self._io.size
2140 2143
2141 2144 transaction.add(self.indexfile, end)
2142 2145
2143 2146 # then reset internal state in memory to forget those revisions
2144 2147 self._revisioncache = None
2145 2148 self._chaininfocache = {}
2146 2149 self._chunkclear()
2147 2150 for x in pycompat.xrange(rev, len(self)):
2148 2151 del self.nodemap[self.node(x)]
2149 2152
2150 2153 del self.index[rev:-1]
2151 2154 self._nodepos = None
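# Usage sketch (illustrative only): preview what a strip will break before
# truncating.
#
#     rev, brokenrevs = rl.getstrippoint(minlink)
#     # ... save revisions >= rev elsewhere if they must survive ...
#     rl.strip(minlink, transaction)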
2152 2155
2153 2156 def checksize(self):
2154 2157 expected = 0
2155 2158 if len(self):
2156 2159 expected = max(0, self.end(len(self) - 1))
2157 2160
2158 2161 try:
2159 2162 with self._datafp() as f:
2160 2163 f.seek(0, 2)
2161 2164 actual = f.tell()
2162 2165 dd = actual - expected
2163 2166 except IOError as inst:
2164 2167 if inst.errno != errno.ENOENT:
2165 2168 raise
2166 2169 dd = 0
2167 2170
2168 2171 try:
2169 2172 f = self.opener(self.indexfile)
2170 2173 f.seek(0, 2)
2171 2174 actual = f.tell()
2172 2175 f.close()
2173 2176 s = self._io.size
2174 2177 i = max(0, actual // s)
2175 2178 di = actual - (i * s)
2176 2179 if self._inline:
2177 2180 databytes = 0
2178 2181 for r in self:
2179 2182 databytes += max(0, self.length(r))
2180 2183 dd = 0
2181 2184 di = actual - len(self) * s - databytes
2182 2185 except IOError as inst:
2183 2186 if inst.errno != errno.ENOENT:
2184 2187 raise
2185 2188 di = 0
2186 2189
2187 2190 return (dd, di)
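# Usage sketch (illustrative only): ``dd`` is the data-length discrepancy
# and ``di`` the number of extra index bytes; both are zero for a
# consistent revlog.
#
#     dd, di = rl.checksize()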
2188 2191
2189 2192 def files(self):
2190 2193 res = [self.indexfile]
2191 2194 if not self._inline:
2192 2195 res.append(self.datafile)
2193 2196 return res
2194 2197
2195 2198 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2196 2199 assumehaveparentrevisions=False, deltaprevious=False):
2197 2200 if nodesorder not in ('nodes', 'storage', None):
2198 2201 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2199 2202 nodesorder)
2200 2203
2201 2204 if nodesorder is None and not self._generaldelta:
2202 2205 nodesorder = 'storage'
2203 2206
2204 2207 return storageutil.emitrevisions(
2205 2208 self, nodes, nodesorder, revlogrevisiondelta,
2206 2209 deltaparentfn=self.deltaparent,
2207 2210 candeltafn=self.candelta,
2208 2211 rawsizefn=self.rawsize,
2209 2212 revdifffn=self.revdiff,
2210 2213 flagsfn=self.flags,
2211 2214 sendfulltext=not self._storedeltachains,
2212 2215 revisiondata=revisiondata,
2213 2216 assumehaveparentrevisions=assumehaveparentrevisions,
2214 2217 deltaprevious=deltaprevious)
2215 2218
2216 2219 DELTAREUSEALWAYS = 'always'
2217 2220 DELTAREUSESAMEREVS = 'samerevs'
2218 2221 DELTAREUSENEVER = 'never'
2219 2222
2220 2223 DELTAREUSEFULLADD = 'fulladd'
2221 2224
2222 2225 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2223 2226
2224 2227 def clone(self, tr, destrevlog, addrevisioncb=None,
2225 2228 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2226 2229 """Copy this revlog to another, possibly with format changes.
2227 2230
2228 2231 The destination revlog will contain the same revisions and nodes.
2229 2232 However, it may not be bit-for-bit identical due to e.g. delta encoding
2230 2233 differences.
2231 2234
2232 2235 The ``deltareuse`` argument controls how deltas from the existing revlog
2233 2236 are preserved in the destination revlog. The argument can have the
2234 2237 following values:
2235 2238
2236 2239 DELTAREUSEALWAYS
2237 2240 Deltas will always be reused (if possible), even if the destination
2238 2241 revlog would not select the same revisions for the delta. This is the
2239 2242 fastest mode of operation.
2240 2243 DELTAREUSESAMEREVS
2241 2244 Deltas will be reused if the destination revlog would pick the same
2242 2245 revisions for the delta. This mode strikes a balance between speed
2243 2246 and optimization.
2244 2247 DELTAREUSENEVER
2245 2248 Deltas will never be reused. This is the slowest mode of execution.
2246 2249 This mode can be used to recompute deltas (e.g. if the diff/delta
2247 2250 algorithm changes).
2248 2251
2249 2252 Delta computation can be slow, so the choice of delta reuse policy can
2250 2253 significantly affect run time.
2251 2254
2252 2255 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2253 2256 two extremes. Deltas will be reused if they are appropriate. But if the
2254 2257 delta could choose a better revision, it will do so. This means if you
2255 2258 are converting a non-generaldelta revlog to a generaldelta revlog,
2256 2259 deltas will be recomputed if the delta's parent isn't a parent of the
2257 2260 revision.
2258 2261
2259 2262 In addition to the delta policy, the ``deltabothparents`` argument
2260 2263 controls whether to compute deltas against both parents for merges.
2261 2264 If it is None, the destination revlog's current setting is kept.
2262 2265 """
2263 2266 if deltareuse not in self.DELTAREUSEALL:
2264 2267 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2265 2268
2266 2269 if len(destrevlog):
2267 2270 raise ValueError(_('destination revlog is not empty'))
2268 2271
2269 2272 if getattr(self, 'filteredrevs', None):
2270 2273 raise ValueError(_('source revlog has filtered revisions'))
2271 2274 if getattr(destrevlog, 'filteredrevs', None):
2272 2275 raise ValueError(_('destination revlog has filtered revisions'))
2273 2276
2274 2277 # lazydeltabase controls whether to reuse a cached delta, if possible.
2275 2278 oldlazydeltabase = destrevlog._lazydeltabase
2276 2279 oldamd = destrevlog._deltabothparents
2277 2280
2278 2281 try:
2279 2282 if deltareuse == self.DELTAREUSEALWAYS:
2280 2283 destrevlog._lazydeltabase = True
2281 2284 elif deltareuse == self.DELTAREUSESAMEREVS:
2282 2285 destrevlog._lazydeltabase = False
2283 2286
2284 2287 destrevlog._deltabothparents = deltabothparents or oldamd
2285 2288
2286 2289 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2287 2290 self.DELTAREUSESAMEREVS)
2288 2291
2289 2292 deltacomputer = deltautil.deltacomputer(destrevlog)
2290 2293 index = self.index
2291 2294 for rev in self:
2292 2295 entry = index[rev]
2293 2296
2294 2297 # Some classes override linkrev to take filtered revs into
2295 2298 # account. Use raw entry from index.
2296 2299 flags = entry[0] & 0xffff
2297 2300 linkrev = entry[4]
2298 2301 p1 = index[entry[5]][7]
2299 2302 p2 = index[entry[6]][7]
2300 2303 node = entry[7]
2301 2304
2302 2305 # (Possibly) reuse the delta from the revlog if allowed and
2303 2306 # the revlog chunk is a delta.
2304 2307 cachedelta = None
2305 2308 rawtext = None
2306 2309 if populatecachedelta:
2307 2310 dp = self.deltaparent(rev)
2308 2311 if dp != nullrev:
2309 2312 cachedelta = (dp, bytes(self._chunk(rev)))
2310 2313
2311 2314 if not cachedelta:
2312 2315 rawtext = self.revision(rev, raw=True)
2313 2316
2315 2318 if deltareuse == self.DELTAREUSEFULLADD:
2316 2319 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2317 2320 cachedelta=cachedelta,
2318 2321 node=node, flags=flags,
2319 2322 deltacomputer=deltacomputer)
2320 2323 else:
2321 2324 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2322 2325 checkambig=False)
2323 2326 dfh = None
2324 2327 if not destrevlog._inline:
2325 2328 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2326 2329 try:
2327 2330 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2328 2331 p2, flags, cachedelta, ifh, dfh,
2329 2332 deltacomputer=deltacomputer)
2330 2333 finally:
2331 2334 if dfh:
2332 2335 dfh.close()
2333 2336 ifh.close()
2334 2337
2335 2338 if addrevisioncb:
2336 2339 addrevisioncb(self, rev, node)
2337 2340 finally:
2338 2341 destrevlog._lazydeltabase = oldlazydeltabase
2339 2342 destrevlog._deltabothparents = oldamd
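# Usage sketch (illustrative only): recompute every delta while copying
# into an empty destination revlog.
#
#     srcrevlog.clone(tr, destrevlog,
#                     deltareuse=srcrevlog.DELTAREUSENEVER)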
2340 2343
2341 2344 def censorrevision(self, node, tombstone=b''):
2342 2345 if (self.version & 0xFFFF) == REVLOGV0:
2343 2346 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2344 2347 self.version)
2345 2348
2346 2349 rev = self.rev(node)
2347 2350 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2348 2351
2349 2352 if len(tombstone) > self.rawsize(rev):
2350 2353 raise error.Abort(_('censor tombstone must be no longer than '
2351 2354 'censored data'))
2352 2355
2353 2356 # Using two files instead of one makes it easy to rewrite entry-by-entry
2354 2357 idxread = self.opener(self.indexfile, 'r')
2355 2358 idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
2356 2359 if self.version & FLAG_INLINE_DATA:
2357 2360 dataread, datawrite = idxread, idxwrite
2358 2361 else:
2359 2362 dataread = self.opener(self.datafile, 'r')
2360 2363 datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
2361 2364
2362 2365 # Copy all revlog data up to the entry to be censored.
2363 2366 offset = self.start(rev)
2364 2367
2365 2368 for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
2366 2369 idxwrite.write(chunk)
2367 2370 for chunk in util.filechunkiter(dataread, limit=offset):
2368 2371 datawrite.write(chunk)
2369 2372
2370 2373 def rewriteindex(r, newoffs, newdata=None):
2371 2374 """Rewrite the index entry with a new data offset and new data.
2372 2375
2373 2376 The newdata argument, if given, is a tuple of three positive
2374 2377 integers: (new compressed, new uncompressed, added flag bits).
2375 2378 """
2376 2379 offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
2377 2380 flags = gettype(offlags)
2378 2381 if newdata:
2379 2382 comp, uncomp, nflags = newdata
2380 2383 flags |= nflags
2381 2384 offlags = offset_type(newoffs, flags)
2382 2385 e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
2383 2386 idxwrite.write(self._io.packentry(e, None, self.version, r))
2384 2387 idxread.seek(self._io.size, 1)
2385 2388
2386 2389 def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
2387 2390 """Write the given fulltext with the given data offset.
2388 2391
2389 2392 Returns:
2390 2393 The integer number of data bytes written, for tracking data
2391 2394 offsets.
2392 2395 """
2393 2396 flag, compdata = self.compress(data)
2394 2397 newcomp = len(flag) + len(compdata)
2395 2398 rewriteindex(r, offs, (newcomp, len(data), nflags))
2396 2399 datawrite.write(flag)
2397 2400 datawrite.write(compdata)
2398 2401 dataread.seek(self.length(r), 1)
2399 2402 return newcomp
2400 2403
2401 2404 # Rewrite censored entry with (padded) tombstone data.
2402 2405 pad = ' ' * (self.rawsize(rev) - len(tombstone))
2403 2406 offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)
2404 2407
2405 2408 # Rewrite all following filelog revisions fixing up offsets and deltas.
2406 2409 for srev in pycompat.xrange(rev + 1, len(self)):
2407 2410 if rev in self.parentrevs(srev):
2408 2411 # Immediate children of the censored node must be re-added as
2409 2412 # fulltext.
2410 2413 try:
2411 2414 revdata = self.revision(srev)
2412 2415 except error.CensoredNodeError as e:
2413 2416 revdata = e.tombstone
2414 2417 dlen = rewrite(srev, offset, revdata)
2415 2418 else:
2416 2419 # Copy any other revision data verbatim after fixing up the
2417 2420 # offset.
2418 2421 rewriteindex(srev, offset)
2419 2422 dlen = self.length(srev)
2420 2423 for chunk in util.filechunkiter(dataread, limit=dlen):
2421 2424 datawrite.write(chunk)
2422 2425 offset += dlen
2423 2426
2424 2427 idxread.close()
2425 2428 idxwrite.close()
2426 2429 if dataread is not idxread:
2427 2430 dataread.close()
2428 2431 datawrite.close()
2429 2432
2430 2433 def verifyintegrity(self, state):
2431 2434 """Verifies the integrity of the revlog.
2432 2435
2433 2436 Yields ``revlogproblem`` instances describing problems that are
2434 2437 found.
2435 2438 """
2436 2439 dd, di = self.checksize()
2437 2440 if dd:
2438 2441 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2439 2442 if di:
2440 2443 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2441 2444
2442 2445 version = self.version & 0xFFFF
2443 2446
2444 2447 # The verifier tells us what revlog version to expect.
2445 2448 if version != state['expectedversion']:
2446 2449 yield revlogproblem(
2447 2450 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2448 2451 (self.indexfile, version, state['expectedversion']))
2449 2452
2450 2453 state['skipread'] = set()
2451 2454
2452 2455 for rev in self:
2453 2456 node = self.node(rev)
2454 2457
2455 2458 # Verify contents. 4 cases to care about:
2456 2459 #
2457 2460 # common: the most common case
2458 2461 # rename: with a rename
2459 2462 # meta: file content starts with b'\1\n', the metadata
2460 2463 # header defined in filelog.py, but without a rename
2461 2464 # ext: content stored externally
2462 2465 #
2463 2466 # More formally, their differences are shown below:
2464 2467 #
2465 2468 # | common | rename | meta | ext
2466 2469 # -------------------------------------------------------
2467 2470 # flags() | 0 | 0 | 0 | not 0
2468 2471 # renamed() | False | True | False | ?
2469 2472 # rawtext[0:2]=='\1\n'| False | True | True | ?
2470 2473 #
2471 2474 # "rawtext" means the raw text stored in revlog data, which
2472 2475 # could be retrieved by "revision(rev, raw=True)". "text"
2473 2476 # mentioned below is "revision(rev, raw=False)".
2474 2477 #
2475 2478 # There are 3 different lengths stored physically:
2476 2479 # 1. L1: rawsize, stored in revlog index
2477 2480 # 2. L2: len(rawtext), stored in revlog data
2478 2481 # 3. L3: len(text), stored in revlog data if flags==0, or
2479 2482 # possibly somewhere else if flags!=0
2480 2483 #
2481 2484 # L1 should be equal to L2. L3 could be different from them.
2482 2485 # "text" may or may not affect commit hash depending on flag
2483 2486 # processors (see revlog.addflagprocessor).
2484 2487 #
2485 2488 # | common | rename | meta | ext
2486 2489 # -------------------------------------------------
2487 2490 # rawsize() | L1 | L1 | L1 | L1
2488 2491 # size() | L1 | L2-LM | L1(*) | L1 (?)
2489 2492 # len(rawtext) | L2 | L2 | L2 | L2
2490 2493 # len(text) | L2 | L2 | L2 | L3
2491 2494 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2492 2495 #
2493 2496 # LM: length of metadata, depending on rawtext
2494 2497 # (*): not ideal, see comment in filelog.size
2495 2498 # (?): could be "- len(meta)" if the resolved content has
2496 2499 # rename metadata
2497 2500 #
2498 2501 # Checks needed to be done:
2499 2502 # 1. length check: L1 == L2, in all cases.
2500 2503 # 2. hash check: depending on flag processor, we may need to
2501 2504 # use either "text" (external), or "rawtext" (in revlog).
2502 2505
2503 2506 try:
2504 2507 skipflags = state.get('skipflags', 0)
2505 2508 if skipflags:
2506 2509 skipflags &= self.flags(rev)
2507 2510
2508 2511 if skipflags:
2509 2512 state['skipread'].add(node)
2510 2513 else:
2511 2514 # Side-effect: read content and verify hash.
2512 2515 self.revision(node)
2513 2516
2514 2517 l1 = self.rawsize(rev)
2515 2518 l2 = len(self.revision(node, raw=True))
2516 2519
2517 2520 if l1 != l2:
2518 2521 yield revlogproblem(
2519 2522 error=_('unpacked size is %d, %d expected') % (l2, l1),
2520 2523 node=node)
2521 2524
2522 2525 except error.CensoredNodeError:
2523 2526 if state['erroroncensored']:
2524 2527 yield revlogproblem(error=_('censored file data'),
2525 2528 node=node)
2526 2529 state['skipread'].add(node)
2527 2530 except Exception as e:
2528 2531 yield revlogproblem(
2529 2532 error=_('unpacking %s: %s') % (short(node),
2530 2533 stringutil.forcebytestr(e)),
2531 2534 node=node)
2532 2535 state['skipread'].add(node)
2533 2536
2534 2537 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2535 2538 revisionscount=False, trackedsize=False,
2536 2539 storedsize=False):
2537 2540 d = {}
2538 2541
2539 2542 if exclusivefiles:
2540 2543 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2541 2544 if not self._inline:
2542 2545 d['exclusivefiles'].append((self.opener, self.datafile))
2543 2546
2544 2547 if sharedfiles:
2545 2548 d['sharedfiles'] = []
2546 2549
2547 2550 if revisionscount:
2548 2551 d['revisionscount'] = len(self)
2549 2552
2550 2553 if trackedsize:
2551 2554 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2552 2555
2553 2556 if storedsize:
2554 2557 d['storedsize'] = sum(self.opener.stat(path).st_size
2555 2558 for path in self.files())
2556 2559
2557 2560 return d
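# Usage sketch (illustrative only): request only the cheap-to-compute
# fields.
#
#     info = rl.storageinfo(revisionscount=True, trackedsize=True)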