revlog: document that mmap resources are released implicitly by GC...
Yuya Nishihara
r41322:00b314c4 default
@@ -1,2639 +1,2641 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # Blanket usage of all the names to prevent pyflakes "unused" warnings.
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate the hash. If
151 151 "write" returns True, that basically means the "rawtext" returned by
152 152 "write" should be used to generate the hash. Usually, "write" and "read"
153 153 return different booleans, and "raw" returns the same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
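# A minimal sketch of registering a processor, assuming a hypothetical
# REVIDX_EXAMPLE flag already declared in REVIDX_KNOWN_FLAGS and
# REVIDX_FLAGS_ORDER:
#
#     def exampleread(self, rawtext):
#         return rawtext, True   # True: result usable for hash checks
#     def examplewrite(self, text):
#         return text, True
#     def exampleraw(self, rawtext):
#         return True
#     addflagprocessor(REVIDX_EXAMPLE,
#                      (exampleread, examplewrite, exampleraw))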
160 160
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
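# For instance, the packing round-trips as:
#
#     v = offset_type(4096, REVIDX_ISCENSORED)
#     assert getoffset(v) == 4096
#     assert gettype(v) == REVIDX_ISCENSORED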
183 183
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
textlen: length of the revision fulltext
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated with the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
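# Each v0 record is fixed-size: 4 * 4-byte ints + 3 * 20-byte nodeids
# = 76 bytes, i.e. indexformatv0.size == 76.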
234 234
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
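# Each ng record is fixed-size: 8 (offset + flags) + 6 * 4 (ints)
# + 20 (nodeid) + 12 (padding) = 64 bytes, i.e. indexformatng.size == 64.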
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
288 288 # maximum value of the uncompressed-length field in indexformatng
289 289 # (a 4-byte signed integer, i.e. 2 GiB)
290 290 _maxentrysize = 0x7fffffff
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
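# Note: rev 0 always has data offset 0, so the first four bytes of its
# packed entry are free to carry the version/flags header; parsers mask
# them back to zero when reading the first entry.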
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, rawtext) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of revision integer to full node.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._maxdeltachainspan = -1
375 375 self._withsparseread = False
376 376 self._sparserevlog = False
377 377 self._srdensitythreshold = 0.50
378 378 self._srmingapsize = 262144
379 379
380 380 # Make copy of flag processors so each revlog instance can support
381 381 # custom flags.
382 382 self._flagprocessors = dict(_flagprocessors)
383 383
384 384 # 2-tuple of file handles being used for active writing.
385 385 self._writinghandles = None
386 386
387 387 self._loadindex()
388 388
389 389 def _loadindex(self):
390 390 mmapindexthreshold = None
391 391 opts = getattr(self.opener, 'options', {}) or {}
392 392
393 393 if 'revlogv2' in opts:
394 394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 395 elif 'revlogv1' in opts:
396 396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 397 if 'generaldelta' in opts:
398 398 newversionflags |= FLAG_GENERALDELTA
399 399 else:
400 400 newversionflags = REVLOG_DEFAULT_VERSION
401 401
402 402 if 'chunkcachesize' in opts:
403 403 self._chunkcachesize = opts['chunkcachesize']
404 404 if 'maxchainlen' in opts:
405 405 self._maxchainlen = opts['maxchainlen']
406 406 if 'deltabothparents' in opts:
407 407 self._deltabothparents = opts['deltabothparents']
408 408 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
409 409 if 'compengine' in opts:
410 410 self._compengine = opts['compengine']
411 411 if 'maxdeltachainspan' in opts:
412 412 self._maxdeltachainspan = opts['maxdeltachainspan']
413 413 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
414 414 mmapindexthreshold = opts['mmapindexthreshold']
415 415 self._sparserevlog = bool(opts.get('sparse-revlog', False))
416 416 withsparseread = bool(opts.get('with-sparse-read', False))
417 417 # sparse-revlog forces sparse-read
418 418 self._withsparseread = self._sparserevlog or withsparseread
419 419 if 'sparse-read-density-threshold' in opts:
420 420 self._srdensitythreshold = opts['sparse-read-density-threshold']
421 421 if 'sparse-read-min-gap-size' in opts:
422 422 self._srmingapsize = opts['sparse-read-min-gap-size']
423 423 if opts.get('enableellipsis'):
424 424 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
425 425
426 426 # revlog v0 doesn't have flag processors
427 427 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
428 428 _insertflagprocessor(flag, processor, self._flagprocessors)
429 429
430 430 if self._chunkcachesize <= 0:
431 431 raise error.RevlogError(_('revlog chunk cache size %r is not '
432 432 'greater than 0') % self._chunkcachesize)
433 433 elif self._chunkcachesize & (self._chunkcachesize - 1):
434 434 raise error.RevlogError(_('revlog chunk cache size %r is not a '
435 435 'power of 2') % self._chunkcachesize)
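# (x & (x - 1)) == 0 is the usual power-of-two test: e.g.
# 65536 & 65535 == 0, while 65537 & 65536 != 0.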
436 436
437 437 indexdata = ''
438 438 self._initempty = True
439 439 try:
440 440 with self._indexfp() as f:
441 441 if (mmapindexthreshold is not None and
442 442 self.opener.fstat(f).st_size >= mmapindexthreshold):
443 # TODO: should .close() to release resources without
444 # relying on Python GC
443 445 indexdata = util.buffer(util.mmapread(f))
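# A possible explicit-release shape (a sketch only, not current
# behavior; mmap.close() raises BufferError while buffers still
# reference the mapping):
#
#     m = util.mmapread(f)
#     indexdata = util.buffer(m)
#     ...
#     m.close()
#
# Today the mapping is released only when the buffer is
# garbage-collected.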
444 446 else:
445 447 indexdata = f.read()
446 448 if len(indexdata) > 0:
447 449 versionflags = versionformat_unpack(indexdata[:4])[0]
448 450 self._initempty = False
449 451 else:
450 452 versionflags = newversionflags
451 453 except IOError as inst:
452 454 if inst.errno != errno.ENOENT:
453 455 raise
454 456
455 457 versionflags = newversionflags
456 458
457 459 self.version = versionflags
458 460
459 461 flags = versionflags & ~0xFFFF
460 462 fmt = versionflags & 0xFFFF
461 463
462 464 if fmt == REVLOGV0:
463 465 if flags:
464 466 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 467 'revlog %s') %
466 468 (flags >> 16, fmt, self.indexfile))
467 469
468 470 self._inline = False
469 471 self._generaldelta = False
470 472
471 473 elif fmt == REVLOGV1:
472 474 if flags & ~REVLOGV1_FLAGS:
473 475 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
474 476 'revlog %s') %
475 477 (flags >> 16, fmt, self.indexfile))
476 478
477 479 self._inline = versionflags & FLAG_INLINE_DATA
478 480 self._generaldelta = versionflags & FLAG_GENERALDELTA
479 481
480 482 elif fmt == REVLOGV2:
481 483 if flags & ~REVLOGV2_FLAGS:
482 484 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
483 485 'revlog %s') %
484 486 (flags >> 16, fmt, self.indexfile))
485 487
486 488 self._inline = versionflags & FLAG_INLINE_DATA
487 489 # generaldelta implied by version 2 revlogs.
488 490 self._generaldelta = True
489 491
490 492 else:
491 493 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
492 494 (fmt, self.indexfile))
493 495
494 496 self._storedeltachains = True
495 497
496 498 self._io = revlogio()
497 499 if self.version == REVLOGV0:
498 500 self._io = revlogoldio()
499 501 try:
500 502 d = self._io.parseindex(indexdata, self._inline)
501 503 except (ValueError, IndexError):
502 504 raise error.RevlogError(_("index %s is corrupted") %
503 505 self.indexfile)
504 506 self.index, nodemap, self._chunkcache = d
505 507 if nodemap is not None:
506 508 self.nodemap = self._nodecache = nodemap
507 509 if not self._chunkcache:
508 510 self._chunkclear()
509 511 # revnum -> (chain-length, sum-delta-length)
510 512 self._chaininfocache = {}
511 513 # revlog header -> revlog compressor
512 514 self._decompressors = {}
513 515
514 516 @util.propertycache
515 517 def _compressor(self):
516 518 return util.compengines[self._compengine].revlogcompressor()
517 519
518 520 def _indexfp(self, mode='r'):
519 521 """file object for the revlog's index file"""
520 522 args = {r'mode': mode}
521 523 if mode != 'r':
522 524 args[r'checkambig'] = self._checkambig
523 525 if mode == 'w':
524 526 args[r'atomictemp'] = True
525 527 return self.opener(self.indexfile, **args)
526 528
527 529 def _datafp(self, mode='r'):
528 530 """file object for the revlog's data file"""
529 531 return self.opener(self.datafile, mode=mode)
530 532
531 533 @contextlib.contextmanager
532 534 def _datareadfp(self, existingfp=None):
533 535 """file object suitable to read data"""
534 536 # Use explicit file handle, if given.
535 537 if existingfp is not None:
536 538 yield existingfp
537 539
538 540 # Use a file handle being actively used for writes, if available.
539 541 # There is some danger to doing this because reads will seek the
540 542 # file. However, _writeentry() performs a SEEK_END before all writes,
541 543 # so we should be safe.
542 544 elif self._writinghandles:
543 545 if self._inline:
544 546 yield self._writinghandles[0]
545 547 else:
546 548 yield self._writinghandles[1]
547 549
548 550 # Otherwise open a new file handle.
549 551 else:
550 552 if self._inline:
551 553 func = self._indexfp
552 554 else:
553 555 func = self._datafp
554 556 with func() as fp:
555 557 yield fp
556 558
557 559 def tip(self):
558 560 return self.node(len(self.index) - 1)
559 561 def __contains__(self, rev):
560 562 return 0 <= rev < len(self)
561 563 def __len__(self):
562 564 return len(self.index)
563 565 def __iter__(self):
564 566 return iter(pycompat.xrange(len(self)))
565 567 def revs(self, start=0, stop=None):
566 568 """iterate over all rev in this revlog (from start to stop)"""
567 569 return storageutil.iterrevs(len(self), start=start, stop=stop)
568 570
569 571 @util.propertycache
570 572 def nodemap(self):
571 573 if self.index:
572 574 # populate mapping down to the initial node
573 575 node0 = self.index[0][7] # get around changelog filtering
574 576 self.rev(node0)
575 577 return self._nodecache
576 578
577 579 def hasnode(self, node):
578 580 try:
579 581 self.rev(node)
580 582 return True
581 583 except KeyError:
582 584 return False
583 585
584 586 def candelta(self, baserev, rev):
585 587 """whether two revisions (baserev, rev) can be delta-ed or not"""
586 588 # Disable delta if either rev requires a content-changing flag
587 589 # processor (ex. LFS). This is because such a flag processor can alter
588 590 # the rawtext content that the delta will be based on, and two clients
589 591 # could have the same revlog node with different flags (i.e. different
590 592 # rawtext contents) and the delta could be incompatible.
591 593 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
592 594 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
593 595 return False
594 596 return True
595 597
596 598 def clearcaches(self):
597 599 self._revisioncache = None
598 600 self._chainbasecache.clear()
599 601 self._chunkcache = (0, '')
600 602 self._pcache = {}
601 603
602 604 try:
603 605 self._nodecache.clearcaches()
604 606 except AttributeError:
605 607 self._nodecache = {nullid: nullrev}
606 608 self._nodepos = None
607 609
608 610 def rev(self, node):
609 611 try:
610 612 return self._nodecache[node]
611 613 except TypeError:
612 614 raise
613 615 except error.RevlogError:
614 616 # parsers.c radix tree lookup failed
615 617 if node == wdirid or node in wdirfilenodeids:
616 618 raise error.WdirUnsupported
617 619 raise error.LookupError(node, self.indexfile, _('no node'))
618 620 except KeyError:
619 621 # pure python cache lookup failed
620 622 n = self._nodecache
621 623 i = self.index
622 624 p = self._nodepos
623 625 if p is None:
624 626 p = len(i) - 1
625 627 else:
626 628 assert p < len(i)
627 629 for r in pycompat.xrange(p, -1, -1):
628 630 v = i[r][7]
629 631 n[v] = r
630 632 if v == node:
631 633 self._nodepos = r - 1
632 634 return r
633 635 if node == wdirid or node in wdirfilenodeids:
634 636 raise error.WdirUnsupported
635 637 raise error.LookupError(node, self.indexfile, _('no node'))
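# The pure-Python fallback above walks the index backwards from the
# last unscanned position, memoizing every node it passes, so repeated
# lookups amortize to a single full scan of the index.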
636 638
637 639 # Accessors for index entries.
638 640
639 641 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
640 642 # are flags.
641 643 def start(self, rev):
642 644 return int(self.index[rev][0] >> 16)
643 645
644 646 def flags(self, rev):
645 647 return self.index[rev][0] & 0xFFFF
646 648
647 649 def length(self, rev):
648 650 return self.index[rev][1]
649 651
650 652 def rawsize(self, rev):
651 653 """return the length of the uncompressed text for a given revision"""
652 654 l = self.index[rev][2]
653 655 if l >= 0:
654 656 return l
655 657
656 658 t = self.revision(rev, raw=True)
657 659 return len(t)
658 660
659 661 def size(self, rev):
660 662 """length of non-raw text (processed by a "read" flag processor)"""
661 663 # fast path: if no "read" flag processor could change the content,
662 664 # size is rawsize. note: ELLIPSIS is known to not change the content.
663 665 flags = self.flags(rev)
664 666 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
665 667 return self.rawsize(rev)
666 668
667 669 return len(self.revision(rev, raw=False))
668 670
669 671 def chainbase(self, rev):
670 672 base = self._chainbasecache.get(rev)
671 673 if base is not None:
672 674 return base
673 675
674 676 index = self.index
675 677 iterrev = rev
676 678 base = index[iterrev][3]
677 679 while base != iterrev:
678 680 iterrev = base
679 681 base = index[iterrev][3]
680 682
681 683 self._chainbasecache[rev] = base
682 684 return base
683 685
684 686 def linkrev(self, rev):
685 687 return self.index[rev][4]
686 688
687 689 def parentrevs(self, rev):
688 690 try:
689 691 entry = self.index[rev]
690 692 except IndexError:
691 693 if rev == wdirrev:
692 694 raise error.WdirUnsupported
693 695 raise
694 696
695 697 return entry[5], entry[6]
696 698
697 699 # fast parentrevs(rev) where rev isn't filtered
698 700 _uncheckedparentrevs = parentrevs
699 701
700 702 def node(self, rev):
701 703 try:
702 704 return self.index[rev][7]
703 705 except IndexError:
704 706 if rev == wdirrev:
705 707 raise error.WdirUnsupported
706 708 raise
707 709
708 710 # Derived from index values.
709 711
710 712 def end(self, rev):
711 713 return self.start(rev) + self.length(rev)
712 714
713 715 def parents(self, node):
714 716 i = self.index
715 717 d = i[self.rev(node)]
716 718 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
717 719
718 720 def chainlen(self, rev):
719 721 return self._chaininfo(rev)[0]
720 722
721 723 def _chaininfo(self, rev):
722 724 chaininfocache = self._chaininfocache
723 725 if rev in chaininfocache:
724 726 return chaininfocache[rev]
725 727 index = self.index
726 728 generaldelta = self._generaldelta
727 729 iterrev = rev
728 730 e = index[iterrev]
729 731 clen = 0
730 732 compresseddeltalen = 0
731 733 while iterrev != e[3]:
732 734 clen += 1
733 735 compresseddeltalen += e[1]
734 736 if generaldelta:
735 737 iterrev = e[3]
736 738 else:
737 739 iterrev -= 1
738 740 if iterrev in chaininfocache:
739 741 t = chaininfocache[iterrev]
740 742 clen += t[0]
741 743 compresseddeltalen += t[1]
742 744 break
743 745 e = index[iterrev]
744 746 else:
745 747 # Add text length of base since decompressing that also takes
746 748 # work. For cache hits the length is already included.
747 749 compresseddeltalen += e[1]
748 750 r = (clen, compresseddeltalen)
749 751 chaininfocache[rev] = r
750 752 return r
751 753
752 754 def _deltachain(self, rev, stoprev=None):
753 755 """Obtain the delta chain for a revision.
754 756
755 757 ``stoprev`` specifies a revision to stop at. If not specified, we
756 758 stop at the base of the chain.
757 759
758 760 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
759 761 revs in ascending order and ``stopped`` is a bool indicating whether
760 762 ``stoprev`` was hit.
761 763 """
762 764 # Try C implementation.
763 765 try:
764 766 return self.index.deltachain(rev, stoprev, self._generaldelta)
765 767 except AttributeError:
766 768 pass
767 769
768 770 chain = []
769 771
770 772 # Alias to prevent attribute lookup in tight loop.
771 773 index = self.index
772 774 generaldelta = self._generaldelta
773 775
774 776 iterrev = rev
775 777 e = index[iterrev]
776 778 while iterrev != e[3] and iterrev != stoprev:
777 779 chain.append(iterrev)
778 780 if generaldelta:
779 781 iterrev = e[3]
780 782 else:
781 783 iterrev -= 1
782 784 e = index[iterrev]
783 785
784 786 if iterrev == stoprev:
785 787 stopped = True
786 788 else:
787 789 chain.append(iterrev)
788 790 stopped = False
789 791
790 792 chain.reverse()
791 793 return chain, stopped
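# For example, if rev 3 is a full snapshot and rev 4 deltas against it,
# _deltachain(4) returns ([3, 4], False), while
# _deltachain(4, stoprev=3) returns ([4], True).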
792 794
793 795 def ancestors(self, revs, stoprev=0, inclusive=False):
794 796 """Generate the ancestors of 'revs' in reverse revision order.
795 797 Does not generate revs lower than stoprev.
796 798
797 799 See the documentation for ancestor.lazyancestors for more details."""
798 800
799 801 # first, make sure start revisions aren't filtered
800 802 revs = list(revs)
801 803 checkrev = self.node
802 804 for r in revs:
803 805 checkrev(r)
804 806 # and we're sure ancestors aren't filtered as well
805 807
806 808 if rustext is not None:
807 809 lazyancestors = rustext.ancestor.LazyAncestors
808 810 arg = self.index
809 811 elif util.safehasattr(parsers, 'rustlazyancestors'):
810 812 lazyancestors = ancestor.rustlazyancestors
811 813 arg = self.index
812 814 else:
813 815 lazyancestors = ancestor.lazyancestors
814 816 arg = self._uncheckedparentrevs
815 817 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
816 818
817 819 def descendants(self, revs):
818 820 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
819 821
820 822 def findcommonmissing(self, common=None, heads=None):
821 823 """Return a tuple of the ancestors of common and the ancestors of heads
822 824 that are not ancestors of common. In revset terminology, we return the
823 825 tuple:
824 826
825 827 ::common, (::heads) - (::common)
826 828
827 829 The list is sorted by revision number, meaning it is
828 830 topologically sorted.
829 831
830 832 'heads' and 'common' are both lists of node IDs. If heads is
831 833 not supplied, uses all of the revlog's heads. If common is not
832 834 supplied, uses nullid."""
833 835 if common is None:
834 836 common = [nullid]
835 837 if heads is None:
836 838 heads = self.heads()
837 839
838 840 common = [self.rev(n) for n in common]
839 841 heads = [self.rev(n) for n in heads]
840 842
841 843 # we want the ancestors, but inclusive
842 844 class lazyset(object):
843 845 def __init__(self, lazyvalues):
844 846 self.addedvalues = set()
845 847 self.lazyvalues = lazyvalues
846 848
847 849 def __contains__(self, value):
848 850 return value in self.addedvalues or value in self.lazyvalues
849 851
850 852 def __iter__(self):
851 853 added = self.addedvalues
852 854 for r in added:
853 855 yield r
854 856 for r in self.lazyvalues:
855 857 if not r in added:
856 858 yield r
857 859
858 860 def add(self, value):
859 861 self.addedvalues.add(value)
860 862
861 863 def update(self, values):
862 864 self.addedvalues.update(values)
863 865
864 866 has = lazyset(self.ancestors(common))
865 867 has.add(nullrev)
866 868 has.update(common)
867 869
868 870 # take all ancestors from heads that aren't in has
869 871 missing = set()
870 872 visit = collections.deque(r for r in heads if r not in has)
871 873 while visit:
872 874 r = visit.popleft()
873 875 if r in missing:
874 876 continue
875 877 else:
876 878 missing.add(r)
877 879 for p in self.parentrevs(r):
878 880 if p not in has:
879 881 visit.append(p)
880 882 missing = list(missing)
881 883 missing.sort()
882 884 return has, [self.node(miss) for miss in missing]
883 885
884 886 def incrementalmissingrevs(self, common=None):
885 887 """Return an object that can be used to incrementally compute the
886 888 revision numbers of the ancestors of arbitrary sets that are not
887 889 ancestors of common. This is an ancestor.incrementalmissingancestors
888 890 object.
889 891
890 892 'common' is a list of revision numbers. If common is not supplied, uses
891 893 nullrev.
892 894 """
893 895 if common is None:
894 896 common = [nullrev]
895 897
896 898 if rustext is not None:
897 899 return rustext.ancestor.MissingAncestors(self.index, common)
898 900 return ancestor.incrementalmissingancestors(self.parentrevs, common)
899 901
900 902 def findmissingrevs(self, common=None, heads=None):
901 903 """Return the revision numbers of the ancestors of heads that
902 904 are not ancestors of common.
903 905
904 906 More specifically, return a list of revision numbers corresponding to
905 907 nodes N such that every N satisfies the following constraints:
906 908
907 909 1. N is an ancestor of some node in 'heads'
908 910 2. N is not an ancestor of any node in 'common'
909 911
910 912 The list is sorted by revision number, meaning it is
911 913 topologically sorted.
912 914
913 915 'heads' and 'common' are both lists of revision numbers. If heads is
914 916 not supplied, uses all of the revlog's heads. If common is not
915 917 supplied, uses nullrev."""
916 918 if common is None:
917 919 common = [nullrev]
918 920 if heads is None:
919 921 heads = self.headrevs()
920 922
921 923 inc = self.incrementalmissingrevs(common=common)
922 924 return inc.missingancestors(heads)
923 925
924 926 def findmissing(self, common=None, heads=None):
925 927 """Return the ancestors of heads that are not ancestors of common.
926 928
927 929 More specifically, return a list of nodes N such that every N
928 930 satisfies the following constraints:
929 931
930 932 1. N is an ancestor of some node in 'heads'
931 933 2. N is not an ancestor of any node in 'common'
932 934
933 935 The list is sorted by revision number, meaning it is
934 936 topologically sorted.
935 937
936 938 'heads' and 'common' are both lists of node IDs. If heads is
937 939 not supplied, uses all of the revlog's heads. If common is not
938 940 supplied, uses nullid."""
939 941 if common is None:
940 942 common = [nullid]
941 943 if heads is None:
942 944 heads = self.heads()
943 945
944 946 common = [self.rev(n) for n in common]
945 947 heads = [self.rev(n) for n in heads]
946 948
947 949 inc = self.incrementalmissingrevs(common=common)
948 950 return [self.node(r) for r in inc.missingancestors(heads)]
949 951
950 952 def nodesbetween(self, roots=None, heads=None):
951 953 """Return a topological path from 'roots' to 'heads'.
952 954
953 955 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
954 956 topologically sorted list of all nodes N that satisfy both of
955 957 these constraints:
956 958
957 959 1. N is a descendant of some node in 'roots'
958 960 2. N is an ancestor of some node in 'heads'
959 961
960 962 Every node is considered to be both a descendant and an ancestor
961 963 of itself, so every reachable node in 'roots' and 'heads' will be
962 964 included in 'nodes'.
963 965
964 966 'outroots' is the list of reachable nodes in 'roots', i.e., the
965 967 subset of 'roots' that is returned in 'nodes'. Likewise,
966 968 'outheads' is the subset of 'heads' that is also in 'nodes'.
967 969
968 970 'roots' and 'heads' are both lists of node IDs. If 'roots' is
969 971 unspecified, uses nullid as the only root. If 'heads' is
970 972 unspecified, uses list of all of the revlog's heads."""
971 973 nonodes = ([], [], [])
972 974 if roots is not None:
973 975 roots = list(roots)
974 976 if not roots:
975 977 return nonodes
976 978 lowestrev = min([self.rev(n) for n in roots])
977 979 else:
978 980 roots = [nullid] # Everybody's a descendant of nullid
979 981 lowestrev = nullrev
980 982 if (lowestrev == nullrev) and (heads is None):
981 983 # We want _all_ the nodes!
982 984 return ([self.node(r) for r in self], [nullid], list(self.heads()))
983 985 if heads is None:
984 986 # All nodes are ancestors, so the latest ancestor is the last
985 987 # node.
986 988 highestrev = len(self) - 1
987 989 # Set ancestors to None to signal that every node is an ancestor.
988 990 ancestors = None
989 991 # Set heads to an empty dictionary for later discovery of heads
990 992 heads = {}
991 993 else:
992 994 heads = list(heads)
993 995 if not heads:
994 996 return nonodes
995 997 ancestors = set()
996 998 # Turn heads into a dictionary so we can remove 'fake' heads.
997 999 # Also, later we will be using it to filter out the heads we can't
998 1000 # find from roots.
999 1001 heads = dict.fromkeys(heads, False)
1000 1002 # Start at the top and keep marking parents until we're done.
1001 1003 nodestotag = set(heads)
1002 1004 # Remember where the top was so we can use it as a limit later.
1003 1005 highestrev = max([self.rev(n) for n in nodestotag])
1004 1006 while nodestotag:
1005 1007 # grab a node to tag
1006 1008 n = nodestotag.pop()
1007 1009 # Never tag nullid
1008 1010 if n == nullid:
1009 1011 continue
1010 1012 # A node's revision number represents its place in a
1011 1013 # topologically sorted list of nodes.
1012 1014 r = self.rev(n)
1013 1015 if r >= lowestrev:
1014 1016 if n not in ancestors:
1015 1017 # If we are possibly a descendant of one of the roots
1016 1018 # and we haven't already been marked as an ancestor
1017 1019 ancestors.add(n) # Mark as ancestor
1018 1020 # Add non-nullid parents to list of nodes to tag.
1019 1021 nodestotag.update([p for p in self.parents(n) if
1020 1022 p != nullid])
1021 1023 elif n in heads: # We've seen it before, is it a fake head?
1022 1024 # So it is, real heads should not be the ancestors of
1023 1025 # any other heads.
1024 1026 heads.pop(n)
1025 1027 if not ancestors:
1026 1028 return nonodes
1027 1029 # Now that we have our set of ancestors, we want to remove any
1028 1030 # roots that are not ancestors.
1029 1031
1030 1032 # If one of the roots was nullid, everything is included anyway.
1031 1033 if lowestrev > nullrev:
1032 1034 # But, since we weren't, let's recompute the lowest rev to not
1033 1035 # include roots that aren't ancestors.
1034 1036
1035 1037 # Filter out roots that aren't ancestors of heads
1036 1038 roots = [root for root in roots if root in ancestors]
1037 1039 # Recompute the lowest revision
1038 1040 if roots:
1039 1041 lowestrev = min([self.rev(root) for root in roots])
1040 1042 else:
1041 1043 # No more roots? Return empty list
1042 1044 return nonodes
1043 1045 else:
1044 1046 # We are descending from nullid, and don't need to care about
1045 1047 # any other roots.
1046 1048 lowestrev = nullrev
1047 1049 roots = [nullid]
1048 1050 # Transform our roots list into a set.
1049 1051 descendants = set(roots)
1050 1052 # Also, keep the original roots so we can filter out roots that aren't
1051 1053 # 'real' roots (i.e. are descended from other roots).
1052 1054 roots = descendants.copy()
1053 1055 # Our topologically sorted list of output nodes.
1054 1056 orderedout = []
1055 1057 # Don't start at nullid since we don't want nullid in our output list,
1056 1058 # and if nullid shows up in descendants, empty parents will look like
1057 1059 # they're descendants.
1058 1060 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1059 1061 n = self.node(r)
1060 1062 isdescendant = False
1061 1063 if lowestrev == nullrev: # Everybody is a descendant of nullid
1062 1064 isdescendant = True
1063 1065 elif n in descendants:
1064 1066 # n is already a descendant
1065 1067 isdescendant = True
1066 1068 # This check only needs to be done here because all the roots
1067 1069 # will start being marked as descendants before the loop.
1068 1070 if n in roots:
1069 1071 # If n was a root, check if it's a 'real' root.
1070 1072 p = tuple(self.parents(n))
1071 1073 # If any of its parents are descendants, it's not a root.
1072 1074 if (p[0] in descendants) or (p[1] in descendants):
1073 1075 roots.remove(n)
1074 1076 else:
1075 1077 p = tuple(self.parents(n))
1076 1078 # A node is a descendant if either of its parents are
1077 1079 # descendants. (We seeded the descendants set with the roots
1078 1080 # up there, remember?)
1079 1081 if (p[0] in descendants) or (p[1] in descendants):
1080 1082 descendants.add(n)
1081 1083 isdescendant = True
1082 1084 if isdescendant and ((ancestors is None) or (n in ancestors)):
1083 1085 # Only include nodes that are both descendants and ancestors.
1084 1086 orderedout.append(n)
1085 1087 if (ancestors is not None) and (n in heads):
1086 1088 # We're trying to figure out which heads are reachable
1087 1089 # from roots.
1088 1090 # Mark this head as having been reached
1089 1091 heads[n] = True
1090 1092 elif ancestors is None:
1091 1093 # Otherwise, we're trying to discover the heads.
1092 1094 # Assume this is a head because if it isn't, the next step
1093 1095 # will eventually remove it.
1094 1096 heads[n] = True
1095 1097 # But, obviously its parents aren't.
1096 1098 for p in self.parents(n):
1097 1099 heads.pop(p, None)
1098 1100 heads = [head for head, flag in heads.iteritems() if flag]
1099 1101 roots = list(roots)
1100 1102 assert orderedout
1101 1103 assert roots
1102 1104 assert heads
1103 1105 return (orderedout, roots, heads)
1104 1106
1105 1107 def headrevs(self, revs=None):
1106 1108 if revs is None:
1107 1109 try:
1108 1110 return self.index.headrevs()
1109 1111 except AttributeError:
1110 1112 return self._headrevs()
1111 1113 return dagop.headrevs(revs, self.parentrevs)
1112 1114
1113 1115 def computephases(self, roots):
1114 1116 return self.index.computephasesmapsets(roots)
1115 1117
1116 1118 def _headrevs(self):
1117 1119 count = len(self)
1118 1120 if not count:
1119 1121 return [nullrev]
1120 1122 # we won't iterate over filtered revs so nobody is a head at start
1121 1123 ishead = [0] * (count + 1)
1122 1124 index = self.index
1123 1125 for r in self:
1124 1126 ishead[r] = 1 # I may be a head
1125 1127 e = index[r]
1126 1128 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1127 1129 return [r for r, val in enumerate(ishead) if val]
1128 1130
1129 1131 def heads(self, start=None, stop=None):
1130 1132 """return the list of all nodes that have no children
1131 1133
1132 1134 if start is specified, only heads that are descendants of
1133 1135 start will be returned
1134 1136 if stop is specified, it will consider all the revs from stop
1135 1137 as if they had no children
1136 1138 """
1137 1139 if start is None and stop is None:
1138 1140 if not len(self):
1139 1141 return [nullid]
1140 1142 return [self.node(r) for r in self.headrevs()]
1141 1143
1142 1144 if start is None:
1143 1145 start = nullrev
1144 1146 else:
1145 1147 start = self.rev(start)
1146 1148
1147 1149 stoprevs = set(self.rev(n) for n in stop or [])
1148 1150
1149 1151 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1150 1152 stoprevs=stoprevs)
1151 1153
1152 1154 return [self.node(rev) for rev in revs]
1153 1155
1154 1156 def children(self, node):
1155 1157 """find the children of a given node"""
1156 1158 c = []
1157 1159 p = self.rev(node)
1158 1160 for r in self.revs(start=p + 1):
1159 1161 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1160 1162 if prevs:
1161 1163 for pr in prevs:
1162 1164 if pr == p:
1163 1165 c.append(self.node(r))
1164 1166 elif p == nullrev:
1165 1167 c.append(self.node(r))
1166 1168 return c
1167 1169
1168 1170 def commonancestorsheads(self, a, b):
1169 1171 """calculate all the heads of the common ancestors of nodes a and b"""
1170 1172 a, b = self.rev(a), self.rev(b)
1171 1173 ancs = self._commonancestorsheads(a, b)
1172 1174 return pycompat.maplist(self.node, ancs)
1173 1175
1174 1176 def _commonancestorsheads(self, *revs):
1175 1177 """calculate all the heads of the common ancestors of revs"""
1176 1178 try:
1177 1179 ancs = self.index.commonancestorsheads(*revs)
1178 1180 except (AttributeError, OverflowError): # C implementation failed
1179 1181 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1180 1182 return ancs
1181 1183
1182 1184 def isancestor(self, a, b):
1183 1185 """return True if node a is an ancestor of node b
1184 1186
1185 1187 A revision is considered an ancestor of itself."""
1186 1188 a, b = self.rev(a), self.rev(b)
1187 1189 return self.isancestorrev(a, b)
1188 1190
1189 1191 def isancestorrev(self, a, b):
1190 1192 """return True if revision a is an ancestor of revision b
1191 1193
1192 1194 A revision is considered an ancestor of itself.
1193 1195
1194 1196 The implementation of this is trivial but the use of
1195 1197 commonancestorsheads is not."""
1196 1198 if a == nullrev:
1197 1199 return True
1198 1200 elif a == b:
1199 1201 return True
1200 1202 elif a > b:
1201 1203 return False
1202 1204 return a in self._commonancestorsheads(a, b)
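# If a is an ancestor of b, a is itself a head of the common ancestors
# of {a, b}, so membership in commonancestorsheads decides the question.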
1203 1205
1204 1206 def ancestor(self, a, b):
1205 1207 """calculate the "best" common ancestor of nodes a and b"""
1206 1208
1207 1209 a, b = self.rev(a), self.rev(b)
1208 1210 try:
1209 1211 ancs = self.index.ancestors(a, b)
1210 1212 except (AttributeError, OverflowError):
1211 1213 ancs = ancestor.ancestors(self.parentrevs, a, b)
1212 1214 if ancs:
1213 1215 # choose a consistent winner when there's a tie
1214 1216 return min(map(self.node, ancs))
1215 1217 return nullid
1216 1218
1217 1219 def _match(self, id):
1218 1220 if isinstance(id, int):
1219 1221 # rev
1220 1222 return self.node(id)
1221 1223 if len(id) == 20:
1222 1224 # possibly a binary node
1223 1225 # odds of a binary node being all hex in ASCII are 1 in 10**25
1224 1226 try:
1225 1227 node = id
1226 1228 self.rev(node) # quick search the index
1227 1229 return node
1228 1230 except error.LookupError:
1229 1231 pass # may be partial hex id
1230 1232 try:
1231 1233 # str(rev)
1232 1234 rev = int(id)
1233 1235 if "%d" % rev != id:
1234 1236 raise ValueError
1235 1237 if rev < 0:
1236 1238 rev = len(self) + rev
1237 1239 if rev < 0 or rev >= len(self):
1238 1240 raise ValueError
1239 1241 return self.node(rev)
1240 1242 except (ValueError, OverflowError):
1241 1243 pass
1242 1244 if len(id) == 40:
1243 1245 try:
1244 1246 # a full hex nodeid?
1245 1247 node = bin(id)
1246 1248 self.rev(node)
1247 1249 return node
1248 1250 except (TypeError, error.LookupError):
1249 1251 pass
1250 1252
1251 1253 def _partialmatch(self, id):
1252 1254 # we don't care about wdirfilenodeids as they should always be full hashes
1253 1255 maybewdir = wdirhex.startswith(id)
1254 1256 try:
1255 1257 partial = self.index.partialmatch(id)
1256 1258 if partial and self.hasnode(partial):
1257 1259 if maybewdir:
1258 1260 # single 'ff...' match in radix tree, ambiguous with wdir
1259 1261 raise error.RevlogError
1260 1262 return partial
1261 1263 if maybewdir:
1262 1264 # no 'ff...' match in radix tree, wdir identified
1263 1265 raise error.WdirUnsupported
1264 1266 return None
1265 1267 except error.RevlogError:
1266 1268 # parsers.c radix tree lookup gave multiple matches
1267 1269 # fast path: for unfiltered changelog, radix tree is accurate
1268 1270 if not getattr(self, 'filteredrevs', None):
1269 1271 raise error.AmbiguousPrefixLookupError(
1270 1272 id, self.indexfile, _('ambiguous identifier'))
1271 1273 # fall through to slow path that filters hidden revisions
1272 1274 except (AttributeError, ValueError):
1273 1275 # we are pure python, or key was too short to search radix tree
1274 1276 pass
1275 1277
1276 1278 if id in self._pcache:
1277 1279 return self._pcache[id]
1278 1280
1279 1281 if len(id) <= 40:
1280 1282 try:
1281 1283 # hex(node)[:...]
1282 1284 l = len(id) // 2 # grab an even number of digits
1283 1285 prefix = bin(id[:l * 2])
1284 1286 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1285 1287 nl = [n for n in nl if hex(n).startswith(id) and
1286 1288 self.hasnode(n)]
1287 1289 if nullhex.startswith(id):
1288 1290 nl.append(nullid)
1289 1291 if len(nl) > 0:
1290 1292 if len(nl) == 1 and not maybewdir:
1291 1293 self._pcache[id] = nl[0]
1292 1294 return nl[0]
1293 1295 raise error.AmbiguousPrefixLookupError(
1294 1296 id, self.indexfile, _('ambiguous identifier'))
1295 1297 if maybewdir:
1296 1298 raise error.WdirUnsupported
1297 1299 return None
1298 1300 except TypeError:
1299 1301 pass
1300 1302
1301 1303 def lookup(self, id):
1302 1304 """locate a node based on:
1303 1305 - revision number or str(revision number)
1304 1306 - nodeid or subset of hex nodeid
1305 1307 """
1306 1308 n = self._match(id)
1307 1309 if n is not None:
1308 1310 return n
1309 1311 n = self._partialmatch(id)
1310 1312 if n:
1311 1313 return n
1312 1314
1313 1315 raise error.LookupError(id, self.indexfile, _('no match found'))
1314 1316
1315 1317 def shortest(self, node, minlength=1):
1316 1318 """Find the shortest unambiguous prefix that matches node."""
1317 1319 def isvalid(prefix):
1318 1320 try:
1319 1321 node = self._partialmatch(prefix)
1320 1322 except error.AmbiguousPrefixLookupError:
1321 1323 return False
1322 1324 except error.WdirUnsupported:
1323 1325 # single 'ff...' match
1324 1326 return True
1325 1327 if node is None:
1326 1328 raise error.LookupError(node, self.indexfile, _('no node'))
1327 1329 return True
1328 1330
1329 1331 def maybewdir(prefix):
1330 1332 return all(c == 'f' for c in prefix)
1331 1333
1332 1334 hexnode = hex(node)
1333 1335
1334 1336 def disambiguate(hexnode, minlength):
1335 1337 """Disambiguate against wdirid."""
1336 1338 for length in range(minlength, 41):
1337 1339 prefix = hexnode[:length]
1338 1340 if not maybewdir(prefix):
1339 1341 return prefix
1340 1342
1341 1343 if not getattr(self, 'filteredrevs', None):
1342 1344 try:
1343 1345 length = max(self.index.shortest(node), minlength)
1344 1346 return disambiguate(hexnode, length)
1345 1347 except error.RevlogError:
1346 1348 if node != wdirid:
1347 1349 raise error.LookupError(node, self.indexfile, _('no node'))
1348 1350 except AttributeError:
1349 1351 # Fall through to pure code
1350 1352 pass
1351 1353
1352 1354 if node == wdirid:
1353 1355 for length in range(minlength, 41):
1354 1356 prefix = hexnode[:length]
1355 1357 if isvalid(prefix):
1356 1358 return prefix
1357 1359
1358 1360 for length in range(minlength, 41):
1359 1361 prefix = hexnode[:length]
1360 1362 if isvalid(prefix):
1361 1363 return disambiguate(hexnode, length)
1362 1364
1363 1365 def cmp(self, node, text):
1364 1366 """compare text with a given file revision
1365 1367
1366 1368 returns True if text is different from what is stored.
1367 1369 """
1368 1370 p1, p2 = self.parents(node)
1369 1371 return storageutil.hashrevisionsha1(text, p1, p2) != node
1370 1372
1371 1373 def _cachesegment(self, offset, data):
1372 1374 """Add a segment to the revlog cache.
1373 1375
1374 1376 Accepts an absolute offset and the data that is at that location.
1375 1377 """
1376 1378 o, d = self._chunkcache
1377 1379 # try to add to existing cache
1378 1380 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1379 1381 self._chunkcache = o, d + data
1380 1382 else:
1381 1383 self._chunkcache = offset, data
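# E.g. with cache (0, <64k bytes>), _cachesegment(65536, d) extends the
# cached run to (0, <64k + len(d) bytes>); a non-contiguous offset
# simply replaces the cache.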
1382 1384
1383 1385 def _readsegment(self, offset, length, df=None):
1384 1386 """Load a segment of raw data from the revlog.
1385 1387
1386 1388 Accepts an absolute offset, length to read, and an optional existing
1387 1389 file handle to read from.
1388 1390
1389 1391 If an existing file handle is passed, it will be seeked and the
1390 1392 original seek position will NOT be restored.
1391 1393
1392 1394 Returns a str or buffer of raw byte data.
1393 1395
1394 1396 Raises if the requested number of bytes could not be read.
1395 1397 """
1396 1398 # Cache data both forward and backward around the requested
1397 1399 # data, in a fixed size window. This helps speed up operations
1398 1400 # involving reading the revlog backwards.
1399 1401 cachesize = self._chunkcachesize
1400 1402 realoffset = offset & ~(cachesize - 1)
1401 1403 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1402 1404 - realoffset)
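# E.g. with the default 64k cache size, offset=70000 and length=100
# give realoffset = 70000 & ~65535 = 65536 and reallength = 65536, so
# the whole aligned 64k window around the request is read and cached.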
1403 1405 with self._datareadfp(df) as df:
1404 1406 df.seek(realoffset)
1405 1407 d = df.read(reallength)
1406 1408
1407 1409 self._cachesegment(realoffset, d)
1408 1410 if offset != realoffset or reallength != length:
1409 1411 startoffset = offset - realoffset
1410 1412 if len(d) - startoffset < length:
1411 1413 raise error.RevlogError(
1412 1414 _('partial read of revlog %s; expected %d bytes from '
1413 1415 'offset %d, got %d') %
1414 1416 (self.indexfile if self._inline else self.datafile,
1415 1417 length, realoffset, len(d) - startoffset))
1416 1418
1417 1419 return util.buffer(d, startoffset, length)
1418 1420
1419 1421 if len(d) < length:
1420 1422 raise error.RevlogError(
1421 1423 _('partial read of revlog %s; expected %d bytes from offset '
1422 1424 '%d, got %d') %
1423 1425 (self.indexfile if self._inline else self.datafile,
1424 1426 length, offset, len(d)))
1425 1427
1426 1428 return d
1427 1429
1428 1430 def _getsegment(self, offset, length, df=None):
1429 1431 """Obtain a segment of raw data from the revlog.
1430 1432
1431 1433 Accepts an absolute offset, length of bytes to obtain, and an
1432 1434 optional file handle to the already-opened revlog. If the file
1433 1435 handle is used, its original seek position will not be preserved.
1434 1436
1435 1437 Requests for data may be returned from a cache.
1436 1438
1437 1439 Returns a str or a buffer instance of raw byte data.
1438 1440 """
1439 1441 o, d = self._chunkcache
1440 1442 l = len(d)
1441 1443
1442 1444 # is it in the cache?
1443 1445 cachestart = offset - o
1444 1446 cacheend = cachestart + length
1445 1447 if cachestart >= 0 and cacheend <= l:
1446 1448 if cachestart == 0 and cacheend == l:
1447 1449 return d # avoid a copy
1448 1450 return util.buffer(d, cachestart, cacheend - cachestart)
1449 1451
1450 1452 return self._readsegment(offset, length, df=df)
1451 1453
1452 1454 def _getsegmentforrevs(self, startrev, endrev, df=None):
1453 1455 """Obtain a segment of raw data corresponding to a range of revisions.
1454 1456
1455 1457 Accepts the start and end revisions and an optional already-open
1456 1458 file handle to be used for reading. If the file handle is read, its
1457 1459 seek position will not be preserved.
1458 1460
1459 1461 Requests for data may be satisfied by a cache.
1460 1462
1461 1463 Returns a 2-tuple of (offset, data) for the requested range of
1462 1464 revisions. Offset is the integer offset from the beginning of the
1463 1465 revlog and data is a str or buffer of the raw byte data.
1464 1466
1465 1467 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1466 1468 to determine where each revision's data begins and ends.
1467 1469 """
1468 1470 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1469 1471 # (functions are expensive).
1470 1472 index = self.index
1471 1473 istart = index[startrev]
1472 1474 start = int(istart[0] >> 16)
1473 1475 if startrev == endrev:
1474 1476 end = start + istart[1]
1475 1477 else:
1476 1478 iend = index[endrev]
1477 1479 end = int(iend[0] >> 16) + iend[1]
1478 1480
1479 1481 if self._inline:
1480 1482 start += (startrev + 1) * self._io.size
1481 1483 end += (endrev + 1) * self._io.size
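# In an inline revlog, data chunks are interleaved with the index
# entries, so rev r's data starts (r + 1) * iosize bytes further into
# the file than its cumulative data offset alone would suggest.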
1482 1484 length = end - start
1483 1485
1484 1486 return start, self._getsegment(start, length, df=df)
1485 1487
1486 1488 def _chunk(self, rev, df=None):
1487 1489 """Obtain a single decompressed chunk for a revision.
1488 1490
1489 1491 Accepts an integer revision and an optional already-open file handle
1490 1492 to be used for reading. If used, the seek position of the file will not
1491 1493 be preserved.
1492 1494
1493 1495 Returns a str holding uncompressed data for the requested revision.
1494 1496 """
1495 1497 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1496 1498
1497 1499 def _chunks(self, revs, df=None, targetsize=None):
1498 1500 """Obtain decompressed chunks for the specified revisions.
1499 1501
1500 1502 Accepts an iterable of numeric revisions that are assumed to be in
1501 1503 ascending order. Also accepts an optional already-open file handle
1502 1504 to be used for reading. If used, the seek position of the file will
1503 1505 not be preserved.
1504 1506
1505 1507 This function is similar to calling ``self._chunk()`` multiple times,
1506 1508 but is faster.
1507 1509
1508 1510 Returns a list with decompressed data for each requested revision.
1509 1511 """
1510 1512 if not revs:
1511 1513 return []
1512 1514 start = self.start
1513 1515 length = self.length
1514 1516 inline = self._inline
1515 1517 iosize = self._io.size
1516 1518 buffer = util.buffer
1517 1519
1518 1520 l = []
1519 1521 ladd = l.append
1520 1522
1521 1523 if not self._withsparseread:
1522 1524 slicedchunks = (revs,)
1523 1525 else:
1524 1526 slicedchunks = deltautil.slicechunk(self, revs,
1525 1527 targetsize=targetsize)
1526 1528
1527 1529 for revschunk in slicedchunks:
1528 1530 firstrev = revschunk[0]
1529 1531 # Skip trailing revisions with empty diff
1530 1532 for lastrev in revschunk[::-1]:
1531 1533 if length(lastrev) != 0:
1532 1534 break
1533 1535
1534 1536 try:
1535 1537 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1536 1538 except OverflowError:
1537 1539 # issue4215 - we can't cache a run of chunks greater than
1538 1540 # 2G on Windows
1539 1541 return [self._chunk(rev, df=df) for rev in revschunk]
1540 1542
1541 1543 decomp = self.decompress
1542 1544 for rev in revschunk:
1543 1545 chunkstart = start(rev)
1544 1546 if inline:
1545 1547 chunkstart += (rev + 1) * iosize
1546 1548 chunklength = length(rev)
1547 1549 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1548 1550
1549 1551 return l
1550 1552
1551 1553 def _chunkclear(self):
1552 1554 """Clear the raw chunk cache."""
1553 1555 self._chunkcache = (0, '')
1554 1556
1555 1557 def deltaparent(self, rev):
1556 1558 """return deltaparent of the given revision"""
1557 1559 base = self.index[rev][3]
1558 1560 if base == rev:
1559 1561 return nullrev
1560 1562 elif self._generaldelta:
1561 1563 return base
1562 1564 else:
1563 1565 return rev - 1
1564 1566
1565 1567 def issnapshot(self, rev):
1566 1568 """tells whether rev is a snapshot
1567 1569 """
1568 1570 if not self._sparserevlog:
1569 1571 return self.deltaparent(rev) == nullrev
1570 1572 elif util.safehasattr(self.index, 'issnapshot'):
1571 1573 # directly assign the method to cache the capability test and later accesses
1572 1574 self.issnapshot = self.index.issnapshot
1573 1575 return self.issnapshot(rev)
1574 1576 if rev == nullrev:
1575 1577 return True
1576 1578 entry = self.index[rev]
1577 1579 base = entry[3]
1578 1580 if base == rev:
1579 1581 return True
1580 1582 if base == nullrev:
1581 1583 return True
1582 1584 p1 = entry[5]
1583 1585 p2 = entry[6]
1584 1586 if base == p1 or base == p2:
1585 1587 return False
1586 1588 return self.issnapshot(base)
1587 1589
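    # Illustrative reading of the sparse-revlog rules above (hypothetical
    # chain): a revision whose delta base is nullrev is a full snapshot; a
    # revision whose delta base is one of its parents is a regular delta;
    # otherwise it is an intermediate snapshot exactly when its base is
    # itself a snapshot, hence the recursion on ``base``.
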
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

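    # Usage sketch (hypothetical ``rl`` is an open revlog instance):
    #
    #   text = rl.revision(0)               # fulltext after flag processing
    #   rawtext = rl.revision(0, raw=True)  # rawtext as stored in the revlog
    #
    # Both forms accept either a revision number or a 20-byte node id.
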
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

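    # Minimal flag-processor sketch (hypothetical transforms; a real
    # processor is registered through addflagprocessor() and must return
    # the tuple shape unpacked above):
    #
    #   def _myread(rl, text):
    #       return text, True      # (processed text, validate hash?)
    #
    #   def _mywrite(rl, text):
    #       return text, True
    #
    #   def _myraw(rl, text):
    #       return True            # raw transform only decides hash checking
    #
    #   addflagprocessor(REVIDX_EXTSTORED, (_myread, _mywrite, _myraw))
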
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

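    # Usage sketch (hypothetical; ``rl`` is a writable revlog, ``tr`` an open
    # transaction, and ``linkrev``/``p1node``/``p2node`` already known):
    #
    #   node = rl.addrevision(b'file content\n', tr, linkrev, p1node, p2node)
    #
    # The returned node is hash(text, p1, p2) unless a subclass overrides
    # hash() above.
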
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

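    # Round-trip sketch for compress()/decompress() above (illustrative;
    # ``rl`` is a hypothetical open revlog instance):
    #
    #   header, packed = rl.compress(b'some revision data')
    #   chunk = header + packed    # what is stored on disk
    #   assert bytes(rl.decompress(chunk)) == b'some revision data'
    #
    # The first byte of a stored chunk selects the decompressor: 'x' for
    # zlib, 'u' for stored-uncompressed, '\0' for raw data, and any other
    # value is resolved through util.compengines.
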
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # We need the rawtext size before it was changed by flag
            # processors, which is the non-raw size. Use revlog explicitly to
            # avoid filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

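    # Layout of the index entry tuple appended above (positions match the
    # index accesses used throughout this class, e.g. entry[3] for the delta
    # base and entry[7] for the node):
    #
    #   (offset_type(offset, flags),  # 0: data offset packed with flags
    #    deltalen,                    # 1: length of the stored chunk
    #    textlen,                     # 2: length of the full rawtext
    #    base,                        # 3: delta base revision
    #    link,                        # 4: linkrev
    #    p1r, p2r,                    # 5, 6: parent revisions
    #    node)                        # 7: node id
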
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

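    # Shape of each item consumed from ``deltas`` above (illustrative):
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where ``delta`` is a binary patch against the revision identified by
    # ``deltabase``, and ``linknode`` is mapped to a linkrev through the
    # supplied ``linkmapper`` callable.
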
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

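    # Truncation offsets used above, spelled out (illustrative):
    #
    #   separate files: the .d file is cut at start(rev) and the .i file at
    #                   rev * entrysize
    #   inline revlog:  the .i file interleaves entries and data, so it is
    #                   cut at start(rev) + rev * entrysize
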
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When not set, the destination revlog's existing setting is
        used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

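    # Usage sketch (hypothetical; ``src`` and ``dst`` are revlog instances
    # and ``tr`` an open transaction, e.g. during a repository upgrade):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
    #
    # Picking DELTAREUSENEVER instead forces every delta to be recomputed,
    # trading run time for potentially better storage.
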
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
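
    # Usage sketch (hypothetical ``rl`` is an open revlog instance):
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   # e.g. {'revisionscount': len(rl), 'storedsize': <bytes on disk>}
    #
    # Only the keys that were requested through keyword arguments are present
    # in the returned dict.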