revlog: use iterbytestr()...
Gregory Szorc
r41838:f63ba0b9 default
@@ -1,2652 +1,2652 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # Bare references to silence pyflakes "imported but unused" warnings.
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns a same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
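# Example (editor's sketch, not part of revlog.py): a hypothetical
# processor that stores rawtext zlib-compressed. Following the rules
# above, "write" returns False (the hash is computed over "text"),
# "read" returns True (the text it yields matches that hash), and
# "raw" returns the same boolean as "write":
#
#     def zread(rl, rawtext):
#         return zlib.decompress(rawtext), True
#     def zwrite(rl, text):
#         return zlib.compress(text), False
#     def zraw(rl, rawtext):
#         return False
#     addflagprocessor(REVIDX_EXTSTORED,      # reusing a known flag bit
#                      (zread, zwrite, zraw)) # purely for illustration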
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
183 183
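# Example (editor's sketch): an index entry's first field packs a 48-bit
# offset and 16 flag bits into one integer, so getoffset()/gettype()
# invert offset_type():
#
#     q = offset_type(4096, REVIDX_ISCENSORED)
#     assert getoffset(q) == 4096
#     assert gettype(q) == REVIDX_ISCENSORED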
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
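# Example (editor's sketch): each v0 record is a fixed 76 bytes
# (4 * 4-byte ints + 3 * 20-byte nodeids), so parsing is plain slicing:
#
#     assert indexformatv0.size == 76
#     entry = indexformatv0_pack(0, 0, -1, -1, nullid, nullid, nullid)
#     offset, clen, base, link, p1, p2, node = indexformatv0_unpack(entry)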
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
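# Example (editor's sketch): a v1 ("ng") entry is 64 bytes: one 8-byte
# offset+flags field (see offset_type() above), six 4-byte ints, a
# 20-byte nodeid and 12 bytes of padding ("12x"):
#
#     assert indexformatng.size == 64
#     entry = indexformatng_pack(offset_type(0, 0), 0, 0, -1, -1,
#                                -1, -1, nullid)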
288 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 289 # signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, text) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of revision integer to full node.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._maxdeltachainspan = -1
375 375 self._withsparseread = False
376 376 self._sparserevlog = False
377 377 self._srdensitythreshold = 0.50
378 378 self._srmingapsize = 262144
379 379
380 380 # Make copy of flag processors so each revlog instance can support
381 381 # custom flags.
382 382 self._flagprocessors = dict(_flagprocessors)
383 383
384 384 # 2-tuple of file handles being used for active writing.
385 385 self._writinghandles = None
386 386
387 387 self._loadindex()
388 388
389 389 def _loadindex(self):
390 390 mmapindexthreshold = None
391 391 opts = getattr(self.opener, 'options', {}) or {}
392 392
393 393 if 'revlogv2' in opts:
394 394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 395 elif 'revlogv1' in opts:
396 396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 397 if 'generaldelta' in opts:
398 398 newversionflags |= FLAG_GENERALDELTA
399 399 elif getattr(self.opener, 'options', None) is not None:
400 400 # If options provided but no 'revlog*' found, the repository
401 401 # would have no 'requires' file in it, which means we have to
402 402 # stick to the old format.
403 403 newversionflags = REVLOGV0
404 404 else:
405 405 newversionflags = REVLOG_DEFAULT_VERSION
406 406
407 407 if 'chunkcachesize' in opts:
408 408 self._chunkcachesize = opts['chunkcachesize']
409 409 if 'maxchainlen' in opts:
410 410 self._maxchainlen = opts['maxchainlen']
411 411 if 'deltabothparents' in opts:
412 412 self._deltabothparents = opts['deltabothparents']
413 413 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
414 414 if 'compengine' in opts:
415 415 self._compengine = opts['compengine']
416 416 if 'maxdeltachainspan' in opts:
417 417 self._maxdeltachainspan = opts['maxdeltachainspan']
418 418 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
419 419 mmapindexthreshold = opts['mmapindexthreshold']
420 420 self._sparserevlog = bool(opts.get('sparse-revlog', False))
421 421 withsparseread = bool(opts.get('with-sparse-read', False))
422 422 # sparse-revlog forces sparse-read
423 423 self._withsparseread = self._sparserevlog or withsparseread
424 424 if 'sparse-read-density-threshold' in opts:
425 425 self._srdensitythreshold = opts['sparse-read-density-threshold']
426 426 if 'sparse-read-min-gap-size' in opts:
427 427 self._srmingapsize = opts['sparse-read-min-gap-size']
428 428 if opts.get('enableellipsis'):
429 429 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
430 430
431 431 # revlog v0 doesn't have flag processors
432 432 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
433 433 _insertflagprocessor(flag, processor, self._flagprocessors)
434 434
435 435 if self._chunkcachesize <= 0:
436 436 raise error.RevlogError(_('revlog chunk cache size %r is not '
437 437 'greater than 0') % self._chunkcachesize)
438 438 elif self._chunkcachesize & (self._chunkcachesize - 1):
439 439 raise error.RevlogError(_('revlog chunk cache size %r is not a '
440 440 'power of 2') % self._chunkcachesize)
441 441
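# Editor's note: the elif above uses the standard bit trick -- a positive
# integer n is a power of two exactly when n & (n - 1) == 0:
#
#     65536 & 65535 == 0    # 0x10000: power of two, accepted
#     65537 & 65536 != 0    # not a power of two, rejected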
442 442 indexdata = ''
443 443 self._initempty = True
444 444 try:
445 445 with self._indexfp() as f:
446 446 if (mmapindexthreshold is not None and
447 447 self.opener.fstat(f).st_size >= mmapindexthreshold):
448 448 # TODO: should .close() to release resources without
449 449 # relying on Python GC
450 450 indexdata = util.buffer(util.mmapread(f))
451 451 else:
452 452 indexdata = f.read()
453 453 if len(indexdata) > 0:
454 454 versionflags = versionformat_unpack(indexdata[:4])[0]
455 455 self._initempty = False
456 456 else:
457 457 versionflags = newversionflags
458 458 except IOError as inst:
459 459 if inst.errno != errno.ENOENT:
460 460 raise
461 461
462 462 versionflags = newversionflags
463 463
464 464 self.version = versionflags
465 465
466 466 flags = versionflags & ~0xFFFF
467 467 fmt = versionflags & 0xFFFF
468 468
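# Editor's note: the 4-byte header mixes a format number (low 16 bits)
# with feature flags (high 16 bits), e.g. for a default v1 inline revlog:
#
#     versionflags = REVLOGV1 | FLAG_INLINE_DATA
#     assert versionflags & 0xFFFF == REVLOGV1
#     assert versionflags & ~0xFFFF == FLAG_INLINE_DATA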
469 469 if fmt == REVLOGV0:
470 470 if flags:
471 471 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
472 472 'revlog %s') %
473 473 (flags >> 16, fmt, self.indexfile))
474 474
475 475 self._inline = False
476 476 self._generaldelta = False
477 477
478 478 elif fmt == REVLOGV1:
479 479 if flags & ~REVLOGV1_FLAGS:
480 480 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
481 481 'revlog %s') %
482 482 (flags >> 16, fmt, self.indexfile))
483 483
484 484 self._inline = versionflags & FLAG_INLINE_DATA
485 485 self._generaldelta = versionflags & FLAG_GENERALDELTA
486 486
487 487 elif fmt == REVLOGV2:
488 488 if flags & ~REVLOGV2_FLAGS:
489 489 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
490 490 'revlog %s') %
491 491 (flags >> 16, fmt, self.indexfile))
492 492
493 493 self._inline = versionflags & FLAG_INLINE_DATA
494 494 # generaldelta implied by version 2 revlogs.
495 495 self._generaldelta = True
496 496
497 497 else:
498 498 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
499 499 (fmt, self.indexfile))
500 500 # sparse-revlog can't be on without general-delta (issue6056)
501 501 if not self._generaldelta:
502 502 self._sparserevlog = False
503 503
504 504 self._storedeltachains = True
505 505
506 506 self._io = revlogio()
507 507 if self.version == REVLOGV0:
508 508 self._io = revlogoldio()
509 509 try:
510 510 d = self._io.parseindex(indexdata, self._inline)
511 511 except (ValueError, IndexError):
512 512 raise error.RevlogError(_("index %s is corrupted") %
513 513 self.indexfile)
514 514 self.index, nodemap, self._chunkcache = d
515 515 if nodemap is not None:
516 516 self.nodemap = self._nodecache = nodemap
517 517 if not self._chunkcache:
518 518 self._chunkclear()
519 519 # revnum -> (chain-length, sum-delta-length)
520 520 self._chaininfocache = {}
521 521 # revlog header -> revlog compressor
522 522 self._decompressors = {}
523 523
524 524 @util.propertycache
525 525 def _compressor(self):
526 526 return util.compengines[self._compengine].revlogcompressor()
527 527
528 528 def _indexfp(self, mode='r'):
529 529 """file object for the revlog's index file"""
530 530 args = {r'mode': mode}
531 531 if mode != 'r':
532 532 args[r'checkambig'] = self._checkambig
533 533 if mode == 'w':
534 534 args[r'atomictemp'] = True
535 535 return self.opener(self.indexfile, **args)
536 536
537 537 def _datafp(self, mode='r'):
538 538 """file object for the revlog's data file"""
539 539 return self.opener(self.datafile, mode=mode)
540 540
541 541 @contextlib.contextmanager
542 542 def _datareadfp(self, existingfp=None):
543 543 """file object suitable to read data"""
544 544 # Use explicit file handle, if given.
545 545 if existingfp is not None:
546 546 yield existingfp
547 547
548 548 # Use a file handle being actively used for writes, if available.
549 549 # There is some danger to doing this because reads will seek the
550 550 # file. However, _writeentry() performs a SEEK_END before all writes,
551 551 # so we should be safe.
552 552 elif self._writinghandles:
553 553 if self._inline:
554 554 yield self._writinghandles[0]
555 555 else:
556 556 yield self._writinghandles[1]
557 557
558 558 # Otherwise open a new file handle.
559 559 else:
560 560 if self._inline:
561 561 func = self._indexfp
562 562 else:
563 563 func = self._datafp
564 564 with func() as fp:
565 565 yield fp
566 566
567 567 def tip(self):
568 568 return self.node(len(self.index) - 1)
569 569 def __contains__(self, rev):
570 570 return 0 <= rev < len(self)
571 571 def __len__(self):
572 572 return len(self.index)
573 573 def __iter__(self):
574 574 return iter(pycompat.xrange(len(self)))
575 575 def revs(self, start=0, stop=None):
576 576 """iterate over all rev in this revlog (from start to stop)"""
577 577 return storageutil.iterrevs(len(self), start=start, stop=stop)
578 578
579 579 @util.propertycache
580 580 def nodemap(self):
581 581 if self.index:
582 582 # populate mapping down to the initial node
583 583 node0 = self.index[0][7] # get around changelog filtering
584 584 self.rev(node0)
585 585 return self._nodecache
586 586
587 587 def hasnode(self, node):
588 588 try:
589 589 self.rev(node)
590 590 return True
591 591 except KeyError:
592 592 return False
593 593
594 594 def candelta(self, baserev, rev):
595 595 """whether two revisions (baserev, rev) can be delta-ed or not"""
596 596 # Disable delta if either rev requires a content-changing flag
597 597 # processor (ex. LFS). This is because such flag processor can alter
598 598 # the rawtext content that the delta will be based on, and two clients
599 599 # could have the same revlog node with different flags (i.e. different
600 600 # rawtext contents) and the delta could be incompatible.
601 601 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
602 602 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
603 603 return False
604 604 return True
605 605
606 606 def clearcaches(self):
607 607 self._revisioncache = None
608 608 self._chainbasecache.clear()
609 609 self._chunkcache = (0, '')
610 610 self._pcache = {}
611 611
612 612 try:
613 613 # If we are using the native C version, we are in a fun case
614 614 # where self.index, self.nodemap and self._nodecache are the same
615 615 # object.
616 616 self._nodecache.clearcaches()
617 617 except AttributeError:
618 618 self._nodecache = {nullid: nullrev}
619 619 self._nodepos = None
620 620
621 621 def rev(self, node):
622 622 try:
623 623 return self._nodecache[node]
624 624 except TypeError:
625 625 raise
626 626 except error.RevlogError:
627 627 # parsers.c radix tree lookup failed
628 628 if node == wdirid or node in wdirfilenodeids:
629 629 raise error.WdirUnsupported
630 630 raise error.LookupError(node, self.indexfile, _('no node'))
631 631 except KeyError:
632 632 # pure python cache lookup failed
633 633 n = self._nodecache
634 634 i = self.index
635 635 p = self._nodepos
636 636 if p is None:
637 637 p = len(i) - 1
638 638 else:
639 639 assert p < len(i)
640 640 for r in pycompat.xrange(p, -1, -1):
641 641 v = i[r][7]
642 642 n[v] = r
643 643 if v == node:
644 644 self._nodepos = r - 1
645 645 return r
646 646 if node == wdirid or node in wdirfilenodeids:
647 647 raise error.WdirUnsupported
648 648 raise error.LookupError(node, self.indexfile, _('no node'))
649 649
650 650 # Accessors for index entries.
651 651
652 652 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
653 653 # are flags.
654 654 def start(self, rev):
655 655 return int(self.index[rev][0] >> 16)
656 656
657 657 def flags(self, rev):
658 658 return self.index[rev][0] & 0xFFFF
659 659
660 660 def length(self, rev):
661 661 return self.index[rev][1]
662 662
663 663 def rawsize(self, rev):
664 664 """return the length of the uncompressed text for a given revision"""
665 665 l = self.index[rev][2]
666 666 if l >= 0:
667 667 return l
668 668
669 669 t = self.revision(rev, raw=True)
670 670 return len(t)
671 671
672 672 def size(self, rev):
673 673 """length of non-raw text (processed by a "read" flag processor)"""
674 674 # fast path: if no "read" flag processor could change the content,
675 675 # size is rawsize. note: ELLIPSIS is known to not change the content.
676 676 flags = self.flags(rev)
677 677 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
678 678 return self.rawsize(rev)
679 679
680 680 return len(self.revision(rev, raw=False))
681 681
682 682 def chainbase(self, rev):
683 683 base = self._chainbasecache.get(rev)
684 684 if base is not None:
685 685 return base
686 686
687 687 index = self.index
688 688 iterrev = rev
689 689 base = index[iterrev][3]
690 690 while base != iterrev:
691 691 iterrev = base
692 692 base = index[iterrev][3]
693 693
694 694 self._chainbasecache[rev] = base
695 695 return base
696 696
697 697 def linkrev(self, rev):
698 698 return self.index[rev][4]
699 699
700 700 def parentrevs(self, rev):
701 701 try:
702 702 entry = self.index[rev]
703 703 except IndexError:
704 704 if rev == wdirrev:
705 705 raise error.WdirUnsupported
706 706 raise
707 707
708 708 return entry[5], entry[6]
709 709
710 710 # fast parentrevs(rev) where rev isn't filtered
711 711 _uncheckedparentrevs = parentrevs
712 712
713 713 def node(self, rev):
714 714 try:
715 715 return self.index[rev][7]
716 716 except IndexError:
717 717 if rev == wdirrev:
718 718 raise error.WdirUnsupported
719 719 raise
720 720
721 721 # Derived from index values.
722 722
723 723 def end(self, rev):
724 724 return self.start(rev) + self.length(rev)
725 725
726 726 def parents(self, node):
727 727 i = self.index
728 728 d = i[self.rev(node)]
729 729 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
730 730
731 731 def chainlen(self, rev):
732 732 return self._chaininfo(rev)[0]
733 733
734 734 def _chaininfo(self, rev):
735 735 chaininfocache = self._chaininfocache
736 736 if rev in chaininfocache:
737 737 return chaininfocache[rev]
738 738 index = self.index
739 739 generaldelta = self._generaldelta
740 740 iterrev = rev
741 741 e = index[iterrev]
742 742 clen = 0
743 743 compresseddeltalen = 0
744 744 while iterrev != e[3]:
745 745 clen += 1
746 746 compresseddeltalen += e[1]
747 747 if generaldelta:
748 748 iterrev = e[3]
749 749 else:
750 750 iterrev -= 1
751 751 if iterrev in chaininfocache:
752 752 t = chaininfocache[iterrev]
753 753 clen += t[0]
754 754 compresseddeltalen += t[1]
755 755 break
756 756 e = index[iterrev]
757 757 else:
758 758 # Add text length of base since decompressing that also takes
759 759 # work. For cache hits the length is already included.
760 760 compresseddeltalen += e[1]
761 761 r = (clen, compresseddeltalen)
762 762 chaininfocache[rev] = r
763 763 return r
764 764
765 765 def _deltachain(self, rev, stoprev=None):
766 766 """Obtain the delta chain for a revision.
767 767
768 768 ``stoprev`` specifies a revision to stop at. If not specified, we
769 769 stop at the base of the chain.
770 770
771 771 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
772 772 revs in ascending order and ``stopped`` is a bool indicating whether
773 773 ``stoprev`` was hit.
774 774 """
775 775 # Try C implementation.
776 776 try:
777 777 return self.index.deltachain(rev, stoprev, self._generaldelta)
778 778 except AttributeError:
779 779 pass
780 780
781 781 chain = []
782 782
783 783 # Alias to prevent attribute lookup in tight loop.
784 784 index = self.index
785 785 generaldelta = self._generaldelta
786 786
787 787 iterrev = rev
788 788 e = index[iterrev]
789 789 while iterrev != e[3] and iterrev != stoprev:
790 790 chain.append(iterrev)
791 791 if generaldelta:
792 792 iterrev = e[3]
793 793 else:
794 794 iterrev -= 1
795 795 e = index[iterrev]
796 796
797 797 if iterrev == stoprev:
798 798 stopped = True
799 799 else:
800 800 chain.append(iterrev)
801 801 stopped = False
802 802
803 803 chain.reverse()
804 804 return chain, stopped
805 805
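# Example (editor's sketch, "rl" is a hypothetical revlog instance):
# reconstructing a revision applies the deltas of its chain, in order,
# on top of the chain's base text:
#
#     chain, stopped = rl._deltachain(rev)
#     # chain[0] holds the base text (unless "stopped" cut the walk at
#     # stoprev); every later rev in chain is a delta on its predecessor.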
806 806 def ancestors(self, revs, stoprev=0, inclusive=False):
807 807 """Generate the ancestors of 'revs' in reverse revision order.
808 808 Does not generate revs lower than stoprev.
809 809
810 810 See the documentation for ancestor.lazyancestors for more details."""
811 811
812 812 # first, make sure start revisions aren't filtered
813 813 revs = list(revs)
814 814 checkrev = self.node
815 815 for r in revs:
816 816 checkrev(r)
817 817 # and we're sure ancestors aren't filtered as well
818 818
819 819 if rustext is not None:
820 820 lazyancestors = rustext.ancestor.LazyAncestors
821 821 arg = self.index
822 822 elif util.safehasattr(parsers, 'rustlazyancestors'):
823 823 lazyancestors = ancestor.rustlazyancestors
824 824 arg = self.index
825 825 else:
826 826 lazyancestors = ancestor.lazyancestors
827 827 arg = self._uncheckedparentrevs
828 828 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
829 829
830 830 def descendants(self, revs):
831 831 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
832 832
833 833 def findcommonmissing(self, common=None, heads=None):
834 834 """Return a tuple of the ancestors of common and the ancestors of heads
835 835 that are not ancestors of common. In revset terminology, we return the
836 836 tuple:
837 837
838 838 ::common, (::heads) - (::common)
839 839
840 840 The list is sorted by revision number, meaning it is
841 841 topologically sorted.
842 842
843 843 'heads' and 'common' are both lists of node IDs. If heads is
844 844 not supplied, uses all of the revlog's heads. If common is not
845 845 supplied, uses nullid."""
846 846 if common is None:
847 847 common = [nullid]
848 848 if heads is None:
849 849 heads = self.heads()
850 850
851 851 common = [self.rev(n) for n in common]
852 852 heads = [self.rev(n) for n in heads]
853 853
854 854 # we want the ancestors, but inclusive
855 855 class lazyset(object):
856 856 def __init__(self, lazyvalues):
857 857 self.addedvalues = set()
858 858 self.lazyvalues = lazyvalues
859 859
860 860 def __contains__(self, value):
861 861 return value in self.addedvalues or value in self.lazyvalues
862 862
863 863 def __iter__(self):
864 864 added = self.addedvalues
865 865 for r in added:
866 866 yield r
867 867 for r in self.lazyvalues:
868 868 if not r in added:
869 869 yield r
870 870
871 871 def add(self, value):
872 872 self.addedvalues.add(value)
873 873
874 874 def update(self, values):
875 875 self.addedvalues.update(values)
876 876
877 877 has = lazyset(self.ancestors(common))
878 878 has.add(nullrev)
879 879 has.update(common)
880 880
881 881 # take all ancestors from heads that aren't in has
882 882 missing = set()
883 883 visit = collections.deque(r for r in heads if r not in has)
884 884 while visit:
885 885 r = visit.popleft()
886 886 if r in missing:
887 887 continue
888 888 else:
889 889 missing.add(r)
890 890 for p in self.parentrevs(r):
891 891 if p not in has:
892 892 visit.append(p)
893 893 missing = list(missing)
894 894 missing.sort()
895 895 return has, [self.node(miss) for miss in missing]
896 896
897 897 def incrementalmissingrevs(self, common=None):
898 898 """Return an object that can be used to incrementally compute the
899 899 revision numbers of the ancestors of arbitrary sets that are not
900 900 ancestors of common. This is an ancestor.incrementalmissingancestors
901 901 object.
902 902
903 903 'common' is a list of revision numbers. If common is not supplied, uses
904 904 nullrev.
905 905 """
906 906 if common is None:
907 907 common = [nullrev]
908 908
909 909 if rustext is not None:
910 910 return rustext.ancestor.MissingAncestors(self.index, common)
911 911 return ancestor.incrementalmissingancestors(self.parentrevs, common)
912 912
913 913 def findmissingrevs(self, common=None, heads=None):
914 914 """Return the revision numbers of the ancestors of heads that
915 915 are not ancestors of common.
916 916
917 917 More specifically, return a list of revision numbers corresponding to
918 918 nodes N such that every N satisfies the following constraints:
919 919
920 920 1. N is an ancestor of some node in 'heads'
921 921 2. N is not an ancestor of any node in 'common'
922 922
923 923 The list is sorted by revision number, meaning it is
924 924 topologically sorted.
925 925
926 926 'heads' and 'common' are both lists of revision numbers. If heads is
927 927 not supplied, uses all of the revlog's heads. If common is not
928 928 supplied, uses nullid."""
929 929 if common is None:
930 930 common = [nullrev]
931 931 if heads is None:
932 932 heads = self.headrevs()
933 933
934 934 inc = self.incrementalmissingrevs(common=common)
935 935 return inc.missingancestors(heads)
936 936
937 937 def findmissing(self, common=None, heads=None):
938 938 """Return the ancestors of heads that are not ancestors of common.
939 939
940 940 More specifically, return a list of nodes N such that every N
941 941 satisfies the following constraints:
942 942
943 943 1. N is an ancestor of some node in 'heads'
944 944 2. N is not an ancestor of any node in 'common'
945 945
946 946 The list is sorted by revision number, meaning it is
947 947 topologically sorted.
948 948
949 949 'heads' and 'common' are both lists of node IDs. If heads is
950 950 not supplied, uses all of the revlog's heads. If common is not
951 951 supplied, uses nullid."""
952 952 if common is None:
953 953 common = [nullid]
954 954 if heads is None:
955 955 heads = self.heads()
956 956
957 957 common = [self.rev(n) for n in common]
958 958 heads = [self.rev(n) for n in heads]
959 959
960 960 inc = self.incrementalmissingrevs(common=common)
961 961 return [self.node(r) for r in inc.missingancestors(heads)]
962 962
963 963 def nodesbetween(self, roots=None, heads=None):
964 964 """Return a topological path from 'roots' to 'heads'.
965 965
966 966 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
967 967 topologically sorted list of all nodes N that satisfy both of
968 968 these constraints:
969 969
970 970 1. N is a descendant of some node in 'roots'
971 971 2. N is an ancestor of some node in 'heads'
972 972
973 973 Every node is considered to be both a descendant and an ancestor
974 974 of itself, so every reachable node in 'roots' and 'heads' will be
975 975 included in 'nodes'.
976 976
977 977 'outroots' is the list of reachable nodes in 'roots', i.e., the
978 978 subset of 'roots' that is returned in 'nodes'. Likewise,
979 979 'outheads' is the subset of 'heads' that is also in 'nodes'.
980 980
981 981 'roots' and 'heads' are both lists of node IDs. If 'roots' is
982 982 unspecified, uses nullid as the only root. If 'heads' is
983 983 unspecified, uses list of all of the revlog's heads."""
984 984 nonodes = ([], [], [])
985 985 if roots is not None:
986 986 roots = list(roots)
987 987 if not roots:
988 988 return nonodes
989 989 lowestrev = min([self.rev(n) for n in roots])
990 990 else:
991 991 roots = [nullid] # Everybody's a descendant of nullid
992 992 lowestrev = nullrev
993 993 if (lowestrev == nullrev) and (heads is None):
994 994 # We want _all_ the nodes!
995 995 return ([self.node(r) for r in self], [nullid], list(self.heads()))
996 996 if heads is None:
997 997 # All nodes are ancestors, so the latest ancestor is the last
998 998 # node.
999 999 highestrev = len(self) - 1
1000 1000 # Set ancestors to None to signal that every node is an ancestor.
1001 1001 ancestors = None
1002 1002 # Set heads to an empty dictionary for later discovery of heads
1003 1003 heads = {}
1004 1004 else:
1005 1005 heads = list(heads)
1006 1006 if not heads:
1007 1007 return nonodes
1008 1008 ancestors = set()
1009 1009 # Turn heads into a dictionary so we can remove 'fake' heads.
1010 1010 # Also, later we will be using it to filter out the heads we can't
1011 1011 # find from roots.
1012 1012 heads = dict.fromkeys(heads, False)
1013 1013 # Start at the top and keep marking parents until we're done.
1014 1014 nodestotag = set(heads)
1015 1015 # Remember where the top was so we can use it as a limit later.
1016 1016 highestrev = max([self.rev(n) for n in nodestotag])
1017 1017 while nodestotag:
1018 1018 # grab a node to tag
1019 1019 n = nodestotag.pop()
1020 1020 # Never tag nullid
1021 1021 if n == nullid:
1022 1022 continue
1023 1023 # A node's revision number represents its place in a
1024 1024 # topologically sorted list of nodes.
1025 1025 r = self.rev(n)
1026 1026 if r >= lowestrev:
1027 1027 if n not in ancestors:
1028 1028 # If we are possibly a descendant of one of the roots
1029 1029 # and we haven't already been marked as an ancestor
1030 1030 ancestors.add(n) # Mark as ancestor
1031 1031 # Add non-nullid parents to list of nodes to tag.
1032 1032 nodestotag.update([p for p in self.parents(n) if
1033 1033 p != nullid])
1034 1034 elif n in heads: # We've seen it before, is it a fake head?
1035 1035 # So it is, real heads should not be the ancestors of
1036 1036 # any other heads.
1037 1037 heads.pop(n)
1038 1038 if not ancestors:
1039 1039 return nonodes
1040 1040 # Now that we have our set of ancestors, we want to remove any
1041 1041 # roots that are not ancestors.
1042 1042
1043 1043 # If one of the roots was nullid, everything is included anyway.
1044 1044 if lowestrev > nullrev:
1045 1045 # But, since we weren't, let's recompute the lowest rev to not
1046 1046 # include roots that aren't ancestors.
1047 1047
1048 1048 # Filter out roots that aren't ancestors of heads
1049 1049 roots = [root for root in roots if root in ancestors]
1050 1050 # Recompute the lowest revision
1051 1051 if roots:
1052 1052 lowestrev = min([self.rev(root) for root in roots])
1053 1053 else:
1054 1054 # No more roots? Return empty list
1055 1055 return nonodes
1056 1056 else:
1057 1057 # We are descending from nullid, and don't need to care about
1058 1058 # any other roots.
1059 1059 lowestrev = nullrev
1060 1060 roots = [nullid]
1061 1061 # Transform our roots list into a set.
1062 1062 descendants = set(roots)
1063 1063 # Also, keep the original roots so we can filter out roots that aren't
1064 1064 # 'real' roots (i.e. are descended from other roots).
1065 1065 roots = descendants.copy()
1066 1066 # Our topologically sorted list of output nodes.
1067 1067 orderedout = []
1068 1068 # Don't start at nullid since we don't want nullid in our output list,
1069 1069 # and if nullid shows up in descendants, empty parents will look like
1070 1070 # they're descendants.
1071 1071 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1072 1072 n = self.node(r)
1073 1073 isdescendant = False
1074 1074 if lowestrev == nullrev: # Everybody is a descendant of nullid
1075 1075 isdescendant = True
1076 1076 elif n in descendants:
1077 1077 # n is already a descendant
1078 1078 isdescendant = True
1079 1079 # This check only needs to be done here because all the roots
1080 1080 # will start being marked as descendants before the loop.
1081 1081 if n in roots:
1082 1082 # If n was a root, check if it's a 'real' root.
1083 1083 p = tuple(self.parents(n))
1084 1084 # If any of its parents are descendants, it's not a root.
1085 1085 if (p[0] in descendants) or (p[1] in descendants):
1086 1086 roots.remove(n)
1087 1087 else:
1088 1088 p = tuple(self.parents(n))
1089 1089 # A node is a descendant if either of its parents is a
1090 1090 # descendant. (We seeded the descendants set with the roots
1091 1091 # up there, remember?)
1092 1092 if (p[0] in descendants) or (p[1] in descendants):
1093 1093 descendants.add(n)
1094 1094 isdescendant = True
1095 1095 if isdescendant and ((ancestors is None) or (n in ancestors)):
1096 1096 # Only include nodes that are both descendants and ancestors.
1097 1097 orderedout.append(n)
1098 1098 if (ancestors is not None) and (n in heads):
1099 1099 # We're trying to figure out which heads are reachable
1100 1100 # from roots.
1101 1101 # Mark this head as having been reached
1102 1102 heads[n] = True
1103 1103 elif ancestors is None:
1104 1104 # Otherwise, we're trying to discover the heads.
1105 1105 # Assume this is a head because if it isn't, the next step
1106 1106 # will eventually remove it.
1107 1107 heads[n] = True
1108 1108 # But, obviously its parents aren't.
1109 1109 for p in self.parents(n):
1110 1110 heads.pop(p, None)
1111 1111 heads = [head for head, flag in heads.iteritems() if flag]
1112 1112 roots = list(roots)
1113 1113 assert orderedout
1114 1114 assert roots
1115 1115 assert heads
1116 1116 return (orderedout, roots, heads)
1117 1117
1118 1118 def headrevs(self, revs=None):
1119 1119 if revs is None:
1120 1120 try:
1121 1121 return self.index.headrevs()
1122 1122 except AttributeError:
1123 1123 return self._headrevs()
1124 1124 return dagop.headrevs(revs, self.parentrevs)
1125 1125
1126 1126 def computephases(self, roots):
1127 1127 return self.index.computephasesmapsets(roots)
1128 1128
1129 1129 def _headrevs(self):
1130 1130 count = len(self)
1131 1131 if not count:
1132 1132 return [nullrev]
1133 1133 # we won't iterate over filtered revs, so nobody is a head at the start
1134 1134 ishead = [0] * (count + 1)
1135 1135 index = self.index
1136 1136 for r in self:
1137 1137 ishead[r] = 1 # I may be a head
1138 1138 e = index[r]
1139 1139 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1140 1140 return [r for r, val in enumerate(ishead) if val]
1141 1141
1142 1142 def heads(self, start=None, stop=None):
1143 1143 """return the list of all nodes that have no children
1144 1144
1145 1145 if start is specified, only heads that are descendants of
1146 1146 start will be returned
1147 1147 if stop is specified, it will consider all the revs from stop
1148 1148 as if they had no children
1149 1149 """
1150 1150 if start is None and stop is None:
1151 1151 if not len(self):
1152 1152 return [nullid]
1153 1153 return [self.node(r) for r in self.headrevs()]
1154 1154
1155 1155 if start is None:
1156 1156 start = nullrev
1157 1157 else:
1158 1158 start = self.rev(start)
1159 1159
1160 1160 stoprevs = set(self.rev(n) for n in stop or [])
1161 1161
1162 1162 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1163 1163 stoprevs=stoprevs)
1164 1164
1165 1165 return [self.node(rev) for rev in revs]
1166 1166
1167 1167 def children(self, node):
1168 1168 """find the children of a given node"""
1169 1169 c = []
1170 1170 p = self.rev(node)
1171 1171 for r in self.revs(start=p + 1):
1172 1172 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1173 1173 if prevs:
1174 1174 for pr in prevs:
1175 1175 if pr == p:
1176 1176 c.append(self.node(r))
1177 1177 elif p == nullrev:
1178 1178 c.append(self.node(r))
1179 1179 return c
1180 1180
1181 1181 def commonancestorsheads(self, a, b):
1182 1182 """calculate all the heads of the common ancestors of nodes a and b"""
1183 1183 a, b = self.rev(a), self.rev(b)
1184 1184 ancs = self._commonancestorsheads(a, b)
1185 1185 return pycompat.maplist(self.node, ancs)
1186 1186
1187 1187 def _commonancestorsheads(self, *revs):
1188 1188 """calculate all the heads of the common ancestors of revs"""
1189 1189 try:
1190 1190 ancs = self.index.commonancestorsheads(*revs)
1191 1191 except (AttributeError, OverflowError): # C implementation failed
1192 1192 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1193 1193 return ancs
1194 1194
1195 1195 def isancestor(self, a, b):
1196 1196 """return True if node a is an ancestor of node b
1197 1197
1198 1198 A revision is considered an ancestor of itself."""
1199 1199 a, b = self.rev(a), self.rev(b)
1200 1200 return self.isancestorrev(a, b)
1201 1201
1202 1202 def isancestorrev(self, a, b):
1203 1203 """return True if revision a is an ancestor of revision b
1204 1204
1205 1205 A revision is considered an ancestor of itself.
1206 1206
1207 1207 The implementation of this is trivial but the use of
1208 1208 commonancestorsheads is not."""
1209 1209 if a == nullrev:
1210 1210 return True
1211 1211 elif a == b:
1212 1212 return True
1213 1213 elif a > b:
1214 1214 return False
1215 1215 return a in self._commonancestorsheads(a, b)
1216 1216
1217 1217 def ancestor(self, a, b):
1218 1218 """calculate the "best" common ancestor of nodes a and b"""
1219 1219
1220 1220 a, b = self.rev(a), self.rev(b)
1221 1221 try:
1222 1222 ancs = self.index.ancestors(a, b)
1223 1223 except (AttributeError, OverflowError):
1224 1224 ancs = ancestor.ancestors(self.parentrevs, a, b)
1225 1225 if ancs:
1226 1226 # choose a consistent winner when there's a tie
1227 1227 return min(map(self.node, ancs))
1228 1228 return nullid
1229 1229
1230 1230 def _match(self, id):
1231 1231 if isinstance(id, int):
1232 1232 # rev
1233 1233 return self.node(id)
1234 1234 if len(id) == 20:
1235 1235 # possibly a binary node
1236 1236 # odds of a binary node being all hex in ASCII are 1 in 10**25
1237 1237 try:
1238 1238 node = id
1239 1239 self.rev(node) # quick search the index
1240 1240 return node
1241 1241 except error.LookupError:
1242 1242 pass # may be partial hex id
1243 1243 try:
1244 1244 # str(rev)
1245 1245 rev = int(id)
1246 1246 if "%d" % rev != id:
1247 1247 raise ValueError
1248 1248 if rev < 0:
1249 1249 rev = len(self) + rev
1250 1250 if rev < 0 or rev >= len(self):
1251 1251 raise ValueError
1252 1252 return self.node(rev)
1253 1253 except (ValueError, OverflowError):
1254 1254 pass
1255 1255 if len(id) == 40:
1256 1256 try:
1257 1257 # a full hex nodeid?
1258 1258 node = bin(id)
1259 1259 self.rev(node)
1260 1260 return node
1261 1261 except (TypeError, error.LookupError):
1262 1262 pass
1263 1263
1264 1264 def _partialmatch(self, id):
1265 1265 # we don't care about wdirfilenodeids as they should always be full hashes
1266 1266 maybewdir = wdirhex.startswith(id)
1267 1267 try:
1268 1268 partial = self.index.partialmatch(id)
1269 1269 if partial and self.hasnode(partial):
1270 1270 if maybewdir:
1271 1271 # single 'ff...' match in radix tree, ambiguous with wdir
1272 1272 raise error.RevlogError
1273 1273 return partial
1274 1274 if maybewdir:
1275 1275 # no 'ff...' match in radix tree, wdir identified
1276 1276 raise error.WdirUnsupported
1277 1277 return None
1278 1278 except error.RevlogError:
1279 1279 # parsers.c radix tree lookup gave multiple matches
1280 1280 # fast path: for unfiltered changelog, radix tree is accurate
1281 1281 if not getattr(self, 'filteredrevs', None):
1282 1282 raise error.AmbiguousPrefixLookupError(
1283 1283 id, self.indexfile, _('ambiguous identifier'))
1284 1284 # fall through to slow path that filters hidden revisions
1285 1285 except (AttributeError, ValueError):
1286 1286 # we are pure python, or key was too short to search radix tree
1287 1287 pass
1288 1288
1289 1289 if id in self._pcache:
1290 1290 return self._pcache[id]
1291 1291
1292 1292 if len(id) <= 40:
1293 1293 try:
1294 1294 # hex(node)[:...]
1295 1295 l = len(id) // 2 # grab an even number of digits
1296 1296 prefix = bin(id[:l * 2])
1297 1297 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1298 1298 nl = [n for n in nl if hex(n).startswith(id) and
1299 1299 self.hasnode(n)]
1300 1300 if nullhex.startswith(id):
1301 1301 nl.append(nullid)
1302 1302 if len(nl) > 0:
1303 1303 if len(nl) == 1 and not maybewdir:
1304 1304 self._pcache[id] = nl[0]
1305 1305 return nl[0]
1306 1306 raise error.AmbiguousPrefixLookupError(
1307 1307 id, self.indexfile, _('ambiguous identifier'))
1308 1308 if maybewdir:
1309 1309 raise error.WdirUnsupported
1310 1310 return None
1311 1311 except TypeError:
1312 1312 pass
1313 1313
1314 1314 def lookup(self, id):
1315 1315 """locate a node based on:
1316 1316 - revision number or str(revision number)
1317 1317 - nodeid or subset of hex nodeid
1318 1318 """
1319 1319 n = self._match(id)
1320 1320 if n is not None:
1321 1321 return n
1322 1322 n = self._partialmatch(id)
1323 1323 if n:
1324 1324 return n
1325 1325
1326 1326 raise error.LookupError(id, self.indexfile, _('no match found'))
1327 1327
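# Example (editor's sketch, hypothetical identifiers): lookup() accepts
# several forms and funnels them through _match()/_partialmatch():
#
#     rl.lookup(0)                # revision number
#     rl.lookup(b'0')             # str(revision number)
#     rl.lookup(node)             # full 20-byte binary nodeid
#     rl.lookup(b'1e4e1b8f71e0')  # unambiguous hex nodeid prefix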
1328 1328 def shortest(self, node, minlength=1):
1329 1329 """Find the shortest unambiguous prefix that matches node."""
1330 1330 def isvalid(prefix):
1331 1331 try:
1332 1332 node = self._partialmatch(prefix)
1333 1333 except error.AmbiguousPrefixLookupError:
1334 1334 return False
1335 1335 except error.WdirUnsupported:
1336 1336 # single 'ff...' match
1337 1337 return True
1338 1338 if node is None:
1339 1339 raise error.LookupError(node, self.indexfile, _('no node'))
1340 1340 return True
1341 1341
1342 1342 def maybewdir(prefix):
1343 - return all(c == 'f' for c in prefix)
1343 + return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1344 1344
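# Editor's note on the change above: on Python 3, iterating a bytes
# object yields integers, so comparing each item against a one-byte
# string would never match; pycompat.iterbytestr() yields length-1
# bytes on both Python 2 and 3:
#
#     list(iter(b'ff'))                  # [102, 102] on Python 3
#     list(pycompat.iterbytestr(b'ff'))  # [b'f', b'f']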
1345 1345 hexnode = hex(node)
1346 1346
1347 1347 def disambiguate(hexnode, minlength):
1348 1348 """Disambiguate against wdirid."""
1349 1349 for length in range(minlength, 41):
1350 1350 prefix = hexnode[:length]
1351 1351 if not maybewdir(prefix):
1352 1352 return prefix
1353 1353
1354 1354 if not getattr(self, 'filteredrevs', None):
1355 1355 try:
1356 1356 length = max(self.index.shortest(node), minlength)
1357 1357 return disambiguate(hexnode, length)
1358 1358 except error.RevlogError:
1359 1359 if node != wdirid:
1360 1360 raise error.LookupError(node, self.indexfile, _('no node'))
1361 1361 except AttributeError:
1362 1362 # Fall through to pure code
1363 1363 pass
1364 1364
1365 1365 if node == wdirid:
1366 1366 for length in range(minlength, 41):
1367 1367 prefix = hexnode[:length]
1368 1368 if isvalid(prefix):
1369 1369 return prefix
1370 1370
1371 1371 for length in range(minlength, 41):
1372 1372 prefix = hexnode[:length]
1373 1373 if isvalid(prefix):
1374 1374 return disambiguate(hexnode, length)
1375 1375
1376 1376 def cmp(self, node, text):
1377 1377 """compare text with a given file revision
1378 1378
1379 1379 returns True if text is different than what is stored.
1380 1380 """
1381 1381 p1, p2 = self.parents(node)
1382 1382 return storageutil.hashrevisionsha1(text, p1, p2) != node
1383 1383
1384 1384 def _cachesegment(self, offset, data):
1385 1385 """Add a segment to the revlog cache.
1386 1386
1387 1387 Accepts an absolute offset and the data that is at that location.
1388 1388 """
1389 1389 o, d = self._chunkcache
1390 1390 # try to add to existing cache
1391 1391 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1392 1392 self._chunkcache = o, d + data
1393 1393 else:
1394 1394 self._chunkcache = offset, data
1395 1395
1396 1396 def _readsegment(self, offset, length, df=None):
1397 1397 """Load a segment of raw data from the revlog.
1398 1398
1399 1399 Accepts an absolute offset, length to read, and an optional existing
1400 1400 file handle to read from.
1401 1401
1402 1402 If an existing file handle is passed, it will be seeked and the
1403 1403 original seek position will NOT be restored.
1404 1404
1405 1405 Returns a str or buffer of raw byte data.
1406 1406
1407 1407 Raises if the requested number of bytes could not be read.
1408 1408 """
1409 1409 # Cache data both forward and backward around the requested
1410 1410 # data, in a fixed size window. This helps speed up operations
1411 1411 # involving reading the revlog backwards.
1412 1412 cachesize = self._chunkcachesize
1413 1413 realoffset = offset & ~(cachesize - 1)
1414 1414 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1415 1415 - realoffset)
1416 1416 with self._datareadfp(df) as df:
1417 1417 df.seek(realoffset)
1418 1418 d = df.read(reallength)
1419 1419
1420 1420 self._cachesegment(realoffset, d)
1421 1421 if offset != realoffset or reallength != length:
1422 1422 startoffset = offset - realoffset
1423 1423 if len(d) - startoffset < length:
1424 1424 raise error.RevlogError(
1425 1425 _('partial read of revlog %s; expected %d bytes from '
1426 1426 'offset %d, got %d') %
1427 1427 (self.indexfile if self._inline else self.datafile,
1428 1428 length, realoffset, len(d) - startoffset))
1429 1429
1430 1430 return util.buffer(d, startoffset, length)
1431 1431
1432 1432 if len(d) < length:
1433 1433 raise error.RevlogError(
1434 1434 _('partial read of revlog %s; expected %d bytes from offset '
1435 1435 '%d, got %d') %
1436 1436 (self.indexfile if self._inline else self.datafile,
1437 1437 length, offset, len(d)))
1438 1438
1439 1439 return d
1440 1440
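# Example (editor's sketch): with the default 64KB cache size, a read of
# 100 bytes at offset 70000 widens to the surrounding aligned window:
#
#     cachesize = 65536
#     realoffset = 70000 & ~(cachesize - 1)              # -> 65536
#     reallength = (((70000 + 100 + cachesize)
#                    & ~(cachesize - 1)) - realoffset)   # -> 65536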
1441 1441 def _getsegment(self, offset, length, df=None):
1442 1442 """Obtain a segment of raw data from the revlog.
1443 1443
1444 1444 Accepts an absolute offset, length of bytes to obtain, and an
1445 1445 optional file handle to the already-opened revlog. If the file
1446 1446 handle is used, its original seek position will not be preserved.
1447 1447
1448 1448 Requests for data may be returned from a cache.
1449 1449
1450 1450 Returns a str or a buffer instance of raw byte data.
1451 1451 """
1452 1452 o, d = self._chunkcache
1453 1453 l = len(d)
1454 1454
1455 1455 # is it in the cache?
1456 1456 cachestart = offset - o
1457 1457 cacheend = cachestart + length
1458 1458 if cachestart >= 0 and cacheend <= l:
1459 1459 if cachestart == 0 and cacheend == l:
1460 1460 return d # avoid a copy
1461 1461 return util.buffer(d, cachestart, cacheend - cachestart)
1462 1462
1463 1463 return self._readsegment(offset, length, df=df)
1464 1464
1465 1465 def _getsegmentforrevs(self, startrev, endrev, df=None):
1466 1466 """Obtain a segment of raw data corresponding to a range of revisions.
1467 1467
1468 1468 Accepts the start and end revisions and an optional already-open
1469 1469 file handle to be used for reading. If the file handle is used, its
1470 1470 seek position will not be preserved.
1471 1471
1472 1472 Requests for data may be satisfied by a cache.
1473 1473
1474 1474 Returns a 2-tuple of (offset, data) for the requested range of
1475 1475 revisions. Offset is the integer offset from the beginning of the
1476 1476 revlog and data is a str or buffer of the raw byte data.
1477 1477
1478 1478 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1479 1479 to determine where each revision's data begins and ends.
1480 1480 """
1481 1481 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1482 1482 # (functions are expensive).
1483 1483 index = self.index
1484 1484 istart = index[startrev]
1485 1485 start = int(istart[0] >> 16)
1486 1486 if startrev == endrev:
1487 1487 end = start + istart[1]
1488 1488 else:
1489 1489 iend = index[endrev]
1490 1490 end = int(iend[0] >> 16) + iend[1]
1491 1491
1492 1492 if self._inline:
1493 1493 start += (startrev + 1) * self._io.size
1494 1494 end += (endrev + 1) * self._io.size
1495 1495 length = end - start
1496 1496
1497 1497 return start, self._getsegment(start, length, df=df)
1498 1498
1499 1499 def _chunk(self, rev, df=None):
1500 1500 """Obtain a single decompressed chunk for a revision.
1501 1501
1502 1502 Accepts an integer revision and an optional already-open file handle
1503 1503 to be used for reading. If used, the seek position of the file will not
1504 1504 be preserved.
1505 1505
1506 1506 Returns a str holding uncompressed data for the requested revision.
1507 1507 """
1508 1508 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1509 1509
1510 1510 def _chunks(self, revs, df=None, targetsize=None):
1511 1511 """Obtain decompressed chunks for the specified revisions.
1512 1512
1513 1513 Accepts an iterable of numeric revisions that are assumed to be in
1514 1514 ascending order. Also accepts an optional already-open file handle
1515 1515 to be used for reading. If used, the seek position of the file will
1516 1516 not be preserved.
1517 1517
1518 1518 This function is similar to calling ``self._chunk()`` multiple times,
1519 1519 but is faster.
1520 1520
1521 1521 Returns a list with decompressed data for each requested revision.
1522 1522 """
1523 1523 if not revs:
1524 1524 return []
1525 1525 start = self.start
1526 1526 length = self.length
1527 1527 inline = self._inline
1528 1528 iosize = self._io.size
1529 1529 buffer = util.buffer
1530 1530
1531 1531 l = []
1532 1532 ladd = l.append
1533 1533
1534 1534 if not self._withsparseread:
1535 1535 slicedchunks = (revs,)
1536 1536 else:
1537 1537 slicedchunks = deltautil.slicechunk(self, revs,
1538 1538 targetsize=targetsize)
1539 1539
1540 1540 for revschunk in slicedchunks:
1541 1541 firstrev = revschunk[0]
1542 1542 # Skip trailing revisions with empty diff
1543 1543 for lastrev in revschunk[::-1]:
1544 1544 if length(lastrev) != 0:
1545 1545 break
1546 1546
1547 1547 try:
1548 1548 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1549 1549 except OverflowError:
1550 1550 # issue4215 - we can't cache a run of chunks greater than
1551 1551 # 2G on Windows
1552 1552 return [self._chunk(rev, df=df) for rev in revschunk]
1553 1553
1554 1554 decomp = self.decompress
1555 1555 for rev in revschunk:
1556 1556 chunkstart = start(rev)
1557 1557 if inline:
1558 1558 chunkstart += (rev + 1) * iosize
1559 1559 chunklength = length(rev)
1560 1560 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1561 1561
1562 1562 return l
1563 1563
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

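    # Sketch of how a delta chain bottoms out (illustrative; assumes ``rl``
    # is an open revlog): following deltaparent() from any revision
    # eventually reaches a revision stored as full text, whose delta parent
    # is nullrev:
    #
    #     base = rev
    #     while rl.deltaparent(base) != nullrev:
    #         base = rl.deltaparent(base)
    #     # ``base`` now holds the full-text base of ``rev``'s chain
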
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

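    # Illustrative invariant (a sketch; assumes ``rl`` is an open revlog and
    # rev1/rev2 are valid revisions): applying the binary delta from
    # revdiff() to the raw text of rev1 reconstructs the raw text of rev2:
    #
    #     delta = rl.revdiff(rev1, rev2)
    #     assert (mdiff.patches(rl.revision(rev1, raw=True), [delta]) ==
    #             rl.revision(rev2, raw=True))
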
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

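    # Usage sketch (illustrative; ``rl`` is any open revlog): ``raw=True``
    # returns the bytes as stored, before flag processors run, while the
    # default returns the flag-processed text; the two only differ when
    # non-default flags are set on the revision:
    #
    #     rawtext = rl.revision(node, raw=True)
    #     text = rl.revision(node)
    #     assert (rl.flags(rl.rev(node)) != REVIDX_DEFAULT_FLAGS or
    #             text == rawtext)
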
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

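    # Illustrative relationship to the node (a sketch; assumes a plain
    # revision with no flags set): the node is the hash of the revision
    # text and its parent nodes, so an intact revision satisfies:
    #
    #     p1, p2 = rl.parents(node)
    #     assert rl.hash(rl.revision(node), p1, p2) == node
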
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

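    # Round-trip sketch for flag transforms (illustrative; well-behaved
    # processors make the 'write' and 'read' transforms inverses of each
    # other):
    #
    #     rawtext, vhash = rl._processflags(text, flags, 'write')
    #     text2, vhash2 = rl._processflags(rawtext, flags, 'read')
    #     assert text2 == text
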
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

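    # Minimal usage sketch (illustrative; assumes ``tr`` is an open
    # transaction and ``linkrev`` is a hypothetical changelog revision to
    # link to; ``nullid`` for both parents makes this a root revision):
    #
    #     node = rl.addrevision(b'new file content', tr, linkrev,
    #                           nullid, nullid)
    #     assert rl.revision(node) == b'new file content'
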
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

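    # The (header, data) convention, illustrated (a sketch; ``rl`` is any
    # open revlog): the concatenation of the two return values is exactly
    # what decompress() expects, with 'u' marking stored-uncompressed
    # chunks and '' meaning the payload carries its own engine header:
    #
    #     header, data = rl.compress(chunkdata)
    #     assert bytes(rl.decompress(header + data)) == chunkdata
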
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

        return nodes

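    # Shape of one ``deltas`` entry consumed above (a sketch; the names are
    # illustrative): a 7-tuple as produced by changegroup unpacking,
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags) = deltaentry
    #
    # where ``deltabase`` must already be known to this revlog and ``delta``
    # is a binary patch against its raw text.
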
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

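    # Usage sketch (illustrative; ``rl`` is any open revlog): callers
    # typically consult getstrippoint() before strip() to learn what the
    # truncation will affect:
    #
    #     striprev, brokenrevs = rl.getstrippoint(minlink)
    #     # revisions >= striprev would be removed; ``brokenrevs`` holds
    #     # revs whose linkrevs the strip would break
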
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

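    # Interpreting the return value (a sketch): both numbers are zero for a
    # healthy revlog; ``dd`` is the data file's length discrepancy in bytes
    # and ``di`` the index file's, as surfaced by verifyintegrity() below:
    #
    #     dd, di = rl.checksize()
    #     assert (dd, di) == (0, 0)  # expected for an undamaged revlog
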
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the
            destination revlog would not select the same revisions for the
            delta. This is the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the
            same revisions for the delta. This mode strikes a balance between
            speed and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of
            execution. This mode can be used to recompute deltas (e.g. if the
            diff/delta algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If not set, the destination revlog's existing setting is
        used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

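    # Usage sketch (illustrative; ``src``, ``opener``, ``tr`` and the file
    # names are hypothetical stand-ins for the caller's store objects):
    # recomputing every delta while copying, e.g. after changing delta
    # settings:
    #
    #     dest = revlog(opener, b'data/copy.i', b'data/copy.d')
    #     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
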
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            # ------------------------------------------------------
            # flags()               | 0      | 0      | 0     | not 0
            # renamed()             | False  | True   | False | ?
            # rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

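    # Usage sketch (illustrative; the ``state`` keys mirror what this method
    # reads and writes above, and ``handle`` is a placeholder for the
    # caller's reporting logic):
    #
    #     state = {'expectedversion': rl.version & 0xFFFF,
    #              'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         # each ``revlogproblem`` carries .warning, .error and .node
    #         handle(problem)
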
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d