revlog: always process opener options...
Gregory Szorc
r41236:cecf3f8b default
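The change below stops gating option processing on the presence of a non-None
``options`` attribute on the opener: the dict is now fetched with an empty-dict
fallback and every option is processed unconditionally, with the revlog version
derived from its contents (defaulting to REVLOG_DEFAULT_VERSION).

A minimal sketch of the opener contract this relies on (editor's illustration,
not part of the change; the option keys shown are ones the constructor reads):

    class fakeopener(object):
        """Stand-in for a Mercurial vfs: a callable that opens files and
        carries an ``options`` dict consulted by revlog at construction."""
        options = {
            'revlogv1': True,        # request the v1 on-disk format
            'generaldelta': True,    # deltas may use either parent as base
            'chunkcachesize': 65536, # must be a power of two > 0
        }

        def __call__(self, path, mode='r', **kwargs):
            return open(path, mode + 'b')

Previously an opener without ``options`` skipped the whole block; now the same
opener simply gets the defaults.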
@@ -1,2617 +1,2618
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to prevent pyflakes complaints
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns the same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
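# Editor's sketch (illustrative only, not part of the change): a reversible
# transform wired to the documented (read, write, raw) contract above.
# REVIDX_EXTSTORED is used here purely as an example of a known flag; real
# users (e.g. extensions) define their own semantics.
def _readstub(self, rawtext):
    # stored form -> user-visible form; the bool says whether the returned
    # text may be used for hash integrity checks
    return rawtext[len(b'stub:'):], False

def _writestub(self, text):
    # user-visible form -> stored form; False: hash the "text" side
    return b'stub:' + text, False

def _rawstub(self, rawtext):
    # rawtext is not directly usable for hash checks
    return False

addflagprocessor(REVIDX_EXTSTORED, (_readstub, _writestub, _rawstub))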
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
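# Worked example (editor's note): the first index field packs a 48-bit byte
# offset above 16 bits of flags.
#   packed = offset_type(1024, REVIDX_ISCENSORED)
#   getoffset(packed) == 1024
#   gettype(packed) == REVIDX_ISCENSORED
# Any bit outside REVIDX_KNOWN_FLAGS makes offset_type raise ValueError.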
183 183
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
288 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 289 # signed integer)
290 290 _maxentrysize = 0x7fffffff
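# Editor's sketch (hypothetical values): packing one "ng" index entry by
# hand. Fields follow the layout documented above; the 20-byte nodeid is
# padded to 32 bytes by the trailing "12x", so each entry is 64 bytes.
#   entry = (offset_type(0, 0), # offset/flags of the data chunk
#            15,                # compressed length
#            27,                # uncompressed length
#            0,                 # base rev of the delta chain
#            0,                 # link rev
#            -1, -1,            # parent revs (nullrev)
#            b'\x00' * 20)      # nodeid
#   len(indexformatng_pack(*entry)) == indexformatng.size == 64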
291 291
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._censorable = censorable
356 356 # 3-tuple of (node, rev, text) for a raw revision.
357 357 self._revisioncache = None
358 358 # Maps rev to chain base rev.
359 359 self._chainbasecache = util.lrucachedict(100)
360 360 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 361 self._chunkcache = (0, '')
362 362 # How much data to read and cache into the raw revlog data cache.
363 363 self._chunkcachesize = 65536
364 364 self._maxchainlen = None
365 365 self._deltabothparents = True
366 366 self.index = []
367 367 # Mapping of partial identifiers to full nodes.
368 368 self._pcache = {}
369 369 # Mapping of revision integer to full node.
370 370 self._nodecache = {nullid: nullrev}
371 371 self._nodepos = None
372 372 self._compengine = 'zlib'
373 373 self._maxdeltachainspan = -1
374 374 self._withsparseread = False
375 375 self._sparserevlog = False
376 376 self._srdensitythreshold = 0.50
377 377 self._srmingapsize = 262144
378 378
379 379 # Make copy of flag processors so each revlog instance can support
380 380 # custom flags.
381 381 self._flagprocessors = dict(_flagprocessors)
382 382
383 383 # 2-tuple of file handles being used for active writing.
384 384 self._writinghandles = None
385 385
386 386 mmapindexthreshold = None
387 v = REVLOG_DEFAULT_VERSION
388 opts = getattr(opener, 'options', None)
389 if opts is not None:
390 if 'revlogv2' in opts:
391 # version 2 revlogs always use generaldelta.
392 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
393 elif 'revlogv1' in opts:
394 if 'generaldelta' in opts:
395 v |= FLAG_GENERALDELTA
396 else:
397 v = 0
398 if 'chunkcachesize' in opts:
399 self._chunkcachesize = opts['chunkcachesize']
400 if 'maxchainlen' in opts:
401 self._maxchainlen = opts['maxchainlen']
402 if 'deltabothparents' in opts:
403 self._deltabothparents = opts['deltabothparents']
404 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
405 if 'compengine' in opts:
406 self._compengine = opts['compengine']
407 if 'maxdeltachainspan' in opts:
408 self._maxdeltachainspan = opts['maxdeltachainspan']
409 if mmaplargeindex and 'mmapindexthreshold' in opts:
410 mmapindexthreshold = opts['mmapindexthreshold']
411 self._sparserevlog = bool(opts.get('sparse-revlog', False))
412 withsparseread = bool(opts.get('with-sparse-read', False))
413 # sparse-revlog forces sparse-read
414 self._withsparseread = self._sparserevlog or withsparseread
415 if 'sparse-read-density-threshold' in opts:
416 self._srdensitythreshold = opts['sparse-read-density-threshold']
417 if 'sparse-read-min-gap-size' in opts:
418 self._srmingapsize = opts['sparse-read-min-gap-size']
419 if opts.get('enableellipsis'):
420 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
421
422 # revlog v0 doesn't have flag processors
423 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
424 _insertflagprocessor(flag, processor, self._flagprocessors)
387 opts = getattr(opener, 'options', {}) or {}
388
389 if 'revlogv2' in opts:
390 # version 2 revlogs always use generaldelta.
391 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
392 elif 'revlogv1' in opts:
393 v = REVLOGV1 | FLAG_INLINE_DATA
394 if 'generaldelta' in opts:
395 v |= FLAG_GENERALDELTA
396 else:
397 v = REVLOG_DEFAULT_VERSION
398
399 if 'chunkcachesize' in opts:
400 self._chunkcachesize = opts['chunkcachesize']
401 if 'maxchainlen' in opts:
402 self._maxchainlen = opts['maxchainlen']
403 if 'deltabothparents' in opts:
404 self._deltabothparents = opts['deltabothparents']
405 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
406 if 'compengine' in opts:
407 self._compengine = opts['compengine']
408 if 'maxdeltachainspan' in opts:
409 self._maxdeltachainspan = opts['maxdeltachainspan']
410 if mmaplargeindex and 'mmapindexthreshold' in opts:
411 mmapindexthreshold = opts['mmapindexthreshold']
412 self._sparserevlog = bool(opts.get('sparse-revlog', False))
413 withsparseread = bool(opts.get('with-sparse-read', False))
414 # sparse-revlog forces sparse-read
415 self._withsparseread = self._sparserevlog or withsparseread
416 if 'sparse-read-density-threshold' in opts:
417 self._srdensitythreshold = opts['sparse-read-density-threshold']
418 if 'sparse-read-min-gap-size' in opts:
419 self._srmingapsize = opts['sparse-read-min-gap-size']
420 if opts.get('enableellipsis'):
421 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
422
423 # revlog v0 doesn't have flag processors
424 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
425 _insertflagprocessor(flag, processor, self._flagprocessors)
425 426
426 427 if self._chunkcachesize <= 0:
427 428 raise error.RevlogError(_('revlog chunk cache size %r is not '
428 429 'greater than 0') % self._chunkcachesize)
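        # editor's note: x & (x - 1) == 0 exactly when x is a power of two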
429 430 elif self._chunkcachesize & (self._chunkcachesize - 1):
430 431 raise error.RevlogError(_('revlog chunk cache size %r is not a '
431 432 'power of 2') % self._chunkcachesize)
432 433
433 434 self._loadindex(v, mmapindexthreshold)
434 435
435 436 def _loadindex(self, v, mmapindexthreshold):
436 437 indexdata = ''
437 438 self._initempty = True
438 439 try:
439 440 with self._indexfp() as f:
440 441 if (mmapindexthreshold is not None and
441 442 self.opener.fstat(f).st_size >= mmapindexthreshold):
442 443 indexdata = util.buffer(util.mmapread(f))
443 444 else:
444 445 indexdata = f.read()
445 446 if len(indexdata) > 0:
446 447 v = versionformat_unpack(indexdata[:4])[0]
447 448 self._initempty = False
448 449 except IOError as inst:
449 450 if inst.errno != errno.ENOENT:
450 451 raise
451 452
452 453 self.version = v
453 454 self._inline = v & FLAG_INLINE_DATA
454 455 self._generaldelta = v & FLAG_GENERALDELTA
455 456 flags = v & ~0xFFFF
456 457 fmt = v & 0xFFFF
457 458 if fmt == REVLOGV0:
458 459 if flags:
459 460 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
460 461 'revlog %s') %
461 462 (flags >> 16, fmt, self.indexfile))
462 463 elif fmt == REVLOGV1:
463 464 if flags & ~REVLOGV1_FLAGS:
464 465 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 466 'revlog %s') %
466 467 (flags >> 16, fmt, self.indexfile))
467 468 elif fmt == REVLOGV2:
468 469 if flags & ~REVLOGV2_FLAGS:
469 470 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
470 471 'revlog %s') %
471 472 (flags >> 16, fmt, self.indexfile))
472 473 else:
473 474 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
474 475 (fmt, self.indexfile))
475 476
476 477 self._storedeltachains = True
477 478
478 479 self._io = revlogio()
479 480 if self.version == REVLOGV0:
480 481 self._io = revlogoldio()
481 482 try:
482 483 d = self._io.parseindex(indexdata, self._inline)
483 484 except (ValueError, IndexError):
484 485 raise error.RevlogError(_("index %s is corrupted") %
485 486 self.indexfile)
486 487 self.index, nodemap, self._chunkcache = d
487 488 if nodemap is not None:
488 489 self.nodemap = self._nodecache = nodemap
489 490 if not self._chunkcache:
490 491 self._chunkclear()
491 492 # revnum -> (chain-length, sum-delta-length)
492 493 self._chaininfocache = {}
493 494 # revlog header -> revlog compressor
494 495 self._decompressors = {}
495 496
496 497 @util.propertycache
497 498 def _compressor(self):
498 499 return util.compengines[self._compengine].revlogcompressor()
499 500
500 501 def _indexfp(self, mode='r'):
501 502 """file object for the revlog's index file"""
502 503 args = {r'mode': mode}
503 504 if mode != 'r':
504 505 args[r'checkambig'] = self._checkambig
505 506 if mode == 'w':
506 507 args[r'atomictemp'] = True
507 508 return self.opener(self.indexfile, **args)
508 509
509 510 def _datafp(self, mode='r'):
510 511 """file object for the revlog's data file"""
511 512 return self.opener(self.datafile, mode=mode)
512 513
513 514 @contextlib.contextmanager
514 515 def _datareadfp(self, existingfp=None):
515 516 """file object suitable to read data"""
516 517 # Use explicit file handle, if given.
517 518 if existingfp is not None:
518 519 yield existingfp
519 520
520 521 # Use a file handle being actively used for writes, if available.
521 522 # There is some danger to doing this because reads will seek the
522 523 # file. However, _writeentry() performs a SEEK_END before all writes,
523 524 # so we should be safe.
524 525 elif self._writinghandles:
525 526 if self._inline:
526 527 yield self._writinghandles[0]
527 528 else:
528 529 yield self._writinghandles[1]
529 530
530 531 # Otherwise open a new file handle.
531 532 else:
532 533 if self._inline:
533 534 func = self._indexfp
534 535 else:
535 536 func = self._datafp
536 537 with func() as fp:
537 538 yield fp
538 539
539 540 def tip(self):
540 541 return self.node(len(self.index) - 1)
541 542 def __contains__(self, rev):
542 543 return 0 <= rev < len(self)
543 544 def __len__(self):
544 545 return len(self.index)
545 546 def __iter__(self):
546 547 return iter(pycompat.xrange(len(self)))
547 548 def revs(self, start=0, stop=None):
548 549 """iterate over all rev in this revlog (from start to stop)"""
549 550 return storageutil.iterrevs(len(self), start=start, stop=stop)
550 551
551 552 @util.propertycache
552 553 def nodemap(self):
553 554 if self.index:
554 555 # populate mapping down to the initial node
555 556 node0 = self.index[0][7] # get around changelog filtering
556 557 self.rev(node0)
557 558 return self._nodecache
558 559
559 560 def hasnode(self, node):
560 561 try:
561 562 self.rev(node)
562 563 return True
563 564 except KeyError:
564 565 return False
565 566
566 567 def candelta(self, baserev, rev):
567 568 """whether two revisions (baserev, rev) can be delta-ed or not"""
568 569 # Disable delta if either rev requires a content-changing flag
569 570 # processor (ex. LFS). This is because such flag processor can alter
570 571 # the rawtext content that the delta will be based on, and two clients
571 572 # could have the same revlog node with different flags (i.e. different
572 573 # rawtext contents) and the delta could be incompatible.
573 574 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
574 575 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
575 576 return False
576 577 return True
577 578
578 579 def clearcaches(self):
579 580 self._revisioncache = None
580 581 self._chainbasecache.clear()
581 582 self._chunkcache = (0, '')
582 583 self._pcache = {}
583 584
584 585 try:
585 586 self._nodecache.clearcaches()
586 587 except AttributeError:
587 588 self._nodecache = {nullid: nullrev}
588 589 self._nodepos = None
589 590
590 591 def rev(self, node):
591 592 try:
592 593 return self._nodecache[node]
593 594 except TypeError:
594 595 raise
595 596 except error.RevlogError:
596 597 # parsers.c radix tree lookup failed
597 598 if node == wdirid or node in wdirfilenodeids:
598 599 raise error.WdirUnsupported
599 600 raise error.LookupError(node, self.indexfile, _('no node'))
600 601 except KeyError:
601 602 # pure python cache lookup failed
602 603 n = self._nodecache
603 604 i = self.index
604 605 p = self._nodepos
605 606 if p is None:
606 607 p = len(i) - 1
607 608 else:
608 609 assert p < len(i)
609 610 for r in pycompat.xrange(p, -1, -1):
610 611 v = i[r][7]
611 612 n[v] = r
612 613 if v == node:
613 614 self._nodepos = r - 1
614 615 return r
615 616 if node == wdirid or node in wdirfilenodeids:
616 617 raise error.WdirUnsupported
617 618 raise error.LookupError(node, self.indexfile, _('no node'))
618 619
619 620 # Accessors for index entries.
620 621
621 622 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
622 623 # are flags.
623 624 def start(self, rev):
624 625 return int(self.index[rev][0] >> 16)
625 626
626 627 def flags(self, rev):
627 628 return self.index[rev][0] & 0xFFFF
628 629
629 630 def length(self, rev):
630 631 return self.index[rev][1]
631 632
632 633 def rawsize(self, rev):
633 634 """return the length of the uncompressed text for a given revision"""
634 635 l = self.index[rev][2]
635 636 if l >= 0:
636 637 return l
637 638
638 639 t = self.revision(rev, raw=True)
639 640 return len(t)
640 641
641 642 def size(self, rev):
642 643 """length of non-raw text (processed by a "read" flag processor)"""
643 644 # fast path: if no "read" flag processor could change the content,
644 645 # size is rawsize. note: ELLIPSIS is known to not change the content.
645 646 flags = self.flags(rev)
646 647 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
647 648 return self.rawsize(rev)
648 649
649 650 return len(self.revision(rev, raw=False))
650 651
651 652 def chainbase(self, rev):
652 653 base = self._chainbasecache.get(rev)
653 654 if base is not None:
654 655 return base
655 656
656 657 index = self.index
657 658 iterrev = rev
658 659 base = index[iterrev][3]
659 660 while base != iterrev:
660 661 iterrev = base
661 662 base = index[iterrev][3]
662 663
663 664 self._chainbasecache[rev] = base
664 665 return base
665 666
666 667 def linkrev(self, rev):
667 668 return self.index[rev][4]
668 669
669 670 def parentrevs(self, rev):
670 671 try:
671 672 entry = self.index[rev]
672 673 except IndexError:
673 674 if rev == wdirrev:
674 675 raise error.WdirUnsupported
675 676 raise
676 677
677 678 return entry[5], entry[6]
678 679
679 680 # fast parentrevs(rev) where rev isn't filtered
680 681 _uncheckedparentrevs = parentrevs
681 682
682 683 def node(self, rev):
683 684 try:
684 685 return self.index[rev][7]
685 686 except IndexError:
686 687 if rev == wdirrev:
687 688 raise error.WdirUnsupported
688 689 raise
689 690
690 691 # Derived from index values.
691 692
692 693 def end(self, rev):
693 694 return self.start(rev) + self.length(rev)
694 695
695 696 def parents(self, node):
696 697 i = self.index
697 698 d = i[self.rev(node)]
698 699 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
699 700
700 701 def chainlen(self, rev):
701 702 return self._chaininfo(rev)[0]
702 703
703 704 def _chaininfo(self, rev):
704 705 chaininfocache = self._chaininfocache
705 706 if rev in chaininfocache:
706 707 return chaininfocache[rev]
707 708 index = self.index
708 709 generaldelta = self._generaldelta
709 710 iterrev = rev
710 711 e = index[iterrev]
711 712 clen = 0
712 713 compresseddeltalen = 0
713 714 while iterrev != e[3]:
714 715 clen += 1
715 716 compresseddeltalen += e[1]
716 717 if generaldelta:
717 718 iterrev = e[3]
718 719 else:
719 720 iterrev -= 1
720 721 if iterrev in chaininfocache:
721 722 t = chaininfocache[iterrev]
722 723 clen += t[0]
723 724 compresseddeltalen += t[1]
724 725 break
725 726 e = index[iterrev]
726 727 else:
727 728 # Add text length of base since decompressing that also takes
728 729 # work. For cache hits the length is already included.
729 730 compresseddeltalen += e[1]
730 731 r = (clen, compresseddeltalen)
731 732 chaininfocache[rev] = r
732 733 return r
733 734
734 735 def _deltachain(self, rev, stoprev=None):
735 736 """Obtain the delta chain for a revision.
736 737
737 738 ``stoprev`` specifies a revision to stop at. If not specified, we
738 739 stop at the base of the chain.
739 740
740 741 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
741 742 revs in ascending order and ``stopped`` is a bool indicating whether
742 743 ``stoprev`` was hit.
743 744 """
744 745 # Try C implementation.
745 746 try:
746 747 return self.index.deltachain(rev, stoprev, self._generaldelta)
747 748 except AttributeError:
748 749 pass
749 750
750 751 chain = []
751 752
752 753 # Alias to prevent attribute lookup in tight loop.
753 754 index = self.index
754 755 generaldelta = self._generaldelta
755 756
756 757 iterrev = rev
757 758 e = index[iterrev]
758 759 while iterrev != e[3] and iterrev != stoprev:
759 760 chain.append(iterrev)
760 761 if generaldelta:
761 762 iterrev = e[3]
762 763 else:
763 764 iterrev -= 1
764 765 e = index[iterrev]
765 766
766 767 if iterrev == stoprev:
767 768 stopped = True
768 769 else:
769 770 chain.append(iterrev)
770 771 stopped = False
771 772
772 773 chain.reverse()
773 774 return chain, stopped
774 775
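    # Editor's illustration (hypothetical revs): if rev 5 deltas against
    # rev 4 and rev 4 against rev 2, a full snapshot, then:
    #   _deltachain(5)            -> ([2, 4, 5], False)
    #   _deltachain(5, stoprev=4) -> ([5], True)   # stoprev is not included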
775 776 def ancestors(self, revs, stoprev=0, inclusive=False):
776 777 """Generate the ancestors of 'revs' in reverse revision order.
777 778 Does not generate revs lower than stoprev.
778 779
779 780 See the documentation for ancestor.lazyancestors for more details."""
780 781
781 782 # first, make sure start revisions aren't filtered
782 783 revs = list(revs)
783 784 checkrev = self.node
784 785 for r in revs:
785 786 checkrev(r)
786 787 # and we're sure ancestors aren't filtered as well
787 788
788 789 if rustext is not None:
789 790 lazyancestors = rustext.ancestor.LazyAncestors
790 791 arg = self.index
791 792 elif util.safehasattr(parsers, 'rustlazyancestors'):
792 793 lazyancestors = ancestor.rustlazyancestors
793 794 arg = self.index
794 795 else:
795 796 lazyancestors = ancestor.lazyancestors
796 797 arg = self._uncheckedparentrevs
797 798 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
798 799
799 800 def descendants(self, revs):
800 801 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
801 802
802 803 def findcommonmissing(self, common=None, heads=None):
803 804 """Return a tuple of the ancestors of common and the ancestors of heads
804 805 that are not ancestors of common. In revset terminology, we return the
805 806 tuple:
806 807
807 808 ::common, (::heads) - (::common)
808 809
809 810 The list is sorted by revision number, meaning it is
810 811 topologically sorted.
811 812
812 813 'heads' and 'common' are both lists of node IDs. If heads is
813 814 not supplied, uses all of the revlog's heads. If common is not
814 815 supplied, uses nullid."""
815 816 if common is None:
816 817 common = [nullid]
817 818 if heads is None:
818 819 heads = self.heads()
819 820
820 821 common = [self.rev(n) for n in common]
821 822 heads = [self.rev(n) for n in heads]
822 823
823 824 # we want the ancestors, but inclusive
824 825 class lazyset(object):
825 826 def __init__(self, lazyvalues):
826 827 self.addedvalues = set()
827 828 self.lazyvalues = lazyvalues
828 829
829 830 def __contains__(self, value):
830 831 return value in self.addedvalues or value in self.lazyvalues
831 832
832 833 def __iter__(self):
833 834 added = self.addedvalues
834 835 for r in added:
835 836 yield r
836 837 for r in self.lazyvalues:
837 838 if r not in added:
838 839 yield r
839 840
840 841 def add(self, value):
841 842 self.addedvalues.add(value)
842 843
843 844 def update(self, values):
844 845 self.addedvalues.update(values)
845 846
846 847 has = lazyset(self.ancestors(common))
847 848 has.add(nullrev)
848 849 has.update(common)
849 850
850 851 # take all ancestors from heads that aren't in has
851 852 missing = set()
852 853 visit = collections.deque(r for r in heads if r not in has)
853 854 while visit:
854 855 r = visit.popleft()
855 856 if r in missing:
856 857 continue
857 858 else:
858 859 missing.add(r)
859 860 for p in self.parentrevs(r):
860 861 if p not in has:
861 862 visit.append(p)
862 863 missing = list(missing)
863 864 missing.sort()
864 865 return has, [self.node(miss) for miss in missing]
865 866
866 867 def incrementalmissingrevs(self, common=None):
867 868 """Return an object that can be used to incrementally compute the
868 869 revision numbers of the ancestors of arbitrary sets that are not
869 870 ancestors of common. This is an ancestor.incrementalmissingancestors
870 871 object.
871 872
872 873 'common' is a list of revision numbers. If common is not supplied, uses
873 874 nullrev.
874 875 """
875 876 if common is None:
876 877 common = [nullrev]
877 878
878 879 return ancestor.incrementalmissingancestors(self.parentrevs, common)
879 880
880 881 def findmissingrevs(self, common=None, heads=None):
881 882 """Return the revision numbers of the ancestors of heads that
882 883 are not ancestors of common.
883 884
884 885 More specifically, return a list of revision numbers corresponding to
885 886 nodes N such that every N satisfies the following constraints:
886 887
887 888 1. N is an ancestor of some node in 'heads'
888 889 2. N is not an ancestor of any node in 'common'
889 890
890 891 The list is sorted by revision number, meaning it is
891 892 topologically sorted.
892 893
893 894 'heads' and 'common' are both lists of revision numbers. If heads is
894 895 not supplied, uses all of the revlog's heads. If common is not
895 896 supplied, uses nullid."""
896 897 if common is None:
897 898 common = [nullrev]
898 899 if heads is None:
899 900 heads = self.headrevs()
900 901
901 902 inc = self.incrementalmissingrevs(common=common)
902 903 return inc.missingancestors(heads)
903 904
904 905 def findmissing(self, common=None, heads=None):
905 906 """Return the ancestors of heads that are not ancestors of common.
906 907
907 908 More specifically, return a list of nodes N such that every N
908 909 satisfies the following constraints:
909 910
910 911 1. N is an ancestor of some node in 'heads'
911 912 2. N is not an ancestor of any node in 'common'
912 913
913 914 The list is sorted by revision number, meaning it is
914 915 topologically sorted.
915 916
916 917 'heads' and 'common' are both lists of node IDs. If heads is
917 918 not supplied, uses all of the revlog's heads. If common is not
918 919 supplied, uses nullid."""
919 920 if common is None:
920 921 common = [nullid]
921 922 if heads is None:
922 923 heads = self.heads()
923 924
924 925 common = [self.rev(n) for n in common]
925 926 heads = [self.rev(n) for n in heads]
926 927
927 928 inc = self.incrementalmissingrevs(common=common)
928 929 return [self.node(r) for r in inc.missingancestors(heads)]
929 930
930 931 def nodesbetween(self, roots=None, heads=None):
931 932 """Return a topological path from 'roots' to 'heads'.
932 933
933 934 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
934 935 topologically sorted list of all nodes N that satisfy both of
935 936 these constraints:
936 937
937 938 1. N is a descendant of some node in 'roots'
938 939 2. N is an ancestor of some node in 'heads'
939 940
940 941 Every node is considered to be both a descendant and an ancestor
941 942 of itself, so every reachable node in 'roots' and 'heads' will be
942 943 included in 'nodes'.
943 944
944 945 'outroots' is the list of reachable nodes in 'roots', i.e., the
945 946 subset of 'roots' that is returned in 'nodes'. Likewise,
946 947 'outheads' is the subset of 'heads' that is also in 'nodes'.
947 948
948 949 'roots' and 'heads' are both lists of node IDs. If 'roots' is
949 950 unspecified, uses nullid as the only root. If 'heads' is
950 951 unspecified, uses list of all of the revlog's heads."""
951 952 nonodes = ([], [], [])
952 953 if roots is not None:
953 954 roots = list(roots)
954 955 if not roots:
955 956 return nonodes
956 957 lowestrev = min([self.rev(n) for n in roots])
957 958 else:
958 959 roots = [nullid] # Everybody's a descendant of nullid
959 960 lowestrev = nullrev
960 961 if (lowestrev == nullrev) and (heads is None):
961 962 # We want _all_ the nodes!
962 963 return ([self.node(r) for r in self], [nullid], list(self.heads()))
963 964 if heads is None:
964 965 # All nodes are ancestors, so the latest ancestor is the last
965 966 # node.
966 967 highestrev = len(self) - 1
967 968 # Set ancestors to None to signal that every node is an ancestor.
968 969 ancestors = None
969 970 # Set heads to an empty dictionary for later discovery of heads
970 971 heads = {}
971 972 else:
972 973 heads = list(heads)
973 974 if not heads:
974 975 return nonodes
975 976 ancestors = set()
976 977 # Turn heads into a dictionary so we can remove 'fake' heads.
977 978 # Also, later we will be using it to filter out the heads we can't
978 979 # find from roots.
979 980 heads = dict.fromkeys(heads, False)
980 981 # Start at the top and keep marking parents until we're done.
981 982 nodestotag = set(heads)
982 983 # Remember where the top was so we can use it as a limit later.
983 984 highestrev = max([self.rev(n) for n in nodestotag])
984 985 while nodestotag:
985 986 # grab a node to tag
986 987 n = nodestotag.pop()
987 988 # Never tag nullid
988 989 if n == nullid:
989 990 continue
990 991 # A node's revision number represents its place in a
991 992 # topologically sorted list of nodes.
992 993 r = self.rev(n)
993 994 if r >= lowestrev:
994 995 if n not in ancestors:
995 996 # If we are possibly a descendant of one of the roots
996 997 # and we haven't already been marked as an ancestor
997 998 ancestors.add(n) # Mark as ancestor
998 999 # Add non-nullid parents to list of nodes to tag.
999 1000 nodestotag.update([p for p in self.parents(n) if
1000 1001 p != nullid])
1001 1002 elif n in heads: # We've seen it before, is it a fake head?
1002 1003 # So it is, real heads should not be the ancestors of
1003 1004 # any other heads.
1004 1005 heads.pop(n)
1005 1006 if not ancestors:
1006 1007 return nonodes
1007 1008 # Now that we have our set of ancestors, we want to remove any
1008 1009 # roots that are not ancestors.
1009 1010
1010 1011 # If one of the roots was nullid, everything is included anyway.
1011 1012 if lowestrev > nullrev:
1012 1013 # But, since we weren't, let's recompute the lowest rev to not
1013 1014 # include roots that aren't ancestors.
1014 1015
1015 1016 # Filter out roots that aren't ancestors of heads
1016 1017 roots = [root for root in roots if root in ancestors]
1017 1018 # Recompute the lowest revision
1018 1019 if roots:
1019 1020 lowestrev = min([self.rev(root) for root in roots])
1020 1021 else:
1021 1022 # No more roots? Return empty list
1022 1023 return nonodes
1023 1024 else:
1024 1025 # We are descending from nullid, and don't need to care about
1025 1026 # any other roots.
1026 1027 lowestrev = nullrev
1027 1028 roots = [nullid]
1028 1029 # Transform our roots list into a set.
1029 1030 descendants = set(roots)
1030 1031 # Also, keep the original roots so we can filter out roots that aren't
1031 1032 # 'real' roots (i.e. are descended from other roots).
1032 1033 roots = descendants.copy()
1033 1034 # Our topologically sorted list of output nodes.
1034 1035 orderedout = []
1035 1036 # Don't start at nullid since we don't want nullid in our output list,
1036 1037 # and if nullid shows up in descendants, empty parents will look like
1037 1038 # they're descendants.
1038 1039 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1039 1040 n = self.node(r)
1040 1041 isdescendant = False
1041 1042 if lowestrev == nullrev: # Everybody is a descendant of nullid
1042 1043 isdescendant = True
1043 1044 elif n in descendants:
1044 1045 # n is already a descendant
1045 1046 isdescendant = True
1046 1047 # This check only needs to be done here because all the roots
1047 1048 # will start being marked as descendants before the loop.
1048 1049 if n in roots:
1049 1050 # If n was a root, check if it's a 'real' root.
1050 1051 p = tuple(self.parents(n))
1051 1052 # If any of its parents are descendants, it's not a root.
1052 1053 if (p[0] in descendants) or (p[1] in descendants):
1053 1054 roots.remove(n)
1054 1055 else:
1055 1056 p = tuple(self.parents(n))
1056 1057 # A node is a descendant if either of its parents are
1057 1058 # descendants. (We seeded the descendants list with the roots
1058 1059 # up there, remember?)
1059 1060 if (p[0] in descendants) or (p[1] in descendants):
1060 1061 descendants.add(n)
1061 1062 isdescendant = True
1062 1063 if isdescendant and ((ancestors is None) or (n in ancestors)):
1063 1064 # Only include nodes that are both descendants and ancestors.
1064 1065 orderedout.append(n)
1065 1066 if (ancestors is not None) and (n in heads):
1066 1067 # We're trying to figure out which heads are reachable
1067 1068 # from roots.
1068 1069 # Mark this head as having been reached
1069 1070 heads[n] = True
1070 1071 elif ancestors is None:
1071 1072 # Otherwise, we're trying to discover the heads.
1072 1073 # Assume this is a head because if it isn't, the next step
1073 1074 # will eventually remove it.
1074 1075 heads[n] = True
1075 1076 # But, obviously its parents aren't.
1076 1077 for p in self.parents(n):
1077 1078 heads.pop(p, None)
1078 1079 heads = [head for head, flag in heads.iteritems() if flag]
1079 1080 roots = list(roots)
1080 1081 assert orderedout
1081 1082 assert roots
1082 1083 assert heads
1083 1084 return (orderedout, roots, heads)
1084 1085
1085 1086 def headrevs(self):
1086 1087 try:
1087 1088 return self.index.headrevs()
1088 1089 except AttributeError:
1089 1090 return self._headrevs()
1090 1091
1091 1092 def computephases(self, roots):
1092 1093 return self.index.computephasesmapsets(roots)
1093 1094
1094 1095 def _headrevs(self):
1095 1096 count = len(self)
1096 1097 if not count:
1097 1098 return [nullrev]
1098 1099 # we won't iterate over filtered revs, so nobody is a head at start
1099 1100 ishead = [0] * (count + 1)
1100 1101 index = self.index
1101 1102 for r in self:
1102 1103 ishead[r] = 1 # I may be a head
1103 1104 e = index[r]
1104 1105 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1105 1106 return [r for r, val in enumerate(ishead) if val]
1106 1107
1107 1108 def heads(self, start=None, stop=None):
1108 1109 """return the list of all nodes that have no children
1109 1110
1110 1111 if start is specified, only heads that are descendants of
1111 1112 start will be returned
1112 1113 if stop is specified, it will consider all the revs from stop
1113 1114 as if they had no children
1114 1115 """
1115 1116 if start is None and stop is None:
1116 1117 if not len(self):
1117 1118 return [nullid]
1118 1119 return [self.node(r) for r in self.headrevs()]
1119 1120
1120 1121 if start is None:
1121 1122 start = nullrev
1122 1123 else:
1123 1124 start = self.rev(start)
1124 1125
1125 1126 stoprevs = set(self.rev(n) for n in stop or [])
1126 1127
1127 1128 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1128 1129 stoprevs=stoprevs)
1129 1130
1130 1131 return [self.node(rev) for rev in revs]
1131 1132
1132 1133 def children(self, node):
1133 1134 """find the children of a given node"""
1134 1135 c = []
1135 1136 p = self.rev(node)
1136 1137 for r in self.revs(start=p + 1):
1137 1138 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1138 1139 if prevs:
1139 1140 for pr in prevs:
1140 1141 if pr == p:
1141 1142 c.append(self.node(r))
1142 1143 elif p == nullrev:
1143 1144 c.append(self.node(r))
1144 1145 return c
1145 1146
1146 1147 def commonancestorsheads(self, a, b):
1147 1148 """calculate all the heads of the common ancestors of nodes a and b"""
1148 1149 a, b = self.rev(a), self.rev(b)
1149 1150 ancs = self._commonancestorsheads(a, b)
1150 1151 return pycompat.maplist(self.node, ancs)
1151 1152
1152 1153 def _commonancestorsheads(self, *revs):
1153 1154 """calculate all the heads of the common ancestors of revs"""
1154 1155 try:
1155 1156 ancs = self.index.commonancestorsheads(*revs)
1156 1157 except (AttributeError, OverflowError): # C implementation failed
1157 1158 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1158 1159 return ancs
1159 1160
1160 1161 def isancestor(self, a, b):
1161 1162 """return True if node a is an ancestor of node b
1162 1163
1163 1164 A revision is considered an ancestor of itself."""
1164 1165 a, b = self.rev(a), self.rev(b)
1165 1166 return self.isancestorrev(a, b)
1166 1167
1167 1168 def isancestorrev(self, a, b):
1168 1169 """return True if revision a is an ancestor of revision b
1169 1170
1170 1171 A revision is considered an ancestor of itself.
1171 1172
1172 1173 The implementation of this is trivial but the use of
1173 1174 commonancestorsheads is not."""
1174 1175 if a == nullrev:
1175 1176 return True
1176 1177 elif a == b:
1177 1178 return True
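        # editor's note: a revision is always numbered higher than its
        # ancestors, so a > b rules out "a is an ancestor of b"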
1178 1179 elif a > b:
1179 1180 return False
1180 1181 return a in self._commonancestorsheads(a, b)
1181 1182
1182 1183 def ancestor(self, a, b):
1183 1184 """calculate the "best" common ancestor of nodes a and b"""
1184 1185
1185 1186 a, b = self.rev(a), self.rev(b)
1186 1187 try:
1187 1188 ancs = self.index.ancestors(a, b)
1188 1189 except (AttributeError, OverflowError):
1189 1190 ancs = ancestor.ancestors(self.parentrevs, a, b)
1190 1191 if ancs:
1191 1192 # choose a consistent winner when there's a tie
1192 1193 return min(map(self.node, ancs))
1193 1194 return nullid
1194 1195
1195 1196 def _match(self, id):
1196 1197 if isinstance(id, int):
1197 1198 # rev
1198 1199 return self.node(id)
1199 1200 if len(id) == 20:
1200 1201 # possibly a binary node
1201 1202 # odds of a binary node being all hex in ASCII are 1 in 10**25
1202 1203 try:
1203 1204 node = id
1204 1205 self.rev(node) # quick search the index
1205 1206 return node
1206 1207 except error.LookupError:
1207 1208 pass # may be partial hex id
1208 1209 try:
1209 1210 # str(rev)
1210 1211 rev = int(id)
1211 1212 if "%d" % rev != id:
1212 1213 raise ValueError
1213 1214 if rev < 0:
1214 1215 rev = len(self) + rev
1215 1216 if rev < 0 or rev >= len(self):
1216 1217 raise ValueError
1217 1218 return self.node(rev)
1218 1219 except (ValueError, OverflowError):
1219 1220 pass
1220 1221 if len(id) == 40:
1221 1222 try:
1222 1223 # a full hex nodeid?
1223 1224 node = bin(id)
1224 1225 self.rev(node)
1225 1226 return node
1226 1227 except (TypeError, error.LookupError):
1227 1228 pass
1228 1229
1229 1230 def _partialmatch(self, id):
1230 1231 # we don't care about wdirfilenodeids as they should always be full hashes
1231 1232 maybewdir = wdirhex.startswith(id)
1232 1233 try:
1233 1234 partial = self.index.partialmatch(id)
1234 1235 if partial and self.hasnode(partial):
1235 1236 if maybewdir:
1236 1237 # single 'ff...' match in radix tree, ambiguous with wdir
1237 1238 raise error.RevlogError
1238 1239 return partial
1239 1240 if maybewdir:
1240 1241 # no 'ff...' match in radix tree, wdir identified
1241 1242 raise error.WdirUnsupported
1242 1243 return None
1243 1244 except error.RevlogError:
1244 1245 # parsers.c radix tree lookup gave multiple matches
1245 1246 # fast path: for unfiltered changelog, radix tree is accurate
1246 1247 if not getattr(self, 'filteredrevs', None):
1247 1248 raise error.AmbiguousPrefixLookupError(
1248 1249 id, self.indexfile, _('ambiguous identifier'))
1249 1250 # fall through to slow path that filters hidden revisions
1250 1251 except (AttributeError, ValueError):
1251 1252 # we are pure python, or key was too short to search radix tree
1252 1253 pass
1253 1254
1254 1255 if id in self._pcache:
1255 1256 return self._pcache[id]
1256 1257
1257 1258 if len(id) <= 40:
1258 1259 try:
1259 1260 # hex(node)[:...]
1260 1261 l = len(id) // 2 # grab an even number of digits
1261 1262 prefix = bin(id[:l * 2])
1262 1263 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1263 1264 nl = [n for n in nl if hex(n).startswith(id) and
1264 1265 self.hasnode(n)]
1265 1266 if nullhex.startswith(id):
1266 1267 nl.append(nullid)
1267 1268 if len(nl) > 0:
1268 1269 if len(nl) == 1 and not maybewdir:
1269 1270 self._pcache[id] = nl[0]
1270 1271 return nl[0]
1271 1272 raise error.AmbiguousPrefixLookupError(
1272 1273 id, self.indexfile, _('ambiguous identifier'))
1273 1274 if maybewdir:
1274 1275 raise error.WdirUnsupported
1275 1276 return None
1276 1277 except TypeError:
1277 1278 pass
1278 1279
1279 1280 def lookup(self, id):
1280 1281 """locate a node based on:
1281 1282 - revision number or str(revision number)
1282 1283 - nodeid or subset of hex nodeid
1283 1284 """
1284 1285 n = self._match(id)
1285 1286 if n is not None:
1286 1287 return n
1287 1288 n = self._partialmatch(id)
1288 1289 if n:
1289 1290 return n
1290 1291
1291 1292 raise error.LookupError(id, self.indexfile, _('no match found'))
1292 1293
1293 1294 def shortest(self, node, minlength=1):
1294 1295 """Find the shortest unambiguous prefix that matches node."""
1295 1296 def isvalid(prefix):
1296 1297 try:
1297 1298 node = self._partialmatch(prefix)
1298 1299 except error.AmbiguousPrefixLookupError:
1299 1300 return False
1300 1301 except error.WdirUnsupported:
1301 1302 # single 'ff...' match
1302 1303 return True
1303 1304 if node is None:
1304 1305 raise error.LookupError(node, self.indexfile, _('no node'))
1305 1306 return True
1306 1307
1307 1308 def maybewdir(prefix):
1308 1309 return all(c == 'f' for c in prefix)
1309 1310
1310 1311 hexnode = hex(node)
1311 1312
1312 1313 def disambiguate(hexnode, minlength):
1313 1314 """Disambiguate against wdirid."""
1314 1315 for length in range(minlength, 41):
1315 1316 prefix = hexnode[:length]
1316 1317 if not maybewdir(prefix):
1317 1318 return prefix
1318 1319
1319 1320 if not getattr(self, 'filteredrevs', None):
1320 1321 try:
1321 1322 length = max(self.index.shortest(node), minlength)
1322 1323 return disambiguate(hexnode, length)
1323 1324 except error.RevlogError:
1324 1325 if node != wdirid:
1325 1326 raise error.LookupError(node, self.indexfile, _('no node'))
1326 1327 except AttributeError:
1327 1328 # Fall through to pure code
1328 1329 pass
1329 1330
1330 1331 if node == wdirid:
1331 1332 for length in range(minlength, 41):
1332 1333 prefix = hexnode[:length]
1333 1334 if isvalid(prefix):
1334 1335 return prefix
1335 1336
1336 1337 for length in range(minlength, 41):
1337 1338 prefix = hexnode[:length]
1338 1339 if isvalid(prefix):
1339 1340 return disambiguate(hexnode, length)
1340 1341
1341 1342 def cmp(self, node, text):
1342 1343 """compare text with a given file revision
1343 1344
1344 1345 returns True if text is different than what is stored.
1345 1346 """
1346 1347 p1, p2 = self.parents(node)
1347 1348 return storageutil.hashrevisionsha1(text, p1, p2) != node
1348 1349
1349 1350 def _cachesegment(self, offset, data):
1350 1351 """Add a segment to the revlog cache.
1351 1352
1352 1353 Accepts an absolute offset and the data that is at that location.
1353 1354 """
1354 1355 o, d = self._chunkcache
1355 1356 # try to add to existing cache
1356 1357 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1357 1358 self._chunkcache = o, d + data
1358 1359 else:
1359 1360 self._chunkcache = offset, data
1360 1361
1361 1362 def _readsegment(self, offset, length, df=None):
1362 1363 """Load a segment of raw data from the revlog.
1363 1364
1364 1365 Accepts an absolute offset, length to read, and an optional existing
1365 1366 file handle to read from.
1366 1367
1367 1368 If an existing file handle is passed, it will be seeked and the
1368 1369 original seek position will NOT be restored.
1369 1370
1370 1371 Returns a str or buffer of raw byte data.
1371 1372
1372 1373 Raises if the requested number of bytes could not be read.
1373 1374 """
1374 1375 # Cache data both forward and backward around the requested
1375 1376 # data, in a fixed size window. This helps speed up operations
1376 1377 # involving reading the revlog backwards.
1377 1378 cachesize = self._chunkcachesize
1378 1379 realoffset = offset & ~(cachesize - 1)
1379 1380 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1380 1381 - realoffset)
1381 1382 with self._datareadfp(df) as df:
1382 1383 df.seek(realoffset)
1383 1384 d = df.read(reallength)
1384 1385
1385 1386 self._cachesegment(realoffset, d)
1386 1387 if offset != realoffset or reallength != length:
1387 1388 startoffset = offset - realoffset
1388 1389 if len(d) - startoffset < length:
1389 1390 raise error.RevlogError(
1390 1391 _('partial read of revlog %s; expected %d bytes from '
1391 1392 'offset %d, got %d') %
1392 1393 (self.indexfile if self._inline else self.datafile,
1393 1394 length, realoffset, len(d) - startoffset))
1394 1395
1395 1396 return util.buffer(d, startoffset, length)
1396 1397
1397 1398 if len(d) < length:
1398 1399 raise error.RevlogError(
1399 1400 _('partial read of revlog %s; expected %d bytes from offset '
1400 1401 '%d, got %d') %
1401 1402 (self.indexfile if self._inline else self.datafile,
1402 1403 length, offset, len(d)))
1403 1404
1404 1405 return d
1405 1406
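    # Worked example (editor's note): with the default 64k _chunkcachesize,
    # a 100-byte read at offset 70000 is widened to the aligned window:
    #   realoffset = 70000 & ~65535                           -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 -> 65536
    # so a single 64k read serves the request and seeds the cache.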
1406 1407 def _getsegment(self, offset, length, df=None):
1407 1408 """Obtain a segment of raw data from the revlog.
1408 1409
1409 1410 Accepts an absolute offset, length of bytes to obtain, and an
1410 1411 optional file handle to the already-opened revlog. If the file
1411 1412 handle is used, its original seek position will not be preserved.
1412 1413
1413 1414 Requests for data may be returned from a cache.
1414 1415
1415 1416 Returns a str or a buffer instance of raw byte data.
1416 1417 """
1417 1418 o, d = self._chunkcache
1418 1419 l = len(d)
1419 1420
1420 1421 # is it in the cache?
1421 1422 cachestart = offset - o
1422 1423 cacheend = cachestart + length
1423 1424 if cachestart >= 0 and cacheend <= l:
1424 1425 if cachestart == 0 and cacheend == l:
1425 1426 return d # avoid a copy
1426 1427 return util.buffer(d, cachestart, cacheend - cachestart)
1427 1428
1428 1429 return self._readsegment(offset, length, df=df)
1429 1430
1430 1431 def _getsegmentforrevs(self, startrev, endrev, df=None):
1431 1432 """Obtain a segment of raw data corresponding to a range of revisions.
1432 1433
1433 1434 Accepts the start and end revisions and an optional already-open
1434 1435 file handle to be used for reading. If the file handle is used, its
1435 1436 seek position will not be preserved.
1436 1437
1437 1438 Requests for data may be satisfied by a cache.
1438 1439
1439 1440 Returns a 2-tuple of (offset, data) for the requested range of
1440 1441 revisions. Offset is the integer offset from the beginning of the
1441 1442 revlog and data is a str or buffer of the raw byte data.
1442 1443
1443 1444 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1444 1445 to determine where each revision's data begins and ends.
1445 1446 """
1446 1447 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1447 1448 # (functions are expensive).
1448 1449 index = self.index
1449 1450 istart = index[startrev]
1450 1451 start = int(istart[0] >> 16)
1451 1452 if startrev == endrev:
1452 1453 end = start + istart[1]
1453 1454 else:
1454 1455 iend = index[endrev]
1455 1456 end = int(iend[0] >> 16) + iend[1]
1456 1457
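        # Editor's note: inline revlogs interleave each revision's index
        # entry with its data in the .i file, so skip over the (rev + 1)
        # preceding index entries to find the data's true byte position.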
1457 1458 if self._inline:
1458 1459 start += (startrev + 1) * self._io.size
1459 1460 end += (endrev + 1) * self._io.size
1460 1461 length = end - start
1461 1462
1462 1463 return start, self._getsegment(start, length, df=df)
1463 1464
1464 1465 def _chunk(self, rev, df=None):
1465 1466 """Obtain a single decompressed chunk for a revision.
1466 1467
1467 1468 Accepts an integer revision and an optional already-open file handle
1468 1469 to be used for reading. If used, the seek position of the file will not
1469 1470 be preserved.
1470 1471
1471 1472 Returns a str holding uncompressed data for the requested revision.
1472 1473 """
1473 1474 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1474 1475
1475 1476 def _chunks(self, revs, df=None, targetsize=None):
1476 1477 """Obtain decompressed chunks for the specified revisions.
1477 1478
1478 1479 Accepts an iterable of numeric revisions that are assumed to be in
1479 1480 ascending order. Also accepts an optional already-open file handle
1480 1481 to be used for reading. If used, the seek position of the file will
1481 1482 not be preserved.
1482 1483
1483 1484 This function is similar to calling ``self._chunk()`` multiple times,
1484 1485 but is faster.
1485 1486
1486 1487 Returns a list with decompressed data for each requested revision.
1487 1488 """
1488 1489 if not revs:
1489 1490 return []
1490 1491 start = self.start
1491 1492 length = self.length
1492 1493 inline = self._inline
1493 1494 iosize = self._io.size
1494 1495 buffer = util.buffer
1495 1496
1496 1497 l = []
1497 1498 ladd = l.append
1498 1499
1499 1500 if not self._withsparseread:
1500 1501 slicedchunks = (revs,)
1501 1502 else:
1502 1503 slicedchunks = deltautil.slicechunk(self, revs,
1503 1504 targetsize=targetsize)
1504 1505
1505 1506 for revschunk in slicedchunks:
1506 1507 firstrev = revschunk[0]
1507 1508 # Skip trailing revisions with empty diff
1508 1509 for lastrev in revschunk[::-1]:
1509 1510 if length(lastrev) != 0:
1510 1511 break
1511 1512
1512 1513 try:
1513 1514 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1514 1515 except OverflowError:
1515 1516 # issue4215 - we can't cache a run of chunks greater than
1516 1517 # 2G on Windows
1517 1518 return [self._chunk(rev, df=df) for rev in revschunk]
1518 1519
1519 1520 decomp = self.decompress
1520 1521 for rev in revschunk:
1521 1522 chunkstart = start(rev)
1522 1523 if inline:
1523 1524 chunkstart += (rev + 1) * iosize
1524 1525 chunklength = length(rev)
1525 1526 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1526 1527
1527 1528 return l
1528 1529
1529 1530 def _chunkclear(self):
1530 1531 """Clear the raw chunk cache."""
1531 1532 self._chunkcache = (0, '')
1532 1533
1533 1534 def deltaparent(self, rev):
1534 1535 """return deltaparent of the given revision"""
1535 1536 base = self.index[rev][3]
1536 1537 if base == rev:
1537 1538 return nullrev
1538 1539 elif self._generaldelta:
1539 1540 return base
1540 1541 else:
1541 1542 return rev - 1
1542 1543
1543 1544 def issnapshot(self, rev):
1544 1545 """tells whether rev is a snapshot
1545 1546 """
1546 1547 if not self._sparserevlog:
1547 1548 return self.deltaparent(rev) == nullrev
1548 1549 elif util.safehasattr(self.index, 'issnapshot'):
1549 1550 # directly assign the method so the hasattr test and lookup are done only once
1550 1551 self.issnapshot = self.index.issnapshot
1551 1552 return self.issnapshot(rev)
1552 1553 if rev == nullrev:
1553 1554 return True
1554 1555 entry = self.index[rev]
1555 1556 base = entry[3]
1556 1557 if base == rev:
1557 1558 return True
1558 1559 if base == nullrev:
1559 1560 return True
1560 1561 p1 = entry[5]
1561 1562 p2 = entry[6]
1562 1563 if base == p1 or base == p2:
1563 1564 return False
1564 1565 return self.issnapshot(base)
1565 1566
1566 1567 def snapshotdepth(self, rev):
1567 1568 """number of snapshot in the chain before this one"""
1568 1569 if not self.issnapshot(rev):
1569 1570 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1570 1571 return len(self._deltachain(rev)[0]) - 1
1571 1572
1572 1573 def revdiff(self, rev1, rev2):
1573 1574 """return or calculate a delta between two revisions
1574 1575
1575 1576 The delta calculated is in binary form and is intended to be written to
1576 1577 revlog data directly. So this function needs raw revision data.
1577 1578 """
1578 1579 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1579 1580 return bytes(self._chunk(rev2))
1580 1581
1581 1582 return mdiff.textdiff(self.revision(rev1, raw=True),
1582 1583 self.revision(rev2, raw=True))
1583 1584
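# The binary deltas handled here use mdiff's hunk format: a big-endian
# ">lll" header (start, end, replacement length) followed by the
# replacement bytes. A sketch of the simplest case, a full-replacement
# delta, which is also the shape addgroup() checks censored bases
# against below (hypothetical helper name):
import struct

def _example_fullreplace_delta(oldlen, newtext):
    # replace bytes [0, oldlen) of the base text with newtext
    return struct.pack(">lll", 0, oldlen, len(newtext)) + newtext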
1584 1585 def revision(self, nodeorrev, _df=None, raw=False):
1585 1586 """return an uncompressed revision of a given node or revision
1586 1587 number.
1587 1588
1588 1589 _df - an existing file handle to read from. (internal-only)
1589 1590 raw - an optional argument specifying if the revision data is to be
1590 1591 treated as raw data when applying flag transforms. 'raw' should be set
1591 1592 to True when generating changegroups or in debug commands.
1592 1593 """
1593 1594 if isinstance(nodeorrev, int):
1594 1595 rev = nodeorrev
1595 1596 node = self.node(rev)
1596 1597 else:
1597 1598 node = nodeorrev
1598 1599 rev = None
1599 1600
1600 1601 cachedrev = None
1601 1602 flags = None
1602 1603 rawtext = None
1603 1604 if node == nullid:
1604 1605 return ""
1605 1606 if self._revisioncache:
1606 1607 if self._revisioncache[0] == node:
1607 1608 # _cache only stores rawtext
1608 1609 if raw:
1609 1610 return self._revisioncache[2]
1610 1611 # duplicated, but good for perf
1611 1612 if rev is None:
1612 1613 rev = self.rev(node)
1613 1614 if flags is None:
1614 1615 flags = self.flags(rev)
1615 1616 # no extra flags set, no flag processor runs, text = rawtext
1616 1617 if flags == REVIDX_DEFAULT_FLAGS:
1617 1618 return self._revisioncache[2]
1618 1619 # rawtext is reusable. need to run flag processor
1619 1620 rawtext = self._revisioncache[2]
1620 1621
1621 1622 cachedrev = self._revisioncache[1]
1622 1623
1623 1624 # look up what we need to read
1624 1625 if rawtext is None:
1625 1626 if rev is None:
1626 1627 rev = self.rev(node)
1627 1628
1628 1629 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1629 1630 if stopped:
1630 1631 rawtext = self._revisioncache[2]
1631 1632
1632 1633 # drop cache to save memory
1633 1634 self._revisioncache = None
1634 1635
1635 1636 targetsize = None
1636 1637 rawsize = self.index[rev][2]
1637 1638 if 0 <= rawsize:
1638 1639 targetsize = 4 * rawsize
1639 1640
1640 1641 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1641 1642 if rawtext is None:
1642 1643 rawtext = bytes(bins[0])
1643 1644 bins = bins[1:]
1644 1645
1645 1646 rawtext = mdiff.patches(rawtext, bins)
1646 1647 self._revisioncache = (node, rev, rawtext)
1647 1648
1648 1649 if flags is None:
1649 1650 if rev is None:
1650 1651 rev = self.rev(node)
1651 1652 flags = self.flags(rev)
1652 1653
1653 1654 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1654 1655 if validatehash:
1655 1656 self.checkhash(text, node, rev=rev)
1656 1657
1657 1658 return text
1658 1659
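# Conceptually, the reconstruction above takes the first chunk of the
# delta chain as a base snapshot and folds each following delta into
# it; mdiff.patches() does this in one optimized pass. A toy
# equivalent, assuming well-formed hunks in the ">lll" format sketched
# earlier (hypothetical helpers, no validation):
import struct

def _example_applydelta(base, delta):
    # apply one binary delta to base, hunk by hunk
    out, last, pos = [], 0, 0
    while pos < len(delta):
        start, end, l = struct.unpack(">lll", delta[pos:pos + 12])
        out.append(base[last:start])          # unchanged prefix
        out.append(delta[pos + 12:pos + 12 + l])  # replacement bytes
        last, pos = end, pos + 12 + l
    out.append(base[last:])                   # unchanged suffix
    return b''.join(out)

def _example_buildtext(chunks):
    # chunks[0] is the base snapshot; the rest are deltas
    text = chunks[0]
    for delta in chunks[1:]:
        text = _example_applydelta(text, delta)
    return text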
1659 1660 def hash(self, text, p1, p2):
1660 1661 """Compute a node hash.
1661 1662
1662 1663 Available as a function so that subclasses can replace the hash
1663 1664 as needed.
1664 1665 """
1665 1666 return storageutil.hashrevisionsha1(text, p1, p2)
1666 1667
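# For reference, the default node hash digests the two parent nodes in
# sorted order followed by the revision text. A self-contained sketch,
# equivalent in spirit to storageutil.hashrevisionsha1 (which also
# special-cases null parents as an optimization):
import hashlib

def _example_hashrevision(text, p1, p2):
    # parents are sorted so the hash is symmetric in p1/p2
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()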
1667 1668 def _processflags(self, text, flags, operation, raw=False):
1668 1669 """Inspect revision data flags and applies transforms defined by
1669 1670 registered flag processors.
1670 1671
1671 1672 ``text`` - the revision data to process
1672 1673 ``flags`` - the revision flags
1673 1674 ``operation`` - the operation being performed (read or write)
1674 1675 ``raw`` - an optional argument describing if the raw transform should be
1675 1676 applied.
1676 1677
1677 1678 This method processes the flags in the order (or reverse order if
1678 1679 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1679 1680 flag processors registered for present flags. The order of flags defined
1680 1681 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
1681 1682
1682 1683 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1683 1684 processed text and ``validatehash`` is a bool indicating whether the
1684 1685 returned text should be checked for hash integrity.
1685 1686
1686 1687 Note: If the ``raw`` argument is set, it has precedence over the
1687 1688 operation and will only update the value of ``validatehash``.
1688 1689 """
1689 1690 # fast path: no flag processors will run
1690 1691 if flags == 0:
1691 1692 return text, True
1692 1693 if operation not in ('read', 'write'):
1693 1694 raise error.ProgrammingError(_("invalid '%s' operation") %
1694 1695 operation)
1695 1696 # Check all flags are known.
1696 1697 if flags & ~REVIDX_KNOWN_FLAGS:
1697 1698 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1698 1699 (flags & ~REVIDX_KNOWN_FLAGS))
1699 1700 validatehash = True
1700 1701 # Depending on the operation (read or write), the order might be
1701 1702 # reversed due to non-commutative transforms.
1702 1703 orderedflags = REVIDX_FLAGS_ORDER
1703 1704 if operation == 'write':
1704 1705 orderedflags = reversed(orderedflags)
1705 1706
1706 1707 for flag in orderedflags:
1707 1708 # If a flagprocessor has been registered for a known flag, apply the
1708 1709 # related operation transform and update result tuple.
1709 1710 if flag & flags:
1710 1711 vhash = True
1711 1712
1712 1713 if flag not in self._flagprocessors:
1713 1714 message = _("missing processor for flag '%#x'") % (flag)
1714 1715 raise error.RevlogError(message)
1715 1716
1716 1717 processor = self._flagprocessors[flag]
1717 1718 if processor is not None:
1718 1719 readtransform, writetransform, rawtransform = processor
1719 1720
1720 1721 if raw:
1721 1722 vhash = rawtransform(self, text)
1722 1723 elif operation == 'read':
1723 1724 text, vhash = readtransform(self, text)
1724 1725 else: # write operation
1725 1726 text, vhash = writetransform(self, text)
1726 1727 validatehash = validatehash and vhash
1727 1728
1728 1729 return text, validatehash
1729 1730
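# A registered processor is the (readtransform, writetransform,
# rawtransform) tuple unpacked above. Illustrative only: a processor
# that stores text reversed on disk. Read/write transforms return
# (newtext, validatehash); the raw transform only reports whether the
# raw bytes can be checked against the node hash.
def _example_readtransform(rl, text):
    # undo the on-disk transform; True means the result is hashable
    return text[::-1], True

def _example_writetransform(rl, text):
    # apply the on-disk transform before storage
    return text[::-1], True

def _example_rawtransform(rl, rawtext):
    # rawtext is not the form the node hash was computed over, so
    # skip hash validation for raw reads
    return False

_exampleprocessor = (_example_readtransform,
                     _example_writetransform,
                     _example_rawtransform)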
1730 1731 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1731 1732 """Check node hash integrity.
1732 1733
1733 1734 Available as a function so that subclasses can extend hash mismatch
1734 1735 behaviors as needed.
1735 1736 """
1736 1737 try:
1737 1738 if p1 is None and p2 is None:
1738 1739 p1, p2 = self.parents(node)
1739 1740 if node != self.hash(text, p1, p2):
1740 1741 # Clear the revision cache on hash failure. The revision cache
1741 1742 # only stores the raw revision and clearing the cache does have
1742 1743 # the side-effect that we won't have a cache hit when the raw
1743 1744 # revision data is accessed. But this case should be rare and
1744 1745 # it is extra work to teach the cache about the hash
1745 1746 # verification state.
1746 1747 if self._revisioncache and self._revisioncache[0] == node:
1747 1748 self._revisioncache = None
1748 1749
1749 1750 revornode = rev
1750 1751 if revornode is None:
1751 1752 revornode = templatefilters.short(hex(node))
1752 1753 raise error.RevlogError(_("integrity check failed on %s:%s")
1753 1754 % (self.indexfile, pycompat.bytestr(revornode)))
1754 1755 except error.RevlogError:
1755 1756 if self._censorable and storageutil.iscensoredtext(text):
1756 1757 raise error.CensoredNodeError(self.indexfile, node, text)
1757 1758 raise
1758 1759
1759 1760 def _enforceinlinesize(self, tr, fp=None):
1760 1761 """Check if the revlog is too big for inline and convert if so.
1761 1762
1762 1763 This should be called after revisions are added to the revlog. If the
1763 1764 revlog has grown too large to be an inline revlog, it will convert it
1764 1765 to use multiple index and data files.
1765 1766 """
1766 1767 tiprev = len(self) - 1
1767 1768 if (not self._inline or
1768 1769 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1769 1770 return
1770 1771
1771 1772 trinfo = tr.find(self.indexfile)
1772 1773 if trinfo is None:
1773 1774 raise error.RevlogError(_("%s not found in the transaction")
1774 1775 % self.indexfile)
1775 1776
1776 1777 trindex = trinfo[2]
1777 1778 if trindex is not None:
1778 1779 dataoff = self.start(trindex)
1779 1780 else:
1780 1781 # revlog was stripped at start of transaction, use all leftover data
1781 1782 trindex = len(self) - 1
1782 1783 dataoff = self.end(tiprev)
1783 1784
1784 1785 tr.add(self.datafile, dataoff)
1785 1786
1786 1787 if fp:
1787 1788 fp.flush()
1788 1789 fp.close()
1789 1790 # We can't use the cached file handle after close(). So prevent
1790 1791 # its usage.
1791 1792 self._writinghandles = None
1792 1793
1793 1794 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1794 1795 for r in self:
1795 1796 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1796 1797
1797 1798 with self._indexfp('w') as fp:
1798 1799 self.version &= ~FLAG_INLINE_DATA
1799 1800 self._inline = False
1800 1801 io = self._io
1801 1802 for i in self:
1802 1803 e = io.packentry(self.index[i], self.node, self.version, i)
1803 1804 fp.write(e)
1804 1805
1805 1806 # the temp file replaces the real index when we exit the context
1806 1807 # manager
1807 1808
1808 1809 tr.replace(self.indexfile, trindex * self._io.size)
1809 1810 self._chunkclear()
1810 1811
1811 1812 def _nodeduplicatecallback(self, transaction, node):
1812 1813 """called when trying to add a node already stored.
1813 1814 """
1814 1815
1815 1816 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1816 1817 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1817 1818 """add a revision to the log
1818 1819
1819 1820 text - the revision data to add
1820 1821 transaction - the transaction object used for rollback
1821 1822 link - the linkrev data to add
1822 1823 p1, p2 - the parent nodeids of the revision
1823 1824 cachedelta - an optional precomputed delta
1824 1825 node - nodeid of revision; typically node is not specified, and it is
1825 1826 computed by default as hash(text, p1, p2), however subclasses might
1826 1827 use a different hashing method (and override checkhash() in that case)
1827 1828 flags - the known flags to set on the revision
1828 1829 deltacomputer - an optional deltacomputer instance shared between
1829 1830 multiple calls
1830 1831 """
1831 1832 if link == nullrev:
1832 1833 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1833 1834 % self.indexfile)
1834 1835
1835 1836 if flags:
1836 1837 node = node or self.hash(text, p1, p2)
1837 1838
1838 1839 rawtext, validatehash = self._processflags(text, flags, 'write')
1839 1840
1840 1841 # If the flag processor modifies the revision data, ignore any provided
1841 1842 # cachedelta.
1842 1843 if rawtext != text:
1843 1844 cachedelta = None
1844 1845
1845 1846 if len(rawtext) > _maxentrysize:
1846 1847 raise error.RevlogError(
1847 1848 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1848 1849 % (self.indexfile, len(rawtext)))
1849 1850
1850 1851 node = node or self.hash(rawtext, p1, p2)
1851 1852 if node in self.nodemap:
1852 1853 return node
1853 1854
1854 1855 if validatehash:
1855 1856 self.checkhash(rawtext, node, p1=p1, p2=p2)
1856 1857
1857 1858 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1858 1859 flags, cachedelta=cachedelta,
1859 1860 deltacomputer=deltacomputer)
1860 1861
1861 1862 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1862 1863 cachedelta=None, deltacomputer=None):
1863 1864 """add a raw revision with known flags, node and parents
1864 1865 useful when reusing a revision not stored in this revlog (ex: received
1865 1866 over the wire, or read from an external bundle).
1866 1867 """
1867 1868 dfh = None
1868 1869 if not self._inline:
1869 1870 dfh = self._datafp("a+")
1870 1871 ifh = self._indexfp("a+")
1871 1872 try:
1872 1873 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1873 1874 flags, cachedelta, ifh, dfh,
1874 1875 deltacomputer=deltacomputer)
1875 1876 finally:
1876 1877 if dfh:
1877 1878 dfh.close()
1878 1879 ifh.close()
1879 1880
1880 1881 def compress(self, data):
1881 1882 """Generate a possibly-compressed representation of data."""
1882 1883 if not data:
1883 1884 return '', data
1884 1885
1885 1886 compressed = self._compressor.compress(data)
1886 1887
1887 1888 if compressed:
1888 1889 # The revlog compressor added the header in the returned data.
1889 1890 return '', compressed
1890 1891
1891 1892 if data[0:1] == '\0':
1892 1893 return '', data
1893 1894 return 'u', data
1894 1895
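# The empty header returned for compressed data works because the
# engine output is self-identifying: with default settings a zlib
# stream begins with 'x' (0x78), so decompress() below can route on
# the first byte alone. A quick demonstration (illustrative only):
import zlib

def _example_zlib_header():
    blob = zlib.compress(b'some revision text')
    # no revlog-specific header byte needs to be stripped
    assert blob[0:1] == b'x'
    assert zlib.decompress(blob) == b'some revision text'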
1895 1896 def decompress(self, data):
1896 1897 """Decompress a revlog chunk.
1897 1898
1898 1899 The chunk is expected to begin with a header identifying the
1899 1900 format type so it can be routed to an appropriate decompressor.
1900 1901 """
1901 1902 if not data:
1902 1903 return data
1903 1904
1904 1905 # Revlogs are read much more frequently than they are written and many
1905 1906 # chunks only take microseconds to decompress, so performance is
1906 1907 # important here.
1907 1908 #
1908 1909 # We can make a few assumptions about revlogs:
1909 1910 #
1910 1911 # 1) the majority of chunks will be compressed (as opposed to inline
1911 1912 # raw data).
1912 1913 # 2) decompressing *any* data will likely be at least 10x slower than
1913 1914 # returning raw inline data.
1914 1915 # 3) we want to prioritize common and officially supported compression
1915 1916 # engines
1916 1917 #
1917 1918 # It follows that we want to optimize for "decompress compressed data
1918 1919 # when encoded with common and officially supported compression engines"
1919 1920 # case over "raw data" and "data encoded by less common or non-official
1920 1921 # compression engines." That is why we have the inline lookup first
1921 1922 # followed by the compengines lookup.
1922 1923 #
1923 1924 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1924 1925 # compressed chunks. And this matters for changelog and manifest reads.
1925 1926 t = data[0:1]
1926 1927
1927 1928 if t == 'x':
1928 1929 try:
1929 1930 return _zlibdecompress(data)
1930 1931 except zlib.error as e:
1931 1932 raise error.RevlogError(_('revlog decompress error: %s') %
1932 1933 stringutil.forcebytestr(e))
1933 1934 # '\0' is more common than 'u' so it goes first.
1934 1935 elif t == '\0':
1935 1936 return data
1936 1937 elif t == 'u':
1937 1938 return util.buffer(data, 1)
1938 1939
1939 1940 try:
1940 1941 compressor = self._decompressors[t]
1941 1942 except KeyError:
1942 1943 try:
1943 1944 engine = util.compengines.forrevlogheader(t)
1944 1945 compressor = engine.revlogcompressor()
1945 1946 self._decompressors[t] = compressor
1946 1947 except KeyError:
1947 1948 raise error.RevlogError(_('unknown compression type %r') % t)
1948 1949
1949 1950 return compressor.decompress(data)
1950 1951
1951 1952 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1952 1953 cachedelta, ifh, dfh, alwayscache=False,
1953 1954 deltacomputer=None):
1954 1955 """internal function to add revisions to the log
1955 1956
1956 1957 see addrevision for argument descriptions.
1957 1958
1958 1959 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1959 1960
1960 1961 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1961 1962 be used.
1962 1963
1963 1964 invariants:
1964 1965 - rawtext is optional (can be None); if not set, cachedelta must be set.
1965 1966 if both are set, they must correspond to each other.
1966 1967 """
1967 1968 if node == nullid:
1968 1969 raise error.RevlogError(_("%s: attempt to add null revision") %
1969 1970 self.indexfile)
1970 1971 if node == wdirid or node in wdirfilenodeids:
1971 1972 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1972 1973 self.indexfile)
1973 1974
1974 1975 if self._inline:
1975 1976 fh = ifh
1976 1977 else:
1977 1978 fh = dfh
1978 1979
1979 1980 btext = [rawtext]
1980 1981
1981 1982 curr = len(self)
1982 1983 prev = curr - 1
1983 1984 offset = self.end(prev)
1984 1985 p1r, p2r = self.rev(p1), self.rev(p2)
1985 1986
1986 1987 # full versions are inserted when the needed deltas
1987 1988 # become comparable to the uncompressed text
1988 1989 if rawtext is None:
1989 1990 # need the rawtext size before it was changed by flag processors,
1990 1991 # which is the non-raw size. use revlog explicitly to avoid
1991 1992 # filelog's extra logic that might subtract the metadata size.
1992 1993 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1993 1994 cachedelta[1])
1994 1995 else:
1995 1996 textlen = len(rawtext)
1996 1997
1997 1998 if deltacomputer is None:
1998 1999 deltacomputer = deltautil.deltacomputer(self)
1999 2000
2000 2001 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2001 2002
2002 2003 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2003 2004
2004 2005 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2005 2006 deltainfo.base, link, p1r, p2r, node)
2006 2007 self.index.append(e)
2007 2008 self.nodemap[node] = curr
2008 2009
2009 2010 # Reset the pure node cache start lookup offset to account for new
2010 2011 # revision.
2011 2012 if self._nodepos is not None:
2012 2013 self._nodepos = curr
2013 2014
2014 2015 entry = self._io.packentry(e, self.node, self.version, curr)
2015 2016 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2016 2017 link, offset)
2017 2018
2018 2019 rawtext = btext[0]
2019 2020
2020 2021 if alwayscache and rawtext is None:
2021 2022 rawtext = deltacomputer.buildtext(revinfo, fh)
2022 2023
2023 2024 if type(rawtext) == bytes: # only accept immutable objects
2024 2025 self._revisioncache = (node, curr, rawtext)
2025 2026 self._chainbasecache[curr] = deltainfo.chainbase
2026 2027 return node
2027 2028
2028 2029 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2029 2030 # Files opened in a+ mode have inconsistent behavior on various
2030 2031 # platforms. Windows requires that a file positioning call be made
2031 2032 # when the file handle transitions between reads and writes. See
2032 2033 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2033 2034 # platforms, Python or the platform itself can be buggy. Some versions
2034 2035 # of Solaris have been observed to not append at the end of the file
2035 2036 # if the file was seeked to before the end. See issue4943 for more.
2036 2037 #
2037 2038 # We work around this issue by inserting a seek() before writing.
2038 2039 # Note: This is likely not necessary on Python 3. However, because
2039 2040 # the file handle is reused for reads and may be seeked there, we need
2040 2041 # to be careful before changing this.
2041 2042 ifh.seek(0, os.SEEK_END)
2042 2043 if dfh:
2043 2044 dfh.seek(0, os.SEEK_END)
2044 2045
2045 2046 curr = len(self) - 1
2046 2047 if not self._inline:
2047 2048 transaction.add(self.datafile, offset)
2048 2049 transaction.add(self.indexfile, curr * len(entry))
2049 2050 if data[0]:
2050 2051 dfh.write(data[0])
2051 2052 dfh.write(data[1])
2052 2053 ifh.write(entry)
2053 2054 else:
2054 2055 offset += curr * self._io.size
2055 2056 transaction.add(self.indexfile, offset, curr)
2056 2057 ifh.write(entry)
2057 2058 ifh.write(data[0])
2058 2059 ifh.write(data[1])
2059 2060 self._enforceinlinesize(transaction, ifh)
2060 2061
2061 2062 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2062 2063 """
2063 2064 add a delta group
2064 2065
2065 2066 given a set of deltas, add them to the revision log. the
2066 2067 first delta is against its parent, which should be in our
2067 2068 log, the rest are against the previous delta.
2068 2069
2069 2070 If ``addrevisioncb`` is defined, it will be called with arguments of
2070 2071 this revlog and the node that was added.
2071 2072 """
2072 2073
2073 2074 if self._writinghandles:
2074 2075 raise error.ProgrammingError('cannot nest addgroup() calls')
2075 2076
2076 2077 nodes = []
2077 2078
2078 2079 r = len(self)
2079 2080 end = 0
2080 2081 if r:
2081 2082 end = self.end(r - 1)
2082 2083 ifh = self._indexfp("a+")
2083 2084 isize = r * self._io.size
2084 2085 if self._inline:
2085 2086 transaction.add(self.indexfile, end + isize, r)
2086 2087 dfh = None
2087 2088 else:
2088 2089 transaction.add(self.indexfile, isize, r)
2089 2090 transaction.add(self.datafile, end)
2090 2091 dfh = self._datafp("a+")
2091 2092 def flush():
2092 2093 if dfh:
2093 2094 dfh.flush()
2094 2095 ifh.flush()
2095 2096
2096 2097 self._writinghandles = (ifh, dfh)
2097 2098
2098 2099 try:
2099 2100 deltacomputer = deltautil.deltacomputer(self)
2100 2101 # loop through our set of deltas
2101 2102 for data in deltas:
2102 2103 node, p1, p2, linknode, deltabase, delta, flags = data
2103 2104 link = linkmapper(linknode)
2104 2105 flags = flags or REVIDX_DEFAULT_FLAGS
2105 2106
2106 2107 nodes.append(node)
2107 2108
2108 2109 if node in self.nodemap:
2109 2110 self._nodeduplicatecallback(transaction, node)
2110 2111 # this can happen if two branches make the same change
2111 2112 continue
2112 2113
2113 2114 for p in (p1, p2):
2114 2115 if p not in self.nodemap:
2115 2116 raise error.LookupError(p, self.indexfile,
2116 2117 _('unknown parent'))
2117 2118
2118 2119 if deltabase not in self.nodemap:
2119 2120 raise error.LookupError(deltabase, self.indexfile,
2120 2121 _('unknown delta base'))
2121 2122
2122 2123 baserev = self.rev(deltabase)
2123 2124
2124 2125 if baserev != nullrev and self.iscensored(baserev):
2125 2126 # if base is censored, delta must be full replacement in a
2126 2127 # single patch operation
2127 2128 hlen = struct.calcsize(">lll")
2128 2129 oldlen = self.rawsize(baserev)
2129 2130 newlen = len(delta) - hlen
2130 2131 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2131 2132 raise error.CensoredBaseError(self.indexfile,
2132 2133 self.node(baserev))
2133 2134
2134 2135 if not flags and self._peek_iscensored(baserev, delta, flush):
2135 2136 flags |= REVIDX_ISCENSORED
2136 2137
2137 2138 # We assume consumers of addrevisioncb will want to retrieve
2138 2139 # the added revision, which will require a call to
2139 2140 # revision(). revision() will fast path if there is a cache
2140 2141 # hit. So, we tell _addrevision() to always cache in this case.
2141 2142 # We're only using addgroup() in the context of changegroup
2142 2143 # generation so the revision data can always be handled as raw
2143 2144 # by the flagprocessor.
2144 2145 self._addrevision(node, None, transaction, link,
2145 2146 p1, p2, flags, (baserev, delta),
2146 2147 ifh, dfh,
2147 2148 alwayscache=bool(addrevisioncb),
2148 2149 deltacomputer=deltacomputer)
2149 2150
2150 2151 if addrevisioncb:
2151 2152 addrevisioncb(self, node)
2152 2153
2153 2154 if not dfh and not self._inline:
2154 2155 # addrevision switched from inline to conventional
2155 2156 # reopen the index
2156 2157 ifh.close()
2157 2158 dfh = self._datafp("a+")
2158 2159 ifh = self._indexfp("a+")
2159 2160 self._writinghandles = (ifh, dfh)
2160 2161 finally:
2161 2162 self._writinghandles = None
2162 2163
2163 2164 if dfh:
2164 2165 dfh.close()
2165 2166 ifh.close()
2166 2167
2167 2168 return nodes
2168 2169
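# Illustrative only: each element consumed above is a 7-tuple. A
# minimal sketch of feeding one precomputed delta into addgroup(),
# assuming an open revlog ``rl`` whose linkrevs point at itself (as
# for a changelog), an active transaction ``tr``, and parents already
# present in the log:
def _example_addgroup(rl, tr, node, p1, p2, linknode, delta):
    # deltas elements are 7-tuples:
    # (node, p1, p2, linknode, deltabase, delta, flags)
    deltas = [(node, p1, p2, linknode, p1, delta, 0)]
    return rl.addgroup(deltas, lambda lnode: rl.rev(lnode), tr)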
2169 2170 def iscensored(self, rev):
2170 2171 """Check if a file revision is censored."""
2171 2172 if not self._censorable:
2172 2173 return False
2173 2174
2174 2175 return self.flags(rev) & REVIDX_ISCENSORED
2175 2176
2176 2177 def _peek_iscensored(self, baserev, delta, flush):
2177 2178 """Quickly check if a delta produces a censored revision."""
2178 2179 if not self._censorable:
2179 2180 return False
2180 2181
2181 2182 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2182 2183
2183 2184 def getstrippoint(self, minlink):
2184 2185 """find the minimum rev that must be stripped to strip the linkrev
2185 2186
2186 2187 Returns a tuple containing the minimum rev and a set of all revs that
2187 2188 have linkrevs that will be broken by this strip.
2188 2189 """
2189 2190 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2190 2191 self.headrevs(),
2191 2192 self.linkrev, self.parentrevs)
2192 2193
2193 2194 def strip(self, minlink, transaction):
2194 2195 """truncate the revlog on the first revision with a linkrev >= minlink
2195 2196
2196 2197 This function is called when we're stripping revision minlink and
2197 2198 its descendants from the repository.
2198 2199
2199 2200 We have to remove all revisions with linkrev >= minlink, because
2200 2201 the equivalent changelog revisions will be renumbered after the
2201 2202 strip.
2202 2203
2203 2204 So we truncate the revlog on the first of these revisions, and
2204 2205 trust that the caller has saved the revisions that shouldn't be
2205 2206 removed and that it'll re-add them after this truncation.
2206 2207 """
2207 2208 if len(self) == 0:
2208 2209 return
2209 2210
2210 2211 rev, _ = self.getstrippoint(minlink)
2211 2212 if rev == len(self):
2212 2213 return
2213 2214
2214 2215 # first truncate the files on disk
2215 2216 end = self.start(rev)
2216 2217 if not self._inline:
2217 2218 transaction.add(self.datafile, end)
2218 2219 end = rev * self._io.size
2219 2220 else:
2220 2221 end += rev * self._io.size
2221 2222
2222 2223 transaction.add(self.indexfile, end)
2223 2224
2224 2225 # then reset internal state in memory to forget those revisions
2225 2226 self._revisioncache = None
2226 2227 self._chaininfocache = {}
2227 2228 self._chunkclear()
2228 2229 for x in pycompat.xrange(rev, len(self)):
2229 2230 del self.nodemap[self.node(x)]
2230 2231
2231 2232 del self.index[rev:-1]
2232 2233 self._nodepos = None
2233 2234
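# The truncation offsets computed above differ between inline and
# split revlogs. A standalone sketch of the same arithmetic
# (hypothetical helper; ``datastart`` is self.start(rev) and
# ``iosize`` the index entry size):
def _example_strip_offsets(rev, datastart, iosize, inline):
    # returns (index truncation offset, data truncation offset or None)
    if inline:
        # data lives between the index entries, so one file holds both
        return datastart + rev * iosize, None
    return rev * iosize, datastart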
2234 2235 def checksize(self):
2235 2236 expected = 0
2236 2237 if len(self):
2237 2238 expected = max(0, self.end(len(self) - 1))
2238 2239
2239 2240 try:
2240 2241 with self._datafp() as f:
2241 2242 f.seek(0, 2)
2242 2243 actual = f.tell()
2243 2244 dd = actual - expected
2244 2245 except IOError as inst:
2245 2246 if inst.errno != errno.ENOENT:
2246 2247 raise
2247 2248 dd = 0
2248 2249
2249 2250 try:
2250 2251 f = self.opener(self.indexfile)
2251 2252 f.seek(0, 2)
2252 2253 actual = f.tell()
2253 2254 f.close()
2254 2255 s = self._io.size
2255 2256 i = max(0, actual // s)
2256 2257 di = actual - (i * s)
2257 2258 if self._inline:
2258 2259 databytes = 0
2259 2260 for r in self:
2260 2261 databytes += max(0, self.length(r))
2261 2262 dd = 0
2262 2263 di = actual - len(self) * s - databytes
2263 2264 except IOError as inst:
2264 2265 if inst.errno != errno.ENOENT:
2265 2266 raise
2266 2267 di = 0
2267 2268
2268 2269 return (dd, di)
2269 2270
2270 2271 def files(self):
2271 2272 res = [self.indexfile]
2272 2273 if not self._inline:
2273 2274 res.append(self.datafile)
2274 2275 return res
2275 2276
2276 2277 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2277 2278 assumehaveparentrevisions=False,
2278 2279 deltamode=repository.CG_DELTAMODE_STD):
2279 2280 if nodesorder not in ('nodes', 'storage', 'linear', None):
2280 2281 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2281 2282 nodesorder)
2282 2283
2283 2284 if nodesorder is None and not self._generaldelta:
2284 2285 nodesorder = 'storage'
2285 2286
2286 2287 if (not self._storedeltachains and
2287 2288 deltamode != repository.CG_DELTAMODE_PREV):
2288 2289 deltamode = repository.CG_DELTAMODE_FULL
2289 2290
2290 2291 return storageutil.emitrevisions(
2291 2292 self, nodes, nodesorder, revlogrevisiondelta,
2292 2293 deltaparentfn=self.deltaparent,
2293 2294 candeltafn=self.candelta,
2294 2295 rawsizefn=self.rawsize,
2295 2296 revdifffn=self.revdiff,
2296 2297 flagsfn=self.flags,
2297 2298 deltamode=deltamode,
2298 2299 revisiondata=revisiondata,
2299 2300 assumehaveparentrevisions=assumehaveparentrevisions)
2300 2301
2301 2302 DELTAREUSEALWAYS = 'always'
2302 2303 DELTAREUSESAMEREVS = 'samerevs'
2303 2304 DELTAREUSENEVER = 'never'
2304 2305
2305 2306 DELTAREUSEFULLADD = 'fulladd'
2306 2307
2307 2308 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2308 2309
2309 2310 def clone(self, tr, destrevlog, addrevisioncb=None,
2310 2311 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2311 2312 """Copy this revlog to another, possibly with format changes.
2312 2313
2313 2314 The destination revlog will contain the same revisions and nodes.
2314 2315 However, it may not be bit-for-bit identical due to e.g. delta encoding
2315 2316 differences.
2316 2317
2317 2318 The ``deltareuse`` argument controls how deltas from the existing revlog
2318 2319 are preserved in the destination revlog. The argument can have the
2319 2320 following values:
2320 2321
2321 2322 DELTAREUSEALWAYS
2322 2323 Deltas will always be reused (if possible), even if the destination
2323 2324 revlog would not select the same revisions for the delta. This is the
2324 2325 fastest mode of operation.
2325 2326 DELTAREUSESAMEREVS
2326 2327 Deltas will be reused if the destination revlog would pick the same
2327 2328 revisions for the delta. This mode strikes a balance between speed
2328 2329 and optimization.
2329 2330 DELTAREUSENEVER
2330 2331 Deltas will never be reused. This is the slowest mode of execution.
2331 2332 This mode can be used to recompute deltas (e.g. if the diff/delta
2332 2333 algorithm changes).
2333 2334
2334 2335 Delta computation can be slow, so the choice of delta reuse policy can
2335 2336 significantly affect run time.
2336 2337
2337 2338 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2338 2339 two extremes. Deltas will be reused if they are appropriate. But if the
2339 2340 delta could choose a better revision, it will do so. This means if you
2340 2341 are converting a non-generaldelta revlog to a generaldelta revlog,
2341 2342 deltas will be recomputed if the delta's parent isn't a parent of the
2342 2343 revision.
2343 2344
2344 2345 In addition to the delta policy, the ``forcedeltabothparents``
2345 2346 argument controls whether to force compute deltas against both parents
2346 2347 for merges. When it is ``None``, the destination revlog's existing setting is kept.
2347 2348 """
2348 2349 if deltareuse not in self.DELTAREUSEALL:
2349 2350 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2350 2351
2351 2352 if len(destrevlog):
2352 2353 raise ValueError(_('destination revlog is not empty'))
2353 2354
2354 2355 if getattr(self, 'filteredrevs', None):
2355 2356 raise ValueError(_('source revlog has filtered revisions'))
2356 2357 if getattr(destrevlog, 'filteredrevs', None):
2357 2358 raise ValueError(_('destination revlog has filtered revisions'))
2358 2359
2359 2360 # lazydeltabase controls whether to reuse a cached delta, if possible.
2360 2361 oldlazydeltabase = destrevlog._lazydeltabase
2361 2362 oldamd = destrevlog._deltabothparents
2362 2363
2363 2364 try:
2364 2365 if deltareuse == self.DELTAREUSEALWAYS:
2365 2366 destrevlog._lazydeltabase = True
2366 2367 elif deltareuse == self.DELTAREUSESAMEREVS:
2367 2368 destrevlog._lazydeltabase = False
2368 2369
2369 2370 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2370 2371
2371 2372 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2372 2373 self.DELTAREUSESAMEREVS)
2373 2374
2374 2375 deltacomputer = deltautil.deltacomputer(destrevlog)
2375 2376 index = self.index
2376 2377 for rev in self:
2377 2378 entry = index[rev]
2378 2379
2379 2380 # Some classes override linkrev to take filtered revs into
2380 2381 # account. Use raw entry from index.
2381 2382 flags = entry[0] & 0xffff
2382 2383 linkrev = entry[4]
2383 2384 p1 = index[entry[5]][7]
2384 2385 p2 = index[entry[6]][7]
2385 2386 node = entry[7]
2386 2387
2387 2388 # (Possibly) reuse the delta from the revlog if allowed and
2388 2389 # the revlog chunk is a delta.
2389 2390 cachedelta = None
2390 2391 rawtext = None
2391 2392 if populatecachedelta:
2392 2393 dp = self.deltaparent(rev)
2393 2394 if dp != nullrev:
2394 2395 cachedelta = (dp, bytes(self._chunk(rev)))
2395 2396
2396 2397 if not cachedelta:
2397 2398 rawtext = self.revision(rev, raw=True)
2398 2399
2400 2401 if deltareuse == self.DELTAREUSEFULLADD:
2401 2402 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2402 2403 cachedelta=cachedelta,
2403 2404 node=node, flags=flags,
2404 2405 deltacomputer=deltacomputer)
2405 2406 else:
2406 2407 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2407 2408 checkambig=False)
2408 2409 dfh = None
2409 2410 if not destrevlog._inline:
2410 2411 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2411 2412 try:
2412 2413 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2413 2414 p2, flags, cachedelta, ifh, dfh,
2414 2415 deltacomputer=deltacomputer)
2415 2416 finally:
2416 2417 if dfh:
2417 2418 dfh.close()
2418 2419 ifh.close()
2419 2420
2420 2421 if addrevisioncb:
2421 2422 addrevisioncb(self, rev, node)
2422 2423 finally:
2423 2424 destrevlog._lazydeltabase = oldlazydeltabase
2424 2425 destrevlog._deltabothparents = oldamd
2425 2426
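# A sketch of a typical call, recomputing every delta while copying
# into an upgraded revlog (``srcrl``, ``destrl`` and ``tr`` are
# assumed to already exist; illustrative only):
def _example_recompute_deltas(srcrl, destrl, tr):
    # never reuse source deltas: slowest, but lets the destination
    # pick optimal bases (e.g. when converting to generaldelta)
    srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)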
2426 2427 def censorrevision(self, tr, censornode, tombstone=b''):
2427 2428 if (self.version & 0xFFFF) == REVLOGV0:
2428 2429 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2429 2430 self.version)
2430 2431
2431 2432 censorrev = self.rev(censornode)
2432 2433 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2433 2434
2434 2435 if len(tombstone) > self.rawsize(censorrev):
2435 2436 raise error.Abort(_('censor tombstone must be no longer than '
2436 2437 'censored data'))
2437 2438
2438 2439 # Rewriting the revlog in place is hard. Our strategy for censoring is
2439 2440 # to create a new revlog, copy all revisions to it, then replace the
2440 2441 # revlogs on transaction close.
2441 2442
2442 2443 newindexfile = self.indexfile + b'.tmpcensored'
2443 2444 newdatafile = self.datafile + b'.tmpcensored'
2444 2445
2445 2446 # This is a bit dangerous. We could easily have a mismatch of state.
2446 2447 newrl = revlog(self.opener, newindexfile, newdatafile,
2447 2448 censorable=True)
2448 2449 newrl.version = self.version
2449 2450 newrl._generaldelta = self._generaldelta
2450 2451 newrl._io = self._io
2451 2452
2452 2453 for rev in self.revs():
2453 2454 node = self.node(rev)
2454 2455 p1, p2 = self.parents(node)
2455 2456
2456 2457 if rev == censorrev:
2457 2458 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2458 2459 p1, p2, censornode, REVIDX_ISCENSORED)
2459 2460
2460 2461 if newrl.deltaparent(rev) != nullrev:
2461 2462 raise error.Abort(_('censored revision stored as delta; '
2462 2463 'cannot censor'),
2463 2464 hint=_('censoring of revlogs is not '
2464 2465 'fully implemented; please report '
2465 2466 'this bug'))
2466 2467 continue
2467 2468
2468 2469 if self.iscensored(rev):
2469 2470 if self.deltaparent(rev) != nullrev:
2470 2471 raise error.Abort(_('cannot censor due to censored '
2471 2472 'revision having delta stored'))
2472 2473 rawtext = self._chunk(rev)
2473 2474 else:
2474 2475 rawtext = self.revision(rev, raw=True)
2475 2476
2476 2477 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2477 2478 self.flags(rev))
2478 2479
2479 2480 tr.addbackup(self.indexfile, location='store')
2480 2481 if not self._inline:
2481 2482 tr.addbackup(self.datafile, location='store')
2482 2483
2483 2484 self.opener.rename(newrl.indexfile, self.indexfile)
2484 2485 if not self._inline:
2485 2486 self.opener.rename(newrl.datafile, self.datafile)
2486 2487
2487 2488 self.clearcaches()
2488 2489 self._loadindex(self.version, None)
2489 2490
2490 2491 def verifyintegrity(self, state):
2491 2492 """Verifies the integrity of the revlog.
2492 2493
2493 2494 Yields ``revlogproblem`` instances describing problems that are
2494 2495 found.
2495 2496 """
2496 2497 dd, di = self.checksize()
2497 2498 if dd:
2498 2499 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2499 2500 if di:
2500 2501 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2501 2502
2502 2503 version = self.version & 0xFFFF
2503 2504
2504 2505 # The verifier tells us what version revlog we should be.
2505 2506 if version != state['expectedversion']:
2506 2507 yield revlogproblem(
2507 2508 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2508 2509 (self.indexfile, version, state['expectedversion']))
2509 2510
2510 2511 state['skipread'] = set()
2511 2512
2512 2513 for rev in self:
2513 2514 node = self.node(rev)
2514 2515
2515 2516 # Verify contents. 4 cases to care about:
2516 2517 #
2517 2518 # common: the most common case
2518 2519 # rename: with a rename
2519 2520 # meta: file content starts with b'\1\n', the metadata
2520 2521 # header defined in filelog.py, but without a rename
2521 2522 # ext: content stored externally
2522 2523 #
2523 2524 # More formally, their differences are shown below:
2524 2525 #
2525 2526 # | common | rename | meta | ext
2526 2527 # -------------------------------------------------------
2527 2528 # flags() | 0 | 0 | 0 | not 0
2528 2529 # renamed() | False | True | False | ?
2529 2530 # rawtext[0:2]=='\1\n'| False | True | True | ?
2530 2531 #
2531 2532 # "rawtext" means the raw text stored in revlog data, which
2532 2533 # could be retrieved by "revision(rev, raw=True)". "text"
2533 2534 # mentioned below is "revision(rev, raw=False)".
2534 2535 #
2535 2536 # There are 3 different lengths stored physically:
2536 2537 # 1. L1: rawsize, stored in revlog index
2537 2538 # 2. L2: len(rawtext), stored in revlog data
2538 2539 # 3. L3: len(text), stored in revlog data if flags==0, or
2539 2540 # possibly somewhere else if flags!=0
2540 2541 #
2541 2542 # L1 should be equal to L2. L3 could be different from them.
2542 2543 # "text" may or may not affect commit hash depending on flag
2543 2544 # processors (see revlog.addflagprocessor).
2544 2545 #
2545 2546 # | common | rename | meta | ext
2546 2547 # -------------------------------------------------
2547 2548 # rawsize() | L1 | L1 | L1 | L1
2548 2549 # size() | L1 | L2-LM | L1(*) | L1 (?)
2549 2550 # len(rawtext) | L2 | L2 | L2 | L2
2550 2551 # len(text) | L2 | L2 | L2 | L3
2551 2552 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2552 2553 #
2553 2554 # LM: length of metadata, depending on rawtext
2554 2555 # (*): not ideal, see comment in filelog.size
2555 2556 # (?): could be "- len(meta)" if the resolved content has
2556 2557 # rename metadata
2557 2558 #
2558 2559 # Checks needed to be done:
2559 2560 # 1. length check: L1 == L2, in all cases.
2560 2561 # 2. hash check: depending on flag processor, we may need to
2561 2562 # use either "text" (external), or "rawtext" (in revlog).
2562 2563
2563 2564 try:
2564 2565 skipflags = state.get('skipflags', 0)
2565 2566 if skipflags:
2566 2567 skipflags &= self.flags(rev)
2567 2568
2568 2569 if skipflags:
2569 2570 state['skipread'].add(node)
2570 2571 else:
2571 2572 # Side-effect: read content and verify hash.
2572 2573 self.revision(node)
2573 2574
2574 2575 l1 = self.rawsize(rev)
2575 2576 l2 = len(self.revision(node, raw=True))
2576 2577
2577 2578 if l1 != l2:
2578 2579 yield revlogproblem(
2579 2580 error=_('unpacked size is %d, %d expected') % (l2, l1),
2580 2581 node=node)
2581 2582
2582 2583 except error.CensoredNodeError:
2583 2584 if state['erroroncensored']:
2584 2585 yield revlogproblem(error=_('censored file data'),
2585 2586 node=node)
2586 2587 state['skipread'].add(node)
2587 2588 except Exception as e:
2588 2589 yield revlogproblem(
2589 2590 error=_('unpacking %s: %s') % (short(node),
2590 2591 stringutil.forcebytestr(e)),
2591 2592 node=node)
2592 2593 state['skipread'].add(node)
2593 2594
2594 2595 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2595 2596 revisionscount=False, trackedsize=False,
2596 2597 storedsize=False):
2597 2598 d = {}
2598 2599
2599 2600 if exclusivefiles:
2600 2601 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2601 2602 if not self._inline:
2602 2603 d['exclusivefiles'].append((self.opener, self.datafile))
2603 2604
2604 2605 if sharedfiles:
2605 2606 d['sharedfiles'] = []
2606 2607
2607 2608 if revisionscount:
2608 2609 d['revisionscount'] = len(self)
2609 2610
2610 2611 if trackedsize:
2611 2612 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2612 2613
2613 2614 if storedsize:
2614 2615 d['storedsize'] = sum(self.opener.stat(path).st_size
2615 2616 for path in self.files())
2616 2617
2617 2618 return d