flagutil: introduce a flagprocessorsmixin class...
marmoute
r43140:5bb68fb7 default
@@ -1,2682 +1,2619 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 57 )
58 58 from .thirdparty import (
59 59 attr,
60 60 )
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 )
79 79 from .utils import (
80 80 storageutil,
81 81 stringutil,
82 82 )
83 83
84 84 # blanked usage of all the names to prevent pyflakes constraints
85 85 # We need these names available in the module for extensions.
86 86 REVLOGV0
87 87 REVLOGV1
88 88 REVLOGV2
89 89 FLAG_INLINE_DATA
90 90 FLAG_GENERALDELTA
91 91 REVLOG_DEFAULT_FLAGS
92 92 REVLOG_DEFAULT_FORMAT
93 93 REVLOG_DEFAULT_VERSION
94 94 REVLOGV1_FLAGS
95 95 REVLOGV2_FLAGS
96 96 REVIDX_ISCENSORED
97 97 REVIDX_ELLIPSIS
98 98 REVIDX_EXTSTORED
99 99 REVIDX_DEFAULT_FLAGS
100 100 REVIDX_FLAGS_ORDER
101 101 REVIDX_RAWTEXT_CHANGING_FLAGS
102 102
103 103 parsers = policy.importmod(r'parsers')
104 104 rustancestor = policy.importrust(r'ancestor')
105 105 rustdagop = policy.importrust(r'dagop')
106 106
107 107 # Aliased for performance.
108 108 _zlibdecompress = zlib.decompress
109 109
110 110 # max size of revlog with inline data
111 111 _maxinline = 131072
112 112 _chunksize = 1048576
113 113
114 114 # Flag processors for REVIDX_ELLIPSIS.
115 115 def ellipsisreadprocessor(rl, text):
116 116 return text, False
117 117
118 118 def ellipsiswriteprocessor(rl, text):
119 119 return text, False
120 120
121 121 def ellipsisrawprocessor(rl, text):
122 122 return False
123 123
124 124 ellipsisprocessor = (
125 125 ellipsisreadprocessor,
126 126 ellipsiswriteprocessor,
127 127 ellipsisrawprocessor,
128 128 )
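
# Editorial note on the triple above (the flag processor contract used by
# flagutil, sketched here): the "read" and "write" processors return a
# (text, validatehash) pair, while the "raw" processor returns whether the
# stored rawtext may be used to verify the hash directly. For
# REVIDX_ELLIPSIS all three leave the text untouched.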
129 129
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
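
# A tiny round-trip sketch of the packing above: the high bits of an index
# entry's first field carry the data offset, the low 16 bits the flags.
_packed_example = offset_type(1024, 0)        # offset 1024, no flags set
assert getoffset(_packed_example) == 1024
assert gettype(_packed_example) == 0
del _packed_example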
140 140
141 141 @attr.s(slots=True, frozen=True)
142 142 class _revisioninfo(object):
143 143 """Information about a revision that allows building its fulltext
144 144 node: expected hash of the revision
145 145 p1, p2: parent revs of the revision
146 146 btext: built text cache consisting of a one-element list
147 147 cachedelta: (baserev, uncompressed_delta) or None
148 148 flags: flags associated with the revision storage
149 149
150 150 One of btext[0] or cachedelta must be set.
151 151 """
152 152 node = attr.ib()
153 153 p1 = attr.ib()
154 154 p2 = attr.ib()
155 155 btext = attr.ib()
156 156 textlen = attr.ib()
157 157 cachedelta = attr.ib()
158 158 flags = attr.ib()
159 159
160 160 @interfaceutil.implementer(repository.irevisiondelta)
161 161 @attr.s(slots=True)
162 162 class revlogrevisiondelta(object):
163 163 node = attr.ib()
164 164 p1node = attr.ib()
165 165 p2node = attr.ib()
166 166 basenode = attr.ib()
167 167 flags = attr.ib()
168 168 baserevisionsize = attr.ib()
169 169 revision = attr.ib()
170 170 delta = attr.ib()
171 171 linknode = attr.ib(default=None)
172 172
173 173 @interfaceutil.implementer(repository.iverifyproblem)
174 174 @attr.s(frozen=True)
175 175 class revlogproblem(object):
176 176 warning = attr.ib(default=None)
177 177 error = attr.ib(default=None)
178 178 node = attr.ib(default=None)
179 179
180 180 # index v0:
181 181 # 4 bytes: offset
182 182 # 4 bytes: compressed length
183 183 # 4 bytes: base rev
184 184 # 4 bytes: link rev
185 185 # 20 bytes: parent 1 nodeid
186 186 # 20 bytes: parent 2 nodeid
187 187 # 20 bytes: nodeid
188 188 indexformatv0 = struct.Struct(">4l20s20s20s")
189 189 indexformatv0_pack = indexformatv0.pack
190 190 indexformatv0_unpack = indexformatv0.unpack
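
# Size check sketch for the v0 record layout documented above:
# 4 * 4-byte integers plus 3 * 20-byte SHA-1 nodeids = 76 bytes per entry.
assert indexformatv0.size == 4 * 4 + 3 * 20 == 76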
191 191
192 192 class revlogoldindex(list):
193 193 def __getitem__(self, i):
194 194 if i == -1:
195 195 return (0, 0, 0, -1, -1, -1, -1, nullid)
196 196 return list.__getitem__(self, i)
197 197
198 198 class revlogoldio(object):
199 199 def __init__(self):
200 200 self.size = indexformatv0.size
201 201
202 202 def parseindex(self, data, inline):
203 203 s = self.size
204 204 index = []
205 205 nodemap = {nullid: nullrev}
206 206 n = off = 0
207 207 l = len(data)
208 208 while off + s <= l:
209 209 cur = data[off:off + s]
210 210 off += s
211 211 e = indexformatv0_unpack(cur)
212 212 # transform to revlogv1 format
213 213 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
214 214 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
215 215 index.append(e2)
216 216 nodemap[e[6]] = n
217 217 n += 1
218 218
219 219 return revlogoldindex(index), nodemap, None
220 220
221 221 def packentry(self, entry, node, version, rev):
222 222 if gettype(entry[0]):
223 223 raise error.RevlogError(_('index entry flags need revlog '
224 224 'version 1'))
225 225 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
226 226 node(entry[5]), node(entry[6]), entry[7])
227 227 return indexformatv0_pack(*e2)
228 228
229 229 # index ng:
230 230 # 6 bytes: offset
231 231 # 2 bytes: flags
232 232 # 4 bytes: compressed length
233 233 # 4 bytes: uncompressed length
234 234 # 4 bytes: base rev
235 235 # 4 bytes: link rev
236 236 # 4 bytes: parent 1 rev
237 237 # 4 bytes: parent 2 rev
238 238 # 32 bytes: nodeid
239 239 indexformatng = struct.Struct(">Qiiiiii20s12x")
240 240 indexformatng_pack = indexformatng.pack
241 241 versionformat = struct.Struct(">I")
242 242 versionformat_pack = versionformat.pack
243 243 versionformat_unpack = versionformat.unpack
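
# Size check sketch for the "ng" (v1) record layout documented above:
# 8 + 6 * 4 + 20 bytes of fields, padded with 12 null bytes to a fixed
# 64-byte record.
assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64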
244 244
245 245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 246 # signed integer)
247 247 _maxentrysize = 0x7fffffff
248 248
249 249 class revlogio(object):
250 250 def __init__(self):
251 251 self.size = indexformatng.size
252 252
253 253 def parseindex(self, data, inline):
254 254 # call the C implementation to parse the index data
255 255 index, cache = parsers.parse_index2(data, inline)
256 256 return index, getattr(index, 'nodemap', None), cache
257 257
258 258 def packentry(self, entry, node, version, rev):
259 259 p = indexformatng_pack(*entry)
260 260 if rev == 0:
261 261 p = versionformat_pack(version) + p[4:]
262 262 return p
263 263
264 class revlog(object):
264 class revlog(flagutil.flagprocessorsmixin):
265 265 """
266 266 the underlying revision storage object
267 267
268 268 A revlog consists of two parts, an index and the revision data.
269 269
270 270 The index is a file with a fixed record size containing
271 271 information on each revision, including its nodeid (hash), the
272 272 nodeids of its parents, the position and offset of its data within
273 273 the data file, and the revision it's based on. Finally, each entry
274 274 contains a linkrev entry that can serve as a pointer to external
275 275 data.
276 276
277 277 The revision data itself is a linear collection of data chunks.
278 278 Each chunk represents a revision and is usually represented as a
279 279 delta against the previous chunk. To bound lookup time, runs of
280 280 deltas are limited to about 2 times the length of the original
281 281 version data. This makes retrieval of a version proportional to
282 282 its size, or O(1) relative to the number of revisions.
283 283
284 284 Both pieces of the revlog are written to in an append-only
285 285 fashion, which means we never need to rewrite a file to insert or
286 286 remove data, and can use some simple techniques to avoid the need
287 287 for locking while reading.
288 288
289 289 If checkambig, indexfile is opened with checkambig=True at
290 290 writing, to avoid file stat ambiguity.
291 291
292 292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 293 index will be mmapped rather than read if it is larger than the
294 294 configured threshold.
295 295
296 296 If censorable is True, the revlog can have censored revisions.
297 297
298 298 If `upperboundcomp` is not None, this is the expected maximal gain from
299 299 compression for the data content.
300 300 """
301 301 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
302 302 mmaplargeindex=False, censorable=False,
303 303 upperboundcomp=None):
304 304 """
305 305 create a revlog object
306 306
307 307 opener is a function that abstracts the file opening operation
308 308 and can be used to implement COW semantics or the like.
309 309
310 310 """
311 311 self.upperboundcomp = upperboundcomp
312 312 self.indexfile = indexfile
313 313 self.datafile = datafile or (indexfile[:-2] + ".d")
314 314 self.opener = opener
315 315 # When True, indexfile is opened with checkambig=True at writing, to
316 316 # avoid file stat ambiguity.
317 317 self._checkambig = checkambig
318 318 self._mmaplargeindex = mmaplargeindex
319 319 self._censorable = censorable
320 320 # 3-tuple of (node, rev, text) for a raw revision.
321 321 self._revisioncache = None
322 322 # Maps rev to chain base rev.
323 323 self._chainbasecache = util.lrucachedict(100)
324 324 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
325 325 self._chunkcache = (0, '')
326 326 # How much data to read and cache into the raw revlog data cache.
327 327 self._chunkcachesize = 65536
328 328 self._maxchainlen = None
329 329 self._deltabothparents = True
330 330 self.index = []
331 331 # Mapping of partial identifiers to full nodes.
332 332 self._pcache = {}
333 333 # Mapping of revision integer to full node.
334 334 self._nodecache = {nullid: nullrev}
335 335 self._nodepos = None
336 336 self._compengine = 'zlib'
337 337 self._compengineopts = {}
338 338 self._maxdeltachainspan = -1
339 339 self._withsparseread = False
340 340 self._sparserevlog = False
341 341 self._srdensitythreshold = 0.50
342 342 self._srmingapsize = 262144
343 343
344 344 # Make copy of flag processors so each revlog instance can support
345 345 # custom flags.
346 346 self._flagprocessors = dict(flagutil.flagprocessors)
347 347
348 348 # 2-tuple of file handles being used for active writing.
349 349 self._writinghandles = None
350 350
351 351 self._loadindex()
352 352
353 353 def _loadindex(self):
354 354 mmapindexthreshold = None
355 355 opts = getattr(self.opener, 'options', {}) or {}
356 356
357 357 if 'revlogv2' in opts:
358 358 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
359 359 elif 'revlogv1' in opts:
360 360 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
361 361 if 'generaldelta' in opts:
362 362 newversionflags |= FLAG_GENERALDELTA
363 363 elif getattr(self.opener, 'options', None) is not None:
364 364 # If options provided but no 'revlog*' found, the repository
365 365 # would have no 'requires' file in it, which means we have to
366 366 # stick to the old format.
367 367 newversionflags = REVLOGV0
368 368 else:
369 369 newversionflags = REVLOG_DEFAULT_VERSION
370 370
371 371 if 'chunkcachesize' in opts:
372 372 self._chunkcachesize = opts['chunkcachesize']
373 373 if 'maxchainlen' in opts:
374 374 self._maxchainlen = opts['maxchainlen']
375 375 if 'deltabothparents' in opts:
376 376 self._deltabothparents = opts['deltabothparents']
377 377 self._lazydelta = bool(opts.get('lazydelta', True))
378 378 self._lazydeltabase = False
379 379 if self._lazydelta:
380 380 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
381 381 if 'compengine' in opts:
382 382 self._compengine = opts['compengine']
383 383 if 'zlib.level' in opts:
384 384 self._compengineopts['zlib.level'] = opts['zlib.level']
385 385 if 'zstd.level' in opts:
386 386 self._compengineopts['zstd.level'] = opts['zstd.level']
387 387 if 'maxdeltachainspan' in opts:
388 388 self._maxdeltachainspan = opts['maxdeltachainspan']
389 389 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
390 390 mmapindexthreshold = opts['mmapindexthreshold']
391 391 self._sparserevlog = bool(opts.get('sparse-revlog', False))
392 392 withsparseread = bool(opts.get('with-sparse-read', False))
393 393 # sparse-revlog forces sparse-read
394 394 self._withsparseread = self._sparserevlog or withsparseread
395 395 if 'sparse-read-density-threshold' in opts:
396 396 self._srdensitythreshold = opts['sparse-read-density-threshold']
397 397 if 'sparse-read-min-gap-size' in opts:
398 398 self._srmingapsize = opts['sparse-read-min-gap-size']
399 399 if opts.get('enableellipsis'):
400 400 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
401 401
402 402 # revlog v0 doesn't have flag processors
403 403 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
404 404 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
405 405
406 406 if self._chunkcachesize <= 0:
407 407 raise error.RevlogError(_('revlog chunk cache size %r is not '
408 408 'greater than 0') % self._chunkcachesize)
409 409 elif self._chunkcachesize & (self._chunkcachesize - 1):
410 410 raise error.RevlogError(_('revlog chunk cache size %r is not a '
411 411 'power of 2') % self._chunkcachesize)
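        # Worked note on the checks above: for a positive integer n,
        # n & (n - 1) clears the lowest set bit, so it is zero exactly for
        # powers of two, e.g. 65536 & 65535 == 0 while 3 & 2 == 2 rejects 3.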
412 412
413 413 indexdata = ''
414 414 self._initempty = True
415 415 try:
416 416 with self._indexfp() as f:
417 417 if (mmapindexthreshold is not None and
418 418 self.opener.fstat(f).st_size >= mmapindexthreshold):
419 419 # TODO: should .close() to release resources without
420 420 # relying on Python GC
421 421 indexdata = util.buffer(util.mmapread(f))
422 422 else:
423 423 indexdata = f.read()
424 424 if len(indexdata) > 0:
425 425 versionflags = versionformat_unpack(indexdata[:4])[0]
426 426 self._initempty = False
427 427 else:
428 428 versionflags = newversionflags
429 429 except IOError as inst:
430 430 if inst.errno != errno.ENOENT:
431 431 raise
432 432
433 433 versionflags = newversionflags
434 434
435 435 self.version = versionflags
436 436
437 437 flags = versionflags & ~0xFFFF
438 438 fmt = versionflags & 0xFFFF
439 439
440 440 if fmt == REVLOGV0:
441 441 if flags:
442 442 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
443 443 'revlog %s') %
444 444 (flags >> 16, fmt, self.indexfile))
445 445
446 446 self._inline = False
447 447 self._generaldelta = False
448 448
449 449 elif fmt == REVLOGV1:
450 450 if flags & ~REVLOGV1_FLAGS:
451 451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 452 'revlog %s') %
453 453 (flags >> 16, fmt, self.indexfile))
454 454
455 455 self._inline = versionflags & FLAG_INLINE_DATA
456 456 self._generaldelta = versionflags & FLAG_GENERALDELTA
457 457
458 458 elif fmt == REVLOGV2:
459 459 if flags & ~REVLOGV2_FLAGS:
460 460 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
461 461 'revlog %s') %
462 462 (flags >> 16, fmt, self.indexfile))
463 463
464 464 self._inline = versionflags & FLAG_INLINE_DATA
465 465 # generaldelta implied by version 2 revlogs.
466 466 self._generaldelta = True
467 467
468 468 else:
469 469 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
470 470 (fmt, self.indexfile))
471 471 # sparse-revlog can't be on without general-delta (issue6056)
472 472 if not self._generaldelta:
473 473 self._sparserevlog = False
474 474
475 475 self._storedeltachains = True
476 476
477 477 self._io = revlogio()
478 478 if self.version == REVLOGV0:
479 479 self._io = revlogoldio()
480 480 try:
481 481 d = self._io.parseindex(indexdata, self._inline)
482 482 except (ValueError, IndexError):
483 483 raise error.RevlogError(_("index %s is corrupted") %
484 484 self.indexfile)
485 485 self.index, nodemap, self._chunkcache = d
486 486 if nodemap is not None:
487 487 self.nodemap = self._nodecache = nodemap
488 488 if not self._chunkcache:
489 489 self._chunkclear()
490 490 # revnum -> (chain-length, sum-delta-length)
491 491 self._chaininfocache = {}
492 492 # revlog header -> revlog compressor
493 493 self._decompressors = {}
494 494
495 495 @util.propertycache
496 496 def _compressor(self):
497 497 engine = util.compengines[self._compengine]
498 498 return engine.revlogcompressor(self._compengineopts)
499 499
500 500 def _indexfp(self, mode='r'):
501 501 """file object for the revlog's index file"""
502 502 args = {r'mode': mode}
503 503 if mode != 'r':
504 504 args[r'checkambig'] = self._checkambig
505 505 if mode == 'w':
506 506 args[r'atomictemp'] = True
507 507 return self.opener(self.indexfile, **args)
508 508
509 509 def _datafp(self, mode='r'):
510 510 """file object for the revlog's data file"""
511 511 return self.opener(self.datafile, mode=mode)
512 512
513 513 @contextlib.contextmanager
514 514 def _datareadfp(self, existingfp=None):
515 515 """file object suitable to read data"""
516 516 # Use explicit file handle, if given.
517 517 if existingfp is not None:
518 518 yield existingfp
519 519
520 520 # Use a file handle being actively used for writes, if available.
521 521 # There is some danger to doing this because reads will seek the
522 522 # file. However, _writeentry() performs a SEEK_END before all writes,
523 523 # so we should be safe.
524 524 elif self._writinghandles:
525 525 if self._inline:
526 526 yield self._writinghandles[0]
527 527 else:
528 528 yield self._writinghandles[1]
529 529
530 530 # Otherwise open a new file handle.
531 531 else:
532 532 if self._inline:
533 533 func = self._indexfp
534 534 else:
535 535 func = self._datafp
536 536 with func() as fp:
537 537 yield fp
538 538
539 539 def tip(self):
540 540 return self.node(len(self.index) - 1)
541 541 def __contains__(self, rev):
542 542 return 0 <= rev < len(self)
543 543 def __len__(self):
544 544 return len(self.index)
545 545 def __iter__(self):
546 546 return iter(pycompat.xrange(len(self)))
547 547 def revs(self, start=0, stop=None):
548 548 """iterate over all rev in this revlog (from start to stop)"""
549 549 return storageutil.iterrevs(len(self), start=start, stop=stop)
550 550
551 551 @util.propertycache
552 552 def nodemap(self):
553 553 if self.index:
554 554 # populate mapping down to the initial node
555 555 node0 = self.index[0][7] # get around changelog filtering
556 556 self.rev(node0)
557 557 return self._nodecache
558 558
559 559 def hasnode(self, node):
560 560 try:
561 561 self.rev(node)
562 562 return True
563 563 except KeyError:
564 564 return False
565 565
566 566 def candelta(self, baserev, rev):
567 567 """whether two revisions (baserev, rev) can be delta-ed or not"""
568 568 # Disable delta if either rev requires a content-changing flag
569 569 # processor (ex. LFS). This is because such a flag processor can alter
570 570 # the rawtext content that the delta will be based on, and two clients
571 571 # could have the same revlog node with different flags (i.e. different
572 572 # rawtext contents) and the delta could be incompatible.
573 573 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
574 574 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
575 575 return False
576 576 return True
577 577
578 578 def clearcaches(self):
579 579 self._revisioncache = None
580 580 self._chainbasecache.clear()
581 581 self._chunkcache = (0, '')
582 582 self._pcache = {}
583 583
584 584 try:
585 585 # If we are using the native C version, we are in the fun case
586 586 # where self.index, self.nodemap and self._nodecache are the same
587 587 # object.
588 588 self._nodecache.clearcaches()
589 589 except AttributeError:
590 590 self._nodecache = {nullid: nullrev}
591 591 self._nodepos = None
592 592
593 593 def rev(self, node):
594 594 try:
595 595 return self._nodecache[node]
596 596 except TypeError:
597 597 raise
598 598 except error.RevlogError:
599 599 # parsers.c radix tree lookup failed
600 600 if node == wdirid or node in wdirfilenodeids:
601 601 raise error.WdirUnsupported
602 602 raise error.LookupError(node, self.indexfile, _('no node'))
603 603 except KeyError:
604 604 # pure python cache lookup failed
605 605 n = self._nodecache
606 606 i = self.index
607 607 p = self._nodepos
608 608 if p is None:
609 609 p = len(i) - 1
610 610 else:
611 611 assert p < len(i)
612 612 for r in pycompat.xrange(p, -1, -1):
613 613 v = i[r][7]
614 614 n[v] = r
615 615 if v == node:
616 616 self._nodepos = r - 1
617 617 return r
618 618 if node == wdirid or node in wdirfilenodeids:
619 619 raise error.WdirUnsupported
620 620 raise error.LookupError(node, self.indexfile, _('no node'))
621 621
622 622 # Accessors for index entries.
623 623
624 624 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
625 625 # are flags.
626 626 def start(self, rev):
627 627 return int(self.index[rev][0] >> 16)
628 628
629 629 def flags(self, rev):
630 630 return self.index[rev][0] & 0xFFFF
631 631
632 632 def length(self, rev):
633 633 return self.index[rev][1]
634 634
635 635 def rawsize(self, rev):
636 636 """return the length of the uncompressed text for a given revision"""
637 637 l = self.index[rev][2]
638 638 if l >= 0:
639 639 return l
640 640
641 641 t = self.rawdata(rev)
642 642 return len(t)
643 643
644 644 def size(self, rev):
645 645 """length of non-raw text (processed by a "read" flag processor)"""
646 646 # fast path: if no "read" flag processor could change the content,
647 647 # size is rawsize. note: ELLIPSIS is known to not change the content.
648 648 flags = self.flags(rev)
649 649 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
650 650 return self.rawsize(rev)
651 651
652 652 return len(self.revision(rev, raw=False))
653 653
654 654 def chainbase(self, rev):
655 655 base = self._chainbasecache.get(rev)
656 656 if base is not None:
657 657 return base
658 658
659 659 index = self.index
660 660 iterrev = rev
661 661 base = index[iterrev][3]
662 662 while base != iterrev:
663 663 iterrev = base
664 664 base = index[iterrev][3]
665 665
666 666 self._chainbasecache[rev] = base
667 667 return base
668 668
669 669 def linkrev(self, rev):
670 670 return self.index[rev][4]
671 671
672 672 def parentrevs(self, rev):
673 673 try:
674 674 entry = self.index[rev]
675 675 except IndexError:
676 676 if rev == wdirrev:
677 677 raise error.WdirUnsupported
678 678 raise
679 679
680 680 return entry[5], entry[6]
681 681
682 682 # fast parentrevs(rev) where rev isn't filtered
683 683 _uncheckedparentrevs = parentrevs
684 684
685 685 def node(self, rev):
686 686 try:
687 687 return self.index[rev][7]
688 688 except IndexError:
689 689 if rev == wdirrev:
690 690 raise error.WdirUnsupported
691 691 raise
692 692
693 693 # Derived from index values.
694 694
695 695 def end(self, rev):
696 696 return self.start(rev) + self.length(rev)
697 697
698 698 def parents(self, node):
699 699 i = self.index
700 700 d = i[self.rev(node)]
701 701 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
702 702
703 703 def chainlen(self, rev):
704 704 return self._chaininfo(rev)[0]
705 705
706 706 def _chaininfo(self, rev):
707 707 chaininfocache = self._chaininfocache
708 708 if rev in chaininfocache:
709 709 return chaininfocache[rev]
710 710 index = self.index
711 711 generaldelta = self._generaldelta
712 712 iterrev = rev
713 713 e = index[iterrev]
714 714 clen = 0
715 715 compresseddeltalen = 0
716 716 while iterrev != e[3]:
717 717 clen += 1
718 718 compresseddeltalen += e[1]
719 719 if generaldelta:
720 720 iterrev = e[3]
721 721 else:
722 722 iterrev -= 1
723 723 if iterrev in chaininfocache:
724 724 t = chaininfocache[iterrev]
725 725 clen += t[0]
726 726 compresseddeltalen += t[1]
727 727 break
728 728 e = index[iterrev]
729 729 else:
730 730 # Add text length of base since decompressing that also takes
731 731 # work. For cache hits the length is already included.
732 732 compresseddeltalen += e[1]
733 733 r = (clen, compresseddeltalen)
734 734 chaininfocache[rev] = r
735 735 return r
736 736
737 737 def _deltachain(self, rev, stoprev=None):
738 738 """Obtain the delta chain for a revision.
739 739
740 740 ``stoprev`` specifies a revision to stop at. If not specified, we
741 741 stop at the base of the chain.
742 742
743 743 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
744 744 revs in ascending order and ``stopped`` is a bool indicating whether
745 745 ``stoprev`` was hit.
746 746 """
747 747 # Try C implementation.
748 748 try:
749 749 return self.index.deltachain(rev, stoprev, self._generaldelta)
750 750 except AttributeError:
751 751 pass
752 752
753 753 chain = []
754 754
755 755 # Alias to prevent attribute lookup in tight loop.
756 756 index = self.index
757 757 generaldelta = self._generaldelta
758 758
759 759 iterrev = rev
760 760 e = index[iterrev]
761 761 while iterrev != e[3] and iterrev != stoprev:
762 762 chain.append(iterrev)
763 763 if generaldelta:
764 764 iterrev = e[3]
765 765 else:
766 766 iterrev -= 1
767 767 e = index[iterrev]
768 768
769 769 if iterrev == stoprev:
770 770 stopped = True
771 771 else:
772 772 chain.append(iterrev)
773 773 stopped = False
774 774
775 775 chain.reverse()
776 776 return chain, stopped
777 777
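    # Toy illustration of the pure-Python chain walk above, with a
    # hypothetical rev -> delta-base mapping (general-delta case):
    #
    #   bases = {0: 0, 1: 0, 2: 1}
    #   rev, chain = 2, []
    #   while bases[rev] != rev:
    #       chain.append(rev)
    #       rev = bases[rev]
    #   chain.append(rev)
    #   chain.reverse()                # -> [0, 1, 2]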
778 778 def ancestors(self, revs, stoprev=0, inclusive=False):
779 779 """Generate the ancestors of 'revs' in reverse revision order.
780 780 Does not generate revs lower than stoprev.
781 781
782 782 See the documentation for ancestor.lazyancestors for more details."""
783 783
784 784 # first, make sure start revisions aren't filtered
785 785 revs = list(revs)
786 786 checkrev = self.node
787 787 for r in revs:
788 788 checkrev(r)
789 789 # and we're sure ancestors aren't filtered as well
790 790
791 791 if rustancestor is not None:
792 792 lazyancestors = rustancestor.LazyAncestors
793 793 arg = self.index
794 794 elif util.safehasattr(parsers, 'rustlazyancestors'):
795 795 lazyancestors = ancestor.rustlazyancestors
796 796 arg = self.index
797 797 else:
798 798 lazyancestors = ancestor.lazyancestors
799 799 arg = self._uncheckedparentrevs
800 800 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
801 801
802 802 def descendants(self, revs):
803 803 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
804 804
805 805 def findcommonmissing(self, common=None, heads=None):
806 806 """Return a tuple of the ancestors of common and the ancestors of heads
807 807 that are not ancestors of common. In revset terminology, we return the
808 808 tuple:
809 809
810 810 ::common, (::heads) - (::common)
811 811
812 812 The list is sorted by revision number, meaning it is
813 813 topologically sorted.
814 814
815 815 'heads' and 'common' are both lists of node IDs. If heads is
816 816 not supplied, uses all of the revlog's heads. If common is not
817 817 supplied, uses nullid."""
818 818 if common is None:
819 819 common = [nullid]
820 820 if heads is None:
821 821 heads = self.heads()
822 822
823 823 common = [self.rev(n) for n in common]
824 824 heads = [self.rev(n) for n in heads]
825 825
826 826 # we want the ancestors, but inclusive
827 827 class lazyset(object):
828 828 def __init__(self, lazyvalues):
829 829 self.addedvalues = set()
830 830 self.lazyvalues = lazyvalues
831 831
832 832 def __contains__(self, value):
833 833 return value in self.addedvalues or value in self.lazyvalues
834 834
835 835 def __iter__(self):
836 836 added = self.addedvalues
837 837 for r in added:
838 838 yield r
839 839 for r in self.lazyvalues:
840 840 if r not in added:
841 841 yield r
842 842
843 843 def add(self, value):
844 844 self.addedvalues.add(value)
845 845
846 846 def update(self, values):
847 847 self.addedvalues.update(values)
848 848
849 849 has = lazyset(self.ancestors(common))
850 850 has.add(nullrev)
851 851 has.update(common)
852 852
853 853 # take all ancestors from heads that aren't in has
854 854 missing = set()
855 855 visit = collections.deque(r for r in heads if r not in has)
856 856 while visit:
857 857 r = visit.popleft()
858 858 if r in missing:
859 859 continue
860 860 else:
861 861 missing.add(r)
862 862 for p in self.parentrevs(r):
863 863 if p not in has:
864 864 visit.append(p)
865 865 missing = list(missing)
866 866 missing.sort()
867 867 return has, [self.node(miss) for miss in missing]
868 868
869 869 def incrementalmissingrevs(self, common=None):
870 870 """Return an object that can be used to incrementally compute the
871 871 revision numbers of the ancestors of arbitrary sets that are not
872 872 ancestors of common. This is an ancestor.incrementalmissingancestors
873 873 object.
874 874
875 875 'common' is a list of revision numbers. If common is not supplied, uses
876 876 nullrev.
877 877 """
878 878 if common is None:
879 879 common = [nullrev]
880 880
881 881 if rustancestor is not None:
882 882 return rustancestor.MissingAncestors(self.index, common)
883 883 return ancestor.incrementalmissingancestors(self.parentrevs, common)
884 884
885 885 def findmissingrevs(self, common=None, heads=None):
886 886 """Return the revision numbers of the ancestors of heads that
887 887 are not ancestors of common.
888 888
889 889 More specifically, return a list of revision numbers corresponding to
890 890 nodes N such that every N satisfies the following constraints:
891 891
892 892 1. N is an ancestor of some node in 'heads'
893 893 2. N is not an ancestor of any node in 'common'
894 894
895 895 The list is sorted by revision number, meaning it is
896 896 topologically sorted.
897 897
898 898 'heads' and 'common' are both lists of revision numbers. If heads is
899 899 not supplied, uses all of the revlog's heads. If common is not
900 900 supplied, uses nullid."""
901 901 if common is None:
902 902 common = [nullrev]
903 903 if heads is None:
904 904 heads = self.headrevs()
905 905
906 906 inc = self.incrementalmissingrevs(common=common)
907 907 return inc.missingancestors(heads)
908 908
909 909 def findmissing(self, common=None, heads=None):
910 910 """Return the ancestors of heads that are not ancestors of common.
911 911
912 912 More specifically, return a list of nodes N such that every N
913 913 satisfies the following constraints:
914 914
915 915 1. N is an ancestor of some node in 'heads'
916 916 2. N is not an ancestor of any node in 'common'
917 917
918 918 The list is sorted by revision number, meaning it is
919 919 topologically sorted.
920 920
921 921 'heads' and 'common' are both lists of node IDs. If heads is
922 922 not supplied, uses all of the revlog's heads. If common is not
923 923 supplied, uses nullid."""
924 924 if common is None:
925 925 common = [nullid]
926 926 if heads is None:
927 927 heads = self.heads()
928 928
929 929 common = [self.rev(n) for n in common]
930 930 heads = [self.rev(n) for n in heads]
931 931
932 932 inc = self.incrementalmissingrevs(common=common)
933 933 return [self.node(r) for r in inc.missingancestors(heads)]
934 934
935 935 def nodesbetween(self, roots=None, heads=None):
936 936 """Return a topological path from 'roots' to 'heads'.
937 937
938 938 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
939 939 topologically sorted list of all nodes N that satisfy both of
940 940 these constraints:
941 941
942 942 1. N is a descendant of some node in 'roots'
943 943 2. N is an ancestor of some node in 'heads'
944 944
945 945 Every node is considered to be both a descendant and an ancestor
946 946 of itself, so every reachable node in 'roots' and 'heads' will be
947 947 included in 'nodes'.
948 948
949 949 'outroots' is the list of reachable nodes in 'roots', i.e., the
950 950 subset of 'roots' that is returned in 'nodes'. Likewise,
951 951 'outheads' is the subset of 'heads' that is also in 'nodes'.
952 952
953 953 'roots' and 'heads' are both lists of node IDs. If 'roots' is
954 954 unspecified, uses nullid as the only root. If 'heads' is
955 955 unspecified, uses list of all of the revlog's heads."""
956 956 nonodes = ([], [], [])
957 957 if roots is not None:
958 958 roots = list(roots)
959 959 if not roots:
960 960 return nonodes
961 961 lowestrev = min([self.rev(n) for n in roots])
962 962 else:
963 963 roots = [nullid] # Everybody's a descendant of nullid
964 964 lowestrev = nullrev
965 965 if (lowestrev == nullrev) and (heads is None):
966 966 # We want _all_ the nodes!
967 967 return ([self.node(r) for r in self], [nullid], list(self.heads()))
968 968 if heads is None:
969 969 # All nodes are ancestors, so the latest ancestor is the last
970 970 # node.
971 971 highestrev = len(self) - 1
972 972 # Set ancestors to None to signal that every node is an ancestor.
973 973 ancestors = None
974 974 # Set heads to an empty dictionary for later discovery of heads
975 975 heads = {}
976 976 else:
977 977 heads = list(heads)
978 978 if not heads:
979 979 return nonodes
980 980 ancestors = set()
981 981 # Turn heads into a dictionary so we can remove 'fake' heads.
982 982 # Also, later we will be using it to filter out the heads we can't
983 983 # find from roots.
984 984 heads = dict.fromkeys(heads, False)
985 985 # Start at the top and keep marking parents until we're done.
986 986 nodestotag = set(heads)
987 987 # Remember where the top was so we can use it as a limit later.
988 988 highestrev = max([self.rev(n) for n in nodestotag])
989 989 while nodestotag:
990 990 # grab a node to tag
991 991 n = nodestotag.pop()
992 992 # Never tag nullid
993 993 if n == nullid:
994 994 continue
995 995 # A node's revision number represents its place in a
996 996 # topologically sorted list of nodes.
997 997 r = self.rev(n)
998 998 if r >= lowestrev:
999 999 if n not in ancestors:
1000 1000 # If we are possibly a descendant of one of the roots
1001 1001 # and we haven't already been marked as an ancestor
1002 1002 ancestors.add(n) # Mark as ancestor
1003 1003 # Add non-nullid parents to list of nodes to tag.
1004 1004 nodestotag.update([p for p in self.parents(n) if
1005 1005 p != nullid])
1006 1006 elif n in heads: # We've seen it before, is it a fake head?
1007 1007 # So it is, real heads should not be the ancestors of
1008 1008 # any other heads.
1009 1009 heads.pop(n)
1010 1010 if not ancestors:
1011 1011 return nonodes
1012 1012 # Now that we have our set of ancestors, we want to remove any
1013 1013 # roots that are not ancestors.
1014 1014
1015 1015 # If one of the roots was nullid, everything is included anyway.
1016 1016 if lowestrev > nullrev:
1017 1017 # But, since we weren't, let's recompute the lowest rev to not
1018 1018 # include roots that aren't ancestors.
1019 1019
1020 1020 # Filter out roots that aren't ancestors of heads
1021 1021 roots = [root for root in roots if root in ancestors]
1022 1022 # Recompute the lowest revision
1023 1023 if roots:
1024 1024 lowestrev = min([self.rev(root) for root in roots])
1025 1025 else:
1026 1026 # No more roots? Return empty list
1027 1027 return nonodes
1028 1028 else:
1029 1029 # We are descending from nullid, and don't need to care about
1030 1030 # any other roots.
1031 1031 lowestrev = nullrev
1032 1032 roots = [nullid]
1033 1033 # Transform our roots list into a set.
1034 1034 descendants = set(roots)
1035 1035 # Also, keep the original roots so we can filter out roots that aren't
1036 1036 # 'real' roots (i.e. are descended from other roots).
1037 1037 roots = descendants.copy()
1038 1038 # Our topologically sorted list of output nodes.
1039 1039 orderedout = []
1040 1040 # Don't start at nullid since we don't want nullid in our output list,
1041 1041 # and if nullid shows up in descendants, empty parents will look like
1042 1042 # they're descendants.
1043 1043 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1044 1044 n = self.node(r)
1045 1045 isdescendant = False
1046 1046 if lowestrev == nullrev: # Everybody is a descendant of nullid
1047 1047 isdescendant = True
1048 1048 elif n in descendants:
1049 1049 # n is already a descendant
1050 1050 isdescendant = True
1051 1051 # This check only needs to be done here because all the roots
1052 1052 # will start being marked as descendants before the loop.
1053 1053 if n in roots:
1054 1054 # If n was a root, check if it's a 'real' root.
1055 1055 p = tuple(self.parents(n))
1056 1056 # If any of its parents are descendants, it's not a root.
1057 1057 if (p[0] in descendants) or (p[1] in descendants):
1058 1058 roots.remove(n)
1059 1059 else:
1060 1060 p = tuple(self.parents(n))
1061 1061 # A node is a descendant if either of its parents are
1062 1062 # descendants. (We seeded the descendants set with the roots
1063 1063 # up there, remember?)
1064 1064 if (p[0] in descendants) or (p[1] in descendants):
1065 1065 descendants.add(n)
1066 1066 isdescendant = True
1067 1067 if isdescendant and ((ancestors is None) or (n in ancestors)):
1068 1068 # Only include nodes that are both descendants and ancestors.
1069 1069 orderedout.append(n)
1070 1070 if (ancestors is not None) and (n in heads):
1071 1071 # We're trying to figure out which heads are reachable
1072 1072 # from roots.
1073 1073 # Mark this head as having been reached
1074 1074 heads[n] = True
1075 1075 elif ancestors is None:
1076 1076 # Otherwise, we're trying to discover the heads.
1077 1077 # Assume this is a head because if it isn't, the next step
1078 1078 # will eventually remove it.
1079 1079 heads[n] = True
1080 1080 # But, obviously its parents aren't.
1081 1081 for p in self.parents(n):
1082 1082 heads.pop(p, None)
1083 1083 heads = [head for head, flag in heads.iteritems() if flag]
1084 1084 roots = list(roots)
1085 1085 assert orderedout
1086 1086 assert roots
1087 1087 assert heads
1088 1088 return (orderedout, roots, heads)
1089 1089
1090 1090 def headrevs(self, revs=None):
1091 1091 if revs is None:
1092 1092 try:
1093 1093 return self.index.headrevs()
1094 1094 except AttributeError:
1095 1095 return self._headrevs()
1096 1096 if rustdagop is not None:
1097 1097 return rustdagop.headrevs(self.index, revs)
1098 1098 return dagop.headrevs(revs, self._uncheckedparentrevs)
1099 1099
1100 1100 def computephases(self, roots):
1101 1101 return self.index.computephasesmapsets(roots)
1102 1102
1103 1103 def _headrevs(self):
1104 1104 count = len(self)
1105 1105 if not count:
1106 1106 return [nullrev]
1107 1107 # we won't iterate over filtered revs, so nobody is a head at the start
1108 1108 ishead = [0] * (count + 1)
1109 1109 index = self.index
1110 1110 for r in self:
1111 1111 ishead[r] = 1 # I may be a head
1112 1112 e = index[r]
1113 1113 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1114 1114 return [r for r, val in enumerate(ishead) if val]
1115 1115
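    # Sketch of the head computation above on a toy parent table: every rev
    # starts as a candidate head and each rev clears its parents' marks.
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}   # hypothetical DAG
    #   ishead = [1] * len(parents)
    #   for r, ps in parents.items():
    #       for p in ps:
    #           if p >= 0:
    #               ishead[p] = 0
    #   assert [r for r, v in enumerate(ishead) if v] == [1, 2]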
1116 1116 def heads(self, start=None, stop=None):
1117 1117 """return the list of all nodes that have no children
1118 1118
1119 1119 if start is specified, only heads that are descendants of
1120 1120 start will be returned
1121 1121 if stop is specified, it will consider all the revs from stop
1122 1122 as if they had no children
1123 1123 """
1124 1124 if start is None and stop is None:
1125 1125 if not len(self):
1126 1126 return [nullid]
1127 1127 return [self.node(r) for r in self.headrevs()]
1128 1128
1129 1129 if start is None:
1130 1130 start = nullrev
1131 1131 else:
1132 1132 start = self.rev(start)
1133 1133
1134 1134 stoprevs = set(self.rev(n) for n in stop or [])
1135 1135
1136 1136 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1137 1137 stoprevs=stoprevs)
1138 1138
1139 1139 return [self.node(rev) for rev in revs]
1140 1140
1141 1141 def children(self, node):
1142 1142 """find the children of a given node"""
1143 1143 c = []
1144 1144 p = self.rev(node)
1145 1145 for r in self.revs(start=p + 1):
1146 1146 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1147 1147 if prevs:
1148 1148 for pr in prevs:
1149 1149 if pr == p:
1150 1150 c.append(self.node(r))
1151 1151 elif p == nullrev:
1152 1152 c.append(self.node(r))
1153 1153 return c
1154 1154
1155 1155 def commonancestorsheads(self, a, b):
1156 1156 """calculate all the heads of the common ancestors of nodes a and b"""
1157 1157 a, b = self.rev(a), self.rev(b)
1158 1158 ancs = self._commonancestorsheads(a, b)
1159 1159 return pycompat.maplist(self.node, ancs)
1160 1160
1161 1161 def _commonancestorsheads(self, *revs):
1162 1162 """calculate all the heads of the common ancestors of revs"""
1163 1163 try:
1164 1164 ancs = self.index.commonancestorsheads(*revs)
1165 1165 except (AttributeError, OverflowError): # C implementation failed
1166 1166 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1167 1167 return ancs
1168 1168
1169 1169 def isancestor(self, a, b):
1170 1170 """return True if node a is an ancestor of node b
1171 1171
1172 1172 A revision is considered an ancestor of itself."""
1173 1173 a, b = self.rev(a), self.rev(b)
1174 1174 return self.isancestorrev(a, b)
1175 1175
1176 1176 def isancestorrev(self, a, b):
1177 1177 """return True if revision a is an ancestor of revision b
1178 1178
1179 1179 A revision is considered an ancestor of itself.
1180 1180
1181 1181 The implementation of this is trivial but the use of
1182 1182 reachableroots is not."""
1183 1183 if a == nullrev:
1184 1184 return True
1185 1185 elif a == b:
1186 1186 return True
1187 1187 elif a > b:
1188 1188 return False
1189 1189 return bool(self.reachableroots(a, [b], [a], includepath=False))
1190 1190
1191 1191 def reachableroots(self, minroot, heads, roots, includepath=False):
1192 1192 """return (heads(::<roots> and <roots>::<heads>))
1193 1193
1194 1194 If includepath is True, return (<roots>::<heads>)."""
1195 1195 try:
1196 1196 return self.index.reachableroots2(minroot, heads, roots,
1197 1197 includepath)
1198 1198 except AttributeError:
1199 1199 return dagop._reachablerootspure(self.parentrevs,
1200 1200 minroot, roots, heads, includepath)
1201 1201
1202 1202 def ancestor(self, a, b):
1203 1203 """calculate the "best" common ancestor of nodes a and b"""
1204 1204
1205 1205 a, b = self.rev(a), self.rev(b)
1206 1206 try:
1207 1207 ancs = self.index.ancestors(a, b)
1208 1208 except (AttributeError, OverflowError):
1209 1209 ancs = ancestor.ancestors(self.parentrevs, a, b)
1210 1210 if ancs:
1211 1211 # choose a consistent winner when there's a tie
1212 1212 return min(map(self.node, ancs))
1213 1213 return nullid
1214 1214
1215 1215 def _match(self, id):
1216 1216 if isinstance(id, int):
1217 1217 # rev
1218 1218 return self.node(id)
1219 1219 if len(id) == 20:
1220 1220 # possibly a binary node
1221 1221 # odds of a binary node being all hex in ASCII are 1 in 10**25
1222 1222 try:
1223 1223 node = id
1224 1224 self.rev(node) # quick search the index
1225 1225 return node
1226 1226 except error.LookupError:
1227 1227 pass # may be partial hex id
1228 1228 try:
1229 1229 # str(rev)
1230 1230 rev = int(id)
1231 1231 if "%d" % rev != id:
1232 1232 raise ValueError
1233 1233 if rev < 0:
1234 1234 rev = len(self) + rev
1235 1235 if rev < 0 or rev >= len(self):
1236 1236 raise ValueError
1237 1237 return self.node(rev)
1238 1238 except (ValueError, OverflowError):
1239 1239 pass
1240 1240 if len(id) == 40:
1241 1241 try:
1242 1242 # a full hex nodeid?
1243 1243 node = bin(id)
1244 1244 self.rev(node)
1245 1245 return node
1246 1246 except (TypeError, error.LookupError):
1247 1247 pass
1248 1248
1249 1249 def _partialmatch(self, id):
1250 1250 # we don't care about wdirfilenodeids as they should always be full hashes
1251 1251 maybewdir = wdirhex.startswith(id)
1252 1252 try:
1253 1253 partial = self.index.partialmatch(id)
1254 1254 if partial and self.hasnode(partial):
1255 1255 if maybewdir:
1256 1256 # single 'ff...' match in radix tree, ambiguous with wdir
1257 1257 raise error.RevlogError
1258 1258 return partial
1259 1259 if maybewdir:
1260 1260 # no 'ff...' match in radix tree, wdir identified
1261 1261 raise error.WdirUnsupported
1262 1262 return None
1263 1263 except error.RevlogError:
1264 1264 # parsers.c radix tree lookup gave multiple matches
1265 1265 # fast path: for unfiltered changelog, radix tree is accurate
1266 1266 if not getattr(self, 'filteredrevs', None):
1267 1267 raise error.AmbiguousPrefixLookupError(
1268 1268 id, self.indexfile, _('ambiguous identifier'))
1269 1269 # fall through to slow path that filters hidden revisions
1270 1270 except (AttributeError, ValueError):
1271 1271 # we are pure python, or key was too short to search radix tree
1272 1272 pass
1273 1273
1274 1274 if id in self._pcache:
1275 1275 return self._pcache[id]
1276 1276
1277 1277 if len(id) <= 40:
1278 1278 try:
1279 1279 # hex(node)[:...]
1280 1280 l = len(id) // 2 # grab an even number of digits
1281 1281 prefix = bin(id[:l * 2])
1282 1282 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1283 1283 nl = [n for n in nl if hex(n).startswith(id) and
1284 1284 self.hasnode(n)]
1285 1285 if nullhex.startswith(id):
1286 1286 nl.append(nullid)
1287 1287 if len(nl) > 0:
1288 1288 if len(nl) == 1 and not maybewdir:
1289 1289 self._pcache[id] = nl[0]
1290 1290 return nl[0]
1291 1291 raise error.AmbiguousPrefixLookupError(
1292 1292 id, self.indexfile, _('ambiguous identifier'))
1293 1293 if maybewdir:
1294 1294 raise error.WdirUnsupported
1295 1295 return None
1296 1296 except TypeError:
1297 1297 pass
1298 1298
1299 1299 def lookup(self, id):
1300 1300 """locate a node based on:
1301 1301 - revision number or str(revision number)
1302 1302 - nodeid or subset of hex nodeid
1303 1303 """
1304 1304 n = self._match(id)
1305 1305 if n is not None:
1306 1306 return n
1307 1307 n = self._partialmatch(id)
1308 1308 if n:
1309 1309 return n
1310 1310
1311 1311 raise error.LookupError(id, self.indexfile, _('no match found'))
1312 1312
1313 1313 def shortest(self, node, minlength=1):
1314 1314 """Find the shortest unambiguous prefix that matches node."""
1315 1315 def isvalid(prefix):
1316 1316 try:
1317 1317 matchednode = self._partialmatch(prefix)
1318 1318 except error.AmbiguousPrefixLookupError:
1319 1319 return False
1320 1320 except error.WdirUnsupported:
1321 1321 # single 'ff...' match
1322 1322 return True
1323 1323 if matchednode is None:
1324 1324 raise error.LookupError(node, self.indexfile, _('no node'))
1325 1325 return True
1326 1326
1327 1327 def maybewdir(prefix):
1328 1328 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1329 1329
1330 1330 hexnode = hex(node)
1331 1331
1332 1332 def disambiguate(hexnode, minlength):
1333 1333 """Disambiguate against wdirid."""
1334 1334 for length in range(minlength, 41):
1335 1335 prefix = hexnode[:length]
1336 1336 if not maybewdir(prefix):
1337 1337 return prefix
1338 1338
1339 1339 if not getattr(self, 'filteredrevs', None):
1340 1340 try:
1341 1341 length = max(self.index.shortest(node), minlength)
1342 1342 return disambiguate(hexnode, length)
1343 1343 except error.RevlogError:
1344 1344 if node != wdirid:
1345 1345 raise error.LookupError(node, self.indexfile, _('no node'))
1346 1346 except AttributeError:
1347 1347 # Fall through to pure code
1348 1348 pass
1349 1349
1350 1350 if node == wdirid:
1351 1351 for length in range(minlength, 41):
1352 1352 prefix = hexnode[:length]
1353 1353 if isvalid(prefix):
1354 1354 return prefix
1355 1355
1356 1356 for length in range(minlength, 41):
1357 1357 prefix = hexnode[:length]
1358 1358 if isvalid(prefix):
1359 1359 return disambiguate(hexnode, length)
1360 1360
1361 1361 def cmp(self, node, text):
1362 1362 """compare text with a given file revision
1363 1363
1364 1364 returns True if text is different than what is stored.
1365 1365 """
1366 1366 p1, p2 = self.parents(node)
1367 1367 return storageutil.hashrevisionsha1(text, p1, p2) != node
1368 1368
1369 1369 def _cachesegment(self, offset, data):
1370 1370 """Add a segment to the revlog cache.
1371 1371
1372 1372 Accepts an absolute offset and the data that is at that location.
1373 1373 """
1374 1374 o, d = self._chunkcache
1375 1375 # try to add to existing cache
1376 1376 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1377 1377 self._chunkcache = o, d + data
1378 1378 else:
1379 1379 self._chunkcache = offset, data
1380 1380
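    # Toy run of the append logic above: a read contiguous with the cached
    # segment extends it, anything else replaces the cache wholesale.
    #
    #   cache = (0, b'abcd')                  # (offset, data)
    #   o, d = cache
    #   if o + len(d) == 4:                   # next read starts at offset 4
    #       cache = (o, d + b'ef')            # -> (0, b'abcdef')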
1381 1381 def _readsegment(self, offset, length, df=None):
1382 1382 """Load a segment of raw data from the revlog.
1383 1383
1384 1384 Accepts an absolute offset, length to read, and an optional existing
1385 1385 file handle to read from.
1386 1386
1387 1387 If an existing file handle is passed, it will be seeked and the
1388 1388 original seek position will NOT be restored.
1389 1389
1390 1390 Returns a str or buffer of raw byte data.
1391 1391
1392 1392 Raises if the requested number of bytes could not be read.
1393 1393 """
1394 1394 # Cache data both forward and backward around the requested
1395 1395 # data, in a fixed size window. This helps speed up operations
1396 1396 # involving reading the revlog backwards.
1397 1397 cachesize = self._chunkcachesize
1398 1398 realoffset = offset & ~(cachesize - 1)
1399 1399 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1400 1400 - realoffset)
1401 1401 with self._datareadfp(df) as df:
1402 1402 df.seek(realoffset)
1403 1403 d = df.read(reallength)
1404 1404
1405 1405 self._cachesegment(realoffset, d)
1406 1406 if offset != realoffset or reallength != length:
1407 1407 startoffset = offset - realoffset
1408 1408 if len(d) - startoffset < length:
1409 1409 raise error.RevlogError(
1410 1410 _('partial read of revlog %s; expected %d bytes from '
1411 1411 'offset %d, got %d') %
1412 1412 (self.indexfile if self._inline else self.datafile,
1413 1413 length, realoffset, len(d) - startoffset))
1414 1414
1415 1415 return util.buffer(d, startoffset, length)
1416 1416
1417 1417 if len(d) < length:
1418 1418 raise error.RevlogError(
1419 1419 _('partial read of revlog %s; expected %d bytes from offset '
1420 1420 '%d, got %d') %
1421 1421 (self.indexfile if self._inline else self.datafile,
1422 1422 length, offset, len(d)))
1423 1423
1424 1424 return d
1425 1425
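    # Worked example of the window alignment above with the default 64 KiB
    # cache size: a 100-byte read at offset 70000 is widened to the
    # surrounding cache-aligned window.
    #
    #   cachesize = 65536
    #   realoffset = 70000 & ~(cachesize - 1)                    # 65536
    #   reallength = (((70000 + 100 + cachesize)
    #                  & ~(cachesize - 1)) - realoffset)         # 65536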
1426 1426 def _getsegment(self, offset, length, df=None):
1427 1427 """Obtain a segment of raw data from the revlog.
1428 1428
1429 1429 Accepts an absolute offset, length of bytes to obtain, and an
1430 1430 optional file handle to the already-opened revlog. If the file
1431 1431 handle is used, its original seek position will not be preserved.
1432 1432
1433 1433 Requests for data may be returned from a cache.
1434 1434
1435 1435 Returns a str or a buffer instance of raw byte data.
1436 1436 """
1437 1437 o, d = self._chunkcache
1438 1438 l = len(d)
1439 1439
1440 1440 # is it in the cache?
1441 1441 cachestart = offset - o
1442 1442 cacheend = cachestart + length
1443 1443 if cachestart >= 0 and cacheend <= l:
1444 1444 if cachestart == 0 and cacheend == l:
1445 1445 return d # avoid a copy
1446 1446 return util.buffer(d, cachestart, cacheend - cachestart)
1447 1447
1448 1448 return self._readsegment(offset, length, df=df)
1449 1449
1450 1450 def _getsegmentforrevs(self, startrev, endrev, df=None):
1451 1451 """Obtain a segment of raw data corresponding to a range of revisions.
1452 1452
1453 1453 Accepts the start and end revisions and an optional already-open
1454 1454 file handle to be used for reading. If the file handle is used, its
1455 1455 seek position will not be preserved.
1456 1456
1457 1457 Requests for data may be satisfied by a cache.
1458 1458
1459 1459 Returns a 2-tuple of (offset, data) for the requested range of
1460 1460 revisions. Offset is the integer offset from the beginning of the
1461 1461 revlog and data is a str or buffer of the raw byte data.
1462 1462
1463 1463 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1464 1464 to determine where each revision's data begins and ends.
1465 1465 """
1466 1466 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1467 1467 # (functions are expensive).
1468 1468 index = self.index
1469 1469 istart = index[startrev]
1470 1470 start = int(istart[0] >> 16)
1471 1471 if startrev == endrev:
1472 1472 end = start + istart[1]
1473 1473 else:
1474 1474 iend = index[endrev]
1475 1475 end = int(iend[0] >> 16) + iend[1]
1476 1476
1477 1477 if self._inline:
1478 1478 start += (startrev + 1) * self._io.size
1479 1479 end += (endrev + 1) * self._io.size
1480 1480 length = end - start
1481 1481
1482 1482 return start, self._getsegment(start, length, df=df)
1483 1483
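    # Sketch of the inline-data adjustment above: in an inline revlog the
    # fixed-size index records are interleaved with the data, so the flat
    # offsets must skip one record per revision already seen.
    #
    #   iosize = 64                           # indexformatng.size
    #   start = 100 + (1 + 1) * iosize        # data of rev 1 -> 228
    #   end = 300 + (3 + 1) * iosize          # end of rev 3  -> 556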
1484 1484 def _chunk(self, rev, df=None):
1485 1485 """Obtain a single decompressed chunk for a revision.
1486 1486
1487 1487 Accepts an integer revision and an optional already-open file handle
1488 1488 to be used for reading. If used, the seek position of the file will not
1489 1489 be preserved.
1490 1490
1491 1491 Returns a str holding uncompressed data for the requested revision.
1492 1492 """
1493 1493 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1494 1494
1495 1495 def _chunks(self, revs, df=None, targetsize=None):
1496 1496 """Obtain decompressed chunks for the specified revisions.
1497 1497
1498 1498 Accepts an iterable of numeric revisions that are assumed to be in
1499 1499 ascending order. Also accepts an optional already-open file handle
1500 1500 to be used for reading. If used, the seek position of the file will
1501 1501 not be preserved.
1502 1502
1503 1503 This function is similar to calling ``self._chunk()`` multiple times,
1504 1504 but is faster.
1505 1505
1506 1506 Returns a list with decompressed data for each requested revision.
1507 1507 """
1508 1508 if not revs:
1509 1509 return []
1510 1510 start = self.start
1511 1511 length = self.length
1512 1512 inline = self._inline
1513 1513 iosize = self._io.size
1514 1514 buffer = util.buffer
1515 1515
1516 1516 l = []
1517 1517 ladd = l.append
1518 1518
1519 1519 if not self._withsparseread:
1520 1520 slicedchunks = (revs,)
1521 1521 else:
1522 1522 slicedchunks = deltautil.slicechunk(self, revs,
1523 1523 targetsize=targetsize)
1524 1524
1525 1525 for revschunk in slicedchunks:
1526 1526 firstrev = revschunk[0]
1527 1527 # Skip trailing revisions with empty diff
1528 1528 for lastrev in revschunk[::-1]:
1529 1529 if length(lastrev) != 0:
1530 1530 break
1531 1531
1532 1532 try:
1533 1533 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1534 1534 except OverflowError:
1535 1535 # issue4215 - we can't cache a run of chunks greater than
1536 1536 # 2G on Windows
1537 1537 return [self._chunk(rev, df=df) for rev in revschunk]
1538 1538
1539 1539 decomp = self.decompress
1540 1540 for rev in revschunk:
1541 1541 chunkstart = start(rev)
1542 1542 if inline:
1543 1543 chunkstart += (rev + 1) * iosize
1544 1544 chunklength = length(rev)
1545 1545 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1546 1546
1547 1547 return l
1548 1548
1549 1549 def _chunkclear(self):
1550 1550 """Clear the raw chunk cache."""
1551 1551 self._chunkcache = (0, '')
1552 1552
1553 1553 def deltaparent(self, rev):
1554 1554 """return deltaparent of the given revision"""
1555 1555 base = self.index[rev][3]
1556 1556 if base == rev:
1557 1557 return nullrev
1558 1558 elif self._generaldelta:
1559 1559 return base
1560 1560 else:
1561 1561 return rev - 1
1562 1562
1563 1563 def issnapshot(self, rev):
1564 1564 """tells whether rev is a snapshot
1565 1565 """
1566 1566 if not self._sparserevlog:
1567 1567 return self.deltaparent(rev) == nullrev
1568 1568 elif util.safehasattr(self.index, 'issnapshot'):
1569 1569 # directly assign the method to cache the testing and access
1570 1570 self.issnapshot = self.index.issnapshot
1571 1571 return self.issnapshot(rev)
1572 1572 if rev == nullrev:
1573 1573 return True
1574 1574 entry = self.index[rev]
1575 1575 base = entry[3]
1576 1576 if base == rev:
1577 1577 return True
1578 1578 if base == nullrev:
1579 1579 return True
1580 1580 p1 = entry[5]
1581 1581 p2 = entry[6]
1582 1582 if base == p1 or base == p2:
1583 1583 return False
1584 1584 return self.issnapshot(base)
1585 1585
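The recursion distinguishes full snapshots (base is the revision itself or nullrev), plain deltas (base is one of the parents), and intermediate snapshots (a delta whose base is itself a snapshot). A self-contained sketch over a toy index where each entry is (base, p1, p2):

    nullrev = -1

    def issnapshot_sketch(index, rev):
        if rev == nullrev:
            return True
        base, p1, p2 = index[rev]
        if base == rev or base == nullrev:
            return True                # full snapshot stored here
        if base == p1 or base == p2:
            return False               # plain delta against a parent
        return issnapshot_sketch(index, base)   # intermediate snapshot?

    # rev 0: full text; rev 1: delta vs parent 0; rev 2: delta based on
    # rev 0, which is not one of its parents
    toy = [(0, nullrev, nullrev), (0, 0, nullrev), (0, 1, nullrev)]
    assert issnapshot_sketch(toy, 0)
    assert not issnapshot_sketch(toy, 1)
    assert issnapshot_sketch(toy, 2)
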
1586 1586 def snapshotdepth(self, rev):
1587 1587 """number of snapshot in the chain before this one"""
1588 1588 if not self.issnapshot(rev):
1589 1589 raise error.ProgrammingError('revision %d not a snapshot')
1590 1590 return len(self._deltachain(rev)[0]) - 1
1591 1591
1592 1592 def revdiff(self, rev1, rev2):
1593 1593 """return or calculate a delta between two revisions
1594 1594
1595 1595 The delta calculated is in binary form and is intended to be written to
1596 1596 revlog data directly. So this function needs raw revision data.
1597 1597 """
1598 1598 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1599 1599 return bytes(self._chunk(rev2))
1600 1600
1601 1601 return mdiff.textdiff(self.rawdata(rev1),
1602 1602 self.rawdata(rev2))
1603 1603
1604 1604 def revision(self, nodeorrev, _df=None, raw=False):
1605 1605 """return an uncompressed revision of a given node or revision
1606 1606 number.
1607 1607
1608 1608 _df - an existing file handle to read from. (internal-only)
1609 1609 raw - an optional argument specifying if the revision data is to be
1610 1610 treated as raw data when applying flag transforms. 'raw' should be set
1611 1611 to True when generating changegroups or in debug commands.
1612 1612 """
1613 1613 if raw:
1614 1614 msg = ('revlog.revision(..., raw=True) is deprecated, '
1615 1615 'use revlog.rawdata(...)')
1616 1616 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1617 1617 return self._revisiondata(nodeorrev, _df, raw=raw)
1618 1618
1619 1619 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1620 1620 # deal with <nodeorrev> argument type
1621 1621 if isinstance(nodeorrev, int):
1622 1622 rev = nodeorrev
1623 1623 node = self.node(rev)
1624 1624 else:
1625 1625 node = nodeorrev
1626 1626 rev = None
1627 1627
1628 1628 # fast path the special `nullid` rev
1629 1629 if node == nullid:
1630 1630 return ""
1631 1631
1632 1632 # The text as stored inside the revlog. Might be the revision or might
1633 1633 # need to be processed to retrieve the revision.
1634 1634 rawtext = None
1635 1635
1636 1636 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1637 1637
1638 1638 if raw and validated:
1639 1639 # if we don't want to process the raw text and the raw
1640 1640 # text is cached, we can exit early.
1641 1641 return rawtext
1642 1642 if rev is None:
1643 1643 rev = self.rev(node)
1644 1644 # the revlog's flags for this revision
1645 1645 # (these usually alter its state or content)
1646 1646 flags = self.flags(rev)
1647 1647
1648 1648 if validated and flags == REVIDX_DEFAULT_FLAGS:
1649 1649 # no extra flags set, no flag processor runs, text = rawtext
1650 1650 return rawtext
1651 1651
1652 1652 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1653 1653 if validatehash:
1654 1654 self.checkhash(text, node, rev=rev)
1655 1655 if not validated:
1656 1656 self._revisioncache = (node, rev, rawtext)
1657 1657
1658 1658 return text
1659 1659
1660 1660 def _rawtext(self, node, rev, _df=None):
1661 1661 """return the possibly unvalidated rawtext for a revision
1662 1662
1663 1663 returns (rev, rawtext, validated)
1664 1664 """
1665 1665
1666 1666 # revision in the cache (could be useful to apply delta)
1667 1667 cachedrev = None
1668 1668 # An intermediate text to apply deltas to
1669 1669 basetext = None
1670 1670
1671 1671 # Check if we have the entry in cache
1672 1672 # The cache entry looks like (node, rev, rawtext)
1673 1673 if self._revisioncache:
1674 1674 if self._revisioncache[0] == node:
1675 1675 return (rev, self._revisioncache[2], True)
1676 1676 cachedrev = self._revisioncache[1]
1677 1677
1678 1678 if rev is None:
1679 1679 rev = self.rev(node)
1680 1680
1681 1681 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1682 1682 if stopped:
1683 1683 basetext = self._revisioncache[2]
1684 1684
1685 1685 # drop the cache to save memory; the caller is expected to
1686 1686 # update self._revisioncache after validating the text
1687 1687 self._revisioncache = None
1688 1688
1689 1689 targetsize = None
1690 1690 rawsize = self.index[rev][2]
1691 1691 if 0 <= rawsize:
1692 1692 targetsize = 4 * rawsize
1693 1693
1694 1694 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1695 1695 if basetext is None:
1696 1696 basetext = bytes(bins[0])
1697 1697 bins = bins[1:]
1698 1698
1699 1699 rawtext = mdiff.patches(basetext, bins)
1700 1700 del basetext # let us have a chance to free memory early
1701 1701 return (rev, rawtext, False)
1702 1702
1703 1703 def rawdata(self, nodeorrev, _df=None):
1704 1704 """return an uncompressed raw data of a given node or revision number.
1705 1705
1706 1706 _df - an existing file handle to read from. (internal-only)
1707 1707 """
1708 1708 return self._revisiondata(nodeorrev, _df, raw=True)
1709 1709
1710 1710 def hash(self, text, p1, p2):
1711 1711 """Compute a node hash.
1712 1712
1713 1713 Available as a function so that subclasses can replace the hash
1714 1714 as needed.
1715 1715 """
1716 1716 return storageutil.hashrevisionsha1(text, p1, p2)
1717 1717
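For reference, the default scheme implemented by storageutil.hashrevisionsha1 feeds SHA-1 the two parent nodes in sorted order followed by the text, so the result does not depend on parent order. A minimal standalone sketch:

    import hashlib

    def hashrevision_sketch(text, p1, p2):
        # sort the parents so swapping them yields the same node
        a, b = (p1, p2) if p1 < p2 else (p2, p1)
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    nullid_ = b'\0' * 20        # 20-byte null parent, as for a root revision
    node = hashrevision_sketch(b'hello\n', nullid_, nullid_)
    assert len(node) == 20
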
1718 def _processflags(self, text, flags, operation, raw=False):
1719 """Inspect revision data flags and applies transforms defined by
1720 registered flag processors.
1721
1722 ``text`` - the revision data to process
1723 ``flags`` - the revision flags
1724 ``operation`` - the operation being performed (read or write)
1725 ``raw`` - an optional argument indicating whether the raw transform should be
1726 applied.
1727
1728 This method processes the flags in the order (or reverse order if
1729 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1730 flag processors registered for present flags. The order of flags defined
1731 in REVIDX_FLAGS_ORDER must remain stable because the transforms do not commute.
1732
1733 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1734 processed text and ``validatehash`` is a bool indicating whether the
1735 returned text should be checked for hash integrity.
1736
1737 Note: If the ``raw`` argument is set, it has precedence over the
1738 operation and will only update the value of ``validatehash``.
1739 """
1740 # fast path: no flag processors will run
1741 if flags == 0:
1742 return text, True
1743 if operation not in ('read', 'write'):
1744 raise error.ProgrammingError(_("invalid '%s' operation") %
1745 operation)
1746 # Check all flags are known.
1747 if flags & ~flagutil.REVIDX_KNOWN_FLAGS:
1748 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1749 (flags & ~flagutil.REVIDX_KNOWN_FLAGS))
1750 validatehash = True
1751 # Depending on the operation (read or write), the order might be
1752 # reversed due to non-commutative transforms.
1753 orderedflags = REVIDX_FLAGS_ORDER
1754 if operation == 'write':
1755 orderedflags = reversed(orderedflags)
1756
1757 for flag in orderedflags:
1758 # If a flagprocessor has been registered for a known flag, apply the
1759 # related operation transform and update result tuple.
1760 if flag & flags:
1761 vhash = True
1762
1763 if flag not in self._flagprocessors:
1764 message = _("missing processor for flag '%#x'") % (flag)
1765 raise error.RevlogError(message)
1766
1767 processor = self._flagprocessors[flag]
1768 if processor is not None:
1769 readtransform, writetransform, rawtransform = processor
1770
1771 if raw:
1772 vhash = rawtransform(self, text)
1773 elif operation == 'read':
1774 text, vhash = readtransform(self, text)
1775 else: # write operation
1776 text, vhash = writetransform(self, text)
1777 validatehash = validatehash and vhash
1778
1779 return text, validatehash
1780
1781 1718 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1782 1719 """Check node hash integrity.
1783 1720
1784 1721 Available as a function so that subclasses can extend hash mismatch
1785 1722 behaviors as needed.
1786 1723 """
1787 1724 try:
1788 1725 if p1 is None and p2 is None:
1789 1726 p1, p2 = self.parents(node)
1790 1727 if node != self.hash(text, p1, p2):
1791 1728 # Clear the revision cache on hash failure. The revision cache
1792 1729 # only stores the raw revision and clearing the cache does have
1793 1730 # the side-effect that we won't have a cache hit when the raw
1794 1731 # revision data is accessed. But this case should be rare and
1795 1732 # it is extra work to teach the cache about the hash
1796 1733 # verification state.
1797 1734 if self._revisioncache and self._revisioncache[0] == node:
1798 1735 self._revisioncache = None
1799 1736
1800 1737 revornode = rev
1801 1738 if revornode is None:
1802 1739 revornode = templatefilters.short(hex(node))
1803 1740 raise error.RevlogError(_("integrity check failed on %s:%s")
1804 1741 % (self.indexfile, pycompat.bytestr(revornode)))
1805 1742 except error.RevlogError:
1806 1743 if self._censorable and storageutil.iscensoredtext(text):
1807 1744 raise error.CensoredNodeError(self.indexfile, node, text)
1808 1745 raise
1809 1746
1810 1747 def _enforceinlinesize(self, tr, fp=None):
1811 1748 """Check if the revlog is too big for inline and convert if so.
1812 1749
1813 1750 This should be called after revisions are added to the revlog. If the
1814 1751 revlog has grown too large to be an inline revlog, it will convert it
1815 1752 to use multiple index and data files.
1816 1753 """
1817 1754 tiprev = len(self) - 1
1818 1755 if (not self._inline or
1819 1756 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1820 1757 return
1821 1758
1822 1759 trinfo = tr.find(self.indexfile)
1823 1760 if trinfo is None:
1824 1761 raise error.RevlogError(_("%s not found in the transaction")
1825 1762 % self.indexfile)
1826 1763
1827 1764 trindex = trinfo[2]
1828 1765 if trindex is not None:
1829 1766 dataoff = self.start(trindex)
1830 1767 else:
1831 1768 # revlog was stripped at start of transaction, use all leftover data
1832 1769 trindex = len(self) - 1
1833 1770 dataoff = self.end(tiprev)
1834 1771
1835 1772 tr.add(self.datafile, dataoff)
1836 1773
1837 1774 if fp:
1838 1775 fp.flush()
1839 1776 fp.close()
1840 1777 # We can't use the cached file handle after close(). So prevent
1841 1778 # its usage.
1842 1779 self._writinghandles = None
1843 1780
1844 1781 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1845 1782 for r in self:
1846 1783 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1847 1784
1848 1785 with self._indexfp('w') as fp:
1849 1786 self.version &= ~FLAG_INLINE_DATA
1850 1787 self._inline = False
1851 1788 io = self._io
1852 1789 for i in self:
1853 1790 e = io.packentry(self.index[i], self.node, self.version, i)
1854 1791 fp.write(e)
1855 1792
1856 1793 # the temp file replaces the real index when we exit the context
1857 1794 # manager
1858 1795
1859 1796 tr.replace(self.indexfile, trindex * self._io.size)
1860 1797 self._chunkclear()
1861 1798
1862 1799 def _nodeduplicatecallback(self, transaction, node):
1863 1800 """called when trying to add a node already stored.
1864 1801 """
1865 1802
1866 1803 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1867 1804 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1868 1805 """add a revision to the log
1869 1806
1870 1807 text - the revision data to add
1871 1808 transaction - the transaction object used for rollback
1872 1809 link - the linkrev data to add
1873 1810 p1, p2 - the parent nodeids of the revision
1874 1811 cachedelta - an optional precomputed delta
1875 1812 node - nodeid of revision; typically node is not specified, and it is
1876 1813 computed by default as hash(text, p1, p2); however, subclasses might
1877 1814 use a different hashing method (and override checkhash() in that case)
1878 1815 flags - the known flags to set on the revision
1879 1816 deltacomputer - an optional deltacomputer instance shared between
1880 1817 multiple calls
1881 1818 """
1882 1819 if link == nullrev:
1883 1820 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1884 1821 % self.indexfile)
1885 1822
1886 1823 if flags:
1887 1824 node = node or self.hash(text, p1, p2)
1888 1825
1889 1826 rawtext, validatehash = self._processflags(text, flags, 'write')
1890 1827
1891 1828 # If the flag processor modifies the revision data, ignore any provided
1892 1829 # cachedelta.
1893 1830 if rawtext != text:
1894 1831 cachedelta = None
1895 1832
1896 1833 if len(rawtext) > _maxentrysize:
1897 1834 raise error.RevlogError(
1898 1835 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1899 1836 % (self.indexfile, len(rawtext)))
1900 1837
1901 1838 node = node or self.hash(rawtext, p1, p2)
1902 1839 if node in self.nodemap:
1903 1840 return node
1904 1841
1905 1842 if validatehash:
1906 1843 self.checkhash(rawtext, node, p1=p1, p2=p2)
1907 1844
1908 1845 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1909 1846 flags, cachedelta=cachedelta,
1910 1847 deltacomputer=deltacomputer)
1911 1848
1912 1849 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1913 1850 cachedelta=None, deltacomputer=None):
1914 1851 """add a raw revision with known flags, node and parents
1915 1852 useful when reusing a revision not stored in this revlog (e.g. received
1916 1853 over the wire, or read from an external bundle).
1917 1854 """
1918 1855 dfh = None
1919 1856 if not self._inline:
1920 1857 dfh = self._datafp("a+")
1921 1858 ifh = self._indexfp("a+")
1922 1859 try:
1923 1860 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1924 1861 flags, cachedelta, ifh, dfh,
1925 1862 deltacomputer=deltacomputer)
1926 1863 finally:
1927 1864 if dfh:
1928 1865 dfh.close()
1929 1866 ifh.close()
1930 1867
1931 1868 def compress(self, data):
1932 1869 """Generate a possibly-compressed representation of data."""
1933 1870 if not data:
1934 1871 return '', data
1935 1872
1936 1873 compressed = self._compressor.compress(data)
1937 1874
1938 1875 if compressed:
1939 1876 # The revlog compressor added the header in the returned data.
1940 1877 return '', compressed
1941 1878
1942 1879 if data[0:1] == '\0':
1943 1880 return '', data
1944 1881 return 'u', data
1945 1882
1946 1883 def decompress(self, data):
1947 1884 """Decompress a revlog chunk.
1948 1885
1949 1886 The chunk is expected to begin with a header identifying the
1950 1887 format type so it can be routed to an appropriate decompressor.
1951 1888 """
1952 1889 if not data:
1953 1890 return data
1954 1891
1955 1892 # Revlogs are read much more frequently than they are written and many
1956 1893 # chunks only take microseconds to decompress, so performance is
1957 1894 # important here.
1958 1895 #
1959 1896 # We can make a few assumptions about revlogs:
1960 1897 #
1961 1898 # 1) the majority of chunks will be compressed (as opposed to inline
1962 1899 # raw data).
1963 1900 # 2) decompressing *any* data will likely be at least 10x slower than
1964 1901 # returning raw inline data.
1965 1902 # 3) we want to prioritize common and officially supported compression
1966 1903 # engines
1967 1904 #
1968 1905 # It follows that we want to optimize for "decompress compressed data
1969 1906 # when encoded with common and officially supported compression engines"
1970 1907 # case over "raw data" and "data encoded by less common or non-official
1971 1908 # compression engines." That is why we have the inline lookup first
1972 1909 # followed by the compengines lookup.
1973 1910 #
1974 1911 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1975 1912 # compressed chunks. And this matters for changelog and manifest reads.
1976 1913 t = data[0:1]
1977 1914
1978 1915 if t == 'x':
1979 1916 try:
1980 1917 return _zlibdecompress(data)
1981 1918 except zlib.error as e:
1982 1919 raise error.RevlogError(_('revlog decompress error: %s') %
1983 1920 stringutil.forcebytestr(e))
1984 1921 # '\0' is more common than 'u' so it goes first.
1985 1922 elif t == '\0':
1986 1923 return data
1987 1924 elif t == 'u':
1988 1925 return util.buffer(data, 1)
1989 1926
1990 1927 try:
1991 1928 compressor = self._decompressors[t]
1992 1929 except KeyError:
1993 1930 try:
1994 1931 engine = util.compengines.forrevlogheader(t)
1995 1932 compressor = engine.revlogcompressor(self._compengineopts)
1996 1933 self._decompressors[t] = compressor
1997 1934 except KeyError:
1998 1935 raise error.RevlogError(_('unknown compression type %r') % t)
1999 1936
2000 1937 return compressor.decompress(data)
2001 1938
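A toy round trip of the two methods' header convention, with plain zlib and a size check standing in for the configured compression engine (the real compress() delegates that decision to the engine):

    import zlib

    def compress_sketch(data):
        if not data:
            return b'', data
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            return b'', compressed     # zlib streams already start with 'x'
        if data[0:1] == b'\0':
            return b'', data           # '\0' payloads are self-marking
        return b'u', data              # otherwise mark raw data with 'u'

    def decompress_sketch(data):
        if not data:
            return data
        t = data[0:1]
        if t == b'x':
            return zlib.decompress(data)
        if t == b'\0':
            return data
        if t == b'u':
            return data[1:]
        raise ValueError('unknown compression type %r' % t)

    payload = b'some revision text ' * 8
    header, blob = compress_sketch(payload)
    assert decompress_sketch(header + blob) == payload
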
2002 1939 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2003 1940 cachedelta, ifh, dfh, alwayscache=False,
2004 1941 deltacomputer=None):
2005 1942 """internal function to add revisions to the log
2006 1943
2007 1944 see addrevision for argument descriptions.
2008 1945
2009 1946 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2010 1947
2011 1948 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2012 1949 be used.
2013 1950
2014 1951 invariants:
2015 1952 - rawtext is optional (can be None); if not set, cachedelta must be set.
2016 1953 if both are set, they must correspond to each other.
2017 1954 """
2018 1955 if node == nullid:
2019 1956 raise error.RevlogError(_("%s: attempt to add null revision") %
2020 1957 self.indexfile)
2021 1958 if node == wdirid or node in wdirfilenodeids:
2022 1959 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2023 1960 self.indexfile)
2024 1961
2025 1962 if self._inline:
2026 1963 fh = ifh
2027 1964 else:
2028 1965 fh = dfh
2029 1966
2030 1967 btext = [rawtext]
2031 1968
2032 1969 curr = len(self)
2033 1970 prev = curr - 1
2034 1971 offset = self.end(prev)
2035 1972 p1r, p2r = self.rev(p1), self.rev(p2)
2036 1973
2037 1974 # full versions are inserted when the needed deltas
2038 1975 # become comparable to the uncompressed text
2039 1976 if rawtext is None:
2040 1977 # need the rawtext size before flag processors change it, which is
2041 1978 # the non-raw size. use revlog explicitly to avoid filelog's extra
2042 1979 # logic that might remove metadata size.
2043 1980 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2044 1981 cachedelta[1])
2045 1982 else:
2046 1983 textlen = len(rawtext)
2047 1984
2048 1985 if deltacomputer is None:
2049 1986 deltacomputer = deltautil.deltacomputer(self)
2050 1987
2051 1988 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2052 1989
2053 1990 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2054 1991
2055 1992 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2056 1993 deltainfo.base, link, p1r, p2r, node)
2057 1994 self.index.append(e)
2058 1995 self.nodemap[node] = curr
2059 1996
2060 1997 # Reset the pure node cache start lookup offset to account for new
2061 1998 # revision.
2062 1999 if self._nodepos is not None:
2063 2000 self._nodepos = curr
2064 2001
2065 2002 entry = self._io.packentry(e, self.node, self.version, curr)
2066 2003 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2067 2004 link, offset)
2068 2005
2069 2006 rawtext = btext[0]
2070 2007
2071 2008 if alwayscache and rawtext is None:
2072 2009 rawtext = deltacomputer.buildtext(revinfo, fh)
2073 2010
2074 2011 if type(rawtext) == bytes: # only accept immutable objects
2075 2012 self._revisioncache = (node, curr, rawtext)
2076 2013 self._chainbasecache[curr] = deltainfo.chainbase
2077 2014 return node
2078 2015
2079 2016 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2080 2017 # Files opened in a+ mode have inconsistent behavior on various
2081 2018 # platforms. Windows requires that a file positioning call be made
2082 2019 # when the file handle transitions between reads and writes. See
2083 2020 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2084 2021 # platforms, Python or the platform itself can be buggy. Some versions
2085 2022 # of Solaris have been observed to not append at the end of the file
2086 2023 # if the file was seeked to before the end. See issue4943 for more.
2087 2024 #
2088 2025 # We work around this issue by inserting a seek() before writing.
2089 2026 # Note: This is likely not necessary on Python 3. However, because
2090 2027 # the file handle is reused for reads and may be seeked there, we need
2091 2028 # to be careful before changing this.
2092 2029 ifh.seek(0, os.SEEK_END)
2093 2030 if dfh:
2094 2031 dfh.seek(0, os.SEEK_END)
2095 2032
2096 2033 curr = len(self) - 1
2097 2034 if not self._inline:
2098 2035 transaction.add(self.datafile, offset)
2099 2036 transaction.add(self.indexfile, curr * len(entry))
2100 2037 if data[0]:
2101 2038 dfh.write(data[0])
2102 2039 dfh.write(data[1])
2103 2040 ifh.write(entry)
2104 2041 else:
2105 2042 offset += curr * self._io.size
2106 2043 transaction.add(self.indexfile, offset, curr)
2107 2044 ifh.write(entry)
2108 2045 ifh.write(data[0])
2109 2046 ifh.write(data[1])
2110 2047 self._enforceinlinesize(transaction, ifh)
2111 2048
2112 2049 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2113 2050 """
2114 2051 add a delta group
2115 2052
2116 2053 given a set of deltas, add them to the revision log. the
2117 2054 first delta is against its parent, which should be in our
2118 2055 log, the rest are against the previous delta.
2119 2056
2120 2057 If ``addrevisioncb`` is defined, it will be called with arguments of
2121 2058 this revlog and the node that was added.
2122 2059 """
2123 2060
2124 2061 if self._writinghandles:
2125 2062 raise error.ProgrammingError('cannot nest addgroup() calls')
2126 2063
2127 2064 nodes = []
2128 2065
2129 2066 r = len(self)
2130 2067 end = 0
2131 2068 if r:
2132 2069 end = self.end(r - 1)
2133 2070 ifh = self._indexfp("a+")
2134 2071 isize = r * self._io.size
2135 2072 if self._inline:
2136 2073 transaction.add(self.indexfile, end + isize, r)
2137 2074 dfh = None
2138 2075 else:
2139 2076 transaction.add(self.indexfile, isize, r)
2140 2077 transaction.add(self.datafile, end)
2141 2078 dfh = self._datafp("a+")
2142 2079 def flush():
2143 2080 if dfh:
2144 2081 dfh.flush()
2145 2082 ifh.flush()
2146 2083
2147 2084 self._writinghandles = (ifh, dfh)
2148 2085
2149 2086 try:
2150 2087 deltacomputer = deltautil.deltacomputer(self)
2151 2088 # loop through our set of deltas
2152 2089 for data in deltas:
2153 2090 node, p1, p2, linknode, deltabase, delta, flags = data
2154 2091 link = linkmapper(linknode)
2155 2092 flags = flags or REVIDX_DEFAULT_FLAGS
2156 2093
2157 2094 nodes.append(node)
2158 2095
2159 2096 if node in self.nodemap:
2160 2097 self._nodeduplicatecallback(transaction, node)
2161 2098 # this can happen if two branches make the same change
2162 2099 continue
2163 2100
2164 2101 for p in (p1, p2):
2165 2102 if p not in self.nodemap:
2166 2103 raise error.LookupError(p, self.indexfile,
2167 2104 _('unknown parent'))
2168 2105
2169 2106 if deltabase not in self.nodemap:
2170 2107 raise error.LookupError(deltabase, self.indexfile,
2171 2108 _('unknown delta base'))
2172 2109
2173 2110 baserev = self.rev(deltabase)
2174 2111
2175 2112 if baserev != nullrev and self.iscensored(baserev):
2176 2113 # if base is censored, delta must be full replacement in a
2177 2114 # single patch operation
2178 2115 hlen = struct.calcsize(">lll")
2179 2116 oldlen = self.rawsize(baserev)
2180 2117 newlen = len(delta) - hlen
2181 2118 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2182 2119 raise error.CensoredBaseError(self.indexfile,
2183 2120 self.node(baserev))
2184 2121
2185 2122 if not flags and self._peek_iscensored(baserev, delta, flush):
2186 2123 flags |= REVIDX_ISCENSORED
2187 2124
2188 2125 # We assume consumers of addrevisioncb will want to retrieve
2189 2126 # the added revision, which will require a call to
2190 2127 # revision(). revision() will fast path if there is a cache
2191 2128 # hit. So, we tell _addrevision() to always cache in this case.
2192 2129 # We're only using addgroup() in the context of changegroup
2193 2130 # generation so the revision data can always be handled as raw
2194 2131 # by the flagprocessor.
2195 2132 self._addrevision(node, None, transaction, link,
2196 2133 p1, p2, flags, (baserev, delta),
2197 2134 ifh, dfh,
2198 2135 alwayscache=bool(addrevisioncb),
2199 2136 deltacomputer=deltacomputer)
2200 2137
2201 2138 if addrevisioncb:
2202 2139 addrevisioncb(self, node)
2203 2140
2204 2141 if not dfh and not self._inline:
2205 2142 # addrevision switched from inline to conventional
2206 2143 # reopen the index
2207 2144 ifh.close()
2208 2145 dfh = self._datafp("a+")
2209 2146 ifh = self._indexfp("a+")
2210 2147 self._writinghandles = (ifh, dfh)
2211 2148 finally:
2212 2149 self._writinghandles = None
2213 2150
2214 2151 if dfh:
2215 2152 dfh.close()
2216 2153 ifh.close()
2217 2154
2218 2155 return nodes
2219 2156
2220 2157 def iscensored(self, rev):
2221 2158 """Check if a file revision is censored."""
2222 2159 if not self._censorable:
2223 2160 return False
2224 2161
2225 2162 return self.flags(rev) & REVIDX_ISCENSORED
2226 2163
2227 2164 def _peek_iscensored(self, baserev, delta, flush):
2228 2165 """Quickly check if a delta produces a censored revision."""
2229 2166 if not self._censorable:
2230 2167 return False
2231 2168
2232 2169 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2233 2170
2234 2171 def getstrippoint(self, minlink):
2235 2172 """find the minimum rev that must be stripped to strip the linkrev
2236 2173
2237 2174 Returns a tuple containing the minimum rev and a set of all revs that
2238 2175 have linkrevs that will be broken by this strip.
2239 2176 """
2240 2177 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2241 2178 self.headrevs(),
2242 2179 self.linkrev, self.parentrevs)
2243 2180
2244 2181 def strip(self, minlink, transaction):
2245 2182 """truncate the revlog on the first revision with a linkrev >= minlink
2246 2183
2247 2184 This function is called when we're stripping revision minlink and
2248 2185 its descendants from the repository.
2249 2186
2250 2187 We have to remove all revisions with linkrev >= minlink, because
2251 2188 the equivalent changelog revisions will be renumbered after the
2252 2189 strip.
2253 2190
2254 2191 So we truncate the revlog on the first of these revisions, and
2255 2192 trust that the caller has saved the revisions that shouldn't be
2256 2193 removed and that it'll re-add them after this truncation.
2257 2194 """
2258 2195 if len(self) == 0:
2259 2196 return
2260 2197
2261 2198 rev, _ = self.getstrippoint(minlink)
2262 2199 if rev == len(self):
2263 2200 return
2264 2201
2265 2202 # first truncate the files on disk
2266 2203 end = self.start(rev)
2267 2204 if not self._inline:
2268 2205 transaction.add(self.datafile, end)
2269 2206 end = rev * self._io.size
2270 2207 else:
2271 2208 end += rev * self._io.size
2272 2209
2273 2210 transaction.add(self.indexfile, end)
2274 2211
2275 2212 # then reset internal state in memory to forget those revisions
2276 2213 self._revisioncache = None
2277 2214 self._chaininfocache = {}
2278 2215 self._chunkclear()
2279 2216 for x in pycompat.xrange(rev, len(self)):
2280 2217 del self.nodemap[self.node(x)]
2281 2218
2282 2219 del self.index[rev:-1]
2283 2220 self._nodepos = None
2284 2221
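The truncation offsets follow from the on-disk layout: a non-inline revlog truncates the data file at the first stripped revision's start and the index after rev fixed-size entries, while an inline revlog keeps both in the index file, so the contributions add up. A sketch of the arithmetic with toy numbers:

    def strip_offsets_sketch(datastart, rev, entrysize, inline):
        # returns (data file offset or None, index file offset)
        if inline:
            return None, datastart + rev * entrysize
        return datastart, rev * entrysize

    # 64-byte index entries, first stripped revision starts at data offset 1000
    assert strip_offsets_sketch(1000, 5, 64, False) == (1000, 320)
    assert strip_offsets_sketch(1000, 5, 64, True) == (None, 1320)
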
2285 2222 def checksize(self):
2286 2223 """Check size of index and data files
2287 2224
2288 2225 return a (dd, di) tuple.
2289 2226 - dd: extra bytes for the "data" file
2290 2227 - di: extra bytes for the "index" file
2291 2228
2292 2229 A healthy revlog will return (0, 0).
2293 2230 """
2294 2231 expected = 0
2295 2232 if len(self):
2296 2233 expected = max(0, self.end(len(self) - 1))
2297 2234
2298 2235 try:
2299 2236 with self._datafp() as f:
2300 2237 f.seek(0, io.SEEK_END)
2301 2238 actual = f.tell()
2302 2239 dd = actual - expected
2303 2240 except IOError as inst:
2304 2241 if inst.errno != errno.ENOENT:
2305 2242 raise
2306 2243 dd = 0
2307 2244
2308 2245 try:
2309 2246 f = self.opener(self.indexfile)
2310 2247 f.seek(0, io.SEEK_END)
2311 2248 actual = f.tell()
2312 2249 f.close()
2313 2250 s = self._io.size
2314 2251 i = max(0, actual // s)
2315 2252 di = actual - (i * s)
2316 2253 if self._inline:
2317 2254 databytes = 0
2318 2255 for r in self:
2319 2256 databytes += max(0, self.length(r))
2320 2257 dd = 0
2321 2258 di = actual - len(self) * s - databytes
2322 2259 except IOError as inst:
2323 2260 if inst.errno != errno.ENOENT:
2324 2261 raise
2325 2262 di = 0
2326 2263
2327 2264 return (dd, di)
2328 2265
2329 2266 def files(self):
2330 2267 res = [self.indexfile]
2331 2268 if not self._inline:
2332 2269 res.append(self.datafile)
2333 2270 return res
2334 2271
2335 2272 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2336 2273 assumehaveparentrevisions=False,
2337 2274 deltamode=repository.CG_DELTAMODE_STD):
2338 2275 if nodesorder not in ('nodes', 'storage', 'linear', None):
2339 2276 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2340 2277 nodesorder)
2341 2278
2342 2279 if nodesorder is None and not self._generaldelta:
2343 2280 nodesorder = 'storage'
2344 2281
2345 2282 if (not self._storedeltachains and
2346 2283 deltamode != repository.CG_DELTAMODE_PREV):
2347 2284 deltamode = repository.CG_DELTAMODE_FULL
2348 2285
2349 2286 return storageutil.emitrevisions(
2350 2287 self, nodes, nodesorder, revlogrevisiondelta,
2351 2288 deltaparentfn=self.deltaparent,
2352 2289 candeltafn=self.candelta,
2353 2290 rawsizefn=self.rawsize,
2354 2291 revdifffn=self.revdiff,
2355 2292 flagsfn=self.flags,
2356 2293 deltamode=deltamode,
2357 2294 revisiondata=revisiondata,
2358 2295 assumehaveparentrevisions=assumehaveparentrevisions)
2359 2296
2360 2297 DELTAREUSEALWAYS = 'always'
2361 2298 DELTAREUSESAMEREVS = 'samerevs'
2362 2299 DELTAREUSENEVER = 'never'
2363 2300
2364 2301 DELTAREUSEFULLADD = 'fulladd'
2365 2302
2366 2303 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2367 2304
2368 2305 def clone(self, tr, destrevlog, addrevisioncb=None,
2369 2306 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2370 2307 """Copy this revlog to another, possibly with format changes.
2371 2308
2372 2309 The destination revlog will contain the same revisions and nodes.
2373 2310 However, it may not be bit-for-bit identical due to e.g. delta encoding
2374 2311 differences.
2375 2312
2376 2313 The ``deltareuse`` argument controls how deltas from the existing revlog
2377 2314 are preserved in the destination revlog. The argument can have the
2378 2315 following values:
2379 2316
2380 2317 DELTAREUSEALWAYS
2381 2318 Deltas will always be reused (if possible), even if the destination
2382 2319 revlog would not select the same revisions for the delta. This is the
2383 2320 fastest mode of operation.
2384 2321 DELTAREUSESAMEREVS
2385 2322 Deltas will be reused if the destination revlog would pick the same
2386 2323 revisions for the delta. This mode strikes a balance between speed
2387 2324 and optimization.
2388 2325 DELTAREUSENEVER
2389 2326 Deltas will never be reused. This is the slowest mode of execution.
2390 2327 This mode can be used to recompute deltas (e.g. if the diff/delta
2391 2328 algorithm changes).
2392 2329
2393 2330 Delta computation can be slow, so the choice of delta reuse policy can
2394 2331 significantly affect run time.
2395 2332
2396 2333 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2397 2334 two extremes. Deltas will be reused when appropriate. But if the destination
2398 2335 could pick a better base revision, the delta is recomputed. This means if you
2399 2336 are converting a non-generaldelta revlog to a generaldelta revlog,
2400 2337 deltas will be recomputed if the delta's parent isn't a parent of the
2401 2338 revision.
2402 2339
2403 2340 In addition to the delta policy, the ``forcedeltabothparents``
2404 2341 argument controls whether to force compute deltas against both parents
2405 2342 for merges. If None, the destination revlog's existing setting is kept.
2406 2343 """
2407 2344 if deltareuse not in self.DELTAREUSEALL:
2408 2345 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2409 2346
2410 2347 if len(destrevlog):
2411 2348 raise ValueError(_('destination revlog is not empty'))
2412 2349
2413 2350 if getattr(self, 'filteredrevs', None):
2414 2351 raise ValueError(_('source revlog has filtered revisions'))
2415 2352 if getattr(destrevlog, 'filteredrevs', None):
2416 2353 raise ValueError(_('destination revlog has filtered revisions'))
2417 2354
2418 2355 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2419 2356 # if possible.
2420 2357 oldlazydelta = destrevlog._lazydelta
2421 2358 oldlazydeltabase = destrevlog._lazydeltabase
2422 2359 oldamd = destrevlog._deltabothparents
2423 2360
2424 2361 try:
2425 2362 if deltareuse == self.DELTAREUSEALWAYS:
2426 2363 destrevlog._lazydeltabase = True
2427 2364 destrevlog._lazydelta = True
2428 2365 elif deltareuse == self.DELTAREUSESAMEREVS:
2429 2366 destrevlog._lazydeltabase = False
2430 2367 destrevlog._lazydelta = True
2431 2368 elif deltareuse == self.DELTAREUSENEVER:
2432 2369 destrevlog._lazydeltabase = False
2433 2370 destrevlog._lazydelta = False
2434 2371
2435 2372 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2436 2373
2437 2374 deltacomputer = deltautil.deltacomputer(destrevlog)
2438 2375 index = self.index
2439 2376 for rev in self:
2440 2377 entry = index[rev]
2441 2378
2442 2379 # Some classes override linkrev to take filtered revs into
2443 2380 # account. Use raw entry from index.
2444 2381 flags = entry[0] & 0xffff
2445 2382 linkrev = entry[4]
2446 2383 p1 = index[entry[5]][7]
2447 2384 p2 = index[entry[6]][7]
2448 2385 node = entry[7]
2449 2386
2450 2387 # (Possibly) reuse the delta from the revlog if allowed and
2451 2388 # the revlog chunk is a delta.
2452 2389 cachedelta = None
2453 2390 rawtext = None
2454 2391 if (deltareuse != self.DELTAREUSEFULLADD
2455 2392 and destrevlog._lazydelta):
2456 2393 dp = self.deltaparent(rev)
2457 2394 if dp != nullrev:
2458 2395 cachedelta = (dp, bytes(self._chunk(rev)))
2459 2396
2460 2397 if not cachedelta:
2461 2398 rawtext = self.rawdata(rev)
2462 2399
2464 2401 if deltareuse == self.DELTAREUSEFULLADD:
2465 2402 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2466 2403 cachedelta=cachedelta,
2467 2404 node=node, flags=flags,
2468 2405 deltacomputer=deltacomputer)
2469 2406 else:
2470 2407 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2471 2408 checkambig=False)
2472 2409 dfh = None
2473 2410 if not destrevlog._inline:
2474 2411 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2475 2412 try:
2476 2413 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2477 2414 p2, flags, cachedelta, ifh, dfh,
2478 2415 deltacomputer=deltacomputer)
2479 2416 finally:
2480 2417 if dfh:
2481 2418 dfh.close()
2482 2419 ifh.close()
2483 2420
2484 2421 if addrevisioncb:
2485 2422 addrevisioncb(self, rev, node)
2486 2423 finally:
2487 2424 destrevlog._lazydelta = oldlazydelta
2488 2425 destrevlog._lazydeltabase = oldlazydeltabase
2489 2426 destrevlog._deltabothparents = oldamd
2490 2427
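As a usage sketch (all names hypothetical), an upgrade-style caller with an open transaction and an empty destination revlog would pick a policy and delegate:

    # hypothetical driver; src/dst are revlog instances, tr an open transaction
    def copyrevlog(src, dst, tr, recompute=False):
        policy = src.DELTAREUSENEVER if recompute else src.DELTAREUSESAMEREVS
        src.clone(tr, dst, deltareuse=policy)
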
2491 2428 def censorrevision(self, tr, censornode, tombstone=b''):
2492 2429 if (self.version & 0xFFFF) == REVLOGV0:
2493 2430 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2494 2431 self.version)
2495 2432
2496 2433 censorrev = self.rev(censornode)
2497 2434 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2498 2435
2499 2436 if len(tombstone) > self.rawsize(censorrev):
2500 2437 raise error.Abort(_('censor tombstone must be no longer than '
2501 2438 'censored data'))
2502 2439
2503 2440 # Rewriting the revlog in place is hard. Our strategy for censoring is
2504 2441 # to create a new revlog, copy all revisions to it, then replace the
2505 2442 # revlogs on transaction close.
2506 2443
2507 2444 newindexfile = self.indexfile + b'.tmpcensored'
2508 2445 newdatafile = self.datafile + b'.tmpcensored'
2509 2446
2510 2447 # This is a bit dangerous. We could easily have a mismatch of state.
2511 2448 newrl = revlog(self.opener, newindexfile, newdatafile,
2512 2449 censorable=True)
2513 2450 newrl.version = self.version
2514 2451 newrl._generaldelta = self._generaldelta
2515 2452 newrl._io = self._io
2516 2453
2517 2454 for rev in self.revs():
2518 2455 node = self.node(rev)
2519 2456 p1, p2 = self.parents(node)
2520 2457
2521 2458 if rev == censorrev:
2522 2459 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2523 2460 p1, p2, censornode, REVIDX_ISCENSORED)
2524 2461
2525 2462 if newrl.deltaparent(rev) != nullrev:
2526 2463 raise error.Abort(_('censored revision stored as delta; '
2527 2464 'cannot censor'),
2528 2465 hint=_('censoring of revlogs is not '
2529 2466 'fully implemented; please report '
2530 2467 'this bug'))
2531 2468 continue
2532 2469
2533 2470 if self.iscensored(rev):
2534 2471 if self.deltaparent(rev) != nullrev:
2535 2472 raise error.Abort(_('cannot censor due to censored '
2536 2473 'revision having delta stored'))
2537 2474 rawtext = self._chunk(rev)
2538 2475 else:
2539 2476 rawtext = self.rawdata(rev)
2540 2477
2541 2478 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2542 2479 self.flags(rev))
2543 2480
2544 2481 tr.addbackup(self.indexfile, location='store')
2545 2482 if not self._inline:
2546 2483 tr.addbackup(self.datafile, location='store')
2547 2484
2548 2485 self.opener.rename(newrl.indexfile, self.indexfile)
2549 2486 if not self._inline:
2550 2487 self.opener.rename(newrl.datafile, self.datafile)
2551 2488
2552 2489 self.clearcaches()
2553 2490 self._loadindex()
2554 2491
2555 2492 def verifyintegrity(self, state):
2556 2493 """Verifies the integrity of the revlog.
2557 2494
2558 2495 Yields ``revlogproblem`` instances describing problems that are
2559 2496 found.
2560 2497 """
2561 2498 dd, di = self.checksize()
2562 2499 if dd:
2563 2500 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2564 2501 if di:
2565 2502 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2566 2503
2567 2504 version = self.version & 0xFFFF
2568 2505
2569 2506 # The verifier tells us what version revlog we should be.
2570 2507 if version != state['expectedversion']:
2571 2508 yield revlogproblem(
2572 2509 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2573 2510 (self.indexfile, version, state['expectedversion']))
2574 2511
2575 2512 state['skipread'] = set()
2576 2513
2577 2514 for rev in self:
2578 2515 node = self.node(rev)
2579 2516
2580 2517 # Verify contents. 4 cases to care about:
2581 2518 #
2582 2519 # common: the most common case
2583 2520 # rename: with a rename
2584 2521 # meta: file content starts with b'\1\n', the metadata
2585 2522 # header defined in filelog.py, but without a rename
2586 2523 # ext: content stored externally
2587 2524 #
2588 2525 # More formally, their differences are shown below:
2589 2526 #
2590 2527 # | common | rename | meta | ext
2591 2528 # -------------------------------------------------------
2592 2529 # flags() | 0 | 0 | 0 | not 0
2593 2530 # renamed() | False | True | False | ?
2594 2531 # rawtext[0:2]=='\1\n'| False | True | True | ?
2595 2532 #
2596 2533 # "rawtext" means the raw text stored in revlog data, which
2597 2534 # could be retrieved by "rawdata(rev)". "text"
2598 2535 # mentioned below is "revision(rev)".
2599 2536 #
2600 2537 # There are 3 different lengths stored physically:
2601 2538 # 1. L1: rawsize, stored in revlog index
2602 2539 # 2. L2: len(rawtext), stored in revlog data
2603 2540 # 3. L3: len(text), stored in revlog data if flags==0, or
2604 2541 # possibly somewhere else if flags!=0
2605 2542 #
2606 2543 # L1 should be equal to L2. L3 could be different from them.
2607 2544 # "text" may or may not affect commit hash depending on flag
2608 2545 # processors (see flagutil.addflagprocessor).
2609 2546 #
2610 2547 # | common | rename | meta | ext
2611 2548 # -------------------------------------------------
2612 2549 # rawsize() | L1 | L1 | L1 | L1
2613 2550 # size() | L1 | L2-LM | L1(*) | L1 (?)
2614 2551 # len(rawtext) | L2 | L2 | L2 | L2
2615 2552 # len(text) | L2 | L2 | L2 | L3
2616 2553 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2617 2554 #
2618 2555 # LM: length of metadata, depending on rawtext
2619 2556 # (*): not ideal, see comment in filelog.size
2620 2557 # (?): could be "- len(meta)" if the resolved content has
2621 2558 # rename metadata
2622 2559 #
2623 2560 # Checks needed to be done:
2624 2561 # 1. length check: L1 == L2, in all cases.
2625 2562 # 2. hash check: depending on flag processor, we may need to
2626 2563 # use either "text" (external), or "rawtext" (in revlog).
2627 2564
2628 2565 try:
2629 2566 skipflags = state.get('skipflags', 0)
2630 2567 if skipflags:
2631 2568 skipflags &= self.flags(rev)
2632 2569
2633 2570 if skipflags:
2634 2571 state['skipread'].add(node)
2635 2572 else:
2636 2573 # Side-effect: read content and verify hash.
2637 2574 self.revision(node)
2638 2575
2639 2576 l1 = self.rawsize(rev)
2640 2577 l2 = len(self.rawdata(node))
2641 2578
2642 2579 if l1 != l2:
2643 2580 yield revlogproblem(
2644 2581 error=_('unpacked size is %d, %d expected') % (l2, l1),
2645 2582 node=node)
2646 2583
2647 2584 except error.CensoredNodeError:
2648 2585 if state['erroroncensored']:
2649 2586 yield revlogproblem(error=_('censored file data'),
2650 2587 node=node)
2651 2588 state['skipread'].add(node)
2652 2589 except Exception as e:
2653 2590 yield revlogproblem(
2654 2591 error=_('unpacking %s: %s') % (short(node),
2655 2592 stringutil.forcebytestr(e)),
2656 2593 node=node)
2657 2594 state['skipread'].add(node)
2658 2595
2659 2596 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2660 2597 revisionscount=False, trackedsize=False,
2661 2598 storedsize=False):
2662 2599 d = {}
2663 2600
2664 2601 if exclusivefiles:
2665 2602 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2666 2603 if not self._inline:
2667 2604 d['exclusivefiles'].append((self.opener, self.datafile))
2668 2605
2669 2606 if sharedfiles:
2670 2607 d['sharedfiles'] = []
2671 2608
2672 2609 if revisionscount:
2673 2610 d['revisionscount'] = len(self)
2674 2611
2675 2612 if trackedsize:
2676 2613 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2677 2614
2678 2615 if storedsize:
2679 2616 d['storedsize'] = sum(self.opener.stat(path).st_size
2680 2617 for path in self.files())
2681 2618
2682 2619 return d
@@ -1,80 +1,151 b''
1 1 # flagutil.py - code to deal with revlog flags and their processors
2 2 #
3 3 # Copyright 2016 Remi Chaintron <remi@fb.com>
4 4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 from ..i18n import _
12 12
13 13 from .constants import (
14 14 REVIDX_DEFAULT_FLAGS,
15 15 REVIDX_ELLIPSIS,
16 16 REVIDX_EXTSTORED,
17 17 REVIDX_FLAGS_ORDER,
18 18 REVIDX_ISCENSORED,
19 19 REVIDX_RAWTEXT_CHANGING_FLAGS,
20 20 )
21 21
22 22 from .. import (
23 23 error,
24 24 util
25 25 )
26 26
27 27 # blanked usage of all the names to prevent pyflakes constraints
28 28 # We need these names available in the module for extensions.
29 29 REVIDX_ISCENSORED
30 30 REVIDX_ELLIPSIS
31 31 REVIDX_EXTSTORED
32 32 REVIDX_DEFAULT_FLAGS
33 33 REVIDX_FLAGS_ORDER
34 34 REVIDX_RAWTEXT_CHANGING_FLAGS
35 35
36 36 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
37 37
38 38 # Store flag processors (cf. 'addflagprocessor()' to register)
39 39 flagprocessors = {
40 40 REVIDX_ISCENSORED: None,
41 41 }
42 42
43 43 def addflagprocessor(flag, processor):
44 44 """Register a flag processor on a revision data flag.
45 45
46 46 Invariant:
47 47 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
48 48 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
49 49 - Only one flag processor can be registered on a specific flag.
50 50 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
51 51 following signatures:
52 52 - (read) f(self, rawtext) -> text, bool
53 53 - (write) f(self, text) -> rawtext, bool
54 54 - (raw) f(self, rawtext) -> bool
55 55 "text" is presented to the user. "rawtext" is stored in revlog data, not
56 56 directly visible to the user.
57 57 The boolean returned by these transforms is used to determine whether
58 58 the returned text can be used for hash integrity checking. For example,
59 59 if "write" returns False, then "text" is used to generate hash. If
60 60 "write" returns True, that basically means "rawtext" returned by "write"
61 61 should be used to generate hash. Usually, "write" and "read" return
62 62 different booleans. And "raw" returns the same boolean as "write".
63 63
64 64 Note: The 'raw' transform is used for changegroup generation and in some
65 65 debug commands. In this case the transform only indicates whether the
66 66 contents can be used for hash integrity checks.
67 67 """
68 68 insertflagprocessor(flag, processor, flagprocessors)
69 69
70 70 def insertflagprocessor(flag, processor, flagprocessors):
71 71 if not flag & REVIDX_KNOWN_FLAGS:
72 72 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
73 73 raise error.ProgrammingError(msg)
74 74 if flag not in REVIDX_FLAGS_ORDER:
75 75 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
76 76 raise error.ProgrammingError(msg)
77 77 if flag in flagprocessors:
78 78 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
79 79 raise error.Abort(msg)
80 80 flagprocessors[flag] = processor
81
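To illustrate the (read, write, raw) contract, a hypothetical base64 processor in the style of Mercurial's flag-processor tests; the flag it would be registered on (e.g. REVIDX_EXTSTORED) and the registration call are left to the hosting extension:

    import base64

    def b64read(self, rawtext):
        # stored form -> user-visible text; True: the result is hash-checkable
        return base64.b64decode(rawtext), True

    def b64write(self, text):
        # user text -> stored form; False: the user text carries the hash
        return base64.b64encode(text), False

    def b64raw(self, rawtext):
        # the stored form alone cannot be used for hash integrity checks
        return False

    processor = (b64read, b64write, b64raw)
    # e.g. addflagprocessor(REVIDX_EXTSTORED, processor) from an extension,
    # assuming no other processor has claimed that flag.
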
82 class flagprocessorsmixin(object):
83 """basic mixin to support revlog flag processing
84
85 Make sure the `_flagprocessors` attribute is set at ``__init__`` time.
86
87 See the documentation of the ``_processflags`` method for details.
88 """
89
90 def _processflags(self, text, flags, operation, raw=False):
91 """Inspect revision data flags and applies transforms defined by
92 registered flag processors.
93
94 ``text`` - the revision data to process
95 ``flags`` - the revision flags
96 ``operation`` - the operation being performed (read or write)
97 ``raw`` - an optional argument indicating whether the raw transform should be
98 applied.
99
100 This method processes the flags in the order (or reverse order if
101 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
102 flag processors registered for present flags. The order of flags defined
103 in REVIDX_FLAGS_ORDER must remain stable because the transforms do not commute.
104
105 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
106 processed text and ``validatehash`` is a bool indicating whether the
107 returned text should be checked for hash integrity.
108
109 Note: If the ``raw`` argument is set, it has precedence over the
110 operation and will only update the value of ``validatehash``.
111 """
112 # fast path: no flag processors will run
113 if flags == 0:
114 return text, True
115 if operation not in ('read', 'write'):
116 raise error.ProgrammingError(_("invalid '%s' operation") %
117 operation)
118 # Check all flags are known.
119 if flags & ~REVIDX_KNOWN_FLAGS:
120 raise error.RevlogError(_("incompatible revision flag '%#x'") %
121 (flags & ~REVIDX_KNOWN_FLAGS))
122 validatehash = True
123 # Depending on the operation (read or write), the order might be
124 # reversed due to non-commutative transforms.
125 orderedflags = REVIDX_FLAGS_ORDER
126 if operation == 'write':
127 orderedflags = reversed(orderedflags)
128
129 for flag in orderedflags:
130 # If a flagprocessor has been registered for a known flag, apply the
131 # related operation transform and update result tuple.
132 if flag & flags:
133 vhash = True
134
135 if flag not in self._flagprocessors:
136 message = _("missing processor for flag '%#x'") % (flag)
137 raise error.RevlogError(message)
138
139 processor = self._flagprocessors[flag]
140 if processor is not None:
141 readtransform, writetransform, rawtransform = processor
142
143 if raw:
144 vhash = rawtransform(self, text)
145 elif operation == 'read':
146 text, vhash = readtransform(self, text)
147 else: # write operation
148 text, vhash = writetransform(self, text)
149 validatehash = validatehash and vhash
150
151 return text, validatehash
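
Why the write order is reversed is easiest to see with two flags whose transforms do not commute: applying them in REVIDX_FLAGS_ORDER on read and in reverse on write makes the pair round-trip. A minimal sketch with toy flags and transforms:

    FLAG_A, FLAG_B = 1 << 0, 1 << 1
    ORDER = [FLAG_A, FLAG_B]          # stands in for REVIDX_FLAGS_ORDER

    def process_sketch(procs, text, flags, operation):
        order = ORDER if operation == 'read' else list(reversed(ORDER))
        for flag in order:
            if flag & flags:
                text = procs[flag][operation](text)
        return text

    procs = {
        FLAG_A: {'read': lambda t: t + b'A', 'write': lambda t: t[:-1]},
        FLAG_B: {'read': lambda t: t + b'B', 'write': lambda t: t[:-1]},
    }
    text = process_sketch(procs, b'x', FLAG_A | FLAG_B, 'read')   # b'xAB'
    assert process_sketch(procs, text, FLAG_A | FLAG_B, 'write') == b'x'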