sidedata: register the flag processors if the repository allows for it...
marmoute
r43305:142deb53 default
@@ -1,2671 +1,2676 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_SIDEDATA,
57 58 )
58 59 from .thirdparty import (
59 60 attr,
60 61 )
61 62 from . import (
62 63 ancestor,
63 64 dagop,
64 65 error,
65 66 mdiff,
66 67 policy,
67 68 pycompat,
68 69 templatefilters,
69 70 util,
70 71 )
71 72 from .interfaces import (
72 73 repository,
73 74 util as interfaceutil,
74 75 )
75 76 from .revlogutils import (
76 77 deltas as deltautil,
77 78 flagutil,
79 sidedata as sidedatautil,
78 80 )
79 81 from .utils import (
80 82 storageutil,
81 83 stringutil,
82 84 )
83 85
84 86 # bare references to all the names to prevent pyflakes "unused" warnings
85 87 # We need these names available in the module for extensions.
86 88 REVLOGV0
87 89 REVLOGV1
88 90 REVLOGV2
89 91 FLAG_INLINE_DATA
90 92 FLAG_GENERALDELTA
91 93 REVLOG_DEFAULT_FLAGS
92 94 REVLOG_DEFAULT_FORMAT
93 95 REVLOG_DEFAULT_VERSION
94 96 REVLOGV1_FLAGS
95 97 REVLOGV2_FLAGS
96 98 REVIDX_ISCENSORED
97 99 REVIDX_ELLIPSIS
100 REVIDX_SIDEDATA
98 101 REVIDX_EXTSTORED
99 102 REVIDX_DEFAULT_FLAGS
100 103 REVIDX_FLAGS_ORDER
101 104 REVIDX_RAWTEXT_CHANGING_FLAGS
102 105
103 106 parsers = policy.importmod(r'parsers')
104 107 rustancestor = policy.importrust(r'ancestor')
105 108 rustdagop = policy.importrust(r'dagop')
106 109
107 110 # Aliased for performance.
108 111 _zlibdecompress = zlib.decompress
109 112
110 113 # max size of revlog with inline data
111 114 _maxinline = 131072
112 115 _chunksize = 1048576
113 116
114 117 # Flag processors for REVIDX_ELLIPSIS.
115 118 def ellipsisreadprocessor(rl, text):
116 119 return text, False, {}
117 120
118 121 def ellipsiswriteprocessor(rl, text, sidedata):
119 122 return text, False
120 123
121 124 def ellipsisrawprocessor(rl, text):
122 125 return False
123 126
124 127 ellipsisprocessor = (
125 128 ellipsisreadprocessor,
126 129 ellipsiswriteprocessor,
127 130 ellipsisrawprocessor,
128 131 )
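
The sidedata processors this patch registers (sidedatautil.processors) follow
the same (read, write, raw) contract as the ellipsis triple above. A minimal
no-op sketch of that contract (editorial, assuming the flagutil interface used
in this file; not code from the patch):

    # read: raw text from storage -> (text, validatehash, sidedata dict)
    def noopreadprocessor(rl, text):
        return text, False, {}

    # write: text (plus sidedata) to store -> (rawtext, validatehash)
    def noopwriteprocessor(rl, text, sidedata):
        return text, False

    # raw: operate on the rawtext directly -> validatehash
    def nooprawprocessor(rl, text):
        return False

    noopprocessor = (noopreadprocessor, noopwriteprocessor, nooprawprocessor)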
129 132
130 133 def getoffset(q):
131 134 return int(q >> 16)
132 135
133 136 def gettype(q):
134 137 return int(q & 0xFFFF)
135 138
136 139 def offset_type(offset, type):
137 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
138 141 raise ValueError('unknown revlog index flags')
139 142 return int(int(offset) << 16 | type)
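
A worked round trip of the packing above (editorial example): the upper 48
bits of the first index field hold the data offset and the low 16 bits hold
the flags, so getoffset() and gettype() invert offset_type():

    q = offset_type(1048576, 0)      # offset 1 MiB, no flags set
    assert getoffset(q) == 1048576
    assert gettype(q) == 0
    # passing a bit outside flagutil.REVIDX_KNOWN_FLAGS raises ValueError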
140 143
141 144 @attr.s(slots=True, frozen=True)
142 145 class _revisioninfo(object):
143 146 """Information about a revision that allows building its fulltext
144 147 node: expected hash of the revision
145 148 p1, p2: parent revs of the revision
146 149 btext: built text cache consisting of a one-element list
147 150 cachedelta: (baserev, uncompressed_delta) or None
148 151 flags: flags associated to the revision storage
149 152
150 153 One of btext[0] or cachedelta must be set.
151 154 """
152 155 node = attr.ib()
153 156 p1 = attr.ib()
154 157 p2 = attr.ib()
155 158 btext = attr.ib()
156 159 textlen = attr.ib()
157 160 cachedelta = attr.ib()
158 161 flags = attr.ib()
159 162
160 163 @interfaceutil.implementer(repository.irevisiondelta)
161 164 @attr.s(slots=True)
162 165 class revlogrevisiondelta(object):
163 166 node = attr.ib()
164 167 p1node = attr.ib()
165 168 p2node = attr.ib()
166 169 basenode = attr.ib()
167 170 flags = attr.ib()
168 171 baserevisionsize = attr.ib()
169 172 revision = attr.ib()
170 173 delta = attr.ib()
171 174 linknode = attr.ib(default=None)
172 175
173 176 @interfaceutil.implementer(repository.iverifyproblem)
174 177 @attr.s(frozen=True)
175 178 class revlogproblem(object):
176 179 warning = attr.ib(default=None)
177 180 error = attr.ib(default=None)
178 181 node = attr.ib(default=None)
179 182
180 183 # index v0:
181 184 # 4 bytes: offset
182 185 # 4 bytes: compressed length
183 186 # 4 bytes: base rev
184 187 # 4 bytes: link rev
185 188 # 20 bytes: parent 1 nodeid
186 189 # 20 bytes: parent 2 nodeid
187 190 # 20 bytes: nodeid
188 191 indexformatv0 = struct.Struct(">4l20s20s20s")
189 192 indexformatv0_pack = indexformatv0.pack
190 193 indexformatv0_unpack = indexformatv0.unpack
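
A quick size check of the v0 record layout documented above (editorial):

    import struct
    indexformatv0 = struct.Struct(">4l20s20s20s")
    # four 4-byte signed ints + three 20-byte nodeids = 76 bytes per record
    assert indexformatv0.size == 4 * 4 + 3 * 20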
191 194
192 195 class revlogoldindex(list):
193 196 def __getitem__(self, i):
194 197 if i == -1:
195 198 return (0, 0, 0, -1, -1, -1, -1, nullid)
196 199 return list.__getitem__(self, i)
197 200
198 201 class revlogoldio(object):
199 202 def __init__(self):
200 203 self.size = indexformatv0.size
201 204
202 205 def parseindex(self, data, inline):
203 206 s = self.size
204 207 index = []
205 208 nodemap = {nullid: nullrev}
206 209 n = off = 0
207 210 l = len(data)
208 211 while off + s <= l:
209 212 cur = data[off:off + s]
210 213 off += s
211 214 e = indexformatv0_unpack(cur)
212 215 # transform to revlogv1 format
213 216 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
214 217 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
215 218 index.append(e2)
216 219 nodemap[e[6]] = n
217 220 n += 1
218 221
219 222 return revlogoldindex(index), nodemap, None
220 223
221 224 def packentry(self, entry, node, version, rev):
222 225 if gettype(entry[0]):
223 226 raise error.RevlogError(_('index entry flags need revlog '
224 227 'version 1'))
225 228 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
226 229 node(entry[5]), node(entry[6]), entry[7])
227 230 return indexformatv0_pack(*e2)
228 231
229 232 # index ng:
230 233 # 6 bytes: offset
231 234 # 2 bytes: flags
232 235 # 4 bytes: compressed length
233 236 # 4 bytes: uncompressed length
234 237 # 4 bytes: base rev
235 238 # 4 bytes: link rev
236 239 # 4 bytes: parent 1 rev
237 240 # 4 bytes: parent 2 rev
238 241 # 32 bytes: nodeid
239 242 indexformatng = struct.Struct(">Qiiiiii20s12x")
240 243 indexformatng_pack = indexformatng.pack
241 244 versionformat = struct.Struct(">I")
242 245 versionformat_pack = versionformat.pack
243 246 versionformat_unpack = versionformat.unpack
244 247
245 248 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 249 # signed integer)
247 250 _maxentrysize = 0x7fffffff
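
The same check for the "ng" record layout (editorial): the nodeid field
reserves 32 bytes, but a SHA-1 hash only uses 20 of them, hence the 12 pad
bytes in the format string:

    import struct
    indexformatng = struct.Struct(">Qiiiiii20s12x")
    # 8-byte offset/flags + six 4-byte ints + 20-byte nodeid + 12 pad bytes
    assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # 64 bytes per record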
248 251
249 252 class revlogio(object):
250 253 def __init__(self):
251 254 self.size = indexformatng.size
252 255
253 256 def parseindex(self, data, inline):
254 257 # call the C implementation to parse the index data
255 258 index, cache = parsers.parse_index2(data, inline)
256 259 return index, getattr(index, 'nodemap', None), cache
257 260
258 261 def packentry(self, entry, node, version, rev):
259 262 p = indexformatng_pack(*entry)
260 263 if rev == 0:
261 264 p = versionformat_pack(version) + p[4:]
262 265 return p
263 266
264 267 class revlog(object):
265 268 """
266 269 the underlying revision storage object
267 270
268 271 A revlog consists of two parts, an index and the revision data.
269 272
270 273 The index is a file with a fixed record size containing
271 274 information on each revision, including its nodeid (hash), the
272 275 nodeids of its parents, the position and offset of its data within
273 276 the data file, and the revision it's based on. Finally, each entry
274 277 contains a linkrev entry that can serve as a pointer to external
275 278 data.
276 279
277 280 The revision data itself is a linear collection of data chunks.
278 281 Each chunk represents a revision and is usually represented as a
279 282 delta against the previous chunk. To bound lookup time, runs of
280 283 deltas are limited to about 2 times the length of the original
281 284 version data. This makes retrieval of a version proportional to
282 285 its size, or O(1) relative to the number of revisions.
283 286
284 287 Both pieces of the revlog are written to in an append-only
285 288 fashion, which means we never need to rewrite a file to insert or
286 289 remove data, and can use some simple techniques to avoid the need
287 290 for locking while reading.
288 291
289 292 If checkambig, indexfile is opened with checkambig=True at
290 293 writing, to avoid file stat ambiguity.
291 294
292 295 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 296 index will be mmapped rather than read if it is larger than the
294 297 configured threshold.
295 298
296 299 If censorable is True, the revlog can have censored revisions.
297 300
298 301 If `upperboundcomp` is not None, this is the expected maximal gain from
299 302 compression for the data content.
300 303 """
301 304
302 305 _flagserrorclass = error.RevlogError
303 306
304 307 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
305 308 mmaplargeindex=False, censorable=False,
306 309 upperboundcomp=None):
307 310 """
308 311 create a revlog object
309 312
310 313 opener is a function that abstracts the file opening operation
311 314 and can be used to implement COW semantics or the like.
312 315
313 316 """
314 317 self.upperboundcomp = upperboundcomp
315 318 self.indexfile = indexfile
316 319 self.datafile = datafile or (indexfile[:-2] + ".d")
317 320 self.opener = opener
318 321 # When True, indexfile is opened with checkambig=True at writing, to
319 322 # avoid file stat ambiguity.
320 323 self._checkambig = checkambig
321 324 self._mmaplargeindex = mmaplargeindex
322 325 self._censorable = censorable
323 326 # 3-tuple of (node, rev, text) for a raw revision.
324 327 self._revisioncache = None
325 328 # Maps rev to chain base rev.
326 329 self._chainbasecache = util.lrucachedict(100)
327 330 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
328 331 self._chunkcache = (0, '')
329 332 # How much data to read and cache into the raw revlog data cache.
330 333 self._chunkcachesize = 65536
331 334 self._maxchainlen = None
332 335 self._deltabothparents = True
333 336 self.index = []
334 337 # Mapping of partial identifiers to full nodes.
335 338 self._pcache = {}
336 339 # Mapping of full node to revision integer.
337 340 self._nodecache = {nullid: nullrev}
338 341 self._nodepos = None
339 342 self._compengine = 'zlib'
340 343 self._compengineopts = {}
341 344 self._maxdeltachainspan = -1
342 345 self._withsparseread = False
343 346 self._sparserevlog = False
344 347 self._srdensitythreshold = 0.50
345 348 self._srmingapsize = 262144
346 349
347 350 # Make copy of flag processors so each revlog instance can support
348 351 # custom flags.
349 352 self._flagprocessors = dict(flagutil.flagprocessors)
350 353
351 354 # 2-tuple of file handles being used for active writing.
352 355 self._writinghandles = None
353 356
354 357 self._loadindex()
355 358
356 359 def _loadindex(self):
357 360 mmapindexthreshold = None
358 361 opts = self.opener.options
359 362
360 363 if 'revlogv2' in opts:
361 364 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
362 365 elif 'revlogv1' in opts:
363 366 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
364 367 if 'generaldelta' in opts:
365 368 newversionflags |= FLAG_GENERALDELTA
366 369 elif 'revlogv0' in self.opener.options:
367 370 newversionflags = REVLOGV0
368 371 else:
369 372 newversionflags = REVLOG_DEFAULT_VERSION
370 373
371 374 if 'chunkcachesize' in opts:
372 375 self._chunkcachesize = opts['chunkcachesize']
373 376 if 'maxchainlen' in opts:
374 377 self._maxchainlen = opts['maxchainlen']
375 378 if 'deltabothparents' in opts:
376 379 self._deltabothparents = opts['deltabothparents']
377 380 self._lazydelta = bool(opts.get('lazydelta', True))
378 381 self._lazydeltabase = False
379 382 if self._lazydelta:
380 383 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
381 384 if 'compengine' in opts:
382 385 self._compengine = opts['compengine']
383 386 if 'zlib.level' in opts:
384 387 self._compengineopts['zlib.level'] = opts['zlib.level']
385 388 if 'zstd.level' in opts:
386 389 self._compengineopts['zstd.level'] = opts['zstd.level']
387 390 if 'maxdeltachainspan' in opts:
388 391 self._maxdeltachainspan = opts['maxdeltachainspan']
389 392 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
390 393 mmapindexthreshold = opts['mmapindexthreshold']
391 394 self.hassidedata = bool(opts.get('side-data', False))
395 if self.hassidedata:
396 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
392 397 self._sparserevlog = bool(opts.get('sparse-revlog', False))
393 398 withsparseread = bool(opts.get('with-sparse-read', False))
394 399 # sparse-revlog forces sparse-read
395 400 self._withsparseread = self._sparserevlog or withsparseread
396 401 if 'sparse-read-density-threshold' in opts:
397 402 self._srdensitythreshold = opts['sparse-read-density-threshold']
398 403 if 'sparse-read-min-gap-size' in opts:
399 404 self._srmingapsize = opts['sparse-read-min-gap-size']
400 405 if opts.get('enableellipsis'):
401 406 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
402 407
403 408 # revlog v0 doesn't have flag processors
404 409 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
405 410 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
406 411
407 412 if self._chunkcachesize <= 0:
408 413 raise error.RevlogError(_('revlog chunk cache size %r is not '
409 414 'greater than 0') % self._chunkcachesize)
410 415 elif self._chunkcachesize & (self._chunkcachesize - 1):
411 416 raise error.RevlogError(_('revlog chunk cache size %r is not a '
412 417 'power of 2') % self._chunkcachesize)
413 418
414 419 indexdata = ''
415 420 self._initempty = True
416 421 try:
417 422 with self._indexfp() as f:
418 423 if (mmapindexthreshold is not None and
419 424 self.opener.fstat(f).st_size >= mmapindexthreshold):
420 425 # TODO: should .close() the mmap to release resources without
421 426 # relying on Python GC
422 427 indexdata = util.buffer(util.mmapread(f))
423 428 else:
424 429 indexdata = f.read()
425 430 if len(indexdata) > 0:
426 431 versionflags = versionformat_unpack(indexdata[:4])[0]
427 432 self._initempty = False
428 433 else:
429 434 versionflags = newversionflags
430 435 except IOError as inst:
431 436 if inst.errno != errno.ENOENT:
432 437 raise
433 438
434 439 versionflags = newversionflags
435 440
436 441 self.version = versionflags
437 442
438 443 flags = versionflags & ~0xFFFF
439 444 fmt = versionflags & 0xFFFF
440 445
441 446 if fmt == REVLOGV0:
442 447 if flags:
443 448 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
444 449 'revlog %s') %
445 450 (flags >> 16, fmt, self.indexfile))
446 451
447 452 self._inline = False
448 453 self._generaldelta = False
449 454
450 455 elif fmt == REVLOGV1:
451 456 if flags & ~REVLOGV1_FLAGS:
452 457 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
453 458 'revlog %s') %
454 459 (flags >> 16, fmt, self.indexfile))
455 460
456 461 self._inline = versionflags & FLAG_INLINE_DATA
457 462 self._generaldelta = versionflags & FLAG_GENERALDELTA
458 463
459 464 elif fmt == REVLOGV2:
460 465 if flags & ~REVLOGV2_FLAGS:
461 466 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
462 467 'revlog %s') %
463 468 (flags >> 16, fmt, self.indexfile))
464 469
465 470 self._inline = versionflags & FLAG_INLINE_DATA
466 471 # generaldelta implied by version 2 revlogs.
467 472 self._generaldelta = True
468 473
469 474 else:
470 475 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
471 476 (fmt, self.indexfile))
472 477 # sparse-revlog can't be on without general-delta (issue6056)
473 478 if not self._generaldelta:
474 479 self._sparserevlog = False
475 480
476 481 self._storedeltachains = True
477 482
478 483 self._io = revlogio()
479 484 if self.version == REVLOGV0:
480 485 self._io = revlogoldio()
481 486 try:
482 487 d = self._io.parseindex(indexdata, self._inline)
483 488 except (ValueError, IndexError):
484 489 raise error.RevlogError(_("index %s is corrupted") %
485 490 self.indexfile)
486 491 self.index, nodemap, self._chunkcache = d
487 492 if nodemap is not None:
488 493 self.nodemap = self._nodecache = nodemap
489 494 if not self._chunkcache:
490 495 self._chunkclear()
491 496 # revnum -> (chain-length, sum-delta-length)
492 497 self._chaininfocache = {}
493 498 # revlog header -> revlog compressor
494 499 self._decompressors = {}
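
The net effect of the new side-data hunk above, in isolation (a hedged
sketch): because _flagprocessors is a per-instance copy of
flagutil.flagprocessors, enabling side-data on one revlog does not leak the
REVIDX_SIDEDATA processors into other revlog instances.

    # `opener` below is assumed: any vfs whose .options dict includes
    # {'side-data': True}; obtaining one is repository setup, out of scope
    rl = revlog(opener, b'data/example.i')
    if rl.hassidedata:               # derived from opener.options['side-data']
        assert REVIDX_SIDEDATA in rl._flagprocessors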
495 500
496 501 @util.propertycache
497 502 def _compressor(self):
498 503 engine = util.compengines[self._compengine]
499 504 return engine.revlogcompressor(self._compengineopts)
500 505
501 506 def _indexfp(self, mode='r'):
502 507 """file object for the revlog's index file"""
503 508 args = {r'mode': mode}
504 509 if mode != 'r':
505 510 args[r'checkambig'] = self._checkambig
506 511 if mode == 'w':
507 512 args[r'atomictemp'] = True
508 513 return self.opener(self.indexfile, **args)
509 514
510 515 def _datafp(self, mode='r'):
511 516 """file object for the revlog's data file"""
512 517 return self.opener(self.datafile, mode=mode)
513 518
514 519 @contextlib.contextmanager
515 520 def _datareadfp(self, existingfp=None):
516 521 """file object suitable to read data"""
517 522 # Use explicit file handle, if given.
518 523 if existingfp is not None:
519 524 yield existingfp
520 525
521 526 # Use a file handle being actively used for writes, if available.
522 527 # There is some danger to doing this because reads will seek the
523 528 # file. However, _writeentry() performs a SEEK_END before all writes,
524 529 # so we should be safe.
525 530 elif self._writinghandles:
526 531 if self._inline:
527 532 yield self._writinghandles[0]
528 533 else:
529 534 yield self._writinghandles[1]
530 535
531 536 # Otherwise open a new file handle.
532 537 else:
533 538 if self._inline:
534 539 func = self._indexfp
535 540 else:
536 541 func = self._datafp
537 542 with func() as fp:
538 543 yield fp
539 544
540 545 def tip(self):
541 546 return self.node(len(self.index) - 1)
542 547 def __contains__(self, rev):
543 548 return 0 <= rev < len(self)
544 549 def __len__(self):
545 550 return len(self.index)
546 551 def __iter__(self):
547 552 return iter(pycompat.xrange(len(self)))
548 553 def revs(self, start=0, stop=None):
549 554 """iterate over all rev in this revlog (from start to stop)"""
550 555 return storageutil.iterrevs(len(self), start=start, stop=stop)
551 556
552 557 @util.propertycache
553 558 def nodemap(self):
554 559 if self.index:
555 560 # populate mapping down to the initial node
556 561 node0 = self.index[0][7] # get around changelog filtering
557 562 self.rev(node0)
558 563 return self._nodecache
559 564
560 565 def hasnode(self, node):
561 566 try:
562 567 self.rev(node)
563 568 return True
564 569 except KeyError:
565 570 return False
566 571
567 572 def candelta(self, baserev, rev):
568 573 """whether two revisions (baserev, rev) can be delta-ed or not"""
569 574 # Disable deltas if either rev requires a content-changing flag
570 575 # processor (e.g. LFS). This is because such a flag processor can alter
571 576 # the rawtext content that the delta will be based on, and two clients
572 577 # could have the same revlog node with different flags (i.e. different
573 578 # rawtext contents), making the delta incompatible.
574 579 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
575 580 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
576 581 return False
577 582 return True
578 583
579 584 def clearcaches(self):
580 585 self._revisioncache = None
581 586 self._chainbasecache.clear()
582 587 self._chunkcache = (0, '')
583 588 self._pcache = {}
584 589
585 590 try:
586 591 # If we are using the native C version, we are in the fun case
587 592 # where self.index, self.nodemap and self._nodecache are the same
588 593 # object.
589 594 self._nodecache.clearcaches()
590 595 except AttributeError:
591 596 self._nodecache = {nullid: nullrev}
592 597 self._nodepos = None
593 598
594 599 def rev(self, node):
595 600 try:
596 601 return self._nodecache[node]
597 602 except TypeError:
598 603 raise
599 604 except error.RevlogError:
600 605 # parsers.c radix tree lookup failed
601 606 if node == wdirid or node in wdirfilenodeids:
602 607 raise error.WdirUnsupported
603 608 raise error.LookupError(node, self.indexfile, _('no node'))
604 609 except KeyError:
605 610 # pure python cache lookup failed
606 611 n = self._nodecache
607 612 i = self.index
608 613 p = self._nodepos
609 614 if p is None:
610 615 p = len(i) - 1
611 616 else:
612 617 assert p < len(i)
613 618 for r in pycompat.xrange(p, -1, -1):
614 619 v = i[r][7]
615 620 n[v] = r
616 621 if v == node:
617 622 self._nodepos = r - 1
618 623 return r
619 624 if node == wdirid or node in wdirfilenodeids:
620 625 raise error.WdirUnsupported
621 626 raise error.LookupError(node, self.indexfile, _('no node'))
622 627
623 628 # Accessors for index entries.
624 629
625 630 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
626 631 # are flags.
627 632 def start(self, rev):
628 633 return int(self.index[rev][0] >> 16)
629 634
630 635 def flags(self, rev):
631 636 return self.index[rev][0] & 0xFFFF
632 637
633 638 def length(self, rev):
634 639 return self.index[rev][1]
635 640
636 641 def rawsize(self, rev):
637 642 """return the length of the uncompressed text for a given revision"""
638 643 l = self.index[rev][2]
639 644 if l >= 0:
640 645 return l
641 646
642 647 t = self.rawdata(rev)
643 648 return len(t)
644 649
645 650 def size(self, rev):
646 651 """length of non-raw text (processed by a "read" flag processor)"""
647 652 # fast path: if no "read" flag processor could change the content,
648 653 # size is rawsize. note: ELLIPSIS is known to not change the content.
649 654 flags = self.flags(rev)
650 655 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
651 656 return self.rawsize(rev)
652 657
653 658 return len(self.revision(rev, raw=False))
654 659
655 660 def chainbase(self, rev):
656 661 base = self._chainbasecache.get(rev)
657 662 if base is not None:
658 663 return base
659 664
660 665 index = self.index
661 666 iterrev = rev
662 667 base = index[iterrev][3]
663 668 while base != iterrev:
664 669 iterrev = base
665 670 base = index[iterrev][3]
666 671
667 672 self._chainbasecache[rev] = base
668 673 return base
669 674
670 675 def linkrev(self, rev):
671 676 return self.index[rev][4]
672 677
673 678 def parentrevs(self, rev):
674 679 try:
675 680 entry = self.index[rev]
676 681 except IndexError:
677 682 if rev == wdirrev:
678 683 raise error.WdirUnsupported
679 684 raise
680 685
681 686 return entry[5], entry[6]
682 687
683 688 # fast parentrevs(rev) where rev isn't filtered
684 689 _uncheckedparentrevs = parentrevs
685 690
686 691 def node(self, rev):
687 692 try:
688 693 return self.index[rev][7]
689 694 except IndexError:
690 695 if rev == wdirrev:
691 696 raise error.WdirUnsupported
692 697 raise
693 698
694 699 # Derived from index values.
695 700
696 701 def end(self, rev):
697 702 return self.start(rev) + self.length(rev)
698 703
699 704 def parents(self, node):
700 705 i = self.index
701 706 d = i[self.rev(node)]
702 707 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
703 708
704 709 def chainlen(self, rev):
705 710 return self._chaininfo(rev)[0]
706 711
707 712 def _chaininfo(self, rev):
708 713 chaininfocache = self._chaininfocache
709 714 if rev in chaininfocache:
710 715 return chaininfocache[rev]
711 716 index = self.index
712 717 generaldelta = self._generaldelta
713 718 iterrev = rev
714 719 e = index[iterrev]
715 720 clen = 0
716 721 compresseddeltalen = 0
717 722 while iterrev != e[3]:
718 723 clen += 1
719 724 compresseddeltalen += e[1]
720 725 if generaldelta:
721 726 iterrev = e[3]
722 727 else:
723 728 iterrev -= 1
724 729 if iterrev in chaininfocache:
725 730 t = chaininfocache[iterrev]
726 731 clen += t[0]
727 732 compresseddeltalen += t[1]
728 733 break
729 734 e = index[iterrev]
730 735 else:
731 736 # Add text length of base since decompressing that also takes
732 737 # work. For cache hits the length is already included.
733 738 compresseddeltalen += e[1]
734 739 r = (clen, compresseddeltalen)
735 740 chaininfocache[rev] = r
736 741 return r
737 742
738 743 def _deltachain(self, rev, stoprev=None):
739 744 """Obtain the delta chain for a revision.
740 745
741 746 ``stoprev`` specifies a revision to stop at. If not specified, we
742 747 stop at the base of the chain.
743 748
744 749 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
745 750 revs in ascending order and ``stopped`` is a bool indicating whether
746 751 ``stoprev`` was hit.
747 752 """
748 753 # Try C implementation.
749 754 try:
750 755 return self.index.deltachain(rev, stoprev, self._generaldelta)
751 756 except AttributeError:
752 757 pass
753 758
754 759 chain = []
755 760
756 761 # Alias to prevent attribute lookup in tight loop.
757 762 index = self.index
758 763 generaldelta = self._generaldelta
759 764
760 765 iterrev = rev
761 766 e = index[iterrev]
762 767 while iterrev != e[3] and iterrev != stoprev:
763 768 chain.append(iterrev)
764 769 if generaldelta:
765 770 iterrev = e[3]
766 771 else:
767 772 iterrev -= 1
768 773 e = index[iterrev]
769 774
770 775 if iterrev == stoprev:
771 776 stopped = True
772 777 else:
773 778 chain.append(iterrev)
774 779 stopped = False
775 780
776 781 chain.reverse()
777 782 return chain, stopped
778 783
779 784 def ancestors(self, revs, stoprev=0, inclusive=False):
780 785 """Generate the ancestors of 'revs' in reverse revision order.
781 786 Does not generate revs lower than stoprev.
782 787
783 788 See the documentation for ancestor.lazyancestors for more details."""
784 789
785 790 # first, make sure start revisions aren't filtered
786 791 revs = list(revs)
787 792 checkrev = self.node
788 793 for r in revs:
789 794 checkrev(r)
790 795 # and we're sure ancestors aren't filtered as well
791 796
792 797 if rustancestor is not None:
793 798 lazyancestors = rustancestor.LazyAncestors
794 799 arg = self.index
795 800 elif util.safehasattr(parsers, 'rustlazyancestors'):
796 801 lazyancestors = ancestor.rustlazyancestors
797 802 arg = self.index
798 803 else:
799 804 lazyancestors = ancestor.lazyancestors
800 805 arg = self._uncheckedparentrevs
801 806 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
802 807
803 808 def descendants(self, revs):
804 809 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
805 810
806 811 def findcommonmissing(self, common=None, heads=None):
807 812 """Return a tuple of the ancestors of common and the ancestors of heads
808 813 that are not ancestors of common. In revset terminology, we return the
809 814 tuple:
810 815
811 816 ::common, (::heads) - (::common)
812 817
813 818 The list is sorted by revision number, meaning it is
814 819 topologically sorted.
815 820
816 821 'heads' and 'common' are both lists of node IDs. If heads is
817 822 not supplied, uses all of the revlog's heads. If common is not
818 823 supplied, uses nullid."""
819 824 if common is None:
820 825 common = [nullid]
821 826 if heads is None:
822 827 heads = self.heads()
823 828
824 829 common = [self.rev(n) for n in common]
825 830 heads = [self.rev(n) for n in heads]
826 831
827 832 # we want the ancestors, but inclusive
828 833 class lazyset(object):
829 834 def __init__(self, lazyvalues):
830 835 self.addedvalues = set()
831 836 self.lazyvalues = lazyvalues
832 837
833 838 def __contains__(self, value):
834 839 return value in self.addedvalues or value in self.lazyvalues
835 840
836 841 def __iter__(self):
837 842 added = self.addedvalues
838 843 for r in added:
839 844 yield r
840 845 for r in self.lazyvalues:
841 846 if r not in added:
842 847 yield r
843 848
844 849 def add(self, value):
845 850 self.addedvalues.add(value)
846 851
847 852 def update(self, values):
848 853 self.addedvalues.update(values)
849 854
850 855 has = lazyset(self.ancestors(common))
851 856 has.add(nullrev)
852 857 has.update(common)
853 858
854 859 # take all ancestors from heads that aren't in has
855 860 missing = set()
856 861 visit = collections.deque(r for r in heads if r not in has)
857 862 while visit:
858 863 r = visit.popleft()
859 864 if r in missing:
860 865 continue
861 866 else:
862 867 missing.add(r)
863 868 for p in self.parentrevs(r):
864 869 if p not in has:
865 870 visit.append(p)
866 871 missing = list(missing)
867 872 missing.sort()
868 873 return has, [self.node(miss) for miss in missing]
869 874
870 875 def incrementalmissingrevs(self, common=None):
871 876 """Return an object that can be used to incrementally compute the
872 877 revision numbers of the ancestors of arbitrary sets that are not
873 878 ancestors of common. This is an ancestor.incrementalmissingancestors
874 879 object.
875 880
876 881 'common' is a list of revision numbers. If common is not supplied, uses
877 882 nullrev.
878 883 """
879 884 if common is None:
880 885 common = [nullrev]
881 886
882 887 if rustancestor is not None:
883 888 return rustancestor.MissingAncestors(self.index, common)
884 889 return ancestor.incrementalmissingancestors(self.parentrevs, common)
885 890
886 891 def findmissingrevs(self, common=None, heads=None):
887 892 """Return the revision numbers of the ancestors of heads that
888 893 are not ancestors of common.
889 894
890 895 More specifically, return a list of revision numbers corresponding to
891 896 nodes N such that every N satisfies the following constraints:
892 897
893 898 1. N is an ancestor of some node in 'heads'
894 899 2. N is not an ancestor of any node in 'common'
895 900
896 901 The list is sorted by revision number, meaning it is
897 902 topologically sorted.
898 903
899 904 'heads' and 'common' are both lists of revision numbers. If heads is
900 905 not supplied, uses all of the revlog's heads. If common is not
901 906 supplied, uses nullid."""
902 907 if common is None:
903 908 common = [nullrev]
904 909 if heads is None:
905 910 heads = self.headrevs()
906 911
907 912 inc = self.incrementalmissingrevs(common=common)
908 913 return inc.missingancestors(heads)
909 914
910 915 def findmissing(self, common=None, heads=None):
911 916 """Return the ancestors of heads that are not ancestors of common.
912 917
913 918 More specifically, return a list of nodes N such that every N
914 919 satisfies the following constraints:
915 920
916 921 1. N is an ancestor of some node in 'heads'
917 922 2. N is not an ancestor of any node in 'common'
918 923
919 924 The list is sorted by revision number, meaning it is
920 925 topologically sorted.
921 926
922 927 'heads' and 'common' are both lists of node IDs. If heads is
923 928 not supplied, uses all of the revlog's heads. If common is not
924 929 supplied, uses nullid."""
925 930 if common is None:
926 931 common = [nullid]
927 932 if heads is None:
928 933 heads = self.heads()
929 934
930 935 common = [self.rev(n) for n in common]
931 936 heads = [self.rev(n) for n in heads]
932 937
933 938 inc = self.incrementalmissingrevs(common=common)
934 939 return [self.node(r) for r in inc.missingancestors(heads)]
935 940
936 941 def nodesbetween(self, roots=None, heads=None):
937 942 """Return a topological path from 'roots' to 'heads'.
938 943
939 944 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
940 945 topologically sorted list of all nodes N that satisfy both of
941 946 these constraints:
942 947
943 948 1. N is a descendant of some node in 'roots'
944 949 2. N is an ancestor of some node in 'heads'
945 950
946 951 Every node is considered to be both a descendant and an ancestor
947 952 of itself, so every reachable node in 'roots' and 'heads' will be
948 953 included in 'nodes'.
949 954
950 955 'outroots' is the list of reachable nodes in 'roots', i.e., the
951 956 subset of 'roots' that is returned in 'nodes'. Likewise,
952 957 'outheads' is the subset of 'heads' that is also in 'nodes'.
953 958
954 959 'roots' and 'heads' are both lists of node IDs. If 'roots' is
955 960 unspecified, uses nullid as the only root. If 'heads' is
956 961 unspecified, uses list of all of the revlog's heads."""
957 962 nonodes = ([], [], [])
958 963 if roots is not None:
959 964 roots = list(roots)
960 965 if not roots:
961 966 return nonodes
962 967 lowestrev = min([self.rev(n) for n in roots])
963 968 else:
964 969 roots = [nullid] # Everybody's a descendant of nullid
965 970 lowestrev = nullrev
966 971 if (lowestrev == nullrev) and (heads is None):
967 972 # We want _all_ the nodes!
968 973 return ([self.node(r) for r in self], [nullid], list(self.heads()))
969 974 if heads is None:
970 975 # All nodes are ancestors, so the latest ancestor is the last
971 976 # node.
972 977 highestrev = len(self) - 1
973 978 # Set ancestors to None to signal that every node is an ancestor.
974 979 ancestors = None
975 980 # Set heads to an empty dictionary for later discovery of heads
976 981 heads = {}
977 982 else:
978 983 heads = list(heads)
979 984 if not heads:
980 985 return nonodes
981 986 ancestors = set()
982 987 # Turn heads into a dictionary so we can remove 'fake' heads.
983 988 # Also, later we will be using it to filter out the heads we can't
984 989 # find from roots.
985 990 heads = dict.fromkeys(heads, False)
986 991 # Start at the top and keep marking parents until we're done.
987 992 nodestotag = set(heads)
988 993 # Remember where the top was so we can use it as a limit later.
989 994 highestrev = max([self.rev(n) for n in nodestotag])
990 995 while nodestotag:
991 996 # grab a node to tag
992 997 n = nodestotag.pop()
993 998 # Never tag nullid
994 999 if n == nullid:
995 1000 continue
996 1001 # A node's revision number represents its place in a
997 1002 # topologically sorted list of nodes.
998 1003 r = self.rev(n)
999 1004 if r >= lowestrev:
1000 1005 if n not in ancestors:
1001 1006 # If we are possibly a descendant of one of the roots
1002 1007 # and we haven't already been marked as an ancestor
1003 1008 ancestors.add(n) # Mark as ancestor
1004 1009 # Add non-nullid parents to list of nodes to tag.
1005 1010 nodestotag.update([p for p in self.parents(n) if
1006 1011 p != nullid])
1007 1012 elif n in heads: # We've seen it before, is it a fake head?
1008 1013 # So it is, real heads should not be the ancestors of
1009 1014 # any other heads.
1010 1015 heads.pop(n)
1011 1016 if not ancestors:
1012 1017 return nonodes
1013 1018 # Now that we have our set of ancestors, we want to remove any
1014 1019 # roots that are not ancestors.
1015 1020
1016 1021 # If one of the roots was nullid, everything is included anyway.
1017 1022 if lowestrev > nullrev:
1018 1023 # But, since we weren't, let's recompute the lowest rev to not
1019 1024 # include roots that aren't ancestors.
1020 1025
1021 1026 # Filter out roots that aren't ancestors of heads
1022 1027 roots = [root for root in roots if root in ancestors]
1023 1028 # Recompute the lowest revision
1024 1029 if roots:
1025 1030 lowestrev = min([self.rev(root) for root in roots])
1026 1031 else:
1027 1032 # No more roots? Return empty list
1028 1033 return nonodes
1029 1034 else:
1030 1035 # We are descending from nullid, and don't need to care about
1031 1036 # any other roots.
1032 1037 lowestrev = nullrev
1033 1038 roots = [nullid]
1034 1039 # Transform our roots list into a set.
1035 1040 descendants = set(roots)
1036 1041 # Also, keep the original roots so we can filter out roots that aren't
1037 1042 # 'real' roots (i.e. are descended from other roots).
1038 1043 roots = descendants.copy()
1039 1044 # Our topologically sorted list of output nodes.
1040 1045 orderedout = []
1041 1046 # Don't start at nullid since we don't want nullid in our output list,
1042 1047 # and if nullid shows up in descendants, empty parents will look like
1043 1048 # they're descendants.
1044 1049 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1045 1050 n = self.node(r)
1046 1051 isdescendant = False
1047 1052 if lowestrev == nullrev: # Everybody is a descendant of nullid
1048 1053 isdescendant = True
1049 1054 elif n in descendants:
1050 1055 # n is already a descendant
1051 1056 isdescendant = True
1052 1057 # This check only needs to be done here because all the roots
1053 1058 # will start being marked as descendants before the loop.
1054 1059 if n in roots:
1055 1060 # If n was a root, check if it's a 'real' root.
1056 1061 p = tuple(self.parents(n))
1057 1062 # If any of its parents are descendants, it's not a root.
1058 1063 if (p[0] in descendants) or (p[1] in descendants):
1059 1064 roots.remove(n)
1060 1065 else:
1061 1066 p = tuple(self.parents(n))
1062 1067 # A node is a descendant if either of its parents are
1063 1068 # descendants. (We seeded the descendants set with the roots
1064 1069 # up there, remember?)
1065 1070 if (p[0] in descendants) or (p[1] in descendants):
1066 1071 descendants.add(n)
1067 1072 isdescendant = True
1068 1073 if isdescendant and ((ancestors is None) or (n in ancestors)):
1069 1074 # Only include nodes that are both descendants and ancestors.
1070 1075 orderedout.append(n)
1071 1076 if (ancestors is not None) and (n in heads):
1072 1077 # We're trying to figure out which heads are reachable
1073 1078 # from roots.
1074 1079 # Mark this head as having been reached
1075 1080 heads[n] = True
1076 1081 elif ancestors is None:
1077 1082 # Otherwise, we're trying to discover the heads.
1078 1083 # Assume this is a head because if it isn't, the next step
1079 1084 # will eventually remove it.
1080 1085 heads[n] = True
1081 1086 # But, obviously its parents aren't.
1082 1087 for p in self.parents(n):
1083 1088 heads.pop(p, None)
1084 1089 heads = [head for head, flag in heads.iteritems() if flag]
1085 1090 roots = list(roots)
1086 1091 assert orderedout
1087 1092 assert roots
1088 1093 assert heads
1089 1094 return (orderedout, roots, heads)
1090 1095
1091 1096 def headrevs(self, revs=None):
1092 1097 if revs is None:
1093 1098 try:
1094 1099 return self.index.headrevs()
1095 1100 except AttributeError:
1096 1101 return self._headrevs()
1097 1102 if rustdagop is not None:
1098 1103 return rustdagop.headrevs(self.index, revs)
1099 1104 return dagop.headrevs(revs, self._uncheckedparentrevs)
1100 1105
1101 1106 def computephases(self, roots):
1102 1107 return self.index.computephasesmapsets(roots)
1103 1108
1104 1109 def _headrevs(self):
1105 1110 count = len(self)
1106 1111 if not count:
1107 1112 return [nullrev]
1108 1113 # we won't iterate over filtered revs, so nobody is a head at the start
1109 1114 ishead = [0] * (count + 1)
1110 1115 index = self.index
1111 1116 for r in self:
1112 1117 ishead[r] = 1 # I may be a head
1113 1118 e = index[r]
1114 1119 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1115 1120 return [r for r, val in enumerate(ishead) if val]
1116 1121
1117 1122 def heads(self, start=None, stop=None):
1118 1123 """return the list of all nodes that have no children
1119 1124
1120 1125 if start is specified, only heads that are descendants of
1121 1126 start will be returned
1122 1127 if stop is specified, it will consider all the revs from stop
1123 1128 as if they had no children
1124 1129 """
1125 1130 if start is None and stop is None:
1126 1131 if not len(self):
1127 1132 return [nullid]
1128 1133 return [self.node(r) for r in self.headrevs()]
1129 1134
1130 1135 if start is None:
1131 1136 start = nullrev
1132 1137 else:
1133 1138 start = self.rev(start)
1134 1139
1135 1140 stoprevs = set(self.rev(n) for n in stop or [])
1136 1141
1137 1142 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1138 1143 stoprevs=stoprevs)
1139 1144
1140 1145 return [self.node(rev) for rev in revs]
1141 1146
1142 1147 def children(self, node):
1143 1148 """find the children of a given node"""
1144 1149 c = []
1145 1150 p = self.rev(node)
1146 1151 for r in self.revs(start=p + 1):
1147 1152 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1148 1153 if prevs:
1149 1154 for pr in prevs:
1150 1155 if pr == p:
1151 1156 c.append(self.node(r))
1152 1157 elif p == nullrev:
1153 1158 c.append(self.node(r))
1154 1159 return c
1155 1160
1156 1161 def commonancestorsheads(self, a, b):
1157 1162 """calculate all the heads of the common ancestors of nodes a and b"""
1158 1163 a, b = self.rev(a), self.rev(b)
1159 1164 ancs = self._commonancestorsheads(a, b)
1160 1165 return pycompat.maplist(self.node, ancs)
1161 1166
1162 1167 def _commonancestorsheads(self, *revs):
1163 1168 """calculate all the heads of the common ancestors of revs"""
1164 1169 try:
1165 1170 ancs = self.index.commonancestorsheads(*revs)
1166 1171 except (AttributeError, OverflowError): # C implementation failed
1167 1172 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1168 1173 return ancs
1169 1174
1170 1175 def isancestor(self, a, b):
1171 1176 """return True if node a is an ancestor of node b
1172 1177
1173 1178 A revision is considered an ancestor of itself."""
1174 1179 a, b = self.rev(a), self.rev(b)
1175 1180 return self.isancestorrev(a, b)
1176 1181
1177 1182 def isancestorrev(self, a, b):
1178 1183 """return True if revision a is an ancestor of revision b
1179 1184
1180 1185 A revision is considered an ancestor of itself.
1181 1186
1182 1187 The implementation of this is trivial but the use of
1183 1188 reachableroots is not."""
1184 1189 if a == nullrev:
1185 1190 return True
1186 1191 elif a == b:
1187 1192 return True
1188 1193 elif a > b:
1189 1194 return False
1190 1195 return bool(self.reachableroots(a, [b], [a], includepath=False))
1191 1196
1192 1197 def reachableroots(self, minroot, heads, roots, includepath=False):
1193 1198 """return (heads(::<roots> and <roots>::<heads>))
1194 1199
1195 1200 If includepath is True, return (<roots>::<heads>)."""
1196 1201 try:
1197 1202 return self.index.reachableroots2(minroot, heads, roots,
1198 1203 includepath)
1199 1204 except AttributeError:
1200 1205 return dagop._reachablerootspure(self.parentrevs,
1201 1206 minroot, roots, heads, includepath)
1202 1207
1203 1208 def ancestor(self, a, b):
1204 1209 """calculate the "best" common ancestor of nodes a and b"""
1205 1210
1206 1211 a, b = self.rev(a), self.rev(b)
1207 1212 try:
1208 1213 ancs = self.index.ancestors(a, b)
1209 1214 except (AttributeError, OverflowError):
1210 1215 ancs = ancestor.ancestors(self.parentrevs, a, b)
1211 1216 if ancs:
1212 1217 # choose a consistent winner when there's a tie
1213 1218 return min(map(self.node, ancs))
1214 1219 return nullid
1215 1220
1216 1221 def _match(self, id):
1217 1222 if isinstance(id, int):
1218 1223 # rev
1219 1224 return self.node(id)
1220 1225 if len(id) == 20:
1221 1226 # possibly a binary node
1222 1227 # odds of a binary node being all hex in ASCII are 1 in 10**25
1223 1228 try:
1224 1229 node = id
1225 1230 self.rev(node) # quick search the index
1226 1231 return node
1227 1232 except error.LookupError:
1228 1233 pass # may be partial hex id
1229 1234 try:
1230 1235 # str(rev)
1231 1236 rev = int(id)
1232 1237 if "%d" % rev != id:
1233 1238 raise ValueError
1234 1239 if rev < 0:
1235 1240 rev = len(self) + rev
1236 1241 if rev < 0 or rev >= len(self):
1237 1242 raise ValueError
1238 1243 return self.node(rev)
1239 1244 except (ValueError, OverflowError):
1240 1245 pass
1241 1246 if len(id) == 40:
1242 1247 try:
1243 1248 # a full hex nodeid?
1244 1249 node = bin(id)
1245 1250 self.rev(node)
1246 1251 return node
1247 1252 except (TypeError, error.LookupError):
1248 1253 pass
1249 1254
1250 1255 def _partialmatch(self, id):
1251 1256 # we don't care about wdirfilenodeids as they should always be full hashes
1252 1257 maybewdir = wdirhex.startswith(id)
1253 1258 try:
1254 1259 partial = self.index.partialmatch(id)
1255 1260 if partial and self.hasnode(partial):
1256 1261 if maybewdir:
1257 1262 # single 'ff...' match in radix tree, ambiguous with wdir
1258 1263 raise error.RevlogError
1259 1264 return partial
1260 1265 if maybewdir:
1261 1266 # no 'ff...' match in radix tree, wdir identified
1262 1267 raise error.WdirUnsupported
1263 1268 return None
1264 1269 except error.RevlogError:
1265 1270 # parsers.c radix tree lookup gave multiple matches
1266 1271 # fast path: for unfiltered changelog, radix tree is accurate
1267 1272 if not getattr(self, 'filteredrevs', None):
1268 1273 raise error.AmbiguousPrefixLookupError(
1269 1274 id, self.indexfile, _('ambiguous identifier'))
1270 1275 # fall through to slow path that filters hidden revisions
1271 1276 except (AttributeError, ValueError):
1272 1277 # we are pure python, or key was too short to search radix tree
1273 1278 pass
1274 1279
1275 1280 if id in self._pcache:
1276 1281 return self._pcache[id]
1277 1282
1278 1283 if len(id) <= 40:
1279 1284 try:
1280 1285 # hex(node)[:...]
1281 1286 l = len(id) // 2 # grab an even number of digits
1282 1287 prefix = bin(id[:l * 2])
1283 1288 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1284 1289 nl = [n for n in nl if hex(n).startswith(id) and
1285 1290 self.hasnode(n)]
1286 1291 if nullhex.startswith(id):
1287 1292 nl.append(nullid)
1288 1293 if len(nl) > 0:
1289 1294 if len(nl) == 1 and not maybewdir:
1290 1295 self._pcache[id] = nl[0]
1291 1296 return nl[0]
1292 1297 raise error.AmbiguousPrefixLookupError(
1293 1298 id, self.indexfile, _('ambiguous identifier'))
1294 1299 if maybewdir:
1295 1300 raise error.WdirUnsupported
1296 1301 return None
1297 1302 except TypeError:
1298 1303 pass
1299 1304
1300 1305 def lookup(self, id):
1301 1306 """locate a node based on:
1302 1307 - revision number or str(revision number)
1303 1308 - nodeid or subset of hex nodeid
1304 1309 """
1305 1310 n = self._match(id)
1306 1311 if n is not None:
1307 1312 return n
1308 1313 n = self._partialmatch(id)
1309 1314 if n:
1310 1315 return n
1311 1316
1312 1317 raise error.LookupError(id, self.indexfile, _('no match found'))
1313 1318
1314 1319 def shortest(self, node, minlength=1):
1315 1320 """Find the shortest unambiguous prefix that matches node."""
1316 1321 def isvalid(prefix):
1317 1322 try:
1318 1323 matchednode = self._partialmatch(prefix)
1319 1324 except error.AmbiguousPrefixLookupError:
1320 1325 return False
1321 1326 except error.WdirUnsupported:
1322 1327 # single 'ff...' match
1323 1328 return True
1324 1329 if matchednode is None:
1325 1330 raise error.LookupError(node, self.indexfile, _('no node'))
1326 1331 return True
1327 1332
1328 1333 def maybewdir(prefix):
1329 1334 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1330 1335
1331 1336 hexnode = hex(node)
1332 1337
1333 1338 def disambiguate(hexnode, minlength):
1334 1339 """Disambiguate against wdirid."""
1335 1340 for length in range(minlength, 41):
1336 1341 prefix = hexnode[:length]
1337 1342 if not maybewdir(prefix):
1338 1343 return prefix
1339 1344
1340 1345 if not getattr(self, 'filteredrevs', None):
1341 1346 try:
1342 1347 length = max(self.index.shortest(node), minlength)
1343 1348 return disambiguate(hexnode, length)
1344 1349 except error.RevlogError:
1345 1350 if node != wdirid:
1346 1351 raise error.LookupError(node, self.indexfile, _('no node'))
1347 1352 except AttributeError:
1348 1353 # Fall through to pure code
1349 1354 pass
1350 1355
1351 1356 if node == wdirid:
1352 1357 for length in range(minlength, 41):
1353 1358 prefix = hexnode[:length]
1354 1359 if isvalid(prefix):
1355 1360 return prefix
1356 1361
1357 1362 for length in range(minlength, 41):
1358 1363 prefix = hexnode[:length]
1359 1364 if isvalid(prefix):
1360 1365 return disambiguate(hexnode, length)
1361 1366
1362 1367 def cmp(self, node, text):
1363 1368 """compare text with a given file revision
1364 1369
1365 1370 returns True if text is different than what is stored.
1366 1371 """
1367 1372 p1, p2 = self.parents(node)
1368 1373 return storageutil.hashrevisionsha1(text, p1, p2) != node
1369 1374
1370 1375 def _cachesegment(self, offset, data):
1371 1376 """Add a segment to the revlog cache.
1372 1377
1373 1378 Accepts an absolute offset and the data that is at that location.
1374 1379 """
1375 1380 o, d = self._chunkcache
1376 1381 # try to add to existing cache
1377 1382 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1378 1383 self._chunkcache = o, d + data
1379 1384 else:
1380 1385 self._chunkcache = offset, data
1381 1386
1382 1387 def _readsegment(self, offset, length, df=None):
1383 1388 """Load a segment of raw data from the revlog.
1384 1389
1385 1390 Accepts an absolute offset, length to read, and an optional existing
1386 1391 file handle to read from.
1387 1392
1388 1393 If an existing file handle is passed, it will be seeked and the
1389 1394 original seek position will NOT be restored.
1390 1395
1391 1396 Returns a str or buffer of raw byte data.
1392 1397
1393 1398 Raises if the requested number of bytes could not be read.
1394 1399 """
1395 1400 # Cache data both forward and backward around the requested
1396 1401 # data, in a fixed size window. This helps speed up operations
1397 1402 # involving reading the revlog backwards.
1398 1403 cachesize = self._chunkcachesize
1399 1404 realoffset = offset & ~(cachesize - 1)
1400 1405 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1401 1406 - realoffset)
1402 1407 with self._datareadfp(df) as df:
1403 1408 df.seek(realoffset)
1404 1409 d = df.read(reallength)
1405 1410
1406 1411 self._cachesegment(realoffset, d)
1407 1412 if offset != realoffset or reallength != length:
1408 1413 startoffset = offset - realoffset
1409 1414 if len(d) - startoffset < length:
1410 1415 raise error.RevlogError(
1411 1416 _('partial read of revlog %s; expected %d bytes from '
1412 1417 'offset %d, got %d') %
1413 1418 (self.indexfile if self._inline else self.datafile,
1414 1419 length, realoffset, len(d) - startoffset))
1415 1420
1416 1421 return util.buffer(d, startoffset, length)
1417 1422
1418 1423 if len(d) < length:
1419 1424 raise error.RevlogError(
1420 1425 _('partial read of revlog %s; expected %d bytes from offset '
1421 1426 '%d, got %d') %
1422 1427 (self.indexfile if self._inline else self.datafile,
1423 1428 length, offset, len(d)))
1424 1429
1425 1430 return d
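
A worked example of the window arithmetic above (editorial), using the
default 64 KiB chunk cache size:

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert (realoffset, reallength) == (65536, 65536)
    # the 100 bytes at 70000 are served from the cached [65536, 131072) window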
1426 1431
1427 1432 def _getsegment(self, offset, length, df=None):
1428 1433 """Obtain a segment of raw data from the revlog.
1429 1434
1430 1435 Accepts an absolute offset, length of bytes to obtain, and an
1431 1436 optional file handle to the already-opened revlog. If the file
1432 1437 handle is used, its original seek position will not be preserved.
1433 1438
1434 1439 Requests for data may be returned from a cache.
1435 1440
1436 1441 Returns a str or a buffer instance of raw byte data.
1437 1442 """
1438 1443 o, d = self._chunkcache
1439 1444 l = len(d)
1440 1445
1441 1446 # is it in the cache?
1442 1447 cachestart = offset - o
1443 1448 cacheend = cachestart + length
1444 1449 if cachestart >= 0 and cacheend <= l:
1445 1450 if cachestart == 0 and cacheend == l:
1446 1451 return d # avoid a copy
1447 1452 return util.buffer(d, cachestart, cacheend - cachestart)
1448 1453
1449 1454 return self._readsegment(offset, length, df=df)
1450 1455
1451 1456 def _getsegmentforrevs(self, startrev, endrev, df=None):
1452 1457 """Obtain a segment of raw data corresponding to a range of revisions.
1453 1458
1454 1459 Accepts the start and end revisions and an optional already-open
1455 1460 file handle to be used for reading. If the file handle is read, its
1456 1461 seek position will not be preserved.
1457 1462
1458 1463 Requests for data may be satisfied by a cache.
1459 1464
1460 1465 Returns a 2-tuple of (offset, data) for the requested range of
1461 1466 revisions. Offset is the integer offset from the beginning of the
1462 1467 revlog and data is a str or buffer of the raw byte data.
1463 1468
1464 1469 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1465 1470 to determine where each revision's data begins and ends.
1466 1471 """
1467 1472 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1468 1473 # (functions are expensive).
1469 1474 index = self.index
1470 1475 istart = index[startrev]
1471 1476 start = int(istart[0] >> 16)
1472 1477 if startrev == endrev:
1473 1478 end = start + istart[1]
1474 1479 else:
1475 1480 iend = index[endrev]
1476 1481 end = int(iend[0] >> 16) + iend[1]
1477 1482
1478 1483 if self._inline:
1479 1484 start += (startrev + 1) * self._io.size
1480 1485 end += (endrev + 1) * self._io.size
1481 1486 length = end - start
1482 1487
1483 1488 return start, self._getsegment(start, length, df=df)
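
An editorial illustration of the inline adjustment above: in an inline
revlog the .i file interleaves index entries and data chunks, so rev N's
chunk sits after N+1 index records (revs 0..N) in addition to its offset in
the pure data stream:

    iosize = 64                      # indexformatng.size
    datastreamoffset = 120           # offset recorded in the index for rev 3
    fileoffset = datastreamoffset + (3 + 1) * iosize   # physical offset: 376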
1484 1489
1485 1490 def _chunk(self, rev, df=None):
1486 1491 """Obtain a single decompressed chunk for a revision.
1487 1492
1488 1493 Accepts an integer revision and an optional already-open file handle
1489 1494 to be used for reading. If used, the seek position of the file will not
1490 1495 be preserved.
1491 1496
1492 1497 Returns a str holding uncompressed data for the requested revision.
1493 1498 """
1494 1499 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1495 1500
1496 1501 def _chunks(self, revs, df=None, targetsize=None):
1497 1502 """Obtain decompressed chunks for the specified revisions.
1498 1503
1499 1504 Accepts an iterable of numeric revisions that are assumed to be in
1500 1505 ascending order. Also accepts an optional already-open file handle
1501 1506 to be used for reading. If used, the seek position of the file will
1502 1507 not be preserved.
1503 1508
1504 1509 This function is similar to calling ``self._chunk()`` multiple times,
1505 1510 but is faster.
1506 1511
1507 1512 Returns a list with decompressed data for each requested revision.
1508 1513 """
1509 1514 if not revs:
1510 1515 return []
1511 1516 start = self.start
1512 1517 length = self.length
1513 1518 inline = self._inline
1514 1519 iosize = self._io.size
1515 1520 buffer = util.buffer
1516 1521
1517 1522 l = []
1518 1523 ladd = l.append
1519 1524
1520 1525 if not self._withsparseread:
1521 1526 slicedchunks = (revs,)
1522 1527 else:
1523 1528 slicedchunks = deltautil.slicechunk(self, revs,
1524 1529 targetsize=targetsize)
1525 1530
1526 1531 for revschunk in slicedchunks:
1527 1532 firstrev = revschunk[0]
1528 1533 # Skip trailing revisions with empty diff
1529 1534 for lastrev in revschunk[::-1]:
1530 1535 if length(lastrev) != 0:
1531 1536 break
1532 1537
1533 1538 try:
1534 1539 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1535 1540 except OverflowError:
1536 1541 # issue4215 - we can't cache a run of chunks greater than
1537 1542 # 2G on Windows
1538 1543 return [self._chunk(rev, df=df) for rev in revschunk]
1539 1544
1540 1545 decomp = self.decompress
1541 1546 for rev in revschunk:
1542 1547 chunkstart = start(rev)
1543 1548 if inline:
1544 1549 chunkstart += (rev + 1) * iosize
1545 1550 chunklength = length(rev)
1546 1551 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1547 1552
1548 1553 return l
1549 1554
1550 1555 def _chunkclear(self):
1551 1556 """Clear the raw chunk cache."""
1552 1557 self._chunkcache = (0, '')
1553 1558
1554 1559 def deltaparent(self, rev):
1555 1560 """return deltaparent of the given revision"""
1556 1561 base = self.index[rev][3]
1557 1562 if base == rev:
1558 1563 return nullrev
1559 1564 elif self._generaldelta:
1560 1565 return base
1561 1566 else:
1562 1567 return rev - 1
1563 1568
1564 1569 def issnapshot(self, rev):
1565 1570 """tells whether rev is a snapshot
1566 1571 """
1567 1572 if not self._sparserevlog:
1568 1573 return self.deltaparent(rev) == nullrev
1569 1574 elif util.safehasattr(self.index, 'issnapshot'):
1570 1575 # directly assign the method to cache the testing and access
1571 1576 self.issnapshot = self.index.issnapshot
1572 1577 return self.issnapshot(rev)
1573 1578 if rev == nullrev:
1574 1579 return True
1575 1580 entry = self.index[rev]
1576 1581 base = entry[3]
1577 1582 if base == rev:
1578 1583 return True
1579 1584 if base == nullrev:
1580 1585 return True
1581 1586 p1 = entry[5]
1582 1587 p2 = entry[6]
1583 1588 if base == p1 or base == p2:
1584 1589 return False
1585 1590 return self.issnapshot(base)
1586 1591
1587 1592 def snapshotdepth(self, rev):
1588 1593         """number of snapshots in the chain before this one"""
1589 1594 if not self.issnapshot(rev):
1590 1595             raise error.ProgrammingError('revision %d not a snapshot' % rev)
1591 1596 return len(self._deltachain(rev)[0]) - 1
1592 1597
1593 1598 def revdiff(self, rev1, rev2):
1594 1599 """return or calculate a delta between two revisions
1595 1600
1596 1601 The delta calculated is in binary form and is intended to be written to
1597 1602 revlog data directly. So this function needs raw revision data.
1598 1603 """
1599 1604 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1600 1605 return bytes(self._chunk(rev2))
1601 1606
1602 1607 return mdiff.textdiff(self.rawdata(rev1),
1603 1608 self.rawdata(rev2))
1604 1609
1605 1610 def _processflags(self, text, flags, operation, raw=False):
1606 1611 """deprecated entry point to access flag processors"""
1607 1612         msg = ('_processflags(...) is deprecated, use the specialized variant')
1608 1613 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1609 1614 if raw:
1610 1615 return text, flagutil.processflagsraw(self, text, flags)
1611 1616 elif operation == 'read':
1612 1617 return flagutil.processflagsread(self, text, flags)
1613 1618 else: # write operation
1614 1619 return flagutil.processflagswrite(self, text, flags)
1615 1620
1616 1621 def revision(self, nodeorrev, _df=None, raw=False):
1617 1622 """return an uncompressed revision of a given node or revision
1618 1623 number.
1619 1624
1620 1625 _df - an existing file handle to read from. (internal-only)
1621 1626 raw - an optional argument specifying if the revision data is to be
1622 1627 treated as raw data when applying flag transforms. 'raw' should be set
1623 1628 to True when generating changegroups or in debug commands.
1624 1629 """
1625 1630 if raw:
1626 1631 msg = ('revlog.revision(..., raw=True) is deprecated, '
1627 1632 'use revlog.rawdata(...)')
1628 1633 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1629 1634 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1630 1635
1631 1636 def sidedata(self, nodeorrev, _df=None):
1632 1637 """a map of extra data related to the changeset but not part of the hash
1633 1638
1634 1639         This function currently returns a dictionary. However, a more
1635 1640         advanced mapping object will likely be used in the future for
1636 1641         more efficient/lazy code.
1637 1642 """
1638 1643 return self._revisiondata(nodeorrev, _df)[1]
1639 1644
1640 1645 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1641 1646 # deal with <nodeorrev> argument type
1642 1647 if isinstance(nodeorrev, int):
1643 1648 rev = nodeorrev
1644 1649 node = self.node(rev)
1645 1650 else:
1646 1651 node = nodeorrev
1647 1652 rev = None
1648 1653
1649 1654 # fast path the special `nullid` rev
1650 1655 if node == nullid:
1651 1656 return "", {}
1652 1657
1653 1658 # The text as stored inside the revlog. Might be the revision or might
1654 1659 # need to be processed to retrieve the revision.
1655 1660 rawtext = None
1656 1661
1657 1662 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1658 1663
1659 1664 if raw and validated:
1660 1665             # if we don't want to process the raw text and the raw
1661 1666             # text is cached, we can exit early.
1662 1667 return rawtext, {}
1663 1668 if rev is None:
1664 1669 rev = self.rev(node)
1665 1670         # the revlog's flags for this revision
1666 1671         # (they usually alter its state or content)
1667 1672 flags = self.flags(rev)
1668 1673
1669 1674 if validated and flags == REVIDX_DEFAULT_FLAGS:
1670 1675 # no extra flags set, no flag processor runs, text = rawtext
1671 1676 return rawtext, {}
1672 1677
1673 1678 sidedata = {}
1674 1679 if raw:
1675 1680 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1676 1681 text = rawtext
1677 1682 else:
1678 1683 try:
1679 1684 r = flagutil.processflagsread(self, rawtext, flags)
1680 1685 except error.SidedataHashError as exc:
1681 1686 msg = _("integrity check failed on %s:%s sidedata key %d")
1682 1687 msg %= (self.indexfile, pycompat.bytestr(rev),
1683 1688 exc.sidedatakey)
1684 1689 raise error.RevlogError(msg)
1685 1690 text, validatehash, sidedata = r
1686 1691 if validatehash:
1687 1692 self.checkhash(text, node, rev=rev)
1688 1693 if not validated:
1689 1694 self._revisioncache = (node, rev, rawtext)
1690 1695
1691 1696 return text, sidedata
1692 1697
1693 1698 def _rawtext(self, node, rev, _df=None):
1694 1699 """return the possibly unvalidated rawtext for a revision
1695 1700
1696 1701 returns (rev, rawtext, validated)
1697 1702 """
1698 1703
1699 1704 # revision in the cache (could be useful to apply delta)
1700 1705 cachedrev = None
1701 1706 # An intermediate text to apply deltas to
1702 1707 basetext = None
1703 1708
1704 1709 # Check if we have the entry in cache
1705 1710 # The cache entry looks like (node, rev, rawtext)
1706 1711 if self._revisioncache:
1707 1712 if self._revisioncache[0] == node:
1708 1713 return (rev, self._revisioncache[2], True)
1709 1714 cachedrev = self._revisioncache[1]
1710 1715
1711 1716 if rev is None:
1712 1717 rev = self.rev(node)
1713 1718
1714 1719 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1715 1720 if stopped:
1716 1721 basetext = self._revisioncache[2]
1717 1722
1718 1723 # drop cache to save memory, the caller is expected to
1719 1724 # update self._revisioncache after validating the text
1720 1725 self._revisioncache = None
1721 1726
1722 1727 targetsize = None
1723 1728 rawsize = self.index[rev][2]
1724 1729 if 0 <= rawsize:
1725 1730 targetsize = 4 * rawsize
1726 1731
1727 1732 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1728 1733 if basetext is None:
1729 1734 basetext = bytes(bins[0])
1730 1735 bins = bins[1:]
1731 1736
1732 1737 rawtext = mdiff.patches(basetext, bins)
1733 1738 del basetext # let us have a chance to free memory early
1734 1739 return (rev, rawtext, False)
1735 1740
1736 1741 def rawdata(self, nodeorrev, _df=None):
1737 1742 """return an uncompressed raw data of a given node or revision number.
1738 1743
1739 1744 _df - an existing file handle to read from. (internal-only)
1740 1745 """
1741 1746 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1742 1747
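A hedged usage sketch of the three read entry points above (``rl`` and
``node`` are assumed to exist in the calling context; this is not part of
the change itself)::

    text = rl.revision(node)      # processed revision text
    raw = rl.rawdata(node)        # rawtext as stored, flags not applied
    extras = rl.sidedata(node)    # dict mapping sidedata key -> bytes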
1743 1748 def hash(self, text, p1, p2):
1744 1749 """Compute a node hash.
1745 1750
1746 1751 Available as a function so that subclasses can replace the hash
1747 1752 as needed.
1748 1753 """
1749 1754 return storageutil.hashrevisionsha1(text, p1, p2)
1750 1755
1751 1756 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1752 1757 """Check node hash integrity.
1753 1758
1754 1759 Available as a function so that subclasses can extend hash mismatch
1755 1760 behaviors as needed.
1756 1761 """
1757 1762 try:
1758 1763 if p1 is None and p2 is None:
1759 1764 p1, p2 = self.parents(node)
1760 1765 if node != self.hash(text, p1, p2):
1761 1766 # Clear the revision cache on hash failure. The revision cache
1762 1767 # only stores the raw revision and clearing the cache does have
1763 1768 # the side-effect that we won't have a cache hit when the raw
1764 1769 # revision data is accessed. But this case should be rare and
1765 1770 # it is extra work to teach the cache about the hash
1766 1771 # verification state.
1767 1772 if self._revisioncache and self._revisioncache[0] == node:
1768 1773 self._revisioncache = None
1769 1774
1770 1775 revornode = rev
1771 1776 if revornode is None:
1772 1777 revornode = templatefilters.short(hex(node))
1773 1778 raise error.RevlogError(_("integrity check failed on %s:%s")
1774 1779 % (self.indexfile, pycompat.bytestr(revornode)))
1775 1780 except error.RevlogError:
1776 1781 if self._censorable and storageutil.iscensoredtext(text):
1777 1782 raise error.CensoredNodeError(self.indexfile, node, text)
1778 1783 raise
1779 1784
1780 1785 def _enforceinlinesize(self, tr, fp=None):
1781 1786 """Check if the revlog is too big for inline and convert if so.
1782 1787
1783 1788 This should be called after revisions are added to the revlog. If the
1784 1789 revlog has grown too large to be an inline revlog, it will convert it
1785 1790 to use multiple index and data files.
1786 1791 """
1787 1792 tiprev = len(self) - 1
1788 1793 if (not self._inline or
1789 1794 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1790 1795 return
1791 1796
1792 1797 trinfo = tr.find(self.indexfile)
1793 1798 if trinfo is None:
1794 1799 raise error.RevlogError(_("%s not found in the transaction")
1795 1800 % self.indexfile)
1796 1801
1797 1802 trindex = trinfo[2]
1798 1803 if trindex is not None:
1799 1804 dataoff = self.start(trindex)
1800 1805 else:
1801 1806 # revlog was stripped at start of transaction, use all leftover data
1802 1807 trindex = len(self) - 1
1803 1808 dataoff = self.end(tiprev)
1804 1809
1805 1810 tr.add(self.datafile, dataoff)
1806 1811
1807 1812 if fp:
1808 1813 fp.flush()
1809 1814 fp.close()
1810 1815 # We can't use the cached file handle after close(). So prevent
1811 1816 # its usage.
1812 1817 self._writinghandles = None
1813 1818
1814 1819 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1815 1820 for r in self:
1816 1821 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1817 1822
1818 1823 with self._indexfp('w') as fp:
1819 1824 self.version &= ~FLAG_INLINE_DATA
1820 1825 self._inline = False
1821 1826 io = self._io
1822 1827 for i in self:
1823 1828 e = io.packentry(self.index[i], self.node, self.version, i)
1824 1829 fp.write(e)
1825 1830
1826 1831         # the temp file replaces the real index when we exit the context
1827 1832 # manager
1828 1833
1829 1834 tr.replace(self.indexfile, trindex * self._io.size)
1830 1835 self._chunkclear()
1831 1836
1832 1837 def _nodeduplicatecallback(self, transaction, node):
1833 1838 """called when trying to add a node already stored.
1834 1839 """
1835 1840
1836 1841 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1837 1842 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1838 1843 sidedata=None):
1839 1844 """add a revision to the log
1840 1845
1841 1846 text - the revision data to add
1842 1847 transaction - the transaction object used for rollback
1843 1848 link - the linkrev data to add
1844 1849 p1, p2 - the parent nodeids of the revision
1845 1850 cachedelta - an optional precomputed delta
1846 1851 node - nodeid of revision; typically node is not specified, and it is
1847 1852 computed by default as hash(text, p1, p2), however subclasses might
1848 1853 use different hashing method (and override checkhash() in such case)
1849 1854 flags - the known flags to set on the revision
1850 1855 deltacomputer - an optional deltacomputer instance shared between
1851 1856 multiple calls
1852 1857 """
1853 1858 if link == nullrev:
1854 1859 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1855 1860 % self.indexfile)
1856 1861
1857 1862 if sidedata is None:
1858 1863 sidedata = {}
1859 1864 elif not self.hassidedata:
1860 1865 raise error.ProgrammingError(
1861 1866                 _("trying to add sidedata to a revlog that does not support them")
1862 1867 )
1863 1868
1864 1869 if flags:
1865 1870 node = node or self.hash(text, p1, p2)
1866 1871
1867 1872 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1868 1873 sidedata=sidedata)
1869 1874
1870 1875 # If the flag processor modifies the revision data, ignore any provided
1871 1876 # cachedelta.
1872 1877 if rawtext != text:
1873 1878 cachedelta = None
1874 1879
1875 1880 if len(rawtext) > _maxentrysize:
1876 1881 raise error.RevlogError(
1877 1882 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1878 1883 % (self.indexfile, len(rawtext)))
1879 1884
1880 1885 node = node or self.hash(rawtext, p1, p2)
1881 1886 if node in self.nodemap:
1882 1887 return node
1883 1888
1884 1889 if validatehash:
1885 1890 self.checkhash(rawtext, node, p1=p1, p2=p2)
1886 1891
1887 1892 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1888 1893 flags, cachedelta=cachedelta,
1889 1894 deltacomputer=deltacomputer)
1890 1895
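A minimal sketch of a typical ``addrevision`` call, assuming an open
transaction ``tr``, a link revision ``linkrev`` and parent nodeids
``p1``/``p2`` (``nullid`` for a root revision)::

    node = rl.addrevision(text, tr, linkrev, p1, p2)
    # sidedata may only be passed when rl.hassidedata is true:
    # node = rl.addrevision(text, tr, linkrev, p1, p2,
    #                       sidedata={1: b'cached-value'})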
1891 1896 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1892 1897 cachedelta=None, deltacomputer=None):
1893 1898 """add a raw revision with known flags, node and parents
1894 1899 useful when reusing a revision not stored in this revlog (ex: received
1895 1900 over wire, or read from an external bundle).
1896 1901 """
1897 1902 dfh = None
1898 1903 if not self._inline:
1899 1904 dfh = self._datafp("a+")
1900 1905 ifh = self._indexfp("a+")
1901 1906 try:
1902 1907 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1903 1908 flags, cachedelta, ifh, dfh,
1904 1909 deltacomputer=deltacomputer)
1905 1910 finally:
1906 1911 if dfh:
1907 1912 dfh.close()
1908 1913 ifh.close()
1909 1914
1910 1915 def compress(self, data):
1911 1916 """Generate a possibly-compressed representation of data."""
1912 1917 if not data:
1913 1918 return '', data
1914 1919
1915 1920 compressed = self._compressor.compress(data)
1916 1921
1917 1922 if compressed:
1918 1923 # The revlog compressor added the header in the returned data.
1919 1924 return '', compressed
1920 1925
1921 1926 if data[0:1] == '\0':
1922 1927 return '', data
1923 1928 return 'u', data
1924 1929
1925 1930 def decompress(self, data):
1926 1931 """Decompress a revlog chunk.
1927 1932
1928 1933 The chunk is expected to begin with a header identifying the
1929 1934 format type so it can be routed to an appropriate decompressor.
1930 1935 """
1931 1936 if not data:
1932 1937 return data
1933 1938
1934 1939 # Revlogs are read much more frequently than they are written and many
1935 1940 # chunks only take microseconds to decompress, so performance is
1936 1941 # important here.
1937 1942 #
1938 1943 # We can make a few assumptions about revlogs:
1939 1944 #
1940 1945 # 1) the majority of chunks will be compressed (as opposed to inline
1941 1946 # raw data).
1942 1947         # 2) decompressing *any* data will likely be at least 10x slower than
1943 1948 # returning raw inline data.
1944 1949 # 3) we want to prioritize common and officially supported compression
1945 1950 # engines
1946 1951 #
1947 1952 # It follows that we want to optimize for "decompress compressed data
1948 1953 # when encoded with common and officially supported compression engines"
1949 1954 # case over "raw data" and "data encoded by less common or non-official
1950 1955 # compression engines." That is why we have the inline lookup first
1951 1956 # followed by the compengines lookup.
1952 1957 #
1953 1958 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1954 1959 # compressed chunks. And this matters for changelog and manifest reads.
1955 1960 t = data[0:1]
1956 1961
1957 1962 if t == 'x':
1958 1963 try:
1959 1964 return _zlibdecompress(data)
1960 1965 except zlib.error as e:
1961 1966 raise error.RevlogError(_('revlog decompress error: %s') %
1962 1967 stringutil.forcebytestr(e))
1963 1968 # '\0' is more common than 'u' so it goes first.
1964 1969 elif t == '\0':
1965 1970 return data
1966 1971 elif t == 'u':
1967 1972 return util.buffer(data, 1)
1968 1973
1969 1974 try:
1970 1975 compressor = self._decompressors[t]
1971 1976 except KeyError:
1972 1977 try:
1973 1978 engine = util.compengines.forrevlogheader(t)
1974 1979 compressor = engine.revlogcompressor(self._compengineopts)
1975 1980 self._decompressors[t] = compressor
1976 1981 except KeyError:
1977 1982 raise error.RevlogError(_('unknown compression type %r') % t)
1978 1983
1979 1984 return compressor.decompress(data)
1980 1985
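A standalone sketch of the header routing described above, limited to the
zlib case (``toy_decompress`` is illustrative only, not the revlog API)::

    import zlib

    def toy_decompress(data):
        t = data[0:1]
        if t == b'\0':    # chunk starting with NUL is stored verbatim
            return data
        if t == b'u':     # explicit "uncompressed" marker, strip header
            return data[1:]
        if t == b'x':     # 'x' is the first byte of a zlib stream
            return zlib.decompress(data)
        raise ValueError('unknown compression header %r' % t)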
1981 1986 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1982 1987 cachedelta, ifh, dfh, alwayscache=False,
1983 1988 deltacomputer=None):
1984 1989 """internal function to add revisions to the log
1985 1990
1986 1991 see addrevision for argument descriptions.
1987 1992
1988 1993 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1989 1994
1990 1995 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1991 1996 be used.
1992 1997
1993 1998 invariants:
1994 1999 - rawtext is optional (can be None); if not set, cachedelta must be set.
1995 2000 if both are set, they must correspond to each other.
1996 2001 """
1997 2002 if node == nullid:
1998 2003 raise error.RevlogError(_("%s: attempt to add null revision") %
1999 2004 self.indexfile)
2000 2005 if node == wdirid or node in wdirfilenodeids:
2001 2006 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2002 2007 self.indexfile)
2003 2008
2004 2009 if self._inline:
2005 2010 fh = ifh
2006 2011 else:
2007 2012 fh = dfh
2008 2013
2009 2014 btext = [rawtext]
2010 2015
2011 2016 curr = len(self)
2012 2017 prev = curr - 1
2013 2018 offset = self.end(prev)
2014 2019 p1r, p2r = self.rev(p1), self.rev(p2)
2015 2020
2016 2021 # full versions are inserted when the needed deltas
2017 2022 # become comparable to the uncompressed text
2018 2023 if rawtext is None:
2019 2024             # need the rawtext size before it is changed by flag processors,
2020 2025 # the non-raw size. use revlog explicitly to avoid filelog's extra
2021 2026 # logic that might remove metadata size.
2022 2027 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2023 2028 cachedelta[1])
2024 2029 else:
2025 2030 textlen = len(rawtext)
2026 2031
2027 2032 if deltacomputer is None:
2028 2033 deltacomputer = deltautil.deltacomputer(self)
2029 2034
2030 2035 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2031 2036
2032 2037 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2033 2038
2034 2039 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2035 2040 deltainfo.base, link, p1r, p2r, node)
2036 2041 self.index.append(e)
2037 2042 self.nodemap[node] = curr
2038 2043
2039 2044 # Reset the pure node cache start lookup offset to account for new
2040 2045 # revision.
2041 2046 if self._nodepos is not None:
2042 2047 self._nodepos = curr
2043 2048
2044 2049 entry = self._io.packentry(e, self.node, self.version, curr)
2045 2050 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2046 2051 link, offset)
2047 2052
2048 2053 rawtext = btext[0]
2049 2054
2050 2055 if alwayscache and rawtext is None:
2051 2056 rawtext = deltacomputer.buildtext(revinfo, fh)
2052 2057
2053 2058 if type(rawtext) == bytes: # only accept immutable objects
2054 2059 self._revisioncache = (node, curr, rawtext)
2055 2060 self._chainbasecache[curr] = deltainfo.chainbase
2056 2061 return node
2057 2062
2058 2063 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2059 2064 # Files opened in a+ mode have inconsistent behavior on various
2060 2065 # platforms. Windows requires that a file positioning call be made
2061 2066 # when the file handle transitions between reads and writes. See
2062 2067 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2063 2068 # platforms, Python or the platform itself can be buggy. Some versions
2064 2069 # of Solaris have been observed to not append at the end of the file
2065 2070 # if the file was seeked to before the end. See issue4943 for more.
2066 2071 #
2067 2072 # We work around this issue by inserting a seek() before writing.
2068 2073 # Note: This is likely not necessary on Python 3. However, because
2069 2074 # the file handle is reused for reads and may be seeked there, we need
2070 2075 # to be careful before changing this.
2071 2076 ifh.seek(0, os.SEEK_END)
2072 2077 if dfh:
2073 2078 dfh.seek(0, os.SEEK_END)
2074 2079
2075 2080 curr = len(self) - 1
2076 2081 if not self._inline:
2077 2082 transaction.add(self.datafile, offset)
2078 2083 transaction.add(self.indexfile, curr * len(entry))
2079 2084 if data[0]:
2080 2085 dfh.write(data[0])
2081 2086 dfh.write(data[1])
2082 2087 ifh.write(entry)
2083 2088 else:
2084 2089 offset += curr * self._io.size
2085 2090 transaction.add(self.indexfile, offset, curr)
2086 2091 ifh.write(entry)
2087 2092 ifh.write(data[0])
2088 2093 ifh.write(data[1])
2089 2094 self._enforceinlinesize(transaction, ifh)
2090 2095
2091 2096 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2092 2097 """
2093 2098 add a delta group
2094 2099
2095 2100 given a set of deltas, add them to the revision log. the
2096 2101 first delta is against its parent, which should be in our
2097 2102 log, the rest are against the previous delta.
2098 2103
2099 2104 If ``addrevisioncb`` is defined, it will be called with arguments of
2100 2105 this revlog and the node that was added.
2101 2106 """
2102 2107
2103 2108 if self._writinghandles:
2104 2109 raise error.ProgrammingError('cannot nest addgroup() calls')
2105 2110
2106 2111 nodes = []
2107 2112
2108 2113 r = len(self)
2109 2114 end = 0
2110 2115 if r:
2111 2116 end = self.end(r - 1)
2112 2117 ifh = self._indexfp("a+")
2113 2118 isize = r * self._io.size
2114 2119 if self._inline:
2115 2120 transaction.add(self.indexfile, end + isize, r)
2116 2121 dfh = None
2117 2122 else:
2118 2123 transaction.add(self.indexfile, isize, r)
2119 2124 transaction.add(self.datafile, end)
2120 2125 dfh = self._datafp("a+")
2121 2126 def flush():
2122 2127 if dfh:
2123 2128 dfh.flush()
2124 2129 ifh.flush()
2125 2130
2126 2131 self._writinghandles = (ifh, dfh)
2127 2132
2128 2133 try:
2129 2134 deltacomputer = deltautil.deltacomputer(self)
2130 2135 # loop through our set of deltas
2131 2136 for data in deltas:
2132 2137 node, p1, p2, linknode, deltabase, delta, flags = data
2133 2138 link = linkmapper(linknode)
2134 2139 flags = flags or REVIDX_DEFAULT_FLAGS
2135 2140
2136 2141 nodes.append(node)
2137 2142
2138 2143 if node in self.nodemap:
2139 2144 self._nodeduplicatecallback(transaction, node)
2140 2145 # this can happen if two branches make the same change
2141 2146 continue
2142 2147
2143 2148 for p in (p1, p2):
2144 2149 if p not in self.nodemap:
2145 2150 raise error.LookupError(p, self.indexfile,
2146 2151 _('unknown parent'))
2147 2152
2148 2153 if deltabase not in self.nodemap:
2149 2154 raise error.LookupError(deltabase, self.indexfile,
2150 2155 _('unknown delta base'))
2151 2156
2152 2157 baserev = self.rev(deltabase)
2153 2158
2154 2159 if baserev != nullrev and self.iscensored(baserev):
2155 2160 # if base is censored, delta must be full replacement in a
2156 2161 # single patch operation
2157 2162 hlen = struct.calcsize(">lll")
2158 2163 oldlen = self.rawsize(baserev)
2159 2164 newlen = len(delta) - hlen
2160 2165 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2161 2166 raise error.CensoredBaseError(self.indexfile,
2162 2167 self.node(baserev))
2163 2168
2164 2169 if not flags and self._peek_iscensored(baserev, delta, flush):
2165 2170 flags |= REVIDX_ISCENSORED
2166 2171
2167 2172 # We assume consumers of addrevisioncb will want to retrieve
2168 2173 # the added revision, which will require a call to
2169 2174 # revision(). revision() will fast path if there is a cache
2170 2175 # hit. So, we tell _addrevision() to always cache in this case.
2171 2176 # We're only using addgroup() in the context of changegroup
2172 2177 # generation so the revision data can always be handled as raw
2173 2178 # by the flagprocessor.
2174 2179 self._addrevision(node, None, transaction, link,
2175 2180 p1, p2, flags, (baserev, delta),
2176 2181 ifh, dfh,
2177 2182 alwayscache=bool(addrevisioncb),
2178 2183 deltacomputer=deltacomputer)
2179 2184
2180 2185 if addrevisioncb:
2181 2186 addrevisioncb(self, node)
2182 2187
2183 2188 if not dfh and not self._inline:
2184 2189 # addrevision switched from inline to conventional
2185 2190 # reopen the index
2186 2191 ifh.close()
2187 2192 dfh = self._datafp("a+")
2188 2193 ifh = self._indexfp("a+")
2189 2194 self._writinghandles = (ifh, dfh)
2190 2195 finally:
2191 2196 self._writinghandles = None
2192 2197
2193 2198 if dfh:
2194 2199 dfh.close()
2195 2200 ifh.close()
2196 2201
2197 2202 return nodes
2198 2203
2199 2204 def iscensored(self, rev):
2200 2205 """Check if a file revision is censored."""
2201 2206 if not self._censorable:
2202 2207 return False
2203 2208
2204 2209 return self.flags(rev) & REVIDX_ISCENSORED
2205 2210
2206 2211 def _peek_iscensored(self, baserev, delta, flush):
2207 2212 """Quickly check if a delta produces a censored revision."""
2208 2213 if not self._censorable:
2209 2214 return False
2210 2215
2211 2216 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2212 2217
2213 2218 def getstrippoint(self, minlink):
2214 2219 """find the minimum rev that must be stripped to strip the linkrev
2215 2220
2216 2221 Returns a tuple containing the minimum rev and a set of all revs that
2217 2222 have linkrevs that will be broken by this strip.
2218 2223 """
2219 2224 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2220 2225 self.headrevs(),
2221 2226 self.linkrev, self.parentrevs)
2222 2227
2223 2228 def strip(self, minlink, transaction):
2224 2229 """truncate the revlog on the first revision with a linkrev >= minlink
2225 2230
2226 2231 This function is called when we're stripping revision minlink and
2227 2232 its descendants from the repository.
2228 2233
2229 2234 We have to remove all revisions with linkrev >= minlink, because
2230 2235 the equivalent changelog revisions will be renumbered after the
2231 2236 strip.
2232 2237
2233 2238 So we truncate the revlog on the first of these revisions, and
2234 2239 trust that the caller has saved the revisions that shouldn't be
2235 2240 removed and that it'll re-add them after this truncation.
2236 2241 """
2237 2242 if len(self) == 0:
2238 2243 return
2239 2244
2240 2245 rev, _ = self.getstrippoint(minlink)
2241 2246 if rev == len(self):
2242 2247 return
2243 2248
2244 2249 # first truncate the files on disk
2245 2250 end = self.start(rev)
2246 2251 if not self._inline:
2247 2252 transaction.add(self.datafile, end)
2248 2253 end = rev * self._io.size
2249 2254 else:
2250 2255 end += rev * self._io.size
2251 2256
2252 2257 transaction.add(self.indexfile, end)
2253 2258
2254 2259 # then reset internal state in memory to forget those revisions
2255 2260 self._revisioncache = None
2256 2261 self._chaininfocache = {}
2257 2262 self._chunkclear()
2258 2263 for x in pycompat.xrange(rev, len(self)):
2259 2264 del self.nodemap[self.node(x)]
2260 2265
2261 2266 del self.index[rev:-1]
2262 2267 self._nodepos = None
2263 2268
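A hedged sketch tying ``getstrippoint`` and ``strip`` together (``tr`` is
an open transaction; the other names are assumed from the calling context)::

    rev, broken = rl.getstrippoint(minlink)  # broken: revs losing linkrevs
    rl.strip(minlink, tr)                    # truncate from ``rev`` onward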
2264 2269 def checksize(self):
2265 2270 """Check size of index and data files
2266 2271
2267 2272 return a (dd, di) tuple.
2268 2273 - dd: extra bytes for the "data" file
2269 2274 - di: extra bytes for the "index" file
2270 2275
2271 2276 A healthy revlog will return (0, 0).
2272 2277 """
2273 2278 expected = 0
2274 2279 if len(self):
2275 2280 expected = max(0, self.end(len(self) - 1))
2276 2281
2277 2282 try:
2278 2283 with self._datafp() as f:
2279 2284 f.seek(0, io.SEEK_END)
2280 2285 actual = f.tell()
2281 2286 dd = actual - expected
2282 2287 except IOError as inst:
2283 2288 if inst.errno != errno.ENOENT:
2284 2289 raise
2285 2290 dd = 0
2286 2291
2287 2292 try:
2288 2293 f = self.opener(self.indexfile)
2289 2294 f.seek(0, io.SEEK_END)
2290 2295 actual = f.tell()
2291 2296 f.close()
2292 2297 s = self._io.size
2293 2298 i = max(0, actual // s)
2294 2299 di = actual - (i * s)
2295 2300 if self._inline:
2296 2301 databytes = 0
2297 2302 for r in self:
2298 2303 databytes += max(0, self.length(r))
2299 2304 dd = 0
2300 2305 di = actual - len(self) * s - databytes
2301 2306 except IOError as inst:
2302 2307 if inst.errno != errno.ENOENT:
2303 2308 raise
2304 2309 di = 0
2305 2310
2306 2311 return (dd, di)
2307 2312
2308 2313 def files(self):
2309 2314 res = [self.indexfile]
2310 2315 if not self._inline:
2311 2316 res.append(self.datafile)
2312 2317 return res
2313 2318
2314 2319 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2315 2320 assumehaveparentrevisions=False,
2316 2321 deltamode=repository.CG_DELTAMODE_STD):
2317 2322 if nodesorder not in ('nodes', 'storage', 'linear', None):
2318 2323 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2319 2324 nodesorder)
2320 2325
2321 2326 if nodesorder is None and not self._generaldelta:
2322 2327 nodesorder = 'storage'
2323 2328
2324 2329 if (not self._storedeltachains and
2325 2330 deltamode != repository.CG_DELTAMODE_PREV):
2326 2331 deltamode = repository.CG_DELTAMODE_FULL
2327 2332
2328 2333 return storageutil.emitrevisions(
2329 2334 self, nodes, nodesorder, revlogrevisiondelta,
2330 2335 deltaparentfn=self.deltaparent,
2331 2336 candeltafn=self.candelta,
2332 2337 rawsizefn=self.rawsize,
2333 2338 revdifffn=self.revdiff,
2334 2339 flagsfn=self.flags,
2335 2340 deltamode=deltamode,
2336 2341 revisiondata=revisiondata,
2337 2342 assumehaveparentrevisions=assumehaveparentrevisions)
2338 2343
2339 2344 DELTAREUSEALWAYS = 'always'
2340 2345 DELTAREUSESAMEREVS = 'samerevs'
2341 2346 DELTAREUSENEVER = 'never'
2342 2347
2343 2348 DELTAREUSEFULLADD = 'fulladd'
2344 2349
2345 2350 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2346 2351
2347 2352 def clone(self, tr, destrevlog, addrevisioncb=None,
2348 2353 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2349 2354 """Copy this revlog to another, possibly with format changes.
2350 2355
2351 2356 The destination revlog will contain the same revisions and nodes.
2352 2357 However, it may not be bit-for-bit identical due to e.g. delta encoding
2353 2358 differences.
2354 2359
2355 2360         The ``deltareuse`` argument controls how deltas from the existing revlog
2356 2361 are preserved in the destination revlog. The argument can have the
2357 2362 following values:
2358 2363
2359 2364 DELTAREUSEALWAYS
2360 2365 Deltas will always be reused (if possible), even if the destination
2361 2366 revlog would not select the same revisions for the delta. This is the
2362 2367 fastest mode of operation.
2363 2368 DELTAREUSESAMEREVS
2364 2369 Deltas will be reused if the destination revlog would pick the same
2365 2370 revisions for the delta. This mode strikes a balance between speed
2366 2371 and optimization.
2367 2372 DELTAREUSENEVER
2368 2373 Deltas will never be reused. This is the slowest mode of execution.
2369 2374 This mode can be used to recompute deltas (e.g. if the diff/delta
2370 2375 algorithm changes).
2371 2376 DELTAREUSEFULLADD
2372 2377           Revisions will be re-added as if they were new content. This is
2373 2378           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2374 2379           e.g. large file detection and handling.
2375 2380
2376 2381 Delta computation can be slow, so the choice of delta reuse policy can
2377 2382 significantly affect run time.
2378 2383
2379 2384 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2380 2385 two extremes. Deltas will be reused if they are appropriate. But if the
2381 2386 delta could choose a better revision, it will do so. This means if you
2382 2387 are converting a non-generaldelta revlog to a generaldelta revlog,
2383 2388 deltas will be recomputed if the delta's parent isn't a parent of the
2384 2389 revision.
2385 2390
2386 2391 In addition to the delta policy, the ``forcedeltabothparents``
2387 2392 argument controls whether to force compute deltas against both parents
2388 2393         for merges. If not set, the destination revlog's existing setting is kept.
2389 2394 """
2390 2395 if deltareuse not in self.DELTAREUSEALL:
2391 2396 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2392 2397
2393 2398 if len(destrevlog):
2394 2399 raise ValueError(_('destination revlog is not empty'))
2395 2400
2396 2401 if getattr(self, 'filteredrevs', None):
2397 2402 raise ValueError(_('source revlog has filtered revisions'))
2398 2403 if getattr(destrevlog, 'filteredrevs', None):
2399 2404 raise ValueError(_('destination revlog has filtered revisions'))
2400 2405
2401 2406 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2402 2407 # if possible.
2403 2408 oldlazydelta = destrevlog._lazydelta
2404 2409 oldlazydeltabase = destrevlog._lazydeltabase
2405 2410 oldamd = destrevlog._deltabothparents
2406 2411
2407 2412 try:
2408 2413 if deltareuse == self.DELTAREUSEALWAYS:
2409 2414 destrevlog._lazydeltabase = True
2410 2415 destrevlog._lazydelta = True
2411 2416 elif deltareuse == self.DELTAREUSESAMEREVS:
2412 2417 destrevlog._lazydeltabase = False
2413 2418 destrevlog._lazydelta = True
2414 2419 elif deltareuse == self.DELTAREUSENEVER:
2415 2420 destrevlog._lazydeltabase = False
2416 2421 destrevlog._lazydelta = False
2417 2422
2418 2423 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2419 2424
2420 2425 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2421 2426 forcedeltabothparents)
2422 2427
2423 2428 finally:
2424 2429 destrevlog._lazydelta = oldlazydelta
2425 2430 destrevlog._lazydeltabase = oldlazydeltabase
2426 2431 destrevlog._deltabothparents = oldamd
2427 2432
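A hedged usage sketch: copying a revlog while recomputing every delta
(both revlogs and the transaction ``tr`` are assumed to exist)::

    srcrevlog.clone(tr, destrevlog, deltareuse=srcrevlog.DELTAREUSENEVER)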
2428 2433 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2429 2434 forcedeltabothparents):
2430 2435 """perform the core duty of `revlog.clone` after parameter processing"""
2431 2436 deltacomputer = deltautil.deltacomputer(destrevlog)
2432 2437 index = self.index
2433 2438 for rev in self:
2434 2439 entry = index[rev]
2435 2440
2436 2441 # Some classes override linkrev to take filtered revs into
2437 2442 # account. Use raw entry from index.
2438 2443 flags = entry[0] & 0xffff
2439 2444 linkrev = entry[4]
2440 2445 p1 = index[entry[5]][7]
2441 2446 p2 = index[entry[6]][7]
2442 2447 node = entry[7]
2443 2448
2444 2449 # (Possibly) reuse the delta from the revlog if allowed and
2445 2450 # the revlog chunk is a delta.
2446 2451 cachedelta = None
2447 2452 rawtext = None
2448 2453 if deltareuse == self.DELTAREUSEFULLADD:
2449 2454 text = self.revision(rev)
2450 2455 destrevlog.addrevision(text, tr, linkrev, p1, p2,
2451 2456 cachedelta=cachedelta,
2452 2457 node=node, flags=flags,
2453 2458 deltacomputer=deltacomputer)
2454 2459 else:
2455 2460 if destrevlog._lazydelta:
2456 2461 dp = self.deltaparent(rev)
2457 2462 if dp != nullrev:
2458 2463 cachedelta = (dp, bytes(self._chunk(rev)))
2459 2464
2460 2465 if not cachedelta:
2461 2466 rawtext = self.rawdata(rev)
2462 2467
2463 2468 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2464 2469 checkambig=False)
2465 2470 dfh = None
2466 2471 if not destrevlog._inline:
2467 2472 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2468 2473 try:
2469 2474 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2470 2475 p2, flags, cachedelta, ifh, dfh,
2471 2476 deltacomputer=deltacomputer)
2472 2477 finally:
2473 2478 if dfh:
2474 2479 dfh.close()
2475 2480 ifh.close()
2476 2481
2477 2482 if addrevisioncb:
2478 2483 addrevisioncb(self, rev, node)
2479 2484
2480 2485 def censorrevision(self, tr, censornode, tombstone=b''):
2481 2486 if (self.version & 0xFFFF) == REVLOGV0:
2482 2487 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2483 2488 self.version)
2484 2489
2485 2490 censorrev = self.rev(censornode)
2486 2491 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2487 2492
2488 2493 if len(tombstone) > self.rawsize(censorrev):
2489 2494 raise error.Abort(_('censor tombstone must be no longer than '
2490 2495 'censored data'))
2491 2496
2492 2497 # Rewriting the revlog in place is hard. Our strategy for censoring is
2493 2498 # to create a new revlog, copy all revisions to it, then replace the
2494 2499 # revlogs on transaction close.
2495 2500
2496 2501 newindexfile = self.indexfile + b'.tmpcensored'
2497 2502 newdatafile = self.datafile + b'.tmpcensored'
2498 2503
2499 2504 # This is a bit dangerous. We could easily have a mismatch of state.
2500 2505 newrl = revlog(self.opener, newindexfile, newdatafile,
2501 2506 censorable=True)
2502 2507 newrl.version = self.version
2503 2508 newrl._generaldelta = self._generaldelta
2504 2509 newrl._io = self._io
2505 2510
2506 2511 for rev in self.revs():
2507 2512 node = self.node(rev)
2508 2513 p1, p2 = self.parents(node)
2509 2514
2510 2515 if rev == censorrev:
2511 2516 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2512 2517 p1, p2, censornode, REVIDX_ISCENSORED)
2513 2518
2514 2519 if newrl.deltaparent(rev) != nullrev:
2515 2520 raise error.Abort(_('censored revision stored as delta; '
2516 2521 'cannot censor'),
2517 2522 hint=_('censoring of revlogs is not '
2518 2523 'fully implemented; please report '
2519 2524 'this bug'))
2520 2525 continue
2521 2526
2522 2527 if self.iscensored(rev):
2523 2528 if self.deltaparent(rev) != nullrev:
2524 2529 raise error.Abort(_('cannot censor due to censored '
2525 2530 'revision having delta stored'))
2526 2531 rawtext = self._chunk(rev)
2527 2532 else:
2528 2533 rawtext = self.rawdata(rev)
2529 2534
2530 2535 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2531 2536 self.flags(rev))
2532 2537
2533 2538 tr.addbackup(self.indexfile, location='store')
2534 2539 if not self._inline:
2535 2540 tr.addbackup(self.datafile, location='store')
2536 2541
2537 2542 self.opener.rename(newrl.indexfile, self.indexfile)
2538 2543 if not self._inline:
2539 2544 self.opener.rename(newrl.datafile, self.datafile)
2540 2545
2541 2546 self.clearcaches()
2542 2547 self._loadindex()
2543 2548
2544 2549 def verifyintegrity(self, state):
2545 2550 """Verifies the integrity of the revlog.
2546 2551
2547 2552 Yields ``revlogproblem`` instances describing problems that are
2548 2553 found.
2549 2554 """
2550 2555 dd, di = self.checksize()
2551 2556 if dd:
2552 2557 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2553 2558 if di:
2554 2559 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2555 2560
2556 2561 version = self.version & 0xFFFF
2557 2562
2558 2563 # The verifier tells us what version revlog we should be.
2559 2564 if version != state['expectedversion']:
2560 2565 yield revlogproblem(
2561 2566 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2562 2567 (self.indexfile, version, state['expectedversion']))
2563 2568
2564 2569 state['skipread'] = set()
2565 2570
2566 2571 for rev in self:
2567 2572 node = self.node(rev)
2568 2573
2569 2574 # Verify contents. 4 cases to care about:
2570 2575 #
2571 2576 # common: the most common case
2572 2577 # rename: with a rename
2573 2578 # meta: file content starts with b'\1\n', the metadata
2574 2579 # header defined in filelog.py, but without a rename
2575 2580 # ext: content stored externally
2576 2581 #
2577 2582 # More formally, their differences are shown below:
2578 2583 #
2579 2584 # | common | rename | meta | ext
2580 2585 # -------------------------------------------------------
2581 2586 # flags() | 0 | 0 | 0 | not 0
2582 2587 # renamed() | False | True | False | ?
2583 2588 # rawtext[0:2]=='\1\n'| False | True | True | ?
2584 2589 #
2585 2590 # "rawtext" means the raw text stored in revlog data, which
2586 2591 # could be retrieved by "rawdata(rev)". "text"
2587 2592 # mentioned below is "revision(rev)".
2588 2593 #
2589 2594 # There are 3 different lengths stored physically:
2590 2595 # 1. L1: rawsize, stored in revlog index
2591 2596 # 2. L2: len(rawtext), stored in revlog data
2592 2597 # 3. L3: len(text), stored in revlog data if flags==0, or
2593 2598 # possibly somewhere else if flags!=0
2594 2599 #
2595 2600 # L1 should be equal to L2. L3 could be different from them.
2596 2601 # "text" may or may not affect commit hash depending on flag
2597 2602 # processors (see flagutil.addflagprocessor).
2598 2603 #
2599 2604 # | common | rename | meta | ext
2600 2605 # -------------------------------------------------
2601 2606 # rawsize() | L1 | L1 | L1 | L1
2602 2607 # size() | L1 | L2-LM | L1(*) | L1 (?)
2603 2608 # len(rawtext) | L2 | L2 | L2 | L2
2604 2609 # len(text) | L2 | L2 | L2 | L3
2605 2610 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2606 2611 #
2607 2612 # LM: length of metadata, depending on rawtext
2608 2613 # (*): not ideal, see comment in filelog.size
2609 2614 # (?): could be "- len(meta)" if the resolved content has
2610 2615 # rename metadata
2611 2616 #
2612 2617 # Checks needed to be done:
2613 2618 # 1. length check: L1 == L2, in all cases.
2614 2619 # 2. hash check: depending on flag processor, we may need to
2615 2620 # use either "text" (external), or "rawtext" (in revlog).
2616 2621
2617 2622 try:
2618 2623 skipflags = state.get('skipflags', 0)
2619 2624 if skipflags:
2620 2625 skipflags &= self.flags(rev)
2621 2626
2622 2627 if skipflags:
2623 2628 state['skipread'].add(node)
2624 2629 else:
2625 2630 # Side-effect: read content and verify hash.
2626 2631 self.revision(node)
2627 2632
2628 2633 l1 = self.rawsize(rev)
2629 2634 l2 = len(self.rawdata(node))
2630 2635
2631 2636 if l1 != l2:
2632 2637 yield revlogproblem(
2633 2638 error=_('unpacked size is %d, %d expected') % (l2, l1),
2634 2639 node=node)
2635 2640
2636 2641 except error.CensoredNodeError:
2637 2642 if state['erroroncensored']:
2638 2643 yield revlogproblem(error=_('censored file data'),
2639 2644 node=node)
2640 2645 state['skipread'].add(node)
2641 2646 except Exception as e:
2642 2647 yield revlogproblem(
2643 2648 error=_('unpacking %s: %s') % (short(node),
2644 2649 stringutil.forcebytestr(e)),
2645 2650 node=node)
2646 2651 state['skipread'].add(node)
2647 2652
2648 2653 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2649 2654 revisionscount=False, trackedsize=False,
2650 2655 storedsize=False):
2651 2656 d = {}
2652 2657
2653 2658 if exclusivefiles:
2654 2659 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2655 2660 if not self._inline:
2656 2661 d['exclusivefiles'].append((self.opener, self.datafile))
2657 2662
2658 2663 if sharedfiles:
2659 2664 d['sharedfiles'] = []
2660 2665
2661 2666 if revisionscount:
2662 2667 d['revisionscount'] = len(self)
2663 2668
2664 2669 if trackedsize:
2665 2670 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2666 2671
2667 2672 if storedsize:
2668 2673 d['storedsize'] = sum(self.opener.stat(path).st_size
2669 2674 for path in self.files())
2670 2675
2671 2676 return d
@@ -1,185 +1,187 b''
1 1 # flagutils.py - code to deal with revlog flags and their processors
2 2 #
3 3 # Copyright 2016 Remi Chaintron <remi@fb.com>
4 4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 from ..i18n import _
12 12
13 13 from .constants import (
14 14 REVIDX_DEFAULT_FLAGS,
15 15 REVIDX_ELLIPSIS,
16 16 REVIDX_EXTSTORED,
17 17 REVIDX_FLAGS_ORDER,
18 18 REVIDX_ISCENSORED,
19 19 REVIDX_RAWTEXT_CHANGING_FLAGS,
20 REVIDX_SIDEDATA,
20 21 )
21 22
22 23 from .. import (
23 24 error,
24 25 util
25 26 )
26 27
27 28 # blanked usage of all the name to prevent pyflakes constraints
28 29 # We need these name available in the module for extensions.
29 30 REVIDX_ISCENSORED
30 31 REVIDX_ELLIPSIS
31 32 REVIDX_EXTSTORED
33 REVIDX_SIDEDATA
32 34 REVIDX_DEFAULT_FLAGS
33 35 REVIDX_FLAGS_ORDER
34 36 REVIDX_RAWTEXT_CHANGING_FLAGS
35 37
36 38 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
37 39
38 40 # Store flag processors (cf. 'addflagprocessor()' to register)
39 41 flagprocessors = {
40 42 REVIDX_ISCENSORED: None,
41 43 }
42 44
43 45 def addflagprocessor(flag, processor):
44 46 """Register a flag processor on a revision data flag.
45 47
46 48 Invariant:
47 49 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
48 50 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
49 51 - Only one flag processor can be registered on a specific flag.
50 52 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
51 53 following signatures:
52 54     - (read) f(self, rawtext) -> text, bool, sidedata
53 55     - (write) f(self, text, sidedata) -> rawtext, bool
54 56 - (raw) f(self, rawtext) -> bool
55 57 "text" is presented to the user. "rawtext" is stored in revlog data, not
56 58 directly visible to the user.
57 59 The boolean returned by these transforms is used to determine whether
58 60 the returned text can be used for hash integrity checking. For example,
59 61 if "write" returns False, then "text" is used to generate hash. If
60 62 "write" returns True, that basically means "rawtext" returned by "write"
61 63 should be used to generate hash. Usually, "write" and "read" return
62 64 different booleans. And "raw" returns a same boolean as "write".
63 65
64 66 Note: The 'raw' transform is used for changegroup generation and in some
65 67 debug commands. In this case the transform only indicates whether the
66 68 contents can be used for hash integrity checks.
67 69 """
68 70 insertflagprocessor(flag, processor, flagprocessors)
69 71
70 72 def insertflagprocessor(flag, processor, flagprocessors):
71 73 if not flag & REVIDX_KNOWN_FLAGS:
72 74 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
73 75 raise error.ProgrammingError(msg)
74 76 if flag not in REVIDX_FLAGS_ORDER:
75 77 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
76 78 raise error.ProgrammingError(msg)
77 79 if flag in flagprocessors:
78 80 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
79 81 raise error.Abort(msg)
80 82 flagprocessors[flag] = processor
81 83
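A hedged sketch of a processor tuple matching the signatures above; the
flag constant is hypothetical and a real extension must use a flag it owns
from REVIDX_FLAGS_ORDER::

    def _read(rl, rawtext):
        return rawtext, True, {}   # text, validatehash, sidedata
    def _write(rl, text, sidedata):
        return text, True          # rawtext, validatehash
    def _raw(rl, rawtext):
        return True                # rawtext is usable for hash checking
    # addflagprocessor(SOME_FLAG, (_read, _write, _raw))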
82 84 def processflagswrite(revlog, text, flags, sidedata):
83 85 """Inspect revision data flags and applies write transformations defined
84 86 by registered flag processors.
85 87
86 88 ``text`` - the revision data to process
87 89 ``flags`` - the revision flags
88 90
89 91 This method processes the flags in the order (or reverse order if
90 92 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
91 93 flag processors registered for present flags. The order of flags defined
92 94 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
93 95
94 96 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
95 97 processed text and ``validatehash`` is a bool indicating whether the
96 98 returned text should be checked for hash integrity.
97 99 """
98 100 return _processflagsfunc(revlog, text, flags, 'write',
99 101 sidedata=sidedata)[:2]
100 102
101 103 def processflagsread(revlog, text, flags):
102 104 """Inspect revision data flags and applies read transformations defined
103 105 by registered flag processors.
104 106
105 107 ``text`` - the revision data to process
106 108 ``flags`` - the revision flags
109 111
110 112 This method processes the flags in the order (or reverse order if
111 113 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
112 114 flag processors registered for present flags. The order of flags defined
113 115 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
114 116
115 117 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
116 118 processed text and ``validatehash`` is a bool indicating whether the
117 119 returned text should be checked for hash integrity.
118 120 """
119 121 return _processflagsfunc(revlog, text, flags, 'read')
120 122
121 123 def processflagsraw(revlog, text, flags):
122 124     """Inspect revision data flags to check if the content hash should be
123 125 validated.
124 126
125 127 ``text`` - the revision data to process
126 128 ``flags`` - the revision flags
127 129
128 130 This method processes the flags in the order (or reverse order if
129 131 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
130 132 flag processors registered for present flags. The order of flags defined
131 133 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
132 134
133 135     Returns a bool, ``validatehash``, indicating whether the raw text
134 136     should be checked for hash integrity (the raw transform does not
135 137     produce any processed text).
136 138 """
137 139 return _processflagsfunc(revlog, text, flags, 'raw')[1]
138 140
139 141 def _processflagsfunc(revlog, text, flags, operation, sidedata=None):
140 142 """internal function to process flag on a revlog
141 143
142 144     This function is private to this module; code should never need to call it
143 145 directly."""
144 146 # fast path: no flag processors will run
145 147 if flags == 0:
146 148 return text, True, {}
147 149 if operation not in ('read', 'write', 'raw'):
148 150 raise error.ProgrammingError(_("invalid '%s' operation") %
149 151 operation)
150 152 # Check all flags are known.
151 153 if flags & ~REVIDX_KNOWN_FLAGS:
152 154 raise revlog._flagserrorclass(_("incompatible revision flag '%#x'") %
153 155 (flags & ~REVIDX_KNOWN_FLAGS))
154 156 validatehash = True
155 157 # Depending on the operation (read or write), the order might be
156 158 # reversed due to non-commutative transforms.
157 159 orderedflags = REVIDX_FLAGS_ORDER
158 160 if operation == 'write':
159 161 orderedflags = reversed(orderedflags)
160 162
161 163 outsidedata = {}
162 164 for flag in orderedflags:
163 165 # If a flagprocessor has been registered for a known flag, apply the
164 166 # related operation transform and update result tuple.
165 167 if flag & flags:
166 168 vhash = True
167 169
168 170 if flag not in revlog._flagprocessors:
169 171 message = _("missing processor for flag '%#x'") % (flag)
170 172 raise revlog._flagserrorclass(message)
171 173
172 174 processor = revlog._flagprocessors[flag]
173 175 if processor is not None:
174 176 readtransform, writetransform, rawtransform = processor
175 177
176 178 if operation == 'raw':
177 179 vhash = rawtransform(revlog, text)
178 180 elif operation == 'read':
179 181 text, vhash, s = readtransform(revlog, text)
180 182 outsidedata.update(s)
181 183 else: # write operation
182 184 text, vhash = writetransform(revlog, text, sidedata)
183 185 validatehash = validatehash and vhash
184 186
185 187 return text, validatehash, outsidedata
@@ -1,79 +1,85 b''
1 1 # sidedata.py - Logic around store extra data alongside revlog revisions
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net)
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """core code for "sidedata" support
8 8
9 9 The "sidedata" is stored alongside the revision without actually being part of
10 10 its content and without affecting its hash. Its main use case is to cache
11 11 important information related to a changeset.
12 12
13 13 The current implementation is experimental and subject to change. Do not rely
14 14 on it in production.
15 15
16 16 Sidedata are stored in the revlog itself, within the revision rawtext. They
17 17 are inserted into and removed from it using the flagprocessors mechanism. The following
18 18 format is currently used::
19 19
20 20 initial header:
21 21 <number of sidedata; 2 bytes>
22 22 sidedata (repeated N times):
23 23 <sidedata-key; 2 bytes>
24 24 <sidedata-entry-length: 4 bytes>
25 25 <sidedata-content-sha1-digest: 20 bytes>
26 26 <sidedata-content; X bytes>
27 27 normal raw text:
28 28 <all bytes remaining in the rawtext>
29 29
30 30 This is a simple and effective format. It should be enough to experiment with
31 31 the concept.
32 32 """
33 33
34 34 from __future__ import absolute_import
35 35
36 36 import hashlib
37 37 import struct
38 38
39 39 from .. import error
40 40
41 41 SIDEDATA_HEADER = struct.Struct('>H')
42 42 SIDEDATA_ENTRY = struct.Struct('>HL20s')
43 43
44 44 def sidedatawriteprocessor(rl, text, sidedata):
45 45 sidedata = list(sidedata.items())
46 46 sidedata.sort()
47 47 rawtext = [SIDEDATA_HEADER.pack(len(sidedata))]
48 48 for key, value in sidedata:
49 49 digest = hashlib.sha1(value).digest()
50 50 rawtext.append(SIDEDATA_ENTRY.pack(key, len(value), digest))
51 51 for key, value in sidedata:
52 52 rawtext.append(value)
53 53 rawtext.append(bytes(text))
54 54 return ''.join(rawtext), False
55 55
56 56 def sidedatareadprocessor(rl, text):
57 57 sidedata = {}
58 58 offset = 0
59 59 nbentry, = SIDEDATA_HEADER.unpack(text[:SIDEDATA_HEADER.size])
60 60 offset += SIDEDATA_HEADER.size
61 61 dataoffset = SIDEDATA_HEADER.size + (SIDEDATA_ENTRY.size * nbentry)
62 62 for i in range(nbentry):
63 63 nextoffset = offset + SIDEDATA_ENTRY.size
64 64 key, size, storeddigest = SIDEDATA_ENTRY.unpack(text[offset:nextoffset])
65 65 offset = nextoffset
66 66 # read the data associated with that entry
67 67 nextdataoffset = dataoffset + size
68 68 entrytext = text[dataoffset:nextdataoffset]
69 69 readdigest = hashlib.sha1(entrytext).digest()
70 70 if storeddigest != readdigest:
71 71 raise error.SidedataHashError(key, storeddigest, readdigest)
72 72 sidedata[key] = entrytext
73 73 dataoffset = nextdataoffset
74 74 text = text[dataoffset:]
75 75 return text, True, sidedata
76 76
77 77 def sidedatarawprocessor(rl, text):
78 78 # side data modifies rawtext and prevent rawtext hash validation
79 79 return False
80
81 processors = (
82 sidedatareadprocessor,
83 sidedatawriteprocessor,
84 sidedatarawprocessor,
85 )
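A worked, standalone illustration of the layout documented above for a
single entry (key ``1``, payload ``b'data'``); it mirrors the two struct
formats only and is not the revlog API::

    import hashlib
    import struct

    payload = b'data'
    header = struct.pack('>H', 1)                # one sidedata entry
    entry = struct.pack('>HL20s', 1, len(payload),
                        hashlib.sha1(payload).digest())
    rawtext = header + entry + payload + b'<revision text>'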