flagprocessors: make `processflagsraw` a module level function...
marmoute
r43262:dff95420 default
@@ -1,2639 +1,2639 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 57 )
58 58 from .thirdparty import (
59 59 attr,
60 60 )
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 )
79 79 from .utils import (
80 80 storageutil,
81 81 stringutil,
82 82 )
83 83
84 84 # blanked usage of all the names to prevent pyflakes constraints
85 85 # We need these names available in the module for extensions.
86 86 REVLOGV0
87 87 REVLOGV1
88 88 REVLOGV2
89 89 FLAG_INLINE_DATA
90 90 FLAG_GENERALDELTA
91 91 REVLOG_DEFAULT_FLAGS
92 92 REVLOG_DEFAULT_FORMAT
93 93 REVLOG_DEFAULT_VERSION
94 94 REVLOGV1_FLAGS
95 95 REVLOGV2_FLAGS
96 96 REVIDX_ISCENSORED
97 97 REVIDX_ELLIPSIS
98 98 REVIDX_EXTSTORED
99 99 REVIDX_DEFAULT_FLAGS
100 100 REVIDX_FLAGS_ORDER
101 101 REVIDX_RAWTEXT_CHANGING_FLAGS
102 102
103 103 parsers = policy.importmod(r'parsers')
104 104 rustancestor = policy.importrust(r'ancestor')
105 105 rustdagop = policy.importrust(r'dagop')
106 106
107 107 # Aliased for performance.
108 108 _zlibdecompress = zlib.decompress
109 109
110 110 # max size of revlog with inline data
111 111 _maxinline = 131072
112 112 _chunksize = 1048576
113 113
114 114 # Flag processors for REVIDX_ELLIPSIS.
115 115 def ellipsisreadprocessor(rl, text):
116 116 return text, False, {}
117 117
118 118 def ellipsiswriteprocessor(rl, text, sidedata):
119 119 return text, False
120 120
121 121 def ellipsisrawprocessor(rl, text):
122 122 return False
123 123
124 124 ellipsisprocessor = (
125 125 ellipsisreadprocessor,
126 126 ellipsiswriteprocessor,
127 127 ellipsisrawprocessor,
128 128 )
129 129
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
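# A quick sketch of the packing above: the high bits of the first index
# field carry the byte offset and the low 16 bits carry the flags, so the
# round-trip (with a hypothetical offset and a known flag) looks like:
#
#   packed = offset_type(1048576, REVIDX_ISCENSORED)
#   assert getoffset(packed) == 1048576
#   assert gettype(packed) == REVIDX_ISCENSORED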
140 140
141 141 @attr.s(slots=True, frozen=True)
142 142 class _revisioninfo(object):
143 143 """Information about a revision that allows building its fulltext
144 144 node: expected hash of the revision
145 145 p1, p2: parent revs of the revision
146 146 btext: built text cache consisting of a one-element list
147 147 cachedelta: (baserev, uncompressed_delta) or None
148 148 flags: flags associated with the revision storage
149 149
150 150 One of btext[0] or cachedelta must be set.
151 151 """
152 152 node = attr.ib()
153 153 p1 = attr.ib()
154 154 p2 = attr.ib()
155 155 btext = attr.ib()
156 156 textlen = attr.ib()
157 157 cachedelta = attr.ib()
158 158 flags = attr.ib()
159 159
160 160 @interfaceutil.implementer(repository.irevisiondelta)
161 161 @attr.s(slots=True)
162 162 class revlogrevisiondelta(object):
163 163 node = attr.ib()
164 164 p1node = attr.ib()
165 165 p2node = attr.ib()
166 166 basenode = attr.ib()
167 167 flags = attr.ib()
168 168 baserevisionsize = attr.ib()
169 169 revision = attr.ib()
170 170 delta = attr.ib()
171 171 linknode = attr.ib(default=None)
172 172
173 173 @interfaceutil.implementer(repository.iverifyproblem)
174 174 @attr.s(frozen=True)
175 175 class revlogproblem(object):
176 176 warning = attr.ib(default=None)
177 177 error = attr.ib(default=None)
178 178 node = attr.ib(default=None)
179 179
180 180 # index v0:
181 181 # 4 bytes: offset
182 182 # 4 bytes: compressed length
183 183 # 4 bytes: base rev
184 184 # 4 bytes: link rev
185 185 # 20 bytes: parent 1 nodeid
186 186 # 20 bytes: parent 2 nodeid
187 187 # 20 bytes: nodeid
188 188 indexformatv0 = struct.Struct(">4l20s20s20s")
189 189 indexformatv0_pack = indexformatv0.pack
190 190 indexformatv0_unpack = indexformatv0.unpack
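# A minimal sketch of the fixed-size layout above: every v0 entry is
# indexformatv0.size == 4 * 4 + 3 * 20 == 76 bytes, so parseindex() below
# can walk the file in 76-byte strides. With hypothetical 20-byte node
# values p1, p2 and n:
#
#   entry = indexformatv0_pack(0, 12, 0, 0, p1, p2, n)
#   assert len(entry) == indexformatv0.size == 76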
191 191
192 192 class revlogoldindex(list):
193 193 def __getitem__(self, i):
194 194 if i == -1:
195 195 return (0, 0, 0, -1, -1, -1, -1, nullid)
196 196 return list.__getitem__(self, i)
197 197
198 198 class revlogoldio(object):
199 199 def __init__(self):
200 200 self.size = indexformatv0.size
201 201
202 202 def parseindex(self, data, inline):
203 203 s = self.size
204 204 index = []
205 205 nodemap = {nullid: nullrev}
206 206 n = off = 0
207 207 l = len(data)
208 208 while off + s <= l:
209 209 cur = data[off:off + s]
210 210 off += s
211 211 e = indexformatv0_unpack(cur)
212 212 # transform to revlogv1 format
213 213 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
214 214 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
215 215 index.append(e2)
216 216 nodemap[e[6]] = n
217 217 n += 1
218 218
219 219 return revlogoldindex(index), nodemap, None
220 220
221 221 def packentry(self, entry, node, version, rev):
222 222 if gettype(entry[0]):
223 223 raise error.RevlogError(_('index entry flags need revlog '
224 224 'version 1'))
225 225 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
226 226 node(entry[5]), node(entry[6]), entry[7])
227 227 return indexformatv0_pack(*e2)
228 228
229 229 # index ng:
230 230 # 6 bytes: offset
231 231 # 2 bytes: flags
232 232 # 4 bytes: compressed length
233 233 # 4 bytes: uncompressed length
234 234 # 4 bytes: base rev
235 235 # 4 bytes: link rev
236 236 # 4 bytes: parent 1 rev
237 237 # 4 bytes: parent 2 rev
238 238 # 32 bytes: nodeid
239 239 indexformatng = struct.Struct(">Qiiiiii20s12x")
240 240 indexformatng_pack = indexformatng.pack
241 241 versionformat = struct.Struct(">I")
242 242 versionformat_pack = versionformat.pack
243 243 versionformat_unpack = versionformat.unpack
244 244
245 245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 246 # signed integer)
247 247 _maxentrysize = 0x7fffffff
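# A sanity-check sketch for the "ng" layout above: the 8-byte packed
# offset/flags field (">Q"), six 4-byte integers, a 20-byte nodeid and
# 12 bytes of padding make each entry exactly 64 bytes:
#
#   assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64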
248 248
249 249 class revlogio(object):
250 250 def __init__(self):
251 251 self.size = indexformatng.size
252 252
253 253 def parseindex(self, data, inline):
254 254 # call the C implementation to parse the index data
255 255 index, cache = parsers.parse_index2(data, inline)
256 256 return index, getattr(index, 'nodemap', None), cache
257 257
258 258 def packentry(self, entry, node, version, rev):
259 259 p = indexformatng_pack(*entry)
260 260 if rev == 0:
261 261 p = versionformat_pack(version) + p[4:]
262 262 return p
263 263
264 264 class revlog(flagutil.flagprocessorsmixin):
265 265 """
266 266 the underlying revision storage object
267 267
268 268 A revlog consists of two parts, an index and the revision data.
269 269
270 270 The index is a file with a fixed record size containing
271 271 information on each revision, including its nodeid (hash), the
272 272 nodeids of its parents, the position and offset of its data within
273 273 the data file, and the revision it's based on. Finally, each entry
274 274 contains a linkrev entry that can serve as a pointer to external
275 275 data.
276 276
277 277 The revision data itself is a linear collection of data chunks.
278 278 Each chunk represents a revision and is usually represented as a
279 279 delta against the previous chunk. To bound lookup time, runs of
280 280 deltas are limited to about 2 times the length of the original
281 281 version data. This makes retrieval of a version proportional to
282 282 its size, or O(1) relative to the number of revisions.
283 283
284 284 Both pieces of the revlog are written to in an append-only
285 285 fashion, which means we never need to rewrite a file to insert or
286 286 remove data, and can use some simple techniques to avoid the need
287 287 for locking while reading.
288 288
289 289 If checkambig, indexfile is opened with checkambig=True at
290 290 writing, to avoid file stat ambiguity.
291 291
292 292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 293 index will be mmapped rather than read if it is larger than the
294 294 configured threshold.
295 295
296 296 If censorable is True, the revlog can have censored revisions.
297 297
298 298 If `upperboundcomp` is not None, this is the expected maximal gain from
299 299 compression for the data content.
300 300 """
301 301 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
302 302 mmaplargeindex=False, censorable=False,
303 303 upperboundcomp=None):
304 304 """
305 305 create a revlog object
306 306
307 307 opener is a function that abstracts the file opening operation
308 308 and can be used to implement COW semantics or the like.
309 309
310 310 """
311 311 self.upperboundcomp = upperboundcomp
312 312 self.indexfile = indexfile
313 313 self.datafile = datafile or (indexfile[:-2] + ".d")
314 314 self.opener = opener
315 315 # When True, indexfile is opened with checkambig=True at writing, to
316 316 # avoid file stat ambiguity.
317 317 self._checkambig = checkambig
318 318 self._mmaplargeindex = mmaplargeindex
319 319 self._censorable = censorable
320 320 # 3-tuple of (node, rev, text) for a raw revision.
321 321 self._revisioncache = None
322 322 # Maps rev to chain base rev.
323 323 self._chainbasecache = util.lrucachedict(100)
324 324 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
325 325 self._chunkcache = (0, '')
326 326 # How much data to read and cache into the raw revlog data cache.
327 327 self._chunkcachesize = 65536
328 328 self._maxchainlen = None
329 329 self._deltabothparents = True
330 330 self.index = []
331 331 # Mapping of partial identifiers to full nodes.
332 332 self._pcache = {}
333 333 # Mapping of revision integer to full node.
334 334 self._nodecache = {nullid: nullrev}
335 335 self._nodepos = None
336 336 self._compengine = 'zlib'
337 337 self._compengineopts = {}
338 338 self._maxdeltachainspan = -1
339 339 self._withsparseread = False
340 340 self._sparserevlog = False
341 341 self._srdensitythreshold = 0.50
342 342 self._srmingapsize = 262144
343 343
344 344 # Make copy of flag processors so each revlog instance can support
345 345 # custom flags.
346 346 self._flagprocessors = dict(flagutil.flagprocessors)
347 347
348 348 # 2-tuple of file handles being used for active writing.
349 349 self._writinghandles = None
350 350
351 351 self._loadindex()
352 352
353 353 def _loadindex(self):
354 354 mmapindexthreshold = None
355 355 opts = getattr(self.opener, 'options', {}) or {}
356 356
357 357 if 'revlogv2' in opts:
358 358 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
359 359 elif 'revlogv1' in opts:
360 360 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
361 361 if 'generaldelta' in opts:
362 362 newversionflags |= FLAG_GENERALDELTA
363 363 elif getattr(self.opener, 'options', None) is not None:
364 364 # If options are provided but no 'revlog*' is found, the repository
365 365 # would have no 'requires' file in it, which means we have to
366 366 # stick to the old format.
367 367 newversionflags = REVLOGV0
368 368 else:
369 369 newversionflags = REVLOG_DEFAULT_VERSION
370 370
371 371 if 'chunkcachesize' in opts:
372 372 self._chunkcachesize = opts['chunkcachesize']
373 373 if 'maxchainlen' in opts:
374 374 self._maxchainlen = opts['maxchainlen']
375 375 if 'deltabothparents' in opts:
376 376 self._deltabothparents = opts['deltabothparents']
377 377 self._lazydelta = bool(opts.get('lazydelta', True))
378 378 self._lazydeltabase = False
379 379 if self._lazydelta:
380 380 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
381 381 if 'compengine' in opts:
382 382 self._compengine = opts['compengine']
383 383 if 'zlib.level' in opts:
384 384 self._compengineopts['zlib.level'] = opts['zlib.level']
385 385 if 'zstd.level' in opts:
386 386 self._compengineopts['zstd.level'] = opts['zstd.level']
387 387 if 'maxdeltachainspan' in opts:
388 388 self._maxdeltachainspan = opts['maxdeltachainspan']
389 389 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
390 390 mmapindexthreshold = opts['mmapindexthreshold']
391 391 self._sparserevlog = bool(opts.get('sparse-revlog', False))
392 392 withsparseread = bool(opts.get('with-sparse-read', False))
393 393 # sparse-revlog forces sparse-read
394 394 self._withsparseread = self._sparserevlog or withsparseread
395 395 if 'sparse-read-density-threshold' in opts:
396 396 self._srdensitythreshold = opts['sparse-read-density-threshold']
397 397 if 'sparse-read-min-gap-size' in opts:
398 398 self._srmingapsize = opts['sparse-read-min-gap-size']
399 399 if opts.get('enableellipsis'):
400 400 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
401 401
402 402 # revlog v0 doesn't have flag processors
403 403 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
404 404 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
405 405
406 406 if self._chunkcachesize <= 0:
407 407 raise error.RevlogError(_('revlog chunk cache size %r is not '
408 408 'greater than 0') % self._chunkcachesize)
409 409 elif self._chunkcachesize & (self._chunkcachesize - 1):
410 410 raise error.RevlogError(_('revlog chunk cache size %r is not a '
411 411 'power of 2') % self._chunkcachesize)
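# The power-of-two requirement above is what lets _readsegment() align
# reads with plain bit masks, e.g. with the default cache size of 65536:
#
#   cachesize = 65536
#   assert 70000 & ~(cachesize - 1) == 65536  # rounds down to a window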
412 412
413 413 indexdata = ''
414 414 self._initempty = True
415 415 try:
416 416 with self._indexfp() as f:
417 417 if (mmapindexthreshold is not None and
418 418 self.opener.fstat(f).st_size >= mmapindexthreshold):
419 419 # TODO: should .close() to release resources without
420 420 # relying on Python GC
421 421 indexdata = util.buffer(util.mmapread(f))
422 422 else:
423 423 indexdata = f.read()
424 424 if len(indexdata) > 0:
425 425 versionflags = versionformat_unpack(indexdata[:4])[0]
426 426 self._initempty = False
427 427 else:
428 428 versionflags = newversionflags
429 429 except IOError as inst:
430 430 if inst.errno != errno.ENOENT:
431 431 raise
432 432
433 433 versionflags = newversionflags
434 434
435 435 self.version = versionflags
436 436
437 437 flags = versionflags & ~0xFFFF
438 438 fmt = versionflags & 0xFFFF
439 439
440 440 if fmt == REVLOGV0:
441 441 if flags:
442 442 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
443 443 'revlog %s') %
444 444 (flags >> 16, fmt, self.indexfile))
445 445
446 446 self._inline = False
447 447 self._generaldelta = False
448 448
449 449 elif fmt == REVLOGV1:
450 450 if flags & ~REVLOGV1_FLAGS:
451 451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 452 'revlog %s') %
453 453 (flags >> 16, fmt, self.indexfile))
454 454
455 455 self._inline = versionflags & FLAG_INLINE_DATA
456 456 self._generaldelta = versionflags & FLAG_GENERALDELTA
457 457
458 458 elif fmt == REVLOGV2:
459 459 if flags & ~REVLOGV2_FLAGS:
460 460 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
461 461 'revlog %s') %
462 462 (flags >> 16, fmt, self.indexfile))
463 463
464 464 self._inline = versionflags & FLAG_INLINE_DATA
465 465 # generaldelta implied by version 2 revlogs.
466 466 self._generaldelta = True
467 467
468 468 else:
469 469 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
470 470 (fmt, self.indexfile))
471 471 # sparse-revlog can't be on without general-delta (issue6056)
472 472 if not self._generaldelta:
473 473 self._sparserevlog = False
474 474
475 475 self._storedeltachains = True
476 476
477 477 self._io = revlogio()
478 478 if self.version == REVLOGV0:
479 479 self._io = revlogoldio()
480 480 try:
481 481 d = self._io.parseindex(indexdata, self._inline)
482 482 except (ValueError, IndexError):
483 483 raise error.RevlogError(_("index %s is corrupted") %
484 484 self.indexfile)
485 485 self.index, nodemap, self._chunkcache = d
486 486 if nodemap is not None:
487 487 self.nodemap = self._nodecache = nodemap
488 488 if not self._chunkcache:
489 489 self._chunkclear()
490 490 # revnum -> (chain-length, sum-delta-length)
491 491 self._chaininfocache = {}
492 492 # revlog header -> revlog compressor
493 493 self._decompressors = {}
494 494
495 495 @util.propertycache
496 496 def _compressor(self):
497 497 engine = util.compengines[self._compengine]
498 498 return engine.revlogcompressor(self._compengineopts)
499 499
500 500 def _indexfp(self, mode='r'):
501 501 """file object for the revlog's index file"""
502 502 args = {r'mode': mode}
503 503 if mode != 'r':
504 504 args[r'checkambig'] = self._checkambig
505 505 if mode == 'w':
506 506 args[r'atomictemp'] = True
507 507 return self.opener(self.indexfile, **args)
508 508
509 509 def _datafp(self, mode='r'):
510 510 """file object for the revlog's data file"""
511 511 return self.opener(self.datafile, mode=mode)
512 512
513 513 @contextlib.contextmanager
514 514 def _datareadfp(self, existingfp=None):
515 515 """file object suitable to read data"""
516 516 # Use explicit file handle, if given.
517 517 if existingfp is not None:
518 518 yield existingfp
519 519
520 520 # Use a file handle being actively used for writes, if available.
521 521 # There is some danger to doing this because reads will seek the
522 522 # file. However, _writeentry() performs a SEEK_END before all writes,
523 523 # so we should be safe.
524 524 elif self._writinghandles:
525 525 if self._inline:
526 526 yield self._writinghandles[0]
527 527 else:
528 528 yield self._writinghandles[1]
529 529
530 530 # Otherwise open a new file handle.
531 531 else:
532 532 if self._inline:
533 533 func = self._indexfp
534 534 else:
535 535 func = self._datafp
536 536 with func() as fp:
537 537 yield fp
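# A minimal usage sketch (``rl`` stands for a hypothetical revlog
# instance; for non-inline revlogs start()/length() address the data file
# directly, while inline revlogs need the index-entry adjustment shown in
# _getsegmentforrevs() below):
#
#   with rl._datareadfp() as fp:
#       fp.seek(rl.start(rev))
#       chunk = fp.read(rl.length(rev))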
538 538
539 539 def tip(self):
540 540 return self.node(len(self.index) - 1)
541 541 def __contains__(self, rev):
542 542 return 0 <= rev < len(self)
543 543 def __len__(self):
544 544 return len(self.index)
545 545 def __iter__(self):
546 546 return iter(pycompat.xrange(len(self)))
547 547 def revs(self, start=0, stop=None):
548 548 """iterate over all rev in this revlog (from start to stop)"""
549 549 return storageutil.iterrevs(len(self), start=start, stop=stop)
550 550
551 551 @util.propertycache
552 552 def nodemap(self):
553 553 if self.index:
554 554 # populate mapping down to the initial node
555 555 node0 = self.index[0][7] # get around changelog filtering
556 556 self.rev(node0)
557 557 return self._nodecache
558 558
559 559 def hasnode(self, node):
560 560 try:
561 561 self.rev(node)
562 562 return True
563 563 except KeyError:
564 564 return False
565 565
566 566 def candelta(self, baserev, rev):
567 567 """whether two revisions (baserev, rev) can be delta-ed or not"""
568 568 # Disable delta if either rev requires a content-changing flag
569 569 # processor (ex. LFS). This is because such flag processors can alter
570 570 # the rawtext content that the delta will be based on, and two clients
571 571 # could have the same revlog node with different flags (i.e. different
572 572 # rawtext contents) and the delta could be incompatible.
573 573 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
574 574 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
575 575 return False
576 576 return True
577 577
578 578 def clearcaches(self):
579 579 self._revisioncache = None
580 580 self._chainbasecache.clear()
581 581 self._chunkcache = (0, '')
582 582 self._pcache = {}
583 583
584 584 try:
585 585 # If we are using the native C version, we are in a fun case
586 586 # where self.index, self.nodemap and self._nodecache are the same
587 587 # object.
588 588 self._nodecache.clearcaches()
589 589 except AttributeError:
590 590 self._nodecache = {nullid: nullrev}
591 591 self._nodepos = None
592 592
593 593 def rev(self, node):
594 594 try:
595 595 return self._nodecache[node]
596 596 except TypeError:
597 597 raise
598 598 except error.RevlogError:
599 599 # parsers.c radix tree lookup failed
600 600 if node == wdirid or node in wdirfilenodeids:
601 601 raise error.WdirUnsupported
602 602 raise error.LookupError(node, self.indexfile, _('no node'))
603 603 except KeyError:
604 604 # pure python cache lookup failed
605 605 n = self._nodecache
606 606 i = self.index
607 607 p = self._nodepos
608 608 if p is None:
609 609 p = len(i) - 1
610 610 else:
611 611 assert p < len(i)
612 612 for r in pycompat.xrange(p, -1, -1):
613 613 v = i[r][7]
614 614 n[v] = r
615 615 if v == node:
616 616 self._nodepos = r - 1
617 617 return r
618 618 if node == wdirid or node in wdirfilenodeids:
619 619 raise error.WdirUnsupported
620 620 raise error.LookupError(node, self.indexfile, _('no node'))
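# Lookup sketch: rev() resolves a binary node to its revision number,
# falling back to a backward scan that memoizes every node it passes, so
# repeated lookups get cheaper. Assuming ``rl`` is a revlog with at least
# one revision and ``unknown`` is a 20-byte node not stored in it:
#
#   assert rl.rev(rl.node(0)) == 0
#   rl.rev(unknown)   # raises error.LookupError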
621 621
622 622 # Accessors for index entries.
623 623
624 624 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
625 625 # are flags.
626 626 def start(self, rev):
627 627 return int(self.index[rev][0] >> 16)
628 628
629 629 def flags(self, rev):
630 630 return self.index[rev][0] & 0xFFFF
631 631
632 632 def length(self, rev):
633 633 return self.index[rev][1]
634 634
635 635 def rawsize(self, rev):
636 636 """return the length of the uncompressed text for a given revision"""
637 637 l = self.index[rev][2]
638 638 if l >= 0:
639 639 return l
640 640
641 641 t = self.rawdata(rev)
642 642 return len(t)
643 643
644 644 def size(self, rev):
645 645 """length of non-raw text (processed by a "read" flag processor)"""
646 646 # fast path: if no "read" flag processor could change the content,
647 647 # size is rawsize. note: ELLIPSIS is known to not change the content.
648 648 flags = self.flags(rev)
649 649 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
650 650 return self.rawsize(rev)
651 651
652 652 return len(self.revision(rev, raw=False))
653 653
654 654 def chainbase(self, rev):
655 655 base = self._chainbasecache.get(rev)
656 656 if base is not None:
657 657 return base
658 658
659 659 index = self.index
660 660 iterrev = rev
661 661 base = index[iterrev][3]
662 662 while base != iterrev:
663 663 iterrev = base
664 664 base = index[iterrev][3]
665 665
666 666 self._chainbasecache[rev] = base
667 667 return base
668 668
669 669 def linkrev(self, rev):
670 670 return self.index[rev][4]
671 671
672 672 def parentrevs(self, rev):
673 673 try:
674 674 entry = self.index[rev]
675 675 except IndexError:
676 676 if rev == wdirrev:
677 677 raise error.WdirUnsupported
678 678 raise
679 679
680 680 return entry[5], entry[6]
681 681
682 682 # fast parentrevs(rev) where rev isn't filtered
683 683 _uncheckedparentrevs = parentrevs
684 684
685 685 def node(self, rev):
686 686 try:
687 687 return self.index[rev][7]
688 688 except IndexError:
689 689 if rev == wdirrev:
690 690 raise error.WdirUnsupported
691 691 raise
692 692
693 693 # Derived from index values.
694 694
695 695 def end(self, rev):
696 696 return self.start(rev) + self.length(rev)
697 697
698 698 def parents(self, node):
699 699 i = self.index
700 700 d = i[self.rev(node)]
701 701 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
702 702
703 703 def chainlen(self, rev):
704 704 return self._chaininfo(rev)[0]
705 705
706 706 def _chaininfo(self, rev):
707 707 chaininfocache = self._chaininfocache
708 708 if rev in chaininfocache:
709 709 return chaininfocache[rev]
710 710 index = self.index
711 711 generaldelta = self._generaldelta
712 712 iterrev = rev
713 713 e = index[iterrev]
714 714 clen = 0
715 715 compresseddeltalen = 0
716 716 while iterrev != e[3]:
717 717 clen += 1
718 718 compresseddeltalen += e[1]
719 719 if generaldelta:
720 720 iterrev = e[3]
721 721 else:
722 722 iterrev -= 1
723 723 if iterrev in chaininfocache:
724 724 t = chaininfocache[iterrev]
725 725 clen += t[0]
726 726 compresseddeltalen += t[1]
727 727 break
728 728 e = index[iterrev]
729 729 else:
730 730 # Add text length of base since decompressing that also takes
731 731 # work. For cache hits the length is already included.
732 732 compresseddeltalen += e[1]
733 733 r = (clen, compresseddeltalen)
734 734 chaininfocache[rev] = r
735 735 return r
736 736
737 737 def _deltachain(self, rev, stoprev=None):
738 738 """Obtain the delta chain for a revision.
739 739
740 740 ``stoprev`` specifies a revision to stop at. If not specified, we
741 741 stop at the base of the chain.
742 742
743 743 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
744 744 revs in ascending order and ``stopped`` is a bool indicating whether
745 745 ``stoprev`` was hit.
746 746 """
747 747 # Try C implementation.
748 748 try:
749 749 return self.index.deltachain(rev, stoprev, self._generaldelta)
750 750 except AttributeError:
751 751 pass
752 752
753 753 chain = []
754 754
755 755 # Alias to prevent attribute lookup in tight loop.
756 756 index = self.index
757 757 generaldelta = self._generaldelta
758 758
759 759 iterrev = rev
760 760 e = index[iterrev]
761 761 while iterrev != e[3] and iterrev != stoprev:
762 762 chain.append(iterrev)
763 763 if generaldelta:
764 764 iterrev = e[3]
765 765 else:
766 766 iterrev -= 1
767 767 e = index[iterrev]
768 768
769 769 if iterrev == stoprev:
770 770 stopped = True
771 771 else:
772 772 chain.append(iterrev)
773 773 stopped = False
774 774
775 775 chain.reverse()
776 776 return chain, stopped
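# Usage sketch: with no ``stoprev`` the chain runs from the base revision
# (which holds a full text) up to ``rev`` itself, in ascending order, for
# a hypothetical revision ``rev``:
#
#   chain, stopped = rl._deltachain(rev)
#   assert chain[-1] == rev and not stopped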
777 777
778 778 def ancestors(self, revs, stoprev=0, inclusive=False):
779 779 """Generate the ancestors of 'revs' in reverse revision order.
780 780 Does not generate revs lower than stoprev.
781 781
782 782 See the documentation for ancestor.lazyancestors for more details."""
783 783
784 784 # first, make sure start revisions aren't filtered
785 785 revs = list(revs)
786 786 checkrev = self.node
787 787 for r in revs:
788 788 checkrev(r)
789 789 # and we're sure ancestors aren't filtered as well
790 790
791 791 if rustancestor is not None:
792 792 lazyancestors = rustancestor.LazyAncestors
793 793 arg = self.index
794 794 elif util.safehasattr(parsers, 'rustlazyancestors'):
795 795 lazyancestors = ancestor.rustlazyancestors
796 796 arg = self.index
797 797 else:
798 798 lazyancestors = ancestor.lazyancestors
799 799 arg = self._uncheckedparentrevs
800 800 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
801 801
802 802 def descendants(self, revs):
803 803 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
804 804
805 805 def findcommonmissing(self, common=None, heads=None):
806 806 """Return a tuple of the ancestors of common and the ancestors of heads
807 807 that are not ancestors of common. In revset terminology, we return the
808 808 tuple:
809 809
810 810 ::common, (::heads) - (::common)
811 811
812 812 The list is sorted by revision number, meaning it is
813 813 topologically sorted.
814 814
815 815 'heads' and 'common' are both lists of node IDs. If heads is
816 816 not supplied, uses all of the revlog's heads. If common is not
817 817 supplied, uses nullid."""
818 818 if common is None:
819 819 common = [nullid]
820 820 if heads is None:
821 821 heads = self.heads()
822 822
823 823 common = [self.rev(n) for n in common]
824 824 heads = [self.rev(n) for n in heads]
825 825
826 826 # we want the ancestors, but inclusive
827 827 class lazyset(object):
828 828 def __init__(self, lazyvalues):
829 829 self.addedvalues = set()
830 830 self.lazyvalues = lazyvalues
831 831
832 832 def __contains__(self, value):
833 833 return value in self.addedvalues or value in self.lazyvalues
834 834
835 835 def __iter__(self):
836 836 added = self.addedvalues
837 837 for r in added:
838 838 yield r
839 839 for r in self.lazyvalues:
840 840 if not r in added:
841 841 yield r
842 842
843 843 def add(self, value):
844 844 self.addedvalues.add(value)
845 845
846 846 def update(self, values):
847 847 self.addedvalues.update(values)
848 848
849 849 has = lazyset(self.ancestors(common))
850 850 has.add(nullrev)
851 851 has.update(common)
852 852
853 853 # take all ancestors from heads that aren't in has
854 854 missing = set()
855 855 visit = collections.deque(r for r in heads if r not in has)
856 856 while visit:
857 857 r = visit.popleft()
858 858 if r in missing:
859 859 continue
860 860 else:
861 861 missing.add(r)
862 862 for p in self.parentrevs(r):
863 863 if p not in has:
864 864 visit.append(p)
865 865 missing = list(missing)
866 866 missing.sort()
867 867 return has, [self.node(miss) for miss in missing]
868 868
869 869 def incrementalmissingrevs(self, common=None):
870 870 """Return an object that can be used to incrementally compute the
871 871 revision numbers of the ancestors of arbitrary sets that are not
872 872 ancestors of common. This is an ancestor.incrementalmissingancestors
873 873 object.
874 874
875 875 'common' is a list of revision numbers. If common is not supplied, uses
876 876 nullrev.
877 877 """
878 878 if common is None:
879 879 common = [nullrev]
880 880
881 881 if rustancestor is not None:
882 882 return rustancestor.MissingAncestors(self.index, common)
883 883 return ancestor.incrementalmissingancestors(self.parentrevs, common)
884 884
885 885 def findmissingrevs(self, common=None, heads=None):
886 886 """Return the revision numbers of the ancestors of heads that
887 887 are not ancestors of common.
888 888
889 889 More specifically, return a list of revision numbers corresponding to
890 890 nodes N such that every N satisfies the following constraints:
891 891
892 892 1. N is an ancestor of some node in 'heads'
893 893 2. N is not an ancestor of any node in 'common'
894 894
895 895 The list is sorted by revision number, meaning it is
896 896 topologically sorted.
897 897
898 898 'heads' and 'common' are both lists of revision numbers. If heads is
899 899 not supplied, uses all of the revlog's heads. If common is not
900 900 supplied, uses nullid."""
901 901 if common is None:
902 902 common = [nullrev]
903 903 if heads is None:
904 904 heads = self.headrevs()
905 905
906 906 inc = self.incrementalmissingrevs(common=common)
907 907 return inc.missingancestors(heads)
908 908
909 909 def findmissing(self, common=None, heads=None):
910 910 """Return the ancestors of heads that are not ancestors of common.
911 911
912 912 More specifically, return a list of nodes N such that every N
913 913 satisfies the following constraints:
914 914
915 915 1. N is an ancestor of some node in 'heads'
916 916 2. N is not an ancestor of any node in 'common'
917 917
918 918 The list is sorted by revision number, meaning it is
919 919 topologically sorted.
920 920
921 921 'heads' and 'common' are both lists of node IDs. If heads is
922 922 not supplied, uses all of the revlog's heads. If common is not
923 923 supplied, uses nullid."""
924 924 if common is None:
925 925 common = [nullid]
926 926 if heads is None:
927 927 heads = self.heads()
928 928
929 929 common = [self.rev(n) for n in common]
930 930 heads = [self.rev(n) for n in heads]
931 931
932 932 inc = self.incrementalmissingrevs(common=common)
933 933 return [self.node(r) for r in inc.missingancestors(heads)]
934 934
935 935 def nodesbetween(self, roots=None, heads=None):
936 936 """Return a topological path from 'roots' to 'heads'.
937 937
938 938 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
939 939 topologically sorted list of all nodes N that satisfy both of
940 940 these constraints:
941 941
942 942 1. N is a descendant of some node in 'roots'
943 943 2. N is an ancestor of some node in 'heads'
944 944
945 945 Every node is considered to be both a descendant and an ancestor
946 946 of itself, so every reachable node in 'roots' and 'heads' will be
947 947 included in 'nodes'.
948 948
949 949 'outroots' is the list of reachable nodes in 'roots', i.e., the
950 950 subset of 'roots' that is returned in 'nodes'. Likewise,
951 951 'outheads' is the subset of 'heads' that is also in 'nodes'.
952 952
953 953 'roots' and 'heads' are both lists of node IDs. If 'roots' is
954 954 unspecified, uses nullid as the only root. If 'heads' is
955 955 unspecified, uses list of all of the revlog's heads."""
956 956 nonodes = ([], [], [])
957 957 if roots is not None:
958 958 roots = list(roots)
959 959 if not roots:
960 960 return nonodes
961 961 lowestrev = min([self.rev(n) for n in roots])
962 962 else:
963 963 roots = [nullid] # Everybody's a descendant of nullid
964 964 lowestrev = nullrev
965 965 if (lowestrev == nullrev) and (heads is None):
966 966 # We want _all_ the nodes!
967 967 return ([self.node(r) for r in self], [nullid], list(self.heads()))
968 968 if heads is None:
969 969 # All nodes are ancestors, so the latest ancestor is the last
970 970 # node.
971 971 highestrev = len(self) - 1
972 972 # Set ancestors to None to signal that every node is an ancestor.
973 973 ancestors = None
974 974 # Set heads to an empty dictionary for later discovery of heads
975 975 heads = {}
976 976 else:
977 977 heads = list(heads)
978 978 if not heads:
979 979 return nonodes
980 980 ancestors = set()
981 981 # Turn heads into a dictionary so we can remove 'fake' heads.
982 982 # Also, later we will be using it to filter out the heads we can't
983 983 # find from roots.
984 984 heads = dict.fromkeys(heads, False)
985 985 # Start at the top and keep marking parents until we're done.
986 986 nodestotag = set(heads)
987 987 # Remember where the top was so we can use it as a limit later.
988 988 highestrev = max([self.rev(n) for n in nodestotag])
989 989 while nodestotag:
990 990 # grab a node to tag
991 991 n = nodestotag.pop()
992 992 # Never tag nullid
993 993 if n == nullid:
994 994 continue
995 995 # A node's revision number represents its place in a
996 996 # topologically sorted list of nodes.
997 997 r = self.rev(n)
998 998 if r >= lowestrev:
999 999 if n not in ancestors:
1000 1000 # If we are possibly a descendant of one of the roots
1001 1001 # and we haven't already been marked as an ancestor
1002 1002 ancestors.add(n) # Mark as ancestor
1003 1003 # Add non-nullid parents to list of nodes to tag.
1004 1004 nodestotag.update([p for p in self.parents(n) if
1005 1005 p != nullid])
1006 1006 elif n in heads: # We've seen it before, is it a fake head?
1007 1007 # So it is, real heads should not be the ancestors of
1008 1008 # any other heads.
1009 1009 heads.pop(n)
1010 1010 if not ancestors:
1011 1011 return nonodes
1012 1012 # Now that we have our set of ancestors, we want to remove any
1013 1013 # roots that are not ancestors.
1014 1014
1015 1015 # If one of the roots was nullid, everything is included anyway.
1016 1016 if lowestrev > nullrev:
1017 1017 # But, since we weren't, let's recompute the lowest rev to not
1018 1018 # include roots that aren't ancestors.
1019 1019
1020 1020 # Filter out roots that aren't ancestors of heads
1021 1021 roots = [root for root in roots if root in ancestors]
1022 1022 # Recompute the lowest revision
1023 1023 if roots:
1024 1024 lowestrev = min([self.rev(root) for root in roots])
1025 1025 else:
1026 1026 # No more roots? Return empty list
1027 1027 return nonodes
1028 1028 else:
1029 1029 # We are descending from nullid, and don't need to care about
1030 1030 # any other roots.
1031 1031 lowestrev = nullrev
1032 1032 roots = [nullid]
1033 1033 # Transform our roots list into a set.
1034 1034 descendants = set(roots)
1035 1035 # Also, keep the original roots so we can filter out roots that aren't
1036 1036 # 'real' roots (i.e. are descended from other roots).
1037 1037 roots = descendants.copy()
1038 1038 # Our topologically sorted list of output nodes.
1039 1039 orderedout = []
1040 1040 # Don't start at nullid since we don't want nullid in our output list,
1041 1041 # and if nullid shows up in descendants, empty parents will look like
1042 1042 # they're descendants.
1043 1043 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1044 1044 n = self.node(r)
1045 1045 isdescendant = False
1046 1046 if lowestrev == nullrev: # Everybody is a descendant of nullid
1047 1047 isdescendant = True
1048 1048 elif n in descendants:
1049 1049 # n is already a descendant
1050 1050 isdescendant = True
1051 1051 # This check only needs to be done here because all the roots
1052 1052 # will start being marked as descendants before the loop.
1053 1053 if n in roots:
1054 1054 # If n was a root, check if it's a 'real' root.
1055 1055 p = tuple(self.parents(n))
1056 1056 # If any of its parents are descendants, it's not a root.
1057 1057 if (p[0] in descendants) or (p[1] in descendants):
1058 1058 roots.remove(n)
1059 1059 else:
1060 1060 p = tuple(self.parents(n))
1061 1061 # A node is a descendant if either of its parents are
1062 1062 # descendants. (We seeded the descendants set with the roots
1063 1063 # up there, remember?)
1064 1064 if (p[0] in descendants) or (p[1] in descendants):
1065 1065 descendants.add(n)
1066 1066 isdescendant = True
1067 1067 if isdescendant and ((ancestors is None) or (n in ancestors)):
1068 1068 # Only include nodes that are both descendants and ancestors.
1069 1069 orderedout.append(n)
1070 1070 if (ancestors is not None) and (n in heads):
1071 1071 # We're trying to figure out which heads are reachable
1072 1072 # from roots.
1073 1073 # Mark this head as having been reached
1074 1074 heads[n] = True
1075 1075 elif ancestors is None:
1076 1076 # Otherwise, we're trying to discover the heads.
1077 1077 # Assume this is a head because if it isn't, the next step
1078 1078 # will eventually remove it.
1079 1079 heads[n] = True
1080 1080 # But, obviously its parents aren't.
1081 1081 for p in self.parents(n):
1082 1082 heads.pop(p, None)
1083 1083 heads = [head for head, flag in heads.iteritems() if flag]
1084 1084 roots = list(roots)
1085 1085 assert orderedout
1086 1086 assert roots
1087 1087 assert heads
1088 1088 return (orderedout, roots, heads)
1089 1089
1090 1090 def headrevs(self, revs=None):
1091 1091 if revs is None:
1092 1092 try:
1093 1093 return self.index.headrevs()
1094 1094 except AttributeError:
1095 1095 return self._headrevs()
1096 1096 if rustdagop is not None:
1097 1097 return rustdagop.headrevs(self.index, revs)
1098 1098 return dagop.headrevs(revs, self._uncheckedparentrevs)
1099 1099
1100 1100 def computephases(self, roots):
1101 1101 return self.index.computephasesmapsets(roots)
1102 1102
1103 1103 def _headrevs(self):
1104 1104 count = len(self)
1105 1105 if not count:
1106 1106 return [nullrev]
1107 1107 # we won't iterate over filtered revs, so nobody is a head at the start
1108 1108 ishead = [0] * (count + 1)
1109 1109 index = self.index
1110 1110 for r in self:
1111 1111 ishead[r] = 1 # I may be a head
1112 1112 e = index[r]
1113 1113 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1114 1114 return [r for r, val in enumerate(ishead) if val]
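# Worked example for the loop above, with a hypothetical four-revision
# log whose parent edges are 0 -> 1 -> 2 and 0 -> 3: every revision is
# first marked as a head, then 0 and 1 are cleared because they appear
# as parents, leaving headrevs() == [2, 3].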
1115 1115
1116 1116 def heads(self, start=None, stop=None):
1117 1117 """return the list of all nodes that have no children
1118 1118
1119 1119 if start is specified, only heads that are descendants of
1120 1120 start will be returned
1121 1121 if stop is specified, it will consider all the revs from stop
1122 1122 as if they had no children
1123 1123 """
1124 1124 if start is None and stop is None:
1125 1125 if not len(self):
1126 1126 return [nullid]
1127 1127 return [self.node(r) for r in self.headrevs()]
1128 1128
1129 1129 if start is None:
1130 1130 start = nullrev
1131 1131 else:
1132 1132 start = self.rev(start)
1133 1133
1134 1134 stoprevs = set(self.rev(n) for n in stop or [])
1135 1135
1136 1136 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1137 1137 stoprevs=stoprevs)
1138 1138
1139 1139 return [self.node(rev) for rev in revs]
1140 1140
1141 1141 def children(self, node):
1142 1142 """find the children of a given node"""
1143 1143 c = []
1144 1144 p = self.rev(node)
1145 1145 for r in self.revs(start=p + 1):
1146 1146 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1147 1147 if prevs:
1148 1148 for pr in prevs:
1149 1149 if pr == p:
1150 1150 c.append(self.node(r))
1151 1151 elif p == nullrev:
1152 1152 c.append(self.node(r))
1153 1153 return c
1154 1154
1155 1155 def commonancestorsheads(self, a, b):
1156 1156 """calculate all the heads of the common ancestors of nodes a and b"""
1157 1157 a, b = self.rev(a), self.rev(b)
1158 1158 ancs = self._commonancestorsheads(a, b)
1159 1159 return pycompat.maplist(self.node, ancs)
1160 1160
1161 1161 def _commonancestorsheads(self, *revs):
1162 1162 """calculate all the heads of the common ancestors of revs"""
1163 1163 try:
1164 1164 ancs = self.index.commonancestorsheads(*revs)
1165 1165 except (AttributeError, OverflowError): # C implementation failed
1166 1166 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1167 1167 return ancs
1168 1168
1169 1169 def isancestor(self, a, b):
1170 1170 """return True if node a is an ancestor of node b
1171 1171
1172 1172 A revision is considered an ancestor of itself."""
1173 1173 a, b = self.rev(a), self.rev(b)
1174 1174 return self.isancestorrev(a, b)
1175 1175
1176 1176 def isancestorrev(self, a, b):
1177 1177 """return True if revision a is an ancestor of revision b
1178 1178
1179 1179 A revision is considered an ancestor of itself.
1180 1180
1181 1181 The implementation of this is trivial but the use of
1182 1182 reachableroots is not."""
1183 1183 if a == nullrev:
1184 1184 return True
1185 1185 elif a == b:
1186 1186 return True
1187 1187 elif a > b:
1188 1188 return False
1189 1189 return bool(self.reachableroots(a, [b], [a], includepath=False))
1190 1190
1191 1191 def reachableroots(self, minroot, heads, roots, includepath=False):
1192 1192 """return (heads(::<roots> and <roots>::<heads>))
1193 1193
1194 1194 If includepath is True, return (<roots>::<heads>)."""
1195 1195 try:
1196 1196 return self.index.reachableroots2(minroot, heads, roots,
1197 1197 includepath)
1198 1198 except AttributeError:
1199 1199 return dagop._reachablerootspure(self.parentrevs,
1200 1200 minroot, roots, heads, includepath)
1201 1201
1202 1202 def ancestor(self, a, b):
1203 1203 """calculate the "best" common ancestor of nodes a and b"""
1204 1204
1205 1205 a, b = self.rev(a), self.rev(b)
1206 1206 try:
1207 1207 ancs = self.index.ancestors(a, b)
1208 1208 except (AttributeError, OverflowError):
1209 1209 ancs = ancestor.ancestors(self.parentrevs, a, b)
1210 1210 if ancs:
1211 1211 # choose a consistent winner when there's a tie
1212 1212 return min(map(self.node, ancs))
1213 1213 return nullid
1214 1214
1215 1215 def _match(self, id):
1216 1216 if isinstance(id, int):
1217 1217 # rev
1218 1218 return self.node(id)
1219 1219 if len(id) == 20:
1220 1220 # possibly a binary node
1221 1221 # odds of a binary node being all hex in ASCII are 1 in 10**25
1222 1222 try:
1223 1223 node = id
1224 1224 self.rev(node) # quick search the index
1225 1225 return node
1226 1226 except error.LookupError:
1227 1227 pass # may be partial hex id
1228 1228 try:
1229 1229 # str(rev)
1230 1230 rev = int(id)
1231 1231 if "%d" % rev != id:
1232 1232 raise ValueError
1233 1233 if rev < 0:
1234 1234 rev = len(self) + rev
1235 1235 if rev < 0 or rev >= len(self):
1236 1236 raise ValueError
1237 1237 return self.node(rev)
1238 1238 except (ValueError, OverflowError):
1239 1239 pass
1240 1240 if len(id) == 40:
1241 1241 try:
1242 1242 # a full hex nodeid?
1243 1243 node = bin(id)
1244 1244 self.rev(node)
1245 1245 return node
1246 1246 except (TypeError, error.LookupError):
1247 1247 pass
1248 1248
1249 1249 def _partialmatch(self, id):
1250 1250 # we don't care about wdirfilenodeids as they should always be full hashes
1251 1251 maybewdir = wdirhex.startswith(id)
1252 1252 try:
1253 1253 partial = self.index.partialmatch(id)
1254 1254 if partial and self.hasnode(partial):
1255 1255 if maybewdir:
1256 1256 # single 'ff...' match in radix tree, ambiguous with wdir
1257 1257 raise error.RevlogError
1258 1258 return partial
1259 1259 if maybewdir:
1260 1260 # no 'ff...' match in radix tree, wdir identified
1261 1261 raise error.WdirUnsupported
1262 1262 return None
1263 1263 except error.RevlogError:
1264 1264 # parsers.c radix tree lookup gave multiple matches
1265 1265 # fast path: for unfiltered changelog, radix tree is accurate
1266 1266 if not getattr(self, 'filteredrevs', None):
1267 1267 raise error.AmbiguousPrefixLookupError(
1268 1268 id, self.indexfile, _('ambiguous identifier'))
1269 1269 # fall through to slow path that filters hidden revisions
1270 1270 except (AttributeError, ValueError):
1271 1271 # we are pure python, or key was too short to search radix tree
1272 1272 pass
1273 1273
1274 1274 if id in self._pcache:
1275 1275 return self._pcache[id]
1276 1276
1277 1277 if len(id) <= 40:
1278 1278 try:
1279 1279 # hex(node)[:...]
1280 1280 l = len(id) // 2 # grab an even number of digits
1281 1281 prefix = bin(id[:l * 2])
1282 1282 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1283 1283 nl = [n for n in nl if hex(n).startswith(id) and
1284 1284 self.hasnode(n)]
1285 1285 if nullhex.startswith(id):
1286 1286 nl.append(nullid)
1287 1287 if len(nl) > 0:
1288 1288 if len(nl) == 1 and not maybewdir:
1289 1289 self._pcache[id] = nl[0]
1290 1290 return nl[0]
1291 1291 raise error.AmbiguousPrefixLookupError(
1292 1292 id, self.indexfile, _('ambiguous identifier'))
1293 1293 if maybewdir:
1294 1294 raise error.WdirUnsupported
1295 1295 return None
1296 1296 except TypeError:
1297 1297 pass
1298 1298
1299 1299 def lookup(self, id):
1300 1300 """locate a node based on:
1301 1301 - revision number or str(revision number)
1302 1302 - nodeid or subset of hex nodeid
1303 1303 """
1304 1304 n = self._match(id)
1305 1305 if n is not None:
1306 1306 return n
1307 1307 n = self._partialmatch(id)
1308 1308 if n:
1309 1309 return n
1310 1310
1311 1311 raise error.LookupError(id, self.indexfile, _('no match found'))
1312 1312
1313 1313 def shortest(self, node, minlength=1):
1314 1314 """Find the shortest unambiguous prefix that matches node."""
1315 1315 def isvalid(prefix):
1316 1316 try:
1317 1317 matchednode = self._partialmatch(prefix)
1318 1318 except error.AmbiguousPrefixLookupError:
1319 1319 return False
1320 1320 except error.WdirUnsupported:
1321 1321 # single 'ff...' match
1322 1322 return True
1323 1323 if matchednode is None:
1324 1324 raise error.LookupError(node, self.indexfile, _('no node'))
1325 1325 return True
1326 1326
1327 1327 def maybewdir(prefix):
1328 1328 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1329 1329
1330 1330 hexnode = hex(node)
1331 1331
1332 1332 def disambiguate(hexnode, minlength):
1333 1333 """Disambiguate against wdirid."""
1334 1334 for length in range(minlength, 41):
1335 1335 prefix = hexnode[:length]
1336 1336 if not maybewdir(prefix):
1337 1337 return prefix
1338 1338
1339 1339 if not getattr(self, 'filteredrevs', None):
1340 1340 try:
1341 1341 length = max(self.index.shortest(node), minlength)
1342 1342 return disambiguate(hexnode, length)
1343 1343 except error.RevlogError:
1344 1344 if node != wdirid:
1345 1345 raise error.LookupError(node, self.indexfile, _('no node'))
1346 1346 except AttributeError:
1347 1347 # Fall through to pure code
1348 1348 pass
1349 1349
1350 1350 if node == wdirid:
1351 1351 for length in range(minlength, 41):
1352 1352 prefix = hexnode[:length]
1353 1353 if isvalid(prefix):
1354 1354 return prefix
1355 1355
1356 1356 for length in range(minlength, 41):
1357 1357 prefix = hexnode[:length]
1358 1358 if isvalid(prefix):
1359 1359 return disambiguate(hexnode, length)
1360 1360
1361 1361 def cmp(self, node, text):
1362 1362 """compare text with a given file revision
1363 1363
1364 1364 returns True if text is different than what is stored.
1365 1365 """
1366 1366 p1, p2 = self.parents(node)
1367 1367 return storageutil.hashrevisionsha1(text, p1, p2) != node
1368 1368
1369 1369 def _cachesegment(self, offset, data):
1370 1370 """Add a segment to the revlog cache.
1371 1371
1372 1372 Accepts an absolute offset and the data that is at that location.
1373 1373 """
1374 1374 o, d = self._chunkcache
1375 1375 # try to add to existing cache
1376 1376 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1377 1377 self._chunkcache = o, d + data
1378 1378 else:
1379 1379 self._chunkcache = offset, data
1380 1380
1381 1381 def _readsegment(self, offset, length, df=None):
1382 1382 """Load a segment of raw data from the revlog.
1383 1383
1384 1384 Accepts an absolute offset, length to read, and an optional existing
1385 1385 file handle to read from.
1386 1386
1387 1387 If an existing file handle is passed, it will be seeked and the
1388 1388 original seek position will NOT be restored.
1389 1389
1390 1390 Returns a str or buffer of raw byte data.
1391 1391
1392 1392 Raises if the requested number of bytes could not be read.
1393 1393 """
1394 1394 # Cache data both forward and backward around the requested
1395 1395 # data, in a fixed size window. This helps speed up operations
1396 1396 # involving reading the revlog backwards.
1397 1397 cachesize = self._chunkcachesize
1398 1398 realoffset = offset & ~(cachesize - 1)
1399 1399 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1400 1400 - realoffset)
1401 1401 with self._datareadfp(df) as df:
1402 1402 df.seek(realoffset)
1403 1403 d = df.read(reallength)
1404 1404
1405 1405 self._cachesegment(realoffset, d)
1406 1406 if offset != realoffset or reallength != length:
1407 1407 startoffset = offset - realoffset
1408 1408 if len(d) - startoffset < length:
1409 1409 raise error.RevlogError(
1410 1410 _('partial read of revlog %s; expected %d bytes from '
1411 1411 'offset %d, got %d') %
1412 1412 (self.indexfile if self._inline else self.datafile,
1413 1413 length, realoffset, len(d) - startoffset))
1414 1414
1415 1415 return util.buffer(d, startoffset, length)
1416 1416
1417 1417 if len(d) < length:
1418 1418 raise error.RevlogError(
1419 1419 _('partial read of revlog %s; expected %d bytes from offset '
1420 1420 '%d, got %d') %
1421 1421 (self.indexfile if self._inline else self.datafile,
1422 1422 length, offset, len(d)))
1423 1423
1424 1424 return d
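# Worked example of the windowing above, with the default _chunkcachesize
# of 65536 and a request for offset=70000, length=100:
#
#   realoffset = 70000 & ~65535                           == 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 == 65536
#
# i.e. one full cache window is read and cached, and the requested
# 100 bytes are then sliced out of it via util.buffer().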
1425 1425
1426 1426 def _getsegment(self, offset, length, df=None):
1427 1427 """Obtain a segment of raw data from the revlog.
1428 1428
1429 1429 Accepts an absolute offset, length of bytes to obtain, and an
1430 1430 optional file handle to the already-opened revlog. If the file
1431 1431 handle is used, its original seek position will not be preserved.
1432 1432
1433 1433 Requests for data may be returned from a cache.
1434 1434
1435 1435 Returns a str or a buffer instance of raw byte data.
1436 1436 """
1437 1437 o, d = self._chunkcache
1438 1438 l = len(d)
1439 1439
1440 1440 # is it in the cache?
1441 1441 cachestart = offset - o
1442 1442 cacheend = cachestart + length
1443 1443 if cachestart >= 0 and cacheend <= l:
1444 1444 if cachestart == 0 and cacheend == l:
1445 1445 return d # avoid a copy
1446 1446 return util.buffer(d, cachestart, cacheend - cachestart)
1447 1447
1448 1448 return self._readsegment(offset, length, df=df)
1449 1449
1450 1450 def _getsegmentforrevs(self, startrev, endrev, df=None):
1451 1451 """Obtain a segment of raw data corresponding to a range of revisions.
1452 1452
1453 1453 Accepts the start and end revisions and an optional already-open
1454 1454 file handle to be used for reading. If the file handle is read, its
1455 1455 seek position will not be preserved.
1456 1456
1457 1457 Requests for data may be satisfied by a cache.
1458 1458
1459 1459 Returns a 2-tuple of (offset, data) for the requested range of
1460 1460 revisions. Offset is the integer offset from the beginning of the
1461 1461 revlog and data is a str or buffer of the raw byte data.
1462 1462
1463 1463 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1464 1464 to determine where each revision's data begins and ends.
1465 1465 """
1466 1466 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1467 1467 # (functions are expensive).
1468 1468 index = self.index
1469 1469 istart = index[startrev]
1470 1470 start = int(istart[0] >> 16)
1471 1471 if startrev == endrev:
1472 1472 end = start + istart[1]
1473 1473 else:
1474 1474 iend = index[endrev]
1475 1475 end = int(iend[0] >> 16) + iend[1]
1476 1476
1477 1477 if self._inline:
1478 1478 start += (startrev + 1) * self._io.size
1479 1479 end += (endrev + 1) * self._io.size
1480 1480 length = end - start
1481 1481
1482 1482 return start, self._getsegment(start, length, df=df)
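# Sketch of the inline adjustment above: an inline revlog interleaves
# each data chunk with its index entry, so the physical offset of
# revision ``rev``'s data is its logical start plus (rev + 1) entries:
#
#   physical = rl.start(rev) + (rev + 1) * rl._io.size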
1483 1483
1484 1484 def _chunk(self, rev, df=None):
1485 1485 """Obtain a single decompressed chunk for a revision.
1486 1486
1487 1487 Accepts an integer revision and an optional already-open file handle
1488 1488 to be used for reading. If used, the seek position of the file will not
1489 1489 be preserved.
1490 1490
1491 1491 Returns a str holding uncompressed data for the requested revision.
1492 1492 """
1493 1493 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1494 1494
1495 1495 def _chunks(self, revs, df=None, targetsize=None):
1496 1496 """Obtain decompressed chunks for the specified revisions.
1497 1497
1498 1498 Accepts an iterable of numeric revisions that are assumed to be in
1499 1499 ascending order. Also accepts an optional already-open file handle
1500 1500 to be used for reading. If used, the seek position of the file will
1501 1501 not be preserved.
1502 1502
1503 1503 This function is similar to calling ``self._chunk()`` multiple times,
1504 1504 but is faster.
1505 1505
1506 1506 Returns a list with decompressed data for each requested revision.
1507 1507 """
1508 1508 if not revs:
1509 1509 return []
1510 1510 start = self.start
1511 1511 length = self.length
1512 1512 inline = self._inline
1513 1513 iosize = self._io.size
1514 1514 buffer = util.buffer
1515 1515
1516 1516 l = []
1517 1517 ladd = l.append
1518 1518
1519 1519 if not self._withsparseread:
1520 1520 slicedchunks = (revs,)
1521 1521 else:
1522 1522 slicedchunks = deltautil.slicechunk(self, revs,
1523 1523 targetsize=targetsize)
1524 1524
1525 1525 for revschunk in slicedchunks:
1526 1526 firstrev = revschunk[0]
1527 1527 # Skip trailing revisions with empty diff
1528 1528 for lastrev in revschunk[::-1]:
1529 1529 if length(lastrev) != 0:
1530 1530 break
1531 1531
1532 1532 try:
1533 1533 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1534 1534 except OverflowError:
1535 1535 # issue4215 - we can't cache a run of chunks greater than
1536 1536 # 2G on Windows
1537 1537 return [self._chunk(rev, df=df) for rev in revschunk]
1538 1538
1539 1539 decomp = self.decompress
1540 1540 for rev in revschunk:
1541 1541 chunkstart = start(rev)
1542 1542 if inline:
1543 1543 chunkstart += (rev + 1) * iosize
1544 1544 chunklength = length(rev)
1545 1545 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1546 1546
1547 1547 return l
1548 1548
1549 1549 def _chunkclear(self):
1550 1550 """Clear the raw chunk cache."""
1551 1551 self._chunkcache = (0, '')
1552 1552
1553 1553 def deltaparent(self, rev):
1554 1554 """return deltaparent of the given revision"""
1555 1555 base = self.index[rev][3]
1556 1556 if base == rev:
1557 1557 return nullrev
1558 1558 elif self._generaldelta:
1559 1559 return base
1560 1560 else:
1561 1561 return rev - 1
1562 1562
1563 1563 def issnapshot(self, rev):
1564 1564 """tells whether rev is a snapshot
1565 1565 """
1566 1566 if not self._sparserevlog:
1567 1567 return self.deltaparent(rev) == nullrev
1568 1568 elif util.safehasattr(self.index, 'issnapshot'):
1569 1569 # directly assign the method to cache the testing and access
1570 1570 self.issnapshot = self.index.issnapshot
1571 1571 return self.issnapshot(rev)
1572 1572 if rev == nullrev:
1573 1573 return True
1574 1574 entry = self.index[rev]
1575 1575 base = entry[3]
1576 1576 if base == rev:
1577 1577 return True
1578 1578 if base == nullrev:
1579 1579 return True
1580 1580 p1 = entry[5]
1581 1581 p2 = entry[6]
1582 1582 if base == p1 or base == p2:
1583 1583 return False
1584 1584 return self.issnapshot(base)
1585 1585
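The pure-Python fallback above classifies snapshots recursively: a revision whose delta base is itself or null is a full snapshot, a delta against one of its parents is an ordinary delta, and anything else is a snapshot exactly when its base is one. The same recursion over a hypothetical ``(deltabase, p1, p2)`` table:

    # entries: rev -> (deltabase, p1, p2); -1 plays the role of nullrev.
    entries = {0: (0, -1, -1),   # base == rev: a full snapshot
               1: (0, 0, -1),    # delta against its parent: not a snapshot
               2: (0, 1, -1)}    # delta against a non-parent snapshot

    def issnapshot(rev):
        if rev == -1:
            return True
        base, p1, p2 = entries[rev]
        if base == rev or base == -1:
            return True
        if base in (p1, p2):
            return False              # ordinary delta against a parent
        return issnapshot(base)       # intermediate snapshot iff base is one

    assert [issnapshot(r) for r in (0, 1, 2)] == [True, False, True]
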
1586 1586 def snapshotdepth(self, rev):
1587 1587 """number of snapshot in the chain before this one"""
1588 1588 if not self.issnapshot(rev):
1589 1589 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1590 1590 return len(self._deltachain(rev)[0]) - 1
1591 1591
1592 1592 def revdiff(self, rev1, rev2):
1593 1593 """return or calculate a delta between two revisions
1594 1594
1595 1595 The delta calculated is in binary form and is intended to be written to
1596 1596 revlog data directly. So this function needs raw revision data.
1597 1597 """
1598 1598 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1599 1599 return bytes(self._chunk(rev2))
1600 1600
1601 1601 return mdiff.textdiff(self.rawdata(rev1),
1602 1602 self.rawdata(rev2))
1603 1603
1604 1604 def revision(self, nodeorrev, _df=None, raw=False):
1605 1605 """return an uncompressed revision of a given node or revision
1606 1606 number.
1607 1607
1608 1608 _df - an existing file handle to read from. (internal-only)
1609 1609 raw - an optional argument specifying if the revision data is to be
1610 1610 treated as raw data when applying flag transforms. 'raw' should be set
1611 1611 to True when generating changegroups or in debug commands.
1612 1612 """
1613 1613 if raw:
1614 1614 msg = ('revlog.revision(..., raw=True) is deprecated, '
1615 1615 'use revlog.rawdata(...)')
1616 1616 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1617 1617 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1618 1618
1619 1619 def sidedata(self, nodeorrev, _df=None):
1620 1620 """a map of extra data related to the changeset but not part of the hash
1621 1621
1622 1622 This function currently returns a dictionary. However, a more
1623 1623 advanced mapping object will likely be used in the future for more
1624 1624 efficient/lazy code.
1625 1625 """
1626 1626 return self._revisiondata(nodeorrev, _df)[1]
1627 1627
1628 1628 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1629 1629 # deal with <nodeorrev> argument type
1630 1630 if isinstance(nodeorrev, int):
1631 1631 rev = nodeorrev
1632 1632 node = self.node(rev)
1633 1633 else:
1634 1634 node = nodeorrev
1635 1635 rev = None
1636 1636
1637 1637 # fast path the special `nullid` rev
1638 1638 if node == nullid:
1639 1639 return "", {}
1640 1640
1641 1641 # The text as stored inside the revlog. Might be the revision or might
1642 1642 # need to be processed to retrieve the revision.
1643 1643 rawtext = None
1644 1644
1645 1645 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1646 1646
1647 1647 if raw and validated:
1648 1648 # if we don't want to process the raw text and that raw
1649 1649 # text is cached, we can exit early.
1650 1650 return rawtext, {}
1651 1651 if rev is None:
1652 1652 rev = self.rev(node)
1653 1653 # the revlog's flags for this revision
1654 1654 # (they usually alter its state or content)
1655 1655 flags = self.flags(rev)
1656 1656
1657 1657 if validated and flags == REVIDX_DEFAULT_FLAGS:
1658 1658 # no extra flags set, no flag processor runs, text = rawtext
1659 1659 return rawtext, {}
1660 1660
1661 1661 sidedata = {}
1662 1662 if raw:
1663 validatehash = self._processflagsraw(rawtext, flags)
1663 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1664 1664 text = rawtext
1665 1665 else:
1666 1666 r = flagutil.processflagsread(self, rawtext, flags)
1667 1667 text, validatehash, sidedata = r
1668 1668 if validatehash:
1669 1669 self.checkhash(text, node, rev=rev)
1670 1670 if not validated:
1671 1671 self._revisioncache = (node, rev, rawtext)
1672 1672
1673 1673 return text, sidedata
1674 1674
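This hunk is the subject of the changeset: ``processflagsraw`` moves from a revlog method to a module-level function in ``flagutil``, so the revlog is now passed explicitly as the first argument. A plausible shape for that function is sketched below; this is not the actual flagutil code, and ``REVIDX_FLAGS_ORDER`` and the revlog's ``_flagprocessors`` mapping are assumed to be available as registered by flagutil:

    # Before (method on the revlog instance):
    #     validatehash = self._processflagsraw(rawtext, flags)
    # After (module-level function, revlog passed explicitly):
    #     validatehash = flagutil.processflagsraw(self, rawtext, flags)

    def processflagsraw(revlog, text, flags):
        """sketch: AND together every raw transform's 'hash-checkable' answer

        Assumed, not the real implementation: REVIDX_FLAGS_ORDER and the
        per-revlog _flagprocessors registry come from flagutil."""
        validatehash = True
        for flag in REVIDX_FLAGS_ORDER:
            if not flags & flag:
                continue
            _read, _write, rawtransform = revlog._flagprocessors[flag]
            validatehash = validatehash and rawtransform(revlog, text)
        return validatehash
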
1675 1675 def _rawtext(self, node, rev, _df=None):
1676 1676 """return the possibly unvalidated rawtext for a revision
1677 1677
1678 1678 returns (rev, rawtext, validated)
1679 1679 """
1680 1680
1681 1681 # revision in the cache (could be useful to apply delta)
1682 1682 cachedrev = None
1683 1683 # An intermediate text to apply deltas to
1684 1684 basetext = None
1685 1685
1686 1686 # Check if we have the entry in cache
1687 1687 # The cache entry looks like (node, rev, rawtext)
1688 1688 if self._revisioncache:
1689 1689 if self._revisioncache[0] == node:
1690 1690 return (rev, self._revisioncache[2], True)
1691 1691 cachedrev = self._revisioncache[1]
1692 1692
1693 1693 if rev is None:
1694 1694 rev = self.rev(node)
1695 1695
1696 1696 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1697 1697 if stopped:
1698 1698 basetext = self._revisioncache[2]
1699 1699
1700 1700 # drop cache to save memory, the caller is expected to
1701 1701 # update self._revisioncache after validating the text
1702 1702 self._revisioncache = None
1703 1703
1704 1704 targetsize = None
1705 1705 rawsize = self.index[rev][2]
1706 1706 if 0 <= rawsize:
1707 1707 targetsize = 4 * rawsize
1708 1708
1709 1709 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1710 1710 if basetext is None:
1711 1711 basetext = bytes(bins[0])
1712 1712 bins = bins[1:]
1713 1713
1714 1714 rawtext = mdiff.patches(basetext, bins)
1715 1715 del basetext # let us have a chance to free memory early
1716 1716 return (rev, rawtext, False)
1717 1717
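Reconstruction in ``_rawtext`` is a fold over the delta chain: the first chunk (or the cached revision, when the chain walk stopped early at ``cachedrev``) is the base text, and each later chunk is a binary patch applied on top by ``mdiff.patches``; ``targetsize`` caps sparse reads at four times the expected raw size. A toy fold, with a stand-in ``apply`` in place of mdiff:

    # Toy fold over a delta chain; apply() stands in for mdiff.patches.
    def apply(base, delta):
        return base + delta          # real deltas are binary patch hunks

    chunks = [b'base', b'+d1', b'+d2']   # what _chunks(chain) would return
    text = chunks[0]                     # full snapshot at the chain's root
    for delta in chunks[1:]:
        text = apply(text, delta)
    assert text == b'base+d1+d2'
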
1718 1718 def rawdata(self, nodeorrev, _df=None):
1719 1719 """return an uncompressed raw data of a given node or revision number.
1720 1720
1721 1721 _df - an existing file handle to read from. (internal-only)
1722 1722 """
1723 1723 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1724 1724
1725 1725 def hash(self, text, p1, p2):
1726 1726 """Compute a node hash.
1727 1727
1728 1728 Available as a function so that subclasses can replace the hash
1729 1729 as needed.
1730 1730 """
1731 1731 return storageutil.hashrevisionsha1(text, p1, p2)
1732 1732
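The default ``storageutil.hashrevisionsha1`` computes the classic Mercurial node: a SHA-1 over the two parent nodes in sorted order followed by the text, which makes the result symmetric in p1 and p2. A minimal sketch of that scheme:

    import hashlib

    def nodehash(text, p1, p2):
        # parents are hashed in sorted order, making the node symmetric
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()

    p1, p2 = b'\x00' * 20, b'\x01' * 20
    assert nodehash(b'file content', p1, p2) == nodehash(b'file content', p2, p1)
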
1733 1733 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1734 1734 """Check node hash integrity.
1735 1735
1736 1736 Available as a function so that subclasses can extend hash mismatch
1737 1737 behaviors as needed.
1738 1738 """
1739 1739 try:
1740 1740 if p1 is None and p2 is None:
1741 1741 p1, p2 = self.parents(node)
1742 1742 if node != self.hash(text, p1, p2):
1743 1743 # Clear the revision cache on hash failure. The revision cache
1744 1744 # only stores the raw revision and clearing the cache does have
1745 1745 # the side-effect that we won't have a cache hit when the raw
1746 1746 # revision data is accessed. But this case should be rare and
1747 1747 # it is extra work to teach the cache about the hash
1748 1748 # verification state.
1749 1749 if self._revisioncache and self._revisioncache[0] == node:
1750 1750 self._revisioncache = None
1751 1751
1752 1752 revornode = rev
1753 1753 if revornode is None:
1754 1754 revornode = templatefilters.short(hex(node))
1755 1755 raise error.RevlogError(_("integrity check failed on %s:%s")
1756 1756 % (self.indexfile, pycompat.bytestr(revornode)))
1757 1757 except error.RevlogError:
1758 1758 if self._censorable and storageutil.iscensoredtext(text):
1759 1759 raise error.CensoredNodeError(self.indexfile, node, text)
1760 1760 raise
1761 1761
1762 1762 def _enforceinlinesize(self, tr, fp=None):
1763 1763 """Check if the revlog is too big for inline and convert if so.
1764 1764
1765 1765 This should be called after revisions are added to the revlog. If the
1766 1766 revlog has grown too large to be an inline revlog, it will convert it
1767 1767 to use multiple index and data files.
1768 1768 """
1769 1769 tiprev = len(self) - 1
1770 1770 if (not self._inline or
1771 1771 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1772 1772 return
1773 1773
1774 1774 trinfo = tr.find(self.indexfile)
1775 1775 if trinfo is None:
1776 1776 raise error.RevlogError(_("%s not found in the transaction")
1777 1777 % self.indexfile)
1778 1778
1779 1779 trindex = trinfo[2]
1780 1780 if trindex is not None:
1781 1781 dataoff = self.start(trindex)
1782 1782 else:
1783 1783 # revlog was stripped at start of transaction, use all leftover data
1784 1784 trindex = len(self) - 1
1785 1785 dataoff = self.end(tiprev)
1786 1786
1787 1787 tr.add(self.datafile, dataoff)
1788 1788
1789 1789 if fp:
1790 1790 fp.flush()
1791 1791 fp.close()
1792 1792 # We can't use the cached file handle after close(). So prevent
1793 1793 # its usage.
1794 1794 self._writinghandles = None
1795 1795
1796 1796 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1797 1797 for r in self:
1798 1798 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1799 1799
1800 1800 with self._indexfp('w') as fp:
1801 1801 self.version &= ~FLAG_INLINE_DATA
1802 1802 self._inline = False
1803 1803 io = self._io
1804 1804 for i in self:
1805 1805 e = io.packentry(self.index[i], self.node, self.version, i)
1806 1806 fp.write(e)
1807 1807
1808 1808 # the temp file replaces the real index when we exit the context
1809 1809 # manager
1810 1810
1811 1811 tr.replace(self.indexfile, trindex * self._io.size)
1812 1812 self._chunkclear()
1813 1813
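The guard at the top of ``_enforceinlinesize`` is a plain threshold: conversion happens only for inline revlogs whose tip revision ends at or beyond ``_maxinline`` (128 KiB). A quick numeric check of that condition:

    _maxinline = 131072               # 128 KiB, as defined at module level

    def needssplit(tipstart, tiplength, inline=True):
        # mirrors the early-return guard in _enforceinlinesize
        return inline and tipstart + tiplength >= _maxinline

    assert not needssplit(100000, 20000)   # 120000 < 131072: stays inline
    assert needssplit(120000, 20000)       # 140000 >= 131072: convert
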
1814 1814 def _nodeduplicatecallback(self, transaction, node):
1815 1815 """called when trying to add a node already stored.
1816 1816 """
1817 1817
1818 1818 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1819 1819 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1820 1820 sidedata=None):
1821 1821 """add a revision to the log
1822 1822
1823 1823 text - the revision data to add
1824 1824 transaction - the transaction object used for rollback
1825 1825 link - the linkrev data to add
1826 1826 p1, p2 - the parent nodeids of the revision
1827 1827 cachedelta - an optional precomputed delta
1828 1828 node - nodeid of revision; typically node is not specified, and it is
1829 1829 computed by default as hash(text, p1, p2); however, subclasses might
1830 1830 use a different hashing method (and override checkhash() in that case)
1831 1831 flags - the known flags to set on the revision
1832 1832 deltacomputer - an optional deltacomputer instance shared between
1833 1833 multiple calls
1834 1834 """
1835 1835 if link == nullrev:
1836 1836 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1837 1837 % self.indexfile)
1838 1838
1839 1839 if sidedata is None:
1840 1840 sidedata = {}
1841 1841
1842 1842 if flags:
1843 1843 node = node or self.hash(text, p1, p2)
1844 1844
1845 1845 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1846 1846 sidedata=sidedata)
1847 1847
1848 1848 # If the flag processor modifies the revision data, ignore any provided
1849 1849 # cachedelta.
1850 1850 if rawtext != text:
1851 1851 cachedelta = None
1852 1852
1853 1853 if len(rawtext) > _maxentrysize:
1854 1854 raise error.RevlogError(
1855 1855 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1856 1856 % (self.indexfile, len(rawtext)))
1857 1857
1858 1858 node = node or self.hash(rawtext, p1, p2)
1859 1859 if node in self.nodemap:
1860 1860 return node
1861 1861
1862 1862 if validatehash:
1863 1863 self.checkhash(rawtext, node, p1=p1, p2=p2)
1864 1864
1865 1865 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1866 1866 flags, cachedelta=cachedelta,
1867 1867 deltacomputer=deltacomputer)
1868 1868
1869 1869 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1870 1870 cachedelta=None, deltacomputer=None):
1871 1871 """add a raw revision with known flags, node and parents
1872 1872 useful when reusing a revision not stored in this revlog (e.g. received
1873 1873 over the wire, or read from an external bundle).
1874 1874 """
1875 1875 dfh = None
1876 1876 if not self._inline:
1877 1877 dfh = self._datafp("a+")
1878 1878 ifh = self._indexfp("a+")
1879 1879 try:
1880 1880 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1881 1881 flags, cachedelta, ifh, dfh,
1882 1882 deltacomputer=deltacomputer)
1883 1883 finally:
1884 1884 if dfh:
1885 1885 dfh.close()
1886 1886 ifh.close()
1887 1887
1888 1888 def compress(self, data):
1889 1889 """Generate a possibly-compressed representation of data."""
1890 1890 if not data:
1891 1891 return '', data
1892 1892
1893 1893 compressed = self._compressor.compress(data)
1894 1894
1895 1895 if compressed:
1896 1896 # The revlog compressor added the header in the returned data.
1897 1897 return '', compressed
1898 1898
1899 1899 if data[0:1] == '\0':
1900 1900 return '', data
1901 1901 return 'u', data
1902 1902
1903 1903 def decompress(self, data):
1904 1904 """Decompress a revlog chunk.
1905 1905
1906 1906 The chunk is expected to begin with a header identifying the
1907 1907 format type so it can be routed to an appropriate decompressor.
1908 1908 """
1909 1909 if not data:
1910 1910 return data
1911 1911
1912 1912 # Revlogs are read much more frequently than they are written and many
1913 1913 # chunks only take microseconds to decompress, so performance is
1914 1914 # important here.
1915 1915 #
1916 1916 # We can make a few assumptions about revlogs:
1917 1917 #
1918 1918 # 1) the majority of chunks will be compressed (as opposed to inline
1919 1919 # raw data).
1920 1920 # 2) decompressing *any* data will likely be at least 10x slower than
1921 1921 # returning raw inline data.
1922 1922 # 3) we want to prioritize common and officially supported compression
1923 1923 # engines
1924 1924 #
1925 1925 # It follows that we want to optimize for "decompress compressed data
1926 1926 # when encoded with common and officially supported compression engines"
1927 1927 # case over "raw data" and "data encoded by less common or non-official
1928 1928 # compression engines." That is why we have the inline lookup first
1929 1929 # followed by the compengines lookup.
1930 1930 #
1931 1931 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1932 1932 # compressed chunks. And this matters for changelog and manifest reads.
1933 1933 t = data[0:1]
1934 1934
1935 1935 if t == 'x':
1936 1936 try:
1937 1937 return _zlibdecompress(data)
1938 1938 except zlib.error as e:
1939 1939 raise error.RevlogError(_('revlog decompress error: %s') %
1940 1940 stringutil.forcebytestr(e))
1941 1941 # '\0' is more common than 'u' so it goes first.
1942 1942 elif t == '\0':
1943 1943 return data
1944 1944 elif t == 'u':
1945 1945 return util.buffer(data, 1)
1946 1946
1947 1947 try:
1948 1948 compressor = self._decompressors[t]
1949 1949 except KeyError:
1950 1950 try:
1951 1951 engine = util.compengines.forrevlogheader(t)
1952 1952 compressor = engine.revlogcompressor(self._compengineopts)
1953 1953 self._decompressors[t] = compressor
1954 1954 except KeyError:
1955 1955 raise error.RevlogError(_('unknown compression type %r') % t)
1956 1956
1957 1957 return compressor.decompress(data)
1958 1958
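``compress`` and ``decompress`` agree on a one-byte header: an empty prefix means the engine embedded its own header (zlib output happens to begin with 'x'), 'u' marks data stored uncompressed, and a leading '\0' is stored as-is because it cannot collide with any engine header. A simplified round trip illustrating the dispatch (the real code delegates to the configured compression engine):

    import zlib

    def roundtrip(data):
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            stored = compressed       # zlib output begins with 'x'
        elif data[:1] == b'\0':
            stored = data             # '\0' cannot clash with a header
        else:
            stored = b'u' + data      # explicit "uncompressed" marker
        t = stored[:1]                # reader: dispatch on the first byte
        if t == b'x':
            return zlib.decompress(stored)
        if t == b'\0':
            return stored
        if t == b'u':
            return bytes(stored[1:])
        raise ValueError('unknown compression type %r' % t)

    payload = b'some revision text ' * 10
    assert roundtrip(payload) == payload
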
1959 1959 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1960 1960 cachedelta, ifh, dfh, alwayscache=False,
1961 1961 deltacomputer=None):
1962 1962 """internal function to add revisions to the log
1963 1963
1964 1964 see addrevision for argument descriptions.
1965 1965
1966 1966 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1967 1967
1968 1968 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1969 1969 be used.
1970 1970
1971 1971 invariants:
1972 1972 - rawtext is optional (can be None); if not set, cachedelta must be set.
1973 1973 if both are set, they must correspond to each other.
1974 1974 """
1975 1975 if node == nullid:
1976 1976 raise error.RevlogError(_("%s: attempt to add null revision") %
1977 1977 self.indexfile)
1978 1978 if node == wdirid or node in wdirfilenodeids:
1979 1979 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1980 1980 self.indexfile)
1981 1981
1982 1982 if self._inline:
1983 1983 fh = ifh
1984 1984 else:
1985 1985 fh = dfh
1986 1986
1987 1987 btext = [rawtext]
1988 1988
1989 1989 curr = len(self)
1990 1990 prev = curr - 1
1991 1991 offset = self.end(prev)
1992 1992 p1r, p2r = self.rev(p1), self.rev(p2)
1993 1993
1994 1994 # full versions are inserted when the needed deltas
1995 1995 # become comparable to the uncompressed text
1996 1996 if rawtext is None:
1997 1997 # we need the rawtext size before it is changed by flag processors,
1998 1998 # i.e. the non-raw size. use revlog explicitly to avoid filelog's
1999 1999 # extra logic that might remove metadata size.
2000 2000 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2001 2001 cachedelta[1])
2002 2002 else:
2003 2003 textlen = len(rawtext)
2004 2004
2005 2005 if deltacomputer is None:
2006 2006 deltacomputer = deltautil.deltacomputer(self)
2007 2007
2008 2008 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2009 2009
2010 2010 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2011 2011
2012 2012 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2013 2013 deltainfo.base, link, p1r, p2r, node)
2014 2014 self.index.append(e)
2015 2015 self.nodemap[node] = curr
2016 2016
2017 2017 # Reset the pure node cache start lookup offset to account for new
2018 2018 # revision.
2019 2019 if self._nodepos is not None:
2020 2020 self._nodepos = curr
2021 2021
2022 2022 entry = self._io.packentry(e, self.node, self.version, curr)
2023 2023 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2024 2024 link, offset)
2025 2025
2026 2026 rawtext = btext[0]
2027 2027
2028 2028 if alwayscache and rawtext is None:
2029 2029 rawtext = deltacomputer.buildtext(revinfo, fh)
2030 2030
2031 2031 if type(rawtext) == bytes: # only accept immutable objects
2032 2032 self._revisioncache = (node, curr, rawtext)
2033 2033 self._chainbasecache[curr] = deltainfo.chainbase
2034 2034 return node
2035 2035
2036 2036 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2037 2037 # Files opened in a+ mode have inconsistent behavior on various
2038 2038 # platforms. Windows requires that a file positioning call be made
2039 2039 # when the file handle transitions between reads and writes. See
2040 2040 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2041 2041 # platforms, Python or the platform itself can be buggy. Some versions
2042 2042 # of Solaris have been observed to not append at the end of the file
2043 2043 # if the file was seeked to before the end. See issue4943 for more.
2044 2044 #
2045 2045 # We work around this issue by inserting a seek() before writing.
2046 2046 # Note: This is likely not necessary on Python 3. However, because
2047 2047 # the file handle is reused for reads and may be seeked there, we need
2048 2048 # to be careful before changing this.
2049 2049 ifh.seek(0, os.SEEK_END)
2050 2050 if dfh:
2051 2051 dfh.seek(0, os.SEEK_END)
2052 2052
2053 2053 curr = len(self) - 1
2054 2054 if not self._inline:
2055 2055 transaction.add(self.datafile, offset)
2056 2056 transaction.add(self.indexfile, curr * len(entry))
2057 2057 if data[0]:
2058 2058 dfh.write(data[0])
2059 2059 dfh.write(data[1])
2060 2060 ifh.write(entry)
2061 2061 else:
2062 2062 offset += curr * self._io.size
2063 2063 transaction.add(self.indexfile, offset, curr)
2064 2064 ifh.write(entry)
2065 2065 ifh.write(data[0])
2066 2066 ifh.write(data[1])
2067 2067 self._enforceinlinesize(transaction, ifh)
2068 2068
2069 2069 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2070 2070 """
2071 2071 add a delta group
2072 2072
2073 2073 given a set of deltas, add them to the revision log. the
2074 2074 first delta is against its parent, which should be in our
2075 2075 log, the rest are against the previous delta.
2076 2076
2077 2077 If ``addrevisioncb`` is defined, it will be called with arguments of
2078 2078 this revlog and the node that was added.
2079 2079 """
2080 2080
2081 2081 if self._writinghandles:
2082 2082 raise error.ProgrammingError('cannot nest addgroup() calls')
2083 2083
2084 2084 nodes = []
2085 2085
2086 2086 r = len(self)
2087 2087 end = 0
2088 2088 if r:
2089 2089 end = self.end(r - 1)
2090 2090 ifh = self._indexfp("a+")
2091 2091 isize = r * self._io.size
2092 2092 if self._inline:
2093 2093 transaction.add(self.indexfile, end + isize, r)
2094 2094 dfh = None
2095 2095 else:
2096 2096 transaction.add(self.indexfile, isize, r)
2097 2097 transaction.add(self.datafile, end)
2098 2098 dfh = self._datafp("a+")
2099 2099 def flush():
2100 2100 if dfh:
2101 2101 dfh.flush()
2102 2102 ifh.flush()
2103 2103
2104 2104 self._writinghandles = (ifh, dfh)
2105 2105
2106 2106 try:
2107 2107 deltacomputer = deltautil.deltacomputer(self)
2108 2108 # loop through our set of deltas
2109 2109 for data in deltas:
2110 2110 node, p1, p2, linknode, deltabase, delta, flags = data
2111 2111 link = linkmapper(linknode)
2112 2112 flags = flags or REVIDX_DEFAULT_FLAGS
2113 2113
2114 2114 nodes.append(node)
2115 2115
2116 2116 if node in self.nodemap:
2117 2117 self._nodeduplicatecallback(transaction, node)
2118 2118 # this can happen if two branches make the same change
2119 2119 continue
2120 2120
2121 2121 for p in (p1, p2):
2122 2122 if p not in self.nodemap:
2123 2123 raise error.LookupError(p, self.indexfile,
2124 2124 _('unknown parent'))
2125 2125
2126 2126 if deltabase not in self.nodemap:
2127 2127 raise error.LookupError(deltabase, self.indexfile,
2128 2128 _('unknown delta base'))
2129 2129
2130 2130 baserev = self.rev(deltabase)
2131 2131
2132 2132 if baserev != nullrev and self.iscensored(baserev):
2133 2133 # if base is censored, delta must be full replacement in a
2134 2134 # single patch operation
2135 2135 hlen = struct.calcsize(">lll")
2136 2136 oldlen = self.rawsize(baserev)
2137 2137 newlen = len(delta) - hlen
2138 2138 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2139 2139 raise error.CensoredBaseError(self.indexfile,
2140 2140 self.node(baserev))
2141 2141
2142 2142 if not flags and self._peek_iscensored(baserev, delta, flush):
2143 2143 flags |= REVIDX_ISCENSORED
2144 2144
2145 2145 # We assume consumers of addrevisioncb will want to retrieve
2146 2146 # the added revision, which will require a call to
2147 2147 # revision(). revision() will fast path if there is a cache
2148 2148 # hit. So, we tell _addrevision() to always cache in this case.
2149 2149 # We're only using addgroup() in the context of changegroup
2150 2150 # generation so the revision data can always be handled as raw
2151 2151 # by the flagprocessor.
2152 2152 self._addrevision(node, None, transaction, link,
2153 2153 p1, p2, flags, (baserev, delta),
2154 2154 ifh, dfh,
2155 2155 alwayscache=bool(addrevisioncb),
2156 2156 deltacomputer=deltacomputer)
2157 2157
2158 2158 if addrevisioncb:
2159 2159 addrevisioncb(self, node)
2160 2160
2161 2161 if not dfh and not self._inline:
2162 2162 # addrevision switched from inline to conventional
2163 2163 # reopen the index
2164 2164 ifh.close()
2165 2165 dfh = self._datafp("a+")
2166 2166 ifh = self._indexfp("a+")
2167 2167 self._writinghandles = (ifh, dfh)
2168 2168 finally:
2169 2169 self._writinghandles = None
2170 2170
2171 2171 if dfh:
2172 2172 dfh.close()
2173 2173 ifh.close()
2174 2174
2175 2175 return nodes
2176 2176
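Each element consumed by ``addgroup`` is the 7-tuple unpacked above: ``(node, p1, p2, linknode, deltabase, delta, flags)``, where the delta base must already be known to the revlog and ``linkmapper`` resolves the changelog node into a local link revision. A sketch of one entry's shape (every node value below is a fabricated placeholder):

    # One changegroup entry, in the shape unpacked by addgroup.
    node = b'\x01' * 20               # fabricated placeholder hashes
    p1, p2 = b'\x00' * 20, b'\x00' * 20
    linknode = b'\x02' * 20
    deltabase = p1                    # must already be in the nodemap
    delta = b'<binary mdiff patch against deltabase>'
    flags = 0                         # or e.g. REVIDX_ISCENSORED

    entry = (node, p1, p2, linknode, deltabase, delta, flags)

    def linkmapper(ln):
        # resolve a changelog node to the local link revision number
        return {b'\x02' * 20: 42}[ln]

    assert linkmapper(entry[3]) == 42
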
2177 2177 def iscensored(self, rev):
2178 2178 """Check if a file revision is censored."""
2179 2179 if not self._censorable:
2180 2180 return False
2181 2181
2182 2182 return self.flags(rev) & REVIDX_ISCENSORED
2183 2183
2184 2184 def _peek_iscensored(self, baserev, delta, flush):
2185 2185 """Quickly check if a delta produces a censored revision."""
2186 2186 if not self._censorable:
2187 2187 return False
2188 2188
2189 2189 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2190 2190
2191 2191 def getstrippoint(self, minlink):
2192 2192 """find the minimum rev that must be stripped to strip the linkrev
2193 2193
2194 2194 Returns a tuple containing the minimum rev and a set of all revs that
2195 2195 have linkrevs that will be broken by this strip.
2196 2196 """
2197 2197 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2198 2198 self.headrevs(),
2199 2199 self.linkrev, self.parentrevs)
2200 2200
2201 2201 def strip(self, minlink, transaction):
2202 2202 """truncate the revlog on the first revision with a linkrev >= minlink
2203 2203
2204 2204 This function is called when we're stripping revision minlink and
2205 2205 its descendants from the repository.
2206 2206
2207 2207 We have to remove all revisions with linkrev >= minlink, because
2208 2208 the equivalent changelog revisions will be renumbered after the
2209 2209 strip.
2210 2210
2211 2211 So we truncate the revlog on the first of these revisions, and
2212 2212 trust that the caller has saved the revisions that shouldn't be
2213 2213 removed and that it'll re-add them after this truncation.
2214 2214 """
2215 2215 if len(self) == 0:
2216 2216 return
2217 2217
2218 2218 rev, _ = self.getstrippoint(minlink)
2219 2219 if rev == len(self):
2220 2220 return
2221 2221
2222 2222 # first truncate the files on disk
2223 2223 end = self.start(rev)
2224 2224 if not self._inline:
2225 2225 transaction.add(self.datafile, end)
2226 2226 end = rev * self._io.size
2227 2227 else:
2228 2228 end += rev * self._io.size
2229 2229
2230 2230 transaction.add(self.indexfile, end)
2231 2231
2232 2232 # then reset internal state in memory to forget those revisions
2233 2233 self._revisioncache = None
2234 2234 self._chaininfocache = {}
2235 2235 self._chunkclear()
2236 2236 for x in pycompat.xrange(rev, len(self)):
2237 2237 del self.nodemap[self.node(x)]
2238 2238
2239 2239 del self.index[rev:-1]
2240 2240 self._nodepos = None
2241 2241
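The truncation offsets fall out of the storage layout: with separate files, the data file is cut at ``start(rev)`` and the index at ``rev * entrysize``, while an inline revlog interleaves index entries with data in one file, so the cut point is the data offset plus ``rev`` index entries. A small numeric example with hypothetical sizes:

    # Hypothetical layout: 64-byte index entries, stripping from rev 3,
    # with the first three revisions occupying 500 bytes of data.
    iosize, rev, datastart = 64, 3, 500

    # separate index and data files:
    data_truncate_at = datastart              # self.start(rev) == 500
    index_truncate_at = rev * iosize          # 3 entries remain == 192

    # inline revlog (index entries interleaved with data in one file):
    inline_truncate_at = datastart + rev * iosize    # 500 + 192 == 692

    assert (data_truncate_at, index_truncate_at, inline_truncate_at) == \
           (500, 192, 692)
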
2242 2242 def checksize(self):
2243 2243 """Check size of index and data files
2244 2244
2245 2245 return a (dd, di) tuple.
2246 2246 - dd: extra bytes for the "data" file
2247 2247 - di: extra bytes for the "index" file
2248 2248
2249 2249 A healthy revlog will return (0, 0).
2250 2250 """
2251 2251 expected = 0
2252 2252 if len(self):
2253 2253 expected = max(0, self.end(len(self) - 1))
2254 2254
2255 2255 try:
2256 2256 with self._datafp() as f:
2257 2257 f.seek(0, io.SEEK_END)
2258 2258 actual = f.tell()
2259 2259 dd = actual - expected
2260 2260 except IOError as inst:
2261 2261 if inst.errno != errno.ENOENT:
2262 2262 raise
2263 2263 dd = 0
2264 2264
2265 2265 try:
2266 2266 f = self.opener(self.indexfile)
2267 2267 f.seek(0, io.SEEK_END)
2268 2268 actual = f.tell()
2269 2269 f.close()
2270 2270 s = self._io.size
2271 2271 i = max(0, actual // s)
2272 2272 di = actual - (i * s)
2273 2273 if self._inline:
2274 2274 databytes = 0
2275 2275 for r in self:
2276 2276 databytes += max(0, self.length(r))
2277 2277 dd = 0
2278 2278 di = actual - len(self) * s - databytes
2279 2279 except IOError as inst:
2280 2280 if inst.errno != errno.ENOENT:
2281 2281 raise
2282 2282 di = 0
2283 2283
2284 2284 return (dd, di)
2285 2285
2286 2286 def files(self):
2287 2287 res = [self.indexfile]
2288 2288 if not self._inline:
2289 2289 res.append(self.datafile)
2290 2290 return res
2291 2291
2292 2292 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2293 2293 assumehaveparentrevisions=False,
2294 2294 deltamode=repository.CG_DELTAMODE_STD):
2295 2295 if nodesorder not in ('nodes', 'storage', 'linear', None):
2296 2296 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2297 2297 nodesorder)
2298 2298
2299 2299 if nodesorder is None and not self._generaldelta:
2300 2300 nodesorder = 'storage'
2301 2301
2302 2302 if (not self._storedeltachains and
2303 2303 deltamode != repository.CG_DELTAMODE_PREV):
2304 2304 deltamode = repository.CG_DELTAMODE_FULL
2305 2305
2306 2306 return storageutil.emitrevisions(
2307 2307 self, nodes, nodesorder, revlogrevisiondelta,
2308 2308 deltaparentfn=self.deltaparent,
2309 2309 candeltafn=self.candelta,
2310 2310 rawsizefn=self.rawsize,
2311 2311 revdifffn=self.revdiff,
2312 2312 flagsfn=self.flags,
2313 2313 deltamode=deltamode,
2314 2314 revisiondata=revisiondata,
2315 2315 assumehaveparentrevisions=assumehaveparentrevisions)
2316 2316
2317 2317 DELTAREUSEALWAYS = 'always'
2318 2318 DELTAREUSESAMEREVS = 'samerevs'
2319 2319 DELTAREUSENEVER = 'never'
2320 2320
2321 2321 DELTAREUSEFULLADD = 'fulladd'
2322 2322
2323 2323 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2324 2324
2325 2325 def clone(self, tr, destrevlog, addrevisioncb=None,
2326 2326 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2327 2327 """Copy this revlog to another, possibly with format changes.
2328 2328
2329 2329 The destination revlog will contain the same revisions and nodes.
2330 2330 However, it may not be bit-for-bit identical due to e.g. delta encoding
2331 2331 differences.
2332 2332
2333 2333 The ``deltareuse`` argument controls how deltas from the existing revlog
2334 2334 are preserved in the destination revlog. The argument can have the
2335 2335 following values:
2336 2336
2337 2337 DELTAREUSEALWAYS
2338 2338 Deltas will always be reused (if possible), even if the destination
2339 2339 revlog would not select the same revisions for the delta. This is the
2340 2340 fastest mode of operation.
2341 2341 DELTAREUSESAMEREVS
2342 2342 Deltas will be reused if the destination revlog would pick the same
2343 2343 revisions for the delta. This mode strikes a balance between speed
2344 2344 and optimization.
2345 2345 DELTAREUSENEVER
2346 2346 Deltas will never be reused. This is the slowest mode of execution.
2347 2347 This mode can be used to recompute deltas (e.g. if the diff/delta
2348 2348 algorithm changes).
2349 2349
2350 2350 Delta computation can be slow, so the choice of delta reuse policy can
2351 2351 significantly affect run time.
2352 2352
2353 2353 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2354 2354 two extremes. Deltas will be reused if they are appropriate. But if the
2355 2355 delta could choose a better revision, it will do so. This means if you
2356 2356 are converting a non-generaldelta revlog to a generaldelta revlog,
2357 2357 deltas will be recomputed if the delta's parent isn't a parent of the
2358 2358 revision.
2359 2359
2360 2360 In addition to the delta policy, the ``forcedeltabothparents``
2361 2361 argument controls whether to force compute deltas against both parents
2362 2362 for merges. If unset, the destination revlog's existing setting is kept.
2363 2363 """
2364 2364 if deltareuse not in self.DELTAREUSEALL:
2365 2365 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2366 2366
2367 2367 if len(destrevlog):
2368 2368 raise ValueError(_('destination revlog is not empty'))
2369 2369
2370 2370 if getattr(self, 'filteredrevs', None):
2371 2371 raise ValueError(_('source revlog has filtered revisions'))
2372 2372 if getattr(destrevlog, 'filteredrevs', None):
2373 2373 raise ValueError(_('destination revlog has filtered revisions'))
2374 2374
2375 2375 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2376 2376 # if possible.
2377 2377 oldlazydelta = destrevlog._lazydelta
2378 2378 oldlazydeltabase = destrevlog._lazydeltabase
2379 2379 oldamd = destrevlog._deltabothparents
2380 2380
2381 2381 try:
2382 2382 if deltareuse == self.DELTAREUSEALWAYS:
2383 2383 destrevlog._lazydeltabase = True
2384 2384 destrevlog._lazydelta = True
2385 2385 elif deltareuse == self.DELTAREUSESAMEREVS:
2386 2386 destrevlog._lazydeltabase = False
2387 2387 destrevlog._lazydelta = True
2388 2388 elif deltareuse == self.DELTAREUSENEVER:
2389 2389 destrevlog._lazydeltabase = False
2390 2390 destrevlog._lazydelta = False
2391 2391
2392 2392 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2393 2393
2394 2394 deltacomputer = deltautil.deltacomputer(destrevlog)
2395 2395 index = self.index
2396 2396 for rev in self:
2397 2397 entry = index[rev]
2398 2398
2399 2399 # Some classes override linkrev to take filtered revs into
2400 2400 # account. Use raw entry from index.
2401 2401 flags = entry[0] & 0xffff
2402 2402 linkrev = entry[4]
2403 2403 p1 = index[entry[5]][7]
2404 2404 p2 = index[entry[6]][7]
2405 2405 node = entry[7]
2406 2406
2407 2407 # (Possibly) reuse the delta from the revlog if allowed and
2408 2408 # the revlog chunk is a delta.
2409 2409 cachedelta = None
2410 2410 rawtext = None
2411 2411 if (deltareuse != self.DELTAREUSEFULLADD
2412 2412 and destrevlog._lazydelta):
2413 2413 dp = self.deltaparent(rev)
2414 2414 if dp != nullrev:
2415 2415 cachedelta = (dp, bytes(self._chunk(rev)))
2416 2416
2417 2417 if not cachedelta:
2418 2418 rawtext = self.rawdata(rev)
2419 2419
2421 2421 if deltareuse == self.DELTAREUSEFULLADD:
2422 2422 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2423 2423 cachedelta=cachedelta,
2424 2424 node=node, flags=flags,
2425 2425 deltacomputer=deltacomputer)
2426 2426 else:
2427 2427 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2428 2428 checkambig=False)
2429 2429 dfh = None
2430 2430 if not destrevlog._inline:
2431 2431 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2432 2432 try:
2433 2433 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2434 2434 p2, flags, cachedelta, ifh, dfh,
2435 2435 deltacomputer=deltacomputer)
2436 2436 finally:
2437 2437 if dfh:
2438 2438 dfh.close()
2439 2439 ifh.close()
2440 2440
2441 2441 if addrevisioncb:
2442 2442 addrevisioncb(self, rev, node)
2443 2443 finally:
2444 2444 destrevlog._lazydelta = oldlazydelta
2445 2445 destrevlog._lazydeltabase = oldlazydeltabase
2446 2446 destrevlog._deltabothparents = oldamd
2447 2447
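A typical use of ``clone`` is a storage-format upgrade, where the delta reuse policy sets the speed/quality trade-off. A hypothetical invocation that recomputes every delta (``src``, ``dest``, and ``tr`` are placeholder revlog and transaction objects, not defined here):

    # Hypothetical objects: src/dest are revlog instances, tr a transaction.
    src.clone(tr, dest,
              deltareuse=src.DELTAREUSENEVER,   # recompute every delta
              forcedeltabothparents=True)       # try deltas vs. both parents
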
2448 2448 def censorrevision(self, tr, censornode, tombstone=b''):
2449 2449 if (self.version & 0xFFFF) == REVLOGV0:
2450 2450 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2451 2451 self.version)
2452 2452
2453 2453 censorrev = self.rev(censornode)
2454 2454 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2455 2455
2456 2456 if len(tombstone) > self.rawsize(censorrev):
2457 2457 raise error.Abort(_('censor tombstone must be no longer than '
2458 2458 'censored data'))
2459 2459
2460 2460 # Rewriting the revlog in place is hard. Our strategy for censoring is
2461 2461 # to create a new revlog, copy all revisions to it, then replace the
2462 2462 # revlogs on transaction close.
2463 2463
2464 2464 newindexfile = self.indexfile + b'.tmpcensored'
2465 2465 newdatafile = self.datafile + b'.tmpcensored'
2466 2466
2467 2467 # This is a bit dangerous. We could easily have a mismatch of state.
2468 2468 newrl = revlog(self.opener, newindexfile, newdatafile,
2469 2469 censorable=True)
2470 2470 newrl.version = self.version
2471 2471 newrl._generaldelta = self._generaldelta
2472 2472 newrl._io = self._io
2473 2473
2474 2474 for rev in self.revs():
2475 2475 node = self.node(rev)
2476 2476 p1, p2 = self.parents(node)
2477 2477
2478 2478 if rev == censorrev:
2479 2479 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2480 2480 p1, p2, censornode, REVIDX_ISCENSORED)
2481 2481
2482 2482 if newrl.deltaparent(rev) != nullrev:
2483 2483 raise error.Abort(_('censored revision stored as delta; '
2484 2484 'cannot censor'),
2485 2485 hint=_('censoring of revlogs is not '
2486 2486 'fully implemented; please report '
2487 2487 'this bug'))
2488 2488 continue
2489 2489
2490 2490 if self.iscensored(rev):
2491 2491 if self.deltaparent(rev) != nullrev:
2492 2492 raise error.Abort(_('cannot censor due to censored '
2493 2493 'revision having delta stored'))
2494 2494 rawtext = self._chunk(rev)
2495 2495 else:
2496 2496 rawtext = self.rawdata(rev)
2497 2497
2498 2498 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2499 2499 self.flags(rev))
2500 2500
2501 2501 tr.addbackup(self.indexfile, location='store')
2502 2502 if not self._inline:
2503 2503 tr.addbackup(self.datafile, location='store')
2504 2504
2505 2505 self.opener.rename(newrl.indexfile, self.indexfile)
2506 2506 if not self._inline:
2507 2507 self.opener.rename(newrl.datafile, self.datafile)
2508 2508
2509 2509 self.clearcaches()
2510 2510 self._loadindex()
2511 2511
2512 2512 def verifyintegrity(self, state):
2513 2513 """Verifies the integrity of the revlog.
2514 2514
2515 2515 Yields ``revlogproblem`` instances describing problems that are
2516 2516 found.
2517 2517 """
2518 2518 dd, di = self.checksize()
2519 2519 if dd:
2520 2520 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2521 2521 if di:
2522 2522 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2523 2523
2524 2524 version = self.version & 0xFFFF
2525 2525
2526 2526 # The verifier tells us what version revlog we should be.
2527 2527 if version != state['expectedversion']:
2528 2528 yield revlogproblem(
2529 2529 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2530 2530 (self.indexfile, version, state['expectedversion']))
2531 2531
2532 2532 state['skipread'] = set()
2533 2533
2534 2534 for rev in self:
2535 2535 node = self.node(rev)
2536 2536
2537 2537 # Verify contents. 4 cases to care about:
2538 2538 #
2539 2539 # common: the most common case
2540 2540 # rename: with a rename
2541 2541 # meta: file content starts with b'\1\n', the metadata
2542 2542 # header defined in filelog.py, but without a rename
2543 2543 # ext: content stored externally
2544 2544 #
2545 2545 # More formally, their differences are shown below:
2546 2546 #
2547 2547 # | common | rename | meta | ext
2548 2548 # -------------------------------------------------------
2549 2549 # flags() | 0 | 0 | 0 | not 0
2550 2550 # renamed() | False | True | False | ?
2551 2551 # rawtext[0:2]=='\1\n'| False | True | True | ?
2552 2552 #
2553 2553 # "rawtext" means the raw text stored in revlog data, which
2554 2554 # could be retrieved by "rawdata(rev)". "text"
2555 2555 # mentioned below is "revision(rev)".
2556 2556 #
2557 2557 # There are 3 different lengths stored physically:
2558 2558 # 1. L1: rawsize, stored in revlog index
2559 2559 # 2. L2: len(rawtext), stored in revlog data
2560 2560 # 3. L3: len(text), stored in revlog data if flags==0, or
2561 2561 # possibly somewhere else if flags!=0
2562 2562 #
2563 2563 # L1 should be equal to L2. L3 could be different from them.
2564 2564 # "text" may or may not affect commit hash depending on flag
2565 2565 # processors (see flagutil.addflagprocessor).
2566 2566 #
2567 2567 # | common | rename | meta | ext
2568 2568 # -------------------------------------------------
2569 2569 # rawsize() | L1 | L1 | L1 | L1
2570 2570 # size() | L1 | L2-LM | L1(*) | L1 (?)
2571 2571 # len(rawtext) | L2 | L2 | L2 | L2
2572 2572 # len(text) | L2 | L2 | L2 | L3
2573 2573 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2574 2574 #
2575 2575 # LM: length of metadata, depending on rawtext
2576 2576 # (*): not ideal, see comment in filelog.size
2577 2577 # (?): could be "- len(meta)" if the resolved content has
2578 2578 # rename metadata
2579 2579 #
2580 2580 # Checks needed to be done:
2581 2581 # 1. length check: L1 == L2, in all cases.
2582 2582 # 2. hash check: depending on flag processor, we may need to
2583 2583 # use either "text" (external), or "rawtext" (in revlog).
2584 2584
2585 2585 try:
2586 2586 skipflags = state.get('skipflags', 0)
2587 2587 if skipflags:
2588 2588 skipflags &= self.flags(rev)
2589 2589
2590 2590 if skipflags:
2591 2591 state['skipread'].add(node)
2592 2592 else:
2593 2593 # Side-effect: read content and verify hash.
2594 2594 self.revision(node)
2595 2595
2596 2596 l1 = self.rawsize(rev)
2597 2597 l2 = len(self.rawdata(node))
2598 2598
2599 2599 if l1 != l2:
2600 2600 yield revlogproblem(
2601 2601 error=_('unpacked size is %d, %d expected') % (l2, l1),
2602 2602 node=node)
2603 2603
2604 2604 except error.CensoredNodeError:
2605 2605 if state['erroroncensored']:
2606 2606 yield revlogproblem(error=_('censored file data'),
2607 2607 node=node)
2608 2608 state['skipread'].add(node)
2609 2609 except Exception as e:
2610 2610 yield revlogproblem(
2611 2611 error=_('unpacking %s: %s') % (short(node),
2612 2612 stringutil.forcebytestr(e)),
2613 2613 node=node)
2614 2614 state['skipread'].add(node)
2615 2615
2616 2616 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2617 2617 revisionscount=False, trackedsize=False,
2618 2618 storedsize=False):
2619 2619 d = {}
2620 2620
2621 2621 if exclusivefiles:
2622 2622 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2623 2623 if not self._inline:
2624 2624 d['exclusivefiles'].append((self.opener, self.datafile))
2625 2625
2626 2626 if sharedfiles:
2627 2627 d['sharedfiles'] = []
2628 2628
2629 2629 if revisionscount:
2630 2630 d['revisionscount'] = len(self)
2631 2631
2632 2632 if trackedsize:
2633 2633 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2634 2634
2635 2635 if storedsize:
2636 2636 d['storedsize'] = sum(self.opener.stat(path).st_size
2637 2637 for path in self.files())
2638 2638
2639 2639 return d
@@ -1,1047 +1,1051 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import (
17 17 nullrev,
18 18 )
19 19 from ..i18n import _
20 20
21 21 from .constants import (
22 22 REVIDX_ISCENSORED,
23 23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 24 )
25 25
26 26 from ..thirdparty import (
27 27 attr,
28 28 )
29 29
30 30 from .. import (
31 31 error,
32 32 mdiff,
33 33 util,
34 34 )
35 35
36 from . import (
37 flagutil,
38 )
39
36 40 # maximum <delta-chain-data>/<revision-text-length> ratio
37 41 LIMIT_DELTA2TEXT = 2
38 42
39 43 class _testrevlog(object):
40 44 """minimalist fake revlog to use in doctests"""
41 45
42 46 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
43 47 """data is an list of revision payload boundaries"""
44 48 self._data = data
45 49 self._srdensitythreshold = density
46 50 self._srmingapsize = mingap
47 51 self._snapshot = set(snapshot)
48 52 self.index = None
49 53
50 54 def start(self, rev):
51 55 if rev == nullrev:
52 56 return 0
53 57 if rev == 0:
54 58 return 0
55 59 return self._data[rev - 1]
56 60
57 61 def end(self, rev):
58 62 if rev == nullrev:
59 63 return 0
60 64 return self._data[rev]
61 65
62 66 def length(self, rev):
63 67 return self.end(rev) - self.start(rev)
64 68
65 69 def __len__(self):
66 70 return len(self._data)
67 71
68 72 def issnapshot(self, rev):
69 73 if rev == nullrev:
70 74 return True
71 75 return rev in self._snapshot
72 76
73 77 def slicechunk(revlog, revs, targetsize=None):
74 78 """slice revs to reduce the amount of unrelated data to be read from disk.
75 79
76 80 ``revs`` is sliced into groups that should be read in a single pass.
77 81 Assume that revs are sorted.
78 82
79 83 The initial chunk is sliced until the overall density (payload/chunks-span
80 84 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
81 85 `revlog._srmingapsize` is skipped.
82 86
83 87 If `targetsize` is set, no chunk larger than `targetsize` will be
84 88 yielded. For consistency with other slicing choices, this limit won't
85 89 go lower than `revlog._srmingapsize`.
86 90
87 91 If an individual revision's chunk is larger than this limit, it will
88 92 still be yielded individually.
89 93
90 94 >>> data = [
91 95 ... 5, #00 (5)
92 96 ... 10, #01 (5)
93 97 ... 12, #02 (2)
94 98 ... 12, #03 (empty)
95 99 ... 27, #04 (15)
96 100 ... 31, #05 (4)
97 101 ... 31, #06 (empty)
98 102 ... 42, #07 (11)
99 103 ... 47, #08 (5)
100 104 ... 47, #09 (empty)
101 105 ... 48, #10 (1)
102 106 ... 51, #11 (3)
103 107 ... 74, #12 (23)
104 108 ... 85, #13 (11)
105 109 ... 86, #14 (1)
106 110 ... 91, #15 (5)
107 111 ... ]
108 112 >>> revlog = _testrevlog(data, snapshot=range(16))
109 113
110 114 >>> list(slicechunk(revlog, list(range(16))))
111 115 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
112 116 >>> list(slicechunk(revlog, [0, 15]))
113 117 [[0], [15]]
114 118 >>> list(slicechunk(revlog, [0, 11, 15]))
115 119 [[0], [11], [15]]
116 120 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
117 121 [[0], [11, 13, 15]]
118 122 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
119 123 [[1, 2], [5, 8, 10, 11], [14]]
120 124
121 125 Slicing with a maximum chunk size
122 126 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
123 127 [[0], [11], [13], [15]]
124 128 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
125 129 [[0], [11], [13, 15]]
126 130
127 131 Slicing involving nullrev
128 132 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
129 133 [[-1, 0], [11], [13, 15]]
130 134 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
131 135 [[-1], [13], [15]]
132 136 """
133 137 if targetsize is not None:
134 138 targetsize = max(targetsize, revlog._srmingapsize)
135 139 # targetsize should not be specified when evaluating delta candidates:
136 140 # * targetsize is used to ensure we stay within specification when reading,
137 141 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
138 142 if densityslicing is None:
139 143 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
140 144 for chunk in densityslicing(revs,
141 145 revlog._srdensitythreshold,
142 146 revlog._srmingapsize):
143 147 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
144 148 yield subchunk
145 149
146 150 def _slicechunktosize(revlog, revs, targetsize=None):
147 151 """slice revs to match the target size
148 152
149 153 This is intended to be used on chunks that density slicing selected but
150 154 that are still too large compared to the read guarantee of the revlog.
151 155 This might happen when the "minimal gap size" interrupted the slicing, or
152 156 when chains are built in a way that creates large blocks next to each other.
153 157
154 158 >>> data = [
155 159 ... 3, #0 (3)
156 160 ... 5, #1 (2)
157 161 ... 6, #2 (1)
158 162 ... 8, #3 (2)
159 163 ... 8, #4 (empty)
160 164 ... 11, #5 (3)
161 165 ... 12, #6 (1)
162 166 ... 13, #7 (1)
163 167 ... 14, #8 (1)
164 168 ... ]
165 169
166 170 == All snapshots cases ==
167 171 >>> revlog = _testrevlog(data, snapshot=range(9))
168 172
169 173 Cases where chunk is already small enough
170 174 >>> list(_slicechunktosize(revlog, [0], 3))
171 175 [[0]]
172 176 >>> list(_slicechunktosize(revlog, [6, 7], 3))
173 177 [[6, 7]]
174 178 >>> list(_slicechunktosize(revlog, [0], None))
175 179 [[0]]
176 180 >>> list(_slicechunktosize(revlog, [6, 7], None))
177 181 [[6, 7]]
178 182
179 183 cases where we need actual slicing
180 184 >>> list(_slicechunktosize(revlog, [0, 1], 3))
181 185 [[0], [1]]
182 186 >>> list(_slicechunktosize(revlog, [1, 3], 3))
183 187 [[1], [3]]
184 188 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
185 189 [[1, 2], [3]]
186 190 >>> list(_slicechunktosize(revlog, [3, 5], 3))
187 191 [[3], [5]]
188 192 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
189 193 [[3], [5]]
190 194 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
191 195 [[5], [6, 7, 8]]
192 196 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
193 197 [[0], [1, 2], [3], [5], [6, 7, 8]]
194 198
195 199 Case with too large individual chunk (must return valid chunk)
196 200 >>> list(_slicechunktosize(revlog, [0, 1], 2))
197 201 [[0], [1]]
198 202 >>> list(_slicechunktosize(revlog, [1, 3], 1))
199 203 [[1], [3]]
200 204 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
201 205 [[3], [5]]
202 206
203 207 == No Snapshot cases ==
204 208 >>> revlog = _testrevlog(data)
205 209
206 210 Cases where chunk is already small enough
207 211 >>> list(_slicechunktosize(revlog, [0], 3))
208 212 [[0]]
209 213 >>> list(_slicechunktosize(revlog, [6, 7], 3))
210 214 [[6, 7]]
211 215 >>> list(_slicechunktosize(revlog, [0], None))
212 216 [[0]]
213 217 >>> list(_slicechunktosize(revlog, [6, 7], None))
214 218 [[6, 7]]
215 219
216 220 cases where we need actual slicing
217 221 >>> list(_slicechunktosize(revlog, [0, 1], 3))
218 222 [[0], [1]]
219 223 >>> list(_slicechunktosize(revlog, [1, 3], 3))
220 224 [[1], [3]]
221 225 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
222 226 [[1], [2, 3]]
223 227 >>> list(_slicechunktosize(revlog, [3, 5], 3))
224 228 [[3], [5]]
225 229 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
226 230 [[3], [4, 5]]
227 231 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
228 232 [[5], [6, 7, 8]]
229 233 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
230 234 [[0], [1, 2], [3], [5], [6, 7, 8]]
231 235
232 236 Case with too large individual chunk (must return valid chunk)
233 237 >>> list(_slicechunktosize(revlog, [0, 1], 2))
234 238 [[0], [1]]
235 239 >>> list(_slicechunktosize(revlog, [1, 3], 1))
236 240 [[1], [3]]
237 241 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
238 242 [[3], [5]]
239 243
240 244 == mixed case ==
241 245 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
242 246 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
243 247 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
244 248 """
245 249 assert targetsize is None or 0 <= targetsize
246 250 startdata = revlog.start(revs[0])
247 251 enddata = revlog.end(revs[-1])
248 252 fullspan = enddata - startdata
249 253 if targetsize is None or fullspan <= targetsize:
250 254 yield revs
251 255 return
252 256
253 257 startrevidx = 0
254 258 endrevidx = 1
255 259 iterrevs = enumerate(revs)
256 260 next(iterrevs) # skip first rev.
257 261 # first step: get snapshots out of the way
258 262 for idx, r in iterrevs:
259 263 span = revlog.end(r) - startdata
260 264 snapshot = revlog.issnapshot(r)
261 265 if span <= targetsize and snapshot:
262 266 endrevidx = idx + 1
263 267 else:
264 268 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
265 269 if chunk:
266 270 yield chunk
267 271 startrevidx = idx
268 272 startdata = revlog.start(r)
269 273 endrevidx = idx + 1
270 274 if not snapshot:
271 275 break
272 276
273 277 # for the others, we use binary slicing to quickly converge toward valid
274 278 # chunks (otherwise, we might end up looking for start/end of many
275 279 # revisions). This logic is not looking for the perfect slicing point, it
276 280 # focuses on quickly converging toward valid chunks.
277 281 nbitem = len(revs)
278 282 while (enddata - startdata) > targetsize:
279 283 endrevidx = nbitem
280 284 if nbitem - startrevidx <= 1:
281 285 break # protect against individual chunk larger than limit
282 286 localenddata = revlog.end(revs[endrevidx - 1])
283 287 span = localenddata - startdata
284 288 while span > targetsize:
285 289 if endrevidx - startrevidx <= 1:
286 290 break # protect against individual chunk larger than limit
287 291 endrevidx -= (endrevidx - startrevidx) // 2
288 292 localenddata = revlog.end(revs[endrevidx - 1])
289 293 span = localenddata - startdata
290 294 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
291 295 if chunk:
292 296 yield chunk
293 297 startrevidx = endrevidx
294 298 startdata = revlog.start(revs[startrevidx])
295 299
296 300 chunk = _trimchunk(revlog, revs, startrevidx)
297 301 if chunk:
298 302 yield chunk
299 303
300 304 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
301 305 mingapsize=0):
302 306 """slice revs to reduce the amount of unrelated data to be read from disk.
303 307
304 308 ``revs`` is sliced into groups that should be read in a single pass.
305 309 Assume that revs are sorted.
306 310
307 311 The initial chunk is sliced until the overall density (payload/chunks-span
308 312 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
309 313 skipped.
310 314
311 315 >>> revlog = _testrevlog([
312 316 ... 5, #00 (5)
313 317 ... 10, #01 (5)
314 318 ... 12, #02 (2)
315 319 ... 12, #03 (empty)
316 320 ... 27, #04 (15)
317 321 ... 31, #05 (4)
318 322 ... 31, #06 (empty)
319 323 ... 42, #07 (11)
320 324 ... 47, #08 (5)
321 325 ... 47, #09 (empty)
322 326 ... 48, #10 (1)
323 327 ... 51, #11 (3)
324 328 ... 74, #12 (23)
325 329 ... 85, #13 (11)
326 330 ... 86, #14 (1)
327 331 ... 91, #15 (5)
328 332 ... ])
329 333
330 334 >>> list(_slicechunktodensity(revlog, list(range(16))))
331 335 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
332 336 >>> list(_slicechunktodensity(revlog, [0, 15]))
333 337 [[0], [15]]
334 338 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
335 339 [[0], [11], [15]]
336 340 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
337 341 [[0], [11, 13, 15]]
338 342 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
339 343 [[1, 2], [5, 8, 10, 11], [14]]
340 344 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
341 345 ... mingapsize=20))
342 346 [[1, 2, 3, 5, 8, 10, 11], [14]]
343 347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
344 348 ... targetdensity=0.95))
345 349 [[1, 2], [5], [8, 10, 11], [14]]
346 350 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
347 351 ... targetdensity=0.95, mingapsize=12))
348 352 [[1, 2], [5, 8, 10, 11], [14]]
349 353 """
350 354 start = revlog.start
351 355 length = revlog.length
352 356
353 357 if len(revs) <= 1:
354 358 yield revs
355 359 return
356 360
357 361 deltachainspan = segmentspan(revlog, revs)
358 362
359 363 if deltachainspan < mingapsize:
360 364 yield revs
361 365 return
362 366
363 367 readdata = deltachainspan
364 368 chainpayload = sum(length(r) for r in revs)
365 369
366 370 if deltachainspan:
367 371 density = chainpayload / float(deltachainspan)
368 372 else:
369 373 density = 1.0
370 374
371 375 if density >= targetdensity:
372 376 yield revs
373 377 return
374 378
375 379 # Collect the gaps; they are sorted below so the largest can be popped first
376 380 gaps = []
377 381 prevend = None
378 382 for i, rev in enumerate(revs):
379 383 revstart = start(rev)
380 384 revlen = length(rev)
381 385
382 386 # Skip empty revisions to form larger holes
383 387 if revlen == 0:
384 388 continue
385 389
386 390 if prevend is not None:
387 391 gapsize = revstart - prevend
388 392 # only consider holes that are large enough
389 393 if gapsize > mingapsize:
390 394 gaps.append((gapsize, i))
391 395
392 396 prevend = revstart + revlen
393 397 # sort the gaps so they can be popped from largest to smallest
394 398 gaps.sort()
395 399
396 400 # Collect the indices of the largest holes until the density is acceptable
397 401 selected = []
398 402 while gaps and density < targetdensity:
399 403 gapsize, gapidx = gaps.pop()
400 404
401 405 selected.append(gapidx)
402 406
403 407 # the gaps list is sorted in ascending order, so the pop() above
404 408 # returned the largest remaining gap; skip that much read data
405 409 readdata -= gapsize
406 410 if readdata > 0:
407 411 density = chainpayload / float(readdata)
408 412 else:
409 413 density = 1.0
410 414 selected.sort()
411 415
412 416 # Cut the revs at collected indices
413 417 previdx = 0
414 418 for idx in selected:
415 419
416 420 chunk = _trimchunk(revlog, revs, previdx, idx)
417 421 if chunk:
418 422 yield chunk
419 423
420 424 previdx = idx
421 425
422 426 chunk = _trimchunk(revlog, revs, previdx)
423 427 if chunk:
424 428 yield chunk
425 429
426 430 def _trimchunk(revlog, revs, startidx, endidx=None):
427 431 """returns revs[startidx:endidx] without empty trailing revs
428 432
429 433 Doctest Setup
430 434 >>> revlog = _testrevlog([
431 435 ... 5, #0
432 436 ... 10, #1
433 437 ... 12, #2
434 438 ... 12, #3 (empty)
435 439 ... 17, #4
436 440 ... 21, #5
437 441 ... 21, #6 (empty)
438 442 ... ])
439 443
440 444 Contiguous cases:
441 445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
442 446 [0, 1, 2, 3, 4, 5]
443 447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
444 448 [0, 1, 2, 3, 4]
445 449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
446 450 [0, 1, 2]
447 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
448 452 [2]
449 453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
450 454 [3, 4, 5]
451 455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
452 456 [3, 4]
453 457
454 458 Discontiguous cases:
455 459 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
456 460 [1, 3, 5]
457 461 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
458 462 [1]
459 463 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
460 464 [3, 5]
461 465 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
462 466 [3, 5]
463 467 """
464 468 length = revlog.length
465 469
466 470 if endidx is None:
467 471 endidx = len(revs)
468 472
469 473 # If we have a non-empty delta candidate, there is nothing to trim
470 474 if revs[endidx - 1] < len(revlog):
471 475 # Trim empty revs at the end, except the very first revision of a chain
472 476 while (endidx > 1
473 477 and endidx > startidx
474 478 and length(revs[endidx - 1]) == 0):
475 479 endidx -= 1
476 480
477 481 return revs[startidx:endidx]
478 482
479 483 def segmentspan(revlog, revs):
480 484 """Get the byte span of a segment of revisions
481 485
482 486 revs is a sorted array of revision numbers
483 487
484 488 >>> revlog = _testrevlog([
485 489 ... 5, #0
486 490 ... 10, #1
487 491 ... 12, #2
488 492 ... 12, #3 (empty)
489 493 ... 17, #4
490 494 ... ])
491 495
492 496 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
493 497 17
494 498 >>> segmentspan(revlog, [0, 4])
495 499 17
496 500 >>> segmentspan(revlog, [3, 4])
497 501 5
498 502 >>> segmentspan(revlog, [1, 2, 3,])
499 503 7
500 504 >>> segmentspan(revlog, [1, 3])
501 505 7
502 506 """
503 507 if not revs:
504 508 return 0
505 509 end = revlog.end(revs[-1])
506 510 return end - revlog.start(revs[0])
507 511
508 512 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
509 513 """build full text from a (base, delta) pair and other metadata"""
510 514 # special case deltas which replace entire base; no need to decode
511 515 # base revision. this neatly avoids censored bases, which throw when
512 516 # they're decoded.
513 517 hlen = struct.calcsize(">lll")
514 518 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
515 519 len(delta) - hlen):
516 520 fulltext = delta[hlen:]
517 521 else:
518 522 # deltabase is rawtext before being changed by flag processors,
519 523 # which is equivalent to non-raw text
520 524 basetext = revlog.revision(baserev, _df=fh, raw=False)
521 525 fulltext = mdiff.patch(basetext, delta)
522 526
523 527 try:
524 validatehash = revlog._processflagsraw(fulltext, flags)
528 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
525 529 if validatehash:
526 530 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
527 531 if flags & REVIDX_ISCENSORED:
528 532 raise error.StorageError(_('node %s is not censored') %
529 533 expectednode)
530 534 except error.CensoredNodeError:
531 535 # must pass the censored index flag to add censored revisions
532 536 if not flags & REVIDX_ISCENSORED:
533 537 raise
534 538 return fulltext
535 539
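For illustration, a minimal sketch of the full-replacement special case above, assuming ``delta`` and ``basesize`` as inputs; mdiff's replacement header packs (start, end, newlength) as big-endian ``>lll``:

    import struct

    def isfullreplacement(delta, basesize):
        # a delta whose single hunk replaces bytes [0, basesize) carries
        # the new fulltext verbatim after its 12-byte header
        hlen = struct.calcsize(">lll")
        header = struct.pack(">lll", 0, basesize, len(delta) - hlen)
        return delta[:hlen] == header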
536 540 @attr.s(slots=True, frozen=True)
537 541 class _deltainfo(object):
538 542 distance = attr.ib()
539 543 deltalen = attr.ib()
540 544 data = attr.ib()
541 545 base = attr.ib()
542 546 chainbase = attr.ib()
543 547 chainlen = attr.ib()
544 548 compresseddeltalen = attr.ib()
545 549 snapshotdepth = attr.ib()
546 550
547 551 def isgooddeltainfo(revlog, deltainfo, revinfo):
548 552 """Returns True if the given delta is good. Good means that it is within
549 553 the disk span, disk size, and chain length bounds that we know to be
550 554 performant."""
551 555 if deltainfo is None:
552 556 return False
553 557
554 558 # - 'deltainfo.distance' is the distance from the base revision --
555 559 # bounding it limits the amount of I/O we need to do.
556 560 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
557 561 # deltas we need to apply -- bounding it limits the amount of CPU
558 562 # we consume.
559 563
560 564 textlen = revinfo.textlen
561 565 defaultmax = textlen * 4
562 566 maxdist = revlog._maxdeltachainspan
563 567 if not maxdist:
564 568 maxdist = deltainfo.distance # ensure the conditional passes
565 569 maxdist = max(maxdist, defaultmax)
566 570
567 571 # Bad delta from read span:
568 572 #
569 573 # If the span of data read is larger than the maximum allowed.
570 574 #
571 575 # In the sparse-revlog case, we rely on the associated "sparse reading"
572 576 # to avoid issue related to the span of data. In theory, it would be
573 577 # possible to build pathological revlog where delta pattern would lead
574 578 # to too many reads. However, they do not happen in practice at all. So
575 579 # we skip the span check entirely.
576 580 if not revlog._sparserevlog and maxdist < deltainfo.distance:
577 581 return False
578 582
579 583 # Bad delta from new delta size:
580 584 #
581 585 # If the delta size is larger than the target text, storing the
582 586 # delta will be inefficient.
583 587 if textlen < deltainfo.deltalen:
584 588 return False
585 589
586 590 # Bad delta from cumulated payload size:
587 591 #
588 592 # If the sum of deltas gets larger than K * target text length.
589 593 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
590 594 return False
591 595
592 596 # Bad delta from chain length:
593 597 #
594 598 # If the number of deltas in the chain gets too high.
595 599 if (revlog._maxchainlen
596 600 and revlog._maxchainlen < deltainfo.chainlen):
597 601 return False
598 602
599 603 # bad delta from intermediate snapshot size limit
600 604 #
601 605 # If an intermediate snapshot size is higher than the limit. The
602 606 # limit exists to prevent endless chains of intermediate deltas
603 607 # from being created.
604 608 if (deltainfo.snapshotdepth is not None and
605 609 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
606 610 return False
607 611
608 612 # bad delta if new intermediate snapshot is larger than the previous
609 613 # snapshot
610 614 if (deltainfo.snapshotdepth
611 615 and revlog.length(deltainfo.base) < deltainfo.deltalen):
612 616 return False
613 617
614 618 return True
615 619
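As a rough worked illustration of these bounds (the numbers here are hypothetical):

    # For a revision with textlen = 1000 and default settings:
    #   - deltalen must be <= 1000 (a delta larger than the text is bad)
    #   - compresseddeltalen must be <= LIMIT_DELTA2TEXT * 1000
    #   - for a snapshot at depth 2, deltalen must be <= 1000 >> 2 == 250
    #   - a new intermediate snapshot must not be larger on disk than
    #     its base snapshot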
616 620 # If a revision's full text is that much bigger than a base candidate full
617 621 # text's, it is very unlikely that it will produce a valid delta. We no longer
618 622 # consider these candidates.
619 623 LIMIT_BASE2TEXT = 500
620 624
621 625 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
622 626 """Provides group of revision to be tested as delta base
623 627
624 628 This top level function focus on emitting groups with unique and worthwhile
625 629 content. See _raw_candidate_groups for details about the group order.
626 630 """
627 631 # should we try to build a delta?
628 632 if not (len(revlog) and revlog._storedeltachains):
629 633 yield None
630 634 return
631 635
632 636 deltalength = revlog.length
633 637 deltaparent = revlog.deltaparent
634 638 sparse = revlog._sparserevlog
635 639 good = None
636 640
637 641 deltas_limit = textlen * LIMIT_DELTA2TEXT
638 642
639 643 tested = {nullrev}
640 644 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
641 645 while True:
642 646 temptative = candidates.send(good)
643 647 if temptative is None:
644 648 break
645 649 group = []
646 650 for rev in temptative:
647 651 # skip over empty deltas (no need to include them in a chain)
648 652 while (revlog._generaldelta
649 653 and not (rev == nullrev
650 654 or rev in tested
651 655 or deltalength(rev))):
652 656 tested.add(rev)
653 657 rev = deltaparent(rev)
654 658 # no need to try a delta against nullrev, this will be done as a
655 659 # last resort.
656 660 if rev == nullrev:
657 661 continue
658 662 # filter out revisions we tested already
659 663 if rev in tested:
660 664 continue
661 665 tested.add(rev)
662 666 # filter out delta bases that will never produce a good delta
663 667 if deltas_limit < revlog.length(rev):
664 668 continue
665 669 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
666 670 continue
667 671 # no delta for rawtext-changing revs (see "candelta" for why)
668 672 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
669 673 continue
670 674 # If we reach here, we are about to build and test a delta.
671 675 # The delta building process will compute the chaininfo in all
672 676 # cases; since that computation is cached, it is fine to access
673 677 # it here too.
674 678 chainlen, chainsize = revlog._chaininfo(rev)
675 679 # if chain will be too long, skip base
676 680 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
677 681 continue
678 682 # if the chain already has too much data, skip base
679 683 if deltas_limit < chainsize:
680 684 continue
681 685 if sparse and revlog.upperboundcomp is not None:
682 686 maxcomp = revlog.upperboundcomp
683 687 basenotsnap = (p1, p2, nullrev)
684 688 if rev not in basenotsnap and revlog.issnapshot(rev):
685 689 snapshotdepth = revlog.snapshotdepth(rev)
686 690 # If text is significantly larger than the base, we can
687 691 # expect the resulting delta to be proportional to the size
688 692 # difference
689 693 revsize = revlog.rawsize(rev)
690 694 rawsizedistance = max(textlen - revsize, 0)
691 695 # use an estimate of the compression upper bound.
692 696 lowestrealisticdeltalen = rawsizedistance // maxcomp
693 697
694 698 # check the absolute constraint on the delta size
695 699 snapshotlimit = textlen >> snapshotdepth
696 700 if snapshotlimit < lowestrealisticdeltalen:
697 701 # delta lower bound is larger than accepted upper bound
698 702 continue
699 703
700 704 # check the relative constraint on the delta size
701 705 revlength = revlog.length(rev)
702 706 if revlength < lowestrealisticdeltalen:
703 707 # delta probable lower bound is larger than target base
704 708 continue
705 709
706 710 group.append(rev)
707 711 if group:
708 712 # XXX: in the sparse revlog case, group can become large,
709 713 # impacting performance. Some bounding or slicing mechanism
710 714 # would help to reduce this impact.
711 715 good = yield tuple(group)
712 716 yield None
713 717
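A minimal sketch of how a caller drives this generator, mirroring the loop in ``deltacomputer.finddeltainfo`` below; ``rl``, ``textlen``, ``p1r``, ``p2r``, ``cachedelta`` and ``choosebase`` are assumed names:

    groups = _candidategroups(rl, textlen, p1r, p2r, cachedelta)
    candidaterevs = next(groups)
    while candidaterevs is not None:
        base = choosebase(candidaterevs)  # hypothetical selection helper
        if base is not None:
            # feeding a good base back lets the generator refine around it
            candidaterevs = groups.send(base)
        else:
            candidaterevs = next(groups)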
714 718 def _findsnapshots(revlog, cache, start_rev):
715 719 """find snapshot from start_rev to tip"""
716 720 if util.safehasattr(revlog.index, 'findsnapshots'):
717 721 revlog.index.findsnapshots(cache, start_rev)
718 722 else:
719 723 deltaparent = revlog.deltaparent
720 724 issnapshot = revlog.issnapshot
721 725 for rev in revlog.revs(start_rev):
722 726 if issnapshot(rev):
723 727 cache[deltaparent(rev)].append(rev)
724 728
725 729 def _refinedgroups(revlog, p1, p2, cachedelta):
726 730 good = None
727 731 # First we try to reuse the delta contained in the bundle.
728 732 # (or from the source revlog)
729 733 #
730 734 # This logic only applies to general delta repositories and can be disabled
731 735 # through configuration. Disabling reuse of a source delta is useful
732 736 # when we want to make sure we recompute "optimal" deltas.
733 737 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
734 738 # Assume what we received from the server is a good choice
735 739 # build delta will reuse the cache
736 740 good = yield (cachedelta[0],)
737 741 if good is not None:
738 742 yield None
739 743 return
740 744 snapshots = collections.defaultdict(list)
741 745 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
742 746 good = yield candidates
743 747 if good is not None:
744 748 break
745 749
746 750 # If sparse revlog is enabled, we can try to refine the available deltas
747 751 if not revlog._sparserevlog:
748 752 yield None
749 753 return
750 754
751 755 # if we have a refinable value, try to refine it
752 756 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
753 757 # refine snapshot down
754 758 previous = None
755 759 while previous != good:
756 760 previous = good
757 761 base = revlog.deltaparent(good)
758 762 if base == nullrev:
759 763 break
760 764 good = yield (base,)
761 765 # refine snapshot up
762 766 if not snapshots:
763 767 _findsnapshots(revlog, snapshots, good + 1)
764 768 previous = None
765 769 while good != previous:
766 770 previous = good
767 771 children = tuple(sorted(c for c in snapshots[good]))
768 772 good = yield children
769 773
770 774 # we have found nothing
771 775 yield None
772 776
773 777 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
774 778 """Provides group of revision to be tested as delta base
775 779
776 780 This lower level function focus on emitting delta theorically interresting
777 781 without looking it any practical details.
778 782
779 783 The group order aims at providing fast or small candidates first.
780 784 """
781 785 gdelta = revlog._generaldelta
782 786 # gate sparse behind general-delta because of issue6056
783 787 sparse = gdelta and revlog._sparserevlog
784 788 curr = len(revlog)
785 789 prev = curr - 1
786 790 deltachain = lambda rev: revlog._deltachain(rev)[0]
787 791
788 792 if gdelta:
789 793 # exclude already lazy tested base if any
790 794 parents = [p for p in (p1, p2) if p != nullrev]
791 795
792 796 if not revlog._deltabothparents and len(parents) == 2:
793 797 parents.sort()
794 798 # To minimize the chance of having to build a fulltext,
795 799 # pick first whichever parent is closest to us (max rev)
796 800 yield (parents[1],)
797 801 # then the other one (min rev) if the first did not fit
798 802 yield (parents[0],)
799 803 elif len(parents) > 0:
800 804 # Test all parents (1 or 2), and keep the best candidate
801 805 yield parents
802 806
803 807 if sparse and parents:
804 808 if snapshots is None:
805 809 # map: base-rev: snapshot-rev
806 810 snapshots = collections.defaultdict(list)
807 811 # See if we can use an existing snapshot in the parent chains to use as
808 812 # a base for a new intermediate-snapshot
809 813 #
810 814 # search for snapshot in parents delta chain
811 815 # map: snapshot-level: snapshot-rev
812 816 parents_snaps = collections.defaultdict(set)
813 817 candidate_chains = [deltachain(p) for p in parents]
814 818 for chain in candidate_chains:
815 819 for idx, s in enumerate(chain):
816 820 if not revlog.issnapshot(s):
817 821 break
818 822 parents_snaps[idx].add(s)
819 823 snapfloor = min(parents_snaps[0]) + 1
820 824 _findsnapshots(revlog, snapshots, snapfloor)
821 825 # search for the highest "unrelated" revision
822 826 #
823 827 # Adding snapshots used by "unrelated" revisions increases the odds
824 828 # we reuse an independent, yet better, snapshot chain.
825 829 #
826 830 # XXX instead of building a set of revisions, we could lazily enumerate
827 831 # over the chains. That would be more efficient; however, we stick to
828 832 # simple code for now.
829 833 all_revs = set()
830 834 for chain in candidate_chains:
831 835 all_revs.update(chain)
832 836 other = None
833 837 for r in revlog.revs(prev, snapfloor):
834 838 if r not in all_revs:
835 839 other = r
836 840 break
837 841 if other is not None:
838 842 # To avoid unfair competition, we won't use unrelated intermediate
839 843 # snapshots that are deeper than the ones from the parent delta
840 844 # chain.
841 845 max_depth = max(parents_snaps.keys())
842 846 chain = deltachain(other)
843 847 for idx, s in enumerate(chain):
844 848 if s < snapfloor:
845 849 continue
846 850 if max_depth < idx:
847 851 break
848 852 if not revlog.issnapshot(s):
849 853 break
850 854 parents_snaps[idx].add(s)
851 855 # Test them as possible intermediate snapshot bases.
852 856 # We test them from highest to lowest level. High level ones are
853 857 # more likely to result in a small delta.
854 858 floor = None
855 859 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
856 860 siblings = set()
857 861 for s in snaps:
858 862 siblings.update(snapshots[s])
859 863 # Before considering making a new intermediate snapshot, we check
860 864 # if an existing snapshot, a child of the base we consider, would
861 865 # be suitable.
862 866 #
863 867 # It gives a chance to reuse a delta chain "unrelated" to the
864 868 # current revision instead of starting our own. Without such
865 869 # re-use, topological branches would keep reopening new chains,
866 870 # creating more and more snapshots as the repository grows.
867 871
868 872 if floor is not None:
869 873 # We only do this for siblings created after the one in our
870 874 # parent's delta chain. Those created before have less chance
871 875 # of being a valid base since our ancestors had to create a new
872 876 # snapshot.
873 877 siblings = [r for r in siblings if floor < r]
874 878 yield tuple(sorted(siblings))
875 879 # then test the base from our parent's delta chain.
876 880 yield tuple(sorted(snaps))
877 881 floor = min(snaps)
878 882 # No suitable base found in the parent chain; search if any full
879 883 # snapshot emitted since the parent's base would be a suitable base
880 884 # for an intermediate snapshot.
881 885 #
882 886 # It gives a chance to reuse a delta chain unrelated to the current
883 887 # revision instead of starting our own. Without such re-use,
884 888 # topological branches would keep reopening new full chains, creating
885 889 # more and more snapshots as the repository grows.
886 890 yield tuple(snapshots[nullrev])
887 891
888 892 if not sparse:
889 893 # other approaches failed, try against prev to hopefully save us a
890 894 # fulltext.
891 895 yield (prev,)
892 896
893 897 class deltacomputer(object):
894 898 def __init__(self, revlog):
895 899 self.revlog = revlog
896 900
897 901 def buildtext(self, revinfo, fh):
898 902 """Builds a fulltext version of a revision
899 903
900 904 revinfo: _revisioninfo instance that contains all needed info
901 905 fh: file handle to either the .i or the .d revlog file,
902 906 depending on whether it is inlined or not
903 907 """
904 908 btext = revinfo.btext
905 909 if btext[0] is not None:
906 910 return btext[0]
907 911
908 912 revlog = self.revlog
909 913 cachedelta = revinfo.cachedelta
910 914 baserev = cachedelta[0]
911 915 delta = cachedelta[1]
912 916
913 917 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
914 918 revinfo.p1, revinfo.p2,
915 919 revinfo.flags, revinfo.node)
916 920 return fulltext
917 921
918 922 def _builddeltadiff(self, base, revinfo, fh):
919 923 revlog = self.revlog
920 924 t = self.buildtext(revinfo, fh)
921 925 if revlog.iscensored(base):
922 926 # deltas based on a censored revision must replace the
923 927 # full content in one patch, so delta works everywhere
924 928 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
925 929 delta = header + t
926 930 else:
927 931 ptext = revlog.rawdata(base, _df=fh)
928 932 delta = mdiff.textdiff(ptext, t)
929 933
930 934 return delta
931 935
932 936 def _builddeltainfo(self, revinfo, base, fh):
933 937 # can we use the cached delta?
934 938 revlog = self.revlog
935 939 chainbase = revlog.chainbase(base)
936 940 if revlog._generaldelta:
937 941 deltabase = base
938 942 else:
939 943 deltabase = chainbase
940 944 snapshotdepth = None
941 945 if revlog._sparserevlog and deltabase == nullrev:
942 946 snapshotdepth = 0
943 947 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
944 948 # A delta chain should always be one full snapshot,
945 949 # zero or more semi-snapshots, and zero or more deltas
946 950 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
947 951 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
948 952 snapshotdepth = len(revlog._deltachain(deltabase)[0])
949 953 delta = None
950 954 if revinfo.cachedelta:
951 955 cachebase, cachediff = revinfo.cachedelta
952 956 # check if the diff still applies
953 957 currentbase = cachebase
954 958 while (currentbase != nullrev
955 959 and currentbase != base
956 960 and self.revlog.length(currentbase) == 0):
957 961 currentbase = self.revlog.deltaparent(currentbase)
958 962 if self.revlog._lazydelta and currentbase == base:
959 963 delta = revinfo.cachedelta[1]
960 964 if delta is None:
961 965 delta = self._builddeltadiff(base, revinfo, fh)
962 966 # snapshotdepth needs to be neither None nor a 0-level snapshot
963 967 if revlog.upperboundcomp is not None and snapshotdepth:
964 968 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
965 969 snapshotlimit = revinfo.textlen >> snapshotdepth
966 970 if snapshotlimit < lowestrealisticdeltalen:
967 971 return None
968 972 if revlog.length(base) < lowestrealisticdeltalen:
969 973 return None
970 974 header, data = revlog.compress(delta)
971 975 deltalen = len(header) + len(data)
972 976 offset = revlog.end(len(revlog) - 1)
973 977 dist = deltalen + offset - revlog.start(chainbase)
974 978 chainlen, compresseddeltalen = revlog._chaininfo(base)
975 979 chainlen += 1
976 980 compresseddeltalen += deltalen
977 981
978 982 return _deltainfo(dist, deltalen, (header, data), deltabase,
979 983 chainbase, chainlen, compresseddeltalen,
980 984 snapshotdepth)
981 985
982 986 def _fullsnapshotinfo(self, fh, revinfo):
983 987 curr = len(self.revlog)
984 988 rawtext = self.buildtext(revinfo, fh)
985 989 data = self.revlog.compress(rawtext)
986 990 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
987 991 deltabase = chainbase = curr
988 992 snapshotdepth = 0
989 993 chainlen = 1
990 994
991 995 return _deltainfo(dist, deltalen, data, deltabase,
992 996 chainbase, chainlen, compresseddeltalen,
993 997 snapshotdepth)
994 998
995 999 def finddeltainfo(self, revinfo, fh):
996 1000 """Find an acceptable delta against a candidate revision
997 1001
998 1002 revinfo: information about the revision (instance of _revisioninfo)
999 1003 fh: file handle to either the .i or the .d revlog file,
1000 1004 depending on whether it is inlined or not
1001 1005
1002 1006 Returns the first acceptable candidate revision, as ordered by
1003 1007 _candidategroups
1004 1008
1005 1009 If no suitable deltabase is found, we return delta info for a full
1006 1010 snapshot.
1007 1011 """
1008 1012 if not revinfo.textlen:
1009 1013 return self._fullsnapshotinfo(fh, revinfo)
1010 1014
1011 1015 # no delta for flag processor revision (see "candelta" for why)
1012 1016 # not calling candelta since only one revision needs test, also to
1013 1017 # avoid overhead fetching flags again.
1014 1018 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1015 1019 return self._fullsnapshotinfo(fh, revinfo)
1016 1020
1017 1021 cachedelta = revinfo.cachedelta
1018 1022 p1 = revinfo.p1
1019 1023 p2 = revinfo.p2
1020 1024 revlog = self.revlog
1021 1025
1022 1026 deltainfo = None
1023 1027 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1024 1028 groups = _candidategroups(self.revlog, revinfo.textlen,
1025 1029 p1r, p2r, cachedelta)
1026 1030 candidaterevs = next(groups)
1027 1031 while candidaterevs is not None:
1028 1032 nominateddeltas = []
1029 1033 if deltainfo is not None:
1030 1034 # if we already found a good delta,
1031 1035 # challenge it against refined candidates
1032 1036 nominateddeltas.append(deltainfo)
1033 1037 for candidaterev in candidaterevs:
1034 1038 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1035 1039 if candidatedelta is not None:
1036 1040 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1037 1041 nominateddeltas.append(candidatedelta)
1038 1042 if nominateddeltas:
1039 1043 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1040 1044 if deltainfo is not None:
1041 1045 candidaterevs = groups.send(deltainfo.base)
1042 1046 else:
1043 1047 candidaterevs = next(groups)
1044 1048
1045 1049 if deltainfo is None:
1046 1050 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1047 1051 return deltainfo
@@ -1,206 +1,206 b''
1 1 # flagutils.py - code to deal with revlog flags and their processors
2 2 #
3 3 # Copyright 2016 Remi Chaintron <remi@fb.com>
4 4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 from ..i18n import _
12 12
13 13 from .constants import (
14 14 REVIDX_DEFAULT_FLAGS,
15 15 REVIDX_ELLIPSIS,
16 16 REVIDX_EXTSTORED,
17 17 REVIDX_FLAGS_ORDER,
18 18 REVIDX_ISCENSORED,
19 19 REVIDX_RAWTEXT_CHANGING_FLAGS,
20 20 )
21 21
22 22 from .. import (
23 23 error,
24 24 util
25 25 )
26 26
27 27 # blanked usage of all the name to prevent pyflakes constraints
28 28 # We need these name available in the module for extensions.
29 29 REVIDX_ISCENSORED
30 30 REVIDX_ELLIPSIS
31 31 REVIDX_EXTSTORED
32 32 REVIDX_DEFAULT_FLAGS
33 33 REVIDX_FLAGS_ORDER
34 34 REVIDX_RAWTEXT_CHANGING_FLAGS
35 35
36 36 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
37 37
38 38 # Store flag processors (cf. 'addflagprocessor()' to register)
39 39 flagprocessors = {
40 40 REVIDX_ISCENSORED: None,
41 41 }
42 42
43 43 def addflagprocessor(flag, processor):
44 44 """Register a flag processor on a revision data flag.
45 45
46 46 Invariant:
47 47 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
48 48 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
49 49 - Only one flag processor can be registered on a specific flag.
50 50 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
51 51 following signatures:
52 52 - (read) f(self, rawtext) -> text, bool, sidedata
53 53 - (write) f(self, text, sidedata) -> rawtext, bool
54 54 - (raw) f(self, rawtext) -> bool
55 55 "text" is presented to the user. "rawtext" is stored in revlog data, not
56 56 directly visible to the user.
57 57 The boolean returned by these transforms is used to determine whether
58 58 the returned text can be used for hash integrity checking. For example,
59 59 if "write" returns False, then "text" is used to generate hash. If
60 60 "write" returns True, that basically means "rawtext" returned by "write"
61 61 should be used to generate hash. Usually, "write" and "read" return
62 62 different booleans. And "raw" returns the same boolean as "write".
63 63
64 64 Note: The 'raw' transform is used for changegroup generation and in some
65 65 debug commands. In this case the transform only indicates whether the
66 66 contents can be used for hash integrity checks.
67 67 """
68 68 insertflagprocessor(flag, processor, flagprocessors)
69 69
70 70 def insertflagprocessor(flag, processor, flagprocessors):
71 71 if not flag & REVIDX_KNOWN_FLAGS:
72 72 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
73 73 raise error.ProgrammingError(msg)
74 74 if flag not in REVIDX_FLAGS_ORDER:
75 75 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
76 76 raise error.ProgrammingError(msg)
77 77 if flag in flagprocessors:
78 78 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
79 79 raise error.Abort(msg)
80 80 flagprocessors[flag] = processor
81 81
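A minimal sketch of what a registered processor tuple looks like, assuming a hypothetical flag ``REVIDX_NEWFLAG`` already declared in REVIDX_FLAGS_ORDER; the transforms here are intentionally identity transforms, and the signatures match what ``_processflagsfunc`` below actually calls:

    def newflagread(revlog, rawtext):
        return rawtext, True, {}   # (text, validatehash, sidedata)

    def newflagwrite(revlog, text, sidedata):
        return text, True          # (rawtext, validatehash)

    def newflagraw(revlog, rawtext):
        return True                # rawtext usable for hash checking

    # addflagprocessor(REVIDX_NEWFLAG,
    #                  (newflagread, newflagwrite, newflagraw))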
82 82 class flagprocessorsmixin(object):
83 83 """basic mixin to support revlog flag processing
84 84
85 85 Make sure the `_flagprocessors` attribute is set at ``__init__`` time.
86 86
87 87 See the documentation of the ``_processflags`` method for details.
88 88 """
89 89
90 90 _flagserrorclass = error.RevlogError
91 91
92 92 def _processflags(self, text, flags, operation, raw=False):
93 93 """deprecated entry point to access flag processors"""
94 94 msg = ('_processflag(...) use the specialized variant')
95 95 util.nouideprecwarn(msg, '5.2', stacklevel=2)
96 96 if raw:
97 return text, self._processflagsraw(text, flags)
97 return text, processflagsraw(self, text, flags)
98 98 elif operation == 'read':
99 99 return processflagsread(self, text, flags)
100 100 else: # write operation
101 101 return processflagswrite(self, text, flags)
102 102
103 def _processflagsraw(self, text, flags):
104 """Inspect revision data flags to check is the content hash should be
105 validated.
106
107 ``text`` - the revision data to process
108 ``flags`` - the revision flags
109
110 This method processes the flags in the order (or reverse order if
111 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
112 flag processors registered for present flags. The order of flags defined
113 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
114
115 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
116 processed text and ``validatehash`` is a bool indicating whether the
117 returned text should be checked for hash integrity.
118 """
119 return _processflagsfunc(self, text, flags, 'raw')[1]
120
121 103 def processflagswrite(revlog, text, flags, sidedata):
122 104 """Inspect revision data flags and applies write transformations defined
123 105 by registered flag processors.
124 106
125 107 ``text`` - the revision data to process
126 108 ``flags`` - the revision flags
127 109
128 110 This function processes the flags in the reverse of the order defined
129 111 by REVIDX_FLAGS_ORDER (write transforms run in reverse), applying the
130 112 flag processors registered for present flags. The order of flags defined
131 113 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
132 114
133 115 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
134 116 processed text and ``validatehash`` is a bool indicating whether the
135 117 returned text should be checked for hash integrity.
136 118 """
137 119 return _processflagsfunc(revlog, text, flags, 'write',
138 120 sidedata=sidedata)[:2]
139 121
140 122 def processflagsread(revlog, text, flags):
141 123 """Inspect revision data flags and applies read transformations defined
142 124 by registered flag processors.
143 125
144 126 ``text`` - the revision data to process
145 127 ``flags`` - the revision flags
148 130
149 131 This function processes the flags in the order defined by
150 132 REVIDX_FLAGS_ORDER, applying the read transform of the
151 133 flag processors registered for present flags. The order of flags defined
152 134 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
153 135
154 136 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
155 137 processed text and ``validatehash`` is a bool indicating whether the
156 138 returned text should be checked for hash integrity.
157 139 """
158 140 return _processflagsfunc(revlog, text, flags, 'read')
159 141
142 def processflagsraw(revlog, text, flags):
143 """Inspect revision data flags to check is the content hash should be
144 validated.
145
146 ``text`` - the revision data to process
147 ``flags`` - the revision flags
148
149 This method processes the flags in the order (or reverse order if
150 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
151 flag processors registered for present flags. The order of flags defined
152 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
153
154 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
155 processed text and ``validatehash`` is a bool indicating whether the
156 returned text should be checked for hash integrity.
157 """
158 return _processflagsfunc(revlog, text, flags, 'raw')[1]
159
160 160 def _processflagsfunc(revlog, text, flags, operation, sidedata=None):
161 161 """internal function to process flag on a revlog
162 162
163 163 This function is private to this module; code should never need to call it
164 164 directly."""
165 165 # fast path: no flag processors will run
166 166 if flags == 0:
167 167 return text, True, {}
168 168 if operation not in ('read', 'write', 'raw'):
169 169 raise error.ProgrammingError(_("invalid '%s' operation") %
170 170 operation)
171 171 # Check all flags are known.
172 172 if flags & ~REVIDX_KNOWN_FLAGS:
173 173 raise revlog._flagserrorclass(_("incompatible revision flag '%#x'") %
174 174 (flags & ~REVIDX_KNOWN_FLAGS))
175 175 validatehash = True
176 176 # Depending on the operation (read or write), the order might be
177 177 # reversed due to non-commutative transforms.
178 178 orderedflags = REVIDX_FLAGS_ORDER
179 179 if operation == 'write':
180 180 orderedflags = reversed(orderedflags)
181 181
182 182 outsidedata = {}
183 183 for flag in orderedflags:
184 184 # If a flagprocessor has been registered for a known flag, apply the
185 185 # related operation transform and update result tuple.
186 186 if flag & flags:
187 187 vhash = True
188 188
189 189 if flag not in revlog._flagprocessors:
190 190 message = _("missing processor for flag '%#x'") % (flag)
191 191 raise revlog._flagserrorclass(message)
192 192
193 193 processor = revlog._flagprocessors[flag]
194 194 if processor is not None:
195 195 readtransform, writetransform, rawtransform = processor
196 196
197 197 if operation == 'raw':
198 198 vhash = rawtransform(revlog, text)
199 199 elif operation == 'read':
200 200 text, vhash, s = readtransform(revlog, text)
201 201 outsidedata.update(s)
202 202 else: # write operation
203 203 text, vhash = writetransform(revlog, text, sidedata)
204 204 validatehash = validatehash and vhash
205 205
206 206 return text, validatehash, outsidedata
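To illustrate the ordering rule, assume two hypothetical flags FLAG_A and FLAG_B, with FLAG_A listed before FLAG_B in REVIDX_FLAGS_ORDER:

    # read:  rawtext -> A.read  -> B.read  -> text    (storage order)
    # write: text    -> B.write -> A.write -> rawtext (reversed, so a
    #        subsequent read exactly undoes the write transforms)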
@@ -1,683 +1,683 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 nullrev,
23 23 )
24 24 from mercurial.thirdparty import (
25 25 attr,
26 26 )
27 27 from mercurial import (
28 28 ancestor,
29 29 bundlerepo,
30 30 error,
31 31 extensions,
32 32 localrepo,
33 33 mdiff,
34 34 pycompat,
35 35 revlog,
36 36 store,
37 37 verify,
38 38 )
39 39 from mercurial.interfaces import (
40 40 repository,
41 41 util as interfaceutil,
42 42 )
43 43 from mercurial.utils import (
44 44 cborutil,
45 45 storageutil,
46 46 )
47 47 from mercurial.revlogutils import (
48 48 flagutil,
49 49 )
50 50
51 51 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
52 52 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
53 53 # be specifying the version(s) of Mercurial they are tested with, or
54 54 # leave the attribute unspecified.
55 55 testedwith = 'ships-with-hg-core'
56 56
57 57 REQUIREMENT = 'testonly-simplestore'
58 58
59 59 def validatenode(node):
60 60 if isinstance(node, int):
61 61 raise ValueError('expected node; got int')
62 62
63 63 if len(node) != 20:
64 64 raise ValueError('expected 20 byte node')
65 65
66 66 def validaterev(rev):
67 67 if not isinstance(rev, int):
68 68 raise ValueError('expected int')
69 69
70 70 class simplestoreerror(error.StorageError):
71 71 pass
72 72
73 73 @interfaceutil.implementer(repository.irevisiondelta)
74 74 @attr.s(slots=True)
75 75 class simplestorerevisiondelta(object):
76 76 node = attr.ib()
77 77 p1node = attr.ib()
78 78 p2node = attr.ib()
79 79 basenode = attr.ib()
80 80 flags = attr.ib()
81 81 baserevisionsize = attr.ib()
82 82 revision = attr.ib()
83 83 delta = attr.ib()
84 84 linknode = attr.ib(default=None)
85 85
86 86 @interfaceutil.implementer(repository.iverifyproblem)
87 87 @attr.s(frozen=True)
88 88 class simplefilestoreproblem(object):
89 89 warning = attr.ib(default=None)
90 90 error = attr.ib(default=None)
91 91 node = attr.ib(default=None)
92 92
93 93 @interfaceutil.implementer(repository.ifilestorage)
94 94 class filestorage(flagutil.flagprocessorsmixin):
95 95 """Implements storage for a tracked path.
96 96
97 97 Data is stored in the VFS in a directory corresponding to the tracked
98 98 path.
99 99
100 100 Index data is stored in an ``index`` file using CBOR.
101 101
102 102 Fulltext data is stored in files named after the hex of the node.
103 103 """
104 104
105 105 _flagserrorclass = simplestoreerror
106 106
107 107 def __init__(self, svfs, path):
108 108 self._svfs = svfs
109 109 self._path = path
110 110
111 111 self._storepath = b'/'.join([b'data', path])
112 112 self._indexpath = b'/'.join([self._storepath, b'index'])
113 113
114 114 indexdata = self._svfs.tryread(self._indexpath)
115 115 if indexdata:
116 116 indexdata = cborutil.decodeall(indexdata)
117 117
118 118 self._indexdata = indexdata or []
119 119 self._indexbynode = {}
120 120 self._indexbyrev = {}
121 121 self._index = []
122 122 self._refreshindex()
123 123
124 124 self._flagprocessors = dict(flagutil.flagprocessors)
125 125
126 126 def _refreshindex(self):
127 127 self._indexbynode.clear()
128 128 self._indexbyrev.clear()
129 129 self._index = []
130 130
131 131 for i, entry in enumerate(self._indexdata):
132 132 self._indexbynode[entry[b'node']] = entry
133 133 self._indexbyrev[i] = entry
134 134
135 135 self._indexbynode[nullid] = {
136 136 b'node': nullid,
137 137 b'p1': nullid,
138 138 b'p2': nullid,
139 139 b'linkrev': nullrev,
140 140 b'flags': 0,
141 141 }
142 142
143 143 self._indexbyrev[nullrev] = {
144 144 b'node': nullid,
145 145 b'p1': nullid,
146 146 b'p2': nullid,
147 147 b'linkrev': nullrev,
148 148 b'flags': 0,
149 149 }
150 150
151 151 for i, entry in enumerate(self._indexdata):
152 152 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
153 153
154 154 # start, length, rawsize, chainbase, linkrev, p1, p2, node
155 155 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
156 156 entry[b'node']))
157 157
158 158 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
159 159
160 160 def __len__(self):
161 161 return len(self._indexdata)
162 162
163 163 def __iter__(self):
164 164 return iter(range(len(self)))
165 165
166 166 def revs(self, start=0, stop=None):
167 167 step = 1
168 168 if stop is not None:
169 169 if start > stop:
170 170 step = -1
171 171
172 172 stop += step
173 173 else:
174 174 stop = len(self)
175 175
176 176 return range(start, stop, step)
177 177
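For instance, in a hypothetical store with five revisions, iteration can run in either direction, with ``stop`` inclusive:

    # revs()      -> 0, 1, 2, 3, 4
    # revs(3, 1)  -> 3, 2, 1   (start > stop yields a descending range)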
178 178 def parents(self, node):
179 179 validatenode(node)
180 180
181 181 if node not in self._indexbynode:
182 182 raise KeyError('unknown node')
183 183
184 184 entry = self._indexbynode[node]
185 185
186 186 return entry[b'p1'], entry[b'p2']
187 187
188 188 def parentrevs(self, rev):
189 189 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
190 190 return self.rev(p1), self.rev(p2)
191 191
192 192 def rev(self, node):
193 193 validatenode(node)
194 194
195 195 try:
196 196 self._indexbynode[node]
197 197 except KeyError:
198 198 raise error.LookupError(node, self._indexpath, _('no node'))
199 199
200 200 for rev, entry in self._indexbyrev.items():
201 201 if entry[b'node'] == node:
202 202 return rev
203 203
204 204 raise error.ProgrammingError('this should not occur')
205 205
206 206 def node(self, rev):
207 207 validaterev(rev)
208 208
209 209 return self._indexbyrev[rev][b'node']
210 210
211 211 def hasnode(self, node):
212 212 validatenode(node)
213 213 return node in self._indexbynode
214 214
215 215 def censorrevision(self, tr, censornode, tombstone=b''):
216 216 raise NotImplementedError('TODO')
217 217
218 218 def lookup(self, node):
219 219 if isinstance(node, int):
220 220 return self.node(node)
221 221
222 222 if len(node) == 20:
223 223 self.rev(node)
224 224 return node
225 225
226 226 try:
227 227 rev = int(node)
228 228 if '%d' % rev != node:
229 229 raise ValueError
230 230
231 231 if rev < 0:
232 232 rev = len(self) + rev
233 233 if rev < 0 or rev >= len(self):
234 234 raise ValueError
235 235
236 236 return self.node(rev)
237 237 except (ValueError, OverflowError):
238 238 pass
239 239
240 240 if len(node) == 40:
241 241 try:
242 242 rawnode = bin(node)
243 243 self.rev(rawnode)
244 244 return rawnode
245 245 except TypeError:
246 246 pass
247 247
248 248 raise error.LookupError(node, self._path, _('invalid lookup input'))
249 249
250 250 def linkrev(self, rev):
251 251 validaterev(rev)
252 252
253 253 return self._indexbyrev[rev][b'linkrev']
254 254
255 255 def _flags(self, rev):
256 256 validaterev(rev)
257 257
258 258 return self._indexbyrev[rev][b'flags']
259 259
260 260 def _candelta(self, baserev, rev):
261 261 validaterev(baserev)
262 262 validaterev(rev)
263 263
264 264 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
265 265 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
266 266 return False
267 267
268 268 return True
269 269
270 270 def checkhash(self, text, node, p1=None, p2=None, rev=None):
271 271 if p1 is None and p2 is None:
272 272 p1, p2 = self.parents(node)
273 273 if node != storageutil.hashrevisionsha1(text, p1, p2):
274 274 raise simplestoreerror(_("integrity check failed on %s") %
275 275 self._path)
276 276
277 277 def revision(self, nodeorrev, raw=False):
278 278 if isinstance(nodeorrev, int):
279 279 node = self.node(nodeorrev)
280 280 else:
281 281 node = nodeorrev
282 282 validatenode(node)
283 283
284 284 if node == nullid:
285 285 return b''
286 286
287 287 rev = self.rev(node)
288 288 flags = self._flags(rev)
289 289
290 290 path = b'/'.join([self._storepath, hex(node)])
291 291 rawtext = self._svfs.read(path)
292 292
293 293 if raw:
294 validatehash = self._processflagsraw(rawtext, flags)
294 validatehash = flagutil.processflagsraw(self, rawtext, flags)
295 295 text = rawtext
296 296 else:
297 297 r = flagutil.processflagsread(self, rawtext, flags)
298 298 text, validatehash, sidedata = r
299 299 if validatehash:
300 300 self.checkhash(text, node, rev=rev)
301 301
302 302 return text
303 303
304 304 def rawdata(self, nodeorrev):
305 305 return self.revision(nodeorrev, raw=True)
306 306
307 307 def read(self, node):
308 308 validatenode(node)
309 309
310 310 revision = self.revision(node)
311 311
312 312 if not revision.startswith(b'\1\n'):
313 313 return revision
314 314
315 315 start = revision.index(b'\1\n', 2)
316 316 return revision[start + 2:]
317 317
318 318 def renamed(self, node):
319 319 validatenode(node)
320 320
321 321 if self.parents(node)[0] != nullid:
322 322 return False
323 323
324 324 fulltext = self.revision(node)
325 325 m = storageutil.parsemeta(fulltext)[0]
326 326
327 327 if m and 'copy' in m:
328 328 return m['copy'], bin(m['copyrev'])
329 329
330 330 return False
331 331
332 332 def cmp(self, node, text):
333 333 validatenode(node)
334 334
335 335 t = text
336 336
337 337 if text.startswith(b'\1\n'):
338 338 t = b'\1\n\1\n' + text
339 339
340 340 p1, p2 = self.parents(node)
341 341
342 342 if storageutil.hashrevisionsha1(t, p1, p2) == node:
343 343 return False
344 344
345 345 if self.iscensored(self.rev(node)):
346 346 return text != b''
347 347
348 348 if self.renamed(node):
349 349 t2 = self.read(node)
350 350 return t2 != text
351 351
352 352 return True
353 353
354 354 def size(self, rev):
355 355 validaterev(rev)
356 356
357 357 node = self._indexbyrev[rev][b'node']
358 358
359 359 if self.renamed(node):
360 360 return len(self.read(node))
361 361
362 362 if self.iscensored(rev):
363 363 return 0
364 364
365 365 return len(self.revision(node))
366 366
367 367 def iscensored(self, rev):
368 368 validaterev(rev)
369 369
370 370 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
371 371
372 372 def commonancestorsheads(self, a, b):
373 373 validatenode(a)
374 374 validatenode(b)
375 375
376 376 a = self.rev(a)
377 377 b = self.rev(b)
378 378
379 379 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
380 380 return pycompat.maplist(self.node, ancestors)
381 381
382 382 def descendants(self, revs):
383 383 # This is a copy of revlog.descendants()
384 384 first = min(revs)
385 385 if first == nullrev:
386 386 for i in self:
387 387 yield i
388 388 return
389 389
390 390 seen = set(revs)
391 391 for i in self.revs(start=first + 1):
392 392 for x in self.parentrevs(i):
393 393 if x != nullrev and x in seen:
394 394 seen.add(i)
395 395 yield i
396 396 break
397 397
398 398 # Required by verify.
399 399 def files(self):
400 400 entries = self._svfs.listdir(self._storepath)
401 401
402 402 # Strip out undo.backup.* files created as part of transaction
403 403 # recording.
404 404 entries = [f for f in entries if not f.startswith('undo.backup.')]
405 405
406 406 return [b'/'.join((self._storepath, f)) for f in entries]
407 407
408 408 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
409 409 revisionscount=False, trackedsize=False,
410 410 storedsize=False):
411 411 # TODO do a real implementation of this
412 412 return {
413 413 'exclusivefiles': [],
414 414 'sharedfiles': [],
415 415 'revisionscount': len(self),
416 416 'trackedsize': 0,
417 417 'storedsize': None,
418 418 }
419 419
420 420 def verifyintegrity(self, state):
421 421 state['skipread'] = set()
422 422 for rev in self:
423 423 node = self.node(rev)
424 424 try:
425 425 self.revision(node)
426 426 except Exception as e:
427 427 yield simplefilestoreproblem(
428 428 error='unpacking %s: %s' % (node, e),
429 429 node=node)
430 430 state['skipread'].add(node)
431 431
432 432 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
433 433 assumehaveparentrevisions=False,
434 434 deltamode=repository.CG_DELTAMODE_STD):
435 435 # TODO this will probably break on some ordering options.
436 436 nodes = [n for n in nodes if n != nullid]
437 437 if not nodes:
438 438 return
439 439 for delta in storageutil.emitrevisions(
440 440 self, nodes, nodesorder, simplestorerevisiondelta,
441 441 revisiondata=revisiondata,
442 442 assumehaveparentrevisions=assumehaveparentrevisions,
443 443 deltamode=deltamode):
444 444 yield delta
445 445
446 446 def add(self, text, meta, transaction, linkrev, p1, p2):
447 447 if meta or text.startswith(b'\1\n'):
448 448 text = storageutil.packmeta(meta, text)
449 449
450 450 return self.addrevision(text, transaction, linkrev, p1, p2)
451 451
452 452 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
453 453 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
454 454 validatenode(p1)
455 455 validatenode(p2)
456 456
457 457 if flags:
458 458 node = node or storageutil.hashrevisionsha1(text, p1, p2)
459 459
460 460 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
461 461
462 462 node = node or storageutil.hashrevisionsha1(text, p1, p2)
463 463
464 464 if node in self._indexbynode:
465 465 return node
466 466
467 467 if validatehash:
468 468 self.checkhash(rawtext, node, p1=p1, p2=p2)
469 469
470 470 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
471 471 flags)
472 472
473 473 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
474 474 transaction.addbackup(self._indexpath)
475 475
476 476 path = b'/'.join([self._storepath, hex(node)])
477 477
478 478 self._svfs.write(path, rawtext)
479 479
480 480 self._indexdata.append({
481 481 b'node': node,
482 482 b'p1': p1,
483 483 b'p2': p2,
484 484 b'linkrev': link,
485 485 b'flags': flags,
486 486 })
487 487
488 488 self._reflectindexupdate()
489 489
490 490 return node
491 491
492 492 def _reflectindexupdate(self):
493 493 self._refreshindex()
494 494 self._svfs.write(self._indexpath,
495 495 ''.join(cborutil.streamencode(self._indexdata)))
496 496
497 497 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
498 498 maybemissingparents=False):
499 499 if maybemissingparents:
500 500 raise error.Abort(_('simple store does not support missing parents '
501 501 'write mode'))
502 502
503 503 nodes = []
504 504
505 505 transaction.addbackup(self._indexpath)
506 506
507 507 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
508 508 linkrev = linkmapper(linknode)
509 509 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
510 510
511 511 nodes.append(node)
512 512
513 513 if node in self._indexbynode:
514 514 continue
515 515
516 516 # Need to resolve the fulltext from the delta base.
517 517 if deltabase == nullid:
518 518 text = mdiff.patch(b'', delta)
519 519 else:
520 520 text = mdiff.patch(self.revision(deltabase), delta)
521 521
522 522 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
523 523 flags)
524 524
525 525 if addrevisioncb:
526 526 addrevisioncb(self, node)
527 527 return nodes
528 528
529 529 def _headrevs(self):
530 530 # Assume all revisions are heads by default.
531 531 revishead = {rev: True for rev in self._indexbyrev}
532 532
533 533 for rev, entry in self._indexbyrev.items():
534 534 # Unset head flag for all seen parents.
535 535 revishead[self.rev(entry[b'p1'])] = False
536 536 revishead[self.rev(entry[b'p2'])] = False
537 537
538 538 return [rev for rev, ishead in sorted(revishead.items())
539 539 if ishead]
540 540
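A small hypothetical example of the computation above, for a store with revisions 0 <- 1 <- 2 and a branch 0 <- 3:

    # initially: revishead = {0: True, 1: True, 2: True, 3: True}
    # (the nullrev entry is also present, and is cleared as a seen parent)
    # unset seen parents: 0 (parent of 1 and 3), 1 (parent of 2)
    # result: heads are [2, 3]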
541 541 def heads(self, start=None, stop=None):
542 542 # This is copied from revlog.py.
543 543 if start is None and stop is None:
544 544 if not len(self):
545 545 return [nullid]
546 546 return [self.node(r) for r in self._headrevs()]
547 547
548 548 if start is None:
549 549 start = nullid
550 550 if stop is None:
551 551 stop = []
552 552 stoprevs = set([self.rev(n) for n in stop])
553 553 startrev = self.rev(start)
554 554 reachable = {startrev}
555 555 heads = {startrev}
556 556
557 557 parentrevs = self.parentrevs
558 558 for r in self.revs(start=startrev + 1):
559 559 for p in parentrevs(r):
560 560 if p in reachable:
561 561 if r not in stoprevs:
562 562 reachable.add(r)
563 563 heads.add(r)
564 564 if p in heads and p not in stoprevs:
565 565 heads.remove(p)
566 566
567 567 return [self.node(r) for r in heads]
568 568
569 569 def children(self, node):
570 570 validatenode(node)
571 571
572 572 # This is a copy of revlog.children().
573 573 c = []
574 574 p = self.rev(node)
575 575 for r in self.revs(start=p + 1):
576 576 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
577 577 if prevs:
578 578 for pr in prevs:
579 579 if pr == p:
580 580 c.append(self.node(r))
581 581 elif p == nullrev:
582 582 c.append(self.node(r))
583 583 return c
584 584
585 585 def getstrippoint(self, minlink):
586 586 return storageutil.resolvestripinfo(
587 587 minlink, len(self) - 1, self._headrevs(), self.linkrev,
588 588 self.parentrevs)
589 589
590 590 def strip(self, minlink, transaction):
591 591 if not len(self):
592 592 return
593 593
594 594 rev, _ignored = self.getstrippoint(minlink)
595 595 if rev == len(self):
596 596 return
597 597
598 598 # Purge index data starting at the requested revision.
599 599 self._indexdata[rev:] = []
600 600 self._reflectindexupdate()
601 601
602 602 def issimplestorefile(f, kind, st):
603 603 if kind != stat.S_IFREG:
604 604 return False
605 605
606 606 if store.isrevlog(f, kind, st):
607 607 return False
608 608
609 609 # Ignore transaction undo files.
610 610 if f.startswith('undo.'):
611 611 return False
612 612
613 613 # Otherwise assume it belongs to the simple store.
614 614 return True
615 615
616 616 class simplestore(store.encodedstore):
617 617 def datafiles(self):
618 618 for x in super(simplestore, self).datafiles():
619 619 yield x
620 620
621 621 # Supplement with non-revlog files.
622 622 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
623 623
624 624 for unencoded, encoded, size in extrafiles:
625 625 try:
626 626 unencoded = store.decodefilename(unencoded)
627 627 except KeyError:
628 628 unencoded = None
629 629
630 630 yield unencoded, encoded, size
631 631
632 632 def reposetup(ui, repo):
633 633 if not repo.local():
634 634 return
635 635
636 636 if isinstance(repo, bundlerepo.bundlerepository):
637 637 raise error.Abort(_('cannot use simple store with bundlerepo'))
638 638
639 639 class simplestorerepo(repo.__class__):
640 640 def file(self, f):
641 641 return filestorage(self.svfs, f)
642 642
643 643 repo.__class__ = simplestorerepo
644 644
645 645 def featuresetup(ui, supported):
646 646 supported.add(REQUIREMENT)
647 647
648 648 def newreporequirements(orig, ui, createopts):
649 649 """Modifies default requirements for new repos to use the simple store."""
650 650 requirements = orig(ui, createopts)
651 651
652 652 # These requirements are only used to affect creation of the store
653 653 # object. We have our own store. So we can remove them.
654 654 # TODO do this once we feel like taking the test hit.
655 655 #if 'fncache' in requirements:
656 656 # requirements.remove('fncache')
657 657 #if 'dotencode' in requirements:
658 658 # requirements.remove('dotencode')
659 659
660 660 requirements.add(REQUIREMENT)
661 661
662 662 return requirements
663 663
664 664 def makestore(orig, requirements, path, vfstype):
665 665 if REQUIREMENT not in requirements:
666 666 return orig(requirements, path, vfstype)
667 667
668 668 return simplestore(path, vfstype)
669 669
670 670 def verifierinit(orig, self, *args, **kwargs):
671 671 orig(self, *args, **kwargs)
672 672
673 673 # We don't care that files in the store don't align with what is
674 674 # advertised. So suppress these warnings.
675 675 self.warnorphanstorefiles = False
676 676
677 677 def extsetup(ui):
678 678 localrepo.featuresetupfuncs.add(featuresetup)
679 679
680 680 extensions.wrapfunction(localrepo, 'newreporequirements',
681 681 newreporequirements)
682 682 extensions.wrapfunction(localrepo, 'makestore', makestore)
683 683 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)