revlog: make sure we never use sparserevlog without general delta (issue6056)...
Boris Feld
r41525:189e06b2 stable
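The change guards sparse-revlog behind general delta: when the on-disk flags lack FLAG_GENERALDELTA, the sparse-revlog optimization is switched off rather than left half-enabled. A minimal standalone sketch of the rule (the function name is illustrative, not part of the patch):

    def effectivesparserevlog(generaldelta, sparserevlog):
        # sparse-revlog picks delta bases freely, which only works when
        # general delta is available; degrade gracefully otherwise
        # (issue6056).
        return sparserevlog and generaldelta

The actual fix lands near line 500 of the new file, right after the version flags are parsed.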
@@ -1,2646 +1,2649
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to prevent pyflakes complaints
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 try:
101 101 from . import rustext
102 102 rustext.__name__ # force actual import (see hgdemandimport)
103 103 except ImportError:
104 104 rustext = None
105 105
106 106 # Aliased for performance.
107 107 _zlibdecompress = zlib.decompress
108 108
109 109 # max size of revlog with inline data
110 110 _maxinline = 131072
111 111 _chunksize = 1048576
112 112
113 113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 114 _flagprocessors = {
115 115 REVIDX_ISCENSORED: None,
116 116 }
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsiswriteprocessor(rl, text):
123 123 return text, False
124 124
125 125 def ellipsisrawprocessor(rl, text):
126 126 return False
127 127
128 128 ellipsisprocessor = (
129 129 ellipsisreadprocessor,
130 130 ellipsiswriteprocessor,
131 131 ellipsisrawprocessor,
132 132 )
133 133
134 134 def addflagprocessor(flag, processor):
135 135 """Register a flag processor on a revision data flag.
136 136
137 137 Invariant:
138 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 140 - Only one flag processor can be registered on a specific flag.
141 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 142 following signatures:
143 143 - (read) f(self, rawtext) -> text, bool
144 144 - (write) f(self, text) -> rawtext, bool
145 145 - (raw) f(self, rawtext) -> bool
146 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 147 directly visible to the user.
148 148 The boolean returned by these transforms is used to determine whether
149 149 the returned text can be used for hash integrity checking. For example,
150 150 if "write" returns False, then "text" is used to generate hash. If
151 151 "write" returns True, that basically means "rawtext" returned by "write"
152 152 should be used to generate hash. Usually, "write" and "read" return
153 153 different booleans. And "raw" returns the same boolean as "write".
154 154
155 155 Note: The 'raw' transform is used for changegroup generation and in some
156 156 debug commands. In this case the transform only indicates whether the
157 157 contents can be used for hash integrity checks.
158 158 """
159 159 _insertflagprocessor(flag, processor, _flagprocessors)
160 160
161 161 def _insertflagprocessor(flag, processor, flagprocessors):
162 162 if not flag & REVIDX_KNOWN_FLAGS:
163 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag not in REVIDX_FLAGS_ORDER:
166 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 167 raise error.ProgrammingError(msg)
168 168 if flag in flagprocessors:
169 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 170 raise error.Abort(msg)
171 171 flagprocessors[flag] = processor
172 172
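As a concrete illustration of the contract documented above, a hypothetical identity processor could be wired up as follows (REVIDX_EXTSTORED is borrowed purely as an example of a known flag; a real extension would register its own flag):

    def _identityread(rl, rawtext):
        return rawtext, True   # returned text is valid for hash checks

    def _identitywrite(rl, text):
        return text, True      # hash against the returned rawtext

    def _identityraw(rl, rawtext):
        return True            # rawtext usable for hash verification

    addflagprocessor(REVIDX_EXTSTORED,
                     (_identityread, _identitywrite, _identityraw))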
173 173 def getoffset(q):
174 174 return int(q >> 16)
175 175
176 176 def gettype(q):
177 177 return int(q & 0xFFFF)
178 178
179 179 def offset_type(offset, type):
180 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 181 raise ValueError('unknown revlog index flags')
182 182 return int(int(offset) << 16 | type)
183 183
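The first index field packs a 48-bit offset and 16 bits of flags into a single integer, so the three helpers above round-trip as follows (flag value chosen only for illustration):

    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024             # upper 48 bits
    assert gettype(packed) == REVIDX_ISCENSORED  # lower 16 bits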
184 184 @attr.s(slots=True, frozen=True)
185 185 class _revisioninfo(object):
186 186 """Information about a revision that allows building its fulltext
187 187 node: expected hash of the revision
188 188 p1, p2: parent revs of the revision
189 189 btext: built text cache consisting of a one-element list
190 190 cachedelta: (baserev, uncompressed_delta) or None
191 191 flags: flags associated to the revision storage
192 192
193 193 One of btext[0] or cachedelta must be set.
194 194 """
195 195 node = attr.ib()
196 196 p1 = attr.ib()
197 197 p2 = attr.ib()
198 198 btext = attr.ib()
199 199 textlen = attr.ib()
200 200 cachedelta = attr.ib()
201 201 flags = attr.ib()
202 202
203 203 @interfaceutil.implementer(repository.irevisiondelta)
204 204 @attr.s(slots=True)
205 205 class revlogrevisiondelta(object):
206 206 node = attr.ib()
207 207 p1node = attr.ib()
208 208 p2node = attr.ib()
209 209 basenode = attr.ib()
210 210 flags = attr.ib()
211 211 baserevisionsize = attr.ib()
212 212 revision = attr.ib()
213 213 delta = attr.ib()
214 214 linknode = attr.ib(default=None)
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223 # index v0:
224 224 # 4 bytes: offset
225 225 # 4 bytes: compressed length
226 226 # 4 bytes: base rev
227 227 # 4 bytes: link rev
228 228 # 20 bytes: parent 1 nodeid
229 229 # 20 bytes: parent 2 nodeid
230 230 # 20 bytes: nodeid
231 231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 232 indexformatv0_pack = indexformatv0.pack
233 233 indexformatv0_unpack = indexformatv0.unpack
234 234
235 235 class revlogoldindex(list):
236 236 def __getitem__(self, i):
237 237 if i == -1:
238 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 239 return list.__getitem__(self, i)
240 240
241 241 class revlogoldio(object):
242 242 def __init__(self):
243 243 self.size = indexformatv0.size
244 244
245 245 def parseindex(self, data, inline):
246 246 s = self.size
247 247 index = []
248 248 nodemap = {nullid: nullrev}
249 249 n = off = 0
250 250 l = len(data)
251 251 while off + s <= l:
252 252 cur = data[off:off + s]
253 253 off += s
254 254 e = indexformatv0_unpack(cur)
255 255 # transform to revlogv1 format
256 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 258 index.append(e2)
259 259 nodemap[e[6]] = n
260 260 n += 1
261 261
262 262 return revlogoldindex(index), nodemap, None
263 263
264 264 def packentry(self, entry, node, version, rev):
265 265 if gettype(entry[0]):
266 266 raise error.RevlogError(_('index entry flags need revlog '
267 267 'version 1'))
268 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 269 node(entry[5]), node(entry[6]), entry[7])
270 270 return indexformatv0_pack(*e2)
271 271
272 272 # index ng:
273 273 # 6 bytes: offset
274 274 # 2 bytes: flags
275 275 # 4 bytes: compressed length
276 276 # 4 bytes: uncompressed length
277 277 # 4 bytes: base rev
278 278 # 4 bytes: link rev
279 279 # 4 bytes: parent 1 rev
280 280 # 4 bytes: parent 2 rev
281 281 # 32 bytes: nodeid
282 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 283 indexformatng_pack = indexformatng.pack
284 284 versionformat = struct.Struct(">I")
285 285 versionformat_pack = versionformat.pack
286 286 versionformat_unpack = versionformat.unpack
287 287
288 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 289 # signed integer)
290 290 _maxentrysize = 0x7fffffff
291 291
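To make the layout concrete, a v1 ("ng") entry round-trips through the struct defined above like so (all field values invented for illustration):

    # (offset/flags, compressed len, uncompressed len, base rev,
    #  link rev, p1 rev, p2 rev, nodeid); 12 pad bytes are implicit
    entry = (offset_type(0, 0), 11, 24, 0, 0, -1, -1, b'\x00' * 20)
    packed = indexformatng_pack(*entry)
    assert len(packed) == indexformatng.size   # 64 bytes per entry
    assert indexformatng.unpack(packed) == entry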
292 292 class revlogio(object):
293 293 def __init__(self):
294 294 self.size = indexformatng.size
295 295
296 296 def parseindex(self, data, inline):
297 297 # call the C implementation to parse the index data
298 298 index, cache = parsers.parse_index2(data, inline)
299 299 return index, getattr(index, 'nodemap', None), cache
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 p = indexformatng_pack(*entry)
303 303 if rev == 0:
304 304 p = versionformat_pack(version) + p[4:]
305 305 return p
306 306
307 307 class revlog(object):
308 308 """
309 309 the underlying revision storage object
310 310
311 311 A revlog consists of two parts, an index and the revision data.
312 312
313 313 The index is a file with a fixed record size containing
314 314 information on each revision, including its nodeid (hash), the
315 315 nodeids of its parents, the position and offset of its data within
316 316 the data file, and the revision it's based on. Finally, each entry
317 317 contains a linkrev entry that can serve as a pointer to external
318 318 data.
319 319
320 320 The revision data itself is a linear collection of data chunks.
321 321 Each chunk represents a revision and is usually represented as a
322 322 delta against the previous chunk. To bound lookup time, runs of
323 323 deltas are limited to about 2 times the length of the original
324 324 version data. This makes retrieval of a version proportional to
325 325 its size, or O(1) relative to the number of revisions.
326 326
327 327 Both pieces of the revlog are written to in an append-only
328 328 fashion, which means we never need to rewrite a file to insert or
329 329 remove data, and can use some simple techniques to avoid the need
330 330 for locking while reading.
331 331
332 332 If checkambig, indexfile is opened with checkambig=True at
333 333 writing, to avoid file stat ambiguity.
334 334
335 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 336 index will be mmapped rather than read if it is larger than the
337 337 configured threshold.
338 338
339 339 If censorable is True, the revlog can have censored revisions.
340 340 """
341 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 342 mmaplargeindex=False, censorable=False):
343 343 """
344 344 create a revlog object
345 345
346 346 opener is a function that abstracts the file opening operation
347 347 and can be used to implement COW semantics or the like.
348 348 """
349 349 self.indexfile = indexfile
350 350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 351 self.opener = opener
352 352 # When True, indexfile is opened with checkambig=True at writing, to
353 353 # avoid file stat ambiguity.
354 354 self._checkambig = checkambig
355 355 self._mmaplargeindex = mmaplargeindex
356 356 self._censorable = censorable
357 357 # 3-tuple of (node, rev, text) for a raw revision.
358 358 self._revisioncache = None
359 359 # Maps rev to chain base rev.
360 360 self._chainbasecache = util.lrucachedict(100)
361 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 362 self._chunkcache = (0, '')
363 363 # How much data to read and cache into the raw revlog data cache.
364 364 self._chunkcachesize = 65536
365 365 self._maxchainlen = None
366 366 self._deltabothparents = True
367 367 self.index = []
368 368 # Mapping of partial identifiers to full nodes.
369 369 self._pcache = {}
370 370 # Mapping of revision integer to full node.
371 371 self._nodecache = {nullid: nullrev}
372 372 self._nodepos = None
373 373 self._compengine = 'zlib'
374 374 self._maxdeltachainspan = -1
375 375 self._withsparseread = False
376 376 self._sparserevlog = False
377 377 self._srdensitythreshold = 0.50
378 378 self._srmingapsize = 262144
379 379
380 380 # Make copy of flag processors so each revlog instance can support
381 381 # custom flags.
382 382 self._flagprocessors = dict(_flagprocessors)
383 383
384 384 # 2-tuple of file handles being used for active writing.
385 385 self._writinghandles = None
386 386
387 387 self._loadindex()
388 388
389 389 def _loadindex(self):
390 390 mmapindexthreshold = None
391 391 opts = getattr(self.opener, 'options', {}) or {}
392 392
393 393 if 'revlogv2' in opts:
394 394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 395 elif 'revlogv1' in opts:
396 396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 397 if 'generaldelta' in opts:
398 398 newversionflags |= FLAG_GENERALDELTA
399 399 elif getattr(self.opener, 'options', None) is not None:
400 400 # If options are provided but no 'revlog*' is found, the repository
401 401 # would have no 'requires' file in it, which means we have to
402 402 # stick to the old format.
403 403 newversionflags = REVLOGV0
404 404 else:
405 405 newversionflags = REVLOG_DEFAULT_VERSION
406 406
407 407 if 'chunkcachesize' in opts:
408 408 self._chunkcachesize = opts['chunkcachesize']
409 409 if 'maxchainlen' in opts:
410 410 self._maxchainlen = opts['maxchainlen']
411 411 if 'deltabothparents' in opts:
412 412 self._deltabothparents = opts['deltabothparents']
413 413 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
414 414 if 'compengine' in opts:
415 415 self._compengine = opts['compengine']
416 416 if 'maxdeltachainspan' in opts:
417 417 self._maxdeltachainspan = opts['maxdeltachainspan']
418 418 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
419 419 mmapindexthreshold = opts['mmapindexthreshold']
420 420 self._sparserevlog = bool(opts.get('sparse-revlog', False))
421 421 withsparseread = bool(opts.get('with-sparse-read', False))
422 422 # sparse-revlog forces sparse-read
423 423 self._withsparseread = self._sparserevlog or withsparseread
424 424 if 'sparse-read-density-threshold' in opts:
425 425 self._srdensitythreshold = opts['sparse-read-density-threshold']
426 426 if 'sparse-read-min-gap-size' in opts:
427 427 self._srmingapsize = opts['sparse-read-min-gap-size']
428 428 if opts.get('enableellipsis'):
429 429 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
430 430
431 431 # revlog v0 doesn't have flag processors
432 432 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
433 433 _insertflagprocessor(flag, processor, self._flagprocessors)
434 434
435 435 if self._chunkcachesize <= 0:
436 436 raise error.RevlogError(_('revlog chunk cache size %r is not '
437 437 'greater than 0') % self._chunkcachesize)
438 438 elif self._chunkcachesize & (self._chunkcachesize - 1):
439 439 raise error.RevlogError(_('revlog chunk cache size %r is not a '
440 440 'power of 2') % self._chunkcachesize)
441 441
442 442 indexdata = ''
443 443 self._initempty = True
444 444 try:
445 445 with self._indexfp() as f:
446 446 if (mmapindexthreshold is not None and
447 447 self.opener.fstat(f).st_size >= mmapindexthreshold):
448 448 # TODO: should .close() to release resources without
449 449 # relying on Python GC
450 450 indexdata = util.buffer(util.mmapread(f))
451 451 else:
452 452 indexdata = f.read()
453 453 if len(indexdata) > 0:
454 454 versionflags = versionformat_unpack(indexdata[:4])[0]
455 455 self._initempty = False
456 456 else:
457 457 versionflags = newversionflags
458 458 except IOError as inst:
459 459 if inst.errno != errno.ENOENT:
460 460 raise
461 461
462 462 versionflags = newversionflags
463 463
464 464 self.version = versionflags
465 465
466 466 flags = versionflags & ~0xFFFF
467 467 fmt = versionflags & 0xFFFF
468 468
469 469 if fmt == REVLOGV0:
470 470 if flags:
471 471 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
472 472 'revlog %s') %
473 473 (flags >> 16, fmt, self.indexfile))
474 474
475 475 self._inline = False
476 476 self._generaldelta = False
477 477
478 478 elif fmt == REVLOGV1:
479 479 if flags & ~REVLOGV1_FLAGS:
480 480 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
481 481 'revlog %s') %
482 482 (flags >> 16, fmt, self.indexfile))
483 483
484 484 self._inline = versionflags & FLAG_INLINE_DATA
485 485 self._generaldelta = versionflags & FLAG_GENERALDELTA
486 486
487 487 elif fmt == REVLOGV2:
488 488 if flags & ~REVLOGV2_FLAGS:
489 489 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
490 490 'revlog %s') %
491 491 (flags >> 16, fmt, self.indexfile))
492 492
493 493 self._inline = versionflags & FLAG_INLINE_DATA
494 494 # generaldelta implied by version 2 revlogs.
495 495 self._generaldelta = True
496 496
497 497 else:
498 498 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
499 499 (fmt, self.indexfile))
500 # sparse-revlog can't be on without general-delta (issue6056)
501 if not self._generaldelta:
502 self._sparserevlog = False
500 503
501 504 self._storedeltachains = True
502 505
503 506 self._io = revlogio()
504 507 if self.version == REVLOGV0:
505 508 self._io = revlogoldio()
506 509 try:
507 510 d = self._io.parseindex(indexdata, self._inline)
508 511 except (ValueError, IndexError):
509 512 raise error.RevlogError(_("index %s is corrupted") %
510 513 self.indexfile)
511 514 self.index, nodemap, self._chunkcache = d
512 515 if nodemap is not None:
513 516 self.nodemap = self._nodecache = nodemap
514 517 if not self._chunkcache:
515 518 self._chunkclear()
516 519 # revnum -> (chain-length, sum-delta-length)
517 520 self._chaininfocache = {}
518 521 # revlog header -> revlog compressor
519 522 self._decompressors = {}
520 523
521 524 @util.propertycache
522 525 def _compressor(self):
523 526 return util.compengines[self._compengine].revlogcompressor()
524 527
525 528 def _indexfp(self, mode='r'):
526 529 """file object for the revlog's index file"""
527 530 args = {r'mode': mode}
528 531 if mode != 'r':
529 532 args[r'checkambig'] = self._checkambig
530 533 if mode == 'w':
531 534 args[r'atomictemp'] = True
532 535 return self.opener(self.indexfile, **args)
533 536
534 537 def _datafp(self, mode='r'):
535 538 """file object for the revlog's data file"""
536 539 return self.opener(self.datafile, mode=mode)
537 540
538 541 @contextlib.contextmanager
539 542 def _datareadfp(self, existingfp=None):
540 543 """file object suitable to read data"""
541 544 # Use explicit file handle, if given.
542 545 if existingfp is not None:
543 546 yield existingfp
544 547
545 548 # Use a file handle being actively used for writes, if available.
546 549 # There is some danger to doing this because reads will seek the
547 550 # file. However, _writeentry() performs a SEEK_END before all writes,
548 551 # so we should be safe.
549 552 elif self._writinghandles:
550 553 if self._inline:
551 554 yield self._writinghandles[0]
552 555 else:
553 556 yield self._writinghandles[1]
554 557
555 558 # Otherwise open a new file handle.
556 559 else:
557 560 if self._inline:
558 561 func = self._indexfp
559 562 else:
560 563 func = self._datafp
561 564 with func() as fp:
562 565 yield fp
563 566
564 567 def tip(self):
565 568 return self.node(len(self.index) - 1)
566 569 def __contains__(self, rev):
567 570 return 0 <= rev < len(self)
568 571 def __len__(self):
569 572 return len(self.index)
570 573 def __iter__(self):
571 574 return iter(pycompat.xrange(len(self)))
572 575 def revs(self, start=0, stop=None):
573 576 """iterate over all revs in this revlog (from start to stop)"""
574 577 return storageutil.iterrevs(len(self), start=start, stop=stop)
575 578
576 579 @util.propertycache
577 580 def nodemap(self):
578 581 if self.index:
579 582 # populate mapping down to the initial node
580 583 node0 = self.index[0][7] # get around changelog filtering
581 584 self.rev(node0)
582 585 return self._nodecache
583 586
584 587 def hasnode(self, node):
585 588 try:
586 589 self.rev(node)
587 590 return True
588 591 except KeyError:
589 592 return False
590 593
591 594 def candelta(self, baserev, rev):
592 595 """whether two revisions (baserev, rev) can be delta-ed or not"""
593 596 # Disable delta if either rev requires a content-changing flag
594 597 # processor (ex. LFS). This is because such flag processor can alter
595 598 # the rawtext content that the delta will be based on, and two clients
596 599 # could have a same revlog node with different flags (i.e. different
597 600 # rawtext contents) and the delta could be incompatible.
598 601 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
599 602 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
600 603 return False
601 604 return True
602 605
603 606 def clearcaches(self):
604 607 self._revisioncache = None
605 608 self._chainbasecache.clear()
606 609 self._chunkcache = (0, '')
607 610 self._pcache = {}
608 611
609 612 try:
610 613 self._nodecache.clearcaches()
611 614 except AttributeError:
612 615 self._nodecache = {nullid: nullrev}
613 616 self._nodepos = None
614 617
615 618 def rev(self, node):
616 619 try:
617 620 return self._nodecache[node]
618 621 except TypeError:
619 622 raise
620 623 except error.RevlogError:
621 624 # parsers.c radix tree lookup failed
622 625 if node == wdirid or node in wdirfilenodeids:
623 626 raise error.WdirUnsupported
624 627 raise error.LookupError(node, self.indexfile, _('no node'))
625 628 except KeyError:
626 629 # pure python cache lookup failed
627 630 n = self._nodecache
628 631 i = self.index
629 632 p = self._nodepos
630 633 if p is None:
631 634 p = len(i) - 1
632 635 else:
633 636 assert p < len(i)
634 637 for r in pycompat.xrange(p, -1, -1):
635 638 v = i[r][7]
636 639 n[v] = r
637 640 if v == node:
638 641 self._nodepos = r - 1
639 642 return r
640 643 if node == wdirid or node in wdirfilenodeids:
641 644 raise error.WdirUnsupported
642 645 raise error.LookupError(node, self.indexfile, _('no node'))
643 646
644 647 # Accessors for index entries.
645 648
646 649 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
647 650 # are flags.
648 651 def start(self, rev):
649 652 return int(self.index[rev][0] >> 16)
650 653
651 654 def flags(self, rev):
652 655 return self.index[rev][0] & 0xFFFF
653 656
654 657 def length(self, rev):
655 658 return self.index[rev][1]
656 659
657 660 def rawsize(self, rev):
658 661 """return the length of the uncompressed text for a given revision"""
659 662 l = self.index[rev][2]
660 663 if l >= 0:
661 664 return l
662 665
663 666 t = self.revision(rev, raw=True)
664 667 return len(t)
665 668
666 669 def size(self, rev):
667 670 """length of non-raw text (processed by a "read" flag processor)"""
668 671 # fast path: if no "read" flag processor could change the content,
669 672 # size is rawsize. note: ELLIPSIS is known to not change the content.
670 673 flags = self.flags(rev)
671 674 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
672 675 return self.rawsize(rev)
673 676
674 677 return len(self.revision(rev, raw=False))
675 678
676 679 def chainbase(self, rev):
677 680 base = self._chainbasecache.get(rev)
678 681 if base is not None:
679 682 return base
680 683
681 684 index = self.index
682 685 iterrev = rev
683 686 base = index[iterrev][3]
684 687 while base != iterrev:
685 688 iterrev = base
686 689 base = index[iterrev][3]
687 690
688 691 self._chainbasecache[rev] = base
689 692 return base
690 693
691 694 def linkrev(self, rev):
692 695 return self.index[rev][4]
693 696
694 697 def parentrevs(self, rev):
695 698 try:
696 699 entry = self.index[rev]
697 700 except IndexError:
698 701 if rev == wdirrev:
699 702 raise error.WdirUnsupported
700 703 raise
701 704
702 705 return entry[5], entry[6]
703 706
704 707 # fast parentrevs(rev) where rev isn't filtered
705 708 _uncheckedparentrevs = parentrevs
706 709
707 710 def node(self, rev):
708 711 try:
709 712 return self.index[rev][7]
710 713 except IndexError:
711 714 if rev == wdirrev:
712 715 raise error.WdirUnsupported
713 716 raise
714 717
715 718 # Derived from index values.
716 719
717 720 def end(self, rev):
718 721 return self.start(rev) + self.length(rev)
719 722
720 723 def parents(self, node):
721 724 i = self.index
722 725 d = i[self.rev(node)]
723 726 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
724 727
725 728 def chainlen(self, rev):
726 729 return self._chaininfo(rev)[0]
727 730
728 731 def _chaininfo(self, rev):
729 732 chaininfocache = self._chaininfocache
730 733 if rev in chaininfocache:
731 734 return chaininfocache[rev]
732 735 index = self.index
733 736 generaldelta = self._generaldelta
734 737 iterrev = rev
735 738 e = index[iterrev]
736 739 clen = 0
737 740 compresseddeltalen = 0
738 741 while iterrev != e[3]:
739 742 clen += 1
740 743 compresseddeltalen += e[1]
741 744 if generaldelta:
742 745 iterrev = e[3]
743 746 else:
744 747 iterrev -= 1
745 748 if iterrev in chaininfocache:
746 749 t = chaininfocache[iterrev]
747 750 clen += t[0]
748 751 compresseddeltalen += t[1]
749 752 break
750 753 e = index[iterrev]
751 754 else:
752 755 # Add text length of base since decompressing that also takes
753 756 # work. For cache hits the length is already included.
754 757 compresseddeltalen += e[1]
755 758 r = (clen, compresseddeltalen)
756 759 chaininfocache[rev] = r
757 760 return r
758 761
759 762 def _deltachain(self, rev, stoprev=None):
760 763 """Obtain the delta chain for a revision.
761 764
762 765 ``stoprev`` specifies a revision to stop at. If not specified, we
763 766 stop at the base of the chain.
764 767
765 768 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
766 769 revs in ascending order and ``stopped`` is a bool indicating whether
767 770 ``stoprev`` was hit.
768 771 """
769 772 # Try C implementation.
770 773 try:
771 774 return self.index.deltachain(rev, stoprev, self._generaldelta)
772 775 except AttributeError:
773 776 pass
774 777
775 778 chain = []
776 779
777 780 # Alias to prevent attribute lookup in tight loop.
778 781 index = self.index
779 782 generaldelta = self._generaldelta
780 783
781 784 iterrev = rev
782 785 e = index[iterrev]
783 786 while iterrev != e[3] and iterrev != stoprev:
784 787 chain.append(iterrev)
785 788 if generaldelta:
786 789 iterrev = e[3]
787 790 else:
788 791 iterrev -= 1
789 792 e = index[iterrev]
790 793
791 794 if iterrev == stoprev:
792 795 stopped = True
793 796 else:
794 797 chain.append(iterrev)
795 798 stopped = False
796 799
797 800 chain.reverse()
798 801 return chain, stopped
799 802
800 803 def ancestors(self, revs, stoprev=0, inclusive=False):
801 804 """Generate the ancestors of 'revs' in reverse revision order.
802 805 Does not generate revs lower than stoprev.
803 806
804 807 See the documentation for ancestor.lazyancestors for more details."""
805 808
806 809 # first, make sure start revisions aren't filtered
807 810 revs = list(revs)
808 811 checkrev = self.node
809 812 for r in revs:
810 813 checkrev(r)
811 814 # and we're sure ancestors aren't filtered as well
812 815
813 816 if rustext is not None:
814 817 lazyancestors = rustext.ancestor.LazyAncestors
815 818 arg = self.index
816 819 elif util.safehasattr(parsers, 'rustlazyancestors'):
817 820 lazyancestors = ancestor.rustlazyancestors
818 821 arg = self.index
819 822 else:
820 823 lazyancestors = ancestor.lazyancestors
821 824 arg = self._uncheckedparentrevs
822 825 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
823 826
824 827 def descendants(self, revs):
825 828 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
826 829
827 830 def findcommonmissing(self, common=None, heads=None):
828 831 """Return a tuple of the ancestors of common and the ancestors of heads
829 832 that are not ancestors of common. In revset terminology, we return the
830 833 tuple:
831 834
832 835 ::common, (::heads) - (::common)
833 836
834 837 The list is sorted by revision number, meaning it is
835 838 topologically sorted.
836 839
837 840 'heads' and 'common' are both lists of node IDs. If heads is
838 841 not supplied, uses all of the revlog's heads. If common is not
839 842 supplied, uses nullid."""
840 843 if common is None:
841 844 common = [nullid]
842 845 if heads is None:
843 846 heads = self.heads()
844 847
845 848 common = [self.rev(n) for n in common]
846 849 heads = [self.rev(n) for n in heads]
847 850
848 851 # we want the ancestors, but inclusive
849 852 class lazyset(object):
850 853 def __init__(self, lazyvalues):
851 854 self.addedvalues = set()
852 855 self.lazyvalues = lazyvalues
853 856
854 857 def __contains__(self, value):
855 858 return value in self.addedvalues or value in self.lazyvalues
856 859
857 860 def __iter__(self):
858 861 added = self.addedvalues
859 862 for r in added:
860 863 yield r
861 864 for r in self.lazyvalues:
862 865 if r not in added:
863 866 yield r
864 867
865 868 def add(self, value):
866 869 self.addedvalues.add(value)
867 870
868 871 def update(self, values):
869 872 self.addedvalues.update(values)
870 873
871 874 has = lazyset(self.ancestors(common))
872 875 has.add(nullrev)
873 876 has.update(common)
874 877
875 878 # take all ancestors from heads that aren't in has
876 879 missing = set()
877 880 visit = collections.deque(r for r in heads if r not in has)
878 881 while visit:
879 882 r = visit.popleft()
880 883 if r in missing:
881 884 continue
882 885 else:
883 886 missing.add(r)
884 887 for p in self.parentrevs(r):
885 888 if p not in has:
886 889 visit.append(p)
887 890 missing = list(missing)
888 891 missing.sort()
889 892 return has, [self.node(miss) for miss in missing]
890 893
891 894 def incrementalmissingrevs(self, common=None):
892 895 """Return an object that can be used to incrementally compute the
893 896 revision numbers of the ancestors of arbitrary sets that are not
894 897 ancestors of common. This is an ancestor.incrementalmissingancestors
895 898 object.
896 899
897 900 'common' is a list of revision numbers. If common is not supplied, uses
898 901 nullrev.
899 902 """
900 903 if common is None:
901 904 common = [nullrev]
902 905
903 906 if rustext is not None:
904 907 return rustext.ancestor.MissingAncestors(self.index, common)
905 908 return ancestor.incrementalmissingancestors(self.parentrevs, common)
906 909
907 910 def findmissingrevs(self, common=None, heads=None):
908 911 """Return the revision numbers of the ancestors of heads that
909 912 are not ancestors of common.
910 913
911 914 More specifically, return a list of revision numbers corresponding to
912 915 nodes N such that every N satisfies the following constraints:
913 916
914 917 1. N is an ancestor of some node in 'heads'
915 918 2. N is not an ancestor of any node in 'common'
916 919
917 920 The list is sorted by revision number, meaning it is
918 921 topologically sorted.
919 922
920 923 'heads' and 'common' are both lists of revision numbers. If heads is
921 924 not supplied, uses all of the revlog's heads. If common is not
922 925 supplied, uses nullid."""
923 926 if common is None:
924 927 common = [nullrev]
925 928 if heads is None:
926 929 heads = self.headrevs()
927 930
928 931 inc = self.incrementalmissingrevs(common=common)
929 932 return inc.missingancestors(heads)
930 933
931 934 def findmissing(self, common=None, heads=None):
932 935 """Return the ancestors of heads that are not ancestors of common.
933 936
934 937 More specifically, return a list of nodes N such that every N
935 938 satisfies the following constraints:
936 939
937 940 1. N is an ancestor of some node in 'heads'
938 941 2. N is not an ancestor of any node in 'common'
939 942
940 943 The list is sorted by revision number, meaning it is
941 944 topologically sorted.
942 945
943 946 'heads' and 'common' are both lists of node IDs. If heads is
944 947 not supplied, uses all of the revlog's heads. If common is not
945 948 supplied, uses nullid."""
946 949 if common is None:
947 950 common = [nullid]
948 951 if heads is None:
949 952 heads = self.heads()
950 953
951 954 common = [self.rev(n) for n in common]
952 955 heads = [self.rev(n) for n in heads]
953 956
954 957 inc = self.incrementalmissingrevs(common=common)
955 958 return [self.node(r) for r in inc.missingancestors(heads)]
956 959
957 960 def nodesbetween(self, roots=None, heads=None):
958 961 """Return a topological path from 'roots' to 'heads'.
959 962
960 963 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
961 964 topologically sorted list of all nodes N that satisfy both of
962 965 these constraints:
963 966
964 967 1. N is a descendant of some node in 'roots'
965 968 2. N is an ancestor of some node in 'heads'
966 969
967 970 Every node is considered to be both a descendant and an ancestor
968 971 of itself, so every reachable node in 'roots' and 'heads' will be
969 972 included in 'nodes'.
970 973
971 974 'outroots' is the list of reachable nodes in 'roots', i.e., the
972 975 subset of 'roots' that is returned in 'nodes'. Likewise,
973 976 'outheads' is the subset of 'heads' that is also in 'nodes'.
974 977
975 978 'roots' and 'heads' are both lists of node IDs. If 'roots' is
976 979 unspecified, uses nullid as the only root. If 'heads' is
977 980 unspecified, uses list of all of the revlog's heads."""
978 981 nonodes = ([], [], [])
979 982 if roots is not None:
980 983 roots = list(roots)
981 984 if not roots:
982 985 return nonodes
983 986 lowestrev = min([self.rev(n) for n in roots])
984 987 else:
985 988 roots = [nullid] # Everybody's a descendant of nullid
986 989 lowestrev = nullrev
987 990 if (lowestrev == nullrev) and (heads is None):
988 991 # We want _all_ the nodes!
989 992 return ([self.node(r) for r in self], [nullid], list(self.heads()))
990 993 if heads is None:
991 994 # All nodes are ancestors, so the latest ancestor is the last
992 995 # node.
993 996 highestrev = len(self) - 1
994 997 # Set ancestors to None to signal that every node is an ancestor.
995 998 ancestors = None
996 999 # Set heads to an empty dictionary for later discovery of heads
997 1000 heads = {}
998 1001 else:
999 1002 heads = list(heads)
1000 1003 if not heads:
1001 1004 return nonodes
1002 1005 ancestors = set()
1003 1006 # Turn heads into a dictionary so we can remove 'fake' heads.
1004 1007 # Also, later we will be using it to filter out the heads we can't
1005 1008 # find from roots.
1006 1009 heads = dict.fromkeys(heads, False)
1007 1010 # Start at the top and keep marking parents until we're done.
1008 1011 nodestotag = set(heads)
1009 1012 # Remember where the top was so we can use it as a limit later.
1010 1013 highestrev = max([self.rev(n) for n in nodestotag])
1011 1014 while nodestotag:
1012 1015 # grab a node to tag
1013 1016 n = nodestotag.pop()
1014 1017 # Never tag nullid
1015 1018 if n == nullid:
1016 1019 continue
1017 1020 # A node's revision number represents its place in a
1018 1021 # topologically sorted list of nodes.
1019 1022 r = self.rev(n)
1020 1023 if r >= lowestrev:
1021 1024 if n not in ancestors:
1022 1025 # If we are possibly a descendant of one of the roots
1023 1026 # and we haven't already been marked as an ancestor
1024 1027 ancestors.add(n) # Mark as ancestor
1025 1028 # Add non-nullid parents to list of nodes to tag.
1026 1029 nodestotag.update([p for p in self.parents(n) if
1027 1030 p != nullid])
1028 1031 elif n in heads: # We've seen it before, is it a fake head?
1029 1032 # So it is, real heads should not be the ancestors of
1030 1033 # any other heads.
1031 1034 heads.pop(n)
1032 1035 if not ancestors:
1033 1036 return nonodes
1034 1037 # Now that we have our set of ancestors, we want to remove any
1035 1038 # roots that are not ancestors.
1036 1039
1037 1040 # If one of the roots was nullid, everything is included anyway.
1038 1041 if lowestrev > nullrev:
1039 1042 # But, since we weren't, let's recompute the lowest rev to not
1040 1043 # include roots that aren't ancestors.
1041 1044
1042 1045 # Filter out roots that aren't ancestors of heads
1043 1046 roots = [root for root in roots if root in ancestors]
1044 1047 # Recompute the lowest revision
1045 1048 if roots:
1046 1049 lowestrev = min([self.rev(root) for root in roots])
1047 1050 else:
1048 1051 # No more roots? Return empty list
1049 1052 return nonodes
1050 1053 else:
1051 1054 # We are descending from nullid, and don't need to care about
1052 1055 # any other roots.
1053 1056 lowestrev = nullrev
1054 1057 roots = [nullid]
1055 1058 # Transform our roots list into a set.
1056 1059 descendants = set(roots)
1057 1060 # Also, keep the original roots so we can filter out roots that aren't
1058 1061 # 'real' roots (i.e. are descended from other roots).
1059 1062 roots = descendants.copy()
1060 1063 # Our topologically sorted list of output nodes.
1061 1064 orderedout = []
1062 1065 # Don't start at nullid since we don't want nullid in our output list,
1063 1066 # and if nullid shows up in descendants, empty parents will look like
1064 1067 # they're descendants.
1065 1068 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1066 1069 n = self.node(r)
1067 1070 isdescendant = False
1068 1071 if lowestrev == nullrev: # Everybody is a descendant of nullid
1069 1072 isdescendant = True
1070 1073 elif n in descendants:
1071 1074 # n is already a descendant
1072 1075 isdescendant = True
1073 1076 # This check only needs to be done here because all the roots
1074 1077 # will start being marked as descendants before the loop.
1075 1078 if n in roots:
1076 1079 # If n was a root, check if it's a 'real' root.
1077 1080 p = tuple(self.parents(n))
1078 1081 # If any of its parents are descendants, it's not a root.
1079 1082 if (p[0] in descendants) or (p[1] in descendants):
1080 1083 roots.remove(n)
1081 1084 else:
1082 1085 p = tuple(self.parents(n))
1083 1086 # A node is a descendant if either of its parents are
1084 1087 # descendants. (We seeded the descendants set with the roots
1085 1088 # up there, remember?)
1086 1089 if (p[0] in descendants) or (p[1] in descendants):
1087 1090 descendants.add(n)
1088 1091 isdescendant = True
1089 1092 if isdescendant and ((ancestors is None) or (n in ancestors)):
1090 1093 # Only include nodes that are both descendants and ancestors.
1091 1094 orderedout.append(n)
1092 1095 if (ancestors is not None) and (n in heads):
1093 1096 # We're trying to figure out which heads are reachable
1094 1097 # from roots.
1095 1098 # Mark this head as having been reached
1096 1099 heads[n] = True
1097 1100 elif ancestors is None:
1098 1101 # Otherwise, we're trying to discover the heads.
1099 1102 # Assume this is a head because if it isn't, the next step
1100 1103 # will eventually remove it.
1101 1104 heads[n] = True
1102 1105 # But, obviously its parents aren't.
1103 1106 for p in self.parents(n):
1104 1107 heads.pop(p, None)
1105 1108 heads = [head for head, flag in heads.iteritems() if flag]
1106 1109 roots = list(roots)
1107 1110 assert orderedout
1108 1111 assert roots
1109 1112 assert heads
1110 1113 return (orderedout, roots, heads)
1111 1114
1112 1115 def headrevs(self, revs=None):
1113 1116 if revs is None:
1114 1117 try:
1115 1118 return self.index.headrevs()
1116 1119 except AttributeError:
1117 1120 return self._headrevs()
1118 1121 return dagop.headrevs(revs, self.parentrevs)
1119 1122
1120 1123 def computephases(self, roots):
1121 1124 return self.index.computephasesmapsets(roots)
1122 1125
1123 1126 def _headrevs(self):
1124 1127 count = len(self)
1125 1128 if not count:
1126 1129 return [nullrev]
1127 1130 # we won't iterate over filtered revs, so nobody is a head at the start
1128 1131 ishead = [0] * (count + 1)
1129 1132 index = self.index
1130 1133 for r in self:
1131 1134 ishead[r] = 1 # I may be a head
1132 1135 e = index[r]
1133 1136 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1134 1137 return [r for r, val in enumerate(ishead) if val]
1135 1138
1136 1139 def heads(self, start=None, stop=None):
1137 1140 """return the list of all nodes that have no children
1138 1141
1139 1142 if start is specified, only heads that are descendants of
1140 1143 start will be returned
1141 1144 if stop is specified, it will consider all the revs from stop
1142 1145 as if they had no children
1143 1146 """
1144 1147 if start is None and stop is None:
1145 1148 if not len(self):
1146 1149 return [nullid]
1147 1150 return [self.node(r) for r in self.headrevs()]
1148 1151
1149 1152 if start is None:
1150 1153 start = nullrev
1151 1154 else:
1152 1155 start = self.rev(start)
1153 1156
1154 1157 stoprevs = set(self.rev(n) for n in stop or [])
1155 1158
1156 1159 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1157 1160 stoprevs=stoprevs)
1158 1161
1159 1162 return [self.node(rev) for rev in revs]
1160 1163
1161 1164 def children(self, node):
1162 1165 """find the children of a given node"""
1163 1166 c = []
1164 1167 p = self.rev(node)
1165 1168 for r in self.revs(start=p + 1):
1166 1169 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1167 1170 if prevs:
1168 1171 for pr in prevs:
1169 1172 if pr == p:
1170 1173 c.append(self.node(r))
1171 1174 elif p == nullrev:
1172 1175 c.append(self.node(r))
1173 1176 return c
1174 1177
1175 1178 def commonancestorsheads(self, a, b):
1176 1179 """calculate all the heads of the common ancestors of nodes a and b"""
1177 1180 a, b = self.rev(a), self.rev(b)
1178 1181 ancs = self._commonancestorsheads(a, b)
1179 1182 return pycompat.maplist(self.node, ancs)
1180 1183
1181 1184 def _commonancestorsheads(self, *revs):
1182 1185 """calculate all the heads of the common ancestors of revs"""
1183 1186 try:
1184 1187 ancs = self.index.commonancestorsheads(*revs)
1185 1188 except (AttributeError, OverflowError): # C implementation failed
1186 1189 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1187 1190 return ancs
1188 1191
1189 1192 def isancestor(self, a, b):
1190 1193 """return True if node a is an ancestor of node b
1191 1194
1192 1195 A revision is considered an ancestor of itself."""
1193 1196 a, b = self.rev(a), self.rev(b)
1194 1197 return self.isancestorrev(a, b)
1195 1198
1196 1199 def isancestorrev(self, a, b):
1197 1200 """return True if revision a is an ancestor of revision b
1198 1201
1199 1202 A revision is considered an ancestor of itself.
1200 1203
1201 1204 The implementation of this is trivial but the use of
1202 1205 commonancestorsheads is not."""
1203 1206 if a == nullrev:
1204 1207 return True
1205 1208 elif a == b:
1206 1209 return True
1207 1210 elif a > b:
1208 1211 return False
1209 1212 return a in self._commonancestorsheads(a, b)
1210 1213
1211 1214 def ancestor(self, a, b):
1212 1215 """calculate the "best" common ancestor of nodes a and b"""
1213 1216
1214 1217 a, b = self.rev(a), self.rev(b)
1215 1218 try:
1216 1219 ancs = self.index.ancestors(a, b)
1217 1220 except (AttributeError, OverflowError):
1218 1221 ancs = ancestor.ancestors(self.parentrevs, a, b)
1219 1222 if ancs:
1220 1223 # choose a consistent winner when there's a tie
1221 1224 return min(map(self.node, ancs))
1222 1225 return nullid
1223 1226
1224 1227 def _match(self, id):
1225 1228 if isinstance(id, int):
1226 1229 # rev
1227 1230 return self.node(id)
1228 1231 if len(id) == 20:
1229 1232 # possibly a binary node
1230 1233 # odds of a binary node being all hex in ASCII are 1 in 10**25
1231 1234 try:
1232 1235 node = id
1233 1236 self.rev(node) # quick search the index
1234 1237 return node
1235 1238 except error.LookupError:
1236 1239 pass # may be partial hex id
1237 1240 try:
1238 1241 # str(rev)
1239 1242 rev = int(id)
1240 1243 if "%d" % rev != id:
1241 1244 raise ValueError
1242 1245 if rev < 0:
1243 1246 rev = len(self) + rev
1244 1247 if rev < 0 or rev >= len(self):
1245 1248 raise ValueError
1246 1249 return self.node(rev)
1247 1250 except (ValueError, OverflowError):
1248 1251 pass
1249 1252 if len(id) == 40:
1250 1253 try:
1251 1254 # a full hex nodeid?
1252 1255 node = bin(id)
1253 1256 self.rev(node)
1254 1257 return node
1255 1258 except (TypeError, error.LookupError):
1256 1259 pass
1257 1260
1258 1261 def _partialmatch(self, id):
1260 1263 # we don't care about wdirfilenodeids as they should always be full hashes
1260 1263 maybewdir = wdirhex.startswith(id)
1261 1264 try:
1262 1265 partial = self.index.partialmatch(id)
1263 1266 if partial and self.hasnode(partial):
1264 1267 if maybewdir:
1265 1268 # single 'ff...' match in radix tree, ambiguous with wdir
1266 1269 raise error.RevlogError
1267 1270 return partial
1268 1271 if maybewdir:
1269 1272 # no 'ff...' match in radix tree, wdir identified
1270 1273 raise error.WdirUnsupported
1271 1274 return None
1272 1275 except error.RevlogError:
1273 1276 # parsers.c radix tree lookup gave multiple matches
1274 1277 # fast path: for unfiltered changelog, radix tree is accurate
1275 1278 if not getattr(self, 'filteredrevs', None):
1276 1279 raise error.AmbiguousPrefixLookupError(
1277 1280 id, self.indexfile, _('ambiguous identifier'))
1278 1281 # fall through to slow path that filters hidden revisions
1279 1282 except (AttributeError, ValueError):
1280 1283 # we are pure python, or key was too short to search radix tree
1281 1284 pass
1282 1285
1283 1286 if id in self._pcache:
1284 1287 return self._pcache[id]
1285 1288
1286 1289 if len(id) <= 40:
1287 1290 try:
1288 1291 # hex(node)[:...]
1289 1292 l = len(id) // 2 # grab an even number of digits
1290 1293 prefix = bin(id[:l * 2])
1291 1294 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1292 1295 nl = [n for n in nl if hex(n).startswith(id) and
1293 1296 self.hasnode(n)]
1294 1297 if nullhex.startswith(id):
1295 1298 nl.append(nullid)
1296 1299 if len(nl) > 0:
1297 1300 if len(nl) == 1 and not maybewdir:
1298 1301 self._pcache[id] = nl[0]
1299 1302 return nl[0]
1300 1303 raise error.AmbiguousPrefixLookupError(
1301 1304 id, self.indexfile, _('ambiguous identifier'))
1302 1305 if maybewdir:
1303 1306 raise error.WdirUnsupported
1304 1307 return None
1305 1308 except TypeError:
1306 1309 pass
1307 1310
1308 1311 def lookup(self, id):
1309 1312 """locate a node based on:
1310 1313 - revision number or str(revision number)
1311 1314 - nodeid or subset of hex nodeid
1312 1315 """
1313 1316 n = self._match(id)
1314 1317 if n is not None:
1315 1318 return n
1316 1319 n = self._partialmatch(id)
1317 1320 if n:
1318 1321 return n
1319 1322
1320 1323 raise error.LookupError(id, self.indexfile, _('no match found'))
1321 1324
1322 1325 def shortest(self, node, minlength=1):
1323 1326 """Find the shortest unambiguous prefix that matches node."""
1324 1327 def isvalid(prefix):
1325 1328 try:
1326 1329 node = self._partialmatch(prefix)
1327 1330 except error.AmbiguousPrefixLookupError:
1328 1331 return False
1329 1332 except error.WdirUnsupported:
1330 1333 # single 'ff...' match
1331 1334 return True
1332 1335 if node is None:
1333 1336 raise error.LookupError(node, self.indexfile, _('no node'))
1334 1337 return True
1335 1338
1336 1339 def maybewdir(prefix):
1337 1340 return all(c == 'f' for c in prefix)
1338 1341
1339 1342 hexnode = hex(node)
1340 1343
1341 1344 def disambiguate(hexnode, minlength):
1342 1345 """Disambiguate against wdirid."""
1343 1346 for length in range(minlength, 41):
1344 1347 prefix = hexnode[:length]
1345 1348 if not maybewdir(prefix):
1346 1349 return prefix
1347 1350
1348 1351 if not getattr(self, 'filteredrevs', None):
1349 1352 try:
1350 1353 length = max(self.index.shortest(node), minlength)
1351 1354 return disambiguate(hexnode, length)
1352 1355 except error.RevlogError:
1353 1356 if node != wdirid:
1354 1357 raise error.LookupError(node, self.indexfile, _('no node'))
1355 1358 except AttributeError:
1356 1359 # Fall through to pure code
1357 1360 pass
1358 1361
1359 1362 if node == wdirid:
1360 1363 for length in range(minlength, 41):
1361 1364 prefix = hexnode[:length]
1362 1365 if isvalid(prefix):
1363 1366 return prefix
1364 1367
1365 1368 for length in range(minlength, 41):
1366 1369 prefix = hexnode[:length]
1367 1370 if isvalid(prefix):
1368 1371 return disambiguate(hexnode, length)
1369 1372
1370 1373 def cmp(self, node, text):
1371 1374 """compare text with a given file revision
1372 1375
1373 1376 returns True if text is different than what is stored.
1374 1377 """
1375 1378 p1, p2 = self.parents(node)
1376 1379 return storageutil.hashrevisionsha1(text, p1, p2) != node
1377 1380
1378 1381 def _cachesegment(self, offset, data):
1379 1382 """Add a segment to the revlog cache.
1380 1383
1381 1384 Accepts an absolute offset and the data that is at that location.
1382 1385 """
1383 1386 o, d = self._chunkcache
1384 1387 # try to add to existing cache
1385 1388 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1386 1389 self._chunkcache = o, d + data
1387 1390 else:
1388 1391 self._chunkcache = offset, data
1389 1392
1390 1393 def _readsegment(self, offset, length, df=None):
1391 1394 """Load a segment of raw data from the revlog.
1392 1395
1393 1396 Accepts an absolute offset, length to read, and an optional existing
1394 1397 file handle to read from.
1395 1398
1396 1399 If an existing file handle is passed, it will be seeked and the
1397 1400 original seek position will NOT be restored.
1398 1401
1399 1402 Returns a str or buffer of raw byte data.
1400 1403
1401 1404 Raises if the requested number of bytes could not be read.
1402 1405 """
1403 1406 # Cache data both forward and backward around the requested
1404 1407 # data, in a fixed size window. This helps speed up operations
1405 1408 # involving reading the revlog backwards.
1406 1409 cachesize = self._chunkcachesize
1407 1410 realoffset = offset & ~(cachesize - 1)
1408 1411 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1409 1412 - realoffset)
1410 1413 with self._datareadfp(df) as df:
1411 1414 df.seek(realoffset)
1412 1415 d = df.read(reallength)
1413 1416
1414 1417 self._cachesegment(realoffset, d)
1415 1418 if offset != realoffset or reallength != length:
1416 1419 startoffset = offset - realoffset
1417 1420 if len(d) - startoffset < length:
1418 1421 raise error.RevlogError(
1419 1422 _('partial read of revlog %s; expected %d bytes from '
1420 1423 'offset %d, got %d') %
1421 1424 (self.indexfile if self._inline else self.datafile,
1422 1425 length, realoffset, len(d) - startoffset))
1423 1426
1424 1427 return util.buffer(d, startoffset, length)
1425 1428
1426 1429 if len(d) < length:
1427 1430 raise error.RevlogError(
1428 1431 _('partial read of revlog %s; expected %d bytes from offset '
1429 1432 '%d, got %d') %
1430 1433 (self.indexfile if self._inline else self.datafile,
1431 1434 length, offset, len(d)))
1432 1435
1433 1436 return d
1434 1437
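The realoffset/reallength computation above rounds the read window out to cache-size boundaries; with the default 64KB chunk cache the arithmetic works out as follows (a standalone sketch, not part of the patch):

    cachesize = 65536            # self._chunkcachesize, a power of two
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)          # 65536, rounded down
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)  # 65536-byte window
    assert realoffset <= offset
    assert realoffset + reallength >= offset + length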
1435 1438 def _getsegment(self, offset, length, df=None):
1436 1439 """Obtain a segment of raw data from the revlog.
1437 1440
1438 1441 Accepts an absolute offset, length of bytes to obtain, and an
1439 1442 optional file handle to the already-opened revlog. If the file
1440 1443 handle is used, its original seek position will not be preserved.
1441 1444
1442 1445 Requests for data may be returned from a cache.
1443 1446
1444 1447 Returns a str or a buffer instance of raw byte data.
1445 1448 """
1446 1449 o, d = self._chunkcache
1447 1450 l = len(d)
1448 1451
1449 1452 # is it in the cache?
1450 1453 cachestart = offset - o
1451 1454 cacheend = cachestart + length
1452 1455 if cachestart >= 0 and cacheend <= l:
1453 1456 if cachestart == 0 and cacheend == l:
1454 1457 return d # avoid a copy
1455 1458 return util.buffer(d, cachestart, cacheend - cachestart)
1456 1459
1457 1460 return self._readsegment(offset, length, df=df)
1458 1461
1459 1462 def _getsegmentforrevs(self, startrev, endrev, df=None):
1460 1463 """Obtain a segment of raw data corresponding to a range of revisions.
1461 1464
1462 1465 Accepts the start and end revisions and an optional already-open
1463 1466 file handle to be used for reading. If the file handle is read, its
1464 1467 seek position will not be preserved.
1465 1468
1466 1469 Requests for data may be satisfied by a cache.
1467 1470
1468 1471 Returns a 2-tuple of (offset, data) for the requested range of
1469 1472 revisions. Offset is the integer offset from the beginning of the
1470 1473 revlog and data is a str or buffer of the raw byte data.
1471 1474
1472 1475 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1473 1476 to determine where each revision's data begins and ends.
1474 1477 """
1475 1478 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1476 1479 # (functions are expensive).
1477 1480 index = self.index
1478 1481 istart = index[startrev]
1479 1482 start = int(istart[0] >> 16)
1480 1483 if startrev == endrev:
1481 1484 end = start + istart[1]
1482 1485 else:
1483 1486 iend = index[endrev]
1484 1487 end = int(iend[0] >> 16) + iend[1]
1485 1488
1486 1489 if self._inline:
1487 1490 start += (startrev + 1) * self._io.size
1488 1491 end += (endrev + 1) * self._io.size
1489 1492 length = end - start
1490 1493
1491 1494 return start, self._getsegment(start, length, df=df)
1492 1495
1493 1496 def _chunk(self, rev, df=None):
1494 1497 """Obtain a single decompressed chunk for a revision.
1495 1498
1496 1499 Accepts an integer revision and an optional already-open file handle
1497 1500 to be used for reading. If used, the seek position of the file will not
1498 1501 be preserved.
1499 1502
1500 1503 Returns a str holding uncompressed data for the requested revision.
1501 1504 """
1502 1505 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1503 1506
1504 1507 def _chunks(self, revs, df=None, targetsize=None):
1505 1508 """Obtain decompressed chunks for the specified revisions.
1506 1509
1507 1510 Accepts an iterable of numeric revisions that are assumed to be in
1508 1511 ascending order. Also accepts an optional already-open file handle
1509 1512 to be used for reading. If used, the seek position of the file will
1510 1513 not be preserved.
1511 1514
1512 1515 This function is similar to calling ``self._chunk()`` multiple times,
1513 1516 but is faster.
1514 1517
1515 1518 Returns a list with decompressed data for each requested revision.
1516 1519 """
1517 1520 if not revs:
1518 1521 return []
1519 1522 start = self.start
1520 1523 length = self.length
1521 1524 inline = self._inline
1522 1525 iosize = self._io.size
1523 1526 buffer = util.buffer
1524 1527
1525 1528 l = []
1526 1529 ladd = l.append
1527 1530
1528 1531 if not self._withsparseread:
1529 1532 slicedchunks = (revs,)
1530 1533 else:
1531 1534 slicedchunks = deltautil.slicechunk(self, revs,
1532 1535 targetsize=targetsize)
1533 1536
1534 1537 for revschunk in slicedchunks:
1535 1538 firstrev = revschunk[0]
1536 1539 # Skip trailing revisions with empty diff
1537 1540 for lastrev in revschunk[::-1]:
1538 1541 if length(lastrev) != 0:
1539 1542 break
1540 1543
1541 1544 try:
1542 1545 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1543 1546 except OverflowError:
1544 1547 # issue4215 - we can't cache a run of chunks greater than
1545 1548 # 2G on Windows
1546 1549 return [self._chunk(rev, df=df) for rev in revschunk]
1547 1550
1548 1551 decomp = self.decompress
1549 1552 for rev in revschunk:
1550 1553 chunkstart = start(rev)
1551 1554 if inline:
1552 1555 chunkstart += (rev + 1) * iosize
1553 1556 chunklength = length(rev)
1554 1557 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1555 1558
1556 1559 return l
1557 1560
1558 1561 def _chunkclear(self):
1559 1562 """Clear the raw chunk cache."""
1560 1563 self._chunkcache = (0, '')
1561 1564
1562 1565 def deltaparent(self, rev):
1563 1566 """return deltaparent of the given revision"""
1564 1567 base = self.index[rev][3]
1565 1568 if base == rev:
1566 1569 return nullrev
1567 1570 elif self._generaldelta:
1568 1571 return base
1569 1572 else:
1570 1573 return rev - 1
1571 1574
1572 1575 def issnapshot(self, rev):
1573 1576 """tells whether rev is a snapshot
1574 1577 """
1575 1578 if not self._sparserevlog:
1576 1579 return self.deltaparent(rev) == nullrev
1577 1580 elif util.safehasattr(self.index, 'issnapshot'):
1578 1581 # directly assign the method to cache the testing and access
1579 1582 self.issnapshot = self.index.issnapshot
1580 1583 return self.issnapshot(rev)
1581 1584 if rev == nullrev:
1582 1585 return True
1583 1586 entry = self.index[rev]
1584 1587 base = entry[3]
1585 1588 if base == rev:
1586 1589 return True
1587 1590 if base == nullrev:
1588 1591 return True
1589 1592 p1 = entry[5]
1590 1593 p2 = entry[6]
1591 1594 if base == p1 or base == p2:
1592 1595 return False
1593 1596 return self.issnapshot(base)
1594 1597
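The recursion above can be restated on a toy index; ``toyindex`` and ``is_snapshot`` below are illustrative stand-ins, not revlog API.

    NULLREV = -1
    # rev -> (deltabase, p1, p2)
    toyindex = {
        0: (0, NULLREV, NULLREV),  # base == rev: full snapshot
        1: (0, 0, NULLREV),        # delta against its parent: not a snapshot
        2: (0, 1, NULLREV),        # delta against a non-parent: intermediate snapshot
        3: (2, 2, NULLREV),        # delta against its parent: not a snapshot
    }

    def is_snapshot(rev):
        if rev == NULLREV:
            return True
        base, p1, p2 = toyindex[rev]
        if base == rev or base == NULLREV:
            return True                 # full snapshot
        if base in (p1, p2):
            return False                # plain delta against a parent
        return is_snapshot(base)        # snapshot iff its base is one

    assert [is_snapshot(r) for r in range(4)] == [True, False, True, False]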
1595 1598 def snapshotdepth(self, rev):
1596 1599 """number of snapshot in the chain before this one"""
1597 1600 if not self.issnapshot(rev):
1598 1601 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1599 1602 return len(self._deltachain(rev)[0]) - 1
1600 1603
1601 1604 def revdiff(self, rev1, rev2):
1602 1605 """return or calculate a delta between two revisions
1603 1606
1604 1607 The delta calculated is in binary form and is intended to be written to
1605 1608 revlog data directly. So this function needs raw revision data.
1606 1609 """
1607 1610 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1608 1611 return bytes(self._chunk(rev2))
1609 1612
1610 1613 return mdiff.textdiff(self.revision(rev1, raw=True),
1611 1614 self.revision(rev2, raw=True))
1612 1615
1613 1616 def revision(self, nodeorrev, _df=None, raw=False):
1614 1617 """return an uncompressed revision of a given node or revision
1615 1618 number.
1616 1619
1617 1620 _df - an existing file handle to read from. (internal-only)
1618 1621 raw - an optional argument specifying if the revision data is to be
1619 1622 treated as raw data when applying flag transforms. 'raw' should be set
1620 1623 to True when generating changegroups or in debug commands.
1621 1624 """
1622 1625 if isinstance(nodeorrev, int):
1623 1626 rev = nodeorrev
1624 1627 node = self.node(rev)
1625 1628 else:
1626 1629 node = nodeorrev
1627 1630 rev = None
1628 1631
1629 1632 cachedrev = None
1630 1633 flags = None
1631 1634 rawtext = None
1632 1635 if node == nullid:
1633 1636 return ""
1634 1637 if self._revisioncache:
1635 1638 if self._revisioncache[0] == node:
1636 1639 # _cache only stores rawtext
1637 1640 if raw:
1638 1641 return self._revisioncache[2]
1639 1642 # duplicated, but good for perf
1640 1643 if rev is None:
1641 1644 rev = self.rev(node)
1642 1645 if flags is None:
1643 1646 flags = self.flags(rev)
1644 1647 # no extra flags set, no flag processor runs, text = rawtext
1645 1648 if flags == REVIDX_DEFAULT_FLAGS:
1646 1649 return self._revisioncache[2]
1647 1650 # rawtext is reusable. need to run flag processor
1648 1651 rawtext = self._revisioncache[2]
1649 1652
1650 1653 cachedrev = self._revisioncache[1]
1651 1654
1652 1655 # look up what we need to read
1653 1656 if rawtext is None:
1654 1657 if rev is None:
1655 1658 rev = self.rev(node)
1656 1659
1657 1660 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1658 1661 if stopped:
1659 1662 rawtext = self._revisioncache[2]
1660 1663
1661 1664 # drop cache to save memory
1662 1665 self._revisioncache = None
1663 1666
1664 1667 targetsize = None
1665 1668 rawsize = self.index[rev][2]
1666 1669 if 0 <= rawsize:
1667 1670 targetsize = 4 * rawsize
1668 1671
1669 1672 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1670 1673 if rawtext is None:
1671 1674 rawtext = bytes(bins[0])
1672 1675 bins = bins[1:]
1673 1676
1674 1677 rawtext = mdiff.patches(rawtext, bins)
1675 1678 self._revisioncache = (node, rev, rawtext)
1676 1679
1677 1680 if flags is None:
1678 1681 if rev is None:
1679 1682 rev = self.rev(node)
1680 1683 flags = self.flags(rev)
1681 1684
1682 1685 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1683 1686 if validatehash:
1684 1687 self.checkhash(text, node, rev=rev)
1685 1688
1686 1689 return text
1687 1690
1688 1691 def hash(self, text, p1, p2):
1689 1692 """Compute a node hash.
1690 1693
1691 1694 Available as a function so that subclasses can replace the hash
1692 1695 as needed.
1693 1696 """
1694 1697 return storageutil.hashrevisionsha1(text, p1, p2)
1695 1698
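For reference, the SHA-1 scheme delegated to storageutil.hashrevisionsha1 can be sketched as follows (a simplified restatement, not the module itself): parents are sorted first, so the node is symmetric in p1/p2.

    import hashlib

    def hashrevision(text, p1, p2):
        a, b = sorted([p1, p2])
        return hashlib.sha1(a + b + text).digest()

    nullid = b'\x00' * 20
    node = hashrevision(b'file content\n', nullid, nullid)
    assert len(node) == 20
    assert (hashrevision(b'x', b'\x01' * 20, b'\x02' * 20) ==
            hashrevision(b'x', b'\x02' * 20, b'\x01' * 20))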
1696 1699 def _processflags(self, text, flags, operation, raw=False):
1697 1700 """Inspect revision data flags and applies transforms defined by
1698 1701 registered flag processors.
1699 1702
1700 1703 ``text`` - the revision data to process
1701 1704 ``flags`` - the revision flags
1702 1705 ``operation`` - the operation being performed (read or write)
1703 1706 ``raw`` - an optional argument describing if the raw transform should be
1704 1707 applied.
1705 1708
1706 1709 This method processes the flags in the order (or reverse order if
1707 1710 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1708 1711 flag processors registered for present flags. The order of flags defined
1709 1712 in REVIDX_FLAGS_ORDER needs to be stable because the transforms do not commute.
1710 1713
1711 1714 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1712 1715 processed text and ``validatehash`` is a bool indicating whether the
1713 1716 returned text should be checked for hash integrity.
1714 1717
1715 1718 Note: If the ``raw`` argument is set, it has precedence over the
1716 1719 operation and will only update the value of ``validatehash``.
1717 1720 """
1718 1721 # fast path: no flag processors will run
1719 1722 if flags == 0:
1720 1723 return text, True
1721 1724 if operation not in ('read', 'write'):
1722 1725 raise error.ProgrammingError(_("invalid '%s' operation") %
1723 1726 operation)
1724 1727 # Check all flags are known.
1725 1728 if flags & ~REVIDX_KNOWN_FLAGS:
1726 1729 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1727 1730 (flags & ~REVIDX_KNOWN_FLAGS))
1728 1731 validatehash = True
1729 1732 # Depending on the operation (read or write), the order might be
1730 1733 # reversed due to non-commutative transforms.
1731 1734 orderedflags = REVIDX_FLAGS_ORDER
1732 1735 if operation == 'write':
1733 1736 orderedflags = reversed(orderedflags)
1734 1737
1735 1738 for flag in orderedflags:
1736 1739 # If a flagprocessor has been registered for a known flag, apply the
1737 1740 # related operation transform and update result tuple.
1738 1741 if flag & flags:
1739 1742 vhash = True
1740 1743
1741 1744 if flag not in self._flagprocessors:
1742 1745 message = _("missing processor for flag '%#x'") % (flag)
1743 1746 raise error.RevlogError(message)
1744 1747
1745 1748 processor = self._flagprocessors[flag]
1746 1749 if processor is not None:
1747 1750 readtransform, writetransform, rawtransform = processor
1748 1751
1749 1752 if raw:
1750 1753 vhash = rawtransform(self, text)
1751 1754 elif operation == 'read':
1752 1755 text, vhash = readtransform(self, text)
1753 1756 else: # write operation
1754 1757 text, vhash = writetransform(self, text)
1755 1758 validatehash = validatehash and vhash
1756 1759
1757 1760 return text, validatehash
1758 1761
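A schematic sketch of the ordering contract above; the flags and transforms here are invented for illustration and are not registered processors.

    FLAG_A = 1 << 0
    FLAG_B = 1 << 1
    FLAGS_ORDER = [FLAG_A, FLAG_B]          # stable order, reversed on write

    processors = {                          # flag -> (read, write, raw)
        FLAG_A: (lambda t: t.upper(), lambda t: t.lower(), None),
        FLAG_B: (lambda t: t + b'!', lambda t: t[:-1], None),
    }

    def process(text, flags, operation):
        order = reversed(FLAGS_ORDER) if operation == 'write' else FLAGS_ORDER
        for flag in order:
            if flag & flags:
                read, write, _raw = processors[flag]
                text = read(text) if operation == 'read' else write(text)
        return text

    stored = process(b'abc', FLAG_A | FLAG_B, 'read')      # b'ABC!'
    assert process(stored, FLAG_A | FLAG_B, 'write') == b'abc'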
1759 1762 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1760 1763 """Check node hash integrity.
1761 1764
1762 1765 Available as a function so that subclasses can extend hash mismatch
1763 1766 behaviors as needed.
1764 1767 """
1765 1768 try:
1766 1769 if p1 is None and p2 is None:
1767 1770 p1, p2 = self.parents(node)
1768 1771 if node != self.hash(text, p1, p2):
1769 1772 # Clear the revision cache on hash failure. The revision cache
1770 1773 # only stores the raw revision and clearing the cache does have
1771 1774 # the side-effect that we won't have a cache hit when the raw
1772 1775 # revision data is accessed. But this case should be rare and
1773 1776 # it is extra work to teach the cache about the hash
1774 1777 # verification state.
1775 1778 if self._revisioncache and self._revisioncache[0] == node:
1776 1779 self._revisioncache = None
1777 1780
1778 1781 revornode = rev
1779 1782 if revornode is None:
1780 1783 revornode = templatefilters.short(hex(node))
1781 1784 raise error.RevlogError(_("integrity check failed on %s:%s")
1782 1785 % (self.indexfile, pycompat.bytestr(revornode)))
1783 1786 except error.RevlogError:
1784 1787 if self._censorable and storageutil.iscensoredtext(text):
1785 1788 raise error.CensoredNodeError(self.indexfile, node, text)
1786 1789 raise
1787 1790
1788 1791 def _enforceinlinesize(self, tr, fp=None):
1789 1792 """Check if the revlog is too big for inline and convert if so.
1790 1793
1791 1794 This should be called after revisions are added to the revlog. If the
1792 1795 revlog has grown too large to be an inline revlog, it will convert it
1793 1796 to use multiple index and data files.
1794 1797 """
1795 1798 tiprev = len(self) - 1
1796 1799 if (not self._inline or
1797 1800 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1798 1801 return
1799 1802
1800 1803 trinfo = tr.find(self.indexfile)
1801 1804 if trinfo is None:
1802 1805 raise error.RevlogError(_("%s not found in the transaction")
1803 1806 % self.indexfile)
1804 1807
1805 1808 trindex = trinfo[2]
1806 1809 if trindex is not None:
1807 1810 dataoff = self.start(trindex)
1808 1811 else:
1809 1812 # revlog was stripped at start of transaction, use all leftover data
1810 1813 trindex = len(self) - 1
1811 1814 dataoff = self.end(tiprev)
1812 1815
1813 1816 tr.add(self.datafile, dataoff)
1814 1817
1815 1818 if fp:
1816 1819 fp.flush()
1817 1820 fp.close()
1818 1821 # We can't use the cached file handle after close(). So prevent
1819 1822 # its usage.
1820 1823 self._writinghandles = None
1821 1824
1822 1825 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1823 1826 for r in self:
1824 1827 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1825 1828
1826 1829 with self._indexfp('w') as fp:
1827 1830 self.version &= ~FLAG_INLINE_DATA
1828 1831 self._inline = False
1829 1832 io = self._io
1830 1833 for i in self:
1831 1834 e = io.packentry(self.index[i], self.node, self.version, i)
1832 1835 fp.write(e)
1833 1836
1834 1837 # the temp file replaces the real index when we exit the context
1835 1838 # manager
1836 1839
1837 1840 tr.replace(self.indexfile, trindex * self._io.size)
1838 1841 self._chunkclear()
1839 1842
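A small sketch of the threshold check above (the sizes are invented): the revlog stays inline only while the tip revision's data ends below the module-level _maxinline.

    _maxinline = 131072                     # 128 KiB
    tip_start, tip_length = 130000, 2000
    stays_inline = (tip_start + tip_length) < _maxinline
    assert not stays_inline                 # 132000 >= 131072: split into .i/.d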
1840 1843 def _nodeduplicatecallback(self, transaction, node):
1841 1844 """called when trying to add a node already stored.
1842 1845 """
1843 1846
1844 1847 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1845 1848 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1846 1849 """add a revision to the log
1847 1850
1848 1851 text - the revision data to add
1849 1852 transaction - the transaction object used for rollback
1850 1853 link - the linkrev data to add
1851 1854 p1, p2 - the parent nodeids of the revision
1852 1855 cachedelta - an optional precomputed delta
1853 1856 node - nodeid of revision; typically node is not specified, and it is
1854 1857 computed by default as hash(text, p1, p2), however subclasses might
1855 1858 use different hashing method (and override checkhash() in such case)
1856 1859 flags - the known flags to set on the revision
1857 1860 deltacomputer - an optional deltacomputer instance shared between
1858 1861 multiple calls
1859 1862 """
1860 1863 if link == nullrev:
1861 1864 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1862 1865 % self.indexfile)
1863 1866
1864 1867 if flags:
1865 1868 node = node or self.hash(text, p1, p2)
1866 1869
1867 1870 rawtext, validatehash = self._processflags(text, flags, 'write')
1868 1871
1869 1872 # If the flag processor modifies the revision data, ignore any provided
1870 1873 # cachedelta.
1871 1874 if rawtext != text:
1872 1875 cachedelta = None
1873 1876
1874 1877 if len(rawtext) > _maxentrysize:
1875 1878 raise error.RevlogError(
1876 1879 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1877 1880 % (self.indexfile, len(rawtext)))
1878 1881
1879 1882 node = node or self.hash(rawtext, p1, p2)
1880 1883 if node in self.nodemap:
1881 1884 return node
1882 1885
1883 1886 if validatehash:
1884 1887 self.checkhash(rawtext, node, p1=p1, p2=p2)
1885 1888
1886 1889 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1887 1890 flags, cachedelta=cachedelta,
1888 1891 deltacomputer=deltacomputer)
1889 1892
1890 1893 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1891 1894 cachedelta=None, deltacomputer=None):
1892 1895 """add a raw revision with known flags, node and parents
1893 1896 useful when reusing a revision not stored in this revlog (ex: received
1894 1897 over wire, or read from an external bundle).
1895 1898 """
1896 1899 dfh = None
1897 1900 if not self._inline:
1898 1901 dfh = self._datafp("a+")
1899 1902 ifh = self._indexfp("a+")
1900 1903 try:
1901 1904 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1902 1905 flags, cachedelta, ifh, dfh,
1903 1906 deltacomputer=deltacomputer)
1904 1907 finally:
1905 1908 if dfh:
1906 1909 dfh.close()
1907 1910 ifh.close()
1908 1911
1909 1912 def compress(self, data):
1910 1913 """Generate a possibly-compressed representation of data."""
1911 1914 if not data:
1912 1915 return '', data
1913 1916
1914 1917 compressed = self._compressor.compress(data)
1915 1918
1916 1919 if compressed:
1917 1920 # The revlog compressor added the header in the returned data.
1918 1921 return '', compressed
1919 1922
1920 1923 if data[0:1] == '\0':
1921 1924 return '', data
1922 1925 return 'u', data
1923 1926
1924 1927 def decompress(self, data):
1925 1928 """Decompress a revlog chunk.
1926 1929
1927 1930 The chunk is expected to begin with a header identifying the
1928 1931 format type so it can be routed to an appropriate decompressor.
1929 1932 """
1930 1933 if not data:
1931 1934 return data
1932 1935
1933 1936 # Revlogs are read much more frequently than they are written and many
1934 1937 # chunks only take microseconds to decompress, so performance is
1935 1938 # important here.
1936 1939 #
1937 1940 # We can make a few assumptions about revlogs:
1938 1941 #
1939 1942 # 1) the majority of chunks will be compressed (as opposed to inline
1940 1943 # raw data).
1941 1944 # 2) decompressing *any* data will likely be at least 10x slower than
1942 1945 # returning raw inline data.
1943 1946 # 3) we want to prioritize common and officially supported compression
1944 1947 # engines
1945 1948 #
1946 1949 # It follows that we want to optimize for "decompress compressed data
1947 1950 # when encoded with common and officially supported compression engines"
1948 1951 # case over "raw data" and "data encoded by less common or non-official
1949 1952 # compression engines." That is why we have the inline lookup first
1950 1953 # followed by the compengines lookup.
1951 1954 #
1952 1955 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1953 1956 # compressed chunks. And this matters for changelog and manifest reads.
1954 1957 t = data[0:1]
1955 1958
1956 1959 if t == 'x':
1957 1960 try:
1958 1961 return _zlibdecompress(data)
1959 1962 except zlib.error as e:
1960 1963 raise error.RevlogError(_('revlog decompress error: %s') %
1961 1964 stringutil.forcebytestr(e))
1962 1965 # '\0' is more common than 'u' so it goes first.
1963 1966 elif t == '\0':
1964 1967 return data
1965 1968 elif t == 'u':
1966 1969 return util.buffer(data, 1)
1967 1970
1968 1971 try:
1969 1972 compressor = self._decompressors[t]
1970 1973 except KeyError:
1971 1974 try:
1972 1975 engine = util.compengines.forrevlogheader(t)
1973 1976 compressor = engine.revlogcompressor()
1974 1977 self._decompressors[t] = compressor
1975 1978 except KeyError:
1976 1979 raise error.RevlogError(_('unknown compression type %r') % t)
1977 1980
1978 1981 return compressor.decompress(data)
1979 1982
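The header-byte routing above can be exercised standalone with zlib; this is a simplified sketch (the real compress() returns a (header, data) pair rather than concatenating).

    import zlib

    def toy_compress(data):
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            return compressed               # zlib streams begin with 'x'
        if data[:1] == b'\0':
            return data                     # already self-describing
        return b'u' + data                  # stored literally, 'u' header

    def toy_decompress(chunk):
        t = chunk[:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'\0':
            return chunk
        if t == b'u':
            return chunk[1:]
        raise ValueError('unknown compression header %r' % t)

    payload = b'revlog chunk ' * 64
    assert toy_decompress(toy_compress(payload)) == payload
    assert toy_decompress(toy_compress(b'tiny')) == b'tiny'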
1980 1983 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1981 1984 cachedelta, ifh, dfh, alwayscache=False,
1982 1985 deltacomputer=None):
1983 1986 """internal function to add revisions to the log
1984 1987
1985 1988 see addrevision for argument descriptions.
1986 1989
1987 1990 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1988 1991
1989 1992 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1990 1993 be used.
1991 1994
1992 1995 invariants:
1993 1996 - rawtext is optional (can be None); if not set, cachedelta must be set.
1994 1997 if both are set, they must correspond to each other.
1995 1998 """
1996 1999 if node == nullid:
1997 2000 raise error.RevlogError(_("%s: attempt to add null revision") %
1998 2001 self.indexfile)
1999 2002 if node == wdirid or node in wdirfilenodeids:
2000 2003 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2001 2004 self.indexfile)
2002 2005
2003 2006 if self._inline:
2004 2007 fh = ifh
2005 2008 else:
2006 2009 fh = dfh
2007 2010
2008 2011 btext = [rawtext]
2009 2012
2010 2013 curr = len(self)
2011 2014 prev = curr - 1
2012 2015 offset = self.end(prev)
2013 2016 p1r, p2r = self.rev(p1), self.rev(p2)
2014 2017
2015 2018 # full versions are inserted when the needed deltas
2016 2019 # become comparable to the uncompressed text
2017 2020 if rawtext is None:
2018 2021 # need the rawtext size, before it was changed by flag processors,
2019 2022 # which is the non-raw size. use revlog explicitly to avoid filelog's
2020 2023 # extra logic that might remove metadata size.
2021 2024 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2022 2025 cachedelta[1])
2023 2026 else:
2024 2027 textlen = len(rawtext)
2025 2028
2026 2029 if deltacomputer is None:
2027 2030 deltacomputer = deltautil.deltacomputer(self)
2028 2031
2029 2032 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2030 2033
2031 2034 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2032 2035
2033 2036 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2034 2037 deltainfo.base, link, p1r, p2r, node)
2035 2038 self.index.append(e)
2036 2039 self.nodemap[node] = curr
2037 2040
2038 2041 # Reset the pure node cache start lookup offset to account for new
2039 2042 # revision.
2040 2043 if self._nodepos is not None:
2041 2044 self._nodepos = curr
2042 2045
2043 2046 entry = self._io.packentry(e, self.node, self.version, curr)
2044 2047 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2045 2048 link, offset)
2046 2049
2047 2050 rawtext = btext[0]
2048 2051
2049 2052 if alwayscache and rawtext is None:
2050 2053 rawtext = deltacomputer.buildtext(revinfo, fh)
2051 2054
2052 2055 if type(rawtext) == bytes: # only accept immutable objects
2053 2056 self._revisioncache = (node, curr, rawtext)
2054 2057 self._chainbasecache[curr] = deltainfo.chainbase
2055 2058 return node
2056 2059
2057 2060 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2058 2061 # Files opened in a+ mode have inconsistent behavior on various
2059 2062 # platforms. Windows requires that a file positioning call be made
2060 2063 # when the file handle transitions between reads and writes. See
2061 2064 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2062 2065 # platforms, Python or the platform itself can be buggy. Some versions
2063 2066 # of Solaris have been observed to not append at the end of the file
2064 2067 # if the file was seeked to before the end. See issue4943 for more.
2065 2068 #
2066 2069 # We work around this issue by inserting a seek() before writing.
2067 2070 # Note: This is likely not necessary on Python 3. However, because
2068 2071 # the file handle is reused for reads and may be seeked there, we need
2069 2072 # to be careful before changing this.
2070 2073 ifh.seek(0, os.SEEK_END)
2071 2074 if dfh:
2072 2075 dfh.seek(0, os.SEEK_END)
2073 2076
2074 2077 curr = len(self) - 1
2075 2078 if not self._inline:
2076 2079 transaction.add(self.datafile, offset)
2077 2080 transaction.add(self.indexfile, curr * len(entry))
2078 2081 if data[0]:
2079 2082 dfh.write(data[0])
2080 2083 dfh.write(data[1])
2081 2084 ifh.write(entry)
2082 2085 else:
2083 2086 offset += curr * self._io.size
2084 2087 transaction.add(self.indexfile, offset, curr)
2085 2088 ifh.write(entry)
2086 2089 ifh.write(data[0])
2087 2090 ifh.write(data[1])
2088 2091 self._enforceinlinesize(transaction, ifh)
2089 2092
2090 2093 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2091 2094 """
2092 2095 add a delta group
2093 2096
2094 2097 given a set of deltas, add them to the revision log. the
2095 2098 first delta is against its parent, which should be in our
2096 2099 log, the rest are against the previous delta.
2097 2100
2098 2101 If ``addrevisioncb`` is defined, it will be called with arguments of
2099 2102 this revlog and the node that was added.
2100 2103 """
2101 2104
2102 2105 if self._writinghandles:
2103 2106 raise error.ProgrammingError('cannot nest addgroup() calls')
2104 2107
2105 2108 nodes = []
2106 2109
2107 2110 r = len(self)
2108 2111 end = 0
2109 2112 if r:
2110 2113 end = self.end(r - 1)
2111 2114 ifh = self._indexfp("a+")
2112 2115 isize = r * self._io.size
2113 2116 if self._inline:
2114 2117 transaction.add(self.indexfile, end + isize, r)
2115 2118 dfh = None
2116 2119 else:
2117 2120 transaction.add(self.indexfile, isize, r)
2118 2121 transaction.add(self.datafile, end)
2119 2122 dfh = self._datafp("a+")
2120 2123 def flush():
2121 2124 if dfh:
2122 2125 dfh.flush()
2123 2126 ifh.flush()
2124 2127
2125 2128 self._writinghandles = (ifh, dfh)
2126 2129
2127 2130 try:
2128 2131 deltacomputer = deltautil.deltacomputer(self)
2129 2132 # loop through our set of deltas
2130 2133 for data in deltas:
2131 2134 node, p1, p2, linknode, deltabase, delta, flags = data
2132 2135 link = linkmapper(linknode)
2133 2136 flags = flags or REVIDX_DEFAULT_FLAGS
2134 2137
2135 2138 nodes.append(node)
2136 2139
2137 2140 if node in self.nodemap:
2138 2141 self._nodeduplicatecallback(transaction, node)
2139 2142 # this can happen if two branches make the same change
2140 2143 continue
2141 2144
2142 2145 for p in (p1, p2):
2143 2146 if p not in self.nodemap:
2144 2147 raise error.LookupError(p, self.indexfile,
2145 2148 _('unknown parent'))
2146 2149
2147 2150 if deltabase not in self.nodemap:
2148 2151 raise error.LookupError(deltabase, self.indexfile,
2149 2152 _('unknown delta base'))
2150 2153
2151 2154 baserev = self.rev(deltabase)
2152 2155
2153 2156 if baserev != nullrev and self.iscensored(baserev):
2154 2157 # if base is censored, delta must be full replacement in a
2155 2158 # single patch operation
2156 2159 hlen = struct.calcsize(">lll")
2157 2160 oldlen = self.rawsize(baserev)
2158 2161 newlen = len(delta) - hlen
2159 2162 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2160 2163 raise error.CensoredBaseError(self.indexfile,
2161 2164 self.node(baserev))
2162 2165
2163 2166 if not flags and self._peek_iscensored(baserev, delta, flush):
2164 2167 flags |= REVIDX_ISCENSORED
2165 2168
2166 2169 # We assume consumers of addrevisioncb will want to retrieve
2167 2170 # the added revision, which will require a call to
2168 2171 # revision(). revision() will fast path if there is a cache
2169 2172 # hit. So, we tell _addrevision() to always cache in this case.
2170 2173 # We're only using addgroup() in the context of changegroup
2171 2174 # generation so the revision data can always be handled as raw
2172 2175 # by the flagprocessor.
2173 2176 self._addrevision(node, None, transaction, link,
2174 2177 p1, p2, flags, (baserev, delta),
2175 2178 ifh, dfh,
2176 2179 alwayscache=bool(addrevisioncb),
2177 2180 deltacomputer=deltacomputer)
2178 2181
2179 2182 if addrevisioncb:
2180 2183 addrevisioncb(self, node)
2181 2184
2182 2185 if not dfh and not self._inline:
2183 2186 # addrevision switched from inline to conventional
2184 2187 # reopen the index
2185 2188 ifh.close()
2186 2189 dfh = self._datafp("a+")
2187 2190 ifh = self._indexfp("a+")
2188 2191 self._writinghandles = (ifh, dfh)
2189 2192 finally:
2190 2193 self._writinghandles = None
2191 2194
2192 2195 if dfh:
2193 2196 dfh.close()
2194 2197 ifh.close()
2195 2198
2196 2199 return nodes
2197 2200
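For orientation, each element of ``deltas`` is the 7-tuple unpacked in the loop above; a hedged sketch with placeholder hashes and patch bytes:

    nullid = b'\x00' * 20
    node = b'\x11' * 20           # hash of the revision being added
    p1, p2 = nullid, nullid       # parents, which must already be in the log
    linknode = b'\x22' * 20       # changelog node this revision belongs to
    deltabase = b'\x33' * 20      # node the delta applies against
    delta = b'<binary patch as produced by mdiff.textdiff>'
    flags = 0
    deltas = [(node, p1, p2, linknode, deltabase, delta, flags)]
    for data in deltas:           # mirrors the unpacking in addgroup()
        node, p1, p2, linknode, deltabase, delta, flags = data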
2198 2201 def iscensored(self, rev):
2199 2202 """Check if a file revision is censored."""
2200 2203 if not self._censorable:
2201 2204 return False
2202 2205
2203 2206 return self.flags(rev) & REVIDX_ISCENSORED
2204 2207
2205 2208 def _peek_iscensored(self, baserev, delta, flush):
2206 2209 """Quickly check if a delta produces a censored revision."""
2207 2210 if not self._censorable:
2208 2211 return False
2209 2212
2210 2213 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2211 2214
2212 2215 def getstrippoint(self, minlink):
2213 2216 """find the minimum rev that must be stripped to strip the linkrev
2214 2217
2215 2218 Returns a tuple containing the minimum rev and a set of all revs that
2216 2219 have linkrevs that will be broken by this strip.
2217 2220 """
2218 2221 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2219 2222 self.headrevs(),
2220 2223 self.linkrev, self.parentrevs)
2221 2224
2222 2225 def strip(self, minlink, transaction):
2223 2226 """truncate the revlog on the first revision with a linkrev >= minlink
2224 2227
2225 2228 This function is called when we're stripping revision minlink and
2226 2229 its descendants from the repository.
2227 2230
2228 2231 We have to remove all revisions with linkrev >= minlink, because
2229 2232 the equivalent changelog revisions will be renumbered after the
2230 2233 strip.
2231 2234
2232 2235 So we truncate the revlog on the first of these revisions, and
2233 2236 trust that the caller has saved the revisions that shouldn't be
2234 2237 removed and that it'll re-add them after this truncation.
2235 2238 """
2236 2239 if len(self) == 0:
2237 2240 return
2238 2241
2239 2242 rev, _ = self.getstrippoint(minlink)
2240 2243 if rev == len(self):
2241 2244 return
2242 2245
2243 2246 # first truncate the files on disk
2244 2247 end = self.start(rev)
2245 2248 if not self._inline:
2246 2249 transaction.add(self.datafile, end)
2247 2250 end = rev * self._io.size
2248 2251 else:
2249 2252 end += rev * self._io.size
2250 2253
2251 2254 transaction.add(self.indexfile, end)
2252 2255
2253 2256 # then reset internal state in memory to forget those revisions
2254 2257 self._revisioncache = None
2255 2258 self._chaininfocache = {}
2256 2259 self._chunkclear()
2257 2260 for x in pycompat.xrange(rev, len(self)):
2258 2261 del self.nodemap[self.node(x)]
2259 2262
2260 2263 del self.index[rev:-1]
2261 2264 self._nodepos = None
2262 2265
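A worked example of the truncation offsets above (sizes are made up): with separate .i/.d files the index is cut at whole entries, while an inline file interleaves entries and data.

    iosize = 64                   # one index entry
    rev = 10                      # first revision to strip
    data_end = 5000               # self.start(rev): where rev's data begins
    index_end_split = rev * iosize               # separate .i file: 640
    index_end_inline = data_end + rev * iosize   # inline file: 5640
    assert (index_end_split, index_end_inline) == (640, 5640)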
2263 2266 def checksize(self):
2264 2267 expected = 0
2265 2268 if len(self):
2266 2269 expected = max(0, self.end(len(self) - 1))
2267 2270
2268 2271 try:
2269 2272 with self._datafp() as f:
2270 2273 f.seek(0, 2)
2271 2274 actual = f.tell()
2272 2275 dd = actual - expected
2273 2276 except IOError as inst:
2274 2277 if inst.errno != errno.ENOENT:
2275 2278 raise
2276 2279 dd = 0
2277 2280
2278 2281 try:
2279 2282 f = self.opener(self.indexfile)
2280 2283 f.seek(0, 2)
2281 2284 actual = f.tell()
2282 2285 f.close()
2283 2286 s = self._io.size
2284 2287 i = max(0, actual // s)
2285 2288 di = actual - (i * s)
2286 2289 if self._inline:
2287 2290 databytes = 0
2288 2291 for r in self:
2289 2292 databytes += max(0, self.length(r))
2290 2293 dd = 0
2291 2294 di = actual - len(self) * s - databytes
2292 2295 except IOError as inst:
2293 2296 if inst.errno != errno.ENOENT:
2294 2297 raise
2295 2298 di = 0
2296 2299
2297 2300 return (dd, di)
2298 2301
2299 2302 def files(self):
2300 2303 res = [self.indexfile]
2301 2304 if not self._inline:
2302 2305 res.append(self.datafile)
2303 2306 return res
2304 2307
2305 2308 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2306 2309 assumehaveparentrevisions=False,
2307 2310 deltamode=repository.CG_DELTAMODE_STD):
2308 2311 if nodesorder not in ('nodes', 'storage', 'linear', None):
2309 2312 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2310 2313 nodesorder)
2311 2314
2312 2315 if nodesorder is None and not self._generaldelta:
2313 2316 nodesorder = 'storage'
2314 2317
2315 2318 if (not self._storedeltachains and
2316 2319 deltamode != repository.CG_DELTAMODE_PREV):
2317 2320 deltamode = repository.CG_DELTAMODE_FULL
2318 2321
2319 2322 return storageutil.emitrevisions(
2320 2323 self, nodes, nodesorder, revlogrevisiondelta,
2321 2324 deltaparentfn=self.deltaparent,
2322 2325 candeltafn=self.candelta,
2323 2326 rawsizefn=self.rawsize,
2324 2327 revdifffn=self.revdiff,
2325 2328 flagsfn=self.flags,
2326 2329 deltamode=deltamode,
2327 2330 revisiondata=revisiondata,
2328 2331 assumehaveparentrevisions=assumehaveparentrevisions)
2329 2332
2330 2333 DELTAREUSEALWAYS = 'always'
2331 2334 DELTAREUSESAMEREVS = 'samerevs'
2332 2335 DELTAREUSENEVER = 'never'
2333 2336
2334 2337 DELTAREUSEFULLADD = 'fulladd'
2335 2338
2336 2339 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2337 2340
2338 2341 def clone(self, tr, destrevlog, addrevisioncb=None,
2339 2342 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2340 2343 """Copy this revlog to another, possibly with format changes.
2341 2344
2342 2345 The destination revlog will contain the same revisions and nodes.
2343 2346 However, it may not be bit-for-bit identical due to e.g. delta encoding
2344 2347 differences.
2345 2348
2346 2349 The ``deltareuse`` argument controls how deltas from the existing revlog
2347 2350 are preserved in the destination revlog. The argument can have the
2348 2351 following values:
2349 2352
2350 2353 DELTAREUSEALWAYS
2351 2354 Deltas will always be reused (if possible), even if the destination
2352 2355 revlog would not select the same revisions for the delta. This is the
2353 2356 fastest mode of operation.
2354 2357 DELTAREUSESAMEREVS
2355 2358 Deltas will be reused if the destination revlog would pick the same
2356 2359 revisions for the delta. This mode strikes a balance between speed
2357 2360 and optimization.
2358 2361 DELTAREUSENEVER
2359 2362 Deltas will never be reused. This is the slowest mode of execution.
2360 2363 This mode can be used to recompute deltas (e.g. if the diff/delta
2361 2364 algorithm changes).
2362 2365
2363 2366 Delta computation can be slow, so the choice of delta reuse policy can
2364 2367 significantly affect run time.
2365 2368
2366 2369 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2367 2370 two extremes. Deltas will be reused if they are appropriate. But if the
2368 2371 delta could choose a better revision, it will do so. This means if you
2369 2372 are converting a non-generaldelta revlog to a generaldelta revlog,
2370 2373 deltas will be recomputed if the delta's parent isn't a parent of the
2371 2374 revision.
2372 2375
2373 2376 In addition to the delta policy, the ``forcedeltabothparents``
2374 2377 argument controls whether to force computing deltas against both parents
2375 2378 for merges. When unset, the destination revlog's current setting is used.
2376 2379 """
2377 2380 if deltareuse not in self.DELTAREUSEALL:
2378 2381 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2379 2382
2380 2383 if len(destrevlog):
2381 2384 raise ValueError(_('destination revlog is not empty'))
2382 2385
2383 2386 if getattr(self, 'filteredrevs', None):
2384 2387 raise ValueError(_('source revlog has filtered revisions'))
2385 2388 if getattr(destrevlog, 'filteredrevs', None):
2386 2389 raise ValueError(_('destination revlog has filtered revisions'))
2387 2390
2388 2391 # lazydeltabase controls whether to reuse a cached delta, if possible.
2389 2392 oldlazydeltabase = destrevlog._lazydeltabase
2390 2393 oldamd = destrevlog._deltabothparents
2391 2394
2392 2395 try:
2393 2396 if deltareuse == self.DELTAREUSEALWAYS:
2394 2397 destrevlog._lazydeltabase = True
2395 2398 elif deltareuse == self.DELTAREUSESAMEREVS:
2396 2399 destrevlog._lazydeltabase = False
2397 2400
2398 2401 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2399 2402
2400 2403 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2401 2404 self.DELTAREUSESAMEREVS)
2402 2405
2403 2406 deltacomputer = deltautil.deltacomputer(destrevlog)
2404 2407 index = self.index
2405 2408 for rev in self:
2406 2409 entry = index[rev]
2407 2410
2408 2411 # Some classes override linkrev to take filtered revs into
2409 2412 # account. Use raw entry from index.
2410 2413 flags = entry[0] & 0xffff
2411 2414 linkrev = entry[4]
2412 2415 p1 = index[entry[5]][7]
2413 2416 p2 = index[entry[6]][7]
2414 2417 node = entry[7]
2415 2418
2416 2419 # (Possibly) reuse the delta from the revlog if allowed and
2417 2420 # the revlog chunk is a delta.
2418 2421 cachedelta = None
2419 2422 rawtext = None
2420 2423 if populatecachedelta:
2421 2424 dp = self.deltaparent(rev)
2422 2425 if dp != nullrev:
2423 2426 cachedelta = (dp, bytes(self._chunk(rev)))
2424 2427
2425 2428 if not cachedelta:
2426 2429 rawtext = self.revision(rev, raw=True)
2427 2430
2428 2431
2429 2432 if deltareuse == self.DELTAREUSEFULLADD:
2430 2433 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2431 2434 cachedelta=cachedelta,
2432 2435 node=node, flags=flags,
2433 2436 deltacomputer=deltacomputer)
2434 2437 else:
2435 2438 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2436 2439 checkambig=False)
2437 2440 dfh = None
2438 2441 if not destrevlog._inline:
2439 2442 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2440 2443 try:
2441 2444 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2442 2445 p2, flags, cachedelta, ifh, dfh,
2443 2446 deltacomputer=deltacomputer)
2444 2447 finally:
2445 2448 if dfh:
2446 2449 dfh.close()
2447 2450 ifh.close()
2448 2451
2449 2452 if addrevisioncb:
2450 2453 addrevisioncb(self, rev, node)
2451 2454 finally:
2452 2455 destrevlog._lazydeltabase = oldlazydeltabase
2453 2456 destrevlog._deltabothparents = oldamd
2454 2457
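A toy restatement of how the policies above gate delta reuse; the helper name is illustrative, not revlog API.

    ALWAYS, SAMEREVS, NEVER = 'always', 'samerevs', 'never'

    def reusesourcedelta(policy):
        # ALWAYS and SAMEREVS pass the source chunk to the destination as a
        # cachedelta hint; ALWAYS additionally enables lazydeltabase so the
        # hint is taken even when the destination would pick another base.
        return policy in (ALWAYS, SAMEREVS)

    assert reusesourcedelta(SAMEREVS)
    assert not reusesourcedelta(NEVER)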
2455 2458 def censorrevision(self, tr, censornode, tombstone=b''):
2456 2459 if (self.version & 0xFFFF) == REVLOGV0:
2457 2460 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2458 2461 self.version)
2459 2462
2460 2463 censorrev = self.rev(censornode)
2461 2464 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2462 2465
2463 2466 if len(tombstone) > self.rawsize(censorrev):
2464 2467 raise error.Abort(_('censor tombstone must be no longer than '
2465 2468 'censored data'))
2466 2469
2467 2470 # Rewriting the revlog in place is hard. Our strategy for censoring is
2468 2471 # to create a new revlog, copy all revisions to it, then replace the
2469 2472 # revlogs on transaction close.
2470 2473
2471 2474 newindexfile = self.indexfile + b'.tmpcensored'
2472 2475 newdatafile = self.datafile + b'.tmpcensored'
2473 2476
2474 2477 # This is a bit dangerous. We could easily have a mismatch of state.
2475 2478 newrl = revlog(self.opener, newindexfile, newdatafile,
2476 2479 censorable=True)
2477 2480 newrl.version = self.version
2478 2481 newrl._generaldelta = self._generaldelta
2479 2482 newrl._io = self._io
2480 2483
2481 2484 for rev in self.revs():
2482 2485 node = self.node(rev)
2483 2486 p1, p2 = self.parents(node)
2484 2487
2485 2488 if rev == censorrev:
2486 2489 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2487 2490 p1, p2, censornode, REVIDX_ISCENSORED)
2488 2491
2489 2492 if newrl.deltaparent(rev) != nullrev:
2490 2493 raise error.Abort(_('censored revision stored as delta; '
2491 2494 'cannot censor'),
2492 2495 hint=_('censoring of revlogs is not '
2493 2496 'fully implemented; please report '
2494 2497 'this bug'))
2495 2498 continue
2496 2499
2497 2500 if self.iscensored(rev):
2498 2501 if self.deltaparent(rev) != nullrev:
2499 2502 raise error.Abort(_('cannot censor due to censored '
2500 2503 'revision having delta stored'))
2501 2504 rawtext = self._chunk(rev)
2502 2505 else:
2503 2506 rawtext = self.revision(rev, raw=True)
2504 2507
2505 2508 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2506 2509 self.flags(rev))
2507 2510
2508 2511 tr.addbackup(self.indexfile, location='store')
2509 2512 if not self._inline:
2510 2513 tr.addbackup(self.datafile, location='store')
2511 2514
2512 2515 self.opener.rename(newrl.indexfile, self.indexfile)
2513 2516 if not self._inline:
2514 2517 self.opener.rename(newrl.datafile, self.datafile)
2515 2518
2516 2519 self.clearcaches()
2517 2520 self._loadindex()
2518 2521
2519 2522 def verifyintegrity(self, state):
2520 2523 """Verifies the integrity of the revlog.
2521 2524
2522 2525 Yields ``revlogproblem`` instances describing problems that are
2523 2526 found.
2524 2527 """
2525 2528 dd, di = self.checksize()
2526 2529 if dd:
2527 2530 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2528 2531 if di:
2529 2532 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2530 2533
2531 2534 version = self.version & 0xFFFF
2532 2535
2533 2536 # The verifier tells us what version revlog we should be.
2534 2537 if version != state['expectedversion']:
2535 2538 yield revlogproblem(
2536 2539 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2537 2540 (self.indexfile, version, state['expectedversion']))
2538 2541
2539 2542 state['skipread'] = set()
2540 2543
2541 2544 for rev in self:
2542 2545 node = self.node(rev)
2543 2546
2544 2547 # Verify contents. 4 cases to care about:
2545 2548 #
2546 2549 # common: the most common case
2547 2550 # rename: with a rename
2548 2551 # meta: file content starts with b'\1\n', the metadata
2549 2552 # header defined in filelog.py, but without a rename
2550 2553 # ext: content stored externally
2551 2554 #
2552 2555 # More formally, their differences are shown below:
2553 2556 #
2554 2557 # | common | rename | meta | ext
2555 2558 # -------------------------------------------------------
2556 2559 # flags() | 0 | 0 | 0 | not 0
2557 2560 # renamed() | False | True | False | ?
2558 2561 # rawtext[0:2]=='\1\n'| False | True | True | ?
2559 2562 #
2560 2563 # "rawtext" means the raw text stored in revlog data, which
2561 2564 # could be retrieved by "revision(rev, raw=True)". "text"
2562 2565 # mentioned below is "revision(rev, raw=False)".
2563 2566 #
2564 2567 # There are 3 different lengths stored physically:
2565 2568 # 1. L1: rawsize, stored in revlog index
2566 2569 # 2. L2: len(rawtext), stored in revlog data
2567 2570 # 3. L3: len(text), stored in revlog data if flags==0, or
2568 2571 # possibly somewhere else if flags!=0
2569 2572 #
2570 2573 # L1 should be equal to L2. L3 could be different from them.
2571 2574 # "text" may or may not affect commit hash depending on flag
2572 2575 # processors (see revlog.addflagprocessor).
2573 2576 #
2574 2577 # | common | rename | meta | ext
2575 2578 # -------------------------------------------------
2576 2579 # rawsize() | L1 | L1 | L1 | L1
2577 2580 # size() | L1 | L2-LM | L1(*) | L1 (?)
2578 2581 # len(rawtext) | L2 | L2 | L2 | L2
2579 2582 # len(text) | L2 | L2 | L2 | L3
2580 2583 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2581 2584 #
2582 2585 # LM: length of metadata, depending on rawtext
2583 2586 # (*): not ideal, see comment in filelog.size
2584 2587 # (?): could be "- len(meta)" if the resolved content has
2585 2588 # rename metadata
2586 2589 #
2587 2590 # Checks needed to be done:
2588 2591 # 1. length check: L1 == L2, in all cases.
2589 2592 # 2. hash check: depending on flag processor, we may need to
2590 2593 # use either "text" (external), or "rawtext" (in revlog).
2591 2594
2592 2595 try:
2593 2596 skipflags = state.get('skipflags', 0)
2594 2597 if skipflags:
2595 2598 skipflags &= self.flags(rev)
2596 2599
2597 2600 if skipflags:
2598 2601 state['skipread'].add(node)
2599 2602 else:
2600 2603 # Side-effect: read content and verify hash.
2601 2604 self.revision(node)
2602 2605
2603 2606 l1 = self.rawsize(rev)
2604 2607 l2 = len(self.revision(node, raw=True))
2605 2608
2606 2609 if l1 != l2:
2607 2610 yield revlogproblem(
2608 2611 error=_('unpacked size is %d, %d expected') % (l2, l1),
2609 2612 node=node)
2610 2613
2611 2614 except error.CensoredNodeError:
2612 2615 if state['erroroncensored']:
2613 2616 yield revlogproblem(error=_('censored file data'),
2614 2617 node=node)
2615 2618 state['skipread'].add(node)
2616 2619 except Exception as e:
2617 2620 yield revlogproblem(
2618 2621 error=_('unpacking %s: %s') % (short(node),
2619 2622 stringutil.forcebytestr(e)),
2620 2623 node=node)
2621 2624 state['skipread'].add(node)
2622 2625
2623 2626 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2624 2627 revisionscount=False, trackedsize=False,
2625 2628 storedsize=False):
2626 2629 d = {}
2627 2630
2628 2631 if exclusivefiles:
2629 2632 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2630 2633 if not self._inline:
2631 2634 d['exclusivefiles'].append((self.opener, self.datafile))
2632 2635
2633 2636 if sharedfiles:
2634 2637 d['sharedfiles'] = []
2635 2638
2636 2639 if revisionscount:
2637 2640 d['revisionscount'] = len(self)
2638 2641
2639 2642 if trackedsize:
2640 2643 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2641 2644
2642 2645 if storedsize:
2643 2646 d['storedsize'] = sum(self.opener.stat(path).st_size
2644 2647 for path in self.files())
2645 2648
2646 2649 return d
@@ -1,1015 +1,1016
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import (
17 17 nullrev,
18 18 )
19 19 from ..i18n import _
20 20
21 21 from .constants import (
22 22 REVIDX_ISCENSORED,
23 23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 24 )
25 25
26 26 from ..thirdparty import (
27 27 attr,
28 28 )
29 29
30 30 from .. import (
31 31 error,
32 32 mdiff,
33 33 util,
34 34 )
35 35
36 36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 37 LIMIT_DELTA2TEXT = 2
38 38
39 39 class _testrevlog(object):
40 40 """minimalist fake revlog to use in doctests"""
41 41
42 42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
43 43 """data is a list of revision payload boundaries"""
44 44 self._data = data
45 45 self._srdensitythreshold = density
46 46 self._srmingapsize = mingap
47 47 self._snapshot = set(snapshot)
48 48 self.index = None
49 49
50 50 def start(self, rev):
51 51 if rev == nullrev:
52 52 return 0
53 53 if rev == 0:
54 54 return 0
55 55 return self._data[rev - 1]
56 56
57 57 def end(self, rev):
58 58 if rev == nullrev:
59 59 return 0
60 60 return self._data[rev]
61 61
62 62 def length(self, rev):
63 63 return self.end(rev) - self.start(rev)
64 64
65 65 def __len__(self):
66 66 return len(self._data)
67 67
68 68 def issnapshot(self, rev):
69 69 if rev == nullrev:
70 70 return True
71 71 return rev in self._snapshot
72 72
73 73 def slicechunk(revlog, revs, targetsize=None):
74 74 """slice revs to reduce the amount of unrelated data to be read from disk.
75 75
76 76 ``revs`` is sliced into groups that should be read at once.
77 77 Assume that revs are sorted.
78 78
79 79 The initial chunk is sliced until the overall density (payload/chunks-span
80 80 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
81 81 `revlog._srmingapsize` is skipped.
82 82
83 83 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
84 84 For consistency with other slicing choice, this limit won't go lower than
85 85 `revlog._srmingapsize`.
86 86
87 87 If individual revision chunks are larger than this limit, they will still
88 88 be yielded individually.
89 89
90 90 >>> data = [
91 91 ... 5, #00 (5)
92 92 ... 10, #01 (5)
93 93 ... 12, #02 (2)
94 94 ... 12, #03 (empty)
95 95 ... 27, #04 (15)
96 96 ... 31, #05 (4)
97 97 ... 31, #06 (empty)
98 98 ... 42, #07 (11)
99 99 ... 47, #08 (5)
100 100 ... 47, #09 (empty)
101 101 ... 48, #10 (1)
102 102 ... 51, #11 (3)
103 103 ... 74, #12 (23)
104 104 ... 85, #13 (11)
105 105 ... 86, #14 (1)
106 106 ... 91, #15 (5)
107 107 ... ]
108 108 >>> revlog = _testrevlog(data, snapshot=range(16))
109 109
110 110 >>> list(slicechunk(revlog, list(range(16))))
111 111 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
112 112 >>> list(slicechunk(revlog, [0, 15]))
113 113 [[0], [15]]
114 114 >>> list(slicechunk(revlog, [0, 11, 15]))
115 115 [[0], [11], [15]]
116 116 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
117 117 [[0], [11, 13, 15]]
118 118 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
119 119 [[1, 2], [5, 8, 10, 11], [14]]
120 120
121 121 Slicing with a maximum chunk size
122 122 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
123 123 [[0], [11], [13], [15]]
124 124 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
125 125 [[0], [11], [13, 15]]
126 126
127 127 Slicing involving nullrev
128 128 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
129 129 [[-1, 0], [11], [13, 15]]
130 130 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
131 131 [[-1], [13], [15]]
132 132 """
133 133 if targetsize is not None:
134 134 targetsize = max(targetsize, revlog._srmingapsize)
135 135 # targetsize should not be specified when evaluating delta candidates:
136 136 # * targetsize is used to ensure we stay within specification when reading,
137 137 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
138 138 if densityslicing is None:
139 139 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
140 140 for chunk in densityslicing(revs,
141 141 revlog._srdensitythreshold,
142 142 revlog._srmingapsize):
143 143 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
144 144 yield subchunk
145 145
146 146 def _slicechunktosize(revlog, revs, targetsize=None):
147 147 """slice revs to match the target size
148 148
149 149 This is intended to be used on chunks that density slicing selected but
150 150 that are still too large compared to the read guarantee of revlog. This
151 151 might happen when "minimal gap size" interrupted the slicing or when
152 152 chains are built in a way that creates large blocks next to each other.
153 153
154 154 >>> data = [
155 155 ... 3, #0 (3)
156 156 ... 5, #1 (2)
157 157 ... 6, #2 (1)
158 158 ... 8, #3 (2)
159 159 ... 8, #4 (empty)
160 160 ... 11, #5 (3)
161 161 ... 12, #6 (1)
162 162 ... 13, #7 (1)
163 163 ... 14, #8 (1)
164 164 ... ]
165 165
166 166 == All snapshots cases ==
167 167 >>> revlog = _testrevlog(data, snapshot=range(9))
168 168
169 169 Cases where chunk is already small enough
170 170 >>> list(_slicechunktosize(revlog, [0], 3))
171 171 [[0]]
172 172 >>> list(_slicechunktosize(revlog, [6, 7], 3))
173 173 [[6, 7]]
174 174 >>> list(_slicechunktosize(revlog, [0], None))
175 175 [[0]]
176 176 >>> list(_slicechunktosize(revlog, [6, 7], None))
177 177 [[6, 7]]
178 178
179 179 cases where we need actual slicing
180 180 >>> list(_slicechunktosize(revlog, [0, 1], 3))
181 181 [[0], [1]]
182 182 >>> list(_slicechunktosize(revlog, [1, 3], 3))
183 183 [[1], [3]]
184 184 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
185 185 [[1, 2], [3]]
186 186 >>> list(_slicechunktosize(revlog, [3, 5], 3))
187 187 [[3], [5]]
188 188 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
189 189 [[3], [5]]
190 190 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
191 191 [[5], [6, 7, 8]]
192 192 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
193 193 [[0], [1, 2], [3], [5], [6, 7, 8]]
194 194
195 195 Case with too large individual chunk (must return valid chunk)
196 196 >>> list(_slicechunktosize(revlog, [0, 1], 2))
197 197 [[0], [1]]
198 198 >>> list(_slicechunktosize(revlog, [1, 3], 1))
199 199 [[1], [3]]
200 200 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
201 201 [[3], [5]]
202 202
203 203 == No Snapshot cases ==
204 204 >>> revlog = _testrevlog(data)
205 205
206 206 Cases where chunk is already small enough
207 207 >>> list(_slicechunktosize(revlog, [0], 3))
208 208 [[0]]
209 209 >>> list(_slicechunktosize(revlog, [6, 7], 3))
210 210 [[6, 7]]
211 211 >>> list(_slicechunktosize(revlog, [0], None))
212 212 [[0]]
213 213 >>> list(_slicechunktosize(revlog, [6, 7], None))
214 214 [[6, 7]]
215 215
216 216 cases where we need actual slicing
217 217 >>> list(_slicechunktosize(revlog, [0, 1], 3))
218 218 [[0], [1]]
219 219 >>> list(_slicechunktosize(revlog, [1, 3], 3))
220 220 [[1], [3]]
221 221 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
222 222 [[1], [2, 3]]
223 223 >>> list(_slicechunktosize(revlog, [3, 5], 3))
224 224 [[3], [5]]
225 225 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
226 226 [[3], [4, 5]]
227 227 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
228 228 [[5], [6, 7, 8]]
229 229 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
230 230 [[0], [1, 2], [3], [5], [6, 7, 8]]
231 231
232 232 Case with too large individual chunk (must return valid chunk)
233 233 >>> list(_slicechunktosize(revlog, [0, 1], 2))
234 234 [[0], [1]]
235 235 >>> list(_slicechunktosize(revlog, [1, 3], 1))
236 236 [[1], [3]]
237 237 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
238 238 [[3], [5]]
239 239
240 240 == mixed case ==
241 241 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
242 242 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
243 243 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
244 244 """
245 245 assert targetsize is None or 0 <= targetsize
246 246 startdata = revlog.start(revs[0])
247 247 enddata = revlog.end(revs[-1])
248 248 fullspan = enddata - startdata
249 249 if targetsize is None or fullspan <= targetsize:
250 250 yield revs
251 251 return
252 252
253 253 startrevidx = 0
254 254 endrevidx = 1
255 255 iterrevs = enumerate(revs)
256 256 next(iterrevs) # skip first rev.
257 257 # first step: get snapshots out of the way
258 258 for idx, r in iterrevs:
259 259 span = revlog.end(r) - startdata
260 260 snapshot = revlog.issnapshot(r)
261 261 if span <= targetsize and snapshot:
262 262 endrevidx = idx + 1
263 263 else:
264 264 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
265 265 if chunk:
266 266 yield chunk
267 267 startrevidx = idx
268 268 startdata = revlog.start(r)
269 269 endrevidx = idx + 1
270 270 if not snapshot:
271 271 break
272 272
273 273 # for the others, we use binary slicing to quickly converge toward valid
274 274 # chunks (otherwise, we might end up looking for start/end of many
275 275 # revisions). This logic is not looking for the perfect slicing point, it
276 276 # focuses on quickly converging toward valid chunks.
277 277 nbitem = len(revs)
278 278 while (enddata - startdata) > targetsize:
279 279 endrevidx = nbitem
280 280 if nbitem - startrevidx <= 1:
281 281 break # protect against individual chunk larger than limit
282 282 localenddata = revlog.end(revs[endrevidx - 1])
283 283 span = localenddata - startdata
284 284 while span > targetsize:
285 285 if endrevidx - startrevidx <= 1:
286 286 break # protect against individual chunk larger than limit
287 287 endrevidx -= (endrevidx - startrevidx) // 2
288 288 localenddata = revlog.end(revs[endrevidx - 1])
289 289 span = localenddata - startdata
290 290 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
291 291 if chunk:
292 292 yield chunk
293 293 startrevidx = endrevidx
294 294 startdata = revlog.start(revs[startrevidx])
295 295
296 296 chunk = _trimchunk(revlog, revs, startrevidx)
297 297 if chunk:
298 298 yield chunk
299 299
300 300 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
301 301 mingapsize=0):
302 302 """slice revs to reduce the amount of unrelated data to be read from disk.
303 303
304 304 ``revs`` is sliced into groups that should be read at once.
305 305 Assume that revs are sorted.
306 306
307 307 The initial chunk is sliced until the overall density (payload/chunks-span
308 308 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
309 309 skipped.
310 310
311 311 >>> revlog = _testrevlog([
312 312 ... 5, #00 (5)
313 313 ... 10, #01 (5)
314 314 ... 12, #02 (2)
315 315 ... 12, #03 (empty)
316 316 ... 27, #04 (15)
317 317 ... 31, #05 (4)
318 318 ... 31, #06 (empty)
319 319 ... 42, #07 (11)
320 320 ... 47, #08 (5)
321 321 ... 47, #09 (empty)
322 322 ... 48, #10 (1)
323 323 ... 51, #11 (3)
324 324 ... 74, #12 (23)
325 325 ... 85, #13 (11)
326 326 ... 86, #14 (1)
327 327 ... 91, #15 (5)
328 328 ... ])
329 329
330 330 >>> list(_slicechunktodensity(revlog, list(range(16))))
331 331 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
332 332 >>> list(_slicechunktodensity(revlog, [0, 15]))
333 333 [[0], [15]]
334 334 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
335 335 [[0], [11], [15]]
336 336 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
337 337 [[0], [11, 13, 15]]
338 338 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
339 339 [[1, 2], [5, 8, 10, 11], [14]]
340 340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
341 341 ... mingapsize=20))
342 342 [[1, 2, 3, 5, 8, 10, 11], [14]]
343 343 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
344 344 ... targetdensity=0.95))
345 345 [[1, 2], [5], [8, 10, 11], [14]]
346 346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
347 347 ... targetdensity=0.95, mingapsize=12))
348 348 [[1, 2], [5, 8, 10, 11], [14]]
349 349 """
350 350 start = revlog.start
351 351 length = revlog.length
352 352
353 353 if len(revs) <= 1:
354 354 yield revs
355 355 return
356 356
357 357 deltachainspan = segmentspan(revlog, revs)
358 358
359 359 if deltachainspan < mingapsize:
360 360 yield revs
361 361 return
362 362
363 363 readdata = deltachainspan
364 364 chainpayload = sum(length(r) for r in revs)
365 365
366 366 if deltachainspan:
367 367 density = chainpayload / float(deltachainspan)
368 368 else:
369 369 density = 1.0
370 370
371 371 if density >= targetdensity:
372 372 yield revs
373 373 return
374 374
375 375 # Collect the gaps so they can be sorted and popped largest-first
376 376 gaps = []
377 377 prevend = None
378 378 for i, rev in enumerate(revs):
379 379 revstart = start(rev)
380 380 revlen = length(rev)
381 381
382 382 # Skip empty revisions to form larger holes
383 383 if revlen == 0:
384 384 continue
385 385
386 386 if prevend is not None:
387 387 gapsize = revstart - prevend
388 388 # only consider holes that are large enough
389 389 if gapsize > mingapsize:
390 390 gaps.append((gapsize, i))
391 391
392 392 prevend = revstart + revlen
393 393 # sort the gaps to pop them from largest to smallest
394 394 gaps.sort()
395 395
396 396 # Collect the indices of the largest holes until the density is acceptable
397 397 selected = []
398 398 while gaps and density < targetdensity:
399 399 gapsize, gapidx = gaps.pop()
400 400
401 401 selected.append(gapidx)
402 402
403 403 # removing the gap shrinks the amount of data that has to be read;
404 404 # recompute the density with the remaining span
405 405 readdata -= gapsize
406 406 if readdata > 0:
407 407 density = chainpayload / float(readdata)
408 408 else:
409 409 density = 1.0
410 410 selected.sort()
411 411
412 412 # Cut the revs at collected indices
413 413 previdx = 0
414 414 for idx in selected:
415 415
416 416 chunk = _trimchunk(revlog, revs, previdx, idx)
417 417 if chunk:
418 418 yield chunk
419 419
420 420 previdx = idx
421 421
422 422 chunk = _trimchunk(revlog, revs, previdx)
423 423 if chunk:
424 424 yield chunk
425 425
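A worked pass of the density loop above on the doctest data: revs [1, 2, 3, 5, 8, 10, 11, 14] span bytes 5..86 of the data file.

    chainpayload = 5 + 2 + 0 + 4 + 5 + 1 + 3 + 1   # sum of length(r) = 21
    deltachainspan = 86 - 5                        # end(14) - start(1) = 81
    density = chainpayload / float(deltachainspan) # ~0.26, under the 0.5 target
    # gaps between the selected revs are 15, 11 and 34 bytes wide; popping
    # the two largest (34, then 15) raises the density of the remaining
    # reads to 21/32 ~= 0.66, which is why the doctest above yields
    # [[1, 2], [5, 8, 10, 11], [14]]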
426 426 def _trimchunk(revlog, revs, startidx, endidx=None):
427 427 """returns revs[startidx:endidx] without empty trailing revs
428 428
429 429 Doctest Setup
430 430 >>> revlog = _testrevlog([
431 431 ... 5, #0
432 432 ... 10, #1
433 433 ... 12, #2
434 434 ... 12, #3 (empty)
435 435 ... 17, #4
436 436 ... 21, #5
437 437 ... 21, #6 (empty)
438 438 ... ])
439 439
440 440 Contiguous cases:
441 441 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
442 442 [0, 1, 2, 3, 4, 5]
443 443 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
444 444 [0, 1, 2, 3, 4]
445 445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
446 446 [0, 1, 2]
447 447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
448 448 [2]
449 449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
450 450 [3, 4, 5]
451 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
452 452 [3, 4]
453 453
454 454 Discontiguous cases:
455 455 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
456 456 [1, 3, 5]
457 457 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
458 458 [1]
459 459 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
460 460 [3, 5]
461 461 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
462 462 [3, 5]
463 463 """
464 464 length = revlog.length
465 465
466 466 if endidx is None:
467 467 endidx = len(revs)
468 468
469 469 # If we have a non-empty delta candidate, there is nothing to trim
470 470 if revs[endidx - 1] < len(revlog):
471 471 # Trim empty revs at the end, except the very first revision of a chain
472 472 while (endidx > 1
473 473 and endidx > startidx
474 474 and length(revs[endidx - 1]) == 0):
475 475 endidx -= 1
476 476
477 477 return revs[startidx:endidx]
478 478
479 479 def segmentspan(revlog, revs):
480 480 """Get the byte span of a segment of revisions
481 481
482 482 revs is a sorted array of revision numbers
483 483
484 484 >>> revlog = _testrevlog([
485 485 ... 5, #0
486 486 ... 10, #1
487 487 ... 12, #2
488 488 ... 12, #3 (empty)
489 489 ... 17, #4
490 490 ... ])
491 491
492 492 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
493 493 17
494 494 >>> segmentspan(revlog, [0, 4])
495 495 17
496 496 >>> segmentspan(revlog, [3, 4])
497 497 5
498 498 >>> segmentspan(revlog, [1, 2, 3,])
499 499 7
500 500 >>> segmentspan(revlog, [1, 3])
501 501 7
502 502 """
503 503 if not revs:
504 504 return 0
505 505 end = revlog.end(revs[-1])
506 506 return end - revlog.start(revs[0])
507 507
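As a worked instance of the formula above, the segmentspan(revlog, [1, 3])
doctest can be reproduced by hand; the arrays below are derived from the
cumulative sizes in the doctest setup:

    # cumulative end offsets from the doctest setup
    ends = [5, 10, 12, 12, 17]
    starts = [0] + ends[:-1]      # each rev starts where the previous ends
    span = ends[3] - starts[1]    # end(revs[-1]) - start(revs[0])
    assert span == 7              # matches the doctest result

Note that the span counts the bytes of every revision lying between the first
and last requested revisions, whether requested or not.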
508 508 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
509 509 """build full text from a (base, delta) pair and other metadata"""
510 510 # special case deltas which replace entire base; no need to decode
511 511 # base revision. this neatly avoids censored bases, which throw when
512 512 # they're decoded.
513 513 hlen = struct.calcsize(">lll")
514 514 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
515 515 len(delta) - hlen):
516 516 fulltext = delta[hlen:]
517 517 else:
518 518 # deltabase is rawtext before being changed by flag processors, which is
519 519 # equivalent to non-raw text
520 520 basetext = revlog.revision(baserev, _df=fh, raw=False)
521 521 fulltext = mdiff.patch(basetext, delta)
522 522
523 523 try:
524 524 res = revlog._processflags(fulltext, flags, 'read', raw=True)
525 525 fulltext, validatehash = res
526 526 if validatehash:
527 527 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
528 528 if flags & REVIDX_ISCENSORED:
529 529 raise error.StorageError(_('node %s is not censored') %
530 530 expectednode)
531 531 except error.CensoredNodeError:
532 532 # must pass the censored index flag to add censored revisions
533 533 if not flags & REVIDX_ISCENSORED:
534 534 raise
535 535 return fulltext
536 536
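For context on the header comparison above: a bdiff-style delta is a sequence
of chunks, each introduced by three big-endian longs (start and end offsets in
the base, then the length of the replacement data), so a delta replacing the
entire base is a single chunk covering base[0:baselen]. A small sketch
mirroring (not importing) mdiff.replacediffheader:

    import struct

    def full_replace_header(baselen, newlen):
        # one chunk rewriting base[0:baselen] with newlen bytes of new data;
        # the new text itself follows this 12-byte header in the delta stream
        return struct.pack(">lll", 0, baselen, newlen)

_textfromdelta can therefore recognize a full-replacement delta from its first
12 bytes alone and skip decoding the base revision entirely.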
537 537 @attr.s(slots=True, frozen=True)
538 538 class _deltainfo(object):
539 539 distance = attr.ib()
540 540 deltalen = attr.ib()
541 541 data = attr.ib()
542 542 base = attr.ib()
543 543 chainbase = attr.ib()
544 544 chainlen = attr.ib()
545 545 compresseddeltalen = attr.ib()
546 546 snapshotdepth = attr.ib()
547 547
548 548 def isgooddeltainfo(revlog, deltainfo, revinfo):
549 549 """Returns True if the given delta is good. Good means that it is within
550 550 the disk span, disk size, and chain length bounds that we know to be
551 551 performant."""
552 552 if deltainfo is None:
553 553 return False
554 554
555 555 # - 'deltainfo.distance' is the distance from the base revision --
556 556 # bounding it limits the amount of I/O we need to do.
557 557 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
558 558 # deltas we need to apply -- bounding it limits the amount of CPU
559 559 # we consume.
560 560
561 561 textlen = revinfo.textlen
562 562 defaultmax = textlen * 4
563 563 maxdist = revlog._maxdeltachainspan
564 564 if not maxdist:
565 565 maxdist = deltainfo.distance # ensure the conditional passes
566 566 maxdist = max(maxdist, defaultmax)
567 567
568 568 # Bad delta from read span:
569 569 #
570 570 # If the span of data read is larger than the maximum allowed.
571 571 #
572 572 # In the sparse-revlog case, we rely on the associated "sparse reading"
573 573 # to avoid issues related to the span of data. In theory, it would be
574 574 # possible to build a pathological revlog where the delta pattern would
575 575 # lead to too many reads. However, this does not happen in practice at
576 576 # all, so we skip the span check entirely.
577 577 if not revlog._sparserevlog and maxdist < deltainfo.distance:
578 578 return False
579 579
580 580 # Bad delta from new delta size:
581 581 #
582 582 # If the delta size is larger than the target text, storing the
583 583 # delta will be inefficient.
584 584 if textlen < deltainfo.deltalen:
585 585 return False
586 586
587 587 # Bad delta from cumulated payload size:
588 588 #
589 589 # If the sum of the deltas gets larger than K * the target text length.
590 590 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
591 591 return False
592 592
593 593 # Bad delta from chain length:
594 594 #
595 595 # If the number of deltas in the chain gets too high.
596 596 if (revlog._maxchainlen
597 597 and revlog._maxchainlen < deltainfo.chainlen):
598 598 return False
599 599
600 600 # bad delta from intermediate snapshot size limit
601 601 #
602 602 # If an intermediate snapshot's size is higher than the limit. The
603 603 # limit exists to prevent endless chains of intermediate deltas from
604 604 # being created.
605 605 if (deltainfo.snapshotdepth is not None and
606 606 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
607 607 return False
608 608
609 609 # bad delta if new intermediate snapshot is larger than the previous
610 610 # snapshot
611 611 if (deltainfo.snapshotdepth
612 612 and revlog.length(deltainfo.base) < deltainfo.deltalen):
613 613 return False
614 614
615 615 return True
616 616
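A small numeric sketch of the snapshot-depth bound above, using hypothetical
values chosen only to illustrate the checks:

    # hypothetical numbers, purely illustrative
    textlen = 1000       # size of the incoming revision's text
    deltalen = 400       # size of the candidate delta
    snapshotdepth = 2    # candidate would be a depth-2 intermediate snapshot

    ok_size = deltalen <= textlen                      # True
    ok_depth = (textlen >> snapshotdepth) >= deltalen  # 250 >= 400: False

Each extra snapshot level halves the allowed delta size, so this candidate
passes the plain size bound but fails the depth bound, and isgooddeltainfo
would reject it.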
617 617 # If a revision's full text is that much bigger than a base candidate's
618 618 # full text, it is very unlikely that it will produce a valid delta. We
619 619 # no longer consider these candidates.
620 620 LIMIT_BASE2TEXT = 500
621 621
622 622 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
623 623 """Provides group of revision to be tested as delta base
624 624
625 625 This top level function focus on emitting groups with unique and worthwhile
626 626 content. See _raw_candidate_groups for details about the group order.
627 627 """
628 628 # should we try to build a delta?
629 629 if not (len(revlog) and revlog._storedeltachains):
630 630 yield None
631 631 return
632 632
633 633 deltalength = revlog.length
634 634 deltaparent = revlog.deltaparent
635 635 sparse = revlog._sparserevlog
636 636 good = None
637 637
638 638 deltas_limit = textlen * LIMIT_DELTA2TEXT
639 639
640 640 tested = set([nullrev])
641 641 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
642 642 while True:
643 643 tentative = candidates.send(good)
644 644 if tentative is None:
645 645 break
646 646 group = []
647 647 for rev in tentative:
648 648 # skip over empty deltas (no need to include them in a chain)
649 649 while (revlog._generaldelta
650 650 and not (rev == nullrev
651 651 or rev in tested
652 652 or deltalength(rev))):
653 653 tested.add(rev)
654 654 rev = deltaparent(rev)
655 655 # no need to try a delta against nullrev, this will be done as a
656 656 # last resort.
657 657 if rev == nullrev:
658 658 continue
659 659 # filter out revisions we tested already
660 660 if rev in tested:
661 661 continue
662 662 tested.add(rev)
663 663 # filter out delta bases that will never produce a good delta
664 664 if deltas_limit < revlog.length(rev):
665 665 continue
666 666 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
667 667 continue
668 668 # no delta for rawtext-changing revs (see "candelta" for why)
669 669 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
670 670 continue
671 671 # If we reach here, we are about to build and test a delta.
672 672 # The delta building process will compute the chaininfo in all
673 673 # cases; since that computation is cached, it is fine to access it
674 674 # here too.
675 675 chainlen, chainsize = revlog._chaininfo(rev)
676 676 # if the chain would be too long, skip this base
677 677 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
678 678 continue
679 679 # if the chain already has too much data, skip this base
680 680 if deltas_limit < chainsize:
681 681 continue
682 682 group.append(rev)
683 683 if group:
684 684 # XXX: in the sparse revlog case, group can become large,
685 685 # impacting performance. Some bounding or slicing mechanism
686 686 # would help to reduce this impact.
687 687 good = yield tuple(group)
688 688 yield None
689 689
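The send()-based protocol used here can be surprising: the consumer (see
finddeltainfo below) feeds the base it accepted back into the generator, which
uses it to refine the next group. A toy, self-contained sketch of the shape of
that exchange, with made-up groups:

    def toygroups():
        # made-up candidate groups; a non-None value sent back means
        # "this base was acceptable", so refinement can stop early
        good = yield (10, 7)
        if good is None:
            good = yield (3, 2)   # fallback guesses
        yield None                # no candidates left

    groups = toygroups()
    candidates = next(groups)
    while candidates is not None:
        best = max(candidates)         # stand-in for the real quality test
        candidates = groups.send(best)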
690 690 def _findsnapshots(revlog, cache, start_rev):
691 691 """find snapshot from start_rev to tip"""
692 692 if util.safehasattr(revlog.index, 'findsnapshots'):
693 693 revlog.index.findsnapshots(cache, start_rev)
694 694 else:
695 695 deltaparent = revlog.deltaparent
696 696 issnapshot = revlog.issnapshot
697 697 for rev in revlog.revs(start_rev):
698 698 if issnapshot(rev):
699 699 cache[deltaparent(rev)].append(rev)
700 700
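The cache filled here maps a delta parent to the snapshots built directly on
top of it. A toy illustration with invented revision numbers:

    import collections

    cache = collections.defaultdict(list)
    # invented data: rev -> delta parent, for revs that are snapshots
    toy_snapshots = {0: -1, 4: -1, 7: 4}
    for rev in sorted(toy_snapshots):
        cache[toy_snapshots[rev]].append(rev)
    # cache == {-1: [0, 4], 4: [7]}: rev 7 is an intermediate snapshot
    # chained on top of the full snapshot at rev 4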
701 701 def _refinedgroups(revlog, p1, p2, cachedelta):
702 702 good = None
703 703 # First we try to reuse the delta contained in the bundle.
704 704 # (or from the source revlog)
705 705 #
706 706 # This logic only applies to general delta repositories and can be disabled
707 707 # through configuration. Disabling source delta reuse is useful when
708 708 # we want to make sure we recompute "optimal" deltas.
709 709 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
710 710 # Assume what we received from the server is a good choice
711 711 # build delta will reuse the cache
712 712 good = yield (cachedelta[0],)
713 713 if good is not None:
714 714 yield None
715 715 return
716 716 snapshots = collections.defaultdict(list)
717 717 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
718 718 good = yield candidates
719 719 if good is not None:
720 720 break
721 721
722 722 # If sparse revlog is enabled, we can try to refine the available deltas
723 723 if not revlog._sparserevlog:
724 724 yield None
725 725 return
726 726
727 727 # if we have a refinable value, try to refine it
728 728 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
729 729 # refine snapshot down
730 730 previous = None
731 731 while previous != good:
732 732 previous = good
733 733 base = revlog.deltaparent(good)
734 734 if base == nullrev:
735 735 break
736 736 good = yield (base,)
737 737 # refine snapshot up
738 738 if not snapshots:
739 739 _findsnapshots(revlog, snapshots, good + 1)
740 740 previous = None
741 741 while good != previous:
742 742 previous = good
743 743 children = tuple(sorted(c for c in snapshots[good]))
744 744 good = yield children
745 745
746 746 # we have found nothing
747 747 yield None
748 748
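The two refinement loops in _refinedgroups walk the snapshot hierarchy around
an accepted base: first toward the root via deltaparent ("refine down"), then
toward the leaves via the snapshot-children cache ("refine up"). A schematic
of the downward walk over an invented chain; in the real code each step only
happens if the caller accepts the proposed base:

    # invented snapshot chain: 12 -> 8 -> 3 -> nullrev (deltaparent links)
    deltaparent = {12: 8, 8: 3, 3: -1}
    good = 12
    while deltaparent.get(good, -1) != -1:
        good = deltaparent[good]   # propose the base one level down
    # good == 3; its snapshot children would then be proposed ("refine up")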
749 749 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
750 750 """Provides group of revision to be tested as delta base
751 751
752 752 This lower level function focus on emitting delta theorically interresting
753 753 without looking it any practical details.
754 754
755 755 The group order aims at providing fast or small candidates first.
756 756 """
757 757 gdelta = revlog._generaldelta
758 sparse = revlog._sparserevlog
758 # gate sparse behind general-delta because of issue6056
759 sparse = gdelta and revlog._sparserevlog
759 760 curr = len(revlog)
760 761 prev = curr - 1
761 762 deltachain = lambda rev: revlog._deltachain(rev)[0]
762 763
763 764 if gdelta:
764 765 # exclude already lazy tested base if any
765 766 parents = [p for p in (p1, p2) if p != nullrev]
766 767
767 768 if not revlog._deltabothparents and len(parents) == 2:
768 769 parents.sort()
769 770 # To minimize the chance of having to build a fulltext,
770 771 # pick first whichever parent is closest to us (max rev)
771 772 yield (parents[1],)
772 773 # then the other one (min rev) if the first did not fit
773 774 yield (parents[0],)
774 775 elif len(parents) > 0:
775 776 # Test all parents (1 or 2), and keep the best candidate
776 777 yield parents
777 778
778 779 if sparse and parents:
779 780 if snapshots is None:
780 781 # map: base-rev: snapshot-rev
781 782 snapshots = collections.defaultdict(list)
782 783 # See if an existing snapshot in the parent chains can serve as
783 784 # a base for a new intermediate snapshot
784 785 #
785 786 # search for snapshot in parents delta chain
786 787 # map: snapshot-level: snapshot-rev
787 788 parents_snaps = collections.defaultdict(set)
788 789 candidate_chains = [deltachain(p) for p in parents]
789 790 for chain in candidate_chains:
790 791 for idx, s in enumerate(chain):
791 792 if not revlog.issnapshot(s):
792 793 break
793 794 parents_snaps[idx].add(s)
794 795 snapfloor = min(parents_snaps[0]) + 1
795 796 _findsnapshots(revlog, snapshots, snapfloor)
796 797 # search for the highest "unrelated" revision
797 798 #
798 799 # Adding snapshots used by "unrelated" revisions increases the odds
799 800 # that we reuse an independent, yet better, snapshot chain.
800 801 #
801 802 # XXX instead of building a set of revisions, we could lazily enumerate
802 803 # over the chains. That would be more efficient, however we stick to
803 804 # simple code for now.
804 805 all_revs = set()
805 806 for chain in candidate_chains:
806 807 all_revs.update(chain)
807 808 other = None
808 809 for r in revlog.revs(prev, snapfloor):
809 810 if r not in all_revs:
810 811 other = r
811 812 break
812 813 if other is not None:
813 814 # To avoid unfair competition, we won't use unrelated intermediate
814 815 # snapshots that are deeper than the ones from the parent delta
815 816 # chain.
816 817 max_depth = max(parents_snaps.keys())
817 818 chain = deltachain(other)
818 819 for idx, s in enumerate(chain):
819 820 if s < snapfloor:
820 821 continue
821 822 if max_depth < idx:
822 823 break
823 824 if not revlog.issnapshot(s):
824 825 break
825 826 parents_snaps[idx].add(s)
826 827 # Test them as possible intermediate snapshot bases.
827 828 # We test them from highest to lowest level; high-level ones are more
828 829 # likely to result in a small delta
829 830 floor = None
830 831 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
831 832 siblings = set()
832 833 for s in snaps:
833 834 siblings.update(snapshots[s])
834 835 # Before considering making a new intermediate snapshot, we check
835 836 # if an existing snapshot, a child of the base we consider, would
836 837 # be suitable.
837 838 #
838 839 # It gives a chance to reuse a delta chain "unrelated" to the
839 840 # current revision instead of starting our own. Without such
840 841 # re-use, topological branches would keep reopening new chains,
841 842 # creating more and more snapshots as the repository grows.
842 843
843 844 if floor is not None:
844 845 # We only do this for siblings created after the one in our
845 846 # parent's delta chain. Those created before have less chance
846 847 # of being a valid base since our ancestors had to create a new
847 848 # snapshot.
848 849 siblings = [r for r in siblings if floor < r]
849 850 yield tuple(sorted(siblings))
850 851 # then test the base from our parent's delta chain.
851 852 yield tuple(sorted(snaps))
852 853 floor = min(snaps)
853 854 # No suitable base found in the parent chain; search whether any full
854 855 # snapshot emitted since the parent's base would be a suitable base
855 856 # for an intermediate snapshot.
856 857 #
857 858 # It gives a chance to reuse a delta chain unrelated to the current
858 859 # revisions instead of starting our own. Without such re-use,
859 860 # topological branches would keep reopening new full chains, creating
860 861 # more and more snapshots as the repository grows.
861 862 yield tuple(snapshots[nullrev])
862 863
863 864 if not sparse:
864 865 # other approaches failed; try against prev to hopefully save us
865 866 # a fulltext.
866 867 yield (prev,)
867 868
868 869 class deltacomputer(object):
869 870 def __init__(self, revlog):
870 871 self.revlog = revlog
871 872
872 873 def buildtext(self, revinfo, fh):
873 874 """Builds a fulltext version of a revision
874 875
875 876 revinfo: _revisioninfo instance that contains all needed info
876 877 fh: file handle to either the .i or the .d revlog file,
877 878 depending on whether it is inlined or not
878 879 """
879 880 btext = revinfo.btext
880 881 if btext[0] is not None:
881 882 return btext[0]
882 883
883 884 revlog = self.revlog
884 885 cachedelta = revinfo.cachedelta
885 886 baserev = cachedelta[0]
886 887 delta = cachedelta[1]
887 888
888 889 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
889 890 revinfo.p1, revinfo.p2,
890 891 revinfo.flags, revinfo.node)
891 892 return fulltext
892 893
893 894 def _builddeltadiff(self, base, revinfo, fh):
894 895 revlog = self.revlog
895 896 t = self.buildtext(revinfo, fh)
896 897 if revlog.iscensored(base):
897 898 # deltas based on a censored revision must replace the
898 899 # full content in one patch, so delta works everywhere
899 900 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
900 901 delta = header + t
901 902 else:
902 903 ptext = revlog.revision(base, _df=fh, raw=True)
903 904 delta = mdiff.textdiff(ptext, t)
904 905
905 906 return delta
906 907
907 908 def _builddeltainfo(self, revinfo, base, fh):
908 909 # can we use the cached delta?
909 910 delta = None
910 911 if revinfo.cachedelta:
911 912 cachebase, cachediff = revinfo.cachedelta
912 913 # check if the diff still applies
913 914 currentbase = cachebase
914 915 while (currentbase != nullrev
915 916 and currentbase != base
916 917 and self.revlog.length(currentbase) == 0):
917 918 currentbase = self.revlog.deltaparent(currentbase)
918 919 if currentbase == base:
919 920 delta = revinfo.cachedelta[1]
920 921 if delta is None:
921 922 delta = self._builddeltadiff(base, revinfo, fh)
922 923 revlog = self.revlog
923 924 header, data = revlog.compress(delta)
924 925 deltalen = len(header) + len(data)
925 926 chainbase = revlog.chainbase(base)
926 927 offset = revlog.end(len(revlog) - 1)
927 928 dist = deltalen + offset - revlog.start(chainbase)
928 929 if revlog._generaldelta:
929 930 deltabase = base
930 931 else:
931 932 deltabase = chainbase
932 933 chainlen, compresseddeltalen = revlog._chaininfo(base)
933 934 chainlen += 1
934 935 compresseddeltalen += deltalen
935 936
936 937 revlog = self.revlog
937 938 snapshotdepth = None
938 939 if deltabase == nullrev:
939 940 snapshotdepth = 0
940 941 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
941 942 # A delta chain should always be one full snapshot,
942 943 # zero or more semi-snapshots, and zero or more deltas
943 944 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
944 945 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
945 946 snapshotdepth = len(revlog._deltachain(deltabase)[0])
946 947
947 948 return _deltainfo(dist, deltalen, (header, data), deltabase,
948 949 chainbase, chainlen, compresseddeltalen,
949 950 snapshotdepth)
950 951
951 952 def _fullsnapshotinfo(self, fh, revinfo):
952 953 curr = len(self.revlog)
953 954 rawtext = self.buildtext(revinfo, fh)
954 955 data = self.revlog.compress(rawtext)
955 956 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
956 957 deltabase = chainbase = curr
957 958 snapshotdepth = 0
958 959 chainlen = 1
959 960
960 961 return _deltainfo(dist, deltalen, data, deltabase,
961 962 chainbase, chainlen, compresseddeltalen,
962 963 snapshotdepth)
963 964
964 965 def finddeltainfo(self, revinfo, fh):
965 966 """Find an acceptable delta against a candidate revision
966 967
967 968 revinfo: information about the revision (instance of _revisioninfo)
968 969 fh: file handle to either the .i or the .d revlog file,
969 970 depending on whether it is inlined or not
970 971
971 972 Returns the first acceptable candidate revision, as ordered by
972 973 _candidategroups
973 974
974 975 If no suitable deltabase is found, we return delta info for a full
975 976 snapshot.
976 977 """
977 978 if not revinfo.textlen:
978 979 return self._fullsnapshotinfo(fh, revinfo)
979 980
980 981 # no delta for flag processor revision (see "candelta" for why)
982 983 # not calling candelta since only one revision needs testing, and to
983 984 # avoid the overhead of fetching flags again.
983 984 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
984 985 return self._fullsnapshotinfo(fh, revinfo)
985 986
986 987 cachedelta = revinfo.cachedelta
987 988 p1 = revinfo.p1
988 989 p2 = revinfo.p2
989 990 revlog = self.revlog
990 991
991 992 deltainfo = None
992 993 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
993 994 groups = _candidategroups(self.revlog, revinfo.textlen,
994 995 p1r, p2r, cachedelta)
995 996 candidaterevs = next(groups)
996 997 while candidaterevs is not None:
997 998 nominateddeltas = []
998 999 if deltainfo is not None:
999 1000 # if we already found a good delta,
1000 1001 # challenge it against refined candidates
1001 1002 nominateddeltas.append(deltainfo)
1002 1003 for candidaterev in candidaterevs:
1003 1004 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1004 1005 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1005 1006 nominateddeltas.append(candidatedelta)
1006 1007 if nominateddeltas:
1007 1008 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1008 1009 if deltainfo is not None:
1009 1010 candidaterevs = groups.send(deltainfo.base)
1010 1011 else:
1011 1012 candidaterevs = next(groups)
1012 1013
1013 1014 if deltainfo is None:
1014 1015 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1015 1016 return deltainfo
@@ -1,270 +1,292
1 1 This test tries to exercise the ssh functionality with a dummy script
2 2
3 3 $ checknewrepo()
4 4 > {
5 5 > name=$1
6 6 > if [ -d "$name"/.hg/store ]; then
7 7 > echo store created
8 8 > fi
9 9 > if [ -f "$name"/.hg/00changelog.i ]; then
10 10 > echo 00changelog.i created
11 11 > fi
12 12 > cat "$name"/.hg/requires
13 13 > }
14 14
15 15 creating 'local'
16 16
17 17 $ hg init local
18 18 $ checknewrepo local
19 19 store created
20 20 00changelog.i created
21 21 dotencode
22 22 fncache
23 23 generaldelta
24 24 revlogv1
25 25 sparserevlog
26 26 store
27 27 testonly-simplestore (reposimplestore !)
28 28 $ echo this > local/foo
29 29 $ hg ci --cwd local -A -m "init"
30 30 adding foo
31 31
32 32 test custom revlog chunk cache sizes
33 33
34 34 $ hg --config format.chunkcachesize=0 log -R local -pv
35 35 abort: revlog chunk cache size 0 is not greater than 0!
36 36 [255]
37 37 $ hg --config format.chunkcachesize=1023 log -R local -pv
38 38 abort: revlog chunk cache size 1023 is not a power of 2!
39 39 [255]
40 40 $ hg --config format.chunkcachesize=1024 log -R local -pv
41 41 changeset: 0:08b9e9f63b32
42 42 tag: tip
43 43 user: test
44 44 date: Thu Jan 01 00:00:00 1970 +0000
45 45 files: foo
46 46 description:
47 47 init
48 48
49 49
50 50 diff -r 000000000000 -r 08b9e9f63b32 foo
51 51 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
52 52 +++ b/foo Thu Jan 01 00:00:00 1970 +0000
53 53 @@ -0,0 +1,1 @@
54 54 +this
55 55
56 56
57 57 creating repo with format.usestore=false
58 58
59 59 $ hg --config format.usestore=false init old
60 60 $ checknewrepo old
61 61 generaldelta
62 62 revlogv1
63 63 testonly-simplestore (reposimplestore !)
64 64 sparserevlog
65 65
66 66 creating repo with format.usefncache=false
67 67
68 68 $ hg --config format.usefncache=false init old2
69 69 $ checknewrepo old2
70 70 store created
71 71 00changelog.i created
72 72 generaldelta
73 73 revlogv1
74 74 sparserevlog
75 75 store
76 76 testonly-simplestore (reposimplestore !)
77 77
78 78 creating repo with format.dotencode=false
79 79
80 80 $ hg --config format.dotencode=false init old3
81 81 $ checknewrepo old3
82 82 store created
83 83 00changelog.i created
84 84 fncache
85 85 generaldelta
86 86 revlogv1
87 87 sparserevlog
88 88 store
89 89 testonly-simplestore (reposimplestore !)
90 90
91 91 creating repo with format.generaldelta=false and format.sparse-revlog=no
92 92
93 93 $ hg --config format.generaldelta=false --config format.usegeneraldelta=false --config format.sparse-revlog=no init old4
94 94 $ checknewrepo old4
95 95 store created
96 96 00changelog.i created
97 97 dotencode
98 98 fncache
99 99 revlogv1
100 100 store
101 101 testonly-simplestore (reposimplestore !)
102 102
103 103 test failure
104 104
105 105 $ hg init local
106 106 abort: repository local already exists!
107 107 [255]
108 108
109 109 init+push to remote2
110 110
111 111 $ hg init -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/remote2
112 112 $ hg incoming -R remote2 local
113 113 comparing with local
114 114 changeset: 0:08b9e9f63b32
115 115 tag: tip
116 116 user: test
117 117 date: Thu Jan 01 00:00:00 1970 +0000
118 118 summary: init
119 119
120 120
121 121 $ hg push -R local -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/remote2
122 122 pushing to ssh://user@dummy/remote2
123 123 searching for changes
124 124 remote: adding changesets
125 125 remote: adding manifests
126 126 remote: adding file changes
127 127 remote: added 1 changesets with 1 changes to 1 files
128 128
129 129 clone to remote1
130 130
131 131 $ hg clone -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" local ssh://user@dummy/remote1
132 132 searching for changes
133 133 remote: adding changesets
134 134 remote: adding manifests
135 135 remote: adding file changes
136 136 remote: added 1 changesets with 1 changes to 1 files
137 137
138 138 The largefiles extension doesn't crash
139 139 $ hg clone -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" local ssh://user@dummy/remotelf --config extensions.largefiles=
140 140 The fsmonitor extension is incompatible with the largefiles extension and has been disabled. (fsmonitor !)
141 141 The fsmonitor extension is incompatible with the largefiles extension and has been disabled. (fsmonitor !)
142 142 searching for changes
143 143 remote: adding changesets
144 144 remote: adding manifests
145 145 remote: adding file changes
146 146 remote: added 1 changesets with 1 changes to 1 files
147 147
148 148 init to existing repo
149 149
150 150 $ hg init -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/remote1
151 151 abort: repository remote1 already exists!
152 152 abort: could not create remote repo!
153 153 [255]
154 154
155 155 clone to existing repo
156 156
157 157 $ hg clone -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" local ssh://user@dummy/remote1
158 158 abort: repository remote1 already exists!
159 159 abort: could not create remote repo!
160 160 [255]
161 161
162 162 output of dummyssh
163 163
164 164 $ cat dummylog
165 165 Got arguments 1:user@dummy 2:hg init remote2
166 166 Got arguments 1:user@dummy 2:hg -R remote2 serve --stdio
167 167 Got arguments 1:user@dummy 2:hg -R remote2 serve --stdio
168 168 Got arguments 1:user@dummy 2:hg init remote1
169 169 Got arguments 1:user@dummy 2:hg -R remote1 serve --stdio
170 170 Got arguments 1:user@dummy 2:hg init remotelf
171 171 Got arguments 1:user@dummy 2:hg -R remotelf serve --stdio
172 172 Got arguments 1:user@dummy 2:hg init remote1
173 173 Got arguments 1:user@dummy 2:hg init remote1
174 174
175 175 comparing repositories
176 176
177 177 $ hg tip -q -R local
178 178 0:08b9e9f63b32
179 179 $ hg tip -q -R remote1
180 180 0:08b9e9f63b32
181 181 $ hg tip -q -R remote2
182 182 0:08b9e9f63b32
183 183
184 184 check names for repositories (clashes with URL schemes, special chars)
185 185
186 186 $ for i in bundle file hg http https old-http ssh static-http "with space"; do
187 187 > printf "hg init \"$i\"... "
188 188 > hg init "$i"
189 189 > test -d "$i" -a -d "$i/.hg" && echo "ok" || echo "failed"
190 190 > done
191 191 hg init "bundle"... ok
192 192 hg init "file"... ok
193 193 hg init "hg"... ok
194 194 hg init "http"... ok
195 195 hg init "https"... ok
196 196 hg init "old-http"... ok
197 197 hg init "ssh"... ok
198 198 hg init "static-http"... ok
199 199 hg init "with space"... ok
200 200 #if eol-in-paths
201 201 /* " " is not a valid name for a directory on Windows */
202 202 $ hg init " "
203 203 $ test -d " "
204 204 $ test -d " /.hg"
205 205 #endif
206 206
207 207 creating 'local/sub/repo'
208 208
209 209 $ hg init local/sub/repo
210 210 $ checknewrepo local/sub/repo
211 211 store created
212 212 00changelog.i created
213 213 dotencode
214 214 fncache
215 215 generaldelta
216 216 revlogv1
217 217 sparserevlog
218 218 store
219 219 testonly-simplestore (reposimplestore !)
220 220
221 221 prepare test of init of url configured from paths
222 222
223 223 $ echo '[paths]' >> $HGRCPATH
224 224 $ echo "somewhere = `pwd`/url from paths" >> $HGRCPATH
225 225 $ echo "elsewhere = `pwd`/another paths url" >> $HGRCPATH
226 226
227 227 init should (for consistency with clone) expand the url
228 228
229 229 $ hg init somewhere
230 230 $ checknewrepo "url from paths"
231 231 store created
232 232 00changelog.i created
233 233 dotencode
234 234 fncache
235 235 generaldelta
236 236 revlogv1
237 237 sparserevlog
238 238 store
239 239 testonly-simplestore (reposimplestore !)
240 240
241 241 verify that clone also expands urls
242 242
243 243 $ hg clone somewhere elsewhere
244 244 updating to branch default
245 245 0 files updated, 0 files merged, 0 files removed, 0 files unresolved
246 246 $ checknewrepo "another paths url"
247 247 store created
248 248 00changelog.i created
249 249 dotencode
250 250 fncache
251 251 generaldelta
252 252 revlogv1
253 253 sparserevlog
254 254 store
255 255 testonly-simplestore (reposimplestore !)
256 256
257 257 clone bookmarks
258 258
259 259 $ hg -R local bookmark test
260 260 $ hg -R local bookmarks
261 261 * test 0:08b9e9f63b32
262 262 $ hg clone -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" local ssh://user@dummy/remote-bookmarks
263 263 searching for changes
264 264 remote: adding changesets
265 265 remote: adding manifests
266 266 remote: adding file changes
267 267 remote: added 1 changesets with 1 changes to 1 files
268 268 exporting bookmark test
269 269 $ hg -R remote-bookmarks bookmarks
270 270 test 0:08b9e9f63b32
271
272 Check format constraint
273 -----------------------
274
275 $ hg init issue6056 --config format.usegeneraldelta=0 --config format.sparse-revlog=0
276 $ cd issue6056
277 $ echo foo > 1
278 $ echo foo > 2
279 $ echo foo > 3
280 $ echo foo > 4
281 $ echo foo > 5
282 $ hg add *
283
284 Build a bogus repository (sparserevlog without general delta)
285
286 $ hg commit -m 'initial changesets'
287 $ echo 'sparserevlog' >> .hg/requires
288 $ for x in `$TESTDIR/seq.py 100`; do
289 > echo $x >> `expr $x % 5 + 1`
290 > hg commit -m $x
291 > done
292 $ cd ..