revlogdeltas: move special cases around raw revisions in finddeltainfo...
Boris Feld - r39368:6f4b8f60 default
@@ -1,2489 +1,2483 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
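
# Illustrative note (added for clarity): metadata is framed by '\1\n'
# markers, so packmeta({'copied': 'a'}, 'body') yields
# '\1\ncopied: a\n\1\nbody', and parsemeta() on that string returns
# ({'copied': 'a'}, 14) -- 14 being the offset where 'body' starts.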

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
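
# A worked example of the packing above: offset_type(4096, 0) stores
# 4096 << 16 == 0x10000000; getoffset() then recovers 4096 and gettype()
# recovers 0 from the same packed value.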

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
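
# Note on the scheme above: with a single parent the digest is
# sha1(nullid + p1 + text) (via the cached _nullhash copy); with two
# parents it is sha1(min(p1, p2) + max(p1, p2) + text), so the result
# does not depend on parent order.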

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True, frozen=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    linknode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
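# Each v0 index entry is therefore a fixed indexformatv0.size
# == 4 * 4 + 3 * 20 == 76 bytes on disk.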

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
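# ">Qiiiiii20s12x" gives a fixed 64-byte entry: an 8-byte offset/flags
# word, six 4-byte integers, the 20-byte nodeid and 12 bytes of padding
# (reserving room for 32-byte hashes, per the layout comment above).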
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        #  When True, indexfile is opened with checkambig=True at writing, to
        #  avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
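
        # Note: the power-of-two requirement above lets _readsegment()
        # align reads on cache-size boundaries with plain bit masking
        # (offset & ~(cachesize - 1)).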

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
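        # For example, revs(start=5, stop=2) yields 5, 4, 3, 2: step
        # flips to -1 and stop is shifted by one to keep the bound
        # inclusive.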

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
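        # The a > b shortcut above relies on revision numbers being
        # topologically sorted: an ancestor always has a smaller
        # revision number than its descendants.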

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
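        # A worked example of the windowing above, with the default
        # 64KiB _chunkcachesize: a request for offset=70000, length=100
        # rounds down to realoffset=65536, reads (and caches) a full
        # 65536-byte window, and returns a 100-byte buffer sliced at
        # offset - realoffset == 4464.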

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
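        # Example of the inline adjustment above: index entries and
        # revision data interleave in an inline revlog, so the data for
        # revision r is shifted by (r + 1) index entries (64 bytes each
        # for the v1 format).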

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)
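        # In other words, a revision is a snapshot when its delta chain,
        # followed recursively, consists only of snapshots down to a
        # full text (a delta against nullrev); a delta taken against
        # either parent disqualifies it.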
1548 1548
1549 1549 def snapshotdepth(self, rev):
1550 1550 """number of snapshot in the chain before this one"""
1551 1551 if not self.issnapshot(rev):
1552 1552 raise ProgrammingError('revision %d not a snapshot')
1553 1553 return len(self._deltachain(rev)[0]) - 1
1554 1554
1555 1555 def revdiff(self, rev1, rev2):
1556 1556 """return or calculate a delta between two revisions
1557 1557
1558 1558 The delta calculated is in binary form and is intended to be written to
1559 1559 revlog data directly. So this function needs raw revision data.
1560 1560 """
1561 1561 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1562 1562 return bytes(self._chunk(rev2))
1563 1563
1564 1564 return mdiff.textdiff(self.revision(rev1, raw=True),
1565 1565 self.revision(rev2, raw=True))
1566 1566
1567 1567 def revision(self, nodeorrev, _df=None, raw=False):
1568 1568 """return an uncompressed revision of a given node or revision
1569 1569 number.
1570 1570
1571 1571 _df - an existing file handle to read from. (internal-only)
1572 1572 raw - an optional argument specifying if the revision data is to be
1573 1573 treated as raw data when applying flag transforms. 'raw' should be set
1574 1574 to True when generating changegroups or in debug commands.
1575 1575 """
1576 1576 if isinstance(nodeorrev, int):
1577 1577 rev = nodeorrev
1578 1578 node = self.node(rev)
1579 1579 else:
1580 1580 node = nodeorrev
1581 1581 rev = None
1582 1582
1583 1583 cachedrev = None
1584 1584 flags = None
1585 1585 rawtext = None
1586 1586 if node == nullid:
1587 1587 return ""
1588 1588 if self._cache:
1589 1589 if self._cache[0] == node:
1590 1590 # _cache only stores rawtext
1591 1591 if raw:
1592 1592 return self._cache[2]
1593 1593 # duplicated, but good for perf
1594 1594 if rev is None:
1595 1595 rev = self.rev(node)
1596 1596 if flags is None:
1597 1597 flags = self.flags(rev)
1598 1598 # no extra flags set, no flag processor runs, text = rawtext
1599 1599 if flags == REVIDX_DEFAULT_FLAGS:
1600 1600 return self._cache[2]
1601 1601 # rawtext is reusable. need to run flag processor
1602 1602 rawtext = self._cache[2]
1603 1603
1604 1604 cachedrev = self._cache[1]
1605 1605
1606 1606 # look up what we need to read
1607 1607 if rawtext is None:
1608 1608 if rev is None:
1609 1609 rev = self.rev(node)
1610 1610
1611 1611 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1612 1612 if stopped:
1613 1613 rawtext = self._cache[2]
1614 1614
1615 1615 # drop cache to save memory
1616 1616 self._cache = None
1617 1617
1618 1618 targetsize = None
1619 1619 rawsize = self.index[rev][2]
1620 1620 if 0 <= rawsize:
1621 1621 targetsize = 4 * rawsize
1622 1622
1623 1623 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1624 1624 if rawtext is None:
1625 1625 rawtext = bytes(bins[0])
1626 1626 bins = bins[1:]
1627 1627
1628 1628 rawtext = mdiff.patches(rawtext, bins)
1629 1629 self._cache = (node, rev, rawtext)
1630 1630
1631 1631 if flags is None:
1632 1632 if rev is None:
1633 1633 rev = self.rev(node)
1634 1634 flags = self.flags(rev)
1635 1635
1636 1636 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1637 1637 if validatehash:
1638 1638 self.checkhash(text, node, rev=rev)
1639 1639
1640 1640 return text
1641 1641
1642 1642 def hash(self, text, p1, p2):
1643 1643 """Compute a node hash.
1644 1644
1645 1645 Available as a function so that subclasses can replace the hash
1646 1646 as needed.
1647 1647 """
1648 1648 return hash(text, p1, p2)
1649 1649
1650 1650 def _processflags(self, text, flags, operation, raw=False):
1651 1651 """Inspect revision data flags and applies transforms defined by
1652 1652 registered flag processors.
1653 1653
1654 1654 ``text`` - the revision data to process
1655 1655 ``flags`` - the revision flags
1656 1656 ``operation`` - the operation being performed (read or write)
1657 1657 ``raw`` - an optional argument describing if the raw transform should be
1658 1658 applied.
1659 1659
1660 1660 This method processes the flags in the order (or reverse order if
1661 1661 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1662 1662 flag processors registered for present flags. The order of flags defined
1663 1663 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1664 1664
1665 1665 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1666 1666 processed text and ``validatehash`` is a bool indicating whether the
1667 1667 returned text should be checked for hash integrity.
1668 1668
1669 1669 Note: If the ``raw`` argument is set, it has precedence over the
1670 1670 operation and will only update the value of ``validatehash``.
1671 1671 """
1672 1672 # fast path: no flag processors will run
1673 1673 if flags == 0:
1674 1674 return text, True
1675 1675 if operation not in ('read', 'write'):
1676 1676 raise ProgrammingError(_("invalid '%s' operation") % operation)
1677 1677 # Check all flags are known.
1678 1678 if flags & ~REVIDX_KNOWN_FLAGS:
1679 1679 raise RevlogError(_("incompatible revision flag '%#x'") %
1680 1680 (flags & ~REVIDX_KNOWN_FLAGS))
1681 1681 validatehash = True
1682 1682 # Depending on the operation (read or write), the order might be
1683 1683 # reversed due to non-commutative transforms.
1684 1684 orderedflags = REVIDX_FLAGS_ORDER
1685 1685 if operation == 'write':
1686 1686 orderedflags = reversed(orderedflags)
1687 1687
1688 1688 for flag in orderedflags:
1689 1689 # If a flagprocessor has been registered for a known flag, apply the
1690 1690 # related operation transform and update result tuple.
1691 1691 if flag & flags:
1692 1692 vhash = True
1693 1693
1694 1694 if flag not in _flagprocessors:
1695 1695 message = _("missing processor for flag '%#x'") % (flag)
1696 1696 raise RevlogError(message)
1697 1697
1698 1698 processor = _flagprocessors[flag]
1699 1699 if processor is not None:
1700 1700 readtransform, writetransform, rawtransform = processor
1701 1701
1702 1702 if raw:
1703 1703 vhash = rawtransform(self, text)
1704 1704 elif operation == 'read':
1705 1705 text, vhash = readtransform(self, text)
1706 1706 else: # write operation
1707 1707 text, vhash = writetransform(self, text)
1708 1708 validatehash = validatehash and vhash
1709 1709
1710 1710 return text, validatehash
1711 1711
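A minimal sketch of the (read, write, raw) transform tuple consumed above; the transform bodies and the direct dict assignment are illustrative assumptions -- real code registers processors via addflagprocessor():

def _readtransform(rl, text):
    # runs on read; returns (text, validatehash)
    return text[len(b'HDR'):], True

def _writetransform(rl, text):
    # runs on write, in reverse flag order
    return b'HDR' + text, False

def _rawtransform(rl, text):
    # raw access: no rewrite allowed, only says whether to validate
    return False

_flagprocessors[REVIDX_EXTSTORED] = (
    _readtransform, _writetransform, _rawtransform)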
1712 1712 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1713 1713 """Check node hash integrity.
1714 1714
1715 1715 Available as a function so that subclasses can extend hash mismatch
1716 1716 behaviors as needed.
1717 1717 """
1718 1718 try:
1719 1719 if p1 is None and p2 is None:
1720 1720 p1, p2 = self.parents(node)
1721 1721 if node != self.hash(text, p1, p2):
1722 1722 revornode = rev
1723 1723 if revornode is None:
1724 1724 revornode = templatefilters.short(hex(node))
1725 1725 raise RevlogError(_("integrity check failed on %s:%s")
1726 1726 % (self.indexfile, pycompat.bytestr(revornode)))
1727 1727 except RevlogError:
1728 1728 if self._censorable and _censoredtext(text):
1729 1729 raise error.CensoredNodeError(self.indexfile, node, text)
1730 1730 raise
1731 1731
1732 1732 def _enforceinlinesize(self, tr, fp=None):
1733 1733 """Check if the revlog is too big for inline and convert if so.
1734 1734
1735 1735 This should be called after revisions are added to the revlog. If the
1736 1736 revlog has grown too large to be an inline revlog, it will convert it
1737 1737 to use multiple index and data files.
1738 1738 """
1739 1739 tiprev = len(self) - 1
1740 1740 if (not self._inline or
1741 1741 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1742 1742 return
1743 1743
1744 1744 trinfo = tr.find(self.indexfile)
1745 1745 if trinfo is None:
1746 1746 raise RevlogError(_("%s not found in the transaction")
1747 1747 % self.indexfile)
1748 1748
1749 1749 trindex = trinfo[2]
1750 1750 if trindex is not None:
1751 1751 dataoff = self.start(trindex)
1752 1752 else:
1753 1753 # revlog was stripped at start of transaction, use all leftover data
1754 1754 trindex = len(self) - 1
1755 1755 dataoff = self.end(tiprev)
1756 1756
1757 1757 tr.add(self.datafile, dataoff)
1758 1758
1759 1759 if fp:
1760 1760 fp.flush()
1761 1761 fp.close()
1762 1762
1763 1763 with self._datafp('w') as df:
1764 1764 for r in self:
1765 1765 df.write(self._getsegmentforrevs(r, r)[1])
1766 1766
1767 1767 with self._indexfp('w') as fp:
1768 1768 self.version &= ~FLAG_INLINE_DATA
1769 1769 self._inline = False
1770 1770 io = self._io
1771 1771 for i in self:
1772 1772 e = io.packentry(self.index[i], self.node, self.version, i)
1773 1773 fp.write(e)
1774 1774
1775 1775 # the temp file replaces the real index when we exit the context
1776 1776 # manager
1777 1777
1778 1778 tr.replace(self.indexfile, trindex * self._io.size)
1779 1779 self._chunkclear()
1780 1780
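A minimal sketch of the trigger condition, assuming the 131072-byte _maxinline cap defined at the top of this module; needsplit is an invented name:

_MAXINLINE = 131072  # 128 KiB, same value as _maxinline above

def needsplit(inline, tipstart, tiplength):
    # convert once the data of an inline revlog reaches the cap
    return inline and (tipstart + tiplength) >= _MAXINLINE

assert not needsplit(True, 100000, 31071)   # 131071 bytes: still inline
assert needsplit(True, 100000, 31072)       # 131072 bytes: convert
assert not needsplit(False, 10 ** 9, 0)     # already split: nothing to do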
1781 1781 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1782 1782 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1783 1783 """add a revision to the log
1784 1784
1785 1785 text - the revision data to add
1786 1786 transaction - the transaction object used for rollback
1787 1787 link - the linkrev data to add
1788 1788 p1, p2 - the parent nodeids of the revision
1789 1789 cachedelta - an optional precomputed delta
1790 1790 node - nodeid of revision; typically node is not specified, and it is
1791 1791 computed by default as hash(text, p1, p2), however subclasses might
1792 1792 use different hashing method (and override checkhash() in such case)
1793 1793 flags - the known flags to set on the revision
1794 1794 deltacomputer - an optional deltacomputer instance shared between
1795 1795 multiple calls
1796 1796 """
1797 1797 if link == nullrev:
1798 1798 raise RevlogError(_("attempted to add linkrev -1 to %s")
1799 1799 % self.indexfile)
1800 1800
1801 1801 if flags:
1802 1802 node = node or self.hash(text, p1, p2)
1803 1803
1804 1804 rawtext, validatehash = self._processflags(text, flags, 'write')
1805 1805
1806 1806 # If the flag processor modifies the revision data, ignore any provided
1807 1807 # cachedelta.
1808 1808 if rawtext != text:
1809 1809 cachedelta = None
1810 1810
1811 1811 if len(rawtext) > _maxentrysize:
1812 1812 raise RevlogError(
1813 1813 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1814 1814 % (self.indexfile, len(rawtext)))
1815 1815
1816 1816 node = node or self.hash(rawtext, p1, p2)
1817 1817 if node in self.nodemap:
1818 1818 return node
1819 1819
1820 1820 if validatehash:
1821 1821 self.checkhash(rawtext, node, p1=p1, p2=p2)
1822 1822
1823 1823 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1824 1824 flags, cachedelta=cachedelta,
1825 1825 deltacomputer=deltacomputer)
1826 1826
1827 1827 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1828 1828 cachedelta=None, deltacomputer=None):
1829 1829 """add a raw revision with known flags, node and parents
1830 1830 useful when reusing a revision not stored in this revlog (ex: received
1831 1831 over wire, or read from an external bundle).
1832 1832 """
1833 1833 dfh = None
1834 1834 if not self._inline:
1835 1835 dfh = self._datafp("a+")
1836 1836 ifh = self._indexfp("a+")
1837 1837 try:
1838 1838 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1839 1839 flags, cachedelta, ifh, dfh,
1840 1840 deltacomputer=deltacomputer)
1841 1841 finally:
1842 1842 if dfh:
1843 1843 dfh.close()
1844 1844 ifh.close()
1845 1845
1846 1846 def compress(self, data):
1847 1847 """Generate a possibly-compressed representation of data."""
1848 1848 if not data:
1849 1849 return '', data
1850 1850
1851 1851 compressed = self._compressor.compress(data)
1852 1852
1853 1853 if compressed:
1854 1854 # The revlog compressor added the header in the returned data.
1855 1855 return '', compressed
1856 1856
1857 1857 if data[0:1] == '\0':
1858 1858 return '', data
1859 1859 return 'u', data
1860 1860
1861 1861 def decompress(self, data):
1862 1862 """Decompress a revlog chunk.
1863 1863
1864 1864 The chunk is expected to begin with a header identifying the
1865 1865 format type so it can be routed to an appropriate decompressor.
1866 1866 """
1867 1867 if not data:
1868 1868 return data
1869 1869
1870 1870 # Revlogs are read much more frequently than they are written and many
1871 1871 # chunks only take microseconds to decompress, so performance is
1872 1872 # important here.
1873 1873 #
1874 1874 # We can make a few assumptions about revlogs:
1875 1875 #
1876 1876 # 1) the majority of chunks will be compressed (as opposed to inline
1877 1877 # raw data).
1878 1878 # 2) decompressing *any* data will likely be at least 10x slower than
1879 1879 # returning raw inline data.
1880 1880 # 3) we want to prioritize common and officially supported compression
1881 1881 # engines
1882 1882 #
1883 1883 # It follows that we want to optimize for "decompress compressed data
1884 1884 # when encoded with common and officially supported compression engines"
1885 1885 # case over "raw data" and "data encoded by less common or non-official
1886 1886 # compression engines." That is why we have the inline lookup first
1887 1887 # followed by the compengines lookup.
1888 1888 #
1889 1889 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1890 1890 # compressed chunks. And this matters for changelog and manifest reads.
1891 1891 t = data[0:1]
1892 1892
1893 1893 if t == 'x':
1894 1894 try:
1895 1895 return _zlibdecompress(data)
1896 1896 except zlib.error as e:
1897 1897 raise RevlogError(_('revlog decompress error: %s') %
1898 1898 stringutil.forcebytestr(e))
1899 1899 # '\0' is more common than 'u' so it goes first.
1900 1900 elif t == '\0':
1901 1901 return data
1902 1902 elif t == 'u':
1903 1903 return util.buffer(data, 1)
1904 1904
1905 1905 try:
1906 1906 compressor = self._decompressors[t]
1907 1907 except KeyError:
1908 1908 try:
1909 1909 engine = util.compengines.forrevlogheader(t)
1910 1910 compressor = engine.revlogcompressor()
1911 1911 self._decompressors[t] = compressor
1912 1912 except KeyError:
1913 1913 raise RevlogError(_('unknown compression type %r') % t)
1914 1914
1915 1915 return compressor.decompress(data)
1916 1916
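The one-byte header dispatch can be exercised with a self-contained sketch covering only the built-in 'x' (zlib), '\0' and 'u' cases; toy_compress/toy_decompress are invented names, and real revlogs may route other headers to pluggable engines:

import zlib

def toy_compress(data):
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp                 # zlib streams start with 'x'
    if data[:1] == b'\0':
        return data                 # stored as-is, self-identifying
    return b'u' + data              # stored as-is behind a 'u' marker

def toy_decompress(chunk):
    t = chunk[:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)

for data in (b'abc', b'revision text ' * 100, b'\0binary'):
    assert toy_decompress(toy_compress(data)) == data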
1917 1917 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1918 1918 cachedelta, ifh, dfh, alwayscache=False,
1919 1919 deltacomputer=None):
1920 1920 """internal function to add revisions to the log
1921 1921
1922 1922 see addrevision for argument descriptions.
1923 1923
1924 1924 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1925 1925
1926 1926 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1927 1927 be used.
1928 1928
1929 1929 invariants:
1930 1930 - rawtext is optional (can be None); if not set, cachedelta must be set.
1931 1931 if both are set, they must correspond to each other.
1932 1932 """
1933 1933 if node == nullid:
1934 1934 raise RevlogError(_("%s: attempt to add null revision") %
1935 1935 (self.indexfile))
1936 1936 if node == wdirid or node in wdirfilenodeids:
1937 1937 raise RevlogError(_("%s: attempt to add wdir revision") %
1938 1938 (self.indexfile))
1939 1939
1940 1940 if self._inline:
1941 1941 fh = ifh
1942 1942 else:
1943 1943 fh = dfh
1944 1944
1945 1945 btext = [rawtext]
1946 1946
1947 1947 curr = len(self)
1948 1948 prev = curr - 1
1949 1949 offset = self.end(prev)
1950 1950 p1r, p2r = self.rev(p1), self.rev(p2)
1951 1951
1952 1952 # full versions are inserted when the needed deltas
1953 1953 # become comparable to the uncompressed text
1954 1954 if rawtext is None:
1955 1955 # need rawtext size, before changed by flag processors, which is
1956 1956 # the non-raw size. use revlog explicitly to avoid filelog's extra
1957 1957 # logic that might remove metadata size.
1958 1958 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1959 1959 cachedelta[1])
1960 1960 else:
1961 1961 textlen = len(rawtext)
1962 1962
1963 1963 if deltacomputer is None:
1964 1964 deltacomputer = deltautil.deltacomputer(self)
1965 1965
1966 1966 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1967 1967
1968 # no delta for flag processor revision (see "candelta" for why)
1969 # not calling candelta since only one revision needs test, also to
1970 # avoid overhead fetching flags again.
1971 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1972 deltainfo = None
1973 else:
1974 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1968 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1975 1969
1976 1970 if deltainfo is not None:
1977 1971 base = deltainfo.base
1978 1972 chainbase = deltainfo.chainbase
1979 1973 data = deltainfo.data
1980 1974 l = deltainfo.deltalen
1981 1975 else:
1982 1976 rawtext = deltacomputer.buildtext(revinfo, fh)
1983 1977 data = self.compress(rawtext)
1984 1978 l = len(data[1]) + len(data[0])
1985 1979 base = chainbase = curr
1986 1980
1987 1981 e = (offset_type(offset, flags), l, textlen,
1988 1982 base, link, p1r, p2r, node)
1989 1983 self.index.append(e)
1990 1984 self.nodemap[node] = curr
1991 1985
1992 1986 entry = self._io.packentry(e, self.node, self.version, curr)
1993 1987 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1994 1988
1995 1989 if alwayscache and rawtext is None:
1996 1990 rawtext = deltacomputer.buildtext(revinfo, fh)
1997 1991
1998 1992 if type(rawtext) == bytes: # only accept immutable objects
1999 1993 self._cache = (node, curr, rawtext)
2000 1994 self._chainbasecache[curr] = chainbase
2001 1995 return node
2002 1996
2003 1997 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2004 1998 # Files opened in a+ mode have inconsistent behavior on various
2005 1999 # platforms. Windows requires that a file positioning call be made
2006 2000 # when the file handle transitions between reads and writes. See
2007 2001 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2008 2002 # platforms, Python or the platform itself can be buggy. Some versions
2009 2003 # of Solaris have been observed to not append at the end of the file
2010 2004 # if the file was seeked to before the end. See issue4943 for more.
2011 2005 #
2012 2006 # We work around this issue by inserting a seek() before writing.
2013 2007 # Note: This is likely not necessary on Python 3.
2014 2008 ifh.seek(0, os.SEEK_END)
2015 2009 if dfh:
2016 2010 dfh.seek(0, os.SEEK_END)
2017 2011
2018 2012 curr = len(self) - 1
2019 2013 if not self._inline:
2020 2014 transaction.add(self.datafile, offset)
2021 2015 transaction.add(self.indexfile, curr * len(entry))
2022 2016 if data[0]:
2023 2017 dfh.write(data[0])
2024 2018 dfh.write(data[1])
2025 2019 ifh.write(entry)
2026 2020 else:
2027 2021 offset += curr * self._io.size
2028 2022 transaction.add(self.indexfile, offset, curr)
2029 2023 ifh.write(entry)
2030 2024 ifh.write(data[0])
2031 2025 ifh.write(data[1])
2032 2026 self._enforceinlinesize(transaction, ifh)
2033 2027
2034 2028 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2035 2029 """
2036 2030 add a delta group
2037 2031
2038 2032 given a set of deltas, add them to the revision log. the
2039 2033 first delta is against its parent, which should be in our
2040 2034 log, the rest are against the previous delta.
2041 2035
2042 2036 If ``addrevisioncb`` is defined, it will be called with arguments of
2043 2037 this revlog and the node that was added.
2044 2038 """
2045 2039
2046 2040 nodes = []
2047 2041
2048 2042 r = len(self)
2049 2043 end = 0
2050 2044 if r:
2051 2045 end = self.end(r - 1)
2052 2046 ifh = self._indexfp("a+")
2053 2047 isize = r * self._io.size
2054 2048 if self._inline:
2055 2049 transaction.add(self.indexfile, end + isize, r)
2056 2050 dfh = None
2057 2051 else:
2058 2052 transaction.add(self.indexfile, isize, r)
2059 2053 transaction.add(self.datafile, end)
2060 2054 dfh = self._datafp("a+")
2061 2055 def flush():
2062 2056 if dfh:
2063 2057 dfh.flush()
2064 2058 ifh.flush()
2065 2059 try:
2066 2060 deltacomputer = deltautil.deltacomputer(self)
2067 2061 # loop through our set of deltas
2068 2062 for data in deltas:
2069 2063 node, p1, p2, linknode, deltabase, delta, flags = data
2070 2064 link = linkmapper(linknode)
2071 2065 flags = flags or REVIDX_DEFAULT_FLAGS
2072 2066
2073 2067 nodes.append(node)
2074 2068
2075 2069 if node in self.nodemap:
2076 2070 # this can happen if two branches make the same change
2077 2071 continue
2078 2072
2079 2073 for p in (p1, p2):
2080 2074 if p not in self.nodemap:
2081 2075 raise LookupError(p, self.indexfile,
2082 2076 _('unknown parent'))
2083 2077
2084 2078 if deltabase not in self.nodemap:
2085 2079 raise LookupError(deltabase, self.indexfile,
2086 2080 _('unknown delta base'))
2087 2081
2088 2082 baserev = self.rev(deltabase)
2089 2083
2090 2084 if baserev != nullrev and self.iscensored(baserev):
2091 2085 # if base is censored, delta must be full replacement in a
2092 2086 # single patch operation
2093 2087 hlen = struct.calcsize(">lll")
2094 2088 oldlen = self.rawsize(baserev)
2095 2089 newlen = len(delta) - hlen
2096 2090 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2097 2091 raise error.CensoredBaseError(self.indexfile,
2098 2092 self.node(baserev))
2099 2093
2100 2094 if not flags and self._peek_iscensored(baserev, delta, flush):
2101 2095 flags |= REVIDX_ISCENSORED
2102 2096
2103 2097 # We assume consumers of addrevisioncb will want to retrieve
2104 2098 # the added revision, which will require a call to
2105 2099 # revision(). revision() will fast path if there is a cache
2106 2100 # hit. So, we tell _addrevision() to always cache in this case.
2107 2101 # We're only using addgroup() in the context of changegroup
2108 2102 # generation so the revision data can always be handled as raw
2109 2103 # by the flagprocessor.
2110 2104 self._addrevision(node, None, transaction, link,
2111 2105 p1, p2, flags, (baserev, delta),
2112 2106 ifh, dfh,
2113 2107 alwayscache=bool(addrevisioncb),
2114 2108 deltacomputer=deltacomputer)
2115 2109
2116 2110 if addrevisioncb:
2117 2111 addrevisioncb(self, node)
2118 2112
2119 2113 if not dfh and not self._inline:
2120 2114 # addrevision switched from inline to conventional
2121 2115 # reopen the index
2122 2116 ifh.close()
2123 2117 dfh = self._datafp("a+")
2124 2118 ifh = self._indexfp("a+")
2125 2119 finally:
2126 2120 if dfh:
2127 2121 dfh.close()
2128 2122 ifh.close()
2129 2123
2130 2124 return nodes
2131 2125
2132 2126 def iscensored(self, rev):
2133 2127 """Check if a file revision is censored."""
2134 2128 if not self._censorable:
2135 2129 return False
2136 2130
2137 2131 return self.flags(rev) & REVIDX_ISCENSORED
2138 2132
2139 2133 def _peek_iscensored(self, baserev, delta, flush):
2140 2134 """Quickly check if a delta produces a censored revision."""
2141 2135 if not self._censorable:
2142 2136 return False
2143 2137
2144 2138 # Fragile heuristic: unless new file meta keys are added alphabetically
2145 2139 # preceding "censored", all censored revisions are prefixed by
2146 2140 # "\1\ncensored:". A delta producing such a censored revision must be a
2147 2141 # full-replacement delta, so we inspect the first and only patch in the
2148 2142 # delta for this prefix.
2149 2143 hlen = struct.calcsize(">lll")
2150 2144 if len(delta) <= hlen:
2151 2145 return False
2152 2146
2153 2147 oldlen = self.rawsize(baserev)
2154 2148 newlen = len(delta) - hlen
2155 2149 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2156 2150 return False
2157 2151
2158 2152 add = "\1\ncensored:"
2159 2153 addlen = len(add)
2160 2154 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2161 2155
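A minimal sketch of the full-replacement delta shape this heuristic looks for, assuming replacediffheader packs a single (0, oldlen, newlen) hunk header as '>lll' (which is what the comparisons above rely on):

import struct

def toy_replacediffheader(oldlen, newlen):
    # one hunk replacing bytes [0, oldlen) with newlen new bytes
    return struct.pack(">lll", 0, oldlen, newlen)

tombstone = b"\1\ncensored: reason\1\n"
delta = toy_replacediffheader(1234, len(tombstone)) + tombstone

hlen = struct.calcsize(">lll")
prefix = b"\1\ncensored:"
assert delta[:hlen] == toy_replacediffheader(1234, len(delta) - hlen)
assert delta[hlen:hlen + len(prefix)] == prefix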
2162 2156 def getstrippoint(self, minlink):
2163 2157 """find the minimum rev that must be stripped to strip the linkrev
2164 2158
2165 2159 Returns a tuple containing the minimum rev and a set of all revs that
2166 2160 have linkrevs that will be broken by this strip.
2167 2161 """
2168 2162 brokenrevs = set()
2169 2163 strippoint = len(self)
2170 2164
2171 2165 heads = {}
2172 2166 futurelargelinkrevs = set()
2173 2167 for head in self.headrevs():
2174 2168 headlinkrev = self.linkrev(head)
2175 2169 heads[head] = headlinkrev
2176 2170 if headlinkrev >= minlink:
2177 2171 futurelargelinkrevs.add(headlinkrev)
2178 2172
2179 2173 # This algorithm involves walking down the rev graph, starting at the
2180 2174 # heads. Since the revs are topologically sorted according to linkrev,
2181 2175 # once all head linkrevs are below the minlink, we know there are
2182 2176 # no more revs that could have a linkrev greater than minlink.
2183 2177 # So we can stop walking.
2184 2178 while futurelargelinkrevs:
2185 2179 strippoint -= 1
2186 2180 linkrev = heads.pop(strippoint)
2187 2181
2188 2182 if linkrev < minlink:
2189 2183 brokenrevs.add(strippoint)
2190 2184 else:
2191 2185 futurelargelinkrevs.remove(linkrev)
2192 2186
2193 2187 for p in self.parentrevs(strippoint):
2194 2188 if p != nullrev:
2195 2189 plinkrev = self.linkrev(p)
2196 2190 heads[p] = plinkrev
2197 2191 if plinkrev >= minlink:
2198 2192 futurelargelinkrevs.add(plinkrev)
2199 2193
2200 2194 return strippoint, brokenrevs
2201 2195
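For a linear history the walk degenerates to a scan from the tip; this toy version with invented data shows what the return value means:

def toy_getstrippoint(linkrevs, minlink):
    # linear history: rev i has parent i - 1, linkrev linkrevs[i]
    strippoint = len(linkrevs)
    brokenrevs = set()
    while any(l >= minlink for l in linkrevs[:strippoint]):
        strippoint -= 1
        if linkrevs[strippoint] < minlink:
            brokenrevs.add(strippoint)
    return strippoint, brokenrevs

# rev 3 has linkrev 2 < minlink but sits above the strip point, so
# its linkrev will be broken by the truncation
assert toy_getstrippoint([0, 1, 3, 2, 4], minlink=3) == (2, {3})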
2202 2196 def strip(self, minlink, transaction):
2203 2197 """truncate the revlog on the first revision with a linkrev >= minlink
2204 2198
2205 2199 This function is called when we're stripping revision minlink and
2206 2200 its descendants from the repository.
2207 2201
2208 2202 We have to remove all revisions with linkrev >= minlink, because
2209 2203 the equivalent changelog revisions will be renumbered after the
2210 2204 strip.
2211 2205
2212 2206 So we truncate the revlog on the first of these revisions, and
2213 2207 trust that the caller has saved the revisions that shouldn't be
2214 2208 removed and that it'll re-add them after this truncation.
2215 2209 """
2216 2210 if len(self) == 0:
2217 2211 return
2218 2212
2219 2213 rev, _ = self.getstrippoint(minlink)
2220 2214 if rev == len(self):
2221 2215 return
2222 2216
2223 2217 # first truncate the files on disk
2224 2218 end = self.start(rev)
2225 2219 if not self._inline:
2226 2220 transaction.add(self.datafile, end)
2227 2221 end = rev * self._io.size
2228 2222 else:
2229 2223 end += rev * self._io.size
2230 2224
2231 2225 transaction.add(self.indexfile, end)
2232 2226
2233 2227 # then reset internal state in memory to forget those revisions
2234 2228 self._cache = None
2235 2229 self._chaininfocache = {}
2236 2230 self._chunkclear()
2237 2231 for x in pycompat.xrange(rev, len(self)):
2238 2232 del self.nodemap[self.node(x)]
2239 2233
2240 2234 del self.index[rev:-1]
2241 2235 self._nodepos = None
2242 2236
2243 2237 def checksize(self):
2244 2238 expected = 0
2245 2239 if len(self):
2246 2240 expected = max(0, self.end(len(self) - 1))
2247 2241
2248 2242 try:
2249 2243 with self._datafp() as f:
2250 2244 f.seek(0, 2)
2251 2245 actual = f.tell()
2252 2246 dd = actual - expected
2253 2247 except IOError as inst:
2254 2248 if inst.errno != errno.ENOENT:
2255 2249 raise
2256 2250 dd = 0
2257 2251
2258 2252 try:
2259 2253 f = self.opener(self.indexfile)
2260 2254 f.seek(0, 2)
2261 2255 actual = f.tell()
2262 2256 f.close()
2263 2257 s = self._io.size
2264 2258 i = max(0, actual // s)
2265 2259 di = actual - (i * s)
2266 2260 if self._inline:
2267 2261 databytes = 0
2268 2262 for r in self:
2269 2263 databytes += max(0, self.length(r))
2270 2264 dd = 0
2271 2265 di = actual - len(self) * s - databytes
2272 2266 except IOError as inst:
2273 2267 if inst.errno != errno.ENOENT:
2274 2268 raise
2275 2269 di = 0
2276 2270
2277 2271 return (dd, di)
2278 2272
2279 2273 def files(self):
2280 2274 res = [self.indexfile]
2281 2275 if not self._inline:
2282 2276 res.append(self.datafile)
2283 2277 return res
2284 2278
2285 2279 def emitrevisiondeltas(self, requests):
2286 2280 frev = self.rev
2287 2281
2288 2282 prevrev = None
2289 2283 for request in requests:
2290 2284 node = request.node
2291 2285 rev = frev(node)
2292 2286
2293 2287 if prevrev is None:
2294 2288 prevrev = self.index[rev][5]
2295 2289
2296 2290 # Requesting a full revision.
2297 2291 if request.basenode == nullid:
2298 2292 baserev = nullrev
2299 2293 # Requesting an explicit revision.
2300 2294 elif request.basenode is not None:
2301 2295 baserev = frev(request.basenode)
2302 2296 # Allowing us to choose.
2303 2297 else:
2304 2298 p1rev, p2rev = self.parentrevs(rev)
2305 2299 deltaparentrev = self.deltaparent(rev)
2306 2300
2307 2301 # Avoid sending full revisions when delta parent is null. Pick
2308 2302 # prev in that case. It's tempting to pick p1 in this case, as
2309 2303 # p1 will be smaller in the common case. However, computing a
2310 2304 # delta against p1 may require resolving the raw text of p1,
2311 2305 # which could be expensive. The revlog caches should have prev
2312 2306 # cached, meaning less CPU for delta generation. There is
2313 2307 # likely room to add a flag and/or config option to control this
2314 2308 # behavior.
2315 2309 if deltaparentrev == nullrev and self._storedeltachains:
2316 2310 baserev = prevrev
2317 2311
2318 2312 # Revlog is configured to use full snapshot for a reason.
2319 2313 # Stick to full snapshot.
2320 2314 elif deltaparentrev == nullrev:
2321 2315 baserev = nullrev
2322 2316
2323 2317 # Pick previous when we can't be sure the base is available
2324 2318 # on consumer.
2325 2319 elif deltaparentrev not in (p1rev, p2rev, prevrev):
2326 2320 baserev = prevrev
2327 2321 else:
2328 2322 baserev = deltaparentrev
2329 2323
2330 2324 if baserev != nullrev and not self.candelta(baserev, rev):
2331 2325 baserev = nullrev
2332 2326
2333 2327 revision = None
2334 2328 delta = None
2335 2329 baserevisionsize = None
2336 2330
2337 2331 if self.iscensored(baserev) or self.iscensored(rev):
2338 2332 try:
2339 2333 revision = self.revision(node, raw=True)
2340 2334 except error.CensoredNodeError as e:
2341 2335 revision = e.tombstone
2342 2336
2343 2337 if baserev != nullrev:
2344 2338 baserevisionsize = self.rawsize(baserev)
2345 2339
2346 2340 elif baserev == nullrev:
2347 2341 revision = self.revision(node, raw=True)
2348 2342 else:
2349 2343 delta = self.revdiff(baserev, rev)
2350 2344
2351 2345 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
2352 2346
2353 2347 yield revlogrevisiondelta(
2354 2348 node=node,
2355 2349 p1node=request.p1node,
2356 2350 p2node=request.p2node,
2357 2351 linknode=request.linknode,
2358 2352 basenode=self.node(baserev),
2359 2353 flags=self.flags(rev) | extraflags,
2360 2354 baserevisionsize=baserevisionsize,
2361 2355 revision=revision,
2362 2356 delta=delta)
2363 2357
2364 2358 prevrev = rev
2365 2359
2366 2360 DELTAREUSEALWAYS = 'always'
2367 2361 DELTAREUSESAMEREVS = 'samerevs'
2368 2362 DELTAREUSENEVER = 'never'
2369 2363
2370 2364 DELTAREUSEFULLADD = 'fulladd'
2371 2365
2372 2366 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2373 2367
2374 2368 def clone(self, tr, destrevlog, addrevisioncb=None,
2375 2369 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2376 2370 """Copy this revlog to another, possibly with format changes.
2377 2371
2378 2372 The destination revlog will contain the same revisions and nodes.
2379 2373 However, it may not be bit-for-bit identical due to e.g. delta encoding
2380 2374 differences.
2381 2375
2382 2376 The ``deltareuse`` argument controls how deltas from the existing revlog
2383 2377 are preserved in the destination revlog. The argument can have the
2384 2378 following values:
2385 2379
2386 2380 DELTAREUSEALWAYS
2387 2381 Deltas will always be reused (if possible), even if the destination
2388 2382 revlog would not select the same revisions for the delta. This is the
2389 2383 fastest mode of operation.
2390 2384 DELTAREUSESAMEREVS
2391 2385 Deltas will be reused if the destination revlog would pick the same
2392 2386 revisions for the delta. This mode strikes a balance between speed
2393 2387 and optimization.
2394 2388 DELTAREUSENEVER
2395 2389 Deltas will never be reused. This is the slowest mode of execution.
2396 2390 This mode can be used to recompute deltas (e.g. if the diff/delta
2397 2391 algorithm changes).
2398 2392
2399 2393 Delta computation can be slow, so the choice of delta reuse policy can
2400 2394 significantly affect run time.
2401 2395
2402 2396 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2403 2397 two extremes. Deltas will be reused if they are appropriate. But if the
2404 2398 delta could choose a better revision, it will do so. This means if you
2405 2399 are converting a non-generaldelta revlog to a generaldelta revlog,
2406 2400 deltas will be recomputed if the delta's parent isn't a parent of the
2407 2401 revision.
2408 2402
2409 2403 In addition to the delta policy, the ``deltabothparents`` argument
2410 2404 controls whether to compute deltas against both parents for merges.
2411 2405 If ``None``, the destination revlog's current setting is used.
2412 2406 """
2413 2407 if deltareuse not in self.DELTAREUSEALL:
2414 2408 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2415 2409
2416 2410 if len(destrevlog):
2417 2411 raise ValueError(_('destination revlog is not empty'))
2418 2412
2419 2413 if getattr(self, 'filteredrevs', None):
2420 2414 raise ValueError(_('source revlog has filtered revisions'))
2421 2415 if getattr(destrevlog, 'filteredrevs', None):
2422 2416 raise ValueError(_('destination revlog has filtered revisions'))
2423 2417
2424 2418 # lazydeltabase controls whether to reuse a cached delta, if possible.
2425 2419 oldlazydeltabase = destrevlog._lazydeltabase
2426 2420 oldamd = destrevlog._deltabothparents
2427 2421
2428 2422 try:
2429 2423 if deltareuse == self.DELTAREUSEALWAYS:
2430 2424 destrevlog._lazydeltabase = True
2431 2425 elif deltareuse == self.DELTAREUSESAMEREVS:
2432 2426 destrevlog._lazydeltabase = False
2433 2427
2434 2428 destrevlog._deltabothparents = deltabothparents or oldamd
2435 2429
2436 2430 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2437 2431 self.DELTAREUSESAMEREVS)
2438 2432
2439 2433 deltacomputer = deltautil.deltacomputer(destrevlog)
2440 2434 index = self.index
2441 2435 for rev in self:
2442 2436 entry = index[rev]
2443 2437
2444 2438 # Some classes override linkrev to take filtered revs into
2445 2439 # account. Use raw entry from index.
2446 2440 flags = entry[0] & 0xffff
2447 2441 linkrev = entry[4]
2448 2442 p1 = index[entry[5]][7]
2449 2443 p2 = index[entry[6]][7]
2450 2444 node = entry[7]
2451 2445
2452 2446 # (Possibly) reuse the delta from the revlog if allowed and
2453 2447 # the revlog chunk is a delta.
2454 2448 cachedelta = None
2455 2449 rawtext = None
2456 2450 if populatecachedelta:
2457 2451 dp = self.deltaparent(rev)
2458 2452 if dp != nullrev:
2459 2453 cachedelta = (dp, bytes(self._chunk(rev)))
2460 2454
2461 2455 if not cachedelta:
2462 2456 rawtext = self.revision(rev, raw=True)
2463 2457
2464 2458
2465 2459 if deltareuse == self.DELTAREUSEFULLADD:
2466 2460 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2467 2461 cachedelta=cachedelta,
2468 2462 node=node, flags=flags,
2469 2463 deltacomputer=deltacomputer)
2470 2464 else:
2471 2465 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2472 2466 checkambig=False)
2473 2467 dfh = None
2474 2468 if not destrevlog._inline:
2475 2469 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2476 2470 try:
2477 2471 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2478 2472 p2, flags, cachedelta, ifh, dfh,
2479 2473 deltacomputer=deltacomputer)
2480 2474 finally:
2481 2475 if dfh:
2482 2476 dfh.close()
2483 2477 ifh.close()
2484 2478
2485 2479 if addrevisioncb:
2486 2480 addrevisioncb(self, rev, node)
2487 2481 finally:
2488 2482 destrevlog._lazydeltabase = oldlazydeltabase
2489 2483 destrevlog._deltabothparents = oldamd
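A hedged usage sketch of clone(); srcopener, dstopener and repo are placeholders standing in for real repository plumbing:

# hypothetical: rewrite a revlog while recomputing every delta
src = revlog(srcopener, indexfile=b'data/foo.i')
dst = revlog(dstopener, indexfile=b'data/foo.i')
with repo.transaction(b'upgrade') as tr:
    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)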
@@ -1,739 +1,745 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import heapq
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import (
17 17 nullrev,
18 18 )
19 19 from ..i18n import _
20 20
21 21 from .constants import (
22 22 REVIDX_ISCENSORED,
23 23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 24 )
25 25
26 26 from ..thirdparty import (
27 27 attr,
28 28 )
29 29
30 30 from .. import (
31 31 error,
32 32 mdiff,
33 33 )
34 34
35 35 RevlogError = error.RevlogError
36 36 CensoredNodeError = error.CensoredNodeError
37 37
38 38 # maximum <delta-chain-data>/<revision-text-length> ratio
39 39 LIMIT_DELTA2TEXT = 2
40 40
41 41 class _testrevlog(object):
42 42 """minimalist fake revlog to use in doctests"""
43 43
44 44 def __init__(self, data, density=0.5, mingap=0):
45 45 """data is a list of revision payload boundaries"""
46 46 self._data = data
47 47 self._srdensitythreshold = density
48 48 self._srmingapsize = mingap
49 49
50 50 def start(self, rev):
51 51 if rev == 0:
52 52 return 0
53 53 return self._data[rev - 1]
54 54
55 55 def end(self, rev):
56 56 return self._data[rev]
57 57
58 58 def length(self, rev):
59 59 return self.end(rev) - self.start(rev)
60 60
61 61 def __len__(self):
62 62 return len(self._data)
63 63
64 64 def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
65 65 """slice revs to reduce the amount of unrelated data to be read from disk.
66 66
67 67 ``revs`` is sliced into groups that should be read in one time.
68 68 Assume that revs are sorted.
69 69
70 70 The initial chunk is sliced until the overall density (payload/chunks-span
71 71 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
72 72 `revlog._srmingapsize` is skipped.
73 73
74 74 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
75 75 For consistency with other slicing choices, this limit won't go lower than
76 76 `revlog._srmingapsize`.
77 77 
78 78 If an individual revision's chunk is larger than this limit, it will still
79 79 be yielded individually.
80 80
81 81 >>> revlog = _testrevlog([
82 82 ... 5, #00 (5)
83 83 ... 10, #01 (5)
84 84 ... 12, #02 (2)
85 85 ... 12, #03 (empty)
86 86 ... 27, #04 (15)
87 87 ... 31, #05 (4)
88 88 ... 31, #06 (empty)
89 89 ... 42, #07 (11)
90 90 ... 47, #08 (5)
91 91 ... 47, #09 (empty)
92 92 ... 48, #10 (1)
93 93 ... 51, #11 (3)
94 94 ... 74, #12 (23)
95 95 ... 85, #13 (11)
96 96 ... 86, #14 (1)
97 97 ... 91, #15 (5)
98 98 ... ])
99 99
100 100 >>> list(slicechunk(revlog, list(range(16))))
101 101 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
102 102 >>> list(slicechunk(revlog, [0, 15]))
103 103 [[0], [15]]
104 104 >>> list(slicechunk(revlog, [0, 11, 15]))
105 105 [[0], [11], [15]]
106 106 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
107 107 [[0], [11, 13, 15]]
108 108 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
109 109 [[1, 2], [5, 8, 10, 11], [14]]
110 110
111 111 Slicing with a maximum chunk size
112 112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
113 113 [[0], [11], [13], [15]]
114 114 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
115 115 [[0], [11], [13, 15]]
116 116 """
117 117 if targetsize is not None:
118 118 targetsize = max(targetsize, revlog._srmingapsize)
119 119 # targetsize should not be specified when evaluating delta candidates:
120 120 # * targetsize is used to ensure we stay within specification when reading,
121 121 # * deltainfo is used to pick a good delta chain when writing.
122 122 if not (deltainfo is None or targetsize is None):
123 123 msg = 'cannot use `targetsize` with a `deltainfo`'
124 124 raise error.ProgrammingError(msg)
125 125 for chunk in _slicechunktodensity(revlog, revs,
126 126 deltainfo,
127 127 revlog._srdensitythreshold,
128 128 revlog._srmingapsize):
129 129 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
130 130 yield subchunk
131 131
132 132 def _slicechunktosize(revlog, revs, targetsize=None):
133 133 """slice revs to match the target size
134 134
135 135 This is intended to be used on chunks that density slicing selected but that
136 136 are still too large compared to the read guarantee of revlog. This might
137 137 happen when "minimal gap size" interrupted the slicing or when chains are
138 138 built in a way that creates large blocks next to each other.
139 139
140 140 >>> revlog = _testrevlog([
141 141 ... 3, #0 (3)
142 142 ... 5, #1 (2)
143 143 ... 6, #2 (1)
144 144 ... 8, #3 (2)
145 145 ... 8, #4 (empty)
146 146 ... 11, #5 (3)
147 147 ... 12, #6 (1)
148 148 ... 13, #7 (1)
149 149 ... 14, #8 (1)
150 150 ... ])
151 151
152 152 Cases where chunk is already small enough
153 153 >>> list(_slicechunktosize(revlog, [0], 3))
154 154 [[0]]
155 155 >>> list(_slicechunktosize(revlog, [6, 7], 3))
156 156 [[6, 7]]
157 157 >>> list(_slicechunktosize(revlog, [0], None))
158 158 [[0]]
159 159 >>> list(_slicechunktosize(revlog, [6, 7], None))
160 160 [[6, 7]]
161 161
162 162 cases where we need actual slicing
163 163 >>> list(_slicechunktosize(revlog, [0, 1], 3))
164 164 [[0], [1]]
165 165 >>> list(_slicechunktosize(revlog, [1, 3], 3))
166 166 [[1], [3]]
167 167 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
168 168 [[1, 2], [3]]
169 169 >>> list(_slicechunktosize(revlog, [3, 5], 3))
170 170 [[3], [5]]
171 171 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
172 172 [[3], [5]]
173 173 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
174 174 [[5], [6, 7, 8]]
175 175 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
176 176 [[0], [1, 2], [3], [5], [6, 7, 8]]
177 177
178 178 Case with too large individual chunk (must return valid chunk)
179 179 >>> list(_slicechunktosize(revlog, [0, 1], 2))
180 180 [[0], [1]]
181 181 >>> list(_slicechunktosize(revlog, [1, 3], 1))
182 182 [[1], [3]]
183 183 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
184 184 [[3], [5]]
185 185 """
186 186 assert targetsize is None or 0 <= targetsize
187 187 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
188 188 yield revs
189 189 return
190 190
191 191 startrevidx = 0
192 192 startdata = revlog.start(revs[0])
193 193 endrevidx = 0
194 194 iterrevs = enumerate(revs)
195 195 next(iterrevs) # skip first rev.
196 196 for idx, r in iterrevs:
197 197 span = revlog.end(r) - startdata
198 198 if span <= targetsize:
199 199 endrevidx = idx
200 200 else:
201 201 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
202 202 if chunk:
203 203 yield chunk
204 204 startrevidx = idx
205 205 startdata = revlog.start(r)
206 206 endrevidx = idx
207 207 yield _trimchunk(revlog, revs, startrevidx)
208 208
209 209 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
210 210 mingapsize=0):
211 211 """slice revs to reduce the amount of unrelated data to be read from disk.
212 212
213 213 ``revs`` is sliced into groups that should be read in one time.
214 214 Assume that revs are sorted.
215 215
216 216 ``deltainfo`` is a _deltainfo instance of a revision that we would append
217 217 to the top of the revlog.
218 218
219 219 The initial chunk is sliced until the overall density (payload/chunks-span
220 220 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
221 221 skipped.
222 222
223 223 >>> revlog = _testrevlog([
224 224 ... 5, #00 (5)
225 225 ... 10, #01 (5)
226 226 ... 12, #02 (2)
227 227 ... 12, #03 (empty)
228 228 ... 27, #04 (15)
229 229 ... 31, #05 (4)
230 230 ... 31, #06 (empty)
231 231 ... 42, #07 (11)
232 232 ... 47, #08 (5)
233 233 ... 47, #09 (empty)
234 234 ... 48, #10 (1)
235 235 ... 51, #11 (3)
236 236 ... 74, #12 (23)
237 237 ... 85, #13 (11)
238 238 ... 86, #14 (1)
239 239 ... 91, #15 (5)
240 240 ... ])
241 241
242 242 >>> list(_slicechunktodensity(revlog, list(range(16))))
243 243 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
244 244 >>> list(_slicechunktodensity(revlog, [0, 15]))
245 245 [[0], [15]]
246 246 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
247 247 [[0], [11], [15]]
248 248 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
249 249 [[0], [11, 13, 15]]
250 250 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
251 251 [[1, 2], [5, 8, 10, 11], [14]]
252 252 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
253 253 ... mingapsize=20))
254 254 [[1, 2, 3, 5, 8, 10, 11], [14]]
255 255 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
256 256 ... targetdensity=0.95))
257 257 [[1, 2], [5], [8, 10, 11], [14]]
258 258 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
259 259 ... targetdensity=0.95, mingapsize=12))
260 260 [[1, 2], [5, 8, 10, 11], [14]]
261 261 """
262 262 start = revlog.start
263 263 length = revlog.length
264 264
265 265 if len(revs) <= 1:
266 266 yield revs
267 267 return
268 268
269 269 nextrev = len(revlog)
270 270 nextoffset = revlog.end(nextrev - 1)
271 271
272 272 if deltainfo is None:
273 273 deltachainspan = segmentspan(revlog, revs)
274 274 chainpayload = sum(length(r) for r in revs)
275 275 else:
276 276 deltachainspan = deltainfo.distance
277 277 chainpayload = deltainfo.compresseddeltalen
278 278
279 279 if deltachainspan < mingapsize:
280 280 yield revs
281 281 return
282 282
283 283 readdata = deltachainspan
284 284
285 285 if deltachainspan:
286 286 density = chainpayload / float(deltachainspan)
287 287 else:
288 288 density = 1.0
289 289
290 290 if density >= targetdensity:
291 291 yield revs
292 292 return
293 293
294 294 if deltainfo is not None and deltainfo.deltalen:
295 295 revs = list(revs)
296 296 revs.append(nextrev)
297 297
298 298 # Store the gaps in a heap to have them sorted by decreasing size
299 299 gapsheap = []
300 300 heapq.heapify(gapsheap)
301 301 prevend = None
302 302 for i, rev in enumerate(revs):
303 303 if rev < nextrev:
304 304 revstart = start(rev)
305 305 revlen = length(rev)
306 306 else:
307 307 revstart = nextoffset
308 308 revlen = deltainfo.deltalen
309 309
310 310 # Skip empty revisions to form larger holes
311 311 if revlen == 0:
312 312 continue
313 313
314 314 if prevend is not None:
315 315 gapsize = revstart - prevend
316 316 # only consider holes that are large enough
317 317 if gapsize > mingapsize:
318 318 heapq.heappush(gapsheap, (-gapsize, i))
319 319
320 320 prevend = revstart + revlen
321 321
322 322 # Collect the indices of the largest holes until the density is acceptable
323 323 indicesheap = []
324 324 heapq.heapify(indicesheap)
325 325 while gapsheap and density < targetdensity:
326 326 oppgapsize, gapidx = heapq.heappop(gapsheap)
327 327
328 328 heapq.heappush(indicesheap, gapidx)
329 329
330 330 # the gap sizes are stored as negatives to be sorted decreasingly
331 331 # by the heap
332 332 readdata -= (-oppgapsize)
333 333 if readdata > 0:
334 334 density = chainpayload / float(readdata)
335 335 else:
336 336 density = 1.0
337 337
338 338 # Cut the revs at collected indices
339 339 previdx = 0
340 340 while indicesheap:
341 341 idx = heapq.heappop(indicesheap)
342 342
343 343 chunk = _trimchunk(revlog, revs, previdx, idx)
344 344 if chunk:
345 345 yield chunk
346 346
347 347 previdx = idx
348 348
349 349 chunk = _trimchunk(revlog, revs, previdx)
350 350 if chunk:
351 351 yield chunk
352 352
353 353 def _trimchunk(revlog, revs, startidx, endidx=None):
354 354 """returns revs[startidx:endidx] without empty trailing revs
355 355
356 356 Doctest Setup
357 357 >>> revlog = _testrevlog([
358 358 ... 5, #0
359 359 ... 10, #1
360 360 ... 12, #2
361 361 ... 12, #3 (empty)
362 362 ... 17, #4
363 363 ... 21, #5
364 364 ... 21, #6 (empty)
365 365 ... ])
366 366
367 367 Contiguous cases:
368 368 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
369 369 [0, 1, 2, 3, 4, 5]
370 370 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
371 371 [0, 1, 2, 3, 4]
372 372 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
373 373 [0, 1, 2]
374 374 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
375 375 [2]
376 376 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
377 377 [3, 4, 5]
378 378 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
379 379 [3, 4]
380 380
381 381 Discontiguous cases:
382 382 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
383 383 [1, 3, 5]
384 384 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
385 385 [1]
386 386 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
387 387 [3, 5]
388 388 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
389 389 [3, 5]
390 390 """
391 391 length = revlog.length
392 392
393 393 if endidx is None:
394 394 endidx = len(revs)
395 395
396 396 # If we have a non-empty delta candidate, there is nothing to trim
397 397 if revs[endidx - 1] < len(revlog):
398 398 # Trim empty revs at the end, except the very first revision of a chain
399 399 while (endidx > 1
400 400 and endidx > startidx
401 401 and length(revs[endidx - 1]) == 0):
402 402 endidx -= 1
403 403
404 404 return revs[startidx:endidx]
405 405
406 406 def segmentspan(revlog, revs, deltainfo=None):
407 407 """Get the byte span of a segment of revisions
408 408
409 409 revs is a sorted array of revision numbers
410 410
411 411 >>> revlog = _testrevlog([
412 412 ... 5, #0
413 413 ... 10, #1
414 414 ... 12, #2
415 415 ... 12, #3 (empty)
416 416 ... 17, #4
417 417 ... ])
418 418
419 419 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
420 420 17
421 421 >>> segmentspan(revlog, [0, 4])
422 422 17
423 423 >>> segmentspan(revlog, [3, 4])
424 424 5
425 425 >>> segmentspan(revlog, [1, 2, 3,])
426 426 7
427 427 >>> segmentspan(revlog, [1, 3])
428 428 7
429 429 """
430 430 if not revs:
431 431 return 0
432 432 if deltainfo is not None and len(revlog) <= revs[-1]:
433 433 if len(revs) == 1:
434 434 return deltainfo.deltalen
435 435 offset = revlog.end(len(revlog) - 1)
436 436 end = deltainfo.deltalen + offset
437 437 else:
438 438 end = revlog.end(revs[-1])
439 439 return end - revlog.start(revs[0])
440 440
441 441 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
442 442 """build full text from a (base, delta) pair and other metadata"""
443 443 # special case deltas which replace entire base; no need to decode
444 444 # base revision. this neatly avoids censored bases, which throw when
445 445 # they're decoded.
446 446 hlen = struct.calcsize(">lll")
447 447 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
448 448 len(delta) - hlen):
449 449 fulltext = delta[hlen:]
450 450 else:
451 451 # deltabase is rawtext before changed by flag processors, which is
452 452 # equivalent to non-raw text
453 453 basetext = revlog.revision(baserev, _df=fh, raw=False)
454 454 fulltext = mdiff.patch(basetext, delta)
455 455
456 456 try:
457 457 res = revlog._processflags(fulltext, flags, 'read', raw=True)
458 458 fulltext, validatehash = res
459 459 if validatehash:
460 460 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
461 461 if flags & REVIDX_ISCENSORED:
462 462 raise RevlogError(_('node %s is not censored') % expectednode)
463 463 except CensoredNodeError:
464 464 # must pass the censored index flag to add censored revisions
465 465 if not flags & REVIDX_ISCENSORED:
466 466 raise
467 467 return fulltext
468 468
469 469 @attr.s(slots=True, frozen=True)
470 470 class _deltainfo(object):
471 471 distance = attr.ib()
472 472 deltalen = attr.ib()
473 473 data = attr.ib()
474 474 base = attr.ib()
475 475 chainbase = attr.ib()
476 476 chainlen = attr.ib()
477 477 compresseddeltalen = attr.ib()
478 478 snapshotdepth = attr.ib()
479 479
480 480 def isgooddeltainfo(revlog, deltainfo, revinfo):
481 481 """Returns True if the given delta is good. Good means that it is within
482 482 the disk span, disk size, and chain length bounds that we know to be
483 483 performant."""
484 484 if deltainfo is None:
485 485 return False
486 486
487 487 # - 'deltainfo.distance' is the distance from the base revision --
488 488 # bounding it limits the amount of I/O we need to do.
489 489 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
490 490 # deltas we need to apply -- bounding it limits the amount of CPU
491 491 # we consume.
492 492
493 493 if revlog._sparserevlog:
494 494 # As sparse-read will be used, we can consider that the distance,
495 495 # instead of being the span of the whole chunk,
496 496 # is the span of the largest read chunk
497 497 base = deltainfo.base
498 498
499 499 if base != nullrev:
500 500 deltachain = revlog._deltachain(base)[0]
501 501 else:
502 502 deltachain = []
503 503
504 504 # search for the first non-snapshot revision
505 505 for idx, r in enumerate(deltachain):
506 506 if not revlog.issnapshot(r):
507 507 break
508 508 deltachain = deltachain[idx:]
509 509 chunks = slicechunk(revlog, deltachain, deltainfo)
510 510 all_span = [segmentspan(revlog, revs, deltainfo)
511 511 for revs in chunks]
512 512 distance = max(all_span)
513 513 else:
514 514 distance = deltainfo.distance
515 515
516 516 textlen = revinfo.textlen
517 517 defaultmax = textlen * 4
518 518 maxdist = revlog._maxdeltachainspan
519 519 if not maxdist:
520 520 maxdist = distance # ensure the conditional passes
521 521 maxdist = max(maxdist, defaultmax)
522 522 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
523 523 # In multiple places, we are ignoring irrelevant data ranges below a
524 524 # certain size. We also apply this tradeoff here and relax the span
525 525 # constraint for small enough content.
526 526 maxdist = revlog._srmingapsize
527 527
528 528 # Bad delta from read span:
529 529 #
530 530 # If the span of data read is larger than the maximum allowed.
531 531 if maxdist < distance:
532 532 return False
533 533
534 534 # Bad delta from new delta size:
535 535 #
536 536 # If the delta size is larger than the target text, storing the
537 537 # delta will be inefficient.
538 538 if textlen < deltainfo.deltalen:
539 539 return False
540 540
541 541 # Bad delta from cumulated payload size:
542 542 #
543 543 # If the sum of the deltas gets larger than K * target text length.
544 544 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
545 545 return False
546 546
547 547 # Bad delta from chain length:
548 548 #
549 549 # If the number of deltas in the chain gets too high.
550 550 if (revlog._maxchainlen
551 551 and revlog._maxchainlen < deltainfo.chainlen):
552 552 return False
553 553
554 554 # bad delta from intermediate snapshot size limit
555 555 #
556 556 # If an intermediate snapshot size is higher than the limit. The
557 557 # limit exists to prevent endless chains of intermediate deltas from
558 558 # being created.
559 559 if (deltainfo.snapshotdepth is not None and
560 560 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
561 561 return False
562 562
563 563 # bad delta if new intermediate snapshot is larger than the previous
564 564 # snapshot
565 565 if (deltainfo.snapshotdepth
566 566 and revlog.length(deltainfo.base) < deltainfo.deltalen):
567 567 return False
568 568
569 569 return True
570 570
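To make the intermediate-snapshot bound concrete: a snapshot at depth d may use at most textlen >> d bytes, so each extra level halves the budget. A minimal sketch, with snapshotsizeok an invented name:

def snapshotsizeok(textlen, snapshotdepth, deltalen):
    # mirrors: not ((textlen >> snapshotdepth) < deltalen)
    return (textlen >> snapshotdepth) >= deltalen

assert snapshotsizeok(1000, 2, 250)      # 1000 >> 2 == 250: allowed
assert not snapshotsizeok(1000, 2, 251)  # one byte over budget: rejected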
571 571 class deltacomputer(object):
572 572 def __init__(self, revlog):
573 573 self.revlog = revlog
574 574
575 575 def _getcandidaterevs(self, p1, p2, cachedelta):
576 576 """
577 577 Provides revisions that present an interest to be diffed against,
578 578 grouped by level of easiness.
579 579 """
580 580 revlog = self.revlog
581 581 gdelta = revlog._generaldelta
582 582 curr = len(revlog)
583 583 prev = curr - 1
584 584 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
585 585
586 586 # should we try to build a delta?
587 587 if prev != nullrev and revlog._storedeltachains:
588 588 tested = set()
589 589 # This condition is true most of the time when processing
590 590 # changegroup data into a generaldelta repo. The only time it
591 591 # isn't true is if this is the first revision in a delta chain
592 592 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
593 593 if cachedelta and gdelta and revlog._lazydeltabase:
594 594 # Assume what we received from the server is a good choice
595 595 # build delta will reuse the cache
596 596 yield (cachedelta[0],)
597 597 tested.add(cachedelta[0])
598 598
599 599 if gdelta:
600 600 # exclude already lazy tested base if any
601 601 parents = [p for p in (p1r, p2r)
602 602 if p != nullrev and p not in tested]
603 603
604 604 if not revlog._deltabothparents and len(parents) == 2:
605 605 parents.sort()
606 606 # To minimize the chance of having to build a fulltext,
607 607 # pick first whichever parent is closest to us (max rev)
608 608 yield (parents[1],)
609 609 # then the other one (min rev) if the first did not fit
610 610 yield (parents[0],)
611 611 tested.update(parents)
612 612 elif len(parents) > 0:
613 613 # Test all parents (1 or 2), and keep the best candidate
614 614 yield parents
615 615 tested.update(parents)
616 616
617 617 if prev not in tested:
618 618 # other approaches failed; try against prev to hopefully save us a
619 619 # fulltext.
620 620 yield (prev,)
621 621 tested.add(prev)
622 622
623 623 def buildtext(self, revinfo, fh):
624 624 """Builds a fulltext version of a revision
625 625
626 626 revinfo: _revisioninfo instance that contains all needed info
627 627 fh: file handle to either the .i or the .d revlog file,
628 628 depending on whether it is inlined or not
629 629 """
630 630 btext = revinfo.btext
631 631 if btext[0] is not None:
632 632 return btext[0]
633 633
634 634 revlog = self.revlog
635 635 cachedelta = revinfo.cachedelta
636 636 baserev = cachedelta[0]
637 637 delta = cachedelta[1]
638 638
639 639 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
640 640 revinfo.p1, revinfo.p2,
641 641 revinfo.flags, revinfo.node)
642 642 return fulltext
643 643
644 644 def _builddeltadiff(self, base, revinfo, fh):
645 645 revlog = self.revlog
646 646 t = self.buildtext(revinfo, fh)
647 647 if revlog.iscensored(base):
648 648 # deltas based on a censored revision must replace the
649 649 # full content in one patch, so delta works everywhere
650 650 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
651 651 delta = header + t
652 652 else:
653 653 ptext = revlog.revision(base, _df=fh, raw=True)
654 654 delta = mdiff.textdiff(ptext, t)
655 655
656 656 return delta
657 657
    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

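    # Illustrative sketch (not part of the original source): 'dist' above
    # is the number of bytes a reader may have to scan, measured from the
    # start of the chain's base to the end of the new delta. For example,
    # with revlog.start(chainbase) == 1000, revlog.end(len(revlog) - 1) ==
    # 5000 and a 200-byte compressed delta:
    #
    #   dist = 200 + 5000 - 1000  # == 4200 bytes of span
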
    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable delta (a _deltainfo instance), trying
        candidates in the order produced by _getcandidaterevs, or None if
        no delta is suitable.
        """
        if not revinfo.textlen:
            return None # empty files do not need a delta

        # no delta for flag-processor revisions (see "candelta" for why);
        # we do not call candelta here because only this one revision needs
        # testing, and calling it would fetch the flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return None

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that can never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullrev; this will be
                # handled by a fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
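
    # Illustrative sketch (not part of the original source): the typical
    # call sequence when storing a new revision. 'revinfo' and the data
    # file handle 'dfh' are assumed to come from the surrounding code:
    #
    #   dc = deltacomputer(revlog)
    #   deltainfo = dc.finddeltainfo(revinfo, dfh)
    #   if deltainfo is None:
    #       ...  # store the revision as a fulltext
    #   else:
    #       ...  # store deltainfo.data against deltainfo.base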