py3: use util.forcebytestr() to convert error messages to bytes...
Pulkit Goyal
r39944:d70e620e default
@@ -1,2699 +1,2700 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to prevent pyflakes unused-import warnings.
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100
101 101 # Aliased for performance.
102 102 _zlibdecompress = zlib.decompress
103 103
104 104 # max size of revlog with inline data
105 105 _maxinline = 131072
106 106 _chunksize = 1048576
107 107
108 108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 109 _flagprocessors = {
110 110 REVIDX_ISCENSORED: None,
111 111 }
112 112
113 113 # Flag processors for REVIDX_ELLIPSIS.
114 114 def ellipsisreadprocessor(rl, text):
115 115 return text, False
116 116
117 117 def ellipsiswriteprocessor(rl, text):
118 118 return text, False
119 119
120 120 def ellipsisrawprocessor(rl, text):
121 121 return False
122 122
123 123 ellipsisprocessor = (
124 124 ellipsisreadprocessor,
125 125 ellipsiswriteprocessor,
126 126 ellipsisrawprocessor,
127 127 )
128 128
129 129 def addflagprocessor(flag, processor):
130 130 """Register a flag processor on a revision data flag.
131 131
132 132 Invariant:
133 133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 135 - Only one flag processor can be registered on a specific flag.
136 136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 137 following signatures:
138 138 - (read) f(self, rawtext) -> text, bool
139 139 - (write) f(self, text) -> rawtext, bool
140 140 - (raw) f(self, rawtext) -> bool
141 141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 142 directly visible to the user.
143 143 The boolean returned by these transforms is used to determine whether
144 144 the returned text can be used for hash integrity checking. For example,
145 145 if "write" returns False, then "text" is used to generate the hash. If
146 146 "write" returns True, the "rawtext" returned by "write" should be used
147 147 to generate the hash. Usually, "write" and "read" return different
148 148 booleans, and "raw" returns the same boolean as "write".
149 149
150 150 Note: The 'raw' transform is used for changegroup generation and in some
151 151 debug commands. In this case the transform only indicates whether the
152 152 contents can be used for hash integrity checks.
153 153 """
154 154 if not flag & REVIDX_KNOWN_FLAGS:
155 155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
156 156 raise error.ProgrammingError(msg)
157 157 if flag not in REVIDX_FLAGS_ORDER:
158 158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
159 159 raise error.ProgrammingError(msg)
160 160 if flag in _flagprocessors:
161 161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
162 162 raise error.Abort(msg)
163 163 _flagprocessors[flag] = processor
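# Illustrative sketch (hypothetical flag): registering a no-op processor for
# a flag REVIDX_MYFLAG that is assumed to already appear in REVIDX_KNOWN_FLAGS
# and REVIDX_FLAGS_ORDER:
#
#     def myflagread(self, rawtext):
#         return rawtext, False
#     def myflagwrite(self, text):
#         return text, False
#     def myflagraw(self, rawtext):
#         return False
#     addflagprocessor(REVIDX_MYFLAG, (myflagread, myflagwrite, myflagraw))
#
# Registering the same flag twice raises error.Abort, and flags missing from
# REVIDX_KNOWN_FLAGS raise error.ProgrammingError, per the checks above.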
164 164
165 165 def getoffset(q):
166 166 return int(q >> 16)
167 167
168 168 def gettype(q):
169 169 return int(q & 0xFFFF)
170 170
171 171 def offset_type(offset, type):
172 172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
173 173 raise ValueError('unknown revlog index flags')
174 174 return int(int(offset) << 16 | type)
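# Illustrative sketch: the first index field packs a byte offset in the upper
# bits and 16 flag bits in the lower bits, so the helpers above round-trip:
#
#     packed = offset_type(4096, 0)   # flag value 0 is always permitted
#     assert getoffset(packed) == 4096
#     assert gettype(packed) == 0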
175 175
176 176 @attr.s(slots=True, frozen=True)
177 177 class _revisioninfo(object):
178 178 """Information about a revision that allows building its fulltext
179 179 node: expected hash of the revision
180 180 p1, p2: parent revs of the revision
181 181 btext: built text cache consisting of a one-element list
182 182 cachedelta: (baserev, uncompressed_delta) or None
183 183 flags: flags associated with the revision storage
184 184
185 185 One of btext[0] or cachedelta must be set.
186 186 """
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208 @interfaceutil.implementer(repository.iverifyproblem)
209 209 @attr.s(frozen=True)
210 210 class revlogproblem(object):
211 211 warning = attr.ib(default=None)
212 212 error = attr.ib(default=None)
213 213 node = attr.ib(default=None)
214 214
215 215 # index v0:
216 216 # 4 bytes: offset
217 217 # 4 bytes: compressed length
218 218 # 4 bytes: base rev
219 219 # 4 bytes: link rev
220 220 # 20 bytes: parent 1 nodeid
221 221 # 20 bytes: parent 2 nodeid
222 222 # 20 bytes: nodeid
223 223 indexformatv0 = struct.Struct(">4l20s20s20s")
224 224 indexformatv0_pack = indexformatv0.pack
225 225 indexformatv0_unpack = indexformatv0.unpack
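# Illustrative check: per the layout above, a v0 index record occupies
# 4 * 4 + 3 * 20 = 76 bytes, i.e. indexformatv0.size == 76.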
226 226
227 227 class revlogoldindex(list):
228 228 def __getitem__(self, i):
229 229 if i == -1:
230 230 return (0, 0, 0, -1, -1, -1, -1, nullid)
231 231 return list.__getitem__(self, i)
232 232
233 233 class revlogoldio(object):
234 234 def __init__(self):
235 235 self.size = indexformatv0.size
236 236
237 237 def parseindex(self, data, inline):
238 238 s = self.size
239 239 index = []
240 240 nodemap = {nullid: nullrev}
241 241 n = off = 0
242 242 l = len(data)
243 243 while off + s <= l:
244 244 cur = data[off:off + s]
245 245 off += s
246 246 e = indexformatv0_unpack(cur)
247 247 # transform to revlogv1 format
248 248 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
249 249 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
250 250 index.append(e2)
251 251 nodemap[e[6]] = n
252 252 n += 1
253 253
254 254 return revlogoldindex(index), nodemap, None
255 255
256 256 def packentry(self, entry, node, version, rev):
257 257 if gettype(entry[0]):
258 258 raise error.RevlogError(_('index entry flags need revlog '
259 259 'version 1'))
260 260 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
261 261 node(entry[5]), node(entry[6]), entry[7])
262 262 return indexformatv0_pack(*e2)
263 263
264 264 # index ng:
265 265 # 6 bytes: offset
266 266 # 2 bytes: flags
267 267 # 4 bytes: compressed length
268 268 # 4 bytes: uncompressed length
269 269 # 4 bytes: base rev
270 270 # 4 bytes: link rev
271 271 # 4 bytes: parent 1 rev
272 272 # 4 bytes: parent 2 rev
273 273 # 32 bytes: nodeid
274 274 indexformatng = struct.Struct(">Qiiiiii20s12x")
275 275 indexformatng_pack = indexformatng.pack
276 276 versionformat = struct.Struct(">I")
277 277 versionformat_pack = versionformat.pack
278 278 versionformat_unpack = versionformat.unpack
279 279
280 280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
281 281 # signed integer)
282 282 _maxentrysize = 0x7fffffff
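# Illustrative check: per the layout above, a v1 ("ng") index record occupies
# 8 + 6 * 4 + 20 + 12 (padding) = 64 bytes, i.e. indexformatng.size == 64.
# The 8-byte first field packs the 6-byte offset and 2-byte flags, matching
# getoffset()/gettype() above.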
283 283
284 284 class revlogio(object):
285 285 def __init__(self):
286 286 self.size = indexformatng.size
287 287
288 288 def parseindex(self, data, inline):
289 289 # call the C implementation to parse the index data
290 290 index, cache = parsers.parse_index2(data, inline)
291 291 return index, getattr(index, 'nodemap', None), cache
292 292
293 293 def packentry(self, entry, node, version, rev):
294 294 p = indexformatng_pack(*entry)
295 295 if rev == 0:
296 296 p = versionformat_pack(version) + p[4:]
297 297 return p
298 298
299 299 class revlog(object):
300 300 """
301 301 the underlying revision storage object
302 302
303 303 A revlog consists of two parts, an index and the revision data.
304 304
305 305 The index is a file with a fixed record size containing
306 306 information on each revision, including its nodeid (hash), the
307 307 nodeids of its parents, the position and offset of its data within
308 308 the data file, and the revision it's based on. Finally, each entry
309 309 contains a linkrev entry that can serve as a pointer to external
310 310 data.
311 311
312 312 The revision data itself is a linear collection of data chunks.
313 313 Each chunk represents a revision and is usually represented as a
314 314 delta against the previous chunk. To bound lookup time, runs of
315 315 deltas are limited to about 2 times the length of the original
316 316 version data. This makes retrieval of a version proportional to
317 317 its size, or O(1) relative to the number of revisions.
318 318
319 319 Both pieces of the revlog are written to in an append-only
320 320 fashion, which means we never need to rewrite a file to insert or
321 321 remove data, and can use some simple techniques to avoid the need
322 322 for locking while reading.
323 323
324 324 If checkambig, indexfile is opened with checkambig=True at
325 325 writing, to avoid file stat ambiguity.
326 326
327 327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
328 328 index will be mmapped rather than read if it is larger than the
329 329 configured threshold.
330 330
331 331 If censorable is True, the revlog can have censored revisions.
332 332 """
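# Naming convention sketch: the data file name is derived from the index file
# name (see __init__ below), e.g. an indexfile of "00changelog.i" implies a
# datafile of "00changelog.d" unless one is passed explicitly.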
333 333 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
334 334 mmaplargeindex=False, censorable=False):
335 335 """
336 336 create a revlog object
337 337
338 338 opener is a function that abstracts the file opening operation
339 339 and can be used to implement COW semantics or the like.
340 340 """
341 341 self.indexfile = indexfile
342 342 self.datafile = datafile or (indexfile[:-2] + ".d")
343 343 self.opener = opener
344 344 # When True, indexfile is opened with checkambig=True at writing, to
345 345 # avoid file stat ambiguity.
346 346 self._checkambig = checkambig
347 347 self._censorable = censorable
348 348 # 3-tuple of (node, rev, text) for a raw revision.
349 349 self._cache = None
350 350 # Maps rev to chain base rev.
351 351 self._chainbasecache = util.lrucachedict(100)
352 352 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
353 353 self._chunkcache = (0, '')
354 354 # How much data to read and cache into the raw revlog data cache.
355 355 self._chunkcachesize = 65536
356 356 self._maxchainlen = None
357 357 self._deltabothparents = True
358 358 self.index = []
359 359 # Mapping of partial identifiers to full nodes.
360 360 self._pcache = {}
361 361 # Mapping of revision integer to full node.
362 362 self._nodecache = {nullid: nullrev}
363 363 self._nodepos = None
364 364 self._compengine = 'zlib'
365 365 self._maxdeltachainspan = -1
366 366 self._withsparseread = False
367 367 self._sparserevlog = False
368 368 self._srdensitythreshold = 0.50
369 369 self._srmingapsize = 262144
370 370
371 371 # Make copy of flag processors so each revlog instance can support
372 372 # custom flags.
373 373 self._flagprocessors = dict(_flagprocessors)
374 374
375 375 mmapindexthreshold = None
376 376 v = REVLOG_DEFAULT_VERSION
377 377 opts = getattr(opener, 'options', None)
378 378 if opts is not None:
379 379 if 'revlogv2' in opts:
380 380 # version 2 revlogs always use generaldelta.
381 381 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
382 382 elif 'revlogv1' in opts:
383 383 if 'generaldelta' in opts:
384 384 v |= FLAG_GENERALDELTA
385 385 else:
386 386 v = 0
387 387 if 'chunkcachesize' in opts:
388 388 self._chunkcachesize = opts['chunkcachesize']
389 389 if 'maxchainlen' in opts:
390 390 self._maxchainlen = opts['maxchainlen']
391 391 if 'deltabothparents' in opts:
392 392 self._deltabothparents = opts['deltabothparents']
393 393 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
394 394 if 'compengine' in opts:
395 395 self._compengine = opts['compengine']
396 396 if 'maxdeltachainspan' in opts:
397 397 self._maxdeltachainspan = opts['maxdeltachainspan']
398 398 if mmaplargeindex and 'mmapindexthreshold' in opts:
399 399 mmapindexthreshold = opts['mmapindexthreshold']
400 400 self._sparserevlog = bool(opts.get('sparse-revlog', False))
401 401 withsparseread = bool(opts.get('with-sparse-read', False))
402 402 # sparse-revlog forces sparse-read
403 403 self._withsparseread = self._sparserevlog or withsparseread
404 404 if 'sparse-read-density-threshold' in opts:
405 405 self._srdensitythreshold = opts['sparse-read-density-threshold']
406 406 if 'sparse-read-min-gap-size' in opts:
407 407 self._srmingapsize = opts['sparse-read-min-gap-size']
408 408 if opts.get('enableellipsis'):
409 409 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
410 410
411 411 if self._chunkcachesize <= 0:
412 412 raise error.RevlogError(_('revlog chunk cache size %r is not '
413 413 'greater than 0') % self._chunkcachesize)
414 414 elif self._chunkcachesize & (self._chunkcachesize - 1):
415 415 raise error.RevlogError(_('revlog chunk cache size %r is not a '
416 416 'power of 2') % self._chunkcachesize)
417 417
418 418 indexdata = ''
419 419 self._initempty = True
420 420 try:
421 421 with self._indexfp() as f:
422 422 if (mmapindexthreshold is not None and
423 423 self.opener.fstat(f).st_size >= mmapindexthreshold):
424 424 indexdata = util.buffer(util.mmapread(f))
425 425 else:
426 426 indexdata = f.read()
427 427 if len(indexdata) > 0:
428 428 v = versionformat_unpack(indexdata[:4])[0]
429 429 self._initempty = False
430 430 except IOError as inst:
431 431 if inst.errno != errno.ENOENT:
432 432 raise
433 433
434 434 self.version = v
435 435 self._inline = v & FLAG_INLINE_DATA
436 436 self._generaldelta = v & FLAG_GENERALDELTA
437 437 flags = v & ~0xFFFF
438 438 fmt = v & 0xFFFF
439 439 if fmt == REVLOGV0:
440 440 if flags:
441 441 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
442 442 'revlog %s') %
443 443 (flags >> 16, fmt, self.indexfile))
444 444 elif fmt == REVLOGV1:
445 445 if flags & ~REVLOGV1_FLAGS:
446 446 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
447 447 'revlog %s') %
448 448 (flags >> 16, fmt, self.indexfile))
449 449 elif fmt == REVLOGV2:
450 450 if flags & ~REVLOGV2_FLAGS:
451 451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 452 'revlog %s') %
453 453 (flags >> 16, fmt, self.indexfile))
454 454 else:
455 455 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
456 456 (fmt, self.indexfile))
457 457
458 458 self._storedeltachains = True
459 459
460 460 self._io = revlogio()
461 461 if self.version == REVLOGV0:
462 462 self._io = revlogoldio()
463 463 try:
464 464 d = self._io.parseindex(indexdata, self._inline)
465 465 except (ValueError, IndexError):
466 466 raise error.RevlogError(_("index %s is corrupted") %
467 467 self.indexfile)
468 468 self.index, nodemap, self._chunkcache = d
469 469 if nodemap is not None:
470 470 self.nodemap = self._nodecache = nodemap
471 471 if not self._chunkcache:
472 472 self._chunkclear()
473 473 # revnum -> (chain-length, sum-delta-length)
474 474 self._chaininfocache = {}
475 475 # revlog header -> revlog compressor
476 476 self._decompressors = {}
477 477
478 478 @util.propertycache
479 479 def _compressor(self):
480 480 return util.compengines[self._compengine].revlogcompressor()
481 481
482 482 def _indexfp(self, mode='r'):
483 483 """file object for the revlog's index file"""
484 484 args = {r'mode': mode}
485 485 if mode != 'r':
486 486 args[r'checkambig'] = self._checkambig
487 487 if mode == 'w':
488 488 args[r'atomictemp'] = True
489 489 return self.opener(self.indexfile, **args)
490 490
491 491 def _datafp(self, mode='r'):
492 492 """file object for the revlog's data file"""
493 493 return self.opener(self.datafile, mode=mode)
494 494
495 495 @contextlib.contextmanager
496 496 def _datareadfp(self, existingfp=None):
497 497 """file object suitable to read data"""
498 498 if existingfp is not None:
499 499 yield existingfp
500 500 else:
501 501 if self._inline:
502 502 func = self._indexfp
503 503 else:
504 504 func = self._datafp
505 505 with func() as fp:
506 506 yield fp
507 507
508 508 def tip(self):
509 509 return self.node(len(self.index) - 1)
510 510 def __contains__(self, rev):
511 511 return 0 <= rev < len(self)
512 512 def __len__(self):
513 513 return len(self.index)
514 514 def __iter__(self):
515 515 return iter(pycompat.xrange(len(self)))
516 516 def revs(self, start=0, stop=None):
517 517 """iterate over all revs in this revlog (from start to stop)"""
518 518 return storageutil.iterrevs(len(self), start=start, stop=stop)
519 519
520 520 @util.propertycache
521 521 def nodemap(self):
522 522 if self.index:
523 523 # populate mapping down to the initial node
524 524 node0 = self.index[0][7] # get around changelog filtering
525 525 self.rev(node0)
526 526 return self._nodecache
527 527
528 528 def hasnode(self, node):
529 529 try:
530 530 self.rev(node)
531 531 return True
532 532 except KeyError:
533 533 return False
534 534
535 535 def candelta(self, baserev, rev):
536 536 """whether two revisions (baserev, rev) can be delta-ed or not"""
537 537 # Disable deltas if either rev requires a content-changing flag
538 538 # processor (ex. LFS). This is because such a flag processor can alter
539 539 # the rawtext content that the delta will be based on, and two clients
540 540 # could have the same revlog node with different flags (i.e. different
541 541 # rawtext contents) and the delta could be incompatible.
542 542 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
543 543 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
544 544 return False
545 545 return True
546 546
547 547 def clearcaches(self):
548 548 self._cache = None
549 549 self._chainbasecache.clear()
550 550 self._chunkcache = (0, '')
551 551 self._pcache = {}
552 552
553 553 try:
554 554 self._nodecache.clearcaches()
555 555 except AttributeError:
556 556 self._nodecache = {nullid: nullrev}
557 557 self._nodepos = None
558 558
559 559 def rev(self, node):
560 560 try:
561 561 return self._nodecache[node]
562 562 except TypeError:
563 563 raise
564 564 except error.RevlogError:
565 565 # parsers.c radix tree lookup failed
566 566 if node == wdirid or node in wdirfilenodeids:
567 567 raise error.WdirUnsupported
568 568 raise error.LookupError(node, self.indexfile, _('no node'))
569 569 except KeyError:
570 570 # pure python cache lookup failed
571 571 n = self._nodecache
572 572 i = self.index
573 573 p = self._nodepos
574 574 if p is None:
575 575 p = len(i) - 1
576 576 else:
577 577 assert p < len(i)
578 578 for r in pycompat.xrange(p, -1, -1):
579 579 v = i[r][7]
580 580 n[v] = r
581 581 if v == node:
582 582 self._nodepos = r - 1
583 583 return r
584 584 if node == wdirid or node in wdirfilenodeids:
585 585 raise error.WdirUnsupported
586 586 raise error.LookupError(node, self.indexfile, _('no node'))
587 587
588 588 # Accessors for index entries.
589 589
590 590 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
591 591 # are flags.
592 592 def start(self, rev):
593 593 return int(self.index[rev][0] >> 16)
594 594
595 595 def flags(self, rev):
596 596 return self.index[rev][0] & 0xFFFF
597 597
598 598 def length(self, rev):
599 599 return self.index[rev][1]
600 600
601 601 def rawsize(self, rev):
602 602 """return the length of the uncompressed text for a given revision"""
603 603 l = self.index[rev][2]
604 604 if l >= 0:
605 605 return l
606 606
607 607 t = self.revision(rev, raw=True)
608 608 return len(t)
609 609
610 610 def size(self, rev):
611 611 """length of non-raw text (processed by a "read" flag processor)"""
612 612 # fast path: if no "read" flag processor could change the content,
613 613 # size is rawsize. note: ELLIPSIS is known to not change the content.
614 614 flags = self.flags(rev)
615 615 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
616 616 return self.rawsize(rev)
617 617
618 618 return len(self.revision(rev, raw=False))
619 619
620 620 def chainbase(self, rev):
621 621 base = self._chainbasecache.get(rev)
622 622 if base is not None:
623 623 return base
624 624
625 625 index = self.index
626 626 iterrev = rev
627 627 base = index[iterrev][3]
628 628 while base != iterrev:
629 629 iterrev = base
630 630 base = index[iterrev][3]
631 631
632 632 self._chainbasecache[rev] = base
633 633 return base
634 634
635 635 def linkrev(self, rev):
636 636 return self.index[rev][4]
637 637
638 638 def parentrevs(self, rev):
639 639 try:
640 640 entry = self.index[rev]
641 641 except IndexError:
642 642 if rev == wdirrev:
643 643 raise error.WdirUnsupported
644 644 raise
645 645
646 646 return entry[5], entry[6]
647 647
648 648 def node(self, rev):
649 649 try:
650 650 return self.index[rev][7]
651 651 except IndexError:
652 652 if rev == wdirrev:
653 653 raise error.WdirUnsupported
654 654 raise
655 655
656 656 # Derived from index values.
657 657
658 658 def end(self, rev):
659 659 return self.start(rev) + self.length(rev)
660 660
661 661 def parents(self, node):
662 662 i = self.index
663 663 d = i[self.rev(node)]
664 664 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
665 665
666 666 def chainlen(self, rev):
667 667 return self._chaininfo(rev)[0]
668 668
669 669 def _chaininfo(self, rev):
670 670 chaininfocache = self._chaininfocache
671 671 if rev in chaininfocache:
672 672 return chaininfocache[rev]
673 673 index = self.index
674 674 generaldelta = self._generaldelta
675 675 iterrev = rev
676 676 e = index[iterrev]
677 677 clen = 0
678 678 compresseddeltalen = 0
679 679 while iterrev != e[3]:
680 680 clen += 1
681 681 compresseddeltalen += e[1]
682 682 if generaldelta:
683 683 iterrev = e[3]
684 684 else:
685 685 iterrev -= 1
686 686 if iterrev in chaininfocache:
687 687 t = chaininfocache[iterrev]
688 688 clen += t[0]
689 689 compresseddeltalen += t[1]
690 690 break
691 691 e = index[iterrev]
692 692 else:
693 693 # Add text length of base since decompressing that also takes
694 694 # work. For cache hits the length is already included.
695 695 compresseddeltalen += e[1]
696 696 r = (clen, compresseddeltalen)
697 697 chaininfocache[rev] = r
698 698 return r
699 699
700 700 def _deltachain(self, rev, stoprev=None):
701 701 """Obtain the delta chain for a revision.
702 702
703 703 ``stoprev`` specifies a revision to stop at. If not specified, we
704 704 stop at the base of the chain.
705 705
706 706 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
707 707 revs in ascending order and ``stopped`` is a bool indicating whether
708 708 ``stoprev`` was hit.
709 709 """
710 710 # Try C implementation.
711 711 try:
712 712 return self.index.deltachain(rev, stoprev, self._generaldelta)
713 713 except AttributeError:
714 714 pass
715 715
716 716 chain = []
717 717
718 718 # Alias to prevent attribute lookup in tight loop.
719 719 index = self.index
720 720 generaldelta = self._generaldelta
721 721
722 722 iterrev = rev
723 723 e = index[iterrev]
724 724 while iterrev != e[3] and iterrev != stoprev:
725 725 chain.append(iterrev)
726 726 if generaldelta:
727 727 iterrev = e[3]
728 728 else:
729 729 iterrev -= 1
730 730 e = index[iterrev]
731 731
732 732 if iterrev == stoprev:
733 733 stopped = True
734 734 else:
735 735 chain.append(iterrev)
736 736 stopped = False
737 737
738 738 chain.reverse()
739 739 return chain, stopped
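# Illustrative sketch (assumed index contents): with generaldelta, e[3] names
# each revision's delta parent. For base revs 0, 0, 1, 3 at revs 0..3:
#
#     _deltachain(2)  ->  ([0, 1, 2], False)   # walks 2 -> 1 -> 0
#     _deltachain(3)  ->  ([3], False)         # rev 3 is its own base
#
# Without generaldelta, the chain is the contiguous run of revs down to the
# base, since each delta parent is then rev - 1.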
740 740
741 741 def ancestors(self, revs, stoprev=0, inclusive=False):
742 742 """Generate the ancestors of 'revs' in reverse topological order.
743 743 Does not generate revs lower than stoprev.
744 744
745 745 See the documentation for ancestor.lazyancestors for more details."""
746 746
747 747 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
748 748 inclusive=inclusive)
749 749
750 750 def descendants(self, revs):
751 751 """Generate the descendants of 'revs' in revision order.
752 752
753 753 Yield a sequence of revision numbers starting with a child of
754 754 some rev in revs, i.e., each revision is *not* considered a
755 755 descendant of itself. Results are ordered by revision number (a
756 756 topological sort)."""
757 757 first = min(revs)
758 758 if first == nullrev:
759 759 for i in self:
760 760 yield i
761 761 return
762 762
763 763 seen = set(revs)
764 764 for i in self.revs(start=first + 1):
765 765 for x in self.parentrevs(i):
766 766 if x != nullrev and x in seen:
767 767 seen.add(i)
768 768 yield i
769 769 break
770 770
771 771 def findcommonmissing(self, common=None, heads=None):
772 772 """Return a tuple of the ancestors of common and the ancestors of heads
773 773 that are not ancestors of common. In revset terminology, we return the
774 774 tuple:
775 775
776 776 ::common, (::heads) - (::common)
777 777
778 778 The list is sorted by revision number, meaning it is
779 779 topologically sorted.
780 780
781 781 'heads' and 'common' are both lists of node IDs. If heads is
782 782 not supplied, uses all of the revlog's heads. If common is not
783 783 supplied, uses nullid."""
784 784 if common is None:
785 785 common = [nullid]
786 786 if heads is None:
787 787 heads = self.heads()
788 788
789 789 common = [self.rev(n) for n in common]
790 790 heads = [self.rev(n) for n in heads]
791 791
792 792 # we want the ancestors, but inclusive
793 793 class lazyset(object):
794 794 def __init__(self, lazyvalues):
795 795 self.addedvalues = set()
796 796 self.lazyvalues = lazyvalues
797 797
798 798 def __contains__(self, value):
799 799 return value in self.addedvalues or value in self.lazyvalues
800 800
801 801 def __iter__(self):
802 802 added = self.addedvalues
803 803 for r in added:
804 804 yield r
805 805 for r in self.lazyvalues:
806 806 if r not in added:
807 807 yield r
808 808
809 809 def add(self, value):
810 810 self.addedvalues.add(value)
811 811
812 812 def update(self, values):
813 813 self.addedvalues.update(values)
814 814
815 815 has = lazyset(self.ancestors(common))
816 816 has.add(nullrev)
817 817 has.update(common)
818 818
819 819 # take all ancestors from heads that aren't in has
820 820 missing = set()
821 821 visit = collections.deque(r for r in heads if r not in has)
822 822 while visit:
823 823 r = visit.popleft()
824 824 if r in missing:
825 825 continue
826 826 else:
827 827 missing.add(r)
828 828 for p in self.parentrevs(r):
829 829 if p not in has:
830 830 visit.append(p)
831 831 missing = list(missing)
832 832 missing.sort()
833 833 return has, [self.node(miss) for miss in missing]
834 834
835 835 def incrementalmissingrevs(self, common=None):
836 836 """Return an object that can be used to incrementally compute the
837 837 revision numbers of the ancestors of arbitrary sets that are not
838 838 ancestors of common. This is an ancestor.incrementalmissingancestors
839 839 object.
840 840
841 841 'common' is a list of revision numbers. If common is not supplied, uses
842 842 nullrev.
843 843 """
844 844 if common is None:
845 845 common = [nullrev]
846 846
847 847 return ancestor.incrementalmissingancestors(self.parentrevs, common)
848 848
849 849 def findmissingrevs(self, common=None, heads=None):
850 850 """Return the revision numbers of the ancestors of heads that
851 851 are not ancestors of common.
852 852
853 853 More specifically, return a list of revision numbers corresponding to
854 854 nodes N such that every N satisfies the following constraints:
855 855
856 856 1. N is an ancestor of some node in 'heads'
857 857 2. N is not an ancestor of any node in 'common'
858 858
859 859 The list is sorted by revision number, meaning it is
860 860 topologically sorted.
861 861
862 862 'heads' and 'common' are both lists of revision numbers. If heads is
863 863 not supplied, uses all of the revlog's heads. If common is not
864 864 supplied, uses nullid."""
865 865 if common is None:
866 866 common = [nullrev]
867 867 if heads is None:
868 868 heads = self.headrevs()
869 869
870 870 inc = self.incrementalmissingrevs(common=common)
871 871 return inc.missingancestors(heads)
872 872
873 873 def findmissing(self, common=None, heads=None):
874 874 """Return the ancestors of heads that are not ancestors of common.
875 875
876 876 More specifically, return a list of nodes N such that every N
877 877 satisfies the following constraints:
878 878
879 879 1. N is an ancestor of some node in 'heads'
880 880 2. N is not an ancestor of any node in 'common'
881 881
882 882 The list is sorted by revision number, meaning it is
883 883 topologically sorted.
884 884
885 885 'heads' and 'common' are both lists of node IDs. If heads is
886 886 not supplied, uses all of the revlog's heads. If common is not
887 887 supplied, uses nullid."""
888 888 if common is None:
889 889 common = [nullid]
890 890 if heads is None:
891 891 heads = self.heads()
892 892
893 893 common = [self.rev(n) for n in common]
894 894 heads = [self.rev(n) for n in heads]
895 895
896 896 inc = self.incrementalmissingrevs(common=common)
897 897 return [self.node(r) for r in inc.missingancestors(heads)]
898 898
899 899 def nodesbetween(self, roots=None, heads=None):
900 900 """Return a topological path from 'roots' to 'heads'.
901 901
902 902 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
903 903 topologically sorted list of all nodes N that satisfy both of
904 904 these constraints:
905 905
906 906 1. N is a descendant of some node in 'roots'
907 907 2. N is an ancestor of some node in 'heads'
908 908
909 909 Every node is considered to be both a descendant and an ancestor
910 910 of itself, so every reachable node in 'roots' and 'heads' will be
911 911 included in 'nodes'.
912 912
913 913 'outroots' is the list of reachable nodes in 'roots', i.e., the
914 914 subset of 'roots' that is returned in 'nodes'. Likewise,
915 915 'outheads' is the subset of 'heads' that is also in 'nodes'.
916 916
917 917 'roots' and 'heads' are both lists of node IDs. If 'roots' is
918 918 unspecified, uses nullid as the only root. If 'heads' is
919 919 unspecified, uses list of all of the revlog's heads."""
920 920 nonodes = ([], [], [])
921 921 if roots is not None:
922 922 roots = list(roots)
923 923 if not roots:
924 924 return nonodes
925 925 lowestrev = min([self.rev(n) for n in roots])
926 926 else:
927 927 roots = [nullid] # Everybody's a descendant of nullid
928 928 lowestrev = nullrev
929 929 if (lowestrev == nullrev) and (heads is None):
930 930 # We want _all_ the nodes!
931 931 return ([self.node(r) for r in self], [nullid], list(self.heads()))
932 932 if heads is None:
933 933 # All nodes are ancestors, so the latest ancestor is the last
934 934 # node.
935 935 highestrev = len(self) - 1
936 936 # Set ancestors to None to signal that every node is an ancestor.
937 937 ancestors = None
938 938 # Set heads to an empty dictionary for later discovery of heads
939 939 heads = {}
940 940 else:
941 941 heads = list(heads)
942 942 if not heads:
943 943 return nonodes
944 944 ancestors = set()
945 945 # Turn heads into a dictionary so we can remove 'fake' heads.
946 946 # Also, later we will be using it to filter out the heads we can't
947 947 # find from roots.
948 948 heads = dict.fromkeys(heads, False)
949 949 # Start at the top and keep marking parents until we're done.
950 950 nodestotag = set(heads)
951 951 # Remember where the top was so we can use it as a limit later.
952 952 highestrev = max([self.rev(n) for n in nodestotag])
953 953 while nodestotag:
954 954 # grab a node to tag
955 955 n = nodestotag.pop()
956 956 # Never tag nullid
957 957 if n == nullid:
958 958 continue
959 959 # A node's revision number represents its place in a
960 960 # topologically sorted list of nodes.
961 961 r = self.rev(n)
962 962 if r >= lowestrev:
963 963 if n not in ancestors:
964 964 # If we are possibly a descendant of one of the roots
965 965 # and we haven't already been marked as an ancestor
966 966 ancestors.add(n) # Mark as ancestor
967 967 # Add non-nullid parents to list of nodes to tag.
968 968 nodestotag.update([p for p in self.parents(n) if
969 969 p != nullid])
970 970 elif n in heads: # We've seen it before, is it a fake head?
971 971 # So it is, real heads should not be the ancestors of
972 972 # any other heads.
973 973 heads.pop(n)
974 974 if not ancestors:
975 975 return nonodes
976 976 # Now that we have our set of ancestors, we want to remove any
977 977 # roots that are not ancestors.
978 978
979 979 # If one of the roots was nullid, everything is included anyway.
980 980 if lowestrev > nullrev:
981 981 # But, since we weren't, let's recompute the lowest rev to not
982 982 # include roots that aren't ancestors.
983 983
984 984 # Filter out roots that aren't ancestors of heads
985 985 roots = [root for root in roots if root in ancestors]
986 986 # Recompute the lowest revision
987 987 if roots:
988 988 lowestrev = min([self.rev(root) for root in roots])
989 989 else:
990 990 # No more roots? Return empty list
991 991 return nonodes
992 992 else:
993 993 # We are descending from nullid, and don't need to care about
994 994 # any other roots.
995 995 lowestrev = nullrev
996 996 roots = [nullid]
997 997 # Transform our roots list into a set.
998 998 descendants = set(roots)
999 999 # Also, keep the original roots so we can filter out roots that aren't
1000 1000 # 'real' roots (i.e. are descended from other roots).
1001 1001 roots = descendants.copy()
1002 1002 # Our topologically sorted list of output nodes.
1003 1003 orderedout = []
1004 1004 # Don't start at nullid since we don't want nullid in our output list,
1005 1005 # and if nullid shows up in descendants, empty parents will look like
1006 1006 # they're descendants.
1007 1007 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1008 1008 n = self.node(r)
1009 1009 isdescendant = False
1010 1010 if lowestrev == nullrev: # Everybody is a descendant of nullid
1011 1011 isdescendant = True
1012 1012 elif n in descendants:
1013 1013 # n is already a descendant
1014 1014 isdescendant = True
1015 1015 # This check only needs to be done here because all the roots
1016 1016 # will start being marked as descendants before the loop.
1017 1017 if n in roots:
1018 1018 # If n was a root, check if it's a 'real' root.
1019 1019 p = tuple(self.parents(n))
1020 1020 # If any of its parents are descendants, it's not a root.
1021 1021 if (p[0] in descendants) or (p[1] in descendants):
1022 1022 roots.remove(n)
1023 1023 else:
1024 1024 p = tuple(self.parents(n))
1025 1025 # A node is a descendant if either of its parents is a
1026 1026 # descendant. (We seeded the descendants set with the roots
1027 1027 # up there, remember?)
1028 1028 if (p[0] in descendants) or (p[1] in descendants):
1029 1029 descendants.add(n)
1030 1030 isdescendant = True
1031 1031 if isdescendant and ((ancestors is None) or (n in ancestors)):
1032 1032 # Only include nodes that are both descendants and ancestors.
1033 1033 orderedout.append(n)
1034 1034 if (ancestors is not None) and (n in heads):
1035 1035 # We're trying to figure out which heads are reachable
1036 1036 # from roots.
1037 1037 # Mark this head as having been reached
1038 1038 heads[n] = True
1039 1039 elif ancestors is None:
1040 1040 # Otherwise, we're trying to discover the heads.
1041 1041 # Assume this is a head because if it isn't, the next step
1042 1042 # will eventually remove it.
1043 1043 heads[n] = True
1044 1044 # But, obviously its parents aren't.
1045 1045 for p in self.parents(n):
1046 1046 heads.pop(p, None)
1047 1047 heads = [head for head, flag in heads.iteritems() if flag]
1048 1048 roots = list(roots)
1049 1049 assert orderedout
1050 1050 assert roots
1051 1051 assert heads
1052 1052 return (orderedout, roots, heads)
1053 1053
1054 1054 def headrevs(self):
1055 1055 try:
1056 1056 return self.index.headrevs()
1057 1057 except AttributeError:
1058 1058 return self._headrevs()
1059 1059
1060 1060 def computephases(self, roots):
1061 1061 return self.index.computephasesmapsets(roots)
1062 1062
1063 1063 def _headrevs(self):
1064 1064 count = len(self)
1065 1065 if not count:
1066 1066 return [nullrev]
1067 1067 # we won't iter over filtered revs so nobody is a head at start
1068 1068 ishead = [0] * (count + 1) # extra slot absorbs nullrev (-1) parents
1069 1069 index = self.index
1070 1070 for r in self:
1071 1071 ishead[r] = 1 # I may be a head
1072 1072 e = index[r]
1073 1073 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1074 1074 return [r for r, val in enumerate(ishead) if val]
1075 1075
1076 1076 def heads(self, start=None, stop=None):
1077 1077 """return the list of all nodes that have no children
1078 1078
1079 1079 if start is specified, only heads that are descendants of
1080 1080 start will be returned
1081 1081 if stop is specified, it will consider all the revs from stop
1082 1082 as if they had no children
1083 1083 """
1084 1084 if start is None and stop is None:
1085 1085 if not len(self):
1086 1086 return [nullid]
1087 1087 return [self.node(r) for r in self.headrevs()]
1088 1088
1089 1089 if start is None:
1090 1090 start = nullid
1091 1091 if stop is None:
1092 1092 stop = []
1093 1093 stoprevs = set([self.rev(n) for n in stop])
1094 1094 startrev = self.rev(start)
1095 1095 reachable = {startrev}
1096 1096 heads = {startrev}
1097 1097
1098 1098 parentrevs = self.parentrevs
1099 1099 for r in self.revs(start=startrev + 1):
1100 1100 for p in parentrevs(r):
1101 1101 if p in reachable:
1102 1102 if r not in stoprevs:
1103 1103 reachable.add(r)
1104 1104 heads.add(r)
1105 1105 if p in heads and p not in stoprevs:
1106 1106 heads.remove(p)
1107 1107
1108 1108 return [self.node(r) for r in heads]
1109 1109
1110 1110 def children(self, node):
1111 1111 """find the children of a given node"""
1112 1112 c = []
1113 1113 p = self.rev(node)
1114 1114 for r in self.revs(start=p + 1):
1115 1115 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1116 1116 if prevs:
1117 1117 for pr in prevs:
1118 1118 if pr == p:
1119 1119 c.append(self.node(r))
1120 1120 elif p == nullrev:
1121 1121 c.append(self.node(r))
1122 1122 return c
1123 1123
1124 1124 def commonancestorsheads(self, a, b):
1125 1125 """calculate all the heads of the common ancestors of nodes a and b"""
1126 1126 a, b = self.rev(a), self.rev(b)
1127 1127 ancs = self._commonancestorsheads(a, b)
1128 1128 return pycompat.maplist(self.node, ancs)
1129 1129
1130 1130 def _commonancestorsheads(self, *revs):
1131 1131 """calculate all the heads of the common ancestors of revs"""
1132 1132 try:
1133 1133 ancs = self.index.commonancestorsheads(*revs)
1134 1134 except (AttributeError, OverflowError): # C implementation failed
1135 1135 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1136 1136 return ancs
1137 1137
1138 1138 def isancestor(self, a, b):
1139 1139 """return True if node a is an ancestor of node b
1140 1140
1141 1141 A revision is considered an ancestor of itself."""
1142 1142 a, b = self.rev(a), self.rev(b)
1143 1143 return self.isancestorrev(a, b)
1144 1144
1145 1145 def isancestorrev(self, a, b):
1146 1146 """return True if revision a is an ancestor of revision b
1147 1147
1148 1148 A revision is considered an ancestor of itself.
1149 1149
1150 1150 The implementation of this is trivial but the use of
1151 1151 commonancestorsheads is not."""
1152 1152 if a == nullrev:
1153 1153 return True
1154 1154 elif a == b:
1155 1155 return True
1156 1156 elif a > b:
1157 1157 return False
1158 1158 return a in self._commonancestorsheads(a, b)
1159 1159
1160 1160 def ancestor(self, a, b):
1161 1161 """calculate the "best" common ancestor of nodes a and b"""
1162 1162
1163 1163 a, b = self.rev(a), self.rev(b)
1164 1164 try:
1165 1165 ancs = self.index.ancestors(a, b)
1166 1166 except (AttributeError, OverflowError):
1167 1167 ancs = ancestor.ancestors(self.parentrevs, a, b)
1168 1168 if ancs:
1169 1169 # choose a consistent winner when there's a tie
1170 1170 return min(map(self.node, ancs))
1171 1171 return nullid
1172 1172
1173 1173 def _match(self, id):
1174 1174 if isinstance(id, int):
1175 1175 # rev
1176 1176 return self.node(id)
1177 1177 if len(id) == 20:
1178 1178 # possibly a binary node
1179 1179 # odds of a binary node being all hex in ASCII are 1 in 10**25
1180 1180 try:
1181 1181 node = id
1182 1182 self.rev(node) # quick search the index
1183 1183 return node
1184 1184 except error.LookupError:
1185 1185 pass # may be partial hex id
1186 1186 try:
1187 1187 # str(rev)
1188 1188 rev = int(id)
1189 1189 if "%d" % rev != id:
1190 1190 raise ValueError
1191 1191 if rev < 0:
1192 1192 rev = len(self) + rev
1193 1193 if rev < 0 or rev >= len(self):
1194 1194 raise ValueError
1195 1195 return self.node(rev)
1196 1196 except (ValueError, OverflowError):
1197 1197 pass
1198 1198 if len(id) == 40:
1199 1199 try:
1200 1200 # a full hex nodeid?
1201 1201 node = bin(id)
1202 1202 self.rev(node)
1203 1203 return node
1204 1204 except (TypeError, error.LookupError):
1205 1205 pass
1206 1206
1207 1207 def _partialmatch(self, id):
1208 1208 # we don't care about wdirfilenodeids as they should always be full hashes
1209 1209 maybewdir = wdirhex.startswith(id)
1210 1210 try:
1211 1211 partial = self.index.partialmatch(id)
1212 1212 if partial and self.hasnode(partial):
1213 1213 if maybewdir:
1214 1214 # single 'ff...' match in radix tree, ambiguous with wdir
1215 1215 raise error.RevlogError
1216 1216 return partial
1217 1217 if maybewdir:
1218 1218 # no 'ff...' match in radix tree, wdir identified
1219 1219 raise error.WdirUnsupported
1220 1220 return None
1221 1221 except error.RevlogError:
1222 1222 # parsers.c radix tree lookup gave multiple matches
1223 1223 # fast path: for unfiltered changelog, radix tree is accurate
1224 1224 if not getattr(self, 'filteredrevs', None):
1225 1225 raise error.AmbiguousPrefixLookupError(
1226 1226 id, self.indexfile, _('ambiguous identifier'))
1227 1227 # fall through to slow path that filters hidden revisions
1228 1228 except (AttributeError, ValueError):
1229 1229 # we are pure python, or key was too short to search radix tree
1230 1230 pass
1231 1231
1232 1232 if id in self._pcache:
1233 1233 return self._pcache[id]
1234 1234
1235 1235 if len(id) <= 40:
1236 1236 try:
1237 1237 # hex(node)[:...]
1238 1238 l = len(id) // 2 # grab an even number of digits
1239 1239 prefix = bin(id[:l * 2])
1240 1240 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1241 1241 nl = [n for n in nl if hex(n).startswith(id) and
1242 1242 self.hasnode(n)]
1243 1243 if nullhex.startswith(id):
1244 1244 nl.append(nullid)
1245 1245 if len(nl) > 0:
1246 1246 if len(nl) == 1 and not maybewdir:
1247 1247 self._pcache[id] = nl[0]
1248 1248 return nl[0]
1249 1249 raise error.AmbiguousPrefixLookupError(
1250 1250 id, self.indexfile, _('ambiguous identifier'))
1251 1251 if maybewdir:
1252 1252 raise error.WdirUnsupported
1253 1253 return None
1254 1254 except TypeError:
1255 1255 pass
1256 1256
1257 1257 def lookup(self, id):
1258 1258 """locate a node based on:
1259 1259 - revision number or str(revision number)
1260 1260 - nodeid or subset of hex nodeid
1261 1261 """
1262 1262 n = self._match(id)
1263 1263 if n is not None:
1264 1264 return n
1265 1265 n = self._partialmatch(id)
1266 1266 if n:
1267 1267 return n
1268 1268
1269 1269 raise error.LookupError(id, self.indexfile, _('no match found'))
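# Illustrative usage sketch (hypothetical values):
#
#     rl.lookup('0')             # str(rev): node of revision 0
#     rl.lookup(node)            # 20-byte binary nodeid: returned as-is
#     rl.lookup(hex(node)[:8])   # hex prefix: resolved if unambiguous,
#                                # else AmbiguousPrefixLookupError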
1270 1270
1271 1271 def shortest(self, node, minlength=1):
1272 1272 """Find the shortest unambiguous prefix that matches node."""
1273 1273 def isvalid(prefix):
1274 1274 try:
1275 1275 node = self._partialmatch(prefix)
1276 1276 except error.AmbiguousPrefixLookupError:
1277 1277 return False
1278 1278 except error.WdirUnsupported:
1279 1279 # single 'ff...' match
1280 1280 return True
1281 1281 if node is None:
1282 1282 raise error.LookupError(node, self.indexfile, _('no node'))
1283 1283 return True
1284 1284
1285 1285 def maybewdir(prefix):
1286 1286 return all(c == 'f' for c in prefix)
1287 1287
1288 1288 hexnode = hex(node)
1289 1289
1290 1290 def disambiguate(hexnode, minlength):
1291 1291 """Disambiguate against wdirid."""
1292 1292 for length in range(minlength, 41):
1293 1293 prefix = hexnode[:length]
1294 1294 if not maybewdir(prefix):
1295 1295 return prefix
1296 1296
1297 1297 if not getattr(self, 'filteredrevs', None):
1298 1298 try:
1299 1299 length = max(self.index.shortest(node), minlength)
1300 1300 return disambiguate(hexnode, length)
1301 1301 except error.RevlogError:
1302 1302 if node != wdirid:
1303 1303 raise error.LookupError(node, self.indexfile, _('no node'))
1304 1304 except AttributeError:
1305 1305 # Fall through to pure code
1306 1306 pass
1307 1307
1308 1308 if node == wdirid:
1309 1309 for length in range(minlength, 41):
1310 1310 prefix = hexnode[:length]
1311 1311 if isvalid(prefix):
1312 1312 return prefix
1313 1313
1314 1314 for length in range(minlength, 41):
1315 1315 prefix = hexnode[:length]
1316 1316 if isvalid(prefix):
1317 1317 return disambiguate(hexnode, length)
1318 1318
1319 1319 def cmp(self, node, text):
1320 1320 """compare text with a given file revision
1321 1321
1322 1322 returns True if text is different from what is stored.
1323 1323 """
1324 1324 p1, p2 = self.parents(node)
1325 1325 return storageutil.hashrevisionsha1(text, p1, p2) != node
1326 1326
1327 1327 def _cachesegment(self, offset, data):
1328 1328 """Add a segment to the revlog cache.
1329 1329
1330 1330 Accepts an absolute offset and the data that is at that location.
1331 1331 """
1332 1332 o, d = self._chunkcache
1333 1333 # try to add to existing cache
1334 1334 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1335 1335 self._chunkcache = o, d + data
1336 1336 else:
1337 1337 self._chunkcache = offset, data
1338 1338
1339 1339 def _readsegment(self, offset, length, df=None):
1340 1340 """Load a segment of raw data from the revlog.
1341 1341
1342 1342 Accepts an absolute offset, length to read, and an optional existing
1343 1343 file handle to read from.
1344 1344
1345 1345 If an existing file handle is passed, it will be seeked and the
1346 1346 original seek position will NOT be restored.
1347 1347
1348 1348 Returns a str or buffer of raw byte data.
1349 1349 """
1350 1350 # Cache data both forward and backward around the requested
1351 1351 # data, in a fixed size window. This helps speed up operations
1352 1352 # involving reading the revlog backwards.
1353 1353 cachesize = self._chunkcachesize
1354 1354 realoffset = offset & ~(cachesize - 1)
1355 1355 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1356 1356 - realoffset)
1357 1357 with self._datareadfp(df) as df:
1358 1358 df.seek(realoffset)
1359 1359 d = df.read(reallength)
1360 1360 self._cachesegment(realoffset, d)
1361 1361 if offset != realoffset or reallength != length:
1362 1362 return util.buffer(d, offset - realoffset, length)
1363 1363 return d
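# Illustrative arithmetic (default _chunkcachesize of 65536): a request for
# offset=70000, length=100 is widened to the aligned window
# realoffset = 70000 & ~65535 = 65536 and reallength = 65536, so a single
# 64KB read serves nearby requests in both directions from the cache.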
1364 1364
1365 1365 def _getsegment(self, offset, length, df=None):
1366 1366 """Obtain a segment of raw data from the revlog.
1367 1367
1368 1368 Accepts an absolute offset, length of bytes to obtain, and an
1369 1369 optional file handle to the already-opened revlog. If the file
1370 1370 handle is used, its original seek position will not be preserved.
1371 1371
1372 1372 Requests for data may be returned from a cache.
1373 1373
1374 1374 Returns a str or a buffer instance of raw byte data.
1375 1375 """
1376 1376 o, d = self._chunkcache
1377 1377 l = len(d)
1378 1378
1379 1379 # is it in the cache?
1380 1380 cachestart = offset - o
1381 1381 cacheend = cachestart + length
1382 1382 if cachestart >= 0 and cacheend <= l:
1383 1383 if cachestart == 0 and cacheend == l:
1384 1384 return d # avoid a copy
1385 1385 return util.buffer(d, cachestart, cacheend - cachestart)
1386 1386
1387 1387 return self._readsegment(offset, length, df=df)
1388 1388
1389 1389 def _getsegmentforrevs(self, startrev, endrev, df=None):
1390 1390 """Obtain a segment of raw data corresponding to a range of revisions.
1391 1391
1392 1392 Accepts the start and end revisions and an optional already-open
1393 1393 file handle to be used for reading. If the file handle is read, its
1394 1394 seek position will not be preserved.
1395 1395
1396 1396 Requests for data may be satisfied by a cache.
1397 1397
1398 1398 Returns a 2-tuple of (offset, data) for the requested range of
1399 1399 revisions. Offset is the integer offset from the beginning of the
1400 1400 revlog and data is a str or buffer of the raw byte data.
1401 1401
1402 1402 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1403 1403 to determine where each revision's data begins and ends.
1404 1404 """
1405 1405 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1406 1406 # (functions are expensive).
1407 1407 index = self.index
1408 1408 istart = index[startrev]
1409 1409 start = int(istart[0] >> 16)
1410 1410 if startrev == endrev:
1411 1411 end = start + istart[1]
1412 1412 else:
1413 1413 iend = index[endrev]
1414 1414 end = int(iend[0] >> 16) + iend[1]
1415 1415
1416 1416 if self._inline:
1417 1417 start += (startrev + 1) * self._io.size
1418 1418 end += (endrev + 1) * self._io.size
1419 1419 length = end - start
1420 1420
1421 1421 return start, self._getsegment(start, length, df=df)
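# Illustrative sketch: for an inline v1 revlog (64-byte index entries), data
# for rev r starts at start(r) + (r + 1) * 64 because each revision's chunk
# is interleaved after its index entry; hence the (startrev + 1) and
# (endrev + 1) adjustments above.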
1422 1422
1423 1423 def _chunk(self, rev, df=None):
1424 1424 """Obtain a single decompressed chunk for a revision.
1425 1425
1426 1426 Accepts an integer revision and an optional already-open file handle
1427 1427 to be used for reading. If used, the seek position of the file will not
1428 1428 be preserved.
1429 1429
1430 1430 Returns a str holding uncompressed data for the requested revision.
1431 1431 """
1432 1432 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1433 1433
1434 1434 def _chunks(self, revs, df=None, targetsize=None):
1435 1435 """Obtain decompressed chunks for the specified revisions.
1436 1436
1437 1437 Accepts an iterable of numeric revisions that are assumed to be in
1438 1438 ascending order. Also accepts an optional already-open file handle
1439 1439 to be used for reading. If used, the seek position of the file will
1440 1440 not be preserved.
1441 1441
1442 1442 This function is similar to calling ``self._chunk()`` multiple times,
1443 1443 but is faster.
1444 1444
1445 1445 Returns a list with decompressed data for each requested revision.
1446 1446 """
1447 1447 if not revs:
1448 1448 return []
1449 1449 start = self.start
1450 1450 length = self.length
1451 1451 inline = self._inline
1452 1452 iosize = self._io.size
1453 1453 buffer = util.buffer
1454 1454
1455 1455 l = []
1456 1456 ladd = l.append
1457 1457
1458 1458 if not self._withsparseread:
1459 1459 slicedchunks = (revs,)
1460 1460 else:
1461 1461 slicedchunks = deltautil.slicechunk(self, revs,
1462 1462 targetsize=targetsize)
1463 1463
1464 1464 for revschunk in slicedchunks:
1465 1465 firstrev = revschunk[0]
1466 1466 # Skip trailing revisions with empty diff
1467 1467 for lastrev in revschunk[::-1]:
1468 1468 if length(lastrev) != 0:
1469 1469 break
1470 1470
1471 1471 try:
1472 1472 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1473 1473 except OverflowError:
1474 1474 # issue4215 - we can't cache a run of chunks greater than
1475 1475 # 2G on Windows
1476 1476 return [self._chunk(rev, df=df) for rev in revschunk]
1477 1477
1478 1478 decomp = self.decompress
1479 1479 for rev in revschunk:
1480 1480 chunkstart = start(rev)
1481 1481 if inline:
1482 1482 chunkstart += (rev + 1) * iosize
1483 1483 chunklength = length(rev)
1484 1484 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1485 1485
1486 1486 return l
1487 1487
1488 1488 def _chunkclear(self):
1489 1489 """Clear the raw chunk cache."""
1490 1490 self._chunkcache = (0, '')
1491 1491
1492 1492 def deltaparent(self, rev):
1493 1493 """return deltaparent of the given revision"""
1494 1494 base = self.index[rev][3]
1495 1495 if base == rev:
1496 1496 return nullrev
1497 1497 elif self._generaldelta:
1498 1498 return base
1499 1499 else:
1500 1500 return rev - 1
1501 1501
1502 1502 def issnapshot(self, rev):
1503 1503 """tells whether rev is a snapshot
1504 1504 """
1505 1505 if rev == nullrev:
1506 1506 return True
1507 1507 deltap = self.deltaparent(rev)
1508 1508 if deltap == nullrev:
1509 1509 return True
1510 1510 p1, p2 = self.parentrevs(rev)
1511 1511 if deltap in (p1, p2):
1512 1512 return False
1513 1513 return self.issnapshot(deltap)
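# Illustrative sketch: if rev 5 is stored as a delta against rev 3, rev 3 is
# not a parent of rev 5, and rev 3's own delta base is nullrev, then
# issnapshot(3) is True (a full snapshot) and issnapshot(5) is True (an
# intermediate snapshot); a revision delta'd directly against one of its
# parents is never a snapshot.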
1514 1514
1515 1515 def snapshotdepth(self, rev):
1516 1516 """number of snapshots in the chain before this one"""
1517 1517 if not self.issnapshot(rev):
1518 1518 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1519 1519 return len(self._deltachain(rev)[0]) - 1
1520 1520
1521 1521 def revdiff(self, rev1, rev2):
1522 1522 """return or calculate a delta between two revisions
1523 1523
1524 1524 The delta calculated is in binary form and is intended to be written to
1525 1525 revlog data directly. So this function needs raw revision data.
1526 1526 """
1527 1527 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1528 1528 return bytes(self._chunk(rev2))
1529 1529
1530 1530 return mdiff.textdiff(self.revision(rev1, raw=True),
1531 1531 self.revision(rev2, raw=True))
1532 1532
1533 1533 def revision(self, nodeorrev, _df=None, raw=False):
1534 1534 """return an uncompressed revision of a given node or revision
1535 1535 number.
1536 1536
1537 1537 _df - an existing file handle to read from. (internal-only)
1538 1538 raw - an optional argument specifying if the revision data is to be
1539 1539 treated as raw data when applying flag transforms. 'raw' should be set
1540 1540 to True when generating changegroups or in debug commands.
1541 1541 """
1542 1542 if isinstance(nodeorrev, int):
1543 1543 rev = nodeorrev
1544 1544 node = self.node(rev)
1545 1545 else:
1546 1546 node = nodeorrev
1547 1547 rev = None
1548 1548
1549 1549 cachedrev = None
1550 1550 flags = None
1551 1551 rawtext = None
1552 1552 if node == nullid:
1553 1553 return ""
1554 1554 if self._cache:
1555 1555 if self._cache[0] == node:
1556 1556 # _cache only stores rawtext
1557 1557 if raw:
1558 1558 return self._cache[2]
1559 1559 # duplicated, but good for perf
1560 1560 if rev is None:
1561 1561 rev = self.rev(node)
1562 1562 if flags is None:
1563 1563 flags = self.flags(rev)
1564 1564 # no extra flags set, no flag processor runs, text = rawtext
1565 1565 if flags == REVIDX_DEFAULT_FLAGS:
1566 1566 return self._cache[2]
1567 1567 # rawtext is reusable. need to run flag processor
1568 1568 rawtext = self._cache[2]
1569 1569
1570 1570 cachedrev = self._cache[1]
1571 1571
1572 1572 # look up what we need to read
1573 1573 if rawtext is None:
1574 1574 if rev is None:
1575 1575 rev = self.rev(node)
1576 1576
1577 1577 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1578 1578 if stopped:
1579 1579 rawtext = self._cache[2]
1580 1580
1581 1581 # drop cache to save memory
1582 1582 self._cache = None
1583 1583
1584 1584 targetsize = None
1585 1585 rawsize = self.index[rev][2]
1586 1586 if 0 <= rawsize:
1587 1587 targetsize = 4 * rawsize
1588 1588
1589 1589 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1590 1590 if rawtext is None:
1591 1591 rawtext = bytes(bins[0])
1592 1592 bins = bins[1:]
1593 1593
1594 1594 rawtext = mdiff.patches(rawtext, bins)
1595 1595 self._cache = (node, rev, rawtext)
1596 1596
1597 1597 if flags is None:
1598 1598 if rev is None:
1599 1599 rev = self.rev(node)
1600 1600 flags = self.flags(rev)
1601 1601
1602 1602 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1603 1603 if validatehash:
1604 1604 self.checkhash(text, node, rev=rev)
1605 1605
1606 1606 return text
1607 1607
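    # Editor's note: an editorial sketch of the reconstruction step above,
    # where the fulltext is rebuilt from the decompressed delta chain. The
    # real code hands binary deltas to mdiff.patches; here each delta is
    # modeled as a list of (start, end, replacement) hunks for clarity.
    def _sketch_applychain(chunks):
        text = chunks[0]             # chain starts with a snapshot fulltext
        for delta in chunks[1:]:
            # apply hunks back to front so earlier offsets stay valid
            for start, end, repl in sorted(delta, reverse=True):
                text = text[:start] + repl + text[end:]
        return text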
1608 1608 def hash(self, text, p1, p2):
1609 1609 """Compute a node hash.
1610 1610
1611 1611 Available as a function so that subclasses can replace the hash
1612 1612 as needed.
1613 1613 """
1614 1614 return storageutil.hashrevisionsha1(text, p1, p2)
1615 1615
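    # Editor's note: a self-contained sketch of the SHA-1 node hash that
    # storageutil.hashrevisionsha1 computes: the two parent nodes are hashed
    # in sorted order, followed by the revision text.
    def _sketch_hashrevision(text, p1, p2):
        import hashlib
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()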
1616 1616 def _processflags(self, text, flags, operation, raw=False):
1617 1617 """Inspect revision data flags and applies transforms defined by
1618 1618 registered flag processors.
1619 1619
1620 1620 ``text`` - the revision data to process
1621 1621 ``flags`` - the revision flags
1622 1622 ``operation`` - the operation being performed (read or write)
1623 1623 ``raw`` - an optional argument describing if the raw transform should be
1624 1624 applied.
1625 1625
1626 1626 This method processes the flags in the order (or reverse order if
1627 1627 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1628 1628         flag processors registered for present flags. The order defined
1629 1629         in REVIDX_FLAGS_ORDER must be stable because transforms do not commute.
1630 1630
1631 1631 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1632 1632 processed text and ``validatehash`` is a bool indicating whether the
1633 1633 returned text should be checked for hash integrity.
1634 1634
1635 1635 Note: If the ``raw`` argument is set, it has precedence over the
1636 1636 operation and will only update the value of ``validatehash``.
1637 1637 """
1638 1638 # fast path: no flag processors will run
1639 1639 if flags == 0:
1640 1640 return text, True
1641 1641         if operation not in ('read', 'write'):
1642 1642 raise error.ProgrammingError(_("invalid '%s' operation") %
1643 1643 operation)
1644 1644 # Check all flags are known.
1645 1645 if flags & ~REVIDX_KNOWN_FLAGS:
1646 1646 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1647 1647 (flags & ~REVIDX_KNOWN_FLAGS))
1648 1648 validatehash = True
1649 1649 # Depending on the operation (read or write), the order might be
1650 1650 # reversed due to non-commutative transforms.
1651 1651 orderedflags = REVIDX_FLAGS_ORDER
1652 1652 if operation == 'write':
1653 1653 orderedflags = reversed(orderedflags)
1654 1654
1655 1655 for flag in orderedflags:
1656 1656 # If a flagprocessor has been registered for a known flag, apply the
1657 1657 # related operation transform and update result tuple.
1658 1658 if flag & flags:
1659 1659 vhash = True
1660 1660
1661 1661 if flag not in self._flagprocessors:
1662 1662                 message = _("missing processor for flag '%#x'") % flag
1663 1663 raise error.RevlogError(message)
1664 1664
1665 1665 processor = self._flagprocessors[flag]
1666 1666 if processor is not None:
1667 1667 readtransform, writetransform, rawtransform = processor
1668 1668
1669 1669 if raw:
1670 1670 vhash = rawtransform(self, text)
1671 1671 elif operation == 'read':
1672 1672 text, vhash = readtransform(self, text)
1673 1673 else: # write operation
1674 1674 text, vhash = writetransform(self, text)
1675 1675 validatehash = validatehash and vhash
1676 1676
1677 1677 return text, validatehash
1678 1678
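    # Editor's note: a minimal sketch of the dispatch loop above, omitting
    # the raw path. ``processors`` maps a flag bit to a (read, write, raw)
    # triple of callables and ``order`` is the stable flag ordering; both
    # are illustrative stand-ins for the registered flag processors.
    def _sketch_processflags(text, flags, operation, processors, order):
        validatehash = True
        if operation == 'write':
            order = list(reversed(order))    # transforms do not commute
        for flag in order:
            if flag & flags:
                readtf, writetf, rawtf = processors[flag]
                tf = readtf if operation == 'read' else writetf
                text, vhash = tf(text)
                validatehash = validatehash and vhash
        return text, validatehash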
1679 1679 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1680 1680 """Check node hash integrity.
1681 1681
1682 1682 Available as a function so that subclasses can extend hash mismatch
1683 1683 behaviors as needed.
1684 1684 """
1685 1685 try:
1686 1686 if p1 is None and p2 is None:
1687 1687 p1, p2 = self.parents(node)
1688 1688 if node != self.hash(text, p1, p2):
1689 1689 revornode = rev
1690 1690 if revornode is None:
1691 1691 revornode = templatefilters.short(hex(node))
1692 1692 raise error.RevlogError(_("integrity check failed on %s:%s")
1693 1693 % (self.indexfile, pycompat.bytestr(revornode)))
1694 1694 except error.RevlogError:
1695 1695 if self._censorable and storageutil.iscensoredtext(text):
1696 1696 raise error.CensoredNodeError(self.indexfile, node, text)
1697 1697 raise
1698 1698
1699 1699 def _enforceinlinesize(self, tr, fp=None):
1700 1700 """Check if the revlog is too big for inline and convert if so.
1701 1701
1702 1702 This should be called after revisions are added to the revlog. If the
1703 1703 revlog has grown too large to be an inline revlog, it will convert it
1704 1704 to use multiple index and data files.
1705 1705 """
1706 1706 tiprev = len(self) - 1
1707 1707 if (not self._inline or
1708 1708 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1709 1709 return
1710 1710
1711 1711 trinfo = tr.find(self.indexfile)
1712 1712 if trinfo is None:
1713 1713 raise error.RevlogError(_("%s not found in the transaction")
1714 1714 % self.indexfile)
1715 1715
1716 1716 trindex = trinfo[2]
1717 1717 if trindex is not None:
1718 1718 dataoff = self.start(trindex)
1719 1719 else:
1720 1720 # revlog was stripped at start of transaction, use all leftover data
1721 1721 trindex = len(self) - 1
1722 1722 dataoff = self.end(tiprev)
1723 1723
1724 1724 tr.add(self.datafile, dataoff)
1725 1725
1726 1726 if fp:
1727 1727 fp.flush()
1728 1728 fp.close()
1729 1729
1730 1730 with self._datafp('w') as df:
1731 1731 for r in self:
1732 1732 df.write(self._getsegmentforrevs(r, r)[1])
1733 1733
1734 1734 with self._indexfp('w') as fp:
1735 1735 self.version &= ~FLAG_INLINE_DATA
1736 1736 self._inline = False
1737 1737 io = self._io
1738 1738 for i in self:
1739 1739 e = io.packentry(self.index[i], self.node, self.version, i)
1740 1740 fp.write(e)
1741 1741
1742 1742         # the temp file replaces the real index when we exit the
1743 1743         # context manager
1744 1744
1745 1745 tr.replace(self.indexfile, trindex * self._io.size)
1746 1746 self._chunkclear()
1747 1747
1748 1748 def _nodeduplicatecallback(self, transaction, node):
1749 1749 """called when trying to add a node already stored.
1750 1750 """
1751 1751
1752 1752 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1753 1753 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1754 1754 """add a revision to the log
1755 1755
1756 1756 text - the revision data to add
1757 1757 transaction - the transaction object used for rollback
1758 1758 link - the linkrev data to add
1759 1759 p1, p2 - the parent nodeids of the revision
1760 1760 cachedelta - an optional precomputed delta
1761 1761 node - nodeid of revision; typically node is not specified, and it is
1762 1762 computed by default as hash(text, p1, p2), however subclasses might
1763 1763 use different hashing method (and override checkhash() in such case)
1764 1764 flags - the known flags to set on the revision
1765 1765 deltacomputer - an optional deltacomputer instance shared between
1766 1766 multiple calls
1767 1767 """
1768 1768 if link == nullrev:
1769 1769 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1770 1770 % self.indexfile)
1771 1771
1772 1772 if flags:
1773 1773 node = node or self.hash(text, p1, p2)
1774 1774
1775 1775 rawtext, validatehash = self._processflags(text, flags, 'write')
1776 1776
1777 1777 # If the flag processor modifies the revision data, ignore any provided
1778 1778 # cachedelta.
1779 1779 if rawtext != text:
1780 1780 cachedelta = None
1781 1781
1782 1782 if len(rawtext) > _maxentrysize:
1783 1783 raise error.RevlogError(
1784 1784 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1785 1785 % (self.indexfile, len(rawtext)))
1786 1786
1787 1787 node = node or self.hash(rawtext, p1, p2)
1788 1788 if node in self.nodemap:
1789 1789 return node
1790 1790
1791 1791 if validatehash:
1792 1792 self.checkhash(rawtext, node, p1=p1, p2=p2)
1793 1793
1794 1794 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1795 1795 flags, cachedelta=cachedelta,
1796 1796 deltacomputer=deltacomputer)
1797 1797
1798 1798 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1799 1799 cachedelta=None, deltacomputer=None):
1800 1800 """add a raw revision with known flags, node and parents
1801 1801 useful when reusing a revision not stored in this revlog (ex: received
1802 1802 over wire, or read from an external bundle).
1803 1803 """
1804 1804 dfh = None
1805 1805 if not self._inline:
1806 1806 dfh = self._datafp("a+")
1807 1807 ifh = self._indexfp("a+")
1808 1808 try:
1809 1809 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1810 1810 flags, cachedelta, ifh, dfh,
1811 1811 deltacomputer=deltacomputer)
1812 1812 finally:
1813 1813 if dfh:
1814 1814 dfh.close()
1815 1815 ifh.close()
1816 1816
1817 1817 def compress(self, data):
1818 1818 """Generate a possibly-compressed representation of data."""
1819 1819 if not data:
1820 1820 return '', data
1821 1821
1822 1822 compressed = self._compressor.compress(data)
1823 1823
1824 1824 if compressed:
1825 1825 # The revlog compressor added the header in the returned data.
1826 1826 return '', compressed
1827 1827
1828 1828 if data[0:1] == '\0':
1829 1829 return '', data
1830 1830 return 'u', data
1831 1831
1832 1832 def decompress(self, data):
1833 1833 """Decompress a revlog chunk.
1834 1834
1835 1835 The chunk is expected to begin with a header identifying the
1836 1836 format type so it can be routed to an appropriate decompressor.
1837 1837 """
1838 1838 if not data:
1839 1839 return data
1840 1840
1841 1841 # Revlogs are read much more frequently than they are written and many
1842 1842 # chunks only take microseconds to decompress, so performance is
1843 1843 # important here.
1844 1844 #
1845 1845 # We can make a few assumptions about revlogs:
1846 1846 #
1847 1847 # 1) the majority of chunks will be compressed (as opposed to inline
1848 1848 # raw data).
1849 1849         # 2) decompressing *any* data will likely be at least 10x slower than
1850 1850 # returning raw inline data.
1851 1851 # 3) we want to prioritize common and officially supported compression
1852 1852 # engines
1853 1853 #
1854 1854 # It follows that we want to optimize for "decompress compressed data
1855 1855 # when encoded with common and officially supported compression engines"
1856 1856 # case over "raw data" and "data encoded by less common or non-official
1857 1857 # compression engines." That is why we have the inline lookup first
1858 1858 # followed by the compengines lookup.
1859 1859 #
1860 1860 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1861 1861 # compressed chunks. And this matters for changelog and manifest reads.
1862 1862 t = data[0:1]
1863 1863
1864 1864 if t == 'x':
1865 1865 try:
1866 1866 return _zlibdecompress(data)
1867 1867 except zlib.error as e:
1868 1868 raise error.RevlogError(_('revlog decompress error: %s') %
1869 1869 stringutil.forcebytestr(e))
1870 1870 # '\0' is more common than 'u' so it goes first.
1871 1871 elif t == '\0':
1872 1872 return data
1873 1873 elif t == 'u':
1874 1874 return util.buffer(data, 1)
1875 1875
1876 1876 try:
1877 1877 compressor = self._decompressors[t]
1878 1878 except KeyError:
1879 1879 try:
1880 1880 engine = util.compengines.forrevlogheader(t)
1881 1881 compressor = engine.revlogcompressor()
1882 1882 self._decompressors[t] = compressor
1883 1883 except KeyError:
1884 1884 raise error.RevlogError(_('unknown compression type %r') % t)
1885 1885
1886 1886 return compressor.decompress(data)
1887 1887
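    # Editor's note: a hedged, zlib-only round trip showing the chunk header
    # scheme the two methods above rely on: zlib data announces itself with
    # its own 'x' header byte, 'u' marks stored-uncompressed data, and '\0'
    # marks raw data that happened to begin with a NUL byte.
    def _sketch_roundtrip(data):
        import zlib
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            chunk = compressed               # starts with b'x'
        elif data[:1] == b'\0':
            chunk = data                     # self-describing raw chunk
        else:
            chunk = b'u' + data              # explicit "uncompressed" header
        t = chunk[:1]
        if t == b'x':
            return zlib.decompress(chunk)
        elif t == b'\0':
            return chunk
        return chunk[1:]                     # strip the 'u' header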
1888 1888 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1889 1889 cachedelta, ifh, dfh, alwayscache=False,
1890 1890 deltacomputer=None):
1891 1891 """internal function to add revisions to the log
1892 1892
1893 1893 see addrevision for argument descriptions.
1894 1894
1895 1895 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1896 1896
1897 1897 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1898 1898 be used.
1899 1899
1900 1900 invariants:
1901 1901 - rawtext is optional (can be None); if not set, cachedelta must be set.
1902 1902 if both are set, they must correspond to each other.
1903 1903 """
1904 1904 if node == nullid:
1905 1905 raise error.RevlogError(_("%s: attempt to add null revision") %
1906 1906 self.indexfile)
1907 1907 if node == wdirid or node in wdirfilenodeids:
1908 1908 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1909 1909 self.indexfile)
1910 1910
1911 1911 if self._inline:
1912 1912 fh = ifh
1913 1913 else:
1914 1914 fh = dfh
1915 1915
1916 1916 btext = [rawtext]
1917 1917
1918 1918 curr = len(self)
1919 1919 prev = curr - 1
1920 1920 offset = self.end(prev)
1921 1921 p1r, p2r = self.rev(p1), self.rev(p2)
1922 1922
1923 1923 # full versions are inserted when the needed deltas
1924 1924 # become comparable to the uncompressed text
1925 1925 if rawtext is None:
1926 1926 # need rawtext size, before changed by flag processors, which is
1927 1927 # the non-raw size. use revlog explicitly to avoid filelog's extra
1928 1928 # logic that might remove metadata size.
1929 1929 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1930 1930 cachedelta[1])
1931 1931 else:
1932 1932 textlen = len(rawtext)
1933 1933
1934 1934 if deltacomputer is None:
1935 1935 deltacomputer = deltautil.deltacomputer(self)
1936 1936
1937 1937 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1938 1938
1939 1939 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1940 1940
1941 1941 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1942 1942 deltainfo.base, link, p1r, p2r, node)
1943 1943 self.index.append(e)
1944 1944 self.nodemap[node] = curr
1945 1945
1946 1946 entry = self._io.packentry(e, self.node, self.version, curr)
1947 1947 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1948 1948 link, offset)
1949 1949
1950 1950 rawtext = btext[0]
1951 1951
1952 1952 if alwayscache and rawtext is None:
1953 1953 rawtext = deltacomputer.buildtext(revinfo, fh)
1954 1954
1955 1955 if type(rawtext) == bytes: # only accept immutable objects
1956 1956 self._cache = (node, curr, rawtext)
1957 1957 self._chainbasecache[curr] = deltainfo.chainbase
1958 1958 return node
1959 1959
1960 1960 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1961 1961 # Files opened in a+ mode have inconsistent behavior on various
1962 1962 # platforms. Windows requires that a file positioning call be made
1963 1963 # when the file handle transitions between reads and writes. See
1964 1964 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1965 1965 # platforms, Python or the platform itself can be buggy. Some versions
1966 1966 # of Solaris have been observed to not append at the end of the file
1967 1967 # if the file was seeked to before the end. See issue4943 for more.
1968 1968 #
1969 1969 # We work around this issue by inserting a seek() before writing.
1970 1970 # Note: This is likely not necessary on Python 3.
1971 1971 ifh.seek(0, os.SEEK_END)
1972 1972 if dfh:
1973 1973 dfh.seek(0, os.SEEK_END)
1974 1974
1975 1975 curr = len(self) - 1
1976 1976 if not self._inline:
1977 1977 transaction.add(self.datafile, offset)
1978 1978 transaction.add(self.indexfile, curr * len(entry))
1979 1979 if data[0]:
1980 1980 dfh.write(data[0])
1981 1981 dfh.write(data[1])
1982 1982 ifh.write(entry)
1983 1983 else:
1984 1984 offset += curr * self._io.size
1985 1985 transaction.add(self.indexfile, offset, curr)
1986 1986 ifh.write(entry)
1987 1987 ifh.write(data[0])
1988 1988 ifh.write(data[1])
1989 1989 self._enforceinlinesize(transaction, ifh)
1990 1990
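    # Editor's note: an illustrative reduction of the append-mode workaround
    # described in the comment above; the path and payload are made up.
    # Seeking to EOF before writing sidesteps the inconsistent a+ semantics
    # observed on Windows and some Solaris versions.
    def _sketch_appendsafely(path, payload):
        import os
        with open(path, 'ab+') as fh:
            fh.seek(0, os.SEEK_END)          # position explicitly, then write
            fh.write(payload)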
1991 1991 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1992 1992 """
1993 1993 add a delta group
1994 1994
1995 1995         Given a set of deltas, add them to the revision log. The
1996 1996         first delta is against its parent, which should be in our
1997 1997         log; the rest are against the previous delta.
1998 1998
1999 1999 If ``addrevisioncb`` is defined, it will be called with arguments of
2000 2000 this revlog and the node that was added.
2001 2001 """
2002 2002
2003 2003 nodes = []
2004 2004
2005 2005 r = len(self)
2006 2006 end = 0
2007 2007 if r:
2008 2008 end = self.end(r - 1)
2009 2009 ifh = self._indexfp("a+")
2010 2010 isize = r * self._io.size
2011 2011 if self._inline:
2012 2012 transaction.add(self.indexfile, end + isize, r)
2013 2013 dfh = None
2014 2014 else:
2015 2015 transaction.add(self.indexfile, isize, r)
2016 2016 transaction.add(self.datafile, end)
2017 2017 dfh = self._datafp("a+")
2018 2018 def flush():
2019 2019 if dfh:
2020 2020 dfh.flush()
2021 2021 ifh.flush()
2022 2022 try:
2023 2023 deltacomputer = deltautil.deltacomputer(self)
2024 2024 # loop through our set of deltas
2025 2025 for data in deltas:
2026 2026 node, p1, p2, linknode, deltabase, delta, flags = data
2027 2027 link = linkmapper(linknode)
2028 2028 flags = flags or REVIDX_DEFAULT_FLAGS
2029 2029
2030 2030 nodes.append(node)
2031 2031
2032 2032 if node in self.nodemap:
2033 2033 self._nodeduplicatecallback(transaction, node)
2034 2034 # this can happen if two branches make the same change
2035 2035 continue
2036 2036
2037 2037 for p in (p1, p2):
2038 2038 if p not in self.nodemap:
2039 2039 raise error.LookupError(p, self.indexfile,
2040 2040 _('unknown parent'))
2041 2041
2042 2042 if deltabase not in self.nodemap:
2043 2043 raise error.LookupError(deltabase, self.indexfile,
2044 2044 _('unknown delta base'))
2045 2045
2046 2046 baserev = self.rev(deltabase)
2047 2047
2048 2048 if baserev != nullrev and self.iscensored(baserev):
2049 2049 # if base is censored, delta must be full replacement in a
2050 2050 # single patch operation
2051 2051 hlen = struct.calcsize(">lll")
2052 2052 oldlen = self.rawsize(baserev)
2053 2053 newlen = len(delta) - hlen
2054 2054 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2055 2055 raise error.CensoredBaseError(self.indexfile,
2056 2056 self.node(baserev))
2057 2057
2058 2058 if not flags and self._peek_iscensored(baserev, delta, flush):
2059 2059 flags |= REVIDX_ISCENSORED
2060 2060
2061 2061 # We assume consumers of addrevisioncb will want to retrieve
2062 2062 # the added revision, which will require a call to
2063 2063 # revision(). revision() will fast path if there is a cache
2064 2064 # hit. So, we tell _addrevision() to always cache in this case.
2065 2065 # We're only using addgroup() in the context of changegroup
2066 2066 # generation so the revision data can always be handled as raw
2067 2067 # by the flagprocessor.
2068 2068 self._addrevision(node, None, transaction, link,
2069 2069 p1, p2, flags, (baserev, delta),
2070 2070 ifh, dfh,
2071 2071 alwayscache=bool(addrevisioncb),
2072 2072 deltacomputer=deltacomputer)
2073 2073
2074 2074 if addrevisioncb:
2075 2075 addrevisioncb(self, node)
2076 2076
2077 2077 if not dfh and not self._inline:
2078 2078 # addrevision switched from inline to conventional
2079 2079 # reopen the index
2080 2080 ifh.close()
2081 2081 dfh = self._datafp("a+")
2082 2082 ifh = self._indexfp("a+")
2083 2083 finally:
2084 2084 if dfh:
2085 2085 dfh.close()
2086 2086 ifh.close()
2087 2087
2088 2088 return nodes
2089 2089
2090 2090 def iscensored(self, rev):
2091 2091 """Check if a file revision is censored."""
2092 2092 if not self._censorable:
2093 2093 return False
2094 2094
2095 2095 return self.flags(rev) & REVIDX_ISCENSORED
2096 2096
2097 2097 def _peek_iscensored(self, baserev, delta, flush):
2098 2098 """Quickly check if a delta produces a censored revision."""
2099 2099 if not self._censorable:
2100 2100 return False
2101 2101
2102 2102 # Fragile heuristic: unless new file meta keys are added alphabetically
2103 2103 # preceding "censored", all censored revisions are prefixed by
2104 2104 # "\1\ncensored:". A delta producing such a censored revision must be a
2105 2105 # full-replacement delta, so we inspect the first and only patch in the
2106 2106 # delta for this prefix.
2107 2107 hlen = struct.calcsize(">lll")
2108 2108 if len(delta) <= hlen:
2109 2109 return False
2110 2110
2111 2111 oldlen = self.rawsize(baserev)
2112 2112 newlen = len(delta) - hlen
2113 2113 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2114 2114 return False
2115 2115
2116 2116 add = "\1\ncensored:"
2117 2117 addlen = len(add)
2118 2118 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2119 2119
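    # Editor's note: a sketch of the censor-peek heuristic above, assuming
    # (as the comparison with mdiff.replacediffheader implies) that a full
    # replacement delta is a single ">lll" hunk (start, end, newlength)
    # equal to (0, oldlen, len(payload)).
    def _sketch_peekcensored(delta, oldlen):
        import struct
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False
        if struct.unpack(">lll", delta[:hlen]) != (0, oldlen,
                                                   len(delta) - hlen):
            return False                     # not a full replacement
        return delta[hlen:].startswith(b"\1\ncensored:")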
2120 2120 def getstrippoint(self, minlink):
2121 2121 """find the minimum rev that must be stripped to strip the linkrev
2122 2122
2123 2123 Returns a tuple containing the minimum rev and a set of all revs that
2124 2124 have linkrevs that will be broken by this strip.
2125 2125 """
2126 2126 brokenrevs = set()
2127 2127 strippoint = len(self)
2128 2128
2129 2129 heads = {}
2130 2130 futurelargelinkrevs = set()
2131 2131 for head in self.headrevs():
2132 2132 headlinkrev = self.linkrev(head)
2133 2133 heads[head] = headlinkrev
2134 2134 if headlinkrev >= minlink:
2135 2135 futurelargelinkrevs.add(headlinkrev)
2136 2136
2137 2137 # This algorithm involves walking down the rev graph, starting at the
2138 2138 # heads. Since the revs are topologically sorted according to linkrev,
2139 2139 # once all head linkrevs are below the minlink, we know there are
2140 2140 # no more revs that could have a linkrev greater than minlink.
2141 2141 # So we can stop walking.
2142 2142 while futurelargelinkrevs:
2143 2143 strippoint -= 1
2144 2144 linkrev = heads.pop(strippoint)
2145 2145
2146 2146 if linkrev < minlink:
2147 2147 brokenrevs.add(strippoint)
2148 2148 else:
2149 2149 futurelargelinkrevs.remove(linkrev)
2150 2150
2151 2151 for p in self.parentrevs(strippoint):
2152 2152 if p != nullrev:
2153 2153 plinkrev = self.linkrev(p)
2154 2154 heads[p] = plinkrev
2155 2155 if plinkrev >= minlink:
2156 2156 futurelargelinkrevs.add(plinkrev)
2157 2157
2158 2158 return strippoint, brokenrevs
2159 2159
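    # Editor's note: a condensed sketch of the head-walking loop in
    # getstrippoint() above, over a toy graph described by callables;
    # ``headrevs``, ``parentrevs`` and ``linkrev`` mirror the revlog
    # queries the real method makes.
    def _sketch_getstrippoint(headrevs, parentrevs, linkrev, total, minlink):
        nullrev = -1
        brokenrevs, strippoint = set(), total
        heads = dict((h, linkrev(h)) for h in headrevs)
        pending = set(lr for lr in heads.values() if lr >= minlink)
        while pending:
            strippoint -= 1
            lr = heads.pop(strippoint)
            if lr < minlink:
                brokenrevs.add(strippoint)
            else:
                pending.remove(lr)
            for p in parentrevs(strippoint):
                if p != nullrev:
                    plr = linkrev(p)
                    heads[p] = plr
                    if plr >= minlink:
                        pending.add(plr)
        return strippoint, brokenrevs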
2160 2160 def strip(self, minlink, transaction):
2161 2161 """truncate the revlog on the first revision with a linkrev >= minlink
2162 2162
2163 2163 This function is called when we're stripping revision minlink and
2164 2164 its descendants from the repository.
2165 2165
2166 2166 We have to remove all revisions with linkrev >= minlink, because
2167 2167 the equivalent changelog revisions will be renumbered after the
2168 2168 strip.
2169 2169
2170 2170 So we truncate the revlog on the first of these revisions, and
2171 2171 trust that the caller has saved the revisions that shouldn't be
2172 2172 removed and that it'll re-add them after this truncation.
2173 2173 """
2174 2174 if len(self) == 0:
2175 2175 return
2176 2176
2177 2177 rev, _ = self.getstrippoint(minlink)
2178 2178 if rev == len(self):
2179 2179 return
2180 2180
2181 2181 # first truncate the files on disk
2182 2182 end = self.start(rev)
2183 2183 if not self._inline:
2184 2184 transaction.add(self.datafile, end)
2185 2185 end = rev * self._io.size
2186 2186 else:
2187 2187 end += rev * self._io.size
2188 2188
2189 2189 transaction.add(self.indexfile, end)
2190 2190
2191 2191 # then reset internal state in memory to forget those revisions
2192 2192 self._cache = None
2193 2193 self._chaininfocache = {}
2194 2194 self._chunkclear()
2195 2195 for x in pycompat.xrange(rev, len(self)):
2196 2196 del self.nodemap[self.node(x)]
2197 2197
2198 2198 del self.index[rev:-1]
2199 2199 self._nodepos = None
2200 2200
2201 2201 def checksize(self):
2202 2202 expected = 0
2203 2203 if len(self):
2204 2204 expected = max(0, self.end(len(self) - 1))
2205 2205
2206 2206 try:
2207 2207 with self._datafp() as f:
2208 2208 f.seek(0, 2)
2209 2209 actual = f.tell()
2210 2210 dd = actual - expected
2211 2211 except IOError as inst:
2212 2212 if inst.errno != errno.ENOENT:
2213 2213 raise
2214 2214 dd = 0
2215 2215
2216 2216 try:
2217 2217 f = self.opener(self.indexfile)
2218 2218 f.seek(0, 2)
2219 2219 actual = f.tell()
2220 2220 f.close()
2221 2221 s = self._io.size
2222 2222 i = max(0, actual // s)
2223 2223 di = actual - (i * s)
2224 2224 if self._inline:
2225 2225 databytes = 0
2226 2226 for r in self:
2227 2227 databytes += max(0, self.length(r))
2228 2228 dd = 0
2229 2229 di = actual - len(self) * s - databytes
2230 2230 except IOError as inst:
2231 2231 if inst.errno != errno.ENOENT:
2232 2232 raise
2233 2233 di = 0
2234 2234
2235 2235 return (dd, di)
2236 2236
2237 2237 def files(self):
2238 2238 res = [self.indexfile]
2239 2239 if not self._inline:
2240 2240 res.append(self.datafile)
2241 2241 return res
2242 2242
2243 2243 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2244 2244 assumehaveparentrevisions=False, deltaprevious=False):
2245 2245 if nodesorder not in ('nodes', 'storage', None):
2246 2246 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2247 2247 nodesorder)
2248 2248
2249 2249 if nodesorder is None and not self._generaldelta:
2250 2250 nodesorder = 'storage'
2251 2251
2252 2252 frev = self.rev
2253 2253 fnode = self.node
2254 2254
2255 2255 if nodesorder == 'nodes':
2256 2256 revs = [frev(n) for n in nodes]
2257 2257 elif nodesorder == 'storage':
2258 2258 revs = sorted(frev(n) for n in nodes)
2259 2259 else:
2260 2260 assert self._generaldelta
2261 2261 revs = set(frev(n) for n in nodes)
2262 2262 revs = dagop.linearize(revs, self.parentrevs)
2263 2263
2264 2264 prevrev = None
2265 2265
2266 2266 if deltaprevious or assumehaveparentrevisions:
2267 2267 prevrev = self.parentrevs(revs[0])[0]
2268 2268
2269 2269 # Set of revs available to delta against.
2270 2270 available = set()
2271 2271
2272 2272 for rev in revs:
2273 2273 if rev == nullrev:
2274 2274 continue
2275 2275
2276 2276 node = fnode(rev)
2277 2277 deltaparentrev = self.deltaparent(rev)
2278 2278 p1rev, p2rev = self.parentrevs(rev)
2279 2279
2280 2280 # Forced delta against previous mode.
2281 2281 if deltaprevious:
2282 2282 baserev = prevrev
2283 2283
2284 2284 # Revlog is configured to use full snapshots. Stick to that.
2285 2285 elif not self._storedeltachains:
2286 2286 baserev = nullrev
2287 2287
2288 2288 # There is a delta in storage. We try to use that because it
2289 2289 # amounts to effectively copying data from storage and is
2290 2290 # therefore the fastest.
2291 2291 elif deltaparentrev != nullrev:
2292 2292 # Base revision was already emitted in this group. We can
2293 2293 # always safely use the delta.
2294 2294 if deltaparentrev in available:
2295 2295 baserev = deltaparentrev
2296 2296
2297 2297 # Base revision is a parent that hasn't been emitted already.
2298 2298 # Use it if we can assume the receiver has the parent revision.
2299 2299 elif (assumehaveparentrevisions
2300 2300 and deltaparentrev in (p1rev, p2rev)):
2301 2301 baserev = deltaparentrev
2302 2302
2303 2303 # No guarantee the receiver has the delta parent. Send delta
2304 2304 # against last revision (if possible), which in the common case
2305 2305 # should be similar enough to this revision that the delta is
2306 2306 # reasonable.
2307 2307 elif prevrev is not None:
2308 2308 baserev = prevrev
2309 2309 else:
2310 2310 baserev = nullrev
2311 2311
2312 2312 # Storage has a fulltext revision.
2313 2313
2314 2314 # Let's use the previous revision, which is as good a guess as any.
2315 2315 # There is definitely room to improve this logic.
2316 2316 elif prevrev is not None:
2317 2317 baserev = prevrev
2318 2318 else:
2319 2319 baserev = nullrev
2320 2320
2321 2321 # But we can't actually use our chosen delta base for whatever
2322 2322 # reason. Reset to fulltext.
2323 2323 if baserev != nullrev and not self.candelta(baserev, rev):
2324 2324 baserev = nullrev
2325 2325
2326 2326 revision = None
2327 2327 delta = None
2328 2328 baserevisionsize = None
2329 2329
2330 2330 if revisiondata:
2331 2331 if self.iscensored(baserev) or self.iscensored(rev):
2332 2332 try:
2333 2333 revision = self.revision(node, raw=True)
2334 2334 except error.CensoredNodeError as e:
2335 2335 revision = e.tombstone
2336 2336
2337 2337 if baserev != nullrev:
2338 2338 baserevisionsize = self.rawsize(baserev)
2339 2339
2340 2340 elif baserev == nullrev and not deltaprevious:
2341 2341 revision = self.revision(node, raw=True)
2342 2342 available.add(rev)
2343 2343 else:
2344 2344 delta = self.revdiff(baserev, rev)
2345 2345 available.add(rev)
2346 2346
2347 2347 yield revlogrevisiondelta(
2348 2348 node=node,
2349 2349 p1node=fnode(p1rev),
2350 2350 p2node=fnode(p2rev),
2351 2351 basenode=fnode(baserev),
2352 2352 flags=self.flags(rev),
2353 2353 baserevisionsize=baserevisionsize,
2354 2354 revision=revision,
2355 2355 delta=delta)
2356 2356
2357 2357 prevrev = rev
2358 2358
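    # Editor's note: the delta-base choice made in the loop above, boiled
    # down to its priority order and omitting the forced delta-against-
    # previous mode. Every argument is an illustrative stand-in for state
    # the real method reads from the revlog.
    def _sketch_choosebase(deltaparent, parents, available, prevrev,
                           storedeltachains, assumehaveparents):
        nullrev = -1
        if not storedeltachains:
            return nullrev                   # full snapshots only
        if deltaparent != nullrev:
            if deltaparent in available:     # already emitted in this group
                return deltaparent
            if assumehaveparents and deltaparent in parents:
                return deltaparent           # receiver should have it
        if prevrev is not None:
            return prevrev                   # best remaining guess
        return nullrev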
2359 2359 DELTAREUSEALWAYS = 'always'
2360 2360 DELTAREUSESAMEREVS = 'samerevs'
2361 2361 DELTAREUSENEVER = 'never'
2362 2362
2363 2363 DELTAREUSEFULLADD = 'fulladd'
2364 2364
2365 2365 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2366 2366
2367 2367 def clone(self, tr, destrevlog, addrevisioncb=None,
2368 2368 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2369 2369 """Copy this revlog to another, possibly with format changes.
2370 2370
2371 2371 The destination revlog will contain the same revisions and nodes.
2372 2372 However, it may not be bit-for-bit identical due to e.g. delta encoding
2373 2373 differences.
2374 2374
2375 2375         The ``deltareuse`` argument controls how deltas from the existing revlog
2376 2376 are preserved in the destination revlog. The argument can have the
2377 2377 following values:
2378 2378
2379 2379 DELTAREUSEALWAYS
2380 2380 Deltas will always be reused (if possible), even if the destination
2381 2381 revlog would not select the same revisions for the delta. This is the
2382 2382 fastest mode of operation.
2383 2383 DELTAREUSESAMEREVS
2384 2384 Deltas will be reused if the destination revlog would pick the same
2385 2385 revisions for the delta. This mode strikes a balance between speed
2386 2386 and optimization.
2387 2387 DELTAREUSENEVER
2388 2388 Deltas will never be reused. This is the slowest mode of execution.
2389 2389 This mode can be used to recompute deltas (e.g. if the diff/delta
2390 2390 algorithm changes).
2391 2391
2392 2392 Delta computation can be slow, so the choice of delta reuse policy can
2393 2393 significantly affect run time.
2394 2394
2395 2395 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2396 2396 two extremes. Deltas will be reused if they are appropriate. But if the
2397 2397 delta could choose a better revision, it will do so. This means if you
2398 2398 are converting a non-generaldelta revlog to a generaldelta revlog,
2399 2399 deltas will be recomputed if the delta's parent isn't a parent of the
2400 2400 revision.
2401 2401
2402 2402 In addition to the delta policy, the ``deltabothparents`` argument
2403 2403 controls whether to compute deltas against both parents for merges.
2404 2404         If unset, the destination revlog's existing setting is kept.
2405 2405 """
2406 2406 if deltareuse not in self.DELTAREUSEALL:
2407 2407 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2408 2408
2409 2409 if len(destrevlog):
2410 2410 raise ValueError(_('destination revlog is not empty'))
2411 2411
2412 2412 if getattr(self, 'filteredrevs', None):
2413 2413 raise ValueError(_('source revlog has filtered revisions'))
2414 2414 if getattr(destrevlog, 'filteredrevs', None):
2415 2415 raise ValueError(_('destination revlog has filtered revisions'))
2416 2416
2417 2417 # lazydeltabase controls whether to reuse a cached delta, if possible.
2418 2418 oldlazydeltabase = destrevlog._lazydeltabase
2419 2419 oldamd = destrevlog._deltabothparents
2420 2420
2421 2421 try:
2422 2422 if deltareuse == self.DELTAREUSEALWAYS:
2423 2423 destrevlog._lazydeltabase = True
2424 2424 elif deltareuse == self.DELTAREUSESAMEREVS:
2425 2425 destrevlog._lazydeltabase = False
2426 2426
2427 2427 destrevlog._deltabothparents = deltabothparents or oldamd
2428 2428
2429 2429 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2430 2430 self.DELTAREUSESAMEREVS)
2431 2431
2432 2432 deltacomputer = deltautil.deltacomputer(destrevlog)
2433 2433 index = self.index
2434 2434 for rev in self:
2435 2435 entry = index[rev]
2436 2436
2437 2437 # Some classes override linkrev to take filtered revs into
2438 2438 # account. Use raw entry from index.
2439 2439 flags = entry[0] & 0xffff
2440 2440 linkrev = entry[4]
2441 2441 p1 = index[entry[5]][7]
2442 2442 p2 = index[entry[6]][7]
2443 2443 node = entry[7]
2444 2444
2445 2445 # (Possibly) reuse the delta from the revlog if allowed and
2446 2446 # the revlog chunk is a delta.
2447 2447 cachedelta = None
2448 2448 rawtext = None
2449 2449 if populatecachedelta:
2450 2450 dp = self.deltaparent(rev)
2451 2451 if dp != nullrev:
2452 2452 cachedelta = (dp, bytes(self._chunk(rev)))
2453 2453
2454 2454 if not cachedelta:
2455 2455 rawtext = self.revision(rev, raw=True)
2456 2456
2457 2457
2458 2458 if deltareuse == self.DELTAREUSEFULLADD:
2459 2459 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2460 2460 cachedelta=cachedelta,
2461 2461 node=node, flags=flags,
2462 2462 deltacomputer=deltacomputer)
2463 2463 else:
2464 2464 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2465 2465 checkambig=False)
2466 2466 dfh = None
2467 2467 if not destrevlog._inline:
2468 2468 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2469 2469 try:
2470 2470 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2471 2471 p2, flags, cachedelta, ifh, dfh,
2472 2472 deltacomputer=deltacomputer)
2473 2473 finally:
2474 2474 if dfh:
2475 2475 dfh.close()
2476 2476 ifh.close()
2477 2477
2478 2478 if addrevisioncb:
2479 2479 addrevisioncb(self, rev, node)
2480 2480 finally:
2481 2481 destrevlog._lazydeltabase = oldlazydeltabase
2482 2482 destrevlog._deltabothparents = oldamd
2483 2483
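    # Editor's note: a hypothetical usage sketch of clone(). ``srclog``,
    # ``dstlog`` and ``tr`` stand for an existing revlog, an empty
    # destination revlog and an open transaction; none are defined here.
    #
    #     srclog.clone(tr, dstlog, deltareuse=srclog.DELTAREUSENEVER)
    #
    # DELTAREUSENEVER forces every delta to be recomputed, which is the
    # policy an upgrade that changes the delta algorithm would want.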
2484 2484 def censorrevision(self, node, tombstone=b''):
2485 2485 if (self.version & 0xFFFF) == REVLOGV0:
2486 2486 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2487 2487 self.version)
2488 2488
2489 2489 rev = self.rev(node)
2490 2490 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2491 2491
2492 2492 if len(tombstone) > self.rawsize(rev):
2493 2493 raise error.Abort(_('censor tombstone must be no longer than '
2494 2494 'censored data'))
2495 2495
2496 2496 # Using two files instead of one makes it easy to rewrite entry-by-entry
2497 2497 idxread = self.opener(self.indexfile, 'r')
2498 2498 idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
2499 2499 if self.version & FLAG_INLINE_DATA:
2500 2500 dataread, datawrite = idxread, idxwrite
2501 2501 else:
2502 2502 dataread = self.opener(self.datafile, 'r')
2503 2503 datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
2504 2504
2505 2505 # Copy all revlog data up to the entry to be censored.
2506 2506 offset = self.start(rev)
2507 2507
2508 2508 for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
2509 2509 idxwrite.write(chunk)
2510 2510 for chunk in util.filechunkiter(dataread, limit=offset):
2511 2511 datawrite.write(chunk)
2512 2512
2513 2513 def rewriteindex(r, newoffs, newdata=None):
2514 2514 """Rewrite the index entry with a new data offset and new data.
2515 2515
2516 2516 The newdata argument, if given, is a tuple of three positive
2517 2517 integers: (new compressed, new uncompressed, added flag bits).
2518 2518 """
2519 2519 offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
2520 2520 flags = gettype(offlags)
2521 2521 if newdata:
2522 2522 comp, uncomp, nflags = newdata
2523 2523 flags |= nflags
2524 2524 offlags = offset_type(newoffs, flags)
2525 2525 e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
2526 2526 idxwrite.write(self._io.packentry(e, None, self.version, r))
2527 2527 idxread.seek(self._io.size, 1)
2528 2528
2529 2529 def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
2530 2530 """Write the given fulltext with the given data offset.
2531 2531
2532 2532 Returns:
2533 2533 The integer number of data bytes written, for tracking data
2534 2534 offsets.
2535 2535 """
2536 2536 flag, compdata = self.compress(data)
2537 2537 newcomp = len(flag) + len(compdata)
2538 2538 rewriteindex(r, offs, (newcomp, len(data), nflags))
2539 2539 datawrite.write(flag)
2540 2540 datawrite.write(compdata)
2541 2541 dataread.seek(self.length(r), 1)
2542 2542 return newcomp
2543 2543
2544 2544 # Rewrite censored entry with (padded) tombstone data.
2545 2545 pad = ' ' * (self.rawsize(rev) - len(tombstone))
2546 2546 offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)
2547 2547
2548 2548 # Rewrite all following filelog revisions fixing up offsets and deltas.
2549 2549 for srev in pycompat.xrange(rev + 1, len(self)):
2550 2550 if rev in self.parentrevs(srev):
2551 2551 # Immediate children of censored node must be re-added as
2552 2552 # fulltext.
2553 2553 try:
2554 2554 revdata = self.revision(srev)
2555 2555 except error.CensoredNodeError as e:
2556 2556 revdata = e.tombstone
2557 2557 dlen = rewrite(srev, offset, revdata)
2558 2558 else:
2559 2559 # Copy any other revision data verbatim after fixing up the
2560 2560 # offset.
2561 2561 rewriteindex(srev, offset)
2562 2562 dlen = self.length(srev)
2563 2563 for chunk in util.filechunkiter(dataread, limit=dlen):
2564 2564 datawrite.write(chunk)
2565 2565 offset += dlen
2566 2566
2567 2567 idxread.close()
2568 2568 idxwrite.close()
2569 2569 if dataread is not idxread:
2570 2570 dataread.close()
2571 2571 datawrite.close()
2572 2572
2573 2573 def verifyintegrity(self, state):
2574 2574 """Verifies the integrity of the revlog.
2575 2575
2576 2576 Yields ``revlogproblem`` instances describing problems that are
2577 2577 found.
2578 2578 """
2579 2579 dd, di = self.checksize()
2580 2580 if dd:
2581 2581 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2582 2582 if di:
2583 2583 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2584 2584
2585 2585 version = self.version & 0xFFFF
2586 2586
2587 2587         # The verifier tells us which revlog version to expect.
2588 2588 if version != state['expectedversion']:
2589 2589 yield revlogproblem(
2590 2590 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2591 2591 (self.indexfile, version, state['expectedversion']))
2592 2592
2593 2593 state['skipread'] = set()
2594 2594
2595 2595 for rev in self:
2596 2596 node = self.node(rev)
2597 2597
2598 2598 # Verify contents. 4 cases to care about:
2599 2599 #
2600 2600 # common: the most common case
2601 2601 # rename: with a rename
2602 2602 # meta: file content starts with b'\1\n', the metadata
2603 2603 # header defined in filelog.py, but without a rename
2604 2604 # ext: content stored externally
2605 2605 #
2606 2606 # More formally, their differences are shown below:
2607 2607 #
2608 2608 # | common | rename | meta | ext
2609 2609 # -------------------------------------------------------
2610 2610 # flags() | 0 | 0 | 0 | not 0
2611 2611 # renamed() | False | True | False | ?
2612 2612 # rawtext[0:2]=='\1\n'| False | True | True | ?
2613 2613 #
2614 2614 # "rawtext" means the raw text stored in revlog data, which
2615 2615 # could be retrieved by "revision(rev, raw=True)". "text"
2616 2616 # mentioned below is "revision(rev, raw=False)".
2617 2617 #
2618 2618 # There are 3 different lengths stored physically:
2619 2619 # 1. L1: rawsize, stored in revlog index
2620 2620 # 2. L2: len(rawtext), stored in revlog data
2621 2621 # 3. L3: len(text), stored in revlog data if flags==0, or
2622 2622 # possibly somewhere else if flags!=0
2623 2623 #
2624 2624 # L1 should be equal to L2. L3 could be different from them.
2625 2625 # "text" may or may not affect commit hash depending on flag
2626 2626 # processors (see revlog.addflagprocessor).
2627 2627 #
2628 2628 # | common | rename | meta | ext
2629 2629 # -------------------------------------------------
2630 2630 # rawsize() | L1 | L1 | L1 | L1
2631 2631 # size() | L1 | L2-LM | L1(*) | L1 (?)
2632 2632 # len(rawtext) | L2 | L2 | L2 | L2
2633 2633 # len(text) | L2 | L2 | L2 | L3
2634 2634 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2635 2635 #
2636 2636 # LM: length of metadata, depending on rawtext
2637 2637 # (*): not ideal, see comment in filelog.size
2638 2638 # (?): could be "- len(meta)" if the resolved content has
2639 2639 # rename metadata
2640 2640 #
2641 2641 # Checks needed to be done:
2642 2642 # 1. length check: L1 == L2, in all cases.
2643 2643 # 2. hash check: depending on flag processor, we may need to
2644 2644 # use either "text" (external), or "rawtext" (in revlog).
2645 2645
2646 2646 try:
2647 2647 skipflags = state.get('skipflags', 0)
2648 2648 if skipflags:
2649 2649 skipflags &= self.flags(rev)
2650 2650
2651 2651 if skipflags:
2652 2652 state['skipread'].add(node)
2653 2653 else:
2654 2654 # Side-effect: read content and verify hash.
2655 2655 self.revision(node)
2656 2656
2657 2657 l1 = self.rawsize(rev)
2658 2658 l2 = len(self.revision(node, raw=True))
2659 2659
2660 2660 if l1 != l2:
2661 2661 yield revlogproblem(
2662 2662 error=_('unpacked size is %d, %d expected') % (l2, l1),
2663 2663 node=node)
2664 2664
2665 2665 except error.CensoredNodeError:
2666 2666 if state['erroroncensored']:
2667 2667 yield revlogproblem(error=_('censored file data'),
2668 2668 node=node)
2669 2669 state['skipread'].add(node)
2670 2670 except Exception as e:
2671 2671 yield revlogproblem(
2672 error=_('unpacking %s: %s') % (short(node), e),
2672 error=_('unpacking %s: %s') % (short(node),
2673 stringutil.forcebytestr(e)),
2673 2674 node=node)
2674 2675 state['skipread'].add(node)
2675 2676
2676 2677 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2677 2678 revisionscount=False, trackedsize=False,
2678 2679 storedsize=False):
2679 2680 d = {}
2680 2681
2681 2682 if exclusivefiles:
2682 2683 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2683 2684 if not self._inline:
2684 2685 d['exclusivefiles'].append((self.opener, self.datafile))
2685 2686
2686 2687 if sharedfiles:
2687 2688 d['sharedfiles'] = []
2688 2689
2689 2690 if revisionscount:
2690 2691 d['revisionscount'] = len(self)
2691 2692
2692 2693 if trackedsize:
2693 2694 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2694 2695
2695 2696 if storedsize:
2696 2697 d['storedsize'] = sum(self.opener.stat(path).st_size
2697 2698 for path in self.files())
2698 2699
2699 2700 return d